From 3de59506ae5e8dc18dedb85224b3f4541133a34f Mon Sep 17 00:00:00 2001
From: Al
Date: Thu, 10 Dec 2015 18:08:51 -0500
Subject: [PATCH] [parser] Internal separators for parsing purposes include open/close parens, at sign, semicolon, etc. Ignore stray colons not internal to a word (as in Swedish abbreviations)

---
Review note: line breaks restored from a whitespace-collapsed copy of this
patch; the blank context lines are inferred from the hunk header (8 lines on
each side), so confirm them against the target file before applying. The
added ADDRESS_PARSER_IS_IGNORABLE line now tests its token_type argument in
all three comparisons; as submitted it compared token.type (a variable
resolved at the call site, not the macro argument) for INVALID_CHAR and
PERIOD. The post-image hash on the index line predates this change.

 src/address_parser.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/address_parser.h b/src/address_parser.h
index 4f946a39..bb1bdb26 100644
--- a/src/address_parser.h
+++ b/src/address_parser.h
@@ -65,8 +65,8 @@ with the general error-driven averaged perceptron.
 #define ADDRESS_SEPARATOR_FIELD_INTERNAL 1 << 0
 #define ADDRESS_SEPARATOR_FIELD 1 << 1
 
-#define ADDRESS_PARSER_IS_SEPARATOR(token_type) ((token_type) == COMMA || (token_type) == NEWLINE || (token_type) == HYPHEN || (token_type) == DASH || (token_type) == BREAKING_DASH )
-#define ADDRESS_PARSER_IS_IGNORABLE(token_type) ((token.type) == INVALID_CHAR || (token.type) == PERIOD)
+#define ADDRESS_PARSER_IS_SEPARATOR(token_type) ((token_type) == COMMA || (token_type) == NEWLINE || (token_type) == HYPHEN || (token_type) == DASH || (token_type) == BREAKING_DASH || (token_type) == SEMICOLON || (token_type) == PUNCT_OPEN || (token_type) == PUNCT_CLOSE || (token_type) == AT_SIGN )
+#define ADDRESS_PARSER_IS_IGNORABLE(token_type) ((token_type) == INVALID_CHAR || (token_type) == PERIOD || (token_type) == COLON )
 
 #define SEPARATOR_LABEL "sep"
 #define FIELD_SEPARATOR_LABEL "fsep"