From e4dba2297d1b902d007a2af715ada5c329c288dc Mon Sep 17 00:00:00 2001 From: Al Date: Mon, 28 Dec 2015 01:16:56 -0500 Subject: [PATCH] [mv] Moving token type checking to header --- src/libpostal.c | 21 +-------------------- src/token_types.h | 16 ++++++++++++---- 2 files changed, 13 insertions(+), 24 deletions(-) diff --git a/src/libpostal.c b/src/libpostal.c index 752f5156..c94cc7c1 100644 --- a/src/libpostal.c +++ b/src/libpostal.c @@ -15,6 +15,7 @@ #include "normalize.h" #include "scanner.h" #include "string_utils.h" +#include "token_types.h" #include "transliterate.h" typedef struct phrase_language { @@ -30,26 +31,6 @@ KSORT_INIT(phrase_language_array, phrase_language_t, ks_lt_phrase_language) #define DEFAULT_KEY_LEN 32 -inline bool is_word_token(uint16_t type) { - return type == WORD || type == ABBREVIATION || type == ACRONYM || type == IDEOGRAPHIC_CHAR || type == HANGUL_SYLLABLE; -} - -inline bool is_ideographic(uint16_t type) { - return type == IDEOGRAPHIC_CHAR || type == HANGUL_SYLLABLE || type == IDEOGRAPHIC_NUMBER; -} - -inline bool is_numeric_token(uint16_t type) { - return type == NUMERIC; -} - -inline bool is_punctuation(uint16_t type) { - return type >= PERIOD && type < OTHER; -} - -inline bool is_special_token(uint16_t type) { - return type == EMAIL || type == URL || type == US_PHONE || type == INTL_PHONE; -} - inline uint64_t get_normalize_token_options(normalize_options_t options) { uint64_t normalize_token_options = 0; diff --git a/src/token_types.h b/src/token_types.h index c1d2d02c..e4684437 100644 --- a/src/token_types.h +++ b/src/token_types.h @@ -1,8 +1,6 @@ #ifndef TOKEN_TYPES_H #define TOKEN_TYPES_H - - // Doing these as #defines so we can duplicate the values exactly in Python #define END 0 // Null byte @@ -60,6 +58,16 @@ #define WHITESPACE 300 #define NEWLINE 301 -#define INVALID_CHAR 500 +#define INVALID_CHAR 500 + + +#define is_word_token(type) ((type) == WORD || (type) == ABBREVIATION || (type) == ACRONYM || (type) == IDEOGRAPHIC_CHAR || (type) == HANGUL_SYLLABLE) + +#define is_ideographic(type) ((type) == IDEOGRAPHIC_CHAR || (type) == HANGUL_SYLLABLE || (type) == IDEOGRAPHIC_NUMBER) + +#define is_numeric_token(type) ((type) == NUMERIC) + +#define is_punctuation(type) ((type) >= PERIOD && (type) < OTHER) + +#define is_special_token(type) ((type) == EMAIL || (type) == URL || (type) == US_PHONE || (type) == INTL_PHONE) -#endif