[mv] Moving token type checking to header

This commit is contained in:
Al
2015-12-28 01:16:56 -05:00
parent 0fa1c2389c
commit e4dba2297d
2 changed files with 13 additions and 24 deletions

View File

@@ -15,6 +15,7 @@
#include "normalize.h"
#include "scanner.h"
#include "string_utils.h"
#include "token_types.h"
#include "transliterate.h"
typedef struct phrase_language {
@@ -30,26 +31,6 @@ KSORT_INIT(phrase_language_array, phrase_language_t, ks_lt_phrase_language)
#define DEFAULT_KEY_LEN 32
// Returns true when `type` is one of WORD, ABBREVIATION, ACRONYM,
// IDEOGRAPHIC_CHAR or HANGUL_SYLLABLE.
//
// Declared `static inline`: a plain C99/C11 `inline` definition emits no
// out-of-line symbol, so any call the compiler decides not to inline
// (e.g. at -O0) would be left unresolved at link time.
static inline bool is_word_token(uint16_t type) {
    return type == WORD || type == ABBREVIATION || type == ACRONYM ||
           type == IDEOGRAPHIC_CHAR || type == HANGUL_SYLLABLE;
}
// Returns true when `type` is one of IDEOGRAPHIC_CHAR, HANGUL_SYLLABLE or
// IDEOGRAPHIC_NUMBER.
//
// Declared `static inline`: a plain C99/C11 `inline` definition emits no
// out-of-line symbol, so any call the compiler decides not to inline
// (e.g. at -O0) would be left unresolved at link time.
static inline bool is_ideographic(uint16_t type) {
    return type == IDEOGRAPHIC_CHAR || type == HANGUL_SYLLABLE ||
           type == IDEOGRAPHIC_NUMBER;
}
// Returns true when `type` equals NUMERIC.
//
// Declared `static inline`: a plain C99/C11 `inline` definition emits no
// out-of-line symbol, so any call the compiler decides not to inline
// (e.g. at -O0) would be left unresolved at link time.
static inline bool is_numeric_token(uint16_t type) {
    return type == NUMERIC;
}
// Returns true when `type` lies in the half-open range [PERIOD, OTHER),
// i.e. PERIOD itself matches and OTHER does not.
//
// Declared `static inline`: a plain C99/C11 `inline` definition emits no
// out-of-line symbol, so any call the compiler decides not to inline
// (e.g. at -O0) would be left unresolved at link time.
static inline bool is_punctuation(uint16_t type) {
    return type >= PERIOD && type < OTHER;
}
// Returns true when `type` is one of EMAIL, URL, US_PHONE or INTL_PHONE.
//
// Declared `static inline`: a plain C99/C11 `inline` definition emits no
// out-of-line symbol, so any call the compiler decides not to inline
// (e.g. at -O0) would be left unresolved at link time.
static inline bool is_special_token(uint16_t type) {
    return type == EMAIL || type == URL || type == US_PHONE ||
           type == INTL_PHONE;
}
inline uint64_t get_normalize_token_options(normalize_options_t options) {
uint64_t normalize_token_options = 0;

View File

@@ -1,8 +1,6 @@
#ifndef TOKEN_TYPES_H
#define TOKEN_TYPES_H
// Doing these as #defines so we can duplicate the values exactly in Python
#define END 0 // Null byte
@@ -60,6 +58,16 @@
#define WHITESPACE 300
#define NEWLINE 301
// Defined exactly once; a second identical #define adds nothing.
#define INVALID_CHAR 500
// Non-zero when `type` equals WORD, ABBREVIATION, ACRONYM, IDEOGRAPHIC_CHAR
// or HANGUL_SYLLABLE. Function-like macro: `type` is expanded once per
// comparison, so pass an expression without side effects.
#define is_word_token(type)           \
    ((type) == WORD ||                \
     (type) == ABBREVIATION ||        \
     (type) == ACRONYM ||             \
     (type) == IDEOGRAPHIC_CHAR ||    \
     (type) == HANGUL_SYLLABLE)
// Non-zero when `type` equals IDEOGRAPHIC_CHAR, HANGUL_SYLLABLE or
// IDEOGRAPHIC_NUMBER. Function-like macro: `type` is expanded once per
// comparison, so pass an expression without side effects.
#define is_ideographic(type)          \
    ((type) == IDEOGRAPHIC_CHAR ||    \
     (type) == HANGUL_SYLLABLE ||     \
     (type) == IDEOGRAPHIC_NUMBER)
// Non-zero when `type` equals NUMERIC.
#define is_numeric_token(type) (NUMERIC == (type))
// Non-zero when `type` lies in the half-open range [PERIOD, OTHER): PERIOD
// matches, OTHER does not. Function-like macro: `type` may be expanded
// twice, so pass an expression without side effects.
#define is_punctuation(type) \
    (PERIOD <= (type) && OTHER > (type))
// Non-zero when `type` equals EMAIL, URL, US_PHONE or INTL_PHONE.
// Function-like macro: `type` is expanded once per comparison, so pass an
// expression without side effects.
#define is_special_token(type)    \
    ((type) == EMAIL ||           \
     (type) == URL ||             \
     (type) == US_PHONE ||        \
     (type) == INTL_PHONE)
#endif