[mv] Moving token type checking to header
This commit is contained in:
@@ -15,6 +15,7 @@
|
|||||||
#include "normalize.h"
|
#include "normalize.h"
|
||||||
#include "scanner.h"
|
#include "scanner.h"
|
||||||
#include "string_utils.h"
|
#include "string_utils.h"
|
||||||
|
#include "token_types.h"
|
||||||
#include "transliterate.h"
|
#include "transliterate.h"
|
||||||
|
|
||||||
typedef struct phrase_language {
|
typedef struct phrase_language {
|
||||||
@@ -30,26 +31,6 @@ KSORT_INIT(phrase_language_array, phrase_language_t, ks_lt_phrase_language)
|
|||||||
|
|
||||||
#define DEFAULT_KEY_LEN 32
|
#define DEFAULT_KEY_LEN 32
|
||||||
|
|
||||||
inline bool is_word_token(uint16_t type) {
|
|
||||||
return type == WORD || type == ABBREVIATION || type == ACRONYM || type == IDEOGRAPHIC_CHAR || type == HANGUL_SYLLABLE;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline bool is_ideographic(uint16_t type) {
|
|
||||||
return type == IDEOGRAPHIC_CHAR || type == HANGUL_SYLLABLE || type == IDEOGRAPHIC_NUMBER;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline bool is_numeric_token(uint16_t type) {
|
|
||||||
return type == NUMERIC;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline bool is_punctuation(uint16_t type) {
|
|
||||||
return type >= PERIOD && type < OTHER;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline bool is_special_token(uint16_t type) {
|
|
||||||
return type == EMAIL || type == URL || type == US_PHONE || type == INTL_PHONE;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline uint64_t get_normalize_token_options(normalize_options_t options) {
|
inline uint64_t get_normalize_token_options(normalize_options_t options) {
|
||||||
uint64_t normalize_token_options = 0;
|
uint64_t normalize_token_options = 0;
|
||||||
|
|
||||||
|
|||||||
@@ -1,8 +1,6 @@
|
|||||||
#ifndef TOKEN_TYPES_H
|
#ifndef TOKEN_TYPES_H
|
||||||
#define TOKEN_TYPES_H
|
#define TOKEN_TYPES_H
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// Doing these as #defines so we can duplicate the values exactly in Python
|
// Doing these as #defines so we can duplicate the values exactly in Python
|
||||||
|
|
||||||
#define END 0 // Null byte
|
#define END 0 // Null byte
|
||||||
@@ -60,6 +58,16 @@
|
|||||||
#define WHITESPACE 300
|
#define WHITESPACE 300
|
||||||
#define NEWLINE 301
|
#define NEWLINE 301
|
||||||
|
|
||||||
#define INVALID_CHAR 500
|
#define INVALID_CHAR 500
|
||||||
|
|
||||||
|
|
||||||
|
// Nonzero when token type `t` is one of the word-like types: WORD,
// ABBREVIATION, ACRONYM, IDEOGRAPHIC_CHAR or HANGUL_SYLLABLE.
// The argument appears once per comparison, so avoid passing an
// expression with side effects.
#define is_word_token(t)            \
    ((t) == WORD ||                 \
     (t) == ABBREVIATION ||         \
     (t) == ACRONYM ||              \
     (t) == IDEOGRAPHIC_CHAR ||     \
     (t) == HANGUL_SYLLABLE)
|
||||||
|
|
||||||
|
// Nonzero when token type `t` is IDEOGRAPHIC_CHAR, HANGUL_SYLLABLE or
// IDEOGRAPHIC_NUMBER.
// The argument appears once per comparison, so avoid passing an
// expression with side effects.
#define is_ideographic(t)           \
    ((t) == IDEOGRAPHIC_CHAR ||     \
     (t) == HANGUL_SYLLABLE ||      \
     (t) == IDEOGRAPHIC_NUMBER)
|
||||||
|
|
||||||
|
// Nonzero when token type `t` is exactly NUMERIC.
#define is_numeric_token(t) \
    ((t) == NUMERIC)
|
||||||
|
|
||||||
|
// Nonzero when token type `t` lies in the half-open range [PERIOD, OTHER).
// The argument appears in both bounds checks, so avoid passing an
// expression with side effects.
#define is_punctuation(t)   \
    (PERIOD <= (t) &&       \
     (t) < OTHER)
|
||||||
|
|
||||||
|
// Nonzero when token type `t` is EMAIL, URL, US_PHONE or INTL_PHONE.
// The argument appears once per comparison, so avoid passing an
// expression with side effects.
#define is_special_token(t)     \
    ((t) == EMAIL ||            \
     (t) == URL ||              \
     (t) == US_PHONE ||         \
     (t) == INTL_PHONE)
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|||||||
Reference in New Issue
Block a user