[fix] IDEOGRAM class name

This commit is contained in:
Al
2015-03-11 17:33:53 -04:00
parent a5f7c73374
commit a446290829
3 changed files with 4 additions and 2 deletions

View File

@@ -11364,7 +11364,7 @@ yy860:
} }
yy861: yy861:
#line 152 "scanner.re" #line 152 "scanner.re"
{ return IDEOGRAPH; } { return IDEOGRAM; }
#line 11369 "scanner.c" #line 11369 "scanner.c"
yy862: yy862:
yyaccept = 14; yyaccept = 14;

View File

@@ -149,7 +149,7 @@ abbreviation = ({word})"\.";
{numeric} { return NUMERIC; } {numeric} { return NUMERIC; }
{apos_word} { return WORD; } {apos_word} { return WORD; }
{ideographic_symbol} { return IDEOGRAPH; } {ideographic_symbol} { return IDEOGRAM; }
{word} { return WORD; } {word} { return WORD; }
{katakana} { return WORD; } {katakana} { return WORD; }
{any_word} { return WORD; } {any_word} { return WORD; }

View File

@@ -12,6 +12,8 @@ extern "C" {
// Word types // Word types
#define WORD 1 // Any letter-only word (includes all unicode letters) #define WORD 1 // Any letter-only word (includes all unicode letters)
#define ABBREVIATION 2 // Loose abbreviations (ending in ".") #define ABBREVIATION 2 // Loose abbreviations (ending in ".")
#define IDEOGRAM 3 // For languages that don't separate on whitespace (e.g. Chinese, Japanese, Korean), separate by character
#define PHRASE 4 // Not part of the first stage tokenizer, but may be used after phrase parsing
// Numbers and numeric types // Numbers and numeric types
#define NUMBER 50 // All digits #define NUMBER 50 // All digits