[fix] IDEOGRAM class name
This commit is contained in:
@@ -11364,7 +11364,7 @@ yy860:
|
|||||||
}
|
}
|
||||||
yy861:
|
yy861:
|
||||||
#line 152 "scanner.re"
|
#line 152 "scanner.re"
|
||||||
{ return IDEOGRAPH; }
|
{ return IDEOGRAM; }
|
||||||
#line 11369 "scanner.c"
|
#line 11369 "scanner.c"
|
||||||
yy862:
|
yy862:
|
||||||
yyaccept = 14;
|
yyaccept = 14;
|
||||||
|
|||||||
@@ -149,7 +149,7 @@ abbreviation = ({word})"\.";
|
|||||||
|
|
||||||
{numeric} { return NUMERIC; }
|
{numeric} { return NUMERIC; }
|
||||||
{apos_word} { return WORD; }
|
{apos_word} { return WORD; }
|
||||||
{ideographic_symbol} { return IDEOGRAPH; }
|
{ideographic_symbol} { return IDEOGRAM; }
|
||||||
{word} { return WORD; }
|
{word} { return WORD; }
|
||||||
{katakana} { return WORD; }
|
{katakana} { return WORD; }
|
||||||
{any_word} { return WORD; }
|
{any_word} { return WORD; }
|
||||||
|
|||||||
@@ -12,6 +12,8 @@ extern "C" {
|
|||||||
// Word types
|
// Word types
|
||||||
#define WORD 1 // Any letter-only word (includes all unicode letters)
|
#define WORD 1 // Any letter-only word (includes all unicode letters)
|
||||||
#define ABBREVIATION 2 // Loose abbreviations (ending in ".")
|
#define ABBREVIATION 2 // Loose abbreviations (ending in ".")
|
||||||
|
#define IDEOGRAM 3 // For languages that don't separate on whitespace (e.g. Chinese, Japanese, Korean), separate by character
|
||||||
|
#define PHRASE 4 // Not part of the first stage tokenizer, but may be used after phrase parsing
|
||||||
|
|
||||||
// Numbers and numeric types
|
// Numbers and numeric types
|
||||||
#define NUMBER 50 // All digits
|
#define NUMBER 50 // All digits
|
||||||
|
|||||||
Reference in New Issue
Block a user