diff --git a/src/scanner.c b/src/scanner.c
index bd88e8c8..82a8dcdf 100644
--- a/src/scanner.c
+++ b/src/scanner.c
@@ -11364,7 +11364,7 @@ yy860:
 	}
 yy861:
 #line 152 "scanner.re"
-	{ return IDEOGRAPH; }
+	{ return IDEOGRAM; }
 #line 11369 "scanner.c"
 yy862:
 	yyaccept = 14;
diff --git a/src/scanner.re b/src/scanner.re
index f77e9d0e..a7fdbde4 100644
--- a/src/scanner.re
+++ b/src/scanner.re
@@ -149,7 +149,7 @@ abbreviation = ({word})"\.";
 
 {numeric}                       { return NUMERIC; }
 {apos_word}                     { return WORD; }
-{ideographic_symbol}            { return IDEOGRAPH; }
+{ideographic_symbol}            { return IDEOGRAM; }
 {word}                          { return WORD; }
 {katakana}                      { return WORD; }
 {any_word}                      { return WORD; }
diff --git a/src/token_types.h b/src/token_types.h
index 21741fcc..c1ae854f 100644
--- a/src/token_types.h
+++ b/src/token_types.h
@@ -12,6 +12,8 @@ extern "C" {
 // Word types
 #define WORD 1              // Any letter-only word (includes all unicode letters)
 #define ABBREVIATION 2      // Loose abbreviations (ending in ".")
+#define IDEOGRAM 3          // For languages that don't separate words with whitespace (e.g. Chinese, Japanese, Korean); tokenized one character at a time
+#define PHRASE 4            // Not part of the first-stage tokenizer, but may be used after phrase parsing
 
 // Numbers and numeric types
 #define NUMBER 50           // All digits