[normalization] normalize tokens should not replace digits by default
This commit is contained in:
@@ -35,10 +35,9 @@ DEFAULT_TOKEN_OPTIONS = NORMALIZE_TOKEN_REPLACE_HYPHENS | \
|
||||
NORMALIZE_TOKEN_DELETE_FINAL_PERIOD | \
|
||||
NORMALIZE_TOKEN_DELETE_ACRONYM_PERIODS | \
|
||||
NORMALIZE_TOKEN_DROP_ENGLISH_POSSESSIVES | \
|
||||
NORMALIZE_TOKEN_DELETE_OTHER_APOSTROPHE | \
|
||||
NORMALIZE_TOKEN_REPLACE_DIGITS
|
||||
NORMALIZE_TOKEN_DELETE_OTHER_APOSTROPHE
|
||||
|
||||
DEFAULT_TOKEN_OPTIONS_NUMERIC = (DEFAULT_TOKEN_OPTIONS ^ NORMALIZE_TOKEN_REPLACE_DIGITS) | NORMALIZE_TOKEN_SPLIT_ALPHA_FROM_NUMERIC
|
||||
DEFAULT_TOKEN_OPTIONS_NUMERIC = (DEFAULT_TOKEN_OPTIONS | NORMALIZE_TOKEN_SPLIT_ALPHA_FROM_NUMERIC)
|
||||
|
||||
|
||||
def remove_parens(tokens):
|
||||
|
||||
Reference in New Issue
Block a user