[normalization] Adding default token options for numbers so we split alpha from numeric tokens and don't normalize digits
This commit is contained in:
@@ -38,6 +38,8 @@ DEFAULT_TOKEN_OPTIONS = NORMALIZE_TOKEN_REPLACE_HYPHENS | \
|
||||
NORMALIZE_TOKEN_DELETE_OTHER_APOSTROPHE | \
|
||||
NORMALIZE_TOKEN_REPLACE_DIGITS
|
||||
|
||||
# Token options for numeric tokens: start from the defaults, turn digit
# replacement off, and split alphabetic runs from numeric runs.
# `& ~FLAG` is used rather than `^ FLAG` because XOR toggles the bit: it would
# switch digit replacement ON if the flag were ever dropped from the defaults.
DEFAULT_TOKEN_OPTIONS_NUMERIC = (
    (DEFAULT_TOKEN_OPTIONS & ~NORMALIZE_TOKEN_REPLACE_DIGITS)
    | NORMALIZE_TOKEN_SPLIT_ALPHA_FROM_NUMERIC
)
|
||||
|
||||
|
||||
def remove_parens(tokens):
|
||||
new_tokens = []
|
||||
|
||||
Reference in New Issue
Block a user