[normalization] Adding default token options for numbers so we split alpha from numeric tokens and don't normalize digits
This commit is contained in:
@@ -38,6 +38,8 @@ DEFAULT_TOKEN_OPTIONS = NORMALIZE_TOKEN_REPLACE_HYPHENS | \
|
|||||||
NORMALIZE_TOKEN_DELETE_OTHER_APOSTROPHE | \
|
NORMALIZE_TOKEN_DELETE_OTHER_APOSTROPHE | \
|
||||||
NORMALIZE_TOKEN_REPLACE_DIGITS
|
NORMALIZE_TOKEN_REPLACE_DIGITS
|
||||||
|
|
||||||
|
# Token options for numeric tokens: start from the defaults, drop the
# digit-replacement flag and add the alpha/numeric split flag.
# `& ~FLAG` is used rather than `^ FLAG` because XOR only toggles the bit: it
# would switch digit replacement *on* if the defaults ever stopped including it.
DEFAULT_TOKEN_OPTIONS_NUMERIC = (DEFAULT_TOKEN_OPTIONS & ~NORMALIZE_TOKEN_REPLACE_DIGITS) | \
    NORMALIZE_TOKEN_SPLIT_ALPHA_FROM_NUMERIC
|
||||||
|
|
||||||
|
|
||||||
def remove_parens(tokens):
|
def remove_parens(tokens):
|
||||||
new_tokens = []
|
new_tokens = []
|
||||||
|
|||||||
Reference in New Issue
Block a user