diff --git a/scripts/geodata/text/normalize.py b/scripts/geodata/text/normalize.py
index 253425a8..78448e6a 100644
--- a/scripts/geodata/text/normalize.py
+++ b/scripts/geodata/text/normalize.py
@@ -38,6 +38,12 @@ DEFAULT_TOKEN_OPTIONS = NORMALIZE_TOKEN_REPLACE_HYPHENS | \
     NORMALIZE_TOKEN_DELETE_OTHER_APOSTROPHE | \
     NORMALIZE_TOKEN_REPLACE_DIGITS
 
+# Same as DEFAULT_TOKEN_OPTIONS, but with REPLACE_DIGITS cleared and
+# SPLIT_ALPHA_FROM_NUMERIC set. "& ~FLAG" always clears the bit; "^ FLAG"
+# would set it if REPLACE_DIGITS were ever removed from the defaults.
+DEFAULT_TOKEN_OPTIONS_NUMERIC = (DEFAULT_TOKEN_OPTIONS & ~NORMALIZE_TOKEN_REPLACE_DIGITS) | \
+    NORMALIZE_TOKEN_SPLIT_ALPHA_FROM_NUMERIC
+
 
 def remove_parens(tokens):
     new_tokens = []