diff --git a/scripts/geodata/text/normalize.py b/scripts/geodata/text/normalize.py index ca6cf10f..518d5ebf 100644 --- a/scripts/geodata/text/normalize.py +++ b/scripts/geodata/text/normalize.py @@ -37,6 +37,9 @@ DEFAULT_TOKEN_OPTIONS = NORMALIZE_TOKEN_REPLACE_HYPHENS | \ NORMALIZE_TOKEN_DROP_ENGLISH_POSSESSIVES | \ NORMALIZE_TOKEN_DELETE_OTHER_APOSTROPHE +TOKEN_OPTIONS_DROP_PERIODS = NORMALIZE_TOKEN_DELETE_FINAL_PERIOD | \ + NORMALIZE_TOKEN_DELETE_ACRONYM_PERIODS + DEFAULT_TOKEN_OPTIONS_NUMERIC = (DEFAULT_TOKEN_OPTIONS | NORMALIZE_TOKEN_SPLIT_ALPHA_FROM_NUMERIC) @@ -64,6 +67,10 @@ def normalize_string(s, string_options=DEFAULT_STRING_OPTIONS): return normalized +def normalize_token(s, t, token_options=DEFAULT_TOKEN_OPTIONS): + return _normalize.normalize_token(s, t, token_options) + + def normalized_tokens(s, string_options=DEFAULT_STRING_OPTIONS, token_options=DEFAULT_TOKEN_OPTIONS, strip_parentheticals=True):