[normalization] adding a normalize_token function and some token options for deleting periods

This commit is contained in:
Al
2016-12-09 17:46:26 -05:00
parent 318773ffe7
commit c0a468d7e8

View File

@@ -37,6 +37,9 @@ DEFAULT_TOKEN_OPTIONS = NORMALIZE_TOKEN_REPLACE_HYPHENS | \
NORMALIZE_TOKEN_DROP_ENGLISH_POSSESSIVES | \
NORMALIZE_TOKEN_DELETE_OTHER_APOSTROPHE
# Token options for deleting periods: the final-period flag combined
# with the acronym-periods flag.
TOKEN_OPTIONS_DROP_PERIODS = (
    NORMALIZE_TOKEN_DELETE_FINAL_PERIOD
    | NORMALIZE_TOKEN_DELETE_ACRONYM_PERIODS
)
# The default token options with the split-alpha-from-numeric flag added.
DEFAULT_TOKEN_OPTIONS_NUMERIC = (
    DEFAULT_TOKEN_OPTIONS
    | NORMALIZE_TOKEN_SPLIT_ALPHA_FROM_NUMERIC
)
@@ -64,6 +67,10 @@ def normalize_string(s, string_options=DEFAULT_STRING_OPTIONS):
return normalized
def normalize_token(s, t, token_options=DEFAULT_TOKEN_OPTIONS):
    """Normalize a single token via ``_normalize.normalize_token``.

    This is a thin pass-through: all three arguments are forwarded
    unchanged and the callee's result is returned as-is.

    :param s: first positional argument forwarded to the callee
        (presumably the source string -- confirm against ``_normalize``).
    :param t: second positional argument forwarded to the callee
        (presumably the token taken from ``s`` -- confirm against
        ``_normalize``).
    :param token_options: options value forwarded to the callee;
        defaults to ``DEFAULT_TOKEN_OPTIONS``.
    :return: whatever ``_normalize.normalize_token`` returns.
    """
    result = _normalize.normalize_token(s, t, token_options)
    return result
def normalized_tokens(s, string_options=DEFAULT_STRING_OPTIONS,
token_options=DEFAULT_TOKEN_OPTIONS,
strip_parentheticals=True):