[normalization] adding a normalize_token function and some token options for deleting periods
This commit is contained in:
@@ -37,6 +37,9 @@ DEFAULT_TOKEN_OPTIONS = NORMALIZE_TOKEN_REPLACE_HYPHENS | \
|
||||
NORMALIZE_TOKEN_DROP_ENGLISH_POSSESSIVES | \
|
||||
NORMALIZE_TOKEN_DELETE_OTHER_APOSTROPHE
|
||||
|
||||
# Token option mask that ORs together the two period-deletion flags:
# NORMALIZE_TOKEN_DELETE_FINAL_PERIOD and NORMALIZE_TOKEN_DELETE_ACRONYM_PERIODS.
TOKEN_OPTIONS_DROP_PERIODS = (
    NORMALIZE_TOKEN_DELETE_FINAL_PERIOD
    | NORMALIZE_TOKEN_DELETE_ACRONYM_PERIODS
)
|
||||
|
||||
# DEFAULT_TOKEN_OPTIONS with the NORMALIZE_TOKEN_SPLIT_ALPHA_FROM_NUMERIC flag OR-ed in.
DEFAULT_TOKEN_OPTIONS_NUMERIC = (DEFAULT_TOKEN_OPTIONS | NORMALIZE_TOKEN_SPLIT_ALPHA_FROM_NUMERIC)
|
||||
|
||||
|
||||
@@ -64,6 +67,10 @@ def normalize_string(s, string_options=DEFAULT_STRING_OPTIONS):
|
||||
return normalized
|
||||
|
||||
|
||||
def normalize_token(s, t, token_options=DEFAULT_TOKEN_OPTIONS):
    """Normalize a single token via ``_normalize.normalize_token``.

    This is a thin wrapper: all three arguments are forwarded unchanged and
    the underlying call's result is returned as-is.

    Args:
        s: Forwarded as the first argument (presumably the source string the
            token was taken from -- confirm against ``_normalize``).
        t: Forwarded as the second argument (presumably the token to
            normalize -- confirm against ``_normalize``).
        token_options: Option mask forwarded as the third argument; defaults
            to ``DEFAULT_TOKEN_OPTIONS``.

    Returns:
        Whatever ``_normalize.normalize_token`` returns.
    """
    result = _normalize.normalize_token(s, t, token_options)
    return result
|
||||
|
||||
|
||||
def normalized_tokens(s, string_options=DEFAULT_STRING_OPTIONS,
|
||||
token_options=DEFAULT_TOKEN_OPTIONS,
|
||||
strip_parentheticals=True):
|
||||
|
||||
Reference in New Issue
Block a user