[python/normalize] importing options from the C module

This commit is contained in:
Al
2015-10-30 12:34:07 -04:00
parent cccc3e9cf5
commit cbeb08f1d1

View File

@@ -5,19 +5,38 @@ from postal.text.token_types import token_types
from postal.text.encoding import safe_decode
# Default string-normalization options: the bitwise OR of the flags below,
# read directly off the _normalize module.
DEFAULT_STRING_OPTIONS = (
    _normalize.NORMALIZE_STRING_LATIN_ASCII
    | _normalize.NORMALIZE_STRING_DECOMPOSE
    | _normalize.NORMALIZE_STRING_TRIM
    | _normalize.NORMALIZE_STRING_REPLACE_HYPHENS
    | _normalize.NORMALIZE_STRING_STRIP_ACCENTS
    | _normalize.NORMALIZE_STRING_LOWERCASE
)
# String options
# Module-level aliases for the string-normalization option constants defined
# on the _normalize module, so they can be referenced from this module by
# their bare names (they are OR-ed together to build DEFAULT_STRING_OPTIONS).
NORMALIZE_STRING_LATIN_ASCII = _normalize.NORMALIZE_STRING_LATIN_ASCII
NORMALIZE_STRING_TRANSLITERATE = _normalize.NORMALIZE_STRING_TRANSLITERATE
NORMALIZE_STRING_STRIP_ACCENTS = _normalize.NORMALIZE_STRING_STRIP_ACCENTS
NORMALIZE_STRING_DECOMPOSE = _normalize.NORMALIZE_STRING_DECOMPOSE
NORMALIZE_STRING_LOWERCASE = _normalize.NORMALIZE_STRING_LOWERCASE
NORMALIZE_STRING_TRIM = _normalize.NORMALIZE_STRING_TRIM
NORMALIZE_STRING_REPLACE_HYPHENS = _normalize.NORMALIZE_STRING_REPLACE_HYPHENS
# Default token-normalization options: the bitwise OR of the flags below,
# read directly off the _normalize module.
DEFAULT_TOKEN_OPTIONS = (
    _normalize.NORMALIZE_TOKEN_REPLACE_HYPHENS
    | _normalize.NORMALIZE_TOKEN_DELETE_FINAL_PERIOD
    | _normalize.NORMALIZE_TOKEN_DELETE_ACRONYM_PERIODS
    | _normalize.NORMALIZE_TOKEN_DROP_ENGLISH_POSSESSIVES
    | _normalize.NORMALIZE_TOKEN_DELETE_OTHER_APOSTROPHE
    | _normalize.NORMALIZE_TOKEN_REPLACE_DIGITS
)
# Default string-normalization options, composed from the module-level
# NORMALIZE_STRING_* names. NORMALIZE_STRING_TRANSLITERATE is not included.
DEFAULT_STRING_OPTIONS = (
    NORMALIZE_STRING_LATIN_ASCII
    | NORMALIZE_STRING_DECOMPOSE
    | NORMALIZE_STRING_TRIM
    | NORMALIZE_STRING_REPLACE_HYPHENS
    | NORMALIZE_STRING_STRIP_ACCENTS
    | NORMALIZE_STRING_LOWERCASE
)
# Token options
# Module-level aliases for the token-normalization option constants defined
# on the _normalize module, so they can be referenced from this module by
# their bare names (they are OR-ed together to build DEFAULT_TOKEN_OPTIONS).
NORMALIZE_TOKEN_REPLACE_HYPHENS = _normalize.NORMALIZE_TOKEN_REPLACE_HYPHENS
NORMALIZE_TOKEN_DELETE_HYPHENS = _normalize.NORMALIZE_TOKEN_DELETE_HYPHENS
NORMALIZE_TOKEN_DELETE_FINAL_PERIOD = _normalize.NORMALIZE_TOKEN_DELETE_FINAL_PERIOD
NORMALIZE_TOKEN_DELETE_ACRONYM_PERIODS = _normalize.NORMALIZE_TOKEN_DELETE_ACRONYM_PERIODS
NORMALIZE_TOKEN_DROP_ENGLISH_POSSESSIVES = _normalize.NORMALIZE_TOKEN_DROP_ENGLISH_POSSESSIVES
NORMALIZE_TOKEN_DELETE_OTHER_APOSTROPHE = _normalize.NORMALIZE_TOKEN_DELETE_OTHER_APOSTROPHE
NORMALIZE_TOKEN_SPLIT_ALPHA_FROM_NUMERIC = _normalize.NORMALIZE_TOKEN_SPLIT_ALPHA_FROM_NUMERIC
NORMALIZE_TOKEN_REPLACE_DIGITS = _normalize.NORMALIZE_TOKEN_REPLACE_DIGITS
# Default token-normalization options, composed from the module-level
# NORMALIZE_TOKEN_* names. NORMALIZE_TOKEN_DELETE_HYPHENS and
# NORMALIZE_TOKEN_SPLIT_ALPHA_FROM_NUMERIC are not included.
DEFAULT_TOKEN_OPTIONS = (
    NORMALIZE_TOKEN_REPLACE_HYPHENS
    | NORMALIZE_TOKEN_DELETE_FINAL_PERIOD
    | NORMALIZE_TOKEN_DELETE_ACRONYM_PERIODS
    | NORMALIZE_TOKEN_DROP_ENGLISH_POSSESSIVES
    | NORMALIZE_TOKEN_DELETE_OTHER_APOSTROPHE
    | NORMALIZE_TOKEN_REPLACE_DIGITS
)
def remove_parens(tokens):