[python/normalize] importing options from the C module
@@ -5,19 +5,38 @@ from postal.text.token_types import token_types
 from postal.text.encoding import safe_decode
 
-DEFAULT_STRING_OPTIONS = _normalize.NORMALIZE_STRING_LATIN_ASCII | \
-                         _normalize.NORMALIZE_STRING_DECOMPOSE | \
-                         _normalize.NORMALIZE_STRING_TRIM | \
-                         _normalize.NORMALIZE_STRING_REPLACE_HYPHENS | \
-                         _normalize.NORMALIZE_STRING_STRIP_ACCENTS | \
-                         _normalize.NORMALIZE_STRING_LOWERCASE
+# String options
+NORMALIZE_STRING_LATIN_ASCII = _normalize.NORMALIZE_STRING_LATIN_ASCII
+NORMALIZE_STRING_TRANSLITERATE = _normalize.NORMALIZE_STRING_TRANSLITERATE
+NORMALIZE_STRING_STRIP_ACCENTS = _normalize.NORMALIZE_STRING_STRIP_ACCENTS
+NORMALIZE_STRING_DECOMPOSE = _normalize.NORMALIZE_STRING_DECOMPOSE
+NORMALIZE_STRING_LOWERCASE = _normalize.NORMALIZE_STRING_LOWERCASE
+NORMALIZE_STRING_TRIM = _normalize.NORMALIZE_STRING_TRIM
+NORMALIZE_STRING_REPLACE_HYPHENS = _normalize.NORMALIZE_STRING_REPLACE_HYPHENS
+
+DEFAULT_STRING_OPTIONS = NORMALIZE_STRING_LATIN_ASCII | \
+                         NORMALIZE_STRING_DECOMPOSE | \
+                         NORMALIZE_STRING_TRIM | \
+                         NORMALIZE_STRING_REPLACE_HYPHENS | \
+                         NORMALIZE_STRING_STRIP_ACCENTS | \
+                         NORMALIZE_STRING_LOWERCASE
 
-DEFAULT_TOKEN_OPTIONS = _normalize.NORMALIZE_TOKEN_REPLACE_HYPHENS | \
-                        _normalize.NORMALIZE_TOKEN_DELETE_FINAL_PERIOD | \
-                        _normalize.NORMALIZE_TOKEN_DELETE_ACRONYM_PERIODS | \
-                        _normalize.NORMALIZE_TOKEN_DROP_ENGLISH_POSSESSIVES | \
-                        _normalize.NORMALIZE_TOKEN_DELETE_OTHER_APOSTROPHE | \
-                        _normalize.NORMALIZE_TOKEN_REPLACE_DIGITS
+# Token options
+NORMALIZE_TOKEN_REPLACE_HYPHENS = _normalize.NORMALIZE_TOKEN_REPLACE_HYPHENS
+NORMALIZE_TOKEN_DELETE_HYPHENS = _normalize.NORMALIZE_TOKEN_DELETE_HYPHENS
+NORMALIZE_TOKEN_DELETE_FINAL_PERIOD = _normalize.NORMALIZE_TOKEN_DELETE_FINAL_PERIOD
+NORMALIZE_TOKEN_DELETE_ACRONYM_PERIODS = _normalize.NORMALIZE_TOKEN_DELETE_ACRONYM_PERIODS
+NORMALIZE_TOKEN_DROP_ENGLISH_POSSESSIVES = _normalize.NORMALIZE_TOKEN_DROP_ENGLISH_POSSESSIVES
+NORMALIZE_TOKEN_DELETE_OTHER_APOSTROPHE = _normalize.NORMALIZE_TOKEN_DELETE_OTHER_APOSTROPHE
+NORMALIZE_TOKEN_SPLIT_ALPHA_FROM_NUMERIC = _normalize.NORMALIZE_TOKEN_SPLIT_ALPHA_FROM_NUMERIC
+NORMALIZE_TOKEN_REPLACE_DIGITS = _normalize.NORMALIZE_TOKEN_REPLACE_DIGITS
+
+DEFAULT_TOKEN_OPTIONS = NORMALIZE_TOKEN_REPLACE_HYPHENS | \
+                        NORMALIZE_TOKEN_DELETE_FINAL_PERIOD | \
+                        NORMALIZE_TOKEN_DELETE_ACRONYM_PERIODS | \
+                        NORMALIZE_TOKEN_DROP_ENGLISH_POSSESSIVES | \
+                        NORMALIZE_TOKEN_DELETE_OTHER_APOSTROPHE | \
+                        NORMALIZE_TOKEN_REPLACE_DIGITS
 
 
 def remove_parens(tokens):
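With the flags re-exported at module level, downstream code can compose option masks from postal.normalize itself instead of reaching into the _normalize C extension. A minimal usage sketch under that assumption (only constants shown in this hunk are used; the wrapper that would ultimately consume the masks is not part of this change):

from postal import normalize

# Custom string options: the defaults, but leaving hyphens intact.
# Assumes each NORMALIZE_STRING_* flag is a distinct bit, as in the C header.
string_options = (normalize.DEFAULT_STRING_OPTIONS &
                  ~normalize.NORMALIZE_STRING_REPLACE_HYPHENS)

# Custom token options: the defaults, plus splitting alphabetic from
# numeric characters within a token (per the flag name).
token_options = (normalize.DEFAULT_TOKEN_OPTIONS |
                 normalize.NORMALIZE_TOKEN_SPLIT_ALPHA_FROM_NUMERIC)

# The resulting integers are plain bitmasks, ready to be handed to whatever
# normalization entry point wraps _normalize (not shown in this hunk).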