[scripts] Adding the tokenize/normalize wrappers directly into the internal geodata package so pypostal can be maintained in an independent repo

Al committed 2016-01-12 13:26:55 -05:00
parent 622dc354e7
commit 58e53cab1c
10 changed files with 731 additions and 5 deletions


@@ -11,13 +11,13 @@ sys.path.append(os.path.realpath(os.path.join(os.pardir, os.pardir)))
 sys.path.append(os.path.realpath(os.path.join(os.pardir, os.pardir, os.pardir, 'python')))

 from address_normalizer.text.normalize import PhraseFilter
-from address_normalizer.text.tokenize import token_types

 from geodata.encoding import safe_decode
 from geodata.string_utils import wide_iter, wide_ord
 from geodata.i18n.unicode_paths import DATA_DIR
 from geodata.i18n.normalize import strip_accents
 from geodata.i18n.unicode_properties import get_chars_by_script, get_script_languages
-from postal.text.tokenize import tokenize
+from geodata.text.tokenize import tokenize
+from geodata.text.tokenize import token_types

 WELL_REPRESENTED_LANGUAGES = set(['en', 'fr', 'it', 'de', 'nl', 'es', 'pt'])
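
The net effect is that both tokenize and token_types now resolve from geodata.text inside the scripts tree, so this script no longer imports anything from the postal package, which is what frees pypostal to move to an independent repo. A minimal usage sketch of the relocated wrapper, assuming it keeps the interface of the old postal.text.tokenize import; the (token, token_type) pair return shape shown here is an assumption, not something this diff confirms:

    # Hypothetical usage of the relocated wrapper (not part of this commit).
    # Assumes tokenize returns (token, token_type) pairs.
    from geodata.text.tokenize import tokenize
    from geodata.text.tokenize import token_types

    for token, token_type in tokenize(u'92 Ave des Champs-Élysées'):
        # token_type can then be compared against the members of token_types
        print(token, token_type)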