[scripts] Adding the tokenize/normalize wrappers directly into the internal geodata package so pypostal can be maintained in an independent repo

Al committed 2016-01-12 13:26:55 -05:00
parent 622dc354e7
commit 58e53cab1c
10 changed files with 731 additions and 5 deletions


@@ -11,13 +11,13 @@ sys.path.append(os.path.realpath(os.path.join(os.pardir, os.pardir)))
 sys.path.append(os.path.realpath(os.path.join(os.pardir, os.pardir, os.pardir, 'python')))

 from address_normalizer.text.normalize import PhraseFilter
-from address_normalizer.text.tokenize import token_types

 from geodata.encoding import safe_decode
 from geodata.string_utils import wide_iter, wide_ord
 from geodata.i18n.unicode_paths import DATA_DIR
 from geodata.i18n.normalize import strip_accents
 from geodata.i18n.unicode_properties import get_chars_by_script, get_script_languages
-from postal.text.tokenize import tokenize
+from geodata.text.tokenize import tokenize
+from geodata.text.tokenize import token_types

 WELL_REPRESENTED_LANGUAGES = set(['en', 'fr', 'it', 'de', 'nl', 'es', 'pt'])
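
The net effect is that both tokenize and token_types now resolve from geodata.text inside the scripts tree, so this script no longer imports anything from the postal package, which is what frees pypostal to move to an independent repo. A minimal usage sketch of the relocated wrapper, assuming it keeps the interface of the old postal.text.tokenize import; the (token, token_type) pair return shape shown here is an assumption, not something this diff confirms:

    # Hypothetical usage of the relocated wrapper (not part of this commit).
    # Assumes tokenize returns (token, token_type) pairs.
    from geodata.text.tokenize import tokenize
    from geodata.text.tokenize import token_types

    for token, token_type in tokenize(u'92 Ave des Champs-Élysées'):
        # token_type can then be compared against the members of token_types
        print(token, token_type)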