From ee1aa564c40e164a08105ecc2160dc61784a5ade Mon Sep 17 00:00:00 2001
From: Al <albarrentine@gmail.com>
Date: Thu, 28 Apr 2016 18:03:44 -0400
Subject: [PATCH] [normalization] normalize tokens should not replace digits by
 default

---
 scripts/geodata/names/deduping.py | 7 +++----
 scripts/geodata/text/normalize.py | 5 ++---
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/scripts/geodata/names/deduping.py b/scripts/geodata/names/deduping.py
index bda2d125..c2457b0e 100644
--- a/scripts/geodata/names/deduping.py
+++ b/scripts/geodata/names/deduping.py
@@ -39,8 +39,7 @@ class NameDeduper(object):
 
     @classmethod
     def tokenize(cls, s):
-        token_options = DEFAULT_TOKEN_OPTIONS ^ NORMALIZE_TOKEN_REPLACE_DIGITS
-        return normalized_tokens(s, token_options=token_options)
+        return normalized_tokens(s)
 
     @classmethod
     def content_tokens(cls, s):
@@ -49,8 +48,8 @@ class NameDeduper(object):
             tokens = remove_parens(tokens)
         return [(cls.replacements.get(t, t), c)
                 for t, c in tokens
-                if c in cls.content_categories
-                and t not in cls.stopwords]
+                if c in cls.content_categories and
+                t not in cls.stopwords]
 
     @classmethod
     def possible_match(cls, tokens1, tokens2):
diff --git a/scripts/geodata/text/normalize.py b/scripts/geodata/text/normalize.py
index 78448e6a..ca6cf10f 100644
--- a/scripts/geodata/text/normalize.py
+++ b/scripts/geodata/text/normalize.py
@@ -35,10 +35,9 @@ DEFAULT_TOKEN_OPTIONS = NORMALIZE_TOKEN_REPLACE_HYPHENS | \
     NORMALIZE_TOKEN_DELETE_FINAL_PERIOD | \
     NORMALIZE_TOKEN_DELETE_ACRONYM_PERIODS | \
     NORMALIZE_TOKEN_DROP_ENGLISH_POSSESSIVES | \
-    NORMALIZE_TOKEN_DELETE_OTHER_APOSTROPHE | \
-    NORMALIZE_TOKEN_REPLACE_DIGITS
+    NORMALIZE_TOKEN_DELETE_OTHER_APOSTROPHE
 
-DEFAULT_TOKEN_OPTIONS_NUMERIC = (DEFAULT_TOKEN_OPTIONS ^ NORMALIZE_TOKEN_REPLACE_DIGITS) | NORMALIZE_TOKEN_SPLIT_ALPHA_FROM_NUMERIC
+DEFAULT_TOKEN_OPTIONS_NUMERIC = (DEFAULT_TOKEN_OPTIONS | NORMALIZE_TOKEN_SPLIT_ALPHA_FROM_NUMERIC)
 
 
 def remove_parens(tokens):