From d86443a6973a1640f81460e48af408da78132da4 Mon Sep 17 00:00:00 2001 From: Al Date: Tue, 24 May 2016 14:55:54 -0400 Subject: [PATCH] [fix] Adding basic Han numeral replacement to neighborhood deduping --- scripts/geodata/neighborhoods/reverse_geocode.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/scripts/geodata/neighborhoods/reverse_geocode.py b/scripts/geodata/neighborhoods/reverse_geocode.py index fac007d0..e005a8b7 100644 --- a/scripts/geodata/neighborhoods/reverse_geocode.py +++ b/scripts/geodata/neighborhoods/reverse_geocode.py @@ -28,6 +28,17 @@ class NeighborhoodDeduper(NameDeduper): replacements = { u'saint': u'st', u'and': u'&', + u'〇': u'0', + u'一': u'1', + u'二': u'2', + u'三': u'3', + u'四': u'4', + u'五': u'5', + u'六': u'6', + u'七': u'7', + u'八': u'8', + u'九': u'9', + u'十': u'10', } discriminative_words = set([