[fix] Adding basic Han numeral replacement to neighborhood deduping

This commit is contained in:
Al
2016-05-24 14:55:54 -04:00
parent 046f445a56
commit d86443a697

View File

@@ -28,6 +28,17 @@ class NeighborhoodDeduper(NameDeduper):
# Token-level substitutions applied to names before they are compared for
# deduping: each key is rewritten to its value.
replacements = {
    u'saint': u'st',
    u'and': u'&',
    # Basic Han numerals 0-10 mapped to their Arabic-digit form. Every key
    # must be a distinct, non-empty string: duplicate keys in a dict
    # literal silently overwrite one another, keeping only the last value.
    u'〇': u'0',
    u'一': u'1',
    u'二': u'2',
    u'三': u'3',
    u'四': u'4',
    u'五': u'5',
    u'六': u'6',
    u'七': u'7',
    u'八': u'8',
    u'九': u'9',
    u'十': u'10',
}
discriminative_words = set([