From 2e7f8f1ae7cbe7dc837073ea5d470a3f4298fe50 Mon Sep 17 00:00:00 2001
From: Al
Date: Wed, 24 Aug 2016 18:52:00 -0400
Subject: [PATCH] [abbreviations] Adding toponyms gazetteer for probabilistically abbreviating things like Mount=>Mt, Saint=>St, Fort=>Ft in place names

---
 scripts/geodata/address_expansions/gazetteers.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/scripts/geodata/address_expansions/gazetteers.py b/scripts/geodata/address_expansions/gazetteers.py
index b416473a..d96325a8 100644
--- a/scripts/geodata/address_expansions/gazetteers.py
+++ b/scripts/geodata/address_expansions/gazetteers.py
@@ -157,6 +157,13 @@ HOUSE_NUMBER_DICTIONARIES = ('house_number', 'no_number')
 
 POSTCODE_DICTIONARIES = ('postcode',)
 
+TOPONYM_ABBREVIATION_DICTIONARIES = STREET_TYPES_DICTIONARIES + ('qualifiers',
+                                                                 'personal_titles',
+                                                                 'synonyms',
+                                                                 'toponyms',
+                                                                 )
+
+
 UNIT_ABBREVIATION_DICTIONARIES = ('level_types_basement',
                                   'level_types_mezzanine',
                                   'level_types_numbered',
@@ -191,4 +198,5 @@ chains_gazetteer = create_gazetteer(CHAIN_DICTIONARY)
 unit_types_gazetteer = create_gazetteer(*UNIT_ABBREVIATION_DICTIONARIES)
 street_and_synonyms_gazetteer = create_gazetteer(*(STREET_TYPES_DICTIONARIES + (SYNONYM_DICTIONARY, )))
 abbreviations_gazetteer = create_gazetteer(*ALL_ABBREVIATION_DICTIONARIES)
+toponym_gazetteer = create_gazetteer(*TOPONYM_ABBREVIATION_DICTIONARIES)
 given_name_gazetteer = create_gazetteer(GIVEN_NAME_DICTIONARY)