[osm] abbreviate toponyms (qualifiers) with some probability so that the abbreviated forms also appear in the model's phrase dictionaries

2016-08-22 20:29:29 -04:00
parent d281e71d2c
commit 8b57a7acf2
3 changed files with 8 additions and 1 deletions
--- a/scripts/geodata/address_expansions/gazetteers.py
+++ b/scripts/geodata/address_expansions/gazetteers.py
@@ -142,7 +142,6 @@ NAME_DICTIONARIES = (GIVEN_NAME_DICTIONARY,
                     SURNAME_DICTIONARY,)


-
 NAME_ABBREVIATION_DICTIONARIES = STREET_TYPES_DICTIONARIES + ('academic_degrees',
                                                              'building_types',
                                                              'company_types',
@@ -152,6 +151,8 @@ NAME_ABBREVIATION_DICTIONARIES = STREET_TYPES_DICTIONARIES + ('academic_degrees'
                                                              'toponyms',
                                                              )

+QUALIFIERS_DICTIONARY = 'qualifiers'
+
 HOUSE_NUMBER_DICTIONARIES = ('house_number', 'no_number')

 POSTCODE_DICTIONARIES = ('postcode',)
@@ -184,6 +185,7 @@ def create_gazetteer(*dictionaries):


 street_types_gazetteer = create_gazetteer(*STREET_TYPES_DICTIONARIES)
+qualifiers_gazetteer = create_gazetteer(QUALIFIERS_DICTIONARY)
 names_gazetteer = create_gazetteer(*NAME_ABBREVIATION_DICTIONARIES)
 chains_gazetteer = create_gazetteer(CHAIN_DICTIONARY)
 unit_types_gazetteer = create_gazetteer(*UNIT_ABBREVIATION_DICTIONARIES)