[fix] making a separate gazetteer for toponym abbreviations

This commit is contained in:
Al
2016-09-10 01:08:58 -04:00
parent bcde9e2fe7
commit 551cce8cb1
3 changed files with 12 additions and 9 deletions

View File

@@ -157,11 +157,13 @@ HOUSE_NUMBER_DICTIONARIES = ('house_number', 'no_number')
POSTCODE_DICTIONARIES = ('postcode',)
TOPONYM_ABBREVIATION_DICTIONARIES = STREET_TYPES_DICTIONARIES + ('qualifiers',
'personal_titles',
'synonyms',
'toponyms',
)
# Name of the dictionary that is passed on its own to create_gazetteer to
# build toponym_gazetteer.
TOPONYMS_DICTIONARY = 'toponyms'
# Dictionary names unpacked into create_gazetteer to build
# toponym_abbreviations_gazetteer. This tuple contains neither the
# street-type dictionaries nor the 'toponyms' dictionary.
TOPONYM_ABBREVIATION_DICTIONARIES = ('qualifiers',
'directionals',
'personal_titles',
'synonyms',
)
UNIT_ABBREVIATION_DICTIONARIES = ('level_types_basement',
@@ -198,5 +200,6 @@ chains_gazetteer = create_gazetteer(CHAIN_DICTIONARY)
unit_types_gazetteer = create_gazetteer(*UNIT_ABBREVIATION_DICTIONARIES)
street_and_synonyms_gazetteer = create_gazetteer(*(STREET_TYPES_DICTIONARIES + (SYNONYM_DICTIONARY, )))
abbreviations_gazetteer = create_gazetteer(*ALL_ABBREVIATION_DICTIONARIES)
toponym_gazetteer = create_gazetteer(*TOPONYM_ABBREVIATION_DICTIONARIES)
# Gazetteer over the abbreviation dictionaries; this is the one handed to
# abbreviate() when boundary/toponym component values are abbreviated.
toponym_abbreviations_gazetteer = create_gazetteer(*TOPONYM_ABBREVIATION_DICTIONARIES)
# Gazetteer built from the toponyms dictionary alone.
toponym_gazetteer = create_gazetteer(TOPONYMS_DICTIONARY)
given_name_gazetteer = create_gazetteer(GIVEN_NAME_DICTIONARY)

View File

@@ -774,7 +774,7 @@ class AddressComponents(object):
if component == AddressFormatter.STATE and random.random() < abbreviate_state_prob:
val = state_abbreviations.get_abbreviation(country, language, val, default=val)
elif random.random() < abbreviate_toponym_prob:
val = abbreviate(toponym_gazetteer, val, language, abbreviate_prob=abbreviate_toponym_prob)
val = abbreviate(toponym_abbreviations_gazetteer, val, language, abbreviate_prob=abbreviate_toponym_prob)
address_components[component] = val

View File

@@ -11,7 +11,7 @@ import yaml
from geodata.addresses.units import Unit
from geodata.address_expansions.abbreviations import abbreviate
from geodata.address_expansions.address_dictionaries import address_phrase_dictionaries
from geodata.address_expansions.gazetteers import street_types_gazetteer, unit_types_gazetteer, toponym_gazetteer
from geodata.address_expansions.gazetteers import street_types_gazetteer, unit_types_gazetteer, toponym_abbreviations_gazetteer
from geodata.address_formatting.formatter import AddressFormatter
from geodata.addresses.components import AddressComponents
from geodata.countries.names import country_names
@@ -383,7 +383,7 @@ class OpenAddressesFormatter(object):
for component_key in AddressFormatter.BOUNDARY_COMPONENTS:
component = components.get(component_key, None)
if component is not None:
component = abbreviate(toponym_gazetteer, component, language,
component = abbreviate(toponym_abbreviations_gazetteer, component, language,
abbreviate_prob=abbreviate_toponym_prob)
components[component_key] = component