[fix] making a separate gazetteer for toponym abbreviations
This commit is contained in:
@@ -157,11 +157,13 @@ HOUSE_NUMBER_DICTIONARIES = ('house_number', 'no_number')
|
||||
|
||||
POSTCODE_DICTIONARIES = ('postcode',)
|
||||
|
||||
TOPONYM_ABBREVIATION_DICTIONARIES = STREET_TYPES_DICTIONARIES + ('qualifiers',
|
||||
'personal_titles',
|
||||
'synonyms',
|
||||
'toponyms',
|
||||
)
|
||||
TOPONYMS_DICTIONARY = 'toponyms'
|
||||
|
||||
TOPONYM_ABBREVIATION_DICTIONARIES = ('qualifiers',
|
||||
'directionals',
|
||||
'personal_titles',
|
||||
'synonyms',
|
||||
)
|
||||
|
||||
|
||||
UNIT_ABBREVIATION_DICTIONARIES = ('level_types_basement',
|
||||
@@ -198,5 +200,6 @@ chains_gazetteer = create_gazetteer(CHAIN_DICTIONARY)
|
||||
unit_types_gazetteer = create_gazetteer(*UNIT_ABBREVIATION_DICTIONARIES)
|
||||
street_and_synonyms_gazetteer = create_gazetteer(*(STREET_TYPES_DICTIONARIES + (SYNONYM_DICTIONARY, )))
|
||||
abbreviations_gazetteer = create_gazetteer(*ALL_ABBREVIATION_DICTIONARIES)
|
||||
toponym_gazetteer = create_gazetteer(*TOPONYM_ABBREVIATION_DICTIONARIES)
|
||||
toponym_abbreviations_gazetteer = create_gazetteer(*TOPONYM_ABBREVIATION_DICTIONARIES)
|
||||
toponym_gazetteer = create_gazetteer(TOPONYMS_DICTIONARY)
|
||||
given_name_gazetteer = create_gazetteer(GIVEN_NAME_DICTIONARY)
|
||||
|
||||
@@ -774,7 +774,7 @@ class AddressComponents(object):
|
||||
if component == AddressFormatter.STATE and random.random() < abbreviate_state_prob:
|
||||
val = state_abbreviations.get_abbreviation(country, language, val, default=val)
|
||||
elif random.random() < abbreviate_toponym_prob:
|
||||
val = abbreviate(toponym_gazetteer, val, language, abbreviate_prob=abbreviate_toponym_prob)
|
||||
val = abbreviate(toponym_abbreviations_gazetteer, val, language, abbreviate_prob=abbreviate_toponym_prob)
|
||||
|
||||
address_components[component] = val
|
||||
|
||||
|
||||
@@ -11,7 +11,7 @@ import yaml
|
||||
from geodata.addresses.units import Unit
|
||||
from geodata.address_expansions.abbreviations import abbreviate
|
||||
from geodata.address_expansions.address_dictionaries import address_phrase_dictionaries
|
||||
from geodata.address_expansions.gazetteers import street_types_gazetteer, unit_types_gazetteer, toponym_gazetteer
|
||||
from geodata.address_expansions.gazetteers import street_types_gazetteer, unit_types_gazetteer, toponym_abbreviations_gazetteer
|
||||
from geodata.address_formatting.formatter import AddressFormatter
|
||||
from geodata.addresses.components import AddressComponents
|
||||
from geodata.countries.names import country_names
|
||||
@@ -383,7 +383,7 @@ class OpenAddressesFormatter(object):
|
||||
for component_key in AddressFormatter.BOUNDARY_COMPONENTS:
|
||||
component = components.get(component_key, None)
|
||||
if component is not None:
|
||||
component = abbreviate(toponym_gazetteer, component, language,
|
||||
component = abbreviate(toponym_abbreviations_gazetteer, component, language,
|
||||
abbreviate_prob=abbreviate_toponym_prob)
|
||||
components[component_key] = component
|
||||
|
||||
|
||||
Reference in New Issue
Block a user