[fix] making a separate gazetteer for toponym abbreviations
This commit is contained in:
@@ -157,11 +157,13 @@ HOUSE_NUMBER_DICTIONARIES = ('house_number', 'no_number')
|
|||||||
|
|
||||||
POSTCODE_DICTIONARIES = ('postcode',)
|
POSTCODE_DICTIONARIES = ('postcode',)
|
||||||
|
|
||||||
TOPONYM_ABBREVIATION_DICTIONARIES = STREET_TYPES_DICTIONARIES + ('qualifiers',
|
TOPONYMS_DICTIONARY = 'toponyms'
|
||||||
'personal_titles',
|
|
||||||
'synonyms',
|
TOPONYM_ABBREVIATION_DICTIONARIES = ('qualifiers',
|
||||||
'toponyms',
|
'directionals',
|
||||||
)
|
'personal_titles',
|
||||||
|
'synonyms',
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
UNIT_ABBREVIATION_DICTIONARIES = ('level_types_basement',
|
UNIT_ABBREVIATION_DICTIONARIES = ('level_types_basement',
|
||||||
@@ -198,5 +200,6 @@ chains_gazetteer = create_gazetteer(CHAIN_DICTIONARY)
|
|||||||
unit_types_gazetteer = create_gazetteer(*UNIT_ABBREVIATION_DICTIONARIES)
|
unit_types_gazetteer = create_gazetteer(*UNIT_ABBREVIATION_DICTIONARIES)
|
||||||
street_and_synonyms_gazetteer = create_gazetteer(*(STREET_TYPES_DICTIONARIES + (SYNONYM_DICTIONARY, )))
|
street_and_synonyms_gazetteer = create_gazetteer(*(STREET_TYPES_DICTIONARIES + (SYNONYM_DICTIONARY, )))
|
||||||
abbreviations_gazetteer = create_gazetteer(*ALL_ABBREVIATION_DICTIONARIES)
|
abbreviations_gazetteer = create_gazetteer(*ALL_ABBREVIATION_DICTIONARIES)
|
||||||
toponym_gazetteer = create_gazetteer(*TOPONYM_ABBREVIATION_DICTIONARIES)
|
toponym_abbreviations_gazetteer = create_gazetteer(*TOPONYM_ABBREVIATION_DICTIONARIES)
|
||||||
|
toponym_gazetteer = create_gazetteer(TOPONYMS_DICTIONARY)
|
||||||
given_name_gazetteer = create_gazetteer(GIVEN_NAME_DICTIONARY)
|
given_name_gazetteer = create_gazetteer(GIVEN_NAME_DICTIONARY)
|
||||||
|
|||||||
@@ -774,7 +774,7 @@ class AddressComponents(object):
|
|||||||
if component == AddressFormatter.STATE and random.random() < abbreviate_state_prob:
|
if component == AddressFormatter.STATE and random.random() < abbreviate_state_prob:
|
||||||
val = state_abbreviations.get_abbreviation(country, language, val, default=val)
|
val = state_abbreviations.get_abbreviation(country, language, val, default=val)
|
||||||
elif random.random() < abbreviate_toponym_prob:
|
elif random.random() < abbreviate_toponym_prob:
|
||||||
val = abbreviate(toponym_gazetteer, val, language, abbreviate_prob=abbreviate_toponym_prob)
|
val = abbreviate(toponym_abbreviations_gazetteer, val, language, abbreviate_prob=abbreviate_toponym_prob)
|
||||||
|
|
||||||
address_components[component] = val
|
address_components[component] = val
|
||||||
|
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ import yaml
|
|||||||
from geodata.addresses.units import Unit
|
from geodata.addresses.units import Unit
|
||||||
from geodata.address_expansions.abbreviations import abbreviate
|
from geodata.address_expansions.abbreviations import abbreviate
|
||||||
from geodata.address_expansions.address_dictionaries import address_phrase_dictionaries
|
from geodata.address_expansions.address_dictionaries import address_phrase_dictionaries
|
||||||
from geodata.address_expansions.gazetteers import street_types_gazetteer, unit_types_gazetteer, toponym_gazetteer
|
from geodata.address_expansions.gazetteers import street_types_gazetteer, unit_types_gazetteer, toponym_abbreviations_gazetteer
|
||||||
from geodata.address_formatting.formatter import AddressFormatter
|
from geodata.address_formatting.formatter import AddressFormatter
|
||||||
from geodata.addresses.components import AddressComponents
|
from geodata.addresses.components import AddressComponents
|
||||||
from geodata.countries.names import country_names
|
from geodata.countries.names import country_names
|
||||||
@@ -383,7 +383,7 @@ class OpenAddressesFormatter(object):
|
|||||||
for component_key in AddressFormatter.BOUNDARY_COMPONENTS:
|
for component_key in AddressFormatter.BOUNDARY_COMPONENTS:
|
||||||
component = components.get(component_key, None)
|
component = components.get(component_key, None)
|
||||||
if component is not None:
|
if component is not None:
|
||||||
component = abbreviate(toponym_gazetteer, component, language,
|
component = abbreviate(toponym_abbreviations_gazetteer, component, language,
|
||||||
abbreviate_prob=abbreviate_toponym_prob)
|
abbreviate_prob=abbreviate_toponym_prob)
|
||||||
components[component_key] = component
|
components[component_key] = component
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user