[names] component expansion uses the new configurable affix replacements

This commit is contained in:
Al
2016-05-05 18:32:14 -04:00
parent 6f8e94b851
commit ed12d7bbe4

View File

@@ -13,7 +13,7 @@ from geodata.coordinates.conversion import latlon_to_decimal
 from geodata.countries.country_names import *
 from geodata.language_id.disambiguation import *
 from geodata.language_id.sample import sample_random_language
-from geodata.names.normalization import replace_name_prefixes, replace_name_suffixes
+from geodata.names.normalization import name_affixes
 from geodata.osm.extract import osm_address_components
 from geodata.states.state_abbreviations import state_abbreviations
@@ -590,18 +590,21 @@ class AddressExpander(object):
             if component not in address_components and random.random() < add_neighborhood_prob:
                 address_components[component] = neighborhoods[0]
-    def replace_name_affixes(self, address_components, replacement_prob=0.6):
+    def replace_name_affixes(self, address_components, language, replacement_prob=0.6):
         '''
         Name normalization
         ------------------
         Probabilistically strip standard prefixes/suffixes e.g. "London Borough of"
         '''
-        for component in self.BOUNDARY_COMPONENTS:
-            name = address_components.get(component)
+        for component in list(address_components):
+            if component not in self.BOUNDARY_COMPONENTS:
+                continue
+            name = address_components[component]
             if not name:
                 continue
-            replacement = replace_name_prefixes(replace_name_suffixes(name))
+            replacement = name_affixes.replace_name_suffixes(name, language)
+            replacement = name_affixes.replace_name_prefixes(replacement, language)
             if replacement != name and random.random() < replacement_prob:
                 address_components[component] = replacement
@@ -719,7 +722,7 @@ class AddressExpander(object):
         street = address_components.get(AddressFormatter.ROAD)
-        self.replace_name_affixes(address_components)
+        self.replace_name_affixes(address_components, non_local_language or language)
         self.replace_names(address_components)
@@ -795,7 +798,7 @@ class AddressExpander(object):
         self.add_neighborhoods(address_components, neighborhoods,
                                osm_suffix=osm_suffix)
-        self.replace_name_affixes(address_components)
+        self.replace_name_affixes(address_components, non_local_language or language)
         self.replace_names(address_components)