[countries] Use the ISO 3166 country name 5% of the time for general addresses and 10% of the time for OpenAddresses. This gives the parser examples of names like "Korea, Republic of" (see #168).
This commit is contained in:
@@ -684,10 +684,11 @@ class AddressComponents(object):
|
||||
|
||||
alpha_2_iso_code_prob = float(cldr_config['iso_alpha_2_code_probability'])
|
||||
localized_name_prob = float(cldr_config['localized_name_probability'])
|
||||
iso_3166_name_prob = float(cldr_config['iso_3166_name_probability'])
|
||||
alpha_3_iso_code_prob = float(cldr_config['iso_alpha_3_code_probability'])
|
||||
|
||||
values = ('localized', 'alpha3', 'alpha2')
|
||||
probs = cdf([localized_name_prob, alpha_3_iso_code_prob, alpha_2_iso_code_prob])
|
||||
# Bind the per-format index constants and also keep the whole sequence as
# `values`, because weighted_choice(values, probs) below samples from it.
localized, iso_3166, alpha3, alpha2 = values = range(4)
|
||||
probs = cdf([localized_name_prob, iso_3166_name_prob, alpha_3_iso_code_prob, alpha_2_iso_code_prob])
|
||||
value = weighted_choice(values, probs)
|
||||
|
||||
country_name = country_code.upper()
|
||||
@@ -695,9 +696,11 @@ class AddressComponents(object):
|
||||
if language in (AMBIGUOUS_LANGUAGE, UNKNOWN_LANGUAGE):
|
||||
language = None
|
||||
|
||||
if value == 'localized':
|
||||
if value == localized:
|
||||
country_name = country_names.localized_name(country_code, language) or country_names.localized_name(country_code) or country_name
|
||||
elif value == 'alpha3':
|
||||
elif value == iso_3166:
|
||||
# iso_3166_name() returns None for codes it does not know; fall back to the
# upper-cased country code, as the localized and alpha3 branches do.
country_name = country_names.iso_3166_name(country_code) or country_name
|
||||
elif value == alpha3:
|
||||
country_name = country_names.alpha3_code(country_code) or country_name
|
||||
|
||||
return country_name
|
||||
|
||||
@@ -52,6 +52,7 @@ class CountryNames(object):
|
||||
self.base_dir = base_dir
|
||||
|
||||
self.country_alpha3_codes = {c.alpha2.lower(): c.alpha3.lower() for c in pycountry.countries}
|
||||
self.iso_3166_names = {c.alpha2.lower(): c.name for c in pycountry.countries}
|
||||
|
||||
self.language_country_names = {}
|
||||
self.country_language_names = defaultdict(dict)
|
||||
@@ -177,7 +178,10 @@ class CountryNames(object):
|
||||
return self.country_language_names.get(country_code, {}).get(language)
|
||||
|
||||
def alpha3_code(self, alpha2_code):
    """Return the upper-cased ISO alpha-3 code for an alpha-2 country code.

    The key is lower-cased before the lookup in
    ``self.country_alpha3_codes``. Returns ``None`` when the code is not
    in the mapping (or maps to an empty value).
    """
    alpha3 = self.country_alpha3_codes.get(alpha2_code.lower())
    return alpha3.upper() if alpha3 else None
|
||||
|
||||
def iso_3166_name(self, alpha2_code):
    """Return the ISO 3166 country name for an alpha-2 country code.

    The key is lower-cased before the lookup in ``self.iso_3166_names``.
    Returns ``None`` when the code is not in the mapping.
    """
    return self.iso_3166_names.get(alpha2_code.lower())
|
||||
|
||||
country_names = CountryNames()
|
||||
|
||||
@@ -243,18 +243,21 @@ class OpenAddressesFormatter(object):
|
||||
country_name = None
|
||||
|
||||
if random.random() < cldr_country_prob:
|
||||
localized, alpha2, alpha3 = values = range(3)
|
||||
localized, iso_3166, alpha2, alpha3 = values = range(4)
|
||||
localized_prob = float(self.get_property('localized_name_probability', *configs))
|
||||
iso_3166_prob = float(self.get_property('iso_3166_name_probability', *configs))
|
||||
alpha2_prob = float(self.get_property('iso_alpha_2_code_probability', *configs))
|
||||
alpha3_prob = float(self.get_property('iso_alpha_3_code_probability', *configs))
|
||||
|
||||
probs = cdf([localized_prob, alpha2_prob, alpha3_prob])
|
||||
probs = cdf([localized_prob, iso_3166_prob, alpha2_prob, alpha3_prob])
|
||||
|
||||
country_type = weighted_choice(values, probs)
|
||||
|
||||
country_name = country_code.upper()
|
||||
if country_type == localized:
|
||||
country_name = country_names.localized_name(country_code, language) or country_names.localized_name(country_code) or country_name
|
||||
elif country_type == iso_3166:
|
||||
# CountryNames spells this method iso_3166_name (with the underscore); the
# previous iso3166_name spelling is not defined on it. Keep the upper-cased
# country code as the fallback when no ISO name is known.
country_name = country_names.iso_3166_name(country_code) or country_name
|
||||
elif country_type == alpha3:
|
||||
country_name = country_names.alpha3_code(country_code) or country_name
|
||||
|
||||
|
||||
Reference in New Issue
Block a user