[parser] Sample chain store alternate names from the cross-language dictionary
This commit is contained in:
@@ -40,6 +40,8 @@ ADDRESS_FORMAT_DATA_TAGGED_FILENAME = 'formatted_addresses_tagged.tsv'
|
|||||||
ADDRESS_FORMAT_DATA_FILENAME = 'formatted_addresses.tsv'
|
ADDRESS_FORMAT_DATA_FILENAME = 'formatted_addresses.tsv'
|
||||||
ADDRESS_FORMAT_DATA_LANGUAGE_FILENAME = 'formatted_addresses_by_language.tsv'
|
ADDRESS_FORMAT_DATA_LANGUAGE_FILENAME = 'formatted_addresses_by_language.tsv'
|
||||||
|
|
||||||
|
ALL_LANGUAGES = 'all'
|
||||||
|
|
||||||
|
|
||||||
class OSMAddressFormatter(object):
|
class OSMAddressFormatter(object):
|
||||||
aliases = Aliases(
|
aliases = Aliases(
|
||||||
@@ -326,6 +328,8 @@ class OSMAddressFormatter(object):
|
|||||||
name = canonical
|
name = canonical
|
||||||
if random.random() < sample_probability:
|
if random.random() < sample_probability:
|
||||||
names = address_config.sample_phrases.get((language, dictionary), {}).get(canonical, [])
|
names = address_config.sample_phrases.get((language, dictionary), {}).get(canonical, [])
|
||||||
|
if not names:
|
||||||
|
names = address_config.sample_phrases.get((ALL_LANGUAGES, dictionary), {}).get(canonical, [])
|
||||||
if names:
|
if names:
|
||||||
name = random.choice(names)
|
name = random.choice(names)
|
||||||
phrase = Chain.phrase(name, language, country)
|
phrase = Chain.phrase(name, language, country)
|
||||||
|
|||||||
@@ -172,9 +172,6 @@ def get_language_names(language_rtree, key, value, tag_prefix='name'):
|
|||||||
return country, name_language
|
return country, name_language
|
||||||
|
|
||||||
|
|
||||||
ALL_LANGUAGES = 'all'
|
|
||||||
|
|
||||||
|
|
||||||
def build_ways_training_data(language_rtree, infile, out_dir, abbreviate_streets=True):
|
def build_ways_training_data(language_rtree, infile, out_dir, abbreviate_streets=True):
|
||||||
'''
|
'''
|
||||||
Creates a training set for language classification using most OSM ways
|
Creates a training set for language classification using most OSM ways
|
||||||
|
|||||||
Reference in New Issue
Block a user