# NOTE(review): this file's line structure was reconstructed from a
# whitespace-collapsed copy. Key order, values, and comment text are
# preserved; confirm the nesting flagged below against the code that
# consumes this config.

names:
  replace_affix_probability: 0.6

languages:
  # Sample a language from the distribution of languages found on the Internet
  non_local_language_probability: 0.05
  # Replace user-tagged admin components with the non-local language version
  replace_non_local_probability: 0.4

# Dependencies for including each component in an "address".
# Two-way dependencies are not an issue.
component_dependencies:
  house:
    dependencies:
      - house_number
      - road
      - suburb
      - city_district
      - city
      - postcode
  road:
    dependencies:
      - house
      - house_number
      - suburb
      - city_district
      - city
      - postcode
  house_number:
    dependencies:
      - road
  entrance:
    dependencies:
      - house_number
  staircase:
    dependencies:
      - house_number
  level:
    dependencies:
      - house_number
  unit:
    dependencies:
      - house_number
  postcode:
    # Explicit empty list (a bare "dependencies:" would load as null)
    dependencies: []

# Each component is dropped out separately and a new address
# is added to the training set. These are only the address-level
# components. Places/boundaries are taken care of elsewhere.
dropout:
  attention:
    probability: 0.8
  care_of:
    probability: 0.8
  house:
    probability: 0.6
  house_number:
    probability: 0.5
  road:
    probability: 0.4
  entrance:
    probability: 0.8
  staircase:
    probability: 0.8
  level:
    probability: 0.6
  unit:
    probability: 0.5
  postcode:
    probability: 0.6
  po_box:
    probability: 0.1

# NOTE(review): the three drop_* keys and "category" are placed at the top
# level here; the collapsed source does not show whether they belong under
# a parent key -- confirm against the consumer.
# Note: these probabilities are all independent (don't need to sum to 1)
drop_address_probability: 0.8  # drop house number, road, etc.
drop_places_probability: 0.1  # drop place names
drop_postcode_probability: 0.3  # drop postal code

category:
  # Same thing for category queries
  drop_address_probability: 0.8  # drop house number, road, etc.
  drop_places_probability: 0.1  # drop place names
  drop_postcode_probability: 0.3  # drop postal code

neighborhood:
  # Usually in Germany, may have e.g. name:prefix=Ortsteil
  add_prefix_probability: 0.5
  add_neighborhood_probability: 0.5

city:
  quattroshapes_geonames_backup_city_probability: 0.2
  quattroshapes_geonames_abbreviated_probability: 0.1

state_district:
  join_probability: 0.5

state:
  # Probability of using full name e.g. New York vs. NY
  full_name_probability: 0.4
  abbreviated_probability: 0.6

country:
  # If no country is specified, pull the country name from CLDR
  # (authoritative country names translated into different languages)
  cldr_country_probability: 0.5
  # When a country is specified and is simply an ISO code (e.g. US, DE),
  # replace with one of the CLDR names
  replace_with_cldr_country_probability: 0.9
  # When the user-specified country is an ISO code, remove it from the
  # components with this probability (fall back on geocoded components)
  remove_iso_code_probability: 0.1
  # NOTE(review): "cldr" is assumed to be nested under "country" -- confirm.
  cldr:
    localized_name_probability: 0.7
    iso_alpha_2_code_probability: 0.2
    iso_alpha_3_code_probability: 0.1