[addresses] Adding address-level component dropout to AddressComponents (returns an ordering so the client formatter can potentially emit multiple addresses with different components dropped out). Adding PO box and category probabilities to config

This commit is contained in:
Al
2016-05-21 17:54:30 -04:00
parent e1aec72c32
commit 8f358d295f
3 changed files with 148 additions and 33 deletions

View File

@@ -7,6 +7,83 @@ languages:
# Replace user-tagged admin components with the non-local language version
replace_non_local_probability: 0.4
# Dependencies for including each component in an "address".
# Each key below names a component; its "dependencies" list names the other
# components it is tied to. An empty list ([]) declares no dependencies.
# Two-way dependencies (e.g. road <-> house_number) are not an issue.
# NOTE(review): whether a component needs all of its listed dependencies or
# just one of them is decided by the code that reads this config - confirm.
component_dependencies:
house:
dependencies: []
road:
dependencies:
- house
- house_number
- suburb
- city_district
- city
- postcode
house_number:
dependencies:
- road
# The sub-building components (entrance, staircase, level, unit) each list
# house_number as their only dependency.
entrance:
dependencies:
- house_number
staircase:
dependencies:
- house_number
level:
dependencies:
- house_number
unit:
dependencies:
- house_number
postcode:
dependencies: []
# Each component is dropped out separately and a new address
# is added to the training set. These are only the address-level
# components; places/boundaries are taken care of elsewhere.
# Every entry below carries a single "probability" value for that component.
# NOTE(review): presumably the chance (between 0 and 1) that the component
# is dropped - confirm against the code that reads this section.
dropout:
attention:
probability: 0.8
care_of:
probability: 0.8
house:
probability: 0.6
house_number:
probability: 0.5
road:
probability: 0.4
entrance:
probability: 0.8
staircase:
probability: 0.8
level:
probability: 0.6
unit:
probability: 0.5
postcode:
probability: 0.6
# PO box settings.
# NOTE(review): "probability" presumably controls how often a PO box is
# involved at all - confirm against the code that reads this section.
po_box:
probability: 0.1
# Note: these probabilities are all independent (they don't need to sum to 1)
drop_address_probability: 0.8 # drop house number, road, etc.
drop_places_probability: 0.1 # drop place names
drop_postcode_probability: 0.3 # drop postal code
category:
# The same three drop probabilities as in the preceding section, applied to
# category queries.
drop_address_probability: 0.8 # drop house number, road, etc.
drop_places_probability: 0.1 # drop place names
drop_postcode_probability: 0.3 # drop postal code
neighborhood:
# Usually in Germany, may have e.g. name:prefix=Ortsteil
add_prefix_probability: 0.5