[addresses] Spanish sub-building component probabilities

This commit is contained in:
Al
2016-05-18 02:47:43 -04:00
parent 4485a8c234
commit 53aa5b10ab

View File

@@ -3,6 +3,64 @@
# Note: Latin-American conventions are the default (country overrides cover Spain
# as well as any other country-specific norms)
components:
po_box:
null_probability: 0.9
alphanumeric_probability: 0.1
conditional:
- component: level
probabilities:
null_probability: 0.995
alphanumeric_probability: 0.005
- component: unit
probabilities:
null_probability: 0.99
alphanumeric_probability: 0.01
- component: staircase
probabilities:
null_probability: 0.999
alphanumeric_probability: 0.001
- component: entrance
probabilities:
null_probability: 0.999
alphanumeric_probability: 0.001
level:
# If no floor number is specified
null_probability: 0.6
alphanumeric_probability: 0.35
standalone_probability: 0.05
staircase:
null_probability: 0.989
alphanumeric_probability: 0.01
directional_probability: 0.001
entrance:
null_probability: 0.9999
alphanumeric_probability: 0.0001
unit:
# If no unit number is specified
null_probability: 0.3
alphanumeric_probability: 0.65
standalone_probability: 0.05
combinations:
level_unit:
components:
- level
- unit
label: unit
separators:
- separator: /
probability: 0.2
- separator: " "
probability: 0.6
- separator: " - "
probability: 0.2
probability: 0.005
numbers:
default: &numero
canonical: número
@@ -198,10 +256,6 @@ levels:
canonical_probability: 0.7
abbreviated_probability: 0.1
sample_probability: 0.2
numeric:
direction: left
numeric_probability: 0.4
standalone_probability: 0.6
sobreatico: &sobreatico
canonical: sobreatico
aliases:
@@ -251,17 +305,6 @@ levels:
numeric_probability: 0.99
alpha_probability: 0.01
order:
# e.g. Calle Ruiz de Alarcón 23 piso 3
- after: house_number
probability: 0.8
# e.g. Piso 3, Museo del Prado, Calle Ruiz de Alarcón 23
- before: house
probability: 0.1
# e.g. Museo del Prado, Bajos, Calle Ruiz de Alarcón 23
- before: road
probability: 0.1
blocks:
default:
canonical: bloque
@@ -373,14 +416,10 @@ po_boxes:
alpha_probability: 0.05 # Apdo A
numeric_plus_alpha_probability: 0.04 # Apdo 123G
alpha_plus_numeric_probability: 0.01 # Apdo A123
alpha_plus_numeric_whitespace_probability: 0.1
numeric_plus_alpha_whitespace_probability: 0.1
order:
- after: house
probability: 0.8
- before: house
probability: 0.2
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
postcodes:
alphanumeric:
@@ -535,7 +574,34 @@ entrances:
canonical_probability: 0.5
abbreviated_probability: 0.2
sample_probability: 0.3
numeric:
direction: left
# Entrance 1, Entrance A, etc.
alphanumeric:
default: *entrada
numeric_probability: 0.1 # e.g. Entrance 1
alpha_probability: 0.85 # e.g. Entrance A
numeric_plus_alpha_probability: 0.025 # e.g. 1A
alpha_plus_numeric_probability: 0.025 # e.g. A1
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
directional:
modifier:
alternatives:
- alternative: *norte
- alternative: *sur
- alternative: *este
- alternative: *oeste
- alternative: *derecha
- alternative: *izquierda
- alternative: *trasera
- alternative: *frente
staircases:
escalera: &escalera
@@ -545,22 +611,38 @@ staircases:
canonical_probability: 0.3
abbreviated_probability: 0.4
sample_probability: 0.3
numeric:
direction: left
alphanumeric:
# For alphanumerics, Stair A, Stair 1, etc.
default: *escalera
numeric_probability: 0.6 # e.g. Escalera 1
alpha_probability: 0.35 # e.g. Escalera A
numeric_plus_alpha_probability: 0.025 # e.g. 1A
alpha_plus_numeric_probability: 0.025 # e.g. A1
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
directional:
direction: right # e.g. Escalera Izq
direction_probability: 0.8
modifier:
alternatives:
- alternative: *norte
- alternative: *sur
- alternative: *este
- alternative: *oeste
- alternative: *derecha
- alternative: *izquierda
- alternative: *trasera
- alternative: *frente
units:
# Units are not part of the global address formats (and are not always standard)
# This is a list of places in the address where the unit line might go
order:
- before: house
probability: 0.05
- before: road
probability: 0.05
# e.g. Piso 3 Dpto 12 (most common)
- after: level
probability: 0.8
# e.g. Apto 6, 2o piso (less common)
- before: level
probability: 0.1
apartment: &apartamento
canonical: apartamento
abbreviated: apto
@@ -669,9 +751,7 @@ units:
- alternative: *casa
probability: 0.05
- alternative: *puerta
probability: 0.045
- alternative: *atico
probability: 0.005
probability: 0.05
# Separate random probability for adding directions like 2o Izq, 2 Dcha, etc.
add_direction: true
@@ -683,8 +763,10 @@ units:
numeric_plus_alpha_probability: 0.01 # e.g. Dpto 1A
alpha_plus_numeric_probability: 0.01 # e.g. Dpto A1
alpha_probability: 0.08 # e.g. Dpto A
alpha_plus_numeric_whitespace_probability: 0.1
numeric_plus_alpha_whitespace_probability: 0.1
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
alphanumeric_apartamento: &unit_alphanumeric_apartamento
@@ -697,9 +779,7 @@ units:
- alternative: *casa
probability: 0.05
- alternative: *puerta
probability: 0.045
- alternative: *atico
probability: 0.005
probability: 0.05
alphanumeric_apartamento_exclusive: &unit_alphanumeric_apartamento_exclusive
<<: *unit_alphanumeric
@@ -709,9 +789,7 @@ units:
- alternative: *casa
probability: 0.05
- alternative: *puerta
probability: 0.045
- alternative: *atico
probability: 0.005
probability: 0.05
zones:
residential: *unit_alphanumeric
@@ -755,28 +833,14 @@ units:
parcel_plus_lot_probability: 0.02
# For unit types like 2o/B
combined:
component: level
direction: right
separators:
- separator: /
probability: 0.2
- separator: " "
probability: 0.6
- separator: " - "
probability: 0.2
# If no unit number is specified
alphanumeric_probability: 0.75
standalone_probability: 0.2495
combined_probability: 0.005
countries:
# España / Spain
es:
components:
staircase:
null_probability: 0.97
alphanumeric_probability: 0.02
directional_probability: 0.01
levels:
planta: &planta
# Everywhere except Spain
@@ -897,9 +961,7 @@ countries:
- alternative: *apartamento
probability: 0.1
- alternative: *casa
probability: 0.05
- alternative: *atico
probability: 0.05
probability: 0.1
zones:
residential: *unit_alphanumeric_puerta