[addresses] Estonian address config

This commit is contained in:
Al
2016-06-28 13:18:23 -04:00
parent a86c2f491c
commit a0ac1e203b

470
resources/addresses/et.yaml Normal file
View File

@@ -0,0 +1,470 @@
# et.yaml
# -------
# Estonian language specification.
components:
level:
null_probability: 0.97
alphanumeric_probability: 0.02
standalone_probability: 0.01
staircase:
null_probability: 0.99
alphanumeric_probability: 0.01
entrance:
null_probability: 0.999
alphanumeric_probability: 0.001
unit:
null_probability: 0.75
alphanumeric_probability: 0.25
combinations:
house_number_unit:
components:
- house_number
- unit
label: house_number
separators:
- separator: "-"
probability: 0.95
- separator: " - "
probability: 0.05
probability: 0.7
numbers:
default: &number
canonical: number
abbreviated: nbr
sample: true
# Probabilities
canonical_probability: 0.3
abbreviated_probability: 0.5
sample_probability: 0.2
sample_exclude:
- "#"
numeric:
direction: left
numeric_affix:
affix: "#"
direction: left
numeric_probability: 0.4
numeric_affix_probability: 0.6
house_number:
alphanumeric:
default: *number
alphanumeric_phrase_probability: 0.0001
and:
default: &ja
canonical: ja
abbreviated: "&"
canonical_probability: 0.2
abbreviated_probability: 0.75
sample: true
sample_probability: 0.05
cross_streets:
and: *ja
corner_of: &nurgas
canonical: nurgas
sample: true
canonical_probability: 0.8
sample_probability: 0.2
at_the_corner_of: &nurgal
canonical: nurgal
sample: true
canonical_probability: 0.8
sample_probability: 0.2
intersection:
default: *ja
probability: 0.7
alternatives:
- alternative: *nurgas
probability: 0.15
- alternative: *nurgal
probability: 0.15
between:
canonical: vahel
sample: true
canonical_probability: 0.8
sample_probability: 0.2
parentheses_probability: 0.5
levels:
floor: &korrusel
canonical: korrusel
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
direction_probability: 0.9
ordinal:
direction: right
numeric_probability: 0.4
ordinal_probability: 0.6
parter: &parter
canonical: parter
sample: true
canonical_probability: 0.8
sample_probability: 0.2
kelder: &kelder
canonical: kelder
sample: true
canonical_probability: 0.8
sample_probability: 0.2
standalone_probability: 1.0
keldris: &keldris
canonical: keldris
sample: true
canonical_probability: 0.8
sample_probability: 0.2
# e.g. 1 keldris
numeric:
direction: right
direction_probability: 0.8
# e.g. k1
numeric_affix:
affix: k
direction: left
# e.g. 1. keldris
ordinal:
direction: right
number_abs_value: true
number_min_abs_value: 1
numeric_probability: 0.05
numeric_affix_probability: 0.9
ordinal_probability: 0.05
aliases:
"<-1":
default: *kelder
probability: 0.85
alternatives:
- alternative: *keldris
probability: 0.15
"-1":
default: *kelder
probability: 0.85
alternatives:
- alternative: *keldris
probability: 0.1
- alternative: *korrusel
probability: 0.05
"1":
default: *parter
probability: 0.5
alternatives:
- alternative: *korrusel
probability: 0.5
numbering_starts_at: 1
alphanumeric:
default: *korrusel
numeric_probability: 0.99 # With this probability, pick an integer
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
alpha_plus_numeric_probability: 0.0001 # e.g. A2
categories:
near:
default:
canonical: lähedal
sample: true
canonical_probability: 0.8
sample_probability: 0.2
nearby:
default:
canonical: lähedal
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.7
alternatives:
- alternative:
canonical: siin lähedal
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.2
- alternative:
canonical: siinkandis
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.1
near_me:
default:
canonical: lähedal mulle
sample: true
canonical_probability: 0.8
sample_probability: 0.2
# Probabilities of each phrase
near_probability: 0.7
nearby_probability: 0.2
near_me_probability: 0.1
directions:
right: &paremal
canonical: paremal
sample: true
canonical_probability: 0.9
sample_probability: 0.1
numeric:
direction: right
numeric_affix:
affix: p
direction: right
whitespace_probability: 0.1
numeric_probability: 0.8
numeric_affix_probability: 0.2
paramale: &paremale
canonical: paremale
sample: true
canonical_probability: 0.9
sample_probability: 0.1
numeric:
direction: right
numeric_affix:
affix: p
direction: right
whitespace_probability: 0.1
numeric_probability: 0.8
numeric_affix_probability: 0.2
left: &vasakul
canonical: vasakul
sample: true
canonical_probability: 0.9
sample_probability: 0.1
numeric:
direction: right
numeric_affix:
affix: v
direction: right
whitespace_probability: 0.1
numeric_probability: 0.8
numeric_affix_probability: 0.2
vasakule: &vasakule
canonical: vasakule
sample: true
canonical_probability: 0.9
sample_probability: 0.1
numeric:
direction: right
numeric_affix:
affix: v
direction: right
whitespace_probability: 0.1
numeric_probability: 0.8
numeric_affix_probability: 0.2
alternatives:
- alternative: *paremal
probability: 0.25
- alternative: *paremale
probability: 0.25
- alternative: *vasakul
probability: 0.25
- alternative: *vasakule
probability: 0.25
cardinal_directions:
east: &ida
canonical: ida
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
west: &laas
canonical: lääs
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
north: &pohi
canonical: põhi
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
south: &louna
canonical: lõuna
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
alternatives:
- alternative: *pohi
probability: 0.25
- alternative: *ida
probability: 0.25
- alternative: *louna
probability: 0.25
- alternative: *laas
probability: 0.25
entrances:
sissepaas: &sissepaas
canonical: sissepääs
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
# Eingang 1, Eingang A, etc.
alphanumeric: &entrance_alphanumeric
default: *sissepaas
numeric_probability: 0.1 # e.g. Eingang 1
alpha_probability: 0.85 # e.g. Eingang A
numeric_plus_alpha_probability: 0.025 # e.g. 1A
alpha_plus_numeric_probability: 0.025 # e.g. A1
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
staircases:
trepikoda: &trepikoda
canonical: trepikoda
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
alphanumeric: &staircase_alphanumeric
default: *trepikoda
numeric_probability: 0.75
alpha_probability: 0.2
numeric_plus_alpha_probability: 0.025
alpha_plus_numeric_probability: 0.025
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
directional:
direction: left
direction_probability: 0.85
modifier:
alternatives:
- alternative: *pohi
- alternative: *louna
- alternative: *ida
- alternative: *laas
po_boxes:
postboks: &abonementpostkast
canonical: abonementpostkast
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2 # abonementpostkast #1234
kast: &kast
canonical: kast
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2 # Kast #1234
alphanumeric:
sample: false
default: *abonementpostkast
probability: 0.9
alternatives:
- alternative: *kast
probability: 0.1
numeric_probability: 0.9 # 123
alpha_probability: 0.05 # A
numeric_plus_alpha_probability: 0.04 # 123G
alpha_plus_numeric_probability: 0.01 # A123
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
digits:
- length: 1
probability: 0.05
- length: 2
probability: 0.1
- length: 3
probability: 0.2
- length: 4
probability: 0.5
- length: 5
probability: 0.1
- length: 6
probability: 0.05
units:
korter: &korter
canonical: korter
abbreviated: k
sample: true
canonical_probability: 0.8
abbreviated_probability: 0.1
sample_probability: 0.1
numeric:
direction: left
null_phrase_probability: 0.3
# Lejlighed nummer 4
add_number_phrase: true
add_number_phrase_probability: 0.05
ruumi: &ruumi
canonical: ruumi
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
alphanumeric: &unit_alphanumeric
default: *korter
probability: 0.9
alternatives:
- alternative: *ruumi
probability: 0.1
numeric_probability: 1.0 # e.g. korter 1
# Separate random probability for adding directions like 2P, 2V, etc.
add_direction: true
add_direction_probability: 0.005
# Add directions for plain numbers
add_direction_numeric: true
# Add direction only e.g. Korter vasakule
add_direction_standalone: true
# If there are 10 floors, create unit numbers like #301 or #1032
use_floor_probability: 0.05