---

# ro.yaml
# -------
# Romanian language specification
# Per-component probability tables. Each group below sums to 1.0.
# Nesting restored: the repeated *_probability keys collide unless each
# set belongs to its own component.
components:
  level:
    # If no floor number is specified
    null_probability: 0.6
    alphanumeric_probability: 0.35
    standalone_probability: 0.05
  staircase:
    null_probability: 0.95
    alphanumeric_probability: 0.05
  entrance:
    null_probability: 0.999
    alphanumeric_probability: 0.001
  unit:
    # If no unit number is specified
    null_probability: 0.3
    alphanumeric_probability: 0.65
    standalone_probability: 0.05
# Phrase for the word "number" (număr / nr), anchored as &numar so other
# sections can alias it.
numbers:
  default: &numar
    canonical: număr
    abbreviated: nr
    sample: true
    # canonical + abbreviated + sample = 1.0
    canonical_probability: 0.1
    abbreviated_probability: 0.7
    sample_probability: 0.2
    sample_exclude:
      - "#"
    numeric:
      direction: left
    numeric_affix:
      affix: "#"  # e.g. #3, #2F, etc.
      probability: 0.5
      # NOTE(review): the alternatives list holds a single empty entry and
      # looks like a leftover stub. It is kept as found, with `direction`
      # read as a property of the affix itself; confirm the intended
      # nesting (or drop probability/alternatives) against the consumer.
      alternatives:
        - alternative:
      direction: left  # affix goes on the number's left
    # Probabilities for numbers (sum to 1.0)
    numeric_probability: 0.9
    numeric_affix_probability: 0.1
# Conjunction phrase ("și" / "&"), anchored as &si and aliased by the
# cross_streets section.
and:
  default: &si
    canonical: și
    abbreviated: "&"
    sample: true
    # canonical + abbreviated + sample = 1.0
    canonical_probability: 0.5
    abbreviated_probability: 0.4
    sample_probability: 0.1
# Phrases for cross streets / intersections. Reuses the &si anchor from
# the `and` section and defines &colt and &la_coltul_de_pe locally.
cross_streets:
  and: *si
  corner_of: &colt
    canonical: colț
    sample: true
    canonical_probability: 0.7
    sample_probability: 0.3
  at_the_corner_of: &la_coltul_de_pe
    canonical: la colțul de pe
    sample: true
    canonical_probability: 0.7
    sample_probability: 0.3
  # Choice of connecting phrase; default + alternatives sum to 1.0
  intersection:
    default: *si
    probability: 0.7
    alternatives:
      - alternative: *colt
        probability: 0.2
      - alternative: *la_coltul_de_pe
        probability: 0.1
  between:
    canonical: între
    sample: true
    canonical_probability: 0.7
    sample_probability: 0.3
    # NOTE(review): the key below is spelled "probabililty". It is kept
    # byte-identical because the spelling the consumer looks up is not
    # visible here. Its placement under `between` is also an assumption.
    parentheses_probabililty: 0.5
# House-number phrases. Aliases the &numar phrase from the numbers section.
house_numbers:
  # "fără număr" (FN) addresses, i.e. addresses without a house number
  no_number:
    default:
      canonical: fără număr
      abbreviated: fn
      sample: true
      # canonical + abbreviated + sample = 1.0
      canonical_probability: 0.1
      abbreviated_probability: 0.7
      sample_probability: 0.2
  alphanumeric:
    default: *numar
  alphanumeric_phrase_probability: 0.7
  # With this probability, use "fără număr" if no house_number is specified
  no_number_probability: 0.1
# Floor / level phrases. Defines &etaj (floor) and &parter (ground floor).
levels:
  floor: &etaj
    canonical: etaj
    abbreviated: et
    sample: true
    # canonical + abbreviated + sample = 1.0
    canonical_probability: 0.5
    abbreviated_probability: 0.4
    sample_probability: 0.1
    # NOTE(review): add_number_phrase* and digits are placed under
    # `numeric` here; confirm this nesting against the consumer.
    numeric:
      direction: left
      # Occasionally add variation of "number", e.g. et. nr 2
      add_number_phrase: true
      add_number_phrase_probability: 0.05
      digits:
        ascii_probability: 0.8
        roman_numeral_probability: 0.2
  # Ground floor
  parter: &parter
    canonical: parter
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
  # Floor numbers that map to a named phrase instead of a plain number
  aliases:
    "0":
      default: *parter
      probability: 0.9
      alternatives:
        - alternative: *etaj
          probability: 0.1
  numbering_starts_at: 0
  alphanumeric:
    default: *etaj
    add_number_phrase: true
    add_number_phrase_probability: 0.05
    # numeric + alpha = 1.0
    numeric_probability: 0.99
    alpha_probability: 0.01
# Building-block phrase ("bloc" / "bl"), placed to the left of the number.
blocks:
  alphanumeric:
    default:
      canonical: bloc
      abbreviated: bl
      sample: true
      # canonical + abbreviated + sample = 1.0
      canonical_probability: 0.6
      abbreviated_probability: 0.2
      sample_probability: 0.2
      numeric:
        direction: left
# Phrases for category queries (near / nearby / near me / in).
# Several entries list both the diacritic and the ASCII spelling.
categories:
  near:
    default:
      # NOTE(review): unlike `nearby` and `near_me`, only the ASCII
      # spelling is listed here (no "în apropiere de"); confirm intent.
      canonical: in apropiere de
  nearby:
    # default + alternatives sum to 1.0
    default:
      canonical: în apropiere
    probability: 0.5
    alternatives:
      - alternative:
          canonical: in apropiere
        probability: 0.2
      - alternative:
          canonical: aproape de aici
        probability: 0.1
      - alternative:
          canonical: aici
        probability: 0.1
      - alternative:
          canonical: în jurul aici
        probability: 0.05
      - alternative:
          canonical: in jurul aici
        probability: 0.05
  near_me:
    default:
      canonical: lângă mine
    probability: 0.7
    alternatives:
      - alternative:
          canonical: langa mine
        probability: 0.3
  in:
    default:
      canonical: din
  # Probabilities of each phrase (sum to 1.0)
  near_probability: 0.35
  nearby_probability: 0.2
  near_me_probability: 0.1
  in_probability: 0.35
# Relative directions. Defines &dreapta (right) and &stanga (left), which
# the entrances and staircases sections alias as modifiers.
directions:
  right: &dreapta
    canonical: dreapta
    sample: true
    canonical_probability: 0.7
    sample_probability: 0.3
    numeric:
      direction: right
    numeric_affix:
      affix: d
      direction: right
      whitespace_probability: 0.1
    # numeric + numeric_affix = 1.0
    numeric_probability: 0.4
    numeric_affix_probability: 0.6
  left: &stanga
    canonical: stânga
    sample: true
    canonical_probability: 0.7
    sample_probability: 0.3
    numeric:
      direction: right
    numeric_affix:
      affix: s
      direction: right
      whitespace_probability: 0.1
    # numeric + numeric_affix = 1.0
    numeric_probability: 0.4
    numeric_affix_probability: 0.6
  # Both directions are equally likely
  alternatives:
    - alternative: *dreapta
      probability: 0.5
    - alternative: *stanga
      probability: 0.5
# Cardinal directions. Defines &est, &vest, &nord and &sud, which the
# entrances and staircases sections alias as modifiers.
cardinal_directions:
  east: &est
    canonical: est
    abbreviated: e
    canonical_probability: 0.4
    abbreviated_probability: 0.6
    numeric:
      direction: right
    numeric_affix:
      affix: e
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  west: &vest
    canonical: vest
    abbreviated: v
    canonical_probability: 0.4
    abbreviated_probability: 0.6
    numeric:
      direction: right
    numeric_affix:
      affix: v
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  north: &nord
    canonical: nord
    # Quoted: a bare n is a boolean under the YAML 1.1 bool type, so a
    # spec-strict 1.1 loader could read it as false instead of "n".
    abbreviated: "n"
    canonical_probability: 0.4
    abbreviated_probability: 0.6
    numeric:
      direction: right
    numeric_affix:
      affix: "n"
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  south: &sud
    canonical: sud
    abbreviated: s
    canonical_probability: 0.4
    abbreviated_probability: 0.6
    numeric:
      direction: right
    numeric_affix:
      affix: s
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  # All four directions are equally likely
  alternatives:
    - alternative: *nord
      probability: 0.25
    - alternative: *est
      probability: 0.25
    - alternative: *sud
      probability: 0.25
    - alternative: *vest
      probability: 0.25
# Entrance phrases. Defines &intrare; aliases the cardinal and relative
# direction anchors as modifiers.
entrances:
  entrada: &intrare
    canonical: intrare
    sample: true
    canonical_probability: 0.7
    sample_probability: 0.3
    numeric:
      direction: left
  # Intrare 1, Intrare A, etc.
  alphanumeric:
    default: *intrare
    # The four type probabilities below sum to 1.0
    numeric_probability: 0.1  # e.g. Intrare 1
    alpha_probability: 0.85  # e.g. Intrare A
    numeric_plus_alpha_probability: 0.025  # e.g. 1A
    alpha_plus_numeric_probability: 0.025  # e.g. A1
    alpha_plus_numeric:
      whitespace_probability: 0.1
    numeric_plus_alpha:
      whitespace_probability: 0.1
  # NOTE(review): `directional` is placed beside `alphanumeric` here; it
  # may instead belong inside it. Confirm against the consumer.
  directional:
    modifier:
      alternatives:
        - alternative: *nord
        - alternative: *sud
        - alternative: *est
        - alternative: *vest
        - alternative: *dreapta
        - alternative: *stanga
# Staircase phrases ("scara" / "sc"). Defines &scara; aliases the cardinal
# and relative direction anchors as modifiers.
staircases:
  scara: &scara
    canonical: scara
    abbreviated: sc
    sample: true
    # canonical + abbreviated + sample = 1.0
    canonical_probability: 0.3
    abbreviated_probability: 0.4
    sample_probability: 0.3
    numeric:
      direction: left
  alphanumeric:
    # For alphanumerics, Scara A, Scara 1, etc.
    default: *scara
    # The four type probabilities below sum to 1.0
    numeric_probability: 0.35  # e.g. Scara 1
    alpha_probability: 0.6  # e.g. Scara A
    numeric_plus_alpha_probability: 0.025  # e.g. 1A
    alpha_plus_numeric_probability: 0.025  # e.g. A1
    alpha_plus_numeric:
      whitespace_probability: 0.1
    numeric_plus_alpha:
      whitespace_probability: 0.1
  # NOTE(review): `directional` is placed beside `alphanumeric` here; it
  # may instead belong inside it. Confirm against the consumer.
  directional:
    direction: right  # e.g. Scara Nord
    direction_probability: 0.8
    modifier:
      alternatives:
        - alternative: *nord
        - alternative: *sud
        - alternative: *est
        - alternative: *vest
        - alternative: *dreapta
        - alternative: *stanga
# Post-office-box phrases ("căsuță poștală" / "cp") and the distribution
# of box-number lengths.
po_boxes:
  casuta_postala: &casuta_postala
    canonical: căsuță poștală
    abbreviated: cp
    sample: true
    # canonical + abbreviated + sample = 1.0
    canonical_probability: 0.4
    abbreviated_probability: 0.3
    sample_probability: 0.3
    numeric:
      direction: left
      # Sometimes add the "number" phrase, e.g. CP nr 1234
      add_number_phrase: true
      add_number_phrase_probability: 0.4
    numeric_probability: 1.0
  alphanumeric:
    sample: false
    default: *casuta_postala
    # The four type probabilities below sum to 1.0
    numeric_probability: 0.9  # e.g. CP 123
    alpha_probability: 0.05  # e.g. CP A
    numeric_plus_alpha_probability: 0.04  # e.g. CP 123G
    alpha_plus_numeric_probability: 0.01  # e.g. CP A123
    alpha_plus_numeric:
      whitespace_probability: 0.1
    numeric_plus_alpha:
      whitespace_probability: 0.1
  # Length distribution for numeric parts (probabilities sum to 1.0)
  digits:
    - length: 1
      probability: 0.05
    - length: 2
      probability: 0.1
    - length: 3
      probability: 0.2
    - length: 4
      probability: 0.5
    - length: 5
      probability: 0.1
    - length: 6
      probability: 0.05
# Unit phrases (apartament, sală, birou, lotul) and per-zone selection.
# `zones` sits beside `alphanumeric` because zones.residential aliases the
# &unit_alphanumeric anchor.
units:
  apartament: &apartament
    canonical: apartament
    abbreviated: ap
    sample: true
    # canonical + abbreviated + sample = 1.0
    canonical_probability: 0.2
    abbreviated_probability: 0.6
    sample_probability: 0.2
    numeric:
      direction: left
  sala: &sala
    canonical: sală
    sample: true
    canonical_probability: 0.7
    sample_probability: 0.3
    numeric:
      direction: left
  birou: &birou
    canonical: birou
    sample: true
    canonical_probability: 0.7
    sample_probability: 0.3
    numeric:
      direction: left
  lotul: &lotul
    canonical: lotul
    sample: true
    canonical_probability: 0.7
    sample_probability: 0.3
    numeric:
      direction: left
  alphanumeric: &unit_alphanumeric
    default: *apartament
    probability: 0.9
    sample: true
    alternatives:
      - alternative: *sala
        probability: 0.1
    # Separate random probability for adding a direction phrase
    # (see the directions section, e.g. dreapta / stânga)
    add_direction: true
    add_direction_probability: 0.1
    add_direction_numeric: true  # Only for numbers
    # The four type probabilities below sum to 1.0
    numeric_probability: 0.9  # e.g. ap 1
    numeric_plus_alpha_probability: 0.01  # e.g. ap 1A
    alpha_plus_numeric_probability: 0.01  # e.g. ap A1
    alpha_probability: 0.08  # e.g. ap A
    alpha_plus_numeric:
      whitespace_probability: 0.1
    numeric_plus_alpha:
      whitespace_probability: 0.1
  zones:
    residential: *unit_alphanumeric
    commercial:
      default: *birou
      numeric_probability: 0.9  # e.g. Birou 1
      numeric_plus_alpha_probability: 0.01  # e.g. Birou 1A
      alpha_plus_numeric_probability: 0.01  # e.g. Birou A1
      alpha_probability: 0.08  # e.g. Birou A
      alpha_plus_numeric:
        whitespace_probability: 0.1
      numeric_plus_alpha:
        whitespace_probability: 0.1
    industrial:
      # default + alternatives sum to 1.0
      default: *lotul
      probability: 0.5
      alternatives:
        - alternative: *birou
          probability: 0.3
        - alternative: *sala
          probability: 0.2
      numeric_probability: 0.9  # e.g. Lotul 1
      numeric_plus_alpha_probability: 0.01  # e.g. Lotul 1A
      alpha_plus_numeric_probability: 0.01  # e.g. Lotul A1
      alpha_probability: 0.08  # e.g. Lotul A
      alpha_plus_numeric:
        whitespace_probability: 0.1
      numeric_plus_alpha:
        whitespace_probability: 0.1
    university:
      # default + alternatives sum to 1.0
      default: *sala
      probability: 0.9
      alternatives:
        - alternative: *birou
          probability: 0.1
      numeric_probability: 0.9  # e.g. Sala 1
      numeric_plus_alpha_probability: 0.01  # e.g. Sala 1A
      alpha_plus_numeric_probability: 0.01  # e.g. Sala A1
      alpha_probability: 0.08  # e.g. Sala A
      alpha_plus_numeric:
        whitespace_probability: 0.1
      numeric_plus_alpha:
        whitespace_probability: 0.1