# Source-viewer metadata (not part of the specification):
# Files | 2025-09-06 22:03:29 -04:00 | 510 lines | 14 KiB | YAML

# pl.yaml
# -------
# Polish language specification.
# Per-component probability settings. Within each component the listed
# probabilities sum to 1.
# NOTE(review): presumably null_probability is the chance the component is
# omitted from a generated address - confirm against the consumer.
components:
  level:
    null_probability: 0.95
    alphanumeric_probability: 0.04
    standalone_probability: 0.01
  staircase:
    null_probability: 0.99
    alphanumeric_probability: 0.01
  entrance:
    null_probability: 0.999
    alphanumeric_probability: 0.001
  unit:
    null_probability: 0.75
    alphanumeric_probability: 0.25
  # Components that may be joined into a single labelled value.
  combinations:
    - components:
        - house_number
        - unit
      label: house_number
      # Separator choices; their probabilities sum to 1.
      separators:
        - separator: "/"
          probability: 0.9
        - separator: "-"
          probability: 0.05
        - separator: " - "
          probability: 0.05
      # Probability attached to this combination as a whole.
      probability: 0.01
# Number phrase ("numer" / "nr"). The anchor &numer is reused by
# house_numbers.alphanumeric.
numbers:
  default: &numer
    canonical: numer
    abbreviated: nr
    sample: true
    # Probabilities of each surface form (sum to 1)
    canonical_probability: 0.3
    abbreviated_probability: 0.5
    sample_probability: 0.2
    # NOTE(review): presumably forms excluded when sampling - confirm.
    sample_exclude:
      - "#"
    numeric:
      direction: left
    numeric_affix:
      affix: "#"
      direction: left
    # Choice between the two renderings above (sum to 1)
    numeric_probability: 0.4
    numeric_affix_probability: 0.6
# House-number phrases. The default phrase is the *numer alias, with "dom"
# as the alternative (0.6 + 0.4 = 1).
house_numbers:
  dom: &dom
    canonical: dom
    abbreviated: d
    sample: true
    canonical_probability: 0.8
    abbreviated_probability: 0.1
    sample_probability: 0.1
    numeric:
      direction: left
  alphanumeric:
    default: *numer
    probability: 0.6
    alternatives:
      - alternative: *dom
        probability: 0.4
  # NOTE(review): placed as a sibling of `alphanumeric`; the nesting was
  # reconstructed from a flattened copy - confirm against the consumer.
  alphanumeric_phrase_probability: 0.0001
# Conjunction phrase ("i" / "&"). The anchor &i is reused by cross_streets.
and:
  default: &i
    canonical: i
    abbreviated: "&"
    sample: true
    # Surface-form probabilities (sum to 1)
    canonical_probability: 0.2
    abbreviated_probability: 0.75
    sample_probability: 0.05
# Phrases used to join cross streets / intersections.
cross_streets:
  and: *i
  at: &w
    canonical: w
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
  corner_of: &rogu
    canonical: rogu
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
  at_the_corner_of: &na_rogu
    canonical: na rogu
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
  # Phrase choice for an intersection: default plus alternatives sum to 1.
  intersection:
    default: *i
    probability: 0.7
    alternatives:
      - alternative: *w
        probability: 0.1
      - alternative: *rogu
        probability: 0.1
      - alternative: *na_rogu
        probability: 0.1
  between:
    canonical: pomiędzy
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    # NOTE(review): nesting reconstructed from a flattened copy; this key
    # could instead belong directly under `cross_streets` - confirm.
    parentheses_probability: 0.5
# Floor / level phrases, aliases for particular floor numbers, and the
# distribution of identifier types.
levels:
  floor: &pietro
    canonical: piętro
    sample: true
    canonical_probability: 0.9
    sample_probability: 0.1
    numeric:
      direction: left
      direction_probability: 0.9
      digits:
        ascii_probability: 0.7
        roman_numeral_probability: 0.3
    ordinal:
      direction: right
      digits:
        ascii_probability: 0.3
        roman_numeral_probability: 0.7
        # NOTE(review): nesting reconstructed from a flattened copy; this
        # key could instead be a sibling of `digits` - confirm.
        ordinal_suffix_probability: 0.6
    # Choice between numeric and ordinal rendering (sum to 1)
    numeric_probability: 0.4
    ordinal_probability: 0.6
  parter: &parter
    canonical: parter
    sample: true
    canonical_probability: 0.9
    sample_probability: 0.1
  suterena: &suterena
    canonical: suterena
    # e.g. suterena 1
    numeric:
      direction: left
      direction_probability: 0.8
    # e.g. s1
    numeric_affix:
      affix: s
      direction: left
    # e.g. 1. suterena
    ordinal:
      direction: right
      digits:
        ascii_probability: 0.7
        roman_numeral_probability: 0.3
    number_abs_value: true
    number_min_abs_value: 1
    # Rendering choice (0.985 + 3 * 0.005 = 1)
    standalone_probability: 0.985
    numeric_probability: 0.005
    numeric_affix_probability: 0.005
    ordinal_probability: 0.005
  # Phrases substituted for specific floor numbers; keys are quoted so they
  # stay strings.
  aliases:
    "<-1":
      default: *suterena
    "-1":
      default: *suterena
    "0":
      default: *parter
      probability: 0.9
      alternatives:
        - alternative: *pietro
          probability: 0.1
  numbering_starts_at: 0
  alphanumeric:
    default: *pietro
    numeric_probability: 0.99  # With this probability, pick an integer
    alpha_probability: 0.0098  # With this probability, pick a letter e.g. A
    numeric_plus_alpha_probability: 0.0001  # e.g. 2A
    alpha_plus_numeric_probability: 0.0001  # e.g. A2
# Prepositional phrases for category queries. Each phrase group has a
# default plus weighted alternatives (weights sum to 1 per group).
categories:
  near:
    default:
      canonical: w pobliżu
      sample: true
      canonical_probability: 0.8
      sample_probability: 0.2
    probability: 0.7
    alternatives:
      - alternative:
          canonical: blisko
          sample: true
          canonical_probability: 0.8
          sample_probability: 0.2
        probability: 0.1
      - alternative:
          canonical: koło
          sample: true
          canonical_probability: 0.7
          sample_probability: 0.3
        probability: 0.05
      - alternative:
          canonical: niedaleko
          sample: true
          canonical_probability: 0.8
          sample_probability: 0.2
        probability: 0.05
      - alternative:
          canonical: obok
          sample: true
          canonical_probability: 0.8
          sample_probability: 0.2
        probability: 0.05
      - alternative:
          canonical: przy
          sample: true
          canonical_probability: 0.8
          sample_probability: 0.2
        probability: 0.05
  nearby:
    default:
      canonical: w pobliżu
      sample: true
      canonical_probability: 0.8
      sample_probability: 0.2
    probability: 0.6
    alternatives:
      - alternative:
          canonical: w pobliżu tutaj
          sample: true
          canonical_probability: 0.8
          sample_probability: 0.2
        probability: 0.2
      - alternative:
          canonical: wokół tutaj
          sample: true
          canonical_probability: 0.8
          sample_probability: 0.2
        probability: 0.1
      - alternative:
          canonical: blisko
          sample: true
          canonical_probability: 0.8
          sample_probability: 0.2
        probability: 0.1
  near_me:
    default:
      canonical: w pobliżu mnie
      sample: true
      canonical_probability: 0.8
      sample_probability: 0.2
  # Don't worry about agreement between the preposition (w / we) and the
  # word that follows it.
  in:
    default:
      canonical: w
    probability: 0.7
    alternatives:
      - alternative:
          canonical: we
        probability: 0.3
  # Probabilities of each phrase group (sum to 1)
  near_probability: 0.35
  nearby_probability: 0.2
  near_me_probability: 0.1
  in_probability: 0.35
# Relative directions (right / left), chosen with equal weight.
directions:
  right: &prawo
    canonical: prawo
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: right
  left: &lewo
    canonical: lewo
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: right
  alternatives:
    - alternative: *prawo
      probability: 0.5
    - alternative: *lewo
      probability: 0.5
# Cardinal directions with abbreviations. Each direction can be rendered
# with a number either as a separate word (numeric) or as an affix
# (numeric_affix), with equal weight. The anchors are reused by
# staircases.
cardinal_directions:
  east: &wschod
    canonical: wschód
    abbreviated: w
    canonical_probability: 0.95
    abbreviated_probability: 0.05
    numeric:
      direction: right
    numeric_affix:
      affix: w
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  west: &zachod
    canonical: zachód
    abbreviated: z
    canonical_probability: 0.95
    abbreviated_probability: 0.05
    numeric:
      direction: right
    numeric_affix:
      affix: z
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  north: &polnoc
    canonical: północ
    abbreviated: pn
    canonical_probability: 0.95
    abbreviated_probability: 0.05
    numeric:
      direction: right
    numeric_affix:
      affix: pn
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  # Unlike the other three directions, south also enables sampling.
  south: &poludnie
    canonical: południe
    abbreviated: pd
    sample: true
    canonical_probability: 0.75
    abbreviated_probability: 0.1
    sample_probability: 0.15
    numeric:
      direction: right
    numeric_affix:
      affix: pd
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  # Uniform choice among the four directions (sum to 1)
  alternatives:
    - alternative: *polnoc
      probability: 0.25
    - alternative: *wschod
      probability: 0.25
    - alternative: *poludnie
      probability: 0.25
    - alternative: *zachod
      probability: 0.25
# Entrance phrase and the distribution of entrance identifier types.
entrances:
  wejscie: &wejscie
    canonical: wejście
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
  # Wejście 1, Wejście A, etc. (identifier-type probabilities sum to 1)
  alphanumeric: &entrance_alphanumeric
    default: *wejscie
    numeric_probability: 0.1  # e.g. Wejście 1
    alpha_probability: 0.85  # e.g. Wejście A
    numeric_plus_alpha_probability: 0.025  # e.g. 1A
    alpha_plus_numeric_probability: 0.025  # e.g. A1
    alpha_plus_numeric:
      whitespace_probability: 0.1
    numeric_plus_alpha:
      whitespace_probability: 0.1
# Staircase phrase and the distribution of staircase identifier types.
staircases:
  schody: &schody
    canonical: schody
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
  # Identifier-type probabilities sum to 1.
  alphanumeric: &staircase_alphanumeric
    default: *schody
    numeric_probability: 0.75
    alpha_probability: 0.2
    numeric_plus_alpha_probability: 0.025
    alpha_plus_numeric_probability: 0.025
    alpha_plus_numeric:
      whitespace_probability: 0.1
    numeric_plus_alpha:
      whitespace_probability: 0.1
    # NOTE(review): nesting reconstructed from a flattened copy;
    # `directional` could instead sit directly under `staircases` -
    # confirm against the consumer. Its modifier alternatives are the
    # cardinal-direction phrases and carry no explicit probabilities.
    directional:
      direction: left
      direction_probability: 0.85
      modifier:
        alternatives:
          - alternative: *polnoc
          - alternative: *poludnie
          - alternative: *wschod
          - alternative: *zachod
# Post-office-box phrase, identifier-type distribution, and the
# distribution of box-number lengths.
po_boxes:
  skrytka_pocztowa: &skrytka_pocztowa
    canonical: skrytka pocztowa
    abbreviated: skr poczt
    sample: true
    canonical_probability: 0.3
    abbreviated_probability: 0.5
    sample_probability: 0.2
    numeric:
      direction: left
      # NOTE(review): nesting reconstructed from a flattened copy; these
      # two keys could instead be siblings of `numeric` - confirm.
      add_number_phrase: true
      add_number_phrase_probability: 0.2  # Skr Poczt 1234
  # Identifier-type probabilities sum to 1.
  alphanumeric:
    default: *skrytka_pocztowa
    numeric_probability: 0.9  # Skr Poczt 123
    alpha_probability: 0.05  # Skr Poczt A
    numeric_plus_alpha_probability: 0.04  # Skr Poczt 123G
    alpha_plus_numeric_probability: 0.01  # Skr Poczt A123
    alpha_plus_numeric:
      whitespace_probability: 0.1
    numeric_plus_alpha:
      whitespace_probability: 0.1
  # Number-of-digits distribution (probabilities sum to 1).
  digits:
    - length: 1
      probability: 0.05
    - length: 2
      probability: 0.1
    - length: 3
      probability: 0.2
    - length: 4
      probability: 0.5
    - length: 5
      probability: 0.1
    - length: 6
      probability: 0.05
# Unit phrases (apartment / room), the distribution of unit identifier
# types, and per-zone overrides.
units:
  mieszkanie: &mieszkanie
    canonical: mieszkanie
    abbreviated: m
    sample: true
    canonical_probability: 0.05
    abbreviated_probability: 0.9
    sample_probability: 0.05
    numeric:
      direction: left
  pokoj: &pokoj
    canonical: pokój
    abbreviated: pok
    sample: true
    canonical_probability: 0.4
    abbreviated_probability: 0.5
    sample_probability: 0.1
    numeric:
      direction: left
  alphanumeric: &unit_alphanumeric
    # Phrase choice: default plus alternatives sum to 1.
    default: *mieszkanie
    probability: 0.9
    alternatives:
      - alternative: *pokoj
        probability: 0.1
    # Identifier-type probabilities sum to 1.
    numeric_probability: 0.9  # e.g. m. 1
    numeric_plus_alpha_probability: 0.03  # e.g. 1A
    alpha_plus_numeric_probability: 0.03  # e.g. A1
    alpha_probability: 0.04  # e.g. m. A
    alpha_plus_numeric:
      whitespace_probability: 0.1
    numeric_plus_alpha:
      whitespace_probability: 0.1
    # If there are 10 floors, create unit numbers like #301 or #1032
    # NOTE(review): nesting reconstructed from a flattened copy; confirm
    # this key belongs under `alphanumeric` rather than `units`.
    use_floor_probability: 0.01
  # NOTE(review): `zones` is placed directly under `units`; confirm.
  zones:
    commercial: &commercial_unit_types
      default: *pokoj
      numeric_probability: 0.95  # e.g. pokój 1
      numeric_plus_alpha_probability: 0.01  # e.g. pokój 1A
      alpha_plus_numeric_probability: 0.01  # e.g. pokój A1
      alpha_probability: 0.03  # e.g. pokój A
      alpha_plus_numeric:
        whitespace_probability: 0.1
      numeric_plus_alpha:
        whitespace_probability: 0.1
    # University zones reuse the commercial settings unchanged.
    university: *commercial_unit_types