[addresses] Polish address config and dictionary updates

This commit is contained in:
Al
2016-06-25 20:36:07 -04:00
parent 558d643042
commit 53052e6d25
13 changed files with 520 additions and 3 deletions

492
resources/addresses/pl.yaml Normal file
View File

@@ -0,0 +1,492 @@
# pl.yaml
# -------
# Polish language specification.
components:
level:
null_probability: 0.95
alphanumeric_probability: 0.04
standalone_probability: 0.01
staircase:
null_probability: 0.99
alphanumeric_probability: 0.01
entrance:
null_probability: 0.999
alphanumeric_probability: 0.001
unit:
null_probability: 0.75
alphanumeric_probability: 0.25
combinations:
level_unit:
components:
- house_number
- unit
label: house_number
separators:
- separator: "/"
probability: 0.9
- separator: "-"
probability: 0.05
- separator: " - "
probability: 0.05
probability: 0.01
numbers:
default: &numer
canonical: numer
abbreviated: nr
sample: true
# Probabilities
canonical_probability: 0.3
abbreviated_probability: 0.5
sample_probability: 0.2
sample_exclude:
- "#"
numeric:
direction: left
numeric_affix:
affix: "#"
direction: left
numeric_probability: 0.4
numeric_affix_probability: 0.6
house_number:
dom: &dom
canonical: dom
abbreviated: d
sample: true
canonical_probability: 0.8
abbreviated_probability: 0.1
sample_probability: 0.1
numeric:
direction: left
alphanumeric:
default: *numer
probability: 0.6
alternatives:
- alternative: *dom
probability: 0.4
alphanumeric_phrase_probability: 0.0001
and:
default: &i
canonical: i
abbreviated: "&"
canonical_probability: 0.2
abbreviated_probability: 0.75
sample: true
sample_probability: 0.05
cross_streets:
and: *i
corner_of: &rogu
canonical: rogu
sample: true
canonical_probability: 0.8
sample_probability: 0.2
at_the_corner_of: &na_rogu
canonical: na rogu
sample: true
canonical_probability: 0.8
sample_probability: 0.2
intersection:
default: *i
probability: 0.7
alternatives:
- alternative: *rogu
probability: 0.15
- alternative: *na_rogu
probability: 0.15
between:
canonical: pomiędzy
sample: true
canonical_probability: 0.8
sample_probability: 0.2
parentheses_probability: 0.5
levels:
floor: &pietro
canonical: piętro
sample: true
canonical_probability: 0.9
sample_probability: 0.1
numeric:
direction: left
direction_probability: 0.9
ordinal:
direction: right
numeric_probability: 0.4
ordinal_probability: 0.6
parter: &parter
canonical: parter
sample: true
canonical_probability: 0.9
sample_probability: 0.1
suterena: &suterena
canonical: suterena
# e.g. suterena 1
numeric:
direction: left
direction_probability: 0.8
# e.g. s1
numeric_affix:
affix: s
direction: left
# e.g. 1. suterena
ordinal:
direction: right
standalone_probability: 0.985
number_abs_value: true
number_min_abs_value: 1
numeric_probability: 0.005
numeric_affix_probability: 0.005
ordinal_probability: 0.005
aliases:
"<-1":
default: *suterena
"-1":
default: *suterena
"0":
default: *parter
probability: 0.9
alternatives:
- alternative: *pietro
probability: 0.1
numbering_starts_at: 0
alphanumeric:
default: *pietro
numeric_probability: 0.99 # With this probability, pick an integer
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
alpha_plus_numeric_probability: 0.0001 # e.g. A2
categories:
near:
default:
canonical: w pobliżu
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.7
alternatives:
- alternative:
canonical: blisko
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.1
- alternative:
canonical: koło
sample: true
canonical_probability: 0.7
sample_probability: 0.3
probability: 0.05
- alternative:
canonical: niedaleko
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.05
- alternative:
canonical: obok
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.05
- alternative:
canonical: przy
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.05
nearby:
default:
canonical: w pobliżu
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.6
alternatives:
- alternative:
canonical: w pobliżu tutaj
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.2
- alternative:
canonical: wokół tutaj
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.1
- alternative:
canonical: blisko
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.1
near_me:
default:
canonical: w pobliżu mnie
sample: true
canonical_probability: 0.8
sample_probability: 0.2
# Don't worry about agreement
in:
default:
canonical: w
probability: 0.7
alternatives:
- alternative:
canonical: we
probability: 0.3
# Probabilities of each phrase
near_probability: 0.35
nearby_probability: 0.2
near_me_probability: 0.1
in_probability: 0.35
directions:
right: &prawo
canonical: prawo
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
left: &lewo
canonical: lewo
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
alternatives:
- alternative: *prawo
probability: 0.5
- alternative: *lewo
probability: 0.5
cardinal_directions:
east: &wschod
canonical: wschód
abbreviated: w
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: w
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
west: &zachod
canonical: zachód
abbreviated: z
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: z
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
north: &polnoc
canonical: północ
abbreviated: pn
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: pn
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
south: &poludnie
canonical: południe
abbreviated: pd
sample: true
canonical_probability: 0.75
abbreviated_probability: 0.1
sample_probability: 0.15
numeric:
direction: right
numeric_affix:
affix: pd
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
alternatives:
- alternative: *polnoc
probability: 0.25
- alternative: *wschod
probability: 0.25
- alternative: *poludnie
probability: 0.25
- alternative: *zachod
probability: 0.25
entrances:
wejscie: &wejscie
canonical: wejście
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
# Wejście 1, Wejście A, etc.
alphanumeric: &entrance_alphanumeric
default: *wejscie
numeric_probability: 0.1 # e.g. Wejście 1
alpha_probability: 0.85 # e.g. Wejście A
numeric_plus_alpha_probability: 0.025 # e.g. 1A
alpha_plus_numeric_probability: 0.025 # e.g. A1
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
staircases:
schody: &schody
canonical: schody
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
alphanumeric: &staircase_alphanumeric
default: *schody
numeric_probability: 0.75
alpha_probability: 0.2
numeric_plus_alpha_probability: 0.025
alpha_plus_numeric_probability: 0.025
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
directional:
direction: left
direction_probability: 0.85
modifier:
alternatives:
- alternative: *polnoc
- alternative: *poludnie
- alternative: *wschod
- alternative: *zachod
po_boxes:
skrytka_pocztowa: &skrytka_pocztowa
canonical: skrytka pocztowa
abbreviated: skr poczt
sample: true
canonical_probability: 0.3
abbreviated_probability: 0.5
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2 # Skr Poczt 1234
alphanumeric:
default: *skrytka_pocztowa
numeric_probability: 0.9 # Skr Poczt 123
alpha_probability: 0.05 # Skr Poczt A
numeric_plus_alpha_probability: 0.04 # Skr Poczt 123G
alpha_plus_numeric_probability: 0.01 # Skr Poczt A123
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
digits:
- length: 1
probability: 0.05
- length: 2
probability: 0.1
- length: 3
probability: 0.2
- length: 4
probability: 0.5
- length: 5
probability: 0.1
- length: 6
probability: 0.05
units:
mieszkanie: &mieszkanie
canonical: mieszkanie
abbreviated: m
sample: true
canonical_probability: 0.05
abbreviated_probability: 0.9
sample_probability: 0.05
numeric:
direction: left
pokoj: &pokoj
canonical: pokój
abbreviated: pok
sample: true
canonical_probability: 0.4
abbreviated_probability: 0.5
sample_probability: 0.1
numeric:
direction: left
alphanumeric: &unit_alphanumeric
default: *mieszkanie
probability: 0.9
alternatives:
- alternative: *pokoj
probability: 0.1
numeric_probability: 0.9 # e.g. m. 1
numeric_plus_alpha_probability: 0.03 # e.g. 1A
alpha_plus_numeric_probability: 0.03 # e.g. A1
alpha_probability: 0.04 # e.g. m. A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# If there are 10 floors, create unit numbers like #301 or #1032
use_floor_probability: 0.01
zones:
commercial: &commercial_unit_types
default: *pokoj
numeric_probability: 0.95 # e.g. pokój 1
numeric_plus_alpha_probability: 0.01 # e.g. pokój 1A
alpha_plus_numeric_probability: 0.01 # e.g. pokój A1
alpha_probability: 0.03 # e.g. pokój A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
university: *commercial_unit_types

View File

@@ -1,3 +1,4 @@
d
g g
k k
m m

View File

@@ -0,0 +1,5 @@
i
na rogu
pomiędzy|pomiedzy
rogu
w

View File

@@ -1,8 +1,8 @@
północ|polnoc|płn|pln północ|polnoc|płn|pln
północny wschód|polnocny wschod|pn.-wsch.|pn-wsch|pn wsch|płn.-wsch.|płn-wsch|płn wsch|pln.-wsch.|pln-wsch|pln wsch północny wschód|polnocny wschod|pn.-wsch.|pn-wsch|pn wsch|płn.-wsch.|płn-wsch|płn wsch|pln.-wsch.|pln-wsch|pln wsch
północny zachód|polnocny zachod|pn.-zach.|pn-zach|pn zach|płn.-zach.|płn-zach|płn zach|pln.-zach.|pln-zach|pln zach północny zachód|polnocny zachod|pn.-zach.|pn-zach|pn zach|płn.-zach.|płn-zach|płn zach|pln.-zach.|pln-zach|pln zach
wschód|wschod wschód|wschod|w|wsch
południe|poludnie|pd|płd|pld południe|poludnie|pd|płd|pld
południowy wschód|poludniowy wschod|pd.-wsch.|pd-wsch|pd wsch|płd.-wsch.|płd-wsch|płd wsch|pld.-wsch.|pld-wsch|pld wsch południowy wschód|poludniowy wschod|pd.-wsch.|pd-wsch|pd wsch|płd.-wsch.|płd-wsch|płd wsch|pld.-wsch.|pld-wsch|pld wsch
południowy zachód|poludniowy zachod|pd.-zach.|pd-zach|pd zach|płd.-zach.|płd-zach|płd zach|pld.-zach.|pld-zach|pld zach południowy zachód|poludniowy zachod|pd.-zach.|pd-zach|pd zach|płd.-zach.|płd-zach|płd zach|pld.-zach.|pld-zach|pld zach
zachód|zachod zachód|zachod|z|zach

View File

@@ -0,0 +1 @@
wejście|wejscie

View File

@@ -0,0 +1 @@
dom|d

View File

@@ -0,0 +1 @@
suterena|s

View File

@@ -0,0 +1 @@
parter

View File

@@ -0,0 +1,11 @@
blisko
koło|kolo|k
niedaleko
obok
przy
w
w pobliżu|w poblizu
w pobliżu mnie|w poblizu mnie
w pobliżu tutaj|w poblizu tutaj
wokół tutaj|wokol tutaj
we

View File

@@ -0,0 +1 @@
numer|nr|#|№

View File

@@ -0,0 +1 @@
schody

View File

@@ -1,3 +1,3 @@
i i
na na
koło|k w

View File

@@ -0,0 +1,2 @@
lewo
prawo