Files
libpostal/resources/addresses/nb.yaml
2016-07-21 17:04:57 -04:00

595 lines
16 KiB
YAML
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# nb.yaml
# -------
# Norwegian Bokmål (nb) language specification.
# Per-component settings for level, staircase, entrance and unit, followed by
# the `combinations` entry that pairs components together.
# NOTE(review): this copy has no leading indentation, so nesting cannot be read
# from whitespace — confirm the structure against the original file.
components:
level:
null_probability: 0.85
alphanumeric_probability: 0.1
standalone_probability: 0.05
staircase:
null_probability: 0.99
alphanumeric_probability: 0.01
entrance:
null_probability: 0.999
alphanumeric_probability: 0.001
unit:
null_probability: 0.75
alphanumeric_probability: 0.25
combinations:
# level_unit: level and unit listed together under the `unit` label, with
# zero_pad_digits 2 and the empty string as the only separator.
# NOTE(review): presumably produces values like 0234 (level 2, unit 34); the
# previous "2/34 ... in German" example did not fit this separator — confirm.
level_unit:
components:
- level
- unit
label: unit
zero_pad_digits: 2
separators:
- separator: ""
probability: 1.0
probability: 0.05
# Number phrase ("nummer", abbreviated "nr"), anchored as &nummer so other
# sections can reference it with *nummer.
numbers:
default: &nummer
canonical: nummer
abbreviated: nr
sample: true
# Probabilities of the canonical / abbreviated / sampled form (sum to 1.0)
canonical_probability: 0.3
abbreviated_probability: 0.5
sample_probability: 0.2
# "#" is excluded from sampling; it is configured separately as the
# numeric_affix below.
sample_exclude:
- "#"
numeric:
direction: left
numeric_affix:
affix: "#"
direction: left
# numeric vs. numeric_affix split (sums to 1.0)
numeric_probability: 0.4
numeric_affix_probability: 0.6
# House-number phrases: the alphanumeric default is the shared *nummer phrase.
# NOTE(review): alphanumeric_phrase_probability is very small (0.0001), so the
# phrase is presumably almost never attached to a house number — confirm.
house_number:
alphanumeric:
default: *nummer
alphanumeric_phrase_probability: 0.0001
# Conjunction phrase ("og", abbreviated "&"), anchored as &og and referenced
# by the cross_streets section.
and:
default: &og
canonical: og
abbreviated: "&"
# canonical + abbreviated + sample probabilities sum to 1.0
canonical_probability: 0.2
abbreviated_probability: 0.75
sample: true
sample_probability: 0.05
# Phrases for intersections / cross streets.
cross_streets:
# Reuses the conjunction phrase defined under `and`.
and: *og
# "corner of"
corner_of: &hjorne_av
canonical: hjørne av
sample: true
canonical_probability: 0.8
sample_probability: 0.2
# "at the corner of"
at_the_corner_of: &pa_hjornet_av
canonical: på hjørnet av
sample: true
canonical_probability: 0.8
sample_probability: 0.2
# Intersection phrase choice: *og by default (0.7), otherwise one of the two
# corner phrases (0.15 each); the three probabilities sum to 1.0.
intersection:
default: *og
probability: 0.7
alternatives:
- alternative: *hjorne_av
probability: 0.15
- alternative: *pa_hjornet_av
probability: 0.15
# "between"
between:
canonical: mellom
sample: true
canonical_probability: 0.8
sample_probability: 0.2
# NOTE(review): presumably the chance of wrapping the phrase in parentheses —
# confirm against the consumer.
parentheses_probability: 0.5
# Floor / level phrases. Each phrase is anchored (&etasje, &hovedetasje, ...)
# and referenced again from `aliases` and `alphanumeric` further down.
levels:
# Generic "floor" phrase; allows both numeric and ordinal forms, each with
# direction: right.
floor: &etasje
canonical: etasje
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
direction_probability: 0.9
ordinal:
direction: right
numeric_probability: 0.4
ordinal_probability: 0.6
# Main floor; affix "h" on the left with zero_pad 2.
# NOTE(review): numeric_probability is 0.1 but, unlike underetasje and
# loftsetasje, no `numeric:` entry is defined here — confirm whether one
# is missing.
hovedetasje: &hovedetasje
canonical: hovedetasje
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric_affix:
affix: h
direction: left
zero_pad: 2
numeric_probability: 0.1
numeric_affix_probability: 0.9
# Lower-ground floor; affix "u" on the left with zero_pad 2.
underetasje: &underetasje
canonical: underetasje
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
numeric_affix:
affix: u
direction: left
zero_pad: 2
numeric_probability: 0.1
numeric_affix_probability: 0.9
# Attic floor; affix "l" on the left with zero_pad 2.
loftsetasje: &loftsetasje
canonical: loftsetasje
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
numeric_affix:
affix: l
direction: left
zero_pad: 2
numeric_probability: 0.1
numeric_affix_probability: 0.9
# Attic (short form); numeric form only.
loft: &loft
canonical: loft
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
# Basement.
kjeller: &kjeller
canonical: kjeller
sample: true
canonical_probability: 0.8
sample_probability: 0.2
# e.g. 1 kjeller
numeric:
direction: right
direction_probability: 0.8
# e.g. k1
numeric_affix:
affix: k
direction: left
# e.g. 1. k
ordinal:
direction: right
# NOTE(review): standalone (0.9) + numeric (0.005) + numeric_affix (0.1) +
# ordinal (0.005) add up to 1.01; if these four are meant to form a single
# distribution, one of the values needs adjusting — confirm.
standalone_probability: 0.9
number_abs_value: true
number_min_abs_value: 1
numeric_probability: 0.005
numeric_affix_probability: 0.1
ordinal_probability: 0.005
# Phrase choices keyed by "<-1", "-1" and "top".
aliases:
"<-1":
default: *kjeller
# -1: kjeller (0.9) or the generic floor phrase (0.1).
"-1":
default: *kjeller
probability: 0.9
alternatives:
- alternative: *etasje
probability: 0.1
# top: etasje (0.85), loftsetasje (0.1) or loft (0.05).
"top":
default: *etasje
probability: 0.85
alternatives:
- alternative: *loftsetasje
probability: 0.1
- alternative: *loft
probability: 0.05
numbering_starts_at: 1
# Alphanumeric levels: etasje (0.95) or underetasje (0.05); the four type
# probabilities below sum to 1.0.
alphanumeric:
default: *etasje
probability: 0.95
alternatives:
- alternative: *underetasje
probability: 0.05
numeric_probability: 0.99 # With this probability, pick an integer
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
alpha_plus_numeric_probability: 0.0001 # e.g. A2
# Phrases for category queries: "near", "nearby", "near me" and "in". Each
# group has a default phrase plus weighted alternatives.
# NOTE(review): several phrases below ("i nærheden af", "tæt på", "tæt ved",
# "nær mig", ...) use Danish spelling; Bokmål would normally be "i nærheten av",
# "tett på", "nær meg". Confirm whether this is intentional before changing —
# phrases with `sample: true` may have to match dictionary entries elsewhere.
categories:
# "near" — default 0.7, alternatives 0.2 + 0.1 (sum 1.0).
near:
default:
canonical: i nærheden af
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.7
alternatives:
- alternative:
canonical: tæt på
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.2
- alternative:
canonical: tæt ved
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.1
# "nearby" — default 0.4, alternatives 0.2 + 4 x 0.1 (sum 1.0).
nearby:
default:
canonical: i nærheden
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.4
alternatives:
- alternative:
canonical: rundt her
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.2
- alternative:
canonical: nær her
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.1
- alternative:
canonical: nær
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.1
- alternative:
canonical: omkring her
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.1
- alternative:
canonical: tæt på her
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.1
# "near me" — default 0.8, alternatives 0.1 + 0.1 (sum 1.0).
near_me:
default:
canonical: nær mig
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.8
alternatives:
- alternative:
canonical: i nærheden af mig
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.1
- alternative:
canonical: tæt på mig
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.1
# "in" — grammatical agreement with the following word is not handled.
in:
default:
canonical: i
probability: 0.8
alternatives:
- alternative:
canonical: om
probability: 0.1
- alternative:
# NOTE(review): no visible value on the next line, so it parses as null unless
# an invisible character is present — confirm the intended phrase.
canonical:
probability: 0.1
# Probabilities of each phrase group (sum to 1.0)
near_probability: 0.35
nearby_probability: 0.2
near_me_probability: 0.1
in_probability: 0.35
# Relative directions (right / left). Both entries use the same probabilities
# and differ only in phrase and affix letter.
directions:
# "right": høyre, affix "h" on the right.
right: &hoyre
canonical: høyre
sample: true
canonical_probability: 0.1
sample_probability: 0.9
numeric:
direction: right
numeric_affix:
affix: h
direction: right
whitespace_probability: 0.1
numeric_probability: 0.8
numeric_affix_probability: 0.2
# NOTE(review): "igjen" normally means "again" / "left over" in Norwegian; the
# spatial "left" is "venstre" (affix would then be "v"). This looks like a
# mistranslation — confirm before changing, since the anchor name and any
# matching dictionary entries would need to change together.
left: &igjen
canonical: igjen
sample: true
canonical_probability: 0.1
sample_probability: 0.9
numeric:
direction: right
numeric_affix:
affix: i
direction: right
whitespace_probability: 0.1
numeric_probability: 0.8
numeric_affix_probability: 0.2
# The two directions are weighted equally.
alternatives:
- alternative: *hoyre
probability: 0.5
- alternative: *igjen
probability: 0.5
# Cardinal directions. Each entry defines a canonical word, a one-letter
# abbreviation that is also used as the numeric affix, and an even split
# between the numeric and numeric_affix forms.
cardinal_directions:
east: &ost
canonical: øst
abbreviated: ø
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: ø
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
west: &vest
canonical: vest
abbreviated: v
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: v
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
north: &nord
canonical: nord
abbreviated: n
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: n
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
# South is the only cardinal direction with `sample: true`, so its
# probabilities are split three ways (0.75 / 0.1 / 0.15).
south: &syd
canonical: syd
abbreviated: s
sample: true
canonical_probability: 0.75
abbreviated_probability: 0.1
sample_probability: 0.15
numeric:
direction: right
numeric_affix:
affix: s
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
# All four directions are weighted equally (0.25 each).
alternatives:
- alternative: *nord
probability: 0.25
- alternative: *ost
probability: 0.25
- alternative: *syd
probability: 0.25
- alternative: *vest
probability: 0.25
# Entrance phrases.
entrances:
inngang: &inngang
canonical: inngang
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
# Inngang 1, Inngang A, etc. The four type probabilities below sum to 1.0.
alphanumeric: &entrance_alphanumeric
default: *inngang
numeric_probability: 0.1 # e.g. Inngang 1
alpha_probability: 0.85 # e.g. Inngang A
numeric_plus_alpha_probability: 0.025 # e.g. 1A
alpha_plus_numeric_probability: 0.025 # e.g. A1
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# Staircase phrases.
staircases:
# NOTE(review): "stiege" / "stg" looks German (Austrian) rather than
# Norwegian and may have been carried over from another language file —
# confirm.
stiege: &stiege
canonical: stiege
abbreviated: stg
sample: true
canonical_probability: 0.7
abbreviated_probability: 0.2
sample_probability: 0.1
numeric:
direction: left
trapp: &trapp
canonical: trapp
sample: true
canonical_probability: 0.9
sample_probability: 0.1
numeric:
direction: left
# Alphanumeric staircases: trapp (0.8) or stiege (0.2); the four type
# probabilities sum to 1.0.
alphanumeric: &staircase_alphanumeric
default: *trapp
probability: 0.8
alternatives:
- alternative: *stiege
probability: 0.2
numeric_probability: 0.75
alpha_probability: 0.2
numeric_plus_alpha_probability: 0.025
alpha_plus_numeric_probability: 0.025
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# Directional modifier drawn from the four cardinal-direction anchors; no
# per-alternative probabilities are given here.
directional:
direction: left
direction_probability: 0.85
modifier:
alternatives:
- alternative: *nord
- alternative: *syd
- alternative: *ost
- alternative: *vest
# PO box phrases and the distribution of box-number lengths.
po_boxes:
postboks: &postboks
canonical: postboks
abbreviated: pb
sample: true
canonical_probability: 0.6
abbreviated_probability: 0.2
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2 # Pb No 1234
boks: &boks
canonical: boks
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2 # Boks No 1234
# Alphanumeric boxes: postboks (0.9) or boks (0.1); the four type
# probabilities sum to 1.0.
alphanumeric:
sample: false
default: *postboks
probability: 0.9
alternatives:
- alternative: *boks
probability: 0.1
numeric_probability: 0.9 # Pb 123
alpha_probability: 0.05 # Pb A
numeric_plus_alpha_probability: 0.04 # Pb 123G
alpha_plus_numeric_probability: 0.01 # Pb A123
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# Box-number length distribution (1-6 digits); 4 digits is the most likely
# (0.5) and the probabilities sum to 1.0.
digits:
- length: 1
probability: 0.05
- length: 2
probability: 0.1
- length: 3
probability: 0.2
- length: 4
probability: 0.5
- length: 5
probability: 0.1
- length: 6
probability: 0.05
# Unit phrases (apartment, house, room) and the options for deriving unit
# numbers from directions and floors.
units:
# Apartment.
leilighet: &leilighet
canonical: leilighet
abbreviated: leil
sample: true
canonical_probability: 0.6
abbreviated_probability: 0.1
sample_probability: 0.3
numeric:
direction: left
null_phrase_probability: 0.4
# Leilighet nummer 4
add_number_phrase: true
add_number_phrase_probability: 0.05
# House.
hus: &hus
canonical: hus
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
# Room.
vaerelse: &vaerelse
canonical: værelse
sample: true
canonical_probability: 0.7
sample_probability: 0.3
numeric:
direction: left
# Alphanumeric units: leilighet (0.8), hus (0.1) or værelse (0.1).
alphanumeric: &unit_alphanumeric
default: *leilighet
probability: 0.8
alternatives:
- alternative: *hus
probability: 0.1
- alternative: *vaerelse
probability: 0.1
numeric_probability: 0.95 # e.g. Leilighet 1
alpha_probability: 0.05 # e.g. Leil A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# Separate random probability for adding directions like 2H, 2I, etc.
add_direction: true
add_direction_probability: 0.005
# Add directions for plain numbers
add_direction_numeric: true
# Add direction only e.g. Leilighet Igjen
add_direction_standalone: true
# If there are 10 floors, create unit numbers like #301 or #1032
use_floor_probability: 0.2
# Use the actual floor phrase as long as the whole phrase is numeric
# Has the effect of creating Bolignummer-style units
use_floor_numeric_affix_probability: 0.7
use_floor_affix_floor_num_digits: 2
use_floor_affix_unit_num_digits: 2