Files
2025-09-06 22:03:29 -04:00

460 lines
12 KiB
YAML

# is.yaml
# -------
# Icelandic language specification.
# Per-component probabilities. For each component the two values below sum
# to 1.0 (presumably: omit the component vs. render it as an alphanumeric
# value; confirm against the consumer).
components:
  level:
    null_probability: 0.9
    alphanumeric_probability: 0.1
  staircase:
    null_probability: 0.99
    alphanumeric_probability: 0.01
  entrance:
    null_probability: 0.999
    alphanumeric_probability: 0.001
  unit:
    null_probability: 0.75
    alphanumeric_probability: 0.25
  # Component pairs that can be joined into a single value tagged `label`.
  # Separator probabilities within each combination sum to 1.0.
  # NOTE(review): nesting rebuilt from key order; confirm `combinations`
  # belongs under `components`.
  combinations:
    - components:
        - level
        - unit
      label: unit
      separators:
        - separator: "-"
          probability: 0.9
        - separator: " - "
          probability: 0.1
      probability: 0.005
    - components:
        - entrance
        - unit
      label: unit
      separators:
        - separator: "-"
          probability: 0.9
        - separator: " - "
          probability: 0.1
      probability: 0.001
# Phrase for "number" (númer / nr / #). Anchored as `numer` so other
# sections can alias it.
numbers:
  default: &numer
    canonical: númer
    abbreviated: nr
    sample: true
    # Probabilities (canonical + abbreviated + sample = 1.0)
    canonical_probability: 0.3
    abbreviated_probability: 0.5
    sample_probability: 0.2
    sample_exclude:
      - "#"
    numeric:
      direction: left
    numeric_affix:
      affix: "#"
      direction: left
    # numeric + numeric_affix = 1.0
    numeric_probability: 0.4
    numeric_affix_probability: 0.6
# House numbers reuse the generic "number" phrase defined under `numbers`.
house_numbers:
  alphanumeric:
    default: *numer
  # NOTE(review): placed as a sibling of `alphanumeric` based on key order;
  # confirm against the consumer's lookup path.
  alphanumeric_phrase_probability: 0.0001
# Conjunction phrase (og / &). Anchored as `og` for reuse by aliases.
# canonical + abbreviated + sample = 1.0
and:
  default: &og
    canonical: og
    abbreviated: "&"
    canonical_probability: 0.2
    abbreviated_probability: 0.75
    sample: true
    sample_probability: 0.05
# Phrases for intersections / cross streets.
cross_streets:
  and: *og
  corner_of: &horn_of
    canonical: horn af
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
  at_the_corner_of: &a_horinu_a
    canonical: á horninu á
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
  # default + alternatives = 0.7 + 0.15 + 0.15 = 1.0
  intersection:
    default: *og
    probability: 0.7
    alternatives:
      - alternative: *horn_of
        probability: 0.15
      - alternative: *a_horinu_a
        probability: 0.15
  between:
    canonical: milli
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    # NOTE(review): kept under `between` based on key order; confirm it is
    # not meant to be a direct child of `cross_streets`.
    parentheses_probability: 0.5
# Floor / level phrases.
levels:
  floor: &haeo
    canonical: hæð
    sample: true
    canonical_probability: 0.9
    sample_probability: 0.1
    numeric:
      direction: right
      direction_probability: 0.9
    ordinal:
      direction: right
      digits:
        ascii_probability: 0.8
        spellout_probability: 0.2
    # numeric + ordinal = 1.0
    numeric_probability: 0.4
    ordinal_probability: 0.6
  jarohaeo: &jarohaeo
    canonical: jarðhæð
    sample: true
    canonical_probability: 0.3
    sample_probability: 0.7
  kjallara: &kjallara
    canonical: kjallara
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    # e.g. 1 kjallara
    numeric:
      direction: right
      direction_probability: 0.8
    # e.g. k1
    numeric_affix:
      affix: k
      direction: left
    # e.g. 1. kjallara
    ordinal:
      direction: right
    # standalone + numeric + numeric_affix + ordinal = 1.0
    standalone_probability: 0.985
    number_abs_value: true
    number_min_abs_value: 1
    numeric_probability: 0.005
    numeric_affix_probability: 0.005
    ordinal_probability: 0.005
  # Floor-number selectors mapped to a specific phrase. Keys are quoted so
  # they load as strings rather than integers.
  aliases:
    "<-1":
      default: *kjallara
    "-1":
      default: *kjallara
    "0":
      default: *jarohaeo
  numbering_starts_at: 0
  # The four probabilities below sum to 1.0.
  alphanumeric:
    default: *haeo
    numeric_probability: 0.99  # With this probability, pick an integer
    alpha_probability: 0.0098  # With this probability, pick a letter e.g. A
    numeric_plus_alpha_probability: 0.0001  # e.g. 2A
    alpha_plus_numeric_probability: 0.0001  # e.g. A2
# Prepositional phrases used with category queries (near / nearby /
# near me / in).
categories:
  near:
    default:
      canonical: nálægt
      sample: true
      canonical_probability: 0.8
      sample_probability: 0.2
  # default + alternatives = 0.6 + 0.2 + 0.1 + 0.1 = 1.0
  nearby:
    default:
      canonical: nálægt
      sample: true
      canonical_probability: 0.8
      sample_probability: 0.2
    probability: 0.6
    alternatives:
      - alternative:
          canonical: nálægt hér
          sample: true
          canonical_probability: 0.8
          sample_probability: 0.2
        probability: 0.2
      - alternative:
          canonical: hérna
          sample: true
          canonical_probability: 0.8
          sample_probability: 0.2
        probability: 0.1
      - alternative:
          canonical: hér
          sample: true
          canonical_probability: 0.8
          sample_probability: 0.2
        probability: 0.1
  near_me:
    default:
      canonical: nálægt mér
      sample: true
      canonical_probability: 0.8
      sample_probability: 0.2
  # Don't worry about agreement
  in:
    default:
      canonical: í
      sample: true
      canonical_probability: 0.8
      sample_probability: 0.2
  # Probabilities of each phrase (sum to 1.0)
  near_probability: 0.35
  nearby_probability: 0.2
  near_me_probability: 0.1
  in_probability: 0.35
# Relative directions (right / left) that can be attached to numbers.
directions:
  right: &til_haegri
    canonical: til hægri
    abbreviated: t.h
    sample: true
    # canonical + abbreviated + sample = 1.0
    canonical_probability: 0.2
    abbreviated_probability: 0.5
    sample_probability: 0.3
    numeric:
      direction: right
    # NOTE(review): `whitespace_probability` kept under `numeric_affix`
    # based on key order; confirm.
    numeric_affix:
      affix: t.h
      direction: right
      whitespace_probability: 0.1
    # numeric + numeric_affix = 1.0
    numeric_probability: 0.8
    numeric_affix_probability: 0.2
  left: &til_vinstri
    canonical: til vinstri
    abbreviated: t.v
    sample: true
    # canonical + abbreviated + sample = 1.0
    canonical_probability: 0.1
    abbreviated_probability: 0.6
    sample_probability: 0.3
    numeric:
      direction: right
    numeric_affix:
      affix: t.v
      direction: right
      whitespace_probability: 0.1
    # numeric + numeric_affix = 1.0
    numeric_probability: 0.8
    numeric_affix_probability: 0.2
  # Both directions are equally likely.
  alternatives:
    - alternative: *til_haegri
      probability: 0.5
    - alternative: *til_vinstri
      probability: 0.5
# Cardinal directions. Single-letter abbreviations and affixes are quoted so
# they always load as strings: YAML 1.1 lists bare `n` among its boolean
# literals, and quoting all four keeps the entries uniform.
cardinal_directions:
  east: &austur
    canonical: austur
    abbreviated: "a"
    canonical_probability: 0.95
    abbreviated_probability: 0.05
    numeric:
      direction: right
    numeric_affix:
      affix: "a"
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  west: &vestur
    canonical: vestur
    abbreviated: "v"
    canonical_probability: 0.95
    abbreviated_probability: 0.05
    numeric:
      direction: right
    numeric_affix:
      affix: "v"
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  north: &norour
    canonical: norður
    abbreviated: "n"
    canonical_probability: 0.95
    abbreviated_probability: 0.05
    numeric:
      direction: right
    numeric_affix:
      affix: "n"
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  # NOTE(review): only `south` enables `sample`; the other three do not.
  # Left as-is; confirm whether the asymmetry is intentional.
  south: &suour
    canonical: suður
    abbreviated: "s"
    sample: true
    canonical_probability: 0.75
    abbreviated_probability: 0.1
    sample_probability: 0.15
    numeric:
      direction: right
    numeric_affix:
      affix: "s"
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  # All four directions are equally likely.
  alternatives:
    - alternative: *norour
      probability: 0.25
    - alternative: *austur
      probability: 0.25
    - alternative: *suour
      probability: 0.25
    - alternative: *vestur
      probability: 0.25
# Entrance phrases.
entrances:
  inngangur: &inngangur
    canonical: inngangur
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
  # Inngangur 1, Inngangur A, etc.
  # The four *_probability values below sum to 1.0.
  alphanumeric: &entrance_alphanumeric
    default: *inngangur
    numeric_probability: 0.1  # e.g. Inngangur 1
    alpha_probability: 0.85  # e.g. Inngangur A
    numeric_plus_alpha_probability: 0.025  # e.g. 1A
    alpha_plus_numeric_probability: 0.025  # e.g. A1
    alpha_plus_numeric:
      whitespace_probability: 0.1
    numeric_plus_alpha:
      whitespace_probability: 0.1
# Staircase phrases.
staircases:
  # NOTE(review): the key is the German word `stiege` while the phrase and
  # anchor are Icelandic (`stigi`); kept unchanged in case consumers or other
  # files depend on the key name.
  stiege: &stigi
    canonical: stigi
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
  # The four *_probability values below sum to 1.0.
  alphanumeric: &staircase_alphanumeric
    default: *stigi
    numeric_probability: 0.75
    alpha_probability: 0.2
    numeric_plus_alpha_probability: 0.025
    alpha_plus_numeric_probability: 0.025
    alpha_plus_numeric:
      whitespace_probability: 0.1
    numeric_plus_alpha:
      whitespace_probability: 0.1
    # NOTE(review): `directional` is nested under `alphanumeric`, and
    # `modifier` under `directional`, based on key order only; confirm the
    # intended parent. The alternatives carry no explicit probabilities.
    directional:
      direction: left
      direction_probability: 0.85
      modifier:
        alternatives:
          - alternative: *norour
          - alternative: *suour
          - alternative: *austur
          - alternative: *vestur
# Post office box phrases.
po_boxes:
  postholf: &postholf
    canonical: pósthólf
    abbreviated: ph
    sample: true
    # canonical + abbreviated + sample = 1.0
    canonical_probability: 0.6
    abbreviated_probability: 0.2
    sample_probability: 0.2
    numeric:
      direction: left
    add_number_phrase: true
    add_number_phrase_probability: 0.2  # Ph Nr 1234
  # The four *_probability values below sum to 1.0.
  alphanumeric:
    sample: false
    default: *postholf
    numeric_probability: 0.9  # Ph 123
    alpha_probability: 0.05  # Ph A
    numeric_plus_alpha_probability: 0.04  # Ph 123G
    alpha_plus_numeric_probability: 0.01  # Ph A123
    alpha_plus_numeric:
      whitespace_probability: 0.1
    numeric_plus_alpha:
      whitespace_probability: 0.1
  # Weighted choice of box-number length in digits (weights sum to 1.0).
  digits:
    - length: 1
      probability: 0.05
    - length: 2
      probability: 0.1
    - length: 3
      probability: 0.2
    - length: 4
      probability: 0.5
    - length: 5
      probability: 0.1
    - length: 6
      probability: 0.05
# Unit (apartment) phrases.
units:
  ibuo: &ibuo
    canonical: íbúð
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    # NOTE(review): `null_phrase_probability` kept under `numeric` based on
    # key order; confirm.
    numeric:
      direction: left
      null_phrase_probability: 0.5
    # íbúð númer 4
    add_number_phrase: true
    add_number_phrase_probability: 0.05
  # The four *_probability values below sum to 1.0.
  alphanumeric: &unit_alphanumeric
    default: *ibuo
    numeric_probability: 0.9  # e.g. íbúð 1
    numeric_plus_alpha_probability: 0.03  # e.g. 1A
    alpha_plus_numeric_probability: 0.03  # e.g. A1
    alpha_probability: 0.04  # e.g. íbúð A
    alpha_plus_numeric:
      whitespace_probability: 0.1
    numeric_plus_alpha:
      whitespace_probability: 0.1
    # Separate random probability for adding directions like 2R, 2L, etc.
    add_direction: true
    add_direction_probability: 0.1
    # Add directions for plain numbers
    add_direction_numeric: true
    # If there are 10 floors, create unit numbers like #301 or #1032
    use_floor_probability: 0.1