Files
2017-01-03 03:07:53 -05:00

270 lines
7.1 KiB
YAML
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# he.yaml
# -------
# Hebrew language specification
# 27 Hebrew letter forms: the 22 base letters plus the five final forms.
alphabet: אבגדהוזחטיכךלמםנןסעפףצץקרשת
# NOTE(review): presumably the chance that a generated letter is drawn from
# `alphabet` rather than a fallback alphabet -- confirm against the consumer.
alphabet_probability: 0.8
# Sampling settings for the secondary address components, plus the rules for
# folding them into the house number.
components:
  # In each entry the two probabilities sum to 1.
  # NOTE(review): presumably null_probability is the chance of omitting the
  # component and alphanumeric_probability the chance of generating one --
  # confirm against the consumer.
  level:
    null_probability: 0.95
    alphanumeric_probability: 0.05
  entrance:
    null_probability: 0.9
    alphanumeric_probability: 0.1
  unit:
    null_probability: 0.6
    alphanumeric_probability: 0.4
  # Each combination joins the listed components under the `house_number`
  # label using one of its separators. Separator probabilities inside a
  # combination sum to 1; the per-combination probabilities (0.7, 0.7, 0.1)
  # do not.
  # NOTE(review): presumably each combination is therefore tried on its own
  # rather than sampled from one distribution -- confirm against the consumer.
  combinations:
    # house_number + entrance + unit
    - components:
        - house_number
        - entrance
        - unit
      label: house_number
      separators:
        - separator: "/"
          probability: 0.95
        - separator: "-"
          probability: 0.05
      probability: 0.7
    # house_number + entrance
    - components:
        - house_number
        - entrance
      label: house_number
      separators:
        - separator: " "
          probability: 0.5
        - separator: ""
          probability: 0.2
        - separator: "/"
          probability: 0.1
        - separator: "-"
          probability: 0.1
        - separator: " - "
          probability: 0.1
      probability: 0.7
    # house_number + unit
    - components:
        - house_number
        - unit
      label: house_number
      separators:
        - separator: "/"
          probability: 0.95
        - separator: "-"
          probability: 0.05
      probability: 0.1
# Floor ("level") phrases. Every phrase is anchored so that `aliases` and
# `alphanumeric` further down can refer to it by alias.
levels:
  # "floor", Hebrew script
  koma: &koma
    canonical: קומה
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
    ordinal:
      direction: left
    numeric_probability: 0.4
    ordinal_probability: 0.6
  # "floor", Latin transliteration
  koma_latin: &koma_latin
    canonical: koma
    sample: true
    canonical_probability: 0.7
    sample_probability: 0.3
    numeric:
      direction: left
    ordinal:
      direction: left
    numeric_probability: 0.4
    ordinal_probability: 0.6
  # "ground floor", Hebrew script; no numeric/ordinal forms are defined
  komat_karka: &komat_karka
    canonical: קומת קרקע
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
  # "ground floor", Latin transliteration
  komat_karka_latin: &komat_karka_latin
    canonical: komát karká
    sample: true
    canonical_probability: 0.6
    sample_probability: 0.4
  # "basement", Hebrew script. standalone + numeric + ordinal sum to 1, so
  # the phrase is almost always used without a number.
  martef: &martef
    canonical: מרתף
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
    ordinal:
      direction: left
    # NOTE(review): presumably the (negative) floor number is rendered by
    # its absolute value, starting from 1 -- confirm against the consumer.
    number_abs_value: true
    number_min_abs_value: 1
    standalone_probability: 0.985
    numeric_probability: 0.01
    ordinal_probability: 0.005
  # "basement", Latin transliteration
  martef_latin: &martef_latin
    canonical: martef
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
    ordinal:
      direction: left
    number_abs_value: true
    number_min_abs_value: 1
    standalone_probability: 0.985
    numeric_probability: 0.01
    ordinal_probability: 0.005
  # Floor numbers that get a dedicated phrase instead of the generic one:
  # anything below -1 and -1 itself use "basement", 0 uses "ground floor".
  # The keys are quoted so they stay strings.
  aliases:
    "<-1":
      default: *martef
      probability: 0.9
      alternatives:
        - alternative: *martef_latin
          probability: 0.1
    "-1":
      default: *martef
      probability: 0.9
      alternatives:
        - alternative: *martef_latin
          probability: 0.1
    "0":
      default: *komat_karka
      probability: 0.9
      alternatives:
        - alternative: *komat_karka_latin
          probability: 0.1
  numbering_starts_at: 0
  # Generic floor phrase; the four *_probability values below sum to 1.
  alphanumeric:
    default: *koma
    probability: 0.9
    alternatives:
      - alternative: *koma_latin
        probability: 0.1
    numeric_probability: 0.99  # With this probability, pick an integer
    alpha_probability: 0.0098  # With this probability, pick a letter e.g. A
    numeric_plus_alpha_probability: 0.0001  # e.g. 2A
    alpha_plus_numeric_probability: 0.0001  # e.g. A2
# Building-entrance phrases. Both phrases are anchored for use by
# `alphanumeric` below.
entrances:
  # "entrance", Hebrew script
  knisa: &knisa
    canonical: כניסה
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
  # "entrance", Latin transliteration
  knisa_latin: &knisa_latin
    canonical: knisa
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
  # e.g. knisa 1, knisa A
  # Letters are favoured over numbers (0.9 vs 0.1).
  alphanumeric:
    default: *knisa
    probability: 0.99
    alternatives:
      - alternative: *knisa_latin
        probability: 0.01
    numeric_probability: 0.1
    alpha_probability: 0.9
# P.O. box phrases, the mix of box-number formats, and the distribution of
# digit counts for generated box numbers.
po_boxes:
  # Hebrew-script phrase with its dotted abbreviation; canonical,
  # abbreviated and sample probabilities sum to 1.
  ta_doar: &ta_doar
    canonical: תיבת דואר
    abbreviated: ת.ד.
    sample: true
    canonical_probability: 0.4
    abbreviated_probability: 0.4
    sample_probability: 0.2
    numeric:
      direction: left
  # Latin transliteration. This previously held a Russian transliteration
  # copied from another language's file.
  # NOTE(review): "ta doar" follows the anchor name and the examples below;
  # confirm the preferred romanization.
  ta_doar_latin: &ta_doar_latin
    canonical: ta doar
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
  # The four *_probability values below sum to 1.
  alphanumeric:
    default: *ta_doar
    probability: 0.8
    alternatives:
      - alternative: *ta_doar_latin
        probability: 0.2
    numeric_probability: 0.9  # ta doar 123
    alpha_probability: 0.05  # ta doar A
    numeric_plus_alpha_probability: 0.04  # ta doar 123A
    alpha_plus_numeric_probability: 0.01  # ta doar A123
    alpha_plus_numeric:
      whitespace_probability: 0.1
    numeric_plus_alpha:
      whitespace_probability: 0.1
  # Length distribution for box numbers (probabilities sum to 1); four
  # digits is the most likely length.
  digits:
    - length: 1
      probability: 0.05
    - length: 2
      probability: 0.1
    - length: 3
      probability: 0.2
    - length: 4
      probability: 0.5
    - length: 5
      probability: 0.1
    - length: 6
      probability: 0.05
# Apartment ("unit") phrases and the mix of unit-number formats.
units:
  # "apartment", Hebrew script
  dira: &dira
    canonical: דירה
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
  # "apartment", Latin transliteration
  dira_latin: &dira_latin
    canonical: dira
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
  # The four *_probability values below sum to 1. The anchor is kept even
  # though nothing in this section aliases it.
  alphanumeric: &unit_alphanumeric
    default: *dira
    probability: 0.9
    alternatives:
      - alternative: *dira_latin
        probability: 0.1
    numeric_probability: 0.9  # e.g. dira 1
    numeric_plus_alpha_probability: 0.03  # e.g. 1A
    alpha_plus_numeric_probability: 0.03  # e.g. A1
    alpha_probability: 0.04  # e.g. dira A
    alpha_plus_numeric:
      whitespace_probability: 0.1
    numeric_plus_alpha:
      whitespace_probability: 0.1
    # If there are 10 floors, create unit numbers like #301 or #1032
    use_floor_probability: 0.1