diff --git a/resources/addresses/he.yaml b/resources/addresses/he.yaml new file mode 100644 index 00000000..57484380 --- /dev/null +++ b/resources/addresses/he.yaml @@ -0,0 +1,272 @@ +# he.yaml +# ------- +# Hebrew language specification + + +alphabet: אבגדהוזחטיכךלמםנןסעפףצץקרשת +alphabet_probability: 0.8 + +components: + level: + null_probability: 0.95 + alphanumeric_probability: 0.05 + + entrance: + null_probability: 0.9 + alphanumeric_probability: 0.1 + + unit: + null_probability: 0.6 + alphanumeric_probability: 0.4 + + + combinations: + - + components: + - house_number + - entrance + - unit + label: house_number + separators: + - separator: "/" + probability: 0.95 + - separator: "-" + probability: 0.05 + probability: 0.7 + - + components: + - house_number + - entrance + label: house_number + separators: + - separator: " " + probability: 0.5 + - separator: "" + probability: 0.2 + - separator: "/" + probability: 0.1 + - separator: "-" + probability: 0.1 + - separator: " - " + probability: 0.1 + probability: 0.7 + - + components: + - house_number + - unit + label: house_number + separators: + - separator: "/" + probability: 0.95 + - separator: "-" + probability: 0.05 + probability: 0.1 + +levels: + koma: &koma + canonical: קומה + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + numeric: + direction: left + ordinal: + direction: left + numeric_probability: 0.4 + ordinal_probability: 0.6 + koma_latin: &koma_latin + canonical: koma + sample: true + canonical_probability: 0.7 + sample_probability: 0.3 + numeric: + direction: left + ordinal: + direction: left + numeric_probability: 0.4 + ordinal_probability: 0.6 + + komat_karka: &komat_karka + canonical: קומת קרקע + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + komat_karka_latin: &komat_karka_latin + canonical: komát karká + sample: true + canonical_probability: 0.6 + sample_probability: 0.4 + + martef: &martef + canonical: מרתף + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + numeric: + direction: left + ordinal: + direction: left + number_abs_value: true + number_min_abs_value: 1 + standalone_probability: 0.985 + numeric_probability: 0.01 + ordinal_probability: 0.005 + martef_latin: &martef_latin + canonical: martef + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + numeric: + direction: left + ordinal: + direction: left + number_abs_value: true + number_min_abs_value: 1 + standalone_probability: 0.985 + numeric_probability: 0.01 + ordinal_probability: 0.005 + + aliases: + "<-1": + default: *martef + probability: 0.9 + alternatives: + - alternative: *martef_latin + probability: 0.1 + "-1": &ground_floor + default: *martef + probability: 0.9 + alternatives: + - alternative: *martef_latin + probability: 0.1 + "0": + default: *komat_karka + probability: 0.9 + alternatives: + - alternative: *komat_karka_latin + probability: 0.1 + + numbering_starts_at: 0 + + alphanumeric: + default: *koma + probability: 0.9 + alternatives: + - alternative: *koma_latin + probability: 0.1 + numeric_probability: 0.99 # With this probability, pick an integer + alpha_probability: 0.0098 # With this probability, pick a letter e.g. A + numeric_plus_alpha_probability: 0.0001 # e.g. 2A + alpha_plus_numeric_probability: 0.0001 # e.g. A2 + +entrances: + knisa: &knisa + canonical: כניסה + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + numeric: + direction: left + knisa_latin: &knisa_latin + canonical: knisa + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + numeric: + direction: left + + # вход 1, вход A, etc. + alphanumeric: + default: *knisa + probability: 0.99 + alternatives: + - alternative: *knisa_latin + probability: 0.01 + alpha_probability: 1.0 + + alphanumeric: + numeric_probability: 0.1 + alpha_probability: 0.9 + +po_boxes: + ta_doar: &ta_doar + canonical: תיבת דואר + abbreviated: ת.ד. + sample: true + canonical_probability: 0.4 + abbreviated_probability: 0.4 + sample_probability: 0.1 + numeric: + direction: left + ta_doar_latin: &ta_doar_latin + canonical: abonementnyy pochtovyy yashchik + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + numeric: + direction: left + + alphanumeric: + default: *ta_doar + probability: 0.8 + alternatives: + - alternative: *ta_doar_latin + probability: 0.2 + numeric_probability: 0.9 # ta doar 123 + alpha_probability: 0.05 # ta doar А + numeric_plus_alpha_probability: 0.04 # ta doar 123А + alpha_plus_numeric_probability: 0.01 # ta doar А123 + alpha_plus_numeric: + whitespace_probability: 0.1 + numeric_plus_alpha: + whitespace_probability: 0.1 + + digits: + - length: 1 + probability: 0.05 + - length: 2 + probability: 0.1 + - length: 3 + probability: 0.2 + - length: 4 + probability: 0.5 + - length: 5 + probability: 0.1 + - length: 6 + probability: 0.05 + +units: + dira: &dira + canonical: דירה + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + numeric: + direction: left + dira_latin: &dira_latin + canonical: dira + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + numeric: + direction: left + + alphanumeric: &unit_alphanumeric + default: *dira + probability: 0.9 + alternatives: + - alternative: *dira_latin + probability: 0.1 + + numeric_probability: 0.9 # e.g. dira 1 + numeric_plus_alpha_probability: 0.03 # e.g. 1А + alpha_plus_numeric_probability: 0.03 # e.g. AА1 + alpha_probability: 0.04 # e.g. dira А + + alpha_plus_numeric: + whitespace_probability: 0.1 + numeric_plus_alpha: + whitespace_probability: 0.1 + + # If there are 10 floors, create unit numbers like #301 or #1032 + use_floor_probability: 0.1