From e54f73647f124440f44f324830be6b314eb06f9d Mon Sep 17 00:00:00 2001
From: Al
Date: Mon, 4 Jul 2016 13:56:46 -0400
Subject: [PATCH] [addresses] Chinese Pinyin config

---
NOTE(review): the path below is zy_pinyin.yaml, but the file's own header
comment says zh_pinyin.yaml - confirm which name is intended. The YAML
indentation was reconstructed and comments were added, so the blob id on
the "index" line may no longer match the content.

 resources/addresses/zy_pinyin.yaml | 165 +++++++++++++++++++++++++++++
 1 file changed, 165 insertions(+)
 create mode 100644 resources/addresses/zy_pinyin.yaml

diff --git a/resources/addresses/zy_pinyin.yaml b/resources/addresses/zy_pinyin.yaml
new file mode 100644
index 00000000..8ad9d525
--- /dev/null
+++ b/resources/addresses/zy_pinyin.yaml
@@ -0,0 +1,165 @@
+# zh_pinyin.yaml
+# --------------
+# Chinese (Pinyin)
+#
+# Address phrase configuration for Chinese written in Pinyin. Most phrases
+# are defined twice, with and without tone marks (e.g. lóu / lou). Within
+# each "alphanumeric" block the default's probability plus the alternatives'
+# probabilities sum to 1.0.
+
+whitespace: false
+
+components:
+  level:
+    # Probability of doing nothing if no floor number is specified
+    null_probability: 0.85
+    alphanumeric_probability: 0.15
+
+  unit:
+    # If no unit number is specified
+    null_probability: 0.6
+    alphanumeric_probability: 0.4
+
+# Number phrase; configured to always render as the affix "-hao".
+numbers:
+  default: &hao
+    canonical: hao
+    numeric_affix:
+      affix: -hao
+      direction: right
+    numeric_probability: 0.0
+    numeric_affix_probability: 1.0
+
+house_number:
+  alphanumeric:
+    default: *hao
+  # NOTE(review): assumed to sit beside "alphanumeric", not inside it - confirm.
+  alphanumeric_phrase_probability: 0.6
+
+# Level (floor) phrases: lóu and céng, each with an unaccented variant.
+levels:
+  lou: &lou
+    canonical: lóu
+    numeric_affix:
+      affix: -lóu
+      direction: right
+    numeric_probability: 0.0
+    numeric_affix_probability: 1.0
+  lou_no_accent: &lou_no_accent
+    canonical: lou
+    numeric_affix:
+      affix: -lou
+      direction: right
+      # NOTE(review): only this variant sets add_number_phrase and digits;
+      # confirm they belong under numeric_affix and that the other variants
+      # are meant to omit them.
+      add_number_phrase: true
+      add_number_phrase_probability: 0.5
+      digits:
+        ascii_probability: 0.6
+        unicode_full_width_probability: 0.1
+        spellout_probability: 0.3
+    numeric_probability: 0.0
+    numeric_affix_probability: 1.0
+  ceng: &ceng
+    canonical: céng
+    numeric_affix:
+      affix: -céng
+      direction: right
+    numeric_probability: 0.0
+    numeric_affix_probability: 1.0
+  ceng_no_accent: &ceng_no_accent
+    canonical: ceng
+    numeric_affix:
+      affix: -ceng
+      direction: right
+    numeric_probability: 0.0
+    numeric_affix_probability: 1.0
+
+  numbering_starts_at: 1
+
+  alphanumeric:
+    default: *lou
+    probability: 0.85
+    alternatives:
+      - alternative: *lou_no_accent
+        probability: 0.05
+      - alternative: *ceng
+        probability: 0.08
+      - alternative: *ceng_no_accent
+        probability: 0.02
+    numeric_probability: 1.0
+
+po_boxes:
+  youzheng_xinxiang: &youzheng_xinxiang
+    canonical: youzheng xinxiang
+    numeric:
+      direction: left
+    numeric_probability: 1.0
+
+  alphanumeric:
+    default: *youzheng_xinxiang
+    numeric_probability: 1.0
+
+  # Weighted choices keyed by "length"; the probabilities sum to 1.0.
+  digits:
+    - length: 1
+      probability: 0.05
+    - length: 2
+      probability: 0.1
+    - length: 3
+      probability: 0.2
+    - length: 4
+      probability: 0.5
+    - length: 5
+      probability: 0.1
+    - length: 6
+      probability: 0.05
+
+postcodes:
+  alphanumeric:
+    default: &youbian
+      canonical: yóubiān
+      numeric:
+        direction: left
+      # null_probability means the chance of doing nothing e.g. just the postal code
+      null_probability: 0.9
+      numeric_probability: 0.1
+    probability: 0.9
+    alternatives:
+      - alternative: &youbian_no_accent
+          canonical: youbian
+          numeric:
+            direction: left
+          # null_probability means the chance of doing nothing e.g. just the postal code
+          null_probability: 0.9
+          numeric_probability: 0.1
+        probability: 0.1
+
+units:
+  shi: &shi
+    canonical: shì
+    numeric_affix:
+      affix: -shì
+      direction: right
+    numeric_probability: 0.0
+    numeric_affix_probability: 1.0
+
+  shi_no_accent: &shi_no_accent
+    canonical: shi
+    numeric_affix:
+      affix: -shi
+      direction: right
+    numeric_probability: 0.0
+    numeric_affix_probability: 1.0
+
+  alphanumeric:
+    default: *shi
+    probability: 0.8
+    alternatives:
+      - alternative: *shi_no_accent
+        probability: 0.2
+    numeric_probability: 1.0
+    use_positive_numbers_probability: 1.0
+    # If we have a floor number (from building:levels), use it
+    use_floor_probability: 0.8