From d09e0ca966317f2e2fccfdb3d0f3b3223b357775 Mon Sep 17 00:00:00 2001 From: Al Date: Mon, 4 Jul 2016 18:08:55 -0400 Subject: [PATCH] [addresses] Implementing whitespace_probability and ordinal_suffix probability for Roman numerals --- resources/addresses/pl.yaml | 1 + scripts/geodata/addresses/numbering.py | 15 ++++++++++++++- scripts/geodata/numbers/ordinals.py | 9 +++++++-- 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/resources/addresses/pl.yaml b/resources/addresses/pl.yaml index d790f570..9ec26c94 100644 --- a/resources/addresses/pl.yaml +++ b/resources/addresses/pl.yaml @@ -136,6 +136,7 @@ levels: digits: ascii_probability: 0.3 roman_numeral_probability: 0.7 + ordinal_suffix_probability: 0.6 numeric_probability: 0.4 ordinal_probability: 0.6 parter: &parter diff --git a/scripts/geodata/addresses/numbering.py b/scripts/geodata/addresses/numbering.py index 213f889f..c5b7cf66 100644 --- a/scripts/geodata/addresses/numbering.py +++ b/scripts/geodata/addresses/numbering.py @@ -125,7 +125,12 @@ class Digits(object): elif digit_type == cls.SPELLOUT: return cls.rewrite_spellout(d, lang, num_type, props) elif digit_type == cls.ROMAN_NUMERAL: - return cls.rewrite_roman_numeral(d) + roman_numeral = cls.rewrite_roman_numeral(d) + if random.random() < props.get('ordinal_suffix_probability', 0.0): + ordinal_suffix = ordinal_expressions.get_suffix(d, lang, gender=props.get('gender', None)) + if ordinal_suffix: + roman_numeral = six.u('{}{}').format(roman_numeral, ordinal_suffix) + return roman_numeral elif digit_type == cls.UNICODE_FULL_WIDTH: return cls.rewrite_full_width(d) else: @@ -175,6 +180,10 @@ class NumericPhrase(object): direction = props['direction'] whitespace = props.get('whitespace', whitespace_default) + whitespace_probability = props.get('whitespace_probability') + if whitespace_probability is not None: + whitespace = random.random() < whitespace_probability + if props.get('title_case', True): # Title case unless the config specifies otherwise phrase = phrase.title() @@ -391,6 +400,10 @@ class NumberedComponent(object): direction = props['direction'] whitespace = props.get('whitespace', whitespace_default) + whitespace_probability = props.get('whitespace_probability') + if whitespace_probability is not None: + whitespace = random.random() < whitespace_probability + # Occasionally switch up if direction_probability is specified if random.random() > props.get('direction_probability', 1.0): if direction == 'left': diff --git a/scripts/geodata/numbers/ordinals.py b/scripts/geodata/numbers/ordinals.py index f6653a2d..5be6ebf2 100644 --- a/scripts/geodata/numbers/ordinals.py +++ b/scripts/geodata/numbers/ordinals.py @@ -93,11 +93,16 @@ class OrdinalExpressions(object): return trie.search_suffix(str(num)) - def suffixed_number(self, num, lang, gender=None, category=None): + def get_suffix(self, num, lang, gender=None, category=None): suffixes = self.get_suffixes(num, lang, gender=gender, category=category) if not suffixes: return None - suffix = random.choice(suffixes) + return random.choice(suffixes) + + def suffixed_number(self, num, lang, gender=None, category=None): + suffix = self.get_suffix(num, lang, gender=gender, category=category) + if not suffix: + return None return six.u('{}{}').format(safe_decode(num), safe_decode(suffix)) ordinal_expressions = OrdinalExpressions()