# Origin: commit 9c4348a990bcf7c13a65a294945f64e5059b9abe
# Author: Al
# Date: Thu, 14 Apr 2016 01:21:42 -0400
# Subject: [addresses] conjunction class for building phrases like
#          "5th and 6th" or "Units 1 & 2" across languages using the
#          address configs
# Path: scripts/geodata/addresses/conjunctions.py
import six
from geodata.addresses.config import address_config
from geodata.addresses.sampling import weighted_choice
from geodata.encoding import safe_decode


class Conjunction(object):
    """Build conjunction phrases such as "5th and 6th" or "Units 1 & 2".

    The conjunction word and its joining rules are sampled from the address
    config entry stored under the ``'and'`` key for the given language and
    (optionally) country.
    """

    # Separators used between the leading phrases when the sampled config
    # entry does not supply its own ``default_join``.
    DEFAULT_WHITESPACE_JOIN = ', '
    DEFAULT_NON_WHITESPACE_JOIN = ''

    @classmethod
    def join(cls, phrases, language, country=None):
        """Join ``phrases`` into one unicode string using a conjunction.

        A conjunction alternative is drawn with ``weighted_choice`` from the
        values/probabilities that ``address_config`` returns for ``'and'``.
        The properties of the drawn alternative control the output:

        * ``whitespace`` (default ``True``): pad the conjunction with a
          single space on each side.
        * ``max_phrase_join`` (default ``2``): how many trailing phrases are
          joined with the conjunction itself.
        * ``default_join``: separator placed after every earlier phrase;
          defaults to ``DEFAULT_WHITESPACE_JOIN`` or
          ``DEFAULT_NON_WHITESPACE_JOIN`` depending on ``whitespace``.

        With the defaults and a conjunction of "and", three phrases
        ``a, b, c`` produce ``"a, b and c"``.

        :param phrases: non-string iterable of phrases; each item is passed
            through ``safe_decode``.
        :param language: language passed to the address config lookup.
        :param country: optional country passed to the address config lookup.
        :returns: the joined unicode phrase.
        :raises ValueError: if ``phrases`` is a single string or is not
            iterable.
        """
        # On Python 3 str/bytes expose __iter__, so the hasattr check alone
        # would let a single string through and split it into characters.
        # Python 2 strings lack __iter__ and were always rejected here, so
        # reject them explicitly to get the same behaviour on both versions.
        string_like = six.string_types + (six.binary_type,)
        if isinstance(phrases, string_like) or not hasattr(phrases, '__iter__'):
            raise ValueError('Param phrases must be iterable')

        values, probs = address_config.alternative_probabilities('and', language, country=country)
        phrase, props = weighted_choice(values, probs)

        whitespace = props.get('whitespace', True)
        padding = six.u(' ') if whitespace else six.u('')

        phrases = [safe_decode(p) for p in phrases]

        # NOTE(review): a max_phrase_join of 0 makes both slices below
        # degenerate (phrases[:-0] is empty, phrases[-0:] is everything).
        # Config values are presumably >= 1 -- confirm against the configs.
        max_phrase_join = props.get('max_phrase_join', 2)
        if len(phrases) > max_phrase_join:
            fallback_join = cls.DEFAULT_WHITESPACE_JOIN if whitespace else cls.DEFAULT_NON_WHITESPACE_JOIN
            default_join = safe_decode(props.get('default_join', fallback_join))
            # The trailing empty string makes the separator also appear after
            # the last leading phrase, right before the conjunction part.
            prefix = default_join.join(phrases[:-max_phrase_join] + [six.u('')])
        else:
            prefix = six.u('')

        # Decode the sampled conjunction like every other text value so the
        # result is unicode whether or not padding is applied.
        conjunction = six.u('{}{}{}').format(padding, safe_decode(phrase), padding)
        joined_phrase = conjunction.join(phrases[-max_phrase_join:])

        return six.u('').join([prefix, joined_phrase])