[mv] Moving sampling to math.sampling
This commit is contained in:
@@ -6,8 +6,8 @@ import yaml
|
|||||||
|
|
||||||
from collections import Mapping
|
from collections import Mapping
|
||||||
|
|
||||||
from geodata.addresses.sampling import cdf, check_probability_distribution
|
|
||||||
from geodata.address_expansions.address_dictionaries import address_phrase_dictionaries
|
from geodata.address_expansions.address_dictionaries import address_phrase_dictionaries
|
||||||
|
from geodata.math.sampling import cdf, check_probability_distribution
|
||||||
|
|
||||||
|
|
||||||
this_dir = os.path.realpath(os.path.dirname(__file__))
|
this_dir = os.path.realpath(os.path.dirname(__file__))
|
||||||
@@ -54,7 +54,7 @@ class AddressConfig(object):
|
|||||||
self.cache = {}
|
self.cache = {}
|
||||||
|
|
||||||
for filename in os.listdir(config_dir):
|
for filename in os.listdir(config_dir):
|
||||||
if filename != 'en.yaml':
|
if filename not in ('en.yaml', 'es.yaml'):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
config = yaml.load(open(os.path.join(ADDRESS_CONFIG_DIR, filename)))
|
config = yaml.load(open(os.path.join(ADDRESS_CONFIG_DIR, filename)))
|
||||||
@@ -100,7 +100,10 @@ class AddressConfig(object):
|
|||||||
'''Get a probability distribution over alternatives'''
|
'''Get a probability distribution over alternatives'''
|
||||||
key = self.cache_key(prop, language, dictionaries, country=country)
|
key = self.cache_key(prop, language, dictionaries, country=country)
|
||||||
if key not in self.cache:
|
if key not in self.cache:
|
||||||
properties = self.get_property(prop, language, country=country)
|
properties = self.get_property(prop, language, country=country, default=None)
|
||||||
|
|
||||||
|
if properties is None:
|
||||||
|
return None, None
|
||||||
|
|
||||||
probs = []
|
probs = []
|
||||||
alternatives = []
|
alternatives = []
|
||||||
@@ -118,10 +121,21 @@ class AddressConfig(object):
|
|||||||
probs.extend([prob * p for p in phrase_probs])
|
probs.extend([prob * p for p in phrase_probs])
|
||||||
alternatives.extend([(p, props) for p in phrases])
|
alternatives.extend([(p, props) for p in phrases])
|
||||||
|
|
||||||
|
sample_probability = properties.get('sample_probability')
|
||||||
|
if sample_probability is not None:
|
||||||
|
sample_phrases = []
|
||||||
|
for dictionary in dictionaries:
|
||||||
|
phrases = self.sample_phrases.get((language, dictionary), [])
|
||||||
|
for canonical, surface_forms in six.iteritems(phrases):
|
||||||
|
sample_phrases.append(canonical)
|
||||||
|
sample_phrases.extend(surface_forms)
|
||||||
|
# Note: use the outer properties dictionary e.g. units.alphanumeric
|
||||||
|
alternatives.extend([(p, properties) for p in sample_phrases])
|
||||||
|
probs.extend([float(sample_probability) / len(sample_phrases)] * len(sample_phrases))
|
||||||
|
|
||||||
alts = properties.get('alternatives', [])
|
alts = properties.get('alternatives', [])
|
||||||
total_before_alts = 0.0
|
|
||||||
for alt in alts:
|
for alt in alts:
|
||||||
prob = alt.get('probability', (1.0 - total_before_alts) / len(alts))
|
prob = alt.get('probability', 1.0 / len(alts))
|
||||||
props = alt['alternative']
|
props = alt['alternative']
|
||||||
phrases, phrase_probs = self.form_probabilities(props, language, dictionaries=dictionaries)
|
phrases, phrase_probs = self.form_probabilities(props, language, dictionaries=dictionaries)
|
||||||
probs.extend([prob * p for p in phrase_probs])
|
probs.extend([prob * p for p in phrase_probs])
|
||||||
@@ -130,7 +144,7 @@ class AddressConfig(object):
|
|||||||
try:
|
try:
|
||||||
check_probability_distribution(probs)
|
check_probability_distribution(probs)
|
||||||
except AssertionError:
|
except AssertionError:
|
||||||
print 'values where: {}'.format(alternatives)
|
print 'values were: {}'.format(alternatives)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
probs_cdf = cdf(probs)
|
probs_cdf = cdf(probs)
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
import six
|
import six
|
||||||
from geodata.addresses.config import address_config
|
from geodata.addresses.config import address_config
|
||||||
from geodata.addresses.sampling import weighted_choice
|
|
||||||
from geodata.encoding import safe_decode
|
from geodata.encoding import safe_decode
|
||||||
|
from geodata.math.sampling import weighted_choice
|
||||||
|
|
||||||
|
|
||||||
class Conjunction(object):
|
class Conjunction(object):
|
||||||
|
|||||||
@@ -1,10 +1,4 @@
|
|||||||
import random
|
|
||||||
import six
|
|
||||||
|
|
||||||
from geodata.addresses.config import address_config
|
|
||||||
from geodata.addresses.numbering import NumericPhrase
|
from geodata.addresses.numbering import NumericPhrase
|
||||||
from geodata.addresses.sampling import weighted_choice
|
|
||||||
from geodata.encoding import safe_decode
|
|
||||||
|
|
||||||
|
|
||||||
class RelativeDirection(NumericPhrase):
|
class RelativeDirection(NumericPhrase):
|
||||||
|
|||||||
@@ -3,8 +3,8 @@ import six
|
|||||||
|
|
||||||
from geodata.addresses.config import address_config
|
from geodata.addresses.config import address_config
|
||||||
from geodata.addresses.numbering import NumberedComponent, sample_alphabet, latin_alphabet
|
from geodata.addresses.numbering import NumberedComponent, sample_alphabet, latin_alphabet
|
||||||
from geodata.addresses.sampling import weighted_choice, zipfian_distribution, cdf
|
|
||||||
from geodata.encoding import safe_decode
|
from geodata.encoding import safe_decode
|
||||||
|
from geodata.math.sampling import weighted_choice, zipfian_distribution, cdf
|
||||||
|
|
||||||
|
|
||||||
class Floor(NumberedComponent):
|
class Floor(NumberedComponent):
|
||||||
@@ -74,11 +74,11 @@ class Floor(NumberedComponent):
|
|||||||
try:
|
try:
|
||||||
floor = int(floor)
|
floor = int(floor)
|
||||||
integer_floor = True
|
integer_floor = True
|
||||||
except ValueError:
|
except (ValueError, TypeError):
|
||||||
try:
|
try:
|
||||||
floor = float(floor)
|
floor = float(floor)
|
||||||
integer_floor = int(floor) == floor
|
integer_floor = int(floor) == floor
|
||||||
except ValueError:
|
except (ValueError, TypeError):
|
||||||
return cls.numeric_phrase('levels.alphanumeric', safe_decode(floor), language,
|
return cls.numeric_phrase('levels.alphanumeric', safe_decode(floor), language,
|
||||||
dictionaries=['level_types_numbered'], country=country)
|
dictionaries=['level_types_numbered'], country=country)
|
||||||
|
|
||||||
@@ -121,4 +121,4 @@ class Floor(NumberedComponent):
|
|||||||
country=country)
|
country=country)
|
||||||
|
|
||||||
return cls.numeric_phrase('levels.alphanumeric', safe_decode(floor), language,
|
return cls.numeric_phrase('levels.alphanumeric', safe_decode(floor), language,
|
||||||
dictionaries=['level_types_numbered'], country=country)
|
dictionaries=['level_types_numbered'], country=country)
|
||||||
@@ -2,8 +2,8 @@ import random
|
|||||||
import six
|
import six
|
||||||
|
|
||||||
from geodata.addresses.config import address_config
|
from geodata.addresses.config import address_config
|
||||||
from geodata.addresses.sampling import weighted_choice, zipfian_distribution, cdf
|
|
||||||
from geodata.encoding import safe_decode
|
from geodata.encoding import safe_decode
|
||||||
|
from geodata.math.sampling import weighted_choice, zipfian_distribution, cdf
|
||||||
from geodata.numbers.ordinals import ordinal_expressions
|
from geodata.numbers.ordinals import ordinal_expressions
|
||||||
|
|
||||||
|
|
||||||
@@ -202,6 +202,9 @@ class NumberedComponent(object):
|
|||||||
elif num_type == 'ordinal':
|
elif num_type == 'ordinal':
|
||||||
num = ordinal_expressions.suffixed_number(num, language, gender=props.get('gender', None))
|
num = ordinal_expressions.suffixed_number(num, language, gender=props.get('gender', None))
|
||||||
|
|
||||||
|
if random.random() < props.get('null_phrase_probability', 0.0):
|
||||||
|
return num
|
||||||
|
|
||||||
direction = props['direction']
|
direction = props['direction']
|
||||||
whitespace = props.get('whitespace', whitespace_default)
|
whitespace = props.get('whitespace', whitespace_default)
|
||||||
|
|
||||||
|
|||||||
0
scripts/geodata/math/__init__.py
Normal file
0
scripts/geodata/math/__init__.py
Normal file
Reference in New Issue
Block a user