47 lines
1.4 KiB
Python
47 lines
1.4 KiB
Python
import bisect
|
|
import random
|
|
import sys
|
|
|
|
FLOAT_EPSILON = 1e-09
|
|
|
|
|
|
def weighted_choice(values, cdf):
|
|
"""Pick one of n values given a discrete cumulative distribution"""
|
|
assert values and cdf, 'values and probabilities cannot be empty/None'
|
|
assert len(values) == len(cdf), 'len(values) != len(probs)'
|
|
assert all(p >= 0.0 and p <= (1.0 + FLOAT_EPSILON) for p in cdf), 'Probabilities not valid: {}'.format(cdf)
|
|
|
|
x = random.random()
|
|
i = bisect.bisect(cdf, x)
|
|
return values[i]
|
|
|
|
|
|
def isclose(a, b, rel_tol=FLOAT_EPSILON, abs_tol=0.0):
|
|
return abs(a - b) <= max(rel_tol * max(abs(a), abs(b)), abs_tol)
|
|
|
|
|
|
def check_probability_distribution(probs):
|
|
cumulative = 0.0
|
|
for p in probs:
|
|
assert p >= 0.0, 'Probabilities cannot be negative'
|
|
assert p <= 1.0, 'Probabilities cannot be > 1.0'
|
|
cumulative += p
|
|
assert isclose(cumulative, 1.0), 'Probabilities must sum to 1: probs={}, cumulative={}'.format(probs, cumulative)
|
|
|
|
|
|
def cdf(probs):
|
|
total = 0.0
|
|
cumulative = [0.0] * len(probs)
|
|
for i, p in enumerate(probs):
|
|
total += p
|
|
cumulative[i] = total
|
|
|
|
return cumulative
|
|
|
|
|
|
def zipfian_distribution(n, b=1.0):
|
|
"""Distribution where the ith item's frequency is proportional to its rank"""
|
|
frequencies = [1. / (i ** b) for i in xrange(1, n + 1)]
|
|
total = sum(frequencies)
|
|
return [f / total for f in frequencies]
|