Repository URL to install this package:
|
Version:
3.0.1 ▾
|
# coding=utf-8
import bisect
from faker.generator import random as mod_random
def random_sample(random=None):
if random is None:
random = mod_random
return random.uniform(0.0, 1.0)
def cumsum(it):
total = 0
for x in it:
total += x
yield total
def choices_distribution_unique(a, p, random=None, length=1):
# As of Python 3.7, there isn't a way to sample unique elements that takes
# weight into account.
if random is None:
random = mod_random
assert len(a) == len(p)
assert len(a) >= length, "You can't request more unique samples than elements in the dataset."
choices = []
items = list(a)
probabilities = list(p)
for i in range(length):
cdf = list(cumsum(probabilities))
normal = cdf[-1]
cdf2 = [float(i) / float(normal) for i in cdf]
uniform_sample = random_sample(random=random)
idx = bisect.bisect_right(cdf2, uniform_sample)
item = items[idx]
choices.append(item)
probabilities.pop(idx)
items.pop(idx)
return choices
def choices_distribution(a, p, random=None, length=1):
if random is None:
random = mod_random
assert len(a) == len(p)
if hasattr(random, 'choices'):
choices = random.choices(a, weights=p, k=length)
return choices
else:
choices = []
cdf = list(cumsum(p))
normal = cdf[-1]
cdf2 = [float(i) / float(normal) for i in cdf]
for i in range(length):
uniform_sample = random_sample(random=random)
idx = bisect.bisect_right(cdf2, uniform_sample)
item = a[idx]
choices.append(item)
return choices