Repository URL to install this package:
|
Version:
1.14.0 ▾
|
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utilities for testing random variables."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import numpy as np
from tensorflow.python.framework import dtypes
from tensorflow.python.ops.distributions import special_math
def test_moment_matching(
samples,
number_moments,
dist,
stride=0):
"""Return z-test scores for sample moments to match analytic moments.
Given `samples`, check that the first sample `number_moments` match
the given `dist` moments by doing a z-test.
Args:
samples: Samples from target distribution.
number_moments: Python `int` describing how many sample moments to check.
dist: SciPy distribution object that provides analytic moments.
stride: Distance between samples to check for statistical properties.
A stride of 0 means to use all samples, while other strides test for
spatial correlation.
Returns:
Array of z_test scores.
"""
sample_moments = []
expected_moments = []
variance_sample_moments = []
x = samples.flat
for i in range(1, number_moments + 1):
strided_range = x[::(i - 1) * stride + 1]
sample_moments.append(np.mean(strided_range ** i))
expected_moments.append(dist.moment(i))
variance_sample_moments.append(
(dist.moment(2 * i) - dist.moment(i) ** 2) / len(strided_range))
z_test_scores = []
for i in range(1, number_moments + 1):
# Assume every operation has a small numerical error.
# It takes i multiplications to calculate one i-th moment.
total_variance = (
variance_sample_moments[i - 1] +
i * np.finfo(samples.dtype).eps)
tiny = np.finfo(samples.dtype).tiny
assert np.all(total_variance > 0)
if total_variance < tiny:
total_variance = tiny
# z_test is approximately a unit normal distribution.
z_test_scores.append(abs(
(sample_moments[i - 1] - expected_moments[i - 1]) / np.sqrt(
total_variance)))
return z_test_scores
def chi_squared(x, bins):
    """Pearson's Chi-squared statistic against a uniform histogram on [0, 1].

    Args:
      x: Array-like of samples; it is flattened before binning.
      bins: Number of equal-width bins spanning `[0, 1]`.

    Returns:
      Sum over bins of `(observed - expected)**2 / expected`, where `expected`
      is the total sample count divided by `bins`.
    """
    flat = np.ravel(x)
    total = len(flat)
    # Only the counts are needed; the bin edges are discarded.
    observed = np.histogram(flat, bins=bins, range=(0, 1))[0]
    expected = total / float(bins)
    deviation = observed - expected
    return np.sum(np.square(deviation) / expected)
def normal_cdf(x):
    """Cumulative distribution function for a standard normal distribution.

    Args:
      x: NumPy array or scalar of points at which to evaluate the CDF.

    Returns:
      `0.5 + 0.5 * erf(x / sqrt(2))` evaluated elementwise as float64. An
      empty input yields an empty result.
    """
    # `otypes` is given explicitly so `np.vectorize` does not need to probe
    # the first element to infer the output dtype; without it a size-0 input
    # raises `ValueError`.
    erf = np.vectorize(math.erf, otypes=[np.float64])
    return 0.5 + 0.5 * erf(x / math.sqrt(2))
def anderson_darling(x):
    """Anderson-Darling statistic of `x` against a standard normal.

    Args:
      x: Array-like of samples; it is flattened and sorted before use.

    Returns:
      `-n - S / n`, where `n` is the number of samples and `S` is the sum of
      the rank-weighted log-CDF and log-survival terms of the sorted samples.
    """
    ordered = np.sort(np.ravel(x))
    count = len(ordered)
    # Ranks 1..count of the sorted samples.
    rank = np.linspace(1, count, count)
    cdf = normal_cdf(ordered)
    lower_terms = (2 * rank - 1) * np.log(cdf)
    upper_terms = (2 * (count - rank) + 1) * np.log(1 - cdf)
    weighted_sum = np.sum(lower_terms + upper_terms)
    return -count - weighted_sum / count
def test_truncated_normal(assert_equal, assert_all_close, dtype, n, y):
    """Checks sample statistics of a standard normal truncated to [-2, 2].

    First verifies that `n` values of `y` lie at or above the lower bound and
    `n` values lie at or below the upper bound, then compares the sample
    mean, median and variance of `y` with their analytic values.

    Args:
      assert_equal: Callable taking `(actual, expected)` for exact checks.
      assert_all_close: Callable taking `(actual, expected)` plus an `atol` or
        `rtol` keyword argument for approximate checks.
      dtype: Dtype of the samples; `dtypes.bfloat16` gets a looser relative
        tolerance on the variance check.
      n: Expected number of samples in `y`.
      y: NumPy array of samples.
    """
    def _std_normal_cdf(t):
        return .5 * math.erfc(-t / math.sqrt(2))

    def _std_normal_pdf(t):
        return math.exp(-(t**2) / 2.) / math.sqrt(2 * math.pi)

    lower, upper = -2., 2.
    mu, sigma = 0., 1.

    # Standardized truncation bounds and the density / CDF values at them.
    alpha = (lower - mu) / sigma
    beta = (upper - mu) / sigma
    cdf_alpha = _std_normal_cdf(alpha)
    cdf_beta = _std_normal_cdf(beta)
    pdf_alpha = _std_normal_pdf(alpha)
    pdf_beta = _std_normal_pdf(beta)
    # Probability mass of the untruncated normal inside the bounds.
    mass = cdf_beta - cdf_alpha

    # Every sample has to respect both truncation bounds.
    assert_equal((y >= lower).sum(), n)
    assert_equal((y <= upper).sum(), n)

    # For more information on these calculations, see:
    # Burkardt, John. "The Truncated Normal Distribution".
    # Department of Scientific Computing website. Florida State University.
    y = y.astype(float)

    expected_mean = mu + (pdf_alpha - pdf_beta) / mass * sigma
    assert_all_close(np.mean(y), expected_mean, atol=5e-4)

    expected_median = mu + special_math.ndtri(
        (cdf_alpha + cdf_beta) / 2.) * sigma
    assert_all_close(np.median(y), expected_median, atol=8e-4)

    expected_variance = sigma**2 * (
        1 + ((alpha * pdf_alpha - beta * pdf_beta) / mass)
        - ((pdf_alpha - pdf_beta) / mass)**2)
    if dtype == dtypes.bfloat16:
        variance_rtol = 6e-3
    else:
        variance_rtol = 1e-3
    assert_all_close(np.var(y), expected_variance, rtol=variance_rtol)