from caffe2.python import core, workspace
from hypothesis import given, settings
import caffe2.python.hypothesis_test_util as hu
import caffe2.python.serialized_test.serialized_test_util as serial
import hypothesis.strategies as st
import numpy as np
import unittest
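

# Tests for the Caffe2 Softmax, SoftmaxGradient, and SoftmaxWithLoss
# operators, comparing each against a NumPy reference implementation.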
class TestSoftmaxOps(serial.SerializedTestCase):

    @serial.given(n=st.sampled_from([0, 2, 4, 71, 103]),
D=st.sampled_from([0, 4, 8, 64, 79, 256, 333]),
engine=st.sampled_from([None, 'CUDNN']),
**hu.gcs)
def test_softmax(self, n, D, engine, gc, dc):
# n = number of examples, D = |labels|
# Initialize X and add 1e-2 for numerical stability
X = np.random.rand(n, D).astype(np.float32)
X = X + 1e-2
        # Reference implementation of row-wise softmax
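        # probs[i, j] = exp(X[i, j]) / sum_k exp(X[i, k]), computed with
        # the row max subtracted first so that exp cannot overflow.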
def label_softmax(X):
probs = np.zeros((n, D))
rowmax = np.zeros(n)
if D == 0:
return [probs]
for i in range(n):
rowmax[i] = max(X[i, ])
# We need to subtract the max to avoid numerical issues
probs[i] = X[i] - rowmax[i]
exps = np.exp(probs[i, ])
norm = sum(exps)
probs[i, ] = exps / norm
return [probs]
op = core.CreateOperator(
"Softmax",
["X"],
["probs"],
engine=engine
)
self.assertReferenceChecks(
device_option=gc,
op=op,
inputs=[X],
reference=label_softmax,
)

    @given(n=st.sampled_from([0, 2, 4, 71, 103, 555, 751, 1201]),
D=st.sampled_from([0, 4, 8, 64, 79, 256, 333, 1000]),
engine=st.sampled_from([None, 'CUDNN']),
**hu.gcs)
@settings(deadline=10000)
def test_softmax_grad(self, n, D, engine, gc, dc):
# n = number of examples, D = |labels|
        # Initialize the softmax output Y and the upstream gradient dY;
        # add 1e-2 to Y for numerical stability
Y = np.random.rand(n, D).astype(np.float32)
dY = np.random.rand(n, D).astype(np.float32)
Y = Y + 1e-2
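        # For p = softmax(x), backprop is the Jacobian-vector product
        # dX[i] = Y[i] * (dY[i] - <Y[i], dY[i]>), evaluated row by row.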
        # Reference implementation of the softmax gradient
        def label_softmax_grad(Y, dY):
            dX = np.zeros_like(Y)
for i in range(n):
d = np.dot(Y[i, :], dY[i, :])
dX[i, :] = Y[i, :] * (dY[i, :] - d)
return [dX]
op = core.CreateOperator(
"SoftmaxGradient",
["Y", "dY"],
["dX"],
engine=engine
)
self.assertReferenceChecks(
device_option=gc,
op=op,
inputs=[Y, dY],
reference=label_softmax_grad,
)

    @given(axis=st.integers(min_value=1, max_value=4),
engine=st.sampled_from([None, 'CUDNN']),
**hu.gcs)
def test_softmax_axis(self, axis, engine, gc, dc):
np.random.seed(1)
X = np.random.randn(1, 2, 3, 2, 1).astype(np.float32)
X = X + 1e-2
        N = int(np.prod(X.shape[:axis]))
        D = int(np.prod(X.shape[axis:]))
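        # N and D mirror how the Softmax op treats its axis argument:
        # dims before axis are coalesced into N rows, the remaining dims
        # into D columns, and softmax is applied to each row.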
        # Reference implementation of softmax over the flattened
        # trailing axes
def label_softmax(X):
X_ = X.reshape(N, D)
probs = np.zeros((N, D))
rowmax = np.zeros(N)
for i in range(N):
rowmax[i] = max(X_[i, ])
# We need to subtract the max to avoid numerical issues
probs[i] = X_[i] - rowmax[i]
exps = np.exp(probs[i, ])
norm = sum(exps)
probs[i, ] = exps / norm
return [probs.reshape(*X.shape)]
op = core.CreateOperator(
"Softmax",
["X"],
["probs"],
axis=axis,
engine=engine,
)
self.assertReferenceChecks(
device_option=gc,
op=op,
inputs=[X],
reference=label_softmax,
)
self.assertGradientChecks(
gc, op, [X], 0, [0], stepsize=1e-4, threshold=1e-2)

    @given(n=st.integers(2, 10), D=st.integers(4, 16),
only_loss=st.booleans(), **hu.gcs)
@settings(deadline=1000)
def test_softmax_with_loss(self, n, D, gc, only_loss, dc):
# n = number of examples, D = |labels|
# Initialize X and add 1e-2 for numerical stability
np.random.seed(2603)
X = np.random.rand(n, D).astype(np.float32)
X = X + 1e-2
# Initialize label
label = (np.random.rand(n) * D).astype(np.int32)
        # Reference implementation of softmax cross entropy with
        # hard (index) labels
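        # Expected loss: avgloss = -(1/n) * sum_i log(probs[i, label[i]]),
        # with probabilities clamped away from zero before the log.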
def label_softmax_crossent(X, label):
probs = np.zeros((n, D))
rowmax = np.zeros(n)
for i in range(n):
rowmax[i] = max(X[i, ])
# We need to subtract the max to avoid numerical issues
probs[i] = X[i] - rowmax[i]
exps = np.exp(probs[i, ])
norm = sum(exps)
probs[i, ] = exps / norm
label_xent = [-np.log(max(probs[i][label[i]], 1e-20))
for i in range(n)]
avgloss = np.sum(label_xent) / float(n)
return (probs, avgloss)
op = core.CreateOperator(
"SoftmaxWithLoss",
["X", "label"],
["probs", "avgloss"],
only_loss=only_loss,
)
self.assertReferenceChecks(
device_option=gc,
op=op,
inputs=[X, label],
reference=label_softmax_crossent,
)
self.assertGradientChecks(
gc, op, [X, label], 0, [1], stepsize=1e-4, threshold=1e-2)

    @given(
n=st.integers(2, 5),
D=st.integers(4, 16),
only_loss=st.booleans(),
label_prob=st.booleans(),
**hu.gcs
)
@settings(deadline=10000)
def test_softmax_with_loss_axis_2(
self, n, D, only_loss, label_prob,
gc, dc
):
np.random.seed(2603)
X = np.random.rand(n, n, D).astype(np.float32)
X = X + 1e-2
if label_prob:
label = np.random.rand(n, n, D).astype(np.float32)
label /= label.sum(axis=2, keepdims=True)
else:
label = (np.random.rand(n, n) * D).astype(np.int32)
        # Reference implementation of softmax cross entropy with either
        # hard or soft labels
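        # With label_prob=True each (i, j) position carries a full
        # distribution over the D classes; otherwise it holds a single
        # hard class index. The loss is averaged over all n * n positions.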
def label_softmax_crossent(X, label):
probs = np.zeros((n, n, D))
rowmax = np.zeros((n, n))
for i in range(n):
for j in range(n):
rowmax[i, j] = max(X[i, j, ])
# We need to subtract the max to avoid numerical issues
probs[i, j] = X[i, j] - rowmax[i, j]
exps = np.exp(probs[i, j, ])
norm = sum(exps)
probs[i, j, ] = exps / norm
label_xent = 0
for i in range(n):
for j in range(n):
if label_prob:
for k in range(D):
label_xent += (
-np.log(max(probs[i, j, k], 1e-20)) *
label[i, j, k]
)
else:
                        label_xent += -np.log(
                            max(probs[i, j, label[i, j]], 1e-20)
                        )
avgloss = label_xent / float(n * n)
return (probs, avgloss)
op = core.CreateOperator(
"SoftmaxWithLoss",
["X", "label"],
["probs", "avgloss"],
only_loss=only_loss,
label_prob=label_prob,
axis=2,
)
self.assertReferenceChecks(
device_option=gc,
op=op,
inputs=[X, label],
reference=label_softmax_crossent,
)
self.assertGradientChecks(
gc, op, [X, label], 0, [1], stepsize=1e-4, threshold=1e-2)

    @unittest.skipIf(not workspace.has_gpu_support, "No gpu support")
@given(**hu.gcs_gpu_only)
def test_softmax_with_loss_large(self, gc, dc):
np.random.seed(2603)
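        # Large D values stress the GPU implementation; only the forward
        # output is checked against the reference here.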
for n in [32]:
for D in [1000, 2000, 20000]:
# n = number of examples, D = |labels|
# Initialize X and add 1e-2 for numerical stability
X = np.random.rand(n, D).astype(np.float32)
X = X + 1e-2
# Initialize label
label = (np.random.rand(n) * D).astype(np.int32)
                # Reference implementation of softmax cross entropy with
                # hard labels
def label_softmax_crossent(X, label):
probs = np.zeros((n, D))
rowmax = np.zeros(n)
for i in range(n):
rowmax[i] = max(X[i, ])
# We need to subtract the max to avoid numerical issues
probs[i] = X[i] - rowmax[i]
exps = np.exp(probs[i, ])
norm = sum(exps)
probs[i, ] = exps / norm
label_xent = [-np.log(max(probs[i][label[i]], 1e-20))
for i in range(n)]
avgloss = np.sum(label_xent) / float(n)
return (probs, avgloss)
op = core.CreateOperator(
"SoftmaxWithLoss",
["X", "label"],
["probs", "avgloss"]
)
self.assertReferenceChecks(
device_option=gc,
op=op,
inputs=[X, label],
reference=label_softmax_crossent,
)

    @given(n=st.integers(2, 10), D=st.integers(4, 16), **hu.gcs)
@settings(deadline=1000)
def test_softmax_with_loss_label_prob(self, n, D, gc, dc):
# n = number of examples, D = |labels|
# Initialize X and add 1e-2 for numerical stability
np.random.seed(2603)
X = np.random.rand(n, D).astype(np.float32)
X = X + 1e-2
# Initialize label
label = np.random.rand(D, n).astype(np.float32)
        # Normalize so that each example's label distribution sums to 1
label /= np.sum(label, axis=0)
label = label.transpose()
# Reference implementation of cross entropy with soft labels
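        # Soft-label cross entropy per example:
        # loss_i = -sum_j label[i, j] * log(probs[i, j]); the test checks
        # the average of loss_i over the n examples.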
def label_softmax_crossent(X, label):
probs = np.zeros((n, D))
rowmax = np.zeros(n)
for i in range(n):
rowmax[i] = max(X[i, ])
# We need to subtract the max to avoid numerical issues
probs[i] = X[i] - rowmax[i]
exps = np.exp(probs[i, ])
norm = sum(exps)
probs[i, ] = exps / norm
label_xent = np.zeros(X.shape)
for i in range(n):
for j in range(D):
label_xent[i][j] = -np.log(
max(probs[i, j], 1e-20)) * label[i, j]
avgloss = np.sum(label_xent) / float(n)
return (probs, avgloss)
op = core.CreateOperator(
"SoftmaxWithLoss",
["X", "label"],
["probs", "avgloss"],
label_prob=1
)
        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[X, label],
            reference=label_softmax_crossent,
        )
        self.assertGradientChecks(
            gc, op, [X, label], 0, [1], stepsize=1e-4, threshold=1e-2)