# python/operator_test/fc_operator_test.py

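"""Hypothesis-based tests for the caffe2 FC and FCTransposed operators."""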
from caffe2.proto import caffe2_pb2
from caffe2.python import core
from hypothesis import assume, given, settings, HealthCheck
import caffe2.python.hypothesis_test_util as hu
import caffe2.python.serialized_test.serialized_test_util as serial
import hypothesis.strategies as st
import numpy as np
import unittest


class TestFcOperator(serial.SerializedTestCase):
    def _run_test(self, n, m, k, transposed, multi_dim, dtype, engine, gc, dc):
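        """Run FC or FCTransposed against a numpy reference and check gradients."""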
        if dtype == np.float16:
            # fp16 only supported with CUDA/HIP
            assume(core.IsGPUDeviceType(gc.device_type))
            dc = [d for d in dc if core.IsGPUDeviceType(d.device_type)]

        if engine == 'TENSORCORE':
            # TensorCore only makes sense with CUDA
            assume(gc.device_type == caffe2_pb2.CUDA)
            # TensorCore kernels require dimensions that are multiples of 8
            m *= 8
            k *= 8
            n *= 8

        X = np.random.rand(m, k).astype(dtype) - 0.5
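        # FC expects W of shape (n, k); FCTransposed expects (k, n).
        # In the multi_dim case the trailing singleton dims are flattened away by the op.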
        if multi_dim:
            if transposed:
                W = np.random.rand(k, n, 1, 1).astype(dtype) - 0.5
            else:
                W = np.random.rand(n, k, 1, 1).astype(dtype) - 0.5
        else:
            if transposed:
                W = np.random.rand(k, n).astype(dtype) - 0.5
            else:
                W = np.random.rand(n, k).astype(dtype) - 0.5
        b = np.random.rand(n).astype(dtype) - 0.5

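        # Numpy references: FC computes X.dot(W.T) + b, FCTransposed computes X.dot(W) + b.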
        def fc_op(X, W, b):
            return [np.dot(X, W.reshape(n, k).transpose()) + b.reshape(n)]

        def fc_transposed_op(X, W, b):
            return [np.dot(X, W.reshape(k, n)) + b.reshape(n)]

        op = core.CreateOperator(
            'FCTransposed' if transposed else 'FC',
            ['X', 'W', 'b'],
            'out',
            engine=engine,
        )

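        # On GPU, also request fp16 arithmetic inside the op via the float16_compute argument.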
        if dtype == np.float16 and core.IsGPUDeviceType(gc.device_type):
            a = caffe2_pb2.Argument()
            a.i = 1
            a.name = "float16_compute"
            op.arg.extend([a])

        # Check against numpy reference
        # ReferenceChecks is flaky on rocm with threshold of 1e-4 for fp16. Relaxing to 1e-3.
        threshold = 1e-3 if (gc.device_type == caffe2_pb2.HIP and dtype == np.float16) else 1e-4
        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[X, W, b],
            reference=fc_transposed_op if transposed else fc_op,
            threshold=threshold
        )
        # Check over multiple devices
        self.assertDeviceChecks(dc, op, [X, W, b], [0])

        # Gradient checks w.r.t. each input (X, W, b); fp16 needs much looser thresholds.
        threshold = 0.5 if dtype == np.float16 else 0.005
        stepsize = 0.5 if dtype == np.float16 else 0.05
        for i in range(3):
            self.assertGradientChecks(gc, op, [X, W, b], i, [0],
                                      threshold=threshold, stepsize=stepsize)

    @settings(max_examples=50, suppress_health_check=[HealthCheck.filter_too_much])
    @serial.given(n=st.integers(1, 5),
                  m=st.integers(0, 5),
                  k=st.integers(1, 5),
                  multi_dim=st.sampled_from([True, False]),
                  dtype=st.sampled_from([np.float32, np.float16]),
                  engine=st.sampled_from(['', 'TENSORCORE']),
                  **hu.gcs)
    def test_fc(self, **kwargs):
        self._run_test(transposed=False, **kwargs)

    @settings(max_examples=50, suppress_health_check=[HealthCheck.filter_too_much])
    @given(n=st.integers(1, 5),
           m=st.integers(0, 5),
           k=st.integers(1, 5),
           multi_dim=st.sampled_from([True, False]),
           dtype=st.sampled_from([np.float32, np.float16]),
           engine=st.sampled_from(['', 'TENSORCORE']),
           **hu.gcs)
    def test_fc_transposed(self, **kwargs):
        self._run_test(transposed=True, **kwargs)


if __name__ == "__main__":
    unittest.main()