from caffe2.proto import caffe2_pb2
import caffe2.python.optimizer as optimizer
from caffe2.python.optimizer import (
build_sgd, build_multi_precision_sgd, build_ftrl, build_gftrl, build_wngrad,
build_adagrad, build_adadelta, build_adam, build_yellowfin, build_rms_prop,
build_storm, add_weight_decay, SgdOptimizer)
from caffe2.python.optimizer_context import UseOptimizer
from caffe2.python.optimizer_test_util import (
OptimizerTestBase, LRModificationTestBase
)
from caffe2.python import core, workspace
from caffe2.python.test_util import TestCase
import numpy as np
from numpy.testing import assert_allclose, assert_equal
import math
import unittest
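
# Each test class below exercises one optimizer builder from
# caffe2.python.optimizer: build_optimizer() attaches the optimizer under test
# to a model, and check_optimizer() inspects the auxiliary parameters it
# registered in the workspace after training.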


class TestLars(OptimizerTestBase, TestCase):
def testSparse(self):
raise unittest.SkipTest("no sparse support")
def build_optimizer(self, model, **kwargs):
self._skip_gpu = False
return build_sgd(model, base_learning_rate=0.1, lars=0.5, **kwargs)
def check_optimizer(self, optimizer):
self.assertTrue(optimizer.get_auxiliary_parameters().shared)
self.assertFalse(optimizer.get_auxiliary_parameters().local)
for param in optimizer.get_auxiliary_parameters().shared:
tensor = workspace.FetchBlob(param)
np.testing.assert_allclose(np.array([1.0]), tensor, atol=1e-5)


class TestMomentumSgd(OptimizerTestBase, TestCase):
def build_optimizer(self, model, **kwargs):
self._skip_gpu = False
return build_sgd(model, base_learning_rate=0.1, momentum=0.1, **kwargs)
def check_optimizer(self, optimizer):
self.assertTrue(optimizer.get_auxiliary_parameters().shared)
self.assertTrue(optimizer.get_auxiliary_parameters().local)
for param in optimizer.get_auxiliary_parameters().shared:
tensor = workspace.FetchBlob(param)
np.testing.assert_allclose(np.array([1.0]), tensor, atol=1e-5)


class TestSgd(OptimizerTestBase, LRModificationTestBase, TestCase):
def build_optimizer(self, model, **kwargs):
self._skip_gpu = False
return build_sgd(model, base_learning_rate=0.1, **kwargs)
def check_optimizer(self, optimizer):
self.assertTrue(optimizer.get_auxiliary_parameters().shared)
self.assertFalse(optimizer.get_auxiliary_parameters().local)
for param in optimizer.get_auxiliary_parameters().shared:
tensor = workspace.FetchBlob(param)
np.testing.assert_allclose(np.array([1.0]), tensor, atol=1e-5)


class TestMultiPrecisionSgd(
OptimizerTestBase, LRModificationTestBase, TestCase
):
def build_optimizer(self, model, **kwargs):
self._skip_gpu = False
return build_multi_precision_sgd(
model, base_learning_rate=0.1, **kwargs
)
def check_optimizer(self, optimizer):
self.assertTrue(optimizer.get_auxiliary_parameters().shared)
self.assertFalse(optimizer.get_auxiliary_parameters().local)
for param in optimizer.get_auxiliary_parameters().shared:
tensor = workspace.FetchBlob(param)
np.testing.assert_allclose(np.array([1.0]), tensor, atol=1e-5)
@unittest.skipIf(not workspace.has_gpu_support, "No GPU support")
def testGPUDense(self):
super(TestMultiPrecisionSgd, self).testGPUDense(core.DataType.FLOAT16)


class TestFtrl(OptimizerTestBase, TestCase):
def build_optimizer(self, model, **kwargs):
self._skip_gpu = True
return build_ftrl(
model,
engine=None,
alpha=1.0,
beta=0.1,
lambda1=0.0,
lambda2=0.0,
**kwargs
)
def check_optimizer(self, optimizer):
self.assertFalse(optimizer.get_auxiliary_parameters().shared)
self.assertTrue(optimizer.get_auxiliary_parameters().local)
for param in optimizer.get_auxiliary_parameters().local:
workspace.FetchBlob(param)


class TestGFtrl(OptimizerTestBase, TestCase):
def testSparse(self):
raise unittest.SkipTest("no sparse support")
def build_optimizer(self, model, **kwargs):
self._skip_gpu = True
return build_gftrl(
model,
engine=None,
alpha=1.0,
beta=0.1,
lambda1=0.0,
lambda2=0.0,
**kwargs
)
def check_optimizer(self, optimizer):
self.assertFalse(optimizer.get_auxiliary_parameters().shared)
self.assertTrue(optimizer.get_auxiliary_parameters().local)
for param in optimizer.get_auxiliary_parameters().local:
workspace.FetchBlob(param)


class TestAdagrad(OptimizerTestBase, LRModificationTestBase, TestCase):
def build_optimizer(self, model, **kwargs):
self._skip_gpu = False
return build_adagrad(model, base_learning_rate=1.0, lars=0.5, **kwargs)
def check_optimizer(self, optimizer):
self.assertFalse(optimizer.get_auxiliary_parameters().shared)
self.assertTrue(optimizer.get_auxiliary_parameters().local)
for param in optimizer.get_auxiliary_parameters().local:
workspace.FetchBlob(param)


class TestRowWiseAdagrad(OptimizerTestBase, TestCase):
def build_optimizer(self, model, **kwargs):
self._skip_gpu = True
return build_adagrad(
model, base_learning_rate=1.0, lars=0.5, rowWise=True, **kwargs
)
def check_optimizer(self, optimizer):
self.assertFalse(optimizer.get_auxiliary_parameters().shared)
self.assertTrue(optimizer.get_auxiliary_parameters().local)
for param in optimizer.get_auxiliary_parameters().local:
workspace.FetchBlob(param)
def testDense(self):
raise unittest.SkipTest("no dense support")
def testGPUDense(self):
raise unittest.SkipTest("no dense support")


class TestRowWiseAdagradWithCounter(OptimizerTestBase, TestCase):
def build_optimizer(self, model, **kwargs):
self._skip_gpu = True
return build_adagrad(
model,
base_learning_rate=1.0,
lars=0.5,
rowWise=True,
counter_halflife=5,
**kwargs
)
def check_optimizer(self, optimizer):
self.assertTrue(optimizer.get_auxiliary_parameters().shared)
self.assertTrue(optimizer.get_auxiliary_parameters().local)
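        # The shared iteration counter should have advanced to 2000 by the end
        # of training.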
self.assertTrue(workspace.HasBlob("optimizer_iteration"))
iteration_tensor = workspace.FetchBlob("optimizer_iteration")
np.testing.assert_allclose(np.array([2000]),
iteration_tensor,
atol=1e-5)
for param in optimizer.get_auxiliary_parameters().shared:
workspace.FetchBlob(param)
for param in optimizer.get_auxiliary_parameters().local:
workspace.FetchBlob(param)
def testDense(self):
raise unittest.SkipTest("no dense support")
def testGPUDense(self):
raise unittest.SkipTest("no dense support")


class TestWngrad(OptimizerTestBase, LRModificationTestBase, TestCase):
def build_optimizer(self, model, **kwargs):
self._skip_gpu = True
return build_wngrad(model, base_learning_rate=25.0, **kwargs)
def check_optimizer(self, optimizer):
self.assertFalse(optimizer.get_auxiliary_parameters().shared)
self.assertTrue(optimizer.get_auxiliary_parameters().local)
for param in optimizer.get_auxiliary_parameters().local:
workspace.FetchBlob(param)


class TestStorm(OptimizerTestBase, LRModificationTestBase, TestCase):
def build_optimizer(self, model, **kwargs):
self._skip_gpu = True
return build_storm(model, base_learning_rate=2.0, **kwargs)
def check_optimizer(self, optimizer):
self.assertFalse(optimizer.get_auxiliary_parameters().shared)
self.assertTrue(optimizer.get_auxiliary_parameters().local)
for param in optimizer.get_auxiliary_parameters().local:
workspace.FetchBlob(param)


class TestAdadelta(OptimizerTestBase, LRModificationTestBase, TestCase):
def build_optimizer(self, model, **kwargs):
self._skip_gpu = False
return build_adadelta(model, base_learning_rate=1.0, decay=0.995, **kwargs)
def check_optimizer(self, optimizer):
self.assertFalse(optimizer.get_auxiliary_parameters().shared)
self.assertTrue(optimizer.get_auxiliary_parameters().local)
for param in optimizer.get_auxiliary_parameters().local:
workspace.FetchBlob(param)


class TestAdam(OptimizerTestBase, LRModificationTestBase, TestCase):
def build_optimizer(self, model, **kwargs):
self._skip_gpu = False
return build_adam(model, base_learning_rate=0.1, **kwargs)
def check_optimizer(self, optimizer):
self.assertTrue(optimizer.get_auxiliary_parameters().shared)
self.assertTrue(optimizer.get_auxiliary_parameters().local)
self.assertTrue(workspace.HasBlob("optimizer_iteration"))
iteration_tensor = workspace.FetchBlob("optimizer_iteration")
np.testing.assert_allclose(np.array([2000]),
iteration_tensor,
atol=1e-5)
for param in optimizer.get_auxiliary_parameters().shared:
workspace.FetchBlob(param)
for param in optimizer.get_auxiliary_parameters().local:
workspace.FetchBlob(param)


class TestSparseRAdam(OptimizerTestBase, LRModificationTestBase, TestCase):
def build_optimizer(self, model, **kwargs):
self._skip_gpu = True
return build_adam(model, base_learning_rate=0.1, enableRAdam=True, **kwargs)
def check_optimizer(self, optimizer):
self.assertTrue(optimizer.get_auxiliary_parameters().shared)
self.assertTrue(optimizer.get_auxiliary_parameters().local)
self.assertTrue(workspace.HasBlob("optimizer_iteration"))
iteration_tensor = workspace.FetchBlob("optimizer_iteration")
np.testing.assert_allclose(np.array([2000]),
iteration_tensor,
atol=1e-5)
for param in optimizer.get_auxiliary_parameters().shared:
workspace.FetchBlob(param)
for param in optimizer.get_auxiliary_parameters().local:
workspace.FetchBlob(param)


class TestYellowFin(OptimizerTestBase, TestCase):
# YellowFin: An automatic tuner for momentum SGD
# (https://arxiv.org/abs/1706.03471)
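    # YellowFin tunes the learning rate and momentum on the fly from estimates
    # of curvature, gradient variance, and distance to the optimum; the helpers
    # below reproduce that tuning rule in NumPy for comparison.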
def build_optimizer(self, model):
self._skip_gpu = False
return build_yellowfin(model, base_learning_rate=0.1)
def check_optimizer(self, optimizer):
self.assertTrue(optimizer.get_auxiliary_parameters().shared)
self.assertTrue(optimizer.get_auxiliary_parameters().local)
self.assertTrue(workspace.HasBlob("optimizer_iteration"))
iteration_tensor = workspace.FetchBlob("optimizer_iteration")
np.testing.assert_allclose(np.array([2000]),
iteration_tensor,
atol=1e-5)
for param in optimizer.get_auxiliary_parameters().shared:
workspace.FetchBlob(param)
for param in optimizer.get_auxiliary_parameters().local:
workspace.FetchBlob(param)
def testSparse(self):
raise unittest.SkipTest("no sparse support")
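    # Zero-debias an exponential moving average (divide by 1 - beta**i),
    # analogous to Adam's bias correction.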
def deb(self, val, beta, i, zero_debias):
if zero_debias:
return val / (1.0 - beta ** i)
else:
return val
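    # NumPy reference for YellowFin's tuning rule: derive the learning rate
    # and momentum from the distance to the optimum, the gradient variance,
    # and the curvature range [h_min, h_max].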
def get_lr_mu(self, distance, grad_var, h_min, h_max):
# First tune based on dynamic range
if grad_var == 0:
dr = h_max / h_min
mu = ((np.sqrt(dr) - 1) / (np.sqrt(dr) + 1)) ** 2
lr_min = (1 + np.sqrt(mu)) ** 2 / h_max
return lr_min, mu
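        # Otherwise solve the tuner's cubic in closed form (Cardano's formula)
        # and take mu as the larger of the dynamic-range bound and root**2.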
p = distance ** 2 * h_min ** 2 / 2 / grad_var
w3 = (-math.sqrt(p * p + 4.0 / 27.0 * p * p * p) - p) / 2.0
w = (1.0 if w3 > 0.0 else -1.0) * math.pow(math.fabs(w3), 1.0 / 3.0)
y = w - p / 3.0 / w
root = y + 1
root = min(root, 1.0 - 1e-6)
dr = h_max / h_min
mu = max(((np.sqrt(dr) - 1) / (np.sqrt(dr) + 1)) ** 2, root**2)
lr_min = (1 - np.sqrt(mu)) ** 2 / h_min
return lr_min, mu
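    # Run the Caffe2 YellowFin op on a constant gradient for n_iter steps so
    # its behavior can be compared against the NumPy reference above.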
def caffe2_yellowfin(self, zero_debias, grad_coef, n_dim, n_iter, gpu):
caffe2_res = {}
alpha = 1.0
mu = 0.0
beta = 0.999
curv_win_width = 20
epsilon = 1e-6
net = core.Net("net")
param_init_net = core.Net("param_init_net")
workspace.ResetWorkspace()
with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU)):
iteration = param_init_net.ConstantFill(
[],
"iteration",
shape=[1],
value=0,
dtype=core.DataType.INT64)
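            # Iteration counter, incremented atomically under a mutex each step.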
iter_mutex = param_init_net.CreateMutex([], ["iteration_mutex"])
net.AtomicIter([iter_mutex, iteration], [iteration])
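            # The gradient fed to the optimizer is a constant vector of value
            # grad_coef.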
pre_grad = param_init_net.ConstantFill(
[],
"pre_grad",
shape=[n_dim],
value=grad_coef
)
if gpu:
iteration = net.CopyCPUToGPU(
[iteration],
"iteration_cpu"
)
iteration_float = net.Cast([iteration], "iteration_float")