Gemfury

seanyen / tensorflow python

Repository URL to install this package:
Details
tensorflow / purelib / tensorflow / contrib / solvers / python / ops / lanczos.py
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Lanczos algorithms."""

# TODO(rmlarsen): Add implementation of symmetric Lanczos algorithm.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections

from tensorflow.contrib.solvers.python.ops import util
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import tensor_array_ops


def lanczos_bidiag(operator,
                   k,
                   orthogonalize=True,
                   starting_vector=None,
                   name="lanczos_bidiag"):
  """Computes a Lanczos bidiagonalization for a linear operator.

  Computes matrices `U` of shape `[m, k+1]`, `V` of shape `[n, k]` and lower
  bidiagonal matrix `B` of shape `[k+1, k]`, that satisfy the equations
  `A * V = U * B` and `A' * U[:, :-1] = V * B[:-1, :]'`.

  The columns of `U` are orthonormal and form a basis for the Krylov subspace
  `K(A*A', U[:,0])`.

  The columns of `V` are orthonormal and form a basis for the Krylov subspace
  `K(A'*A, A' U[:,0])`.

  Args:
    operator: An object representing a linear operator with attributes:
      - shape: Either a list of integers or a 1-D `Tensor` of type `int32` of
        length 2. `shape[0]` is the dimension on the domain of the operator,
        `shape[1]` is the dimension of the co-domain of the operator. On other
        words, if operator represents an M x N matrix A, `shape` must contain
        `[M, N]`.
      - dtype: The datatype of input to and output from `apply` and
        `apply_adjoint`.
      - apply: Callable object taking a vector `x` as input and returning a
        vector with the result of applying the operator to `x`, i.e. if
       `operator` represents matrix `A`, `apply` should return `A * x`.
      - apply_adjoint: Callable object taking a vector `x` as input and
        returning a vector with the result of applying the adjoint operator
        to `x`, i.e. if `operator` represents matrix `A`, `apply_adjoint` should
        return `conj(transpose(A)) * x`.
    k: An integer or a scalar Tensor of type `int32`. Determines the maximum
      number of steps to run. If an invariant subspace is found, the algorithm
      may terminate before `k` steps have been run.
    orthogonalize: If `True`, perform full orthogonalization. If `False` no
      orthogonalization is performed.
    starting_vector: If not null, must be a `Tensor` of shape `[n]`.
    name: A name scope for the operation.

  Returns:
    output: A namedtuple representing a Lanczos bidiagonalization of
      `operator` with attributes:
      u: A rank-2 `Tensor` of type `operator.dtype` and shape
        `[operator.shape[0], k_actual+1]`, where `k_actual` is the number of
        steps run.
      v: A rank-2 `Tensor` of type `operator.dtype` and shape
        `[operator.shape[1], k_actual]`, where `k_actual` is the number of steps
        run.
      alpha: A rank-1 `Tensor` of type `operator.dtype` and shape `[k]`.
      beta: A rank-1 `Tensor` of type `operator.dtype` and shape `[k]`.
  """

  def tarray(size, dtype, name):
    return tensor_array_ops.TensorArray(
        dtype=dtype, size=size, tensor_array_name=name, clear_after_read=False)

  # Reads a row-vector at location i in tarray and returns it as a
  # column-vector.
  def read_colvec(tarray, i):
    return array_ops.expand_dims(tarray.read(i), -1)

  # Writes an column-vector as a row-vecor at location i in tarray.
  def write_colvec(tarray, colvec, i):
    return tarray.write(i, array_ops.squeeze(colvec))

  # Ephemeral class holding Lanczos bidiagonalization state:
  #   u = left Lanczos vectors
  #   v = right Lanczos vectors
  #   alpha = diagonal of B_k.
  #   beta = subdiagonal of B_k.
  # Notice that we store the left and right Lanczos vectors as the _rows_
  # of u and v. This is done because tensors are stored row-major and
  # TensorArray only supports packing along dimension 0.
  lanzcos_bidiag_state = collections.namedtuple("LanczosBidiagState",
                                                ["u", "v", "alpha", "beta"])

  def update_state(old, i, u, v, alpha, beta):
    return lanzcos_bidiag_state(
        write_colvec(old.u, u, i + 1),
        write_colvec(old.v, v, i),
        old.alpha.write(i, alpha), old.beta.write(i, beta))

  def gram_schmidt_step(j, basis, v):
    """Makes v orthogonal to the j'th vector in basis."""
    v_shape = v.get_shape()
    basis_vec = read_colvec(basis, j)
    v -= math_ops.matmul(basis_vec, v, adjoint_a=True) * basis_vec
    v.set_shape(v_shape)
    return j + 1, basis, v

  def orthogonalize_once(i, basis, v):
    j = constant_op.constant(0, dtype=dtypes.int32)
    _, _, v = control_flow_ops.while_loop(lambda j, basis, v: j < i,
                                          gram_schmidt_step, [j, basis, v])
    return util.l2normalize(v)

  # Iterated modified Gram-Schmidt orthogonalization adapted from PROPACK.
  # TODO(rmlarsen): This is possibly the slowest implementation of
  # iterated Gram-Schmidt orthogonalization since the abacus. Move to C++.
  def orthogonalize_(i, basis, v):
    v_norm = util.l2norm(v)
    v_new, v_new_norm = orthogonalize_once(i, basis, v)
    # If the norm decreases more than 1/sqrt(2), run a second
    # round of MGS. See proof in:
    #   B. N. Parlett, ``The Symmetric Eigenvalue Problem'',
    #   Prentice-Hall, Englewood Cliffs, NJ, 1980. pp. 105-109
    return control_flow_ops.cond(v_new_norm < 0.7071 * v_norm,
                                 lambda: orthogonalize_once(i, basis, v),
                                 lambda: (v_new, v_new_norm))

  def stopping_criterion(i, _):
    # TODO(rmlarsen): Stop if an invariant subspace is detected.
    return i < k

  def lanczos_bidiag_step(i, ls):
    """Extends the Lanczos bidiagonalization ls by one step."""
    u = read_colvec(ls.u, i)
    r = operator.apply_adjoint(u)
    # The shape inference doesn't work across cond, save and reapply the shape.
    r_shape = r.get_shape()
    r = control_flow_ops.cond(
        i > 0, lambda: r - ls.beta.read(i - 1) * read_colvec(ls.v, i - 1),
        lambda: r)
    r.set_shape(r_shape)
    if orthogonalize:
      v, alpha = orthogonalize_(i - 1, ls.v, r)
    else:
      v, alpha = util.l2normalize(r)
    p = operator.apply(v) - alpha * u
    if orthogonalize:
      u, beta = orthogonalize_(i, ls.u, p)
    else:
      u, beta = util.l2normalize(p)

    return i + 1, update_state(ls, i, u, v, alpha, beta)

  with ops.name_scope(name):
    dtype = operator.dtype
    if starting_vector is None:
      starting_vector = random_ops.random_uniform(
          operator.shape[:1], -1, 1, dtype=dtype)
    u0, _ = util.l2normalize(starting_vector)
    ls = lanzcos_bidiag_state(
        u=write_colvec(tarray(k + 1, dtype, "u"), u0, 0),
        v=tarray(k, dtype, "v"),
        alpha=tarray(k, dtype, "alpha"),
        beta=tarray(k, dtype, "beta"))
    i = constant_op.constant(0, dtype=dtypes.int32)
    _, ls = control_flow_ops.while_loop(stopping_criterion, lanczos_bidiag_step,
                                        [i, ls])
    return lanzcos_bidiag_state(
        array_ops.matrix_transpose(ls.u.stack()),
        array_ops.matrix_transpose(ls.v.stack()),
        ls.alpha.stack(), ls.beta.stack())


# TODO(rmlarsen): Implement C++ ops for handling bidiagonal matrices
# efficiently. Such a module should provide
#    - multiplication,
#    - linear system solution by back-substitution,
#    - QR factorization,
#    - SVD.
def bidiag_matmul(matrix, alpha, beta, adjoint_b=False, name="bidiag_matmul"):
  """Multiplies a matrix by a bidiagonal matrix.

  alpha and beta are length k vectors representing the diagonal and first lower
  subdiagonal of (K+1) x K matrix B.
  If adjoint_b is False, computes A * B as follows:

    A * B =  A[:, :-1] * diag(alpha) + A[:, 1:] * diag(beta)

  If adjoint_b is True, computes A * B[:-1, :]' as follows

    A * B[:-1, :]' =
      A * diag(alpha) + [zeros(m,1), A[:, :-1] * diag(beta[:-1])]

  Args:
    matrix: A rank-2 `Tensor` representing matrix A.
    alpha: A rank-1 `Tensor` representing the diagonal of B.
    beta: A rank-1 `Tensor` representing the lower subdiagonal diagonal of B.
    adjoint_b: `bool` determining what to compute.
    name: A name scope for the operation.

  Returns:
    If `adjoint_b` is False the `A * B` is returned.
    If `adjoint_b` is True the `A * B'` is returned.
  """
  with ops.name_scope(name):
    alpha = array_ops.expand_dims(alpha, 0)
    if adjoint_b is False:
      beta = array_ops.expand_dims(beta, 0)
      return matrix[:, :-1] * alpha + matrix[:, 1:] * beta
    else:
      beta = array_ops.expand_dims(beta[:-1], 0)
      shape = array_ops.shape(matrix)
      zero_column = array_ops.expand_dims(
          array_ops.zeros(
              shape[:1], dtype=matrix.dtype), 1)
      return matrix * alpha + array_ops.concat(
          [zero_column, matrix[:, :-1] * beta], 1)
seanyen / tensorflow python

Products

About

Resources

Contact Gemfury