StableNorm.h 6.6 KB
Newer Older
LM's avatar
LM committed
1 2 3 4 5
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
//
Don Gagne's avatar
Don Gagne committed
6 7 8
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
LM's avatar
LM committed
9 10 11 12

#ifndef EIGEN_STABLENORM_H
#define EIGEN_STABLENORM_H

Don Gagne's avatar
Don Gagne committed
13 14
namespace Eigen { 

LM's avatar
LM committed
15
namespace internal {
Don Gagne's avatar
Don Gagne committed
16

LM's avatar
LM committed
17 18 19
template<typename ExpressionType, typename Scalar>
inline void stable_norm_kernel(const ExpressionType& bl, Scalar& ssq, Scalar& scale, Scalar& invScale)
{
20 21 22 23
  using std::max;
  Scalar maxCoeff = bl.cwiseAbs().maxCoeff();
  
  if (maxCoeff>scale)
LM's avatar
LM committed
24
  {
25 26 27 28 29 30 31 32 33 34 35 36
    ssq = ssq * numext::abs2(scale/maxCoeff);
    Scalar tmp = Scalar(1)/maxCoeff;
    if(tmp > NumTraits<Scalar>::highest())
    {
      invScale = NumTraits<Scalar>::highest();
      scale = Scalar(1)/invScale;
    }
    else
    {
      scale = maxCoeff;
      invScale = tmp;
    }
LM's avatar
LM committed
37
  }
38 39
  
  // TODO if the maxCoeff is much much smaller than the current scale,
LM's avatar
LM committed
40
  // then we can neglect this sub vector
41 42
  if(scale>Scalar(0)) // if scale==0, then bl is 0 
    ssq += (bl*invScale).squaredNorm();
LM's avatar
LM committed
43 44 45
}

template<typename Derived>
Don Gagne's avatar
Don Gagne committed
46 47
inline typename NumTraits<typename traits<Derived>::Scalar>::Real
blueNorm_impl(const EigenBase<Derived>& _vec)
LM's avatar
LM committed
48
{
Don Gagne's avatar
Don Gagne committed
49 50
  typedef typename Derived::RealScalar RealScalar;  
  typedef typename Derived::Index Index;
LM's avatar
LM committed
51 52 53
  using std::pow;
  using std::min;
  using std::max;
Don Gagne's avatar
Don Gagne committed
54 55 56 57
  using std::sqrt;
  using std::abs;
  const Derived& vec(_vec.derived());
  static bool initialized = false;
LM's avatar
LM committed
58
  static RealScalar b1, b2, s1m, s2m, overfl, rbig, relerr;
Don Gagne's avatar
Don Gagne committed
59
  if(!initialized)
LM's avatar
LM committed
60
  {
Don Gagne's avatar
Don Gagne committed
61 62
    int ibeta, it, iemin, iemax, iexp;
    RealScalar eps;
LM's avatar
LM committed
63
    // This program calculates the machine-dependent constants
Don Gagne's avatar
Don Gagne committed
64
    // bl, b2, slm, s2m, relerr overfl
LM's avatar
LM committed
65 66 67 68 69 70
    // from the "basic" machine-dependent numbers
    // nbig, ibeta, it, iemin, iemax, rbig.
    // The following define the basic machine-dependent constants.
    // For portability, the PORT subprograms "ilmaeh" and "rlmach"
    // are used. For any specific computer, each of the assignment
    // statements can be replaced
Don Gagne's avatar
Don Gagne committed
71 72 73 74 75
    ibeta = std::numeric_limits<RealScalar>::radix;                 // base for floating-point numbers
    it    = std::numeric_limits<RealScalar>::digits;                // number of base-beta digits in mantissa
    iemin = std::numeric_limits<RealScalar>::min_exponent;          // minimum exponent
    iemax = std::numeric_limits<RealScalar>::max_exponent;          // maximum exponent
    rbig  = (std::numeric_limits<RealScalar>::max)();               // largest floating-point number
LM's avatar
LM committed
76 77

    iexp  = -((1-iemin)/2);
Don Gagne's avatar
Don Gagne committed
78
    b1    = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp)));    // lower boundary of midrange
LM's avatar
LM committed
79
    iexp  = (iemax + 1 - it)/2;
Don Gagne's avatar
Don Gagne committed
80
    b2    = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp)));    // upper boundary of midrange
LM's avatar
LM committed
81 82

    iexp  = (2-iemin)/2;
Don Gagne's avatar
Don Gagne committed
83
    s1m   = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp)));    // scaling factor for lower range
LM's avatar
LM committed
84
    iexp  = - ((iemax+it)/2);
Don Gagne's avatar
Don Gagne committed
85
    s2m   = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp)));    // scaling factor for upper range
LM's avatar
LM committed
86

Don Gagne's avatar
Don Gagne committed
87
    overfl  = rbig*s2m;                                             // overflow boundary for abig
LM's avatar
LM committed
88
    eps     = RealScalar(pow(double(ibeta), 1-it));
Don Gagne's avatar
Don Gagne committed
89 90
    relerr  = sqrt(eps);                                            // tolerance for neglecting asml
    initialized = true;
LM's avatar
LM committed
91
  }
Don Gagne's avatar
Don Gagne committed
92
  Index n = vec.size();
LM's avatar
LM committed
93 94 95 96
  RealScalar ab2 = b2 / RealScalar(n);
  RealScalar asml = RealScalar(0);
  RealScalar amed = RealScalar(0);
  RealScalar abig = RealScalar(0);
Don Gagne's avatar
Don Gagne committed
97
  for(typename Derived::InnerIterator it(vec, 0); it; ++it)
LM's avatar
LM committed
98
  {
Don Gagne's avatar
Don Gagne committed
99 100 101 102
    RealScalar ax = abs(it.value());
    if(ax > ab2)     abig += numext::abs2(ax*s2m);
    else if(ax < b1) asml += numext::abs2(ax*s1m);
    else             amed += numext::abs2(ax);
LM's avatar
LM committed
103 104 105
  }
  if(abig > RealScalar(0))
  {
Don Gagne's avatar
Don Gagne committed
106
    abig = sqrt(abig);
LM's avatar
LM committed
107 108 109 110 111 112 113
    if(abig > overfl)
    {
      return rbig;
    }
    if(amed > RealScalar(0))
    {
      abig = abig/s2m;
Don Gagne's avatar
Don Gagne committed
114
      amed = sqrt(amed);
LM's avatar
LM committed
115 116 117 118 119 120 121 122
    }
    else
      return abig/s2m;
  }
  else if(asml > RealScalar(0))
  {
    if (amed > RealScalar(0))
    {
Don Gagne's avatar
Don Gagne committed
123 124
      abig = sqrt(amed);
      amed = sqrt(asml) / s1m;
LM's avatar
LM committed
125 126
    }
    else
Don Gagne's avatar
Don Gagne committed
127
      return sqrt(asml)/s1m;
LM's avatar
LM committed
128 129
  }
  else
Don Gagne's avatar
Don Gagne committed
130 131 132
    return sqrt(amed);
  asml = (min)(abig, amed);
  abig = (max)(abig, amed);
LM's avatar
LM committed
133 134 135
  if(asml <= abig*relerr)
    return abig;
  else
Don Gagne's avatar
Don Gagne committed
136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186
    return abig * sqrt(RealScalar(1) + numext::abs2(asml/abig));
}

} // end namespace internal

/** \returns the \em l2 norm of \c *this avoiding underflow and overflow.
  * This version use a blockwise two passes algorithm:
  *  1 - find the absolute largest coefficient \c s
  *  2 - compute \f$ s \Vert \frac{*this}{s} \Vert \f$ in a standard way
  *
  * For architecture/scalar types supporting vectorization, this version
  * is faster than blueNorm(). Otherwise the blueNorm() is much faster.
  *
  * \sa norm(), blueNorm(), hypotNorm()
  */
template<typename Derived>
inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
MatrixBase<Derived>::stableNorm() const
{
  using std::min;
  using std::sqrt;
  const Index blockSize = 4096;
  RealScalar scale(0);
  RealScalar invScale(1);
  RealScalar ssq(0); // sum of square
  enum {
    Alignment = (int(Flags)&DirectAccessBit) || (int(Flags)&AlignedBit) ? 1 : 0
  };
  Index n = size();
  Index bi = internal::first_aligned(derived());
  if (bi>0)
    internal::stable_norm_kernel(this->head(bi), ssq, scale, invScale);
  for (; bi<n; bi+=blockSize)
    internal::stable_norm_kernel(this->segment(bi,(min)(blockSize, n - bi)).template forceAlignedAccessIf<Alignment>(), ssq, scale, invScale);
  return scale * sqrt(ssq);
}

/** \returns the \em l2 norm of \c *this using the Blue's algorithm.
  * A Portable Fortran Program to Find the Euclidean Norm of a Vector,
  * ACM TOMS, Vol 4, Issue 1, 1978.
  *
  * For architecture/scalar types without vectorization, this version
  * is much faster than stableNorm(). Otherwise the stableNorm() is faster.
  *
  * \sa norm(), stableNorm(), hypotNorm()
  */
template<typename Derived>
inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
MatrixBase<Derived>::blueNorm() const
{
  return internal::blueNorm_impl(*this);
LM's avatar
LM committed
187 188 189 190 191 192 193 194 195 196 197 198 199 200
}

/** \returns the \em l2 norm of \c *this avoiding undeflow and overflow.
  * This version use a concatenation of hypot() calls, and it is very slow.
  *
  * \sa norm(), stableNorm()
  */
template<typename Derived>
inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
MatrixBase<Derived>::hypotNorm() const
{
  return this->cwiseAbs().redux(internal::scalar_hypot_op<RealScalar>());
}

Don Gagne's avatar
Don Gagne committed
201 202
} // end namespace Eigen

LM's avatar
LM committed
203
#endif // EIGEN_STABLENORM_H