// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_REDUX_H
#define EIGEN_REDUX_H

namespace Eigen { 

namespace internal {

// TODO
//  * implement other kinds of vectorization
//  * factorize code

/***************************************************************************
* Part 1 : the logic deciding a strategy for vectorization and unrolling
***************************************************************************/

template<typename Func, typename Derived>
struct redux_traits
{
public:
  typedef typename find_best_packet<typename Derived::Scalar,Derived::SizeAtCompileTime>::type PacketType;
  enum {
    PacketSize = unpacket_traits<PacketType>::size,
    InnerMaxSize = int(Derived::IsRowMajor)
                 ? Derived::MaxColsAtCompileTime
                 : Derived::MaxRowsAtCompileTime
  };

  enum {
    MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit)
                  && (functor_traits<Func>::PacketAccess),
    MayLinearVectorize = bool(MightVectorize) && (int(Derived::Flags)&LinearAccessBit),
    MaySliceVectorize  = bool(MightVectorize) && int(InnerMaxSize)>=3*PacketSize
  };

public:
  enum {
    Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
              : int(MaySliceVectorize)  ? int(SliceVectorizedTraversal)
                                        : int(DefaultTraversal)
  };

public:
  enum {
    Cost = Derived::SizeAtCompileTime == Dynamic ? HugeCost
         : Derived::SizeAtCompileTime * Derived::CoeffReadCost + (Derived::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
    UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
  };

public:
  enum {
    Unrolling = Cost <= UnrollingLimit ? CompleteUnrolling : NoUnrolling
  };
  
#ifdef EIGEN_DEBUG_ASSIGN
  static void debug()
  {
    std::cerr << "Xpr: " << typeid(typename Derived::XprType).name() << std::endl;
    std::cerr.setf(std::ios::hex, std::ios::basefield);
    EIGEN_DEBUG_VAR(Derived::Flags)
    std::cerr.unsetf(std::ios::hex);
    EIGEN_DEBUG_VAR(InnerMaxSize)
    EIGEN_DEBUG_VAR(PacketSize)
    EIGEN_DEBUG_VAR(MightVectorize)
    EIGEN_DEBUG_VAR(MayLinearVectorize)
    EIGEN_DEBUG_VAR(MaySliceVectorize)
    EIGEN_DEBUG_VAR(Traversal)
    EIGEN_DEBUG_VAR(UnrollingLimit)
    EIGEN_DEBUG_VAR(Unrolling)
    std::cerr << std::endl;
  }
#endif
};
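
// Illustration (not part of the logic above, and assuming 4-wide float packets such as SSE):
// for the redux_evaluator of a Matrix4f summed with scalar_sum_op, PacketSize is 4 and both
// ActualPacketAccessBit and LinearAccessBit are typically set, so MayLinearVectorize holds and
// Traversal resolves to LinearVectorizedTraversal. The compile-time Cost,
// 16*CoeffReadCost + 15*functor_traits<Func>::Cost, stays below UnrollingLimit
// (EIGEN_UNROLLING_LIMIT scaled by PacketSize), hence Unrolling == CompleteUnrolling.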

/***************************************************************************
* Part 2 : unrollers
***************************************************************************/

/*** no vectorization ***/

template<typename Func, typename Derived, int Start, int Length>
struct redux_novec_unroller
{
  enum {
    HalfLength = Length/2
  };

  typedef typename Derived::Scalar Scalar;

  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
  {
    return func(redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
                redux_novec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func));
  }
};
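
// For instance, with Start=0 and Length=4 the recursion above unrolls at compile time into
// the balanced tree
//   func(func(coeff(0), coeff(1)), func(coeff(2), coeff(3)))
// (coefficients being read through coeffByOuterInner), rather than into a left-to-right fold.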

template<typename Func, typename Derived, int Start>
struct redux_novec_unroller<Func, Derived, Start, 1>
{
  enum {
    outer = Start / Derived::InnerSizeAtCompileTime,
    inner = Start % Derived::InnerSizeAtCompileTime
  };

  typedef typename Derived::Scalar Scalar;

  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&)
  {
    return mat.coeffByOuterInner(outer, inner);
  }
};

// This specialization is dead code and will never be called: for a 0-length range,
// run() is never invoked at all. It is only required to silence false warnings
// about failed inlining.
template<typename Func, typename Derived, int Start>
struct redux_novec_unroller<Func, Derived, Start, 0>
{
  typedef typename Derived::Scalar Scalar;
  EIGEN_DEVICE_FUNC 
  static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); }
};

/*** vectorization ***/

template<typename Func, typename Derived, int Start, int Length>
struct redux_vec_unroller
{
  enum {
    PacketSize = redux_traits<Func, Derived>::PacketSize,
    HalfLength = Length/2
  };

  typedef typename Derived::Scalar Scalar;
  typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;

  static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func& func)
  {
    return func.packetOp(
            redux_vec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
            redux_vec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func) );
  }
};

template<typename Func, typename Derived, int Start>
struct redux_vec_unroller<Func, Derived, Start, 1>
{
  enum {
    index = Start * redux_traits<Func, Derived>::PacketSize,
    outer = index / int(Derived::InnerSizeAtCompileTime),
    inner = index % int(Derived::InnerSizeAtCompileTime),
    alignment = Derived::Alignment
  };

  typedef typename Derived::Scalar Scalar;
  typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;

  static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&)
  {
    return mat.template packetByOuterInner<alignment,PacketScalar>(outer, inner);
  }
};

/***************************************************************************
* Part 3 : implementation of all cases
***************************************************************************/

template<typename Func, typename Derived,
         int Traversal = redux_traits<Func, Derived>::Traversal,
         int Unrolling = redux_traits<Func, Derived>::Unrolling
>
struct redux_impl;

template<typename Func, typename Derived>
struct redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
{
  typedef typename Derived::Scalar Scalar;
  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
  {
    eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
    Scalar res;
    res = mat.coeffByOuterInner(0, 0);
    for(Index i = 1; i < mat.innerSize(); ++i)
      res = func(res, mat.coeffByOuterInner(0, i));
    for(Index i = 1; i < mat.outerSize(); ++i)
      for(Index j = 0; j < mat.innerSize(); ++j)
        res = func(res, mat.coeffByOuterInner(i, j));
    return res;
  }
};

template<typename Func, typename Derived>
struct redux_impl<Func,Derived, DefaultTraversal, CompleteUnrolling>
  : public redux_novec_unroller<Func,Derived, 0, Derived::SizeAtCompileTime>
{};

template<typename Func, typename Derived>
struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
{
  typedef typename Derived::Scalar Scalar;
  typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;

  static Scalar run(const Derived &mat, const Func& func)
  {
    const Index size = mat.size();
    
    const Index packetSize = redux_traits<Func, Derived>::PacketSize;
    const int packetAlignment = unpacket_traits<PacketScalar>::alignment;
    enum {
      alignment0 = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned),
      alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Derived::Alignment)
    };
    const Index alignedStart = internal::first_default_aligned(mat.nestedExpression());
    const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
    const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
    const Index alignedEnd2 = alignedStart + alignedSize2;
    const Index alignedEnd  = alignedStart + alignedSize;
    Scalar res;
    if(alignedSize)
    {
      PacketScalar packet_res0 = mat.template packet<alignment,PacketScalar>(alignedStart);
      if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop
      {
        PacketScalar packet_res1 = mat.template packet<alignment,PacketScalar>(alignedStart+packetSize);
        for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
        {
          packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment,PacketScalar>(index));
          packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment,PacketScalar>(index+packetSize));
        }

        packet_res0 = func.packetOp(packet_res0,packet_res1);
        if(alignedEnd>alignedEnd2)
          packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment,PacketScalar>(alignedEnd2));
      }
      res = func.predux(packet_res0);

      for(Index index = 0; index < alignedStart; ++index)
        res = func(res,mat.coeff(index));

      for(Index index = alignedEnd; index < size; ++index)
        res = func(res,mat.coeff(index));
    }
    else // too small to vectorize anything.
         // since this is a dynamic-size expression, it is inefficient for such small sizes anyway, so don't try to optimize.
    {
      res = mat.coeff(0);
      for(Index index = 1; index < size; ++index)
        res = func(res,mat.coeff(index));
    }

    return res;
  }
};
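
// Sketch of the index ranges handled by run() above (runtime values, shown for orientation only):
//   [0, alignedStart)            scalar prologue, one coefficient at a time
//   [alignedStart, alignedEnd2)  main loop, two packets per iteration (packet_res0 / packet_res1)
//   [alignedEnd2, alignedEnd)    at most one remaining full packet
//   [alignedEnd, size)           scalar epilogue
// The two packet accumulators are merged with func.packetOp() and reduced to a scalar with
// func.predux() before the scalar prologue and epilogue are folded in.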

// NOTE: for SliceVectorizedTraversal we simply bypass unrolling
template<typename Func, typename Derived, int Unrolling>
struct redux_impl<Func, Derived, SliceVectorizedTraversal, Unrolling>
{
  typedef typename Derived::Scalar Scalar;
  typedef typename redux_traits<Func, Derived>::PacketType PacketType;

  EIGEN_DEVICE_FUNC static Scalar run(const Derived &mat, const Func& func)
  {
    eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
    const Index innerSize = mat.innerSize();
    const Index outerSize = mat.outerSize();
    enum {
      packetSize = redux_traits<Func, Derived>::PacketSize
    };
    const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;
    Scalar res;
    if(packetedInnerSize)
    {
      PacketType packet_res = mat.template packet<Unaligned,PacketType>(0,0);
      for(Index j=0; j<outerSize; ++j)
        for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize))
          packet_res = func.packetOp(packet_res, mat.template packetByOuterInner<Unaligned,PacketType>(j,i));

      res = func.predux(packet_res);
      for(Index j=0; j<outerSize; ++j)
        for(Index i=packetedInnerSize; i<innerSize; ++i)
          res = func(res, mat.coeffByOuterInner(j,i));
    }
    else // too small to vectorize anything.
         // since this is a dynamic-size expression, it is inefficient for such small sizes anyway, so don't try to optimize.
    {
      res = redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>::run(mat, func);
    }

    return res;
  }
};
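
// Sketch of the slice-vectorized traversal above, e.g. for a column-major expression
// (outer = column, inner = row): each column is reduced packet by packet up to
// packetedInnerSize into packet_res, packet_res is then collapsed with func.predux(),
// and the remaining innerSize - packetedInnerSize coefficients of every column are
// folded in with scalar calls to func.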

template<typename Func, typename Derived>
struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
{
  typedef typename Derived::Scalar Scalar;

  typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
  enum {
    PacketSize = redux_traits<Func, Derived>::PacketSize,
    Size = Derived::SizeAtCompileTime,
    VectorizedSize = (Size / PacketSize) * PacketSize
  };
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
  {
    eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
    if (VectorizedSize > 0) {
      Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
      if (VectorizedSize != Size)
        res = func(res,redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func));
      return res;
    }
    else {
      return redux_novec_unroller<Func, Derived, 0, Size>::run(mat,func);
    }
  }
};
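
// In the fully unrolled vectorized case above, the first VectorizedSize coefficients are
// reduced by redux_vec_unroller (Size/PacketSize packets combined at compile time), and the
// Size - VectorizedSize leftover coefficients, if any, are folded in by redux_novec_unroller.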

// evaluator adaptor
template<typename _XprType>
class redux_evaluator
{
public:
  typedef _XprType XprType;
  EIGEN_DEVICE_FUNC explicit redux_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) {}
  
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename XprType::PacketScalar PacketScalar;
  typedef typename XprType::PacketReturnType PacketReturnType;
  
  enum {
    MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime,
    MaxColsAtCompileTime = XprType::MaxColsAtCompileTime,
    // TODO we should not remove DirectAccessBit and rather find an elegant way to query the alignment offset at runtime from the evaluator
    Flags = evaluator<XprType>::Flags & ~DirectAccessBit,
    IsRowMajor = XprType::IsRowMajor,
    SizeAtCompileTime = XprType::SizeAtCompileTime,
    InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime,
    CoeffReadCost = evaluator<XprType>::CoeffReadCost,
    Alignment = evaluator<XprType>::Alignment
  };
  
  EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); }
  EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); }
  EIGEN_DEVICE_FUNC Index size() const { return m_xpr.size(); }
  EIGEN_DEVICE_FUNC Index innerSize() const { return m_xpr.innerSize(); }
  EIGEN_DEVICE_FUNC Index outerSize() const { return m_xpr.outerSize(); }

  EIGEN_DEVICE_FUNC
  CoeffReturnType coeff(Index row, Index col) const
  { return m_evaluator.coeff(row, col); }

  EIGEN_DEVICE_FUNC
  CoeffReturnType coeff(Index index) const
  { return m_evaluator.coeff(index); }

  template<int LoadMode, typename PacketType>
  PacketType packet(Index row, Index col) const
  { return m_evaluator.template packet<LoadMode,PacketType>(row, col); }

  template<int LoadMode, typename PacketType>
  PacketType packet(Index index) const
  { return m_evaluator.template packet<LoadMode,PacketType>(index); }
  
  EIGEN_DEVICE_FUNC
  CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
  { return m_evaluator.coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
  
  template<int LoadMode, typename PacketType>
  PacketType packetByOuterInner(Index outer, Index inner) const
  { return m_evaluator.template packet<LoadMode,PacketType>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
  
  const XprType & nestedExpression() const { return m_xpr; }
  
protected:
  internal::evaluator<XprType> m_evaluator;
  const XprType &m_xpr;
};

} // end namespace internal

/***************************************************************************
* Part 4 : public API
***************************************************************************/


/** \returns the result of a full redux operation on the whole matrix or vector using \a func
  *
  * The template parameter \a BinaryOp is the type of the functor \a func which must be
  * an associative binary operator. Both C++98-style and C++11-style functors (including lambdas) are handled.
  *
  * \sa DenseBase::sum(), DenseBase::minCoeff(), DenseBase::maxCoeff(), MatrixBase::colwise(), MatrixBase::rowwise()
  */
template<typename Derived>
template<typename Func>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::redux(const Func& func) const
{
  eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");

  typedef typename internal::redux_evaluator<Derived> ThisEvaluator;
  ThisEvaluator thisEval(derived());
  
  return internal::redux_impl<Func, ThisEvaluator>::run(thisEval, func);
}
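
// Usage sketch (illustrative only, not part of the library): any associative binary functor
// can be passed, including a C++11 lambda, e.g.
//   Eigen::VectorXf v = Eigen::VectorXf::Random(10);
//   float r = v.redux([](float a, float b) { return a + b; });
// which computes the same value as v.sum(), up to the order in which coefficients are combined.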

/** \returns the minimum of all coefficients of \c *this.
  * \warning the result is undefined if \c *this contains NaN.
  */
template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::minCoeff() const
{
  return derived().redux(Eigen::internal::scalar_min_op<Scalar,Scalar>());
}

/** \returns the maximum of all coefficients of \c *this.
  * \warning the result is undefined if \c *this contains NaN.
  */
template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::maxCoeff() const
{
  return derived().redux(Eigen::internal::scalar_max_op<Scalar,Scalar>());
}

/** \returns the sum of all coefficients of \c *this
  *
  * If \c *this is empty, then the value 0 is returned.
  *
  * \sa trace(), prod(), mean()
  */
template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::sum() const
{
  if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
    return Scalar(0);
  return derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>());
}

/** \returns the mean of all coefficients of *this
*
* \sa trace(), prod(), sum()
*/
template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::mean() const
{
#ifdef __INTEL_COMPILER
  #pragma warning push
  #pragma warning ( disable : 2259 )
#endif
  return Scalar(derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>())) / Scalar(this->size());
#ifdef __INTEL_COMPILER
  #pragma warning pop
#endif
}

/** \returns the product of all coefficients of *this
  *
  * Example: \include MatrixBase_prod.cpp
  * Output: \verbinclude MatrixBase_prod.out
  *
  * \sa sum(), mean(), trace()
  */
template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::prod() const
{
  if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
    return Scalar(1);
  return derived().redux(Eigen::internal::scalar_product_op<Scalar>());
}

/** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal.
  *
  * \c *this can be any matrix, not necessarily square.
  *
  * \sa diagonal(), sum()
  */
template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
MatrixBase<Derived>::trace() const
{
  return derived().diagonal().sum();
}

} // end namespace Eigen

#endif // EIGEN_REDUX_H