TriangularMatrixMatrix.h 19.9 KB
Newer Older
LM's avatar
LM committed
1 2 3 4 5
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
//
Don Gagne's avatar
Don Gagne committed
6 7 8
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
LM's avatar
LM committed
9 10 11 12

#ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_H
#define EIGEN_TRIANGULAR_MATRIX_MATRIX_H

Don Gagne's avatar
Don Gagne committed
13 14
namespace Eigen { 

LM's avatar
LM committed
15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
namespace internal {

// template<typename Scalar, int mr, int StorageOrder, bool Conjugate, int Mode>
// struct gemm_pack_lhs_triangular
// {
//   Matrix<Scalar,mr,mr,
//   void operator()(Scalar* blockA, const EIGEN_RESTRICT Scalar* _lhs, int lhsStride, int depth, int rows)
//   {
//     conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
//     const_blas_data_mapper<Scalar, StorageOrder> lhs(_lhs,lhsStride);
//     int count = 0;
//     const int peeled_mc = (rows/mr)*mr;
//     for(int i=0; i<peeled_mc; i+=mr)
//     {
//       for(int k=0; k<depth; k++)
//         for(int w=0; w<mr; w++)
//           blockA[count++] = cj(lhs(i+w, k));
//     }
//     for(int i=peeled_mc; i<rows; i++)
//     {
//       for(int k=0; k<depth; k++)
//         blockA[count++] = cj(lhs(i, k));
//     }
//   }
// };

/* Optimized triangular matrix * matrix (_TRMM++) product built on top of
 * the general matrix matrix product.
 */
template <typename Scalar, typename Index,
          int Mode, bool LhsIsTriangular,
          int LhsStorageOrder, bool ConjugateLhs,
          int RhsStorageOrder, bool ConjugateRhs,
Don Gagne's avatar
Don Gagne committed
48
          int ResStorageOrder, int Version = Specialized>
LM's avatar
LM committed
49 50 51 52 53
struct product_triangular_matrix_matrix;

template <typename Scalar, typename Index,
          int Mode, bool LhsIsTriangular,
          int LhsStorageOrder, bool ConjugateLhs,
Don Gagne's avatar
Don Gagne committed
54
          int RhsStorageOrder, bool ConjugateRhs, int Version>
LM's avatar
LM committed
55 56
struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
                                           LhsStorageOrder,ConjugateLhs,
Don Gagne's avatar
Don Gagne committed
57
                                           RhsStorageOrder,ConjugateRhs,RowMajor,Version>
LM's avatar
LM committed
58 59 60 61 62 63
{
  static EIGEN_STRONG_INLINE void run(
    Index rows, Index cols, Index depth,
    const Scalar* lhs, Index lhsStride,
    const Scalar* rhs, Index rhsStride,
    Scalar* res,       Index resStride,
Don Gagne's avatar
Don Gagne committed
64
    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
LM's avatar
LM committed
65 66 67 68 69 70 71 72 73
  {
    product_triangular_matrix_matrix<Scalar, Index,
      (Mode&(UnitDiag|ZeroDiag)) | ((Mode&Upper) ? Lower : Upper),
      (!LhsIsTriangular),
      RhsStorageOrder==RowMajor ? ColMajor : RowMajor,
      ConjugateRhs,
      LhsStorageOrder==RowMajor ? ColMajor : RowMajor,
      ConjugateLhs,
      ColMajor>
Don Gagne's avatar
Don Gagne committed
74
      ::run(cols, rows, depth, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha, blocking);
LM's avatar
LM committed
75 76 77 78 79 80
  }
};

// implements col-major += alpha * op(triangular) * op(general)
template <typename Scalar, typename Index, int Mode,
          int LhsStorageOrder, bool ConjugateLhs,
Don Gagne's avatar
Don Gagne committed
81
          int RhsStorageOrder, bool ConjugateRhs, int Version>
LM's avatar
LM committed
82 83
struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
                                           LhsStorageOrder,ConjugateLhs,
Don Gagne's avatar
Don Gagne committed
84
                                           RhsStorageOrder,ConjugateRhs,ColMajor,Version>
LM's avatar
LM committed
85 86 87 88
{
  
  typedef gebp_traits<Scalar,Scalar> Traits;
  enum {
Don Gagne's avatar
Don Gagne committed
89
    SmallPanelWidth   = 2 * EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
LM's avatar
LM committed
90 91 92 93 94 95 96 97 98
    IsLower = (Mode&Lower) == Lower,
    SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1
  };

  static EIGEN_DONT_INLINE void run(
    Index _rows, Index _cols, Index _depth,
    const Scalar* _lhs, Index lhsStride,
    const Scalar* _rhs, Index rhsStride,
    Scalar* res,        Index resStride,
Don Gagne's avatar
Don Gagne committed
99 100 101 102 103 104 105 106 107 108 109 110
    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
};

template <typename Scalar, typename Index, int Mode,
          int LhsStorageOrder, bool ConjugateLhs,
          int RhsStorageOrder, bool ConjugateRhs, int Version>
EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
                                                        LhsStorageOrder,ConjugateLhs,
                                                        RhsStorageOrder,ConjugateRhs,ColMajor,Version>::run(
    Index _rows, Index _cols, Index _depth,
    const Scalar* _lhs, Index lhsStride,
    const Scalar* _rhs, Index rhsStride,
111
    Scalar* _res,        Index resStride,
Don Gagne's avatar
Don Gagne committed
112
    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
LM's avatar
LM committed
113 114
  {
    // strip zeros
Don Gagne's avatar
Don Gagne committed
115
    Index diagSize  = (std::min)(_rows,_depth);
LM's avatar
LM committed
116 117 118 119
    Index rows      = IsLower ? _rows : diagSize;
    Index depth     = IsLower ? diagSize : _depth;
    Index cols      = _cols;
    
120 121 122 123 124 125
    typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
    typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper;
    typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper;
    LhsMapper lhs(_lhs,lhsStride);
    RhsMapper rhs(_rhs,rhsStride);
    ResMapper res(_res, resStride);
LM's avatar
LM committed
126

Don Gagne's avatar
Don Gagne committed
127 128
    Index kc = blocking.kc();                   // cache block size along the K direction
    Index mc = (std::min)(rows,blocking.mc());  // cache block size along the M direction
129 130 131 132
    // The small panel size must not be larger than blocking size.
    // Usually this should never be the case because SmallPanelWidth^2 is very small
    // compared to L2 cache size, but let's be safe:
    Index panelWidth = (std::min)(Index(SmallPanelWidth),(std::min)(kc,mc));
Don Gagne's avatar
Don Gagne committed
133 134 135 136 137 138

    std::size_t sizeA = kc*mc;
    std::size_t sizeB = kc*cols;

    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
    ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
LM's avatar
LM committed
139

140 141 142 143 144 145 146
    // To work around an "error: member reference base type 'Matrix<...>
    // (Eigen::internal::constructor_without_unaligned_array_assert (*)())' is
    // not a structure or union" compilation error in nvcc (tested V8.0.61),
    // create a dummy internal::constructor_without_unaligned_array_assert
    // object to pass to the Matrix constructor.
    internal::constructor_without_unaligned_array_assert a;
    Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer(a);
LM's avatar
LM committed
147 148 149 150 151 152
    triangularBuffer.setZero();
    if((Mode&ZeroDiag)==ZeroDiag)
      triangularBuffer.diagonal().setZero();
    else
      triangularBuffer.diagonal().setOnes();

153 154 155
    gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
    gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
    gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder> pack_rhs;
LM's avatar
LM committed
156 157 158 159 160

    for(Index k2=IsLower ? depth : 0;
        IsLower ? k2>0 : k2<depth;
        IsLower ? k2-=kc : k2+=kc)
    {
Don Gagne's avatar
Don Gagne committed
161
      Index actual_kc = (std::min)(IsLower ? k2 : depth-k2, kc);
LM's avatar
LM committed
162 163 164 165 166 167 168 169 170
      Index actual_k2 = IsLower ? k2-actual_kc : k2;

      // align blocks with the end of the triangular part for trapezoidal lhs
      if((!IsLower)&&(k2<rows)&&(k2+actual_kc>rows))
      {
        actual_kc = rows-k2;
        k2 = k2+actual_kc-kc;
      }

171
      pack_rhs(blockB, rhs.getSubMapper(actual_k2,0), actual_kc, cols);
LM's avatar
LM committed
172 173 174 175 176 177 178 179 180 181

      // the selected lhs's panel has to be split in three different parts:
      //  1 - the part which is zero => skip it
      //  2 - the diagonal block => special kernel
      //  3 - the dense panel below (lower case) or above (upper case) the diagonal block => GEPP

      // the block diagonal, if any:
      if(IsLower || actual_k2<rows)
      {
        // for each small vertical panels of lhs
182
        for (Index k1=0; k1<actual_kc; k1+=panelWidth)
LM's avatar
LM committed
183
        {
184
          Index actualPanelWidth = std::min<Index>(actual_kc-k1, panelWidth);
LM's avatar
LM committed
185 186 187 188 189 190 191 192 193 194 195 196 197 198
          Index lengthTarget = IsLower ? actual_kc-k1-actualPanelWidth : k1;
          Index startBlock   = actual_k2+k1;
          Index blockBOffset = k1;

          // => GEBP with the micro triangular block
          // The trick is to pack this micro block while filling the opposite triangular part with zeros.
          // To this end we do an extra triangular copy to a small temporary buffer
          for (Index k=0;k<actualPanelWidth;++k)
          {
            if (SetDiag)
              triangularBuffer.coeffRef(k,k) = lhs(startBlock+k,startBlock+k);
            for (Index i=IsLower ? k+1 : 0; IsLower ? i<actualPanelWidth : i<k; ++i)
              triangularBuffer.coeffRef(i,k) = lhs(startBlock+i,startBlock+k);
          }
199
          pack_lhs(blockA, LhsMapper(triangularBuffer.data(), triangularBuffer.outerStride()), actualPanelWidth, actualPanelWidth);
LM's avatar
LM committed
200

201 202 203
          gebp_kernel(res.getSubMapper(startBlock, 0), blockA, blockB,
                      actualPanelWidth, actualPanelWidth, cols, alpha,
                      actualPanelWidth, actual_kc, 0, blockBOffset);
LM's avatar
LM committed
204 205 206 207 208 209

          // GEBP with remaining micro panel
          if (lengthTarget>0)
          {
            Index startTarget  = IsLower ? actual_k2+k1+actualPanelWidth : actual_k2;

210
            pack_lhs(blockA, lhs.getSubMapper(startTarget,startBlock), actualPanelWidth, lengthTarget);
LM's avatar
LM committed
211

212 213 214
            gebp_kernel(res.getSubMapper(startTarget, 0), blockA, blockB,
                        lengthTarget, actualPanelWidth, cols, alpha,
                        actualPanelWidth, actual_kc, 0, blockBOffset);
LM's avatar
LM committed
215 216 217 218 219 220
          }
        }
      }
      // the part below (lower case) or above (upper case) the diagonal => GEPP
      {
        Index start = IsLower ? k2 : 0;
Don Gagne's avatar
Don Gagne committed
221
        Index end   = IsLower ? rows : (std::min)(actual_k2,rows);
LM's avatar
LM committed
222 223
        for(Index i2=start; i2<end; i2+=mc)
        {
Don Gagne's avatar
Don Gagne committed
224
          const Index actual_mc = (std::min)(i2+mc,end)-i2;
225 226
          gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr,Traits::LhsProgress, LhsStorageOrder,false>()
            (blockA, lhs.getSubMapper(i2, actual_k2), actual_kc, actual_mc);
LM's avatar
LM committed
227

228 229
          gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc,
                      actual_kc, cols, alpha, -1, -1, 0, 0);
LM's avatar
LM committed
230 231 232 233 234 235 236 237
        }
      }
    }
  }

// implements col-major += alpha * op(general) * op(triangular)
template <typename Scalar, typename Index, int Mode,
          int LhsStorageOrder, bool ConjugateLhs,
Don Gagne's avatar
Don Gagne committed
238
          int RhsStorageOrder, bool ConjugateRhs, int Version>
LM's avatar
LM committed
239
struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
Don Gagne's avatar
Don Gagne committed
240 241
                                        LhsStorageOrder,ConjugateLhs,
                                        RhsStorageOrder,ConjugateRhs,ColMajor,Version>
LM's avatar
LM committed
242 243 244 245 246 247 248 249 250 251 252 253 254
{
  typedef gebp_traits<Scalar,Scalar> Traits;
  enum {
    SmallPanelWidth   = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
    IsLower = (Mode&Lower) == Lower,
    SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1
  };

  static EIGEN_DONT_INLINE void run(
    Index _rows, Index _cols, Index _depth,
    const Scalar* _lhs, Index lhsStride,
    const Scalar* _rhs, Index rhsStride,
    Scalar* res,        Index resStride,
Don Gagne's avatar
Don Gagne committed
255 256 257 258 259 260 261 262 263 264 265 266
    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
};

template <typename Scalar, typename Index, int Mode,
          int LhsStorageOrder, bool ConjugateLhs,
          int RhsStorageOrder, bool ConjugateRhs, int Version>
EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
                                                        LhsStorageOrder,ConjugateLhs,
                                                        RhsStorageOrder,ConjugateRhs,ColMajor,Version>::run(
    Index _rows, Index _cols, Index _depth,
    const Scalar* _lhs, Index lhsStride,
    const Scalar* _rhs, Index rhsStride,
267
    Scalar* _res,        Index resStride,
Don Gagne's avatar
Don Gagne committed
268
    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
LM's avatar
LM committed
269
  {
270
    const Index PacketBytes = packet_traits<Scalar>::size*sizeof(Scalar);
LM's avatar
LM committed
271
    // strip zeros
Don Gagne's avatar
Don Gagne committed
272
    Index diagSize  = (std::min)(_cols,_depth);
LM's avatar
LM committed
273 274 275 276
    Index rows      = _rows;
    Index depth     = IsLower ? _depth : diagSize;
    Index cols      = IsLower ? diagSize : _cols;
    
277 278 279 280 281 282
    typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
    typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper;
    typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper;
    LhsMapper lhs(_lhs,lhsStride);
    RhsMapper rhs(_rhs,rhsStride);
    ResMapper res(_res, resStride);
LM's avatar
LM committed
283

Don Gagne's avatar
Don Gagne committed
284 285
    Index kc = blocking.kc();                   // cache block size along the K direction
    Index mc = (std::min)(rows,blocking.mc());  // cache block size along the M direction
LM's avatar
LM committed
286

Don Gagne's avatar
Don Gagne committed
287
    std::size_t sizeA = kc*mc;
288
    std::size_t sizeB = kc*cols+EIGEN_MAX_ALIGN_BYTES/sizeof(Scalar);
Don Gagne's avatar
Don Gagne committed
289 290 291

    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
    ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
LM's avatar
LM committed
292

293 294
    internal::constructor_without_unaligned_array_assert a;
    Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer(a);
LM's avatar
LM committed
295 296 297 298 299 300
    triangularBuffer.setZero();
    if((Mode&ZeroDiag)==ZeroDiag)
      triangularBuffer.diagonal().setZero();
    else
      triangularBuffer.diagonal().setOnes();

301 302 303 304
    gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
    gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
    gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder> pack_rhs;
    gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder,false,true> pack_rhs_panel;
LM's avatar
LM committed
305 306 307 308 309

    for(Index k2=IsLower ? 0 : depth;
        IsLower ? k2<depth  : k2>0;
        IsLower ? k2+=kc   : k2-=kc)
    {
Don Gagne's avatar
Don Gagne committed
310
      Index actual_kc = (std::min)(IsLower ? depth-k2 : k2, kc);
LM's avatar
LM committed
311 312 313 314 315 316 317 318 319 320
      Index actual_k2 = IsLower ? k2 : k2-actual_kc;

      // align blocks with the end of the triangular part for trapezoidal rhs
      if(IsLower && (k2<cols) && (actual_k2+actual_kc>cols))
      {
        actual_kc = cols-k2;
        k2 = actual_k2 + actual_kc - kc;
      }

      // remaining size
Don Gagne's avatar
Don Gagne committed
321
      Index rs = IsLower ? (std::min)(cols,actual_k2) : cols - k2;
LM's avatar
LM committed
322 323 324 325
      // size of the triangular part
      Index ts = (IsLower && actual_k2>=cols) ? 0 : actual_kc;

      Scalar* geb = blockB+ts*ts;
326
      geb = geb + internal::first_aligned<PacketBytes>(geb,PacketBytes/sizeof(Scalar));
LM's avatar
LM committed
327

328
      pack_rhs(geb, rhs.getSubMapper(actual_k2,IsLower ? 0 : k2), actual_kc, rs);
LM's avatar
LM committed
329 330 331 332 333 334 335 336 337 338 339 340

      // pack the triangular part of the rhs padding the unrolled blocks with zeros
      if(ts>0)
      {
        for (Index j2=0; j2<actual_kc; j2+=SmallPanelWidth)
        {
          Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth);
          Index actual_j2 = actual_k2 + j2;
          Index panelOffset = IsLower ? j2+actualPanelWidth : 0;
          Index panelLength = IsLower ? actual_kc-j2-actualPanelWidth : j2;
          // general part
          pack_rhs_panel(blockB+j2*actual_kc,
341
                         rhs.getSubMapper(actual_k2+panelOffset, actual_j2),
LM's avatar
LM committed
342 343 344 345 346 347 348 349 350 351 352 353 354
                         panelLength, actualPanelWidth,
                         actual_kc, panelOffset);

          // append the triangular part via a temporary buffer
          for (Index j=0;j<actualPanelWidth;++j)
          {
            if (SetDiag)
              triangularBuffer.coeffRef(j,j) = rhs(actual_j2+j,actual_j2+j);
            for (Index k=IsLower ? j+1 : 0; IsLower ? k<actualPanelWidth : k<j; ++k)
              triangularBuffer.coeffRef(k,j) = rhs(actual_j2+k,actual_j2+j);
          }

          pack_rhs_panel(blockB+j2*actual_kc,
355
                         RhsMapper(triangularBuffer.data(), triangularBuffer.outerStride()),
LM's avatar
LM committed
356 357 358 359 360 361 362
                         actualPanelWidth, actualPanelWidth,
                         actual_kc, j2);
        }
      }

      for (Index i2=0; i2<rows; i2+=mc)
      {
Don Gagne's avatar
Don Gagne committed
363
        const Index actual_mc = (std::min)(mc,rows-i2);
364
        pack_lhs(blockA, lhs.getSubMapper(i2, actual_k2), actual_kc, actual_mc);
LM's avatar
LM committed
365 366 367 368 369 370 371 372 373 374

        // triangular kernel
        if(ts>0)
        {
          for (Index j2=0; j2<actual_kc; j2+=SmallPanelWidth)
          {
            Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth);
            Index panelLength = IsLower ? actual_kc-j2 : j2+actualPanelWidth;
            Index blockOffset = IsLower ? j2 : 0;

375
            gebp_kernel(res.getSubMapper(i2, actual_k2 + j2),
LM's avatar
LM committed
376 377 378 379
                        blockA, blockB+j2*actual_kc,
                        actual_mc, panelLength, actualPanelWidth,
                        alpha,
                        actual_kc, actual_kc,  // strides
380
                        blockOffset, blockOffset);// offsets
LM's avatar
LM committed
381 382
          }
        }
383
        gebp_kernel(res.getSubMapper(i2, IsLower ? 0 : k2),
LM's avatar
LM committed
384 385
                    blockA, geb, actual_mc, actual_kc, rs,
                    alpha,
386
                    -1, -1, 0, 0);
LM's avatar
LM committed
387 388 389 390 391 392 393 394 395 396
      }
    }
  }

/***************************************************************************
* Wrapper to product_triangular_matrix_matrix
***************************************************************************/

} // end namespace internal

397
namespace internal {
LM's avatar
LM committed
398
template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs>
399
struct triangular_product_impl<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
LM's avatar
LM committed
400
{
401
  template<typename Dest> static void run(Dest& dst, const Lhs &a_lhs, const Rhs &a_rhs, const typename Dest::Scalar& alpha)
LM's avatar
LM committed
402
  {
403 404 405 406 407 408 409 410 411 412 413 414 415
    typedef typename Lhs::Scalar  LhsScalar;
    typedef typename Rhs::Scalar  RhsScalar;
    typedef typename Dest::Scalar Scalar;
    
    typedef internal::blas_traits<Lhs> LhsBlasTraits;
    typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
    typedef typename internal::remove_all<ActualLhsType>::type ActualLhsTypeCleaned;
    typedef internal::blas_traits<Rhs> RhsBlasTraits;
    typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
    typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;
    
    typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);
    typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);
LM's avatar
LM committed
416

417 418 419
    LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(a_lhs);
    RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(a_rhs);
    Scalar actualAlpha = alpha * lhs_alpha * rhs_alpha;
LM's avatar
LM committed
420

Don Gagne's avatar
Don Gagne committed
421 422 423 424 425 426 427 428 429
    typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
              Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,4> BlockingType;

    enum { IsLower = (Mode&Lower) == Lower };
    Index stripedRows  = ((!LhsIsTriangular) || (IsLower))  ? lhs.rows() : (std::min)(lhs.rows(),lhs.cols());
    Index stripedCols  = ((LhsIsTriangular)  || (!IsLower)) ? rhs.cols() : (std::min)(rhs.cols(),rhs.rows());
    Index stripedDepth = LhsIsTriangular ? ((!IsLower) ? lhs.cols() : (std::min)(lhs.cols(),lhs.rows()))
                                         : ((IsLower)  ? rhs.rows() : (std::min)(rhs.rows(),rhs.cols()));

430
    BlockingType blocking(stripedRows, stripedCols, stripedDepth, 1, false);
Don Gagne's avatar
Don Gagne committed
431

LM's avatar
LM committed
432 433
    internal::product_triangular_matrix_matrix<Scalar, Index,
      Mode, LhsIsTriangular,
434 435
      (internal::traits<ActualLhsTypeCleaned>::Flags&RowMajorBit) ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
      (internal::traits<ActualRhsTypeCleaned>::Flags&RowMajorBit) ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
LM's avatar
LM committed
436 437
      (internal::traits<Dest          >::Flags&RowMajorBit) ? RowMajor : ColMajor>
      ::run(
Don Gagne's avatar
Don Gagne committed
438
        stripedRows, stripedCols, stripedDepth,   // sizes
439 440
        &lhs.coeffRef(0,0), lhs.outerStride(),    // lhs info
        &rhs.coeffRef(0,0), rhs.outerStride(),    // rhs info
Don Gagne's avatar
Don Gagne committed
441 442
        &dst.coeffRef(0,0), dst.outerStride(),    // result info
        actualAlpha, blocking
LM's avatar
LM committed
443
      );
444 445 446 447 448 449 450 451 452 453 454 455 456 457 458

    // Apply correction if the diagonal is unit and a scalar factor was nested:
    if ((Mode&UnitDiag)==UnitDiag)
    {
      if (LhsIsTriangular && lhs_alpha!=LhsScalar(1))
      {
        Index diagSize = (std::min)(lhs.rows(),lhs.cols());
        dst.topRows(diagSize) -= ((lhs_alpha-LhsScalar(1))*a_rhs).topRows(diagSize);
      }
      else if ((!LhsIsTriangular) && rhs_alpha!=RhsScalar(1))
      {
        Index diagSize = (std::min)(rhs.rows(),rhs.cols());
        dst.leftCols(diagSize) -= (rhs_alpha-RhsScalar(1))*a_lhs.leftCols(diagSize);
      }
    }
LM's avatar
LM committed
459 460 461
  }
};

462 463
} // end namespace internal

Don Gagne's avatar
Don Gagne committed
464
} // end namespace Eigen
LM's avatar
LM committed
465 466

#endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_H