SparseLU_kernel_bmod.h 5.58 KB
Newer Older
Don Gagne's avatar
Don Gagne committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef SPARSELU_KERNEL_BMOD_H
#define SPARSELU_KERNEL_BMOD_H

namespace Eigen {
namespace internal {
  
/**
 * \brief Performs numeric block updates from a given supernode to a single column
 * 
 * \param segsize Size of the segment (and blocks ) to use for updates
 * \param[in,out] dense Packed values of the original matrix
 * \param tempv temporary vector to use for updates
 * \param lusup array containing the supernodes
 * \param lda Leading dimension in the supernode
 * \param nrow Number of rows in the rectangular part of the supernode
 * \param lsub compressed row subscripts of supernodes
 * \param lptr pointer to the first column of the current supernode in lsub
 * \param no_zeros Number of nonzeros elements before the diagonal part of the supernode
 * \return 0 on success
 */
template <int SegSizeAtCompileTime> struct LU_kernel_bmod
{
  template <typename BlockScalarVector, typename ScalarVector, typename IndexVector, typename Index>
  static EIGEN_DONT_INLINE void run(const int segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, Index& luptr, const Index lda,
                                    const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros);
};

template <int SegSizeAtCompileTime>
template <typename BlockScalarVector, typename ScalarVector, typename IndexVector, typename Index>
EIGEN_DONT_INLINE void LU_kernel_bmod<SegSizeAtCompileTime>::run(const int segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, Index& luptr, const Index lda,
                                                                  const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros)
{
  typedef typename ScalarVector::Scalar Scalar;
  // First, copy U[*,j] segment from dense(*) to tempv(*)
  // The result of triangular solve is in tempv[*]; 
    // The result of matric-vector update is in dense[*]
  Index isub = lptr + no_zeros; 
  int i;
  Index irow;
  for (i = 0; i < ((SegSizeAtCompileTime==Dynamic)?segsize:SegSizeAtCompileTime); i++)
  {
    irow = lsub(isub); 
    tempv(i) = dense(irow); 
    ++isub; 
  }
  // Dense triangular solve -- start effective triangle
  luptr += lda * no_zeros + no_zeros; 
  // Form Eigen matrix and vector 
59
  Map<Matrix<Scalar,SegSizeAtCompileTime,SegSizeAtCompileTime, ColMajor>, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(lda) );
Don Gagne's avatar
Don Gagne committed
60 61 62 63 64 65 66 67
  Map<Matrix<Scalar,SegSizeAtCompileTime,1> > u(tempv.data(), segsize);
  
  u = A.template triangularView<UnitLower>().solve(u); 
  
  // Dense matrix-vector product y <-- B*x 
  luptr += segsize;
  const Index PacketSize = internal::packet_traits<Scalar>::size;
  Index ldl = internal::first_multiple(nrow, PacketSize);
68
  Map<Matrix<Scalar,Dynamic,SegSizeAtCompileTime, ColMajor>, 0, OuterStride<> > B( &(lusup.data()[luptr]), nrow, segsize, OuterStride<>(lda) );
Don Gagne's avatar
Don Gagne committed
69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130
  Index aligned_offset = internal::first_aligned(tempv.data()+segsize, PacketSize);
  Index aligned_with_B_offset = (PacketSize-internal::first_aligned(B.data(), PacketSize))%PacketSize;
  Map<Matrix<Scalar,Dynamic,1>, 0, OuterStride<> > l(tempv.data()+segsize+aligned_offset+aligned_with_B_offset, nrow, OuterStride<>(ldl) );
  
  l.setZero();
  internal::sparselu_gemm<Scalar>(l.rows(), l.cols(), B.cols(), B.data(), B.outerStride(), u.data(), u.outerStride(), l.data(), l.outerStride());
  
  // Scatter tempv[] into SPA dense[] as a temporary storage 
  isub = lptr + no_zeros;
  for (i = 0; i < ((SegSizeAtCompileTime==Dynamic)?segsize:SegSizeAtCompileTime); i++)
  {
    irow = lsub(isub++); 
    dense(irow) = tempv(i);
  }
  
  // Scatter l into SPA dense[]
  for (i = 0; i < nrow; i++)
  {
    irow = lsub(isub++); 
    dense(irow) -= l(i);
  } 
}

template <> struct LU_kernel_bmod<1>
{
  template <typename BlockScalarVector, typename ScalarVector, typename IndexVector, typename Index>
  static EIGEN_DONT_INLINE void run(const int /*segsize*/, BlockScalarVector& dense, ScalarVector& /*tempv*/, ScalarVector& lusup, Index& luptr,
                                    const Index lda, const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros);
};


template <typename BlockScalarVector, typename ScalarVector, typename IndexVector, typename Index>
EIGEN_DONT_INLINE void LU_kernel_bmod<1>::run(const int /*segsize*/, BlockScalarVector& dense, ScalarVector& /*tempv*/, ScalarVector& lusup, Index& luptr,
                                              const Index lda, const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros)
{
  typedef typename ScalarVector::Scalar Scalar;
  Scalar f = dense(lsub(lptr + no_zeros));
  luptr += lda * no_zeros + no_zeros + 1;
  const Scalar* a(lusup.data() + luptr);
  const /*typename IndexVector::Scalar*/Index*  irow(lsub.data()+lptr + no_zeros + 1);
  Index i = 0;
  for (; i+1 < nrow; i+=2)
  {
    Index i0 = *(irow++);
    Index i1 = *(irow++);
    Scalar a0 = *(a++);
    Scalar a1 = *(a++);
    Scalar d0 = dense.coeff(i0);
    Scalar d1 = dense.coeff(i1);
    d0 -= f*a0;
    d1 -= f*a1;
    dense.coeffRef(i0) = d0;
    dense.coeffRef(i1) = d1;
  }
  if(i<nrow)
    dense.coeffRef(*(irow++)) -= f * *(a++);
}

} // end namespace internal

} // end namespace Eigen
#endif // SPARSELU_KERNEL_BMOD_H