/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
* Content : Eigen bindings to LAPACKe
* LLt decomposition based on LAPACKE_?potrf function.
********************************************************************************
*/
#ifndef EIGEN_LLT_LAPACKE_H
#define EIGEN_LLT_LAPACKE_H
namespace Eigen {
namespace internal {
template<typename Scalar> struct lapacke_llt;
#define EIGEN_LAPACKE_LLT(EIGTYPE, BLASTYPE, LAPACKE_PREFIX) \
template<> struct lapacke_llt<EIGTYPE> \
{ \
template<typename MatrixType> \
static inline Index potrf(MatrixType& m, char uplo) \
{ \
lapack_int matrix_order; \
lapack_int size, lda, info, StorageOrder; \
EIGTYPE* a; \
eigen_assert(m.rows()==m.cols()); \
/* Set up parameters for ?potrf */ \
size = convert_index<lapack_int>(m.rows()); \
StorageOrder = MatrixType::Flags&RowMajorBit?RowMajor:ColMajor; \
matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \
a = &(m.coeffRef(0,0)); \
lda = convert_index<lapack_int>(m.outerStride()); \
\
info = LAPACKE_##LAPACKE_PREFIX##potrf( matrix_order, uplo, size, (BLASTYPE*)a, lda ); \
info = (info==0) ? -1 : info>0 ? info-1 : size; \
return info; \
} \
}; \
template<> struct llt_inplace<EIGTYPE, Lower> \
{ \
template<typename MatrixType> \
static Index blocked(MatrixType& m) \
{ \
return lapacke_llt<EIGTYPE>::potrf(m, 'L'); \
} \
template<typename MatrixType, typename VectorType> \
static Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \
{ return Eigen::internal::llt_rank_update_lower(mat, vec, sigma); } \
}; \
template<> struct llt_inplace<EIGTYPE, Upper> \
{ \
template<typename MatrixType> \
static Index blocked(MatrixType& m) \
{ \
return lapacke_llt<EIGTYPE>::potrf(m, 'U'); \
} \
template<typename MatrixType, typename VectorType> \
static Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \
{ \
Transpose<MatrixType> matt(mat); \
return llt_inplace<EIGTYPE, Lower>::rankUpdate(matt, vec.conjugate(), sigma); \
} \
};
EIGEN_LAPACKE_LLT(double, double, d)
EIGEN_LAPACKE_LLT(float, float, s)
EIGEN_LAPACKE_LLT(dcomplex, lapack_complex_double, z)
EIGEN_LAPACKE_LLT(scomplex, lapack_complex_float, c)
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_LLT_LAPACKE_H
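A usage sketch (not part of the original file): when Eigen is compiled with EIGEN_USE_LAPACKE and linked against a LAPACKE implementation, the ordinary LLT API dispatches transparently to the LAPACKE_?potrf bindings above.

#define EIGEN_USE_LAPACKE
#include <Eigen/Dense>
#include <iostream>

int main()
{
  // Build a symmetric positive-definite matrix A = M*M^T + n*I.
  const int n = 100;
  Eigen::MatrixXd M = Eigen::MatrixXd::Random(n, n);
  Eigen::MatrixXd A = M * M.transpose() + double(n) * Eigen::MatrixXd::Identity(n, n);

  // LLT::compute() ends up in llt_inplace<double,Lower>::blocked(), i.e. LAPACKE_dpotrf.
  Eigen::LLT<Eigen::MatrixXd> llt(A);
  if (llt.info() != Eigen::Success) { std::cerr << "factorization failed\n"; return 1; }

  Eigen::VectorXd b = Eigen::VectorXd::Random(n);
  Eigen::VectorXd x = llt.solve(b);
  std::cout << "residual: " << (A * x - b).norm() << std::endl;
  return 0;
}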
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
* Content : Eigen bindings to Intel(R) MKL
* LLt decomposition based on LAPACKE_?potrf function.
********************************************************************************
*/
#ifndef EIGEN_LLT_MKL_H
#define EIGEN_LLT_MKL_H
#include "Eigen/src/Core/util/MKL_support.h"
#include <iostream>
namespace Eigen {
namespace internal {
template<typename Scalar> struct mkl_llt;
#define EIGEN_MKL_LLT(EIGTYPE, MKLTYPE, MKLPREFIX) \
template<> struct mkl_llt<EIGTYPE> \
{ \
template<typename MatrixType> \
static inline typename MatrixType::Index potrf(MatrixType& m, char uplo) \
{ \
lapack_int matrix_order; \
lapack_int size, lda, info, StorageOrder; \
EIGTYPE* a; \
eigen_assert(m.rows()==m.cols()); \
/* Set up parameters for ?potrf */ \
size = m.rows(); \
StorageOrder = MatrixType::Flags&RowMajorBit?RowMajor:ColMajor; \
matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \
a = &(m.coeffRef(0,0)); \
lda = m.outerStride(); \
\
info = LAPACKE_##MKLPREFIX##potrf( matrix_order, uplo, size, (MKLTYPE*)a, lda ); \
info = (info==0) ? -1 : info>0 ? info-1 : size; \
return info; \
} \
}; \
template<> struct llt_inplace<EIGTYPE, Lower> \
{ \
template<typename MatrixType> \
static typename MatrixType::Index blocked(MatrixType& m) \
{ \
return mkl_llt<EIGTYPE>::potrf(m, 'L'); \
} \
template<typename MatrixType, typename VectorType> \
static typename MatrixType::Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \
{ return Eigen::internal::llt_rank_update_lower(mat, vec, sigma); } \
}; \
template<> struct llt_inplace<EIGTYPE, Upper> \
{ \
template<typename MatrixType> \
static typename MatrixType::Index blocked(MatrixType& m) \
{ \
return mkl_llt<EIGTYPE>::potrf(m, 'U'); \
} \
template<typename MatrixType, typename VectorType> \
static typename MatrixType::Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \
{ \
Transpose<MatrixType> matt(mat); \
return llt_inplace<EIGTYPE, Lower>::rankUpdate(matt, vec.conjugate(), sigma); \
} \
};
EIGEN_MKL_LLT(double, double, d)
EIGEN_MKL_LLT(float, float, s)
EIGEN_MKL_LLT(dcomplex, MKL_Complex16, z)
EIGEN_MKL_LLT(scomplex, MKL_Complex8, c)
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_LLT_MKL_H
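The rankUpdate() hooks above forward to Eigen's generic llt_rank_update_lower(); at the API level this corresponds to LLT::rankUpdate(). A brief sketch, under the same build assumptions:

#include <Eigen/Dense>

// Update the factorization of A in place so that it factors A + sigma*v*v^T,
// in O(n^2) work instead of a fresh O(n^3) factorization.
void rank_one_update(Eigen::LLT<Eigen::MatrixXd>& llt,
                     const Eigen::VectorXd& v, double sigma)
{
  llt.rankUpdate(v, sigma);
}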
FILE(GLOB Eigen_CholmodSupport_SRCS "*.h")
INSTALL(FILES
${Eigen_CholmodSupport_SRCS}
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/CholmodSupport COMPONENT Devel
)
--- a/Eigen/src/CholmodSupport/CholmodSupport.h
+++ b/Eigen/src/CholmodSupport/CholmodSupport.h
@@ -14,46 +14,52 @@ namespace Eigen {
 namespace internal {
 
-template<typename Scalar, typename CholmodType>
-void cholmod_configure_matrix(CholmodType& mat)
-{
-  if (internal::is_same<Scalar,float>::value)
-  {
-    mat.xtype = CHOLMOD_REAL;
-    mat.dtype = CHOLMOD_SINGLE;
-  }
-  else if (internal::is_same<Scalar,double>::value)
-  {
+template<typename Scalar> struct cholmod_configure_matrix;
+
+template<> struct cholmod_configure_matrix<double> {
+  template<typename CholmodType>
+  static void run(CholmodType& mat) {
     mat.xtype = CHOLMOD_REAL;
     mat.dtype = CHOLMOD_DOUBLE;
   }
-  else if (internal::is_same<Scalar,std::complex<float> >::value)
-  {
-    mat.xtype = CHOLMOD_COMPLEX;
-    mat.dtype = CHOLMOD_SINGLE;
-  }
-  else if (internal::is_same<Scalar,std::complex<double> >::value)
-  {
+};
+
+template<> struct cholmod_configure_matrix<std::complex<double> > {
+  template<typename CholmodType>
+  static void run(CholmodType& mat) {
     mat.xtype = CHOLMOD_COMPLEX;
     mat.dtype = CHOLMOD_DOUBLE;
   }
-  else
-  {
-    eigen_assert(false && "Scalar type not supported by CHOLMOD");
-  }
-}
+};
+
+// Other scalar types are not yet supported by Cholmod
+// template<> struct cholmod_configure_matrix<float> {
+//   template<typename CholmodType>
+//   static void run(CholmodType& mat) {
+//     mat.xtype = CHOLMOD_REAL;
+//     mat.dtype = CHOLMOD_SINGLE;
+//   }
+// };
+//
+// template<> struct cholmod_configure_matrix<std::complex<float> > {
+//   template<typename CholmodType>
+//   static void run(CholmodType& mat) {
+//     mat.xtype = CHOLMOD_COMPLEX;
+//     mat.dtype = CHOLMOD_SINGLE;
+//   }
+// };
 
 } // namespace internal
 
 /** Wraps the Eigen sparse matrix \a mat into a Cholmod sparse matrix object.
   * Note that the data are shared.
   */
-template<typename _Scalar, int _Options, typename _Index>
-cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_Index>& mat)
+template<typename _Scalar, int _Options, typename _StorageIndex>
+cholmod_sparse viewAsCholmod(Ref<SparseMatrix<_Scalar,_Options,_StorageIndex> > mat)
 {
   cholmod_sparse res;
   res.nzmax   = mat.nonZeros();
-  res.nrow    = mat.rows();;
+  res.nrow    = mat.rows();
   res.ncol    = mat.cols();
   res.p       = mat.outerIndexPtr();
   res.i       = mat.innerIndexPtr();
@@ -74,11 +80,11 @@ cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_Index>& mat)
   res.dtype   = 0;
   res.stype   = -1;
 
-  if (internal::is_same<_Index,int>::value)
+  if (internal::is_same<_StorageIndex,int>::value)
   {
     res.itype = CHOLMOD_INT;
   }
-  else if (internal::is_same<_Index,SuiteSparse_long>::value)
+  else if (internal::is_same<_StorageIndex,long>::value)
   {
     res.itype = CHOLMOD_LONG;
   }
@@ -88,7 +94,7 @@ cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_Index>& mat)
   }
 
   // setup res.xtype
-  internal::cholmod_configure_matrix<_Scalar>(res);
+  internal::cholmod_configure_matrix<_Scalar>::run(res);
 
   res.stype = 0;
@@ -98,16 +104,23 @@ cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_Index>& mat)
 template<typename _Scalar, int _Options, typename _Index>
 const cholmod_sparse viewAsCholmod(const SparseMatrix<_Scalar,_Options,_Index>& mat)
 {
-  cholmod_sparse res = viewAsCholmod(mat.const_cast_derived());
+  cholmod_sparse res = viewAsCholmod(Ref<SparseMatrix<_Scalar,_Options,_Index> >(mat.const_cast_derived()));
+  return res;
+}
+
+template<typename _Scalar, int _Options, typename _Index>
+const cholmod_sparse viewAsCholmod(const SparseVector<_Scalar,_Options,_Index>& mat)
+{
+  cholmod_sparse res = viewAsCholmod(Ref<SparseMatrix<_Scalar,_Options,_Index> >(mat.const_cast_derived()));
   return res;
 }
 
 /** Returns a view of the Eigen sparse matrix \a mat as Cholmod sparse matrix.
   * The data are not copied but shared. */
 template<typename _Scalar, int _Options, typename _Index, unsigned int UpLo>
-cholmod_sparse viewAsCholmod(const SparseSelfAdjointView<SparseMatrix<_Scalar,_Options,_Index>, UpLo>& mat)
+cholmod_sparse viewAsCholmod(const SparseSelfAdjointView<const SparseMatrix<_Scalar,_Options,_Index>, UpLo>& mat)
 {
-  cholmod_sparse res = viewAsCholmod(mat.matrix().const_cast_derived());
+  cholmod_sparse res = viewAsCholmod(Ref<SparseMatrix<_Scalar,_Options,_Index> >(mat.matrix().const_cast_derived()));
 
   if(UpLo==Upper) res.stype =  1;
   if(UpLo==Lower) res.stype = -1;
@@ -131,19 +144,19 @@ cholmod_dense viewAsCholmod(MatrixBase<Derived>& mat)
   res.x      = (void*)(mat.derived().data());
   res.z      = 0;
 
-  internal::cholmod_configure_matrix<Scalar>(res);
+  internal::cholmod_configure_matrix<Scalar>::run(res);
 
   return res;
 }
 
 /** Returns a view of the Cholmod sparse matrix \a cm as an Eigen sparse matrix.
   * The data are not copied but shared. */
-template<typename Scalar, int Flags, typename Index>
-MappedSparseMatrix<Scalar,Flags,Index> viewAsEigen(cholmod_sparse& cm)
+template<typename Scalar, int Flags, typename StorageIndex>
+MappedSparseMatrix<Scalar,Flags,StorageIndex> viewAsEigen(cholmod_sparse& cm)
 {
-  return MappedSparseMatrix<Scalar,Flags,Index>
-         (cm.nrow, cm.ncol, static_cast<Index*>(cm.p)[cm.ncol],
-          static_cast<Index*>(cm.p), static_cast<Index*>(cm.i),static_cast<Scalar*>(cm.x) );
+  return MappedSparseMatrix<Scalar,Flags,StorageIndex>
+         (cm.nrow, cm.ncol, static_cast<StorageIndex*>(cm.p)[cm.ncol],
+          static_cast<StorageIndex*>(cm.p), static_cast<StorageIndex*>(cm.i),static_cast<Scalar*>(cm.x) );
 }
 
 enum CholmodMode {
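A sketch of how these shared-data wrappers are used directly (hypothetical helper; assumes the CHOLMOD headers are available and that the Eigen matrix outlives the view):

#include <Eigen/SparseCore>
#include <Eigen/CholmodSupport>

void wrap_for_cholmod(Eigen::SparseMatrix<double>& A)
{
  A.makeCompressed();                             // CHOLMOD expects compressed CSC storage
  cholmod_sparse A_cm = Eigen::viewAsCholmod(A);  // shares A's arrays, no copy is made
  // ... pass &A_cm to CHOLMOD routines; A must stay alive (and unmodified) meanwhile ...
}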
@@ -157,29 +170,39 @@ enum CholmodMode {
   * \sa class CholmodSupernodalLLT, class CholmodSimplicialLDLT, class CholmodSimplicialLLT
   */
 template<typename _MatrixType, int _UpLo, typename Derived>
-class CholmodBase : internal::noncopyable
+class CholmodBase : public SparseSolverBase<Derived>
 {
+  protected:
+    typedef SparseSolverBase<Derived> Base;
+    using Base::derived;
+    using Base::m_isInitialized;
   public:
     typedef _MatrixType MatrixType;
     enum { UpLo = _UpLo };
     typedef typename MatrixType::Scalar Scalar;
     typedef typename MatrixType::RealScalar RealScalar;
    typedef MatrixType CholMatrixType;
-    typedef typename MatrixType::Index Index;
+    typedef typename MatrixType::StorageIndex StorageIndex;
+    enum {
+      ColsAtCompileTime = MatrixType::ColsAtCompileTime,
+      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
+    };
 
   public:
 
     CholmodBase()
-      : m_cholmodFactor(0), m_info(Success), m_isInitialized(false)
+      : m_cholmodFactor(0), m_info(Success), m_factorizationIsOk(false), m_analysisIsOk(false)
     {
-      m_shiftOffset[0] = m_shiftOffset[1] = RealScalar(0.0);
+      EIGEN_STATIC_ASSERT((internal::is_same<double,RealScalar>::value), CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY);
+      m_shiftOffset[0] = m_shiftOffset[1] = 0.0;
      cholmod_start(&m_cholmod);
     }
 
-    CholmodBase(const MatrixType& matrix)
-      : m_cholmodFactor(0), m_info(Success), m_isInitialized(false)
+    explicit CholmodBase(const MatrixType& matrix)
+      : m_cholmodFactor(0), m_info(Success), m_factorizationIsOk(false), m_analysisIsOk(false)
     {
-      m_shiftOffset[0] = m_shiftOffset[1] = RealScalar(0.0);
+      EIGEN_STATIC_ASSERT((internal::is_same<double,RealScalar>::value), CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY);
+      m_shiftOffset[0] = m_shiftOffset[1] = 0.0;
       cholmod_start(&m_cholmod);
       compute(matrix);
     }
@@ -191,11 +214,8 @@ class CholmodBase : internal::noncopyable
       cholmod_finish(&m_cholmod);
     }
 
-    inline Index cols() const { return m_cholmodFactor->n; }
-    inline Index rows() const { return m_cholmodFactor->n; }
-
-    Derived& derived() { return *static_cast<Derived*>(this); }
-    const Derived& derived() const { return *static_cast<const Derived*>(this); }
+    inline StorageIndex cols() const { return internal::convert_index<StorageIndex, Index>(m_cholmodFactor->n); }
+    inline StorageIndex rows() const { return internal::convert_index<StorageIndex, Index>(m_cholmodFactor->n); }
 
     /** \brief Reports whether previous computation was successful.
       *
@@ -216,34 +236,6 @@ class CholmodBase : internal::noncopyable
       return derived();
     }
 
-    /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A.
-      *
-      * \sa compute()
-      */
-    template<typename Rhs>
-    inline const internal::solve_retval<CholmodBase, Rhs>
-    solve(const MatrixBase<Rhs>& b) const
-    {
-      eigen_assert(m_isInitialized && "LLT is not initialized.");
-      eigen_assert(rows()==b.rows()
-                && "CholmodDecomposition::solve(): invalid number of rows of the right hand side matrix b");
-      return internal::solve_retval<CholmodBase, Rhs>(*this, b.derived());
-    }
-
-    /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A.
-      *
-      * \sa compute()
-      */
-    template<typename Rhs>
-    inline const internal::sparse_solve_retval<CholmodBase, Rhs>
-    solve(const SparseMatrixBase<Rhs>& b) const
-    {
-      eigen_assert(m_isInitialized && "LLT is not initialized.");
-      eigen_assert(rows()==b.rows()
-                && "CholmodDecomposition::solve(): invalid number of rows of the right hand side matrix b");
-      return internal::sparse_solve_retval<CholmodBase, Rhs>(*this, b.derived());
-    }
-
     /** Performs a symbolic decomposition on the sparsity pattern of \a matrix.
       *
       * This function is particularly useful when solving for several problems having the same structure.
@@ -290,20 +282,22 @@ class CholmodBase : internal::noncopyable
 #ifndef EIGEN_PARSED_BY_DOXYGEN
     /** \internal */
     template<typename Rhs,typename Dest>
-    void _solve(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const
+    void _solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const
     {
       eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()");
       const Index size = m_cholmodFactor->n;
       EIGEN_UNUSED_VARIABLE(size);
       eigen_assert(size==b.rows());
 
-      // note: cd stands for Cholmod Dense
-      Rhs& b_ref(b.const_cast_derived());
+      // Cholmod needs column-major storage without inner-stride, which corresponds to the default behavior of Ref.
+      Ref<const Matrix<typename Rhs::Scalar,Dynamic,Dynamic,ColMajor> > b_ref(b.derived());
+
       cholmod_dense b_cd = viewAsCholmod(b_ref);
       cholmod_dense* x_cd = cholmod_solve(CHOLMOD_A, m_cholmodFactor, &b_cd, &m_cholmod);
       if(!x_cd)
       {
         this->m_info = NumericalIssue;
+        return;
       }
       // TODO optimize this copy by swapping when possible (be careful with alignment, etc.)
       dest = Matrix<Scalar,Dest::RowsAtCompileTime,Dest::ColsAtCompileTime>::Map(reinterpret_cast<Scalar*>(x_cd->x),b.rows(),b.cols());
@@ -311,8 +305,8 @@ class CholmodBase : internal::noncopyable
     }
 
     /** \internal */
-    template<typename RhsScalar, int RhsOptions, typename RhsIndex, typename DestScalar, int DestOptions, typename DestIndex>
-    void _solve(const SparseMatrix<RhsScalar,RhsOptions,RhsIndex> &b, SparseMatrix<DestScalar,DestOptions,DestIndex> &dest) const
+    template<typename RhsDerived, typename DestDerived>
+    void _solve_impl(const SparseMatrixBase<RhsDerived> &b, SparseMatrixBase<DestDerived> &dest) const
     {
       eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()");
       const Index size = m_cholmodFactor->n;
@@ -320,14 +314,16 @@ class CholmodBase : internal::noncopyable
       eigen_assert(size==b.rows());
 
       // note: cs stands for Cholmod Sparse
-      cholmod_sparse b_cs = viewAsCholmod(b);
+      Ref<SparseMatrix<typename RhsDerived::Scalar,ColMajor,typename RhsDerived::StorageIndex> > b_ref(b.const_cast_derived());
+      cholmod_sparse b_cs = viewAsCholmod(b_ref);
       cholmod_sparse* x_cs = cholmod_spsolve(CHOLMOD_A, m_cholmodFactor, &b_cs, &m_cholmod);
       if(!x_cs)
       {
         this->m_info = NumericalIssue;
+        return;
       }
       // TODO optimize this copy by swapping when possible (be careful with alignment, etc.)
-      dest = viewAsEigen<DestScalar,DestOptions,DestIndex>(*x_cs);
+      dest.derived() = viewAsEigen<typename DestDerived::Scalar,ColMajor,typename DestDerived::StorageIndex>(*x_cs);
       cholmod_free_sparse(&x_cs, &m_cholmod);
     }
 #endif // EIGEN_PARSED_BY_DOXYGEN
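The _solve_impl() overloads above are reached through the inherited SparseSolverBase::solve() entry point. A minimal sketch, assuming CHOLMOD is installed:

#include <stdexcept>
#include <Eigen/SparseCore>
#include <Eigen/CholmodSupport>

// Solve A x = b for SPD A; evaluating the expression returned by solve()
// invokes the dense _solve_impl() shown above.
Eigen::VectorXd solve_spd(const Eigen::SparseMatrix<double>& A, const Eigen::VectorXd& b)
{
  Eigen::CholmodSupernodalLLT<Eigen::SparseMatrix<double> > solver(A);
  if (solver.info() != Eigen::Success)
    throw std::runtime_error("CHOLMOD factorization failed");
  return solver.solve(b);
}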
@@ -344,10 +340,61 @@ class CholmodBase : internal::noncopyable
       */
     Derived& setShift(const RealScalar& offset)
     {
-      m_shiftOffset[0] = offset;
+      m_shiftOffset[0] = double(offset);
       return derived();
     }
 
+    /** \returns the determinant of the underlying matrix from the current factorization */
+    Scalar determinant() const
+    {
+      using std::exp;
+      return exp(logDeterminant());
+    }
+
+    /** \returns the log determinant of the underlying matrix from the current factorization */
+    Scalar logDeterminant() const
+    {
+      using std::log;
+      using numext::real;
+      eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()");
+
+      RealScalar logDet = 0;
+      Scalar *x = static_cast<Scalar*>(m_cholmodFactor->x);
+      if (m_cholmodFactor->is_super)
+      {
+        // Supernodal factorization stored as a packed list of dense column-major blocks,
+        // as described by the following structure:
+
+        // super[k] == index of the first column of the k-th super node
+        StorageIndex *super = static_cast<StorageIndex*>(m_cholmodFactor->super);
+        // pi[k] == offset to the description of row indices
+        StorageIndex *pi = static_cast<StorageIndex*>(m_cholmodFactor->pi);
+        // px[k] == offset to the respective dense block
+        StorageIndex *px = static_cast<StorageIndex*>(m_cholmodFactor->px);
+
+        Index nb_super_nodes = m_cholmodFactor->nsuper;
+        for (Index k=0; k < nb_super_nodes; ++k)
+        {
+          StorageIndex ncols = super[k + 1] - super[k];
+          StorageIndex nrows = pi[k + 1] - pi[k];
+
+          // The block's diagonal entries lie at an inner stride of nrows+1.
+          Map<const Array<Scalar,1,Dynamic>, 0, InnerStride<> > sk(x + px[k], ncols, InnerStride<>(nrows+1));
+          logDet += sk.real().log().sum();
+        }
+      }
+      else
+      {
+        // Simplicial factorization stored as standard CSC matrix.
+        StorageIndex *p = static_cast<StorageIndex*>(m_cholmodFactor->p);
+        Index size = m_cholmodFactor->n;
+        for (Index k=0; k<size; ++k)
+          logDet += log(real( x[p[k]] ));
+      }
+      if (m_cholmodFactor->is_ll)
+        logDet *= 2.0;
+      return logDet;
+    };
+
     template<typename Stream>
     void dumpMemory(Stream& /*s*/)
     {}
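The new determinant()/logDeterminant() members read the diagonal of the stored factor; working in log space avoids the overflow and underflow that forming the determinant directly would cause for large matrices. A usage sketch:

#include <Eigen/SparseCore>
#include <Eigen/CholmodSupport>

// For an LL^T factor, log(det A) = 2 * sum_i log(L_ii), accumulated above
// without ever forming det(A) itself.
double log_det(const Eigen::SparseMatrix<double>& A)
{
  Eigen::CholmodDecomposition<Eigen::SparseMatrix<double> > chol(A);
  return chol.logDeterminant();
}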
@@ -355,9 +402,8 @@ class CholmodBase : internal::noncopyable
   protected:
     mutable cholmod_common m_cholmod;
     cholmod_factor* m_cholmodFactor;
-    RealScalar m_shiftOffset[2];
+    double m_shiftOffset[2];
     mutable ComputationInfo m_info;
-    bool m_isInitialized;
     int m_factorizationIsOk;
     int m_analysisIsOk;
 };
@@ -376,9 +422,13 @@ class CholmodBase : internal::noncopyable
   * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower
   *               or Upper. Default is Lower.
   *
+  * \implsparsesolverconcept
+  *
   * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
   *
-  * \sa \ref TutorialSparseDirectSolvers, class CholmodSupernodalLLT, class SimplicialLLT
+  * \warning Only double precision real and complex scalar types are supported by Cholmod.
+  *
+  * \sa \ref TutorialSparseSolverConcept, class CholmodSupernodalLLT, class SimplicialLLT
   */
 template<typename _MatrixType, int _UpLo = Lower>
 class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLLT<_MatrixType, _UpLo> >
@@ -395,7 +445,7 @@ class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimpl
     CholmodSimplicialLLT(const MatrixType& matrix) : Base()
     {
       init();
-      Base::compute(matrix);
+      this->compute(matrix);
     }
 
     ~CholmodSimplicialLLT() {}
@@ -423,9 +473,13 @@ class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimpl
   * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower
   *               or Upper. Default is Lower.
   *
+  * \implsparsesolverconcept
+  *
   * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
   *
-  * \sa \ref TutorialSparseDirectSolvers, class CholmodSupernodalLLT, class SimplicialLDLT
+  * \warning Only double precision real and complex scalar types are supported by Cholmod.
+  *
+  * \sa \ref TutorialSparseSolverConcept, class CholmodSupernodalLLT, class SimplicialLDLT
   */
 template<typename _MatrixType, int _UpLo = Lower>
 class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLDLT<_MatrixType, _UpLo> >
@@ -442,7 +496,7 @@ class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimp
     CholmodSimplicialLDLT(const MatrixType& matrix) : Base()
     {
       init();
-      Base::compute(matrix);
+      this->compute(matrix);
     }
 
     ~CholmodSimplicialLDLT() {}
@@ -468,9 +522,13 @@ class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimp
   * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower
   *               or Upper. Default is Lower.
   *
+  * \implsparsesolverconcept
+  *
   * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
   *
-  * \sa \ref TutorialSparseDirectSolvers
+  * \warning Only double precision real and complex scalar types are supported by Cholmod.
+  *
+  * \sa \ref TutorialSparseSolverConcept
   */
 template<typename _MatrixType, int _UpLo = Lower>
 class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSupernodalLLT<_MatrixType, _UpLo> >
@@ -487,7 +545,7 @@ class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSuper
     CholmodSupernodalLLT(const MatrixType& matrix) : Base()
     {
      init();
-      Base::compute(matrix);
+      this->compute(matrix);
     }
 
     ~CholmodSupernodalLLT() {}
@@ -515,9 +573,13 @@ class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSuper
   * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower
   *               or Upper. Default is Lower.
   *
+  * \implsparsesolverconcept
+  *
   * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
   *
-  * \sa \ref TutorialSparseDirectSolvers
+  * \warning Only double precision real and complex scalar types are supported by Cholmod.
+  *
+  * \sa \ref TutorialSparseSolverConcept
   */
 template<typename _MatrixType, int _UpLo = Lower>
 class CholmodDecomposition : public CholmodBase<_MatrixType, _UpLo, CholmodDecomposition<_MatrixType, _UpLo> >
@@ -534,7 +596,7 @@ class CholmodDecomposition : public CholmodBase<_MatrixType, _UpLo, CholmodDecom
     CholmodDecomposition(const MatrixType& matrix) : Base()
     {
      init();
-      Base::compute(matrix);
+      this->compute(matrix);
     }
 
     ~CholmodDecomposition() {}
@@ -572,36 +634,6 @@ class CholmodDecomposition : public CholmodBase<_MatrixType, _UpLo, CholmodDecom
     }
 };
 
-namespace internal {
-
-template<typename _MatrixType, int _UpLo, typename Derived, typename Rhs>
-struct solve_retval<CholmodBase<_MatrixType,_UpLo,Derived>, Rhs>
-  : solve_retval_base<CholmodBase<_MatrixType,_UpLo,Derived>, Rhs>
-{
-  typedef CholmodBase<_MatrixType,_UpLo,Derived> Dec;
-  EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs)
-
-  template<typename Dest> void evalTo(Dest& dst) const
-  {
-    dec()._solve(rhs(),dst);
-  }
-};
-
-template<typename _MatrixType, int _UpLo, typename Derived, typename Rhs>
-struct sparse_solve_retval<CholmodBase<_MatrixType,_UpLo,Derived>, Rhs>
-  : sparse_solve_retval_base<CholmodBase<_MatrixType,_UpLo,Derived>, Rhs>
-{
-  typedef CholmodBase<_MatrixType,_UpLo,Derived> Dec;
-  EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec,Rhs)
-
-  template<typename Dest> void evalTo(Dest& dst) const
-  {
-    dec()._solve(rhs(),dst);
-  }
-};
-
-} // end namespace internal
-
 } // end namespace Eigen
 
 #endif // EIGEN_CHOLMODSUPPORT_H
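Sparse right-hand sides go through the SparseMatrixBase overload of _solve_impl(), backed by cholmod_spsolve(). A sketch under the same assumptions as above:

#include <Eigen/SparseCore>
#include <Eigen/CholmodSupport>

Eigen::SparseMatrix<double> solve_sparse_rhs(const Eigen::SparseMatrix<double>& A,
                                             const Eigen::SparseMatrix<double>& B)
{
  // solve() accepts a sparse right-hand side and returns a sparse result.
  Eigen::CholmodSimplicialLDLT<Eigen::SparseMatrix<double> > solver(A);
  return solver.solve(B);
}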
--- a/Eigen/src/Core/Array.h
+++ b/Eigen/src/Core/Array.h
@@ -12,6 +12,15 @@
 
 namespace Eigen {
 
+namespace internal {
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+struct traits<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > : traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
+{
+  typedef ArrayXpr XprKind;
+  typedef ArrayBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > XprBase;
+};
+}
+
 /** \class Array
   * \ingroup Core_Module
   *
@@ -24,20 +33,14 @@ namespace Eigen {
   * API for the %Matrix class provides easy access to linear-algebra
   * operations.
   *
+  * See documentation of class Matrix for detailed information on the template parameters
+  * storage layout.
+  *
   * This class can be extended with the help of the plugin mechanism described on the page
-  * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_ARRAY_PLUGIN.
+  * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_ARRAY_PLUGIN.
   *
-  * \sa \ref TutorialArrayClass, \ref TopicClassHierarchy
+  * \sa \blank \ref TutorialArrayClass, \ref TopicClassHierarchy
   */
-namespace internal {
-template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
-struct traits<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > : traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
-{
-  typedef ArrayXpr XprKind;
-  typedef ArrayBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > XprBase;
-};
-}
 template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
 class Array
   : public PlainObjectBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
@@ -69,11 +72,27 @@ class Array
       * the usage of 'using'. This should be done only for operator=.
       */
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Array& operator=(const EigenBase<OtherDerived> &other)
     {
       return Base::operator=(other);
     }
 
+    /** Set all the entries to \a value.
+      * \sa DenseBase::setConstant(), DenseBase::fill()
+      */
+    /* This overload is needed because the usage of
+      *   using Base::operator=;
+      * fails on MSVC. Since the code below is working with GCC and MSVC, we skipped
+      * the usage of 'using'. This should be done only for operator=.
+      */
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Array& operator=(const Scalar &value)
+    {
+      Base::setConstant(value);
+      return *this;
+    }
+
     /** Copies the value of the expression \a other into \c *this with automatic resizing.
       *
       * *this might be resized to match the dimensions of \a other. If *this was a null matrix (not already initialized),
@@ -84,7 +103,8 @@ class Array
       * remain row-vectors and vectors remain vectors.
       */
     template<typename OtherDerived>
-    EIGEN_STRONG_INLINE Array& operator=(const ArrayBase<OtherDerived>& other)
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Array& operator=(const DenseBase<OtherDerived>& other)
     {
       return Base::_set(other);
     }
@@ -92,6 +112,7 @@ class Array
     /** This is a special case of the templated operator=. Its purpose is to
       * prevent a default operator= from hiding the templated operator=.
      */
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Array& operator=(const Array& other)
     {
       return Base::_set(other);
@@ -107,6 +128,7 @@ class Array
      *
      * \sa resize(Index,Index)
      */
+    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Array() : Base()
    {
      Base::_check_template_params();
@@ -116,6 +138,7 @@ class Array
 #ifndef EIGEN_PARSED_BY_DOXYGEN
    // FIXME is it still needed ??
    /** \internal */
+    EIGEN_DEVICE_FUNC
    Array(internal::constructor_without_unaligned_array_assert)
      : Base(internal::constructor_without_unaligned_array_assert())
    {
@@ -124,56 +147,62 @@ class Array
     }
 #endif
 
-#ifdef EIGEN_HAVE_RVALUE_REFERENCES
-    Array(Array&& other)
+#if EIGEN_HAS_RVALUE_REFERENCES
+    EIGEN_DEVICE_FUNC
+    Array(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_constructible<Scalar>::value)
       : Base(std::move(other))
     {
       Base::_check_template_params();
-      if (RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic)
-        Base::_set_noalias(other);
     }
-    Array& operator=(Array&& other)
+    EIGEN_DEVICE_FUNC
+    Array& operator=(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable<Scalar>::value)
     {
       other.swap(*this);
       return *this;
     }
 #endif
 
-    /** Constructs a vector or row-vector with given dimension. \only_for_vectors
-      *
-      * Note that this is only useful for dynamic-size vectors. For fixed-size vectors,
-      * it is redundant to pass the dimension here, so it makes more sense to use the default
-      * constructor Matrix() instead.
-      */
-    EIGEN_STRONG_INLINE explicit Array(Index dim)
-      : Base(dim, RowsAtCompileTime == 1 ? 1 : dim, ColsAtCompileTime == 1 ? 1 : dim)
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+    template<typename T>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE explicit Array(const T& x)
     {
       Base::_check_template_params();
-      EIGEN_STATIC_ASSERT_VECTOR_ONLY(Array)
-      eigen_assert(dim >= 0);
-      eigen_assert(SizeAtCompileTime == Dynamic || SizeAtCompileTime == dim);
-      EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
+      Base::template _init1<T>(x);
     }
 
-    #ifndef EIGEN_PARSED_BY_DOXYGEN
     template<typename T0, typename T1>
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Array(const T0& val0, const T1& val1)
     {
       Base::_check_template_params();
       this->template _init2<T0,T1>(val0, val1);
     }
 #else
-    /** constructs an uninitialized matrix with \a rows rows and \a cols columns.
+    /** \brief Constructs a fixed-sized array initialized with coefficients starting at \a data */
+    EIGEN_DEVICE_FUNC explicit Array(const Scalar *data);
+    /** Constructs a vector or row-vector with given dimension. \only_for_vectors
+      *
+      * Note that this is only useful for dynamic-size vectors. For fixed-size vectors,
+      * it is redundant to pass the dimension here, so it makes more sense to use the default
+      * constructor Array() instead.
+      */
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE explicit Array(Index dim);
+    /** constructs an initialized 1x1 Array with the given coefficient */
+    Array(const Scalar& value);
+    /** constructs an uninitialized array with \a rows rows and \a cols columns.
       *
-      * This is useful for dynamic-size matrices. For fixed-size matrices,
+      * This is useful for dynamic-size arrays. For fixed-size arrays,
       * it is redundant to pass these parameters, so one should use the default constructor
-      * Matrix() instead. */
+      * Array() instead. */
     Array(Index rows, Index cols);
     /** constructs an initialized 2D vector with given coefficients */
     Array(const Scalar& val0, const Scalar& val1);
 #endif
 
     /** constructs an initialized 3D vector with given coefficients */
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2)
     {
       Base::_check_template_params();
@@ -183,6 +212,7 @@ class Array
       m_storage.data()[2] = val2;
     }
     /** constructs an initialized 4D vector with given coefficients */
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2, const Scalar& val3)
     {
       Base::_check_template_params();
@@ -193,51 +223,27 @@ class Array
       m_storage.data()[3] = val3;
     }
 
-    explicit Array(const Scalar *data);
-
-    /** Constructor copying the value of the expression \a other */
-    template<typename OtherDerived>
-    EIGEN_STRONG_INLINE Array(const ArrayBase<OtherDerived>& other)
-      : Base(other.rows() * other.cols(), other.rows(), other.cols())
-    {
-      Base::_check_template_params();
-      Base::_set_noalias(other);
-    }
     /** Copy constructor */
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Array(const Array& other)
-      : Base(other.rows() * other.cols(), other.rows(), other.cols())
-    {
-      Base::_check_template_params();
-      Base::_set_noalias(other);
-    }
-    /** Copy constructor with in-place evaluation */
-    template<typename OtherDerived>
-    EIGEN_STRONG_INLINE Array(const ReturnByValue<OtherDerived>& other)
-    {
-      Base::_check_template_params();
-      Base::resize(other.rows(), other.cols());
-      other.evalTo(*this);
-    }
+      : Base(other)
+    { }
+
+  private:
+    struct PrivateType {};
+  public:
 
     /** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */
     template<typename OtherDerived>
-    EIGEN_STRONG_INLINE Array(const EigenBase<OtherDerived> &other)
-      : Base(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
-    {
-      Base::_check_template_params();
-      Base::_resize_to_match(other);
-      *this = other;
-    }
-
-    /** Override MatrixBase::swap() since for dynamic-sized matrices of same type it is enough to swap the
-      * data pointers.
-      */
-    template<typename OtherDerived>
-    void swap(ArrayBase<OtherDerived> const & other)
-    { this->_swap(other.derived()); }
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Array(const EigenBase<OtherDerived> &other,
+                              typename internal::enable_if<internal::is_convertible<typename OtherDerived::Scalar,Scalar>::value,
+                                                           PrivateType>::type = PrivateType())
+      : Base(other.derived())
+    { }
 
-    inline Index innerStride() const { return 1; }
-    inline Index outerStride() const { return this->innerSize(); }
+    EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; }
+    EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); }
 
 #ifdef EIGEN_ARRAY_PLUGIN
 #include EIGEN_ARRAY_PLUGIN

...
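A short sketch of the constructor and assignment semantics introduced above: the generic Array(const T& x) routes through _init1<T> (so an integer argument sizes a dynamic 1D array), and the new operator=(const Scalar&) fills every coefficient via setConstant():

#include <Eigen/Core>
#include <iostream>

int main()
{
  Eigen::ArrayXd a(5);              // T == int: a dynamic 1D array of size 5 (uninitialized)
  a = 1.5;                          // operator=(const Scalar&): every entry becomes 1.5
  Eigen::Array3d v(1.0, 2.0, 3.0);  // the 3-coefficient constructor shown earlier
  Eigen::ArrayXXd m(2, 3);          // uninitialized 2x3 2D array
  m = 0.0;
  std::cout << (a + 0.5).transpose() << "\n" << v.square().transpose() << std::endl;
  return 0;
}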
--- a/Eigen/src/Core/ArrayBase.h
+++ b/Eigen/src/Core/ArrayBase.h
@@ -32,7 +32,7 @@ template<typename ExpressionType> class MatrixWrapper;
   * \tparam Derived is the derived type, e.g., an array or an expression type.
   *
   * This class can be extended with the help of the plugin mechanism described on the page
-  * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_ARRAYBASE_PLUGIN.
+  * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_ARRAYBASE_PLUGIN.
   *
   * \sa class MatrixBase, \ref TopicClassHierarchy
   */
@@ -47,13 +47,11 @@ template<typename Derived> class ArrayBase
     typedef ArrayBase Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl;
 
     typedef typename internal::traits<Derived>::StorageKind StorageKind;
-    typedef typename internal::traits<Derived>::Index Index;
     typedef typename internal::traits<Derived>::Scalar Scalar;
     typedef typename internal::packet_traits<Scalar>::type PacketScalar;
     typedef typename NumTraits<Scalar>::Real RealScalar;
 
     typedef DenseBase<Derived> Base;
-    using Base::operator*;
     using Base::RowsAtCompileTime;
     using Base::ColsAtCompileTime;
     using Base::SizeAtCompileTime;
@@ -62,7 +60,6 @@ template<typename Derived> class ArrayBase
     using Base::MaxSizeAtCompileTime;
     using Base::IsVectorAtCompileTime;
     using Base::Flags;
-    using Base::CoeffReadCost;
 
     using Base::derived;
     using Base::const_cast_derived;
@@ -83,25 +80,14 @@ template<typename Derived> class ArrayBase
 #endif // not EIGEN_PARSED_BY_DOXYGEN
 
 #ifndef EIGEN_PARSED_BY_DOXYGEN
-    /** \internal the plain matrix type corresponding to this expression. Note that is not necessarily
-      * exactly the return type of eval(): in the case of plain matrices, the return type of eval() is a const
-      * reference to a matrix, not a matrix! It is however guaranteed that the return type of eval() is either
-      * PlainObject or const PlainObject&.
-      */
-    typedef Array<typename internal::traits<Derived>::Scalar,
-                internal::traits<Derived>::RowsAtCompileTime,
-                internal::traits<Derived>::ColsAtCompileTime,
-                AutoAlign | (internal::traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
-                internal::traits<Derived>::MaxRowsAtCompileTime,
-                internal::traits<Derived>::MaxColsAtCompileTime
-          > PlainObject;
+    typedef typename Base::PlainObject PlainObject;
 
     /** \internal Represents a matrix with all coefficients equal to one another*/
-    typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,Derived> ConstantReturnType;
+    typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,PlainObject> ConstantReturnType;
 #endif // not EIGEN_PARSED_BY_DOXYGEN
 
 #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::ArrayBase
+#define EIGEN_DOC_UNARY_ADDONS(X,Y)
 #   include "../plugins/CommonCwiseUnaryOps.h"
 #   include "../plugins/MatrixCwiseUnaryOps.h"
 #   include "../plugins/ArrayCwiseUnaryOps.h"
@@ -112,44 +98,62 @@ template<typename Derived> class ArrayBase
 #   include EIGEN_ARRAYBASE_PLUGIN
 # endif
 #undef EIGEN_CURRENT_STORAGE_BASE_CLASS
+#undef EIGEN_DOC_UNARY_ADDONS
 
     /** Special case of the template operator=, in order to prevent the compiler
       * from generating a default operator= (issue hit with g++ 4.1)
      */
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     Derived& operator=(const ArrayBase& other)
     {
-      return internal::assign_selector<Derived,Derived>::run(derived(), other.derived());
+      internal::call_assignment(derived(), other.derived());
+      return derived();
     }
 
-    Derived& operator+=(const Scalar& scalar)
-    { return *this = derived() + scalar; }
-    Derived& operator-=(const Scalar& scalar)
-    { return *this = derived() - scalar; }
+    /** Set all the entries to \a value.
+      * \sa DenseBase::setConstant(), DenseBase::fill() */
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    Derived& operator=(const Scalar &value)
+    { Base::setConstant(value); return derived(); }
+
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    Derived& operator+=(const Scalar& scalar);
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    Derived& operator-=(const Scalar& scalar);
 
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     Derived& operator+=(const ArrayBase<OtherDerived>& other);
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     Derived& operator-=(const ArrayBase<OtherDerived>& other);
 
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     Derived& operator*=(const ArrayBase<OtherDerived>& other);
 
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     Derived& operator/=(const ArrayBase<OtherDerived>& other);
 
   public:
+    EIGEN_DEVICE_FUNC
     ArrayBase<Derived>& array() { return *this; }
+    EIGEN_DEVICE_FUNC
     const ArrayBase<Derived>& array() const { return *this; }
 
     /** \returns an \link Eigen::MatrixBase Matrix \endlink expression of this array
      * \sa MatrixBase::array() */
-    MatrixWrapper<Derived> matrix() { return derived(); }
-    const MatrixWrapper<const Derived> matrix() const { return derived(); }
+    EIGEN_DEVICE_FUNC
+    MatrixWrapper<Derived> matrix() { return MatrixWrapper<Derived>(derived()); }
+    EIGEN_DEVICE_FUNC
+    const MatrixWrapper<const Derived> matrix() const { return MatrixWrapper<const Derived>(derived()); }
 
//     template<typename Dest>
//     inline void evalTo(Dest& dst) const { dst = matrix(); }
 
   protected:
+    EIGEN_DEVICE_FUNC
     ArrayBase() : Base() {}
 
   private:
@@ -171,11 +175,10 @@ template<typename Derived> class ArrayBase
   */
 template<typename Derived>
 template<typename OtherDerived>
-EIGEN_STRONG_INLINE Derived &
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
 ArrayBase<Derived>::operator-=(const ArrayBase<OtherDerived> &other)
 {
-  SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, Derived, OtherDerived> tmp(derived());
-  tmp = other.derived();
+  call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
   return derived();
 }
@@ -185,11 +188,10 @@ ArrayBase<Derived>::operator-=(const ArrayBase<OtherDerived> &other)
   */
 template<typename Derived>
 template<typename OtherDerived>
-EIGEN_STRONG_INLINE Derived &
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
 ArrayBase<Derived>::operator+=(const ArrayBase<OtherDerived>& other)
 {
-  SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, Derived, OtherDerived> tmp(derived());
-  tmp = other.derived();
+  call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
   return derived();
 }
@@ -199,11 +201,10 @@ ArrayBase<Derived>::operator+=(const ArrayBase<OtherDerived>& other)
   */
 template<typename Derived>
 template<typename OtherDerived>
-EIGEN_STRONG_INLINE Derived &
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
 ArrayBase<Derived>::operator*=(const ArrayBase<OtherDerived>& other)
 {
-  SelfCwiseBinaryOp<internal::scalar_product_op<Scalar>, Derived, OtherDerived> tmp(derived());
-  tmp = other.derived();
+  call_assignment(derived(), other.derived(), internal::mul_assign_op<Scalar,typename OtherDerived::Scalar>());
   return derived();
 }
@@ -213,11 +214,10 @@ ArrayBase<Derived>::operator*=(const ArrayBase<OtherDerived>& other)
   */
 template<typename Derived>
 template<typename OtherDerived>
-EIGEN_STRONG_INLINE Derived &
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
 ArrayBase<Derived>::operator/=(const ArrayBase<OtherDerived>& other)
 {
-  SelfCwiseBinaryOp<internal::scalar_quotient_op<Scalar>, Derived, OtherDerived> tmp(derived());
-  tmp = other.derived();
+  call_assignment(derived(), other.derived(), internal::div_assign_op<Scalar,typename OtherDerived::Scalar>());
   return derived();
 }

...
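The compound operators now dispatch through internal::call_assignment with the matching assign functor; user-visible behavior is unchanged, as this sketch illustrates:

#include <Eigen/Core>
#include <iostream>

int main()
{
  Eigen::ArrayXf x = Eigen::ArrayXf::LinSpaced(4, 0.f, 3.f);
  Eigen::ArrayXf y = Eigen::ArrayXf::Constant(4, 2.f);

  x += y;  // add_assign_op path
  x *= y;  // mul_assign_op path: coefficient-wise product, unlike MatrixBase
  x /= y;  // div_assign_op path

  Eigen::VectorXf v = x.matrix();  // reinterpret as a matrix expression, no copy
  std::cout << v.transpose() << std::endl;
  return 0;
}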
--- a/Eigen/src/Core/ArrayWrapper.h
+++ b/Eigen/src/Core/ArrayWrapper.h
@@ -32,7 +32,8 @@ struct traits<ArrayWrapper<ExpressionType> >
   // Let's remove NestByRefBit
   enum {
     Flags0 = traits<typename remove_all<typename ExpressionType::Nested>::type >::Flags,
-    Flags = Flags0 & ~NestByRefBit
+    LvalueBitFlag = is_lvalue<ExpressionType>::value ? LvalueBit : 0,
+    Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag
   };
 };
 }
@@ -44,6 +45,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
     typedef ArrayBase<ArrayWrapper> Base;
     EIGEN_DENSE_PUBLIC_INTERFACE(ArrayWrapper)
     EIGEN_INHERIT_ASSIGNMENT_OPERATORS(ArrayWrapper)
+    typedef typename internal::remove_all<ExpressionType>::type NestedExpression;
 
     typedef typename internal::conditional<
                        internal::is_lvalue<ExpressionType>::value,
...@@ -51,76 +53,45 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> > ...@@ -51,76 +53,45 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
const Scalar const Scalar
>::type ScalarWithConstIfNotLvalue; >::type ScalarWithConstIfNotLvalue;
typedef typename internal::nested<ExpressionType>::type NestedExpressionType; typedef typename internal::ref_selector<ExpressionType>::non_const_type NestedExpressionType;
inline ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {} using Base::coeffRef;
EIGEN_DEVICE_FUNC
explicit EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {}
EIGEN_DEVICE_FUNC
inline Index rows() const { return m_expression.rows(); } inline Index rows() const { return m_expression.rows(); }
EIGEN_DEVICE_FUNC
inline Index cols() const { return m_expression.cols(); } inline Index cols() const { return m_expression.cols(); }
EIGEN_DEVICE_FUNC
inline Index outerStride() const { return m_expression.outerStride(); } inline Index outerStride() const { return m_expression.outerStride(); }
EIGEN_DEVICE_FUNC
inline Index innerStride() const { return m_expression.innerStride(); } inline Index innerStride() const { return m_expression.innerStride(); }
inline ScalarWithConstIfNotLvalue* data() { return m_expression.const_cast_derived().data(); } EIGEN_DEVICE_FUNC
inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
EIGEN_DEVICE_FUNC
inline const Scalar* data() const { return m_expression.data(); } inline const Scalar* data() const { return m_expression.data(); }
inline CoeffReturnType coeff(Index rowId, Index colId) const EIGEN_DEVICE_FUNC
{
return m_expression.coeff(rowId, colId);
}
inline Scalar& coeffRef(Index rowId, Index colId)
{
return m_expression.const_cast_derived().coeffRef(rowId, colId);
}
inline const Scalar& coeffRef(Index rowId, Index colId) const inline const Scalar& coeffRef(Index rowId, Index colId) const
{ {
return m_expression.const_cast_derived().coeffRef(rowId, colId); return m_expression.coeffRef(rowId, colId);
}
inline CoeffReturnType coeff(Index index) const
{
return m_expression.coeff(index);
}
inline Scalar& coeffRef(Index index)
{
return m_expression.const_cast_derived().coeffRef(index);
} }
EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index index) const inline const Scalar& coeffRef(Index index) const
{ {
return m_expression.const_cast_derived().coeffRef(index); return m_expression.coeffRef(index);
}
template<int LoadMode>
inline const PacketScalar packet(Index rowId, Index colId) const
{
return m_expression.template packet<LoadMode>(rowId, colId);
}
template<int LoadMode>
inline void writePacket(Index rowId, Index colId, const PacketScalar& val)
{
m_expression.const_cast_derived().template writePacket<LoadMode>(rowId, colId, val);
}
template<int LoadMode>
inline const PacketScalar packet(Index index) const
{
return m_expression.template packet<LoadMode>(index);
}
template<int LoadMode>
inline void writePacket(Index index, const PacketScalar& val)
{
m_expression.const_cast_derived().template writePacket<LoadMode>(index, val);
} }
template<typename Dest> template<typename Dest>
EIGEN_DEVICE_FUNC
inline void evalTo(Dest& dst) const { dst = m_expression; } inline void evalTo(Dest& dst) const { dst = m_expression; }
const typename internal::remove_all<NestedExpressionType>::type& const typename internal::remove_all<NestedExpressionType>::type&
EIGEN_DEVICE_FUNC
nestedExpression() const nestedExpression() const
{ {
return m_expression; return m_expression;
...@@ -128,10 +99,12 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> > ...@@ -128,10 +99,12 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
/** Forwards the resizing request to the nested expression /** Forwards the resizing request to the nested expression
* \sa DenseBase::resize(Index) */ * \sa DenseBase::resize(Index) */
void resize(Index newSize) { m_expression.const_cast_derived().resize(newSize); } EIGEN_DEVICE_FUNC
void resize(Index newSize) { m_expression.resize(newSize); }
/** Forwards the resizing request to the nested expression /** Forwards the resizing request to the nested expression
* \sa DenseBase::resize(Index,Index)*/ * \sa DenseBase::resize(Index,Index)*/
void resize(Index nbRows, Index nbCols) { m_expression.const_cast_derived().resize(nbRows,nbCols); } EIGEN_DEVICE_FUNC
void resize(Index rows, Index cols) { m_expression.resize(rows,cols); }
protected: protected:
NestedExpressionType m_expression; NestedExpressionType m_expression;
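// Hedged usage sketch (illustration): ArrayWrapper is the type returned by
// MatrixBase::array(), exposing coefficient-wise semantics over matrix storage:
//
//   #include <Eigen/Dense>
//   int main() {
//     Eigen::Matrix2f m; m << 1, 2, 3, 4;
//     m.array() += 1.f;                          // wrapper used as an lvalue
//     float s = (m.array() * m.array()).sum();   // coefficient-wise product
//     (void)s;
//     return 0;
//   }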
@@ -157,7 +130,8 @@ struct traits<MatrixWrapper<ExpressionType> >
  // Let's remove NestByRefBit
  enum {
    Flags0 = traits<typename remove_all<typename ExpressionType::Nested>::type >::Flags,
    LvalueBitFlag = is_lvalue<ExpressionType>::value ? LvalueBit : 0,
    Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag
  };
};
}
@@ -169,6 +143,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
    typedef MatrixBase<MatrixWrapper<ExpressionType> > Base;
    EIGEN_DENSE_PUBLIC_INTERFACE(MatrixWrapper)
    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(MatrixWrapper)
    typedef typename internal::remove_all<ExpressionType>::type NestedExpression;

    typedef typename internal::conditional<
                       internal::is_lvalue<ExpressionType>::value,
@@ -176,72 +151,40 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
                       const Scalar
                     >::type ScalarWithConstIfNotLvalue;

    typedef typename internal::ref_selector<ExpressionType>::non_const_type NestedExpressionType;

    using Base::coeffRef;

    EIGEN_DEVICE_FUNC
    explicit inline MatrixWrapper(ExpressionType& matrix) : m_expression(matrix) {}

    EIGEN_DEVICE_FUNC
    inline Index rows() const { return m_expression.rows(); }
    EIGEN_DEVICE_FUNC
    inline Index cols() const { return m_expression.cols(); }
    EIGEN_DEVICE_FUNC
    inline Index outerStride() const { return m_expression.outerStride(); }
    EIGEN_DEVICE_FUNC
    inline Index innerStride() const { return m_expression.innerStride(); }

    EIGEN_DEVICE_FUNC
    inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
    EIGEN_DEVICE_FUNC
    inline const Scalar* data() const { return m_expression.data(); }

    EIGEN_DEVICE_FUNC
    inline const Scalar& coeffRef(Index rowId, Index colId) const
    {
      return m_expression.derived().coeffRef(rowId, colId);
    }

    EIGEN_DEVICE_FUNC
    inline const Scalar& coeffRef(Index index) const
    {
      return m_expression.coeffRef(index);
    }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<NestedExpressionType>::type&
    nestedExpression() const
    {
@@ -250,10 +193,12 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
    /** Forwards the resizing request to the nested expression
      * \sa DenseBase::resize(Index) */
    EIGEN_DEVICE_FUNC
    void resize(Index newSize) { m_expression.resize(newSize); }
    /** Forwards the resizing request to the nested expression
      * \sa DenseBase::resize(Index,Index)*/
    EIGEN_DEVICE_FUNC
    void resize(Index rows, Index cols) { m_expression.resize(rows,cols); }

  protected:
    NestedExpressionType m_expression;
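// Hedged usage sketch (illustration): MatrixWrapper is the dual wrapper,
// returned by ArrayBase::matrix(), restoring linear-algebra semantics over
// array storage:
//
//   #include <Eigen/Dense>
//   int main() {
//     Eigen::Array22f a; a << 1, 2, 3, 4;
//     Eigen::Matrix2f p = a.matrix() * a.matrix();  // true matrix product
//     (void)p;
//     return 0;
//   }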
...
@@ -14,478 +14,6 @@
namespace Eigen {
namespace internal {
/***************************************************************************
* Part 1 : the logic deciding a strategy for traversal and unrolling *
***************************************************************************/
template <typename Derived, typename OtherDerived>
struct assign_traits
{
public:
enum {
DstIsAligned = Derived::Flags & AlignedBit,
DstHasDirectAccess = Derived::Flags & DirectAccessBit,
SrcIsAligned = OtherDerived::Flags & AlignedBit,
JointAlignment = bool(DstIsAligned) && bool(SrcIsAligned) ? Aligned : Unaligned
};
private:
enum {
InnerSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::SizeAtCompileTime)
: int(Derived::Flags)&RowMajorBit ? int(Derived::ColsAtCompileTime)
: int(Derived::RowsAtCompileTime),
InnerMaxSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::MaxSizeAtCompileTime)
: int(Derived::Flags)&RowMajorBit ? int(Derived::MaxColsAtCompileTime)
: int(Derived::MaxRowsAtCompileTime),
MaxSizeAtCompileTime = Derived::SizeAtCompileTime,
PacketSize = packet_traits<typename Derived::Scalar>::size
};
enum {
StorageOrdersAgree = (int(Derived::IsRowMajor) == int(OtherDerived::IsRowMajor)),
MightVectorize = StorageOrdersAgree
&& (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit),
MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
&& int(DstIsAligned) && int(SrcIsAligned),
MayLinearize = StorageOrdersAgree && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess
&& (DstIsAligned || MaxSizeAtCompileTime == Dynamic),
/* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
so it's only good for large enough sizes. */
MaySliceVectorize = MightVectorize && DstHasDirectAccess
&& (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=3*PacketSize)
/* slice vectorization can be slow, so we only want it if the slices are big, which is
indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
in a fixed-size matrix */
};
public:
enum {
Traversal = int(MayInnerVectorize) ? int(InnerVectorizedTraversal)
: int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
: int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
: int(MayLinearize) ? int(LinearTraversal)
: int(DefaultTraversal),
Vectorized = int(Traversal) == InnerVectorizedTraversal
|| int(Traversal) == LinearVectorizedTraversal
|| int(Traversal) == SliceVectorizedTraversal
};
private:
enum {
UnrollingLimit = EIGEN_UNROLLING_LIMIT * (Vectorized ? int(PacketSize) : 1),
MayUnrollCompletely = int(Derived::SizeAtCompileTime) != Dynamic
&& int(OtherDerived::CoeffReadCost) != Dynamic
&& int(Derived::SizeAtCompileTime) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit),
MayUnrollInner = int(InnerSize) != Dynamic
&& int(OtherDerived::CoeffReadCost) != Dynamic
&& int(InnerSize) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit)
};
public:
enum {
Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
? (
int(MayUnrollCompletely) ? int(CompleteUnrolling)
: int(MayUnrollInner) ? int(InnerUnrolling)
: int(NoUnrolling)
)
: int(Traversal) == int(LinearVectorizedTraversal)
? ( bool(MayUnrollCompletely) && bool(DstIsAligned) ? int(CompleteUnrolling) : int(NoUnrolling) )
: int(Traversal) == int(LinearTraversal)
? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling) : int(NoUnrolling) )
: int(NoUnrolling)
};
#ifdef EIGEN_DEBUG_ASSIGN
static void debug()
{
EIGEN_DEBUG_VAR(DstIsAligned)
EIGEN_DEBUG_VAR(SrcIsAligned)
EIGEN_DEBUG_VAR(JointAlignment)
EIGEN_DEBUG_VAR(InnerSize)
EIGEN_DEBUG_VAR(InnerMaxSize)
EIGEN_DEBUG_VAR(PacketSize)
EIGEN_DEBUG_VAR(StorageOrdersAgree)
EIGEN_DEBUG_VAR(MightVectorize)
EIGEN_DEBUG_VAR(MayLinearize)
EIGEN_DEBUG_VAR(MayInnerVectorize)
EIGEN_DEBUG_VAR(MayLinearVectorize)
EIGEN_DEBUG_VAR(MaySliceVectorize)
EIGEN_DEBUG_VAR(Traversal)
EIGEN_DEBUG_VAR(UnrollingLimit)
EIGEN_DEBUG_VAR(MayUnrollCompletely)
EIGEN_DEBUG_VAR(MayUnrollInner)
EIGEN_DEBUG_VAR(Unrolling)
}
#endif
};
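// Hedged illustration (not part of the original source): these traits are pure
// compile-time queries; the public enums above could be inspected directly, e.g.
//
//   typedef Eigen::Matrix4f M;
//   enum { t = Eigen::internal::assign_traits<M, M>::Traversal,
//          u = Eigen::internal::assign_traits<M, M>::Unrolling };
//
// and they are exactly what selects the assign_impl specialization in Part 3.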
/***************************************************************************
* Part 2 : meta-unrollers
***************************************************************************/
/************************
*** Default traversal ***
************************/
template<typename Derived1, typename Derived2, int Index, int Stop>
struct assign_DefaultTraversal_CompleteUnrolling
{
enum {
outer = Index / Derived1::InnerSizeAtCompileTime,
inner = Index % Derived1::InnerSizeAtCompileTime
};
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
{
dst.copyCoeffByOuterInner(outer, inner, src);
assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
}
};
template<typename Derived1, typename Derived2, int Stop>
struct assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
{
static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {}
};
template<typename Derived1, typename Derived2, int Index, int Stop>
struct assign_DefaultTraversal_InnerUnrolling
{
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, typename Derived1::Index outer)
{
dst.copyCoeffByOuterInner(outer, Index, src);
assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src, outer);
}
};
template<typename Derived1, typename Derived2, int Stop>
struct assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Stop, Stop>
{
static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, typename Derived1::Index) {}
};
/***********************
*** Linear traversal ***
***********************/
template<typename Derived1, typename Derived2, int Index, int Stop>
struct assign_LinearTraversal_CompleteUnrolling
{
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
{
dst.copyCoeff(Index, src);
assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
}
};
template<typename Derived1, typename Derived2, int Stop>
struct assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
{
static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {}
};
/**************************
*** Inner vectorization ***
**************************/
template<typename Derived1, typename Derived2, int Index, int Stop>
struct assign_innervec_CompleteUnrolling
{
enum {
outer = Index / Derived1::InnerSizeAtCompileTime,
inner = Index % Derived1::InnerSizeAtCompileTime,
JointAlignment = assign_traits<Derived1,Derived2>::JointAlignment
};
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
{
dst.template copyPacketByOuterInner<Derived2, Aligned, JointAlignment>(outer, inner, src);
assign_innervec_CompleteUnrolling<Derived1, Derived2,
Index+packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src);
}
};
template<typename Derived1, typename Derived2, int Stop>
struct assign_innervec_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
{
static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {}
};
template<typename Derived1, typename Derived2, int Index, int Stop>
struct assign_innervec_InnerUnrolling
{
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, typename Derived1::Index outer)
{
dst.template copyPacketByOuterInner<Derived2, Aligned, Aligned>(outer, Index, src);
assign_innervec_InnerUnrolling<Derived1, Derived2,
Index+packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src, outer);
}
};
template<typename Derived1, typename Derived2, int Stop>
struct assign_innervec_InnerUnrolling<Derived1, Derived2, Stop, Stop>
{
static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, typename Derived1::Index) {}
};
/***************************************************************************
* Part 3 : implementation of all cases
***************************************************************************/
template<typename Derived1, typename Derived2,
int Traversal = assign_traits<Derived1, Derived2>::Traversal,
int Unrolling = assign_traits<Derived1, Derived2>::Unrolling,
int Version = Specialized>
struct assign_impl;
/************************
*** Default traversal ***
************************/
template<typename Derived1, typename Derived2, int Unrolling, int Version>
struct assign_impl<Derived1, Derived2, InvalidTraversal, Unrolling, Version>
{
static inline void run(Derived1 &, const Derived2 &) { }
};
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling, Version>
{
typedef typename Derived1::Index Index;
static inline void run(Derived1 &dst, const Derived2 &src)
{
const Index innerSize = dst.innerSize();
const Index outerSize = dst.outerSize();
for(Index outer = 0; outer < outerSize; ++outer)
for(Index inner = 0; inner < innerSize; ++inner)
dst.copyCoeffByOuterInner(outer, inner, src);
}
};
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, DefaultTraversal, CompleteUnrolling, Version>
{
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
{
assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
::run(dst, src);
}
};
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, DefaultTraversal, InnerUnrolling, Version>
{
typedef typename Derived1::Index Index;
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
{
const Index outerSize = dst.outerSize();
for(Index outer = 0; outer < outerSize; ++outer)
assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, 0, Derived1::InnerSizeAtCompileTime>
::run(dst, src, outer);
}
};
/***********************
*** Linear traversal ***
***********************/
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, LinearTraversal, NoUnrolling, Version>
{
typedef typename Derived1::Index Index;
static inline void run(Derived1 &dst, const Derived2 &src)
{
const Index size = dst.size();
for(Index i = 0; i < size; ++i)
dst.copyCoeff(i, src);
}
};
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, LinearTraversal, CompleteUnrolling, Version>
{
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
{
assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
::run(dst, src);
}
};
/**************************
*** Inner vectorization ***
**************************/
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, NoUnrolling, Version>
{
typedef typename Derived1::Index Index;
static inline void run(Derived1 &dst, const Derived2 &src)
{
const Index innerSize = dst.innerSize();
const Index outerSize = dst.outerSize();
const Index packetSize = packet_traits<typename Derived1::Scalar>::size;
for(Index outer = 0; outer < outerSize; ++outer)
for(Index inner = 0; inner < innerSize; inner+=packetSize)
dst.template copyPacketByOuterInner<Derived2, Aligned, Aligned>(outer, inner, src);
}
};
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, CompleteUnrolling, Version>
{
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
{
assign_innervec_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
::run(dst, src);
}
};
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, InnerUnrolling, Version>
{
typedef typename Derived1::Index Index;
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
{
const Index outerSize = dst.outerSize();
for(Index outer = 0; outer < outerSize; ++outer)
assign_innervec_InnerUnrolling<Derived1, Derived2, 0, Derived1::InnerSizeAtCompileTime>
::run(dst, src, outer);
}
};
/***************************
*** Linear vectorization ***
***************************/
template <bool IsAligned = false>
struct unaligned_assign_impl
{
template <typename Derived, typename OtherDerived>
static EIGEN_STRONG_INLINE void run(const Derived&, OtherDerived&, typename Derived::Index, typename Derived::Index) {}
};
template <>
struct unaligned_assign_impl<false>
{
// MSVC must not inline this function. If it does, it fails to optimize the
// packet access path.
#ifdef _MSC_VER
template <typename Derived, typename OtherDerived>
static EIGEN_DONT_INLINE void run(const Derived& src, OtherDerived& dst, typename Derived::Index start, typename Derived::Index end)
#else
template <typename Derived, typename OtherDerived>
static EIGEN_STRONG_INLINE void run(const Derived& src, OtherDerived& dst, typename Derived::Index start, typename Derived::Index end)
#endif
{
for (typename Derived::Index index = start; index < end; ++index)
dst.copyCoeff(index, src);
}
};
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling, Version>
{
typedef typename Derived1::Index Index;
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
{
const Index size = dst.size();
typedef packet_traits<typename Derived1::Scalar> PacketTraits;
enum {
packetSize = PacketTraits::size,
dstAlignment = PacketTraits::AlignedOnScalar ? Aligned : int(assign_traits<Derived1,Derived2>::DstIsAligned) ,
srcAlignment = assign_traits<Derived1,Derived2>::JointAlignment
};
const Index alignedStart = assign_traits<Derived1,Derived2>::DstIsAligned ? 0
: internal::first_aligned(&dst.coeffRef(0), size);
const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
unaligned_assign_impl<assign_traits<Derived1,Derived2>::DstIsAligned!=0>::run(src,dst,0,alignedStart);
for(Index index = alignedStart; index < alignedEnd; index += packetSize)
{
dst.template copyPacket<Derived2, dstAlignment, srcAlignment>(index, src);
}
unaligned_assign_impl<>::run(src,dst,alignedEnd,size);
}
};
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, CompleteUnrolling, Version>
{
typedef typename Derived1::Index Index;
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
{
enum { size = Derived1::SizeAtCompileTime,
packetSize = packet_traits<typename Derived1::Scalar>::size,
alignedSize = (size/packetSize)*packetSize };
assign_innervec_CompleteUnrolling<Derived1, Derived2, 0, alignedSize>::run(dst, src);
assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, alignedSize, size>::run(dst, src);
}
};
/**************************
*** Slice vectorization ***
***************************/
template<typename Derived1, typename Derived2, int Version>
struct assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling, Version>
{
typedef typename Derived1::Index Index;
static inline void run(Derived1 &dst, const Derived2 &src)
{
typedef typename Derived1::Scalar Scalar;
typedef packet_traits<Scalar> PacketTraits;
enum {
packetSize = PacketTraits::size,
alignable = PacketTraits::AlignedOnScalar,
dstIsAligned = assign_traits<Derived1,Derived2>::DstIsAligned,
dstAlignment = alignable ? Aligned : int(dstIsAligned),
srcAlignment = assign_traits<Derived1,Derived2>::JointAlignment
};
const Scalar *dst_ptr = &dst.coeffRef(0,0);
if((!bool(dstIsAligned)) && (size_t(dst_ptr) % sizeof(Scalar))>0)
{
// the pointer is not aligned on scalar boundaries, so alignment is not possible
return assign_impl<Derived1,Derived2,DefaultTraversal,NoUnrolling>::run(dst, src);
}
const Index packetAlignedMask = packetSize - 1;
const Index innerSize = dst.innerSize();
const Index outerSize = dst.outerSize();
const Index alignedStep = alignable ? (packetSize - dst.outerStride() % packetSize) & packetAlignedMask : 0;
Index alignedStart = ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned(dst_ptr, innerSize);
for(Index outer = 0; outer < outerSize; ++outer)
{
const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
// do the non-vectorizable part of the assignment
for(Index inner = 0; inner<alignedStart ; ++inner)
dst.copyCoeffByOuterInner(outer, inner, src);
// do the vectorizable part of the assignment
for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize)
dst.template copyPacketByOuterInner<Derived2, dstAlignment, Unaligned>(outer, inner, src);
// do the non-vectorizable part of the assignment
for(Index inner = alignedEnd; inner<innerSize ; ++inner)
dst.copyCoeffByOuterInner(outer, inner, src);
alignedStart = std::min<Index>((alignedStart+alignedStep)%packetSize, innerSize);
}
}
};
} // end namespace internal
/***************************************************************************
* Part 4 : implementation of DenseBase methods
***************************************************************************/
template<typename Derived>
template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>
@@ -499,90 +27,62 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>
  EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived)
  EIGEN_STATIC_ASSERT(SameType,YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
  eigen_assert(rows() == other.rows() && cols() == other.cols());
  internal::call_assignment_no_alias(derived(),other.derived());
  return derived();
}
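// Hedged note (illustration): the body above now delegates to the
// evaluator-based internal::call_assignment_no_alias, which skips the alias
// analysis performed by internal::call_assignment. A typical way user code is
// assumed to reach this no-alias path:
//
//   Eigen::MatrixXf a(3,3), b(3,3), c(3,3);
//   b.setRandom(); c.setRandom();
//   a.noalias() = b * c;  // caller guarantees a does not alias b or c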
namespace internal {
template<typename Derived, typename OtherDerived,
bool EvalBeforeAssigning = (int(internal::traits<OtherDerived>::Flags) & EvalBeforeAssigningBit) != 0,
bool NeedToTranspose = ((int(Derived::RowsAtCompileTime) == 1 && int(OtherDerived::ColsAtCompileTime) == 1)
| // FIXME | instead of || to please GCC 4.4.0 stupid warning "suggest parentheses around &&".
// revert to || as soon as not needed anymore.
(int(Derived::ColsAtCompileTime) == 1 && int(OtherDerived::RowsAtCompileTime) == 1))
&& int(Derived::SizeAtCompileTime) != 1>
struct assign_selector;
template<typename Derived, typename OtherDerived>
struct assign_selector<Derived,OtherDerived,false,false> {
static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.derived()); }
template<typename ActualDerived, typename ActualOtherDerived>
static EIGEN_STRONG_INLINE Derived& evalTo(ActualDerived& dst, const ActualOtherDerived& other) { other.evalTo(dst); return dst; }
};
template<typename Derived, typename OtherDerived>
struct assign_selector<Derived,OtherDerived,true,false> {
static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.eval()); }
};
template<typename Derived, typename OtherDerived>
struct assign_selector<Derived,OtherDerived,false,true> {
static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose()); }
template<typename ActualDerived, typename ActualOtherDerived>
static EIGEN_STRONG_INLINE Derived& evalTo(ActualDerived& dst, const ActualOtherDerived& other) { Transpose<ActualDerived> dstTrans(dst); other.evalTo(dstTrans); return dst; }
};
template<typename Derived, typename OtherDerived>
struct assign_selector<Derived,OtherDerived,true,true> {
static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose().eval()); }
};
} // end namespace internal
template<typename Derived>
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase<OtherDerived>& other)
{
  internal::call_assignment(derived(), other.derived());
  return derived();
}

template<typename Derived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase& other)
{
  internal::call_assignment(derived(), other.derived());
  return derived();
}

template<typename Derived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const MatrixBase& other)
{
  internal::call_assignment(derived(), other.derived());
  return derived();
}

template<typename Derived>
template <typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const DenseBase<OtherDerived>& other)
{
  internal::call_assignment(derived(), other.derived());
  return derived();
}

template<typename Derived>
template <typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const EigenBase<OtherDerived>& other)
{
  internal::call_assignment(derived(), other.derived());
  return derived();
}

template<typename Derived>
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const ReturnByValue<OtherDerived>& other)
{
  other.derived().evalTo(derived());
  return derived();
}

} // end namespace Eigen
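// Hedged illustration (not part of the patch): all operator= overloads above
// now funnel into internal::call_assignment, so a plain expression assignment
// picks up the evaluator machinery automatically:
//
//   Eigen::MatrixXd a(2,2), b(2,2), c(2,2);
//   b.setRandom(); c.setRandom();
//   a = b + c;  // DenseBase::operator= -> internal::call_assignment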
...
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2011 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2011-2012 Jitse Niesen <jitse@maths.leeds.ac.uk>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_ASSIGN_EVALUATOR_H
#define EIGEN_ASSIGN_EVALUATOR_H
namespace Eigen {
// This implementation is based on Assign.h
namespace internal {
/***************************************************************************
* Part 1 : the logic deciding a strategy for traversal and unrolling *
***************************************************************************/
// copy_using_evaluator_traits is based on assign_traits
template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc>
struct copy_using_evaluator_traits
{
typedef typename DstEvaluator::XprType Dst;
typedef typename Dst::Scalar DstScalar;
enum {
DstFlags = DstEvaluator::Flags,
SrcFlags = SrcEvaluator::Flags
};
public:
enum {
DstAlignment = DstEvaluator::Alignment,
SrcAlignment = SrcEvaluator::Alignment,
DstHasDirectAccess = (DstFlags & DirectAccessBit) == DirectAccessBit,
JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment)
};
private:
enum {
InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
: int(DstFlags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
: int(Dst::RowsAtCompileTime),
InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
: int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
: int(Dst::MaxRowsAtCompileTime),
OuterStride = int(outer_stride_at_compile_time<Dst>::ret),
MaxSizeAtCompileTime = Dst::SizeAtCompileTime
};
// TODO distinguish between linear traversal and inner-traversals
typedef typename find_best_packet<DstScalar,Dst::SizeAtCompileTime>::type LinearPacketType;
typedef typename find_best_packet<DstScalar,InnerSize>::type InnerPacketType;
enum {
LinearPacketSize = unpacket_traits<LinearPacketType>::size,
InnerPacketSize = unpacket_traits<InnerPacketType>::size
};
public:
enum {
LinearRequiredAlignment = unpacket_traits<LinearPacketType>::alignment,
InnerRequiredAlignment = unpacket_traits<InnerPacketType>::alignment
};
private:
enum {
DstIsRowMajor = DstFlags&RowMajorBit,
SrcIsRowMajor = SrcFlags&RowMajorBit,
StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)),
MightVectorize = bool(StorageOrdersAgree)
&& (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit)
&& bool(functor_traits<AssignFunc>::PacketAccess),
MayInnerVectorize = MightVectorize
&& int(InnerSize)!=Dynamic && int(InnerSize)%int(InnerPacketSize)==0
&& int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0
&& (EIGEN_UNALIGNED_VECTORIZE || int(JointAlignment)>=int(InnerRequiredAlignment)),
MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
MayLinearVectorize = bool(MightVectorize) && bool(MayLinearize) && bool(DstHasDirectAccess)
&& (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
/* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
so it's only good for large enough sizes. */
MaySliceVectorize = bool(MightVectorize) && bool(DstHasDirectAccess)
&& (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=(EIGEN_UNALIGNED_VECTORIZE?InnerPacketSize:(3*InnerPacketSize)))
/* slice vectorization can be slow, so we only want it if the slices are big, which is
indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
in a fixed-size matrix
However, with EIGEN_UNALIGNED_VECTORIZE and unrolling, slice vectorization is still worth it */
};
public:
enum {
Traversal = int(MayLinearVectorize) && (LinearPacketSize>InnerPacketSize) ? int(LinearVectorizedTraversal)
: int(MayInnerVectorize) ? int(InnerVectorizedTraversal)
: int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
: int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
: int(MayLinearize) ? int(LinearTraversal)
: int(DefaultTraversal),
Vectorized = int(Traversal) == InnerVectorizedTraversal
|| int(Traversal) == LinearVectorizedTraversal
|| int(Traversal) == SliceVectorizedTraversal
};
typedef typename conditional<int(Traversal)==LinearVectorizedTraversal, LinearPacketType, InnerPacketType>::type PacketType;
private:
enum {
ActualPacketSize = int(Traversal)==LinearVectorizedTraversal ? LinearPacketSize
: Vectorized ? InnerPacketSize
: 1,
UnrollingLimit = EIGEN_UNROLLING_LIMIT * ActualPacketSize,
MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic
&& int(Dst::SizeAtCompileTime) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit),
MayUnrollInner = int(InnerSize) != Dynamic
&& int(InnerSize) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit)
};
public:
enum {
Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
? (
int(MayUnrollCompletely) ? int(CompleteUnrolling)
: int(MayUnrollInner) ? int(InnerUnrolling)
: int(NoUnrolling)
)
: int(Traversal) == int(LinearVectorizedTraversal)
? ( bool(MayUnrollCompletely) && ( EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)))
? int(CompleteUnrolling)
: int(NoUnrolling) )
: int(Traversal) == int(LinearTraversal)
? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
: int(NoUnrolling) )
#if EIGEN_UNALIGNED_VECTORIZE
: int(Traversal) == int(SliceVectorizedTraversal)
? ( bool(MayUnrollInner) ? int(InnerUnrolling)
: int(NoUnrolling) )
#endif
: int(NoUnrolling)
};
#ifdef EIGEN_DEBUG_ASSIGN
static void debug()
{
std::cerr << "DstXpr: " << typeid(typename DstEvaluator::XprType).name() << std::endl;
std::cerr << "SrcXpr: " << typeid(typename SrcEvaluator::XprType).name() << std::endl;
std::cerr.setf(std::ios::hex, std::ios::basefield);
std::cerr << "DstFlags" << " = " << DstFlags << " (" << demangle_flags(DstFlags) << " )" << std::endl;
std::cerr << "SrcFlags" << " = " << SrcFlags << " (" << demangle_flags(SrcFlags) << " )" << std::endl;
std::cerr.unsetf(std::ios::hex);
EIGEN_DEBUG_VAR(DstAlignment)
EIGEN_DEBUG_VAR(SrcAlignment)
EIGEN_DEBUG_VAR(LinearRequiredAlignment)
EIGEN_DEBUG_VAR(InnerRequiredAlignment)
EIGEN_DEBUG_VAR(JointAlignment)
EIGEN_DEBUG_VAR(InnerSize)
EIGEN_DEBUG_VAR(InnerMaxSize)
EIGEN_DEBUG_VAR(LinearPacketSize)
EIGEN_DEBUG_VAR(InnerPacketSize)
EIGEN_DEBUG_VAR(ActualPacketSize)
EIGEN_DEBUG_VAR(StorageOrdersAgree)
EIGEN_DEBUG_VAR(MightVectorize)
EIGEN_DEBUG_VAR(MayLinearize)
EIGEN_DEBUG_VAR(MayInnerVectorize)
EIGEN_DEBUG_VAR(MayLinearVectorize)
EIGEN_DEBUG_VAR(MaySliceVectorize)
std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
EIGEN_DEBUG_VAR(SrcEvaluator::CoeffReadCost)
EIGEN_DEBUG_VAR(UnrollingLimit)
EIGEN_DEBUG_VAR(MayUnrollCompletely)
EIGEN_DEBUG_VAR(MayUnrollInner)
std::cerr << "Unrolling" << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl;
std::cerr << std::endl;
}
#endif
};
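// Hedged usage sketch (illustration): defining EIGEN_DEBUG_ASSIGN before
// including Eigen makes the kernel constructor in Part 4 call the debug()
// member above, printing the traversal/unrolling decision for each assignment:
//
//   #define EIGEN_DEBUG_ASSIGN
//   #include <Eigen/Dense>
//   int main() {
//     Eigen::Matrix4f a, b;
//     b.setRandom();
//     a = b + b;  // prints DstFlags, Traversal, Unrolling, alignments, ...
//     return 0;
//   }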
/***************************************************************************
* Part 2 : meta-unrollers
***************************************************************************/
/************************
*** Default traversal ***
************************/
template<typename Kernel, int Index, int Stop>
struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
{
// FIXME: this is not very clean, perhaps this information should be provided by the kernel?
typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
typedef typename DstEvaluatorType::XprType DstXprType;
enum {
outer = Index / DstXprType::InnerSizeAtCompileTime,
inner = Index % DstXprType::InnerSizeAtCompileTime
};
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{
kernel.assignCoeffByOuterInner(outer, inner);
copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
}
};
template<typename Kernel, int Stop>
struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Stop, Stop>
{
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
};
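// Hedged illustration (not part of this header): for a fixed-size 2x2
// column-major destination (InnerSizeAtCompileTime == 2), instantiating
// copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, 4>
// unrolls at compile time into the equivalent of
//
//   kernel.assignCoeffByOuterInner(0, 0);
//   kernel.assignCoeffByOuterInner(0, 1);
//   kernel.assignCoeffByOuterInner(1, 0);
//   kernel.assignCoeffByOuterInner(1, 1);
//
// with the <Kernel, Stop, Stop> specialization above terminating the recursion.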
template<typename Kernel, int Index_, int Stop>
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling
{
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
{
kernel.assignCoeffByOuterInner(outer, Index_);
copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Index_+1, Stop>::run(kernel, outer);
}
};
template<typename Kernel, int Stop>
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Stop, Stop>
{
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index) { }
};
/***********************
*** Linear traversal ***
***********************/
template<typename Kernel, int Index, int Stop>
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling
{
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel)
{
kernel.assignCoeff(Index);
copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
}
};
template<typename Kernel, int Stop>
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Stop, Stop>
{
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
};
/**************************
*** Inner vectorization ***
**************************/
template<typename Kernel, int Index, int Stop>
struct copy_using_evaluator_innervec_CompleteUnrolling
{
// FIXME: this is not very clean, perhaps this information should be provided by the kernel?
typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
typedef typename DstEvaluatorType::XprType DstXprType;
typedef typename Kernel::PacketType PacketType;
enum {
outer = Index / DstXprType::InnerSizeAtCompileTime,
inner = Index % DstXprType::InnerSizeAtCompileTime,
SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
DstAlignment = Kernel::AssignmentTraits::DstAlignment
};
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{
kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
enum { NextIndex = Index + unpacket_traits<PacketType>::size };
copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
}
};
template<typename Kernel, int Stop>
struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop>
{
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
};
template<typename Kernel, int Index_, int Stop, int SrcAlignment, int DstAlignment>
struct copy_using_evaluator_innervec_InnerUnrolling
{
typedef typename Kernel::PacketType PacketType;
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
{
kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, Index_);
enum { NextIndex = Index_ + unpacket_traits<PacketType>::size };
copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop, SrcAlignment, DstAlignment>::run(kernel, outer);
}
};
template<typename Kernel, int Stop, int SrcAlignment, int DstAlignment>
struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop, SrcAlignment, DstAlignment>
{
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &, Index) { }
};
/***************************************************************************
* Part 3 : implementation of all cases
***************************************************************************/
// dense_assignment_loop is based on assign_impl
template<typename Kernel,
int Traversal = Kernel::AssignmentTraits::Traversal,
int Unrolling = Kernel::AssignmentTraits::Unrolling>
struct dense_assignment_loop;
/************************
*** Default traversal ***
************************/
template<typename Kernel>
struct dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling>
{
EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel &kernel)
{
for(Index outer = 0; outer < kernel.outerSize(); ++outer) {
for(Index inner = 0; inner < kernel.innerSize(); ++inner) {
kernel.assignCoeffByOuterInner(outer, inner);
}
}
}
};
template<typename Kernel>
struct dense_assignment_loop<Kernel, DefaultTraversal, CompleteUnrolling>
{
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
}
};
template<typename Kernel>
struct dense_assignment_loop<Kernel, DefaultTraversal, InnerUnrolling>
{
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
const Index outerSize = kernel.outerSize();
for(Index outer = 0; outer < outerSize; ++outer)
copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, outer);
}
};
/***************************
*** Linear vectorization ***
***************************/
// The goal of unaligned_dense_assignment_loop is simply to factor out the handling
// of the non-vectorizable beginning and ending parts
template <bool IsAligned = false>
struct unaligned_dense_assignment_loop
{
// if IsAligned = true, then do nothing
template <typename Kernel>
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index, Index) {}
};
template <>
struct unaligned_dense_assignment_loop<false>
{
// MSVC must not inline this function. If it does, it fails to optimize the
// packet access path.
// FIXME check which version exhibits this issue
#if EIGEN_COMP_MSVC
template <typename Kernel>
static EIGEN_DONT_INLINE void run(Kernel &kernel,
Index start,
Index end)
#else
template <typename Kernel>
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel,
Index start,
Index end)
#endif
{
for (Index index = start; index < end; ++index)
kernel.assignCoeff(index);
}
};
template<typename Kernel>
struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling>
{
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{
const Index size = kernel.size();
typedef typename Kernel::Scalar Scalar;
typedef typename Kernel::PacketType PacketType;
enum {
requestedAlignment = Kernel::AssignmentTraits::LinearRequiredAlignment,
packetSize = unpacket_traits<PacketType>::size,
dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
dstAlignment = packet_traits<Scalar>::AlignedOnScalar ? int(requestedAlignment)
: int(Kernel::AssignmentTraits::DstAlignment),
srcAlignment = Kernel::AssignmentTraits::JointAlignment
};
const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned<requestedAlignment>(kernel.dstDataPtr(), size);
const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
unaligned_dense_assignment_loop<dstIsAligned!=0>::run(kernel, 0, alignedStart);
for(Index index = alignedStart; index < alignedEnd; index += packetSize)
kernel.template assignPacket<dstAlignment, srcAlignment, PacketType>(index);
unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size);
}
};
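// Hedged worked example (illustrative numbers): for float with a 4-float packet
// (packetSize == 4), size == 11, and dstDataPtr() one scalar past a 16-byte
// boundary, first_aligned yields alignedStart == 3 and thus
// alignedEnd == 3 + ((11-3)/4)*4 == 11: indices [0,3) are assigned
// coefficient-wise, [3,11) by two aligned packet stores, and the trailing
// scalar loop over [11,11) is empty.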
template<typename Kernel>
struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrolling>
{
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
typedef typename Kernel::PacketType PacketType;
enum { size = DstXprType::SizeAtCompileTime,
packetSize = unpacket_traits<PacketType>::size,
alignedSize = (size/packetSize)*packetSize };
copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel);
copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, alignedSize, size>::run(kernel);
}
};
/**************************
*** Inner vectorization ***
**************************/
template<typename Kernel>
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
{
typedef typename Kernel::PacketType PacketType;
enum {
SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
DstAlignment = Kernel::AssignmentTraits::DstAlignment
};
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{
const Index innerSize = kernel.innerSize();
const Index outerSize = kernel.outerSize();
const Index packetSize = unpacket_traits<PacketType>::size;
for(Index outer = 0; outer < outerSize; ++outer)
for(Index inner = 0; inner < innerSize; inner+=packetSize)
kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
}
};
template<typename Kernel>
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, CompleteUnrolling>
{
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
}
};
template<typename Kernel>
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling>
{
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
typedef typename Kernel::AssignmentTraits Traits;
const Index outerSize = kernel.outerSize();
for(Index outer = 0; outer < outerSize; ++outer)
copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime,
Traits::SrcAlignment, Traits::DstAlignment>::run(kernel, outer);
}
};
/***********************
*** Linear traversal ***
***********************/
template<typename Kernel>
struct dense_assignment_loop<Kernel, LinearTraversal, NoUnrolling>
{
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{
const Index size = kernel.size();
for(Index i = 0; i < size; ++i)
kernel.assignCoeff(i);
}
};
template<typename Kernel>
struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling>
{
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
}
};
/**************************
*** Slice vectorization ***
***************************/
template<typename Kernel>
struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
{
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{
typedef typename Kernel::Scalar Scalar;
typedef typename Kernel::PacketType PacketType;
enum {
packetSize = unpacket_traits<PacketType>::size,
requestedAlignment = int(Kernel::AssignmentTraits::InnerRequiredAlignment),
alignable = packet_traits<Scalar>::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment)>=sizeof(Scalar),
dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
dstAlignment = alignable ? int(requestedAlignment)
: int(Kernel::AssignmentTraits::DstAlignment)
};
const Scalar *dst_ptr = kernel.dstDataPtr();
if((!bool(dstIsAligned)) && (UIntPtr(dst_ptr) % sizeof(Scalar))>0)
{
// the pointer is not aligned on scalar boundaries, so alignment is not possible
return dense_assignment_loop<Kernel,DefaultTraversal,NoUnrolling>::run(kernel);
}
const Index packetAlignedMask = packetSize - 1;
const Index innerSize = kernel.innerSize();
const Index outerSize = kernel.outerSize();
const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0;
Index alignedStart = ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned<requestedAlignment>(dst_ptr, innerSize);
for(Index outer = 0; outer < outerSize; ++outer)
{
const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
// do the non-vectorizable part of the assignment
for(Index inner = 0; inner<alignedStart ; ++inner)
kernel.assignCoeffByOuterInner(outer, inner);
// do the vectorizable part of the assignment
for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize)
kernel.template assignPacketByOuterInner<dstAlignment, Unaligned, PacketType>(outer, inner);
// do the non-vectorizable part of the assignment
for(Index inner = alignedEnd; inner<innerSize ; ++inner)
kernel.assignCoeffByOuterInner(outer, inner);
alignedStart = numext::mini((alignedStart+alignedStep)%packetSize, innerSize);
}
}
};
#if EIGEN_UNALIGNED_VECTORIZE
template<typename Kernel>
struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, InnerUnrolling>
{
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
typedef typename Kernel::PacketType PacketType;
enum { size = DstXprType::InnerSizeAtCompileTime,
packetSize = unpacket_traits<PacketType>::size,
vectorizableSize = (size/packetSize)*packetSize };
for(Index outer = 0; outer < kernel.outerSize(); ++outer)
{
copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, vectorizableSize, 0, 0>::run(kernel, outer);
copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, vectorizableSize, size>::run(kernel, outer);
}
}
};
#endif
/***************************************************************************
* Part 4 : Generic dense assignment kernel
***************************************************************************/
// This class generalizes the assignment of a coefficient (or packet) from one dense evaluator
// to another dense writable evaluator.
// It is parametrized by the two evaluators, and the actual assignment functor.
// This abstraction level makes it possible to keep the evaluation loops as simple and as generic as possible.
// One can customize the assignment using this generic dense_assignment_kernel with different
// functors, or by completely overloading it, bypassing the functor.
template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized>
class generic_dense_assignment_kernel
{
protected:
typedef typename DstEvaluatorTypeT::XprType DstXprType;
typedef typename SrcEvaluatorTypeT::XprType SrcXprType;
public:
typedef DstEvaluatorTypeT DstEvaluatorType;
typedef SrcEvaluatorTypeT SrcEvaluatorType;
typedef typename DstEvaluatorType::Scalar Scalar;
typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor> AssignmentTraits;
typedef typename AssignmentTraits::PacketType PacketType;
EIGEN_DEVICE_FUNC generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
: m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr)
{
#ifdef EIGEN_DEBUG_ASSIGN
AssignmentTraits::debug();
#endif
}
EIGEN_DEVICE_FUNC Index size() const { return m_dstExpr.size(); }
EIGEN_DEVICE_FUNC Index innerSize() const { return m_dstExpr.innerSize(); }
EIGEN_DEVICE_FUNC Index outerSize() const { return m_dstExpr.outerSize(); }
EIGEN_DEVICE_FUNC Index rows() const { return m_dstExpr.rows(); }
EIGEN_DEVICE_FUNC Index cols() const { return m_dstExpr.cols(); }
EIGEN_DEVICE_FUNC Index outerStride() const { return m_dstExpr.outerStride(); }
EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() { return m_dst; }
EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const { return m_src; }
/// Assign src(row,col) to dst(row,col) through the assignment functor.
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col)
{
m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col));
}
/// \sa assignCoeff(Index,Index)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index)
{
m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index));
}
/// \sa assignCoeff(Index,Index)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner)
{
Index row = rowIndexByOuterInner(outer, inner);
Index col = colIndexByOuterInner(outer, inner);
assignCoeff(row, col);
}
template<int StoreMode, int LoadMode, typename PacketType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col)
{
m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode,PacketType>(row,col));
}
template<int StoreMode, int LoadMode, typename PacketType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index)
{
m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode,PacketType>(index));
}
template<int StoreMode, int LoadMode, typename PacketType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner)
{
Index row = rowIndexByOuterInner(outer, inner);
Index col = colIndexByOuterInner(outer, inner);
assignPacket<StoreMode,LoadMode,PacketType>(row, col);
}
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner)
{
typedef typename DstEvaluatorType::ExpressionTraits Traits;
return int(Traits::RowsAtCompileTime) == 1 ? 0
: int(Traits::ColsAtCompileTime) == 1 ? inner
: int(DstEvaluatorType::Flags)&RowMajorBit ? outer
: inner;
}
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner)
{
typedef typename DstEvaluatorType::ExpressionTraits Traits;
return int(Traits::ColsAtCompileTime) == 1 ? 0
: int(Traits::RowsAtCompileTime) == 1 ? inner
: int(DstEvaluatorType::Flags)&RowMajorBit ? inner
: outer;
}
EIGEN_DEVICE_FUNC const Scalar* dstDataPtr() const
{
return m_dstExpr.data();
}
protected:
DstEvaluatorType& m_dst;
const SrcEvaluatorType& m_src;
const Functor &m_functor;
// TODO find a way to avoid the needs of the original expression
DstXprType& m_dstExpr;
};
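// Illustrative sketch (an addition, not part of the original file): the kernel above is
// customized through its functor. Eigen's own assignment functors follow this shape; the
// clamping functor below is hypothetical and only sketches the coefficient-wise interface
// (a real functor would also provide functor_traits and, for vectorized paths, assignPacket):
//
//   template<typename Scalar> struct clamp_assign_op {
//     EIGEN_EMPTY_STRUCT_CTOR(clamp_assign_op)
//     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const
//     { a = b < Scalar(0) ? Scalar(0) : b; }
//   };
//   // call_dense_assignment_loop(dst, src, clamp_assign_op<double>()); // dst(i,j) = max(src(i,j), 0)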
/***************************************************************************
* Part 5 : Entry point for dense rectangular assignment
***************************************************************************/
template<typename DstXprType,typename SrcXprType, typename Functor>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const Functor &/*func*/)
{
EIGEN_ONLY_USED_FOR_DEBUG(dst);
EIGEN_ONLY_USED_FOR_DEBUG(src);
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
}
template<typename DstXprType,typename SrcXprType, typename T1, typename T2>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const internal::assign_op<T1,T2> &/*func*/)
{
Index dstRows = src.rows();
Index dstCols = src.cols();
if(((dst.rows()!=dstRows) || (dst.cols()!=dstCols)))
dst.resize(dstRows, dstCols);
eigen_assert(dst.rows() == dstRows && dst.cols() == dstCols);
}
template<typename DstXprType, typename SrcXprType, typename Functor>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func)
{
typedef evaluator<DstXprType> DstEvaluatorType;
typedef evaluator<SrcXprType> SrcEvaluatorType;
SrcEvaluatorType srcEvaluator(src);
// NOTE To properly handle A = (A*A.transpose())/s with A rectangular,
// we need to resize the destination after the source evaluator has been created.
resize_if_allowed(dst, src, func);
DstEvaluatorType dstEvaluator(dst);
typedef generic_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
dense_assignment_loop<Kernel>::run(kernel);
}
template<typename DstXprType, typename SrcXprType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src)
{
call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar>());
}
/***************************************************************************
* Part 6 : Generic assignment
***************************************************************************/
// Based on the respective shapes of the destination and source,
// the class AssignmentKind determines the kind of assignment mechanism.
// AssignmentKind must define a Kind typedef.
template<typename DstShape, typename SrcShape> struct AssignmentKind;
// Assignment kinds defined in this file:
struct Dense2Dense {};
struct EigenBase2EigenBase {};
template<typename,typename> struct AssignmentKind { typedef EigenBase2EigenBase Kind; };
template<> struct AssignmentKind<DenseShape,DenseShape> { typedef Dense2Dense Kind; };
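// Illustrative example (an addition): supporting a new shape pair amounts to specializing
// AssignmentKind. For instance, later in this document band matrices route dense-from-band
// assignment through the EigenBase2EigenBase path:
//
//   template<> struct AssignmentKind<DenseShape,BandShape> { typedef EigenBase2EigenBase Kind; };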
// This is the main assignment class
template< typename DstXprType, typename SrcXprType, typename Functor,
typename Kind = typename AssignmentKind< typename evaluator_traits<DstXprType>::Shape , typename evaluator_traits<SrcXprType>::Shape >::Kind,
typename EnableIf = void>
struct Assignment;
// The only purpose of this call_assignment() function is to deal with noalias() / "assume-aliasing" and automatic transposition.
// Indeed, I (Gael) think that this concept of "assume-aliasing" was a mistake, and it makes things quite complicated.
// So this intermediate function removes everything related to "assume-aliasing" such that Assignment
// does not have to bother about these annoying details.
template<typename Dst, typename Src>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment(Dst& dst, const Src& src)
{
call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
}
template<typename Dst, typename Src>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment(const Dst& dst, const Src& src)
{
call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
}
// Deal with "assume-aliasing"
template<typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if< evaluator_assume_aliasing<Src>::value, void*>::type = 0)
{
typename plain_matrix_type<Src>::type tmp(src);
call_assignment_no_alias(dst, tmp, func);
}
template<typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<!evaluator_assume_aliasing<Src>::value, void*>::type = 0)
{
call_assignment_no_alias(dst, src, func);
}
// by-pass "assume-aliasing"
// When there is no aliasing, we require that 'dst' has been properly resized
template<typename Dst, template <typename> class StorageBase, typename Src, typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
{
call_assignment_no_alias(dst.expression(), src, func);
}
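// Illustrative usage (an addition): how the overloads above are reached from user code,
// assuming matrices A, B, C of compatible sizes:
//
//   C = A * B;            // products "assume aliasing": a temporary is evaluated first
//   C.noalias() = A * B;  // NoAlias overload: assigns directly, without the temporary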
template<typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
{
enum {
NeedToTranspose = ( (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1)
|| (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1)
) && int(Dst::SizeAtCompileTime) != 1
};
typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst>::type ActualDstTypeCleaned;
typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst&>::type ActualDstType;
ActualDstType actualDst(dst);
// TODO check whether this is the right place to perform these checks:
EIGEN_STATIC_ASSERT_LVALUE(Dst)
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src)
EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar);
Assignment<ActualDstTypeCleaned,Src,Func>::run(actualDst, src, func);
}
template<typename Dst, typename Src>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment_no_alias(Dst& dst, const Src& src)
{
call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
}
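// Illustrative usage (an addition): the NeedToTranspose logic above is what makes the
// following mixed row/column vector assignment legal:
//
//   Eigen::Vector3f v;
//   v = Eigen::RowVector3f(1,2,3);  // dst is implicitly transposed during assignment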
template<typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)
{
// TODO check whether this is the right place to perform these checks:
EIGEN_STATIC_ASSERT_LVALUE(Dst)
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src)
EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename Dst::Scalar,typename Src::Scalar);
Assignment<Dst,Src,Func>::run(dst, src, func);
}
template<typename Dst, typename Src>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src)
{
call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
}
// forward declaration
template<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, const Src &src);
// Generic Dense to Dense assignment
// Note that the last template argument "Weak" is needed to make it possible to combine
// partial specialization and SFINAE without ambiguous specializations
template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Weak>
{
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
{
#ifndef EIGEN_NO_DEBUG
internal::check_for_aliasing(dst, src);
#endif
call_dense_assignment_loop(dst, src, func);
}
};
// Generic assignment through evalTo.
// TODO: not sure we have to keep that one, but it helps porting current code to the new evaluator mechanism.
// Note that the last template argument "Weak" is needed to make it possible to combine
// partial specialization and SFINAE without ambiguous specializations
template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Weak>
{
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
{
Index dstRows = src.rows();
Index dstCols = src.cols();
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
dst.resize(dstRows, dstCols);
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
src.evalTo(dst);
}
// NOTE The following two functions are templated to avoid their instantiation if not needed
// This is needed because some expressions support evalTo only and/or have 'void' as scalar type.
template<typename SrcScalarType>
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar,SrcScalarType> &/*func*/)
{
Index dstRows = src.rows();
Index dstCols = src.cols();
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
dst.resize(dstRows, dstCols);
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
src.addTo(dst);
}
template<typename SrcScalarType>
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar,SrcScalarType> &/*func*/)
{
Index dstRows = src.rows();
Index dstCols = src.cols();
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
dst.resize(dstRows, dstCols);
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
src.subTo(dst);
}
};
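// Illustrative usage (an addition): objects that only expose an EigenBase-level interface,
// such as the BandMatrix classes appearing later in this document, are assigned through the
// evalTo/addTo/subTo overloads above. Assuming BandMatrixBase provides evalTo:
//
//   Eigen::internal::TridiagonalMatrix<double,Eigen::Dynamic> T(4);
//   Eigen::MatrixXd dense = T.toDenseMatrix();  // explicit dense evaluation via evalTo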
} // namespace internal
} // end namespace Eigen
#endif // EIGEN_ASSIGN_EVALUATOR_H
/*
 Copyright (c) 2011, Intel Corporation. All rights reserved.
 Copyright (C) 2015 Gael Guennebaud <gael.guennebaud@inria.fr>
 Redistribution and use in source and binary forms, with or without modification,
 are permitted provided that the following conditions are met:
@@ -37,17 +38,13 @@ namespace Eigen {
namespace internal {
template<typename Dst, typename Src>
class vml_assign_traits
{
private:
enum {
DstHasDirectAccess = Dst::Flags & DirectAccessBit,
SrcHasDirectAccess = Src::Flags & DirectAccessBit,
StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)),
InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
: int(Dst::Flags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
: int(Dst::RowsAtCompileTime),
InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
: int(Dst::Flags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
: int(Dst::MaxRowsAtCompileTime),
MaxSizeAtCompileTime = Dst::SizeAtCompileTime,
MightEnableVml = StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess && Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1,
MightLinearize = MightEnableVml && (int(Dst::Flags) & int(Src::Flags) & LinearAccessBit),
VmlSize = MightLinearize ? MaxSizeAtCompileTime : InnerMaxSize,
LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD
};
public:
enum {
EnableVml = MightEnableVml && LargeEnough,
Traversal = MightLinearize ? LinearTraversal : DefaultTraversal
};
};
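// Illustrative example (an addition): with EIGEN_USE_MKL_VML enabled and a contiguous
// expression whose size reaches EIGEN_MKL_VML_THRESHOLD, the traits above select EnableVml
// and a LinearTraversal call, so something like
//
//   Eigen::ArrayXd a = Eigen::ArrayXd::Random(4096);
//   Eigen::ArrayXd b = a.sin();   // expected to dispatch to MKL's vmdSin declared below
//
// while small or strided expressions keep Eigen's built-in path.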
#define EIGEN_PP_EXPAND(ARG) ARG
#if !defined (EIGEN_FAST_MATH) || (EIGEN_FAST_MATH != 1)
#define EIGEN_VMLMODE_EXPAND_LA , VML_HA
#else
#define EIGEN_VMLMODE_EXPAND_LA , VML_LA
#endif
#define EIGEN_VMLMODE_EXPAND__
#define EIGEN_VMLMODE_PREFIX_LA vm
#define EIGEN_VMLMODE_PREFIX__ v
#define EIGEN_VMLMODE_PREFIX(VMLMODE) EIGEN_CAT(EIGEN_VMLMODE_PREFIX_,VMLMODE)
#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \
template< typename DstXprType, typename SrcXprNested> \
struct Assignment<DstXprType, CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested>, assign_op<EIGENTYPE,EIGENTYPE>, \
Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>::type> { \
typedef CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested> SrcXprType; \
static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &func) { \
resize_if_allowed(dst, src, func); \
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \
if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) { \
VMLOP(dst.size(), (const VMLTYPE*)src.nestedExpression().data(), \
(VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE) ); \
} else { \
const Index outerSize = dst.outerSize(); \
for(Index outer = 0; outer < outerSize; ++outer) { \
const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) : \
&(src.nestedExpression().coeffRef(0, outer)); \
EIGENTYPE *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); \
VMLOP( dst.innerSize(), (const VMLTYPE*)src_ptr, \
(VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE)); \
} \
} \
} \
};
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),s##VMLOP), float, float, VMLMODE) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),d##VMLOP), double, double, VMLMODE)
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),c##VMLOP), scomplex, MKL_Complex8, VMLMODE) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),z##VMLOP), dcomplex, MKL_Complex16, VMLMODE)
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP, VMLMODE) \
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE) \
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sin, Sin, LA)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(asin, Asin, LA)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sinh, Sinh, LA)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(cos, Cos, LA)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(acos, Acos, LA)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(cosh, Cosh, LA)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(tan, Tan, LA)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(atan, Atan, LA)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(tanh, Tanh, LA)
// EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs, Abs, _)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(exp, Exp, LA)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(log, Ln, LA)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(log10, Log10, LA)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sqrt, Sqrt, _)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr, _)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(arg, Arg, _)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(round, Round, _)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(floor, Floor, _)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(ceil, Ceil, _)
#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \
template< typename DstXprType, typename SrcXprNested, typename Plain> \
struct Assignment<DstXprType, CwiseBinaryOp<scalar_##EIGENOP##_op<EIGENTYPE,EIGENTYPE>, SrcXprNested, \
const CwiseNullaryOp<internal::scalar_constant_op<EIGENTYPE>,Plain> >, assign_op<EIGENTYPE,EIGENTYPE>, \
Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>::type> { \
typedef CwiseBinaryOp<scalar_##EIGENOP##_op<EIGENTYPE,EIGENTYPE>, SrcXprNested, \
const CwiseNullaryOp<internal::scalar_constant_op<EIGENTYPE>,Plain> > SrcXprType; \
static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &func) { \
resize_if_allowed(dst, src, func); \
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \
VMLTYPE exponent = reinterpret_cast<const VMLTYPE&>(src.rhs().functor().m_other); \
if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) \
{ \
VMLOP( dst.size(), (const VMLTYPE*)src.lhs().data(), exponent, \
(VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE) ); \
} else { \
const Index outerSize = dst.outerSize(); \
for(Index outer = 0; outer < outerSize; ++outer) { \
const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.lhs().coeffRef(outer,0)) : \
&(src.lhs().coeffRef(0, outer)); \
EIGENTYPE *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); \
VMLOP( dst.innerSize(), (const VMLTYPE*)src_ptr, exponent, \
(VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE)); \
} \
} \
} \
};
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmsPowx, float, float, LA)
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdPowx, double, double, LA)
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcPowx, scomplex, MKL_Complex8, LA)
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzPowx, dcomplex, MKL_Complex16, LA)
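// Illustrative example (an addition): the pow specialization above matches expressions of
// the form array.pow(scalar), e.g.
//
//   Eigen::ArrayXf c = Eigen::ArrayXf::Random(4096).abs();
//   Eigen::ArrayXf d = c.pow(2.5f);   // expected to dispatch to vmsPowx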
} // end namespace internal
@@ -32,7 +32,7 @@ class BandMatrixBase : public EigenBase<Derived>
};
typedef typename internal::traits<Derived>::Scalar Scalar;
typedef Matrix<Scalar,RowsAtCompileTime,ColsAtCompileTime> DenseMatrixType;
typedef typename DenseMatrixType::StorageIndex StorageIndex;
typedef typename internal::traits<Derived>::CoefficientsType CoefficientsType;
typedef EigenBase<Derived> Base;
@@ -161,12 +161,12 @@ class BandMatrixBase : public EigenBase<Derived>
*
* \brief Represents a rectangular matrix with a banded storage
*
* \tparam _Scalar Numeric type, i.e. float, double, int
* \tparam _Rows Number of rows, or \b Dynamic
* \tparam _Cols Number of columns, or \b Dynamic
* \tparam _Supers Number of super diagonals
* \tparam _Subs Number of sub diagonals
* \tparam _Options A combination of either \b #RowMajor or \b #ColMajor, and of \b #SelfAdjoint
* The former controls \ref TopicStorageOrders "storage order", and defaults to
* column-major. The latter controls whether the matrix represents a selfadjoint
* matrix, in which case either Supers or Subs have to be null.
@@ -179,7 +179,7 @@ struct traits<BandMatrix<_Scalar,_Rows,_Cols,_Supers,_Subs,_Options> >
{
typedef _Scalar Scalar;
typedef Dense StorageKind;
typedef Eigen::Index StorageIndex;
enum {
CoeffReadCost = NumTraits<Scalar>::ReadCost,
RowsAtCompileTime = _Rows,
@@ -201,10 +201,10 @@ class BandMatrix : public BandMatrixBase<BandMatrix<_Scalar,Rows,Cols,Supers,Subs,Options> >
public:
typedef typename internal::traits<BandMatrix>::Scalar Scalar;
typedef typename internal::traits<BandMatrix>::StorageIndex StorageIndex;
typedef typename internal::traits<BandMatrix>::CoefficientsType CoefficientsType;
explicit inline BandMatrix(Index rows=Rows, Index cols=Cols, Index supers=Supers, Index subs=Subs)
: m_coeffs(1+supers+subs,cols),
m_rows(rows), m_supers(supers), m_subs(subs)
{
@@ -241,7 +241,7 @@ struct traits<BandMatrixWrapper<_CoefficientsType,_Rows,_Cols,_Supers,_Subs,_Options> >
{
typedef typename _CoefficientsType::Scalar Scalar;
typedef typename _CoefficientsType::StorageKind StorageKind;
typedef typename _CoefficientsType::StorageIndex StorageIndex;
enum {
CoeffReadCost = internal::traits<_CoefficientsType>::CoeffReadCost,
RowsAtCompileTime = _Rows,
@@ -264,9 +264,9 @@ class BandMatrixWrapper : public BandMatrixBase<BandMatrixWrapper<_CoefficientsType,_Rows,_Cols,_Supers,_Subs,_Options> >
typedef typename internal::traits<BandMatrixWrapper>::Scalar Scalar;
typedef typename internal::traits<BandMatrixWrapper>::CoefficientsType CoefficientsType;
typedef typename internal::traits<BandMatrixWrapper>::StorageIndex StorageIndex;
explicit inline BandMatrixWrapper(const CoefficientsType& coeffs, Index rows=_Rows, Index cols=_Cols, Index supers=_Supers, Index subs=_Subs)
: m_coeffs(coeffs),
m_rows(rows), m_supers(supers), m_subs(subs)
{
@@ -302,9 +302,9 @@ class BandMatrixWrapper : public BandMatrixBase<BandMatrixWrapper<_CoefficientsType,_Rows,_Cols,_Supers,_Subs,_Options> >
*
* \brief Represents a tridiagonal matrix with a compact banded storage
*
* \tparam Scalar Numeric type, i.e. float, double, int
* \tparam Size Number of rows and cols, or \b Dynamic
* \tparam Options Can be 0 or \b SelfAdjoint
*
* \sa class BandMatrix
*/
@@ -312,9 +312,9 @@ template<typename Scalar, int Size, int Options>
class TridiagonalMatrix : public BandMatrix<Scalar,Size,Size,Options&SelfAdjoint?0:1,1,Options|RowMajor>
{
typedef BandMatrix<Scalar,Size,Size,Options&SelfAdjoint?0:1,1,Options|RowMajor> Base;
typedef typename Base::StorageIndex StorageIndex;
public:
explicit TridiagonalMatrix(Index size = Size) : Base(size,size,Options&SelfAdjoint?0:1,1) {}
inline typename Base::template DiagonalIntReturnType<1>::Type super()
{ return Base::template diagonal<1>(); }
@@ -327,6 +327,25 @@ class TridiagonalMatrix
protected:
};
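// Illustrative usage sketch (an addition): filling a compactly stored tridiagonal matrix
// through the diagonal accessors defined above:
//
//   Eigen::internal::TridiagonalMatrix<double,Eigen::Dynamic> T(5);
//   T.diagonal().setConstant(2.0);  // main diagonal
//   T.super().setConstant(-1.0);    // first super-diagonal
//   T.sub().setConstant(-1.0);      // first sub-diagonal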
struct BandShape {};
template<typename _Scalar, int _Rows, int _Cols, int _Supers, int _Subs, int _Options>
struct evaluator_traits<BandMatrix<_Scalar,_Rows,_Cols,_Supers,_Subs,_Options> >
: public evaluator_traits_base<BandMatrix<_Scalar,_Rows,_Cols,_Supers,_Subs,_Options> >
{
typedef BandShape Shape;
};
template<typename _CoefficientsType,int _Rows, int _Cols, int _Supers, int _Subs,int _Options>
struct evaluator_traits<BandMatrixWrapper<_CoefficientsType,_Rows,_Cols,_Supers,_Subs,_Options> >
: public evaluator_traits_base<BandMatrixWrapper<_CoefficientsType,_Rows,_Cols,_Supers,_Subs,_Options> >
{
typedef BandShape Shape;
};
template<> struct AssignmentKind<DenseShape,BandShape> { typedef EigenBase2EigenBase Kind; };
} // end namespace internal
} // end namespace Eigen
@@ -13,38 +13,6 @@
namespace Eigen {
namespace internal {
template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprType>
@@ -52,7 +20,7 @@ struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprType>
{
typedef typename traits<XprType>::Scalar Scalar;
typedef typename traits<XprType>::StorageKind StorageKind;
typedef typename traits<XprType>::XprKind XprKind;
typedef typename ref_selector<XprType>::type XprTypeNested;
typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
enum{
MatrixRows = traits<XprType>::RowsAtCompileTime,
@@ -65,10 +33,10 @@ struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprType>
MaxColsAtCompileTime = BlockCols==0 ? 0
: ColsAtCompileTime != Dynamic ? int(ColsAtCompileTime)
: int(traits<XprType>::MaxColsAtCompileTime),
XprTypeIsRowMajor = (int(traits<XprType>::Flags)&RowMajorBit) != 0,
IsRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
: (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
: XprTypeIsRowMajor,
HasSameStorageOrderAsXprType = (IsRowMajor == XprTypeIsRowMajor),
InnerSize = IsRowMajor ? int(ColsAtCompileTime) : int(RowsAtCompileTime),
InnerStrideAtCompileTime = HasSameStorageOrderAsXprType
? int(inner_stride_at_compile_time<XprType>::ret)
: int(outer_stride_at_compile_time<XprType>::ret),
OuterStrideAtCompileTime = HasSameStorageOrderAsXprType
? int(outer_stride_at_compile_time<XprType>::ret)
: int(inner_stride_at_compile_time<XprType>::ret),
// FIXME, this traits is rather specialized for dense objects and needs to be cleaned further
FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0,
FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0,
Flags = (traits<XprType>::Flags & (DirectAccessBit | (InnerPanel?CompressedAccessBit:0))) | FlagsLvalueBit | FlagsRowMajorBit,
// FIXME DirectAccessBit should not be handled by expressions
//
// Alignment is needed by MapBase's assertions
// We can safely set it to zero here. Internal alignment errors will be detected by an eigen_internal_assert in the respective evaluator
Alignment = 0
};
};
@@ -100,6 +66,40 @@
template<typename XprType, int BlockRows=Dynamic, int BlockCols=Dynamic, bool InnerPanel = false> class Block;
template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, typename StorageKind> class BlockImpl;
/** \class Block
* \ingroup Core_Module
*
* \brief Expression of a fixed-size or dynamic-size block
*
* \tparam XprType the type of the expression in which we are taking a block
* \tparam BlockRows the number of rows of the block we are taking at compile time (optional)
* \tparam BlockCols the number of columns of the block we are taking at compile time (optional)
* \tparam InnerPanel is true if the block maps to a set of rows of a row-major matrix, or
* to a set of columns of a column-major matrix (optional). This parameter makes it possible
* to determine at compile time whether aligned access to the block expression is possible.
*
* This class represents an expression of either a fixed-size or dynamic-size block. It is the return
* type of DenseBase::block(Index,Index,Index,Index) and DenseBase::block<int,int>(Index,Index) and
* most of the time this is the only way it is used.
*
* However, if you want to directly manipulate block expressions,
* for instance if you want to write a function returning such an expression, you
* will need to use this class.
*
* Here is an example illustrating the dynamic case:
* \include class_Block.cpp
* Output: \verbinclude class_Block.out
*
* \note Even though this expression has dynamic size, in the case where \a XprType
* has fixed size, this expression inherits a fixed maximal size which means that evaluating
* it does not cause a dynamic memory allocation.
*
* Here is an example illustrating the fixed-size case:
* \include class_FixedBlock.cpp
* Output: \verbinclude class_FixedBlock.out
*
* \sa DenseBase::block(Index,Index,Index,Index), DenseBase::block(Index,Index), class VectorBlock
*/
template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class Block
: public BlockImpl<XprType, BlockRows, BlockCols, InnerPanel, typename internal::traits<XprType>::StorageKind>
{
@@ -110,8 +110,11 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class Block
EIGEN_GENERIC_PUBLIC_INTERFACE(Block)
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Block)
typedef typename internal::remove_all<XprType>::type NestedExpression;
/** Column or Row constructor
*/
EIGEN_DEVICE_FUNC
inline Block(XprType& xpr, Index i) : Impl(xpr,i)
{
eigen_assert( (i>=0) && (
@@ -121,25 +124,27 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class Block
/** Fixed-size constructor
*/
EIGEN_DEVICE_FUNC
inline Block(XprType& xpr, Index startRow, Index startCol)
: Impl(xpr, startRow, startCol)
{
EIGEN_STATIC_ASSERT(RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic,THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE)
eigen_assert(startRow >= 0 && BlockRows >= 0 && startRow + BlockRows <= xpr.rows()
&& startCol >= 0 && BlockCols >= 0 && startCol + BlockCols <= xpr.cols());
}
/** Dynamic-size constructor
*/
EIGEN_DEVICE_FUNC
inline Block(XprType& xpr,
Index startRow, Index startCol,
Index blockRows, Index blockCols)
: Impl(xpr, startRow, startCol, blockRows, blockCols)
{
eigen_assert((RowsAtCompileTime==Dynamic || RowsAtCompileTime==blockRows)
&& (ColsAtCompileTime==Dynamic || ColsAtCompileTime==blockCols));
eigen_assert(startRow >= 0 && blockRows >= 0 && startRow <= xpr.rows() - blockRows
&& startCol >= 0 && blockCols >= 0 && startCol <= xpr.cols() - blockCols);
}
};
@@ -150,14 +155,15 @@ class BlockImpl<XprType, BlockRows, BlockCols, InnerPanel, Dense>
: public internal::BlockImpl_dense<XprType, BlockRows, BlockCols, InnerPanel>
{
typedef internal::BlockImpl_dense<XprType, BlockRows, BlockCols, InnerPanel> Impl;
typedef typename XprType::StorageIndex StorageIndex;
public:
typedef Impl Base;
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl)
EIGEN_DEVICE_FUNC inline BlockImpl(XprType& xpr, Index i) : Impl(xpr,i) {}
EIGEN_DEVICE_FUNC inline BlockImpl(XprType& xpr, Index startRow, Index startCol) : Impl(xpr, startRow, startCol) {}
EIGEN_DEVICE_FUNC
inline BlockImpl(XprType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols)
: Impl(xpr, startRow, startCol, blockRows, blockCols) {}
};
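// Illustrative sketch (an addition), following the note in the class documentation above
// about functions returning block expressions:
//
//   template<typename Derived>
//   Eigen::Block<Derived> topLeft2x2(Eigen::MatrixBase<Derived>& m)
//   { return Eigen::Block<Derived>(m.derived(), 0, 0, 2, 2); }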
namespace internal { namespace internal {
...@@ -167,16 +173,18 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H ...@@ -167,16 +173,18 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
: public internal::dense_xpr_base<Block<XprType, BlockRows, BlockCols, InnerPanel> >::type : public internal::dense_xpr_base<Block<XprType, BlockRows, BlockCols, InnerPanel> >::type
{ {
typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType; typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType;
typedef typename internal::ref_selector<XprType>::non_const_type XprTypeNested;
public: public:
typedef typename internal::dense_xpr_base<BlockType>::type Base; typedef typename internal::dense_xpr_base<BlockType>::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(BlockType) EIGEN_DENSE_PUBLIC_INTERFACE(BlockType)
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl_dense) EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl_dense)
class InnerIterator; // class InnerIterator; // FIXME apparently never used
/** Column or Row constructor /** Column or Row constructor
*/ */
EIGEN_DEVICE_FUNC
inline BlockImpl_dense(XprType& xpr, Index i) inline BlockImpl_dense(XprType& xpr, Index i)
: m_xpr(xpr), : m_xpr(xpr),
// It is a row if and only if BlockRows==1 and BlockCols==XprType::ColsAtCompileTime, // It is a row if and only if BlockRows==1 and BlockCols==XprType::ColsAtCompileTime,
...@@ -191,75 +199,76 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H ...@@ -191,75 +199,76 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
/** Fixed-size constructor /** Fixed-size constructor
*/ */
inline BlockImpl_dense(XprType& xpr, Index a_startRow, Index a_startCol) EIGEN_DEVICE_FUNC
: m_xpr(xpr), m_startRow(a_startRow), m_startCol(a_startCol), inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol)
: m_xpr(xpr), m_startRow(startRow), m_startCol(startCol),
m_blockRows(BlockRows), m_blockCols(BlockCols) m_blockRows(BlockRows), m_blockCols(BlockCols)
{} {}
/** Dynamic-size constructor /** Dynamic-size constructor
*/ */
EIGEN_DEVICE_FUNC
inline BlockImpl_dense(XprType& xpr, inline BlockImpl_dense(XprType& xpr,
Index a_startRow, Index a_startCol, Index startRow, Index startCol,
Index blockRows, Index blockCols) Index blockRows, Index blockCols)
: m_xpr(xpr), m_startRow(a_startRow), m_startCol(a_startCol), : m_xpr(xpr), m_startRow(startRow), m_startCol(startCol),
m_blockRows(blockRows), m_blockCols(blockCols) m_blockRows(blockRows), m_blockCols(blockCols)
{} {}
inline Index rows() const { return m_blockRows.value(); } EIGEN_DEVICE_FUNC inline Index rows() const { return m_blockRows.value(); }
inline Index cols() const { return m_blockCols.value(); } EIGEN_DEVICE_FUNC inline Index cols() const { return m_blockCols.value(); }
EIGEN_DEVICE_FUNC
inline Scalar& coeffRef(Index rowId, Index colId) inline Scalar& coeffRef(Index rowId, Index colId)
{ {
EIGEN_STATIC_ASSERT_LVALUE(XprType) EIGEN_STATIC_ASSERT_LVALUE(XprType)
return m_xpr.const_cast_derived() return m_xpr.coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
.coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
} }
EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index rowId, Index colId) const inline const Scalar& coeffRef(Index rowId, Index colId) const
{ {
return m_xpr.derived() return m_xpr.derived().coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
.coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
} }
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index rowId, Index colId) const EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index rowId, Index colId) const
{ {
return m_xpr.coeff(rowId + m_startRow.value(), colId + m_startCol.value()); return m_xpr.coeff(rowId + m_startRow.value(), colId + m_startCol.value());
} }
EIGEN_DEVICE_FUNC
inline Scalar& coeffRef(Index index) inline Scalar& coeffRef(Index index)
{ {
EIGEN_STATIC_ASSERT_LVALUE(XprType) EIGEN_STATIC_ASSERT_LVALUE(XprType)
return m_xpr.const_cast_derived() return m_xpr.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0)); m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
} }
EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index index) const inline const Scalar& coeffRef(Index index) const
{ {
return m_xpr.const_cast_derived() return m_xpr.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0)); m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
} }
EIGEN_DEVICE_FUNC
inline const CoeffReturnType coeff(Index index) const inline const CoeffReturnType coeff(Index index) const
{ {
return m_xpr return m_xpr.coeff(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
.coeff(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0)); m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
} }
    template<int LoadMode>
    inline PacketScalar packet(Index rowId, Index colId) const
    {
      return m_xpr.template packet<Unaligned>(rowId + m_startRow.value(), colId + m_startCol.value());
    }

    template<int LoadMode>
    inline void writePacket(Index rowId, Index colId, const PacketScalar& val)
    {
      m_xpr.template writePacket<Unaligned>(rowId + m_startRow.value(), colId + m_startCol.value(), val);
    }

    template<int LoadMode>
@@ -273,40 +282,46 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
    template<int LoadMode>
    inline void writePacket(Index index, const PacketScalar& val)
    {
      m_xpr.template writePacket<Unaligned>
         (m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
          m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0), val);
    }

#ifdef EIGEN_PARSED_BY_DOXYGEN
    /** \sa MapBase::data() */
    EIGEN_DEVICE_FUNC inline const Scalar* data() const;
    EIGEN_DEVICE_FUNC inline Index innerStride() const;
    EIGEN_DEVICE_FUNC inline Index outerStride() const;
#endif

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<XprTypeNested>::type& nestedExpression() const
    {
      return m_xpr;
    }

    EIGEN_DEVICE_FUNC
    XprType& nestedExpression() { return m_xpr; }

    EIGEN_DEVICE_FUNC
    StorageIndex startRow() const
    {
      return m_startRow.value();
    }

    EIGEN_DEVICE_FUNC
    StorageIndex startCol() const
    {
      return m_startCol.value();
    }

  protected:

    XprTypeNested m_xpr;
    const internal::variable_if_dynamic<StorageIndex, (XprType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow;
    const internal::variable_if_dynamic<StorageIndex, (XprType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol;
    const internal::variable_if_dynamic<StorageIndex, RowsAtCompileTime> m_blockRows;
    const internal::variable_if_dynamic<StorageIndex, ColsAtCompileTime> m_blockCols;
};

/** \internal Internal implementation of dense Blocks in the direct access case.*/
@@ -315,6 +330,10 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
  : public MapBase<Block<XprType, BlockRows, BlockCols, InnerPanel> >
{
    typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType;
    typedef typename internal::ref_selector<XprType>::non_const_type XprTypeNested;
    enum {
      XprTypeIsRowMajor = (int(traits<XprType>::Flags)&RowMajorBit) != 0
    };
  public:

    typedef MapBase<BlockType> Base;
@@ -323,42 +342,52 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
    /** Column or Row constructor
      */
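    // Note: the Base pointer passed below is xpr.data() advanced by i inner
    // strides when taking a row of a column-major xpr (or a column of a
    // row-major one), and by i outer strides otherwise.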
    EIGEN_DEVICE_FUNC
    inline BlockImpl_dense(XprType& xpr, Index i)
      : Base(xpr.data() + i * ( ((BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) && (!XprTypeIsRowMajor))
                             || ((BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) && ( XprTypeIsRowMajor)) ? xpr.innerStride() : xpr.outerStride()),
             BlockRows==1 ? 1 : xpr.rows(),
             BlockCols==1 ? 1 : xpr.cols()),
        m_xpr(xpr),
        m_startRow( (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0),
        m_startCol( (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0)
    {
      init();
    }

    /** Fixed-size constructor
      */
    EIGEN_DEVICE_FUNC
    inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol)
      : Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol)),
        m_xpr(xpr), m_startRow(startRow), m_startCol(startCol)
    {
      init();
    }

    /** Dynamic-size constructor
      */
    EIGEN_DEVICE_FUNC
    inline BlockImpl_dense(XprType& xpr,
          Index startRow, Index startCol,
          Index blockRows, Index blockCols)
      : Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol), blockRows, blockCols),
        m_xpr(xpr), m_startRow(startRow), m_startCol(startCol)
    {
      init();
    }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<XprTypeNested>::type& nestedExpression() const
    {
      return m_xpr;
    }

    EIGEN_DEVICE_FUNC
    XprType& nestedExpression() { return m_xpr; }

    /** \sa MapBase::innerStride() */
    EIGEN_DEVICE_FUNC
    inline Index innerStride() const
    {
      return internal::traits<BlockType>::HasSameStorageOrderAsXprType
@@ -367,11 +396,24 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
    }

    /** \sa MapBase::outerStride() */
    EIGEN_DEVICE_FUNC
    inline Index outerStride() const
    {
      return m_outerStride;
    }

    EIGEN_DEVICE_FUNC
    StorageIndex startRow() const
    {
      return m_startRow.value();
    }

    EIGEN_DEVICE_FUNC
    StorageIndex startCol() const
    {
      return m_startCol.value();
    }

#ifndef __SUNPRO_CC
    // FIXME sunstudio is not friendly with the above friend...
    // META-FIXME there is no 'friend' keyword around here. Is this obsolete?
@@ -380,6 +422,7 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
#ifndef EIGEN_PARSED_BY_DOXYGEN
    /** \internal used by allowAligned() */
    EIGEN_DEVICE_FUNC
    inline BlockImpl_dense(XprType& xpr, const Scalar* data, Index blockRows, Index blockCols)
      : Base(data, blockRows, blockCols), m_xpr(xpr)
    {
@@ -388,6 +431,7 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
#endif

  protected:

    EIGEN_DEVICE_FUNC
    void init()
    {
      m_outerStride = internal::traits<BlockType>::HasSameStorageOrderAsXprType
@@ -395,7 +439,9 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
                    : m_xpr.innerStride();
    }

    XprTypeNested m_xpr;
    const internal::variable_if_dynamic<StorageIndex, (XprType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow;
    const internal::variable_if_dynamic<StorageIndex, (XprType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol;
    Index m_outerStride;
};
...
@@ -17,9 +17,10 @@ namespace internal {
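// The unrollers below operate on an evaluator rather than on the expression
// itself; Derived::ExpressionTraits recovers the compile-time dimensions of
// the underlying expression.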
template<typename Derived, int UnrollCount>
struct all_unroller
{
  typedef typename Derived::ExpressionTraits Traits;
  enum {
    col = (UnrollCount-1) / Traits::RowsAtCompileTime,
    row = (UnrollCount-1) % Traits::RowsAtCompileTime
  };

  static inline bool run(const Derived &mat)
@@ -43,9 +44,10 @@ struct all_unroller<Derived, Dynamic>
template<typename Derived, int UnrollCount>
struct any_unroller
{
  typedef typename Derived::ExpressionTraits Traits;
  enum {
    col = (UnrollCount-1) / Traits::RowsAtCompileTime,
    row = (UnrollCount-1) % Traits::RowsAtCompileTime
  };

  static inline bool run(const Derived &mat)
@@ -78,19 +80,19 @@ struct any_unroller<Derived, Dynamic>
template<typename Derived>
inline bool DenseBase<Derived>::all() const
{
  typedef internal::evaluator<Derived> Evaluator;
  enum {
    unroll = SizeAtCompileTime != Dynamic
          && SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT
  };
  Evaluator evaluator(derived());
  if(unroll)
    return internal::all_unroller<Evaluator, unroll ? int(SizeAtCompileTime) : Dynamic>::run(evaluator);
  else
  {
    for(Index j = 0; j < cols(); ++j)
      for(Index i = 0; i < rows(); ++i)
        if (!evaluator.coeff(i, j)) return false;
    return true;
  }
}
@@ -102,19 +104,19 @@ inline bool DenseBase<Derived>::all() const
template<typename Derived>
inline bool DenseBase<Derived>::any() const
{
  typedef internal::evaluator<Derived> Evaluator;
  enum {
    unroll = SizeAtCompileTime != Dynamic
          && SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT
  };
  Evaluator evaluator(derived());
  if(unroll)
    return internal::any_unroller<Evaluator, unroll ? int(SizeAtCompileTime) : Dynamic>::run(evaluator);
  else
  {
    for(Index j = 0; j < cols(); ++j)
      for(Index i = 0; i < rows(); ++i)
        if (evaluator.coeff(i, j)) return true;
    return false;
  }
}
@@ -124,7 +126,7 @@ inline bool DenseBase<Derived>::any() const
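 * A small illustration (the example matrix is arbitrary): count() returns the
 * number of coefficients that evaluate to true.
 * \code
 * Matrix2i m;
 * m << 1, 0,
 *      0, 2;
 * // m.count() == 2
 * \endcode
 *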
 * \sa all(), any()
 */
template<typename Derived>
inline Eigen::Index DenseBase<Derived>::count() const
{
  return derived().template cast<bool>().template cast<Index>().sum();
}
@@ -136,7 +138,11 @@ inline typename DenseBase<Derived>::Index DenseBase<Derived>::count() const
template<typename Derived>
inline bool DenseBase<Derived>::hasNaN() const
{
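  // With MSVC or -ffast-math the compiler may assume no NaNs exist and fold
  // the self-comparison below to true, so an explicit isNaN() test is used there.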
#if EIGEN_COMP_MSVC || (defined __FAST_MATH__)
  return derived().array().isNaN().any();
#else
  return !((derived().array()==derived().array()).all());
#endif
}
/** \returns true if \c *this contains only finite numbers, i.e., no NaN and no +/-INF values.
@@ -146,7 +152,11 @@ inline bool DenseBase<Derived>::hasNaN() const
template<typename Derived>
inline bool DenseBase<Derived>::allFinite() const
{
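  // Same caveat as in hasNaN(): under MSVC or -ffast-math the x-x subtraction
  // trick may be folded away, so an explicit isFinite() test is used there.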
#if EIGEN_COMP_MSVC || (defined __FAST_MATH__)
  return derived().array().isFinite().all();
#else
  return !((derived()-derived()).hasNaN());
#endif
}

} // end namespace Eigen

...
FILE(GLOB Eigen_Core_SRCS "*.h")
INSTALL(FILES
${Eigen_Core_SRCS}
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core COMPONENT Devel
)
ADD_SUBDIRECTORY(products)
ADD_SUBDIRECTORY(util)
ADD_SUBDIRECTORY(arch)
@@ -22,14 +22,14 @@ namespace Eigen {
 * the return type of MatrixBase::operator<<, and most of the time this is the only
 * way it is used.
 *
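 * A minimal usage sketch (the example values are arbitrary):
 * \code
 * Matrix3i m;
 * m << 1, 2, 3,
 *      4, 5, 6,
 *      7, 8, 9;
 * \endcode
 *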
* \sa \ref MatrixBaseCommaInitRef "MatrixBase::operator<<", CommaInitializer::finished() * \sa \blank \ref MatrixBaseCommaInitRef "MatrixBase::operator<<", CommaInitializer::finished()
*/ */
template<typename XprType> template<typename XprType>
struct CommaInitializer struct CommaInitializer
{ {
typedef typename XprType::Scalar Scalar; typedef typename XprType::Scalar Scalar;
typedef typename XprType::Index Index;
EIGEN_DEVICE_FUNC
inline CommaInitializer(XprType& xpr, const Scalar& s) inline CommaInitializer(XprType& xpr, const Scalar& s)
: m_xpr(xpr), m_row(0), m_col(1), m_currentBlockRows(1) : m_xpr(xpr), m_row(0), m_col(1), m_currentBlockRows(1)
{ {
...@@ -37,6 +37,7 @@ struct CommaInitializer ...@@ -37,6 +37,7 @@ struct CommaInitializer
} }
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_DEVICE_FUNC
inline CommaInitializer(XprType& xpr, const DenseBase<OtherDerived>& other) inline CommaInitializer(XprType& xpr, const DenseBase<OtherDerived>& other)
: m_xpr(xpr), m_row(0), m_col(other.cols()), m_currentBlockRows(other.rows()) : m_xpr(xpr), m_row(0), m_col(other.cols()), m_currentBlockRows(other.rows())
{ {
...@@ -46,6 +47,7 @@ struct CommaInitializer ...@@ -46,6 +47,7 @@ struct CommaInitializer
/* Copy/Move constructor which transfers ownership. This is crucial in /* Copy/Move constructor which transfers ownership. This is crucial in
* absence of return value optimization to avoid assertions during destruction. */ * absence of return value optimization to avoid assertions during destruction. */
// FIXME in C++11 mode this could be replaced by a proper RValue constructor // FIXME in C++11 mode this could be replaced by a proper RValue constructor
EIGEN_DEVICE_FUNC
inline CommaInitializer(const CommaInitializer& o) inline CommaInitializer(const CommaInitializer& o)
: m_xpr(o.m_xpr), m_row(o.m_row), m_col(o.m_col), m_currentBlockRows(o.m_currentBlockRows) { : m_xpr(o.m_xpr), m_row(o.m_row), m_col(o.m_col), m_currentBlockRows(o.m_currentBlockRows) {
// Mark original object as finished. In absence of R-value references we need to const_cast: // Mark original object as finished. In absence of R-value references we need to const_cast:
...@@ -55,6 +57,7 @@ struct CommaInitializer ...@@ -55,6 +57,7 @@ struct CommaInitializer
} }
/* inserts a scalar value in the target matrix */ /* inserts a scalar value in the target matrix */
EIGEN_DEVICE_FUNC
CommaInitializer& operator,(const Scalar& s) CommaInitializer& operator,(const Scalar& s)
{ {
if (m_col==m_xpr.cols()) if (m_col==m_xpr.cols())
...@@ -74,11 +77,10 @@ struct CommaInitializer ...@@ -74,11 +77,10 @@ struct CommaInitializer
/* inserts a matrix expression in the target matrix */ /* inserts a matrix expression in the target matrix */
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_DEVICE_FUNC
CommaInitializer& operator,(const DenseBase<OtherDerived>& other) CommaInitializer& operator,(const DenseBase<OtherDerived>& other)
{ {
if(other.cols()==0 || other.rows()==0) if (m_col==m_xpr.cols() && (other.cols()!=0 || other.rows()!=m_currentBlockRows))
return *this;
if (m_col==m_xpr.cols())
{ {
m_row+=m_currentBlockRows; m_row+=m_currentBlockRows;
m_col = 0; m_col = 0;
...@@ -86,24 +88,22 @@ struct CommaInitializer ...@@ -86,24 +88,22 @@ struct CommaInitializer
eigen_assert(m_row+m_currentBlockRows<=m_xpr.rows() eigen_assert(m_row+m_currentBlockRows<=m_xpr.rows()
&& "Too many rows passed to comma initializer (operator<<)"); && "Too many rows passed to comma initializer (operator<<)");
} }
eigen_assert(m_col<m_xpr.cols() eigen_assert((m_col + other.cols() <= m_xpr.cols())
&& "Too many coefficients passed to comma initializer (operator<<)"); && "Too many coefficients passed to comma initializer (operator<<)");
eigen_assert(m_currentBlockRows==other.rows()); eigen_assert(m_currentBlockRows==other.rows());
if (OtherDerived::SizeAtCompileTime != Dynamic) m_xpr.template block<OtherDerived::RowsAtCompileTime, OtherDerived::ColsAtCompileTime>
m_xpr.template block<OtherDerived::RowsAtCompileTime != Dynamic ? OtherDerived::RowsAtCompileTime : 1, (m_row, m_col, other.rows(), other.cols()) = other;
OtherDerived::ColsAtCompileTime != Dynamic ? OtherDerived::ColsAtCompileTime : 1>
(m_row, m_col) = other;
else
m_xpr.block(m_row, m_col, other.rows(), other.cols()) = other;
m_col += other.cols(); m_col += other.cols();
return *this; return *this;
} }
EIGEN_DEVICE_FUNC
inline ~CommaInitializer() inline ~CommaInitializer()
#if defined VERIFY_RAISES_ASSERT && (!defined EIGEN_NO_ASSERTION_CHECKING) && defined EIGEN_EXCEPTIONS
EIGEN_EXCEPTION_SPEC(Eigen::eigen_assert_exception)
#endif
{ {
eigen_assert((m_row+m_currentBlockRows) == m_xpr.rows() finished();
&& m_col == m_xpr.cols()
&& "Too few coefficients passed to comma initializer (operator<<)");
} }
/** \returns the built matrix once all its coefficients have been set. /** \returns the built matrix once all its coefficients have been set.
...@@ -113,7 +113,13 @@ struct CommaInitializer ...@@ -113,7 +113,13 @@ struct CommaInitializer
* quaternion.fromRotationMatrix((Matrix3f() << axis0, axis1, axis2).finished()); * quaternion.fromRotationMatrix((Matrix3f() << axis0, axis1, axis2).finished());
* \endcode * \endcode
*/ */
inline XprType& finished() { return m_xpr; } EIGEN_DEVICE_FUNC
inline XprType& finished() {
eigen_assert(((m_row+m_currentBlockRows) == m_xpr.rows() || m_xpr.cols() == 0)
&& m_col == m_xpr.cols()
&& "Too few coefficients passed to comma initializer (operator<<)");
return m_xpr;
}
XprType& m_xpr; // target expression XprType& m_xpr; // target expression
Index m_row; // current row id Index m_row; // current row id
......
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016 Rasmus Munk Larsen (rmlarsen@google.com)
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_CONDITIONESTIMATOR_H
#define EIGEN_CONDITIONESTIMATOR_H
namespace Eigen {
namespace internal {
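// Maps each entry v_i of v to v_i/|v_i| (entries equal to zero map to 1); this
// elementwise "sign" is the super-gradient direction used by the estimator below.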
template <typename Vector, typename RealVector, bool IsComplex>
struct rcond_compute_sign {
static inline Vector run(const Vector& v) {
const RealVector v_abs = v.cwiseAbs();
return (v_abs.array() == static_cast<typename Vector::RealScalar>(0))
.select(Vector::Ones(v.size()), v.cwiseQuotient(v_abs));
}
};
// Partial specialization to avoid elementwise division for real vectors.
template <typename Vector>
struct rcond_compute_sign<Vector, Vector, false> {
static inline Vector run(const Vector& v) {
return (v.array() < static_cast<typename Vector::RealScalar>(0))
.select(-Vector::Ones(v.size()), Vector::Ones(v.size()));
}
};
/**
* \returns an estimate of ||inv(matrix)||_1 given a decomposition of
* \a matrix that implements .solve() and .adjoint().solve() methods.
*
* This function implements Algorithms 4.1 and 5.1 from
* http://www.maths.manchester.ac.uk/~higham/narep/narep135.pdf
* which also forms the basis for the condition number estimators in
* LAPACK. Since at most 10 calls to the solve method of dec are
* performed, the total cost is O(dims^2), as opposed to O(dims^3)
* needed to compute the inverse matrix explicitly.
*
* The most common usage is in estimating the condition number
* ||matrix||_1 * ||inv(matrix)||_1. The first term ||matrix||_1 can be
* computed directly in O(n^2) operations.
*
* Supports the following decompositions: FullPivLU, PartialPivLU, LDLT, and
* LLT.
*
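 * A hedged usage sketch (the matrix and decomposition are only illustrative):
 * \code
 * MatrixXd A = MatrixXd::Random(100, 100);
 * PartialPivLU<MatrixXd> lu(A);
 * double inv_norm_est = internal::rcond_invmatrix_L1_norm_estimate(lu);
 * // ||A||_1 is the maximum absolute column sum, computable directly:
 * double cond_est = A.cwiseAbs().colwise().sum().maxCoeff() * inv_norm_est;
 * \endcode
 *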
* \sa FullPivLU, PartialPivLU, LDLT, LLT.
*/
template <typename Decomposition>
typename Decomposition::RealScalar rcond_invmatrix_L1_norm_estimate(const Decomposition& dec)
{
typedef typename Decomposition::MatrixType MatrixType;
typedef typename Decomposition::Scalar Scalar;
typedef typename Decomposition::RealScalar RealScalar;
typedef typename internal::plain_col_type<MatrixType>::type Vector;
typedef typename internal::plain_col_type<MatrixType, RealScalar>::type RealVector;
const bool is_complex = (NumTraits<Scalar>::IsComplex != 0);
eigen_assert(dec.rows() == dec.cols());
const Index n = dec.rows();
if (n == 0)
return 0;
// Disable Index to float conversion warning
#ifdef __INTEL_COMPILER
#pragma warning push
#pragma warning ( disable : 2259 )
#endif
Vector v = dec.solve(Vector::Ones(n) / Scalar(n));
#ifdef __INTEL_COMPILER
#pragma warning pop
#endif
// lower_bound is a lower bound on
// ||inv(matrix)||_1 = sup_v ||inv(matrix) v||_1 / ||v||_1
// and is the objective maximized by the ("super-") gradient ascent
// algorithm below.
RealScalar lower_bound = v.template lpNorm<1>();
if (n == 1)
return lower_bound;
// Gradient ascent algorithm follows: We know that the optimum is achieved at
// one of the simplices v = e_i, so in each iteration we follow a
// super-gradient to move towards the optimal one.
RealScalar old_lower_bound = lower_bound;
Vector sign_vector(n);
Vector old_sign_vector;
Index v_max_abs_index = -1;
Index old_v_max_abs_index = v_max_abs_index;
for (int k = 0; k < 4; ++k)
{
sign_vector = internal::rcond_compute_sign<Vector, RealVector, is_complex>::run(v);
if (k > 0 && !is_complex && sign_vector == old_sign_vector) {
// Break if the solution stagnated.
break;
}
// v_max_abs_index = argmax |real( inv(matrix)^T * sign_vector )|
v = dec.adjoint().solve(sign_vector);
v.real().cwiseAbs().maxCoeff(&v_max_abs_index);
if (v_max_abs_index == old_v_max_abs_index) {
// Break if the solution stagnated.
break;
}
// Move to the new simplex e_j, where j = v_max_abs_index.
v = dec.solve(Vector::Unit(n, v_max_abs_index)); // v = inv(matrix) * e_j.
lower_bound = v.template lpNorm<1>();
if (lower_bound <= old_lower_bound) {
// Break if the gradient step did not increase the lower_bound.
break;
}
if (!is_complex) {
old_sign_vector = sign_vector;
}
old_v_max_abs_index = v_max_abs_index;
old_lower_bound = lower_bound;
}
// The following calculates an independent estimate of ||matrix||_1 by
// multiplying matrix by a vector with entries of slowly increasing
// magnitude and alternating sign:
// v_i = (-1)^{i} (1 + (i / (dim-1))), i = 0,...,dim-1.
// This improvement to Hager's algorithm above is due to Higham. It was
// added to make the algorithm more robust in certain corner cases where
// large elements in the matrix might otherwise escape detection due to
// exact cancellation (especially when op and op_adjoint correspond to a
// sequence of backsubstitutions and permutations), which could cause
// Hager's algorithm to vastly underestimate ||matrix||_1.
Scalar alternating_sign(RealScalar(1));
for (Index i = 0; i < n; ++i) {
// The static_cast is needed when Scalar is a complex and RealScalar implements expression templates
v[i] = alternating_sign * static_cast<RealScalar>(RealScalar(1) + (RealScalar(i) / (RealScalar(n - 1))));
alternating_sign = -alternating_sign;
}
v = dec.solve(v);
const RealScalar alternate_lower_bound = (2 * v.template lpNorm<1>()) / (3 * RealScalar(n));
return numext::maxi(lower_bound, alternate_lower_bound);
}
/** \brief Reciprocal condition number estimator.
*
* Computing a decomposition of a dense matrix takes O(n^3) operations, while
* this method estimates the condition number quickly and reliably in O(n^2)
* operations.
*
* \returns an estimate of the reciprocal condition number
* (1 / (||matrix||_1 * ||inv(matrix)||_1)) of matrix, given ||matrix||_1 and
* its decomposition. Supports the following decompositions: FullPivLU,
* PartialPivLU, LDLT, and LLT.
*
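 * A hedged usage sketch (names are illustrative; the first argument must be
 * the L1 norm of the matrix that was decomposed):
 * \code
 * MatrixXd A = MatrixXd::Random(50, 50);
 * double l1_norm = A.cwiseAbs().colwise().sum().maxCoeff();
 * double rcond = internal::rcond_estimate_helper(l1_norm, PartialPivLU<MatrixXd>(A));
 * \endcode
 *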
* \sa FullPivLU, PartialPivLU, LDLT, LLT.
*/
template <typename Decomposition>
typename Decomposition::RealScalar
rcond_estimate_helper(typename Decomposition::RealScalar matrix_norm, const Decomposition& dec)
{
typedef typename Decomposition::RealScalar RealScalar;
eigen_assert(dec.rows() == dec.cols());
if (dec.rows() == 0) return NumTraits<RealScalar>::infinity();
if (matrix_norm == RealScalar(0)) return RealScalar(0);
if (dec.rows() == 1) return RealScalar(1);
const RealScalar inverse_matrix_norm = rcond_invmatrix_L1_norm_estimate(dec);
return (inverse_matrix_norm == RealScalar(0) ? RealScalar(0)
: (RealScalar(1) / inverse_matrix_norm) / matrix_norm);
}
} // namespace internal
} // namespace Eigen
#endif