// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009 Rohit Garg <rpg.314@gmail.com>
// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_GEOMETRY_SSE_H
#define EIGEN_GEOMETRY_SSE_H

Don Gagne's avatar
Don Gagne committed
14 15
namespace Eigen { 

LM's avatar
LM committed
16 17 18
namespace internal {

template<class Derived, class OtherDerived>
19
struct quat_product<Architecture::SSE, Derived, OtherDerived, float>
LM's avatar
LM committed
20
{
21 22 23 24 25
  enum {
    AAlignment = traits<Derived>::Alignment,
    BAlignment = traits<OtherDerived>::Alignment,
    ResAlignment = traits<Quaternion<float> >::Alignment
  };
Don Gagne's avatar
Don Gagne committed
26
  static inline Quaternion<float> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b)
LM's avatar
LM committed
27 28
  {
    Quaternion<float> res;
29 30 31 32 33 34 35
    const __m128 mask = _mm_setr_ps(0.f,0.f,0.f,-0.f);
    __m128 a = _a.coeffs().template packet<AAlignment>(0);
    __m128 b = _b.coeffs().template packet<BAlignment>(0);
    __m128 s1 = _mm_mul_ps(vec4f_swizzle1(a,1,2,0,2),vec4f_swizzle1(b,2,0,1,2));
    __m128 s2 = _mm_mul_ps(vec4f_swizzle1(a,3,3,3,1),vec4f_swizzle1(b,0,1,2,1));
    pstoret<float,Packet4f,ResAlignment>(
              &res.x(),
LM's avatar
LM committed
36 37 38
              _mm_add_ps(_mm_sub_ps(_mm_mul_ps(a,vec4f_swizzle1(b,3,3,3,3)),
                                    _mm_mul_ps(vec4f_swizzle1(a,2,0,1,0),
                                               vec4f_swizzle1(b,1,2,0,0))),
39 40
                         _mm_xor_ps(mask,_mm_add_ps(s1,s2))));
    
LM's avatar
LM committed
41 42 43 44
    return res;
  }
};

45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
// SSE specialization of the quaternion conjugate for single precision.
//
// The four coefficients are read as one packet, the sign bits of the first
// three lanes are flipped with an XOR, and the packet is stored starting at
// the result's x() coefficient. The fourth lane is left unchanged.
template<class Derived>
struct quat_conj<Architecture::SSE, Derived, float>
{
  enum {
    ResAlignment = traits<Quaternion<float> >::Alignment
  };
  static inline Quaternion<float> run(const QuaternionBase<Derived>& q)
  {
    // -0.f carries only the sign bit; +0.f in the last lane is a no-op under XOR.
    const __m128 signFlip = _mm_setr_ps(-0.f,-0.f,-0.f,0.f);
    const __m128 input = q.coeffs().template packet<traits<Derived>::Alignment>(0);
    const __m128 negated = _mm_xor_ps(signFlip, input);

    Quaternion<float> conjugate;
    pstoret<float,Packet4f,ResAlignment>(&conjugate.x(), negated);
    return conjugate;
  }
};


LM's avatar
LM committed
61 62 63
template<typename VectorLhs,typename VectorRhs>
struct cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true>
{
64 65 66
  enum {
    ResAlignment = traits<typename plain_matrix_type<VectorLhs>::type>::Alignment
  };
Don Gagne's avatar
Don Gagne committed
67
  static inline typename plain_matrix_type<VectorLhs>::type
LM's avatar
LM committed
68 69
  run(const VectorLhs& lhs, const VectorRhs& rhs)
  {
70 71
    __m128 a = lhs.template packet<traits<VectorLhs>::Alignment>(0);
    __m128 b = rhs.template packet<traits<VectorRhs>::Alignment>(0);
LM's avatar
LM committed
72 73 74
    __m128 mul1=_mm_mul_ps(vec4f_swizzle1(a,1,2,0,3),vec4f_swizzle1(b,2,0,1,3));
    __m128 mul2=_mm_mul_ps(vec4f_swizzle1(a,2,0,1,3),vec4f_swizzle1(b,1,2,0,3));
    typename plain_matrix_type<VectorLhs>::type res;
75
    pstoret<float,Packet4f,ResAlignment>(&res.x(),_mm_sub_ps(mul1,mul2));
LM's avatar
LM committed
76 77 78 79 80 81 82 83
    return res;
  }
};




template<class Derived, class OtherDerived>
84
struct quat_product<Architecture::SSE, Derived, OtherDerived, double>
LM's avatar
LM committed
85
{
86 87 88 89 90
  enum {
    BAlignment = traits<OtherDerived>::Alignment,
    ResAlignment = traits<Quaternion<double> >::Alignment
  };

Don Gagne's avatar
Don Gagne committed
91
  static inline Quaternion<double> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b)
LM's avatar
LM committed
92 93 94 95 96 97
  {
  const Packet2d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));

  Quaternion<double> res;

  const double* a = _a.coeffs().data();
98 99
  Packet2d b_xy = _b.coeffs().template packet<BAlignment>(0);
  Packet2d b_zw = _b.coeffs().template packet<BAlignment>(2);
LM's avatar
LM committed
100 101 102 103 104 105 106 107 108 109 110 111 112 113 114
  Packet2d a_xx = pset1<Packet2d>(a[0]);
  Packet2d a_yy = pset1<Packet2d>(a[1]);
  Packet2d a_zz = pset1<Packet2d>(a[2]);
  Packet2d a_ww = pset1<Packet2d>(a[3]);

  // two temporaries:
  Packet2d t1, t2;

  /*
   * t1 = ww*xy + yy*zw
   * t2 = zz*xy - xx*zw
   * res.xy = t1 +/- swap(t2)
   */
  t1 = padd(pmul(a_ww, b_xy), pmul(a_yy, b_zw));
  t2 = psub(pmul(a_zz, b_xy), pmul(a_xx, b_zw));
Don Gagne's avatar
Don Gagne committed
115
#ifdef EIGEN_VECTORIZE_SSE3
LM's avatar
LM committed
116
  EIGEN_UNUSED_VARIABLE(mask)
117
  pstoret<double,Packet2d,ResAlignment>(&res.x(), _mm_addsub_pd(t1, preverse(t2)));
LM's avatar
LM committed
118
#else
119
  pstoret<double,Packet2d,ResAlignment>(&res.x(), padd(t1, pxor(mask,preverse(t2))));
LM's avatar
LM committed
120 121 122 123 124 125 126 127 128
#endif
  
  /*
   * t1 = ww*zw - yy*xy
   * t2 = zz*zw + xx*xy
   * res.zw = t1 -/+ swap(t2) = swap( swap(t1) +/- t2)
   */
  t1 = psub(pmul(a_ww, b_zw), pmul(a_yy, b_xy));
  t2 = padd(pmul(a_zz, b_zw), pmul(a_xx, b_xy));
Don Gagne's avatar
Don Gagne committed
129
#ifdef EIGEN_VECTORIZE_SSE3
LM's avatar
LM committed
130
  EIGEN_UNUSED_VARIABLE(mask)
131
  pstoret<double,Packet2d,ResAlignment>(&res.z(), preverse(_mm_addsub_pd(preverse(t1), t2)));
LM's avatar
LM committed
132
#else
133
  pstoret<double,Packet2d,ResAlignment>(&res.z(), psub(t1, pxor(mask,preverse(t2))));
LM's avatar
LM committed
134 135 136 137 138 139
#endif

  return res;
}
};

140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156
// SSE specialization of the quaternion conjugate for double precision.
//
// The coefficients are processed as two 2-double packets: the packet at index
// 0 has both lanes sign-flipped and is stored at the result's x(), the packet
// at index 2 has only its first lane sign-flipped and is stored at z().
template<class Derived>
struct quat_conj<Architecture::SSE, Derived, double>
{
  enum {
    ResAlignment = traits<Quaternion<double> >::Alignment
  };
  static inline Quaternion<double> run(const QuaternionBase<Derived>& q)
  {
    // -0. carries only the sign bit, so XOR with it negates a lane.
    const __m128d flipBothLanes = _mm_setr_pd(-0.,-0.);
    const __m128d flipFirstLane = _mm_setr_pd(-0.,0.);

    const __m128d lowerHalf = q.coeffs().template packet<traits<Derived>::Alignment>(0);
    const __m128d upperHalf = q.coeffs().template packet<traits<Derived>::Alignment>(2);

    Quaternion<double> conjugate;
    pstoret<double,Packet2d,ResAlignment>(&conjugate.x(), _mm_xor_pd(flipBothLanes, lowerHalf));
    pstoret<double,Packet2d,ResAlignment>(&conjugate.z(), _mm_xor_pd(flipFirstLane, upperHalf));
    return conjugate;
  }
};

LM's avatar
LM committed
157 158
} // end namespace internal

Don Gagne's avatar
Don Gagne committed
159 160
} // end namespace Eigen

LM's avatar
LM committed
161
#endif // EIGEN_GEOMETRY_SSE_H