[136] | 1 | // This file is part of Eigen, a lightweight C++ template library
|
---|
| 2 | // for linear algebra.
|
---|
| 3 | //
|
---|
| 4 | // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
|
---|
| 5 | // Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
---|
| 6 | //
|
---|
| 7 | // This Source Code Form is subject to the terms of the Mozilla
|
---|
| 8 | // Public License v. 2.0. If a copy of the MPL was not distributed
|
---|
| 9 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
---|
| 10 |
|
---|
| 11 | #ifndef EIGEN_COEFFBASED_PRODUCT_H
|
---|
| 12 | #define EIGEN_COEFFBASED_PRODUCT_H
|
---|
| 13 |
|
---|
| 14 | namespace Eigen {
|
---|
| 15 |
|
---|
| 16 | namespace internal {
|
---|
| 17 |
|
---|
| 18 | /*********************************************************************************
|
---|
| 19 | * Coefficient based product implementation.
|
---|
| 20 | * It is designed for the following use cases:
|
---|
| 21 | * - small fixed sizes
|
---|
| 22 | * - lazy products
|
---|
| 23 | *********************************************************************************/
|
---|
| 24 |
|
---|
| 25 | /* Since the all the dimensions of the product are small, here we can rely
|
---|
| 26 | * on the generic Assign mechanism to evaluate the product per coeff (or packet).
|
---|
| 27 | *
|
---|
| 28 | * Note that here the inner-loops should always be unrolled.
|
---|
| 29 | */
|
---|
| 30 |
|
---|
| 31 | template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
|
---|
| 32 | struct product_coeff_impl;
|
---|
| 33 |
|
---|
| 34 | template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
---|
| 35 | struct product_packet_impl;
|
---|
| 36 |
|
---|
| 37 | template<typename LhsNested, typename RhsNested, int NestingFlags>
|
---|
| 38 | struct traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
|
---|
| 39 | {
|
---|
| 40 | typedef MatrixXpr XprKind;
|
---|
| 41 | typedef typename remove_all<LhsNested>::type _LhsNested;
|
---|
| 42 | typedef typename remove_all<RhsNested>::type _RhsNested;
|
---|
| 43 | typedef typename scalar_product_traits<typename _LhsNested::Scalar, typename _RhsNested::Scalar>::ReturnType Scalar;
|
---|
| 44 | typedef typename promote_storage_type<typename traits<_LhsNested>::StorageKind,
|
---|
| 45 | typename traits<_RhsNested>::StorageKind>::ret StorageKind;
|
---|
| 46 | typedef typename promote_index_type<typename traits<_LhsNested>::Index,
|
---|
| 47 | typename traits<_RhsNested>::Index>::type Index;
|
---|
| 48 |
|
---|
| 49 | enum {
|
---|
| 50 | LhsCoeffReadCost = _LhsNested::CoeffReadCost,
|
---|
| 51 | RhsCoeffReadCost = _RhsNested::CoeffReadCost,
|
---|
| 52 | LhsFlags = _LhsNested::Flags,
|
---|
| 53 | RhsFlags = _RhsNested::Flags,
|
---|
| 54 |
|
---|
| 55 | RowsAtCompileTime = _LhsNested::RowsAtCompileTime,
|
---|
| 56 | ColsAtCompileTime = _RhsNested::ColsAtCompileTime,
|
---|
| 57 | InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime),
|
---|
| 58 |
|
---|
| 59 | MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime,
|
---|
| 60 | MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime,
|
---|
| 61 |
|
---|
| 62 | LhsRowMajor = LhsFlags & RowMajorBit,
|
---|
| 63 | RhsRowMajor = RhsFlags & RowMajorBit,
|
---|
| 64 |
|
---|
| 65 | SameType = is_same<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::value,
|
---|
| 66 |
|
---|
| 67 | CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit)
|
---|
| 68 | && (ColsAtCompileTime == Dynamic
|
---|
| 69 | || ( (ColsAtCompileTime % packet_traits<Scalar>::size) == 0
|
---|
| 70 | && (RhsFlags&AlignedBit)
|
---|
| 71 | )
|
---|
| 72 | ),
|
---|
| 73 |
|
---|
| 74 | CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
|
---|
| 75 | && (RowsAtCompileTime == Dynamic
|
---|
| 76 | || ( (RowsAtCompileTime % packet_traits<Scalar>::size) == 0
|
---|
| 77 | && (LhsFlags&AlignedBit)
|
---|
| 78 | )
|
---|
| 79 | ),
|
---|
| 80 |
|
---|
| 81 | EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
|
---|
| 82 | : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
|
---|
| 83 | : (RhsRowMajor && !CanVectorizeLhs),
|
---|
| 84 |
|
---|
| 85 | Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
|
---|
| 86 | | (EvalToRowMajor ? RowMajorBit : 0)
|
---|
| 87 | | NestingFlags
|
---|
| 88 | | (LhsFlags & RhsFlags & AlignedBit)
|
---|
| 89 | // TODO enable vectorization for mixed types
|
---|
| 90 | | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0),
|
---|
| 91 |
|
---|
| 92 | CoeffReadCost = InnerSize == Dynamic ? Dynamic
|
---|
| 93 | : InnerSize == 0 ? 0
|
---|
| 94 | : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
|
---|
| 95 | + (InnerSize - 1) * NumTraits<Scalar>::AddCost,
|
---|
| 96 |
|
---|
| 97 | /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
|
---|
| 98 | * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
|
---|
| 99 | * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
|
---|
| 100 | * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI.
|
---|
| 101 | */
|
---|
| 102 | CanVectorizeInner = SameType
|
---|
| 103 | && LhsRowMajor
|
---|
| 104 | && (!RhsRowMajor)
|
---|
| 105 | && (LhsFlags & RhsFlags & ActualPacketAccessBit)
|
---|
| 106 | && (LhsFlags & RhsFlags & AlignedBit)
|
---|
| 107 | && (InnerSize % packet_traits<Scalar>::size == 0)
|
---|
| 108 | };
|
---|
| 109 | };
|
---|
| 110 |
|
---|
| 111 | } // end namespace internal
|
---|
| 112 |
|
---|
| 113 | template<typename LhsNested, typename RhsNested, int NestingFlags>
|
---|
| 114 | class CoeffBasedProduct
|
---|
| 115 | : internal::no_assignment_operator,
|
---|
| 116 | public MatrixBase<CoeffBasedProduct<LhsNested, RhsNested, NestingFlags> >
|
---|
| 117 | {
|
---|
| 118 | public:
|
---|
| 119 |
|
---|
| 120 | typedef MatrixBase<CoeffBasedProduct> Base;
|
---|
| 121 | EIGEN_DENSE_PUBLIC_INTERFACE(CoeffBasedProduct)
|
---|
| 122 | typedef typename Base::PlainObject PlainObject;
|
---|
| 123 |
|
---|
| 124 | private:
|
---|
| 125 |
|
---|
| 126 | typedef typename internal::traits<CoeffBasedProduct>::_LhsNested _LhsNested;
|
---|
| 127 | typedef typename internal::traits<CoeffBasedProduct>::_RhsNested _RhsNested;
|
---|
| 128 |
|
---|
| 129 | enum {
|
---|
| 130 | PacketSize = internal::packet_traits<Scalar>::size,
|
---|
| 131 | InnerSize = internal::traits<CoeffBasedProduct>::InnerSize,
|
---|
| 132 | Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
|
---|
| 133 | CanVectorizeInner = internal::traits<CoeffBasedProduct>::CanVectorizeInner
|
---|
| 134 | };
|
---|
| 135 |
|
---|
| 136 | typedef internal::product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal,
|
---|
| 137 | Unroll ? InnerSize : Dynamic,
|
---|
| 138 | _LhsNested, _RhsNested, Scalar> ScalarCoeffImpl;
|
---|
| 139 |
|
---|
| 140 | typedef CoeffBasedProduct<LhsNested,RhsNested,NestByRefBit> LazyCoeffBasedProductType;
|
---|
| 141 |
|
---|
| 142 | public:
|
---|
| 143 |
|
---|
| 144 | inline CoeffBasedProduct(const CoeffBasedProduct& other)
|
---|
| 145 | : Base(), m_lhs(other.m_lhs), m_rhs(other.m_rhs)
|
---|
| 146 | {}
|
---|
| 147 |
|
---|
| 148 | template<typename Lhs, typename Rhs>
|
---|
| 149 | inline CoeffBasedProduct(const Lhs& lhs, const Rhs& rhs)
|
---|
| 150 | : m_lhs(lhs), m_rhs(rhs)
|
---|
| 151 | {
|
---|
| 152 | // we don't allow taking products of matrices of different real types, as that wouldn't be vectorizable.
|
---|
| 153 | // We still allow to mix T and complex<T>.
|
---|
| 154 | EIGEN_STATIC_ASSERT((internal::scalar_product_traits<typename Lhs::RealScalar, typename Rhs::RealScalar>::Defined),
|
---|
| 155 | YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
|
---|
| 156 | eigen_assert(lhs.cols() == rhs.rows()
|
---|
| 157 | && "invalid matrix product"
|
---|
| 158 | && "if you wanted a coeff-wise or a dot product use the respective explicit functions");
|
---|
| 159 | }
|
---|
| 160 |
|
---|
| 161 | EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); }
|
---|
| 162 | EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); }
|
---|
| 163 |
|
---|
| 164 | EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
|
---|
| 165 | {
|
---|
| 166 | Scalar res;
|
---|
| 167 | ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res);
|
---|
| 168 | return res;
|
---|
| 169 | }
|
---|
| 170 |
|
---|
| 171 | /* Allow index-based non-packet access. It is impossible though to allow index-based packed access,
|
---|
| 172 | * which is why we don't set the LinearAccessBit.
|
---|
| 173 | */
|
---|
| 174 | EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
|
---|
| 175 | {
|
---|
| 176 | Scalar res;
|
---|
| 177 | const Index row = RowsAtCompileTime == 1 ? 0 : index;
|
---|
| 178 | const Index col = RowsAtCompileTime == 1 ? index : 0;
|
---|
| 179 | ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res);
|
---|
| 180 | return res;
|
---|
| 181 | }
|
---|
| 182 |
|
---|
| 183 | template<int LoadMode>
|
---|
| 184 | EIGEN_STRONG_INLINE const PacketScalar packet(Index row, Index col) const
|
---|
| 185 | {
|
---|
| 186 | PacketScalar res;
|
---|
| 187 | internal::product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
|
---|
| 188 | Unroll ? InnerSize : Dynamic,
|
---|
| 189 | _LhsNested, _RhsNested, PacketScalar, LoadMode>
|
---|
| 190 | ::run(row, col, m_lhs, m_rhs, res);
|
---|
| 191 | return res;
|
---|
| 192 | }
|
---|
| 193 |
|
---|
| 194 | // Implicit conversion to the nested type (trigger the evaluation of the product)
|
---|
| 195 | EIGEN_STRONG_INLINE operator const PlainObject& () const
|
---|
| 196 | {
|
---|
| 197 | m_result.lazyAssign(*this);
|
---|
| 198 | return m_result;
|
---|
| 199 | }
|
---|
| 200 |
|
---|
| 201 | const _LhsNested& lhs() const { return m_lhs; }
|
---|
| 202 | const _RhsNested& rhs() const { return m_rhs; }
|
---|
| 203 |
|
---|
| 204 | const Diagonal<const LazyCoeffBasedProductType,0> diagonal() const
|
---|
| 205 | { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
|
---|
| 206 |
|
---|
| 207 | template<int DiagonalIndex>
|
---|
| 208 | const Diagonal<const LazyCoeffBasedProductType,DiagonalIndex> diagonal() const
|
---|
| 209 | { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
|
---|
| 210 |
|
---|
| 211 | const Diagonal<const LazyCoeffBasedProductType,Dynamic> diagonal(Index index) const
|
---|
| 212 | { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this).diagonal(index); }
|
---|
| 213 |
|
---|
| 214 | protected:
|
---|
| 215 | typename internal::add_const_on_value_type<LhsNested>::type m_lhs;
|
---|
| 216 | typename internal::add_const_on_value_type<RhsNested>::type m_rhs;
|
---|
| 217 |
|
---|
| 218 | mutable PlainObject m_result;
|
---|
| 219 | };
|
---|
| 220 |
|
---|
| 221 | namespace internal {
|
---|
| 222 |
|
---|
| 223 | // here we need to overload the nested rule for products
|
---|
| 224 | // such that the nested type is a const reference to a plain matrix
|
---|
| 225 | template<typename Lhs, typename Rhs, int N, typename PlainObject>
|
---|
| 226 | struct nested<CoeffBasedProduct<Lhs,Rhs,EvalBeforeNestingBit|EvalBeforeAssigningBit>, N, PlainObject>
|
---|
| 227 | {
|
---|
| 228 | typedef PlainObject const& type;
|
---|
| 229 | };
|
---|
| 230 |
|
---|
| 231 | /***************************************************************************
|
---|
| 232 | * Normal product .coeff() implementation (with meta-unrolling)
|
---|
| 233 | ***************************************************************************/
|
---|
| 234 |
|
---|
| 235 | /**************************************
|
---|
| 236 | *** Scalar path - no vectorization ***
|
---|
| 237 | **************************************/
|
---|
| 238 |
|
---|
| 239 | template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
|
---|
| 240 | struct product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
|
---|
| 241 | {
|
---|
| 242 | typedef typename Lhs::Index Index;
|
---|
| 243 | static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
|
---|
| 244 | {
|
---|
| 245 | product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res);
|
---|
| 246 | res += lhs.coeff(row, UnrollingIndex-1) * rhs.coeff(UnrollingIndex-1, col);
|
---|
| 247 | }
|
---|
| 248 | };
|
---|
| 249 |
|
---|
| 250 | template<typename Lhs, typename Rhs, typename RetScalar>
|
---|
| 251 | struct product_coeff_impl<DefaultTraversal, 1, Lhs, Rhs, RetScalar>
|
---|
| 252 | {
|
---|
| 253 | typedef typename Lhs::Index Index;
|
---|
| 254 | static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
|
---|
| 255 | {
|
---|
| 256 | res = lhs.coeff(row, 0) * rhs.coeff(0, col);
|
---|
| 257 | }
|
---|
| 258 | };
|
---|
| 259 |
|
---|
| 260 | template<typename Lhs, typename Rhs, typename RetScalar>
|
---|
| 261 | struct product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
|
---|
| 262 | {
|
---|
| 263 | typedef typename Lhs::Index Index;
|
---|
| 264 | static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, RetScalar &res)
|
---|
| 265 | {
|
---|
| 266 | res = RetScalar(0);
|
---|
| 267 | }
|
---|
| 268 | };
|
---|
| 269 |
|
---|
| 270 | template<typename Lhs, typename Rhs, typename RetScalar>
|
---|
| 271 | struct product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
|
---|
| 272 | {
|
---|
| 273 | typedef typename Lhs::Index Index;
|
---|
| 274 | static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res)
|
---|
| 275 | {
|
---|
| 276 | res = (lhs.row(row).transpose().cwiseProduct( rhs.col(col) )).sum();
|
---|
| 277 | }
|
---|
| 278 | };
|
---|
| 279 |
|
---|
| 280 | /*******************************************
|
---|
| 281 | *** Scalar path with inner vectorization ***
|
---|
| 282 | *******************************************/
|
---|
| 283 |
|
---|
| 284 | template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet>
|
---|
| 285 | struct product_coeff_vectorized_unroller
|
---|
| 286 | {
|
---|
| 287 | typedef typename Lhs::Index Index;
|
---|
| 288 | enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
|
---|
| 289 | static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
|
---|
| 290 | {
|
---|
| 291 | product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
|
---|
| 292 | pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) ));
|
---|
| 293 | }
|
---|
| 294 | };
|
---|
| 295 |
|
---|
| 296 | template<typename Lhs, typename Rhs, typename Packet>
|
---|
| 297 | struct product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
|
---|
| 298 | {
|
---|
| 299 | typedef typename Lhs::Index Index;
|
---|
| 300 | static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
|
---|
| 301 | {
|
---|
| 302 | pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col));
|
---|
| 303 | }
|
---|
| 304 | };
|
---|
| 305 |
|
---|
| 306 | template<typename Lhs, typename Rhs, typename RetScalar>
|
---|
| 307 | struct product_coeff_impl<InnerVectorizedTraversal, 0, Lhs, Rhs, RetScalar>
|
---|
| 308 | {
|
---|
| 309 | typedef typename Lhs::Index Index;
|
---|
| 310 | static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, RetScalar &res)
|
---|
| 311 | {
|
---|
| 312 | res = 0;
|
---|
| 313 | }
|
---|
| 314 | };
|
---|
| 315 |
|
---|
| 316 | template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
|
---|
| 317 | struct product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
|
---|
| 318 | {
|
---|
| 319 | typedef typename Lhs::PacketScalar Packet;
|
---|
| 320 | typedef typename Lhs::Index Index;
|
---|
| 321 | enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
|
---|
| 322 | static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
|
---|
| 323 | {
|
---|
| 324 | Packet pres;
|
---|
| 325 | product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
|
---|
| 326 | res = predux(pres);
|
---|
| 327 | }
|
---|
| 328 | };
|
---|
| 329 |
|
---|
| 330 | template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime>
|
---|
| 331 | struct product_coeff_vectorized_dyn_selector
|
---|
| 332 | {
|
---|
| 333 | typedef typename Lhs::Index Index;
|
---|
| 334 | static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
|
---|
| 335 | {
|
---|
| 336 | res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum();
|
---|
| 337 | }
|
---|
| 338 | };
|
---|
| 339 |
|
---|
| 340 | // NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower
|
---|
| 341 | // NOTE maybe they are now useless since we have a specialization for Block<Matrix>
|
---|
| 342 | template<typename Lhs, typename Rhs, int RhsCols>
|
---|
| 343 | struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
|
---|
| 344 | {
|
---|
| 345 | typedef typename Lhs::Index Index;
|
---|
| 346 | static EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
|
---|
| 347 | {
|
---|
| 348 | res = lhs.transpose().cwiseProduct(rhs.col(col)).sum();
|
---|
| 349 | }
|
---|
| 350 | };
|
---|
| 351 |
|
---|
| 352 | template<typename Lhs, typename Rhs, int LhsRows>
|
---|
| 353 | struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
|
---|
| 354 | {
|
---|
| 355 | typedef typename Lhs::Index Index;
|
---|
| 356 | static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
|
---|
| 357 | {
|
---|
| 358 | res = lhs.row(row).transpose().cwiseProduct(rhs).sum();
|
---|
| 359 | }
|
---|
| 360 | };
|
---|
| 361 |
|
---|
| 362 | template<typename Lhs, typename Rhs>
|
---|
| 363 | struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
|
---|
| 364 | {
|
---|
| 365 | typedef typename Lhs::Index Index;
|
---|
| 366 | static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
|
---|
| 367 | {
|
---|
| 368 | res = lhs.transpose().cwiseProduct(rhs).sum();
|
---|
| 369 | }
|
---|
| 370 | };
|
---|
| 371 |
|
---|
| 372 | template<typename Lhs, typename Rhs, typename RetScalar>
|
---|
| 373 | struct product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar>
|
---|
| 374 | {
|
---|
| 375 | typedef typename Lhs::Index Index;
|
---|
| 376 | static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
|
---|
| 377 | {
|
---|
| 378 | product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, res);
|
---|
| 379 | }
|
---|
| 380 | };
|
---|
| 381 |
|
---|
| 382 | /*******************
|
---|
| 383 | *** Packet path ***
|
---|
| 384 | *******************/
|
---|
| 385 |
|
---|
| 386 | template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
---|
| 387 | struct product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
|
---|
| 388 | {
|
---|
| 389 | typedef typename Lhs::Index Index;
|
---|
| 390 | static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
|
---|
| 391 | {
|
---|
| 392 | product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
|
---|
| 393 | res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex-1)), rhs.template packet<LoadMode>(UnrollingIndex-1, col), res);
|
---|
| 394 | }
|
---|
| 395 | };
|
---|
| 396 |
|
---|
| 397 | template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
---|
| 398 | struct product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
|
---|
| 399 | {
|
---|
| 400 | typedef typename Lhs::Index Index;
|
---|
| 401 | static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
|
---|
| 402 | {
|
---|
| 403 | product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
|
---|
| 404 | res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex-1), pset1<Packet>(rhs.coeff(UnrollingIndex-1, col)), res);
|
---|
| 405 | }
|
---|
| 406 | };
|
---|
| 407 |
|
---|
| 408 | template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
---|
| 409 | struct product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
|
---|
| 410 | {
|
---|
| 411 | typedef typename Lhs::Index Index;
|
---|
| 412 | static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
|
---|
| 413 | {
|
---|
| 414 | res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
|
---|
| 415 | }
|
---|
| 416 | };
|
---|
| 417 |
|
---|
| 418 | template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
---|
| 419 | struct product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
|
---|
| 420 | {
|
---|
| 421 | typedef typename Lhs::Index Index;
|
---|
| 422 | static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
|
---|
| 423 | {
|
---|
| 424 | res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
|
---|
| 425 | }
|
---|
| 426 | };
|
---|
| 427 |
|
---|
| 428 | template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
---|
| 429 | struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
|
---|
| 430 | {
|
---|
| 431 | typedef typename Lhs::Index Index;
|
---|
| 432 | static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Packet &res)
|
---|
| 433 | {
|
---|
| 434 | res = pset1<Packet>(0);
|
---|
| 435 | }
|
---|
| 436 | };
|
---|
| 437 |
|
---|
| 438 | template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
---|
| 439 | struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
|
---|
| 440 | {
|
---|
| 441 | typedef typename Lhs::Index Index;
|
---|
| 442 | static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Packet &res)
|
---|
| 443 | {
|
---|
| 444 | res = pset1<Packet>(0);
|
---|
| 445 | }
|
---|
| 446 | };
|
---|
| 447 |
|
---|
| 448 | template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
---|
| 449 | struct product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
|
---|
| 450 | {
|
---|
| 451 | typedef typename Lhs::Index Index;
|
---|
| 452 | static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
|
---|
| 453 | {
|
---|
| 454 | res = pset1<Packet>(0);
|
---|
| 455 | for(Index i = 0; i < lhs.cols(); ++i)
|
---|
| 456 | res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
|
---|
| 457 | }
|
---|
| 458 | };
|
---|
| 459 |
|
---|
| 460 | template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
---|
| 461 | struct product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
|
---|
| 462 | {
|
---|
| 463 | typedef typename Lhs::Index Index;
|
---|
| 464 | static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
|
---|
| 465 | {
|
---|
| 466 | res = pset1<Packet>(0);
|
---|
| 467 | for(Index i = 0; i < lhs.cols(); ++i)
|
---|
| 468 | res = pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
|
---|
| 469 | }
|
---|
| 470 | };
|
---|
| 471 |
|
---|
| 472 | } // end namespace internal
|
---|
| 473 |
|
---|
| 474 | } // end namespace Eigen
|
---|
| 475 |
|
---|
| 476 | #endif // EIGEN_COEFFBASED_PRODUCT_H
|
---|