source: pacpussensors/trunk/Vislab/lib3dv/eigen/Eigen/src/Core/products/CoeffBasedProduct.h@ 136

Last change on this file since 136 was 136, checked in by ldecherf, 7 years ago

Doc

File size: 18.9 KB
Line 
1// This file is part of Eigen, a lightweight C++ template library
2// for linear algebra.
3//
4// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
5// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
6//
7// This Source Code Form is subject to the terms of the Mozilla
8// Public License v. 2.0. If a copy of the MPL was not distributed
9// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10
11#ifndef EIGEN_COEFFBASED_PRODUCT_H
12#define EIGEN_COEFFBASED_PRODUCT_H
13
14namespace Eigen {
15
16namespace internal {
17
18/*********************************************************************************
19* Coefficient based product implementation.
20* It is designed for the following use cases:
21* - small fixed sizes
22* - lazy products
23*********************************************************************************/
24
25/* Since the all the dimensions of the product are small, here we can rely
26 * on the generic Assign mechanism to evaluate the product per coeff (or packet).
27 *
28 * Note that here the inner-loops should always be unrolled.
29 */
30
31template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
32struct product_coeff_impl;
33
34template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
35struct product_packet_impl;
36
37template<typename LhsNested, typename RhsNested, int NestingFlags>
38struct traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
39{
40 typedef MatrixXpr XprKind;
41 typedef typename remove_all<LhsNested>::type _LhsNested;
42 typedef typename remove_all<RhsNested>::type _RhsNested;
43 typedef typename scalar_product_traits<typename _LhsNested::Scalar, typename _RhsNested::Scalar>::ReturnType Scalar;
44 typedef typename promote_storage_type<typename traits<_LhsNested>::StorageKind,
45 typename traits<_RhsNested>::StorageKind>::ret StorageKind;
46 typedef typename promote_index_type<typename traits<_LhsNested>::Index,
47 typename traits<_RhsNested>::Index>::type Index;
48
49 enum {
50 LhsCoeffReadCost = _LhsNested::CoeffReadCost,
51 RhsCoeffReadCost = _RhsNested::CoeffReadCost,
52 LhsFlags = _LhsNested::Flags,
53 RhsFlags = _RhsNested::Flags,
54
55 RowsAtCompileTime = _LhsNested::RowsAtCompileTime,
56 ColsAtCompileTime = _RhsNested::ColsAtCompileTime,
57 InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime),
58
59 MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime,
60 MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime,
61
62 LhsRowMajor = LhsFlags & RowMajorBit,
63 RhsRowMajor = RhsFlags & RowMajorBit,
64
65 SameType = is_same<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::value,
66
67 CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit)
68 && (ColsAtCompileTime == Dynamic
69 || ( (ColsAtCompileTime % packet_traits<Scalar>::size) == 0
70 && (RhsFlags&AlignedBit)
71 )
72 ),
73
74 CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
75 && (RowsAtCompileTime == Dynamic
76 || ( (RowsAtCompileTime % packet_traits<Scalar>::size) == 0
77 && (LhsFlags&AlignedBit)
78 )
79 ),
80
81 EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
82 : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
83 : (RhsRowMajor && !CanVectorizeLhs),
84
85 Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
86 | (EvalToRowMajor ? RowMajorBit : 0)
87 | NestingFlags
88 | (LhsFlags & RhsFlags & AlignedBit)
89 // TODO enable vectorization for mixed types
90 | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0),
91
92 CoeffReadCost = InnerSize == Dynamic ? Dynamic
93 : InnerSize == 0 ? 0
94 : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
95 + (InnerSize - 1) * NumTraits<Scalar>::AddCost,
96
97 /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
98 * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
99 * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
100 * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI.
101 */
102 CanVectorizeInner = SameType
103 && LhsRowMajor
104 && (!RhsRowMajor)
105 && (LhsFlags & RhsFlags & ActualPacketAccessBit)
106 && (LhsFlags & RhsFlags & AlignedBit)
107 && (InnerSize % packet_traits<Scalar>::size == 0)
108 };
109};
110
111} // end namespace internal
112
113template<typename LhsNested, typename RhsNested, int NestingFlags>
114class CoeffBasedProduct
115 : internal::no_assignment_operator,
116 public MatrixBase<CoeffBasedProduct<LhsNested, RhsNested, NestingFlags> >
117{
118 public:
119
120 typedef MatrixBase<CoeffBasedProduct> Base;
121 EIGEN_DENSE_PUBLIC_INTERFACE(CoeffBasedProduct)
122 typedef typename Base::PlainObject PlainObject;
123
124 private:
125
126 typedef typename internal::traits<CoeffBasedProduct>::_LhsNested _LhsNested;
127 typedef typename internal::traits<CoeffBasedProduct>::_RhsNested _RhsNested;
128
129 enum {
130 PacketSize = internal::packet_traits<Scalar>::size,
131 InnerSize = internal::traits<CoeffBasedProduct>::InnerSize,
132 Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
133 CanVectorizeInner = internal::traits<CoeffBasedProduct>::CanVectorizeInner
134 };
135
136 typedef internal::product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal,
137 Unroll ? InnerSize : Dynamic,
138 _LhsNested, _RhsNested, Scalar> ScalarCoeffImpl;
139
140 typedef CoeffBasedProduct<LhsNested,RhsNested,NestByRefBit> LazyCoeffBasedProductType;
141
142 public:
143
144 inline CoeffBasedProduct(const CoeffBasedProduct& other)
145 : Base(), m_lhs(other.m_lhs), m_rhs(other.m_rhs)
146 {}
147
148 template<typename Lhs, typename Rhs>
149 inline CoeffBasedProduct(const Lhs& lhs, const Rhs& rhs)
150 : m_lhs(lhs), m_rhs(rhs)
151 {
152 // we don't allow taking products of matrices of different real types, as that wouldn't be vectorizable.
153 // We still allow to mix T and complex<T>.
154 EIGEN_STATIC_ASSERT((internal::scalar_product_traits<typename Lhs::RealScalar, typename Rhs::RealScalar>::Defined),
155 YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
156 eigen_assert(lhs.cols() == rhs.rows()
157 && "invalid matrix product"
158 && "if you wanted a coeff-wise or a dot product use the respective explicit functions");
159 }
160
161 EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); }
162 EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); }
163
164 EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
165 {
166 Scalar res;
167 ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res);
168 return res;
169 }
170
171 /* Allow index-based non-packet access. It is impossible though to allow index-based packed access,
172 * which is why we don't set the LinearAccessBit.
173 */
174 EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
175 {
176 Scalar res;
177 const Index row = RowsAtCompileTime == 1 ? 0 : index;
178 const Index col = RowsAtCompileTime == 1 ? index : 0;
179 ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res);
180 return res;
181 }
182
183 template<int LoadMode>
184 EIGEN_STRONG_INLINE const PacketScalar packet(Index row, Index col) const
185 {
186 PacketScalar res;
187 internal::product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
188 Unroll ? InnerSize : Dynamic,
189 _LhsNested, _RhsNested, PacketScalar, LoadMode>
190 ::run(row, col, m_lhs, m_rhs, res);
191 return res;
192 }
193
194 // Implicit conversion to the nested type (trigger the evaluation of the product)
195 EIGEN_STRONG_INLINE operator const PlainObject& () const
196 {
197 m_result.lazyAssign(*this);
198 return m_result;
199 }
200
201 const _LhsNested& lhs() const { return m_lhs; }
202 const _RhsNested& rhs() const { return m_rhs; }
203
204 const Diagonal<const LazyCoeffBasedProductType,0> diagonal() const
205 { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
206
207 template<int DiagonalIndex>
208 const Diagonal<const LazyCoeffBasedProductType,DiagonalIndex> diagonal() const
209 { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
210
211 const Diagonal<const LazyCoeffBasedProductType,Dynamic> diagonal(Index index) const
212 { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this).diagonal(index); }
213
214 protected:
215 typename internal::add_const_on_value_type<LhsNested>::type m_lhs;
216 typename internal::add_const_on_value_type<RhsNested>::type m_rhs;
217
218 mutable PlainObject m_result;
219};
220
221namespace internal {
222
223// here we need to overload the nested rule for products
224// such that the nested type is a const reference to a plain matrix
225template<typename Lhs, typename Rhs, int N, typename PlainObject>
226struct nested<CoeffBasedProduct<Lhs,Rhs,EvalBeforeNestingBit|EvalBeforeAssigningBit>, N, PlainObject>
227{
228 typedef PlainObject const& type;
229};
230
231/***************************************************************************
232* Normal product .coeff() implementation (with meta-unrolling)
233***************************************************************************/
234
235/**************************************
236*** Scalar path - no vectorization ***
237**************************************/
238
239template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
240struct product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
241{
242 typedef typename Lhs::Index Index;
243 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
244 {
245 product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res);
246 res += lhs.coeff(row, UnrollingIndex-1) * rhs.coeff(UnrollingIndex-1, col);
247 }
248};
249
250template<typename Lhs, typename Rhs, typename RetScalar>
251struct product_coeff_impl<DefaultTraversal, 1, Lhs, Rhs, RetScalar>
252{
253 typedef typename Lhs::Index Index;
254 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
255 {
256 res = lhs.coeff(row, 0) * rhs.coeff(0, col);
257 }
258};
259
260template<typename Lhs, typename Rhs, typename RetScalar>
261struct product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
262{
263 typedef typename Lhs::Index Index;
264 static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, RetScalar &res)
265 {
266 res = RetScalar(0);
267 }
268};
269
270template<typename Lhs, typename Rhs, typename RetScalar>
271struct product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
272{
273 typedef typename Lhs::Index Index;
274 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res)
275 {
276 res = (lhs.row(row).transpose().cwiseProduct( rhs.col(col) )).sum();
277 }
278};
279
280/*******************************************
281*** Scalar path with inner vectorization ***
282*******************************************/
283
284template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet>
285struct product_coeff_vectorized_unroller
286{
287 typedef typename Lhs::Index Index;
288 enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
289 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
290 {
291 product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
292 pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) ));
293 }
294};
295
296template<typename Lhs, typename Rhs, typename Packet>
297struct product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
298{
299 typedef typename Lhs::Index Index;
300 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
301 {
302 pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col));
303 }
304};
305
306template<typename Lhs, typename Rhs, typename RetScalar>
307struct product_coeff_impl<InnerVectorizedTraversal, 0, Lhs, Rhs, RetScalar>
308{
309 typedef typename Lhs::Index Index;
310 static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, RetScalar &res)
311 {
312 res = 0;
313 }
314};
315
316template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
317struct product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
318{
319 typedef typename Lhs::PacketScalar Packet;
320 typedef typename Lhs::Index Index;
321 enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
322 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
323 {
324 Packet pres;
325 product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
326 res = predux(pres);
327 }
328};
329
330template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime>
331struct product_coeff_vectorized_dyn_selector
332{
333 typedef typename Lhs::Index Index;
334 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
335 {
336 res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum();
337 }
338};
339
340// NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower
341// NOTE maybe they are now useless since we have a specialization for Block<Matrix>
342template<typename Lhs, typename Rhs, int RhsCols>
343struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
344{
345 typedef typename Lhs::Index Index;
346 static EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
347 {
348 res = lhs.transpose().cwiseProduct(rhs.col(col)).sum();
349 }
350};
351
352template<typename Lhs, typename Rhs, int LhsRows>
353struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
354{
355 typedef typename Lhs::Index Index;
356 static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
357 {
358 res = lhs.row(row).transpose().cwiseProduct(rhs).sum();
359 }
360};
361
362template<typename Lhs, typename Rhs>
363struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
364{
365 typedef typename Lhs::Index Index;
366 static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
367 {
368 res = lhs.transpose().cwiseProduct(rhs).sum();
369 }
370};
371
372template<typename Lhs, typename Rhs, typename RetScalar>
373struct product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar>
374{
375 typedef typename Lhs::Index Index;
376 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
377 {
378 product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, res);
379 }
380};
381
382/*******************
383*** Packet path ***
384*******************/
385
386template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
387struct product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
388{
389 typedef typename Lhs::Index Index;
390 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
391 {
392 product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
393 res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex-1)), rhs.template packet<LoadMode>(UnrollingIndex-1, col), res);
394 }
395};
396
397template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
398struct product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
399{
400 typedef typename Lhs::Index Index;
401 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
402 {
403 product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
404 res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex-1), pset1<Packet>(rhs.coeff(UnrollingIndex-1, col)), res);
405 }
406};
407
408template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
409struct product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
410{
411 typedef typename Lhs::Index Index;
412 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
413 {
414 res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
415 }
416};
417
418template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
419struct product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
420{
421 typedef typename Lhs::Index Index;
422 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
423 {
424 res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
425 }
426};
427
428template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
429struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
430{
431 typedef typename Lhs::Index Index;
432 static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Packet &res)
433 {
434 res = pset1<Packet>(0);
435 }
436};
437
438template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
439struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
440{
441 typedef typename Lhs::Index Index;
442 static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Packet &res)
443 {
444 res = pset1<Packet>(0);
445 }
446};
447
448template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
449struct product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
450{
451 typedef typename Lhs::Index Index;
452 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
453 {
454 res = pset1<Packet>(0);
455 for(Index i = 0; i < lhs.cols(); ++i)
456 res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
457 }
458};
459
460template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
461struct product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
462{
463 typedef typename Lhs::Index Index;
464 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
465 {
466 res = pset1<Packet>(0);
467 for(Index i = 0; i < lhs.cols(); ++i)
468 res = pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
469 }
470};
471
472} // end namespace internal
473
474} // end namespace Eigen
475
476#endif // EIGEN_COEFFBASED_PRODUCT_H
Note: See TracBrowser for help on using the repository browser.