source: pacpussensors/trunk/Vislab/lib3dv/eigen/Eigen/src/Core/Redux.h@ 136

Last change on this file since 136 was 136, checked in by ldecherf, 7 years ago

Doc

File size: 13.8 KB
Line 
1// This file is part of Eigen, a lightweight C++ template library
2// for linear algebra.
3//
4// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
5// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
6//
7// This Source Code Form is subject to the terms of the Mozilla
8// Public License v. 2.0. If a copy of the MPL was not distributed
9// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10
11#ifndef EIGEN_REDUX_H
12#define EIGEN_REDUX_H
13
14namespace Eigen {
15
16namespace internal {
17
18// TODO
19// * implement other kind of vectorization
20// * factorize code
21
22/***************************************************************************
23* Part 1 : the logic deciding a strategy for vectorization and unrolling
24***************************************************************************/
25
26template<typename Func, typename Derived>
27struct redux_traits
28{
29public:
30 enum {
31 PacketSize = packet_traits<typename Derived::Scalar>::size,
32 InnerMaxSize = int(Derived::IsRowMajor)
33 ? Derived::MaxColsAtCompileTime
34 : Derived::MaxRowsAtCompileTime
35 };
36
37 enum {
38 MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit)
39 && (functor_traits<Func>::PacketAccess),
40 MayLinearVectorize = MightVectorize && (int(Derived::Flags)&LinearAccessBit),
41 MaySliceVectorize = MightVectorize && int(InnerMaxSize)>=3*PacketSize
42 };
43
44public:
45 enum {
46 Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
47 : int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
48 : int(DefaultTraversal)
49 };
50
51public:
52 enum {
53 Cost = ( Derived::SizeAtCompileTime == Dynamic
54 || Derived::CoeffReadCost == Dynamic
55 || (Derived::SizeAtCompileTime!=1 && functor_traits<Func>::Cost == Dynamic)
56 ) ? Dynamic
57 : Derived::SizeAtCompileTime * Derived::CoeffReadCost
58 + (Derived::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
59 UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
60 };
61
62public:
63 enum {
64 Unrolling = Cost != Dynamic && Cost <= UnrollingLimit
65 ? CompleteUnrolling
66 : NoUnrolling
67 };
68};
69
70/***************************************************************************
71* Part 2 : unrollers
72***************************************************************************/
73
74/*** no vectorization ***/
75
76template<typename Func, typename Derived, int Start, int Length>
77struct redux_novec_unroller
78{
79 enum {
80 HalfLength = Length/2
81 };
82
83 typedef typename Derived::Scalar Scalar;
84
85 static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
86 {
87 return func(redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
88 redux_novec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func));
89 }
90};
91
92template<typename Func, typename Derived, int Start>
93struct redux_novec_unroller<Func, Derived, Start, 1>
94{
95 enum {
96 outer = Start / Derived::InnerSizeAtCompileTime,
97 inner = Start % Derived::InnerSizeAtCompileTime
98 };
99
100 typedef typename Derived::Scalar Scalar;
101
102 static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&)
103 {
104 return mat.coeffByOuterInner(outer, inner);
105 }
106};
107
108// This is actually dead code and will never be called. It is required
109// to prevent false warnings regarding failed inlining though
110// for 0 length run() will never be called at all.
111template<typename Func, typename Derived, int Start>
112struct redux_novec_unroller<Func, Derived, Start, 0>
113{
114 typedef typename Derived::Scalar Scalar;
115 static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); }
116};
117
118/*** vectorization ***/
119
120template<typename Func, typename Derived, int Start, int Length>
121struct redux_vec_unroller
122{
123 enum {
124 PacketSize = packet_traits<typename Derived::Scalar>::size,
125 HalfLength = Length/2
126 };
127
128 typedef typename Derived::Scalar Scalar;
129 typedef typename packet_traits<Scalar>::type PacketScalar;
130
131 static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func& func)
132 {
133 return func.packetOp(
134 redux_vec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
135 redux_vec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func) );
136 }
137};
138
139template<typename Func, typename Derived, int Start>
140struct redux_vec_unroller<Func, Derived, Start, 1>
141{
142 enum {
143 index = Start * packet_traits<typename Derived::Scalar>::size,
144 outer = index / int(Derived::InnerSizeAtCompileTime),
145 inner = index % int(Derived::InnerSizeAtCompileTime),
146 alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned
147 };
148
149 typedef typename Derived::Scalar Scalar;
150 typedef typename packet_traits<Scalar>::type PacketScalar;
151
152 static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&)
153 {
154 return mat.template packetByOuterInner<alignment>(outer, inner);
155 }
156};
157
158/***************************************************************************
159* Part 3 : implementation of all cases
160***************************************************************************/
161
162template<typename Func, typename Derived,
163 int Traversal = redux_traits<Func, Derived>::Traversal,
164 int Unrolling = redux_traits<Func, Derived>::Unrolling
165>
166struct redux_impl;
167
168template<typename Func, typename Derived>
169struct redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
170{
171 typedef typename Derived::Scalar Scalar;
172 typedef typename Derived::Index Index;
173 static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func)
174 {
175 eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
176 Scalar res;
177 res = mat.coeffByOuterInner(0, 0);
178 for(Index i = 1; i < mat.innerSize(); ++i)
179 res = func(res, mat.coeffByOuterInner(0, i));
180 for(Index i = 1; i < mat.outerSize(); ++i)
181 for(Index j = 0; j < mat.innerSize(); ++j)
182 res = func(res, mat.coeffByOuterInner(i, j));
183 return res;
184 }
185};
186
187template<typename Func, typename Derived>
188struct redux_impl<Func,Derived, DefaultTraversal, CompleteUnrolling>
189 : public redux_novec_unroller<Func,Derived, 0, Derived::SizeAtCompileTime>
190{};
191
192template<typename Func, typename Derived>
193struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
194{
195 typedef typename Derived::Scalar Scalar;
196 typedef typename packet_traits<Scalar>::type PacketScalar;
197 typedef typename Derived::Index Index;
198
199 static Scalar run(const Derived& mat, const Func& func)
200 {
201 const Index size = mat.size();
202 eigen_assert(size && "you are using an empty matrix");
203 const Index packetSize = packet_traits<Scalar>::size;
204 const Index alignedStart = internal::first_aligned(mat);
205 enum {
206 alignment = bool(Derived::Flags & DirectAccessBit) || bool(Derived::Flags & AlignedBit)
207 ? Aligned : Unaligned
208 };
209 const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
210 const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
211 const Index alignedEnd2 = alignedStart + alignedSize2;
212 const Index alignedEnd = alignedStart + alignedSize;
213 Scalar res;
214 if(alignedSize)
215 {
216 PacketScalar packet_res0 = mat.template packet<alignment>(alignedStart);
217 if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop
218 {
219 PacketScalar packet_res1 = mat.template packet<alignment>(alignedStart+packetSize);
220 for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
221 {
222 packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(index));
223 packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment>(index+packetSize));
224 }
225
226 packet_res0 = func.packetOp(packet_res0,packet_res1);
227 if(alignedEnd>alignedEnd2)
228 packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(alignedEnd2));
229 }
230 res = func.predux(packet_res0);
231
232 for(Index index = 0; index < alignedStart; ++index)
233 res = func(res,mat.coeff(index));
234
235 for(Index index = alignedEnd; index < size; ++index)
236 res = func(res,mat.coeff(index));
237 }
238 else // too small to vectorize anything.
239 // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
240 {
241 res = mat.coeff(0);
242 for(Index index = 1; index < size; ++index)
243 res = func(res,mat.coeff(index));
244 }
245
246 return res;
247 }
248};
249
250// NOTE: for SliceVectorizedTraversal we simply bypass unrolling
251template<typename Func, typename Derived, int Unrolling>
252struct redux_impl<Func, Derived, SliceVectorizedTraversal, Unrolling>
253{
254 typedef typename Derived::Scalar Scalar;
255 typedef typename packet_traits<Scalar>::type PacketScalar;
256 typedef typename Derived::Index Index;
257
258 static Scalar run(const Derived& mat, const Func& func)
259 {
260 eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
261 const Index innerSize = mat.innerSize();
262 const Index outerSize = mat.outerSize();
263 enum {
264 packetSize = packet_traits<Scalar>::size
265 };
266 const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;
267 Scalar res;
268 if(packetedInnerSize)
269 {
270 PacketScalar packet_res = mat.template packet<Unaligned>(0,0);
271 for(Index j=0; j<outerSize; ++j)
272 for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize))
273 packet_res = func.packetOp(packet_res, mat.template packetByOuterInner<Unaligned>(j,i));
274
275 res = func.predux(packet_res);
276 for(Index j=0; j<outerSize; ++j)
277 for(Index i=packetedInnerSize; i<innerSize; ++i)
278 res = func(res, mat.coeffByOuterInner(j,i));
279 }
280 else // too small to vectorize anything.
281 // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
282 {
283 res = redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>::run(mat, func);
284 }
285
286 return res;
287 }
288};
289
290template<typename Func, typename Derived>
291struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
292{
293 typedef typename Derived::Scalar Scalar;
294 typedef typename packet_traits<Scalar>::type PacketScalar;
295 enum {
296 PacketSize = packet_traits<Scalar>::size,
297 Size = Derived::SizeAtCompileTime,
298 VectorizedSize = (Size / PacketSize) * PacketSize
299 };
300 static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func)
301 {
302 eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
303 Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
304 if (VectorizedSize != Size)
305 res = func(res,redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func));
306 return res;
307 }
308};
309
310} // end namespace internal
311
312/***************************************************************************
313* Part 4 : public API
314***************************************************************************/
315
316
317/** \returns the result of a full redux operation on the whole matrix or vector using \a func
318 *
319 * The template parameter \a BinaryOp is the type of the functor \a func which must be
320 * an associative operator. Both current STL and TR1 functor styles are handled.
321 *
322 * \sa DenseBase::sum(), DenseBase::minCoeff(), DenseBase::maxCoeff(), MatrixBase::colwise(), MatrixBase::rowwise()
323 */
324template<typename Derived>
325template<typename Func>
326EIGEN_STRONG_INLINE typename internal::result_of<Func(typename internal::traits<Derived>::Scalar)>::type
327DenseBase<Derived>::redux(const Func& func) const
328{
329 typedef typename internal::remove_all<typename Derived::Nested>::type ThisNested;
330 return internal::redux_impl<Func, ThisNested>
331 ::run(derived(), func);
332}
333
334/** \returns the minimum of all coefficients of \c *this.
335 * \warning the result is undefined if \c *this contains NaN.
336 */
337template<typename Derived>
338EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
339DenseBase<Derived>::minCoeff() const
340{
341 return this->redux(Eigen::internal::scalar_min_op<Scalar>());
342}
343
344/** \returns the maximum of all coefficients of \c *this.
345 * \warning the result is undefined if \c *this contains NaN.
346 */
347template<typename Derived>
348EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
349DenseBase<Derived>::maxCoeff() const
350{
351 return this->redux(Eigen::internal::scalar_max_op<Scalar>());
352}
353
354/** \returns the sum of all coefficients of *this
355 *
356 * \sa trace(), prod(), mean()
357 */
358template<typename Derived>
359EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
360DenseBase<Derived>::sum() const
361{
362 if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
363 return Scalar(0);
364 return this->redux(Eigen::internal::scalar_sum_op<Scalar>());
365}
366
367/** \returns the mean of all coefficients of *this
368*
369* \sa trace(), prod(), sum()
370*/
371template<typename Derived>
372EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
373DenseBase<Derived>::mean() const
374{
375 return Scalar(this->redux(Eigen::internal::scalar_sum_op<Scalar>())) / Scalar(this->size());
376}
377
378/** \returns the product of all coefficients of *this
379 *
380 * Example: \include MatrixBase_prod.cpp
381 * Output: \verbinclude MatrixBase_prod.out
382 *
383 * \sa sum(), mean(), trace()
384 */
385template<typename Derived>
386EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
387DenseBase<Derived>::prod() const
388{
389 if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
390 return Scalar(1);
391 return this->redux(Eigen::internal::scalar_product_op<Scalar>());
392}
393
394/** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal.
395 *
396 * \c *this can be any matrix, not necessarily square.
397 *
398 * \sa diagonal(), sum()
399 */
400template<typename Derived>
401EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
402MatrixBase<Derived>::trace() const
403{
404 return derived().diagonal().sum();
405}
406
407} // end namespace Eigen
408
409#endif // EIGEN_REDUX_H
Note: See TracBrowser for help on using the repository browser.