1 | // This file is part of Eigen, a lightweight C++ template library
|
---|
2 | // for linear algebra.
|
---|
3 | //
|
---|
4 | // Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
|
---|
5 | // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
|
---|
6 | //
|
---|
7 | // This Source Code Form is subject to the terms of the Mozilla
|
---|
8 | // Public License v. 2.0. If a copy of the MPL was not distributed
|
---|
9 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
---|
10 |
|
---|
11 | #ifndef EIGEN_GENERIC_PACKET_MATH_H
|
---|
12 | #define EIGEN_GENERIC_PACKET_MATH_H
|
---|
13 |
|
---|
14 | namespace Eigen {
|
---|
15 |
|
---|
16 | namespace internal {
|
---|
17 |
|
---|
18 | /** \internal
|
---|
19 | * \file GenericPacketMath.h
|
---|
20 | *
|
---|
21 | * Default implementation for types not supported by the vectorization.
|
---|
22 | * In practice these functions are provided to make easier the writing
|
---|
23 | * of generic vectorized code.
|
---|
24 | */
|
---|
25 |
|
---|
26 | #ifndef EIGEN_DEBUG_ALIGNED_LOAD
|
---|
27 | #define EIGEN_DEBUG_ALIGNED_LOAD
|
---|
28 | #endif
|
---|
29 |
|
---|
30 | #ifndef EIGEN_DEBUG_UNALIGNED_LOAD
|
---|
31 | #define EIGEN_DEBUG_UNALIGNED_LOAD
|
---|
32 | #endif
|
---|
33 |
|
---|
34 | #ifndef EIGEN_DEBUG_ALIGNED_STORE
|
---|
35 | #define EIGEN_DEBUG_ALIGNED_STORE
|
---|
36 | #endif
|
---|
37 |
|
---|
38 | #ifndef EIGEN_DEBUG_UNALIGNED_STORE
|
---|
39 | #define EIGEN_DEBUG_UNALIGNED_STORE
|
---|
40 | #endif
|
---|
41 |
|
---|
42 | struct default_packet_traits
|
---|
43 | {
|
---|
44 | enum {
|
---|
45 | HasAdd = 1,
|
---|
46 | HasSub = 1,
|
---|
47 | HasMul = 1,
|
---|
48 | HasNegate = 1,
|
---|
49 | HasAbs = 1,
|
---|
50 | HasAbs2 = 1,
|
---|
51 | HasMin = 1,
|
---|
52 | HasMax = 1,
|
---|
53 | HasConj = 1,
|
---|
54 | HasSetLinear = 1,
|
---|
55 |
|
---|
56 | HasDiv = 0,
|
---|
57 | HasSqrt = 0,
|
---|
58 | HasExp = 0,
|
---|
59 | HasLog = 0,
|
---|
60 | HasPow = 0,
|
---|
61 |
|
---|
62 | HasSin = 0,
|
---|
63 | HasCos = 0,
|
---|
64 | HasTan = 0,
|
---|
65 | HasASin = 0,
|
---|
66 | HasACos = 0,
|
---|
67 | HasATan = 0
|
---|
68 | };
|
---|
69 | };
|
---|
70 |
|
---|
71 | template<typename T> struct packet_traits : default_packet_traits
|
---|
72 | {
|
---|
73 | typedef T type;
|
---|
74 | enum {
|
---|
75 | Vectorizable = 0,
|
---|
76 | size = 1,
|
---|
77 | AlignedOnScalar = 0
|
---|
78 | };
|
---|
79 | enum {
|
---|
80 | HasAdd = 0,
|
---|
81 | HasSub = 0,
|
---|
82 | HasMul = 0,
|
---|
83 | HasNegate = 0,
|
---|
84 | HasAbs = 0,
|
---|
85 | HasAbs2 = 0,
|
---|
86 | HasMin = 0,
|
---|
87 | HasMax = 0,
|
---|
88 | HasConj = 0,
|
---|
89 | HasSetLinear = 0
|
---|
90 | };
|
---|
91 | };
|
---|
92 |
|
---|
93 | /** \internal \returns a + b (coeff-wise) */
|
---|
94 | template<typename Packet> inline Packet
|
---|
95 | padd(const Packet& a,
|
---|
96 | const Packet& b) { return a+b; }
|
---|
97 |
|
---|
98 | /** \internal \returns a - b (coeff-wise) */
|
---|
99 | template<typename Packet> inline Packet
|
---|
100 | psub(const Packet& a,
|
---|
101 | const Packet& b) { return a-b; }
|
---|
102 |
|
---|
103 | /** \internal \returns -a (coeff-wise) */
|
---|
104 | template<typename Packet> inline Packet
|
---|
105 | pnegate(const Packet& a) { return -a; }
|
---|
106 |
|
---|
107 | /** \internal \returns conj(a) (coeff-wise) */
|
---|
108 | template<typename Packet> inline Packet
|
---|
109 | pconj(const Packet& a) { return numext::conj(a); }
|
---|
110 |
|
---|
111 | /** \internal \returns a * b (coeff-wise) */
|
---|
112 | template<typename Packet> inline Packet
|
---|
113 | pmul(const Packet& a,
|
---|
114 | const Packet& b) { return a*b; }
|
---|
115 |
|
---|
116 | /** \internal \returns a / b (coeff-wise) */
|
---|
117 | template<typename Packet> inline Packet
|
---|
118 | pdiv(const Packet& a,
|
---|
119 | const Packet& b) { return a/b; }
|
---|
120 |
|
---|
121 | /** \internal \returns the min of \a a and \a b (coeff-wise) */
|
---|
122 | template<typename Packet> inline Packet
|
---|
123 | pmin(const Packet& a,
|
---|
124 | const Packet& b) { using std::min; return (min)(a, b); }
|
---|
125 |
|
---|
126 | /** \internal \returns the max of \a a and \a b (coeff-wise) */
|
---|
127 | template<typename Packet> inline Packet
|
---|
128 | pmax(const Packet& a,
|
---|
129 | const Packet& b) { using std::max; return (max)(a, b); }
|
---|
130 |
|
---|
131 | /** \internal \returns the absolute value of \a a */
|
---|
132 | template<typename Packet> inline Packet
|
---|
133 | pabs(const Packet& a) { using std::abs; return abs(a); }
|
---|
134 |
|
---|
135 | /** \internal \returns the bitwise and of \a a and \a b */
|
---|
136 | template<typename Packet> inline Packet
|
---|
137 | pand(const Packet& a, const Packet& b) { return a & b; }
|
---|
138 |
|
---|
139 | /** \internal \returns the bitwise or of \a a and \a b */
|
---|
140 | template<typename Packet> inline Packet
|
---|
141 | por(const Packet& a, const Packet& b) { return a | b; }
|
---|
142 |
|
---|
143 | /** \internal \returns the bitwise xor of \a a and \a b */
|
---|
144 | template<typename Packet> inline Packet
|
---|
145 | pxor(const Packet& a, const Packet& b) { return a ^ b; }
|
---|
146 |
|
---|
147 | /** \internal \returns the bitwise andnot of \a a and \a b */
|
---|
148 | template<typename Packet> inline Packet
|
---|
149 | pandnot(const Packet& a, const Packet& b) { return a & (!b); }
|
---|
150 |
|
---|
151 | /** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */
|
---|
152 | template<typename Packet> inline Packet
|
---|
153 | pload(const typename unpacket_traits<Packet>::type* from) { return *from; }
|
---|
154 |
|
---|
155 | /** \internal \returns a packet version of \a *from, (un-aligned load) */
|
---|
156 | template<typename Packet> inline Packet
|
---|
157 | ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; }
|
---|
158 |
|
---|
159 | /** \internal \returns a packet with elements of \a *from duplicated.
|
---|
160 | * For instance, for a packet of 8 elements, 4 scalar will be read from \a *from and
|
---|
161 | * duplicated to form: {from[0],from[0],from[1],from[1],,from[2],from[2],,from[3],from[3]}
|
---|
162 | * Currently, this function is only used for scalar * complex products.
|
---|
163 | */
|
---|
164 | template<typename Packet> inline Packet
|
---|
165 | ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; }
|
---|
166 |
|
---|
167 | /** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */
|
---|
168 | template<typename Packet> inline Packet
|
---|
169 | pset1(const typename unpacket_traits<Packet>::type& a) { return a; }
|
---|
170 |
|
---|
171 | /** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */
|
---|
172 | template<typename Scalar> inline typename packet_traits<Scalar>::type
|
---|
173 | plset(const Scalar& a) { return a; }
|
---|
174 |
|
---|
175 | /** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */
|
---|
176 | template<typename Scalar, typename Packet> inline void pstore(Scalar* to, const Packet& from)
|
---|
177 | { (*to) = from; }
|
---|
178 |
|
---|
179 | /** \internal copy the packet \a from to \a *to, (un-aligned store) */
|
---|
180 | template<typename Scalar, typename Packet> inline void pstoreu(Scalar* to, const Packet& from)
|
---|
181 | { (*to) = from; }
|
---|
182 |
|
---|
183 | /** \internal tries to do cache prefetching of \a addr */
|
---|
184 | template<typename Scalar> inline void prefetch(const Scalar* addr)
|
---|
185 | {
|
---|
186 | #if !defined(_MSC_VER)
|
---|
187 | __builtin_prefetch(addr);
|
---|
188 | #endif
|
---|
189 | }
|
---|
190 |
|
---|
191 | /** \internal \returns the first element of a packet */
|
---|
192 | template<typename Packet> inline typename unpacket_traits<Packet>::type pfirst(const Packet& a)
|
---|
193 | { return a; }
|
---|
194 |
|
---|
195 | /** \internal \returns a packet where the element i contains the sum of the packet of \a vec[i] */
|
---|
196 | template<typename Packet> inline Packet
|
---|
197 | preduxp(const Packet* vecs) { return vecs[0]; }
|
---|
198 |
|
---|
199 | /** \internal \returns the sum of the elements of \a a*/
|
---|
200 | template<typename Packet> inline typename unpacket_traits<Packet>::type predux(const Packet& a)
|
---|
201 | { return a; }
|
---|
202 |
|
---|
203 | /** \internal \returns the product of the elements of \a a*/
|
---|
204 | template<typename Packet> inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a)
|
---|
205 | { return a; }
|
---|
206 |
|
---|
207 | /** \internal \returns the min of the elements of \a a*/
|
---|
208 | template<typename Packet> inline typename unpacket_traits<Packet>::type predux_min(const Packet& a)
|
---|
209 | { return a; }
|
---|
210 |
|
---|
211 | /** \internal \returns the max of the elements of \a a*/
|
---|
212 | template<typename Packet> inline typename unpacket_traits<Packet>::type predux_max(const Packet& a)
|
---|
213 | { return a; }
|
---|
214 |
|
---|
215 | /** \internal \returns the reversed elements of \a a*/
|
---|
216 | template<typename Packet> inline Packet preverse(const Packet& a)
|
---|
217 | { return a; }
|
---|
218 |
|
---|
219 |
|
---|
220 | /** \internal \returns \a a with real and imaginary part flipped (for complex type only) */
|
---|
221 | template<typename Packet> inline Packet pcplxflip(const Packet& a)
|
---|
222 | {
|
---|
223 | // FIXME: uncomment the following in case we drop the internal imag and real functions.
|
---|
224 | // using std::imag;
|
---|
225 | // using std::real;
|
---|
226 | return Packet(imag(a),real(a));
|
---|
227 | }
|
---|
228 |
|
---|
229 | /**************************
|
---|
230 | * Special math functions
|
---|
231 | ***************************/
|
---|
232 |
|
---|
233 | /** \internal \returns the sine of \a a (coeff-wise) */
|
---|
234 | template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
---|
235 | Packet psin(const Packet& a) { using std::sin; return sin(a); }
|
---|
236 |
|
---|
237 | /** \internal \returns the cosine of \a a (coeff-wise) */
|
---|
238 | template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
---|
239 | Packet pcos(const Packet& a) { using std::cos; return cos(a); }
|
---|
240 |
|
---|
241 | /** \internal \returns the tan of \a a (coeff-wise) */
|
---|
242 | template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
---|
243 | Packet ptan(const Packet& a) { using std::tan; return tan(a); }
|
---|
244 |
|
---|
245 | /** \internal \returns the arc sine of \a a (coeff-wise) */
|
---|
246 | template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
---|
247 | Packet pasin(const Packet& a) { using std::asin; return asin(a); }
|
---|
248 |
|
---|
249 | /** \internal \returns the arc cosine of \a a (coeff-wise) */
|
---|
250 | template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
---|
251 | Packet pacos(const Packet& a) { using std::acos; return acos(a); }
|
---|
252 |
|
---|
253 | /** \internal \returns the exp of \a a (coeff-wise) */
|
---|
254 | template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
---|
255 | Packet pexp(const Packet& a) { using std::exp; return exp(a); }
|
---|
256 |
|
---|
257 | /** \internal \returns the log of \a a (coeff-wise) */
|
---|
258 | template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
---|
259 | Packet plog(const Packet& a) { using std::log; return log(a); }
|
---|
260 |
|
---|
261 | /** \internal \returns the square-root of \a a (coeff-wise) */
|
---|
262 | template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
---|
263 | Packet psqrt(const Packet& a) { using std::sqrt; return sqrt(a); }
|
---|
264 |
|
---|
265 | /***************************************************************************
|
---|
266 | * The following functions might not have to be overwritten for vectorized types
|
---|
267 | ***************************************************************************/
|
---|
268 |
|
---|
269 | /** \internal copy a packet with constant coeficient \a a (e.g., [a,a,a,a]) to \a *to. \a to must be 16 bytes aligned */
|
---|
270 | // NOTE: this function must really be templated on the packet type (think about different packet types for the same scalar type)
|
---|
271 | template<typename Packet>
|
---|
272 | inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename unpacket_traits<Packet>::type& a)
|
---|
273 | {
|
---|
274 | pstore(to, pset1<Packet>(a));
|
---|
275 | }
|
---|
276 |
|
---|
277 | /** \internal \returns a * b + c (coeff-wise) */
|
---|
278 | template<typename Packet> inline Packet
|
---|
279 | pmadd(const Packet& a,
|
---|
280 | const Packet& b,
|
---|
281 | const Packet& c)
|
---|
282 | { return padd(pmul(a, b),c); }
|
---|
283 |
|
---|
284 | /** \internal \returns a packet version of \a *from.
|
---|
285 | * If LoadMode equals #Aligned, \a from must be 16 bytes aligned */
|
---|
286 | template<typename Packet, int LoadMode>
|
---|
287 | inline Packet ploadt(const typename unpacket_traits<Packet>::type* from)
|
---|
288 | {
|
---|
289 | if(LoadMode == Aligned)
|
---|
290 | return pload<Packet>(from);
|
---|
291 | else
|
---|
292 | return ploadu<Packet>(from);
|
---|
293 | }
|
---|
294 |
|
---|
295 | /** \internal copy the packet \a from to \a *to.
|
---|
296 | * If StoreMode equals #Aligned, \a to must be 16 bytes aligned */
|
---|
297 | template<typename Scalar, typename Packet, int LoadMode>
|
---|
298 | inline void pstoret(Scalar* to, const Packet& from)
|
---|
299 | {
|
---|
300 | if(LoadMode == Aligned)
|
---|
301 | pstore(to, from);
|
---|
302 | else
|
---|
303 | pstoreu(to, from);
|
---|
304 | }
|
---|
305 |
|
---|
306 | /** \internal default implementation of palign() allowing partial specialization */
|
---|
307 | template<int Offset,typename PacketType>
|
---|
308 | struct palign_impl
|
---|
309 | {
|
---|
310 | // by default data are aligned, so there is nothing to be done :)
|
---|
311 | static inline void run(PacketType&, const PacketType&) {}
|
---|
312 | };
|
---|
313 |
|
---|
314 | /** \internal update \a first using the concatenation of the packet_size minus \a Offset last elements
|
---|
315 | * of \a first and \a Offset first elements of \a second.
|
---|
316 | *
|
---|
317 | * This function is currently only used to optimize matrix-vector products on unligned matrices.
|
---|
318 | * It takes 2 packets that represent a contiguous memory array, and returns a packet starting
|
---|
319 | * at the position \a Offset. For instance, for packets of 4 elements, we have:
|
---|
320 | * Input:
|
---|
321 | * - first = {f0,f1,f2,f3}
|
---|
322 | * - second = {s0,s1,s2,s3}
|
---|
323 | * Output:
|
---|
324 | * - if Offset==0 then {f0,f1,f2,f3}
|
---|
325 | * - if Offset==1 then {f1,f2,f3,s0}
|
---|
326 | * - if Offset==2 then {f2,f3,s0,s1}
|
---|
327 | * - if Offset==3 then {f3,s0,s1,s3}
|
---|
328 | */
|
---|
329 | template<int Offset,typename PacketType>
|
---|
330 | inline void palign(PacketType& first, const PacketType& second)
|
---|
331 | {
|
---|
332 | palign_impl<Offset,PacketType>::run(first,second);
|
---|
333 | }
|
---|
334 |
|
---|
335 | /***************************************************************************
|
---|
336 | * Fast complex products (GCC generates a function call which is very slow)
|
---|
337 | ***************************************************************************/
|
---|
338 |
|
---|
339 | template<> inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b)
|
---|
340 | { return std::complex<float>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
|
---|
341 |
|
---|
342 | template<> inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b)
|
---|
343 | { return std::complex<double>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
|
---|
344 |
|
---|
345 | } // end namespace internal
|
---|
346 |
|
---|
347 | } // end namespace Eigen
|
---|
348 |
|
---|
349 | #endif // EIGEN_GENERIC_PACKET_MATH_H
|
---|
350 |
|
---|