1 | /*
|
---|
2 | Copyright (c) 2011, Intel Corporation. All rights reserved.
|
---|
3 |
|
---|
4 | Redistribution and use in source and binary forms, with or without modification,
|
---|
5 | are permitted provided that the following conditions are met:
|
---|
6 |
|
---|
7 | * Redistributions of source code must retain the above copyright notice, this
|
---|
8 | list of conditions and the following disclaimer.
|
---|
9 | * Redistributions in binary form must reproduce the above copyright notice,
|
---|
10 | this list of conditions and the following disclaimer in the documentation
|
---|
11 | and/or other materials provided with the distribution.
|
---|
12 | * Neither the name of Intel Corporation nor the names of its contributors may
|
---|
13 | be used to endorse or promote products derived from this software without
|
---|
14 | specific prior written permission.
|
---|
15 |
|
---|
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
---|
17 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
---|
18 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
---|
19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
---|
20 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
---|
21 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
---|
22 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
---|
23 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
---|
24 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
---|
25 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
---|
26 |
|
---|
27 | ********************************************************************************
|
---|
28 | * Content : Eigen bindings to Intel(R) MKL
|
---|
29 | * MKL VML support for coefficient-wise unary Eigen expressions like a=b.sin()
|
---|
30 | ********************************************************************************
|
---|
31 | */
|
---|
32 |
|
---|
33 | #ifndef EIGEN_ASSIGN_VML_H
|
---|
34 | #define EIGEN_ASSIGN_VML_H
|
---|
35 |
|
---|
36 | namespace Eigen {
|
---|
37 |
|
---|
38 | namespace internal {
|
---|
39 |
|
---|
// Fallback used for every functor that has no dedicated specialization:
// IsSupported is 0, so such a functor is never routed to MKL VML.
template<typename Op>
struct vml_call
{
  enum { IsSupported = 0 };
};
42 |
|
---|
43 | template<typename Dst, typename Src, typename UnaryOp>
|
---|
44 | class vml_assign_traits
|
---|
45 | {
|
---|
46 | private:
|
---|
47 | enum {
|
---|
48 | DstHasDirectAccess = Dst::Flags & DirectAccessBit,
|
---|
49 | SrcHasDirectAccess = Src::Flags & DirectAccessBit,
|
---|
50 |
|
---|
51 | StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)),
|
---|
52 | InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
|
---|
53 | : int(Dst::Flags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
|
---|
54 | : int(Dst::RowsAtCompileTime),
|
---|
55 | InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
|
---|
56 | : int(Dst::Flags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
|
---|
57 | : int(Dst::MaxRowsAtCompileTime),
|
---|
58 | MaxSizeAtCompileTime = Dst::SizeAtCompileTime,
|
---|
59 |
|
---|
60 | MightEnableVml = vml_call<UnaryOp>::IsSupported && StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess
|
---|
61 | && Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1,
|
---|
62 | MightLinearize = MightEnableVml && (int(Dst::Flags) & int(Src::Flags) & LinearAccessBit),
|
---|
63 | VmlSize = MightLinearize ? MaxSizeAtCompileTime : InnerMaxSize,
|
---|
64 | LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD,
|
---|
65 | MayEnableVml = MightEnableVml && LargeEnough,
|
---|
66 | MayLinearize = MayEnableVml && MightLinearize
|
---|
67 | };
|
---|
68 | public:
|
---|
69 | enum {
|
---|
70 | Traversal = MayLinearize ? LinearVectorizedTraversal
|
---|
71 | : MayEnableVml ? InnerVectorizedTraversal
|
---|
72 | : DefaultTraversal
|
---|
73 | };
|
---|
74 | };
|
---|
75 |
|
---|
76 | template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling,
|
---|
77 | int VmlTraversal = vml_assign_traits<Derived1, Derived2, UnaryOp>::Traversal >
|
---|
78 | struct vml_assign_impl
|
---|
79 | : assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>
|
---|
80 | {
|
---|
81 | };
|
---|
82 |
|
---|
83 | template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling>
|
---|
84 | struct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, InnerVectorizedTraversal>
|
---|
85 | {
|
---|
86 | typedef typename Derived1::Scalar Scalar;
|
---|
87 | typedef typename Derived1::Index Index;
|
---|
88 | static inline void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src)
|
---|
89 | {
|
---|
90 | // in case we want to (or have to) skip VML at runtime we can call:
|
---|
91 | // assign_impl<Derived1,Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>::run(dst,src);
|
---|
92 | const Index innerSize = dst.innerSize();
|
---|
93 | const Index outerSize = dst.outerSize();
|
---|
94 | for(Index outer = 0; outer < outerSize; ++outer) {
|
---|
95 | const Scalar *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) :
|
---|
96 | &(src.nestedExpression().coeffRef(0, outer));
|
---|
97 | Scalar *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer));
|
---|
98 | vml_call<UnaryOp>::run(src.functor(), innerSize, src_ptr, dst_ptr );
|
---|
99 | }
|
---|
100 | }
|
---|
101 | };
|
---|
102 |
|
---|
103 | template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling>
|
---|
104 | struct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, LinearVectorizedTraversal>
|
---|
105 | {
|
---|
106 | static inline void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src)
|
---|
107 | {
|
---|
108 | // in case we want to (or have to) skip VML at runtime we can call:
|
---|
109 | // assign_impl<Derived1,Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>::run(dst,src);
|
---|
110 | vml_call<UnaryOp>::run(src.functor(), dst.size(), src.nestedExpression().data(), dst.data() );
|
---|
111 | }
|
---|
112 | };
|
---|
113 |
|
---|
// Macros
115 |
|
---|
// Defines the `Specialized` variant of assign_impl for a CwiseUnaryOp source
// and the given (TRAVERSAL, UNROLLING) pair; its run() forwards to
// vml_assign_impl instantiated with the same pair.
#define EIGEN_MKL_VML_SPECIALIZE_ASSIGN(TRAVERSAL,UNROLLING)                                                 \
  template<typename Derived1, typename Derived2, typename UnaryOp>                                           \
  struct assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>, TRAVERSAL, UNROLLING, Specialized> {  \
    static inline void run(Derived1 &dst, const Eigen::CwiseUnaryOp<UnaryOp, Derived2> &src) {               \
      typedef vml_assign_impl<Derived1,Derived2,UnaryOp,TRAVERSAL,UNROLLING> Dispatcher;                     \
      Dispatcher::run(dst, src);                                                                             \
    }                                                                                                        \
  };
123 |
|
---|
124 | EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,NoUnrolling)
|
---|
125 | EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,CompleteUnrolling)
|
---|
126 | EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,InnerUnrolling)
|
---|
127 | EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,NoUnrolling)
|
---|
128 | EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,CompleteUnrolling)
|
---|
129 | EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,NoUnrolling)
|
---|
130 | EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,CompleteUnrolling)
|
---|
131 | EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,InnerUnrolling)
|
---|
132 | EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,CompleteUnrolling)
|
---|
133 | EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,NoUnrolling)
|
---|
134 | EIGEN_MKL_VML_SPECIALIZE_ASSIGN(SliceVectorizedTraversal,NoUnrolling)
|
---|
135 |
|
---|
136 |
|
---|
// VML accuracy mode passed to the vm* functions: VML_LA only when
// EIGEN_FAST_MATH is defined and equal to 1, VML_HA in every other case.
#if defined(EIGEN_FAST_MATH) && (EIGEN_FAST_MATH == 1)
#define EIGEN_MKL_VML_MODE VML_LA
#else
#define EIGEN_MKL_VML_MODE VML_HA
#endif
142 |
|
---|
// Specializes vml_call for scalar_<EIGENOP>_op<EIGENTYPE>: marks the functor as
// supported and forwards run() to VMLOP(size, src, dst), with both pointers
// converted to VMLTYPE pointers. The functor argument itself is not used.
// NOTE(review): size is a plain int although callers pass an Index -- confirm
// that the implied narrowing is acceptable.
#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE)      \
  template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > {                \
    enum { IsSupported = 1 };                                                     \
    static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& /*func*/,     \
                            int size, const EIGENTYPE* src, EIGENTYPE* dst) {     \
      const VMLTYPE* vmlSrc = reinterpret_cast<const VMLTYPE*>(src);              \
      VMLTYPE* vmlDst = reinterpret_cast<VMLTYPE*>(dst);                          \
      VMLOP(size, vmlSrc, vmlDst);                                                \
    }                                                                             \
  };
151 |
|
---|
// Same as EIGEN_MKL_VML_DECLARE_UNARY_CALL, but for VML functions that take an
// explicit accuracy mode as last argument: EIGEN_MKL_VML_MODE is passed by value.
// NOTE(review): size is a plain int although callers pass an Index -- confirm
// that the implied narrowing is acceptable.
#define EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE)   \
  template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > {                \
    enum { IsSupported = 1 };                                                     \
    static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& /*func*/,     \
                            int size, const EIGENTYPE* src, EIGENTYPE* dst) {     \
      MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE;                                     \
      const VMLTYPE* vmlSrc = reinterpret_cast<const VMLTYPE*>(src);              \
      VMLTYPE* vmlDst = reinterpret_cast<VMLTYPE*>(dst);                          \
      VMLOP(size, vmlSrc, vmlDst, vmlMode);                                       \
    }                                                                             \
  };
161 |
|
---|
// Specializes vml_call for scalar_<EIGENOP>_op<EIGENTYPE> using a VML function
// that raises every source coefficient to one common exponent. The exponent is
// read from func.m_exponent; size, exponent and accuracy mode are all passed
// to VMLOP by address.
// NOTE(review): size is a plain int although callers pass an Index -- confirm
// that the implied narrowing is acceptable.
#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE)        \
  template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > {                \
    enum { IsSupported = 1 };                                                     \
    static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& func,         \
                            int size, const EIGENTYPE* src, EIGENTYPE* dst) {     \
      EIGENTYPE exponent = func.m_exponent;                                       \
      MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE;                                     \
      const VMLTYPE* vmlSrc = reinterpret_cast<const VMLTYPE*>(src);              \
      const VMLTYPE* vmlExponent = reinterpret_cast<const VMLTYPE*>(&exponent);   \
      VMLTYPE* vmlDst = reinterpret_cast<VMLTYPE*>(dst);                          \
      VMLOP(&size, vmlSrc, vmlExponent, vmlDst, &vmlMode);                        \
    }                                                                             \
  };
173 |
|
---|
// Real scalars: float uses the vs-prefixed VML function, double the vd-prefixed one.
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP)                    \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vs##VMLOP, float,  float)             \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vd##VMLOP, double, double)

// Complex scalars: scomplex uses the vc-prefixed VML function, dcomplex the vz-prefixed one.
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP)                 \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vc##VMLOP, scomplex, MKL_Complex8)    \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vz##VMLOP, dcomplex, MKL_Complex16)

// All four scalar types at once.
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP)                         \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP)                          \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP)
185 |
|
---|
186 |
|
---|
// Real scalars, mode-taking variant: float -> vms-prefixed, double -> vmd-prefixed function.
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP)                    \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vms##VMLOP, float,  float)            \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmd##VMLOP, double, double)

// Complex scalars, mode-taking variant: scomplex -> vmc-prefixed, dcomplex -> vmz-prefixed function.
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP)                 \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmc##VMLOP, scomplex, MKL_Complex8)   \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmz##VMLOP, dcomplex, MKL_Complex16)

// All four scalar types at once, mode-taking variant.
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(EIGENOP, VMLOP)                         \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP)                          \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP)
198 |
|
---|
199 |
|
---|
200 | EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sin, Sin)
|
---|
201 | EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(asin, Asin)
|
---|
202 | EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(cos, Cos)
|
---|
203 | EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(acos, Acos)
|
---|
204 | EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(tan, Tan)
|
---|
205 | //EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs, Abs)
|
---|
206 | EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(exp, Exp)
|
---|
207 | EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(log, Ln)
|
---|
208 | EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sqrt, Sqrt)
|
---|
209 |
|
---|
210 | EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr)
|
---|
211 |
|
---|
// The vm*powx functions are not available in the Windows version of MKL.
213 | #ifndef _WIN32
|
---|
214 | EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmspowx_, float, float)
|
---|
215 | EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdpowx_, double, double)
|
---|
216 | EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcpowx_, scomplex, MKL_Complex8)
|
---|
217 | EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzpowx_, dcomplex, MKL_Complex16)
|
---|
218 | #endif
|
---|
219 |
|
---|
220 | } // end namespace internal
|
---|
221 |
|
---|
222 | } // end namespace Eigen
|
---|
223 |
|
---|
224 | #endif // EIGEN_ASSIGN_VML_H
|
---|