// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009 Rohit Garg <rpg.314@gmail.com>
// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
| 10 |
|
---|
| 11 | #ifndef EIGEN_MOREVECTORIZATION_MATHFUNCTIONS_H
|
---|
| 12 | #define EIGEN_MOREVECTORIZATION_MATHFUNCTIONS_H
|
---|
| 13 |
|
---|
| 14 | namespace Eigen {
|
---|
| 15 |
|
---|
| 16 | namespace internal {
|
---|
| 17 |
|
---|
/** \internal
  * \returns the arcsin of \a a (coeff-wise)
  *
  * Generic version: simply forwards the argument to std::asin. Explicit
  * specializations of this template can replace it for specific packet types.
  */
template<typename Packet>
inline static Packet pasin(Packet a)
{
  return std::asin(a);
}
|
---|
| 20 |
|
---|
#ifdef EIGEN_VECTORIZE_SSE

/** \internal \returns the arcsin of \a x (coeff-wise), specialization for Packet4f.
  *
  * The computation is carried out on a = |x|; the sign bit of \a x is put back
  * onto the result at the very end. Each lane falls into one of two ranges:
  *   - a <= 0.5 : asin(a) is approximated by  a + a*z*P(z)  with z = a*a;
  *   - a >  0.5 : asin(a) = pi/2 - 2*asin(sqrt((1-a)/2)), where the inner asin
  *                is approximated by the same expression with z = (1-a)/2 and
  *                sqrt(z) in place of a.
  * P is the degree-4 polynomial whose coefficients are asin1..asin5 below.
  * Since both ranges share P, the per-lane arguments are selected first and the
  * polynomial is evaluated only once; every lane still goes through exactly the
  * same sequence of operations as it would in a dedicated per-range evaluation.
  *
  * NOTE(review): lanes with |x| > 1 take the "a > 0.5" range and presumably end
  * up as NaN through psqrt of a negative value -- confirm against psqrt.
  */
template<> EIGEN_DONT_INLINE Packet4f pasin(Packet4f x)
{
  _EIGEN_DECLARE_CONST_Packet4f(half, 0.5);
  _EIGEN_DECLARE_CONST_Packet4f(minus_half, -0.5);

  _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(sign_mask, 0x80000000);

  _EIGEN_DECLARE_CONST_Packet4f(pi_over_2, 3.141592654*0.5);

  // polynomial coefficients, highest order first
  _EIGEN_DECLARE_CONST_Packet4f(asin1, 4.2163199048E-2);
  _EIGEN_DECLARE_CONST_Packet4f(asin2, 2.4181311049E-2);
  _EIGEN_DECLARE_CONST_Packet4f(asin3, 4.5470025998E-2);
  _EIGEN_DECLARE_CONST_Packet4f(asin4, 7.4953002686E-2);
  _EIGEN_DECLARE_CONST_Packet4f(asin5, 1.6666752422E-1);

  const Packet4f a = pabs(x);                             // absolute value
  const Packet4f sign_bit = _mm_and_ps(x, p4f_sign_mask); // sign of the input, re-applied on return

  // all-ones in the lanes where a > 0.5, all-zeros elsewhere
  const Packet4f branch_mask = _mm_cmpgt_ps(a, p4f_half);

  // arguments for the range a > 0.5: z = (1-a)/2 and sqrt(z)
  const Packet4f z_large = pmadd(p4f_minus_half, a, p4f_half);
  const Packet4f x_large = psqrt(z_large);

  // argument for the range a <= 0.5: z = a*a (and a itself)
  const Packet4f z_small = pmul(a, a);

  // per-lane selection of the polynomial arguments
  const Packet4f z_arg = _mm_or_ps(_mm_and_ps(branch_mask, z_large),
                                   _mm_andnot_ps(branch_mask, z_small));
  const Packet4f x_arg = _mm_or_ps(_mm_and_ps(branch_mask, x_large),
                                   _mm_andnot_ps(branch_mask, a));

  // Horner evaluation of P(z_arg)
  Packet4f poly = pmadd(p4f_asin1, z_arg, p4f_asin2);
  poly = pmadd(poly, z_arg, p4f_asin3);
  poly = pmadd(poly, z_arg, p4f_asin4);
  poly = pmadd(poly, z_arg, p4f_asin5);
  poly = pmul(poly, z_arg); // plain mul by z so that the last step can be a madd

  // x_arg + x_arg*z_arg*P(z_arg): the final value for a <= 0.5,
  // the inner asin for a > 0.5
  const Packet4f core = pmadd(poly, x_arg, x_arg);

  // a > 0.5 only: pi/2 - 2*core
  const Packet4f reflected = psub(p4f_pi_over_2, padd(core, core));

  /* select the correct result for each lane */
  const Packet4f z = _mm_or_ps(_mm_and_ps(branch_mask, reflected),
                               _mm_andnot_ps(branch_mask, core));

  /* update the sign */
  return _mm_xor_ps(z, sign_bit);
}

#endif // EIGEN_VECTORIZE_SSE
|
---|
| 90 |
|
---|
| 91 | } // end namespace internal
|
---|
| 92 |
|
---|
| 93 | } // end namespace Eigen
|
---|
| 94 |
|
---|
| 95 | #endif // EIGEN_MOREVECTORIZATION_MATHFUNCTIONS_H
|
---|