46 # define Float float |
43 # define Float float |
47 # define Eps 1e-6 |
44 # define Eps 1e-6 |
48 # define Inf FLT_MAX |
45 # define Inf FLT_MAX |
49 #endif |
46 #endif |
50 |
47 |
|
48 #ifndef NO_SSE |
|
49 // SSE intrinsics |
|
50 #include <xmmintrin.h> |
|
51 |
51 const __m128 mZero = _mm_set_ps1(0.); |
52 const __m128 mZero = _mm_set_ps1(0.); |
52 const __m128 mOne = _mm_set_ps1(1.); |
53 const __m128 mOne = _mm_set_ps1(1.); |
|
54 const __m128 mTwo = _mm_set_ps1(2.); |
53 const __m128 mEps = _mm_set_ps1(Eps); |
55 const __m128 mEps = _mm_set_ps1(Eps); |
54 const __m128 mMEps = _mm_set_ps1(-Eps); |
56 const __m128 mMEps = _mm_set_ps1(-Eps); |
55 const __m128 mInf = _mm_set_ps1(Inf); |
57 const __m128 mInf = _mm_set_ps1(Inf); |
56 const __m128 mAllSet = _mm_cmplt_ps(mZero, mOne); |
58 const __m128 mAllSet = _mm_cmplt_ps(mZero, mOne); |
|
59 |
|
// Cheap lane-wise stand-in for pow(base, exponent).
//
// Evaluates, independently in each of the four lanes,
//     base / (base + (exponent - exponent * base))
// using the approximate reciprocal instruction instead of a true division,
// so the result carries the reduced precision of _mm_rcp_ps.
// NOTE(review): this has the shape of the rational approximation
// x / (n - n*x + x) often attributed to Schlick; presumably callers pass
// base in [0, 1] -- confirm against the call sites.
//
// base:     packed bases
// exponent: packed exponents
// returns:  packed approximate powers
inline const __m128 _mm_fastpow(const __m128& base, const __m128& exponent)
{
    // exponent * base, then (exponent - exponent * base)
    const __m128 scaled   = _mm_mul_ps(exponent, base);
    const __m128 reduced  = _mm_sub_ps(exponent, scaled);
    // base + (exponent - exponent * base)
    const __m128 divisor  = _mm_add_ps(base, reduced);
    // Multiply by the approximate reciprocal rather than dividing.
    const __m128 inverse  = _mm_rcp_ps(divisor);
    return _mm_mul_ps(base, inverse);
}
|
67 #endif |
57 |
68 |
58 /* verbosity level: |
69 /* verbosity level: |
59 0: only errors (E) |
70 0: only errors (E) |
60 1: major status messages (*) |
71 1: major status messages (*) |
61 2: minor status, progress (-) |
72 2: minor status, progress (-) |