/*
 * Shuffle immediates that broadcast a single source lane: mShuffleN is the
 * _MM_SHUFFLE control value that picks element N for all four selector
 * positions.
 */
#define mShuffle1 _MM_SHUFFLE(1,1,1,1)
#define mShuffle2 _MM_SHUFFLE(2,2,2,2)
#define mShuffle3 _MM_SHUFFLE(3,3,3,3)
/* Short alias for the SSE shuffle intrinsic. */
#define mshuffle _mm_shuffle_ps
|
75 /** select values from a and b according to mask (a if mask is 1, b if mask is 0) */ |
75 inline const mfloat4 mselect(const mfloat4& mask, const mfloat4& a, const mfloat4& b) |
76 inline const mfloat4 mselect(const mfloat4& mask, const mfloat4& a, const mfloat4& b) |
76 { |
77 { |
77 return _mm_or_ps(_mm_and_ps(mask, a), _mm_andnot_ps(mask, b)); |
78 return _mm_or_ps(_mm_and_ps(mask, a), _mm_andnot_ps(mask, b)); |
78 } |
79 } |
79 |
80 |
|
81 /** fast power function */ |
80 inline const mfloat4 mfastpow(const mfloat4& base, const mfloat4& exponent) |
82 inline const mfloat4 mfastpow(const mfloat4& base, const mfloat4& exponent) |
81 { |
83 { |
82 __m128 denom = _mm_mul_ps(exponent, base); |
84 __m128 denom = _mm_mul_ps(exponent, base); |
83 denom = _mm_sub_ps(exponent, denom); |
85 denom = _mm_sub_ps(exponent, denom); |
84 denom = _mm_add_ps(base, denom); |
86 denom = _mm_add_ps(base, denom); |