|
1 /* |
|
2 * simd.h: abstraction of Intel SSE instruction set |
|
3 * |
|
4 * This file is part of Pyrit Ray Tracer. |
|
5 * |
|
6 * Copyright 2008 Radek Brich |
|
7 * |
|
8 * Permission is hereby granted, free of charge, to any person obtaining a copy |
|
9 * of this software and associated documentation files (the "Software"), to deal |
|
10 * in the Software without restriction, including without limitation the rights |
|
11 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|
12 * copies of the Software, and to permit persons to whom the Software is |
|
13 * furnished to do so, subject to the following conditions: |
|
14 * |
|
15 * The above copyright notice and this permission notice shall be included in |
|
16 * all copies or substantial portions of the Software. |
|
17 * |
|
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|
21 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|
23 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
|
24 * THE SOFTWARE. |
|
25 */ |
|
26 |
|
27 #ifndef SIMD_H |
|
28 #define SIMD_H |
|
29 |
|
30 #include "common.h" |
|
31 |
|
32 #ifndef NO_SIMD |
|
33 |
|
34 #include <xmmintrin.h> |
|
35 |
|
/* Four packed single-precision floats (the SSE __m128 vector type). */
typedef __m128 mfloat4;

/*
 * Broadcast constants. Each macro expands to an expression that builds a
 * vector holding the same value in all four lanes; the expression is
 * re-evaluated at every use.
 *
 * Eps and Inf are not defined in this header (presumably they come from
 * common.h -- confirm). They are parenthesized before negation so that
 * mMEps and mMInf stay correct even if Eps or Inf expands to a compound
 * expression such as "a - b".
 */
#define mZero   _mm_set_ps1(0.0f)
#define mOne    _mm_set_ps1(1.0f)
#define mTwo    _mm_set_ps1(2.0f)
#define mEps    _mm_set_ps1(Eps)
#define mMEps   _mm_set_ps1(-(Eps))
#define mInf    _mm_set_ps1(Inf)
#define mMInf   _mm_set_ps1(-(Inf))
/* Result of comparing 0 < 1 in every lane, i.e. a mask that is true everywhere. */
#define mAllSet _mm_cmplt_ps(mZero, mOne)

/* Vector construction: broadcast one value, or set four lanes individually. */
#define mset1 _mm_set_ps1
#define mset  _mm_set_ps

/* Lane-wise arithmetic. */
#define madd  _mm_add_ps
#define msub  _mm_sub_ps
#define mmul  _mm_mul_ps
#define mdiv  _mm_div_ps
#define msqrt _mm_sqrt_ps

/* Bitwise logic on whole vectors. */
#define mand _mm_and_ps
#define mor  _mm_or_ps

/* Lane-wise comparisons; each yields a per-lane mask. */
#define mcmpgt  _mm_cmpgt_ps
#define mcmplt  _mm_cmplt_ps
#define mcmpge  _mm_cmpge_ps
#define mcmple  _mm_cmple_ps
#define mcmpeq  _mm_cmpeq_ps
#define mcmpneq _mm_cmpneq_ps

/* Lane-wise minimum and maximum. */
#define mmin _mm_min_ps
#define mmax _mm_max_ps

/* Packs the sign bit of each lane into the low four bits of an int. */
#define mmovemask _mm_movemask_ps
|
67 |
|
68 inline const mfloat4 mselect(const mfloat4& mask, const mfloat4& a, const mfloat4& b) |
|
69 { |
|
70 return _mm_or_ps(_mm_and_ps(mask, a), _mm_andnot_ps(mask, b)); |
|
71 } |
|
72 |
|
73 inline const mfloat4 mfastpow(const mfloat4& base, const mfloat4& exponent) |
|
74 { |
|
75 __m128 denom = _mm_mul_ps(exponent, base); |
|
76 denom = _mm_sub_ps(exponent, denom); |
|
77 denom = _mm_add_ps(base, denom); |
|
78 return _mm_mul_ps(base, _mm_rcp_ps(denom)); |
|
79 } |
|
80 #endif |
|
81 |
|
82 #endif |