include/simd.h
branch pyrit
changeset 92 9af5c039b678
child 94 4c8abb8977dc
equal deleted inserted replaced
91:9d66d323c354 92:9af5c039b678
       
     1 /*
       
     2  * simd.h: abstraction of Intel SSE instruction set
       
     3  *
       
     4  * This file is part of Pyrit Ray Tracer.
       
     5  *
       
     6  * Copyright 2008  Radek Brich
       
     7  *
       
     8  * Permission is hereby granted, free of charge, to any person obtaining a copy
       
     9  * of this software and associated documentation files (the "Software"), to deal
       
    10  * in the Software without restriction, including without limitation the rights
       
    11  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
       
    12  * copies of the Software, and to permit persons to whom the Software is
       
    13  * furnished to do so, subject to the following conditions:
       
    14  *
       
    15  * The above copyright notice and this permission notice shall be included in
       
    16  * all copies or substantial portions of the Software.
       
    17  *
       
    18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
       
    19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
       
    20  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
       
    21  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
       
    22  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
       
    23  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
       
    24  * THE SOFTWARE.
       
    25  */
       
    26 
       
    27 #ifndef SIMD_H
       
    28 #define SIMD_H
       
    29 
       
    30 #include "common.h"
       
    31 
       
    32 #ifndef NO_SIMD
       
    33 
       
    34 #include <xmmintrin.h>
       
    35 
       
    /* Vector of four packed single-precision floats (the SSE __m128 type). */
    typedef __m128 mfloat4;

    /*
     * Short aliases for the SSE intrinsics used by this header's clients.
     * Every alias is a plain rename; arguments and results are those of the
     * underlying _mm_* intrinsic.
     */

    /* Construction: mset1 puts one value in every lane, mset takes four values
     * (argument order as defined by _mm_set_ps). */
    #define mset1      _mm_set_ps1
    #define mset       _mm_set_ps

    /* Lane-wise arithmetic. */
    #define madd       _mm_add_ps
    #define msub       _mm_sub_ps
    #define mmul       _mm_mul_ps
    #define mdiv       _mm_div_ps
    #define msqrt      _mm_sqrt_ps

    /* Bitwise logic on the raw lane bits. */
    #define mand       _mm_and_ps
    #define mor        _mm_or_ps

    /* Lane-wise comparisons; each result lane is an all-ones or all-zeros mask. */
    #define mcmpgt     _mm_cmpgt_ps
    #define mcmplt     _mm_cmplt_ps
    #define mcmpge     _mm_cmpge_ps
    #define mcmple     _mm_cmple_ps
    #define mcmpeq     _mm_cmpeq_ps
    #define mcmpneq    _mm_cmpneq_ps

    /* Lane-wise minimum and maximum. */
    #define mmin       _mm_min_ps
    #define mmax       _mm_max_ps

    /* Collects the sign bit of each lane into the low four bits of an int. */
    #define mmovemask  _mm_movemask_ps

    /*
     * Broadcast constants. Each use expands to a fresh intrinsic call.
     * Eps and Inf are not defined in this header; presumably they come from
     * common.h -- confirm.
     */
    #define mZero      mset1(0.0f)
    #define mOne       mset1(1.0f)
    #define mTwo       mset1(2.0f)
    #define mEps       mset1(Eps)
    #define mMEps      mset1(-Eps)
    #define mInf       mset1(Inf)
    #define mMInf      mset1(-Inf)

    /* Mask with every bit set: the comparison 0 < 1 is true in all four lanes. */
    #define mAllSet    mcmplt(mZero, mOne)
       
    67 
       
    68 inline const mfloat4 mselect(const mfloat4& mask, const mfloat4& a, const mfloat4& b)
       
    69 {
       
    70 	return _mm_or_ps(_mm_and_ps(mask, a), _mm_andnot_ps(mask, b));
       
    71 }
       
    72 
       
    73 inline const mfloat4 mfastpow(const mfloat4& base, const mfloat4& exponent)
       
    74 {
       
    75     __m128 denom = _mm_mul_ps(exponent, base);
       
    76     denom = _mm_sub_ps(exponent, denom);
       
    77     denom = _mm_add_ps(base, denom);
       
    78     return _mm_mul_ps(base, _mm_rcp_ps(denom));
       
    79 }
       
    80 #endif
       
    81 
       
    82 #endif