// Patch summary: unified diff of include/vector.h, revision 9d66d323c354 -> 9af5c039b678.
// What the hunks below change:
//  * The copyright line gains the year 2008; `#include "simd.h"` is added after "common.h".
//  * The preprocessor guard NO_SSE is renamed to NO_SIMD everywhere it appears.
//  * The raw SSE type __m128 becomes mfloat4 (Vector member mps -> mf4; VectorPacket members
//    ma/mx/my/mz; parameter and return types of dot(), operator*, operator/ and friends).
//  * Direct intrinsics are replaced by wrapper calls: _mm_add_ps -> madd, _mm_sub_ps -> msub,
//    _mm_mul_ps -> mmul, _mm_div_ps -> mdiv, _mm_sqrt_ps -> msqrt, _mm_set_ps1 -> mset1, and the
//    _mm_or_ps(_mm_and_ps(mask, a), _mm_andnot_ps(mask, b)) blend -> mselect(mask, a, b).
//    (Presumably the wrappers are declared in the newly included simd.h; not visible here, confirm.)
//  * Vector gains a non-const operator[]; operator== now takes `const Vector &`.
//  * `1./f` becomes `1.0f / f` in Vector operator/= and in operator/(const Vector &, const Float &).
//  * Both VectorPacket::selectiveAdd overloads now take the mask (and the scalar operand) as
//    `const mfloat4 &` instead of a by-value __m128.
//  * VectorPacket::normalize is condensed: the two adds are nested, and sqrt + divide become
//    mdiv(mOne, msqrt(m)).
// NOTE(review): in this copy the patch's line breaks are collapsed, and the first `#include` of the
// `@@ -31,6 +31,7 @@` hunk has no header name (looks stripped during extraction). Restore both from
// the repository before attempting to apply the patch.
diff -r 9d66d323c354 -r 9af5c039b678 include/vector.h --- a/include/vector.h Fri May 02 13:27:47 2008 +0200 +++ b/include/vector.h Mon May 05 15:31:14 2008 +0200 @@ -3,7 +3,7 @@ * * This file is part of Pyrit Ray Tracer. * - * Copyright 2006, 2007 Radek Brich + * Copyright 2006, 2007, 2008 Radek Brich * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -31,6 +31,7 @@ #include #include "common.h" +#include "simd.h" using namespace std; @@ -42,8 +43,8 @@ public: // data union { -#ifndef NO_SSE - __m128 mps; +#ifndef NO_SIMD + mfloat4 mf4; #endif Float cell[4]; struct { Float x, y, z, w; }; @@ -51,16 +52,17 @@ }; // constructors -#ifndef NO_SSE - Vector(__m128 m): mps(m) {}; +#ifndef NO_SIMD + Vector(mfloat4 m): mf4(m) {}; #endif Vector(): x(0.0f), y(0.0f), z(0.0f), w(1.0) {}; Vector(Float ax, Float ay, Float az): x(ax), y(ay), z(az), w(1.0) {}; // index operator const Float &operator[](int index) const { return cell[index]; }; + Float &operator[](int index) { return cell[index]; }; - bool operator==(Vector &v) const { return x==v.x && y==v.y && z==v.z; }; + bool operator==(const Vector &v) const { return x==v.x && y==v.y && z==v.z; }; // normalize Vector normalize() @@ -87,12 +89,12 @@ // accumulate Vector operator+=(const Vector &v) { -#ifdef NO_SSE +#ifdef NO_SIMD x += v.x; y += v.y; z += v.z; #else - mps = _mm_add_ps(mps, v.mps); + mf4 = madd(mf4, v.mf4); #endif return *this; }; @@ -110,7 +112,7 @@ // cut Vector operator/=(const Float &f) { - Float finv = 1./f; + Float finv = 1.0f / f; x *= finv; y *= finv; z *= finv; @@ -120,20 +122,20 @@ // sum friend Vector operator+(const Vector &a, const Vector &b) { -#ifdef NO_SSE +#ifdef NO_SIMD return Vector(a.x + b.x, a.y + b.y, a.z + b.z); #else - return Vector(_mm_add_ps(a.mps, b.mps)); + return Vector(madd(a.mf4, b.mf4)); #endif }; // difference friend Vector operator-(const Vector &a, const Vector &b) { 
-#ifdef NO_SSE +#ifdef NO_SIMD return Vector(a.x - b.x, a.y - b.y, a.z - b.z); #else - return Vector(_mm_sub_ps(a.mps, b.mps)); + return Vector(msub(a.mf4, b.mf4)); #endif }; @@ -165,16 +167,16 @@ // scalar division friend Vector operator/(const Vector &v, const Float &f) { - const Float finv = 1./f; + const Float finv = 1.0f / f; return Vector(v.x * finv, v.y * finv, v.z * finv); }; friend Vector operator/(const Float &f, const Vector &v) { -#ifdef NO_SSE +#ifdef NO_SIMD return Vector(f / v.x, f / v.y, f / v.z); #else - return Vector(_mm_div_ps(_mm_set_ps1(f), v.mps)); + return Vector(mdiv(mset1(f), v.mf4)); #endif }; @@ -193,10 +195,10 @@ // cell by cell product (only usable for colours) friend Vector operator*(const Vector &a, const Vector &b) { -#ifdef NO_SSE +#ifdef NO_SIMD return Vector(a.x * b.x, a.y * b.y, a.z * b.z); #else - return Vector(_mm_mul_ps(a.mps, b.mps)); + return Vector(mmul(a.mf4, b.mf4)); #endif }; @@ -223,16 +225,16 @@ typedef Vector Colour; -#ifndef NO_SSE +#ifndef NO_SIMD class VectorPacket { public: union { - __m128 ma[3]; + mfloat4 ma[3]; struct { - __m128 mx; - __m128 my; - __m128 mz; + mfloat4 mx; + mfloat4 my; + mfloat4 mz; }; struct { float x[4]; @@ -242,10 +244,10 @@ }; VectorPacket() {}; - VectorPacket(__m128 ax, __m128 ay, __m128 az): + VectorPacket(mfloat4 ax, mfloat4 ay, mfloat4 az): mx(ax), my(ay), mz(az) {}; VectorPacket(const Vector &v): - mx(_mm_set_ps1(v.x)), my(_mm_set_ps1(v.y)), mz(_mm_set_ps1(v.z)) {}; + mx(mset1(v.x)), my(mset1(v.y)), mz(mset1(v.z)) {}; Vector getVector(int i) const { @@ -259,100 +261,92 @@ void normalize() { - __m128 m,x,y,z; - x = _mm_mul_ps(mx, mx); // x*x - y = _mm_mul_ps(my, my); // y*y - z = _mm_mul_ps(mz, mz); // z*z - m = _mm_add_ps(x, y); - m = _mm_add_ps(m, z); // x*x + y*y + z*z - m = _mm_sqrt_ps(m); - m = _mm_div_ps(mOne, m); // m = 1/sqrt(m) - mx = _mm_mul_ps(mx, m); - my = _mm_mul_ps(my, m); - mz = _mm_mul_ps(mz, m); + mfloat4 m,x,y,z; + x = mmul(mx, mx); // x*x + y = mmul(my, my); // y*y + 
z = mmul(mz, mz); // z*z + m = madd(madd(x, y), z); // x*x + y*y + z*z + m = mdiv(mOne, msqrt(m)); // m = 1/sqrt(m) + mx = mmul(mx, m); + my = mmul(my, m); + mz = mmul(mz, m); }; // accumulate VectorPacket operator+=(const VectorPacket &v) { - mx = _mm_add_ps(mx, v.mx); - my = _mm_add_ps(my, v.my); - mz = _mm_add_ps(mz, v.mz); + mx = madd(mx, v.mx); + my = madd(my, v.my); + mz = madd(mz, v.mz); return *this; }; // add to non-masked components - VectorPacket selectiveAdd(__m128 mask, const VectorPacket &v) + VectorPacket selectiveAdd(const mfloat4 &mask, const VectorPacket &v) { - mx = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(mx, v.mx)), - _mm_andnot_ps(mask, mx)); - my = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(my, v.my)), - _mm_andnot_ps(mask, my)); - mz = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(mz, v.mz)), - _mm_andnot_ps(mask, mz)); + mx = mselect(mask, madd(mx, v.mx), mx); + my = mselect(mask, madd(my, v.my), my); + mz = mselect(mask, madd(mz, v.mz), mz); return *this; }; // add scalar to non-masked components - VectorPacket selectiveAdd(__m128 mask, const __m128 m) + VectorPacket selectiveAdd(const mfloat4 &mask, const mfloat4 &m) { - mx = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(mx, m)), - _mm_andnot_ps(mask, mx)); - my = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(my, m)), - _mm_andnot_ps(mask, my)); - mz = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(mz, m)), - _mm_andnot_ps(mask, mz)); + mx = mselect(mask, madd(mx, m), mx); + my = mselect(mask, madd(my, m), my); + mz = mselect(mask, madd(mz, m), mz); return *this; }; // dot product - friend __m128 dot(const VectorPacket &a, const VectorPacket &b) + friend mfloat4 dot(const VectorPacket &a, const VectorPacket &b) { - return _mm_add_ps(_mm_add_ps( - _mm_mul_ps(a.mx, b.mx), - _mm_mul_ps(a.my, b.my)), - _mm_mul_ps(a.mz, b.mz)); + return madd(madd( + mmul(a.mx, b.mx), + mmul(a.my, b.my)), + mmul(a.mz, b.mz)); }; friend VectorPacket operator+(const VectorPacket &a, const VectorPacket &b) { return VectorPacket( - _mm_add_ps(a.mx, b.mx), 
- _mm_add_ps(a.my, b.my), - _mm_add_ps(a.mz, b.mz)); + madd(a.mx, b.mx), + madd(a.my, b.my), + madd(a.mz, b.mz)); }; friend VectorPacket operator-(const VectorPacket &a, const VectorPacket &b) { return VectorPacket( - _mm_sub_ps(a.mx, b.mx), - _mm_sub_ps(a.my, b.my), - _mm_sub_ps(a.mz, b.mz)); + msub(a.mx, b.mx), + msub(a.my, b.my), + msub(a.mz, b.mz)); }; - friend VectorPacket operator*(const VectorPacket &v, const __m128 &m) + friend VectorPacket operator*(const VectorPacket &v, const mfloat4 &m) { return VectorPacket( - _mm_mul_ps(v.mx, m), - _mm_mul_ps(v.my, m), - _mm_mul_ps(v.mz, m)); + mmul(v.mx, m), + mmul(v.my, m), + mmul(v.mz, m)); }; - friend VectorPacket operator/(const __m128 &m, const VectorPacket &v) + friend VectorPacket operator/(const mfloat4 &m, const VectorPacket &v) { return VectorPacket( - _mm_div_ps(m, v.mx), - _mm_div_ps(m, v.my), - _mm_div_ps(m, v.mz)); + mdiv(m, v.mx), + mdiv(m, v.my), + mdiv(m, v.mz)); }; // cell by cell product (only usable for colours) friend VectorPacket operator*(const VectorPacket &a, const VectorPacket &b) { return VectorPacket( - _mm_mul_ps(a.mx, b.mx), - _mm_mul_ps(a.my, b.my), - _mm_mul_ps(a.mz, b.mz)); + mmul(a.mx, b.mx), + mmul(a.my, b.my), + mmul(a.mz, b.mz)); }; // write to character stream