--- a/include/vector.h Fri May 02 13:27:47 2008 +0200
+++ b/include/vector.h Mon May 05 15:31:14 2008 +0200
@@ -3,7 +3,7 @@
*
* This file is part of Pyrit Ray Tracer.
*
- * Copyright 2006, 2007 Radek Brich
+ * Copyright 2006, 2007, 2008 Radek Brich
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -31,6 +31,7 @@
#include <iostream>
#include "common.h"
+#include "simd.h"
using namespace std;
@@ -42,8 +43,8 @@
public:
// data
union {
-#ifndef NO_SSE
- __m128 mps;
+#ifndef NO_SIMD
+ mfloat4 mf4;
#endif
Float cell[4];
struct { Float x, y, z, w; };
@@ -51,16 +52,17 @@
};
// constructors
-#ifndef NO_SSE
- Vector(__m128 m): mps(m) {};
+#ifndef NO_SIMD
+ Vector(mfloat4 m): mf4(m) {};
#endif
Vector(): x(0.0f), y(0.0f), z(0.0f), w(1.0) {};
Vector(Float ax, Float ay, Float az): x(ax), y(ay), z(az), w(1.0) {};
// index operator
const Float &operator[](int index) const { return cell[index]; };
+ Float &operator[](int index) { return cell[index]; };
- bool operator==(Vector &v) const { return x==v.x && y==v.y && z==v.z; };
+ bool operator==(const Vector &v) const { return x==v.x && y==v.y && z==v.z; };
// normalize
Vector normalize()
@@ -87,12 +89,12 @@
// accumulate
Vector operator+=(const Vector &v)
{
-#ifdef NO_SSE
+#ifdef NO_SIMD
x += v.x;
y += v.y;
z += v.z;
#else
- mps = _mm_add_ps(mps, v.mps);
+ mf4 = madd(mf4, v.mf4);
#endif
return *this;
};
@@ -110,7 +112,7 @@
// cut
Vector operator/=(const Float &f)
{
- Float finv = 1./f;
+ Float finv = 1.0f / f;
x *= finv;
y *= finv;
z *= finv;
@@ -120,20 +122,20 @@
// sum
friend Vector operator+(const Vector &a, const Vector &b)
{
-#ifdef NO_SSE
+#ifdef NO_SIMD
return Vector(a.x + b.x, a.y + b.y, a.z + b.z);
#else
- return Vector(_mm_add_ps(a.mps, b.mps));
+ return Vector(madd(a.mf4, b.mf4));
#endif
};
// difference
friend Vector operator-(const Vector &a, const Vector &b)
{
-#ifdef NO_SSE
+#ifdef NO_SIMD
return Vector(a.x - b.x, a.y - b.y, a.z - b.z);
#else
- return Vector(_mm_sub_ps(a.mps, b.mps));
+ return Vector(msub(a.mf4, b.mf4));
#endif
};
@@ -165,16 +167,16 @@
// scalar division
friend Vector operator/(const Vector &v, const Float &f)
{
- const Float finv = 1./f;
+ const Float finv = 1.0f / f;
return Vector(v.x * finv, v.y * finv, v.z * finv);
};
friend Vector operator/(const Float &f, const Vector &v)
{
-#ifdef NO_SSE
+#ifdef NO_SIMD
return Vector(f / v.x, f / v.y, f / v.z);
#else
- return Vector(_mm_div_ps(_mm_set_ps1(f), v.mps));
+ return Vector(mdiv(mset1(f), v.mf4));
#endif
};
@@ -193,10 +195,10 @@
// cell by cell product (only usable for colours)
friend Vector operator*(const Vector &a, const Vector &b)
{
-#ifdef NO_SSE
+#ifdef NO_SIMD
return Vector(a.x * b.x, a.y * b.y, a.z * b.z);
#else
- return Vector(_mm_mul_ps(a.mps, b.mps));
+ return Vector(mmul(a.mf4, b.mf4));
#endif
};
@@ -223,16 +225,16 @@
typedef Vector Colour;
-#ifndef NO_SSE
+#ifndef NO_SIMD
class VectorPacket
{
public:
union {
- __m128 ma[3];
+ mfloat4 ma[3];
struct {
- __m128 mx;
- __m128 my;
- __m128 mz;
+ mfloat4 mx;
+ mfloat4 my;
+ mfloat4 mz;
};
struct {
float x[4];
@@ -242,10 +244,10 @@
};
VectorPacket() {};
- VectorPacket(__m128 ax, __m128 ay, __m128 az):
+ VectorPacket(mfloat4 ax, mfloat4 ay, mfloat4 az):
mx(ax), my(ay), mz(az) {};
VectorPacket(const Vector &v):
- mx(_mm_set_ps1(v.x)), my(_mm_set_ps1(v.y)), mz(_mm_set_ps1(v.z)) {};
+ mx(mset1(v.x)), my(mset1(v.y)), mz(mset1(v.z)) {};
Vector getVector(int i) const
{
@@ -259,100 +261,92 @@
void normalize()
{
- __m128 m,x,y,z;
- x = _mm_mul_ps(mx, mx); // x*x
- y = _mm_mul_ps(my, my); // y*y
- z = _mm_mul_ps(mz, mz); // z*z
- m = _mm_add_ps(x, y);
- m = _mm_add_ps(m, z); // x*x + y*y + z*z
- m = _mm_sqrt_ps(m);
- m = _mm_div_ps(mOne, m); // m = 1/sqrt(m)
- mx = _mm_mul_ps(mx, m);
- my = _mm_mul_ps(my, m);
- mz = _mm_mul_ps(mz, m);
+ mfloat4 m,x,y,z;
+ x = mmul(mx, mx); // x*x
+ y = mmul(my, my); // y*y
+ z = mmul(mz, mz); // z*z
+ m = madd(madd(x, y), z); // x*x + y*y + z*z
+ m = mdiv(mOne, msqrt(m)); // m = 1/sqrt(m)
+ mx = mmul(mx, m);
+ my = mmul(my, m);
+ mz = mmul(mz, m);
};
// accumulate
VectorPacket operator+=(const VectorPacket &v)
{
- mx = _mm_add_ps(mx, v.mx);
- my = _mm_add_ps(my, v.my);
- mz = _mm_add_ps(mz, v.mz);
+ mx = madd(mx, v.mx);
+ my = madd(my, v.my);
+ mz = madd(mz, v.mz);
return *this;
};
// add to non-masked components
- VectorPacket selectiveAdd(__m128 mask, const VectorPacket &v)
+ VectorPacket selectiveAdd(const mfloat4 &mask, const VectorPacket &v)
{
- mx = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(mx, v.mx)),
- _mm_andnot_ps(mask, mx));
- my = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(my, v.my)),
- _mm_andnot_ps(mask, my));
- mz = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(mz, v.mz)),
- _mm_andnot_ps(mask, mz));
+ mx = mselect(mask, madd(mx, v.mx), mx);
+ my = mselect(mask, madd(my, v.my), my);
+ mz = mselect(mask, madd(mz, v.mz), mz);
return *this;
};
// add scalar to non-masked components
- VectorPacket selectiveAdd(__m128 mask, const __m128 m)
+ VectorPacket selectiveAdd(const mfloat4 &mask, const mfloat4 &m)
{
- mx = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(mx, m)),
- _mm_andnot_ps(mask, mx));
- my = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(my, m)),
- _mm_andnot_ps(mask, my));
- mz = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(mz, m)),
- _mm_andnot_ps(mask, mz));
+ mx = mselect(mask, madd(mx, m), mx);
+ my = mselect(mask, madd(my, m), my);
+ mz = mselect(mask, madd(mz, m), mz);
return *this;
};
// dot product
- friend __m128 dot(const VectorPacket &a, const VectorPacket &b)
+ friend mfloat4 dot(const VectorPacket &a, const VectorPacket &b)
{
- return _mm_add_ps(_mm_add_ps(
- _mm_mul_ps(a.mx, b.mx),
- _mm_mul_ps(a.my, b.my)),
- _mm_mul_ps(a.mz, b.mz));
+ return madd(madd(
+ mmul(a.mx, b.mx),
+ mmul(a.my, b.my)),
+ mmul(a.mz, b.mz));
};
friend VectorPacket operator+(const VectorPacket &a, const VectorPacket &b)
{
return VectorPacket(
- _mm_add_ps(a.mx, b.mx),
- _mm_add_ps(a.my, b.my),
- _mm_add_ps(a.mz, b.mz));
+ madd(a.mx, b.mx),
+ madd(a.my, b.my),
+ madd(a.mz, b.mz));
};
friend VectorPacket operator-(const VectorPacket &a, const VectorPacket &b)
{
return VectorPacket(
- _mm_sub_ps(a.mx, b.mx),
- _mm_sub_ps(a.my, b.my),
- _mm_sub_ps(a.mz, b.mz));
+ msub(a.mx, b.mx),
+ msub(a.my, b.my),
+ msub(a.mz, b.mz));
};
- friend VectorPacket operator*(const VectorPacket &v, const __m128 &m)
+ friend VectorPacket operator*(const VectorPacket &v, const mfloat4 &m)
{
return VectorPacket(
- _mm_mul_ps(v.mx, m),
- _mm_mul_ps(v.my, m),
- _mm_mul_ps(v.mz, m));
+ mmul(v.mx, m),
+ mmul(v.my, m),
+ mmul(v.mz, m));
};
- friend VectorPacket operator/(const __m128 &m, const VectorPacket &v)
+ friend VectorPacket operator/(const mfloat4 &m, const VectorPacket &v)
{
return VectorPacket(
- _mm_div_ps(m, v.mx),
- _mm_div_ps(m, v.my),
- _mm_div_ps(m, v.mz));
+ mdiv(m, v.mx),
+ mdiv(m, v.my),
+ mdiv(m, v.mz));
};
// cell by cell product (only usable for colours)
friend VectorPacket operator*(const VectorPacket &a, const VectorPacket &b)
{
return VectorPacket(
- _mm_mul_ps(a.mx, b.mx),
- _mm_mul_ps(a.my, b.my),
- _mm_mul_ps(a.mz, b.mz));
+ mmul(a.mx, b.mx),
+ mmul(a.my, b.my),
+ mmul(a.mz, b.mz));
};
// write to character stream