diff -r 9d66d323c354 -r 9af5c039b678 include/vector.h
--- a/include/vector.h	Fri May 02 13:27:47 2008 +0200
+++ b/include/vector.h	Mon May 05 15:31:14 2008 +0200
@@ -3,7 +3,7 @@
  *
  * This file is part of Pyrit Ray Tracer.
  *
- * Copyright 2006, 2007  Radek Brich
+ * Copyright 2006, 2007, 2008  Radek Brich
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -31,6 +31,7 @@
 #include <iostream>
 
 #include "common.h"
+#include "simd.h"
 
 using namespace std;
 
@@ -42,8 +43,8 @@
 public:
 	// data
 	union {
-#ifndef NO_SSE
-		__m128 mps;
+#ifndef NO_SIMD
+		mfloat4 mf4;
 #endif
 		Float cell[4];
 		struct { Float x, y, z, w; };
@@ -51,16 +52,17 @@
 	};
 
 	// constructors
-#ifndef NO_SSE
-	Vector(__m128 m): mps(m) {};
+#ifndef NO_SIMD
+	Vector(mfloat4 m): mf4(m) {}; // stores m directly in mf4; w is not forced to 1.0 here, unlike the other constructors
 #endif
 	Vector(): x(0.0f), y(0.0f), z(0.0f), w(1.0) {};
 	Vector(Float ax, Float ay, Float az): x(ax), y(ay), z(az), w(1.0) {};
 
 	// index operator
 	const Float &operator[](int index) const { return cell[index]; };
+	Float &operator[](int index) { return cell[index]; }; // mutable overload; index is not range-checked
 
-	bool operator==(Vector &v) const { return x==v.x && y==v.y && z==v.z; };
+	bool operator==(const Vector &v) const { return x==v.x && y==v.y && z==v.z; }; // exact comparison of x, y, z only; w is ignored
 
 	// normalize
 	Vector normalize()
@@ -87,12 +89,12 @@
 	// accumulate
 	Vector operator+=(const Vector &v)
 	{
-#ifdef NO_SSE
+#ifdef NO_SIMD
 		x += v.x;
 		y += v.y;
 		z += v.z;
 #else
-		mps = _mm_add_ps(mps, v.mps);
+		mf4 = madd(mf4, v.mf4); // NOTE(review): presumably adds all four lanes, so w is accumulated too, while the NO_SIMD branch leaves w untouched -- confirm against madd's definition
 #endif
 		return *this;
 	};
@@ -110,7 +112,7 @@
 	// cut
 	Vector operator/=(const Float &f)
 	{
-		Float finv = 1./f;
+		Float finv = 1.0f / f;
 		x *= finv;
 		y *= finv;
 		z *= finv;
@@ -120,20 +122,20 @@
 	// sum
 	friend Vector operator+(const Vector &a, const Vector &b)
 	{
-#ifdef NO_SSE
+#ifdef NO_SIMD
 		return Vector(a.x + b.x, a.y + b.y, a.z + b.z);
 #else
-		return Vector(_mm_add_ps(a.mps, b.mps));
+		return Vector(madd(a.mf4, b.mf4)); // NOTE(review): the scalar branch yields w = 1.0 via the 3-argument constructor; this path presumably sums w as well -- confirm
 #endif
 	};
 
 	// difference
 	friend Vector operator-(const Vector &a, const Vector &b)
 	{
-#ifdef NO_SSE
+#ifdef NO_SIMD
 		return Vector(a.x - b.x, a.y - b.y, a.z - b.z);
 #else
-		return Vector(_mm_sub_ps(a.mps, b.mps));
+		return Vector(msub(a.mf4, b.mf4)); // NOTE(review): the scalar branch yields w = 1.0; here w presumably becomes a.w - b.w -- confirm
 #endif
 	};
 
@@ -165,16 +167,16 @@
 	// scalar division
 	friend Vector operator/(const Vector &v, const Float &f)
 	{
-		const Float finv = 1./f;
+		const Float finv = 1.0f / f; // reciprocal computed once and reused for x, y and z; float literal replaces the former double literal
 		return Vector(v.x * finv, v.y * finv, v.z * finv);
 	};
 
 	friend Vector operator/(const Float &f, const Vector &v)
 	{
-#ifdef NO_SSE
+#ifdef NO_SIMD
 		return Vector(f / v.x, f / v.y, f / v.z);
 #else
-		return Vector(_mm_div_ps(_mm_set_ps1(f), v.mps));
+		return Vector(mdiv(mset1(f), v.mf4)); // mset1/mdiv stand in for the removed _mm_set_ps1/_mm_div_ps; NOTE(review): w presumably becomes f / v.w here vs. 1.0 in the scalar branch -- confirm
 #endif
 	};
 
@@ -193,10 +195,10 @@
 	// cell by cell product (only usable for colours)
 	friend Vector operator*(const Vector &a,  const Vector &b)
 	{
-#ifdef NO_SSE
+#ifdef NO_SIMD
 		return Vector(a.x * b.x, a.y * b.y, a.z * b.z);
 #else
-		return Vector(_mm_mul_ps(a.mps, b.mps));
+		return Vector(mmul(a.mf4, b.mf4)); // NOTE(review): the scalar branch yields w = 1.0; here w presumably becomes a.w * b.w -- confirm
 #endif
 	};
 
@@ -223,16 +225,16 @@
 
 typedef Vector Colour;
 
-#ifndef NO_SSE
+#ifndef NO_SIMD
 class VectorPacket
 {
 public:
 	union {
-		__m128 ma[3];
+		mfloat4 ma[3];
 		struct {
-			__m128 mx;
-			__m128 my;
-			__m128 mz;
+			mfloat4 mx;
+			mfloat4 my;
+			mfloat4 mz;
 		};
 		struct {
 			float x[4];
@@ -242,10 +244,10 @@
 	};
 
 	VectorPacket() {};
-	VectorPacket(__m128 ax, __m128 ay, __m128 az):
+	VectorPacket(mfloat4 ax, mfloat4 ay, mfloat4 az): // one mfloat4 per axis, stored as given
 		mx(ax), my(ay), mz(az) {};
 	VectorPacket(const Vector &v):
-		mx(_mm_set_ps1(v.x)), my(_mm_set_ps1(v.y)), mz(_mm_set_ps1(v.z)) {};
+		mx(mset1(v.x)), my(mset1(v.y)), mz(mset1(v.z)) {}; // each of v.x, v.y, v.z goes through mset1 into its own mfloat4; v.w is not used
 
 	Vector getVector(int i) const
 	{
@@ -259,100 +261,92 @@
 
 	void normalize()
 	{
-		__m128 m,x,y,z;
-		x = _mm_mul_ps(mx, mx); // x*x
-		y = _mm_mul_ps(my, my); // y*y
-		z = _mm_mul_ps(mz, mz); // z*z
-		m = _mm_add_ps(x, y);
-		m = _mm_add_ps(m, z);     // x*x + y*y + z*z
-		m = _mm_sqrt_ps(m);
-		m = _mm_div_ps(mOne, m);   // m = 1/sqrt(m)
-		mx = _mm_mul_ps(mx, m);
-		my = _mm_mul_ps(my, m);
-		mz = _mm_mul_ps(mz, m);
+		mfloat4 m,x,y,z; // m: scale factor; x, y, z: squared components
+		x = mmul(mx, mx); // x*x
+		y = mmul(my, my); // y*y
+		z = mmul(mz, mz); // z*z
+		m = madd(madd(x, y), z);     // squared length: x*x + y*y + z*z
+		m = mdiv(mOne, msqrt(m));   // m = 1/sqrt(m); there is no guard against a zero length here
+		mx = mmul(mx, m); // scale all three axes by the same factor m
+		my = mmul(my, m);
+		mz = mmul(mz, m);
 	};
 
 	// accumulate
 	VectorPacket operator+=(const VectorPacket &v)
 	{
-		mx = _mm_add_ps(mx, v.mx);
-		my = _mm_add_ps(my, v.my);
-		mz = _mm_add_ps(mz, v.mz);
+		mx = madd(mx, v.mx); // axis-wise madd into this packet
+		my = madd(my, v.my);
+		mz = madd(mz, v.mz); // note: the function returns VectorPacket by value, i.e. a copy of *this
 		return *this;
 	};
 
 	// add to non-masked components
-	VectorPacket selectiveAdd(__m128 mask, const VectorPacket &v)
+	VectorPacket selectiveAdd(const mfloat4 &mask, const VectorPacket &v) // mask is now taken by const reference instead of by value
 	{
-		mx = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(mx, v.mx)),
-			_mm_andnot_ps(mask, mx));
-		my = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(my, v.my)),
-			_mm_andnot_ps(mask, my));
-		mz = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(mz, v.mz)),
-			_mm_andnot_ps(mask, mz));
+		mx = mselect(mask, madd(mx, v.mx), mx); // replaces the removed and/andnot/or blend: presumably the sum where mask bits are set, the old mx elsewhere -- confirm mselect's argument order
+		my = mselect(mask, madd(my, v.my), my);
+		mz = mselect(mask, madd(mz, v.mz), mz);
 		return *this;
 	};
 
 	// add scalar to non-masked components
-	VectorPacket selectiveAdd(__m128 mask, const __m128 m)
+	VectorPacket selectiveAdd(const mfloat4 &mask, const mfloat4 &m) // both mask and m are now taken by const reference
 	{
-		mx = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(mx, m)),
-			_mm_andnot_ps(mask, mx));
-		my = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(my, m)),
-			_mm_andnot_ps(mask, my));
-		mz = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(mz, m)),
-			_mm_andnot_ps(mask, mz));
+		mx = mselect(mask, madd(mx, m), mx); // the same m is added to mx, my and mz; mselect replaces the removed and/andnot/or blend
+		my = mselect(mask, madd(my, m), my);
+		mz = mselect(mask, madd(mz, m), mz);
 		return *this;
 	};
 
 	// dot product
-	friend __m128 dot(const VectorPacket &a, const VectorPacket &b)
+	friend mfloat4 dot(const VectorPacket &a, const VectorPacket &b) // result is an mfloat4, not a single Float
 	{
-		return _mm_add_ps(_mm_add_ps(
-			_mm_mul_ps(a.mx, b.mx),
-			_mm_mul_ps(a.my, b.my)),
-			_mm_mul_ps(a.mz, b.mz));
+		return madd(madd(
+			mmul(a.mx, b.mx),
+			mmul(a.my, b.my)),
+			mmul(a.mz, b.mz)); // (mx*mx + my*my) + mz*mz, same grouping as the removed intrinsic version
 	};
 
 	friend VectorPacket operator+(const VectorPacket &a, const VectorPacket &b)
 	{
 		return VectorPacket(
-			_mm_add_ps(a.mx, b.mx),
-			_mm_add_ps(a.my, b.my),
-			_mm_add_ps(a.mz, b.mz));
+			madd(a.mx, b.mx), // axis-wise sum; madd stands in for the removed _mm_add_ps
+			madd(a.my, b.my),
+			madd(a.mz, b.mz));
 	};
 
 	friend VectorPacket operator-(const VectorPacket &a, const VectorPacket &b)
 	{
 		return VectorPacket(
-			_mm_sub_ps(a.mx, b.mx),
-			_mm_sub_ps(a.my, b.my),
-			_mm_sub_ps(a.mz, b.mz));
+			msub(a.mx, b.mx), // axis-wise a - b; msub stands in for the removed _mm_sub_ps
+			msub(a.my, b.my),
+			msub(a.mz, b.mz));
 	};
 
-	friend VectorPacket operator*(const VectorPacket &v,  const __m128 &m)
+	friend VectorPacket operator*(const VectorPacket &v,  const mfloat4 &m) // multiplies every axis of v by the same m
 	{
 		return VectorPacket(
-			_mm_mul_ps(v.mx, m),
-			_mm_mul_ps(v.my, m),
-			_mm_mul_ps(v.mz, m));
+			mmul(v.mx, m), // mmul stands in for the removed _mm_mul_ps
+			mmul(v.my, m),
+			mmul(v.mz, m));
 	};
 
-	friend VectorPacket operator/(const __m128 &m, const VectorPacket &v)
+	friend VectorPacket operator/(const mfloat4 &m, const VectorPacket &v) // m is the dividend for every axis of v
 	{
 		return VectorPacket(
-			_mm_div_ps(m, v.mx),
-			_mm_div_ps(m, v.my),
-			_mm_div_ps(m, v.mz));
+			mdiv(m, v.mx), // mdiv stands in for the removed _mm_div_ps; no check for zero divisors
+			mdiv(m, v.my),
+			mdiv(m, v.mz));
 	};
 
 	// cell by cell product (only usable for colours)
 	friend VectorPacket operator*(const VectorPacket &a,  const VectorPacket &b)
 	{
 		return VectorPacket(
-			_mm_mul_ps(a.mx, b.mx),
-			_mm_mul_ps(a.my, b.my),
-			_mm_mul_ps(a.mz, b.mz));
+			mmul(a.mx, b.mx), // axis-wise product of matching components, no cross terms
+			mmul(a.my, b.my),
+			mmul(a.mz, b.mz));
 	};
 
 	// write to character stream