include/vector.h
branch pyrit
changeset 92 9af5c039b678
parent 91 9d66d323c354
child 93 96d65f841791
equal deleted inserted replaced
91:9d66d323c354 92:9af5c039b678
     1 /*
     1 /*
     2  * vector.h: Vector class with Colour alias
     2  * vector.h: Vector class with Colour alias
     3  *
     3  *
     4  * This file is part of Pyrit Ray Tracer.
     4  * This file is part of Pyrit Ray Tracer.
     5  *
     5  *
     6  * Copyright 2006, 2007  Radek Brich
     6  * Copyright 2006, 2007, 2008  Radek Brich
     7  *
     7  *
     8  * Permission is hereby granted, free of charge, to any person obtaining a copy
     8  * Permission is hereby granted, free of charge, to any person obtaining a copy
     9  * of this software and associated documentation files (the "Software"), to deal
     9  * of this software and associated documentation files (the "Software"), to deal
    10  * in the Software without restriction, including without limitation the rights
    10  * in the Software without restriction, including without limitation the rights
    11  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    11  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    29 
    29 
    30 #include <math.h>
    30 #include <math.h>
    31 #include <iostream>
    31 #include <iostream>
    32 
    32 
    33 #include "common.h"
    33 #include "common.h"
       
    34 #include "simd.h"
    34 
    35 
    35 using namespace std;
    36 using namespace std;
    36 
    37 
    37 /**
    38 /**
    38  * three cell vector
    39  * three cell vector
    40 class Vector
    41 class Vector
    41 {
    42 {
    42 public:
    43 public:
    43 	// data
    44 	// data
    44 	union {
    45 	union {
    45 #ifndef NO_SSE
    46 #ifndef NO_SIMD
    46 		__m128 mps;
    47 		mfloat4 mf4;
    47 #endif
    48 #endif
    48 		Float cell[4];
    49 		Float cell[4];
    49 		struct { Float x, y, z, w; };
    50 		struct { Float x, y, z, w; };
    50 		struct { Float r, g, b, a; };
    51 		struct { Float r, g, b, a; };
    51 	};
    52 	};
    52 
    53 
    53 	// constructors
    54 	// constructors
    54 #ifndef NO_SSE
    55 #ifndef NO_SIMD
    55 	Vector(__m128 m): mps(m) {};
    56 	Vector(mfloat4 m): mf4(m) {};
    56 #endif
    57 #endif
    57 	Vector(): x(0.0f), y(0.0f), z(0.0f), w(1.0) {};
    58 	Vector(): x(0.0f), y(0.0f), z(0.0f), w(1.0) {};
    58 	Vector(Float ax, Float ay, Float az): x(ax), y(ay), z(az), w(1.0) {};
    59 	Vector(Float ax, Float ay, Float az): x(ax), y(ay), z(az), w(1.0) {};
    59 
    60 
    60 	// index operator
    61 	// index operator
    61 	const Float &operator[](int index) const { return cell[index]; };
    62 	const Float &operator[](int index) const { return cell[index]; };
    62 
    63 	Float &operator[](int index) { return cell[index]; };
    63 	bool operator==(Vector &v) const { return x==v.x && y==v.y && z==v.z; };
    64 
       
    65 	bool operator==(const Vector &v) const { return x==v.x && y==v.y && z==v.z; };
    64 
    66 
    65 	// normalize
    67 	// normalize
    66 	Vector normalize()
    68 	Vector normalize()
    67 	{
    69 	{
    68 		const Float f = 1.0f / mag();
    70 		const Float f = 1.0f / mag();
    85 	Vector operator-() const { return Vector(-x, -y, -z); };
    87 	Vector operator-() const { return Vector(-x, -y, -z); };
    86 
    88 
    87 	// accumulate
    89 	// accumulate
    88 	Vector operator+=(const Vector &v)
    90 	Vector operator+=(const Vector &v)
    89 	{
    91 	{
    90 #ifdef NO_SSE
    92 #ifdef NO_SIMD
    91 		x += v.x;
    93 		x += v.x;
    92 		y += v.y;
    94 		y += v.y;
    93 		z += v.z;
    95 		z += v.z;
    94 #else
    96 #else
    95 		mps = _mm_add_ps(mps, v.mps);
    97 		mf4 = madd(mf4, v.mf4);
    96 #endif
    98 #endif
    97 		return *this;
    99 		return *this;
    98 	};
   100 	};
    99 
   101 
   100 	// multiply
   102 	// multiply
   108 
   110 
   109 
   111 
   110 	// cut
   112 	// cut
   111 	Vector operator/=(const Float &f)
   113 	Vector operator/=(const Float &f)
   112 	{
   114 	{
   113 		Float finv = 1./f;
   115 		Float finv = 1.0f / f;
   114 		x *= finv;
   116 		x *= finv;
   115 		y *= finv;
   117 		y *= finv;
   116 		z *= finv;
   118 		z *= finv;
   117 		return *this;
   119 		return *this;
   118 	};
   120 	};
   119 
   121 
   120 	// sum
   122 	// sum
   121 	friend Vector operator+(const Vector &a, const Vector &b)
   123 	friend Vector operator+(const Vector &a, const Vector &b)
   122 	{
   124 	{
   123 #ifdef NO_SSE
   125 #ifdef NO_SIMD
   124 		return Vector(a.x + b.x, a.y + b.y, a.z + b.z);
   126 		return Vector(a.x + b.x, a.y + b.y, a.z + b.z);
   125 #else
   127 #else
   126 		return Vector(_mm_add_ps(a.mps, b.mps));
   128 		return Vector(madd(a.mf4, b.mf4));
   127 #endif
   129 #endif
   128 	};
   130 	};
   129 
   131 
   130 	// difference
   132 	// difference
   131 	friend Vector operator-(const Vector &a, const Vector &b)
   133 	friend Vector operator-(const Vector &a, const Vector &b)
   132 	{
   134 	{
   133 #ifdef NO_SSE
   135 #ifdef NO_SIMD
   134 		return Vector(a.x - b.x, a.y - b.y, a.z - b.z);
   136 		return Vector(a.x - b.x, a.y - b.y, a.z - b.z);
   135 #else
   137 #else
   136 		return Vector(_mm_sub_ps(a.mps, b.mps));
   138 		return Vector(msub(a.mf4, b.mf4));
   137 #endif
   139 #endif
   138 	};
   140 	};
   139 
   141 
   140 	// dot product
   142 	// dot product
   141 	friend Float dot(const Vector &a, const Vector &b)
   143 	friend Float dot(const Vector &a, const Vector &b)
   163 	};
   165 	};
   164 
   166 
   165 	// scalar division
   167 	// scalar division
   166 	friend Vector operator/(const Vector &v, const Float &f)
   168 	friend Vector operator/(const Vector &v, const Float &f)
   167 	{
   169 	{
   168 		const Float finv = 1./f;
   170 		const Float finv = 1.0f / f;
   169 		return Vector(v.x * finv, v.y * finv, v.z * finv);
   171 		return Vector(v.x * finv, v.y * finv, v.z * finv);
   170 	};
   172 	};
   171 
   173 
   172 	friend Vector operator/(const Float &f, const Vector &v)
   174 	friend Vector operator/(const Float &f, const Vector &v)
   173 	{
   175 	{
   174 #ifdef NO_SSE
   176 #ifdef NO_SIMD
   175 		return Vector(f / v.x, f / v.y, f / v.z);
   177 		return Vector(f / v.x, f / v.y, f / v.z);
   176 #else
   178 #else
   177 		return Vector(_mm_div_ps(_mm_set_ps1(f), v.mps));
   179 		return Vector(mdiv(mset1(f), v.mf4));
   178 #endif
   180 #endif
   179 	};
   181 	};
   180 
   182 
   181 	// vector plus scalar
   183 	// vector plus scalar
   182 	friend Vector operator+(const Vector &v, const Float &f)
   184 	friend Vector operator+(const Vector &v, const Float &f)
   191 	};
   193 	};
   192 
   194 
   193 	// cell by cell product (only usable for colours)
   195 	// cell by cell product (only usable for colours)
   194 	friend Vector operator*(const Vector &a,  const Vector &b)
   196 	friend Vector operator*(const Vector &a,  const Vector &b)
   195 	{
   197 	{
   196 #ifdef NO_SSE
   198 #ifdef NO_SIMD
   197 		return Vector(a.x * b.x, a.y * b.y, a.z * b.z);
   199 		return Vector(a.x * b.x, a.y * b.y, a.z * b.z);
   198 #else
   200 #else
   199 		return Vector(_mm_mul_ps(a.mps, b.mps));
   201 		return Vector(mmul(a.mf4, b.mf4));
   200 #endif
   202 #endif
   201 	};
   203 	};
   202 
   204 
   203 	// write
   205 	// write
   204 	friend ostream & operator<<(ostream &st, const Vector &v)
   206 	friend ostream & operator<<(ostream &st, const Vector &v)
   221 	};
   223 	};
   222 };
   224 };
   223 
   225 
   224 typedef Vector Colour;
   226 typedef Vector Colour;
   225 
   227 
   226 #ifndef NO_SSE
   228 #ifndef NO_SIMD
   227 class VectorPacket
   229 class VectorPacket
   228 {
   230 {
   229 public:
   231 public:
   230 	union {
   232 	union {
   231 		__m128 ma[3];
   233 		mfloat4 ma[3];
   232 		struct {
   234 		struct {
   233 			__m128 mx;
   235 			mfloat4 mx;
   234 			__m128 my;
   236 			mfloat4 my;
   235 			__m128 mz;
   237 			mfloat4 mz;
   236 		};
   238 		};
   237 		struct {
   239 		struct {
   238 			float x[4];
   240 			float x[4];
   239 			float y[4];
   241 			float y[4];
   240 			float z[4];
   242 			float z[4];
   241 		};
   243 		};
   242 	};
   244 	};
   243 
   245 
   244 	VectorPacket() {};
   246 	VectorPacket() {};
   245 	VectorPacket(__m128 ax, __m128 ay, __m128 az):
   247 	VectorPacket(mfloat4 ax, mfloat4 ay, mfloat4 az):
   246 		mx(ax), my(ay), mz(az) {};
   248 		mx(ax), my(ay), mz(az) {};
   247 	VectorPacket(const Vector &v):
   249 	VectorPacket(const Vector &v):
   248 		mx(_mm_set_ps1(v.x)), my(_mm_set_ps1(v.y)), mz(_mm_set_ps1(v.z)) {};
   250 		mx(mset1(v.x)), my(mset1(v.y)), mz(mset1(v.z)) {};
   249 
   251 
   250 	Vector getVector(int i) const
   252 	Vector getVector(int i) const
   251 	{
   253 	{
   252 		return Vector(x[i], y[i], z[i]);
   254 		return Vector(x[i], y[i], z[i]);
   253 	};
   255 	};
   257 		x[i] = v.x; y[i] = v.y; z[i] = v.z;
   259 		x[i] = v.x; y[i] = v.y; z[i] = v.z;
   258 	};
   260 	};
   259 
   261 
   260 	void normalize()
   262 	void normalize()
   261 	{
   263 	{
   262 		__m128 m,x,y,z;
   264 		mfloat4 m,x,y,z;
   263 		x = _mm_mul_ps(mx, mx); // x*x
   265 		x = mmul(mx, mx); // x*x
   264 		y = _mm_mul_ps(my, my); // y*y
   266 		y = mmul(my, my); // y*y
   265 		z = _mm_mul_ps(mz, mz); // z*z
   267 		z = mmul(mz, mz); // z*z
   266 		m = _mm_add_ps(x, y);
   268 		m = madd(madd(x, y), z);     // x*x + y*y + z*z
   267 		m = _mm_add_ps(m, z);     // x*x + y*y + z*z
   269 		m = mdiv(mOne, msqrt(m));   // m = 1/sqrt(m)
   268 		m = _mm_sqrt_ps(m);
   270 		mx = mmul(mx, m);
   269 		m = _mm_div_ps(mOne, m);   // m = 1/sqrt(m)
   271 		my = mmul(my, m);
   270 		mx = _mm_mul_ps(mx, m);
   272 		mz = mmul(mz, m);
   271 		my = _mm_mul_ps(my, m);
       
   272 		mz = _mm_mul_ps(mz, m);
       
   273 	};
   273 	};
   274 
   274 
   275 	// accumulate
   275 	// accumulate
   276 	VectorPacket operator+=(const VectorPacket &v)
   276 	VectorPacket operator+=(const VectorPacket &v)
   277 	{
   277 	{
   278 		mx = _mm_add_ps(mx, v.mx);
   278 		mx = madd(mx, v.mx);
   279 		my = _mm_add_ps(my, v.my);
   279 		my = madd(my, v.my);
   280 		mz = _mm_add_ps(mz, v.mz);
   280 		mz = madd(mz, v.mz);
   281 		return *this;
   281 		return *this;
   282 	};
   282 	};
   283 
   283 
   284 	// add to non-masked components
   284 	// add to non-masked components
   285 	VectorPacket selectiveAdd(__m128 mask, const VectorPacket &v)
   285 	VectorPacket selectiveAdd(const mfloat4 &mask, const VectorPacket &v)
   286 	{
   286 	{
   287 		mx = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(mx, v.mx)),
   287 		mx = mselect(mask, madd(mx, v.mx), mx);
   288 			_mm_andnot_ps(mask, mx));
   288 		my = mselect(mask, madd(my, v.my), my);
   289 		my = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(my, v.my)),
   289 		mz = mselect(mask, madd(mz, v.mz), mz);
   290 			_mm_andnot_ps(mask, my));
       
   291 		mz = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(mz, v.mz)),
       
   292 			_mm_andnot_ps(mask, mz));
       
   293 		return *this;
   290 		return *this;
   294 	};
   291 	};
   295 
   292 
   296 	// add scalar to non-masked components
   293 	// add scalar to non-masked components
   297 	VectorPacket selectiveAdd(__m128 mask, const __m128 m)
   294 	VectorPacket selectiveAdd(const mfloat4 &mask, const mfloat4 &m)
   298 	{
   295 	{
   299 		mx = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(mx, m)),
   296 		mx = mselect(mask, madd(mx, m), mx);
   300 			_mm_andnot_ps(mask, mx));
   297 		my = mselect(mask, madd(my, m), my);
   301 		my = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(my, m)),
   298 		mz = mselect(mask, madd(mz, m), mz);
   302 			_mm_andnot_ps(mask, my));
       
   303 		mz = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(mz, m)),
       
   304 			_mm_andnot_ps(mask, mz));
       
   305 		return *this;
   299 		return *this;
   306 	};
   300 	};
   307 
   301 
   308 	// dot product
   302 	// dot product
   309 	friend __m128 dot(const VectorPacket &a, const VectorPacket &b)
   303 	friend mfloat4 dot(const VectorPacket &a, const VectorPacket &b)
   310 	{
   304 	{
   311 		return _mm_add_ps(_mm_add_ps(
   305 		return madd(madd(
   312 			_mm_mul_ps(a.mx, b.mx),
   306 			mmul(a.mx, b.mx),
   313 			_mm_mul_ps(a.my, b.my)),
   307 			mmul(a.my, b.my)),
   314 			_mm_mul_ps(a.mz, b.mz));
   308 			mmul(a.mz, b.mz));
   315 	};
   309 	};
   316 
   310 
   317 	friend VectorPacket operator+(const VectorPacket &a, const VectorPacket &b)
   311 	friend VectorPacket operator+(const VectorPacket &a, const VectorPacket &b)
   318 	{
   312 	{
   319 		return VectorPacket(
   313 		return VectorPacket(
   320 			_mm_add_ps(a.mx, b.mx),
   314 			madd(a.mx, b.mx),
   321 			_mm_add_ps(a.my, b.my),
   315 			madd(a.my, b.my),
   322 			_mm_add_ps(a.mz, b.mz));
   316 			madd(a.mz, b.mz));
   323 	};
   317 	};
   324 
   318 
   325 	friend VectorPacket operator-(const VectorPacket &a, const VectorPacket &b)
   319 	friend VectorPacket operator-(const VectorPacket &a, const VectorPacket &b)
   326 	{
   320 	{
   327 		return VectorPacket(
   321 		return VectorPacket(
   328 			_mm_sub_ps(a.mx, b.mx),
   322 			msub(a.mx, b.mx),
   329 			_mm_sub_ps(a.my, b.my),
   323 			msub(a.my, b.my),
   330 			_mm_sub_ps(a.mz, b.mz));
   324 			msub(a.mz, b.mz));
   331 	};
   325 	};
   332 
   326 
   333 	friend VectorPacket operator*(const VectorPacket &v,  const __m128 &m)
   327 	friend VectorPacket operator*(const VectorPacket &v,  const mfloat4 &m)
   334 	{
   328 	{
   335 		return VectorPacket(
   329 		return VectorPacket(
   336 			_mm_mul_ps(v.mx, m),
   330 			mmul(v.mx, m),
   337 			_mm_mul_ps(v.my, m),
   331 			mmul(v.my, m),
   338 			_mm_mul_ps(v.mz, m));
   332 			mmul(v.mz, m));
   339 	};
   333 	};
   340 
   334 
   341 	friend VectorPacket operator/(const __m128 &m, const VectorPacket &v)
   335 	friend VectorPacket operator/(const mfloat4 &m, const VectorPacket &v)
   342 	{
   336 	{
   343 		return VectorPacket(
   337 		return VectorPacket(
   344 			_mm_div_ps(m, v.mx),
   338 			mdiv(m, v.mx),
   345 			_mm_div_ps(m, v.my),
   339 			mdiv(m, v.my),
   346 			_mm_div_ps(m, v.mz));
   340 			mdiv(m, v.mz));
   347 	};
   341 	};
   348 
   342 
   349 	// cell by cell product (only usable for colours)
   343 	// cell by cell product (only usable for colours)
   350 	friend VectorPacket operator*(const VectorPacket &a,  const VectorPacket &b)
   344 	friend VectorPacket operator*(const VectorPacket &a,  const VectorPacket &b)
   351 	{
   345 	{
   352 		return VectorPacket(
   346 		return VectorPacket(
   353 			_mm_mul_ps(a.mx, b.mx),
   347 			mmul(a.mx, b.mx),
   354 			_mm_mul_ps(a.my, b.my),
   348 			mmul(a.my, b.my),
   355 			_mm_mul_ps(a.mz, b.mz));
   349 			mmul(a.mz, b.mz));
   356 	};
   350 	};
   357 
   351 
   358 	// write to character stream
   352 	// write to character stream
   359 	friend ostream & operator<<(ostream &st, const VectorPacket &v)
   353 	friend ostream & operator<<(ostream &st, const VectorPacket &v)
   360 	{
   354 	{