1 /* |
1 /* |
2 * vector.h: Vector class with Colour alias |
2 * vector.h: Vector class with Colour alias |
3 * |
3 * |
4 * This file is part of Pyrit Ray Tracer. |
4 * This file is part of Pyrit Ray Tracer. |
5 * |
5 * |
6 * Copyright 2006, 2007 Radek Brich |
6 * Copyright 2006, 2007, 2008 Radek Brich |
7 * |
7 * |
8 * Permission is hereby granted, free of charge, to any person obtaining a copy |
8 * Permission is hereby granted, free of charge, to any person obtaining a copy |
9 * of this software and associated documentation files (the "Software"), to deal |
9 * of this software and associated documentation files (the "Software"), to deal |
10 * in the Software without restriction, including without limitation the rights |
10 * in the Software without restriction, including without limitation the rights |
11 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
11 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
40 class Vector |
41 class Vector |
41 { |
42 { |
42 public: |
43 public: |
43 // data |
44 // data |
44 union { |
45 union { |
45 #ifndef NO_SSE |
46 #ifndef NO_SIMD |
46 __m128 mps; |
47 mfloat4 mf4; |
47 #endif |
48 #endif |
48 Float cell[4]; |
49 Float cell[4]; |
49 struct { Float x, y, z, w; }; |
50 struct { Float x, y, z, w; }; |
50 struct { Float r, g, b, a; }; |
51 struct { Float r, g, b, a; }; |
51 }; |
52 }; |
52 |
53 |
53 // constructors |
54 // constructors |
54 #ifndef NO_SSE |
55 #ifndef NO_SIMD |
55 Vector(__m128 m): mps(m) {}; |
56 Vector(mfloat4 m): mf4(m) {}; |
56 #endif |
57 #endif |
57 Vector(): x(0.0f), y(0.0f), z(0.0f), w(1.0) {}; |
58 Vector(): x(0.0f), y(0.0f), z(0.0f), w(1.0) {}; |
58 Vector(Float ax, Float ay, Float az): x(ax), y(ay), z(az), w(1.0) {}; |
59 Vector(Float ax, Float ay, Float az): x(ax), y(ay), z(az), w(1.0) {}; |
59 |
60 |
60 // index operator |
61 // index operator |
61 const Float &operator[](int index) const { return cell[index]; }; |
62 const Float &operator[](int index) const { return cell[index]; }; |
62 |
63 Float &operator[](int index) { return cell[index]; }; |
63 bool operator==(Vector &v) const { return x==v.x && y==v.y && z==v.z; }; |
64 |
|
65 bool operator==(const Vector &v) const { return x==v.x && y==v.y && z==v.z; }; |
64 |
66 |
65 // normalize |
67 // normalize |
66 Vector normalize() |
68 Vector normalize() |
67 { |
69 { |
68 const Float f = 1.0f / mag(); |
70 const Float f = 1.0f / mag(); |
257 x[i] = v.x; y[i] = v.y; z[i] = v.z; |
259 x[i] = v.x; y[i] = v.y; z[i] = v.z; |
258 }; |
260 }; |
259 |
261 |
260 void normalize() |
262 void normalize() |
261 { |
263 { |
262 __m128 m,x,y,z; |
264 mfloat4 m,x,y,z; |
263 x = _mm_mul_ps(mx, mx); // x*x |
265 x = mmul(mx, mx); // x*x |
264 y = _mm_mul_ps(my, my); // y*y |
266 y = mmul(my, my); // y*y |
265 z = _mm_mul_ps(mz, mz); // z*z |
267 z = mmul(mz, mz); // z*z |
266 m = _mm_add_ps(x, y); |
268 m = madd(madd(x, y), z); // x*x + y*y + z*z |
267 m = _mm_add_ps(m, z); // x*x + y*y + z*z |
269 m = mdiv(mOne, msqrt(m)); // m = 1/sqrt(m) |
268 m = _mm_sqrt_ps(m); |
270 mx = mmul(mx, m); |
269 m = _mm_div_ps(mOne, m); // m = 1/sqrt(m) |
271 my = mmul(my, m); |
270 mx = _mm_mul_ps(mx, m); |
272 mz = mmul(mz, m); |
271 my = _mm_mul_ps(my, m); |
|
272 mz = _mm_mul_ps(mz, m); |
|
273 }; |
273 }; |
274 |
274 |
275 // accumulate |
275 // accumulate |
276 VectorPacket operator+=(const VectorPacket &v) |
276 VectorPacket operator+=(const VectorPacket &v) |
277 { |
277 { |
278 mx = _mm_add_ps(mx, v.mx); |
278 mx = madd(mx, v.mx); |
279 my = _mm_add_ps(my, v.my); |
279 my = madd(my, v.my); |
280 mz = _mm_add_ps(mz, v.mz); |
280 mz = madd(mz, v.mz); |
281 return *this; |
281 return *this; |
282 }; |
282 }; |
283 |
283 |
284 // add to non-masked components |
284 // add to non-masked components |
285 VectorPacket selectiveAdd(__m128 mask, const VectorPacket &v) |
285 VectorPacket selectiveAdd(const mfloat4 &mask, const VectorPacket &v) |
286 { |
286 { |
287 mx = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(mx, v.mx)), |
287 mx = mselect(mask, madd(mx, v.mx), mx); |
288 _mm_andnot_ps(mask, mx)); |
288 my = mselect(mask, madd(my, v.my), my); |
289 my = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(my, v.my)), |
289 mz = mselect(mask, madd(mz, v.mz), mz); |
290 _mm_andnot_ps(mask, my)); |
|
291 mz = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(mz, v.mz)), |
|
292 _mm_andnot_ps(mask, mz)); |
|
293 return *this; |
290 return *this; |
294 }; |
291 }; |
295 |
292 |
296 // add scalar to non-masked components |
293 // add scalar to non-masked components |
297 VectorPacket selectiveAdd(__m128 mask, const __m128 m) |
294 VectorPacket selectiveAdd(const mfloat4 &mask, const mfloat4 &m) |
298 { |
295 { |
299 mx = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(mx, m)), |
296 mx = mselect(mask, madd(mx, m), mx); |
300 _mm_andnot_ps(mask, mx)); |
297 my = mselect(mask, madd(my, m), my); |
301 my = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(my, m)), |
298 mz = mselect(mask, madd(mz, m), mz); |
302 _mm_andnot_ps(mask, my)); |
|
303 mz = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(mz, m)), |
|
304 _mm_andnot_ps(mask, mz)); |
|
305 return *this; |
299 return *this; |
306 }; |
300 }; |
307 |
301 |
308 // dot product |
302 // dot product |
309 friend __m128 dot(const VectorPacket &a, const VectorPacket &b) |
303 friend mfloat4 dot(const VectorPacket &a, const VectorPacket &b) |
310 { |
304 { |
311 return _mm_add_ps(_mm_add_ps( |
305 return madd(madd( |
312 _mm_mul_ps(a.mx, b.mx), |
306 mmul(a.mx, b.mx), |
313 _mm_mul_ps(a.my, b.my)), |
307 mmul(a.my, b.my)), |
314 _mm_mul_ps(a.mz, b.mz)); |
308 mmul(a.mz, b.mz)); |
315 }; |
309 }; |
316 |
310 |
317 friend VectorPacket operator+(const VectorPacket &a, const VectorPacket &b) |
311 friend VectorPacket operator+(const VectorPacket &a, const VectorPacket &b) |
318 { |
312 { |
319 return VectorPacket( |
313 return VectorPacket( |
320 _mm_add_ps(a.mx, b.mx), |
314 madd(a.mx, b.mx), |
321 _mm_add_ps(a.my, b.my), |
315 madd(a.my, b.my), |
322 _mm_add_ps(a.mz, b.mz)); |
316 madd(a.mz, b.mz)); |
323 }; |
317 }; |
324 |
318 |
325 friend VectorPacket operator-(const VectorPacket &a, const VectorPacket &b) |
319 friend VectorPacket operator-(const VectorPacket &a, const VectorPacket &b) |
326 { |
320 { |
327 return VectorPacket( |
321 return VectorPacket( |
328 _mm_sub_ps(a.mx, b.mx), |
322 msub(a.mx, b.mx), |
329 _mm_sub_ps(a.my, b.my), |
323 msub(a.my, b.my), |
330 _mm_sub_ps(a.mz, b.mz)); |
324 msub(a.mz, b.mz)); |
331 }; |
325 }; |
332 |
326 |
333 friend VectorPacket operator*(const VectorPacket &v, const __m128 &m) |
327 friend VectorPacket operator*(const VectorPacket &v, const mfloat4 &m) |
334 { |
328 { |
335 return VectorPacket( |
329 return VectorPacket( |
336 _mm_mul_ps(v.mx, m), |
330 mmul(v.mx, m), |
337 _mm_mul_ps(v.my, m), |
331 mmul(v.my, m), |
338 _mm_mul_ps(v.mz, m)); |
332 mmul(v.mz, m)); |
339 }; |
333 }; |
340 |
334 |
341 friend VectorPacket operator/(const __m128 &m, const VectorPacket &v) |
335 friend VectorPacket operator/(const mfloat4 &m, const VectorPacket &v) |
342 { |
336 { |
343 return VectorPacket( |
337 return VectorPacket( |
344 _mm_div_ps(m, v.mx), |
338 mdiv(m, v.mx), |
345 _mm_div_ps(m, v.my), |
339 mdiv(m, v.my), |
346 _mm_div_ps(m, v.mz)); |
340 mdiv(m, v.mz)); |
347 }; |
341 }; |
348 |
342 |
349 // cell by cell product (only usable for colours) |
343 // cell by cell product (only usable for colours) |
350 friend VectorPacket operator*(const VectorPacket &a, const VectorPacket &b) |
344 friend VectorPacket operator*(const VectorPacket &a, const VectorPacket &b) |
351 { |
345 { |
352 return VectorPacket( |
346 return VectorPacket( |
353 _mm_mul_ps(a.mx, b.mx), |
347 mmul(a.mx, b.mx), |
354 _mm_mul_ps(a.my, b.my), |
348 mmul(a.my, b.my), |
355 _mm_mul_ps(a.mz, b.mz)); |
349 mmul(a.mz, b.mz)); |
356 }; |
350 }; |
357 |
351 |
358 // write to character stream |
352 // write to character stream |
359 friend ostream & operator<<(ostream &st, const VectorPacket &v) |
353 friend ostream & operator<<(ostream &st, const VectorPacket &v) |
360 { |
354 { |