--- a/include/common.h Thu Apr 24 13:55:11 2008 +0200
+++ b/include/common.h Thu Apr 24 18:12:32 2008 +0200
@@ -33,6 +33,9 @@
#include <pthread.h>
#include <string>
+// SSE intrinsics
+#include <xmmintrin.h>
+
using namespace std;
#ifdef PYRIT_DOUBLE
--- a/include/scene.h Thu Apr 24 13:55:11 2008 +0200
+++ b/include/scene.h Thu Apr 24 18:12:32 2008 +0200
@@ -80,8 +80,63 @@
void makeRayPacket(Sample *samples, Ray *rays)
{
+ __m128 m1x,m1y,m1z;
+ __m128 m2x,m2y,m2z;
+ __m128 m;
+
+ // m1(xyz) = u * samples[i].x
+ m1x = _mm_set1_ps(u.x);
+ m1y = _mm_set1_ps(u.y);
+ m1z = _mm_set1_ps(u.z);
+ m = _mm_set_ps(samples[0].x, samples[1].x, samples[2].x, samples[3].x);
+ m1x = _mm_mul_ps(m1x, m);
+ m1y = _mm_mul_ps(m1y, m);
+ m1z = _mm_mul_ps(m1z, m);
+
+ // m2(xyz) = v * samples[i].y
+ m2x = _mm_set1_ps(v.x);
+ m2y = _mm_set1_ps(v.y);
+ m2z = _mm_set1_ps(v.z);
+ m = _mm_set_ps(samples[0].y, samples[1].y, samples[2].y, samples[3].y);
+ m2x = _mm_mul_ps(m2x, m);
+ m2y = _mm_mul_ps(m2y, m);
+ m2z = _mm_mul_ps(m2z, m);
+
+ // m1(xyz) = (m1 + m2) = (u*samples[i].x + v*samples[i].y)
+ m1x = _mm_add_ps(m1x, m2x);
+ m1y = _mm_add_ps(m1y, m2y);
+ m1z = _mm_add_ps(m1z, m2z);
+
+ // m1(xyz) = m1*F = (u*samples[i].x + v*samples[i].y)*F
+ m = _mm_set_ps(F,F,F,F);
+ m1x = _mm_mul_ps(m1x, m);
+ m1y = _mm_mul_ps(m1y, m);
+ m1z = _mm_mul_ps(m1z, m);
+
+ // m1(xyz) = p - m1 = p - (u*samples[i].x + v*samples[i].y)*F = dir
+ m2x = _mm_set1_ps(p.x);
+ m2y = _mm_set1_ps(p.y);
+ m2z = _mm_set1_ps(p.z);
+ m2x = _mm_sub_ps(m2x, m1x);
+ m2y = _mm_sub_ps(m2y, m1y);
+ m2z = _mm_sub_ps(m2z, m1z);
+
+ // normalize dir
+ m1x = _mm_mul_ps(m2x, m2x); // x*x
+ m1y = _mm_mul_ps(m2y, m2y); // y*y
+ m1z = _mm_mul_ps(m2z, m2z); // z*z
+ m = _mm_add_ps(m1x, m1y); // x*x + y*y
+ m = _mm_add_ps(m, m1z); // m = x*x + y*y + z*z
+ m = _mm_sqrt_ps(m); // m = sqrt(m)
+ m2x = _mm_div_ps(m2x, m); // dir(xyz) /= m
+ m2y = _mm_div_ps(m2y, m);
+ m2z = _mm_div_ps(m2z, m);
+
for (int i = 0; i < 4; i++)
- rays[i] = makeRay(samples[i]);
+ {
+ Vector3 dir(((float*)&m2x)[3-i], ((float*)&m2y)[3-i], ((float*)&m2z)[3-i]);
+ rays[i] = Ray(eye, dir);
+ }
};
};
--- a/include/vector.h Thu Apr 24 13:55:11 2008 +0200
+++ b/include/vector.h Thu Apr 24 18:12:32 2008 +0200
@@ -71,10 +71,10 @@
};
// get normalized copy
- Vector3 unit() const
+ friend Vector3 normalize(Vector3 &v)
{
- Vector3 u(*this);
- return u.normalize();
+ const Float f = 1.0f / v.mag();
+ return v * f;
};
// square magnitude, magnitude
--- a/src/raytracer.cc Thu Apr 24 13:55:11 2008 +0200
+++ b/src/raytracer.cc Thu Apr 24 18:12:32 2008 +0200
@@ -404,7 +404,7 @@
pthread_mutex_unlock(&sample_queue_mutex);
sampdone += my_count;
- dbgmsg(2, "\b\b\b\b\b\b\b\b%2d%% done", (sampdone - sample_queue_count)*100/sampnum);
+ dbgmsg(2, "\b\b\b\b\b\b\b\b%2d%% done", ((long)sampdone - sample_queue_count)*100/sampnum);
pthread_mutex_lock(&sampler_mutex);