--- a/include/scene.h Thu Apr 24 13:55:11 2008 +0200
+++ b/include/scene.h Thu Apr 24 18:12:32 2008 +0200
@@ -80,8 +80,63 @@
void makeRayPacket(Sample *samples, Ray *rays)
{
+ __m128 m1x,m1y,m1z;
+ __m128 m2x,m2y,m2z;
+ __m128 m;
+ // Fills rays[0..3] from samples[0..3] in one SSE pass: dir = normalize(p - (u*x + v*y)*F), origin = eye.
+ // m1(xyz) = u * samples[i].x (u, v, p, F and eye are not declared in this hunk)
+ m1x = _mm_set1_ps(u.x);
+ m1y = _mm_set1_ps(u.y);
+ m1z = _mm_set1_ps(u.z);
+ m = _mm_set_ps(samples[0].x, samples[1].x, samples[2].x, samples[3].x); // _mm_set_ps takes the highest lane first: samples[i] lands in lane 3-i
+ m1x = _mm_mul_ps(m1x, m);
+ m1y = _mm_mul_ps(m1y, m);
+ m1z = _mm_mul_ps(m1z, m);
+
+ // m2(xyz) = v * samples[i].y
+ m2x = _mm_set1_ps(v.x);
+ m2y = _mm_set1_ps(v.y);
+ m2z = _mm_set1_ps(v.z);
+ m = _mm_set_ps(samples[0].y, samples[1].y, samples[2].y, samples[3].y); // same reversed lane order as for .x above
+ m2x = _mm_mul_ps(m2x, m);
+ m2y = _mm_mul_ps(m2y, m);
+ m2z = _mm_mul_ps(m2z, m);
+
+ // m1(xyz) = (m1 + m2) = (u*samples[i].x + v*samples[i].y)
+ m1x = _mm_add_ps(m1x, m2x);
+ m1y = _mm_add_ps(m1y, m2y);
+ m1z = _mm_add_ps(m1z, m2z);
+
+ // m1(xyz) = m1*F = (u*samples[i].x + v*samples[i].y)*F
+ m = _mm_set_ps(F,F,F,F); // the same F in all four lanes
+ m1x = _mm_mul_ps(m1x, m);
+ m1y = _mm_mul_ps(m1y, m);
+ m1z = _mm_mul_ps(m1z, m);
+
+ // m2(xyz) = p - m1 = p - (u*samples[i].x + v*samples[i].y)*F = dir (the result is kept in m2, m1 is reused below)
+ m2x = _mm_set1_ps(p.x);
+ m2y = _mm_set1_ps(p.y);
+ m2z = _mm_set1_ps(p.z);
+ m2x = _mm_sub_ps(m2x, m1x);
+ m2y = _mm_sub_ps(m2y, m1y);
+ m2z = _mm_sub_ps(m2z, m1z);
+
+ // normalize dir (m2); m1 only holds the squared components from here on
+ m1x = _mm_mul_ps(m2x, m2x); // x*x
+ m1y = _mm_mul_ps(m2y, m2y); // y*y
+ m1z = _mm_mul_ps(m2z, m2z); // z*z
+ m = _mm_add_ps(m1x, m1y); // x*x + y*y
+ m = _mm_add_ps(m, m1z); // m = x*x + y*y + z*z
+ m = _mm_sqrt_ps(m); // m = sqrt(m) = length of dir in each lane
+ m2x = _mm_div_ps(m2x, m); // dir(xyz) /= m; NOTE(review): no guard against a zero length here
+ m2y = _mm_div_ps(m2y, m);
+ m2z = _mm_div_ps(m2z, m);
+
for (int i = 0; i < 4; i++)
- rays[i] = makeRay(samples[i]);
+ {
+ Vector3 dir(((float*)&m2x)[3-i], ((float*)&m2y)[3-i], ((float*)&m2z)[3-i]); // lane 3-i holds sample i, undoing the reversed _mm_set_ps order
+ rays[i] = Ray(eye, dir);
+ }
};
};