--- a/include/scene.h Thu Apr 24 18:12:32 2008 +0200
+++ b/include/scene.h Sun Apr 27 09:44:49 2008 +0200
@@ -48,6 +48,23 @@
};
/**
+ * packet of 4 rays
+ */
+class RayPacket
+{
+public:
+    VectorPacket o, dir; // ray origins and directions, one lane per ray
+
+    // build a scalar Ray from lane i of the origin and direction packets
+    Ray operator[](int i) const
+    {
+        Vector3 origin(o.x[i], o.y[i], o.z[i]);
+        Vector3 direction(dir.x[i], dir.y[i], dir.z[i]);
+        return Ray(origin, direction);
+    };
+};
+
+/**
* a camera
*/
class Camera
@@ -78,65 +95,55 @@
return Ray(eye, dir);
};
- void makeRayPacket(Sample *samples, Ray *rays)
+ void makeRayPacket(Sample *samples, RayPacket &rays) // fills rays with 4 camera rays built from samples[0..3]
{
__m128 m1x,m1y,m1z;
__m128 m2x,m2y,m2z;
__m128 m;
-
+
// m1(xyz) = u * samples[i].x
- m1x = _mm_set1_ps(u.x);
- m1y = _mm_set1_ps(u.y);
- m1z = _mm_set1_ps(u.z);
- m = _mm_set_ps(samples[0].x, samples[1].x, samples[2].x, samples[3].x);
+ m1x = _mm_set_ps1(u.x); // broadcast u.x to all 4 lanes
+ m1y = _mm_set_ps1(u.y);
+ m1z = _mm_set_ps1(u.z);
+ m = _mm_set_ps(samples[3].x, samples[2].x, samples[1].x, samples[0].x); // _mm_set_ps takes lanes high-to-low, so lane i holds samples[i].x
m1x = _mm_mul_ps(m1x, m);
m1y = _mm_mul_ps(m1y, m);
m1z = _mm_mul_ps(m1z, m);
-
+
// m2(xyz) = v * samples[i].y
- m2x = _mm_set1_ps(v.x);
- m2y = _mm_set1_ps(v.y);
- m2z = _mm_set1_ps(v.z);
- m = _mm_set_ps(samples[0].y, samples[1].y, samples[2].y, samples[3].y);
+ m2x = _mm_set_ps1(v.x);
+ m2y = _mm_set_ps1(v.y);
+ m2z = _mm_set_ps1(v.z);
+ m = _mm_set_ps(samples[3].y, samples[2].y, samples[1].y, samples[0].y); // lane i holds samples[i].y
m2x = _mm_mul_ps(m2x, m);
m2y = _mm_mul_ps(m2y, m);
m2z = _mm_mul_ps(m2z, m);
-
+
// m1(xyz) = (m1 + m2) = (u*samples[i].x + v*samples[i].y)
m1x = _mm_add_ps(m1x, m2x);
m1y = _mm_add_ps(m1y, m2y);
m1z = _mm_add_ps(m1z, m2z);
-
+
// m1(xyz) = m1*F = (u*samples[i].x + v*samples[i].y)*F
- m = _mm_set_ps(F,F,F,F);
+ m = _mm_set_ps1(F); // broadcast F to all 4 lanes
m1x = _mm_mul_ps(m1x, m);
m1y = _mm_mul_ps(m1y, m);
m1z = _mm_mul_ps(m1z, m);
-
+
// m1(xyz) = p - m1 = p - (u*samples[i].x + v*samples[i].y)*F = dir
- m2x = _mm_set1_ps(p.x);
- m2y = _mm_set1_ps(p.y);
- m2z = _mm_set1_ps(p.z);
- m2x = _mm_sub_ps(m2x, m1x);
- m2y = _mm_sub_ps(m2y, m1y);
- m2z = _mm_sub_ps(m2z, m1z);
-
- // normalize dir
- m1x = _mm_mul_ps(m2x, m2x); // x*x
- m1y = _mm_mul_ps(m2y, m2y); // y*y
- m1z = _mm_mul_ps(m2z, m2z); // z*z
- m = _mm_add_ps(m1x, m1y); // x*x + y*y
- m = _mm_add_ps(m, m1z); // m = x*x + y*y + z*z
- m = _mm_sqrt_ps(m); // m = sqrt(m)
- m2x = _mm_div_ps(m2x, m); // dir(xyz) /= m
- m2y = _mm_div_ps(m2y, m);
- m2z = _mm_div_ps(m2z, m);
-
- for (int i = 0; i < 4; i++)
- {
- Vector3 dir(((float*)&m2x)[3-i], ((float*)&m2y)[3-i], ((float*)&m2z)[3-i]);
- rays[i] = Ray(eye, dir);
- }
+ m2x = _mm_set_ps1(p.x);
+ m2y = _mm_set_ps1(p.y);
+ m2z = _mm_set_ps1(p.z);
+ rays.dir.mx = _mm_sub_ps(m2x, m1x); // unnormalized directions go straight into the packet
+ rays.dir.my = _mm_sub_ps(m2y, m1y);
+ rays.dir.mz = _mm_sub_ps(m2z, m1z);
+
+ // all four rays share the same origin: broadcast eye into every lane
+ rays.o.mx = _mm_set_ps1(eye.x);
+ rays.o.my = _mm_set_ps1(eye.y);
+ rays.o.mz = _mm_set_ps1(eye.z);
+
+ rays.dir.normalize(); // presumably scales each direction to unit length, replacing the removed inline sqrt/div code - VectorPacket::normalize is not visible here, confirm
};
};
@@ -171,6 +178,13 @@
Float h() { return H.y-L.y; };
Float d() { return H.z-L.z; };
bool intersect(const Ray &ray, Float &a, Float &b);
+    bool intersect_packet(const RayPacket &rays, __m128 &a, __m128 &b) // true if any of the 4 rays hits; lane i of a/b receives ray i's results
+    {
+        bool hit = false;
+        for (int i = 0; i < 4; i++) // no short-circuit: every ray is tested so each lane of a and b is passed to intersect()
+            hit |= intersect(rays[i], ((float*)&a)[i], ((float*)&b)[i]);
+        return hit;
+    };
};
#endif