diff -r e3a2a5b26abb -r 6f7fe14782c2 include/scene.h
--- a/include/scene.h	Thu Apr 24 18:12:32 2008 +0200
+++ b/include/scene.h	Sun Apr 27 09:44:49 2008 +0200
@@ -48,6 +48,23 @@
 };
 
 /**
+ * packet of 4 rays
+ */
+class RayPacket
+{
+public:
+	VectorPacket o, dir;
+
+	// index operator - get a ray
+	Ray operator[](int i) const
+	{
+		return Ray(
+			Vector3(o.x[i], o.y[i], o.z[i]),
+			Vector3(dir.x[i], dir.y[i], dir.z[i]));
+	};
+};
+
+/**
  * a camera
  */
 class Camera
@@ -78,65 +95,55 @@
 		return Ray(eye, dir);
 	};
 
-	void makeRayPacket(Sample *samples, Ray *rays)
+	void makeRayPacket(Sample *samples, RayPacket &rays)
 	{
 		__m128 m1x,m1y,m1z;
 		__m128 m2x,m2y,m2z;
 		__m128 m;
-		
+
 		// m1(xyz) = u * samples[i].x
-		m1x = _mm_set1_ps(u.x);
-		m1y = _mm_set1_ps(u.y);
-		m1z = _mm_set1_ps(u.z);
-		m = _mm_set_ps(samples[0].x, samples[1].x, samples[2].x, samples[3].x);
+		m1x = _mm_set_ps1(u.x);
+		m1y = _mm_set_ps1(u.y);
+		m1z = _mm_set_ps1(u.z);
+		m = _mm_set_ps(samples[3].x, samples[2].x, samples[1].x, samples[0].x);
 		m1x = _mm_mul_ps(m1x, m);
 		m1y = _mm_mul_ps(m1y, m);
 		m1z = _mm_mul_ps(m1z, m);
-		
+
 		// m2(xyz) = v * samples[i].y
-		m2x = _mm_set1_ps(v.x);
-		m2y = _mm_set1_ps(v.y);
-		m2z = _mm_set1_ps(v.z);
-		m = _mm_set_ps(samples[0].y, samples[1].y, samples[2].y, samples[3].y);
+		m2x = _mm_set_ps1(v.x);
+		m2y = _mm_set_ps1(v.y);
+		m2z = _mm_set_ps1(v.z);
+		m = _mm_set_ps(samples[3].y, samples[2].y, samples[1].y, samples[0].y);
 		m2x = _mm_mul_ps(m2x, m);
 		m2y = _mm_mul_ps(m2y, m);
 		m2z = _mm_mul_ps(m2z, m);
-		
+
 		// m1(xyz) = (m1 + m2) = (u*samples[i].x + v*samples[i].y)
 		m1x = _mm_add_ps(m1x, m2x);
 		m1y = _mm_add_ps(m1y, m2y);
 		m1z = _mm_add_ps(m1z, m2z);
-		
+
 		// m1(xyz) = m1*F = (u*samples[i].x + v*samples[i].y)*F
-		m = _mm_set_ps(F,F,F,F);
+		m = _mm_set_ps1(F);
 		m1x = _mm_mul_ps(m1x, m);
 		m1y = _mm_mul_ps(m1y, m);
 		m1z = _mm_mul_ps(m1z, m);
-		
+
 		// m1(xyz) = p - m1 = p - (u*samples[i].x + v*samples[i].y)*F = dir
-		m2x = _mm_set1_ps(p.x);
-		m2y = _mm_set1_ps(p.y);
-		m2z = _mm_set1_ps(p.z);
-		m2x = _mm_sub_ps(m2x, m1x);
-		m2y = _mm_sub_ps(m2y, m1y);
-		m2z = _mm_sub_ps(m2z, m1z);
-
-		// normalize dir
-		m1x = _mm_mul_ps(m2x, m2x); // x*x
-		m1y = _mm_mul_ps(m2y, m2y); // y*y
-		m1z = _mm_mul_ps(m2z, m2z); // z*z
-		m = _mm_add_ps(m1x, m1y); // x*x + y*y
-		m = _mm_add_ps(m, m1z); // m = x*x + y*y + z*z
-		m = _mm_sqrt_ps(m); // m = sqrt(m)
-		m2x = _mm_div_ps(m2x, m); // dir(xyz) /= m
-		m2y = _mm_div_ps(m2y, m);
-		m2z = _mm_div_ps(m2z, m);
-
-		for (int i = 0; i < 4; i++)
-		{
-			Vector3 dir(((float*)&m2x)[3-i], ((float*)&m2y)[3-i], ((float*)&m2z)[3-i]);
-			rays[i] = Ray(eye, dir);
-		}
+		m2x = _mm_set_ps1(p.x);
+		m2y = _mm_set_ps1(p.y);
+		m2z = _mm_set_ps1(p.z);
+		rays.dir.mx = _mm_sub_ps(m2x, m1x);
+		rays.dir.my = _mm_sub_ps(m2y, m1y);
+		rays.dir.mz = _mm_sub_ps(m2z, m1z);
+
+		// copy origin
+		rays.o.mx = _mm_set_ps1(eye.x);
+		rays.o.my = _mm_set_ps1(eye.y);
+		rays.o.mz = _mm_set_ps1(eye.z);
+
+		rays.dir.normalize();
 	};
 };
 
@@ -171,6 +178,16 @@
 	Float h() { return H.y-L.y; };
 	Float d() { return H.z-L.z; };
 	bool intersect(const Ray &ray, Float &a, Float &b);
+	bool intersect_packet(const RayPacket &rays, __m128 &a, __m128 &b)
+	{
+		// Test every ray: chaining the calls with || would stop at the
+		// first hit and leave the a/b lanes of the remaining rays unset.
+		bool hit = false;
+		for (int i = 0; i < 4; i++)
+			if (intersect(rays[i], ((float*)&a)[i], ((float*)&b)[i]))
+				hit = true;
+		return hit;
+	};
 };
 
 #endif