diff -r 930a2d3ecaed -r e3a2a5b26abb include/scene.h --- a/include/scene.h Thu Apr 24 13:55:11 2008 +0200 +++ b/include/scene.h Thu Apr 24 18:12:32 2008 +0200 @@ -80,8 +80,63 @@ void makeRayPacket(Sample *samples, Ray *rays) { + __m128 m1x,m1y,m1z; + __m128 m2x,m2y,m2z; + __m128 m; + + // m1(xyz) = u * samples[i].x + m1x = _mm_set1_ps(u.x); + m1y = _mm_set1_ps(u.y); + m1z = _mm_set1_ps(u.z); + m = _mm_set_ps(samples[0].x, samples[1].x, samples[2].x, samples[3].x); + m1x = _mm_mul_ps(m1x, m); + m1y = _mm_mul_ps(m1y, m); + m1z = _mm_mul_ps(m1z, m); + + // m2(xyz) = v * samples[i].y + m2x = _mm_set1_ps(v.x); + m2y = _mm_set1_ps(v.y); + m2z = _mm_set1_ps(v.z); + m = _mm_set_ps(samples[0].y, samples[1].y, samples[2].y, samples[3].y); + m2x = _mm_mul_ps(m2x, m); + m2y = _mm_mul_ps(m2y, m); + m2z = _mm_mul_ps(m2z, m); + + // m1(xyz) = (m1 + m2) = (u*samples[i].x + v*samples[i].y) + m1x = _mm_add_ps(m1x, m2x); + m1y = _mm_add_ps(m1y, m2y); + m1z = _mm_add_ps(m1z, m2z); + + // m1(xyz) = m1*F = (u*samples[i].x + v*samples[i].y)*F + m = _mm_set_ps(F,F,F,F); + m1x = _mm_mul_ps(m1x, m); + m1y = _mm_mul_ps(m1y, m); + m1z = _mm_mul_ps(m1z, m); + + // m1(xyz) = p - m1 = p - (u*samples[i].x + v*samples[i].y)*F = dir + m2x = _mm_set1_ps(p.x); + m2y = _mm_set1_ps(p.y); + m2z = _mm_set1_ps(p.z); + m2x = _mm_sub_ps(m2x, m1x); + m2y = _mm_sub_ps(m2y, m1y); + m2z = _mm_sub_ps(m2z, m1z); + + // normalize dir + m1x = _mm_mul_ps(m2x, m2x); // x*x + m1y = _mm_mul_ps(m2y, m2y); // y*y + m1z = _mm_mul_ps(m2z, m2z); // z*z + m = _mm_add_ps(m1x, m1y); // x*x + y*y + m = _mm_add_ps(m, m1z); // m = x*x + y*y + z*z + m = _mm_sqrt_ps(m); // m = sqrt(m) + m2x = _mm_div_ps(m2x, m); // dir(xyz) /= m + m2y = _mm_div_ps(m2y, m); + m2z = _mm_div_ps(m2z, m); + for (int i = 0; i < 4; i++) - rays[i] = makeRay(samples[i]); + { + Vector3 dir(((float*)&m2x)[3-i], ((float*)&m2y)[3-i], ((float*)&m2z)[3-i]); + rays[i] = Ray(eye, dir); + } }; };