diff -r 9d66d323c354 -r 9af5c039b678 src/scene.cc --- a/src/scene.cc Fri May 02 13:27:47 2008 +0200 +++ b/src/scene.cc Mon May 05 15:31:14 2008 +0200 @@ -73,7 +73,7 @@ } /* http://www.siggraph.org/education/materials/HyperGraph/raytrace/rtinter3.htm */ -bool BBox::intersect(const Ray &ray, Float &a, Float &b) +bool BBox::intersect(const Ray &ray, Float &a, Float &b) const { register Float tnear = -Inf; register Float tfar = Inf; @@ -108,46 +108,33 @@ return true; } -#ifndef NO_SSE +#ifndef NO_SIMD // rewrite of BBox::intersect for ray packets -__m128 BBox::intersect_packet(const RayPacket &rays, __m128 &a, __m128 &b) +mfloat4 BBox::intersect_packet(const RayPacket &rays, mfloat4 &a, mfloat4 &b) const { - register __m128 tnear = mZero; - register __m128 tfar = mInf; - register __m128 t1, t2; - register __m128 mask = mAllSet; - - for (int i = 0; i < 3; i++) - { - const __m128 mL = _mm_set_ps1(L[i]); - const __m128 mH = _mm_set_ps1(H[i]); - mask = _mm_and_ps(mask, - _mm_or_ps( - _mm_or_ps(_mm_cmplt_ps(rays.dir.ma[i], mMEps), _mm_cmpgt_ps(rays.dir.ma[i], mEps)), - _mm_and_ps(_mm_cmpge_ps(rays.o.ma[i], mL), _mm_cmple_ps(rays.o.ma[i], mH)) - )); - if (!_mm_movemask_ps(mask)) - return mask; + mfloat4 origin = rays.o.ma[0]; + mfloat4 invdir = rays.invdir.ma[0]; + mfloat4 t1 = mmul(msub(mset1(L[0]), origin), invdir); + mfloat4 t2 = mmul(msub(mset1(H[0]), origin), invdir); + mfloat4 tmin = mmin(t1, t2); + mfloat4 tmax = mmax(t1, t2); - /* compute the intersection distance of the planes */ - t1 = _mm_div_ps(_mm_sub_ps(mL, rays.o.ma[i]), rays.dir.ma[i]); - t2 = _mm_div_ps(_mm_sub_ps(mH, rays.o.ma[i]), rays.dir.ma[i]); - - __m128 t = _mm_min_ps(t1, t2); - t2 = _mm_max_ps(t1, t2); - t1 = t; + origin = rays.o.ma[1]; + invdir = rays.invdir.ma[1]; + t1 = mmul(msub(mset1(L[1]), origin), invdir); + t2 = mmul(msub(mset1(H[1]), origin), invdir); + tmin = mmax(mmin(t1, t2), tmin); + tmax = mmin(mmax(t1, t2), tmax); - tnear = _mm_max_ps(tnear, t1); /* want largest Tnear */ - tfar = _mm_min_ps(tfar, t2); /* want smallest Tfar */ + origin = rays.o.ma[2]; + invdir = rays.invdir.ma[2]; + t1 = mmul(msub(mset1(L[2]), origin), invdir); + t2 = mmul(msub(mset1(H[2]), origin), invdir); + tmin = mmax(mmin(t1, t2), tmin); + tmax = mmin(mmax(t1, t2), tmax); - mask = _mm_and_ps(mask, - _mm_and_ps(_mm_cmple_ps(tnear, tfar), _mm_cmpge_ps(tfar, mZero))); - if (!_mm_movemask_ps(mask)) - return mask; - } - - a = tnear; - b = tfar; - return mask; + a = tmin; + b = tmax; + return mand(mcmplt(tmin, tmax), mcmpgt(tmax, mZero)); } #endif