diff -r 6f7fe14782c2 -r 907a634e5c02 src/shapes.cc
--- a/src/shapes.cc Sun Apr 27 09:44:49 2008 +0200
+++ b/src/shapes.cc Sun Apr 27 14:19:37 2008 +0200
@@ -348,6 +348,71 @@
 #endif
 }
 
+#ifdef TRI_BARI_PRE
+/**
+ * Intersect a packet of rays (one ray per SSE lane) with this triangle.
+ *
+ * A lane is accepted when mEps <= t < dists, beta >= 0, gamma >= 0 and
+ * beta + gamma <= 1, where t, beta and gamma are computed below from the
+ * members nd, nu, nv, bnu, bnv, cnu, cnv and the vertex position A->P.
+ *
+ * @param rays   packet whose rays.o.ma[] / rays.dir.ma[] are read per axis
+ * @param dists  in: per-lane upper bound for t; out: accepted lanes are
+ *               overwritten with t, all other lanes are left unchanged
+ * @return per-lane mask, set for every accepted lane
+ */
+__m128 Triangle::intersect_packet(const RayPacket &rays, __m128 &dists)
+{
+	/* table lookup for (k+1)%3 and (k+2)%3; NOTE(review): assumes k is 0..2 */
+	static const int modulo3[5] = {0,1,2,0,1};
+	const int u = modulo3[k+1];
+	const int v = modulo3[k+2];
+
+	const __m128 mnu = _mm_set_ps1(nu);
+	const __m128 mnv = _mm_set_ps1(nv);
+
+	/* t = (nd - o[k] - nu*o[u] - nv*o[v]) / (dir[k] + nu*dir[u] + nv*dir[v]) */
+	const __m128 numer = _mm_sub_ps(
+	  _mm_sub_ps(_mm_sub_ps(_mm_set_ps1(nd), rays.o.ma[k]),
+	    _mm_mul_ps(mnu, rays.o.ma[u])),
+	  _mm_mul_ps(mnv, rays.o.ma[v]));
+	const __m128 denom = _mm_add_ps(rays.dir.ma[k],
+	  _mm_add_ps(_mm_mul_ps(mnu, rays.dir.ma[u]),
+	    _mm_mul_ps(mnv, rays.dir.ma[v])));
+	const __m128 t = _mm_div_ps(numer, denom);
+
+	/* keep lanes with mEps <= t < dists; leave early when none remain */
+	__m128 mask = _mm_and_ps(_mm_cmplt_ps(t, dists), _mm_cmpge_ps(t, mEps));
+	if (!_mm_movemask_ps(mask))
+		return mask;
+
+	/* offset of the point o + t*dir from A->P on the u and v axes */
+	const __m128 hu = _mm_sub_ps(_mm_add_ps(rays.o.ma[u],
+	  _mm_mul_ps(t, rays.dir.ma[u])), _mm_set_ps1(A->P[u]));
+	const __m128 hv = _mm_sub_ps(_mm_add_ps(rays.o.ma[v],
+	  _mm_mul_ps(t, rays.dir.ma[v])), _mm_set_ps1(A->P[v]));
+	const __m128 beta = _mm_add_ps(_mm_mul_ps(hv, _mm_set_ps1(bnu)),
+	  _mm_mul_ps(hu, _mm_set_ps1(bnv)));
+
+	mask = _mm_and_ps(mask, _mm_cmpge_ps(beta, zeros));
+	if (!_mm_movemask_ps(mask))
+		return mask;
+
+	const __m128 gamma = _mm_add_ps(_mm_mul_ps(hu, _mm_set_ps1(cnv)),
+	  _mm_mul_ps(hv, _mm_set_ps1(cnu)));
+
+	mask = _mm_and_ps(mask, _mm_and_ps(_mm_cmpge_ps(gamma, zeros),
+	  _mm_cmple_ps(_mm_add_ps(beta, gamma), ones)));
+	if (!_mm_movemask_ps(mask))
+		return mask;
+
+	/* branch-free select: every mask lane is all-ones or all-zeros, so this
+	 * stores t into accepted lanes and keeps the old dists in the others */
+	dists = _mm_or_ps(_mm_and_ps(mask, t), _mm_andnot_ps(mask, dists));
+	return mask;
+}
+#endif
+
 bool Triangle::intersect_bbox(const BBox &bbox) const
 {
 	const Vector3 boxcenter = (bbox.L+bbox.H)*0.5;