diff -r ce6abe0aeeae -r 1081e3dd3f3e src/kdtree.cc --- a/src/kdtree.cc Sun Apr 27 19:56:23 2008 +0200 +++ b/src/kdtree.cc Sun Apr 27 22:55:17 2008 +0200 @@ -365,15 +365,14 @@ stack[entry].t = a; /* distinguish between internal and external origin of a ray*/ - stack[entry].pb = rays.o + rays.dir * a; /* external */ - for (int i = 0; i < 4; i++) - if (((float*)&a)[i] < 0.0) - { - /* internal */ - stack[entry].pb.x[i] = rays.o.x[i]; - stack[entry].pb.y[i] = rays.o.y[i]; - stack[entry].pb.z[i] = rays.o.z[i]; - } + t = _mm_cmplt_ps(a, mZero); + stack[entry].pb = rays.o + rays.dir * a; + stack[entry].pb.mx = _mm_or_ps(_mm_andnot_ps(t, stack[entry].pb.mx), + _mm_and_ps(t, rays.o.mx)); + stack[entry].pb.my = _mm_or_ps(_mm_andnot_ps(t, stack[entry].pb.my), + _mm_and_ps(t, rays.o.my)); + stack[entry].pb.mz = _mm_or_ps(_mm_andnot_ps(t, stack[entry].pb.mz), + _mm_and_ps(t, rays.o.mz)); /* setup initial exit point in the stack */ stack[exit].t = b; @@ -396,11 +395,11 @@ axis = node->getAxis(); // mask out invalid rays with near > far - __m128 curmask = _mm_and_ps(mask, _mm_cmple_ps(stack[entry].t, stack[exit].t)); - __m128 entry_lt = _mm_cmplt_ps(stack[entry].pb.ma[axis], splitVal); - __m128 entry_gt = _mm_cmpgt_ps(stack[entry].pb.ma[axis], splitVal); - __m128 exit_lt = _mm_cmplt_ps(stack[exit].pb.ma[axis], splitVal); - __m128 exit_gt = _mm_cmpgt_ps(stack[exit].pb.ma[axis], splitVal); + const __m128 curmask = _mm_and_ps(mask, _mm_cmple_ps(stack[entry].t, stack[exit].t)); + const __m128 entry_lt = _mm_cmplt_ps(stack[entry].pb.ma[axis], splitVal); + const __m128 entry_gt = _mm_cmpgt_ps(stack[entry].pb.ma[axis], splitVal); + const __m128 exit_lt = _mm_cmplt_ps(stack[exit].pb.ma[axis], splitVal); + const __m128 exit_gt = _mm_cmpgt_ps(stack[exit].pb.ma[axis], splitVal); // if all of // stack[entry].pb[axis] <= splitVal, @@ -488,7 +487,8 @@ { results = (*shape)->intersect_packet(rays, dists); int valid = _mm_movemask_ps( - _mm_and_ps(mask, _mm_and_ps(results, _mm_cmpge_ps(dists, _mm_sub_ps(stack[entry].t, mEps))))); + _mm_and_ps(mask, _mm_and_ps(results, + _mm_cmpge_ps(dists, _mm_sub_ps(stack[entry].t, mEps))))); for (int i = 0; i < 4; i++) { if (*shape != origin_shapes[i] && ((valid>>i)&1))