--- a/src/kdtree.cc Sun Apr 27 19:56:23 2008 +0200
+++ b/src/kdtree.cc Sun Apr 27 22:55:17 2008 +0200
@@ -365,15 +365,14 @@
stack[entry].t = a;
/* distinguish between internal and external origin of a ray*/
- stack[entry].pb = rays.o + rays.dir * a; /* external */
- for (int i = 0; i < 4; i++)
- if (((float*)&a)[i] < 0.0)
- {
- /* internal */
- stack[entry].pb.x[i] = rays.o.x[i];
- stack[entry].pb.y[i] = rays.o.y[i];
- stack[entry].pb.z[i] = rays.o.z[i];
- }
+ t = _mm_cmplt_ps(a, mZero); /* per-lane mask: all bits set in lanes where a < mZero */
+ stack[entry].pb = rays.o + rays.dir * a; /* external origin: entry point at distance a along each ray */
+ stack[entry].pb.mx = _mm_or_ps(_mm_andnot_ps(t, stack[entry].pb.mx), /* lanes outside t keep the computed point */
+ _mm_and_ps(t, rays.o.mx)); /* lanes inside t (internal origin) take the ray origin */
+ stack[entry].pb.my = _mm_or_ps(_mm_andnot_ps(t, stack[entry].pb.my), /* same branchless select for y */
+ _mm_and_ps(t, rays.o.my));
+ stack[entry].pb.mz = _mm_or_ps(_mm_andnot_ps(t, stack[entry].pb.mz), /* same branchless select for z */
+ _mm_and_ps(t, rays.o.mz));
/* setup initial exit point in the stack */
stack[exit].t = b;
@@ -396,11 +395,11 @@
axis = node->getAxis();
// mask out invalid rays with near > far
- __m128 curmask = _mm_and_ps(mask, _mm_cmple_ps(stack[entry].t, stack[exit].t));
- __m128 entry_lt = _mm_cmplt_ps(stack[entry].pb.ma[axis], splitVal);
- __m128 entry_gt = _mm_cmpgt_ps(stack[entry].pb.ma[axis], splitVal);
- __m128 exit_lt = _mm_cmplt_ps(stack[exit].pb.ma[axis], splitVal);
- __m128 exit_gt = _mm_cmpgt_ps(stack[exit].pb.ma[axis], splitVal);
+ const __m128 curmask = _mm_and_ps(mask, _mm_cmple_ps(stack[entry].t, stack[exit].t)); /* lanes set in mask whose entry t <= exit t */
+ const __m128 entry_lt = _mm_cmplt_ps(stack[entry].pb.ma[axis], splitVal); /* per lane: entry pb component on this axis < splitVal */
+ const __m128 entry_gt = _mm_cmpgt_ps(stack[entry].pb.ma[axis], splitVal); /* per lane: entry pb component on this axis > splitVal */
+ const __m128 exit_lt = _mm_cmplt_ps(stack[exit].pb.ma[axis], splitVal); /* per lane: exit pb component on this axis < splitVal */
+ const __m128 exit_gt = _mm_cmpgt_ps(stack[exit].pb.ma[axis], splitVal); /* per lane: exit pb component on this axis > splitVal */
// if all of
// stack[entry].pb[axis] <= splitVal,
@@ -488,7 +487,8 @@
{
results = (*shape)->intersect_packet(rays, dists);
int valid = _mm_movemask_ps(
- _mm_and_ps(mask, _mm_and_ps(results, _mm_cmpge_ps(dists, _mm_sub_ps(stack[entry].t, mEps)))));
+ _mm_and_ps(mask, _mm_and_ps(results,
+ _mm_cmpge_ps(dists, _mm_sub_ps(stack[entry].t, mEps))))); /* lanes set in mask and results with dists >= entry t - mEps */
for (int i = 0; i < 4; i++)
{
if (*shape != origin_shapes[i] && ((valid>>i)&1))