src/kdtree.cc
branch pyrit
changeset 87 1081e3dd3f3e
parent 86 ce6abe0aeeae
child 91 9d66d323c354
--- a/src/kdtree.cc	Sun Apr 27 19:56:23 2008 +0200
+++ b/src/kdtree.cc	Sun Apr 27 22:55:17 2008 +0200
@@ -365,15 +365,14 @@
 	stack[entry].t = a;
 
 	/* distinguish between internal and external origin of a ray*/
-	stack[entry].pb = rays.o + rays.dir * a; /* external */
-	for (int i = 0; i < 4; i++)
-		if (((float*)&a)[i] < 0.0)
-		{
-			/* internal */
-			stack[entry].pb.x[i] = rays.o.x[i];
-			stack[entry].pb.y[i] = rays.o.y[i];
-			stack[entry].pb.z[i] = rays.o.z[i];
-		}
+	t = _mm_cmplt_ps(a, mZero);
+	stack[entry].pb = rays.o + rays.dir * a;
+	stack[entry].pb.mx = _mm_or_ps(_mm_andnot_ps(t, stack[entry].pb.mx),
+		_mm_and_ps(t, rays.o.mx));
+	stack[entry].pb.my = _mm_or_ps(_mm_andnot_ps(t, stack[entry].pb.my),
+		_mm_and_ps(t, rays.o.my));
+	stack[entry].pb.mz = _mm_or_ps(_mm_andnot_ps(t, stack[entry].pb.mz),
+		_mm_and_ps(t, rays.o.mz));
 
 	/* setup initial exit point in the stack */
 	stack[exit].t = b;
@@ -396,11 +395,11 @@
 			axis = node->getAxis();
 
 			// mask out invalid rays with near > far
-			__m128 curmask = _mm_and_ps(mask, _mm_cmple_ps(stack[entry].t, stack[exit].t));
-			__m128 entry_lt = _mm_cmplt_ps(stack[entry].pb.ma[axis], splitVal);
-			__m128 entry_gt = _mm_cmpgt_ps(stack[entry].pb.ma[axis], splitVal);
-			__m128 exit_lt = _mm_cmplt_ps(stack[exit].pb.ma[axis], splitVal);
-			__m128 exit_gt = _mm_cmpgt_ps(stack[exit].pb.ma[axis], splitVal);
+			const __m128 curmask = _mm_and_ps(mask, _mm_cmple_ps(stack[entry].t, stack[exit].t));
+			const __m128 entry_lt = _mm_cmplt_ps(stack[entry].pb.ma[axis], splitVal);
+			const __m128 entry_gt = _mm_cmpgt_ps(stack[entry].pb.ma[axis], splitVal);
+			const __m128 exit_lt = _mm_cmplt_ps(stack[exit].pb.ma[axis], splitVal);
+			const __m128 exit_gt = _mm_cmpgt_ps(stack[exit].pb.ma[axis], splitVal);
 
 			// if all of
 			// stack[entry].pb[axis] <= splitVal,
@@ -488,7 +487,8 @@
 		{
 			results = (*shape)->intersect_packet(rays, dists);
 			int valid = _mm_movemask_ps(
-				_mm_and_ps(mask, _mm_and_ps(results, _mm_cmpge_ps(dists, _mm_sub_ps(stack[entry].t, mEps)))));
+				_mm_and_ps(mask, _mm_and_ps(results,
+				_mm_cmpge_ps(dists, _mm_sub_ps(stack[entry].t, mEps)))));
 			for (int i = 0; i < 4; i++)
 			{
 				if (*shape != origin_shapes[i] && ((valid>>i)&1))