src/kdtree.cc
branchpyrit
changeset 84 6f7fe14782c2
parent 80 907929fa9b59
child 85 907a634e5c02
--- a/src/kdtree.cc	Thu Apr 24 18:12:32 2008 +0200
+++ b/src/kdtree.cc	Sun Apr 27 09:44:49 2008 +0200
@@ -256,7 +256,7 @@
 					node = node->getLeftChild();
 					continue;
 				}
-				if (stack[exit].pb[axis] == splitVal)
+				if (stack[entry].pb[axis] == splitVal)
 				{ /* case Z1 */
 					node = node->getRightChild();
 					continue;
@@ -328,6 +328,219 @@
 	return NULL;
 }
 
+// stack element for kd-tree traversal (packet version)
+struct StackElem4
+{
+	KdNode* node; /* pointer to far child */
+	__m128 t; /* the entry/exit signed distance */
+	VectorPacket pb; /* the coordinates of entry/exit point */
+	int prev;
+};
+
+void KdTree::packet_intersection(const Shape **origin_shapes, const RayPacket &rays,
+		Float *nearest_distances, Shape **nearest_shapes)
+{
+	__m128 a, b; /* entry/exit signed distance */
+	__m128 t;    /* signed distance to the splitting plane */
+	__m128 mask = zeros;
+
+	/* if we have no tree, fall back to naive test */
+	if (!built)
+		Container::packet_intersection(origin_shapes, rays, nearest_distances, nearest_shapes);
+
+	nearest_shapes[0] = NULL;
+	nearest_shapes[1] = NULL;
+	nearest_shapes[2] = NULL;
+	nearest_shapes[3] = NULL;
+
+	//bbox.intersect_packet(rays, a, b)
+	if (bbox.intersect(rays[0], ((float*)&a)[0], ((float*)&b)[0]))
+		((int*)&mask)[0] = -1;
+	if (bbox.intersect(rays[1], ((float*)&a)[1], ((float*)&b)[1]))
+		((int*)&mask)[1] = -1;
+	if (bbox.intersect(rays[2], ((float*)&a)[2], ((float*)&b)[2]))
+		((int*)&mask)[2] = -1;
+	if (bbox.intersect(rays[3], ((float*)&a)[3], ((float*)&b)[3]))
+		((int*)&mask)[3] = -1;
+
+	if (!_mm_movemask_ps(mask))
+		return;
+
+	/* pointers to the far child node and current node */
+	KdNode *farchild, *node;
+	node = root;
+
+	StackElem4 stack[max_depth];
+
+	int entry = 0, exit = 1;
+	stack[entry].t = a;
+
+	/* distinguish between internal and external origin of a ray*/
+	stack[entry].pb = rays.o + rays.dir * a; /* external */
+	for (int i = 0; i < 4; i++)
+		if (((float*)&a)[i] < 0.0)
+		{
+			/* internal */
+			stack[entry].pb.x[i] = rays.o.x[i];
+			stack[entry].pb.y[i] = rays.o.y[i];
+			stack[entry].pb.z[i] = rays.o.z[i];
+		}
+
+	/* setup initial exit point in the stack */
+	stack[exit].t = b;
+	stack[exit].pb = rays.o + rays.dir * b;
+	stack[exit].node = NULL;
+
+	/* loop, traverse through the whole kd-tree,
+	until an object is intersected or ray leaves the scene */
+	__m128 splitVal;
+	int axis;
+	static const int mod3[] = {0,1,2,0,1};
+	const VectorPacket invdirs = ones / rays.dir;
+	while (node)
+	{
+		/* loop until a leaf is found */
+		while (!node->isLeaf())
+		{
+			/* retrieve position of splitting plane */
+			splitVal = _mm_set_ps1(node->getSplit());
+			axis = node->getAxis();
+
+			// mask out invalid rays with near > far
+			__m128 curmask = _mm_and_ps(mask, _mm_cmple_ps(stack[entry].t, stack[exit].t));
+			__m128 entry_lt = _mm_cmplt_ps(stack[entry].pb.ma[axis], splitVal);
+			__m128 entry_gt = _mm_cmpgt_ps(stack[entry].pb.ma[axis], splitVal);
+			__m128 exit_lt = _mm_cmplt_ps(stack[exit].pb.ma[axis], splitVal);
+			__m128 exit_gt = _mm_cmpgt_ps(stack[exit].pb.ma[axis], splitVal);
+
+			// if all of
+			// stack[entry].pb[axis] <= splitVal,
+			// stack[exit].pb[axis] <= splitVal
+			if (!_mm_movemask_ps(
+				_mm_and_ps(_mm_or_ps(entry_gt, exit_gt), curmask)))
+			{
+				node = node->getLeftChild();
+				continue;
+			}
+
+			// if all of
+			// stack[entry].pb[axis] >= splitVal,
+			// stack[exit].pb[axis] >= splitVal
+			if (!_mm_movemask_ps(
+				_mm_and_ps(_mm_or_ps(entry_lt, exit_lt), curmask)))
+			{
+				node = node->getRightChild();
+				continue;
+			}
+
+			// any of
+			// stack[entry].pb[axis] < splitVal,
+			// stack[exit].pb[axis] > splitVal
+			bool cond1 = _mm_movemask_ps(
+				_mm_and_ps(_mm_and_ps(entry_lt, exit_gt), curmask));
+
+			// any of
+			// stack[entry].pb[axis] > splitVal,
+			// stack[exit].pb[axis] < splitVal
+			bool cond2 = _mm_movemask_ps(
+				_mm_and_ps(_mm_and_ps(entry_gt, exit_lt), curmask));
+
+			if ((!cond1 && !cond2) || (cond1 && cond2))
+			{
+				// fall back to single rays
+				// FIXME: split rays and continue
+				for (int i = 0; i < 4; i++)
+					if(!nearest_shapes[i])
+						nearest_shapes[i] = nearest_intersection(origin_shapes[i],
+							rays[i], nearest_distances[i]);
+				return;
+			}
+
+			if (cond1)
+			{
+				farchild = node->getRightChild();
+				node = node->getLeftChild();
+			}
+			else
+			{
+				farchild = node->getLeftChild();
+				node = node->getRightChild();
+			}
+
+			/* traverse both children */
+
+			/* signed distance to the splitting plane */
+			t = _mm_mul_ps(_mm_sub_ps(splitVal, rays.o.ma[axis]), invdirs.ma[axis]);
+
+			/* setup the new exit point and push it onto stack */
+			register int tmp = exit;
+
+			exit++;
+			if (exit == entry)
+				exit++;
+			assert(exit < max_depth);
+
+			stack[exit].prev = tmp;
+			stack[exit].t = t;
+			stack[exit].node = farchild;
+			stack[exit].pb.ma[axis] = splitVal;
+			stack[exit].pb.ma[mod3[axis+1]] =
+				_mm_add_ps(rays.o.ma[mod3[axis+1]], _mm_mul_ps(t, rays.dir.ma[mod3[axis+1]]));
+			stack[exit].pb.ma[mod3[axis+2]] =
+				_mm_add_ps(rays.o.ma[mod3[axis+2]], _mm_mul_ps(t, rays.dir.ma[mod3[axis+2]]));
+		}
+
+		/* current node is the leaf . . . empty or full */
+		__m128 dists = stack[exit].t;
+		ShapeList::iterator shape;
+		for (shape = node->getShapes()->begin(); shape != node->getShapes()->end(); shape++)
+		{
+			for (int i = 0; i < 4; i++)
+				if ( ((_mm_movemask_ps(mask)>>(i))&1) &&
+				((float*)&stack[entry].t)[i] < ((float*)&stack[exit].t)[i] &&
+				*shape != origin_shapes[i] &&
+				(*shape)->intersect(rays[i], ((float*)&dists)[i])
+				&& ((float*)&dists)[i] >= ((float*)&stack[entry].t)[i] - Eps)
+				{
+					nearest_shapes[i] = *shape;
+					nearest_distances[i] = ((float*)&dists)[i];
+				}
+
+			/*
+			bool results[4];
+			(*shape)->intersect_packet(rays, dists, results);
+			int greater_than_entry = _mm_movemask_ps(
+				_mm_and_ps(_mm_cmpge_ps(dists, _mm_sub_ps(stack[entry].t, mEps)), mask));
+			for (int i = 0; i < 4; i++)
+			{
+				if (results[i] //&& *shape != origin_shapes[i]
+				&& ((greater_than_entry>>(3-i))&1))
+				{
+					nearest_shapes[i] = *shape;
+					nearest_distances[i] = ((float*)&dists)[i];
+				}
+			}
+			*/
+		}
+
+		for (int i = 0; i < 4; i++)
+			if (nearest_shapes[i])
+				((int*)&mask)[i] = 0;
+
+		if (!_mm_movemask_ps(mask))
+			return;
+
+		entry = exit;
+
+		/* retrieve the pointer to the next node,
+		it is possible that ray traversal terminates */
+		node = stack[entry].node;
+		exit = stack[entry].prev;
+	}
+
+	/* ray leaves the scene */
+}
+
 ostream & operator<<(ostream &st, KdNode &node)
 {
 	if (node.isLeaf())