--- a/src/kdtree.cc Thu Apr 24 18:12:32 2008 +0200
+++ b/src/kdtree.cc Sun Apr 27 09:44:49 2008 +0200
@@ -256,7 +256,7 @@
node = node->getLeftChild();
continue;
}
- if (stack[exit].pb[axis] == splitVal)
+ if (stack[entry].pb[axis] == splitVal)
{ /* case Z1 */
node = node->getRightChild();
continue;
@@ -328,6 +328,219 @@
return NULL;
}
+// stack element for kd-tree traversal (packet version)
+struct StackElem4
+{
+ KdNode* node; /* pointer to far child */
+ __m128 t; /* the entry/exit signed distance */
+ VectorPacket pb; /* the coordinates of entry/exit point */
+ int prev;
+};
+
+void KdTree::packet_intersection(const Shape **origin_shapes, const RayPacket &rays,
+ Float *nearest_distances, Shape **nearest_shapes)
+{
+ __m128 a, b; /* entry/exit signed distance */
+ __m128 t; /* signed distance to the splitting plane */
+ __m128 mask = zeros;
+
+ /* if we have no tree, fall back to naive test */
+ if (!built)
+ Container::packet_intersection(origin_shapes, rays, nearest_distances, nearest_shapes);
+
+ nearest_shapes[0] = NULL;
+ nearest_shapes[1] = NULL;
+ nearest_shapes[2] = NULL;
+ nearest_shapes[3] = NULL;
+
+ //bbox.intersect_packet(rays, a, b)
+ if (bbox.intersect(rays[0], ((float*)&a)[0], ((float*)&b)[0]))
+ ((int*)&mask)[0] = -1;
+ if (bbox.intersect(rays[1], ((float*)&a)[1], ((float*)&b)[1]))
+ ((int*)&mask)[1] = -1;
+ if (bbox.intersect(rays[2], ((float*)&a)[2], ((float*)&b)[2]))
+ ((int*)&mask)[2] = -1;
+ if (bbox.intersect(rays[3], ((float*)&a)[3], ((float*)&b)[3]))
+ ((int*)&mask)[3] = -1;
+
+ if (!_mm_movemask_ps(mask))
+ return;
+
+ /* pointers to the far child node and current node */
+ KdNode *farchild, *node;
+ node = root;
+
+ StackElem4 stack[max_depth];
+
+ int entry = 0, exit = 1;
+ stack[entry].t = a;
+
+ /* distinguish between internal and external origin of a ray*/
+ stack[entry].pb = rays.o + rays.dir * a; /* external */
+ for (int i = 0; i < 4; i++)
+ if (((float*)&a)[i] < 0.0)
+ {
+ /* internal */
+ stack[entry].pb.x[i] = rays.o.x[i];
+ stack[entry].pb.y[i] = rays.o.y[i];
+ stack[entry].pb.z[i] = rays.o.z[i];
+ }
+
+ /* setup initial exit point in the stack */
+ stack[exit].t = b;
+ stack[exit].pb = rays.o + rays.dir * b;
+ stack[exit].node = NULL;
+
+ /* loop, traverse through the whole kd-tree,
+ until an object is intersected or ray leaves the scene */
+ __m128 splitVal;
+ int axis;
+ static const int mod3[] = {0,1,2,0,1};
+ const VectorPacket invdirs = ones / rays.dir;
+ while (node)
+ {
+ /* loop until a leaf is found */
+ while (!node->isLeaf())
+ {
+ /* retrieve position of splitting plane */
+ splitVal = _mm_set_ps1(node->getSplit());
+ axis = node->getAxis();
+
+ // mask out invalid rays with near > far
+ __m128 curmask = _mm_and_ps(mask, _mm_cmple_ps(stack[entry].t, stack[exit].t));
+ __m128 entry_lt = _mm_cmplt_ps(stack[entry].pb.ma[axis], splitVal);
+ __m128 entry_gt = _mm_cmpgt_ps(stack[entry].pb.ma[axis], splitVal);
+ __m128 exit_lt = _mm_cmplt_ps(stack[exit].pb.ma[axis], splitVal);
+ __m128 exit_gt = _mm_cmpgt_ps(stack[exit].pb.ma[axis], splitVal);
+
+ // if all of
+ // stack[entry].pb[axis] <= splitVal,
+ // stack[exit].pb[axis] <= splitVal
+ if (!_mm_movemask_ps(
+ _mm_and_ps(_mm_or_ps(entry_gt, exit_gt), curmask)))
+ {
+ node = node->getLeftChild();
+ continue;
+ }
+
+ // if all of
+ // stack[entry].pb[axis] >= splitVal,
+ // stack[exit].pb[axis] >= splitVal
+ if (!_mm_movemask_ps(
+ _mm_and_ps(_mm_or_ps(entry_lt, exit_lt), curmask)))
+ {
+ node = node->getRightChild();
+ continue;
+ }
+
+ // any of
+ // stack[entry].pb[axis] < splitVal,
+ // stack[exit].pb[axis] > splitVal
+ bool cond1 = _mm_movemask_ps(
+ _mm_and_ps(_mm_and_ps(entry_lt, exit_gt), curmask));
+
+ // any of
+ // stack[entry].pb[axis] > splitVal,
+ // stack[exit].pb[axis] < splitVal
+ bool cond2 = _mm_movemask_ps(
+ _mm_and_ps(_mm_and_ps(entry_gt, exit_lt), curmask));
+
+ if ((!cond1 && !cond2) || (cond1 && cond2))
+ {
+ // fall back to single rays
+ // FIXME: split rays and continue
+ for (int i = 0; i < 4; i++)
+ if(!nearest_shapes[i])
+ nearest_shapes[i] = nearest_intersection(origin_shapes[i],
+ rays[i], nearest_distances[i]);
+ return;
+ }
+
+ if (cond1)
+ {
+ farchild = node->getRightChild();
+ node = node->getLeftChild();
+ }
+ else
+ {
+ farchild = node->getLeftChild();
+ node = node->getRightChild();
+ }
+
+ /* traverse both children */
+
+ /* signed distance to the splitting plane */
+ t = _mm_mul_ps(_mm_sub_ps(splitVal, rays.o.ma[axis]), invdirs.ma[axis]);
+
+ /* setup the new exit point and push it onto stack */
+ register int tmp = exit;
+
+ exit++;
+ if (exit == entry)
+ exit++;
+ assert(exit < max_depth);
+
+ stack[exit].prev = tmp;
+ stack[exit].t = t;
+ stack[exit].node = farchild;
+ stack[exit].pb.ma[axis] = splitVal;
+ stack[exit].pb.ma[mod3[axis+1]] =
+ _mm_add_ps(rays.o.ma[mod3[axis+1]], _mm_mul_ps(t, rays.dir.ma[mod3[axis+1]]));
+ stack[exit].pb.ma[mod3[axis+2]] =
+ _mm_add_ps(rays.o.ma[mod3[axis+2]], _mm_mul_ps(t, rays.dir.ma[mod3[axis+2]]));
+ }
+
+ /* current node is the leaf . . . empty or full */
+ __m128 dists = stack[exit].t;
+ ShapeList::iterator shape;
+ for (shape = node->getShapes()->begin(); shape != node->getShapes()->end(); shape++)
+ {
+ for (int i = 0; i < 4; i++)
+ if ( ((_mm_movemask_ps(mask)>>(i))&1) &&
+ ((float*)&stack[entry].t)[i] < ((float*)&stack[exit].t)[i] &&
+ *shape != origin_shapes[i] &&
+ (*shape)->intersect(rays[i], ((float*)&dists)[i])
+ && ((float*)&dists)[i] >= ((float*)&stack[entry].t)[i] - Eps)
+ {
+ nearest_shapes[i] = *shape;
+ nearest_distances[i] = ((float*)&dists)[i];
+ }
+
+ /*
+ bool results[4];
+ (*shape)->intersect_packet(rays, dists, results);
+ int greater_than_entry = _mm_movemask_ps(
+ _mm_and_ps(_mm_cmpge_ps(dists, _mm_sub_ps(stack[entry].t, mEps)), mask));
+ for (int i = 0; i < 4; i++)
+ {
+ if (results[i] //&& *shape != origin_shapes[i]
+ && ((greater_than_entry>>(3-i))&1))
+ {
+ nearest_shapes[i] = *shape;
+ nearest_distances[i] = ((float*)&dists)[i];
+ }
+ }
+ */
+ }
+
+ for (int i = 0; i < 4; i++)
+ if (nearest_shapes[i])
+ ((int*)&mask)[i] = 0;
+
+ if (!_mm_movemask_ps(mask))
+ return;
+
+ entry = exit;
+
+ /* retrieve the pointer to the next node,
+ it is possible that ray traversal terminates */
+ node = stack[entry].node;
+ exit = stack[entry].prev;
+ }
+
+ /* ray leaves the scene */
+}
+
ostream & operator<<(ostream &st, KdNode &node)
{
if (node.isLeaf())