diff -r e3a2a5b26abb -r 6f7fe14782c2 src/kdtree.cc --- a/src/kdtree.cc Thu Apr 24 18:12:32 2008 +0200 +++ b/src/kdtree.cc Sun Apr 27 09:44:49 2008 +0200 @@ -256,7 +256,7 @@ node = node->getLeftChild(); continue; } - if (stack[exit].pb[axis] == splitVal) + if (stack[entry].pb[axis] == splitVal) { /* case Z1 */ node = node->getRightChild(); continue; @@ -328,6 +328,219 @@ return NULL; } +// stack element for kd-tree traversal (packet version) +struct StackElem4 +{ + KdNode* node; /* pointer to far child */ + __m128 t; /* the entry/exit signed distance */ + VectorPacket pb; /* the coordinates of entry/exit point */ + int prev; +}; + +void KdTree::packet_intersection(const Shape **origin_shapes, const RayPacket &rays, + Float *nearest_distances, Shape **nearest_shapes) +{ + __m128 a, b; /* entry/exit signed distance */ + __m128 t; /* signed distance to the splitting plane */ + __m128 mask = zeros; + + /* if we have no tree, fall back to naive test */ + if (!built) + Container::packet_intersection(origin_shapes, rays, nearest_distances, nearest_shapes); + + nearest_shapes[0] = NULL; + nearest_shapes[1] = NULL; + nearest_shapes[2] = NULL; + nearest_shapes[3] = NULL; + + //bbox.intersect_packet(rays, a, b) + if (bbox.intersect(rays[0], ((float*)&a)[0], ((float*)&b)[0])) + ((int*)&mask)[0] = -1; + if (bbox.intersect(rays[1], ((float*)&a)[1], ((float*)&b)[1])) + ((int*)&mask)[1] = -1; + if (bbox.intersect(rays[2], ((float*)&a)[2], ((float*)&b)[2])) + ((int*)&mask)[2] = -1; + if (bbox.intersect(rays[3], ((float*)&a)[3], ((float*)&b)[3])) + ((int*)&mask)[3] = -1; + + if (!_mm_movemask_ps(mask)) + return; + + /* pointers to the far child node and current node */ + KdNode *farchild, *node; + node = root; + + StackElem4 stack[max_depth]; + + int entry = 0, exit = 1; + stack[entry].t = a; + + /* distinguish between internal and external origin of a ray*/ + stack[entry].pb = rays.o + rays.dir * a; /* external */ + for (int i = 0; i < 4; i++) + if (((float*)&a)[i] < 0.0) + { + /* internal */ + stack[entry].pb.x[i] = rays.o.x[i]; + stack[entry].pb.y[i] = rays.o.y[i]; + stack[entry].pb.z[i] = rays.o.z[i]; + } + + /* setup initial exit point in the stack */ + stack[exit].t = b; + stack[exit].pb = rays.o + rays.dir * b; + stack[exit].node = NULL; + + /* loop, traverse through the whole kd-tree, + until an object is intersected or ray leaves the scene */ + __m128 splitVal; + int axis; + static const int mod3[] = {0,1,2,0,1}; + const VectorPacket invdirs = ones / rays.dir; + while (node) + { + /* loop until a leaf is found */ + while (!node->isLeaf()) + { + /* retrieve position of splitting plane */ + splitVal = _mm_set_ps1(node->getSplit()); + axis = node->getAxis(); + + // mask out invalid rays with near > far + __m128 curmask = _mm_and_ps(mask, _mm_cmple_ps(stack[entry].t, stack[exit].t)); + __m128 entry_lt = _mm_cmplt_ps(stack[entry].pb.ma[axis], splitVal); + __m128 entry_gt = _mm_cmpgt_ps(stack[entry].pb.ma[axis], splitVal); + __m128 exit_lt = _mm_cmplt_ps(stack[exit].pb.ma[axis], splitVal); + __m128 exit_gt = _mm_cmpgt_ps(stack[exit].pb.ma[axis], splitVal); + + // if all of + // stack[entry].pb[axis] <= splitVal, + // stack[exit].pb[axis] <= splitVal + if (!_mm_movemask_ps( + _mm_and_ps(_mm_or_ps(entry_gt, exit_gt), curmask))) + { + node = node->getLeftChild(); + continue; + } + + // if all of + // stack[entry].pb[axis] >= splitVal, + // stack[exit].pb[axis] >= splitVal + if (!_mm_movemask_ps( + _mm_and_ps(_mm_or_ps(entry_lt, exit_lt), curmask))) + { + node = node->getRightChild(); + continue; + } + + // any of + // stack[entry].pb[axis] < splitVal, + // stack[exit].pb[axis] > splitVal + bool cond1 = _mm_movemask_ps( + _mm_and_ps(_mm_and_ps(entry_lt, exit_gt), curmask)); + + // any of + // stack[entry].pb[axis] > splitVal, + // stack[exit].pb[axis] < splitVal + bool cond2 = _mm_movemask_ps( + _mm_and_ps(_mm_and_ps(entry_gt, exit_lt), curmask)); + + if ((!cond1 && !cond2) || (cond1 && cond2)) + { + // fall back to single rays + // FIXME: split rays and continue + for (int i = 0; i < 4; i++) + if(!nearest_shapes[i]) + nearest_shapes[i] = nearest_intersection(origin_shapes[i], + rays[i], nearest_distances[i]); + return; + } + + if (cond1) + { + farchild = node->getRightChild(); + node = node->getLeftChild(); + } + else + { + farchild = node->getLeftChild(); + node = node->getRightChild(); + } + + /* traverse both children */ + + /* signed distance to the splitting plane */ + t = _mm_mul_ps(_mm_sub_ps(splitVal, rays.o.ma[axis]), invdirs.ma[axis]); + + /* setup the new exit point and push it onto stack */ + register int tmp = exit; + + exit++; + if (exit == entry) + exit++; + assert(exit < max_depth); + + stack[exit].prev = tmp; + stack[exit].t = t; + stack[exit].node = farchild; + stack[exit].pb.ma[axis] = splitVal; + stack[exit].pb.ma[mod3[axis+1]] = + _mm_add_ps(rays.o.ma[mod3[axis+1]], _mm_mul_ps(t, rays.dir.ma[mod3[axis+1]])); + stack[exit].pb.ma[mod3[axis+2]] = + _mm_add_ps(rays.o.ma[mod3[axis+2]], _mm_mul_ps(t, rays.dir.ma[mod3[axis+2]])); + } + + /* current node is the leaf . . . empty or full */ + __m128 dists = stack[exit].t; + ShapeList::iterator shape; + for (shape = node->getShapes()->begin(); shape != node->getShapes()->end(); shape++) + { + for (int i = 0; i < 4; i++) + if ( ((_mm_movemask_ps(mask)>>(i))&1) && + ((float*)&stack[entry].t)[i] < ((float*)&stack[exit].t)[i] && + *shape != origin_shapes[i] && + (*shape)->intersect(rays[i], ((float*)&dists)[i]) + && ((float*)&dists)[i] >= ((float*)&stack[entry].t)[i] - Eps) + { + nearest_shapes[i] = *shape; + nearest_distances[i] = ((float*)&dists)[i]; + } + + /* + bool results[4]; + (*shape)->intersect_packet(rays, dists, results); + int greater_than_entry = _mm_movemask_ps( + _mm_and_ps(_mm_cmpge_ps(dists, _mm_sub_ps(stack[entry].t, mEps)), mask)); + for (int i = 0; i < 4; i++) + { + if (results[i] //&& *shape != origin_shapes[i] + && ((greater_than_entry>>(3-i))&1)) + { + nearest_shapes[i] = *shape; + nearest_distances[i] = ((float*)&dists)[i]; + } + } + */ + } + + for (int i = 0; i < 4; i++) + if (nearest_shapes[i]) + ((int*)&mask)[i] = 0; + + if (!_mm_movemask_ps(mask)) + return; + + entry = exit; + + /* retrieve the pointer to the next node, + it is possible that ray traversal terminates */ + node = stack[entry].node; + exit = stack[entry].prev; + } + + /* ray leaves the scene */ +} + ostream & operator<<(ostream &st, KdNode &node) { if (node.isLeaf())