--- a/src/kdtree.cc Fri May 02 13:27:47 2008 +0200
+++ b/src/kdtree.cc Mon May 05 15:31:14 2008 +0200
@@ -64,12 +64,10 @@
{
if (isLeaf())
delete getShapes();
- else
- delete[] getLeftChild();
}
// kd-tree recursive build algorithm, inspired by PBRT (www.pbrt.org)
-void KdTree::recursive_build(KdNode *node, BBox bounds, int maxdepth)
+void KdTree::recursive_build(KdNode *node, const BBox &bounds, int maxdepth)
{
ShapeList *shapes = node->getShapes();
@@ -102,12 +100,12 @@
sort(edges[ax].begin(), edges[ax].end());
// choose best split pos
- const Float K = 1.4; // constant, K = cost of traversal / cost of ray-triangle intersection
+ const Float K = 1.4f; // constant, K = cost of traversal / cost of ray-triangle intersection
Float SAV = (bounds.w()*bounds.h() + // surface area of node
bounds.w()*bounds.d() + bounds.h()*bounds.d());
Float cost = SAV * (K + shapes->size()); // initial cost = non-split cost
- vector<ShapeBound>::iterator edge, splitedge = edges[2].end();
+ vector<ShapeBound>::iterator edge, splitedge = edges[0].end();
int axis = 0;
for (int ax = 0; ax < 3; ax++)
{
@@ -120,8 +118,8 @@
rnum--;
// calculate SAH cost of this split
- lbb.H.cell[ax] = edge->pos;
- rbb.L.cell[ax] = edge->pos;
+ lbb.H[ax] = edge->pos;
+ rbb.L[ax] = edge->pos;
Float SAL = (lbb.w()*lbb.h() + lbb.w()*lbb.d() + lbb.h()*lbb.d());
Float SAR = (rbb.w()*rbb.h() + rbb.w()*rbb.d() + rbb.h()*rbb.d());
Float splitcost = K*SAV + SAL*(K + lnum) + SAR*(K + rnum);
@@ -138,7 +136,10 @@
}
}
- if (splitedge == edges[2].end())
+ // we actually need to compare with edges[0].end(), but
+ // MSVC does not allow comparison of iterators from differen instances of vector
+ // it's OK this way, because axis will be zero if no good split was found
+ if (splitedge == edges[axis].end())
{
node->setLeaf();
return;
@@ -152,18 +153,18 @@
static ofstream F("kdtree.obj");
Vector v;
static int f=0;
- v.cell[axis] = node->getSplit();
- v.cell[(axis+1)%3] = bounds.L.cell[(axis+1)%3];
- v.cell[(axis+2)%3] = bounds.L.cell[(axis+2)%3];
+ v[axis] = node->getSplit();
+ v[(axis+1)%3] = bounds.L[(axis+1)%3];
+ v[(axis+2)%3] = bounds.L[(axis+2)%3];
F << "v " << v.x << " " << v.y << " " << v.z << endl;
- v.cell[(axis+1)%3] = bounds.L.cell[(axis+1)%3];
- v.cell[(axis+2)%3] = bounds.H.cell[(axis+2)%3];
+ v[(axis+1)%3] = bounds.L[(axis+1)%3];
+ v[(axis+2)%3] = bounds.H[(axis+2)%3];
F << "v " << v.x << " " << v.y << " " << v.z << endl;
- v.cell[(axis+1)%3] = bounds.H.cell[(axis+1)%3];
- v.cell[(axis+2)%3] = bounds.H.cell[(axis+2)%3];
+ v[(axis+1)%3] = bounds.H[(axis+1)%3];
+ v[(axis+2)%3] = bounds.H[(axis+2)%3];
F << "v " << v.x << " " << v.y << " " << v.z << endl;
- v.cell[(axis+1)%3] = bounds.H.cell[(axis+1)%3];
- v.cell[(axis+2)%3] = bounds.L.cell[(axis+2)%3];
+ v[(axis+1)%3] = bounds.H[(axis+1)%3];
+ v[(axis+2)%3] = bounds.L[(axis+2)%3];
F << "v " << v.x << " " << v.y << " " << v.z << endl;
F << "f " << f+1 << " " << f+2 << " " << f+3 << " " << f+4 << endl;
f += 4;
@@ -174,9 +175,10 @@
BBox lbb = bounds;
BBox rbb = bounds;
- lbb.H.cell[axis] = node->getSplit();
- rbb.L.cell[axis] = node->getSplit();
- node->setChildren(new KdNode[2]);
+ lbb.H[axis] = node->getSplit();
+ rbb.L[axis] = node->getSplit();
+ node->setChildren(new (mempool.alloc()) KdNode);
+ new (mempool.alloc()) KdNode;
node->setAxis(axis);
for (edge = edges[axis].begin(); edge != splitedge; edge++)
@@ -219,7 +221,12 @@
KdNode *farchild, *node;
node = root;
+#ifdef MSVC
+ // MSVC wants constant expression here... hope it won't overflow :)
+ StackElem stack[64];
+#else
StackElem stack[max_depth];
+#endif
int entry = 0, exit = 1;
stack[entry].t = a;
@@ -294,11 +301,11 @@
stack[exit].prev = tmp;
stack[exit].t = t;
stack[exit].node = farchild;
- stack[exit].pb.cell[axis] = splitVal;
- stack[exit].pb.cell[mod3[axis+1]] = ray.o.cell[mod3[axis+1]]
- + t * ray.dir.cell[mod3[axis+1]];
- stack[exit].pb.cell[mod3[axis+2]] = ray.o.cell[mod3[axis+2]]
- + t * ray.dir.cell[mod3[axis+2]];
+ stack[exit].pb[axis] = splitVal;
+ stack[exit].pb[mod3[axis+1]] = ray.o[mod3[axis+1]]
+ + t * ray.dir[mod3[axis+1]];
+ stack[exit].pb[mod3[axis+2]] = ray.o[mod3[axis+2]]
+ + t * ray.dir[mod3[axis+2]];
}
/* current node is the leaf . . . empty or full */
@@ -328,22 +335,22 @@
return NULL;
}
-#ifndef NO_SSE
+#ifndef NO_SIMD
// stack element for kd-tree traversal (packet version)
struct StackElem4
{
KdNode* node; /* pointer to far child */
- __m128 t; /* the entry/exit signed distance */
+ mfloat4 t; /* the entry/exit signed distance */
VectorPacket pb; /* the coordinates of entry/exit point */
int prev;
};
-void KdTree::packet_intersection(const Shape **origin_shapes, const RayPacket &rays,
+void KdTree::packet_intersection(const Shape* const* origin_shapes, const RayPacket &rays,
Float *nearest_distances, Shape **nearest_shapes)
{
- __m128 a, b; /* entry/exit signed distance */
- __m128 t; /* signed distance to the splitting plane */
- __m128 mask = mZero;
+ mfloat4 a, b; /* entry/exit signed distance */
+ mfloat4 t; /* signed distance to the splitting plane */
+ mfloat4 mask = mZero;
/* if we have no tree, fall back to naive test */
if (!built)
@@ -353,27 +360,29 @@
memset(nearest_shapes, 0, 4*sizeof(Shape*));
mask = bbox.intersect_packet(rays, a, b);
- if (!_mm_movemask_ps(mask))
+ if (!mmovemask(mask))
return;
/* pointers to the far child node and current node */
KdNode *farchild, *node;
node = root;
+#ifdef MSVC
+ // MSVC wants constant expression here... hope it won't overflow :)
+ StackElem4 stack[64];
+#else
StackElem4 stack[max_depth];
+#endif
int entry = 0, exit = 1;
stack[entry].t = a;
/* distinguish between internal and external origin of a ray*/
- t = _mm_cmplt_ps(a, mZero);
+ t = mcmplt(a, mZero);
stack[entry].pb = rays.o + rays.dir * a;
- stack[entry].pb.mx = _mm_or_ps(_mm_andnot_ps(t, stack[entry].pb.mx),
- _mm_and_ps(t, rays.o.mx));
- stack[entry].pb.my = _mm_or_ps(_mm_andnot_ps(t, stack[entry].pb.my),
- _mm_and_ps(t, rays.o.my));
- stack[entry].pb.mz = _mm_or_ps(_mm_andnot_ps(t, stack[entry].pb.mz),
- _mm_and_ps(t, rays.o.mz));
+ stack[entry].pb.mx = mselect(t, rays.o.mx, stack[entry].pb.mx);
+ stack[entry].pb.my = mselect(t, rays.o.my, stack[entry].pb.my);
+ stack[entry].pb.mz = mselect(t, rays.o.mz, stack[entry].pb.mz);
/* setup initial exit point in the stack */
stack[exit].t = b;
@@ -382,7 +391,7 @@
/* loop, traverse through the whole kd-tree,
until an object is intersected or ray leaves the scene */
- __m128 splitVal;
+ mfloat4 splitVal;
int axis;
static const int mod3[] = {0,1,2,0,1};
const VectorPacket invdirs = mOne / rays.dir;
@@ -392,21 +401,21 @@
while (!node->isLeaf())
{
/* retrieve position of splitting plane */
- splitVal = _mm_set_ps1(node->getSplit());
+ splitVal = mset1(node->getSplit());
axis = node->getAxis();
// mask out invalid rays with near > far
- const __m128 curmask = _mm_and_ps(mask, _mm_cmple_ps(stack[entry].t, stack[exit].t));
- const __m128 entry_lt = _mm_cmplt_ps(stack[entry].pb.ma[axis], splitVal);
- const __m128 entry_gt = _mm_cmpgt_ps(stack[entry].pb.ma[axis], splitVal);
- const __m128 exit_lt = _mm_cmplt_ps(stack[exit].pb.ma[axis], splitVal);
- const __m128 exit_gt = _mm_cmpgt_ps(stack[exit].pb.ma[axis], splitVal);
+ const mfloat4 curmask = mand(mask, mcmple(stack[entry].t, stack[exit].t));
+ const mfloat4 entry_lt = mcmplt(stack[entry].pb.ma[axis], splitVal);
+ const mfloat4 entry_gt = mcmpgt(stack[entry].pb.ma[axis], splitVal);
+ const mfloat4 exit_lt = mcmplt(stack[exit].pb.ma[axis], splitVal);
+ const mfloat4 exit_gt = mcmpgt(stack[exit].pb.ma[axis], splitVal);
// if all of
// stack[entry].pb[axis] <= splitVal,
// stack[exit].pb[axis] <= splitVal
- if (!_mm_movemask_ps(
- _mm_and_ps(_mm_or_ps(entry_gt, exit_gt), curmask)))
+ if (!mmovemask(
+ mand(mor(entry_gt, exit_gt), curmask)))
{
node = node->getLeftChild();
continue;
@@ -415,8 +424,8 @@
// if all of
// stack[entry].pb[axis] >= splitVal,
// stack[exit].pb[axis] >= splitVal
- if (!_mm_movemask_ps(
- _mm_and_ps(_mm_or_ps(entry_lt, exit_lt), curmask)))
+ if (!mmovemask(
+ mand(mor(entry_lt, exit_lt), curmask)))
{
node = node->getRightChild();
continue;
@@ -425,14 +434,14 @@
// any of
// stack[entry].pb[axis] < splitVal,
// stack[exit].pb[axis] > splitVal
- bool cond1 = _mm_movemask_ps(
- _mm_and_ps(_mm_and_ps(entry_lt, exit_gt), curmask));
+ int cond1 = mmovemask(
+ mand(mand(entry_lt, exit_gt), curmask));
// any of
// stack[entry].pb[axis] > splitVal,
// stack[exit].pb[axis] < splitVal
- bool cond2 = _mm_movemask_ps(
- _mm_and_ps(_mm_and_ps(entry_gt, exit_lt), curmask));
+ int cond2 = mmovemask(
+ mand(mand(entry_gt, exit_lt), curmask));
if ((!cond1 && !cond2) || (cond1 && cond2))
{
@@ -459,7 +468,7 @@
/* traverse both children */
/* signed distance to the splitting plane */
- t = _mm_mul_ps(_mm_sub_ps(splitVal, rays.o.ma[axis]), invdirs.ma[axis]);
+ t = mmul(msub(splitVal, rays.o.ma[axis]), invdirs.ma[axis]);
/* setup the new exit point and push it onto stack */
register int tmp = exit;
@@ -474,22 +483,22 @@
stack[exit].node = farchild;
stack[exit].pb.ma[axis] = splitVal;
stack[exit].pb.ma[mod3[axis+1]] =
- _mm_add_ps(rays.o.ma[mod3[axis+1]], _mm_mul_ps(t, rays.dir.ma[mod3[axis+1]]));
+ madd(rays.o.ma[mod3[axis+1]], mmul(t, rays.dir.ma[mod3[axis+1]]));
stack[exit].pb.ma[mod3[axis+2]] =
- _mm_add_ps(rays.o.ma[mod3[axis+2]], _mm_mul_ps(t, rays.dir.ma[mod3[axis+2]]));
+ madd(rays.o.ma[mod3[axis+2]], mmul(t, rays.dir.ma[mod3[axis+2]]));
}
/* current node is the leaf . . . empty or full */
- __m128 dists = stack[exit].t;
+ mfloat4 dists = stack[exit].t;
ShapeList::iterator shape;
- __m128 results;
- __m128 newmask = mask;
+ mfloat4 results;
+ mfloat4 newmask = mask;
for (shape = node->getShapes()->begin(); shape != node->getShapes()->end(); shape++)
{
results = (*shape)->intersect_packet(rays, dists);
- int valid = _mm_movemask_ps(
- _mm_and_ps(mask, _mm_and_ps(results,
- _mm_cmpge_ps(dists, _mm_sub_ps(stack[entry].t, mEps)))));
+ int valid = mmovemask(
+ mand(mask, mand(results,
+ mcmpge(dists, msub(stack[entry].t, mEps)))));
for (int i = 0; i < 4; i++)
{
if (*shape != origin_shapes[i] && ((valid>>i)&1))
@@ -502,7 +511,7 @@
}
mask = newmask;
- if (!_mm_movemask_ps(mask))
+ if (!mmovemask(mask))
return;
entry = exit;