src/kdtree.cc
branchpyrit
changeset 92 9af5c039b678
parent 91 9d66d323c354
child 93 96d65f841791
--- a/src/kdtree.cc	Fri May 02 13:27:47 2008 +0200
+++ b/src/kdtree.cc	Mon May 05 15:31:14 2008 +0200
@@ -64,12 +64,10 @@
 {
 	if (isLeaf())
 		delete getShapes();
-	else
-		delete[] getLeftChild();
 }
 
 // kd-tree recursive build algorithm, inspired by PBRT (www.pbrt.org)
-void KdTree::recursive_build(KdNode *node, BBox bounds, int maxdepth)
+void KdTree::recursive_build(KdNode *node, const BBox &bounds, int maxdepth)
 {
 	ShapeList *shapes = node->getShapes();
 
@@ -102,12 +100,12 @@
 		sort(edges[ax].begin(), edges[ax].end());
 
 	// choose best split pos
-	const Float K = 1.4; // constant, K = cost of traversal / cost of ray-triangle intersection
+	const Float K = 1.4f; // constant, K = cost of traversal / cost of ray-triangle intersection
 	Float SAV = (bounds.w()*bounds.h() +  // surface area of node
 		bounds.w()*bounds.d() + bounds.h()*bounds.d());
 	Float cost = SAV * (K + shapes->size()); // initial cost = non-split cost
 
-	vector<ShapeBound>::iterator edge, splitedge = edges[2].end();
+	vector<ShapeBound>::iterator edge, splitedge = edges[0].end();
 	int axis = 0;
 	for (int ax = 0; ax < 3; ax++)
 	{
@@ -120,8 +118,8 @@
 				rnum--;
 
 			// calculate SAH cost of this split
-			lbb.H.cell[ax] = edge->pos;
-			rbb.L.cell[ax] = edge->pos;
+			lbb.H[ax] = edge->pos;
+			rbb.L[ax] = edge->pos;
 			Float SAL = (lbb.w()*lbb.h() + lbb.w()*lbb.d() + lbb.h()*lbb.d());
 			Float SAR = (rbb.w()*rbb.h() + rbb.w()*rbb.d() + rbb.h()*rbb.d());
 			Float splitcost = K*SAV + SAL*(K + lnum) + SAR*(K + rnum);
@@ -138,7 +136,10 @@
 		}
 	}
 
-	if (splitedge == edges[2].end())
+	// we actually need to compare with edges[0].end(), but
+	// MSVC does not allow comparison of iterators from differen instances of vector
+	// it's OK this way, because axis will be zero if no good split was found
+	if (splitedge == edges[axis].end())
 	{
 		node->setLeaf();
 		return;
@@ -152,18 +153,18 @@
 	static ofstream F("kdtree.obj");
 	Vector v;
 	static int f=0;
-	v.cell[axis] = node->getSplit();
-	v.cell[(axis+1)%3] = bounds.L.cell[(axis+1)%3];
-	v.cell[(axis+2)%3] = bounds.L.cell[(axis+2)%3];
+	v[axis] = node->getSplit();
+	v[(axis+1)%3] = bounds.L[(axis+1)%3];
+	v[(axis+2)%3] = bounds.L[(axis+2)%3];
 	F << "v " << v.x << " " << v.y << " " << v.z << endl;
-	v.cell[(axis+1)%3] = bounds.L.cell[(axis+1)%3];
-	v.cell[(axis+2)%3] = bounds.H.cell[(axis+2)%3];
+	v[(axis+1)%3] = bounds.L[(axis+1)%3];
+	v[(axis+2)%3] = bounds.H[(axis+2)%3];
 	F << "v " << v.x << " " << v.y << " " << v.z << endl;
-	v.cell[(axis+1)%3] = bounds.H.cell[(axis+1)%3];
-	v.cell[(axis+2)%3] = bounds.H.cell[(axis+2)%3];
+	v[(axis+1)%3] = bounds.H[(axis+1)%3];
+	v[(axis+2)%3] = bounds.H[(axis+2)%3];
 	F << "v " << v.x << " " << v.y << " " << v.z << endl;
-	v.cell[(axis+1)%3] = bounds.H.cell[(axis+1)%3];
-	v.cell[(axis+2)%3] = bounds.L.cell[(axis+2)%3];
+	v[(axis+1)%3] = bounds.H[(axis+1)%3];
+	v[(axis+2)%3] = bounds.L[(axis+2)%3];
 	F << "v " << v.x << " " << v.y << " " << v.z << endl;
 	F << "f " << f+1 << " " << f+2 << " " << f+3 << " " << f+4 << endl;
 	f += 4;
@@ -174,9 +175,10 @@
 
 	BBox lbb = bounds;
 	BBox rbb = bounds;
-	lbb.H.cell[axis] = node->getSplit();
-	rbb.L.cell[axis] = node->getSplit();
-	node->setChildren(new KdNode[2]);
+	lbb.H[axis] = node->getSplit();
+	rbb.L[axis] = node->getSplit();
+	node->setChildren(new (mempool.alloc()) KdNode);
+	new (mempool.alloc()) KdNode;
 	node->setAxis(axis);
 
 	for (edge = edges[axis].begin(); edge != splitedge; edge++)
@@ -219,7 +221,12 @@
 	KdNode *farchild, *node;
 	node = root;
 
+#ifdef MSVC
+	// MSVC wants constant expression here... hope it won't overflow :)
+	StackElem stack[64];
+#else
 	StackElem stack[max_depth];
+#endif
 
 	int entry = 0, exit = 1;
 	stack[entry].t = a;
@@ -294,11 +301,11 @@
 			stack[exit].prev = tmp;
 			stack[exit].t = t;
 			stack[exit].node = farchild;
-			stack[exit].pb.cell[axis] = splitVal;
-			stack[exit].pb.cell[mod3[axis+1]] = ray.o.cell[mod3[axis+1]]
-				+ t * ray.dir.cell[mod3[axis+1]];
-			stack[exit].pb.cell[mod3[axis+2]] = ray.o.cell[mod3[axis+2]]
-				+ t * ray.dir.cell[mod3[axis+2]];
+			stack[exit].pb[axis] = splitVal;
+			stack[exit].pb[mod3[axis+1]] = ray.o[mod3[axis+1]]
+				+ t * ray.dir[mod3[axis+1]];
+			stack[exit].pb[mod3[axis+2]] = ray.o[mod3[axis+2]]
+				+ t * ray.dir[mod3[axis+2]];
 		}
 
 		/* current node is the leaf . . . empty or full */
@@ -328,22 +335,22 @@
 	return NULL;
 }
 
-#ifndef NO_SSE
+#ifndef NO_SIMD
 // stack element for kd-tree traversal (packet version)
 struct StackElem4
 {
 	KdNode* node; /* pointer to far child */
-	__m128 t; /* the entry/exit signed distance */
+	mfloat4 t; /* the entry/exit signed distance */
 	VectorPacket pb; /* the coordinates of entry/exit point */
 	int prev;
 };
 
-void KdTree::packet_intersection(const Shape **origin_shapes, const RayPacket &rays,
+void KdTree::packet_intersection(const Shape* const* origin_shapes, const RayPacket &rays,
 		Float *nearest_distances, Shape **nearest_shapes)
 {
-	__m128 a, b; /* entry/exit signed distance */
-	__m128 t;    /* signed distance to the splitting plane */
-	__m128 mask = mZero;
+	mfloat4 a, b; /* entry/exit signed distance */
+	mfloat4 t;    /* signed distance to the splitting plane */
+	mfloat4 mask = mZero;
 
 	/* if we have no tree, fall back to naive test */
 	if (!built)
@@ -353,27 +360,29 @@
 	memset(nearest_shapes, 0, 4*sizeof(Shape*));
 
 	mask = bbox.intersect_packet(rays, a, b);
-	if (!_mm_movemask_ps(mask))
+	if (!mmovemask(mask))
 		return;
 
 	/* pointers to the far child node and current node */
 	KdNode *farchild, *node;
 	node = root;
 
+#ifdef MSVC
+	// MSVC wants constant expression here... hope it won't overflow :)
+	StackElem4 stack[64];
+#else
 	StackElem4 stack[max_depth];
+#endif
 
 	int entry = 0, exit = 1;
 	stack[entry].t = a;
 
 	/* distinguish between internal and external origin of a ray*/
-	t = _mm_cmplt_ps(a, mZero);
+	t = mcmplt(a, mZero);
 	stack[entry].pb = rays.o + rays.dir * a;
-	stack[entry].pb.mx = _mm_or_ps(_mm_andnot_ps(t, stack[entry].pb.mx),
-		_mm_and_ps(t, rays.o.mx));
-	stack[entry].pb.my = _mm_or_ps(_mm_andnot_ps(t, stack[entry].pb.my),
-		_mm_and_ps(t, rays.o.my));
-	stack[entry].pb.mz = _mm_or_ps(_mm_andnot_ps(t, stack[entry].pb.mz),
-		_mm_and_ps(t, rays.o.mz));
+	stack[entry].pb.mx = mselect(t, rays.o.mx, stack[entry].pb.mx);
+	stack[entry].pb.my = mselect(t, rays.o.my, stack[entry].pb.my);
+	stack[entry].pb.mz = mselect(t, rays.o.mz, stack[entry].pb.mz);
 
 	/* setup initial exit point in the stack */
 	stack[exit].t = b;
@@ -382,7 +391,7 @@
 
 	/* loop, traverse through the whole kd-tree,
 	until an object is intersected or ray leaves the scene */
-	__m128 splitVal;
+	mfloat4 splitVal;
 	int axis;
 	static const int mod3[] = {0,1,2,0,1};
 	const VectorPacket invdirs = mOne / rays.dir;
@@ -392,21 +401,21 @@
 		while (!node->isLeaf())
 		{
 			/* retrieve position of splitting plane */
-			splitVal = _mm_set_ps1(node->getSplit());
+			splitVal = mset1(node->getSplit());
 			axis = node->getAxis();
 
 			// mask out invalid rays with near > far
-			const __m128 curmask = _mm_and_ps(mask, _mm_cmple_ps(stack[entry].t, stack[exit].t));
-			const __m128 entry_lt = _mm_cmplt_ps(stack[entry].pb.ma[axis], splitVal);
-			const __m128 entry_gt = _mm_cmpgt_ps(stack[entry].pb.ma[axis], splitVal);
-			const __m128 exit_lt = _mm_cmplt_ps(stack[exit].pb.ma[axis], splitVal);
-			const __m128 exit_gt = _mm_cmpgt_ps(stack[exit].pb.ma[axis], splitVal);
+			const mfloat4 curmask = mand(mask, mcmple(stack[entry].t, stack[exit].t));
+			const mfloat4 entry_lt = mcmplt(stack[entry].pb.ma[axis], splitVal);
+			const mfloat4 entry_gt = mcmpgt(stack[entry].pb.ma[axis], splitVal);
+			const mfloat4 exit_lt = mcmplt(stack[exit].pb.ma[axis], splitVal);
+			const mfloat4 exit_gt = mcmpgt(stack[exit].pb.ma[axis], splitVal);
 
 			// if all of
 			// stack[entry].pb[axis] <= splitVal,
 			// stack[exit].pb[axis] <= splitVal
-			if (!_mm_movemask_ps(
-				_mm_and_ps(_mm_or_ps(entry_gt, exit_gt), curmask)))
+			if (!mmovemask(
+				mand(mor(entry_gt, exit_gt), curmask)))
 			{
 				node = node->getLeftChild();
 				continue;
@@ -415,8 +424,8 @@
 			// if all of
 			// stack[entry].pb[axis] >= splitVal,
 			// stack[exit].pb[axis] >= splitVal
-			if (!_mm_movemask_ps(
-				_mm_and_ps(_mm_or_ps(entry_lt, exit_lt), curmask)))
+			if (!mmovemask(
+				mand(mor(entry_lt, exit_lt), curmask)))
 			{
 				node = node->getRightChild();
 				continue;
@@ -425,14 +434,14 @@
 			// any of
 			// stack[entry].pb[axis] < splitVal,
 			// stack[exit].pb[axis] > splitVal
-			bool cond1 = _mm_movemask_ps(
-				_mm_and_ps(_mm_and_ps(entry_lt, exit_gt), curmask));
+			int cond1 = mmovemask(
+				mand(mand(entry_lt, exit_gt), curmask));
 
 			// any of
 			// stack[entry].pb[axis] > splitVal,
 			// stack[exit].pb[axis] < splitVal
-			bool cond2 = _mm_movemask_ps(
-				_mm_and_ps(_mm_and_ps(entry_gt, exit_lt), curmask));
+			int cond2 = mmovemask(
+				mand(mand(entry_gt, exit_lt), curmask));
 
 			if ((!cond1 && !cond2) || (cond1 && cond2))
 			{
@@ -459,7 +468,7 @@
 			/* traverse both children */
 
 			/* signed distance to the splitting plane */
-			t = _mm_mul_ps(_mm_sub_ps(splitVal, rays.o.ma[axis]), invdirs.ma[axis]);
+			t = mmul(msub(splitVal, rays.o.ma[axis]), invdirs.ma[axis]);
 
 			/* setup the new exit point and push it onto stack */
 			register int tmp = exit;
@@ -474,22 +483,22 @@
 			stack[exit].node = farchild;
 			stack[exit].pb.ma[axis] = splitVal;
 			stack[exit].pb.ma[mod3[axis+1]] =
-				_mm_add_ps(rays.o.ma[mod3[axis+1]], _mm_mul_ps(t, rays.dir.ma[mod3[axis+1]]));
+				madd(rays.o.ma[mod3[axis+1]], mmul(t, rays.dir.ma[mod3[axis+1]]));
 			stack[exit].pb.ma[mod3[axis+2]] =
-				_mm_add_ps(rays.o.ma[mod3[axis+2]], _mm_mul_ps(t, rays.dir.ma[mod3[axis+2]]));
+				madd(rays.o.ma[mod3[axis+2]], mmul(t, rays.dir.ma[mod3[axis+2]]));
 		}
 
 		/* current node is the leaf . . . empty or full */
-		__m128 dists = stack[exit].t;
+		mfloat4 dists = stack[exit].t;
 		ShapeList::iterator shape;
-		__m128 results;
-		__m128 newmask = mask;
+		mfloat4 results;
+		mfloat4 newmask = mask;
 		for (shape = node->getShapes()->begin(); shape != node->getShapes()->end(); shape++)
 		{
 			results = (*shape)->intersect_packet(rays, dists);
-			int valid = _mm_movemask_ps(
-				_mm_and_ps(mask, _mm_and_ps(results,
-				_mm_cmpge_ps(dists, _mm_sub_ps(stack[entry].t, mEps)))));
+			int valid = mmovemask(
+				mand(mask, mand(results,
+				mcmpge(dists, msub(stack[entry].t, mEps)))));
 			for (int i = 0; i < 4; i++)
 			{
 				if (*shape != origin_shapes[i] && ((valid>>i)&1))
@@ -502,7 +511,7 @@
 		}
 
 		mask = newmask;
-		if (!_mm_movemask_ps(mask))
+		if (!mmovemask(mask))
 			return;
 
 		entry = exit;