octree traversal rewritten to avoid recursion pyrit
authorRadek Brich <radek.brich@devl.cz>
Fri, 14 Dec 2007 00:05:54 +0100 (2007-12-13)
branchpyrit
changeset 37 5f954c0d34fc
parent 36 b490093b0ac3
child 38 5d043eeb09d9
octree traversal rewritten to avoid recursion reenabled -O3 optimizations (was accidentaly disabled, now it traces even faster! :-)) realtime_bunny: added FPS counter, fixed a bug in ply loader min3 and max3 templates added to common.h
ccdemos/realtime.cc
ccdemos/realtime_bunny.cc
ccdemos/spheres_shadow.cc
config.mk
include/common.h
src/octree.cc
--- a/ccdemos/realtime.cc	Thu Dec 13 00:08:11 2007 +0100
+++ b/ccdemos/realtime.cc	Fri Dec 14 00:05:54 2007 +0100
@@ -1,6 +1,7 @@
 #include <SDL.h>
 
 #include "raytracer.h"
+#include "kdtree.h"
 
 int w = 480;
 int h = 288;
--- a/ccdemos/realtime_bunny.cc	Thu Dec 13 00:08:11 2007 +0100
+++ b/ccdemos/realtime_bunny.cc	Fri Dec 14 00:05:54 2007 +0100
@@ -13,6 +13,9 @@
 Raytracer rt;
 Camera cam;
 
+Uint32 fp10s_acc = 0;
+Uint32 fp10s_acc_samples = 0;
+
 void load_ply(const char *filename, Material *mat, Float scale)
 {
 	vector<NormalVertex*> vertices;
@@ -81,7 +84,7 @@
 		f.ignore(1000,'\n');
 	}
 
-	for (int i; i < vertex_num; i++)
+	for (int i = 0; i < vertex_num; i++)
 	{
 		normals.at(i) /= vertex_face_num.at(i);
 		normals.at(i).normalize();
@@ -93,6 +96,18 @@
 
 void update(SDL_Surface *screen)
 {
+	static Uint32 t = 0;
+	Uint32 tnow = SDL_GetTicks();
+	int fp10s = 10000/(int)(tnow - t);
+	if (t != 0)
+	{
+		fp10s_acc += fp10s;
+		++fp10s_acc_samples;
+	}
+	t = tnow;
+	printf("\b\b\b\b\b\b\b\b\b%3d.%1d fps", fp10s/10, fp10s%10);
+	fflush(stdout);
+
 	rt.render(w, h, render_buffer);
 
 	if (SDL_MUSTLOCK(screen))
@@ -123,6 +138,13 @@
 		SDL_UpdateRect(screen, 0, 0, w, h);
 }
 
+void quit()
+{
+	Uint32 fp100s_aver = fp10s_acc*10/fp10s_acc_samples;
+	printf("\naverlage fps: %3d.%2d\n", fp100s_aver/100, fp100s_aver%100);
+	SDL_Quit();
+}
+
 int main()
 {
 	/* initialize SDL */
@@ -133,7 +155,7 @@
 		exit(1);
 	}
 
-	atexit(SDL_Quit);
+	atexit(quit);
 
 	screen = SDL_SetVideoMode(w, h, 32, SDL_SWSURFACE|SDL_DOUBLEBUF|SDL_RESIZABLE);
 	if ( screen == NULL ) {
@@ -142,6 +164,7 @@
 	}
 
 	/* initialize raytracer and prepare scene */
+	pyrit_verbosity = 0;
 	render_buffer = (Float *) malloc(w*h*3*sizeof(Float));
 
 	rt.setThreads(2);
--- a/ccdemos/spheres_shadow.cc	Thu Dec 13 00:08:11 2007 +0100
+++ b/ccdemos/spheres_shadow.cc	Fri Dec 14 00:05:54 2007 +0100
@@ -1,4 +1,5 @@
 #include "raytracer.h"
+#include "kdtree.h"
 #include "image.h"
 
 int main()
--- a/config.mk	Thu Dec 13 00:08:11 2007 +0100
+++ b/config.mk	Fri Dec 14 00:05:54 2007 +0100
@@ -1,6 +1,6 @@
 DEFS=-DPTHREADS
 
-CCFLAGS=-Wall -Wno-write-strings -fno-strict-aliasing -I$(ROOT)/include
+CCFLAGS=-g -Wall -Wno-write-strings -fno-strict-aliasing -I$(ROOT)/include
 LDFLAGS=
 
 PY_CCFLAGS=$(shell python-config --includes)
@@ -21,5 +21,4 @@
 endif
 
 # optimizations
-CCFLAGS+=-g -O0
-#CCFLAGS+=-O3 -pipe -fomit-frame-pointer -ffast-math -msse3
+CCFLAGS+=-O3 -pipe -ffast-math -msse3
--- a/include/common.h	Thu Dec 13 00:08:11 2007 +0100
+++ b/include/common.h	Fri Dec 14 00:05:54 2007 +0100
@@ -44,4 +44,40 @@
 	}
 }
 
+template<typename Type> const Type &min3(const Type &a, const Type &b, const Type &c)
+{
+	if (a <= b)
+	{
+		if (a <= c)
+			return a;
+		else
+			return c;
+	}
+	else
+	{
+		if (b <= c)
+			return b;
+		else
+			return c;
+	}
+}
+
+template<typename Type> const Type &max3(const Type &a, const Type &b, const Type &c)
+{
+	if (a >= b)
+	{
+		if (a >= c)
+			return a;
+		else
+			return c;
+	}
+	else
+	{
+		if (b >= c)
+			return b;
+		else
+			return c;
+	}
+}
+
 #endif
--- a/src/octree.cc	Thu Dec 13 00:08:11 2007 +0100
+++ b/src/octree.cc	Fri Dec 14 00:05:54 2007 +0100
@@ -83,50 +83,32 @@
 	built = true;
 }
 
-static inline int first_node(const Float tx0, const Float ty0, const Float tz0,
-	const Float txm, const Float tym, const Float tzm)
+
+/*******************************************************
+octree traversal algorithm as described in paper
+"An Efficient Parametric Algorithm for Octree Traversal"
+by J. Revelles, C. Urena and M. Lastra.
+
+see revision 37 for original recursive version
+*******************************************************/
+
+struct OctreeTravState
 {
-	int res = 0;
-	if (tx0 > ty0)
-	{
-		if (tx0 > tz0)
-		{ // YZ
-			if (tym < tx0)
-				res |= 2;
-			if (tzm < tx0)
-				res |= 1;
-		}
-		else
-		{ // XY
-			if (txm < tz0)
-				res |= 4;
-			if (tym < tz0)
-				res |= 2;
-		}
-	}
-	else
-	{
-		if (ty0 > tz0)
-		{ // XZ
-			if (txm < ty0)
-				res |= 4;
-			if (tzm < ty0)
-				res |= 1;
-			return res;
-		}
-		else
-		{ // XY
-			if (txm < tz0)
-				res |= 4;
-			if (tym < tz0)
-				res |= 2;
-		}
-	}
-	return res;
-}
+	Float tx0,ty0,tz0,tx1,ty1,tz1,txm,tym,tzm;
+	OctreeNode *node;
+	int next;
+	OctreeTravState() {};
+	OctreeTravState(
+		const Float atx0, const Float aty0, const Float atz0,
+		const Float atx1, const Float aty1, const Float atz1,
+		const Float atxm, const Float atym, const Float atzm,
+		OctreeNode *const anode, const int anext):
+		tx0(atx0), ty0(aty0), tz0(atz0), tx1(atx1), ty1(aty1), tz1(atz1),
+		txm(atxm), tym(atym), tzm(atzm), node(anode), next(anext) {};
+};
 
-static inline int next_node(const Float txm, const int xnode,
-	const Float tym, const int ynode, const Float tzm, const int znode)
+inline const int &next_node(const Float &txm, const int &xnode,
+	const Float &tym, const int &ynode, const Float &tzm, const int &znode)
 {
 	if (txm < tym)
 	{
@@ -144,86 +126,6 @@
 	}
 }
 
-static Shape *proc_subtree(const int a, const Float tx0, const Float ty0, const Float tz0,
-	const Float tx1, const Float ty1, const Float tz1, OctreeNode *node,
-	const Shape *origin_shape, const Ray &ray, Float &nearest_distance)
-{
-	Float txm, tym, tzm;
-	int curr_node;
-
-	// if ray does not intersect this node
-	if (tx1 < 0.0 || ty1 < 0.0 || tz1 < 0.0)
-		return NULL;
-
-	if (node->isLeaf())
-	{
-		Shape *nearest_shape = NULL;
-		ShapeList::iterator shape;
-		Float mindist = max(max(tx0,ty0),tz0);
-		Float dist = min(min(min(tx1,ty1),tz1),nearest_distance);
-		for (shape = node->shapes->begin(); shape != node->shapes->end(); shape++)
-			if (*shape != origin_shape && (*shape)->intersect(ray, dist) && dist >= mindist)
-			{
-				nearest_shape = *shape;
-				nearest_distance = dist;
-			}
-		return nearest_shape;
-	}
-
-	txm = 0.5 * (tx0+tx1);
-	tym = 0.5 * (ty0+ty1);
-	tzm = 0.5 * (tz0+tz1);
-
-	curr_node = first_node(tx0,ty0,tz0,txm,tym,tzm);
-	Shape *shape = NULL;
-	while (curr_node < 8)
-	{
-		switch (curr_node)
-		{
-			case 0:
-				shape =proc_subtree (a,tx0,ty0,tz0,txm,tym,tzm,node->getChild(a), origin_shape, ray, nearest_distance);
-				curr_node = next_node(txm, 4, tym, 2, tzm, 1);
-				break;
-			case 1:
-				shape =proc_subtree (a,tx0,ty0,tzm,txm,tym,tz1,node->getChild(1^a), origin_shape, ray, nearest_distance);
-				curr_node = next_node(txm, 5, tym, 3, tz1, 8);
-				break;
-			case 2:
-				shape =proc_subtree (a,tx0,tym,tz0,txm,ty1,tzm,node->getChild(2^a), origin_shape, ray, nearest_distance);
-				curr_node = next_node(txm, 6, ty1, 8, tzm, 3);
-				break;
-			case 3:
-				shape =proc_subtree (a,tx0,tym,tzm,txm,ty1,tz1,node->getChild(3^a), origin_shape, ray, nearest_distance);
-				curr_node = next_node(txm, 7, ty1, 8, tz1, 8);
-				break;
-			case 4:
-				shape =proc_subtree (a,txm,ty0,tz0,tx1,tym,tzm,node->getChild(4^a), origin_shape, ray, nearest_distance);
-				curr_node = next_node(tx1, 8, tym, 6, tzm, 5);
-				break;
-			case 5:
-				shape =proc_subtree (a,txm,ty0,tzm,tx1,tym,tz1,node->getChild(5^a), origin_shape, ray, nearest_distance);
-				curr_node = next_node(tx1, 8, tym, 7, tz1, 8);
-				break;
-			case 6:
-				shape =proc_subtree (a,txm,tym,tz0,tx1,ty1,tzm,node->getChild(6^a), origin_shape, ray, nearest_distance);
-				curr_node = next_node(tx1, 8, ty1, 8, tzm, 7);
-				break;
-			case 7:
-				shape =proc_subtree (a,txm,tym,tzm,tx1,ty1,tz1,node->getChild(7^a), origin_shape, ray, nearest_distance);
-				curr_node = 8;
-				break;
-		}
-		if (shape != NULL)
-			return shape;
-	}
-	return NULL;
-}
-
-/*
-traversal algorithm paper as described in paper
-"An Efficient Parametric Algorithm for Octree Traversal"
-by J. Revelles, C. Urena and M. Lastra.
-*/
 Shape * Octree::nearest_intersection(const Shape *origin_shape, const Ray &ray,
 		Float &nearest_distance)
 {
@@ -231,9 +133,23 @@
 	if (!built)
 		return Container::nearest_intersection(origin_shape, ray, nearest_distance);
 
+	OctreeTravState st[max_depth+1];
+	register OctreeTravState *st_cur = st;
+
+#	define node	st_cur->node
+#	define tx0	st_cur->tx0
+#	define ty0	st_cur->ty0
+#	define tz0	st_cur->tz0
+#	define tx1	st_cur->tx1
+#	define ty1	st_cur->ty1
+#	define tz1	st_cur->tz1
+#	define txm	st_cur->txm
+#	define tym	st_cur->tym
+#	define tzm	st_cur->tzm
+
 	int a = 0;
-	Vector3 ro = ray.o;
-	Vector3 rdir = ray.dir;
+	Vector3 ro(ray.o);
+	Vector3 rdir(1.0/ray.dir.x, 1.0/ray.dir.y, 1.0/ray.dir.z);
 
 	if (rdir.x < 0.0)
 	{
@@ -253,16 +169,163 @@
 		rdir.z = -rdir.z;
 		a |= 1;
 	}
-	Float tx0 = (bbox.L.x - ro.x) / rdir.x;
-	Float tx1 = (bbox.H.x - ro.x) / rdir.x;
-	Float ty0 = (bbox.L.y - ro.y) / rdir.y;
-	Float ty1 = (bbox.H.y - ro.y) / rdir.y;
-	Float tz0 = (bbox.L.z - ro.z) / rdir.z;
-	Float tz1 = (bbox.H.z - ro.z) / rdir.z;
+
+	tx0 = (bbox.L.x - ro.x) * rdir.x;
+	tx1 = (bbox.H.x - ro.x) * rdir.x;
+	ty0 = (bbox.L.y - ro.y) * rdir.y;
+	ty1 = (bbox.H.y - ro.y) * rdir.y;
+	tz0 = (bbox.L.z - ro.z) * rdir.z;
+	tz1 = (bbox.H.z - ro.z) * rdir.z;
+
+	if (max3(tx0,ty0,tz0) > min3(tx1,ty1,tz1))
+		return NULL;
+
+	node = root;
+	st_cur->next = -1;
+
+	Shape *nearest_shape = NULL;
+	while (nearest_shape == NULL)
+	{
+		if (st_cur->next == -1)
+		{
+			st_cur->next = 8;
+
+			// if ray does intersect this node
+			if (!(tx1 < 0.0 || ty1 < 0.0 || tz1 < 0.0))
+			{
+				if (node->isLeaf())
+				{
+					ShapeList::iterator shape;
+					register Float mindist = max3(tx0,ty0,tz0);
+					/* correct & slow */
+					//Float dist = min(min3(tx1,ty1,tz1),nearest_distance);
+					/* faster */
+					register Float dist = nearest_distance;
+					for (shape = node->shapes->begin(); shape != node->shapes->end(); shape++)
+						if (*shape != origin_shape && (*shape)->intersect(ray, dist) && dist >= mindist)
+						{
+							nearest_shape = *shape;
+							nearest_distance = dist;
+						}
+				}
+				else
+				{
+					txm = 0.5 * (tx0+tx1);
+					tym = 0.5 * (ty0+ty1);
+					tzm = 0.5 * (tz0+tz1);
 
-	if (max(max(tx0,ty0),tz0) < min (min(tx1,ty1),tz1))
-		return proc_subtree(a,tx0,ty0,tz0,tx1,ty1,tz1,root,
-			origin_shape, ray, nearest_distance);
-	else
-		return NULL;
+					// first node
+					st_cur->next = 0;
+					if (tx0 > ty0)
+					{
+						if (tx0 > tz0)
+						{ // YZ
+							if (tym < tx0)
+								st_cur->next |= 2;
+							if (tzm < tx0)
+								st_cur->next |= 1;
+						}
+						else
+						{ // XY
+							if (txm < tz0)
+								st_cur->next |= 4;
+							if (tym < tz0)
+								st_cur->next |= 2;
+						}
+					}
+					else
+					{
+						if (ty0 > tz0)
+						{ // XZ
+							if (txm < ty0)
+								st_cur->next |= 4;
+							if (tzm < ty0)
+								st_cur->next |= 1;
+						}
+						else
+						{ // XY
+							if (txm < tz0)
+								st_cur->next |= 4;
+							if (tym < tz0)
+								st_cur->next |= 2;
+						}
+					}
+				}
+			}
+		}
+
+		while (st_cur->next == 8)
+		{
+			// pop state from stack
+			if (st_cur == st)
+				return NULL; // nothing to pop, finish
+			--st_cur;
+		}
+
+		// push current state
+		*(st_cur+1) = *st_cur;
+		++st_cur;
+
+		switch (st_cur->next)
+		{
+			case 0:
+				tx1 = txm;
+				ty1 = tym;
+				tz1 = tzm;
+				node = node->getChild(a);
+				(st_cur-1)->next = next_node(txm, 4, tym, 2, tzm, 1);
+				break;
+			case 1:
+				tz0 = tzm;
+				tx1 = txm;
+				ty1 = tym;
+				node = node->getChild(1^a);
+				(st_cur-1)->next = next_node(txm, 5, tym, 3, tz1, 8);
+				break;
+			case 2:
+				ty0 = tym;
+				tx1 = txm;
+				tz1 = tzm;
+				node = node->getChild(2^a);
+				(st_cur-1)->next = next_node(txm, 6, ty1, 8, tzm, 3);
+				break;
+			case 3:
+				ty0 = tym;
+				tz0 = tzm;
+				tx1 = txm;
+				node = node->getChild(3^a);
+				(st_cur-1)->next = next_node(txm, 7, ty1, 8, tz1, 8);
+				break;
+			case 4:
+				tx0 = txm;
+				ty1 = tym;
+				tz1 = tzm;
+				node = node->getChild(4^a);
+				(st_cur-1)->next = next_node(tx1, 8, tym, 6, tzm, 5);
+				break;
+			case 5:
+				tx0 = txm;
+				tz0 = tzm;
+				ty1 = tym;
+				node = node->getChild(5^a);
+				(st_cur-1)->next = next_node(tx1, 8, tym, 7, tz1, 8);
+				break;
+			case 6:
+				tx0 = txm;
+				ty0 = tym;
+				tz1 = tzm;
+				node = node->getChild(6^a);
+				(st_cur-1)->next = next_node(tx1, 8, ty1, 8, tzm, 7);
+				break;
+			case 7:
+				tx0 = txm;
+				ty0 = tym;
+				tz0 = tzm;
+				node = node->getChild(7^a);
+				(st_cur-1)->next = 8;
+				break;
+		}
+		st_cur->next = -1;
+	}
+	return nearest_shape;
 }