# HG changeset patch # User Radek Brich # Date 1197587154 -3600 # Node ID 5f954c0d34fcd17bead661de714e262fe3fd3506 # Parent b490093b0ac3a4de74f5b10faed7e63748eb41d4 octree traversal rewritten to avoid recursion reenabled -O3 optimizations (was accidentaly disabled, now it traces even faster! :-)) realtime_bunny: added FPS counter, fixed a bug in ply loader min3 and max3 templates added to common.h diff -r b490093b0ac3 -r 5f954c0d34fc ccdemos/realtime.cc --- a/ccdemos/realtime.cc Thu Dec 13 00:08:11 2007 +0100 +++ b/ccdemos/realtime.cc Fri Dec 14 00:05:54 2007 +0100 @@ -1,6 +1,7 @@ #include #include "raytracer.h" +#include "kdtree.h" int w = 480; int h = 288; diff -r b490093b0ac3 -r 5f954c0d34fc ccdemos/realtime_bunny.cc --- a/ccdemos/realtime_bunny.cc Thu Dec 13 00:08:11 2007 +0100 +++ b/ccdemos/realtime_bunny.cc Fri Dec 14 00:05:54 2007 +0100 @@ -13,6 +13,9 @@ Raytracer rt; Camera cam; +Uint32 fp10s_acc = 0; +Uint32 fp10s_acc_samples = 0; + void load_ply(const char *filename, Material *mat, Float scale) { vector vertices; @@ -81,7 +84,7 @@ f.ignore(1000,'\n'); } - for (int i; i < vertex_num; i++) + for (int i = 0; i < vertex_num; i++) { normals.at(i) /= vertex_face_num.at(i); normals.at(i).normalize(); @@ -93,6 +96,18 @@ void update(SDL_Surface *screen) { + static Uint32 t = 0; + Uint32 tnow = SDL_GetTicks(); + int fp10s = 10000/(int)(tnow - t); + if (t != 0) + { + fp10s_acc += fp10s; + ++fp10s_acc_samples; + } + t = tnow; + printf("\b\b\b\b\b\b\b\b\b%3d.%1d fps", fp10s/10, fp10s%10); + fflush(stdout); + rt.render(w, h, render_buffer); if (SDL_MUSTLOCK(screen)) @@ -123,6 +138,13 @@ SDL_UpdateRect(screen, 0, 0, w, h); } +void quit() +{ + Uint32 fp100s_aver = fp10s_acc*10/fp10s_acc_samples; + printf("\naverlage fps: %3d.%2d\n", fp100s_aver/100, fp100s_aver%100); + SDL_Quit(); +} + int main() { /* initialize SDL */ @@ -133,7 +155,7 @@ exit(1); } - atexit(SDL_Quit); + atexit(quit); screen = SDL_SetVideoMode(w, h, 32, SDL_SWSURFACE|SDL_DOUBLEBUF|SDL_RESIZABLE); if ( screen == NULL ) { @@ -142,6 +164,7 @@ } /* initialize raytracer and prepare scene */ + pyrit_verbosity = 0; render_buffer = (Float *) malloc(w*h*3*sizeof(Float)); rt.setThreads(2); diff -r b490093b0ac3 -r 5f954c0d34fc ccdemos/spheres_shadow.cc --- a/ccdemos/spheres_shadow.cc Thu Dec 13 00:08:11 2007 +0100 +++ b/ccdemos/spheres_shadow.cc Fri Dec 14 00:05:54 2007 +0100 @@ -1,4 +1,5 @@ #include "raytracer.h" +#include "kdtree.h" #include "image.h" int main() diff -r b490093b0ac3 -r 5f954c0d34fc config.mk --- a/config.mk Thu Dec 13 00:08:11 2007 +0100 +++ b/config.mk Fri Dec 14 00:05:54 2007 +0100 @@ -1,6 +1,6 @@ DEFS=-DPTHREADS -CCFLAGS=-Wall -Wno-write-strings -fno-strict-aliasing -I$(ROOT)/include +CCFLAGS=-g -Wall -Wno-write-strings -fno-strict-aliasing -I$(ROOT)/include LDFLAGS= PY_CCFLAGS=$(shell python-config --includes) @@ -21,5 +21,4 @@ endif # optimizations -CCFLAGS+=-g -O0 -#CCFLAGS+=-O3 -pipe -fomit-frame-pointer -ffast-math -msse3 +CCFLAGS+=-O3 -pipe -ffast-math -msse3 diff -r b490093b0ac3 -r 5f954c0d34fc include/common.h --- a/include/common.h Thu Dec 13 00:08:11 2007 +0100 +++ b/include/common.h Fri Dec 14 00:05:54 2007 +0100 @@ -44,4 +44,40 @@ } } +template const Type &min3(const Type &a, const Type &b, const Type &c) +{ + if (a <= b) + { + if (a <= c) + return a; + else + return c; + } + else + { + if (b <= c) + return b; + else + return c; + } +} + +template const Type &max3(const Type &a, const Type &b, const Type &c) +{ + if (a >= b) + { + if (a >= c) + return a; + else + return c; + } + else + { + if (b >= c) + return b; + else + return c; + } +} + #endif diff -r b490093b0ac3 -r 5f954c0d34fc src/octree.cc --- a/src/octree.cc Thu Dec 13 00:08:11 2007 +0100 +++ b/src/octree.cc Fri Dec 14 00:05:54 2007 +0100 @@ -83,50 +83,32 @@ built = true; } -static inline int first_node(const Float tx0, const Float ty0, const Float tz0, - const Float txm, const Float tym, const Float tzm) + +/******************************************************* +octree traversal algorithm as described in paper +"An Efficient Parametric Algorithm for Octree Traversal" +by J. Revelles, C. Urena and M. Lastra. + +see revision 37 for original recursive version +*******************************************************/ + +struct OctreeTravState { - int res = 0; - if (tx0 > ty0) - { - if (tx0 > tz0) - { // YZ - if (tym < tx0) - res |= 2; - if (tzm < tx0) - res |= 1; - } - else - { // XY - if (txm < tz0) - res |= 4; - if (tym < tz0) - res |= 2; - } - } - else - { - if (ty0 > tz0) - { // XZ - if (txm < ty0) - res |= 4; - if (tzm < ty0) - res |= 1; - return res; - } - else - { // XY - if (txm < tz0) - res |= 4; - if (tym < tz0) - res |= 2; - } - } - return res; -} + Float tx0,ty0,tz0,tx1,ty1,tz1,txm,tym,tzm; + OctreeNode *node; + int next; + OctreeTravState() {}; + OctreeTravState( + const Float atx0, const Float aty0, const Float atz0, + const Float atx1, const Float aty1, const Float atz1, + const Float atxm, const Float atym, const Float atzm, + OctreeNode *const anode, const int anext): + tx0(atx0), ty0(aty0), tz0(atz0), tx1(atx1), ty1(aty1), tz1(atz1), + txm(atxm), tym(atym), tzm(atzm), node(anode), next(anext) {}; +}; -static inline int next_node(const Float txm, const int xnode, - const Float tym, const int ynode, const Float tzm, const int znode) +inline const int &next_node(const Float &txm, const int &xnode, + const Float &tym, const int &ynode, const Float &tzm, const int &znode) { if (txm < tym) { @@ -144,86 +126,6 @@ } } -static Shape *proc_subtree(const int a, const Float tx0, const Float ty0, const Float tz0, - const Float tx1, const Float ty1, const Float tz1, OctreeNode *node, - const Shape *origin_shape, const Ray &ray, Float &nearest_distance) -{ - Float txm, tym, tzm; - int curr_node; - - // if ray does not intersect this node - if (tx1 < 0.0 || ty1 < 0.0 || tz1 < 0.0) - return NULL; - - if (node->isLeaf()) - { - Shape *nearest_shape = NULL; - ShapeList::iterator shape; - Float mindist = max(max(tx0,ty0),tz0); - Float dist = min(min(min(tx1,ty1),tz1),nearest_distance); - for (shape = node->shapes->begin(); shape != node->shapes->end(); shape++) - if (*shape != origin_shape && (*shape)->intersect(ray, dist) && dist >= mindist) - { - nearest_shape = *shape; - nearest_distance = dist; - } - return nearest_shape; - } - - txm = 0.5 * (tx0+tx1); - tym = 0.5 * (ty0+ty1); - tzm = 0.5 * (tz0+tz1); - - curr_node = first_node(tx0,ty0,tz0,txm,tym,tzm); - Shape *shape = NULL; - while (curr_node < 8) - { - switch (curr_node) - { - case 0: - shape =proc_subtree (a,tx0,ty0,tz0,txm,tym,tzm,node->getChild(a), origin_shape, ray, nearest_distance); - curr_node = next_node(txm, 4, tym, 2, tzm, 1); - break; - case 1: - shape =proc_subtree (a,tx0,ty0,tzm,txm,tym,tz1,node->getChild(1^a), origin_shape, ray, nearest_distance); - curr_node = next_node(txm, 5, tym, 3, tz1, 8); - break; - case 2: - shape =proc_subtree (a,tx0,tym,tz0,txm,ty1,tzm,node->getChild(2^a), origin_shape, ray, nearest_distance); - curr_node = next_node(txm, 6, ty1, 8, tzm, 3); - break; - case 3: - shape =proc_subtree (a,tx0,tym,tzm,txm,ty1,tz1,node->getChild(3^a), origin_shape, ray, nearest_distance); - curr_node = next_node(txm, 7, ty1, 8, tz1, 8); - break; - case 4: - shape =proc_subtree (a,txm,ty0,tz0,tx1,tym,tzm,node->getChild(4^a), origin_shape, ray, nearest_distance); - curr_node = next_node(tx1, 8, tym, 6, tzm, 5); - break; - case 5: - shape =proc_subtree (a,txm,ty0,tzm,tx1,tym,tz1,node->getChild(5^a), origin_shape, ray, nearest_distance); - curr_node = next_node(tx1, 8, tym, 7, tz1, 8); - break; - case 6: - shape =proc_subtree (a,txm,tym,tz0,tx1,ty1,tzm,node->getChild(6^a), origin_shape, ray, nearest_distance); - curr_node = next_node(tx1, 8, ty1, 8, tzm, 7); - break; - case 7: - shape =proc_subtree (a,txm,tym,tzm,tx1,ty1,tz1,node->getChild(7^a), origin_shape, ray, nearest_distance); - curr_node = 8; - break; - } - if (shape != NULL) - return shape; - } - return NULL; -} - -/* -traversal algorithm paper as described in paper -"An Efficient Parametric Algorithm for Octree Traversal" -by J. Revelles, C. Urena and M. Lastra. -*/ Shape * Octree::nearest_intersection(const Shape *origin_shape, const Ray &ray, Float &nearest_distance) { @@ -231,9 +133,23 @@ if (!built) return Container::nearest_intersection(origin_shape, ray, nearest_distance); + OctreeTravState st[max_depth+1]; + register OctreeTravState *st_cur = st; + +# define node st_cur->node +# define tx0 st_cur->tx0 +# define ty0 st_cur->ty0 +# define tz0 st_cur->tz0 +# define tx1 st_cur->tx1 +# define ty1 st_cur->ty1 +# define tz1 st_cur->tz1 +# define txm st_cur->txm +# define tym st_cur->tym +# define tzm st_cur->tzm + int a = 0; - Vector3 ro = ray.o; - Vector3 rdir = ray.dir; + Vector3 ro(ray.o); + Vector3 rdir(1.0/ray.dir.x, 1.0/ray.dir.y, 1.0/ray.dir.z); if (rdir.x < 0.0) { @@ -253,16 +169,163 @@ rdir.z = -rdir.z; a |= 1; } - Float tx0 = (bbox.L.x - ro.x) / rdir.x; - Float tx1 = (bbox.H.x - ro.x) / rdir.x; - Float ty0 = (bbox.L.y - ro.y) / rdir.y; - Float ty1 = (bbox.H.y - ro.y) / rdir.y; - Float tz0 = (bbox.L.z - ro.z) / rdir.z; - Float tz1 = (bbox.H.z - ro.z) / rdir.z; + + tx0 = (bbox.L.x - ro.x) * rdir.x; + tx1 = (bbox.H.x - ro.x) * rdir.x; + ty0 = (bbox.L.y - ro.y) * rdir.y; + ty1 = (bbox.H.y - ro.y) * rdir.y; + tz0 = (bbox.L.z - ro.z) * rdir.z; + tz1 = (bbox.H.z - ro.z) * rdir.z; + + if (max3(tx0,ty0,tz0) > min3(tx1,ty1,tz1)) + return NULL; + + node = root; + st_cur->next = -1; + + Shape *nearest_shape = NULL; + while (nearest_shape == NULL) + { + if (st_cur->next == -1) + { + st_cur->next = 8; + + // if ray does intersect this node + if (!(tx1 < 0.0 || ty1 < 0.0 || tz1 < 0.0)) + { + if (node->isLeaf()) + { + ShapeList::iterator shape; + register Float mindist = max3(tx0,ty0,tz0); + /* correct & slow */ + //Float dist = min(min3(tx1,ty1,tz1),nearest_distance); + /* faster */ + register Float dist = nearest_distance; + for (shape = node->shapes->begin(); shape != node->shapes->end(); shape++) + if (*shape != origin_shape && (*shape)->intersect(ray, dist) && dist >= mindist) + { + nearest_shape = *shape; + nearest_distance = dist; + } + } + else + { + txm = 0.5 * (tx0+tx1); + tym = 0.5 * (ty0+ty1); + tzm = 0.5 * (tz0+tz1); - if (max(max(tx0,ty0),tz0) < min (min(tx1,ty1),tz1)) - return proc_subtree(a,tx0,ty0,tz0,tx1,ty1,tz1,root, - origin_shape, ray, nearest_distance); - else - return NULL; + // first node + st_cur->next = 0; + if (tx0 > ty0) + { + if (tx0 > tz0) + { // YZ + if (tym < tx0) + st_cur->next |= 2; + if (tzm < tx0) + st_cur->next |= 1; + } + else + { // XY + if (txm < tz0) + st_cur->next |= 4; + if (tym < tz0) + st_cur->next |= 2; + } + } + else + { + if (ty0 > tz0) + { // XZ + if (txm < ty0) + st_cur->next |= 4; + if (tzm < ty0) + st_cur->next |= 1; + } + else + { // XY + if (txm < tz0) + st_cur->next |= 4; + if (tym < tz0) + st_cur->next |= 2; + } + } + } + } + } + + while (st_cur->next == 8) + { + // pop state from stack + if (st_cur == st) + return NULL; // nothing to pop, finish + --st_cur; + } + + // push current state + *(st_cur+1) = *st_cur; + ++st_cur; + + switch (st_cur->next) + { + case 0: + tx1 = txm; + ty1 = tym; + tz1 = tzm; + node = node->getChild(a); + (st_cur-1)->next = next_node(txm, 4, tym, 2, tzm, 1); + break; + case 1: + tz0 = tzm; + tx1 = txm; + ty1 = tym; + node = node->getChild(1^a); + (st_cur-1)->next = next_node(txm, 5, tym, 3, tz1, 8); + break; + case 2: + ty0 = tym; + tx1 = txm; + tz1 = tzm; + node = node->getChild(2^a); + (st_cur-1)->next = next_node(txm, 6, ty1, 8, tzm, 3); + break; + case 3: + ty0 = tym; + tz0 = tzm; + tx1 = txm; + node = node->getChild(3^a); + (st_cur-1)->next = next_node(txm, 7, ty1, 8, tz1, 8); + break; + case 4: + tx0 = txm; + ty1 = tym; + tz1 = tzm; + node = node->getChild(4^a); + (st_cur-1)->next = next_node(tx1, 8, tym, 6, tzm, 5); + break; + case 5: + tx0 = txm; + tz0 = tzm; + ty1 = tym; + node = node->getChild(5^a); + (st_cur-1)->next = next_node(tx1, 8, tym, 7, tz1, 8); + break; + case 6: + tx0 = txm; + ty0 = tym; + tz1 = tzm; + node = node->getChild(6^a); + (st_cur-1)->next = next_node(tx1, 8, ty1, 8, tzm, 7); + break; + case 7: + tx0 = txm; + ty0 = tym; + tz0 = tzm; + node = node->getChild(7^a); + (st_cur-1)->next = 8; + break; + } + st_cur->next = -1; + } + return nearest_shape; }