# HG changeset patch # User Radek Brich # Date 1209298777 -7200 # Node ID 907a634e5c02295840aabdc529ad2296e3a56ac4 # Parent 6f7fe14782c24ce626dbf472a2f640736ad80aed implement triangle packet intersection diff -r 6f7fe14782c2 -r 907a634e5c02 SConstruct --- a/SConstruct Sun Apr 27 09:44:49 2008 +0200 +++ b/SConstruct Sun Apr 27 14:19:37 2008 +0200 @@ -91,7 +91,7 @@ global cpu, cpuflags_gcc, cpuflags_intelc context.Message('Checking CPU arch and flags... ') env.Execute('@$CC tools/cpuflags.c -o tools/cpuflags') - (cpu, cpuflags_gcc, cpuflags_intelc) = os.popen('tools/cpuflags %s %s' + (cpu, cpuflags_gcc, cpuflags_intelc) = os.popen('tools'+os.sep+'cpuflags %s %s' % (''.join(gccversion.rsplit('.',1)), intelcversion) ).read().split('\n')[:3] context.Result(cpu) return True diff -r 6f7fe14782c2 -r 907a634e5c02 ccdemos/realtime_bunny.cc --- a/ccdemos/realtime_bunny.cc Sun Apr 27 09:44:49 2008 +0200 +++ b/ccdemos/realtime_bunny.cc Sun Apr 27 14:19:37 2008 +0200 @@ -1,5 +1,5 @@ #include "raytracer.h" -#include "octree.h" +#include "kdtree.h" #include "common_sdl.h" #include "common_ply.h" @@ -7,7 +7,7 @@ int main(int argc, char **argv) { Raytracer rt; - Octree top; + KdTree top; Camera cam; rt.setMaxDepth(0); diff -r 6f7fe14782c2 -r 907a634e5c02 include/shapes.h --- a/include/shapes.h Sun Apr 27 09:44:49 2008 +0200 +++ b/include/shapes.h Sun Apr 27 14:19:37 2008 +0200 @@ -55,12 +55,14 @@ // first intersection point virtual bool intersect(const Ray &ray, Float &dist) const = 0; - virtual void intersect_packet(const RayPacket &rays, __m128 &dists, bool *results) + virtual __m128 intersect_packet(const RayPacket &rays, __m128 &dists) { - results[0] = intersect(rays[0], ((float*)&dists)[0]); - results[1] = intersect(rays[1], ((float*)&dists)[1]); - results[2] = intersect(rays[2], ((float*)&dists)[2]); - results[3] = intersect(rays[3], ((float*)&dists)[3]); + __m128 results; + ((int*)&results)[0] = intersect(rays[0], ((float*)&dists)[0]) ? -1 : 0; + ((int*)&results)[1] = intersect(rays[1], ((float*)&dists)[1]) ? -1 : 0; + ((int*)&results)[2] = intersect(rays[2], ((float*)&dists)[2]) ? -1 : 0; + ((int*)&results)[3] = intersect(rays[3], ((float*)&dists)[3]) ? -1 : 0; + return results; }; // all intersections (only for CSG) @@ -203,6 +205,9 @@ Triangle() {}; Triangle(Vertex *aA, Vertex *aB, Vertex *aC, Material *amaterial); bool intersect(const Ray &ray, Float &dist) const; +#ifdef TRI_BARI_PRE + __m128 intersect_packet(const RayPacket &rays, __m128 &dists); +#endif bool intersect_all(const Ray &ray, Float dist, vector &allts) const {return false;}; bool intersect_bbox(const BBox &bbox) const; const Vector3 normal(const Vector3 &P) const { return (material->smooth ? smooth_normal(P) : N); }; diff -r 6f7fe14782c2 -r 907a634e5c02 src/kdtree.cc --- a/src/kdtree.cc Sun Apr 27 09:44:49 2008 +0200 +++ b/src/kdtree.cc Sun Apr 27 14:19:37 2008 +0200 @@ -450,7 +450,7 @@ // fall back to single rays // FIXME: split rays and continue for (int i = 0; i < 4; i++) - if(!nearest_shapes[i]) + if (!nearest_shapes[i]) nearest_shapes[i] = nearest_intersection(origin_shapes[i], rays[i], nearest_distances[i]); return; @@ -493,34 +493,20 @@ /* current node is the leaf . . . empty or full */ __m128 dists = stack[exit].t; ShapeList::iterator shape; + __m128 results; for (shape = node->getShapes()->begin(); shape != node->getShapes()->end(); shape++) { - for (int i = 0; i < 4; i++) - if ( ((_mm_movemask_ps(mask)>>(i))&1) && - ((float*)&stack[entry].t)[i] < ((float*)&stack[exit].t)[i] && - *shape != origin_shapes[i] && - (*shape)->intersect(rays[i], ((float*)&dists)[i]) - && ((float*)&dists)[i] >= ((float*)&stack[entry].t)[i] - Eps) - { - nearest_shapes[i] = *shape; - nearest_distances[i] = ((float*)&dists)[i]; - } - - /* - bool results[4]; - (*shape)->intersect_packet(rays, dists, results); - int greater_than_entry = _mm_movemask_ps( - _mm_and_ps(_mm_cmpge_ps(dists, _mm_sub_ps(stack[entry].t, mEps)), mask)); + results = (*shape)->intersect_packet(rays, dists); + int valid = _mm_movemask_ps( + _mm_and_ps(mask, _mm_and_ps(results, _mm_cmpge_ps(dists, _mm_sub_ps(stack[entry].t, mEps))))); for (int i = 0; i < 4; i++) { - if (results[i] //&& *shape != origin_shapes[i] - && ((greater_than_entry>>(3-i))&1)) + if (*shape != origin_shapes[i] && ((valid>>i)&1)) { nearest_shapes[i] = *shape; nearest_distances[i] = ((float*)&dists)[i]; } } - */ } for (int i = 0; i < 4; i++) diff -r 6f7fe14782c2 -r 907a634e5c02 src/shapes.cc --- a/src/shapes.cc Sun Apr 27 09:44:49 2008 +0200 +++ b/src/shapes.cc Sun Apr 27 14:19:37 2008 +0200 @@ -348,6 +348,54 @@ #endif } +#ifdef TRI_BARI_PRE +__m128 Triangle::intersect_packet(const RayPacket &rays, __m128 &dists) +{ + static const int modulo3[5] = {0,1,2,0,1}; + register const int u = modulo3[k+1]; + register const int v = modulo3[k+2]; + __m128 mask; + + const __m128 t = _mm_div_ps( + _mm_sub_ps(_mm_sub_ps( + _mm_sub_ps(_mm_set_ps1(nd), rays.o.ma[k]), + _mm_mul_ps(_mm_set_ps1(nu), rays.o.ma[u]) + ), _mm_mul_ps(_mm_set_ps1(nv), rays.o.ma[v])), + _mm_add_ps(rays.dir.ma[k], + _mm_add_ps(_mm_mul_ps(_mm_set_ps1(nu), rays.dir.ma[u]), + _mm_mul_ps(_mm_set_ps1(nv), rays.dir.ma[v]))) + ); + + mask = _mm_and_ps(_mm_cmplt_ps(t, dists), _mm_cmpge_ps(t, mEps)); + if (!_mm_movemask_ps(mask)) + return mask; + + const __m128 hu = _mm_sub_ps(_mm_add_ps(rays.o.ma[u], + _mm_mul_ps(t, rays.dir.ma[u])), _mm_set_ps1(A->P[u])); + const __m128 hv = _mm_sub_ps(_mm_add_ps(rays.o.ma[v], + _mm_mul_ps(t, rays.dir.ma[v])), _mm_set_ps1(A->P[v])); + const __m128 beta = _mm_add_ps(_mm_mul_ps(hv, _mm_set_ps1(bnu)), + _mm_mul_ps(hu, _mm_set_ps1(bnv))); + + mask = _mm_and_ps(mask, _mm_cmpge_ps(beta, zeros)); + if (!_mm_movemask_ps(mask)) + return mask; + + const __m128 gamma = _mm_add_ps(_mm_mul_ps(hu, _mm_set_ps1(cnv)), + _mm_mul_ps(hv, _mm_set_ps1(cnu))); + + mask = _mm_and_ps(mask, _mm_and_ps(_mm_cmpge_ps(gamma, zeros), + _mm_cmple_ps(_mm_add_ps(beta, gamma), ones))); + if (!_mm_movemask_ps(mask)) + return mask; + + for (int i = 0; i < 4; i++) + if ((_mm_movemask_ps(mask)>>i)&1) + ((float*)&dists)[i] = ((float*)&t)[i]; + return mask; +} +#endif + bool Triangle::intersect_bbox(const BBox &bbox) const { const Vector3 boxcenter = (bbox.L+bbox.H)*0.5;