# HG changeset patch # User Radek Brich # Date 1209318983 -7200 # Node ID ce6abe0aeeae1bcb3903dbc6f78febaf4f4d38f7 # Parent 907a634e5c02295840aabdc529ad2296e3a56ac4 BBox - RayPacket intersection clean up diff -r 907a634e5c02 -r ce6abe0aeeae include/common.h --- a/include/common.h Sun Apr 27 14:19:37 2008 +0200 +++ b/include/common.h Sun Apr 27 19:56:23 2008 +0200 @@ -48,6 +48,13 @@ # define Inf FLT_MAX #endif +const __m128 mZero = _mm_set_ps1(0.); +const __m128 mOne = _mm_set_ps1(1.); +const __m128 mEps = _mm_set_ps1(Eps); +const __m128 mMEps = _mm_set_ps1(-Eps); +const __m128 mInf = _mm_set_ps1(Inf); +const __m128 mAllSet = _mm_cmplt_ps(mZero, mOne); + /* verbosity level: 0: only errors (E) 1: major status messages (*) diff -r 907a634e5c02 -r ce6abe0aeeae include/scene.h --- a/include/scene.h Sun Apr 27 14:19:37 2008 +0200 +++ b/include/scene.h Sun Apr 27 19:56:23 2008 +0200 @@ -178,13 +178,7 @@ Float h() { return H.y-L.y; }; Float d() { return H.z-L.z; }; bool intersect(const Ray &ray, Float &a, Float &b); - bool intersect_packet(const RayPacket &rays, __m128 &a, __m128 &b) - { - return intersect(rays[0], ((float*)&a)[0], ((float*)&b)[0]) - || intersect(rays[1], ((float*)&a)[1], ((float*)&b)[1]) - || intersect(rays[2], ((float*)&a)[2], ((float*)&b)[2]) - || intersect(rays[3], ((float*)&a)[3], ((float*)&b)[3]); - }; + __m128 intersect_packet(const RayPacket &rays, __m128 &a, __m128 &b); }; #endif diff -r 907a634e5c02 -r ce6abe0aeeae include/vector.h --- a/include/vector.h Sun Apr 27 14:19:37 2008 +0200 +++ b/include/vector.h Sun Apr 27 19:56:23 2008 +0200 @@ -191,11 +191,6 @@ typedef Vector3 Colour; - -static const __m128 zeros = _mm_set_ps1(0.); -static const __m128 ones = _mm_set_ps1(1.); -static const __m128 mEps = _mm_set_ps1(Eps); - class VectorPacket { public: @@ -231,7 +226,7 @@ m = _mm_add_ps(x, y); m = _mm_add_ps(m, z); // x*x + y*y + z*z m = _mm_sqrt_ps(m); - m = _mm_div_ps(ones, m); // m = 1/sqrt(m) + m = _mm_div_ps(mOne, m); // m = 1/sqrt(m) mx = _mm_mul_ps(mx, m); my = _mm_mul_ps(my, m); mz = _mm_mul_ps(mz, m); @@ -272,7 +267,7 @@ // write to character stream friend ostream & operator<<(ostream &st, const VectorPacket &v) { - return st << "[" << v.getVector(0) << "," << v.getVector(1) + return st << "[" << v.getVector(0) << "," << v.getVector(1) << "," << v.getVector(2) << "," << v.getVector(3) << ")"; }; diff -r 907a634e5c02 -r ce6abe0aeeae src/kdtree.cc --- a/src/kdtree.cc Sun Apr 27 14:19:37 2008 +0200 +++ b/src/kdtree.cc Sun Apr 27 19:56:23 2008 +0200 @@ -342,27 +342,16 @@ { __m128 a, b; /* entry/exit signed distance */ __m128 t; /* signed distance to the splitting plane */ - __m128 mask = zeros; + __m128 mask = mZero; /* if we have no tree, fall back to naive test */ if (!built) Container::packet_intersection(origin_shapes, rays, nearest_distances, nearest_shapes); - nearest_shapes[0] = NULL; - nearest_shapes[1] = NULL; - nearest_shapes[2] = NULL; - nearest_shapes[3] = NULL; + // nearest_shapes[0..4] = NULL + memset(nearest_shapes, 0, 4*sizeof(Shape*)); - //bbox.intersect_packet(rays, a, b) - if (bbox.intersect(rays[0], ((float*)&a)[0], ((float*)&b)[0])) - ((int*)&mask)[0] = -1; - if (bbox.intersect(rays[1], ((float*)&a)[1], ((float*)&b)[1])) - ((int*)&mask)[1] = -1; - if (bbox.intersect(rays[2], ((float*)&a)[2], ((float*)&b)[2])) - ((int*)&mask)[2] = -1; - if (bbox.intersect(rays[3], ((float*)&a)[3], ((float*)&b)[3])) - ((int*)&mask)[3] = -1; - + mask = bbox.intersect_packet(rays, a, b); if (!_mm_movemask_ps(mask)) return; @@ -396,7 +385,7 @@ __m128 splitVal; int axis; static const int mod3[] = {0,1,2,0,1}; - const VectorPacket invdirs = ones / rays.dir; + const VectorPacket invdirs = mOne / rays.dir; while (node) { /* loop until a leaf is found */ @@ -494,6 +483,7 @@ __m128 dists = stack[exit].t; ShapeList::iterator shape; __m128 results; + __m128 newmask = mask; for (shape = node->getShapes()->begin(); shape != node->getShapes()->end(); shape++) { results = (*shape)->intersect_packet(rays, dists); @@ -505,14 +495,12 @@ { nearest_shapes[i] = *shape; nearest_distances[i] = ((float*)&dists)[i]; + ((int*)&newmask)[i] = 0; } } } - for (int i = 0; i < 4; i++) - if (nearest_shapes[i]) - ((int*)&mask)[i] = 0; - + mask = newmask; if (!_mm_movemask_ps(mask)) return; diff -r 907a634e5c02 -r ce6abe0aeeae src/raytracer.cc --- a/src/raytracer.cc Sun Apr 27 14:19:37 2008 +0200 +++ b/src/raytracer.cc Sun Apr 27 19:56:23 2008 +0200 @@ -248,7 +248,6 @@ Shape *nearest_shapes[4]; static const Shape *origin_shapes[4] = {NULL, NULL, NULL, NULL}; top->packet_intersection(origin_shapes, rays, nearest_distances, nearest_shapes); - Ray ray; for (int i = 0; i < 4; i++) if (nearest_shapes[i] == NULL) diff -r 907a634e5c02 -r ce6abe0aeeae src/scene.cc --- a/src/scene.cc Sun Apr 27 14:19:37 2008 +0200 +++ b/src/scene.cc Sun Apr 27 19:56:23 2008 +0200 @@ -107,3 +107,45 @@ b = tfar; return true; } + +// rewrite of BBox::intersect for ray packets +__m128 BBox::intersect_packet(const RayPacket &rays, __m128 &a, __m128 &b) +{ + register __m128 tnear = mZero; + register __m128 tfar = mInf; + register __m128 t1, t2; + register __m128 mask = mAllSet; + + for (int i = 0; i < 3; i++) + { + const __m128 mL = _mm_set_ps1(L[i]); + const __m128 mH = _mm_set_ps1(H[i]); + mask = _mm_and_ps(mask, + _mm_or_ps( + _mm_or_ps(_mm_cmplt_ps(rays.dir.ma[i], mMEps), _mm_cmpgt_ps(rays.dir.ma[i], mEps)), + _mm_and_ps(_mm_cmpge_ps(rays.o.ma[i], mL), _mm_cmple_ps(rays.o.ma[i], mH)) + )); + if (!_mm_movemask_ps(mask)) + return mask; + + /* compute the intersection distance of the planes */ + t1 = _mm_div_ps(_mm_sub_ps(mL, rays.o.ma[i]), rays.dir.ma[i]); + t2 = _mm_div_ps(_mm_sub_ps(mH, rays.o.ma[i]), rays.dir.ma[i]); + + __m128 t = _mm_min_ps(t1, t2); + t2 = _mm_max_ps(t1, t2); + t1 = t; + + tnear = _mm_max_ps(tnear, t1); /* want largest Tnear */ + tfar = _mm_min_ps(tfar, t2); /* want smallest Tfar */ + + mask = _mm_and_ps(mask, + _mm_and_ps(_mm_cmple_ps(tnear, tfar), _mm_cmpge_ps(tfar, mZero))); + if (!_mm_movemask_ps(mask)) + return mask; + } + + a = tnear; + b = tfar; + return mask; +} diff -r 907a634e5c02 -r ce6abe0aeeae src/shapes.cc --- a/src/shapes.cc Sun Apr 27 14:19:37 2008 +0200 +++ b/src/shapes.cc Sun Apr 27 19:56:23 2008 +0200 @@ -377,15 +377,15 @@ const __m128 beta = _mm_add_ps(_mm_mul_ps(hv, _mm_set_ps1(bnu)), _mm_mul_ps(hu, _mm_set_ps1(bnv))); - mask = _mm_and_ps(mask, _mm_cmpge_ps(beta, zeros)); + mask = _mm_and_ps(mask, _mm_cmpge_ps(beta, mZero)); if (!_mm_movemask_ps(mask)) return mask; const __m128 gamma = _mm_add_ps(_mm_mul_ps(hu, _mm_set_ps1(cnv)), _mm_mul_ps(hv, _mm_set_ps1(cnu))); - mask = _mm_and_ps(mask, _mm_and_ps(_mm_cmpge_ps(gamma, zeros), - _mm_cmple_ps(_mm_add_ps(beta, gamma), ones))); + mask = _mm_and_ps(mask, _mm_and_ps(_mm_cmpge_ps(gamma, mZero), + _mm_cmple_ps(_mm_add_ps(beta, gamma), mOne))); if (!_mm_movemask_ps(mask)) return mask;