implement triangle packet intersection pyrit
author Radek Brich <radek.brich@devl.cz>
Sun, 27 Apr 2008 14:19:37 +0200
branch pyrit
changeset 85 907a634e5c02
parent 84 6f7fe14782c2
child 86 ce6abe0aeeae
implement triangle packet intersection
SConstruct
ccdemos/realtime_bunny.cc
include/shapes.h
src/kdtree.cc
src/shapes.cc
--- a/SConstruct	Sun Apr 27 09:44:49 2008 +0200
+++ b/SConstruct	Sun Apr 27 14:19:37 2008 +0200
@@ -91,7 +91,7 @@
 	global cpu, cpuflags_gcc, cpuflags_intelc
 	context.Message('Checking CPU arch and flags... ')
 	env.Execute('@$CC tools/cpuflags.c -o tools/cpuflags')
-	(cpu, cpuflags_gcc, cpuflags_intelc) = os.popen('tools/cpuflags %s %s'
+	(cpu, cpuflags_gcc, cpuflags_intelc) = os.popen('tools'+os.sep+'cpuflags %s %s'
 		% (''.join(gccversion.rsplit('.',1)), intelcversion) ).read().split('\n')[:3]
 	context.Result(cpu)
 	return True
--- a/ccdemos/realtime_bunny.cc	Sun Apr 27 09:44:49 2008 +0200
+++ b/ccdemos/realtime_bunny.cc	Sun Apr 27 14:19:37 2008 +0200
@@ -1,5 +1,5 @@
 #include "raytracer.h"
-#include "octree.h"
+#include "kdtree.h"
 
 #include "common_sdl.h"
 #include "common_ply.h"
@@ -7,7 +7,7 @@
 int main(int argc, char **argv)
 {
 	Raytracer rt;
-	Octree top;
+	KdTree top;
 	Camera cam;
 
 	rt.setMaxDepth(0);
--- a/include/shapes.h	Sun Apr 27 09:44:49 2008 +0200
+++ b/include/shapes.h	Sun Apr 27 14:19:37 2008 +0200
@@ -55,12 +55,14 @@
 	// first intersection point
 	virtual bool intersect(const Ray &ray, Float &dist) const = 0;
 
-	virtual void intersect_packet(const RayPacket &rays, __m128 &dists, bool *results)
+	virtual __m128 intersect_packet(const RayPacket &rays, __m128 &dists)
 	{
-		results[0] = intersect(rays[0], ((float*)&dists)[0]);
-		results[1] = intersect(rays[1], ((float*)&dists)[1]);
-		results[2] = intersect(rays[2], ((float*)&dists)[2]);
-		results[3] = intersect(rays[3], ((float*)&dists)[3]);
+		__m128 results;
+		((int*)&results)[0] = intersect(rays[0], ((float*)&dists)[0]) ? -1 : 0;
+		((int*)&results)[1] = intersect(rays[1], ((float*)&dists)[1]) ? -1 : 0;
+		((int*)&results)[2] = intersect(rays[2], ((float*)&dists)[2]) ? -1 : 0;
+		((int*)&results)[3] = intersect(rays[3], ((float*)&dists)[3]) ? -1 : 0;
+		return results;
 	};
 
 	// all intersections (only for CSG)
@@ -203,6 +205,9 @@
 	Triangle() {};
 	Triangle(Vertex *aA, Vertex *aB, Vertex *aC, Material *amaterial);
 	bool intersect(const Ray &ray, Float &dist) const;
+#ifdef TRI_BARI_PRE
+	__m128 intersect_packet(const RayPacket &rays, __m128 &dists);
+#endif
 	bool intersect_all(const Ray &ray, Float dist, vector<Float> &allts) const {return false;};
 	bool intersect_bbox(const BBox &bbox) const;
 	const Vector3 normal(const Vector3 &P) const { return (material->smooth ? smooth_normal(P) : N); };
--- a/src/kdtree.cc	Sun Apr 27 09:44:49 2008 +0200
+++ b/src/kdtree.cc	Sun Apr 27 14:19:37 2008 +0200
@@ -450,7 +450,7 @@
 				// fall back to single rays
 				// FIXME: split rays and continue
 				for (int i = 0; i < 4; i++)
-					if(!nearest_shapes[i])
+					if (!nearest_shapes[i])
 						nearest_shapes[i] = nearest_intersection(origin_shapes[i],
 							rays[i], nearest_distances[i]);
 				return;
@@ -493,34 +493,20 @@
 		/* current node is the leaf . . . empty or full */
 		__m128 dists = stack[exit].t;
 		ShapeList::iterator shape;
+		__m128 results;
 		for (shape = node->getShapes()->begin(); shape != node->getShapes()->end(); shape++)
 		{
-			for (int i = 0; i < 4; i++)
-				if ( ((_mm_movemask_ps(mask)>>(i))&1) &&
-				((float*)&stack[entry].t)[i] < ((float*)&stack[exit].t)[i] &&
-				*shape != origin_shapes[i] &&
-				(*shape)->intersect(rays[i], ((float*)&dists)[i])
-				&& ((float*)&dists)[i] >= ((float*)&stack[entry].t)[i] - Eps)
-				{
-					nearest_shapes[i] = *shape;
-					nearest_distances[i] = ((float*)&dists)[i];
-				}
-
-			/*
-			bool results[4];
-			(*shape)->intersect_packet(rays, dists, results);
-			int greater_than_entry = _mm_movemask_ps(
-				_mm_and_ps(_mm_cmpge_ps(dists, _mm_sub_ps(stack[entry].t, mEps)), mask));
+			results = (*shape)->intersect_packet(rays, dists);
+			int valid = _mm_movemask_ps(
+				_mm_and_ps(mask, _mm_and_ps(results, _mm_cmpge_ps(dists, _mm_sub_ps(stack[entry].t, mEps)))));
 			for (int i = 0; i < 4; i++)
 			{
-				if (results[i] //&& *shape != origin_shapes[i]
-				&& ((greater_than_entry>>(3-i))&1))
+				if (*shape != origin_shapes[i] && ((valid>>i)&1))
 				{
 					nearest_shapes[i] = *shape;
 					nearest_distances[i] = ((float*)&dists)[i];
 				}
 			}
-			*/
 		}
 
 		for (int i = 0; i < 4; i++)
--- a/src/shapes.cc	Sun Apr 27 09:44:49 2008 +0200
+++ b/src/shapes.cc	Sun Apr 27 14:19:37 2008 +0200
@@ -348,6 +348,54 @@
 #endif
 }
 
+#ifdef TRI_BARI_PRE
+__m128 Triangle::intersect_packet(const RayPacket &rays, __m128 &dists)
+{
+	static const int modulo3[5] = {0,1,2,0,1};
+	register const int u = modulo3[k+1];
+	register const int v = modulo3[k+2];
+	__m128 mask;
+
+	const __m128 t = _mm_div_ps(
+		_mm_sub_ps(_mm_sub_ps(
+		_mm_sub_ps(_mm_set_ps1(nd), rays.o.ma[k]),
+		_mm_mul_ps(_mm_set_ps1(nu), rays.o.ma[u])
+		), _mm_mul_ps(_mm_set_ps1(nv), rays.o.ma[v])),
+		_mm_add_ps(rays.dir.ma[k],
+		_mm_add_ps(_mm_mul_ps(_mm_set_ps1(nu), rays.dir.ma[u]),
+		_mm_mul_ps(_mm_set_ps1(nv), rays.dir.ma[v])))
+		);
+
+	mask = _mm_and_ps(_mm_cmplt_ps(t, dists), _mm_cmpge_ps(t, mEps));
+	if (!_mm_movemask_ps(mask))
+		return mask;
+
+	const __m128 hu = _mm_sub_ps(_mm_add_ps(rays.o.ma[u],
+		_mm_mul_ps(t, rays.dir.ma[u])), _mm_set_ps1(A->P[u]));
+	const __m128 hv = _mm_sub_ps(_mm_add_ps(rays.o.ma[v],
+		_mm_mul_ps(t, rays.dir.ma[v])), _mm_set_ps1(A->P[v]));
+	const __m128 beta = _mm_add_ps(_mm_mul_ps(hv, _mm_set_ps1(bnu)),
+		_mm_mul_ps(hu, _mm_set_ps1(bnv)));
+
+	mask = _mm_and_ps(mask, _mm_cmpge_ps(beta, zeros));
+	if (!_mm_movemask_ps(mask))
+		return mask;
+
+	const __m128 gamma = _mm_add_ps(_mm_mul_ps(hu, _mm_set_ps1(cnv)),
+		_mm_mul_ps(hv, _mm_set_ps1(cnu)));
+
+	mask = _mm_and_ps(mask, _mm_and_ps(_mm_cmpge_ps(gamma, zeros),
+		_mm_cmple_ps(_mm_add_ps(beta, gamma), ones)));
+	if (!_mm_movemask_ps(mask))
+		return mask;
+
+	for (int i = 0; i < 4; i++)
+		if ((_mm_movemask_ps(mask)>>i)&1)
+			((float*)&dists)[i] = ((float*)&t)[i];
+	return mask;
+}
+#endif
+
 bool Triangle::intersect_bbox(const BBox &bbox) const
 {
 	const Vector3 boxcenter = (bbox.L+bbox.H)*0.5;