Sphere, Box - RayPacket intersection pyrit
authorRadek Brich <radek.brich@devl.cz>
Sun, 27 Apr 2008 22:55:17 +0200
branchpyrit
changeset 87 1081e3dd3f3e
parent 86 ce6abe0aeeae
child 88 f7edb3b90816
Sphere, Box - RayPacket intersection replace 5x oversampling with 4x uniform oversampling
include/sampler.h
include/shapes.h
include/vector.h
src/kdtree.cc
src/sampler.cc
src/shapes.cc
--- a/include/sampler.h	Sun Apr 27 19:56:23 2008 +0200
+++ b/include/sampler.h	Sun Apr 27 22:55:17 2008 +0200
@@ -79,7 +79,7 @@
 {
 	int phase;
 	int subsample;  // 0,1 = no, 1+ = size of sampling grid
-	int oversample; // 0 = no, 1 = 5x, 2 = 9x, 3 = 16x
+	int oversample; // 0 = no, 1 = 4x, 2 = 9x, 3 = 16x
 	int sx,sy,osa_samp; // current sample properties
 public:
 	DefaultSampler(Float *abuffer, int &aw, int &ah):
--- a/include/shapes.h	Sun Apr 27 19:56:23 2008 +0200
+++ b/include/shapes.h	Sun Apr 27 22:55:17 2008 +0200
@@ -100,6 +100,7 @@
 		sqr_radius(aradius*aradius), inv_radius(1.0f/aradius)
 		{ material = amaterial; }
 	bool intersect(const Ray &ray, Float &dist) const;
+	__m128 intersect_packet(const RayPacket &rays, __m128 &dists);
 	bool intersect_all(const Ray &ray, Float dist, vector<Float> &allts) const;
 	bool intersect_bbox(const BBox &bbox) const;
 	const Vector3 normal(const Vector3 &P) const { return (P - center) * inv_radius; };
@@ -125,6 +126,7 @@
 		material = amaterial;
 	};
 	bool intersect(const Ray &ray, Float &dist) const;
+	__m128 intersect_packet(const RayPacket &rays, __m128 &dists);
 	bool intersect_all(const Ray &ray, Float dist, vector<Float> &allts) const { return false; };
 	bool intersect_bbox(const BBox &bbox) const;
 	const Vector3 normal(const Vector3 &P) const;
--- a/include/vector.h	Sun Apr 27 19:56:23 2008 +0200
+++ b/include/vector.h	Sun Apr 27 22:55:17 2008 +0200
@@ -211,6 +211,8 @@
 	VectorPacket() {};
 	VectorPacket(__m128 ax, __m128 ay, __m128 az):
 		mx(ax), my(ay), mz(az) {};
+	VectorPacket(const Vector3 &v):
+		mx(_mm_set_ps1(v.x)), my(_mm_set_ps1(v.y)), mz(_mm_set_ps1(v.z)) {};
 
 	Vector3 getVector(int i) const
 	{
@@ -232,6 +234,15 @@
 		mz = _mm_mul_ps(mz, m);
 	};
 
+	// dot product
+	friend __m128 dot(const VectorPacket &a, const VectorPacket &b)
+	{
+		return _mm_add_ps(_mm_add_ps(
+			_mm_mul_ps(a.mx, b.mx),
+			_mm_mul_ps(a.my, b.my)),
+			_mm_mul_ps(a.mz, b.mz));
+	};
+
 	friend VectorPacket operator+(const VectorPacket &a, const VectorPacket &b)
 	{
 		return VectorPacket(
--- a/src/kdtree.cc	Sun Apr 27 19:56:23 2008 +0200
+++ b/src/kdtree.cc	Sun Apr 27 22:55:17 2008 +0200
@@ -365,15 +365,14 @@
 	stack[entry].t = a;
 
 	/* distinguish between internal and external origin of a ray*/
-	stack[entry].pb = rays.o + rays.dir * a; /* external */
-	for (int i = 0; i < 4; i++)
-		if (((float*)&a)[i] < 0.0)
-		{
-			/* internal */
-			stack[entry].pb.x[i] = rays.o.x[i];
-			stack[entry].pb.y[i] = rays.o.y[i];
-			stack[entry].pb.z[i] = rays.o.z[i];
-		}
+	t = _mm_cmplt_ps(a, mZero);
+	stack[entry].pb = rays.o + rays.dir * a;
+	stack[entry].pb.mx = _mm_or_ps(_mm_andnot_ps(t, stack[entry].pb.mx),
+		_mm_and_ps(t, rays.o.mx));
+	stack[entry].pb.my = _mm_or_ps(_mm_andnot_ps(t, stack[entry].pb.my),
+		_mm_and_ps(t, rays.o.my));
+	stack[entry].pb.mz = _mm_or_ps(_mm_andnot_ps(t, stack[entry].pb.mz),
+		_mm_and_ps(t, rays.o.mz));
 
 	/* setup initial exit point in the stack */
 	stack[exit].t = b;
@@ -396,11 +395,11 @@
 			axis = node->getAxis();
 
 			// mask out invalid rays with near > far
-			__m128 curmask = _mm_and_ps(mask, _mm_cmple_ps(stack[entry].t, stack[exit].t));
-			__m128 entry_lt = _mm_cmplt_ps(stack[entry].pb.ma[axis], splitVal);
-			__m128 entry_gt = _mm_cmpgt_ps(stack[entry].pb.ma[axis], splitVal);
-			__m128 exit_lt = _mm_cmplt_ps(stack[exit].pb.ma[axis], splitVal);
-			__m128 exit_gt = _mm_cmpgt_ps(stack[exit].pb.ma[axis], splitVal);
+			const __m128 curmask = _mm_and_ps(mask, _mm_cmple_ps(stack[entry].t, stack[exit].t));
+			const __m128 entry_lt = _mm_cmplt_ps(stack[entry].pb.ma[axis], splitVal);
+			const __m128 entry_gt = _mm_cmpgt_ps(stack[entry].pb.ma[axis], splitVal);
+			const __m128 exit_lt = _mm_cmplt_ps(stack[exit].pb.ma[axis], splitVal);
+			const __m128 exit_gt = _mm_cmpgt_ps(stack[exit].pb.ma[axis], splitVal);
 
 			// if all of
 			// stack[entry].pb[axis] <= splitVal,
@@ -488,7 +487,8 @@
 		{
 			results = (*shape)->intersect_packet(rays, dists);
 			int valid = _mm_movemask_ps(
-				_mm_and_ps(mask, _mm_and_ps(results, _mm_cmpge_ps(dists, _mm_sub_ps(stack[entry].t, mEps)))));
+				_mm_and_ps(mask, _mm_and_ps(results,
+				_mm_cmpge_ps(dists, _mm_sub_ps(stack[entry].t, mEps)))));
 			for (int i = 0; i < 4; i++)
 			{
 				if (*shape != origin_shapes[i] && ((valid>>i)&1))
--- a/src/sampler.cc	Sun Apr 27 19:56:23 2008 +0200
+++ b/src/sampler.cc	Sun Apr 27 22:55:17 2008 +0200
@@ -178,9 +178,9 @@
 	else if (phase == 2)
 	{
 		/* grid oversampling */
-		static const int gridsamples[] = {1,5,9,16};
-		static const Float osa5x[] = {0.0, -0.4, +0.4, +0.4, -0.4};
-		static const Float osa5y[] = {0.0, -0.4, -0.4, +0.4, +0.4};
+		static const int gridsamples[] = {1,4,9,16};
+		static const Float osa4x[] = {-0.25, +0.25, +0.25, -0.25};
+		static const Float osa4y[] = {-0.25, -0.25, +0.25, +0.25};
 		static const Float osa9x[] = {-0.34,  0.00, +0.34,
 			-0.34,  0.00, +0.34, -0.34,  0.00, +0.34};
 		static const Float osa9y[] = {-0.34, -0.34, -0.34,
@@ -191,8 +191,8 @@
 		static const Float osa16y[] = {-0.375, -0.375, -0.375, -0.375,
 			-0.125, -0.125, -0.125, -0.125, +0.125, +0.125, +0.125, +0.125,
 			+0.375, +0.375, +0.375, +0.375};
-		static const Float *osaSx[] = {NULL, osa5x, osa9x, osa16x};
-		static const Float *osaSy[] = {NULL, osa5y, osa9y, osa16y};
+		static const Float *osaSx[] = {NULL, osa4x, osa9x, osa16x};
+		static const Float *osaSy[] = {NULL, osa4y, osa9y, osa16y};
 		const int samples = gridsamples[oversample];
 		const Float *osax = osaSx[oversample];
 		const Float *osay = osaSy[oversample];
--- a/src/shapes.cc	Sun Apr 27 19:56:23 2008 +0200
+++ b/src/shapes.cc	Sun Apr 27 22:55:17 2008 +0200
@@ -54,6 +54,32 @@
 	return false;
 }
 
+__m128 Sphere::intersect_packet(const RayPacket &rays, __m128 &dists)
+{
+	VectorPacket V = rays.o - VectorPacket(center);
+	register __m128 d = _mm_sub_ps(mZero, dot(V, rays.dir));
+	register __m128 Det = _mm_sub_ps(_mm_mul_ps(d, d),
+		_mm_sub_ps(dot(V,V), _mm_set_ps1(sqr_radius)));
+	register __m128 t1, t2, mask;
+
+	mask = _mm_cmpgt_ps(Det, mZero);
+	if (!_mm_movemask_ps(mask))
+		return mask;
+
+	Det = _mm_sqrt_ps(Det);
+	t1 = _mm_sub_ps(d, Det);
+	t2 = _mm_add_ps(d, Det);
+
+	mask = _mm_and_ps(mask, _mm_cmpgt_ps(t2, mZero));
+
+	const __m128 cond1 = _mm_and_ps(_mm_cmpgt_ps(t1, mZero), _mm_cmplt_ps(t1, dists));
+	const __m128 cond2 = _mm_and_ps(_mm_cmple_ps(t1, mZero), _mm_cmplt_ps(t2, dists));
+	const __m128 newdists = _mm_or_ps(_mm_and_ps(cond1, t1), _mm_and_ps(cond2, t2));
+	mask = _mm_and_ps(mask, _mm_or_ps(cond1, cond2));
+	dists = _mm_or_ps(_mm_and_ps(mask, newdists), _mm_andnot_ps(mask, dists));
+	return mask;
+}
+
 /* if there should be CSG sometimes, this may be needed... */
 bool Sphere::intersect_all(const Ray &ray, Float dist, vector<Float> &allts) const
 {
@@ -149,6 +175,47 @@
 	return false;
 }
 
+__m128 Box::intersect_packet(const RayPacket &rays, __m128 &dists)
+{
+	register __m128 tnear = mZero;
+	register __m128 tfar = mInf;
+	register __m128 t1, t2;
+	register __m128 mask = mAllSet;
+
+	for (int i = 0; i < 3; i++)
+	{
+		const __m128 mL = _mm_set_ps1(L[i]);
+		const __m128 mH = _mm_set_ps1(H[i]);
+		mask = _mm_and_ps(mask,
+		_mm_or_ps(
+		_mm_or_ps(_mm_cmplt_ps(rays.dir.ma[i], mMEps), _mm_cmpgt_ps(rays.dir.ma[i], mEps)),
+		_mm_and_ps(_mm_cmpge_ps(rays.o.ma[i], mL), _mm_cmple_ps(rays.o.ma[i], mH))
+		));
+		if (!_mm_movemask_ps(mask))
+			return mask;
+
+		/* compute the intersection distance of the planes */
+		t1 = _mm_div_ps(_mm_sub_ps(mL, rays.o.ma[i]), rays.dir.ma[i]);
+		t2 = _mm_div_ps(_mm_sub_ps(mH, rays.o.ma[i]), rays.dir.ma[i]);
+
+		__m128 t = _mm_min_ps(t1, t2);
+		t2 = _mm_max_ps(t1, t2);
+		t1 = t;
+
+		tnear = _mm_max_ps(tnear, t1);	/* want largest Tnear */
+		tfar = _mm_min_ps(tfar, t2);	/* want smallest Tfar */
+
+		mask = _mm_and_ps(mask,
+			_mm_and_ps(_mm_cmple_ps(tnear, tfar), _mm_cmpge_ps(tfar, mZero)));
+		if (!_mm_movemask_ps(mask))
+			return mask;
+	}
+
+	mask = _mm_and_ps(mask, _mm_cmplt_ps(tnear, dists));
+	dists = _mm_or_ps(_mm_and_ps(mask, tnear), _mm_andnot_ps(mask, dists));
+	return mask;
+}
+
 bool Box::intersect_bbox(const BBox &bbox) const
 {
 	return (
@@ -389,9 +456,7 @@
 	if (!_mm_movemask_ps(mask))
 		return mask;
 
-	for (int i = 0; i < 4; i++)
-		if ((_mm_movemask_ps(mask)>>i)&1)
-			((float*)&dists)[i] = ((float*)&t)[i];
+	dists = _mm_or_ps(_mm_andnot_ps(mask, dists), _mm_and_ps(mask, t));
 	return mask;
 }
 #endif