src/shapes.cc
branch pyrit
changeset 92 9af5c039b678
parent 91 9d66d323c354
child 93 96d65f841791
--- a/src/shapes.cc	Fri May 02 13:27:47 2008 +0200
+++ b/src/shapes.cc	Mon May 05 15:31:14 2008 +0200
@@ -54,30 +54,29 @@
 	return false;
 }
 
-#ifndef NO_SSE
-__m128 Sphere::intersect_packet(const RayPacket &rays, __m128 &dists)
+#ifndef NO_SIMD
+mfloat4 Sphere::intersect_packet(const RayPacket &rays, mfloat4 &dists) const
 {
 	VectorPacket V = rays.o - VectorPacket(center);
-	register __m128 d = _mm_sub_ps(mZero, dot(V, rays.dir));
-	register __m128 Det = _mm_sub_ps(_mm_mul_ps(d, d),
-		_mm_sub_ps(dot(V,V), _mm_set_ps1(sqr_radius)));
-	register __m128 t1, t2, mask;
+	register mfloat4 d = msub(mZero, dot(V, rays.dir));
+	register mfloat4 Det = msub(mmul(d, d), msub(dot(V,V), mset1(sqr_radius)));
+	register mfloat4 t1, t2, mask;
 
-	mask = _mm_cmpgt_ps(Det, mZero);
-	if (!_mm_movemask_ps(mask))
+	mask = mcmpgt(Det, mZero);
+	if (!mmovemask(mask))
 		return mask;
 
-	Det = _mm_sqrt_ps(Det);
-	t1 = _mm_sub_ps(d, Det);
-	t2 = _mm_add_ps(d, Det);
+	Det = msqrt(Det);
+	t1 = msub(d, Det);
+	t2 = madd(d, Det);
 
-	mask = _mm_and_ps(mask, _mm_cmpgt_ps(t2, mZero));
+	mask = mand(mask, mcmpgt(t2, mZero));
 
-	const __m128 cond1 = _mm_and_ps(_mm_cmpgt_ps(t1, mZero), _mm_cmplt_ps(t1, dists));
-	const __m128 cond2 = _mm_and_ps(_mm_cmple_ps(t1, mZero), _mm_cmplt_ps(t2, dists));
-	const __m128 newdists = _mm_or_ps(_mm_and_ps(cond1, t1), _mm_and_ps(cond2, t2));
-	mask = _mm_and_ps(mask, _mm_or_ps(cond1, cond2));
-	dists = _mm_or_ps(_mm_and_ps(mask, newdists), _mm_andnot_ps(mask, dists));
+	const mfloat4 cond1 = mand(mcmpgt(t1, mZero), mcmplt(t1, dists));
+	const mfloat4 cond2 = mand(mcmple(t1, mZero), mcmplt(t2, dists));
+	const mfloat4 newdists = mor(mand(cond1, t1), mand(cond2, t2));
+	mask = mand(mask, mor(cond1, cond2));
+	dists = mselect(mask, newdists, dists);
 	return mask;
 }
 #endif
@@ -177,45 +176,32 @@
 	return false;
 }
 
-#ifndef NO_SSE
-__m128 Box::intersect_packet(const RayPacket &rays, __m128 &dists)
+#ifndef NO_SIMD
+mfloat4 Box::intersect_packet(const RayPacket &rays, mfloat4 &dists) const
 {
-	register __m128 tnear = mZero;
-	register __m128 tfar = mInf;
-	register __m128 t1, t2;
-	register __m128 mask = mAllSet;
-
-	for (int i = 0; i < 3; i++)
-	{
-		const __m128 mL = _mm_set_ps1(L[i]);
-		const __m128 mH = _mm_set_ps1(H[i]);
-		mask = _mm_and_ps(mask,
-		_mm_or_ps(
-		_mm_or_ps(_mm_cmplt_ps(rays.dir.ma[i], mMEps), _mm_cmpgt_ps(rays.dir.ma[i], mEps)),
-		_mm_and_ps(_mm_cmpge_ps(rays.o.ma[i], mL), _mm_cmple_ps(rays.o.ma[i], mH))
-		));
-		if (!_mm_movemask_ps(mask))
-			return mask;
+	mfloat4 origin = rays.o.ma[0];
+	mfloat4 invdir = rays.invdir.ma[0];
+	mfloat4 t1 = mmul(msub(mset1(L[0]), origin), invdir);
+	mfloat4 t2 = mmul(msub(mset1(H[0]), origin), invdir);
+	mfloat4 tmin = mmin(t1, t2);
+	mfloat4 tmax = mmax(t1, t2);
 
-		/* compute the intersection distance of the planes */
-		t1 = _mm_div_ps(_mm_sub_ps(mL, rays.o.ma[i]), rays.dir.ma[i]);
-		t2 = _mm_div_ps(_mm_sub_ps(mH, rays.o.ma[i]), rays.dir.ma[i]);
-
-		__m128 t = _mm_min_ps(t1, t2);
-		t2 = _mm_max_ps(t1, t2);
-		t1 = t;
+	origin = rays.o.ma[1];
+	invdir = rays.invdir.ma[1];
+	t1 = mmul(msub(mset1(L[1]), origin), invdir);
+	t2 = mmul(msub(mset1(H[1]), origin), invdir);
+	tmin = mmax(mmin(t1, t2), tmin);
+	tmax = mmin(mmax(t1, t2), tmax);
 
-		tnear = _mm_max_ps(tnear, t1);	/* want largest Tnear */
-		tfar = _mm_min_ps(tfar, t2);	/* want smallest Tfar */
+	origin = rays.o.ma[2];
+	invdir = rays.invdir.ma[2];
+	t1 = mmul(msub(mset1(L[2]), origin), invdir);
+	t2 = mmul(msub(mset1(H[2]), origin), invdir);
+	tmin = mmax(mmin(t1, t2), tmin);
+	tmax = mmin(mmax(t1, t2), tmax);
 
-		mask = _mm_and_ps(mask,
-			_mm_and_ps(_mm_cmple_ps(tnear, tfar), _mm_cmpge_ps(tfar, mZero)));
-		if (!_mm_movemask_ps(mask))
-			return mask;
-	}
-
-	mask = _mm_and_ps(mask, _mm_cmplt_ps(tnear, dists));
-	dists = _mm_or_ps(_mm_and_ps(mask, tnear), _mm_andnot_ps(mask, dists));
+	mfloat4 mask = mand(mand(mcmplt(tmin, tmax), mcmpgt(tmax, mZero)), mcmplt(tmin, dists));
+	dists = mselect(mask, tmin, dists);
 	return mask;
 }
 #endif
@@ -419,48 +405,48 @@
 #endif
 }
 
-#if not defined(NO_SSE) and defined(TRI_BARI_PRE)
-__m128 Triangle::intersect_packet(const RayPacket &rays, __m128 &dists)
+#if !defined(NO_SIMD) && defined(TRI_BARI_PRE)
+mfloat4 Triangle::intersect_packet(const RayPacket &rays, mfloat4 &dists) const
 {
 	static const int modulo3[5] = {0,1,2,0,1};
 	register const int u = modulo3[k+1];
 	register const int v = modulo3[k+2];
-	__m128 mask;
+	mfloat4 mask;
 
-	const __m128 t = _mm_div_ps(
-		_mm_sub_ps(_mm_sub_ps(
-		_mm_sub_ps(_mm_set_ps1(nd), rays.o.ma[k]),
-		_mm_mul_ps(_mm_set_ps1(nu), rays.o.ma[u])
-		), _mm_mul_ps(_mm_set_ps1(nv), rays.o.ma[v])),
-		_mm_add_ps(rays.dir.ma[k],
-		_mm_add_ps(_mm_mul_ps(_mm_set_ps1(nu), rays.dir.ma[u]),
-		_mm_mul_ps(_mm_set_ps1(nv), rays.dir.ma[v])))
+	const mfloat4 t = mdiv(
+		msub(msub(
+		msub(mset1(nd), rays.o.ma[k]),
+		mmul(mset1(nu), rays.o.ma[u])
+		), mmul(mset1(nv), rays.o.ma[v])),
+		madd(rays.dir.ma[k],
+		madd(mmul(mset1(nu), rays.dir.ma[u]),
+		mmul(mset1(nv), rays.dir.ma[v])))
 		);
 
-	mask = _mm_and_ps(_mm_cmplt_ps(t, dists), _mm_cmpge_ps(t, mEps));
-	if (!_mm_movemask_ps(mask))
+	mask = mand(mcmplt(t, dists), mcmpge(t, mEps));
+	if (!mmovemask(mask))
 		return mask;
 
-	const __m128 hu = _mm_sub_ps(_mm_add_ps(rays.o.ma[u],
-		_mm_mul_ps(t, rays.dir.ma[u])), _mm_set_ps1(A->P[u]));
-	const __m128 hv = _mm_sub_ps(_mm_add_ps(rays.o.ma[v],
-		_mm_mul_ps(t, rays.dir.ma[v])), _mm_set_ps1(A->P[v]));
-	const __m128 beta = _mm_add_ps(_mm_mul_ps(hv, _mm_set_ps1(bnu)),
-		_mm_mul_ps(hu, _mm_set_ps1(bnv)));
+	const mfloat4 hu = msub(madd(rays.o.ma[u],
+		mmul(t, rays.dir.ma[u])), mset1(A->P[u]));
+	const mfloat4 hv = msub(madd(rays.o.ma[v],
+		mmul(t, rays.dir.ma[v])), mset1(A->P[v]));
+	const mfloat4 beta = madd(mmul(hv, mset1(bnu)),
+		mmul(hu, mset1(bnv)));
 
-	mask = _mm_and_ps(mask, _mm_cmpge_ps(beta, mZero));
-	if (!_mm_movemask_ps(mask))
+	mask = mand(mask, mcmpge(beta, mZero));
+	if (!mmovemask(mask))
 		return mask;
 
-	const __m128 gamma = _mm_add_ps(_mm_mul_ps(hu, _mm_set_ps1(cnv)),
-		_mm_mul_ps(hv, _mm_set_ps1(cnu)));
+	const mfloat4 gamma = madd(mmul(hu, mset1(cnv)),
+		mmul(hv, mset1(cnu)));
 
-	mask = _mm_and_ps(mask, _mm_and_ps(_mm_cmpge_ps(gamma, mZero),
-		_mm_cmple_ps(_mm_add_ps(beta, gamma), mOne)));
-	if (!_mm_movemask_ps(mask))
+	mask = mand(mask, mand(mcmpge(gamma, mZero),
+		mcmple(madd(beta, gamma), mOne)));
+	if (!mmovemask(mask))
 		return mask;
 
-	dists = _mm_or_ps(_mm_andnot_ps(mask, dists), _mm_and_ps(mask, t));
+	dists = mselect(mask, t, dists);
 	return mask;
 }
 #endif
@@ -580,13 +566,13 @@
 		v=v0[q];
 		if(N[q]>0.0f)
 		{
-			vmin.cell[q]=-boxhalfsize[q] - v;
-			vmax.cell[q]= boxhalfsize[q] - v;
+			vmin[q]=-boxhalfsize[q] - v;
+			vmax[q]= boxhalfsize[q] - v;
 		}
 		else
 		{
-			vmin.cell[q]= boxhalfsize[q] - v;
-			vmax.cell[q]=-boxhalfsize[q] - v;
+			vmin[q]= boxhalfsize[q] - v;
+			vmax[q]=-boxhalfsize[q] - v;
 		}
 	}
 	if(dot(N,vmin)>0.0f) return false;