--- a/src/shapes.cc Sun Apr 27 19:56:23 2008 +0200
+++ b/src/shapes.cc Sun Apr 27 22:55:17 2008 +0200
@@ -54,6 +54,32 @@
return false;
}
+/* Packet (four rays in SSE lanes) sphere test. Returns the lane mask of rays that hit
+ * closer than their incoming dists; in those lanes dists is replaced by the hit distance. */
+__m128 Sphere::intersect_packet(const RayPacket &rays, __m128 &dists)
+{
+ VectorPacket V = rays.o - VectorPacket(center);
+ const __m128 d = _mm_sub_ps(mZero, dot(V, rays.dir));
+ __m128 Det = _mm_sub_ps(_mm_mul_ps(d, d),
+ _mm_sub_ps(dot(V,V), _mm_set_ps1(sqr_radius)));
+ __m128 mask = _mm_cmpgt_ps(Det, mZero); /* only lanes with Det > 0 can hit */
+ if (!_mm_movemask_ps(mask))
+ return mask;
+
+ Det = _mm_sqrt_ps(Det); /* NaN where Det < 0; those lanes are already cleared in mask */
+ const __m128 t1 = _mm_sub_ps(d, Det); /* smaller root */
+ const __m128 t2 = _mm_add_ps(d, Det); /* larger root */
+ /* the larger root has to be positive, otherwise both roots are behind the origin */
+ mask = _mm_and_ps(mask, _mm_cmpgt_ps(t2, mZero));
+ /* cond1: take t1 when 0 < t1 < dists; cond2: take t2 when t1 <= 0 and t2 < dists */
+ const __m128 cond1 = _mm_and_ps(_mm_cmpgt_ps(t1, mZero), _mm_cmplt_ps(t1, dists));
+ const __m128 cond2 = _mm_and_ps(_mm_cmple_ps(t1, mZero), _mm_cmplt_ps(t2, dists));
+ const __m128 newdists = _mm_or_ps(_mm_and_ps(cond1, t1), _mm_and_ps(cond2, t2));
+ mask = _mm_and_ps(mask, _mm_or_ps(cond1, cond2));
+ dists = _mm_or_ps(_mm_and_ps(mask, newdists), _mm_andnot_ps(mask, dists)); /* per-lane blend */
+ return mask;
+}
+
/* if there should be CSG sometimes, this may be needed... */
bool Sphere::intersect_all(const Ray &ray, Float dist, vector<Float> &allts) const
{
@@ -149,6 +175,47 @@
return false;
}
+/* Packet (four rays in SSE lanes) slab test against the box spanned by L and H.
+ * Returns the lane mask of hits; in those lanes dists is replaced by tnear. */
+__m128 Box::intersect_packet(const RayPacket &rays, __m128 &dists)
+{
+ __m128 tnear = mZero; /* running maximum of the per-axis near distances */
+ __m128 tfar = mInf; /* running minimum of the per-axis far distances */
+ __m128 mask = mAllSet;
+
+ for (int i = 0; i < 3; i++)
+ {
+ const __m128 mL = _mm_set_ps1(L[i]);
+ const __m128 mH = _mm_set_ps1(H[i]);
+ /* keep lanes whose direction component is below mMEps or above mEps,
+ * or whose origin already lies between the two planes of this axis */
+ const __m128 moving = _mm_or_ps(_mm_cmplt_ps(rays.dir.ma[i], mMEps), _mm_cmpgt_ps(rays.dir.ma[i], mEps));
+ const __m128 inside = _mm_and_ps(_mm_cmpge_ps(rays.o.ma[i], mL), _mm_cmple_ps(rays.o.ma[i], mH));
+ mask = _mm_and_ps(mask, _mm_or_ps(moving, inside));
+ if (!_mm_movemask_ps(mask))
+ return mask;
+
+ /* compute the intersection distance of the planes */
+ const __m128 t1 = _mm_div_ps(_mm_sub_ps(mL, rays.o.ma[i]), rays.dir.ma[i]);
+ const __m128 t2 = _mm_div_ps(_mm_sub_ps(mH, rays.o.ma[i]), rays.dir.ma[i]);
+ /* NOTE(review): dir == 0 with the origin exactly on a plane gives 0/0 = NaN here - confirm intended */
+ const __m128 slabNear = _mm_min_ps(t1, t2);
+ const __m128 slabFar = _mm_max_ps(t1, t2);
+
+ tnear = _mm_max_ps(tnear, slabNear); /* want largest Tnear */
+ tfar = _mm_min_ps(tfar, slabFar); /* want smallest Tfar */
+
+ mask = _mm_and_ps(mask,
+ _mm_and_ps(_mm_cmple_ps(tnear, tfar), _mm_cmpge_ps(tfar, mZero)));
+ if (!_mm_movemask_ps(mask))
+ return mask;
+ }
+ /* accept only lanes whose tnear is below the distance passed in, and blend it into dists */
+ mask = _mm_and_ps(mask, _mm_cmplt_ps(tnear, dists));
+ dists = _mm_or_ps(_mm_and_ps(mask, tnear), _mm_andnot_ps(mask, dists));
+ return mask;
+}
+
bool Box::intersect_bbox(const BBox &bbox) const
{
return (
@@ -389,9 +456,7 @@
if (!_mm_movemask_ps(mask))
return mask;
- for (int i = 0; i < 4; i++)
- if ((_mm_movemask_ps(mask)>>i)&1)
- ((float*)&dists)[i] = ((float*)&t)[i];
+ dists = _mm_or_ps(_mm_andnot_ps(mask, dists), _mm_and_ps(mask, t));
return mask;
}
#endif