--- a/SConstruct Sun Apr 27 09:44:49 2008 +0200
+++ b/SConstruct Sun Apr 27 14:19:37 2008 +0200
@@ -91,7 +91,7 @@
global cpu, cpuflags_gcc, cpuflags_intelc
context.Message('Checking CPU arch and flags... ')
env.Execute('@$CC tools/cpuflags.c -o tools/cpuflags')
- (cpu, cpuflags_gcc, cpuflags_intelc) = os.popen('tools/cpuflags %s %s'
+ (cpu, cpuflags_gcc, cpuflags_intelc) = os.popen('tools'+os.sep+'cpuflags %s %s'
% (''.join(gccversion.rsplit('.',1)), intelcversion) ).read().split('\n')[:3]
context.Result(cpu)
return True
--- a/ccdemos/realtime_bunny.cc Sun Apr 27 09:44:49 2008 +0200
+++ b/ccdemos/realtime_bunny.cc Sun Apr 27 14:19:37 2008 +0200
@@ -1,5 +1,5 @@
#include "raytracer.h"
-#include "octree.h"
+#include "kdtree.h"
#include "common_sdl.h"
#include "common_ply.h"
@@ -7,7 +7,7 @@
int main(int argc, char **argv)
{
Raytracer rt;
- Octree top;
+ KdTree top;
Camera cam;
rt.setMaxDepth(0);
--- a/include/shapes.h Sun Apr 27 09:44:49 2008 +0200
+++ b/include/shapes.h Sun Apr 27 14:19:37 2008 +0200
@@ -55,12 +55,14 @@
// first intersection point
virtual bool intersect(const Ray &ray, Float &dist) const = 0;
- virtual void intersect_packet(const RayPacket &rays, __m128 &dists, bool *results)
+ // packet fallback: one intersect() call per ray; returns all-ones lanes for hits, zero lanes otherwise
+ virtual __m128 intersect_packet(const RayPacket &rays, __m128 &dists)
{
- results[0] = intersect(rays[0], ((float*)&dists)[0]);
- results[1] = intersect(rays[1], ((float*)&dists)[1]);
- results[2] = intersect(rays[2], ((float*)&dists)[2]);
- results[3] = intersect(rays[3], ((float*)&dists)[3]);
+ float hit[4];
+ for (int i = 0; i < 4; i++)
+     hit[i] = intersect(rays[i], ((float*)&dists)[i]) ? 1.0f : 0.0f;
+ // comparing with zero widens each 1.0f flag into a full lane mask, without writing the __m128 through int*
+ return _mm_cmpneq_ps(_mm_loadu_ps(hit), _mm_setzero_ps());
};
// all intersections (only for CSG)
@@ -203,6 +205,9 @@
Triangle() {};
Triangle(Vertex *aA, Vertex *aB, Vertex *aC, Material *amaterial);
bool intersect(const Ray &ray, Float &dist) const;
+#ifdef TRI_BARI_PRE
+ __m128 intersect_packet(const RayPacket &rays, __m128 &dists);
+#endif
bool intersect_all(const Ray &ray, Float dist, vector<Float> &allts) const {return false;};
bool intersect_bbox(const BBox &bbox) const;
const Vector3 normal(const Vector3 &P) const { return (material->smooth ? smooth_normal(P) : N); };
--- a/src/kdtree.cc Sun Apr 27 09:44:49 2008 +0200
+++ b/src/kdtree.cc Sun Apr 27 14:19:37 2008 +0200
@@ -450,7 +450,7 @@
// fall back to single rays
// FIXME: split rays and continue
for (int i = 0; i < 4; i++)
- if(!nearest_shapes[i])
+ if (!nearest_shapes[i])
nearest_shapes[i] = nearest_intersection(origin_shapes[i],
rays[i], nearest_distances[i]);
return;
@@ -493,34 +493,20 @@
/* current node is the leaf . . . empty or full */
__m128 dists = stack[exit].t;
ShapeList::iterator shape;
+ __m128 results;
for (shape = node->getShapes()->begin(); shape != node->getShapes()->end(); shape++)
{
- for (int i = 0; i < 4; i++)
- if ( ((_mm_movemask_ps(mask)>>(i))&1) &&
- ((float*)&stack[entry].t)[i] < ((float*)&stack[exit].t)[i] &&
- *shape != origin_shapes[i] &&
- (*shape)->intersect(rays[i], ((float*)&dists)[i])
- && ((float*)&dists)[i] >= ((float*)&stack[entry].t)[i] - Eps)
- {
- nearest_shapes[i] = *shape;
- nearest_distances[i] = ((float*)&dists)[i];
- }
-
- /*
- bool results[4];
- (*shape)->intersect_packet(rays, dists, results);
- int greater_than_entry = _mm_movemask_ps(
- _mm_and_ps(_mm_cmpge_ps(dists, _mm_sub_ps(stack[entry].t, mEps)), mask));
+ results = (*shape)->intersect_packet(rays, dists);
+ int valid = _mm_movemask_ps(
+ _mm_and_ps(mask, _mm_and_ps(results, _mm_cmpge_ps(dists, _mm_sub_ps(stack[entry].t, mEps)))));
for (int i = 0; i < 4; i++)
{
- if (results[i] //&& *shape != origin_shapes[i]
- && ((greater_than_entry>>(3-i))&1))
+ if (*shape != origin_shapes[i] && ((valid>>i)&1))
{
nearest_shapes[i] = *shape;
nearest_distances[i] = ((float*)&dists)[i];
}
}
- */
}
for (int i = 0; i < 4; i++)
--- a/src/shapes.cc Sun Apr 27 09:44:49 2008 +0200
+++ b/src/shapes.cc Sun Apr 27 14:19:37 2008 +0200
@@ -348,6 +348,54 @@
#endif
}
+#ifdef TRI_BARI_PRE
+// SSE packet version of the triangle test: four rays at once, one ray per lane.
+// dists holds each lane's current nearest distance and is lowered to t in lanes that hit.
+// Returns the hit mask (all bits set in lanes that hit, zero elsewhere).
+__m128 Triangle::intersect_packet(const RayPacket &rays, __m128 &dists)
+{
+    static const int modulo3[5] = {0,1,2,0,1};
+    const int u = modulo3[k+1];
+    const int v = modulo3[k+2];
+    // ray parameter where each ray meets the plane x[k] + nu*x[u] + nv*x[v] = nd
+    const __m128 t = _mm_div_ps(
+        _mm_sub_ps(_mm_sub_ps(
+            _mm_sub_ps(_mm_set_ps1(nd), rays.o.ma[k]),
+            _mm_mul_ps(_mm_set_ps1(nu), rays.o.ma[u])
+        ), _mm_mul_ps(_mm_set_ps1(nv), rays.o.ma[v])),
+        _mm_add_ps(rays.dir.ma[k],
+            _mm_add_ps(_mm_mul_ps(_mm_set_ps1(nu), rays.dir.ma[u]),
+            _mm_mul_ps(_mm_set_ps1(nv), rays.dir.ma[v])))
+    );
+    // keep lanes with mEps <= t < dists; leave early once no lane survives
+    __m128 mask = _mm_and_ps(_mm_cmplt_ps(t, dists), _mm_cmpge_ps(t, mEps));
+    if (!_mm_movemask_ps(mask))
+        return mask;
+
+    // plane hit point relative to vertex A, along the u and v axes
+    const __m128 hu = _mm_sub_ps(_mm_add_ps(rays.o.ma[u],
+        _mm_mul_ps(t, rays.dir.ma[u])), _mm_set_ps1(A->P[u]));
+    const __m128 hv = _mm_sub_ps(_mm_add_ps(rays.o.ma[v],
+        _mm_mul_ps(t, rays.dir.ma[v])), _mm_set_ps1(A->P[v]));
+    const __m128 beta = _mm_add_ps(_mm_mul_ps(hv, _mm_set_ps1(bnu)),
+        _mm_mul_ps(hu, _mm_set_ps1(bnv)));
+    mask = _mm_and_ps(mask, _mm_cmpge_ps(beta, zeros));
+    if (!_mm_movemask_ps(mask))
+        return mask;
+
+    const __m128 gamma = _mm_add_ps(_mm_mul_ps(hu, _mm_set_ps1(cnv)),
+        _mm_mul_ps(hv, _mm_set_ps1(cnu)));
+    // remaining inside tests: gamma >= 0 and beta + gamma <= 1 (beta >= 0 was checked above)
+    mask = _mm_and_ps(mask, _mm_and_ps(_mm_cmpge_ps(gamma, zeros),
+        _mm_cmple_ps(_mm_add_ps(beta, gamma), ones)));
+    if (!_mm_movemask_ps(mask))
+        return mask;
+    // branch-free select: t in hit lanes, the previous distance everywhere else
+    dists = _mm_or_ps(_mm_and_ps(mask, t), _mm_andnot_ps(mask, dists));
+    return mask;
+}
+#endif
+
bool Triangle::intersect_bbox(const BBox &bbox) const
{
const Vector3 boxcenter = (bbox.L+bbox.H)*0.5;