--- a/src/raytracer.cc Fri May 02 13:27:47 2008 +0200
+++ b/src/raytracer.cc Mon May 05 15:31:14 2008 +0200
@@ -120,15 +120,15 @@
return acc;
}
-#ifndef NO_SSE
-VectorPacket Raytracer::PhongShader_packet(const Shape **shapes,
+#ifndef NO_SIMD
+VectorPacket Raytracer::PhongShader_packet(const Shape* const* shapes,
const VectorPacket &P, const VectorPacket &N, const VectorPacket &V)
{
VectorPacket acc, colour;
- union { __m128 ambient; float ambient_f[4]; };
- union { __m128 diffuse; float diffuse_f[4]; };
- union { __m128 specular; float specular_f[4]; };
- union { __m128 shininess; float shininess_f[4]; };
+ union { mfloat4 ambient; float ambient_f[4]; };
+ union { mfloat4 diffuse; float diffuse_f[4]; };
+ union { mfloat4 specular; float specular_f[4]; };
+ union { mfloat4 shininess; float shininess_f[4]; };
for (int i = 0; i < 4; i++)
if (shapes[i] == NULL)
@@ -154,38 +154,38 @@
// ambient
acc = colour * ambient;
- Shape **shadow_shapes;
+ Shape *shadow_shapes[4];
vector<Light*>::iterator light;
for (light = lights.begin(); light != lights.end(); light++)
{
// direction vector to light
VectorPacket L = VectorPacket((*light)->pos) - P;
L.normalize();
- const __m128 L_dot_N = dot(L, N);
- __m128 valid = _mm_cmpgt_ps(L_dot_N, mZero);
+ const mfloat4 L_dot_N = dot(L, N);
+ mfloat4 valid = mcmpgt(L_dot_N, mZero);
// test if this light is occluded (sharp shadows)
if ((*light)->cast_shadows)
{
const RayPacket shadow_rays = RayPacket(P, L);
- union { __m128 dists; float dists_f[4]; };
+ union { mfloat4 dists; float dists_f[4]; };
dists = mInf;
top->packet_intersection(shapes, shadow_rays,
dists_f, shadow_shapes);
- valid = _mm_and_ps(valid, _mm_cmpeq_ps(dists, mInf));
+ valid = mand(valid, mcmpeq(dists, mInf));
}
- const VectorPacket R = L - N * _mm_mul_ps(mTwo, L_dot_N);
- const __m128 R_dot_V = dot(R, V);
+ const VectorPacket R = L - N * mmul(mTwo, L_dot_N);
+ const mfloat4 R_dot_V = dot(R, V);
// diffuse
acc.selectiveAdd(valid,
- colour * VectorPacket((*light)->colour) * _mm_mul_ps(diffuse, L_dot_N));
+ colour * VectorPacket((*light)->colour) * mmul(diffuse, L_dot_N));
// specular
- valid = _mm_and_ps(valid, _mm_cmpgt_ps(R_dot_V, mZero));
- __m128 spec = _mm_mul_ps(_mm_mul_ps(specular, _mm_set_ps1((*light)->colour.r)),
- _mm_fastpow(R_dot_V, shininess));
+ valid = mand(valid, mcmpgt(R_dot_V, mZero));
+ mfloat4 spec = mmul(mmul(specular, mset1((*light)->colour.r)),
+ mfastpow(R_dot_V, shininess));
acc.selectiveAdd(valid, spec);
}
return acc;
@@ -297,23 +297,22 @@
}
}
-#ifndef NO_SSE
+#ifndef NO_SIMD
void Raytracer::raytracePacket(RayPacket &rays, Colour *results)
{
union {
float nearest_distances[4];
- __m128 m_nearest_distances;
+ mfloat4 m_nearest_distances;
};
- __m128 mask;
+ mfloat4 mask;
Shape *nearest_shapes[4];
static const Shape *origin_shapes[4] = {NULL, NULL, NULL, NULL};
m_nearest_distances = mInf;
- mask = mAllSet;
top->packet_intersection(origin_shapes, rays, nearest_distances, nearest_shapes);
- mask = _mm_cmpneq_ps(m_nearest_distances, mInf);
- if (!_mm_movemask_ps(mask))
+ mask = mcmpneq(m_nearest_distances, mInf);
+ if (!mmovemask(mask))
{
for (int i = 0; i < 4; i++)
results[i] = bg_colour;
@@ -321,34 +320,29 @@
}
const VectorPacket P = rays.o + rays.dir * m_nearest_distances; // point of intersection
-
VectorPacket normal;
for (int i = 0; i < 4; i++)
if (nearest_shapes[i] != NULL)
normal.setVector(i, nearest_shapes[i]->normal(P.getVector(i)));
// make shapes double sided
- __m128 from_inside = _mm_cmpgt_ps(dot(normal, rays.dir), mZero);
- normal.mx = _mm_or_ps(_mm_and_ps(from_inside, _mm_sub_ps(mZero, normal.mx)),
- _mm_andnot_ps(from_inside, normal.mx));
- normal.my = _mm_or_ps(_mm_and_ps(from_inside, _mm_sub_ps(mZero, normal.my)),
- _mm_andnot_ps(from_inside, normal.my));
- normal.mz = _mm_or_ps(_mm_and_ps(from_inside, _mm_sub_ps(mZero, normal.mz)),
- _mm_andnot_ps(from_inside, normal.mz));
+ mfloat4 from_inside = mcmpgt(dot(normal, rays.dir), mZero);
+ normal.mx = mselect(from_inside, msub(mZero, normal.mx), normal.mx);
+ normal.my = mselect(from_inside, msub(mZero, normal.my), normal.my);
+ normal.mz = mselect(from_inside, msub(mZero, normal.mz), normal.mz);
// shading function
- VectorPacket pres =
- PhongShader_packet(const_cast<const Shape**>(nearest_shapes), P, normal, rays.dir);
- //pres.mx = _mm_or_ps(_mm_and_ps(mask, pres.mx), _mm_andnot_ps(mask, _mm_set_ps1(bg_colour.r)));
- //pres.my = _mm_or_ps(_mm_and_ps(mask, pres.my), _mm_andnot_ps(mask, _mm_set_ps1(bg_colour.g)));
- //pres.mz = _mm_or_ps(_mm_and_ps(mask, pres.mz), _mm_andnot_ps(mask, _mm_set_ps1(bg_colour.b)));
+ VectorPacket pres = PhongShader_packet(nearest_shapes, P, normal, rays.dir);
+ //pres.mx = mselect(mask, pres.mx, mset1(bg_colour.r));
+ //pres.my = mselect(mask, pres.my, mset1(bg_colour.g));
+ //pres.mz = mselect(mask, pres.mz, mset1(bg_colour.b));
for (int i = 0; i < 4; i++)
if (nearest_shapes[i] != NULL)
{
results[i] = pres.getVector(i);
lightScatter(rays[i], nearest_shapes[i], 0,
- P.getVector(i), normal.getVector(i), (_mm_movemask_ps(from_inside)>>i)&1,
+ P.getVector(i), normal.getVector(i), (mmovemask(from_inside)>>i)&1,
results[i]);
}
else
@@ -364,7 +358,7 @@
Colour my_colours[my_queue_size];
int my_count;
Ray ray;
-#ifndef NO_SSE
+#ifndef NO_SIMD
RayPacket rays;
const bool can_use_packets = (rt->use_packets && rt->sampler->packetableSamples());
#endif
@@ -407,7 +401,7 @@
pthread_mutex_unlock(&rt->sample_queue_mutex);
// do the work
-#ifndef NO_SSE
+#ifndef NO_SIMD
if (can_use_packets)
{
// packet ray tracing
@@ -435,6 +429,7 @@
rt->sampler->saveSample(my_queue[i], my_colours[i]);
pthread_mutex_unlock(&rt->sampler_mutex);
}
+ return NULL;
}
void Raytracer::render()
@@ -455,7 +450,7 @@
// create workers
dbgmsg(1, "* using %d threads\n", num_threads);
- pthread_t threads[num_threads];
+ pthread_t *threads = new pthread_t[num_threads];
dbgmsg(1, "* raytracing...\n");
@@ -536,6 +531,7 @@
phase ++;
}
+ delete[] threads;
delete[] sample_queue;
}