diff -r 9d66d323c354 -r 9af5c039b678 src/raytracer.cc --- a/src/raytracer.cc Fri May 02 13:27:47 2008 +0200 +++ b/src/raytracer.cc Mon May 05 15:31:14 2008 +0200 @@ -120,15 +120,15 @@ return acc; } -#ifndef NO_SSE -VectorPacket Raytracer::PhongShader_packet(const Shape **shapes, +#ifndef NO_SIMD +VectorPacket Raytracer::PhongShader_packet(const Shape* const* shapes, const VectorPacket &P, const VectorPacket &N, const VectorPacket &V) { VectorPacket acc, colour; - union { __m128 ambient; float ambient_f[4]; }; - union { __m128 diffuse; float diffuse_f[4]; }; - union { __m128 specular; float specular_f[4]; }; - union { __m128 shininess; float shininess_f[4]; }; + union { mfloat4 ambient; float ambient_f[4]; }; + union { mfloat4 diffuse; float diffuse_f[4]; }; + union { mfloat4 specular; float specular_f[4]; }; + union { mfloat4 shininess; float shininess_f[4]; }; for (int i = 0; i < 4; i++) if (shapes[i] == NULL) @@ -154,38 +154,38 @@ // ambient acc = colour * ambient; - Shape **shadow_shapes; + Shape *shadow_shapes[4]; vector::iterator light; for (light = lights.begin(); light != lights.end(); light++) { // direction vector to light VectorPacket L = VectorPacket((*light)->pos) - P; L.normalize(); - const __m128 L_dot_N = dot(L, N); - __m128 valid = _mm_cmpgt_ps(L_dot_N, mZero); + const mfloat4 L_dot_N = dot(L, N); + mfloat4 valid = mcmpgt(L_dot_N, mZero); // test if this light is occluded (sharp shadows) if ((*light)->cast_shadows) { const RayPacket shadow_rays = RayPacket(P, L); - union { __m128 dists; float dists_f[4]; }; + union { mfloat4 dists; float dists_f[4]; }; dists = mInf; top->packet_intersection(shapes, shadow_rays, dists_f, shadow_shapes); - valid = _mm_and_ps(valid, _mm_cmpeq_ps(dists, mInf)); + valid = mand(valid, mcmpeq(dists, mInf)); } - const VectorPacket R = L - N * _mm_mul_ps(mTwo, L_dot_N); - const __m128 R_dot_V = dot(R, V); + const VectorPacket R = L - N * mmul(mTwo, L_dot_N); + const mfloat4 R_dot_V = dot(R, V); // diffuse acc.selectiveAdd(valid, - colour * VectorPacket((*light)->colour) * _mm_mul_ps(diffuse, L_dot_N)); + colour * VectorPacket((*light)->colour) * mmul(diffuse, L_dot_N)); // specular - valid = _mm_and_ps(valid, _mm_cmpgt_ps(R_dot_V, mZero)); - __m128 spec = _mm_mul_ps(_mm_mul_ps(specular, _mm_set_ps1((*light)->colour.r)), - _mm_fastpow(R_dot_V, shininess)); + valid = mand(valid, mcmpgt(R_dot_V, mZero)); + mfloat4 spec = mmul(mmul(specular, mset1((*light)->colour.r)), + mfastpow(R_dot_V, shininess)); acc.selectiveAdd(valid, spec); } return acc; @@ -297,23 +297,22 @@ } } -#ifndef NO_SSE +#ifndef NO_SIMD void Raytracer::raytracePacket(RayPacket &rays, Colour *results) { union { float nearest_distances[4]; - __m128 m_nearest_distances; + mfloat4 m_nearest_distances; }; - __m128 mask; + mfloat4 mask; Shape *nearest_shapes[4]; static const Shape *origin_shapes[4] = {NULL, NULL, NULL, NULL}; m_nearest_distances = mInf; - mask = mAllSet; top->packet_intersection(origin_shapes, rays, nearest_distances, nearest_shapes); - mask = _mm_cmpneq_ps(m_nearest_distances, mInf); - if (!_mm_movemask_ps(mask)) + mask = mcmpneq(m_nearest_distances, mInf); + if (!mmovemask(mask)) { for (int i = 0; i < 4; i++) results[i] = bg_colour; @@ -321,34 +320,29 @@ } const VectorPacket P = rays.o + rays.dir * m_nearest_distances; // point of intersection - VectorPacket normal; for (int i = 0; i < 4; i++) if (nearest_shapes[i] != NULL) normal.setVector(i, nearest_shapes[i]->normal(P.getVector(i))); // make shapes double sided - __m128 from_inside = _mm_cmpgt_ps(dot(normal, rays.dir), mZero); - normal.mx = _mm_or_ps(_mm_and_ps(from_inside, _mm_sub_ps(mZero, normal.mx)), - _mm_andnot_ps(from_inside, normal.mx)); - normal.my = _mm_or_ps(_mm_and_ps(from_inside, _mm_sub_ps(mZero, normal.my)), - _mm_andnot_ps(from_inside, normal.my)); - normal.mz = _mm_or_ps(_mm_and_ps(from_inside, _mm_sub_ps(mZero, normal.mz)), - _mm_andnot_ps(from_inside, normal.mz)); + mfloat4 from_inside = mcmpgt(dot(normal, rays.dir), mZero); + normal.mx = mselect(from_inside, msub(mZero, normal.mx), normal.mx); + normal.my = mselect(from_inside, msub(mZero, normal.my), normal.my); + normal.mz = mselect(from_inside, msub(mZero, normal.mz), normal.mz); // shading function - VectorPacket pres = - PhongShader_packet(const_cast(nearest_shapes), P, normal, rays.dir); - //pres.mx = _mm_or_ps(_mm_and_ps(mask, pres.mx), _mm_andnot_ps(mask, _mm_set_ps1(bg_colour.r))); - //pres.my = _mm_or_ps(_mm_and_ps(mask, pres.my), _mm_andnot_ps(mask, _mm_set_ps1(bg_colour.g))); - //pres.mz = _mm_or_ps(_mm_and_ps(mask, pres.mz), _mm_andnot_ps(mask, _mm_set_ps1(bg_colour.b))); + VectorPacket pres = PhongShader_packet(nearest_shapes, P, normal, rays.dir); + //pres.mx = mselect(mask, pres.mx, mset1(bg_colour.r)); + //pres.my = mselect(mask, pres.my, mset1(bg_colour.g)); + //pres.mz = mselect(mask, pres.mz, mset1(bg_colour.b)); for (int i = 0; i < 4; i++) if (nearest_shapes[i] != NULL) { results[i] = pres.getVector(i); lightScatter(rays[i], nearest_shapes[i], 0, - P.getVector(i), normal.getVector(i), (_mm_movemask_ps(from_inside)>>i)&1, + P.getVector(i), normal.getVector(i), (mmovemask(from_inside)>>i)&1, results[i]); } else @@ -364,7 +358,7 @@ Colour my_colours[my_queue_size]; int my_count; Ray ray; -#ifndef NO_SSE +#ifndef NO_SIMD RayPacket rays; const bool can_use_packets = (rt->use_packets && rt->sampler->packetableSamples()); #endif @@ -407,7 +401,7 @@ pthread_mutex_unlock(&rt->sample_queue_mutex); // do the work -#ifndef NO_SSE +#ifndef NO_SIMD if (can_use_packets) { // packet ray tracing @@ -435,6 +429,7 @@ rt->sampler->saveSample(my_queue[i], my_colours[i]); pthread_mutex_unlock(&rt->sampler_mutex); } + return NULL; } void Raytracer::render() @@ -455,7 +450,7 @@ // create workers dbgmsg(1, "* using %d threads\n", num_threads); - pthread_t threads[num_threads]; + pthread_t *threads = new pthread_t[num_threads]; dbgmsg(1, "* raytracing...\n"); @@ -536,6 +531,7 @@ phase ++; } + delete[] threads; delete[] sample_queue; }