src/raytracer.cc
branch pyrit
changeset 92 9af5c039b678
parent 91 9d66d323c354
child 93 96d65f841791
--- a/src/raytracer.cc	Fri May 02 13:27:47 2008 +0200
+++ b/src/raytracer.cc	Mon May 05 15:31:14 2008 +0200
@@ -120,15 +120,15 @@
 	return acc;
 }
 
-#ifndef NO_SSE
-VectorPacket Raytracer::PhongShader_packet(const Shape **shapes,
+#ifndef NO_SIMD
+VectorPacket Raytracer::PhongShader_packet(const Shape* const* shapes,
 	const VectorPacket &P, const VectorPacket &N, const VectorPacket &V)
 {
 	VectorPacket acc, colour;
-	union { __m128 ambient; float ambient_f[4]; };
-	union { __m128 diffuse; float diffuse_f[4]; };
-	union { __m128 specular; float specular_f[4]; };
-	union { __m128 shininess; float shininess_f[4]; };
+	union { mfloat4 ambient; float ambient_f[4]; };
+	union { mfloat4 diffuse; float diffuse_f[4]; };
+	union { mfloat4 specular; float specular_f[4]; };
+	union { mfloat4 shininess; float shininess_f[4]; };
 
 	for (int i = 0; i < 4; i++)
 		if (shapes[i] == NULL)
@@ -154,38 +154,38 @@
 	// ambient
 	acc = colour * ambient;
 
-	Shape **shadow_shapes;
+	Shape *shadow_shapes[4];
 	vector<Light*>::iterator light;
 	for (light = lights.begin(); light != lights.end(); light++)
 	{
 		 // direction vector to light
 		VectorPacket L = VectorPacket((*light)->pos) - P;
 		L.normalize();
-		const __m128 L_dot_N = dot(L, N);
-		__m128 valid = _mm_cmpgt_ps(L_dot_N, mZero);
+		const mfloat4 L_dot_N = dot(L, N);
+		mfloat4 valid = mcmpgt(L_dot_N, mZero);
 
 		// test if this light is occluded (sharp shadows)
 		if ((*light)->cast_shadows)
 		{
 			const RayPacket shadow_rays = RayPacket(P, L);
-			union { __m128 dists; float dists_f[4]; };
+			union { mfloat4 dists; float dists_f[4]; };
 			dists = mInf;
 			top->packet_intersection(shapes, shadow_rays,
 				dists_f, shadow_shapes);
-			valid = _mm_and_ps(valid, _mm_cmpeq_ps(dists, mInf));
+			valid = mand(valid, mcmpeq(dists, mInf));
 		}
 
-		const VectorPacket R = L - N * _mm_mul_ps(mTwo, L_dot_N);
-		const __m128 R_dot_V = dot(R, V);
+		const VectorPacket R = L - N * mmul(mTwo, L_dot_N);
+		const mfloat4 R_dot_V = dot(R, V);
 
 		// diffuse
 		acc.selectiveAdd(valid,
-			colour * VectorPacket((*light)->colour) * _mm_mul_ps(diffuse, L_dot_N));
+			colour * VectorPacket((*light)->colour) * mmul(diffuse, L_dot_N));
 
 		// specular
-		valid = _mm_and_ps(valid, _mm_cmpgt_ps(R_dot_V, mZero));
-		__m128 spec = _mm_mul_ps(_mm_mul_ps(specular, _mm_set_ps1((*light)->colour.r)),
-			_mm_fastpow(R_dot_V, shininess));
+		valid = mand(valid, mcmpgt(R_dot_V, mZero));
+		mfloat4 spec = mmul(mmul(specular, mset1((*light)->colour.r)),
+			mfastpow(R_dot_V, shininess));
 		acc.selectiveAdd(valid, spec);
 	}
 	return acc;
@@ -297,23 +297,22 @@
 	}
 }
 
-#ifndef NO_SSE
+#ifndef NO_SIMD
 void Raytracer::raytracePacket(RayPacket &rays, Colour *results)
 {
 	union {
 		float nearest_distances[4];
-		__m128 m_nearest_distances;
+		mfloat4 m_nearest_distances;
 	};
-	__m128 mask;
+	mfloat4 mask;
 	Shape *nearest_shapes[4];
 	static const Shape *origin_shapes[4] = {NULL, NULL, NULL, NULL};
 	m_nearest_distances = mInf;
-	mask = mAllSet;
 
 	top->packet_intersection(origin_shapes, rays, nearest_distances, nearest_shapes);
 
-	mask = _mm_cmpneq_ps(m_nearest_distances, mInf);
-	if (!_mm_movemask_ps(mask))
+	mask = mcmpneq(m_nearest_distances, mInf);
+	if (!mmovemask(mask))
 	{
 		for (int i = 0; i < 4; i++)
 			results[i] = bg_colour;
@@ -321,34 +320,29 @@
 	}
 
 	const VectorPacket P = rays.o + rays.dir * m_nearest_distances; // point of intersection
-
 	VectorPacket normal;
 	for (int i = 0; i < 4; i++)
 		if (nearest_shapes[i] != NULL)
 			normal.setVector(i, nearest_shapes[i]->normal(P.getVector(i)));
 
 	// make shapes double sided
-	__m128 from_inside = _mm_cmpgt_ps(dot(normal, rays.dir), mZero);
-	normal.mx = _mm_or_ps(_mm_and_ps(from_inside, _mm_sub_ps(mZero, normal.mx)),
-		_mm_andnot_ps(from_inside, normal.mx));
-	normal.my = _mm_or_ps(_mm_and_ps(from_inside, _mm_sub_ps(mZero, normal.my)),
-		_mm_andnot_ps(from_inside, normal.my));
-	normal.mz = _mm_or_ps(_mm_and_ps(from_inside, _mm_sub_ps(mZero, normal.mz)),
-		_mm_andnot_ps(from_inside, normal.mz));
+	mfloat4 from_inside = mcmpgt(dot(normal, rays.dir), mZero);
+	normal.mx = mselect(from_inside, msub(mZero, normal.mx), normal.mx);
+	normal.my = mselect(from_inside, msub(mZero, normal.my), normal.my);
+	normal.mz = mselect(from_inside, msub(mZero, normal.mz), normal.mz);
 
 	// shading function
-	VectorPacket pres =
-		PhongShader_packet(const_cast<const Shape**>(nearest_shapes), P, normal, rays.dir);
-	//pres.mx = _mm_or_ps(_mm_and_ps(mask, pres.mx), _mm_andnot_ps(mask, _mm_set_ps1(bg_colour.r)));
-	//pres.my = _mm_or_ps(_mm_and_ps(mask, pres.my), _mm_andnot_ps(mask, _mm_set_ps1(bg_colour.g)));
-	//pres.mz = _mm_or_ps(_mm_and_ps(mask, pres.mz), _mm_andnot_ps(mask, _mm_set_ps1(bg_colour.b)));
+	VectorPacket pres = PhongShader_packet(nearest_shapes, P, normal, rays.dir);
+	//pres.mx = mselect(mask, pres.mx, mset1(bg_colour.r));
+	//pres.my = mselect(mask, pres.my, mset1(bg_colour.g));
+	//pres.mz = mselect(mask, pres.mz, mset1(bg_colour.b));
 
 	for (int i = 0; i < 4; i++)
 		if (nearest_shapes[i] != NULL)
 		{
 			results[i] = pres.getVector(i);
 			lightScatter(rays[i], nearest_shapes[i], 0,
-				P.getVector(i), normal.getVector(i), (_mm_movemask_ps(from_inside)>>i)&1,
+				P.getVector(i), normal.getVector(i), (mmovemask(from_inside)>>i)&1,
 				results[i]);
 		}
 		else
@@ -364,7 +358,7 @@
 	Colour my_colours[my_queue_size];
 	int my_count;
 	Ray ray;
-#ifndef NO_SSE
+#ifndef NO_SIMD
 	RayPacket rays;
 	const bool can_use_packets = (rt->use_packets && rt->sampler->packetableSamples());
 #endif
@@ -407,7 +401,7 @@
 		pthread_mutex_unlock(&rt->sample_queue_mutex);
 
 		// do the work
-#ifndef NO_SSE
+#ifndef NO_SIMD
 		if (can_use_packets)
 		{
 			// packet ray tracing
@@ -435,6 +429,7 @@
 			rt->sampler->saveSample(my_queue[i], my_colours[i]);
 		pthread_mutex_unlock(&rt->sampler_mutex);
 	}
+	return NULL;
 }
 
 void Raytracer::render()
@@ -455,7 +450,7 @@
 
 	// create workers
 	dbgmsg(1, "* using %d threads\n", num_threads);
-	pthread_t threads[num_threads];
+	pthread_t *threads = new pthread_t[num_threads];
 
 	dbgmsg(1, "* raytracing...\n");
 
@@ -536,6 +531,7 @@
 		phase ++;
 	}
 
+	delete[] threads;
 	delete[] sample_queue;
 }