src/raytracer.cc
branch pyrit
changeset 91 9d66d323c354
parent 90 f6a72eb99631
child 92 9af5c039b678
--- a/src/raytracer.cc	Tue Apr 29 23:31:08 2008 +0200
+++ b/src/raytracer.cc	Fri May 02 13:27:47 2008 +0200
@@ -34,7 +34,7 @@
 
 // Hammersley spherical point distribution
 // http://www.cse.cuhk.edu.hk/~ttwong/papers/udpoint/udpoints.html
-Vector3 Raytracer::SphereDistribute(int i, int n, Float extent, Vector3 &normal)
+Vector Raytracer::SphereDistribute(int i, int n, Float extent, const Vector &normal)
 {
 	Float p, t, st, phi, phirad;
 	int kk;
@@ -67,23 +67,7 @@
 	y = xx*sin(q) + yy*cos(q);
 	z = zz;
 
-	return Vector3(x, y, z);
-}
-
-// ---- tyto dve funkce budou v budouci verzi metody objektu PhongShader
-
-// calculate shader function
-// P is point of intersection, N normal in this point
-Colour PhongShader_ambient(const Material &mat, const Vector3 &P)
-{
-	Colour col;
-	if (mat.texture)
-		col = mat.texture->evaluate(P);
-	else
-		col = mat.colour;
-
-	// ambient
-	return mat.ambient * col;
+	return Vector(x, y, z);
 }
 
 /*
@@ -92,82 +76,138 @@
  R direction of reflected ray,
  V direction to the viewer
 */
-Colour PhongShader_calculate(const Material &mat,
-	const Vector3 &P, const Vector3 &N, const Vector3 &R, const Vector3 &V,
-	const Light &light)
+Colour Raytracer::PhongShader(const Shape *shape, // Phong shading at P: ambient term plus diffuse/specular per light
+	const Vector &P, const Vector &N, const Vector &V)
 {
-	Colour I = Colour();
-	Vector3 L = light.pos - P;
-	L.normalize();
-	Float L_dot_N = dot(L, N);
-	Float R_dot_V = dot(R, V);
-
-	Colour col;
-	if (mat.texture)
-		col = mat.texture->evaluate(P);
-	else
-		col = mat.colour;
-
-	// diffuse
-	I = mat.diffuse * col * light.colour * L_dot_N;
+	Colour col, acc; // col: surface colour at P, acc: accumulated shading result
+	Material * const &mat = shape->material;
 
-	// specular
-	if (R_dot_V > 0)
-		I += mat.specular * light.colour * powf(R_dot_V, mat.shininess);
-	return I;
-}
+	if (mat->texture) // textured material: evaluate the texture at P, otherwise use the flat colour
+		col = mat->texture->evaluate(P);
+	else
+		col = mat->colour;
 
-Colour Raytracer::shader_evalulate(const Ray &ray, int depth, Shape *origin_shape,
-	Float nearest_distance, Shape *nearest_shape)
-{
-	Colour col = Colour();
-	Vector3 P = ray.o + ray.dir * nearest_distance; // point of intersection
-	Vector3 normal = nearest_shape->normal(P);
-	bool from_inside = false;
-
-	// make shapes double sided
-	if (dot(normal, ray.dir) > 0.0)
-	{
-		normal = - normal;
-		from_inside = true;
-	}
-
-	col = PhongShader_ambient(*nearest_shape->material, P);
+	// ambient (added once, independent of the lights)
+	acc = mat->ambient * col;
 
 	vector<Light*>::iterator light;
-	for (light = lights.begin(); light != lights.end(); light++) {
-		Vector3 jo, L = (*light)->pos - P; // direction vector to light
-		L.normalize();
-		Float L_dot_N = dot(L, normal);
-		if (L_dot_N > 0) {
+	for (light = lights.begin(); light != lights.end(); light++)
+	{
+		const Vector L = normalize((*light)->pos - P); // direction vector to light
+		const Float L_dot_N = dot(L, N);
+		if (L_dot_N > 0) // only lights on the side the normal points to contribute
+		{
 			// test if this light is occluded (sharp shadows)
 			if ((*light)->cast_shadows) {
-				Ray shadow_ray = Ray(P, L);
+				const Ray shadow_ray = Ray(P, L);
 				Float dist = FLT_MAX;
-				if (top->nearest_intersection(nearest_shape, shadow_ray, dist))
+				if (top->nearest_intersection(shape, shadow_ray, dist)) // any hit skips this light; 'shape' is presumably excluded as the ray origin - confirm
 					continue;
 			}
 
-			// shading function
-			Vector3 R = L - 2.0 * L_dot_N * normal;
-			col += PhongShader_calculate(*nearest_shape->material,
-				P, normal, R, ray.dir, **light);
+			const Vector R = L - 2.0 * L_dot_N * N; // L reflected across the plane perpendicular to N
+			const Float R_dot_V = dot(R, V);
+
+			// diffuse: surface colour tinted by the light, scaled by L.N
+			acc += mat->diffuse * col * (*light)->colour * L_dot_N;
+
+			// specular: light colour only, with exponent mat->shininess
+			if (R_dot_V > 0)
+				acc += mat->specular * (*light)->colour * powf(R_dot_V, mat->shininess);
 		}
 	}
 
+	return acc;
+}
+
+#ifndef NO_SSE // packet shading is compiled only when SSE is enabled
+VectorPacket Raytracer::PhongShader_packet(const Shape **shapes, // four-lane PhongShader; a NULL entry in shapes[] marks a lane without a shape
+	const VectorPacket &P, const VectorPacket &N, const VectorPacket &V)
+{
+	VectorPacket acc, colour;
+	union { __m128 ambient; float ambient_f[4]; }; // per-lane material coefficients: written as floats, read as one SSE vector
+	union { __m128 diffuse; float diffuse_f[4]; };
+	union { __m128 specular; float specular_f[4]; };
+	union { __m128 shininess; float shininess_f[4]; };
+
+	for (int i = 0; i < 4; i++)
+		if (shapes[i] == NULL) // no shape in this lane: zero all coefficients (colour lane is left unset)
+		{
+			ambient_f[i] = 0;
+			diffuse_f[i] = 0;
+			specular_f[i] = 0;
+			shininess_f[i] = 0;
+		}
+		else
+		{
+			Material * const &mat = shapes[i]->material;
+			if (mat->texture)
+				colour.setVector(i, mat->texture->evaluate(P.getVector(i)));
+			else
+				colour.setVector(i, mat->colour);
+			ambient_f[i] = mat->ambient;
+			diffuse_f[i] = mat->diffuse;
+			specular_f[i] = mat->specular;
+			shininess_f[i] = mat->shininess;
+		}
+
+	// ambient
+	acc = colour * ambient;
+
+	Shape *shadow_shapes[4]; // real storage for packet_intersection's per-lane output (was an uninitialized Shape**)
+	vector<Light*>::iterator light;
+	for (light = lights.begin(); light != lights.end(); light++)
+	{
+		 // direction vector to light
+		VectorPacket L = VectorPacket((*light)->pos) - P;
+		L.normalize();
+		const __m128 L_dot_N = dot(L, N);
+		__m128 valid = _mm_cmpgt_ps(L_dot_N, mZero); // lanes where the light lies on the side the normal points to
+
+		// test if this light is occluded (sharp shadows)
+		if ((*light)->cast_shadows)
+		{
+			const RayPacket shadow_rays = RayPacket(P, L);
+			union { __m128 dists; float dists_f[4]; };
+			dists = mInf;
+			top->packet_intersection(shapes, shadow_rays,
+				dists_f, shadow_shapes);
+			valid = _mm_and_ps(valid, _mm_cmpeq_ps(dists, mInf)); // a lane stays lit only if its distance is still infinite
+		}
+
+		const VectorPacket R = L - N * _mm_mul_ps(mTwo, L_dot_N); // L reflected across the plane perpendicular to N
+		const __m128 R_dot_V = dot(R, V);
+
+		// diffuse
+		acc.selectiveAdd(valid,
+			colour * VectorPacket((*light)->colour) * _mm_mul_ps(diffuse, L_dot_N));
+
+		// specular
+		valid = _mm_and_ps(valid, _mm_cmpgt_ps(R_dot_V, mZero));
+		__m128 spec = _mm_mul_ps(_mm_mul_ps(specular, _mm_set_ps1((*light)->colour.r)), // NOTE(review): only the light's red channel scales the specular term
+			_mm_fastpow(R_dot_V, shininess));
+		acc.selectiveAdd(valid, spec);
+	}
+	return acc;
+}
+#endif
+
+void Raytracer::lightScatter(const Ray &ray, const Shape *shape, int depth,
+	const Vector &P, const Vector &normal, bool from_inside, Colour &col)
+{
 	if (depth < max_depth)
 	{
 		Colour trans_col, refl_col;
-		Float trans = nearest_shape->material->transmissivity;
-		Float refl = nearest_shape->material->reflectivity;
+		Float trans = shape->material->transmissivity;
+		Float refl = shape->material->reflectivity;
 		const Float cos_i = - dot(normal, ray.dir);
 
 		// reflection
 		if (refl > 0.01)
 		{
-			Vector3 newdir = ray.dir + 2.0 * cos_i * normal;
+			Vector newdir = ray.dir + 2.0 * cos_i * normal;
 			Ray newray = Ray(P, newdir);
-			refl_col = raytrace(newray, depth + 1, nearest_shape);
+			refl_col = raytrace(newray, depth + 1, shape);
 		}
 
 		// refraction
@@ -176,14 +216,14 @@
 			Float n, n1, n2;
 			if (from_inside)
 			{
-				n1 = nearest_shape->material->refract_index;
+				n1 = shape->material->refract_index;
 				n2 = 1.0;
 				n = n1;
 			}
 			else
 			{
 				n1 = 1.0;
-				n2 = nearest_shape->material->refract_index;
+				n2 = shape->material->refract_index;
 				n = 1.0 / n2;
 			}
 			const Float sin2_t = n*n * (1 - cos_i*cos_i);
@@ -202,7 +242,7 @@
 				const Float R = (Rper*Rper + Rpar*Rpar)/2;
 				refl += R*trans;
 				trans = (1-R)*trans;
-				Vector3 newdir = n * ray.dir + (n*cos_i - cos_t) * normal;
+				Vector newdir = n * ray.dir + (n*cos_i - cos_t) * normal;
 				Ray newray = Ray(P + 0.001*newdir, newdir);
 				trans_col = raytrace(newray, depth + 1, NULL);
 			}
@@ -211,14 +251,15 @@
 	}
 
 	// ambient occlusion
-	if (!from_inside && ao_samples)
+	if (ao_samples && !from_inside)
 	{
 		Float miss = 0;
-		for (int i = 0; i < ao_samples; i++) {
-			Vector3 dir = SphereDistribute(i, ao_samples, ao_angle, normal);
+		for (int i = 0; i < ao_samples; i++)
+		{
+			Vector dir = SphereDistribute(i, ao_samples, ao_angle, normal);
 			Ray ao_ray = Ray(P, dir);
 			Float dist = ao_distance;
-			Shape *shape_in_way = top->nearest_intersection(nearest_shape, ao_ray, dist);
+			Shape *shape_in_way = top->nearest_intersection(shape, ao_ray, dist);
 			if (shape_in_way == NULL)
 				miss += 1.0;
 			else
@@ -227,11 +268,9 @@
 		Float ao_intensity = miss / ao_samples;
 		col = col * ao_intensity;
 	}
-
-	return col;
 }
 
-Colour Raytracer::raytrace(Ray &ray, int depth, Shape *origin_shape)
+Colour Raytracer::raytrace(Ray &ray, int depth, const Shape *origin_shape)
 {
 	Float nearest_distance = Inf;
 	Shape *nearest_shape = top->nearest_intersection(origin_shape, ray, nearest_distance);
@@ -239,23 +278,83 @@
 	if (nearest_shape == NULL)
 		return bg_colour;
 	else
-		return shader_evalulate(ray, depth, origin_shape, nearest_distance, nearest_shape);
+	{
+		const Vector P = ray.o + ray.dir * nearest_distance; // point of intersection
+		Vector normal = nearest_shape->normal(P);
+		bool from_inside = false;
+
+		// make shapes double sided
+		if (dot(normal, ray.dir) > 0.0)
+		{
+			normal = - normal;
+			from_inside = true;
+		}
+
+		// shading function
+		Colour col = PhongShader(nearest_shape, P, normal, ray.dir);
+		lightScatter(ray, nearest_shape, depth, P, normal, from_inside, col);
+		return col;
+	}
 }
 
+#ifndef NO_SSE // packet tracing is compiled only when SSE is enabled
 void Raytracer::raytracePacket(RayPacket &rays, Colour *results)
 {
-	Float nearest_distances[4] = {Inf,Inf,Inf,Inf};
+	union { // the same four distances, viewed as float[4] and as one SSE register
+		float nearest_distances[4];
+		__m128 m_nearest_distances;
+	};
+	__m128 mask;
 	Shape *nearest_shapes[4];
 	static const Shape *origin_shapes[4] = {NULL, NULL, NULL, NULL};
+	m_nearest_distances = mInf;
+	mask = mAllSet; // NOTE(review): overwritten below before it is ever read
+
 	top->packet_intersection(origin_shapes, rays, nearest_distances, nearest_shapes);
 
+	mask = _mm_cmpneq_ps(m_nearest_distances, mInf); // lanes whose distance is no longer infinite
+	if (!_mm_movemask_ps(mask)) // no lane changed: all four results are the background colour
+	{
+		for (int i = 0; i < 4; i++)
+			results[i] = bg_colour;
+		return;
+	}
+
+	const VectorPacket P = rays.o + rays.dir * m_nearest_distances; // point of intersection
+
+	VectorPacket normal;
+	for (int i = 0; i < 4; i++)
+		if (nearest_shapes[i] != NULL) // lanes without a shape leave their normal unset; they get bg_colour in the final loop
+			normal.setVector(i, nearest_shapes[i]->normal(P.getVector(i)));
+
+	// make shapes double sided
+	__m128 from_inside = _mm_cmpgt_ps(dot(normal, rays.dir), mZero);
+	normal.mx = _mm_or_ps(_mm_and_ps(from_inside, _mm_sub_ps(mZero, normal.mx)), // per-lane select: negated component where from_inside, original elsewhere
+		_mm_andnot_ps(from_inside, normal.mx));
+	normal.my = _mm_or_ps(_mm_and_ps(from_inside, _mm_sub_ps(mZero, normal.my)),
+		_mm_andnot_ps(from_inside, normal.my));
+	normal.mz = _mm_or_ps(_mm_and_ps(from_inside, _mm_sub_ps(mZero, normal.mz)),
+		_mm_andnot_ps(from_inside, normal.mz));
+
+	// shading function
+	VectorPacket pres =
+		PhongShader_packet(const_cast<const Shape**>(nearest_shapes), P, normal, rays.dir);
+	//pres.mx = _mm_or_ps(_mm_and_ps(mask, pres.mx), _mm_andnot_ps(mask, _mm_set_ps1(bg_colour.r)));
+	//pres.my = _mm_or_ps(_mm_and_ps(mask, pres.my), _mm_andnot_ps(mask, _mm_set_ps1(bg_colour.g)));
+	//pres.mz = _mm_or_ps(_mm_and_ps(mask, pres.mz), _mm_andnot_ps(mask, _mm_set_ps1(bg_colour.b)));
+
 	for (int i = 0; i < 4; i++)
-		if (nearest_shapes[i] == NULL)
-			results[i] = bg_colour;
+		if (nearest_shapes[i] != NULL)
+		{
+			results[i] = pres.getVector(i);
+			lightScatter(rays[i], nearest_shapes[i], 0, // scalar per-lane call, depth argument 0
+				P.getVector(i), normal.getVector(i), (_mm_movemask_ps(from_inside)>>i)&1, // bit i of the sign mask is lane i's from_inside flag
+				results[i]);
+		}
 		else
-			results[i] = shader_evalulate(rays[i], 0, NULL,
-				nearest_distances[i], nearest_shapes[i]);
+			results[i] = bg_colour;
 }
+#endif
 
 void *Raytracer::raytrace_worker(void *d)
 {
@@ -265,8 +364,10 @@
 	Colour my_colours[my_queue_size];
 	int my_count;
 	Ray ray;
+#ifndef NO_SSE
 	RayPacket rays;
 	const bool can_use_packets = (rt->use_packets && rt->sampler->packetableSamples());
+#endif
 	for (;;)
 	{
 		pthread_mutex_lock(&rt->sample_queue_mutex);
@@ -306,6 +407,7 @@
 		pthread_mutex_unlock(&rt->sample_queue_mutex);
 
 		// do the work
+#ifndef NO_SSE
 		if (can_use_packets)
 		{
 			// packet ray tracing
@@ -317,6 +419,7 @@
 			}
 		}
 		else
+#endif
 		{
 			// single ray tracing
 			for (int i = 0; i < my_count; i++)