src/raytracer.cc
author Radek Brich <radek.brich@devl.cz>
Fri, 02 May 2008 13:27:47 +0200 (2008-05-02)
branchpyrit
changeset 91 9d66d323c354
parent 90 f6a72eb99631
child 92 9af5c039b678
permissions -rw-r--r--
packetize Phong shader new scons config options: simd=(yes|no) - allow/suppress explicit SSE force_flags=(yes|no) - force use of specified flags instead of autodetected profile=(yes|no) - enable gcc's profiling (-pg option) check for pthread.h header, don't try to build without it add fourth Vector3 component for better memory aligning rename Vector3 to Vector partialy SSE-ize Vector class (only fully vertical operations) build static lib and python module in distinctive directories to avoid collision of library file names on some platforms
/*
 * raytracer.cc: Raytracer class
 *
 * This file is part of Pyrit Ray Tracer.
 *
 * Copyright 2006, 2007, 2008  Radek Brich
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include "raytracer.h"

int pyrit_verbosity = 2;

// Hammersley spherical point distribution
// http://www.cse.cuhk.edu.hk/~ttwong/papers/udpoint/udpoints.html
Vector Raytracer::SphereDistribute(int i, int n, Float extent, const Vector &normal)
{
	Float p, t, st, phi, phirad;
	int kk;

	t = 0;
	for (p=0.5, kk=i; kk; p*=0.5, kk>>=1)
	if (kk & 1)
		t += p;
	t = 1.0 + (t - 1.0)*extent;

	phi = (i + 0.5) / n;
	phirad =  phi * 2.0 * M_PI;

	st = sqrt(1.0 - t*t);

	Float x, y, z, xx, yy, zz, q;
	x = st * cos(phirad);
	y = st * sin(phirad);
	z = t;

	// rotate against Y axis
	q = acos(normal.z);
	zz = z*cos(q) - x*sin(q);
	xx = z*sin(q) + x*cos(q);
	yy = y;

	// rotate against Z axis
	q = atan2f(normal.y, normal.x);
	x = xx*cos(q) - yy*sin(q);
	y = xx*sin(q) + yy*cos(q);
	z = zz;

	return Vector(x, y, z);
}

/*
 P is point of intersection,
 N normal in this point,
 R direction of reflected ray,
 V direction to the viewer
*/
Colour Raytracer::PhongShader(const Shape *shape,
	const Vector &P, const Vector &N, const Vector &V)
{
	Colour col, acc;
	Material * const &mat = shape->material;

	if (mat->texture)
		col = mat->texture->evaluate(P);
	else
		col = mat->colour;

	// ambient
	acc = mat->ambient * col;

	vector<Light*>::iterator light;
	for (light = lights.begin(); light != lights.end(); light++)
	{
		const Vector L = normalize((*light)->pos - P); // direction vector to light
		const Float L_dot_N = dot(L, N);
		if (L_dot_N > 0)
		{
			// test if this light is occluded (sharp shadows)
			if ((*light)->cast_shadows) {
				const Ray shadow_ray = Ray(P, L);
				Float dist = FLT_MAX;
				if (top->nearest_intersection(shape, shadow_ray, dist))
					continue;
			}

			const Vector R = L - 2.0 * L_dot_N * N;
			const Float R_dot_V = dot(R, V);

			// diffuse
			acc += mat->diffuse * col * (*light)->colour * L_dot_N;

			// specular
			if (R_dot_V > 0)
				acc += mat->specular * (*light)->colour * powf(R_dot_V, mat->shininess);
		}
	}

	return acc;
}

#ifndef NO_SSE
VectorPacket Raytracer::PhongShader_packet(const Shape **shapes,
	const VectorPacket &P, const VectorPacket &N, const VectorPacket &V)
{
	VectorPacket acc, colour;
	union { __m128 ambient; float ambient_f[4]; };
	union { __m128 diffuse; float diffuse_f[4]; };
	union { __m128 specular; float specular_f[4]; };
	union { __m128 shininess; float shininess_f[4]; };

	for (int i = 0; i < 4; i++)
		if (shapes[i] == NULL)
		{
			ambient_f[i] = 0;
			diffuse_f[i] = 0;
			specular_f[i] = 0;
			shininess_f[i] = 0;
		}
		else
		{
			Material * const &mat = shapes[i]->material;
			if (mat->texture)
				colour.setVector(i, mat->texture->evaluate(P.getVector(i)));
			else
				colour.setVector(i, mat->colour);
			ambient_f[i] = mat->ambient;
			diffuse_f[i] = mat->diffuse;
			specular_f[i] = mat->specular;
			shininess_f[i] = mat->shininess;
		}

	// ambient
	acc = colour * ambient;

	Shape **shadow_shapes;
	vector<Light*>::iterator light;
	for (light = lights.begin(); light != lights.end(); light++)
	{
		 // direction vector to light
		VectorPacket L = VectorPacket((*light)->pos) - P;
		L.normalize();
		const __m128 L_dot_N = dot(L, N);
		__m128 valid = _mm_cmpgt_ps(L_dot_N, mZero);

		// test if this light is occluded (sharp shadows)
		if ((*light)->cast_shadows)
		{
			const RayPacket shadow_rays = RayPacket(P, L);
			union { __m128 dists; float dists_f[4]; };
			dists = mInf;
			top->packet_intersection(shapes, shadow_rays,
				dists_f, shadow_shapes);
			valid = _mm_and_ps(valid, _mm_cmpeq_ps(dists, mInf));
		}

		const VectorPacket R = L - N * _mm_mul_ps(mTwo, L_dot_N);
		const __m128 R_dot_V = dot(R, V);

		// diffuse
		acc.selectiveAdd(valid,
			colour * VectorPacket((*light)->colour) * _mm_mul_ps(diffuse, L_dot_N));

		// specular
		valid = _mm_and_ps(valid, _mm_cmpgt_ps(R_dot_V, mZero));
		__m128 spec = _mm_mul_ps(_mm_mul_ps(specular, _mm_set_ps1((*light)->colour.r)),
			_mm_fastpow(R_dot_V, shininess));
		acc.selectiveAdd(valid, spec);
	}
	return acc;
}
#endif

void Raytracer::lightScatter(const Ray &ray, const Shape *shape, int depth,
	const Vector &P, const Vector &normal, bool from_inside, Colour &col)
{
	if (depth < max_depth)
	{
		Colour trans_col, refl_col;
		Float trans = shape->material->transmissivity;
		Float refl = shape->material->reflectivity;
		const Float cos_i = - dot(normal, ray.dir);

		// reflection
		if (refl > 0.01)
		{
			Vector newdir = ray.dir + 2.0 * cos_i * normal;
			Ray newray = Ray(P, newdir);
			refl_col = raytrace(newray, depth + 1, shape);
		}

		// refraction
		if (trans > 0.01)
		{
			Float n, n1, n2;
			if (from_inside)
			{
				n1 = shape->material->refract_index;
				n2 = 1.0;
				n = n1;
			}
			else
			{
				n1 = 1.0;
				n2 = shape->material->refract_index;
				n = 1.0 / n2;
			}
			const Float sin2_t = n*n * (1 - cos_i*cos_i);
			if (sin2_t >= 1.0)
			{
					// totally reflected
					refl += trans;
					trans = 0;
			}
			else
			{
				const Float cos_t = sqrtf(1 - sin2_t);
				const Float Rdiv = 1.0/(n1*cos_i + n2*cos_t);
				const Float Rper = (n1*cos_i - n2*cos_t)*Rdiv;
				const Float Rpar = (n2*cos_i - n1*cos_t)*Rdiv;
				const Float R = (Rper*Rper + Rpar*Rpar)/2;
				refl += R*trans;
				trans = (1-R)*trans;
				Vector newdir = n * ray.dir + (n*cos_i - cos_t) * normal;
				Ray newray = Ray(P + 0.001*newdir, newdir);
				trans_col = raytrace(newray, depth + 1, NULL);
			}
		}
		col = (1-refl-trans)*col + refl*refl_col + trans*trans_col;
	}

	// ambient occlusion
	if (ao_samples && !from_inside)
	{
		Float miss = 0;
		for (int i = 0; i < ao_samples; i++)
		{
			Vector dir = SphereDistribute(i, ao_samples, ao_angle, normal);
			Ray ao_ray = Ray(P, dir);
			Float dist = ao_distance;
			Shape *shape_in_way = top->nearest_intersection(shape, ao_ray, dist);
			if (shape_in_way == NULL)
				miss += 1.0;
			else
				miss += dist / ao_distance;
		}
		Float ao_intensity = miss / ao_samples;
		col = col * ao_intensity;
	}
}

Colour Raytracer::raytrace(Ray &ray, int depth, const Shape *origin_shape)
{
	Float nearest_distance = Inf;
	Shape *nearest_shape = top->nearest_intersection(origin_shape, ray, nearest_distance);

	if (nearest_shape == NULL)
		return bg_colour;
	else
	{
		const Vector P = ray.o + ray.dir * nearest_distance; // point of intersection
		Vector normal = nearest_shape->normal(P);
		bool from_inside = false;

		// make shapes double sided
		if (dot(normal, ray.dir) > 0.0)
		{
			normal = - normal;
			from_inside = true;
		}

		// shading function
		Colour col = PhongShader(nearest_shape, P, normal, ray.dir);
		lightScatter(ray, nearest_shape, depth, P, normal, from_inside, col);
		return col;
	}
}

#ifndef NO_SSE
void Raytracer::raytracePacket(RayPacket &rays, Colour *results)
{
	union {
		float nearest_distances[4];
		__m128 m_nearest_distances;
	};
	__m128 mask;
	Shape *nearest_shapes[4];
	static const Shape *origin_shapes[4] = {NULL, NULL, NULL, NULL};
	m_nearest_distances = mInf;
	mask = mAllSet;

	top->packet_intersection(origin_shapes, rays, nearest_distances, nearest_shapes);

	mask = _mm_cmpneq_ps(m_nearest_distances, mInf);
	if (!_mm_movemask_ps(mask))
	{
		for (int i = 0; i < 4; i++)
			results[i] = bg_colour;
		return;
	}

	const VectorPacket P = rays.o + rays.dir * m_nearest_distances; // point of intersection

	VectorPacket normal;
	for (int i = 0; i < 4; i++)
		if (nearest_shapes[i] != NULL)
			normal.setVector(i, nearest_shapes[i]->normal(P.getVector(i)));

	// make shapes double sided
	__m128 from_inside = _mm_cmpgt_ps(dot(normal, rays.dir), mZero);
	normal.mx = _mm_or_ps(_mm_and_ps(from_inside, _mm_sub_ps(mZero, normal.mx)),
		_mm_andnot_ps(from_inside, normal.mx));
	normal.my = _mm_or_ps(_mm_and_ps(from_inside, _mm_sub_ps(mZero, normal.my)),
		_mm_andnot_ps(from_inside, normal.my));
	normal.mz = _mm_or_ps(_mm_and_ps(from_inside, _mm_sub_ps(mZero, normal.mz)),
		_mm_andnot_ps(from_inside, normal.mz));

	// shading function
	VectorPacket pres =
		PhongShader_packet(const_cast<const Shape**>(nearest_shapes), P, normal, rays.dir);
	//pres.mx = _mm_or_ps(_mm_and_ps(mask, pres.mx), _mm_andnot_ps(mask, _mm_set_ps1(bg_colour.r)));
	//pres.my = _mm_or_ps(_mm_and_ps(mask, pres.my), _mm_andnot_ps(mask, _mm_set_ps1(bg_colour.g)));
	//pres.mz = _mm_or_ps(_mm_and_ps(mask, pres.mz), _mm_andnot_ps(mask, _mm_set_ps1(bg_colour.b)));

	for (int i = 0; i < 4; i++)
		if (nearest_shapes[i] != NULL)
		{
			results[i] = pres.getVector(i);
			lightScatter(rays[i], nearest_shapes[i], 0,
				P.getVector(i), normal.getVector(i), (_mm_movemask_ps(from_inside)>>i)&1,
				results[i]);
		}
		else
			results[i] = bg_colour;
}
#endif

void *Raytracer::raytrace_worker(void *d)
{
	static const int my_queue_size = 256;
	Raytracer *rt = (Raytracer*)d;
	Sample my_queue[my_queue_size];
	Colour my_colours[my_queue_size];
	int my_count;
	Ray ray;
#ifndef NO_SSE
	RayPacket rays;
	const bool can_use_packets = (rt->use_packets && rt->sampler->packetableSamples());
#endif
	for (;;)
	{
		pthread_mutex_lock(&rt->sample_queue_mutex);
		while (rt->sample_queue_count == 0)
		{
			if (rt->end_of_samples)
			{
				dbgmsg(4, "T thread [%d] exiting\n", pthread_self());
				pthread_mutex_unlock(&rt->sample_queue_mutex);
				pthread_exit(NULL);
			}
			pthread_cond_wait(&rt->sample_queue_cond, &rt->sample_queue_mutex);
		}

		if (rt->sample_queue_count >= my_queue_size)
			my_count = my_queue_size;
		else
			my_count = rt->sample_queue_count;
		rt->sample_queue_count -= my_count;

		// copy samples to local queue
		if (rt->sample_queue_pos + my_count >= rt->sample_queue_size)
		{
			register int c = rt->sample_queue_size - rt->sample_queue_pos;
			memcpy(my_queue, rt->sample_queue + rt->sample_queue_pos, c*sizeof(Sample));
			memcpy(my_queue + c, rt->sample_queue, (my_count - c)*sizeof(Sample));
			rt->sample_queue_pos = my_count - c;
		}
		else
		{
			memcpy(my_queue, rt->sample_queue + rt->sample_queue_pos,
				my_count*sizeof(Sample));
			rt->sample_queue_pos += my_count;
		}
		if (rt->sample_queue_count <= my_queue_size*2)
			pthread_cond_signal(&rt->worker_ready_cond);
		pthread_mutex_unlock(&rt->sample_queue_mutex);

		// do the work
#ifndef NO_SSE
		if (can_use_packets)
		{
			// packet ray tracing
			assert((my_count % 4) == 0);
			for (int i = 0; i < my_count; i+=4)
			{
				rt->camera->makeRayPacket(my_queue + i, rays);
				rt->raytracePacket(rays, my_colours + i);
			}
		}
		else
#endif
		{
			// single ray tracing
			for (int i = 0; i < my_count; i++)
			{
				ray = rt->camera->makeRay(my_queue[i]);
				my_colours[i] = rt->raytrace(ray, 0, NULL);
			}
		}

		// save the results
		pthread_mutex_lock(&rt->sampler_mutex);
		for (int i = 0; i < my_count; i++)
			rt->sampler->saveSample(my_queue[i], my_colours[i]);
		pthread_mutex_unlock(&rt->sampler_mutex);
	}
}

void Raytracer::render()
{
	if (!sampler || !camera || !top)
		return;

	static const int my_count_max = 256;
	sample_queue_size = my_count_max*2*num_threads;
	sample_queue = new Sample [sample_queue_size];
	sample_queue_count = 0;
	int my_count, my_pos;
	int sampnum = 0, sampdone;
	int phase = 1;
	bool more_samples;

	sampler->init();

	// create workers
	dbgmsg(1, "* using %d threads\n", num_threads);
	pthread_t threads[num_threads];

	dbgmsg(1, "* raytracing...\n");

	while ( (sampnum = sampler->initSampleSet()) > 0 )
	{
		my_pos = 0;
		sample_queue_pos = 0;
		sampdone = 0;
		end_of_samples = false;

		for (int t = 0; t < num_threads; t++)
		{
			int rc = pthread_create(&threads[t], NULL, raytrace_worker, (void*)this);
			if (rc) {
				dbgmsg(0, "\nE pthread_create unsuccessful, return code was %d\n", rc);
				exit(1);
			}
		}

		dbgmsg(2, "- phase %d:  0%% done", phase);

		pthread_mutex_lock(&sampler_mutex);

		for (;;)
		{
			my_count = 0;
			while ( more_samples = sampler->nextSample(&sample_queue[my_pos++]) )
			{
				my_count++;
				if (my_pos >= sample_queue_size)
					my_pos = 0;
				if (my_count >= my_count_max)
					break;
			}
			if (!more_samples && !my_count)
				break;

			pthread_mutex_unlock(&sampler_mutex);
			pthread_mutex_lock(&sample_queue_mutex);
			sample_queue_count += my_count;

			// wait for workers if there is enough samples ready on queue
			while (sample_queue_count > (2*num_threads-1)*my_count_max)
				pthread_cond_wait(&worker_ready_cond, &sample_queue_mutex);

			pthread_cond_signal(&sample_queue_cond);
			pthread_mutex_unlock(&sample_queue_mutex);

			sampdone += my_count;
			dbgmsg(2, "\b\b\b\b\b\b\b\b%2d%% done", ((long)sampdone - sample_queue_count)*100/sampnum);

			pthread_mutex_lock(&sampler_mutex);

			if (!more_samples)
				break;
		}
		dbgmsg(2, "\b\b\b\b\b\b\b\b100%% done\n");

		pthread_mutex_unlock(&sampler_mutex);

		// wait for workers
		dbgmsg(2, "- waiting for threads to finish\n");

		pthread_mutex_lock(&sample_queue_mutex);
		end_of_samples = true;
		while (sample_queue_count)
		{
			pthread_cond_broadcast(&sample_queue_cond);
			pthread_mutex_unlock(&sample_queue_mutex);
			pthread_mutex_lock(&sample_queue_mutex);
		}
		pthread_cond_broadcast(&sample_queue_cond);
		pthread_mutex_unlock(&sample_queue_mutex);

		for (int t = 0; t < num_threads; t++)
			pthread_join(threads[t], NULL);

		phase ++;
	}

	delete[] sample_queue;
}

void Raytracer::ambientOcclusion(int samples, Float distance, Float angle)
{
	ao_samples = samples;
	ao_distance = distance;
	ao_angle = angle;
	if (ao_distance == 0)
		/* 0 ==> Inf */
		ao_distance = Inf;
}