src/raytracer.cc
author Radek Brich <radek.brich@devl.cz>
Sat, 10 May 2008 14:29:37 +0200
branchpyrit
changeset 95 ca7d4c665531
parent 93 96d65f841791
child 100 c005054bf4c1
permissions -rw-r--r--
build script fixes, add ldflags build option update and enhance demos fix bug in 4x grid oversampling warn if writePNG called while compiled without libpng make shapes in ShapeList const and add many other const needed due to snowball effect slightly optimize Camera::makeRayPacket using _mm_shuffle_ps make Vector SIMD vectorization disabled by default (causes problems) fix bug in implicit reflection of transmissive surfaces, when surface's reflection parameter is set to zero

/*
 * raytracer.cc: Raytracer class
 *
 * This file is part of Pyrit Ray Tracer.
 *
 * Copyright 2006, 2007, 2008  Radek Brich
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include "raytracer.h"

// Hammersley spherical point distribution
// http://www.cse.cuhk.edu.hk/~ttwong/papers/udpoint/udpoints.html
Vector Raytracer::SphereDistribute(int i, int n, Float extent, const Vector &normal)
{
	Float p, t, st, phi, phirad;
	int kk;

	t = 0;
	for (p=0.5f, kk=i; kk; p*=0.5f, kk>>=1)
	if (kk & 1)
		t += p;
	t = 1.0f + (t - 1.0f)*extent;

	phi = (i + 0.5f) / n;
	phirad =  phi * 2.0f * PI;

	st = sqrt(1.0f - t*t);

	Float x, y, z, xx, yy, zz, q;
	x = st * cos(phirad);
	y = st * sin(phirad);
	z = t;

	// rotate against Y axis
	q = acos(normal.z);
	zz = z*cos(q) - x*sin(q);
	xx = z*sin(q) + x*cos(q);
	yy = y;

	// rotate against Z axis
	q = atan2f(normal.y, normal.x);
	x = xx*cos(q) - yy*sin(q);
	y = xx*sin(q) + yy*cos(q);
	z = zz;

	return Vector(x, y, z);
}

/*
 P is point of intersection,
 N normal in this point,
 R direction of reflected ray,
 V direction to the viewer
*/
Colour Raytracer::PhongShader(const Shape *shape,
	const Vector &P, const Vector &N, const Vector &V)
{
	Colour col, acc;
	Material * const &mat = shape->material;

	if (mat->texture)
		col = mat->texture->evaluate(P);
	else
		col = mat->colour;

	// ambient
	acc = mat->ambient * col;

	vector<Light*>::iterator light;
	for (light = lights.begin(); light != lights.end(); light++)
	{
		const Vector L = normalize((*light)->pos - P); // direction vector to light
		const Float L_dot_N = dot(L, N);
		if (L_dot_N > 0)
		{
			// test if this light is occluded (sharp shadows)
			if ((*light)->cast_shadows) {
				const Ray shadow_ray = Ray(P, L);
				Float dist = FLT_MAX;
				if (top->nearest_intersection(shape, shadow_ray, dist))
					continue;
			}

			const Vector R = L - 2.0f * L_dot_N * N;
			const Float R_dot_V = dot(R, V);

			// diffuse
			acc += mat->diffuse * col * (*light)->colour * L_dot_N;

			// specular
			if (R_dot_V > 0)
				acc += mat->specular * (*light)->colour * powf(R_dot_V, mat->shininess);
		}
	}

	return acc;
}

#ifndef NO_SIMD
VectorPacket Raytracer::PhongShader_packet(const Shape* const* shapes,
	const VectorPacket &P, const VectorPacket &N, const VectorPacket &V)
{
	VectorPacket acc, colour;
	union { mfloat4 ambient; float ambient_f[4]; };
	union { mfloat4 diffuse; float diffuse_f[4]; };
	union { mfloat4 specular; float specular_f[4]; };
	union { mfloat4 shininess; float shininess_f[4]; };

	for (int i = 0; i < 4; i++)
		if (shapes[i] == NULL)
		{
			ambient_f[i] = 0;
			diffuse_f[i] = 0;
			specular_f[i] = 0;
			shininess_f[i] = 0;
		}
		else
		{
			Material * const &mat = shapes[i]->material;
			if (mat->texture)
				colour.setVector(i, mat->texture->evaluate(P.getVector(i)));
			else
				colour.setVector(i, mat->colour);
			ambient_f[i] = mat->ambient;
			diffuse_f[i] = mat->diffuse;
			specular_f[i] = mat->specular;
			shininess_f[i] = mat->shininess;
		}

	// ambient
	acc = colour * ambient;

	const Shape *shadow_shapes[4];
	vector<Light*>::iterator light;
	for (light = lights.begin(); light != lights.end(); light++)
	{
		 // direction vector to light
		VectorPacket L = VectorPacket((*light)->pos) - P;
		L.normalize();
		const mfloat4 L_dot_N = dot(L, N);
		mfloat4 valid = mcmpgt(L_dot_N, mZero);

		// test if this light is occluded (sharp shadows)
		if ((*light)->cast_shadows)
		{
			const RayPacket shadow_rays = RayPacket(P, L);
			union { mfloat4 dists; float dists_f[4]; };
			dists = mInf;
			top->packet_intersection(shapes, shadow_rays,
				dists_f, shadow_shapes);
			valid = mand(valid, mcmpeq(dists, mInf));
		}

		const VectorPacket R = L - N * mmul(mTwo, L_dot_N);
		const mfloat4 R_dot_V = dot(R, V);

		// diffuse
		acc.selectiveAdd(valid,
			colour * VectorPacket((*light)->colour) * mmul(diffuse, L_dot_N));

		// specular
		valid = mand(valid, mcmpgt(R_dot_V, mZero));
		mfloat4 spec = mmul(mmul(specular, mset1((*light)->colour.r)),
			mfastpow(R_dot_V, shininess));
		acc.selectiveAdd(valid, spec);
	}
	return acc;
}
#endif

void Raytracer::lightScatter(const Ray &ray, const Shape *shape, int depth,
	const Vector &P, const Vector &normal, bool from_inside, Colour &col)
{
	if (depth < max_depth)
	{
		Colour trans_col, refl_col;
		Float trans = shape->material->transmissivity;
		Float refl = shape->material->reflectivity;
		const Float cos_i = - dot(normal, ray.dir);

		// refraction
		if (trans > 0.01)
		{
			Float n, n1, n2;
			if (from_inside)
			{
				n1 = shape->material->refract_index;
				n2 = 1.0;
				n = n1;
			}
			else
			{
				n1 = 1.0f;
				n2 = shape->material->refract_index;
				n = 1.0f / n2;
			}
			const Float sin2_t = n*n * (1 - cos_i*cos_i);
			if (sin2_t >= 1.0)
			{
					// totally reflected
					refl += trans;
					trans = 0;
			}
			else
			{
				const Float cos_t = sqrtf(1 - sin2_t);
				const Float Rdiv = 1.0f / (n1*cos_i + n2*cos_t);
				const Float Rper = (n1*cos_i - n2*cos_t)*Rdiv;
				const Float Rpar = (n2*cos_i - n1*cos_t)*Rdiv;
				const Float R = (Rper*Rper + Rpar*Rpar)/2;
				refl += R*trans;
				trans = (1-R)*trans;
				Vector newdir = n * ray.dir + (n*cos_i - cos_t) * normal;
				Ray newray = Ray(P + 0.001f*newdir, newdir);
				trans_col = raytrace(newray, depth + 1, NULL);
			}
		}

		// reflection
		if (refl > 0.01)
		{
			Vector newdir = ray.dir + 2.0f * cos_i * normal;
			Ray newray = Ray(P, newdir);
			refl_col = raytrace(newray, depth + 1, shape);
		}

		col = (1-refl-trans)*col + refl*refl_col + trans*trans_col;
	}

	// ambient occlusion
	if (ao_samples && !from_inside)
	{
		Float miss = 0;
		for (int i = 0; i < ao_samples; i++)
		{
			Vector dir = SphereDistribute(i, ao_samples, ao_angle, normal);
			Ray ao_ray = Ray(P, dir);
			Float dist = ao_distance;
			const Shape *shape_in_way = top->nearest_intersection(shape, ao_ray, dist);
			if (shape_in_way == NULL)
				miss += 1.0;
			else
				miss += dist / ao_distance;
		}
		Float ao_intensity = miss / ao_samples;
		col = col * ao_intensity;
	}
}

Colour Raytracer::raytrace(Ray &ray, int depth, const Shape *origin_shape)
{
	Float nearest_distance = Inf;
	const Shape *nearest_shape = top->nearest_intersection(origin_shape, ray, nearest_distance);

	if (nearest_shape == NULL)
		return bg_colour;
	else
	{
		const Vector P = ray.o + ray.dir * nearest_distance; // point of intersection
		Vector normal = nearest_shape->normal(P);
		bool from_inside = false;

		// make shapes double sided
		if (dot(normal, ray.dir) > 0.0)
		{
			normal = - normal;
			from_inside = true;
		}

		// shading function
		Colour col = PhongShader(nearest_shape, P, normal, ray.dir);
		lightScatter(ray, nearest_shape, depth, P, normal, from_inside, col);
		return col;
	}
}

#ifndef NO_SIMD
void Raytracer::raytracePacket(RayPacket &rays, Colour *results)
{
	union {
		float nearest_distances[4];
		mfloat4 m_nearest_distances;
	};
	mfloat4 mask;
	const Shape *nearest_shapes[4];
	static const Shape *origin_shapes[4] = {NULL, NULL, NULL, NULL};
	m_nearest_distances = mInf;

	top->packet_intersection(origin_shapes, rays, nearest_distances, nearest_shapes);

	mask = mcmpneq(m_nearest_distances, mInf);
	if (!mmovemask(mask))
	{
		for (int i = 0; i < 4; i++)
			results[i] = bg_colour;
		return;
	}

	const VectorPacket P = rays.o + rays.dir * m_nearest_distances; // point of intersection
	VectorPacket normal;
	for (int i = 0; i < 4; i++)
		if (nearest_shapes[i] != NULL)
		{
			const Vector Pvecti = P.getVector(i);
			normal.setVector(i, nearest_shapes[i]->normal(Pvecti));
		}

	// make shapes double sided
	mfloat4 from_inside = mcmpgt(dot(normal, rays.dir), mZero);
	normal.mx = mselect(from_inside, msub(mZero, normal.mx), normal.mx);
	normal.my = mselect(from_inside, msub(mZero, normal.my), normal.my);
	normal.mz = mselect(from_inside, msub(mZero, normal.mz), normal.mz);

	// shading function
	VectorPacket pres = PhongShader_packet(nearest_shapes, P, normal, rays.dir);
	//pres.mx = mselect(mask, pres.mx, mset1(bg_colour.r));
	//pres.my = mselect(mask, pres.my, mset1(bg_colour.g));
	//pres.mz = mselect(mask, pres.mz, mset1(bg_colour.b));

	for (int i = 0; i < 4; i++)
		if (nearest_shapes[i] != NULL)
		{
			results[i] = pres.getVector(i);
			lightScatter(rays[i], nearest_shapes[i], 0,
				P.getVector(i), normal.getVector(i), (mmovemask(from_inside)>>i)&1,
				results[i]);
		}
		else
			results[i] = bg_colour;
}
#endif

NORETURN void *Raytracer::raytrace_worker(void *d)
{
	static const int my_queue_size = 256;
	Raytracer *rt = (Raytracer*)d;
	Sample my_queue[my_queue_size];
	Colour my_colours[my_queue_size];
	int my_count;
	Ray ray;
#ifndef NO_SIMD
	RayPacket rays;
	const bool can_use_packets = (rt->use_packets && rt->sampler->packetableSamples());
#endif
	for (;;)
	{
		pthread_mutex_lock(&rt->sample_queue_mutex);
		while (rt->sample_queue_count == 0)
		{
			if (rt->end_of_samples)
			{
				dbgmsg(4, "T thread [%d] exiting\n", pthread_self());
				pthread_mutex_unlock(&rt->sample_queue_mutex);
				pthread_exit(NULL);
			}
			pthread_cond_wait(&rt->sample_queue_cond, &rt->sample_queue_mutex);
		}

		if (rt->sample_queue_count >= my_queue_size)
			my_count = my_queue_size;
		else
			my_count = rt->sample_queue_count;
		rt->sample_queue_count -= my_count;

		// copy samples to local queue
		if (rt->sample_queue_pos + my_count >= rt->sample_queue_size)
		{
			register int c = rt->sample_queue_size - rt->sample_queue_pos;
			memcpy(my_queue, rt->sample_queue + rt->sample_queue_pos, c*sizeof(Sample));
			memcpy(my_queue + c, rt->sample_queue, (my_count - c)*sizeof(Sample));
			rt->sample_queue_pos = my_count - c;
		}
		else
		{
			memcpy(my_queue, rt->sample_queue + rt->sample_queue_pos,
				my_count*sizeof(Sample));
			rt->sample_queue_pos += my_count;
		}
		if (rt->sample_queue_count <= my_queue_size*2)
			pthread_cond_signal(&rt->worker_ready_cond);
		pthread_mutex_unlock(&rt->sample_queue_mutex);

		// do the work
#ifndef NO_SIMD
		if (can_use_packets)
		{
			// packet ray tracing
			assert((my_count & 3) == 0);
			for (int i = 0; i < (my_count >> 2); i++)
			{
				rt->camera->makeRayPacket(my_queue + (i<<2), rays);
				rt->raytracePacket(rays, my_colours + (i<<2));
			}
		}
		else
#endif
		{
			// single ray tracing
			for (int i = 0; i < my_count; i++)
			{
				ray = rt->camera->makeRay(my_queue[i]);
				my_colours[i] = rt->raytrace(ray, 0, NULL);
			}
		}

		// save the results
		pthread_mutex_lock(&rt->sampler_mutex);
		for (int i = 0; i < my_count; i++)
			rt->sampler->saveSample(my_queue[i], my_colours[i]);
		pthread_mutex_unlock(&rt->sampler_mutex);
	}
}

void Raytracer::render()
{
	if (!sampler || !camera || !top)
		return;

	static const int my_count_max = 256;
	sample_queue_size = my_count_max*2*num_threads;
	sample_queue = new Sample [sample_queue_size];
	sample_queue_count = 0;
	int my_count, my_pos;
	int sampnum = 0, sampdone;
	int phase = 1;
	bool more_samples;

	sampler->init();

	// create workers
	dbgmsg(1, "* using %d threads\n", num_threads);
	pthread_t *threads = new pthread_t[num_threads];

	dbgmsg(1, "* raytracing...\n");

	while ( (sampnum = sampler->initSampleSet()) > 0 )
	{
		my_pos = 0;
		sample_queue_pos = 0;
		sampdone = 0;
		end_of_samples = false;

		for (int t = 0; t < num_threads; t++)
		{
			int rc = pthread_create(&threads[t], NULL, raytrace_worker, (void*)this);
			if (rc) {
				dbgmsg(0, "\nE pthread_create unsuccessful, return code was %d\n", rc);
				exit(1);
			}
		}

		dbgmsg(2, "- phase %d:  0%% done", phase);

		pthread_mutex_lock(&sampler_mutex);

		for (;;)
		{
			my_count = 0;
			while ( more_samples = sampler->nextSample(&sample_queue[my_pos++]) )
			{
				my_count++;
				if (my_pos >= sample_queue_size)
					my_pos = 0;
				if (my_count >= my_count_max)
					break;
			}
			if (!more_samples && !my_count)
				break;

			pthread_mutex_unlock(&sampler_mutex);
			pthread_mutex_lock(&sample_queue_mutex);
			sample_queue_count += my_count;

			// wait for workers if there is enough samples ready on queue
			while (sample_queue_count > (2*num_threads-1)*my_count_max)
				pthread_cond_wait(&worker_ready_cond, &sample_queue_mutex);

			pthread_cond_signal(&sample_queue_cond);
			pthread_mutex_unlock(&sample_queue_mutex);

			sampdone += my_count;
			dbgmsg(2, "\b\b\b\b\b\b\b\b%2d%% done", ((long)sampdone - sample_queue_count)*100/sampnum);

			pthread_mutex_lock(&sampler_mutex);

			if (!more_samples)
				break;
		}
		dbgmsg(2, "\b\b\b\b\b\b\b\b100%% done\n");

		pthread_mutex_unlock(&sampler_mutex);

		// wait for workers
		dbgmsg(2, "- waiting for threads to finish\n");

		pthread_mutex_lock(&sample_queue_mutex);
		end_of_samples = true;
		while (sample_queue_count)
		{
			pthread_cond_broadcast(&sample_queue_cond);
			pthread_mutex_unlock(&sample_queue_mutex);
			pthread_mutex_lock(&sample_queue_mutex);
		}
		pthread_cond_broadcast(&sample_queue_cond);
		pthread_mutex_unlock(&sample_queue_mutex);

		for (int t = 0; t < num_threads; t++)
			pthread_join(threads[t], NULL);

		phase ++;
	}

	delete[] threads;
	delete[] sample_queue;
}

void Raytracer::ambientOcclusion(int samples, Float distance, Float angle)
{
	ao_samples = samples;
	ao_distance = distance;
	ao_angle = angle;
	if (ao_distance == 0)
		/* 0 ==> Inf */
		ao_distance = Inf;
}