added bench.py demo which prints tree build and render times
added Vertex to python binding, NormalVertex is now its subclass
/*
* raytracer.cc: Raytracer class
*
* This file is part of Pyrit Ray Tracer.
*
* Copyright 2006, 2007, 2008 Radek Brich
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include "raytracer.h"
// Hammersley spherical point distribution
// http://www.cse.cuhk.edu.hk/~ttwong/papers/udpoint/udpoints.html
Vector Raytracer::SphereDistribute(int i, int n, Float extent, const Vector &normal)
{
Float p, t, st, phi, phirad;
int kk;
t = 0;
for (p=0.5f, kk=i; kk; p*=0.5f, kk>>=1)
if (kk & 1)
t += p;
t = 1.0f + (t - 1.0f)*extent;
phi = (i + 0.5f) / n;
phirad = phi * 2.0f * PI;
st = sqrt(1.0f - t*t);
Float x, y, z, xx, yy, zz, q;
x = st * cos(phirad);
y = st * sin(phirad);
z = t;
// rotate against Y axis
q = acos(normal.z);
zz = z*cos(q) - x*sin(q);
xx = z*sin(q) + x*cos(q);
yy = y;
// rotate against Z axis
q = atan2f(normal.y, normal.x);
x = xx*cos(q) - yy*sin(q);
y = xx*sin(q) + yy*cos(q);
z = zz;
return Vector(x, y, z);
}
/*
P is point of intersection,
N normal in this point,
R direction of reflected ray,
V direction to the viewer
*/
Colour Raytracer::PhongShader(const Shape *shape,
const Vector &P, const Vector &N, const Vector &V)
{
Colour col, acc;
Material * const &mat = shape->material;
if (mat->texture)
col = mat->texture->evaluate(P);
else
col = mat->colour;
// ambient
acc = mat->ambient * col;
vector<Light*>::iterator light;
for (light = lights.begin(); light != lights.end(); light++)
{
const Vector L = normalize((*light)->pos - P); // direction vector to light
const Float L_dot_N = dot(L, N);
if (L_dot_N > 0)
{
// test if this light is occluded (sharp shadows)
if ((*light)->cast_shadows) {
const Ray shadow_ray = Ray(P, L);
Float dist = FLT_MAX;
if (top->nearest_intersection(shape, shadow_ray, dist))
continue;
}
const Vector R = L - 2.0f * L_dot_N * N;
const Float R_dot_V = dot(R, V);
// diffuse
acc += mat->diffuse * col * (*light)->colour * L_dot_N;
// specular
if (R_dot_V > 0)
acc += mat->specular * (*light)->colour * powf(R_dot_V, mat->shininess);
}
}
return acc;
}
#ifndef NO_SIMD
VectorPacket Raytracer::PhongShader_packet(const Shape* const* shapes,
const VectorPacket &P, const VectorPacket &N, const VectorPacket &V)
{
VectorPacket acc, colour;
union { mfloat4 ambient; float ambient_f[4]; };
union { mfloat4 diffuse; float diffuse_f[4]; };
union { mfloat4 specular; float specular_f[4]; };
union { mfloat4 shininess; float shininess_f[4]; };
for (int i = 0; i < 4; i++)
if (shapes[i] == NULL)
{
ambient_f[i] = 0;
diffuse_f[i] = 0;
specular_f[i] = 0;
shininess_f[i] = 0;
}
else
{
Material * const &mat = shapes[i]->material;
if (mat->texture)
colour.setVector(i, mat->texture->evaluate(P.getVector(i)));
else
colour.setVector(i, mat->colour);
ambient_f[i] = mat->ambient;
diffuse_f[i] = mat->diffuse;
specular_f[i] = mat->specular;
shininess_f[i] = mat->shininess;
}
// ambient
acc = colour * ambient;
const Shape *shadow_shapes[4];
vector<Light*>::iterator light;
for (light = lights.begin(); light != lights.end(); light++)
{
// direction vector to light
VectorPacket L = VectorPacket((*light)->pos) - P;
L.normalize();
const mfloat4 L_dot_N = dot(L, N);
mfloat4 valid = mcmpgt(L_dot_N, mZero);
// test if this light is occluded (sharp shadows)
if ((*light)->cast_shadows)
{
const RayPacket shadow_rays = RayPacket(P, L);
union { mfloat4 dists; float dists_f[4]; };
dists = mInf;
top->packet_intersection(shapes, shadow_rays,
dists_f, shadow_shapes);
valid = mand(valid, mcmpeq(dists, mInf));
}
const VectorPacket R = L - N * mmul(mTwo, L_dot_N);
const mfloat4 R_dot_V = dot(R, V);
// diffuse
acc.selectiveAdd(valid,
colour * VectorPacket((*light)->colour) * mmul(diffuse, L_dot_N));
// specular
valid = mand(valid, mcmpgt(R_dot_V, mZero));
mfloat4 spec = mmul(mmul(specular, mset1((*light)->colour.r)),
mfastpow(R_dot_V, shininess));
acc.selectiveAdd(valid, spec);
}
return acc;
}
#endif
void Raytracer::lightScatter(const Ray &ray, const Shape *shape, int depth,
const Vector &P, const Vector &normal, bool from_inside, Colour &col)
{
if (depth < max_depth)
{
Colour trans_col, refl_col;
Float trans = shape->material->transmissivity;
Float refl = shape->material->reflectivity;
const Float cos_i = - dot(normal, ray.dir);
// refraction
if (trans > 0.01)
{
Float n, n1, n2;
if (from_inside)
{
n1 = shape->material->refract_index;
n2 = 1.0;
n = n1;
}
else
{
n1 = 1.0f;
n2 = shape->material->refract_index;
n = 1.0f / n2;
}
const Float sin2_t = n*n * (1 - cos_i*cos_i);
if (sin2_t >= 1.0)
{
// totally reflected
refl += trans;
trans = 0;
}
else
{
const Float cos_t = sqrtf(1 - sin2_t);
const Float Rdiv = 1.0f / (n1*cos_i + n2*cos_t);
const Float Rper = (n1*cos_i - n2*cos_t)*Rdiv;
const Float Rpar = (n2*cos_i - n1*cos_t)*Rdiv;
const Float R = (Rper*Rper + Rpar*Rpar)/2;
refl += R*trans;
trans = (1-R)*trans;
Vector newdir = n * ray.dir + (n*cos_i - cos_t) * normal;
Ray newray = Ray(P + 0.001f*newdir, newdir);
trans_col = raytrace(newray, depth + 1, NULL);
}
}
// reflection
if (refl > 0.01)
{
Vector newdir = ray.dir + 2.0f * cos_i * normal;
Ray newray = Ray(P, newdir);
refl_col = raytrace(newray, depth + 1, shape);
}
col = (1-refl-trans)*col + refl*refl_col + trans*trans_col;
}
// ambient occlusion
if (ao_samples && !from_inside)
{
Float miss = 0;
for (int i = 0; i < ao_samples; i++)
{
Vector dir = SphereDistribute(i, ao_samples, ao_angle, normal);
Ray ao_ray = Ray(P, dir);
Float dist = ao_distance;
const Shape *shape_in_way = top->nearest_intersection(shape, ao_ray, dist);
if (shape_in_way == NULL)
miss += 1.0;
else
miss += dist / ao_distance;
}
Float ao_intensity = miss / ao_samples;
col = col * ao_intensity;
}
}
Colour Raytracer::raytrace(Ray &ray, int depth, const Shape *origin_shape)
{
Float nearest_distance = Inf;
const Shape *nearest_shape = top->nearest_intersection(origin_shape, ray, nearest_distance);
if (nearest_shape == NULL)
return bg_colour;
else
{
const Vector P = ray.o + ray.dir * nearest_distance; // point of intersection
Vector normal = nearest_shape->normal(P);
bool from_inside = false;
// make shapes double sided
if (dot(normal, ray.dir) > 0.0)
{
normal = - normal;
from_inside = true;
}
// shading function
Colour col = PhongShader(nearest_shape, P, normal, ray.dir);
lightScatter(ray, nearest_shape, depth, P, normal, from_inside, col);
return col;
}
}
#ifndef NO_SIMD
void Raytracer::raytracePacket(RayPacket &rays, Colour *results)
{
union {
float nearest_distances[4];
mfloat4 m_nearest_distances;
};
mfloat4 mask;
const Shape *nearest_shapes[4];
static const Shape *origin_shapes[4] = {NULL, NULL, NULL, NULL};
m_nearest_distances = mInf;
top->packet_intersection(origin_shapes, rays, nearest_distances, nearest_shapes);
mask = mcmpneq(m_nearest_distances, mInf);
if (!mmovemask(mask))
{
for (int i = 0; i < 4; i++)
results[i] = bg_colour;
return;
}
const VectorPacket P = rays.o + rays.dir * m_nearest_distances; // point of intersection
VectorPacket normal;
for (int i = 0; i < 4; i++)
if (nearest_shapes[i] != NULL)
{
const Vector Pvecti = P.getVector(i);
normal.setVector(i, nearest_shapes[i]->normal(Pvecti));
}
// make shapes double sided
mfloat4 from_inside = mcmpgt(dot(normal, rays.dir), mZero);
normal.mx = mselect(from_inside, msub(mZero, normal.mx), normal.mx);
normal.my = mselect(from_inside, msub(mZero, normal.my), normal.my);
normal.mz = mselect(from_inside, msub(mZero, normal.mz), normal.mz);
// shading function
VectorPacket pres = PhongShader_packet(nearest_shapes, P, normal, rays.dir);
//pres.mx = mselect(mask, pres.mx, mset1(bg_colour.r));
//pres.my = mselect(mask, pres.my, mset1(bg_colour.g));
//pres.mz = mselect(mask, pres.mz, mset1(bg_colour.b));
for (int i = 0; i < 4; i++)
if (nearest_shapes[i] != NULL)
{
results[i] = pres.getVector(i);
lightScatter(rays[i], nearest_shapes[i], 0,
P.getVector(i), normal.getVector(i), (mmovemask(from_inside)>>i)&1,
results[i]);
}
else
results[i] = bg_colour;
}
#endif
NORETURN void *Raytracer::raytrace_worker(void *d)
{
static const int my_queue_size = 256;
Raytracer *rt = (Raytracer*)d;
Sample my_queue[my_queue_size];
Colour my_colours[my_queue_size];
int my_count;
Ray ray;
#ifndef NO_SIMD
RayPacket rays;
const bool can_use_packets = (rt->use_packets && rt->sampler->packetableSamples());
#endif
for (;;)
{
pthread_mutex_lock(&rt->sample_queue_mutex);
while (rt->sample_queue_count == 0)
{
if (rt->end_of_samples)
{
dbgmsg(4, "T thread [%d] exiting\n", pthread_self());
pthread_mutex_unlock(&rt->sample_queue_mutex);
pthread_exit(NULL);
}
pthread_cond_wait(&rt->sample_queue_cond, &rt->sample_queue_mutex);
}
if (rt->sample_queue_count >= my_queue_size)
my_count = my_queue_size;
else
my_count = rt->sample_queue_count;
rt->sample_queue_count -= my_count;
// copy samples to local queue
if (rt->sample_queue_pos + my_count >= rt->sample_queue_size)
{
register int c = rt->sample_queue_size - rt->sample_queue_pos;
memcpy(my_queue, rt->sample_queue + rt->sample_queue_pos, c*sizeof(Sample));
memcpy(my_queue + c, rt->sample_queue, (my_count - c)*sizeof(Sample));
rt->sample_queue_pos = my_count - c;
}
else
{
memcpy(my_queue, rt->sample_queue + rt->sample_queue_pos,
my_count*sizeof(Sample));
rt->sample_queue_pos += my_count;
}
if (rt->sample_queue_count <= my_queue_size*2)
pthread_cond_signal(&rt->worker_ready_cond);
pthread_mutex_unlock(&rt->sample_queue_mutex);
// do the work
#ifndef NO_SIMD
if (can_use_packets)
{
// packet ray tracing
assert((my_count & 3) == 0);
for (int i = 0; i < (my_count >> 2); i++)
{
rt->camera->makeRayPacket(my_queue + (i<<2), rays);
rt->raytracePacket(rays, my_colours + (i<<2));
}
}
else
#endif
{
// single ray tracing
for (int i = 0; i < my_count; i++)
{
ray = rt->camera->makeRay(my_queue[i]);
my_colours[i] = rt->raytrace(ray, 0, NULL);
}
}
// save the results
pthread_mutex_lock(&rt->sampler_mutex);
for (int i = 0; i < my_count; i++)
rt->sampler->saveSample(my_queue[i], my_colours[i]);
pthread_mutex_unlock(&rt->sampler_mutex);
}
}
void Raytracer::render()
{
if (!sampler || !camera || !top)
return;
static const int my_count_max = 256;
sample_queue_size = my_count_max*2*num_threads;
sample_queue = new Sample [sample_queue_size];
sample_queue_count = 0;
int my_count, my_pos;
int sampnum = 0, sampdone;
int phase = 1;
bool more_samples;
sampler->init();
// create workers
dbgmsg(1, "* using %d threads\n", num_threads);
pthread_t *threads = new pthread_t[num_threads];
dbgmsg(1, "* raytracing...\n");
while ( (sampnum = sampler->initSampleSet()) > 0 )
{
my_pos = 0;
sample_queue_pos = 0;
sampdone = 0;
end_of_samples = false;
for (int t = 0; t < num_threads; t++)
{
int rc = pthread_create(&threads[t], NULL, raytrace_worker, (void*)this);
if (rc) {
dbgmsg(0, "\nE pthread_create unsuccessful, return code was %d\n", rc);
exit(1);
}
}
dbgmsg(2, "- phase %d: 0%% done", phase);
pthread_mutex_lock(&sampler_mutex);
for (;;)
{
my_count = 0;
while ( more_samples = sampler->nextSample(&sample_queue[my_pos++]) )
{
my_count++;
if (my_pos >= sample_queue_size)
my_pos = 0;
if (my_count >= my_count_max)
break;
}
if (!more_samples && !my_count)
break;
pthread_mutex_unlock(&sampler_mutex);
pthread_mutex_lock(&sample_queue_mutex);
sample_queue_count += my_count;
// wait for workers if there is enough samples ready on queue
while (sample_queue_count > (2*num_threads-1)*my_count_max)
pthread_cond_wait(&worker_ready_cond, &sample_queue_mutex);
pthread_cond_signal(&sample_queue_cond);
pthread_mutex_unlock(&sample_queue_mutex);
sampdone += my_count;
dbgmsg(2, "\b\b\b\b\b\b\b\b%2d%% done", ((long)sampdone - sample_queue_count)*100/sampnum);
pthread_mutex_lock(&sampler_mutex);
if (!more_samples)
break;
}
dbgmsg(2, "\b\b\b\b\b\b\b\b100%% done\n");
pthread_mutex_unlock(&sampler_mutex);
// wait for workers
dbgmsg(2, "- waiting for threads to finish\n");
pthread_mutex_lock(&sample_queue_mutex);
end_of_samples = true;
while (sample_queue_count)
{
pthread_cond_broadcast(&sample_queue_cond);
pthread_mutex_unlock(&sample_queue_mutex);
pthread_mutex_lock(&sample_queue_mutex);
}
pthread_cond_broadcast(&sample_queue_cond);
pthread_mutex_unlock(&sample_queue_mutex);
for (int t = 0; t < num_threads; t++)
pthread_join(threads[t], NULL);
phase ++;
}
delete[] threads;
delete[] sample_queue;
}
void Raytracer::ambientOcclusion(int samples, Float distance, Float angle)
{
ao_samples = samples;
ao_distance = distance;
ao_angle = angle;
if (ao_distance == 0)
/* 0 ==> Inf */
ao_distance = Inf;
}