build script fixes, add ldflags build option
update and enhance demos
fix bug in 4x grid oversampling
warn if writePNG called while compiled without libpng
make shapes in ShapeList const
and add many other const needed due to snowball effect
slightly optimize Camera::makeRayPacket using _mm_shuffle_ps
make Vector SIMD vectorization disabled by default (causes problems)
fix bug in implicit reflection of transmissive surfaces,
when surface's reflection parameter is set to zero
/*
* raytracer.cc: Raytracer class
*
* This file is part of Pyrit Ray Tracer.
*
* Copyright 2006, 2007, 2008 Radek Brich
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include "raytracer.h"
// Hammersley spherical point distribution
// http://www.cse.cuhk.edu.hk/~ttwong/papers/udpoint/udpoints.html
Vector Raytracer::SphereDistribute(int i, int n, Float extent, const Vector &normal)
{
Float p, t, st, phi, phirad;
int kk;
t = 0;
for (p=0.5f, kk=i; kk; p*=0.5f, kk>>=1)
if (kk & 1)
t += p;
t = 1.0f + (t - 1.0f)*extent;
phi = (i + 0.5f) / n;
phirad = phi * 2.0f * PI;
st = sqrt(1.0f - t*t);
Float x, y, z, xx, yy, zz, q;
x = st * cos(phirad);
y = st * sin(phirad);
z = t;
// rotate against Y axis
q = acos(normal.z);
zz = z*cos(q) - x*sin(q);
xx = z*sin(q) + x*cos(q);
yy = y;
// rotate against Z axis
q = atan2f(normal.y, normal.x);
x = xx*cos(q) - yy*sin(q);
y = xx*sin(q) + yy*cos(q);
z = zz;
return Vector(x, y, z);
}
/*
P is point of intersection,
N normal in this point,
R direction of reflected ray,
V direction to the viewer
*/
Colour Raytracer::PhongShader(const Shape *shape,
const Vector &P, const Vector &N, const Vector &V)
{
Colour col, acc;
Material * const &mat = shape->material;
if (mat->texture)
col = mat->texture->evaluate(P);
else
col = mat->colour;
// ambient
acc = mat->ambient * col;
vector<Light*>::iterator light;
for (light = lights.begin(); light != lights.end(); light++)
{
const Vector L = normalize((*light)->pos - P); // direction vector to light
const Float L_dot_N = dot(L, N);
if (L_dot_N > 0)
{
// test if this light is occluded (sharp shadows)
if ((*light)->cast_shadows) {
const Ray shadow_ray = Ray(P, L);
Float dist = FLT_MAX;
if (top->nearest_intersection(shape, shadow_ray, dist))
continue;
}
const Vector R = L - 2.0f * L_dot_N * N;
const Float R_dot_V = dot(R, V);
// diffuse
acc += mat->diffuse * col * (*light)->colour * L_dot_N;
// specular
if (R_dot_V > 0)
acc += mat->specular * (*light)->colour * powf(R_dot_V, mat->shininess);
}
}
return acc;
}
#ifndef NO_SIMD
VectorPacket Raytracer::PhongShader_packet(const Shape* const* shapes,
const VectorPacket &P, const VectorPacket &N, const VectorPacket &V)
{
VectorPacket acc, colour;
union { mfloat4 ambient; float ambient_f[4]; };
union { mfloat4 diffuse; float diffuse_f[4]; };
union { mfloat4 specular; float specular_f[4]; };
union { mfloat4 shininess; float shininess_f[4]; };
for (int i = 0; i < 4; i++)
if (shapes[i] == NULL)
{
ambient_f[i] = 0;
diffuse_f[i] = 0;
specular_f[i] = 0;
shininess_f[i] = 0;
}
else
{
Material * const &mat = shapes[i]->material;
if (mat->texture)
colour.setVector(i, mat->texture->evaluate(P.getVector(i)));
else
colour.setVector(i, mat->colour);
ambient_f[i] = mat->ambient;
diffuse_f[i] = mat->diffuse;
specular_f[i] = mat->specular;
shininess_f[i] = mat->shininess;
}
// ambient
acc = colour * ambient;
const Shape *shadow_shapes[4];
vector<Light*>::iterator light;
for (light = lights.begin(); light != lights.end(); light++)
{
// direction vector to light
VectorPacket L = VectorPacket((*light)->pos) - P;
L.normalize();
const mfloat4 L_dot_N = dot(L, N);
mfloat4 valid = mcmpgt(L_dot_N, mZero);
// test if this light is occluded (sharp shadows)
if ((*light)->cast_shadows)
{
const RayPacket shadow_rays = RayPacket(P, L);
union { mfloat4 dists; float dists_f[4]; };
dists = mInf;
top->packet_intersection(shapes, shadow_rays,
dists_f, shadow_shapes);
valid = mand(valid, mcmpeq(dists, mInf));
}
const VectorPacket R = L - N * mmul(mTwo, L_dot_N);
const mfloat4 R_dot_V = dot(R, V);
// diffuse
acc.selectiveAdd(valid,
colour * VectorPacket((*light)->colour) * mmul(diffuse, L_dot_N));
// specular
valid = mand(valid, mcmpgt(R_dot_V, mZero));
mfloat4 spec = mmul(mmul(specular, mset1((*light)->colour.r)),
mfastpow(R_dot_V, shininess));
acc.selectiveAdd(valid, spec);
}
return acc;
}
#endif
void Raytracer::lightScatter(const Ray &ray, const Shape *shape, int depth,
const Vector &P, const Vector &normal, bool from_inside, Colour &col)
{
if (depth < max_depth)
{
Colour trans_col, refl_col;
Float trans = shape->material->transmissivity;
Float refl = shape->material->reflectivity;
const Float cos_i = - dot(normal, ray.dir);
// refraction
if (trans > 0.01)
{
Float n, n1, n2;
if (from_inside)
{
n1 = shape->material->refract_index;
n2 = 1.0;
n = n1;
}
else
{
n1 = 1.0f;
n2 = shape->material->refract_index;
n = 1.0f / n2;
}
const Float sin2_t = n*n * (1 - cos_i*cos_i);
if (sin2_t >= 1.0)
{
// totally reflected
refl += trans;
trans = 0;
}
else
{
const Float cos_t = sqrtf(1 - sin2_t);
const Float Rdiv = 1.0f / (n1*cos_i + n2*cos_t);
const Float Rper = (n1*cos_i - n2*cos_t)*Rdiv;
const Float Rpar = (n2*cos_i - n1*cos_t)*Rdiv;
const Float R = (Rper*Rper + Rpar*Rpar)/2;
refl += R*trans;
trans = (1-R)*trans;
Vector newdir = n * ray.dir + (n*cos_i - cos_t) * normal;
Ray newray = Ray(P + 0.001f*newdir, newdir);
trans_col = raytrace(newray, depth + 1, NULL);
}
}
// reflection
if (refl > 0.01)
{
Vector newdir = ray.dir + 2.0f * cos_i * normal;
Ray newray = Ray(P, newdir);
refl_col = raytrace(newray, depth + 1, shape);
}
col = (1-refl-trans)*col + refl*refl_col + trans*trans_col;
}
// ambient occlusion
if (ao_samples && !from_inside)
{
Float miss = 0;
for (int i = 0; i < ao_samples; i++)
{
Vector dir = SphereDistribute(i, ao_samples, ao_angle, normal);
Ray ao_ray = Ray(P, dir);
Float dist = ao_distance;
const Shape *shape_in_way = top->nearest_intersection(shape, ao_ray, dist);
if (shape_in_way == NULL)
miss += 1.0;
else
miss += dist / ao_distance;
}
Float ao_intensity = miss / ao_samples;
col = col * ao_intensity;
}
}
Colour Raytracer::raytrace(Ray &ray, int depth, const Shape *origin_shape)
{
Float nearest_distance = Inf;
const Shape *nearest_shape = top->nearest_intersection(origin_shape, ray, nearest_distance);
if (nearest_shape == NULL)
return bg_colour;
else
{
const Vector P = ray.o + ray.dir * nearest_distance; // point of intersection
Vector normal = nearest_shape->normal(P);
bool from_inside = false;
// make shapes double sided
if (dot(normal, ray.dir) > 0.0)
{
normal = - normal;
from_inside = true;
}
// shading function
Colour col = PhongShader(nearest_shape, P, normal, ray.dir);
lightScatter(ray, nearest_shape, depth, P, normal, from_inside, col);
return col;
}
}
#ifndef NO_SIMD
void Raytracer::raytracePacket(RayPacket &rays, Colour *results)
{
union {
float nearest_distances[4];
mfloat4 m_nearest_distances;
};
mfloat4 mask;
const Shape *nearest_shapes[4];
static const Shape *origin_shapes[4] = {NULL, NULL, NULL, NULL};
m_nearest_distances = mInf;
top->packet_intersection(origin_shapes, rays, nearest_distances, nearest_shapes);
mask = mcmpneq(m_nearest_distances, mInf);
if (!mmovemask(mask))
{
for (int i = 0; i < 4; i++)
results[i] = bg_colour;
return;
}
const VectorPacket P = rays.o + rays.dir * m_nearest_distances; // point of intersection
VectorPacket normal;
for (int i = 0; i < 4; i++)
if (nearest_shapes[i] != NULL)
{
const Vector Pvecti = P.getVector(i);
normal.setVector(i, nearest_shapes[i]->normal(Pvecti));
}
// make shapes double sided
mfloat4 from_inside = mcmpgt(dot(normal, rays.dir), mZero);
normal.mx = mselect(from_inside, msub(mZero, normal.mx), normal.mx);
normal.my = mselect(from_inside, msub(mZero, normal.my), normal.my);
normal.mz = mselect(from_inside, msub(mZero, normal.mz), normal.mz);
// shading function
VectorPacket pres = PhongShader_packet(nearest_shapes, P, normal, rays.dir);
//pres.mx = mselect(mask, pres.mx, mset1(bg_colour.r));
//pres.my = mselect(mask, pres.my, mset1(bg_colour.g));
//pres.mz = mselect(mask, pres.mz, mset1(bg_colour.b));
for (int i = 0; i < 4; i++)
if (nearest_shapes[i] != NULL)
{
results[i] = pres.getVector(i);
lightScatter(rays[i], nearest_shapes[i], 0,
P.getVector(i), normal.getVector(i), (mmovemask(from_inside)>>i)&1,
results[i]);
}
else
results[i] = bg_colour;
}
#endif
NORETURN void *Raytracer::raytrace_worker(void *d)
{
static const int my_queue_size = 256;
Raytracer *rt = (Raytracer*)d;
Sample my_queue[my_queue_size];
Colour my_colours[my_queue_size];
int my_count;
Ray ray;
#ifndef NO_SIMD
RayPacket rays;
const bool can_use_packets = (rt->use_packets && rt->sampler->packetableSamples());
#endif
for (;;)
{
pthread_mutex_lock(&rt->sample_queue_mutex);
while (rt->sample_queue_count == 0)
{
if (rt->end_of_samples)
{
dbgmsg(4, "T thread [%d] exiting\n", pthread_self());
pthread_mutex_unlock(&rt->sample_queue_mutex);
pthread_exit(NULL);
}
pthread_cond_wait(&rt->sample_queue_cond, &rt->sample_queue_mutex);
}
if (rt->sample_queue_count >= my_queue_size)
my_count = my_queue_size;
else
my_count = rt->sample_queue_count;
rt->sample_queue_count -= my_count;
// copy samples to local queue
if (rt->sample_queue_pos + my_count >= rt->sample_queue_size)
{
register int c = rt->sample_queue_size - rt->sample_queue_pos;
memcpy(my_queue, rt->sample_queue + rt->sample_queue_pos, c*sizeof(Sample));
memcpy(my_queue + c, rt->sample_queue, (my_count - c)*sizeof(Sample));
rt->sample_queue_pos = my_count - c;
}
else
{
memcpy(my_queue, rt->sample_queue + rt->sample_queue_pos,
my_count*sizeof(Sample));
rt->sample_queue_pos += my_count;
}
if (rt->sample_queue_count <= my_queue_size*2)
pthread_cond_signal(&rt->worker_ready_cond);
pthread_mutex_unlock(&rt->sample_queue_mutex);
// do the work
#ifndef NO_SIMD
if (can_use_packets)
{
// packet ray tracing
assert((my_count & 3) == 0);
for (int i = 0; i < (my_count >> 2); i++)
{
rt->camera->makeRayPacket(my_queue + (i<<2), rays);
rt->raytracePacket(rays, my_colours + (i<<2));
}
}
else
#endif
{
// single ray tracing
for (int i = 0; i < my_count; i++)
{
ray = rt->camera->makeRay(my_queue[i]);
my_colours[i] = rt->raytrace(ray, 0, NULL);
}
}
// save the results
pthread_mutex_lock(&rt->sampler_mutex);
for (int i = 0; i < my_count; i++)
rt->sampler->saveSample(my_queue[i], my_colours[i]);
pthread_mutex_unlock(&rt->sampler_mutex);
}
}
void Raytracer::render()
{
if (!sampler || !camera || !top)
return;
static const int my_count_max = 256;
sample_queue_size = my_count_max*2*num_threads;
sample_queue = new Sample [sample_queue_size];
sample_queue_count = 0;
int my_count, my_pos;
int sampnum = 0, sampdone;
int phase = 1;
bool more_samples;
sampler->init();
// create workers
dbgmsg(1, "* using %d threads\n", num_threads);
pthread_t *threads = new pthread_t[num_threads];
dbgmsg(1, "* raytracing...\n");
while ( (sampnum = sampler->initSampleSet()) > 0 )
{
my_pos = 0;
sample_queue_pos = 0;
sampdone = 0;
end_of_samples = false;
for (int t = 0; t < num_threads; t++)
{
int rc = pthread_create(&threads[t], NULL, raytrace_worker, (void*)this);
if (rc) {
dbgmsg(0, "\nE pthread_create unsuccessful, return code was %d\n", rc);
exit(1);
}
}
dbgmsg(2, "- phase %d: 0%% done", phase);
pthread_mutex_lock(&sampler_mutex);
for (;;)
{
my_count = 0;
while ( more_samples = sampler->nextSample(&sample_queue[my_pos++]) )
{
my_count++;
if (my_pos >= sample_queue_size)
my_pos = 0;
if (my_count >= my_count_max)
break;
}
if (!more_samples && !my_count)
break;
pthread_mutex_unlock(&sampler_mutex);
pthread_mutex_lock(&sample_queue_mutex);
sample_queue_count += my_count;
// wait for workers if there is enough samples ready on queue
while (sample_queue_count > (2*num_threads-1)*my_count_max)
pthread_cond_wait(&worker_ready_cond, &sample_queue_mutex);
pthread_cond_signal(&sample_queue_cond);
pthread_mutex_unlock(&sample_queue_mutex);
sampdone += my_count;
dbgmsg(2, "\b\b\b\b\b\b\b\b%2d%% done", ((long)sampdone - sample_queue_count)*100/sampnum);
pthread_mutex_lock(&sampler_mutex);
if (!more_samples)
break;
}
dbgmsg(2, "\b\b\b\b\b\b\b\b100%% done\n");
pthread_mutex_unlock(&sampler_mutex);
// wait for workers
dbgmsg(2, "- waiting for threads to finish\n");
pthread_mutex_lock(&sample_queue_mutex);
end_of_samples = true;
while (sample_queue_count)
{
pthread_cond_broadcast(&sample_queue_cond);
pthread_mutex_unlock(&sample_queue_mutex);
pthread_mutex_lock(&sample_queue_mutex);
}
pthread_cond_broadcast(&sample_queue_cond);
pthread_mutex_unlock(&sample_queue_mutex);
for (int t = 0; t < num_threads; t++)
pthread_join(threads[t], NULL);
phase ++;
}
delete[] threads;
delete[] sample_queue;
}
void Raytracer::ambientOcclusion(int samples, Float distance, Float angle)
{
ao_samples = samples;
ao_distance = distance;
ao_angle = angle;
if (ao_distance == 0)
/* 0 ==> Inf */
ao_distance = Inf;
}