--- a/src/raytracer.cc Mon Apr 21 19:35:27 2008 +0200
+++ b/src/raytracer.cc Tue Apr 22 13:33:12 2008 +0200
@@ -235,164 +235,6 @@
}
}
-#if 0
-static void *renderrow(void *data)
-{
- RenderrowData *d = (RenderrowData*) data;
- const int subsample = d->rt->getSubsample();
- const Float subsample2 = 1.0/(subsample*subsample);
- const int oversample = d->rt->getOversample();
- const int ww = d->w*3;
- Vector3 dir = d->dfix;
- for (int x = 0; x < d->w; x += subsample) {
- // generate a ray from eye passing through this pixel
- if (subsample > 1)
- {
- Colour ic;
- // top-left
- dir.normalize();
- Ray ray(d->eye, dir);
- Colour c1 = d->rt->raytrace(ray, 0, NULL);
- // top-right
- Vector3 tmpdir = dir + (subsample-1)*d->dx;
- tmpdir.normalize();
- ray.dir = tmpdir;
- Colour c2 = d->rt->raytrace(ray, 0, NULL);
- // bottom right
- tmpdir += (subsample-1)*d->dy;
- tmpdir.normalize();
- ray.dir = tmpdir;
- Colour c4 = d->rt->raytrace(ray, 0, NULL);
- // bottom left
- tmpdir = dir + (subsample-1)*d->dy;
- tmpdir.normalize();
- ray.dir = tmpdir;
- Colour c3 = d->rt->raytrace(ray, 0, NULL);
- // are the colors similar?
- Float m = (c1-c2).mag2();
- m = max(m, (c2-c3).mag2());
- m = max(m, (c3-c4).mag2());
- m = max(m, (c4-c1).mag2());
- if (m < 0.001)
- {
- // interpolate
- Float *i = d->iter;
- for (int x = 0; x < subsample; x++)
- {
- for (int y = 0; y < subsample; y++)
- {
- ic = c1*(subsample-x)*(subsample-y)*subsample2
- + c2*(x)*(subsample-y)*subsample2
- + c3*(subsample-x)*(y)*subsample2
- + c4*(x)*(y)*subsample2;
- *(i + ww*y) = ic.r;
- *(i + ww*y + 1) = ic.g;
- *(i + ww*y + 2) = ic.b;
- }
- i += 3;
- }
- d->iter = i;
- }
- else
- {
- // render all pixels
- Vector3 tmpdir = dir;
-
- if (oversample)
- {
- for (int x = 0; x < subsample; x++)
- {
- for (int y = 0; y < subsample; y++)
- {
- Vector3 tmp2dir = tmpdir + y*d->dy;
- samplepixel(ic, tmp2dir, d, oversample);
- *(d->iter + ww*y) = ic.r;
- *(d->iter + ww*y + 1) = ic.g;
- *(d->iter + ww*y + 2) = ic.b;
- }
- d->iter += 3;
- tmpdir += d->dx;
- }
- }
- else
- {
- /* this is so complex because it tries to reuse
- already computed corner pixels
- though, above code will also work for non-oversampling... */
- // first column
- *(d->iter) = c1.r;
- *(d->iter + 1) = c1.g;
- *(d->iter + 2) = c1.b;
- for (int y = 1; y < subsample-1; y++)
- {
- Vector3 tmp2dir = tmpdir + y*d->dy;
- tmp2dir.normalize();
- ray.dir = tmp2dir;
- ic = d->rt->raytrace(ray, 0, NULL);
- *(d->iter + ww*y) = ic.r;
- *(d->iter + ww*y + 1) = ic.g;
- *(d->iter + ww*y + 2) = ic.b;
- }
- *(d->iter + ww*(subsample-1)) = c3.r;
- *(d->iter + ww*(subsample-1) + 1) = c3.g;
- *(d->iter + ww*(subsample-1) + 2) = c3.b;
- d->iter += 3;
- tmpdir += d->dx;
- // middle
- for (int x = 1; x < subsample-1; x++)
- {
- for (int y = 0; y < subsample; y++)
- {
- Vector3 tmp2dir = tmpdir + y*d->dy;
- tmp2dir.normalize();
- ray.dir = tmp2dir;
- ic = d->rt->raytrace(ray, 0, NULL);
- *(d->iter + ww*y) = ic.r;
- *(d->iter + ww*y + 1) = ic.g;
- *(d->iter + ww*y + 2) = ic.b;
- }
- d->iter += 3;
- tmpdir += d->dx;
- }
- // last column
- *(d->iter) = c2.r;
- *(d->iter + 1) = c2.g;
- *(d->iter + 2) = c2.b;
- for (int y = 1; y < subsample-1; y++)
- {
- Vector3 tmp2dir = tmpdir + y*d->dy;
- tmp2dir.normalize();
- ray.dir = tmp2dir;
- ic = d->rt->raytrace(ray, 0, NULL);
- *(d->iter + ww*y) = ic.r;
- *(d->iter + ww*y + 1) = ic.g;
- *(d->iter + ww*y + 2) = ic.b;
- }
- *(d->iter + ww*(subsample-1)) = c4.r;
- *(d->iter + ww*(subsample-1) + 1) = c4.g;
- *(d->iter + ww*(subsample-1) + 2) = c4.b;
- d->iter += 3;
- }
- }
- }
- else // subsample <= 1
- {
- Colour c;
- samplepixel(c, dir, d, oversample);
- // write color to buffer
- *d->iter++ = c.r;
- *d->iter++ = c.g;
- *d->iter++ = c.b;
- }
- dir += d->dx*subsample;
- }
-#ifdef PTHREADS
- pthread_exit((void *)d);
-#endif
- return (void *)d;
-}
-#endif
-
void *Raytracer::raytrace_worker(void *d)
{
static const int my_queue_size = 256;
@@ -462,34 +304,40 @@
static const int my_count_max = 256;
sample_queue_size = my_count_max*2*num_threads;
sample_queue = new Sample [sample_queue_size];
- sample_queue_pos = 0;
sample_queue_count = 0;
- end_of_samples = false;
- int my_count, my_pos = 0;
+ int my_count, my_pos;
int sampnum = 0, sampdone;
+ int phase = 1;
bool more_samples;
sampler->init();
// create workers
- dbgmsg(1, "* running %d threads\n", num_threads);
+ dbgmsg(1, "* using %d threads\n", num_threads);
pthread_t threads[num_threads];
- for (int t = 0; t < num_threads; t++)
- {
- int rc = pthread_create(&threads[t], NULL, raytrace_worker, (void*)this);
- if (rc) {
- dbgmsg(0, "\nE pthread_create unsuccessful, return code was %d\n", rc);
- exit(1);
- }
- }
dbgmsg(1, "* raytracing...\n");
- dbgmsg(2, "- 0%% done");
- pthread_mutex_lock(&sampler_mutex);
while ( (sampnum = sampler->initSampleSet()) > 0 )
{
+ my_pos = 0;
+ sample_queue_pos = 0;
sampdone = 0;
+ end_of_samples = false;
+
+ for (int t = 0; t < num_threads; t++)
+ {
+ int rc = pthread_create(&threads[t], NULL, raytrace_worker, (void*)this);
+ if (rc) {
+ dbgmsg(0, "\nE pthread_create unsuccessful, return code was %d\n", rc);
+ exit(1);
+ }
+ }
+
+ dbgmsg(2, "phase %d: 0%% done", phase);
+
+ pthread_mutex_lock(&sampler_mutex);
+
for (;;)
{
my_count = 0;
@@ -519,27 +367,33 @@
dbgmsg(2, "\b\b\b\b\b\b\b\b%2d%% done", (sampdone - sample_queue_count)*100/sampnum);
pthread_mutex_lock(&sampler_mutex);
+
+ if (!more_samples)
+ break;
}
dbgmsg(2, "\b\b\b\b\b\b\b\b100%% done\n");
- }
- pthread_mutex_unlock(&sampler_mutex);
+
+ pthread_mutex_unlock(&sampler_mutex);
+
+ // wait for workers
+ dbgmsg(2, "- waiting for threads to finish\n");
- // wait for workers
- dbgmsg(2, "- waiting for threads to finish\n");
-
- pthread_mutex_lock(&sample_queue_mutex);
- end_of_samples = true;
- while (sample_queue_count)
- {
+ pthread_mutex_lock(&sample_queue_mutex);
+ end_of_samples = true;
+ while (sample_queue_count)
+ {
+ pthread_cond_broadcast(&sample_queue_cond);
+ pthread_mutex_unlock(&sample_queue_mutex);
+ pthread_mutex_lock(&sample_queue_mutex);
+ }
pthread_cond_broadcast(&sample_queue_cond);
pthread_mutex_unlock(&sample_queue_mutex);
- pthread_mutex_lock(&sample_queue_mutex);
+
+ for (int t = 0; t < num_threads; t++)
+ pthread_join(threads[t], NULL);
+
+ phase ++;
}
- pthread_cond_broadcast(&sample_queue_cond);
- pthread_mutex_unlock(&sample_queue_mutex);
-
- for (int t = 0; t < num_threads; t++)
- pthread_join(threads[t], NULL);
delete[] sample_queue;
}
--- a/src/sampler.cc Mon Apr 21 19:35:27 2008 +0200
+++ b/src/sampler.cc Tue Apr 22 13:33:12 2008 +0200
@@ -40,91 +40,222 @@
const int samples = gridsamples[oversample];
if ( phase == 0 )
{
- phase++;
- sx = -1;
- return w*h*samples;
+ if (subsample > 1)
+ {
+ phase = 1;
+ sx = -1;
+ return (w/subsample+1)*(h/subsample+1);
+ }
+ else
+ {
+ phase = 2;
+ sx = -1;
+ return w*h*samples;
+ }
}
- else if ( phase == 1 && oversample )
+ if ( phase == 1 )
{
- phase = -1;
+ // finalize subsampling
+ const Float subsample2 = 1.0/(subsample*subsample);
+ int num_samples = 0;
+ Colour ic;
+ phase = 2;
+ sx = -1;
+ for (int y = 0; y < h/subsample; y++)
+ for (int x = 0; x < w/subsample; x++)
+ {
+ int x1 = x*subsample;
+ int y1 = y*subsample;
+ int x2 = (x+1)*subsample;
+ int y2 = (y+1)*subsample;
+ if (x2 > w-1) x2 = w-1;
+ if (y2 > h-1) y2 = h-1;
+ if (x1 == x2 || y1 == y2)
+ continue;
+ Float *p;
+ p = buffer + 3*(y1*w + x1);
+ Colour c1(*p, *(p+1), *(p+2));
+ p = buffer + 3*(y1*w + x2);
+ Colour c2(*p, *(p+1), *(p+2));
+ p = buffer + 3*(y2*w + x1);
+ Colour c3(*p, *(p+1), *(p+2));
+ p = buffer + 3*(y2*w + x2);
+ Colour c4(*p, *(p+1), *(p+2));
+ Float m = (c1-c2).mag2();
+ m = max(m, (c2-c3).mag2());
+ m = max(m, (c3-c4).mag2());
+ m = max(m, (c4-c1).mag2());
+ if (m < 0.002)
+ {
+ // interpolate
+ for (int i = 0; i < subsample; i++)
+ for (int j = 0; j < subsample; j++)
+ {
+ ic = c1*(subsample-i)*(subsample-j)*subsample2
+ + c2*(i)*(subsample-j)*subsample2
+ + c3*(subsample-i)*(j)*subsample2
+ + c4*(i)*(j)*subsample2;
+ p = buffer + 3*((y1+j)*w + x1+i);
+ *(p + 0) = ic.r;
+ *(p + 1) = oversample ? -ic.g : ic.g;
+ *(p + 2) = ic.b;
+ }
+ }
+ else
+ {
+ // mark as to be computed
+ num_samples += subsample * subsample;
+ for (int i = 0; i < subsample; i++)
+ for (int j = 0; j < subsample; j++)
+ if (oversample || i != 0 || j != 0)
+ *(buffer + 3*((y1+j)*w + x1+i)) = -1.;
+ }
+ }
+ return num_samples;
+ }
+ if ( phase == 2 && oversample )
+ {
+ // finalize oversampling
Float *buf;
- for (buf = buffer; buf != buffer + w*h*3; buf++)
- *buf = *buf * (1.0/samples);
- return 0;
+ if (subsample > 1)
+ for (buf = buffer; buf != buffer + w*h*3; buf += 3)
+ if (*(buf+1) < 0)
+ {
+ // interpolated
+ *(buf+1) = -*(buf+1);
+ }
+ else
+ {
+ *buf = *buf * (1.0/samples);
+ *(buf+1) = *(buf+1) * (1.0/samples);
+ *(buf+2) = *(buf+2) * (1.0/samples);
+ }
+ else
+ for (buf = buffer; buf != buffer + w*h*3; buf++)
+ *buf = *buf * (1.0/samples);
}
- else
- {
- phase = -1;
- return 0;
- }
+ phase = -1;
+ return 0;
}
bool DefaultSampler::nextSample(Sample* s)
{
- /* grid oversampling */
- static const int gridsamples[] = {1,5,9,16};
- static const Float osa5x[] = {0.0, -0.4, +0.4, +0.4, -0.4};
- static const Float osa5y[] = {0.0, -0.4, -0.4, +0.4, +0.4};
- static const Float osa9x[] = {-0.34, 0.00, +0.34,
- -0.34, 0.00, +0.34, -0.34, 0.00, +0.34};
- static const Float osa9y[] = {-0.34, -0.34, -0.34,
- 0.00, 0.00, 0.00, +0.34, +0.34, +0.34};
- static const Float osa16x[] = {-0.375, -0.125, +0.125, +0.375,
- -0.375, -0.125, +0.125, +0.375, -0.375, -0.125, +0.125, +0.375,
- -0.375, -0.125, +0.125, +0.375};
- static const Float osa16y[] = {-0.375, -0.375, -0.375, -0.375,
- -0.125, -0.125, -0.125, -0.125, +0.125, +0.125, +0.125, +0.125,
- +0.375, +0.375, +0.375, +0.375};
- static const Float *osaSx[] = {NULL, osa5x, osa9x, osa16x};
- static const Float *osaSy[] = {NULL, osa5y, osa9y, osa16y};
- const int samples = gridsamples[oversample];
- const Float *osax = osaSx[oversample];
- const Float *osay = osaSy[oversample];
-
- if (sx < 0)
+ if (phase == 1)
{
- // first sample
- s->x = -(Float)w/h/2.0;
- s->y = -0.5;
- sx = 0;
- sy = 0;
- osa_samp = 0;
- }
- else
- {
- osa_samp++;
-
- if (oversample && oversample <= 3 && osa_samp < samples)
+ // subsampling
+ if (sx < 0)
{
- s->x = osax[osa_samp]/h + (Float)sx/h - (Float)w/h/2.0;
- s->y = osay[osa_samp]/h + (Float)sy/h - 0.5;
+ // first sample
+ s->x = -(Float)w/h/2.0;
+ s->y = -0.5;
+ sx = 0;
+ sy = 0;
+ osa_samp = 0;
}
else
{
- sx++;
- if (sx >= w)
+ if (sx == w-1)
{
+ if (sy == h-1)
+ return false;
+ sy += subsample;
+ if (sy > h-1)
+ sy = h-1;
sx = 0;
- sy++;
}
-
- if (sy >= h)
- return false;
+ else
+ {
+ sx += subsample;
+ if (sx > w-1)
+ sx = w-1;
+ }
s->x = (Float)sx/h - (Float)w/h/2.0;
s->y = (Float)sy/h - 0.5;
- osa_samp = 0;
}
}
-
- if (osa_samp == 0 && oversample && oversample <= 3)
+ else if (phase == 2)
{
- s->x += osax[0]/h;
- s->y += osay[0]/h;
- Float *buf = buffer + 3*(sy*w + sx);
- *(buf++) = 0;
- *(buf++) = 0;
- *(buf++) = 0;
+ /* grid oversampling */
+ static const int gridsamples[] = {1,5,9,16};
+ static const Float osa5x[] = {0.0, -0.4, +0.4, +0.4, -0.4};
+ static const Float osa5y[] = {0.0, -0.4, -0.4, +0.4, +0.4};
+ static const Float osa9x[] = {-0.34, 0.00, +0.34,
+ -0.34, 0.00, +0.34, -0.34, 0.00, +0.34};
+ static const Float osa9y[] = {-0.34, -0.34, -0.34,
+ 0.00, 0.00, 0.00, +0.34, +0.34, +0.34};
+ static const Float osa16x[] = {-0.375, -0.125, +0.125, +0.375,
+ -0.375, -0.125, +0.125, +0.375, -0.375, -0.125, +0.125, +0.375,
+ -0.375, -0.125, +0.125, +0.375};
+ static const Float osa16y[] = {-0.375, -0.375, -0.375, -0.375,
+ -0.125, -0.125, -0.125, -0.125, +0.125, +0.125, +0.125, +0.125,
+ +0.375, +0.375, +0.375, +0.375};
+ static const Float *osaSx[] = {NULL, osa5x, osa9x, osa16x};
+ static const Float *osaSy[] = {NULL, osa5y, osa9y, osa16y};
+ const int samples = gridsamples[oversample];
+ const Float *osax = osaSx[oversample];
+ const Float *osay = osaSy[oversample];
+
+ if (sx < 0)
+ {
+ // first sample
+ s->x = -(Float)w/h/2.0;
+ s->y = -0.5;
+ sx = 0;
+ sy = 0;
+ osa_samp = 0;
+ }
+ else
+ {
+ osa_samp++;
+
+ if (oversample && oversample <= 3 && osa_samp < samples)
+ {
+ s->x = osax[osa_samp]/h + (Float)sx/h - (Float)w/h/2.0;
+ s->y = osay[osa_samp]/h + (Float)sy/h - 0.5;
+ }
+ else
+ {
+ sx++;
+ if (sx >= w)
+ {
+ sx = 0;
+ sy++;
+ }
+ if (sy >= h)
+ return false;
+
+ if (subsample > 1)
+ {
+ // find next not interpolated pixel
+ while ( *(buffer + 3*(sy*w + sx)) >= 0. )
+ {
+ sx++;
+ if (sx >= w)
+ {
+ sx = 0;
+ sy++;
+ }
+ if (sy >= h)
+ return false;
+ }
+ }
+
+ s->x = (Float)sx/h - (Float)w/h/2.0;
+ s->y = (Float)sy/h - 0.5;
+ osa_samp = 0;
+ }
+ }
+
+ if (osa_samp == 0 && oversample && oversample <= 3)
+ {
+ s->x += osax[0]/h;
+ s->y += osay[0]/h;
+ Float *buf = buffer + 3*(sy*w + sx);
+ *(buf++) = 0;
+ *(buf++) = 0;
+ *(buf++) = 0;
+ }
}
s->sx = sx;
@@ -137,7 +268,7 @@
void DefaultSampler::saveSample(Sample &samp, Colour &col)
{
Float *buf = buffer + 3*(samp.sy * w + samp.sx);
- if (oversample)
+ if (phase == 2 && oversample)
{
*(buf+0) += col.r;
*(buf+1) += col.g;