rewrite subsampling from old code to DefaultSampler pyrit
authorRadek Brich <radek.brich@devl.cz>
Tue, 22 Apr 2008 13:33:12 +0200
branchpyrit
changeset 77 dbe8438d5dca
parent 76 3b60fd0bea64
child 78 9569e9f35374
rewrite subsampling from old code to DefaultSampler render in phases, clean workers after each phase
TODO
src/raytracer.cc
src/sampler.cc
--- a/TODO	Mon Apr 21 19:35:27 2008 +0200
+++ b/TODO	Tue Apr 22 13:33:12 2008 +0200
@@ -4,18 +4,17 @@
 
 Future Plans
 ============
- * enhance Sampler to support subsampling
+ * textures (3D procedural, pixmaps)
+ * generalization: Camera "shader" (ray generator), surface shader and maybe light & background shaders
  * namespace
  * kd-tree:
    - optimize structures
    - optimize construction: use box-shape intersection instead of bounding boxes of shapes
    - save/load
- * textures (3D procedural, pixmaps)
  * Python binding for all classes
  * stochastic oversampling
  * absorbtion of refracted rays in dense materials (can be computed using shape distance and some 'absorbance' constant)
  * implement efficient AABB-ray intersection using Plucker coordinates
- * generalization: Camera "shader" (ray generator), surface shader and maybe light & background shaders
 
 New Classes?
 ============
--- a/src/raytracer.cc	Mon Apr 21 19:35:27 2008 +0200
+++ b/src/raytracer.cc	Tue Apr 22 13:33:12 2008 +0200
@@ -235,164 +235,6 @@
 	}
 }
 
-#if 0
-static void *renderrow(void *data)
-{
-	RenderrowData *d = (RenderrowData*) data;
-	const int subsample = d->rt->getSubsample();
-	const Float subsample2 = 1.0/(subsample*subsample);
-	const int oversample = d->rt->getOversample();
-	const int ww = d->w*3;
-	Vector3 dir = d->dfix;
-	for (int x = 0; x < d->w; x += subsample) {
-		// generate a ray from eye passing through this pixel
-		if (subsample > 1)
-		{
-			Colour ic;
-			// top-left
-			dir.normalize();
-			Ray ray(d->eye, dir);
-			Colour c1 = d->rt->raytrace(ray, 0, NULL);
-			// top-right
-			Vector3 tmpdir = dir + (subsample-1)*d->dx;
-			tmpdir.normalize();
-			ray.dir = tmpdir;
-			Colour c2 = d->rt->raytrace(ray, 0, NULL);
-			// bottom right
-			tmpdir += (subsample-1)*d->dy;
-			tmpdir.normalize();
-			ray.dir = tmpdir;
-			Colour c4 = d->rt->raytrace(ray, 0, NULL);
-			// bottom left
-			tmpdir = dir + (subsample-1)*d->dy;
-			tmpdir.normalize();
-			ray.dir = tmpdir;
-			Colour c3 = d->rt->raytrace(ray, 0, NULL);
-			// are the colors similar?
-			Float m = (c1-c2).mag2();
-			m = max(m, (c2-c3).mag2());
-			m = max(m, (c3-c4).mag2());
-			m = max(m, (c4-c1).mag2());
-			if (m < 0.001)
-			{
-				// interpolate
-				Float *i = d->iter;
-				for (int x = 0; x < subsample; x++)
-				{
-					for (int y = 0; y < subsample; y++)
-					{
-						ic = c1*(subsample-x)*(subsample-y)*subsample2
-							+ c2*(x)*(subsample-y)*subsample2
-							+ c3*(subsample-x)*(y)*subsample2
-							+ c4*(x)*(y)*subsample2;
-						*(i + ww*y) = ic.r;
-						*(i + ww*y + 1) = ic.g;
-						*(i + ww*y + 2) = ic.b;
-					}
-					i += 3;
-				}
-				d->iter = i;
-			}
-			else
-			{
-				// render all pixels
-				Vector3 tmpdir = dir;
-
-				if (oversample)
-				{
-					for (int x = 0; x < subsample; x++)
-					{
-						for (int y = 0; y < subsample; y++)
-						{
-							Vector3 tmp2dir = tmpdir + y*d->dy;
-							samplepixel(ic, tmp2dir, d, oversample);
-							*(d->iter + ww*y) = ic.r;
-							*(d->iter + ww*y + 1) = ic.g;
-							*(d->iter + ww*y + 2) = ic.b;
-						}
-						d->iter += 3;
-						tmpdir += d->dx;
-					}
-				}
-				else
-				{
-					/* this is so complex because it tries to reuse
-					   already computed corner pixels
-					   though, above code will also work for non-oversampling... */
-					// first column
-					*(d->iter) = c1.r;
-					*(d->iter + 1) = c1.g;
-					*(d->iter + 2) = c1.b;
-					for (int y = 1; y < subsample-1; y++)
-					{
-						Vector3 tmp2dir = tmpdir + y*d->dy;
-						tmp2dir.normalize();
-						ray.dir = tmp2dir;
-						ic = d->rt->raytrace(ray, 0, NULL);
-						*(d->iter + ww*y) = ic.r;
-						*(d->iter + ww*y + 1) = ic.g;
-						*(d->iter + ww*y + 2) = ic.b;
-					}
-					*(d->iter + ww*(subsample-1)) = c3.r;
-					*(d->iter + ww*(subsample-1) + 1) = c3.g;
-					*(d->iter + ww*(subsample-1) + 2) = c3.b;
-					d->iter += 3;
-					tmpdir += d->dx;
-					// middle
-					for (int x = 1; x < subsample-1; x++)
-					{
-						for (int y = 0; y < subsample; y++)
-						{
-							Vector3 tmp2dir = tmpdir + y*d->dy;
-							tmp2dir.normalize();
-							ray.dir = tmp2dir;
-							ic = d->rt->raytrace(ray, 0, NULL);
-							*(d->iter + ww*y) = ic.r;
-							*(d->iter + ww*y + 1) = ic.g;
-							*(d->iter + ww*y + 2) = ic.b;
-						}
-						d->iter += 3;
-						tmpdir += d->dx;
-					}
-					// last column
-					*(d->iter) = c2.r;
-					*(d->iter + 1) = c2.g;
-					*(d->iter + 2) = c2.b;
-					for (int y = 1; y < subsample-1; y++)
-					{
-						Vector3 tmp2dir = tmpdir + y*d->dy;
-						tmp2dir.normalize();
-						ray.dir = tmp2dir;
-						ic = d->rt->raytrace(ray, 0, NULL);
-						*(d->iter + ww*y) = ic.r;
-						*(d->iter + ww*y + 1) = ic.g;
-						*(d->iter + ww*y + 2) = ic.b;
-					}
-					*(d->iter + ww*(subsample-1)) = c4.r;
-					*(d->iter + ww*(subsample-1) + 1) = c4.g;
-					*(d->iter + ww*(subsample-1) + 2) = c4.b;
-					d->iter += 3;
-				}
-			}
-		}
-		else // subsample <= 1
-		{
-			Colour c;
-			samplepixel(c, dir, d, oversample);
-			// write color to buffer
-			*d->iter++ = c.r;
-			*d->iter++ = c.g;
-			*d->iter++ = c.b;
-		}
-		dir += d->dx*subsample;
-	}
-#ifdef PTHREADS
-	pthread_exit((void *)d);
-#endif
-	return (void *)d;
-}
-#endif
-
 void *Raytracer::raytrace_worker(void *d)
 {
 	static const int my_queue_size = 256;
@@ -462,34 +304,40 @@
 	static const int my_count_max = 256;
 	sample_queue_size = my_count_max*2*num_threads;
 	sample_queue = new Sample [sample_queue_size];
-	sample_queue_pos = 0;
 	sample_queue_count = 0;
-	end_of_samples = false;
-	int my_count, my_pos = 0;
+	int my_count, my_pos;
 	int sampnum = 0, sampdone;
+	int phase = 1;
 	bool more_samples;
 
 	sampler->init();
 
 	// create workers
-	dbgmsg(1, "* running %d threads\n", num_threads);
+	dbgmsg(1, "* using %d threads\n", num_threads);
 	pthread_t threads[num_threads];
-	for (int t = 0; t < num_threads; t++)
-	{
-		int rc = pthread_create(&threads[t], NULL, raytrace_worker, (void*)this);
-		if (rc) {
-			dbgmsg(0, "\nE pthread_create unsuccessful, return code was %d\n", rc);
-			exit(1);
-		}
-	}
 
 	dbgmsg(1, "* raytracing...\n");
-	dbgmsg(2, "-  0%% done");
 
-	pthread_mutex_lock(&sampler_mutex);
 	while ( (sampnum = sampler->initSampleSet()) > 0 )
 	{
+		my_pos = 0;
+		sample_queue_pos = 0;
 		sampdone = 0;
+		end_of_samples = false;
+
+		for (int t = 0; t < num_threads; t++)
+		{
+			int rc = pthread_create(&threads[t], NULL, raytrace_worker, (void*)this);
+			if (rc) {
+				dbgmsg(0, "\nE pthread_create unsuccessful, return code was %d\n", rc);
+				exit(1);
+			}
+		}
+
+		dbgmsg(2, "phase %d:  0%% done", phase);
+
+		pthread_mutex_lock(&sampler_mutex);
+
 		for (;;)
 		{
 			my_count = 0;
@@ -519,27 +367,33 @@
 			dbgmsg(2, "\b\b\b\b\b\b\b\b%2d%% done", (sampdone - sample_queue_count)*100/sampnum);
 
 			pthread_mutex_lock(&sampler_mutex);
+
+			if (!more_samples)
+				break;
 		}
 		dbgmsg(2, "\b\b\b\b\b\b\b\b100%% done\n");
-	}
-	pthread_mutex_unlock(&sampler_mutex);
+
+		pthread_mutex_unlock(&sampler_mutex);
+
+		// wait for workers
+		dbgmsg(2, "- waiting for threads to finish\n");
 
-	// wait for workers
-	dbgmsg(2, "- waiting for threads to finish\n");
-
-	pthread_mutex_lock(&sample_queue_mutex);
-	end_of_samples = true;
-	while (sample_queue_count)
-	{
+		pthread_mutex_lock(&sample_queue_mutex);
+		end_of_samples = true;
+		while (sample_queue_count)
+		{
+			pthread_cond_broadcast(&sample_queue_cond);
+			pthread_mutex_unlock(&sample_queue_mutex);
+			pthread_mutex_lock(&sample_queue_mutex);
+		}
 		pthread_cond_broadcast(&sample_queue_cond);
 		pthread_mutex_unlock(&sample_queue_mutex);
-		pthread_mutex_lock(&sample_queue_mutex);
+
+		for (int t = 0; t < num_threads; t++)
+			pthread_join(threads[t], NULL);
+
+		phase ++;
 	}
-	pthread_cond_broadcast(&sample_queue_cond);
-	pthread_mutex_unlock(&sample_queue_mutex);
-
-	for (int t = 0; t < num_threads; t++)
-		pthread_join(threads[t], NULL);
 
 	delete[] sample_queue;
 }
--- a/src/sampler.cc	Mon Apr 21 19:35:27 2008 +0200
+++ b/src/sampler.cc	Tue Apr 22 13:33:12 2008 +0200
@@ -40,91 +40,222 @@
 	const int samples = gridsamples[oversample];
 	if ( phase == 0 )
 	{
-		phase++;
-		sx = -1;
-		return w*h*samples;
+		if (subsample > 1)
+		{
+			phase = 1;
+			sx = -1;
+			return (w/subsample+1)*(h/subsample+1);
+		}
+		else
+		{
+			phase = 2;
+			sx = -1;
+			return w*h*samples;
+		}
 	}
-	else if ( phase == 1 && oversample )
+	if ( phase == 1 )
 	{
-		phase = -1;
+		// finalize subsampling
+		const Float subsample2 = 1.0/(subsample*subsample);
+		int num_samples = 0;
+		Colour ic;
+		phase = 2;
+		sx = -1;
+		for (int y = 0; y < h/subsample; y++)
+			for (int x = 0; x < w/subsample; x++)
+			{
+				int x1 = x*subsample;
+				int y1 = y*subsample;
+				int x2 = (x+1)*subsample;
+				int y2 = (y+1)*subsample;
+				if (x2 > w-1)	x2 = w-1;
+				if (y2 > h-1)	y2 = h-1;
+				if (x1 == x2 || y1 == y2)
+					continue;
+				Float *p;
+				p = buffer + 3*(y1*w + x1);
+				Colour c1(*p, *(p+1), *(p+2));
+				p = buffer + 3*(y1*w + x2);
+				Colour c2(*p, *(p+1), *(p+2));
+				p = buffer + 3*(y2*w + x1);
+				Colour c3(*p, *(p+1), *(p+2));
+				p = buffer + 3*(y2*w + x2);
+				Colour c4(*p, *(p+1), *(p+2));
+				Float m = (c1-c2).mag2();
+				m = max(m, (c2-c3).mag2());
+				m = max(m, (c3-c4).mag2());
+				m = max(m, (c4-c1).mag2());
+				if (m < 0.002)
+				{
+					// interpolate
+					for (int i = 0; i < subsample; i++)
+						for (int j = 0; j < subsample; j++)
+						{
+							ic = c1*(subsample-i)*(subsample-j)*subsample2
+								+ c2*(i)*(subsample-j)*subsample2
+								+ c3*(subsample-i)*(j)*subsample2
+								+ c4*(i)*(j)*subsample2;
+							p = buffer + 3*((y1+j)*w + x1+i);
+							*(p + 0) = ic.r;
+							*(p + 1) = oversample ? -ic.g : ic.g;
+							*(p + 2) = ic.b;
+						}
+				}
+				else
+				{
+					// mark as to be computed
+					num_samples += subsample * subsample;
+					for (int i = 0; i < subsample; i++)
+						for (int j = 0; j < subsample; j++)
+							if (oversample || i != 0 || j != 0)
+								*(buffer + 3*((y1+j)*w + x1+i)) = -1.;
+				}
+			}
+		return num_samples;
+	}
+	if ( phase == 2 && oversample )
+	{
+		// finalize oversampling
 		Float *buf;
-		for (buf = buffer; buf != buffer + w*h*3; buf++)
-			*buf = *buf * (1.0/samples);
-		return 0;
+		if (subsample > 1)
+			for (buf = buffer; buf != buffer + w*h*3; buf += 3)
+				if (*(buf+1) < 0)
+				{
+					// interpolated
+					*(buf+1) = -*(buf+1);
+				}
+				else
+				{
+					*buf = *buf * (1.0/samples);
+					*(buf+1) = *(buf+1) * (1.0/samples);
+					*(buf+2) = *(buf+2) * (1.0/samples);
+				}
+		else
+			for (buf = buffer; buf != buffer + w*h*3; buf++)
+				*buf = *buf * (1.0/samples);
 	}
-	else
-	{
-		phase = -1;
-		return 0;
-	}
+	phase = -1;
+	return 0;
 }
 
 bool DefaultSampler::nextSample(Sample* s)
 {
-	/* grid oversampling */
-	static const int gridsamples[] = {1,5,9,16};
-	static const Float osa5x[] = {0.0, -0.4, +0.4, +0.4, -0.4};
-	static const Float osa5y[] = {0.0, -0.4, -0.4, +0.4, +0.4};
-	static const Float osa9x[] = {-0.34,  0.00, +0.34,
-		-0.34,  0.00, +0.34, -0.34,  0.00, +0.34};
-	static const Float osa9y[] = {-0.34, -0.34, -0.34,
-			0.00,  0.00,  0.00, +0.34, +0.34, +0.34};
-	static const Float osa16x[] = {-0.375, -0.125, +0.125, +0.375,
-		-0.375, -0.125, +0.125, +0.375, -0.375, -0.125, +0.125, +0.375,
-		-0.375, -0.125, +0.125, +0.375};
-	static const Float osa16y[] = {-0.375, -0.375, -0.375, -0.375,
-		-0.125, -0.125, -0.125, -0.125, +0.125, +0.125, +0.125, +0.125,
-		+0.375, +0.375, +0.375, +0.375};
-	static const Float *osaSx[] = {NULL, osa5x, osa9x, osa16x};
-	static const Float *osaSy[] = {NULL, osa5y, osa9y, osa16y};
-	const int samples = gridsamples[oversample];
-	const Float *osax = osaSx[oversample];
-	const Float *osay = osaSy[oversample];
-
-	if (sx < 0)
+	if (phase == 1)
 	{
-		// first sample
-		s->x = -(Float)w/h/2.0;
-		s->y = -0.5;
-		sx = 0;
-		sy = 0;
-		osa_samp = 0;
-	}
-	else
-	{
-		osa_samp++;
-
-		if (oversample && oversample <= 3 && osa_samp < samples)
+		// subsampling
+		if (sx < 0)
 		{
-			s->x = osax[osa_samp]/h + (Float)sx/h - (Float)w/h/2.0;
-			s->y = osay[osa_samp]/h + (Float)sy/h - 0.5;
+			// first sample
+			s->x = -(Float)w/h/2.0;
+			s->y = -0.5;
+			sx = 0;
+			sy = 0;
+			osa_samp = 0;
 		}
 		else
 		{
-			sx++;
-			if (sx >= w)
+			if (sx == w-1)
 			{
+				if (sy == h-1)
+					return false;
+				sy += subsample;
+				if (sy > h-1)
+					sy = h-1;
 				sx = 0;
-				sy++;
 			}
-
-			if (sy >= h)
-				return false;
+			else
+			{
+				sx += subsample;
+				if (sx > w-1)
+					sx = w-1;
+			}
 
 			s->x = (Float)sx/h - (Float)w/h/2.0;
 			s->y = (Float)sy/h - 0.5;
-			osa_samp = 0;
 		}
 	}
-
-	if (osa_samp == 0 && oversample && oversample <= 3)
+	else if (phase == 2)
 	{
-		s->x += osax[0]/h;
-		s->y += osay[0]/h;
-		Float *buf = buffer + 3*(sy*w + sx);
-		*(buf++) = 0;
-		*(buf++) = 0;
-		*(buf++) = 0;
+		/* grid oversampling */
+		static const int gridsamples[] = {1,5,9,16};
+		static const Float osa5x[] = {0.0, -0.4, +0.4, +0.4, -0.4};
+		static const Float osa5y[] = {0.0, -0.4, -0.4, +0.4, +0.4};
+		static const Float osa9x[] = {-0.34,  0.00, +0.34,
+			-0.34,  0.00, +0.34, -0.34,  0.00, +0.34};
+		static const Float osa9y[] = {-0.34, -0.34, -0.34,
+				0.00,  0.00,  0.00, +0.34, +0.34, +0.34};
+		static const Float osa16x[] = {-0.375, -0.125, +0.125, +0.375,
+			-0.375, -0.125, +0.125, +0.375, -0.375, -0.125, +0.125, +0.375,
+			-0.375, -0.125, +0.125, +0.375};
+		static const Float osa16y[] = {-0.375, -0.375, -0.375, -0.375,
+			-0.125, -0.125, -0.125, -0.125, +0.125, +0.125, +0.125, +0.125,
+			+0.375, +0.375, +0.375, +0.375};
+		static const Float *osaSx[] = {NULL, osa5x, osa9x, osa16x};
+		static const Float *osaSy[] = {NULL, osa5y, osa9y, osa16y};
+		const int samples = gridsamples[oversample];
+		const Float *osax = osaSx[oversample];
+		const Float *osay = osaSy[oversample];
+
+		if (sx < 0)
+		{
+			// first sample
+			s->x = -(Float)w/h/2.0;
+			s->y = -0.5;
+			sx = 0;
+			sy = 0;
+			osa_samp = 0;
+		}
+		else
+		{
+			osa_samp++;
+
+			if (oversample && oversample <= 3 && osa_samp < samples)
+			{
+				s->x = osax[osa_samp]/h + (Float)sx/h - (Float)w/h/2.0;
+				s->y = osay[osa_samp]/h + (Float)sy/h - 0.5;
+			}
+			else
+			{
+				sx++;
+				if (sx >= w)
+				{
+					sx = 0;
+					sy++;
+				}
+				if (sy >= h)
+					return false;
+
+				if (subsample > 1)
+				{
+					// find next not interpolated pixel
+					while ( *(buffer + 3*(sy*w + sx)) >= 0. )
+					{
+						sx++;
+						if (sx >= w)
+						{
+							sx = 0;
+							sy++;
+						}
+						if (sy >= h)
+							return false;
+					}
+				}
+
+				s->x = (Float)sx/h - (Float)w/h/2.0;
+				s->y = (Float)sy/h - 0.5;
+				osa_samp = 0;
+			}
+		}
+
+		if (osa_samp == 0 && oversample && oversample <= 3)
+		{
+			s->x += osax[0]/h;
+			s->y += osay[0]/h;
+			Float *buf = buffer + 3*(sy*w + sx);
+			*(buf++) = 0;
+			*(buf++) = 0;
+			*(buf++) = 0;
+		}
 	}
 
 	s->sx = sx;
@@ -137,7 +268,7 @@
 void DefaultSampler::saveSample(Sample &samp, Colour &col)
 {
 	Float *buf = buffer + 3*(samp.sy * w + samp.sx);
-	if (oversample)
+	if (phase == 2 && oversample)
 	{
 		*(buf+0) += col.r;
 		*(buf+1) += col.g;