use SSE for float to char image buffer conversion pyrit
author Radek Brich <radek.brich@devl.cz>
Tue, 29 Apr 2008 13:56:29 +0200 (2008-04-29)
branch pyrit
changeset 89 fcf1487b398b
parent 88 f7edb3b90816
child 90 f6a72eb99631
use SSE for float to char image buffer conversion
ccdemos/common_sdl.h
src/pixmap.cc
src/raytracermodule.cc
--- a/ccdemos/common_sdl.h	Mon Apr 28 11:44:11 2008 +0200
+++ b/ccdemos/common_sdl.h	Tue Apr 29 13:56:29 2008 +0200
@@ -28,9 +28,10 @@
 			return;
 
 	Uint32 *bufp = (Uint32 *)screen->pixels;
-	unsigned char c[3];
 	for (Float *fd = render_buffer; fd != render_buffer + w*h*3; fd += 3)
 	{
+#ifdef NO_SSE
+		unsigned char c[3];
 		for (int i = 0; i < 3; i++)
 		{
 			if (fd[i] > 1.0)
@@ -39,6 +40,11 @@
 				c[i] = (unsigned char)(fd[i] * 255.0);
 		}
 		*bufp = SDL_MapRGB(screen->format, c[0], c[1], c[2]);
+#else
+		__m64 m = _mm_cvtps_pi8(_mm_mul_ps(_mm_set_ps1(255.0),
+			_mm_min_ps(mOne, _mm_set_ps(0, fd[2],fd[1],fd[0]))));
+		*bufp = SDL_MapRGB(screen->format, ((char*)&m)[0], ((char*)&m)[1], ((char*)&m)[2]);
+#endif
 		bufp++;
 	}
 
--- a/src/pixmap.cc	Mon Apr 28 11:44:11 2008 +0200
+++ b/src/pixmap.cc	Tue Apr 29 13:56:29 2008 +0200
@@ -36,13 +36,24 @@
 {
 	unsigned char *cdata = new unsigned char[w*h*3];
 	Float *fd = fdata;
+
+#ifdef NO_SSE
 	for (unsigned char *cd = cdata; cd != cdata + w*h*3; cd++, fd++)
-	{
 		if (*fd > 1.0)
 			*cd = 255;
 		else
 			*cd = (unsigned char)(*fd * 255.0);
+#else
+	__m128 cmax = _mm_set_ps1(255.0);
+	__m64 m;
+	for (unsigned char *cd = cdata; cd < cdata + w*h*3; cd += 4, fd += 4)
+	{
+		m = _mm_cvtps_pi8(_mm_mul_ps(cmax,
+			_mm_min_ps(mOne, _mm_set_ps(fd[3],fd[2],fd[1],fd[0]))));
+		memcpy(cd, &m, 4);
 	}
+#endif
+
 	return cdata;
 }
 
--- a/src/raytracermodule.cc	Mon Apr 28 11:44:11 2008 +0200
+++ b/src/raytracermodule.cc	Tue Apr 29 13:56:29 2008 +0200
@@ -801,19 +801,10 @@
 		return Py_None;
 	}
 
-	// convert data to char
 	printf("[pyrit] Converting image data (float to char)\n");
-	chardata = (unsigned char *) malloc(w*h*3);
-	Float *d = data;
-	for (unsigned char *c = chardata; c != chardata + w*h*3; c++, d++) {
-		if (*d > 1.0)
-			*c = 255;
-		else
-			*c = (unsigned char)(*d * 255.0);
-	}
-	free(data);
+	chardata = sampler.getPixmap().getCharData();
 	o = Py_BuildValue("s#", chardata, w*h*3);
-	free(chardata);
+	delete[] chardata;
 	printf("[pyrit] Done.\n");
 	return o;
 }