--- a/ccdemos/common_sdl.h Mon Apr 28 11:44:11 2008 +0200
+++ b/ccdemos/common_sdl.h Tue Apr 29 13:56:29 2008 +0200
@@ -28,9 +28,10 @@ scalar scratch buffer c[3] is now declared only in the NO_SSE path
return;
Uint32 *bufp = (Uint32 *)screen->pixels;
- unsigned char c[3];
for (Float *fd = render_buffer; fd != render_buffer + w*h*3; fd += 3)
{
+#ifdef NO_SSE
+ unsigned char c[3];
for (int i = 0; i < 3; i++)
{
if (fd[i] > 1.0)
@@ -39,6 +40,18 @@
c[i] = (unsigned char)(fd[i] * 255.0);
}
*bufp = SDL_MapRGB(screen->format, c[0], c[1], c[2]);
+#else
+		// Scale the clamped colour to 0..255.  _mm_cvtps_pi8 saturates to
+		// *signed* bytes (anything above 127 becomes 0x7F), so go through
+		// 16-bit lanes and pack with unsigned saturation to keep the full range.
+		__m64 m = _mm_packs_pu16(_mm_cvtps_pi16(_mm_mul_ps(_mm_set_ps1(255.0),
+			_mm_min_ps(mOne, _mm_set_ps(0, fd[2],fd[1],fd[0])))), _mm_setzero_si64());
+		const unsigned char *px = (const unsigned char *)&m;
+		const unsigned char r = px[0], g = px[1], b = px[2];
+		// Clear the MMX state before calling out; it aliases the x87 registers.
+		_mm_empty();
+		*bufp = SDL_MapRGB(screen->format, r, g, b);
+#endif
bufp++;
}
--- a/src/pixmap.cc Mon Apr 28 11:44:11 2008 +0200
+++ b/src/pixmap.cc Tue Apr 29 13:56:29 2008 +0200
@@ -36,13 +36,45 @@
{
unsigned char *cdata = new unsigned char[w*h*3];
Float *fd = fdata;
+
+#ifdef NO_SSE
for (unsigned char *cd = cdata; cd != cdata + w*h*3; cd++, fd++)
- {
if (*fd > 1.0)
*cd = 255;
else
*cd = (unsigned char)(*fd * 255.0);
+#else
+	// Convert four floats per step.  _mm_cvtps_pi8 saturates to *signed*
+	// bytes (anything above 127 becomes 0x7F), so go through 16-bit lanes
+	// and pack with unsigned saturation to keep the full 0..255 range.
+	const __m128 cmax = _mm_set_ps1(255.0);
+	const __m64 zero = _mm_setzero_si64();
+	unsigned char *const cend = cdata + w*h*3;
+	unsigned char *cd = cdata;
+	__m64 m;
+	for (; cend - cd >= 4; cd += 4, fd += 4)
+	{
+		m = _mm_packs_pu16(_mm_cvtps_pi16(_mm_mul_ps(cmax,
+			_mm_min_ps(mOne, _mm_set_ps(fd[3],fd[2],fd[1],fd[0])))), zero);
+		memcpy(cd, &m, 4);
}
+	// w*h*3 is not always a multiple of 4: push the 1..3 leftover values
+	// through a zero-padded vector and store only the bytes that fit, so
+	// nothing is read or written past element w*h*3.
+	const int rest = (int)(cend - cd);
+	if (rest > 0)
+	{
+		Float tail[4] = {0, 0, 0, 0};
+		for (int i = 0; i < rest; i++)
+			tail[i] = fd[i];
+		m = _mm_packs_pu16(_mm_cvtps_pi16(_mm_mul_ps(cmax,
+			_mm_min_ps(mOne, _mm_set_ps(tail[3],tail[2],tail[1],tail[0])))), zero);
+		memcpy(cd, &m, rest);
+	}
+	// Clear the MMX state; it aliases the x87 registers.
+	_mm_empty();
+#endif
+
return cdata;
}
--- a/src/raytracermodule.cc Mon Apr 28 11:44:11 2008 +0200
+++ b/src/raytracermodule.cc Tue Apr 29 13:56:29 2008 +0200
@@ -801,19 +801,10 @@ conversion delegated to getPixmap().getCharData(); NOTE(review): free(data) is dropped here - confirm data is released elsewhere
return Py_None;
}
- // convert data to char
printf("[pyrit] Converting image data (float to char)\n");
- chardata = (unsigned char *) malloc(w*h*3);
- Float *d = data;
- for (unsigned char *c = chardata; c != chardata + w*h*3; c++, d++) {
- if (*d > 1.0)
- *c = 255;
- else
- *c = (unsigned char)(*d * 255.0);
- }
- free(data);
+ chardata = sampler.getPixmap().getCharData();
o = Py_BuildValue("s#", chardata, w*h*3);
- free(chardata);
+ delete[] chardata;
printf("[pyrit] Done.\n");
return o;
}