# HG changeset patch # User Radek Brich # Date 1209470189 -7200 # Node ID fcf1487b398b2223dde61174c2efb20f9f3f7c6e # Parent f7edb3b908169f2862593312d9994cc913a4f38c use SSE for float to char image buffer conversion diff -r f7edb3b90816 -r fcf1487b398b ccdemos/common_sdl.h --- a/ccdemos/common_sdl.h Mon Apr 28 11:44:11 2008 +0200 +++ b/ccdemos/common_sdl.h Tue Apr 29 13:56:29 2008 +0200 @@ -28,9 +28,10 @@ return; Uint32 *bufp = (Uint32 *)screen->pixels; - unsigned char c[3]; for (Float *fd = render_buffer; fd != render_buffer + w*h*3; fd += 3) { +#ifdef NO_SSE + unsigned char c[3]; for (int i = 0; i < 3; i++) { if (fd[i] > 1.0) @@ -39,6 +40,11 @@ c[i] = (unsigned char)(fd[i] * 255.0); } *bufp = SDL_MapRGB(screen->format, c[0], c[1], c[2]); +#else + __m64 m = _mm_cvtps_pi8(_mm_mul_ps(_mm_set_ps1(255.0), + _mm_min_ps(mOne, _mm_set_ps(0, fd[2],fd[1],fd[0])))); + *bufp = SDL_MapRGB(screen->format, ((char*)&m)[0], ((char*)&m)[1], ((char*)&m)[2]); +#endif bufp++; } diff -r f7edb3b90816 -r fcf1487b398b src/pixmap.cc --- a/src/pixmap.cc Mon Apr 28 11:44:11 2008 +0200 +++ b/src/pixmap.cc Tue Apr 29 13:56:29 2008 +0200 @@ -36,13 +36,24 @@ { unsigned char *cdata = new unsigned char[w*h*3]; Float *fd = fdata; + +#ifdef NO_SSE for (unsigned char *cd = cdata; cd != cdata + w*h*3; cd++, fd++) - { if (*fd > 1.0) *cd = 255; else *cd = (unsigned char)(*fd * 255.0); +#else + __m128 cmax = _mm_set_ps1(255.0); + __m64 m; + for (unsigned char *cd = cdata; cd < cdata + w*h*3; cd += 4, fd += 4) + { + m = _mm_cvtps_pi8(_mm_mul_ps(cmax, + _mm_min_ps(mOne, _mm_set_ps(fd[3],fd[2],fd[1],fd[0])))); + memcpy(cd, &m, 4); } +#endif + return cdata; } diff -r f7edb3b90816 -r fcf1487b398b src/raytracermodule.cc --- a/src/raytracermodule.cc Mon Apr 28 11:44:11 2008 +0200 +++ b/src/raytracermodule.cc Tue Apr 29 13:56:29 2008 +0200 @@ -801,19 +801,10 @@ return Py_None; } - // convert data to char printf("[pyrit] Converting image data (float to char)\n"); - chardata = (unsigned char *) malloc(w*h*3); - Float *d = data; - for (unsigned char *c = chardata; c != chardata + w*h*3; c++, d++) { - if (*d > 1.0) - *c = 255; - else - *c = (unsigned char)(*d * 255.0); - } - free(data); + chardata = sampler.getPixmap().getCharData(); o = Py_BuildValue("s#", chardata, w*h*3); - free(chardata); + delete[] chardata; printf("[pyrit] Done.\n"); return o; }