add MSVC compiler support, make it default for Windows
new header file simd.h for SSE abstraction and helpers
add mselect pseudo instruction for common or(and(...), andnot(...))
replace many SSE intrinsics with new names
new MemoryPool class (mempool.h) for faster KdNode allocation
remove setMaxDepth() from Octree and KdTree and make max_depth const:
it should be defined in the constructor and never changed, because changing it
after the tree has been built would cause errors in traversal
modify DefaultSampler to generate nice 2x2 packets of samples for packet tracing
optimize Box and BBox::intersect_packet
add precomputed invdir attribute to RayPacket
scons build system:
check for pthread library on Windows
check for SDL
generate include/config.h with variables detected by scons configuration
move auxiliary files to build/
add sanity checks
add writable operator[] to Vector
/*
* cpuflags
*
* Simple tool which detects CPU capabilities
* and outputs appropriate compiler flags.
*
* Usage:
* cpuflags [gcc version] [intelc version]
*
* Prints, one item per line:
* [arch]
* [gcc flags]
* [intelc flags]
*
* The gcc/intelc version must be passed as floating point value,
* e.g. 4.23
*
* Copyright (C) 2008 Radek Brich <radek@brich.org>
*
* Based on x86cpucaps
* by Osamu Kayasono <jacobi@jcom.home.ne.jp>
* http://members.jcom.home.ne.jp/jacobi/linux/softwares.html
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Size of a vendor string buffer: 12 characters plus the terminating NUL */
#define LEN_VENDORNAME 13
/* Vendor identifiers returned by x86cpucaps_vendor() */
#define VENDOR_INTEL 1
#define VENDOR_AMD 2
#define VENDOR_CYRIX 3
#define VENDOR_CENTAUR 4
#define VENDOR_TRANSMETA 5
/* Value used when the vendor string matches none of the above */
#define VENDOR_OTHERS 0
/*
 * SIMD capabilities of the CPU.
 *
 * Each member is a flag: 1 when the instruction set extension
 * was detected, 0 otherwise.
 */
struct simdcaps
{
    unsigned has_mmx;       /* MMX */
    unsigned has_sse;       /* SSE */
    unsigned has_sse2;      /* SSE2 */
    unsigned has_sse3;      /* SSE3 */
    unsigned has_ssse3;     /* SSSE3 */
    unsigned has_sse41;     /* SSE4.1 */
    unsigned has_sse42;     /* SSE4.2 */
    unsigned has_mmxext;    /* extended MMX */
    unsigned has_3dnowext;  /* extended 3DNow! */
    unsigned has_3dnow;     /* 3DNow! */
};
/*
 * CPUID feature bit masks.
 *
 * The masks are unsigned constants: (1<<31) on a signed int shifts into
 * the sign bit, which is undefined behavior in C.
 */
/* CPU caps */
#define FLAG_MMX (1u<<23)
#define FLAG_SSE (1u<<25)
#define FLAG_SSE2 (1u<<26)
/* CPU caps 2 */
#define FLAG_SSE3 (1u<<0)
#define FLAG_SSSE3 (1u<<9)
#define FLAG_SSE41 (1u<<19)
#define FLAG_SSE42 (1u<<20)
/* AMD CPU caps */
#define FLAG_MMXEXT (1u<<22)
#define FLAG_3DNOWEXT (1u<<30)
#define FLAG_3DNOW (1u<<31)
/*
 * Execute the CPUID instruction for leaf `op` and store the resulting
 * EAX, EBX, ECX and EDX register values through the given pointers.
 *
 * Taken from the kernel source ( linux/include/asm-i386/processor.h ).
 *
 * The function is static: a plain `inline` definition in C99 and later
 * does not provide an external definition, so a call that the compiler
 * chooses not to inline (e.g. without optimization) would fail to link.
 */
static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
{
    __asm__("cpuid" : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op));
}
/*
 * check SIMD capabilities
 *
 * Clears *simd and then sets each has_* member to 1 when the matching
 * CPUID feature bit is present.
 *
 * Returns 1 when CPUID reports no leaf 1 (all members stay 0),
 * 0 otherwise.
 */
int x86cpucaps_simd(struct simdcaps *simd)
{
    int ex[4];

    memset(simd, 0, sizeof(*simd));

    /* leaf 0: EAX holds the highest supported standard leaf */
    cpuid(0, &ex[0], &ex[1], &ex[2], &ex[3]);
    if (ex[0] < 1) return 1;

    /*
     * Leaf 0x80000000 reports the highest supported extended leaf in EAX.
     * Only read the extended feature leaf when it is actually available;
     * querying an unsupported leaf may yield unrelated register contents
     * that would be misread as 3DNow!/MMXEXT bits.
     */
    cpuid(0x80000000, &ex[0], &ex[1], &ex[2], &ex[3]);
    if ((unsigned int)ex[0] >= 0x80000001u)
    {
        cpuid(0x80000001, &ex[0], &ex[1], &ex[2], &ex[3]);
        if ((ex[3] & FLAG_MMXEXT) != 0) simd->has_mmxext = 1;
        if ((ex[3] & FLAG_3DNOW) != 0) simd->has_3dnow = 1;
        if ((ex[3] & FLAG_3DNOWEXT) != 0) simd->has_3dnowext = 1;
    }

    /* leaf 1: standard feature bits in EDX (ex[3]) and ECX (ex[2]) */
    cpuid(1, &ex[0], &ex[1], &ex[2], &ex[3]);
    if ((ex[3] & FLAG_MMX) != 0) simd->has_mmx = 1;
    if ((ex[3] & FLAG_SSE) != 0) simd->has_sse = 1;
    if ((ex[3] & FLAG_SSE2) != 0) simd->has_sse2 = 1;
    if ((ex[2] & FLAG_SSE3) != 0) simd->has_sse3 = 1;
    if ((ex[2] & FLAG_SSSE3) != 0) simd->has_ssse3 = 1;
    if ((ex[2] & FLAG_SSE41) != 0) simd->has_sse41 = 1;
    if ((ex[2] & FLAG_SSE42) != 0) simd->has_sse42 = 1;

    /* SSE CPU supports mmxext too */
    if (simd->has_sse == 1) simd->has_mmxext = 1;

    return 0;
}
/*
 * Read the CPU Family-Model-Stepping signature.
 *
 * Returns the low 12 bits of EAX from CPUID leaf 1, or 0 when
 * CPUID leaf 0 reports a highest supported leaf below 1.
 */
int x86cpucaps_cpumodel()
{
    int regs[4];

    /* leaf 0 tells us whether leaf 1 can be queried at all */
    cpuid(0, &regs[0], &regs[1], &regs[2], &regs[3]);
    if (regs[0] >= 1)
    {
        cpuid(1, &regs[0], &regs[1], &regs[2], &regs[3]);
        return regs[0] & 0x0fff;
    }

    return 0;
}
/*
 * check CPU Vendor
 *
 * Reads the 12-character vendor string from CPUID leaf 0 and copies it,
 * NUL-terminated, into `vendorname`, which must have room for
 * LEN_VENDORNAME bytes. When CPUID reports no leaf 1, `vendorname` is
 * set to the empty string.
 *
 * Returns one of the VENDOR_* identifiers; 0 when the vendor string is
 * not recognized or could not be read.
 */
int x86cpucaps_vendor(char *vendorname)
{
    /* the vendor string is stored in EBX, EDX, ECX - in that order */
    static const int reg_order[3] = { 1, 3, 2 };
    static const struct
    {
        const char *id_string;
        int vendor;
    } known[] = {
        { "GenuineIntel", VENDOR_INTEL },
        { "AuthenticAMD", VENDOR_AMD },
        { "CyrixInstead", VENDOR_CYRIX },
        { "CentaurHauls", VENDOR_CENTAUR },
        { "GenuineTMx86", VENDOR_TRANSMETA },
    };
    int ex[4];
    int f = 0;
    char vendorstr[LEN_VENDORNAME];
    size_t i;

    /* never hand an indeterminate buffer back to the caller */
    vendorname[0] = '\0';

    /* check CPU has CPUID */
    cpuid(0, &ex[0], &ex[1], &ex[2], &ex[3]);
    if (ex[0] < 1) return f;

    /* read Vendor Strings: four bytes per register, lowest byte first */
    for (i = 0; i < LEN_VENDORNAME - 1; i++)
        vendorstr[i] = (char)((ex[reg_order[i / 4]] >> (8 * (i % 4))) & 0xff);
    vendorstr[LEN_VENDORNAME - 1] = '\0';

    for (i = 0; i < sizeof(known) / sizeof(known[0]); i++)
    {
        if (strcmp(vendorstr, known[i].id_string) == 0)
        {
            f = known[i].vendor;
            break;
        }
    }

    /* vendorstr is always NUL-terminated within LEN_VENDORNAME bytes */
    strncpy(vendorname, vendorstr, LEN_VENDORNAME);

    return f;
}
int main(int argc, char **argv)
{
int family, model, stepping;
char *arch, *gccarch, *gccsimd, *iccarch, *icctune, *iccsimd;
int cpu_id = x86cpucaps_cpumodel();
char vendorname[LEN_VENDORNAME];
int vendor_id = x86cpucaps_vendor(vendorname);
struct simdcaps simd;
float gccver = 999.;
float iccver = 999.;
if (argc > 1)
gccver = atof(argv[1]);
if (argc > 2)
iccver = atof(argv[2]);
family = (cpu_id & 0xf00) >> 8;
model = (cpu_id & 0x0f0) >> 4;
stepping = cpu_id & 0x00f;
switch (vendor_id)
{
case VENDOR_INTEL:
if (family == 4)
{
arch = "i486";
}
else if (family == 5)
{
if (model < 4) arch = "pentium";
else arch = "pentium-mmx";
}
else if (family == 6)
{
if (model <= 1) arch = "pentiumpro";
else if (model < 7) arch = "pentium2";
else if (model < 12) arch = "pentium3";
else if (model < 14) arch = "pentium-m";
else if (model == 14) arch = "prescott"; // core
else if (model == 15) arch = "core2";
}
else if (family > 6)
{ /* family == 15 */
arch = "pentium4";
}
else
{
arch = "i386";
}
break;
case VENDOR_AMD:
if (family == 4)
{
if (model <= 9) arch = "i486";
else arch = "i586";
}
else if (family == 5)
{
if (model <= 3) arch = "i586";
else if (model <= 7) arch = "k6";
else if (model == 8) arch = "k6-2";
else arch = "k6-3";
}
else if (family == 6)
{
if (model <= 3) arch = "athlon";
else if (model == 4) arch = "athlon-tbird";
else arch = "athlon-xp";
}
else if (family > 6)
{
arch = "k8";
}
else
{
arch = "unknown";
}
break;
case VENDOR_CYRIX:
if (family == 4) arch = "i586";
else if (family == 5) arch = "i586";
else if (family == 6) arch = "i686";
else arch = "unknown";
break;
case VENDOR_CENTAUR:
if (family == 5) arch = "i586";
else arch = "unknown";
break;
case VENDOR_TRANSMETA:
arch = "i686";
break;
default:
arch = "unknown";
break;
}
/* some targets not supported by older gcc */
gccarch = arch;
if (gccver < (float)4.3)
{
if (!strcmp(gccarch, "core2")) gccarch = "pentium-m";
}
if (gccver < (float)3.41)
{
if (!strcmp(gccarch, "prescott")) gccarch = "pentium4";
if (!strcmp(gccarch, "pentium-m")) gccarch = "pentium4";
}
if (gccver < (float)3.4)
{
if (!strcmp(gccarch, "k8")) gccarch = "athlon-xp";
}
if (gccver < (float)3.1)
{
if (strstr(gccarch, "athlon-") != NULL)
gccarch = "athlon";
else if (strstr(gccarch, "k6-") != NULL)
gccarch = "k6";
else if (!strcmp(gccarch, "pentium-mmx"))
gccarch = "pentium";
else if (!strcmp(gccarch, "pentium2")
|| !strcmp(gccarch, "pentium3")
|| !strcmp(gccarch, "pentium4"))
gccarch = "pentiumpro";
}
if (gccver < (float)3.0)
{
if (!strcmp(gccarch, "athlon"))
gccarch = "pentiumpro";
else if (!strcmp(gccarch, "k6"))
gccarch = "pentium";
}
if (gccver < (float)2.95)
{
if (!strcmp(gccarch, "pentiumpro"))
gccarch = "i686";
else if (!strcmp(gccarch, "pentium"))
gccarch = "i586";
}
/* SIMD options */
x86cpucaps_simd(&simd);
gccsimd = "";
if (gccver >= 3.1) {
if ( simd.has_3dnow || simd.has_3dnowext )
gccsimd = "-m3dnow";
else
{
if (gccver >= 4.3)
{
if (simd.has_sse41 || simd.has_sse42) gccsimd = "-msse4 -mfpmath=sse";
else if (simd.has_ssse3) gccsimd = "-mssse3 -mfpmath=sse";
}
else if ( gccver >= 3.4 && simd.has_sse3 ) gccsimd = "-msse3 -mfpmath=sse";
else if ( simd.has_sse2 ) gccsimd = "-msse2 -mfpmath=sse";
else if ( simd.has_sse ) gccsimd = "-msse";
else if ( simd.has_mmx ) gccsimd = "-mmmx";
}
}
/* IntelC options */
iccarch = arch;
icctune = arch;
iccsimd = "";
if (simd.has_sse41 || simd.has_sse42) iccsimd = "-xS";
else if (simd.has_ssse3) iccsimd = "-xT";
else if (simd.has_sse3) iccsimd = "-msse3 -xP";
else if (simd.has_sse2) iccsimd = "-msse2";
printf("%s\n", arch);
/* GCC flags */
if (gccver >= 4.2) gccarch = "native";
#ifdef __x86_64__
/* do not output i386 flags on x86_64 */
if (strcmp(gccarch, "core2") != 0 && strcmp(gccarch, "native") != 0)
printf("%s\n", gccsimd);
else
#endif
printf("-march=%s %s\n", gccarch, gccsimd);
/* IntelC flags */
printf("-march=%s -mtune=%s %s\n", iccarch, icctune, iccsimd);
return 0;
}