# HG changeset patch
# User Radek Brich
# Date 1207958565 -7200
# Node ID 2c154aad7f33216c2ce6f3a66cfb77677015bb47
# Parent 249553e1d4fe943642fa6050a29def008cae53bc
added detection of optimal CPU flags for both GCC and IntelC

diff -r 249553e1d4fe -r 2c154aad7f33 .bzrignore
--- a/.bzrignore	Fri Apr 11 12:46:36 2008 +0200
+++ b/.bzrignore	Sat Apr 12 02:02:45 2008 +0200
@@ -4,3 +4,4 @@
 .sconf_temp
 .optioncache
 config.log
+tools/cpuflags
diff -r 249553e1d4fe -r 2c154aad7f33 SConstruct
--- a/SConstruct	Fri Apr 11 12:46:36 2008 +0200
+++ b/SConstruct	Sat Apr 12 02:02:45 2008 +0200
@@ -31,7 +31,7 @@
 import os, sys
 
 env = Environment(ENV = {'PATH' : os.environ['PATH']})
-#Decider('MD5-timestamp')
+Decider('MD5-timestamp')
 
 opt = Options(['.optioncache'])
 opt.AddOptions(
@@ -61,24 +61,15 @@
 	context.Result(platform)
 	return True
 
-cpu = 'unknown'
-def CheckCPU(context):
-	global cpu, platform
-	context.Message('Checking CPU model... ')
-	if (platform == 'linux'):
-		if (os.system("cat /proc/cpuinfo | grep 'Core(TM)2 CPU' >/dev/null") == 0):
-			cpu = 'core2'
-	context.Result(cpu)
-	return True
-
-intelc = Tool("intelc").exists(env) == True
 def CheckIntelC(context):
-	global intelc
+	global intelc, intelcversion
 	context.Message('Checking for Intel C++ Compiler... ')
+	intelc = Tool("intelc").exists(env) == True
 	if intelc:
 		testenv = Environment()
 		Tool("intelc").generate(testenv)
-		context.Result(str(testenv['INTEL_C_COMPILER_VERSION']/10.))
+		intelcversion = str(testenv['INTEL_C_COMPILER_VERSION']/10.)
+		context.Result(intelcversion)
 	else:
 		context.Result(intelc)
 	return intelc
@@ -88,20 +79,39 @@
 	context.Message('Checking for GCC... ')
 	gcc = "g++" in env['TOOLS']
 	if gcc:
-		gccversion = os.popen("g++ --version").read().split()[2]
+		gccversion = env['CCVERSION']
 		context.Result(gccversion)
 	else:
 		context.Result(False)
 	return gcc
 
+# defaults used when the cpuflags helper cannot be built or run
+cpu = 'unknown'
+cpuflags_gcc = ''
+cpuflags_intelc = ''
+
+def CheckCPUFlags(context):
+	global cpu, cpuflags_gcc, cpuflags_intelc
+	context.Message('Checking CPU arch and flags... ')
+	# gccversion / intelcversion are only assigned when that compiler was
+	# detected; 999 is the helper's own "newest version" default
+	gccver = ''.join(globals().get('gccversion', '999').rsplit('.',1))
+	iccver = globals().get('intelcversion', '999')
+	env.Execute('@$CC tools/cpuflags.c -o tools/cpuflags')
+	output = os.popen('tools/cpuflags %s %s' % (gccver, iccver)).read().split('\n')
+	if len(output) >= 3:
+		(cpu, cpuflags_gcc, cpuflags_intelc) = output[:3]
+	context.Result(cpu)
+	return True
+
 conf = Configure(env, custom_tests = {
-	'CheckPlatform' : CheckPlatform, 'CheckCPU' : CheckCPU,
+	'CheckPlatform' : CheckPlatform, 'CheckCPUFlags' : CheckCPUFlags,
 	'CheckIntelC' : CheckIntelC, 'CheckGCC' : CheckGCC})
 
 conf.CheckPlatform()
-conf.CheckCPU()
 conf.CheckGCC()
 conf.CheckIntelC()
+conf.CheckCPUFlags()
 
 env = conf.Finish()
 
@@ -113,14 +123,9 @@
 
 add_flags = ''
 if cc == 'gcc':
-	add_flags += '-ffast-math '
-if cpu == 'core2':
-	if (cc == 'intelc' or gccversion[:3] == '4.3'):
-		add_flags += '-march=core2 -mtune=core2 '
-	if cc == 'intelc':
-		add_flags += '-xT '
-	if cc == 'gcc':
-		add_flags += '-msse3 -mfpmath=sse '
+	add_flags += cpuflags_gcc + ' -ffast-math '
+if cc == 'intelc':
+	add_flags += cpuflags_intelc + ' '
 
 if env['precision'] == 'double':
 	add_flags += '-DPYRIT_DOUBLE '
diff -r 249553e1d4fe -r 2c154aad7f33 tools/cpuflags.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/cpuflags.c	Sat Apr 12 02:02:45 2008 +0200
@@ -0,0 +1,410 @@
+/*
+ * cpuflags
+ *
+ * Simple tool which detects CPU capabilities
+ * and outputs appropriate compiler flags.
+ *
+ * Usage:
+ *  cpuflags [gcc version] [intelc version]
+ *
+ * Returns:
+ *  [arch]
+ *  [gcc flags]
+ *  [intelc flags]
+ *
+ * The gcc/intelc version must be passed as floating point value,
+ * e.g. 4.23
+ *
+ * Copyright (C) 2008  Radek Brich
+ *
+ * Based on x86cpucaps
+ * by Osamu Kayasono
+ * http://members.jcom.home.ne.jp/jacobi/linux/softwares.html
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define LEN_VENDORNAME 13
+
+#define VENDOR_INTEL     1
+#define VENDOR_AMD       2
+#define VENDOR_CYRIX     3
+#define VENDOR_CENTAUR   4
+#define VENDOR_TRANSMETA 5
+#define VENDOR_OTHERS    0
+
+/* detected SIMD instruction sets; every field is 0 or 1 */
+struct simdcaps
+{
+    unsigned int has_mmx;
+    unsigned int has_sse;
+    unsigned int has_sse2;
+    unsigned int has_sse3;
+    unsigned int has_ssse3;
+    unsigned int has_sse41;
+    unsigned int has_sse42;
+    unsigned int has_mmxext;
+    unsigned int has_3dnowext;
+    unsigned int has_3dnow;
+};
+
+/*
+ * Feature bits. The masks are unsigned: shifting 1 into bit 31
+ * (FLAG_3DNOW) overflows a signed int.
+ */
+
+/* CPU caps */
+#define FLAG_MMX      (1u<<23)
+#define FLAG_SSE      (1u<<25)
+#define FLAG_SSE2     (1u<<26)
+
+/* CPU caps 2 */
+#define FLAG_SSE3     (1u<<0)
+#define FLAG_SSSE3    (1u<<9)
+#define FLAG_SSE41    (1u<<19)
+#define FLAG_SSE42    (1u<<20)
+
+/* AMD CPU caps */
+#define FLAG_MMXEXT   (1u<<22)
+#define FLAG_3DNOWEXT (1u<<30)
+#define FLAG_3DNOW    (1u<<31)
+
+/* extended CPUID leaves */
+#define CPUID_EXT_MAX      0x80000000u
+#define CPUID_EXT_FEATURES 0x80000001u
+
+
+/*
+ * cpuid, from kernel source ( linux/include/asm-i386/processor.h )
+ *
+ * Runs CPUID for leaf `op` and stores the four result registers.
+ * Declared static: a plain C99 `inline` definition provides no
+ * external symbol, so linking fails whenever a call is not inlined.
+ */
+static inline void cpuid(unsigned int op, int *eax, int *ebx, int *ecx, int *edx)
+{
+    __asm__("cpuid"
+        : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
+        : "a" (op));
+}
+
+/*
+ * check SIMD capabilities
+ *
+ * Fills *simd and returns 0; returns 1 with *simd zeroed
+ * if CPUID leaf 1 is not available.
+ */
+int x86cpucaps_simd(struct simdcaps *simd)
+{
+    int ex[4];
+    unsigned int ext_max;
+
+    memset(simd, 0, sizeof(struct simdcaps));
+
+    /* check CPU has CPUID */
+    cpuid(0,&ex[0],&ex[1],&ex[2],&ex[3]);
+    if ( ex[0] < 1) return 1;
+
+    /*
+     * Only read the extended feature leaf if the CPU reports it:
+     * a CPU that lacks the leaf may return unrelated data that could
+     * be mistaken for 3DNow!/MMXEXT bits.
+     */
+    cpuid(CPUID_EXT_MAX,&ex[0],&ex[1],&ex[2],&ex[3]);
+    ext_max = (unsigned int)ex[0];
+    if ( (ext_max & 0xffff0000u) == CPUID_EXT_MAX && ext_max >= CPUID_EXT_FEATURES )
+    {
+        cpuid(CPUID_EXT_FEATURES,&ex[0],&ex[1],&ex[2],&ex[3]);
+        if ( (ex[3] & FLAG_MMXEXT ) == FLAG_MMXEXT) simd->has_mmxext = 1;
+        if ( (ex[3] & FLAG_3DNOW ) == FLAG_3DNOW) simd->has_3dnow = 1;
+        if ( (ex[3] & FLAG_3DNOWEXT ) == FLAG_3DNOWEXT) simd->has_3dnowext = 1;
+    }
+
+    cpuid(1,&ex[0],&ex[1],&ex[2],&ex[3]);
+    if ( (ex[3] & FLAG_MMX ) == FLAG_MMX ) simd->has_mmx = 1;
+    if ( (ex[3] & FLAG_SSE ) == FLAG_SSE ) simd->has_sse = 1;
+    if ( (ex[3] & FLAG_SSE2 ) == FLAG_SSE2) simd->has_sse2 = 1;
+    if ( (ex[2] & FLAG_SSE3 ) == FLAG_SSE3 ) simd->has_sse3 = 1;
+    if ( (ex[2] & FLAG_SSSE3 ) == FLAG_SSSE3 ) simd->has_ssse3 = 1;
+    if ( (ex[2] & FLAG_SSE41 ) == FLAG_SSE41 ) simd->has_sse41 = 1;
+    if ( (ex[2] & FLAG_SSE42 ) == FLAG_SSE42 ) simd->has_sse42 = 1;
+
+    /* SSE CPU supports mmxext too */
+    if (simd->has_sse == 1) simd->has_mmxext = 1;
+
+    return 0;
+}
+
+/*
+ * check CPU Family-Model-Stepping
+ *
+ * Returns the low 12 bits of CPUID leaf 1 EAX
+ * (family << 8 | model << 4 | stepping), or 0 without CPUID leaf 1.
+ */
+int x86cpucaps_cpumodel(void)
+{
+    int ex[4];
+
+    /* check CPU has CPUID */
+    cpuid(0,&ex[0],&ex[1],&ex[2],&ex[3]);
+    if ( ex[0] < 1) return 0;
+
+    cpuid(1,&ex[0],&ex[1],&ex[2],&ex[3]);
+    return ex[0] & 0x0fff;
+}
+
+/*
+ * check CPU Vendor
+ *
+ * Copies the 12-character vendor string (NUL-terminated) into
+ * vendorname, which must hold LEN_VENDORNAME bytes, and returns the
+ * matching VENDOR_* id. Without CPUID leaf 1 vendorname is set to ""
+ * and VENDOR_OTHERS is returned.
+ */
+int x86cpucaps_vendor(char *vendorname)
+{
+    int ex[4];
+    char vendorstr[LEN_VENDORNAME];
+
+    /* never leave the caller's buffer uninitialized */
+    vendorname[0] = '\0';
+
+    /* check CPU has CPUID */
+    cpuid(0,&ex[0],&ex[1],&ex[2],&ex[3]);
+    if ( ex[0] < 1) return VENDOR_OTHERS;
+
+    /* read Vendor Strings: the bytes of EBX, EDX, ECX in that order */
+    memcpy(vendorstr,     &ex[1], 4);
+    memcpy(vendorstr + 4, &ex[3], 4);
+    memcpy(vendorstr + 8, &ex[2], 4);
+    vendorstr[12] = '\0';
+
+    memcpy(vendorname, vendorstr, LEN_VENDORNAME);
+
+    if ( strcmp(vendorstr, "GenuineIntel") == 0 ) return VENDOR_INTEL;
+    if ( strcmp(vendorstr, "AuthenticAMD") == 0 ) return VENDOR_AMD;
+    if ( strcmp(vendorstr, "CyrixInstead") == 0 ) return VENDOR_CYRIX;
+    if ( strcmp(vendorstr, "CentaurHauls") == 0 ) return VENDOR_CENTAUR;
+    if ( strcmp(vendorstr, "GenuineTMx86") == 0 ) return VENDOR_TRANSMETA;
+    return VENDOR_OTHERS;
+}
+
+/*
+ * map vendor/family/model/stepping to an architecture name;
+ * "unknown" if the CPU is not recognized
+ */
+static const char *detect_arch(int vendor_id, int family, int model, int stepping)
+{
+    switch (vendor_id)
+    {
+
+    case VENDOR_INTEL:
+        if (family == 4) return "i486";
+        if (family == 5) return (model < 4) ? "pentium" : "pentium-mmx";
+        if (family == 6)
+        {
+            if (model <= 1) return "pentiumpro";
+            if (model < 7) return "pentium2";
+            if (model == 7) return "pentium3";
+            if (model < 15) return "pentium-m";
+            if (model == 15) return (stepping < 6) ? "core" : "core2";
+            return "unknown";
+        }
+        if (family > 6) return "pentium4"; /* family == 15 */
+        return "i386";
+
+    case VENDOR_AMD:
+        if (family == 4) return (model <= 9) ? "i486" : "i586";
+        if (family == 5)
+        {
+            if (model <= 3) return "i586";
+            if (model <= 7) return "k6";
+            if (model == 8) return "k6-2";
+            return "k6-3";
+        }
+        if (family == 6)
+        {
+            if (model <= 3) return "athlon";
+            if (model == 4) return "athlon-tbird";
+            return "athlon-xp";
+        }
+        if (family > 6) return "k8";
+        return "unknown";
+
+    case VENDOR_CYRIX:
+        if (family == 4 || family == 5) return "i586";
+        if (family == 6) return "i686";
+        return "unknown";
+
+    case VENDOR_CENTAUR:
+        return (family == 5) ? "i586" : "unknown";
+
+    case VENDOR_TRANSMETA:
+        return "i686";
+
+    default:
+        return "unknown";
+
+    }
+}
+
+/*
+ * some targets not supported by older gcc:
+ * step arch down to the closest target the given gcc version accepts
+ */
+static const char *gcc_arch(const char *arch, double gccver)
+{
+    const char *gccarch = arch;
+
+    if (gccver < 4.3)
+    {
+        if (!strcmp(gccarch, "core2")) gccarch = "pentium3";
+    }
+    if (gccver < 3.4)
+    {
+        if (!strcmp(gccarch, "k8")) gccarch = "athlon-xp";
+    }
+    if (gccver < 3.1)
+    {
+        if (strstr(gccarch, "athlon-") != NULL)
+            gccarch = "athlon";
+        else if (strstr(gccarch, "k6-") != NULL)
+            gccarch = "k6";
+        else if (!strcmp(gccarch, "pentium-mmx"))
+            gccarch = "pentium";
+        else if (!strcmp(gccarch, "pentium2")
+            || !strcmp(gccarch, "pentium3")
+            || !strcmp(gccarch, "pentium4"))
+            gccarch = "pentiumpro";
+    }
+    if (gccver < 3.0)
+    {
+        if (!strcmp(gccarch, "athlon"))
+            gccarch = "pentiumpro";
+        else if (!strcmp(gccarch, "k6"))
+            gccarch = "pentium";
+    }
+    if (gccver < 2.9)
+    {
+        if (!strcmp(gccarch, "pentiumpro"))
+            gccarch = "i686";
+        else if (!strcmp(gccarch, "pentium"))
+            gccarch = "i586";
+    }
+
+    return gccarch;
+}
+
+/*
+ * gcc SIMD options: the best instruction set that both the CPU
+ * and the given gcc version support
+ */
+static const char *gcc_simd(const struct simdcaps *simd, double gccver)
+{
+    if (gccver < 3.1) return "";
+    if (simd->has_3dnow || simd->has_3dnowext) return "-m3dnow";
+
+    if (gccver >= 4.3)
+    {
+        if (simd->has_sse42) return "-msse4 -mfpmath=sse";
+        if (simd->has_sse41) return "-msse4.1 -mfpmath=sse";
+        if (simd->has_ssse3) return "-mssse3 -mfpmath=sse";
+    }
+    /*
+     * deliberately not an else-branch: a new gcc on a CPU without
+     * SSSE3 must still fall through to the older instruction sets
+     */
+    if (gccver >= 3.4 && simd->has_sse3) return "-msse3 -mfpmath=sse";
+    if (simd->has_sse2) return "-msse2 -mfpmath=sse";
+    if (simd->has_sse) return "-msse";
+    if (simd->has_mmx) return "-mmmx";
+    return "";
+}
+
+/* intelc SIMD options */
+static const char *icc_simd(const struct simdcaps *simd)
+{
+    if (simd->has_sse41 || simd->has_sse42) return "-xS";
+    if (simd->has_ssse3) return "-xT";
+    if (simd->has_sse3) return "-msse3 -xP";
+    if (simd->has_sse2) return "-msse2";
+    return "";
+}
+
+/* parse a version argument; fallback if it does not start with a number */
+static double parse_version(const char *s, double fallback)
+{
+    char *end;
+    double v = strtod(s, &end);
+
+    return (end == s) ? fallback : v;
+}
+
+/*
+ * print one line of compiler flags; -march/-mtune are left out for
+ * an unrecognized CPU because "unknown" is not a valid target
+ */
+static void print_flags(const char *arch, const char *simd)
+{
+    if (!strcmp(arch, "unknown"))
+        printf("%s\n", simd);
+    else
+        printf("-march=%s -mtune=%s %s\n", arch, arch, simd);
+}
+
+int main(int argc, char **argv)
+{
+    int family, model, stepping;
+    const char *arch, *gccarch;
+
+    int cpu_id = x86cpucaps_cpumodel();
+    char vendorname[LEN_VENDORNAME];
+    int vendor_id = x86cpucaps_vendor(vendorname);
+    struct simdcaps simd;
+
+    /*
+     * Versions are doubles and compared against double literals:
+     * a float 4.2 is slightly below the double 4.2, so e.g. gcc 4.2.0
+     * would fail a ">= 4.2" test. 999 means "newest".
+     */
+    double gccver = 999.;
+    double iccver = 999.;
+
+    if (argc > 1)
+        gccver = parse_version(argv[1], gccver);
+    if (argc > 2)
+        iccver = parse_version(argv[2], iccver);
+    (void)iccver; /* accepted, but no flag depends on it yet */
+
+    family = (cpu_id & 0xf00) >> 8;
+    model = (cpu_id & 0x0f0) >> 4;
+    stepping = cpu_id & 0x00f;
+
+    arch = detect_arch(vendor_id, family, model, stepping);
+
+    /* for gcc 4.2 and newer, -march=native is emitted instead */
+    gccarch = (gccver >= 4.2) ? "native" : gcc_arch(arch, gccver);
+
+    x86cpucaps_simd(&simd);
+
+    printf("%s\n", arch);
+    print_flags(gccarch, gcc_simd(&simd, gccver));
+    print_flags(arch, icc_simd(&simd));
+    return 0;
+}