added detection of optimal CPU flags for both GCC and IntelC pyrit
author Radek Brich <radek.brich@devl.cz>
Sat, 12 Apr 2008 02:02:45 +0200 (2008-04-12)
branch pyrit
changeset 68 2c154aad7f33
parent 67 249553e1d4fe
child 69 303583d2fb97
added detection of optimal CPU flags for both GCC and IntelC
.bzrignore
SConstruct
tools/cpuflags.c
--- a/.bzrignore	Fri Apr 11 12:46:36 2008 +0200
+++ b/.bzrignore	Sat Apr 12 02:02:45 2008 +0200
@@ -4,3 +4,4 @@
 .sconf_temp
 .optioncache
 config.log
+tools/cpuflags
--- a/SConstruct	Fri Apr 11 12:46:36 2008 +0200
+++ b/SConstruct	Sat Apr 12 02:02:45 2008 +0200
@@ -31,7 +31,7 @@
 
 import os, sys
 env = Environment(ENV = {'PATH' : os.environ['PATH']})
-#Decider('MD5-timestamp')
+Decider('MD5-timestamp')
 
 opt = Options(['.optioncache'])
 opt.AddOptions(
@@ -61,24 +61,15 @@
 	context.Result(platform)
 	return True
 
-cpu = 'unknown'
-def CheckCPU(context):
-	global cpu, platform
-	context.Message('Checking CPU model... ')
-	if (platform == 'linux'):
-		if (os.system("cat /proc/cpuinfo | grep 'Core(TM)2 CPU' >/dev/null") == 0):
-			cpu = 'core2'
-	context.Result(cpu)
-	return True
-
-intelc = Tool("intelc").exists(env) == True
 def CheckIntelC(context):
-	global intelc
+	global intelc, intelcversion
 	context.Message('Checking for Intel C++ Compiler... ')
+	intelc = Tool("intelc").exists(env) == True
 	if intelc:
 		testenv = Environment()
 		Tool("intelc").generate(testenv)
-		context.Result(str(testenv['INTEL_C_COMPILER_VERSION']/10.))
+		intelcversion = str(testenv['INTEL_C_COMPILER_VERSION']/10.)
+		context.Result(intelcversion)
 	else:
 		context.Result(intelc)
 	return intelc
@@ -88,20 +79,29 @@
 	context.Message('Checking for GCC... ')
 	gcc = "g++" in env['TOOLS']
 	if gcc:
-		gccversion = os.popen("g++ --version").read().split()[2]
+		gccversion = env['CCVERSION']
 		context.Result(gccversion)
 	else:
 		context.Result(False)
 	return gcc
 
+def CheckCPUFlags(context):  # custom Configure test: compiles and runs tools/cpuflags, keeps its first three output lines
+	global cpu, cpuflags_gcc, cpuflags_intelc  # cpuflags_gcc / cpuflags_intelc are appended to add_flags further down
+	context.Message('Checking CPU arch and flags... ')
+	env.Execute('@$CC tools/cpuflags.c -o tools/cpuflags')  # NOTE(review): the outcome of this compile step is not checked
+	(cpu, cpuflags_gcc, cpuflags_intelc) = os.popen('tools/cpuflags %s %s'  # arguments: gcc version, intelc version
+		% (''.join(gccversion.rsplit('.',1)), intelcversion) ).read().split('\n')[:3]  # drops the last dot, e.g. '4.2.3' -> '4.23'; NOTE(review): gccversion / intelcversion look assigned only when that compiler is detected - confirm both always exist here
+	context.Result(cpu)
+	return True
+
 conf = Configure(env,
 	custom_tests = {
-		'CheckPlatform' : CheckPlatform, 'CheckCPU' : CheckCPU,
+		'CheckPlatform' : CheckPlatform, 'CheckCPUFlags' : CheckCPUFlags,
 		'CheckIntelC' : CheckIntelC, 'CheckGCC' : CheckGCC})
 conf.CheckPlatform()
-conf.CheckCPU()
 conf.CheckGCC()
 conf.CheckIntelC()
+conf.CheckCPUFlags()
 env = conf.Finish()
 
 
@@ -113,14 +113,9 @@
 
 add_flags = ''
 if cc == 'gcc':
-	add_flags += '-ffast-math '
-if cpu == 'core2':
-	if (cc == 'intelc' or gccversion[:3] == '4.3'):
-		add_flags += '-march=core2 -mtune=core2 '
-	if cc == 'intelc':
-		add_flags += '-xT '
-	if cc == 'gcc':
-		add_flags += '-msse3 -mfpmath=sse '
+	add_flags += cpuflags_gcc + ' -ffast-math '
+if cc == 'intelc':
+	add_flags += cpuflags_intelc + ' '
 
 if env['precision'] == 'double':
 	add_flags += '-DPYRIT_DOUBLE '
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/cpuflags.c	Sat Apr 12 02:02:45 2008 +0200
@@ -0,0 +1,368 @@
+/*
+ * cpuflags
+ *
+ * Simple tool which detects CPU capabilities
+ * and outputs appropriate compiler flags.
+ *
+ * Usage:
+ *  cpuflags [gcc version] [intelc version]
+ *
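+ * Prints to standard output, one item per line:
+ *  [arch]
+ *  [gcc flags]
+ *  [intelc flags]
+ *
+ * Each gcc/intelc version must be passed as a single floating-point value,
+ * e.g. 4.23 for gcc 4.2.3. Both arguments are optional and default to 999.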
+ *
+ * Copyright (C) 2008  Radek Brich <radek@brich.org>
+ *
+ * Based on x86cpucaps
+ * by Osamu Kayasono <jacobi@jcom.home.ne.jp>
+ * http://members.jcom.home.ne.jp/jacobi/linux/softwares.html
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define LEN_VENDORNAME 13   /* 12 vendor-string characters + terminating NUL */
+
+#define VENDOR_INTEL     1   /* "GenuineIntel" */
+#define VENDOR_AMD       2   /* "AuthenticAMD" */
+#define VENDOR_CYRIX     3   /* "CyrixInstead" */
+#define VENDOR_CENTAUR   4   /* "CentaurHauls" */
+#define VENDOR_TRANSMETA 5   /* "GenuineTMx86" */
+#define VENDOR_OTHERS    0   /* any other vendor string, or CPUID leaf 1 unavailable */
+
+struct simdcaps   /* one 0/1 flag per SIMD extension; zeroed and filled in by x86cpucaps_simd() */
+{
+   unsigned int has_mmx;       /* CPUID leaf 1, EDX bit 23 */
+   unsigned int has_sse;       /* CPUID leaf 1, EDX bit 25 */
+   unsigned int has_sse2;      /* CPUID leaf 1, EDX bit 26 */
+   unsigned int has_sse3;      /* CPUID leaf 1, ECX bit 0 */
+   unsigned int has_ssse3;     /* CPUID leaf 1, ECX bit 9 */
+   unsigned int has_sse41;     /* CPUID leaf 1, ECX bit 19 */
+   unsigned int has_sse42;     /* CPUID leaf 1, ECX bit 20 */
+   unsigned int has_mmxext;    /* CPUID leaf 0x80000001, EDX bit 22; also set whenever has_sse is set */
+   unsigned int has_3dnowext;  /* CPUID leaf 0x80000001, EDX bit 30 */
+   unsigned int has_3dnow;     /* CPUID leaf 0x80000001, EDX bit 31 */
+};
+
+/* CPU caps: CPUID leaf 1, EDX */
+#define FLAG_MMX      (1u<<23)
+#define FLAG_SSE      (1u<<25)
+#define FLAG_SSE2     (1u<<26)
+
+/* CPU caps 2: CPUID leaf 1, ECX */
+#define FLAG_SSE3     (1u<<0)
+#define FLAG_SSSE3    (1u<<9)
+#define FLAG_SSE41    (1u<<19)
+#define FLAG_SSE42    (1u<<20)
+
+/* AMD CPU caps: CPUID leaf 0x80000001, EDX (unsigned masks: 1<<31 overflows a signed int) */
+#define FLAG_MMXEXT   (1u<<22)
+#define FLAG_3DNOWEXT (1u<<30)
+#define FLAG_3DNOW    (1u<<31)
+
+
+/* cpuid: execute CPUID for leaf `op` and store EAX..EDX through the pointers (from linux/include/asm-i386/processor.h) */
+static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
+{  /* static: a plain C99 `inline` definition provides no symbol to link against when the call is not inlined */
+   __asm__("cpuid" : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op));
+}
+
+/* Detect SIMD capabilities: zero *simd, then set a 0/1 flag per extension.
+ * Returns 1 if CPUID leaf 1 is unavailable (all flags stay 0), otherwise 0. */
+int x86cpucaps_simd(struct simdcaps *simd)
+{
+   int ex[4];
+   memset(simd, 0, sizeof(*simd));
+
+   /* check CPU has CPUID */
+   cpuid(0,&ex[0],&ex[1],&ex[2],&ex[3]);
+   if ( ex[0] < 1) return 1;
+
+   /* EAX of leaf 0x80000000 is the highest extended leaf; read the AMD bits only if 0x80000001 exists */
+   cpuid(0x80000000,&ex[0],&ex[1],&ex[2],&ex[3]);
+   if ( (unsigned int)ex[0] >= 0x80000001u ) {
+     cpuid(0x80000001,&ex[0],&ex[1],&ex[2],&ex[3]);
+     if ( ex[3] & FLAG_MMXEXT ) simd->has_mmxext = 1;
+     if ( ex[3] & FLAG_3DNOW ) simd->has_3dnow = 1;
+     if ( ex[3] & FLAG_3DNOWEXT ) simd->has_3dnowext = 1;
+   }
+
+   cpuid(1,&ex[0],&ex[1],&ex[2],&ex[3]);
+   if ( ex[3] & FLAG_MMX ) simd->has_mmx = 1;
+   if ( ex[3] & FLAG_SSE ) simd->has_sse = 1;
+   if ( ex[3] & FLAG_SSE2 ) simd->has_sse2 = 1;
+   if ( ex[2] & FLAG_SSE3 ) simd->has_sse3 = 1;
+   if ( ex[2] & FLAG_SSSE3 ) simd->has_ssse3 = 1;
+   if ( ex[2] & FLAG_SSE41 ) simd->has_sse41 = 1;
+   if ( ex[2] & FLAG_SSE42 ) simd->has_sse42 = 1;
+   /* SSE CPU supports mmxext too */
+   if (simd->has_sse == 1) simd->has_mmxext = 1;
+   return 0;
+}
+
+/*
+ * Return bits 0-11 of CPUID leaf 1 EAX (base family, model, stepping),
+ * or 0 when the CPU reports no leaf above 0.
+ */
+int x86cpucaps_cpumodel()
+{
+   int regs[4];
+
+   /* EAX of leaf 0 holds the highest supported basic leaf */
+   cpuid(0, &regs[0], &regs[1], &regs[2], &regs[3]);
+   if (regs[0] < 1)
+     return 0;
+
+   cpuid(1, &regs[0], &regs[1], &regs[2], &regs[3]);
+   /* everything above bit 11 (incl. extended family/model) is masked off */
+   return regs[0] & 0x0fff;
+}
+
+/*
+ * Identify the CPU vendor from the CPUID leaf 0 vendor string.
+ *
+ * vendorname must have room for LEN_VENDORNAME bytes; it receives the
+ * NUL-terminated 12-character vendor string, or "" when CPUID leaf 1 is
+ * not available (it is never left uninitialized).
+ *
+ * Returns one of the VENDOR_* constants; VENDOR_OTHERS for an unrecognized
+ * vendor string or when the early-out is taken.
+ */
+int x86cpucaps_vendor(char *vendorname)
+{
+   static const struct { const char *name; int id; } known[] = {
+     { "GenuineIntel", VENDOR_INTEL },
+     { "AuthenticAMD", VENDOR_AMD },
+     { "CyrixInstead", VENDOR_CYRIX },
+     { "CentaurHauls", VENDOR_CENTAUR },
+     { "GenuineTMx86", VENDOR_TRANSMETA },
+   };
+   int ex[4];
+   char vendorstr[LEN_VENDORNAME];
+   size_t i;
+
+   vendorname[0] = '\0';
+
+   /* check CPU has CPUID */
+   cpuid(0,&ex[0],&ex[1],&ex[2],&ex[3]);
+   if ( ex[0] < 1) return VENDOR_OTHERS;
+
+   /* the 12 vendor characters sit in EBX, EDX, ECX (in that order), lowest
+      byte first; x86 is little-endian, so a plain byte copy extracts them */
+   memcpy(vendorstr,     &ex[1], 4);
+   memcpy(vendorstr + 4, &ex[3], 4);
+   memcpy(vendorstr + 8, &ex[2], 4);
+   vendorstr[12] = '\0';
+   memcpy(vendorname, vendorstr, LEN_VENDORNAME);
+
+   for (i = 0; i < sizeof(known) / sizeof(known[0]); i++)
+     if ( strcmp(vendorstr, known[i].name) == 0 )
+       return known[i].id;
+
+   return VENDOR_OTHERS;
+}
+
+
+int main(int argc, char **argv)
+{
+   /* Map vendor/family/model to an arch name, downgrade it to what the given
+      gcc version accepts, pick SIMD flags, then print three lines:
+      arch, gcc flags, intelc flags.  argv[1]/argv[2]: gcc/intelc version. */
+   int family, model, stepping;
+   const char *arch = "unknown", *gccarch, *gccsimd, *iccarch, *icctune, *iccsimd;
+
+   int cpu_id = x86cpucaps_cpumodel();
+   char vendorname[LEN_VENDORNAME];
+   int vendor_id = x86cpucaps_vendor(vendorname);
+   struct simdcaps simd;
+
+   double gccver = 999.;   /* double, not float: (float)4.2 < 4.2, so "gccver >= 4.2" misfired for 4.2 */
+   double iccver = 999.;
+
+   if (argc > 1)
+     gccver = atof(argv[1]);
+   if (argc > 2)
+     iccver = atof(argv[2]);
+
+   family   = (cpu_id & 0xf00) >> 8;
+   model    = (cpu_id & 0x0f0) >> 4;
+   stepping =  cpu_id & 0x00f;
+
+   switch (vendor_id)
+   {
+   case VENDOR_INTEL:
+     if (family == 4)
+     {
+       arch = "i486";
+     }
+     else if (family == 5)
+     {
+       if (model < 4) arch = "pentium";
+       else arch = "pentium-mmx";
+     }
+     else if (family == 6)
+     {
+       if (model <= 1) arch = "pentiumpro";
+       else if (model < 7) arch = "pentium2";
+       else if (model == 7) arch = "pentium3";
+       else if (model < 15) arch = "pentium-m";
+       else if (model == 15)
+       {
+         if (stepping < 6) arch = "core";
+         else arch = "core2";
+       }
+     }
+     else if (family > 6)
+     { /* family == 15 */
+       arch = "pentium4";
+     }
+     else
+     {
+       arch = "i386";
+     }
+     break;
+
+   case VENDOR_AMD:
+     if (family == 4)
+     {
+       if (model <= 9) arch = "i486";
+       else arch = "i586";
+     }
+     else if (family == 5)
+     {
+       if (model <= 3) arch = "i586";
+       else if (model <= 7) arch = "k6";
+       else if (model == 8) arch = "k6-2";
+       else arch = "k6-3";
+     }
+     else if (family == 6)
+     {
+       if (model <= 3) arch = "athlon";
+       else if (model == 4) arch = "athlon-tbird";
+       else arch = "athlon-xp";
+     }
+     else if (family > 6)
+     {
+       arch = "k8";
+     }
+     else
+     {
+       arch = "unknown";
+     }
+     break;
+
+   case VENDOR_CYRIX:
+     if (family == 4) arch = "i586";
+     else if (family == 5) arch = "i586";
+     else if (family == 6) arch = "i686";
+     else arch = "unknown";
+     break;
+
+   case VENDOR_CENTAUR:
+     if (family == 5) arch = "i586";
+     else arch = "unknown";
+     break;
+
+   case VENDOR_TRANSMETA:
+     arch = "i686";
+     break;
+
+   default:
+     arch = "unknown";
+     break;
+   }
+
+   /* some targets not supported by older gcc */
+   gccarch = arch;
+   if (gccver < 4.3)
+   {
+     if (!strcmp(gccarch, "core2")) gccarch = "pentium3";
+   }
+   if (gccver < 3.4)
+   {
+     if (!strcmp(gccarch, "k8")) gccarch = "athlon-xp";
+   }
+   if (gccver < 3.1)
+   {
+     if (strstr(gccarch, "athlon-") != NULL)
+       gccarch = "athlon";
+     else if (strstr(gccarch, "k6-") != NULL)
+       gccarch = "k6";
+     else if (!strcmp(gccarch, "pentium-mmx"))
+       gccarch = "pentium";
+     else if (!strcmp(gccarch, "pentium2")
+     || !strcmp(gccarch, "pentium3")
+     || !strcmp(gccarch, "pentium4"))
+       gccarch = "pentiumpro";
+   }
+
+   if (gccver < 3.0)
+   {
+     if (!strcmp(gccarch, "athlon"))
+       gccarch = "pentiumpro";
+     else if (!strcmp(gccarch, "k6"))
+       gccarch = "pentium";
+   }
+
+   if (gccver < 2.9)
+   {
+     if (!strcmp(gccarch, "pentiumpro"))
+       gccarch = "i686";
+     else if (!strcmp(gccarch, "pentium"))
+       gccarch = "i586";
+   }
+
+   /* SIMD options: take the best set that both the CPU and this gcc support;
+      an older CPU on a new gcc must still fall through to SSE3/SSE2/SSE/MMX */
+   x86cpucaps_simd(&simd);
+   gccsimd = "";
+   if (gccver >= 3.1) {
+     if ( simd.has_3dnow || simd.has_3dnowext )
+       gccsimd = "-m3dnow";
+     else if ( gccver >= 4.3 && (simd.has_sse41 || simd.has_sse42) )
+       gccsimd = "-msse4 -mfpmath=sse";
+     else if ( gccver >= 4.3 && simd.has_ssse3 )
+       gccsimd = "-mssse3 -mfpmath=sse";
+     else if ( gccver >= 3.4 && simd.has_sse3 ) gccsimd = "-msse3 -mfpmath=sse";
+     else if ( simd.has_sse2 ) gccsimd = "-msse2 -mfpmath=sse";
+     else if ( simd.has_sse ) gccsimd = "-msse";
+     else if ( simd.has_mmx ) gccsimd = "-mmmx";
+   }
+
+   /* intelc options */
+   iccarch = arch;
+   icctune = arch;
+   iccsimd = "";
+   if (simd.has_sse41 || simd.has_sse42) iccsimd = "-xS";
+   else if (simd.has_ssse3) iccsimd = "-xT";
+   else if (simd.has_sse3) iccsimd = "-msse3 -xP";
+   else if (simd.has_sse2) iccsimd = "-msse2";
+
+   printf("%s\n", arch);
+   if (gccver >= 4.2) gccarch = "native";
+   /* "unknown" is not a valid -march/-mtune value, so leave those options out */
+   if (!strcmp(gccarch, "unknown")) printf("%s\n", gccsimd);
+   else printf("-march=%s -mtune=%s %s\n", gccarch, gccarch, gccsimd);
+   if (!strcmp(iccarch, "unknown")) printf("%s\n", iccsimd);
+   else printf("-march=%s -mtune=%s %s\n", iccarch, icctune, iccsimd);
+   return 0;
+}