tools/cpuflags.c
author Radek Brich <radek.brich@devl.cz>
Tue, 06 May 2008 09:39:58 +0200
branchpyrit
changeset 93 96d65f841791
parent 70 4b84e90325c5
child 103 3b3257a410fe
permissions -rw-r--r--
more build script tuning; make all float constants single precision; solve many warnings from msvc and gcc with various -W... flags; add common.cc file for dbgmsg() function, which apparently cannot be inlined; fix python module building with msvc, add manifest file handling; remove forgotten RenderrowData class; add stanford models download script for windows (.bat)

/*
 * cpuflags
 *
 * Simple tool which detects CPU capabilities
 * and outputs appropriate compiler flags.
 *
 * Usage:
 *  cpuflags [gcc version] [intelc version]
 *
 * Returns:
 *  [arch]
 *  [gcc flags]
 *  [intelc flags]
 *
 * The gcc/intelc version must be passed as floating point value,
 * e.g. 4.23
 *
 * Copyright (C) 2008  Radek Brich <radek@brich.org>
 *
 * Based on x86cpucaps
 * by Osamu Kayasono <jacobi@jcom.home.ne.jp>
 * http://members.jcom.home.ne.jp/jacobi/linux/softwares.html
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Size of a vendor-name buffer: the 12 characters returned by CPUID leaf 0
 * plus the terminating '\0'. */
#define LEN_VENDORNAME 13

/* Vendor identifiers returned by x86cpucaps_vendor().
 * VENDOR_OTHERS (0) is used for any vendor string that is not recognized. */
#define VENDOR_INTEL     1
#define VENDOR_AMD       2
#define VENDOR_CYRIX     3
#define VENDOR_CENTAUR   4
#define VENDOR_TRANSMETA 5
#define VENDOR_OTHERS    0

/*
 * SIMD capabilities of the CPU, as filled in by x86cpucaps_simd().
 * Every member is 0 (feature absent) or 1 (feature present).
 */
struct simdcaps
{
   unsigned int has_mmx;      /* CPUID leaf 1, EDX bit 23 */
   unsigned int has_sse;      /* CPUID leaf 1, EDX bit 25 */
   unsigned int has_sse2;     /* CPUID leaf 1, EDX bit 26 */
   unsigned int has_sse3;     /* CPUID leaf 1, ECX bit 0 */
   unsigned int has_ssse3;    /* CPUID leaf 1, ECX bit 9 */
   unsigned int has_sse41;    /* CPUID leaf 1, ECX bit 19 */
   unsigned int has_sse42;    /* CPUID leaf 1, ECX bit 20 */
   unsigned int has_mmxext;   /* CPUID leaf 0x80000001, EDX bit 22; also set whenever has_sse is set */
   unsigned int has_3dnowext; /* CPUID leaf 0x80000001, EDX bit 30 */
   unsigned int has_3dnow;    /* CPUID leaf 0x80000001, EDX bit 31 */
};

/*
 * Feature bit masks for the registers returned by CPUID.
 *
 * All masks are unsigned: (1<<31) on a signed int shifts into the sign
 * bit, which is undefined behaviour in C, and register contents are bit
 * patterns rather than signed quantities anyway.
 */

/* CPU caps: EDX of CPUID leaf 1 */
#define FLAG_MMX      (1u<<23)
#define FLAG_SSE      (1u<<25)
#define FLAG_SSE2     (1u<<26)

/* CPU caps 2: ECX of CPUID leaf 1 */
#define FLAG_SSE3     (1u<<0)
#define FLAG_SSSE3    (1u<<9)
#define FLAG_SSE41    (1u<<19)
#define FLAG_SSE42    (1u<<20)

/* AMD CPU caps: EDX of CPUID leaf 0x80000001 */
#define FLAG_MMXEXT   (1u<<22)
#define FLAG_3DNOWEXT (1u<<30)
#define FLAG_3DNOW    (1u<<31)


/*
 * cpuid, from kernel source ( linux/include/asm-i386/processor.h )
 *
 * Executes the CPUID instruction with `op` in EAX and stores the resulting
 * EAX, EBX, ECX and EDX register values through the four output pointers.
 *
 * Declared `static inline`: in C99/C11 a plain `inline` definition does not
 * provide an external definition, so any call the compiler chooses not to
 * inline (e.g. in an unoptimized build) would fail to link.
 */
static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
{
   __asm__("cpuid" : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op));
}

/*
 * check SIMD capabilities
 *
 * Clears *simd and then sets the has_* member to 1 for every SIMD feature
 * bit reported by CPUID.
 *
 * Returns 0 on success, or 1 when CPUID leaf 0 reports that leaf 1 is not
 * available (in which case *simd is left all-zero).
 */
int x86cpucaps_simd(struct simdcaps *simd)
{
   int ex[4];
   unsigned int edx, ecx;

   memset(simd, 0, sizeof(*simd));

   /* check CPU has CPUID */
   cpuid(0,&ex[0],&ex[1],&ex[2],&ex[3]);
   if ( ex[0] < 1) return 1;

   /*
    * Extended (AMD) features live in leaf 0x80000001. Leaf 0x80000000
    * returns the highest supported extended leaf in EAX; only query
    * 0x80000001 when it exists, because a CPU asked for an unsupported
    * leaf may answer with unrelated data that would be misread as
    * feature bits.
    */
   cpuid((int)0x80000000u,&ex[0],&ex[1],&ex[2],&ex[3]);
   if ( (unsigned int)ex[0] >= 0x80000001u )
   {
      cpuid((int)0x80000001u,&ex[0],&ex[1],&ex[2],&ex[3]);
      edx = (unsigned int)ex[3];
      if ( (edx & FLAG_MMXEXT) != 0 ) simd->has_mmxext = 1;
      if ( (edx & FLAG_3DNOW) != 0 ) simd->has_3dnow = 1;
      if ( (edx & FLAG_3DNOWEXT) != 0 ) simd->has_3dnowext = 1;
   }

   /* standard features: leaf 1, EDX and ECX */
   cpuid(1,&ex[0],&ex[1],&ex[2],&ex[3]);
   edx = (unsigned int)ex[3];
   ecx = (unsigned int)ex[2];
   if ( (edx & FLAG_MMX) != 0 ) simd->has_mmx = 1;
   if ( (edx & FLAG_SSE) != 0 ) simd->has_sse = 1;
   if ( (edx & FLAG_SSE2) != 0 ) simd->has_sse2 = 1;
   if ( (ecx & FLAG_SSE3) != 0 ) simd->has_sse3 = 1;
   if ( (ecx & FLAG_SSSE3) != 0 ) simd->has_ssse3 = 1;
   if ( (ecx & FLAG_SSE41) != 0 ) simd->has_sse41 = 1;
   if ( (ecx & FLAG_SSE42) != 0 ) simd->has_sse42 = 1;

   /* SSE CPU supports mmxext too */
   if (simd->has_sse == 1) simd->has_mmxext = 1;

   return 0;
}

/*
 * check CPU Family-Model-Stepping
 *
 * Returns the low 12 bits of EAX from CPUID leaf 1
 * (family in bits 8-11, model in bits 4-7, stepping in bits 0-3),
 * or 0 when CPUID leaf 0 reports that leaf 1 is not available.
 */
int x86cpucaps_cpumodel()
{
   int eax, ebx, ecx, edx;

   /* leaf 0: EAX holds the highest supported standard leaf */
   cpuid(0, &eax, &ebx, &ecx, &edx);
   if (eax >= 1)
   {
      cpuid(1, &eax, &ebx, &ecx, &edx);
      return eax & 0x0fff;
   }

   return 0;
}

/*
 * check CPU Vendor
 */
int x86cpucaps_vendor(char *vendorname)
{
   int ex[4];
   int f = 0;
   char vendorstr[LEN_VENDORNAME];

   /* check CPU has CPUID */
   cpuid(0,&ex[0],&ex[1],&ex[2],&ex[3]);
   if ( ex[0] < 1) return f;

   /* read Vendor Strings */
   vendorstr[0] =  ex[1] & 0xff;
   vendorstr[1] =  (ex[1] >> 8) & 0xff;
   vendorstr[2] = (ex[1] >> 16) & 0xff;
   vendorstr[3] = (ex[1] >> 24) & 0xff;
   vendorstr[4] =  ex[3] & 0xff;
   vendorstr[5] = (ex[3] >> 8) & 0xff;
   vendorstr[6] = (ex[3] >> 16) & 0xff;
   vendorstr[7] = (ex[3] >> 24) & 0xff;
   vendorstr[8] =  ex[2] & 0xff;
   vendorstr[9] = (ex[2] >> 8) & 0xff;
   vendorstr[10]= (ex[2] >> 16) & 0xff;
   vendorstr[11]= (ex[2] >> 24) & 0xff;
   vendorstr[12]= '\0';

   if ( strcmp(vendorstr, "GenuineIntel") == 0 )
     f = VENDOR_INTEL;
   else if ( strcmp(vendorstr, "AuthenticAMD") == 0 )
     f = VENDOR_AMD;
   else if ( strcmp(vendorstr, "CyrixInstead") == 0 )
     f = VENDOR_CYRIX;
   else if ( strcmp(vendorstr, "CentaurHauls") == 0 )
     f = VENDOR_CENTAUR;
   else if ( strcmp(vendorstr, "GenuineTMx86") == 0 )
     f = VENDOR_TRANSMETA;

   strncpy(vendorname, vendorstr, LEN_VENDORNAME);

   return f;
}


int main(int argc, char **argv)
{
   int family, model, stepping;
   char *arch, *gccarch, *gccsimd, *iccarch, *icctune, *iccsimd;

   int cpu_id = x86cpucaps_cpumodel();
   char vendorname[LEN_VENDORNAME];
   int vendor_id = x86cpucaps_vendor(vendorname);
   struct simdcaps simd;

   float gccver = 999.;
   float iccver = 999.;

   if (argc > 1)
     gccver = atof(argv[1]);
   if (argc > 2)
     iccver = atof(argv[2]);

   family   = (cpu_id & 0xf00) >> 8;
   model    = (cpu_id & 0x0f0) >> 4;
   stepping =  cpu_id & 0x00f;

   switch (vendor_id)
   {

   case VENDOR_INTEL:
     if (family == 4)
     {
       arch = "i486";
     }
     else if (family == 5)
     {
       if (model < 4) arch = "pentium";
       else arch = "pentium-mmx";
     }
     else if (family == 6)
     {
       if (model <= 1) arch = "pentiumpro";
       else if (model < 7) arch = "pentium2";
       else if (model < 12) arch = "pentium3";
       else if (model < 14) arch = "pentium-m";
       else if (model == 14) arch = "prescott"; // core
       else if (model == 15) arch = "core2";
     }
     else if (family > 6)
     { /* family == 15 */
       arch = "pentium4";
     }
     else
     {
       arch = "i386";
     }
     break;

   case VENDOR_AMD:
     if (family == 4)
     {
       if (model <= 9) arch = "i486";
       else arch = "i586";
     }
     else if (family == 5)
     {
       if (model <= 3) arch = "i586";
       else if (model <= 7) arch = "k6";
       else if (model == 8) arch = "k6-2";
       else arch = "k6-3";
     }
     else if (family == 6)
     {
       if (model <= 3) arch = "athlon";
       else if (model == 4) arch = "athlon-tbird";
       else arch = "athlon-xp";
     }
     else if (family > 6)
     {
       arch = "k8";
     }
     else
     {
       arch = "unknown";
     }
     break;

   case VENDOR_CYRIX:
     if (family == 4) arch = "i586";
     else if (family == 5) arch = "i586";
     else if (family == 6) arch = "i686";
     else arch = "unknown";
     break;

   case VENDOR_CENTAUR:
     if (family == 5) arch = "i586";
     else arch = "unknown";
     break;

   case VENDOR_TRANSMETA:
     arch = "i686";
     break;

   default:
     arch = "unknown";
     break;

   }

   /* some targets not supported by older gcc */
   gccarch = arch;
   if (gccver < (float)4.3)
   {
     if (!strcmp(gccarch, "core2")) gccarch = "pentium-m";
   }
   if (gccver < (float)3.41)
   {
     if (!strcmp(gccarch, "prescott")) gccarch = "pentium4";
     if (!strcmp(gccarch, "pentium-m")) gccarch = "pentium4";
   }
   if (gccver < (float)3.4)
   {
     if (!strcmp(gccarch, "k8")) gccarch = "athlon-xp";
   }
   if (gccver < (float)3.1)
   {
     if (strstr(gccarch, "athlon-") != NULL)
       gccarch = "athlon";
     else if (strstr(gccarch, "k6-") != NULL)
       gccarch = "k6";
     else if (!strcmp(gccarch, "pentium-mmx"))
       gccarch = "pentium";
     else if (!strcmp(gccarch, "pentium2")
     || !strcmp(gccarch, "pentium3")
     || !strcmp(gccarch, "pentium4"))
       gccarch = "pentiumpro";
   }
   if (gccver < (float)3.0)
   {
     if (!strcmp(gccarch, "athlon"))
       gccarch = "pentiumpro";
     else if (!strcmp(gccarch, "k6"))
       gccarch = "pentium";
   }
   if (gccver < (float)2.95)
   {
     if (!strcmp(gccarch, "pentiumpro"))
       gccarch = "i686";
     else if (!strcmp(gccarch, "pentium"))
       gccarch = "i586";
   }

   /* SIMD options */
   x86cpucaps_simd(&simd);
   gccsimd = "";
   if (gccver >= 3.1) {
     if ( simd.has_3dnow || simd.has_3dnowext )
       gccsimd = "-m3dnow";
     else
     {
       if (gccver >= 4.3)
       {
         if (simd.has_sse41 || simd.has_sse42) gccsimd = "-msse4 -mfpmath=sse";
         else if (simd.has_ssse3) gccsimd = "-mssse3 -mfpmath=sse";
       }
       else if ( gccver >= 3.4 && simd.has_sse3 ) gccsimd = "-msse3 -mfpmath=sse";
       else if ( simd.has_sse2 ) gccsimd = "-msse2 -mfpmath=sse";
       else if ( simd.has_sse ) gccsimd = "-msse";
       else if ( simd.has_mmx ) gccsimd = "-mmmx";
     }
   }

   /* IntelC options */
   iccarch = arch;
   icctune = arch;
   iccsimd = "";
   if (simd.has_sse41 || simd.has_sse42) iccsimd = "-xS";
   else if (simd.has_ssse3) iccsimd = "-xT";
   else if (simd.has_sse3) iccsimd = "-msse3 -xP";
   else if (simd.has_sse2) iccsimd = "-msse2";

   printf("%s\n", arch);

   /* GCC flags */
   if (gccver >= 4.2) gccarch = "native";
#ifdef __x86_64__
   /* do not output i386 flags on x86_64 */
   if (strcmp(gccarch, "core2") != 0 && strcmp(gccarch, "native") != 0)
     printf("%s\n", gccsimd);
   else
#endif
   printf("-march=%s %s\n", gccarch, gccsimd);

   /* IntelC flags */
   printf("-march=%s -mtune=%s %s\n", iccarch, icctune, iccsimd);

   return 0;
}