tools/cpuflags.c
author Radek Brich <radek.brich@devl.cz>
Sat, 12 Apr 2008 02:02:45 +0200 (2008-04-12)
branch pyrit
changeset 68 2c154aad7f33
child 70 4b84e90325c5
permissions -rw-r--r--
added detection of optimal CPU flags for both GCC and IntelC
/*
 * cpuflags
 *
 * Simple tool which detects CPU capabilities
 * and outputs appropriate compiler flags.
 *
 * Usage:
 *  cpuflags [gcc version] [intelc version]
 *
 * Returns:
 *  [arch]
 *  [gcc flags]
 *  [intelc flags]
 *
 * The gcc/intelc version must be passed as floating point value,
 * e.g. 4.23
 *
 * Copyright (C) 2008  Radek Brich <radek@brich.org>
 *
 * Based on x86cpucaps
 * by Osamu Kayasono <jacobi@jcom.home.ne.jp>
 * http://members.jcom.home.ne.jp/jacobi/linux/softwares.html
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define LEN_VENDORNAME 13

#define VENDOR_INTEL     1
#define VENDOR_AMD       2
#define VENDOR_CYRIX     3
#define VENDOR_CENTAUR   4
#define VENDOR_TRANSMETA 5
#define VENDOR_OTHERS    0

/*
 * SIMD capability record filled in by x86cpucaps_simd().
 * Every member is a boolean: 1 when the instruction set was detected,
 * 0 otherwise.
 */
struct simdcaps
{
   /* reported by CPUID leaf 1 */
   unsigned int has_mmx, has_sse, has_sse2;
   unsigned int has_sse3, has_ssse3, has_sse41, has_sse42;

   /* reported by CPUID leaf 0x80000001 */
   unsigned int has_mmxext, has_3dnowext, has_3dnow;
};

/*
 * CPUID feature bit masks.
 *
 * The masks are unsigned on purpose: (1<<31) on a signed int shifts into
 * the sign bit, which is undefined behaviour in C.
 */

/* CPU caps: EDX of CPUID leaf 1 */
#define FLAG_MMX      (1u<<23)
#define FLAG_SSE      (1u<<25)
#define FLAG_SSE2     (1u<<26)

/* CPU caps 2: ECX of CPUID leaf 1 */
#define FLAG_SSE3     (1u<<0)
#define FLAG_SSSE3    (1u<<9)
#define FLAG_SSE41    (1u<<19)
#define FLAG_SSE42    (1u<<20)

/* AMD CPU caps: EDX of CPUID leaf 0x80000001 */
#define FLAG_MMXEXT   (1u<<22)
#define FLAG_3DNOWEXT (1u<<30)
#define FLAG_3DNOW    (1u<<31)


/*
 * cpuid, from kernel source ( linux/include/asm-i386/processor.h )
 *
 * Executes the CPUID instruction with leaf number `op` in EAX and stores
 * the resulting EAX, EBX, ECX and EDX register values through the four
 * output pointers.
 *
 * Declared `static inline`: with C99/C11 inline semantics a plain `inline`
 * definition emits no external symbol, so a call that the compiler chooses
 * not to inline (e.g. at -O0) would fail to link.
 */
static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
{
   __asm__("cpuid" : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op));
}

/*
 * check SIMD capabilities
 *
 * Clears *simd, then sets each has_* member to 1 when the corresponding
 * CPUID feature bit is present.
 *
 * Returns 0 on success, or 1 (leaving *simd all zero) when CPUID reports
 * that leaf 1 is not available.
 */
int x86cpucaps_simd(struct simdcaps *simd)
{
   int ex[4];

   memset(simd, 0, sizeof(*simd));

   /* check CPU has CPUID */
   cpuid(0,&ex[0],&ex[1],&ex[2],&ex[3]);
   if ( ex[0] < 1) return 1;

   /*
    * Leaf 0x80000000 returns the highest supported extended leaf in EAX.
    * Only read the AMD feature bits when leaf 0x80000001 really exists;
    * otherwise the registers hold unrelated data.
    */
   cpuid((int)0x80000000u,&ex[0],&ex[1],&ex[2],&ex[3]);
   if ( (unsigned int)ex[0] >= 0x80000001u )
   {
      cpuid((int)0x80000001u,&ex[0],&ex[1],&ex[2],&ex[3]);
      if ( (ex[3] & FLAG_MMXEXT  ) != 0 ) simd->has_mmxext = 1;
      if ( (ex[3] & FLAG_3DNOW   ) != 0 ) simd->has_3dnow = 1;
      if ( (ex[3] & FLAG_3DNOWEXT) != 0 ) simd->has_3dnowext = 1;
   }

   /* standard feature bits: EDX is ex[3], ECX is ex[2] */
   cpuid(1,&ex[0],&ex[1],&ex[2],&ex[3]);
   if ( (ex[3] & FLAG_MMX  ) != 0 ) simd->has_mmx = 1;
   if ( (ex[3] & FLAG_SSE  ) != 0 ) simd->has_sse = 1;
   if ( (ex[3] & FLAG_SSE2 ) != 0 ) simd->has_sse2 = 1;
   if ( (ex[2] & FLAG_SSE3 ) != 0 ) simd->has_sse3 = 1;
   if ( (ex[2] & FLAG_SSSE3) != 0 ) simd->has_ssse3 = 1;
   if ( (ex[2] & FLAG_SSE41) != 0 ) simd->has_sse41 = 1;
   if ( (ex[2] & FLAG_SSE42) != 0 ) simd->has_sse42 = 1;

   /* SSE CPU supports mmxext too */
   if (simd->has_sse == 1) simd->has_mmxext = 1;

   return 0;
}

/*
 * Query the CPU signature.
 *
 * Returns the low 12 bits of EAX from CPUID leaf 1, which the caller
 * splits into family (bits 8-11), model (bits 4-7) and stepping
 * (bits 0-3). Returns 0 when CPUID reports that leaf 1 is unavailable.
 */
int x86cpucaps_cpumodel()
{
   int regs[4];

   /* leaf 0: EAX holds the highest supported standard leaf */
   cpuid(0, &regs[0], &regs[1], &regs[2], &regs[3]);

   if (regs[0] >= 1)
   {
      cpuid(1, &regs[0], &regs[1], &regs[2], &regs[3]);
      return regs[0] & 0x0fff;
   }

   return 0;
}

/*
 * check CPU Vendor
 */
int x86cpucaps_vendor(char *vendorname)
{
   int ex[4];
   int f = 0;
   char vendorstr[LEN_VENDORNAME];

   /* check CPU has CPUID */
   cpuid(0,&ex[0],&ex[1],&ex[2],&ex[3]);
   if ( ex[0] < 1) return f;

   /* read Vendor Strings */
   vendorstr[0] =  ex[1] & 0xff;
   vendorstr[1] =  (ex[1] >> 8) & 0xff;
   vendorstr[2] = (ex[1] >> 16) & 0xff;
   vendorstr[3] = (ex[1] >> 24) & 0xff;
   vendorstr[4] =  ex[3] & 0xff;
   vendorstr[5] = (ex[3] >> 8) & 0xff;
   vendorstr[6] = (ex[3] >> 16) & 0xff;
   vendorstr[7] = (ex[3] >> 24) & 0xff;
   vendorstr[8] =  ex[2] & 0xff;
   vendorstr[9] = (ex[2] >> 8) & 0xff;
   vendorstr[10]= (ex[2] >> 16) & 0xff;
   vendorstr[11]= (ex[2] >> 24) & 0xff;
   vendorstr[12]= '\0';

   if ( strcmp(vendorstr, "GenuineIntel") == 0 )
     f = VENDOR_INTEL;
   else if ( strcmp(vendorstr, "AuthenticAMD") == 0 )
     f = VENDOR_AMD;
   else if ( strcmp(vendorstr, "CyrixInstead") == 0 )
     f = VENDOR_CYRIX;
   else if ( strcmp(vendorstr, "CentaurHauls") == 0 )
     f = VENDOR_CENTAUR;
   else if ( strcmp(vendorstr, "GenuineTMx86") == 0 )
     f = VENDOR_TRANSMETA;

   strncpy(vendorname, vendorstr, LEN_VENDORNAME);

   return f;
}


int main(int argc, char **argv)
{
   int family, model, stepping;
   char *arch, *gccarch, *gccsimd, *iccarch, *icctune, *iccsimd;

   int cpu_id = x86cpucaps_cpumodel();
   char vendorname[LEN_VENDORNAME];
   int vendor_id = x86cpucaps_vendor(vendorname);
   struct simdcaps simd;

   float gccver = 999.;
   float iccver = 999.;

   if (argc > 1)
     gccver = atof(argv[1]);
   if (argc > 2)
     iccver = atof(argv[2]);

   family   = (cpu_id & 0xf00) >> 8;
   model    = (cpu_id & 0x0f0) >> 4;
   stepping =  cpu_id & 0x00f;

   switch (vendor_id)
   {

   case VENDOR_INTEL:
     if (family == 4)
     {
       arch = "i486";
     }
     else if (family == 5)
     {
       if (model < 4) arch = "pentium";
       else arch = "pentium-mmx";
     }
     else if (family == 6)
     {
       if (model <= 1) arch = "pentiumpro";
       else if (model < 7) arch = "pentium2";
       else if (model == 7) arch = "pentium3";
       else if (model < 15) arch = "pentium-m";
       else if (model == 15)
       {
         if (stepping < 6) arch = "core";
         else arch = "core2";
       }

     }
     else if (family > 6)
     { /* family == 15 */
       arch = "pentium4";
     }
     else
     {
       arch = "i386";
     }
     break;

   case VENDOR_AMD:
     if (family == 4)
     {
       if (model <= 9) arch = "i486";
       else arch = "i586";
     }
     else if (family == 5)
     {
       if (model <= 3) arch = "i586";
       else if (model <= 7) arch = "k6";
       else if (model == 8) arch = "k6-2";
       else arch = "k6-3";
     }
     else if (family == 6)
     {
       if (model <= 3) arch = "athlon";
       else if (model == 4) arch = "athlon-tbird";
       else arch = "athlon-xp";
     }
     else if (family > 6)
     {
       arch = "k8";
     }
     else
     {
       arch = "unknown";
     }
     break;

   case VENDOR_CYRIX:
     if (family == 4) arch = "i586";
     else if (family == 5) arch = "i586";
     else if (family == 6) arch = "i686";
     else arch = "unknown";
     break;

   case VENDOR_CENTAUR:
     if (family == 5) arch = "i586";
     else arch = "unknown";
     break;

   case VENDOR_TRANSMETA:
     arch = "i686";
     break;

   default:
     arch = "unknown";
     break;

   }

   /* some targets not supported by older gcc */
   gccarch = arch;
   if (gccver < (float)4.3)
   {
     if (!strcmp(gccarch, "core2")) gccarch = "pentium3";
   }
   if (gccver < (float)3.4)
   {
     if (!strcmp(gccarch, "k8")) gccarch = "athlon-xp";
   }
   if (gccver < (float)3.1)
   {
     if (strstr(gccarch, "athlon-") != NULL)
       gccarch = "athlon";
     else if (strstr(gccarch, "k6-") != NULL)
       gccarch = "k6";
     else if (!strcmp(gccarch, "pentium-mmx"))
       gccarch = "pentium";
     else if (!strcmp(gccarch, "pentium2")
     || !strcmp(gccarch, "pentium3")
     || !strcmp(gccarch, "pentium4"))
       gccarch = "pentiumpro";
   }

   if (gccver < (float)3.0)
   {
     if (!strcmp(gccarch, "athlon"))
       gccarch = "pentiumpro";
     else if (!strcmp(gccarch, "k6"))
       gccarch = "pentium";
   }

   if (gccver < (float)2.9)
   {
     if (!strcmp(gccarch, "pentiumpro"))
       gccarch = "i686";
     else if (!strcmp(gccarch, "pentium"))
       gccarch = "i586";
   }

   /* SIMD options */
   x86cpucaps_simd(&simd);
   gccsimd = "";
   if (gccver >= 3.1) {
     if ( simd.has_3dnow || simd.has_3dnowext )
       gccsimd = "-m3dnow";
     else
     {
       if (gccver >= 4.3)
       {
         if (simd.has_sse41 || simd.has_sse42) gccsimd = "-msse4 -mfpmath=sse";
         else if (simd.has_ssse3) gccsimd = "-mssse3 -mfpmath=sse";
       }
       else if ( gccver >= 3.4 && simd.has_sse3 ) gccsimd = "-msse3 -mfpmath=sse";
       else if ( simd.has_sse2 ) gccsimd = "-msse2 -mfpmath=sse";
       else if ( simd.has_sse ) gccsimd = "-msse";
       else if ( simd.has_mmx ) gccsimd = "-mmmx";
     }
   }

   /* intelc options */
   iccarch = arch;
   icctune = arch;
   iccsimd = "";
   if (simd.has_sse41 || simd.has_sse42) iccsimd = "-xS";
   else if (simd.has_ssse3) iccsimd = "-xT";
   else if (simd.has_sse3) iccsimd = "-msse3 -xP";
   else if (simd.has_sse2) iccsimd = "-msse2";

   printf("%s\n", arch);
   if (gccver >= 4.2) gccarch = "native";
   printf("-march=%s -mtune=%s %s\n", gccarch, gccarch, gccsimd);
   printf("-march=%s -mtune=%s %s\n", iccarch, icctune, iccsimd);
   return 0;
}