tools/cpuflags.c
author Radek Brich <radek.brich@devl.cz>
Mon, 05 May 2008 15:31:14 +0200
branch pyrit
changeset 92 9af5c039b678
parent 70 4b84e90325c5
child 103 3b3257a410fe
permissions -rw-r--r--
add MSVC compiler support, make it default for Windows; new header file simd.h for SSE abstraction and helpers; add mselect pseudo instruction for common or(and(...), andnot(...)); replace many SSE intrinsics with new names; new MemoryPool class (mempool.h) for faster KdNode allocation; remove setMaxDepth() from Octree and KdTree, make max_depth const (it should be defined in the constructor and never changed, since a change after building the tree would cause an error in traversal); modify DefaultSampler to generate nice 2x2 packets of samples for packet tracing; optimize Box and BBox::intersect_packet; add precomputed invdir attribute to RayPacket; scons build system: check for pthread library on Windows, check for SDL, generate include/config.h with variables detected by scons configuration, move auxiliary files to build/, add sanity checks; add writable operator[] to Vector

/*
 * cpuflags
 *
 * Simple tool which detects CPU capabilities
 * and outputs appropriate compiler flags.
 *
 * Usage:
 *  cpuflags [gcc version] [intelc version]
 *
 * Returns:
 *  [arch]
 *  [gcc flags]
 *  [intelc flags]
 *
 * The gcc/intelc version must be passed as floating point value,
 * e.g. 4.23
 *
 * Copyright (C) 2008  Radek Brich <radek@brich.org>
 *
 * Based on x86cpucaps
 * by Osamu Kayasono <jacobi@jcom.home.ne.jp>
 * http://members.jcom.home.ne.jp/jacobi/linux/softwares.html
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Size of a vendor-name buffer: the 12 characters of the CPUID vendor
 * string plus the terminating NUL. */
#define LEN_VENDORNAME 13

/* Vendor identifiers returned by x86cpucaps_vendor().
 * VENDOR_OTHERS is also the value returned when no known vendor string
 * matched or when CPUID reports no leaf 1. */
#define VENDOR_INTEL     1
#define VENDOR_AMD       2
#define VENDOR_CYRIX     3
#define VENDOR_CENTAUR   4
#define VENDOR_TRANSMETA 5
#define VENDOR_OTHERS    0

/*
 * SIMD capability flags filled in by x86cpucaps_simd().
 * Each member is 0 when the feature is absent and 1 when it was detected.
 */
struct simdcaps
{
   /* features reported by CPUID leaf 1 */
   unsigned has_mmx;       /* MMX */
   unsigned has_sse;       /* SSE */
   unsigned has_sse2;      /* SSE2 */
   unsigned has_sse3;      /* SSE3 */
   unsigned has_ssse3;     /* SSSE3 */
   unsigned has_sse41;     /* SSE4.1 */
   unsigned has_sse42;     /* SSE4.2 */

   /* features reported by CPUID leaf 0x80000001 */
   unsigned has_mmxext;    /* MMX extensions (also set whenever SSE is present) */
   unsigned has_3dnowext;  /* extended 3DNow! */
   unsigned has_3dnow;     /* 3DNow! */
};

/*
 * CPUID feature bit masks.
 *
 * The masks are unsigned on purpose: (1<<31) on a signed int shifts into
 * the sign bit, which is undefined behaviour in C.
 */

/* CPU caps: CPUID leaf 1, EDX */
#define FLAG_MMX      (1u<<23)
#define FLAG_SSE      (1u<<25)
#define FLAG_SSE2     (1u<<26)

/* CPU caps 2: CPUID leaf 1, ECX */
#define FLAG_SSE3     (1u<<0)
#define FLAG_SSSE3    (1u<<9)
#define FLAG_SSE41    (1u<<19)
#define FLAG_SSE42    (1u<<20)

/* AMD CPU caps: CPUID leaf 0x80000001, EDX */
#define FLAG_MMXEXT   (1u<<22)
#define FLAG_3DNOWEXT (1u<<30)
#define FLAG_3DNOW    (1u<<31)


/*
 * Execute the CPUID instruction for leaf `op` and store the resulting
 * EAX, EBX, ECX and EDX register values through the four output pointers.
 * Adapted from the kernel source ( linux/include/asm-i386/processor.h ).
 *
 * Declared `static inline`: a plain C99 `inline` definition provides no
 * external definition, so any call the compiler does not inline
 * (e.g. when building without optimization) would fail to link.
 *
 * ECX is zeroed on input so that leaves which take a sub-leaf index are
 * always queried at sub-leaf 0 instead of with leftover register contents.
 */
static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
{
   __asm__("cpuid"
           : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
           : "a" (op), "c" (0));
}

/*
 * check SIMD capabilities
 *
 * Clears *simd, then sets each has_* member to 1 when the matching CPUID
 * feature bit is present.
 *
 * Returns 0 on success, or 1 when CPUID reports no standard leaf 1
 * (*simd is left all-zero in that case).
 */
int x86cpucaps_simd(struct simdcaps *simd)
{
   int ex[4];
   unsigned int ecx, edx;

   memset(simd, 0, sizeof(*simd));

   /* check CPU has CPUID */
   cpuid(0,&ex[0],&ex[1],&ex[2],&ex[3]);
   if ( ex[0] < 1) return 1;

   /*
    * Leaf 0x80000000 returns the highest supported extended leaf in EAX.
    * Only read 0x80000001 when it really exists; querying an unsupported
    * leaf yields unrelated data that could be misread as 3DNow!/MMXEXT.
    */
   cpuid(0x80000000,&ex[0],&ex[1],&ex[2],&ex[3]);
   if ( (unsigned int)ex[0] >= 0x80000001u )
   {
     cpuid(0x80000001,&ex[0],&ex[1],&ex[2],&ex[3]);
     edx = (unsigned int)ex[3];
     if ( (edx & FLAG_MMXEXT) != 0 ) simd->has_mmxext = 1;
     if ( (edx & FLAG_3DNOW) != 0 ) simd->has_3dnow = 1;
     if ( (edx & FLAG_3DNOWEXT) != 0 ) simd->has_3dnowext = 1;
   }

   /* standard feature bits; tested as unsigned so bit 31 style masks are safe */
   cpuid(1,&ex[0],&ex[1],&ex[2],&ex[3]);
   ecx = (unsigned int)ex[2];
   edx = (unsigned int)ex[3];
   if ( (edx & FLAG_MMX) != 0 ) simd->has_mmx = 1;
   if ( (edx & FLAG_SSE) != 0 ) simd->has_sse = 1;
   if ( (edx & FLAG_SSE2) != 0 ) simd->has_sse2 = 1;
   if ( (ecx & FLAG_SSE3) != 0 ) simd->has_sse3 = 1;
   if ( (ecx & FLAG_SSSE3) != 0 ) simd->has_ssse3 = 1;
   if ( (ecx & FLAG_SSE41) != 0 ) simd->has_sse41 = 1;
   if ( (ecx & FLAG_SSE42) != 0 ) simd->has_sse42 = 1;

   /* SSE CPU supports mmxext too */
   if (simd->has_sse == 1) simd->has_mmxext = 1;

   return 0;
}

/*
 * check CPU Family-Model-Stepping
 *
 * Returns the low 12 bits of EAX from CPUID leaf 1, which main() decodes
 * as family (bits 8-11), model (bits 4-7) and stepping (bits 0-3).
 * Returns 0 when CPUID reports no leaf 1.
 */
int x86cpucaps_cpumodel()
{
   int regs[4];

   /* leaf 0: EAX holds the highest supported standard leaf */
   cpuid(0, &regs[0], &regs[1], &regs[2], &regs[3]);
   if (regs[0] >= 1)
   {
     /* leaf 1: EAX holds the processor signature */
     cpuid(1, &regs[0], &regs[1], &regs[2], &regs[3]);
     return regs[0] & 0x0fff;
   }

   return 0;
}

/*
 * check CPU Vendor
 */
int x86cpucaps_vendor(char *vendorname)
{
   int ex[4];
   int f = 0;
   char vendorstr[LEN_VENDORNAME];

   /* check CPU has CPUID */
   cpuid(0,&ex[0],&ex[1],&ex[2],&ex[3]);
   if ( ex[0] < 1) return f;

   /* read Vendor Strings */
   vendorstr[0] =  ex[1] & 0xff;
   vendorstr[1] =  (ex[1] >> 8) & 0xff;
   vendorstr[2] = (ex[1] >> 16) & 0xff;
   vendorstr[3] = (ex[1] >> 24) & 0xff;
   vendorstr[4] =  ex[3] & 0xff;
   vendorstr[5] = (ex[3] >> 8) & 0xff;
   vendorstr[6] = (ex[3] >> 16) & 0xff;
   vendorstr[7] = (ex[3] >> 24) & 0xff;
   vendorstr[8] =  ex[2] & 0xff;
   vendorstr[9] = (ex[2] >> 8) & 0xff;
   vendorstr[10]= (ex[2] >> 16) & 0xff;
   vendorstr[11]= (ex[2] >> 24) & 0xff;
   vendorstr[12]= '\0';

   if ( strcmp(vendorstr, "GenuineIntel") == 0 )
     f = VENDOR_INTEL;
   else if ( strcmp(vendorstr, "AuthenticAMD") == 0 )
     f = VENDOR_AMD;
   else if ( strcmp(vendorstr, "CyrixInstead") == 0 )
     f = VENDOR_CYRIX;
   else if ( strcmp(vendorstr, "CentaurHauls") == 0 )
     f = VENDOR_CENTAUR;
   else if ( strcmp(vendorstr, "GenuineTMx86") == 0 )
     f = VENDOR_TRANSMETA;

   strncpy(vendorname, vendorstr, LEN_VENDORNAME);

   return f;
}


int main(int argc, char **argv)
{
   int family, model, stepping;
   char *arch, *gccarch, *gccsimd, *iccarch, *icctune, *iccsimd;

   int cpu_id = x86cpucaps_cpumodel();
   char vendorname[LEN_VENDORNAME];
   int vendor_id = x86cpucaps_vendor(vendorname);
   struct simdcaps simd;

   float gccver = 999.;
   float iccver = 999.;

   if (argc > 1)
     gccver = atof(argv[1]);
   if (argc > 2)
     iccver = atof(argv[2]);

   family   = (cpu_id & 0xf00) >> 8;
   model    = (cpu_id & 0x0f0) >> 4;
   stepping =  cpu_id & 0x00f;

   switch (vendor_id)
   {

   case VENDOR_INTEL:
     if (family == 4)
     {
       arch = "i486";
     }
     else if (family == 5)
     {
       if (model < 4) arch = "pentium";
       else arch = "pentium-mmx";
     }
     else if (family == 6)
     {
       if (model <= 1) arch = "pentiumpro";
       else if (model < 7) arch = "pentium2";
       else if (model < 12) arch = "pentium3";
       else if (model < 14) arch = "pentium-m";
       else if (model == 14) arch = "prescott"; // core
       else if (model == 15) arch = "core2";
     }
     else if (family > 6)
     { /* family == 15 */
       arch = "pentium4";
     }
     else
     {
       arch = "i386";
     }
     break;

   case VENDOR_AMD:
     if (family == 4)
     {
       if (model <= 9) arch = "i486";
       else arch = "i586";
     }
     else if (family == 5)
     {
       if (model <= 3) arch = "i586";
       else if (model <= 7) arch = "k6";
       else if (model == 8) arch = "k6-2";
       else arch = "k6-3";
     }
     else if (family == 6)
     {
       if (model <= 3) arch = "athlon";
       else if (model == 4) arch = "athlon-tbird";
       else arch = "athlon-xp";
     }
     else if (family > 6)
     {
       arch = "k8";
     }
     else
     {
       arch = "unknown";
     }
     break;

   case VENDOR_CYRIX:
     if (family == 4) arch = "i586";
     else if (family == 5) arch = "i586";
     else if (family == 6) arch = "i686";
     else arch = "unknown";
     break;

   case VENDOR_CENTAUR:
     if (family == 5) arch = "i586";
     else arch = "unknown";
     break;

   case VENDOR_TRANSMETA:
     arch = "i686";
     break;

   default:
     arch = "unknown";
     break;

   }

   /* some targets not supported by older gcc */
   gccarch = arch;
   if (gccver < (float)4.3)
   {
     if (!strcmp(gccarch, "core2")) gccarch = "pentium-m";
   }
   if (gccver < (float)3.41)
   {
     if (!strcmp(gccarch, "prescott")) gccarch = "pentium4";
     if (!strcmp(gccarch, "pentium-m")) gccarch = "pentium4";
   }
   if (gccver < (float)3.4)
   {
     if (!strcmp(gccarch, "k8")) gccarch = "athlon-xp";
   }
   if (gccver < (float)3.1)
   {
     if (strstr(gccarch, "athlon-") != NULL)
       gccarch = "athlon";
     else if (strstr(gccarch, "k6-") != NULL)
       gccarch = "k6";
     else if (!strcmp(gccarch, "pentium-mmx"))
       gccarch = "pentium";
     else if (!strcmp(gccarch, "pentium2")
     || !strcmp(gccarch, "pentium3")
     || !strcmp(gccarch, "pentium4"))
       gccarch = "pentiumpro";
   }
   if (gccver < (float)3.0)
   {
     if (!strcmp(gccarch, "athlon"))
       gccarch = "pentiumpro";
     else if (!strcmp(gccarch, "k6"))
       gccarch = "pentium";
   }
   if (gccver < (float)2.95)
   {
     if (!strcmp(gccarch, "pentiumpro"))
       gccarch = "i686";
     else if (!strcmp(gccarch, "pentium"))
       gccarch = "i586";
   }

   /* SIMD options */
   x86cpucaps_simd(&simd);
   gccsimd = "";
   if (gccver >= 3.1) {
     if ( simd.has_3dnow || simd.has_3dnowext )
       gccsimd = "-m3dnow";
     else
     {
       if (gccver >= 4.3)
       {
         if (simd.has_sse41 || simd.has_sse42) gccsimd = "-msse4 -mfpmath=sse";
         else if (simd.has_ssse3) gccsimd = "-mssse3 -mfpmath=sse";
       }
       else if ( gccver >= 3.4 && simd.has_sse3 ) gccsimd = "-msse3 -mfpmath=sse";
       else if ( simd.has_sse2 ) gccsimd = "-msse2 -mfpmath=sse";
       else if ( simd.has_sse ) gccsimd = "-msse";
       else if ( simd.has_mmx ) gccsimd = "-mmmx";
     }
   }

   /* IntelC options */
   iccarch = arch;
   icctune = arch;
   iccsimd = "";
   if (simd.has_sse41 || simd.has_sse42) iccsimd = "-xS";
   else if (simd.has_ssse3) iccsimd = "-xT";
   else if (simd.has_sse3) iccsimd = "-msse3 -xP";
   else if (simd.has_sse2) iccsimd = "-msse2";

   printf("%s\n", arch);

   /* GCC flags */
   if (gccver >= 4.2) gccarch = "native";
#ifdef __x86_64__
   /* do not output i386 flags on x86_64 */
   if (strcmp(gccarch, "core2") != 0 && strcmp(gccarch, "native") != 0)
     printf("%s\n", gccsimd);
   else
#endif
   printf("-march=%s %s\n", gccarch, gccsimd);

   /* IntelC flags */
   printf("-march=%s -mtune=%s %s\n", iccarch, icctune, iccsimd);

   return 0;
}