Actual source code: sseenabled.c


  2: #include <petscsys.h>

  4: #if defined(PETSC_HAVE_SSE)

  6: #include PETSC_HAVE_SSE
  7: #define SSE_FEATURE_FLAG 0x2000000 /* Mask for bit 25 (from bit 0) */

  9: PetscErrorCode  PetscSSEHardwareTest(PetscBool  *flag)
 10: {
 11:   char           vendor[13];
 12:   char           Intel[13]="GenuineIntel";
 13:   char           AMD[13]  ="AuthenticAMD";
 14:   char           Hygon[13]="HygonGenuine";
 15:   PetscBool      flg;

 17:   PetscStrncpy(vendor,"************",sizeof(vendor));
 18:   CPUID_GET_VENDOR(vendor);
 19:   PetscStrcmp(vendor,Intel,&flg);
 20:   if (!flg) PetscStrcmp(vendor,AMD,&flg);
 21:   if (!flg) {PetscStrcmp(vendor,Hygon,&flg);
 22:     if (flg) {
 23:     /* Intel, AMD, and Hygon use bit 25 of CPUID_FEATURES */
 24:     /* to denote availability of SSE Support */
 25:     unsigned long myeax,myebx,myecx,myedx;
 26:     CPUID(CPUID_FEATURES,&myeax,&myebx,&myecx,&myedx);
 27:     if (myedx & SSE_FEATURE_FLAG) *flag = PETSC_TRUE;
 28:     else *flag = PETSC_FALSE;
 29:   }
 30:   return 0;
 31: }

 33: #if defined(PETSC_HAVE_FORK)
#include <signal.h>
#include <sys/wait.h>
#include <unistd.h>
/*
   Early versions of the Linux kernel disable SSE hardware because
   they do not know how to preserve the SSE state at a context switch.
   To detect this, try an SSE instruction in another process.
   If it works, great!  If not, an illegal instruction signal will be raised,
   so catch it and exit that process with an error code.
*/
 42: #define PetscSSEOSEnabledTest(arg) PetscSSEOSEnabledTest_Linux(arg)

 44: static void PetscSSEDisabledHandler(int sig)
 45: {
 46:   signal(SIGILL,SIG_IGN);
 47:   exit(-1);
 48: }

 50: PetscErrorCode  PetscSSEOSEnabledTest_Linux(PetscBool  *flag)
 51: {
 52:   int status, pid = 0;

 54:   signal(SIGILL,PetscSSEDisabledHandler);
 55:   pid = fork();
 56:   if (pid==0) {
 57:     SSE_SCOPE_BEGIN;
 58:     XOR_PS(XMM0,XMM0);
 59:     SSE_SCOPE_END;
 60:     exit(0);
 61:   } else wait(&status);
 62:   if (!status) *flag = PETSC_TRUE;
 63:   else *flag = PETSC_FALSE;
 64:   return 0;
 65: }

 67: #else
 68: /*
 69:    Windows 95/98/NT4 should have a Windows Update/Service Patch which enables this hardware.
 70:    Windows ME/2000 doesn't disable SSE Hardware
 71: */
 72: #define PetscSSEOSEnabledTest(arg) PetscSSEOSEnabledTest_TRUE(arg)
 73: #endif

 75: PetscErrorCode  PetscSSEOSEnabledTest_TRUE(PetscBool  *flag)
 76: {
 77:   if (flag) *flag = PETSC_TRUE;
 78:   return 0;
 79: }

 81: #else  /* Not defined PETSC_HAVE_SSE */

 83: #define PetscSSEHardwareTest(arg) PetscSSEEnabledTest_FALSE(arg)
 84: #define PetscSSEOSEnabledTest(arg) PetscSSEEnabledTest_FALSE(arg)

 86: PetscErrorCode  PetscSSEEnabledTest_FALSE(PetscBool  *flag)
 87: {
 88:   if (flag) *flag = PETSC_FALSE;
 89:   return 0;
 90: }

 92: #endif /* defined PETSC_HAVE_SSE */

 94: /*@C
 95:      PetscSSEIsEnabled - Determines if Intel Streaming SIMD Extensions (SSE) to the x86 instruction
 96:      set can be used.  Some operating systems do not allow the use of these instructions despite
 97:      hardware availability.

 99:      Collective

101:      Input Parameter:
102: .    comm - the MPI Communicator

104:      Output Parameters:
105: +    lflag - Local Flag:  PETSC_TRUE if enabled in this process
106: -    gflag - Global Flag: PETSC_TRUE if enabled for all processes in comm

108:      Notes:
109:      NULL can be specified for lflag or gflag if either of these values are not desired.

111:      Options Database Keys:
112: .    -disable_sse - Disable use of hand tuned Intel SSE implementations

114:      Level: developer
115: @*/
116: static PetscBool petsc_sse_local_is_untested  = PETSC_TRUE;
117: static PetscBool petsc_sse_enabled_local      = PETSC_FALSE;
118: static PetscBool petsc_sse_global_is_untested = PETSC_TRUE;
119: static PetscBool petsc_sse_enabled_global     = PETSC_FALSE;
120: PetscErrorCode  PetscSSEIsEnabled(MPI_Comm comm,PetscBool  *lflag,PetscBool  *gflag)
121: {
122:   PetscBool      disabled_option;

124:   if (petsc_sse_local_is_untested && petsc_sse_global_is_untested) {
125:     disabled_option = PETSC_FALSE;

127:     PetscOptionsGetBool(NULL,NULL,"-disable_sse",&disabled_option,NULL);
128:     if (disabled_option) {
129:       petsc_sse_local_is_untested  = PETSC_FALSE;
130:       petsc_sse_enabled_local      = PETSC_FALSE;
131:       petsc_sse_global_is_untested = PETSC_FALSE;
132:       petsc_sse_enabled_global     = PETSC_FALSE;
133:     }

135:     if (petsc_sse_local_is_untested) {
136:       PetscSSEHardwareTest(&petsc_sse_enabled_local);
137:       if (petsc_sse_enabled_local) {
138:         PetscSSEOSEnabledTest(&petsc_sse_enabled_local);
139:       }
140:       petsc_sse_local_is_untested = PETSC_FALSE;
141:     }

143:     if (gflag && petsc_sse_global_is_untested) {
144:       MPIU_Allreduce(&petsc_sse_enabled_local,&petsc_sse_enabled_global,1,MPIU_BOOL,MPI_LAND,comm);

146:       petsc_sse_global_is_untested = PETSC_FALSE;
147:     }
148:   }

150:   if (lflag) *lflag = petsc_sse_enabled_local;
151:   if (gflag) *gflag = petsc_sse_enabled_global;
152:   return 0;
153: }