如何检查CPU是否支持SSE3指令集?
以下代码是否有效,以检查CPU是否支持SSE3指令集?
使用IsProcessorFeaturePresent()
函数显然在Windows XP上不起作用(请参阅http://msdn.microsoft.com/zh-cn/library/ms724482(v=vs.85).aspx )。
bool CheckSSE3() { int CPUInfo[4] = {-1}; //-- Get number of valid info ids __cpuid(CPUInfo, 0); int nIds = CPUInfo[0]; //-- Get info for id "1" if (nIds >= 1) { __cpuid(CPUInfo, 1); bool bSSE3NewInstructions = (CPUInfo[2] & 0x1) || false; return bSSE3NewInstructions; } return false; }
我创build了一个GitHub repro,它将检测所有主要的x86 ISA扩展的CPU和OS支持: https : //github.com/Mysticial/FeatureDetector
这是一个较短的版本:
首先你需要访问CPUID指令:
#ifdef _WIN32 // Windows #define cpuid(info, x) __cpuidex(info, x, 0) #else // GCC Intrinsics #include <cpuid.h> void cpuid(int info[4], int InfoType){ __cpuid_count(InfoType, 0, info[0], info[1], info[2], info[3]); } #endif
然后你可以运行下面的代码:
// Misc. bool HW_MMX; bool HW_x64; bool HW_ABM; // Advanced Bit Manipulation bool HW_RDRAND; bool HW_BMI1; bool HW_BMI2; bool HW_ADX; bool HW_PREFETCHWT1; // SIMD: 128-bit bool HW_SSE; bool HW_SSE2; bool HW_SSE3; bool HW_SSSE3; bool HW_SSE41; bool HW_SSE42; bool HW_SSE4a; bool HW_AES; bool HW_SHA; // SIMD: 256-bit bool HW_AVX; bool HW_XOP; bool HW_FMA3; bool HW_FMA4; bool HW_AVX2; // SIMD: 512-bit bool HW_AVX512F; // AVX512 Foundation bool HW_AVX512CD; // AVX512 Conflict Detection bool HW_AVX512PF; // AVX512 Prefetch bool HW_AVX512ER; // AVX512 Exponential + Reciprocal bool HW_AVX512VL; // AVX512 Vector Length Extensions bool HW_AVX512BW; // AVX512 Byte + Word bool HW_AVX512DQ; // AVX512 Doubleword + Quadword bool HW_AVX512IFMA; // AVX512 Integer 52-bit Fused Multiply-Add bool HW_AVX512VBMI; // AVX512 Vector Byte Manipulation Instructions int info[4]; cpuid(info, 0); int nIds = info[0]; cpuid(info, 0x80000000); unsigned nExIds = info[0]; // Detect Features if (nIds >= 0x00000001){ cpuid(info,0x00000001); HW_MMX = (info[3] & ((int)1 << 23)) != 0; HW_SSE = (info[3] & ((int)1 << 25)) != 0; HW_SSE2 = (info[3] & ((int)1 << 26)) != 0; HW_SSE3 = (info[2] & ((int)1 << 0)) != 0; HW_SSSE3 = (info[2] & ((int)1 << 9)) != 0; HW_SSE41 = (info[2] & ((int)1 << 19)) != 0; HW_SSE42 = (info[2] & ((int)1 << 20)) != 0; HW_AES = (info[2] & ((int)1 << 25)) != 0; HW_AVX = (info[2] & ((int)1 << 28)) != 0; HW_FMA3 = (info[2] & ((int)1 << 12)) != 0; HW_RDRAND = (info[2] & ((int)1 << 30)) != 0; } if (nIds >= 0x00000007){ cpuid(info,0x00000007); HW_AVX2 = (info[1] & ((int)1 << 5)) != 0; HW_BMI1 = (info[1] & ((int)1 << 3)) != 0; HW_BMI2 = (info[1] & ((int)1 << 8)) != 0; HW_ADX = (info[1] & ((int)1 << 19)) != 0; HW_SHA = (info[1] & ((int)1 << 29)) != 0; HW_PREFETCHWT1 = (info[2] & ((int)1 << 0)) != 0; HW_AVX512F = (info[1] & ((int)1 << 16)) != 0; HW_AVX512CD = (info[1] & ((int)1 << 28)) != 0; HW_AVX512PF = (info[1] & ((int)1 << 26)) != 0; HW_AVX512ER = (info[1] & ((int)1 << 27)) != 0; HW_AVX512VL = (info[1] & ((int)1 << 31)) != 0; HW_AVX512BW = (info[1] & ((int)1 << 30)) != 0; HW_AVX512DQ = (info[1] & ((int)1 << 17)) != 0; HW_AVX512IFMA = (info[1] & ((int)1 << 21)) != 0; HW_AVX512VBMI = (info[2] & ((int)1 << 1)) != 0; } if (nExIds >= 0x80000001){ cpuid(info,0x80000001); HW_x64 = (info[3] & ((int)1 << 29)) != 0; HW_ABM = (info[2] & ((int)1 << 5)) != 0; HW_SSE4a = (info[2] & ((int)1 << 6)) != 0; HW_FMA4 = (info[2] & ((int)1 << 16)) != 0; HW_XOP = (info[2] & ((int)1 << 11)) != 0; }
请注意,这只能检测CPU是否支持这些指令。 要真正运行它们,还需要有操作系统支持。
具体而言,操作系统支持需要:
- x64指令。 (你需要一个64位的操作系统。)
- 使用(AVX)256位
ymm
寄存器的ymm
。 见安迪Lutomirski的答案如何检测到这一点。 - 使用(AVX512)512位
zmm
和掩码寄存器的zmm
。 检测操作系统对AVX512的支持与AVX相同,但使用标志0xe6
而不是0x6
。
Mysticial的答案有点危险 – 它解释了如何检测CPU支持,但不支持OS。 您需要使用_xgetbv来检查操作系统是否启用了所需的CPU扩展状态。 看到这里的另一个来源。 甚至海湾合作委员会也犯了同样的错误。 代码的肉是:
bool avxSupported = false; int cpuInfo[4]; __cpuid(cpuInfo, 1); bool osUsesXSAVE_XRSTORE = cpuInfo[2] & (1 << 27) || false; bool cpuAVXSuport = cpuInfo[2] & (1 << 28) || false; if (osUsesXSAVE_XRSTORE && cpuAVXSuport) { unsigned long long xcrFeatureMask = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); avxSupported = (xcrFeatureMask & 0x6) == 0x6; }
经过相当多的search,我也find了来自英特尔的解决scheme:
链接: https : //software.intel.com/zh-cn/articles/how-to-detect-new-instruction-support-in-the-th-generation-intel-core-processor-family
void cpuid(uint32_t eax, uint32_t ecx, uint32_t* abcd) { #if defined(_MSC_VER) __cpuidex((int*)abcd, eax, ecx); #else uint32_t ebx, edx; # if defined( __i386__ ) && defined ( __PIC__ ) /* in case of PIC under 32-bit EBX cannot be clobbered */ __asm__("movl %%ebx, %%edi \n\t cpuid \n\t xchgl %%ebx, %%edi" : "=D" (ebx), # else __asm__("cpuid" : "+b" (ebx), # endif "+a" (eax), "+c" (ecx), "=d" (edx)); abcd[0] = eax; abcd[1] = ebx; abcd[2] = ecx; abcd[3] = edx; #endif } int check_xcr0_ymm() { uint32_t xcr0; #if defined(_MSC_VER) xcr0 = (uint32_t)_xgetbv(0); /* min VS2010 SP1 compiler is required */ #else __asm__("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx"); #endif return ((xcr0 & 6) == 6); /* checking if xmm and ymm state are enabled in XCR0 */ }
另外请注意,GCC有一些可以使用的特殊内在函数(参见http://gcc.gnu.org/onlinedocs/gcc/X86-Built-in-Functions.html ):
if (__builtin_cpu_supports("avx2")) // ...
如果你把这些与上面的信息放在一起,那一切都会好的。