-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcpuinfo.cpp
108 lines (91 loc) · 2.73 KB
/
cpuinfo.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#include "infra/cpuinfo.h"
#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1300)
#include <immintrin.h>
/*
 * Intel-compiler variant: asks the compiler-provided runtime query whether
 * AVX2, FMA, BMI, LZCNT and MOVBE may all be used on the current CPU.
 * The result of that query is returned unchanged.
 */
static int check_4th_gen_intel_core_features()
{
    return _may_i_use_cpu_feature(_FEATURE_AVX2
                                  | _FEATURE_FMA
                                  | _FEATURE_BMI
                                  | _FEATURE_LZCNT
                                  | _FEATURE_MOVBE);
}
#else /* non-Intel compiler */
#if defined (__arm__) || defined (__arm64__) || defined(__aarch64__)
/*
 * ARM variant: this branch is compiled only for arm/arm64/aarch64 targets,
 * where the probe unconditionally reports the feature set as unavailable.
 */
static int check_4th_gen_intel_core_features()
{
    const int features_unavailable = 0;
    return features_unavailable;
}
#else
#include <stdint.h>
#if defined(_MSC_VER)
#include <intrin.h>
#endif
/*
 * Executes CPUID for the given leaf (eax) and sub-leaf (ecx) and writes the
 * resulting registers to abcd[0..3] in the order EAX, EBX, ECX, EDX.
 * abcd must point to at least four writable uint32_t slots.
 */
static void run_cpuid(uint32_t eax, uint32_t ecx, uint32_t* abcd)
{
#if defined(_MSC_VER)
    /* MSVC: use the compiler intrinsic, which fills the four-int array directly */
    __cpuidex(reinterpret_cast<int*>(abcd), eax, ecx);
#else
    uint32_t reg_b = 0;
    uint32_t reg_d = 0;

#if defined(__i386__) && defined(__PIC__)
    /* 32-bit PIC: EBX must not be clobbered, so park it in EDI around the
       cpuid instruction and swap back afterwards; EDI then carries the EBX result */
    __asm__("movl %%ebx, %%edi \n\t cpuid \n\t xchgl %%ebx, %%edi"
            : "=D"(reg_b), "+a"(eax), "+c"(ecx), "=d"(reg_d));
#else
    __asm__("cpuid"
            : "+b"(reg_b), "+a"(eax), "+c"(ecx), "=d"(reg_d));
#endif

    /* eax/ecx were in-out operands and now hold the instruction's outputs */
    abcd[0] = eax;
    abcd[1] = reg_b;
    abcd[2] = ecx;
    abcd[3] = reg_d;
#endif
}
/*
 * Reads the low 32 bits of XCR0 and reports whether both the XMM and the YMM
 * state bits are enabled. Returns 1 if both are set, 0 otherwise.
 * The caller is expected to have confirmed OSXSAVE before calling this.
 */
static int check_xcr0_ymm()
{
    /* bits 1 and 2 of XCR0: xmm state and ymm state */
    const uint32_t xmm_ymm_state_bits = 6;
    uint32_t xcr0_low;

#if defined(_MSC_VER)
    xcr0_low = (uint32_t)_xgetbv(0); /* min VS2010 SP1 compiler is required */
#else
    /* xgetbv with ECX=0 returns XCR0 in EDX:EAX; only EAX is kept */
    __asm__("xgetbv"
            : "=a"(xcr0_low)
            : "c"(0)
            : "%edx");
#endif

    return (xcr0_low & xmm_ymm_state_bits) == xmm_ymm_state_bits;
}
/*
 * Generic x86 variant: probes CPUID (and XCR0) for the feature set this file
 * treats as "4th generation Intel Core": FMA, MOVBE, AVX2, BMI1, BMI2 and
 * LZCNT, plus OS-enabled XMM/YMM state.
 * Returns 1 when every check passes, 0 as soon as one fails.
 */
static int check_4th_gen_intel_core_features()
{
    uint32_t abcd[4];
    const uint32_t fma_movbe_osxsave_mask = (1u << 12) | (1u << 22) | (1u << 27);
    const uint32_t avx2_bmi12_mask = (1u << 5) | (1u << 3) | (1u << 8);
    const uint32_t lzcnt_mask = (1u << 5);

    /* CPUID.(EAX=0H):EAX is the highest supported basic leaf. Querying a
       leaf above it can return data for a different leaf, so make sure
       leaf 7 actually exists before trusting its bits. */
    run_cpuid(0, 0, abcd);
    if (abcd[0] < 7)
        return 0;

    /* CPUID.(EAX=01H, ECX=0H):ECX.FMA[bit 12]==1 &&
       CPUID.(EAX=01H, ECX=0H):ECX.MOVBE[bit 22]==1 &&
       CPUID.(EAX=01H, ECX=0H):ECX.OSXSAVE[bit 27]==1 */
    run_cpuid(1, 0, abcd);
    if ((abcd[2] & fma_movbe_osxsave_mask) != fma_movbe_osxsave_mask)
        return 0;

    /* OSXSAVE is confirmed above, so reading XCR0 is allowed here */
    if (!check_xcr0_ymm())
        return 0;

    /* CPUID.(EAX=07H, ECX=0H):EBX.AVX2[bit 5]==1 &&
       CPUID.(EAX=07H, ECX=0H):EBX.BMI1[bit 3]==1 &&
       CPUID.(EAX=07H, ECX=0H):EBX.BMI2[bit 8]==1 */
    run_cpuid(7, 0, abcd);
    if ((abcd[1] & avx2_bmi12_mask) != avx2_bmi12_mask)
        return 0;

    /* CPUID.(EAX=80000000H):EAX is the highest supported extended leaf;
       leaf 80000001H must be within range before its bits are read */
    run_cpuid(0x80000000, 0, abcd);
    if (abcd[0] < 0x80000001u)
        return 0;

    /* CPUID.(EAX=80000001H):ECX.LZCNT[bit 5]==1 */
    run_cpuid(0x80000001, 0, abcd);
    if ((abcd[2] & lzcnt_mask) == 0)
        return 0;

    return 1;
}
#endif
#endif
namespace inf::cpuinfo {

/*
 * Reports whether the feature probe check_4th_gen_intel_core_features()
 * succeeded (returned 1). Despite the name, the probe covers more than
 * AVX2 alone; see that function for the exact set checked on each platform.
 *
 * The probe runs once, on the first call. The result is cached in a
 * function-local static, whose initialisation is thread-safe since C++11,
 * so concurrent first calls do not race on the cache.
 */
bool supports_avx2()
{
    static const bool features_available = (check_4th_gen_intel_core_features() == 1);
    return features_available;
}

}