1// Copyright 2018 The Go Authors. All rights reserved.
  2// Use of this source code is governed by a BSD-style
  3// license that can be found in the LICENSE file.
  4
  5//go:build 386 || amd64 || amd64p32
  6
  7package cpu
  8
  9import "runtime"
 10
// cacheLineSize is the cache line size used for the architectures selected by
// this file's build constraint (386, amd64, amd64p32).
// NOTE(review): presumably expressed in bytes; the consumers of this constant
// are not visible in this file — confirm against its users.
const cacheLineSize = 64
 12
 13func initOptions() {
 14	options = []option{
 15		{Name: "adx", Feature: &X86.HasADX},
 16		{Name: "aes", Feature: &X86.HasAES},
 17		{Name: "avx", Feature: &X86.HasAVX},
 18		{Name: "avx2", Feature: &X86.HasAVX2},
 19		{Name: "avx512", Feature: &X86.HasAVX512},
 20		{Name: "avx512f", Feature: &X86.HasAVX512F},
 21		{Name: "avx512cd", Feature: &X86.HasAVX512CD},
 22		{Name: "avx512er", Feature: &X86.HasAVX512ER},
 23		{Name: "avx512pf", Feature: &X86.HasAVX512PF},
 24		{Name: "avx512vl", Feature: &X86.HasAVX512VL},
 25		{Name: "avx512bw", Feature: &X86.HasAVX512BW},
 26		{Name: "avx512dq", Feature: &X86.HasAVX512DQ},
 27		{Name: "avx512ifma", Feature: &X86.HasAVX512IFMA},
 28		{Name: "avx512vbmi", Feature: &X86.HasAVX512VBMI},
 29		{Name: "avx512vnniw", Feature: &X86.HasAVX5124VNNIW},
 30		{Name: "avx5124fmaps", Feature: &X86.HasAVX5124FMAPS},
 31		{Name: "avx512vpopcntdq", Feature: &X86.HasAVX512VPOPCNTDQ},
 32		{Name: "avx512vpclmulqdq", Feature: &X86.HasAVX512VPCLMULQDQ},
 33		{Name: "avx512vnni", Feature: &X86.HasAVX512VNNI},
 34		{Name: "avx512gfni", Feature: &X86.HasAVX512GFNI},
 35		{Name: "avx512vaes", Feature: &X86.HasAVX512VAES},
 36		{Name: "avx512vbmi2", Feature: &X86.HasAVX512VBMI2},
 37		{Name: "avx512bitalg", Feature: &X86.HasAVX512BITALG},
 38		{Name: "avx512bf16", Feature: &X86.HasAVX512BF16},
 39		{Name: "amxtile", Feature: &X86.HasAMXTile},
 40		{Name: "amxint8", Feature: &X86.HasAMXInt8},
 41		{Name: "amxbf16", Feature: &X86.HasAMXBF16},
 42		{Name: "bmi1", Feature: &X86.HasBMI1},
 43		{Name: "bmi2", Feature: &X86.HasBMI2},
 44		{Name: "cx16", Feature: &X86.HasCX16},
 45		{Name: "erms", Feature: &X86.HasERMS},
 46		{Name: "fma", Feature: &X86.HasFMA},
 47		{Name: "osxsave", Feature: &X86.HasOSXSAVE},
 48		{Name: "pclmulqdq", Feature: &X86.HasPCLMULQDQ},
 49		{Name: "popcnt", Feature: &X86.HasPOPCNT},
 50		{Name: "rdrand", Feature: &X86.HasRDRAND},
 51		{Name: "rdseed", Feature: &X86.HasRDSEED},
 52		{Name: "sse3", Feature: &X86.HasSSE3},
 53		{Name: "sse41", Feature: &X86.HasSSE41},
 54		{Name: "sse42", Feature: &X86.HasSSE42},
 55		{Name: "ssse3", Feature: &X86.HasSSSE3},
 56		{Name: "avxifma", Feature: &X86.HasAVXIFMA},
 57		{Name: "avxvnni", Feature: &X86.HasAVXVNNI},
 58		{Name: "avxvnniint8", Feature: &X86.HasAVXVNNIInt8},
 59
 60		// These capabilities should always be enabled on amd64:
 61		{Name: "sse2", Feature: &X86.HasSSE2, Required: runtime.GOARCH == "amd64"},
 62	}
 63}
 64
 65func archInit() {
 66
 67	// From internal/cpu
 68	const (
 69		// eax bits
 70		cpuid_AVXVNNI = 1 << 4
 71
 72		// ecx bits
 73		cpuid_SSE3            = 1 << 0
 74		cpuid_PCLMULQDQ       = 1 << 1
 75		cpuid_AVX512VBMI      = 1 << 1
 76		cpuid_AVX512VBMI2     = 1 << 6
 77		cpuid_SSSE3           = 1 << 9
 78		cpuid_AVX512GFNI      = 1 << 8
 79		cpuid_AVX512VAES      = 1 << 9
 80		cpuid_AVX512VNNI      = 1 << 11
 81		cpuid_AVX512BITALG    = 1 << 12
 82		cpuid_FMA             = 1 << 12
 83		cpuid_AVX512VPOPCNTDQ = 1 << 14
 84		cpuid_SSE41           = 1 << 19
 85		cpuid_SSE42           = 1 << 20
 86		cpuid_POPCNT          = 1 << 23
 87		cpuid_AES             = 1 << 25
 88		cpuid_OSXSAVE         = 1 << 27
 89		cpuid_AVX             = 1 << 28
 90
 91		// "Extended Feature Flag" bits returned in EBX for CPUID EAX=0x7 ECX=0x0
 92		cpuid_BMI1     = 1 << 3
 93		cpuid_AVX2     = 1 << 5
 94		cpuid_BMI2     = 1 << 8
 95		cpuid_ERMS     = 1 << 9
 96		cpuid_AVX512F  = 1 << 16
 97		cpuid_AVX512DQ = 1 << 17
 98		cpuid_ADX      = 1 << 19
 99		cpuid_AVX512CD = 1 << 28
100		cpuid_SHA      = 1 << 29
101		cpuid_AVX512BW = 1 << 30
102		cpuid_AVX512VL = 1 << 31
103
104		// "Extended Feature Flag" bits returned in ECX for CPUID EAX=0x7 ECX=0x0
105		cpuid_AVX512_VBMI      = 1 << 1
106		cpuid_AVX512_VBMI2     = 1 << 6
107		cpuid_GFNI             = 1 << 8
108		cpuid_AVX512VPCLMULQDQ = 1 << 10
109		cpuid_AVX512_BITALG    = 1 << 12
110
111		// edx bits
112		cpuid_FSRM = 1 << 4
113		// edx bits for CPUID 0x80000001
114		cpuid_RDTSCP = 1 << 27
115	)
116	// Additional constants not in internal/cpu
117	const (
118		// eax=1: edx
119		cpuid_SSE2 = 1 << 26
120		// eax=1: ecx
121		cpuid_CX16   = 1 << 13
122		cpuid_RDRAND = 1 << 30
123		// eax=7,ecx=0: ebx
124		cpuid_RDSEED     = 1 << 18
125		cpuid_AVX512IFMA = 1 << 21
126		cpuid_AVX512PF   = 1 << 26
127		cpuid_AVX512ER   = 1 << 27
128		// eax=7,ecx=0: edx
129		cpuid_AVX5124VNNIW = 1 << 2
130		cpuid_AVX5124FMAPS = 1 << 3
131		cpuid_AMXBF16      = 1 << 22
132		cpuid_AMXTile      = 1 << 24
133		cpuid_AMXInt8      = 1 << 25
134		// eax=7,ecx=1: eax
135		cpuid_AVX512BF16 = 1 << 5
136		cpuid_AVXIFMA    = 1 << 23
137		// eax=7,ecx=1: edx
138		cpuid_AVXVNNIInt8 = 1 << 4
139	)
140
141	Initialized = true
142
143	maxID, _, _, _ := cpuid(0, 0)
144
145	if maxID < 1 {
146		return
147	}
148
149	_, _, ecx1, edx1 := cpuid(1, 0)
150	X86.HasSSE2 = isSet(edx1, cpuid_SSE2)
151
152	X86.HasSSE3 = isSet(ecx1, cpuid_SSE3)
153	X86.HasPCLMULQDQ = isSet(ecx1, cpuid_PCLMULQDQ)
154	X86.HasSSSE3 = isSet(ecx1, cpuid_SSSE3)
155	X86.HasFMA = isSet(ecx1, cpuid_FMA)
156	X86.HasCX16 = isSet(ecx1, cpuid_CX16)
157	X86.HasSSE41 = isSet(ecx1, cpuid_SSE41)
158	X86.HasSSE42 = isSet(ecx1, cpuid_SSE42)
159	X86.HasPOPCNT = isSet(ecx1, cpuid_POPCNT)
160	X86.HasAES = isSet(ecx1, cpuid_AES)
161	X86.HasOSXSAVE = isSet(ecx1, cpuid_OSXSAVE)
162	X86.HasRDRAND = isSet(ecx1, cpuid_RDRAND)
163
164	var osSupportsAVX, osSupportsAVX512 bool
165	// For XGETBV, OSXSAVE bit is required and sufficient.
166	if X86.HasOSXSAVE {
167		eax, _ := xgetbv()
168		// Check if XMM and YMM registers have OS support.
169		osSupportsAVX = isSet(eax, 1<<1) && isSet(eax, 1<<2)
170
171		if runtime.GOOS == "darwin" {
172			// Darwin requires special AVX512 checks, see cpu_darwin_x86.go
173			osSupportsAVX512 = osSupportsAVX && darwinSupportsAVX512()
174		} else {
175			// Check if OPMASK and ZMM registers have OS support.
176			osSupportsAVX512 = osSupportsAVX && isSet(eax, 1<<5) && isSet(eax, 1<<6) && isSet(eax, 1<<7)
177		}
178	}
179
180	X86.HasAVX = isSet(ecx1, cpuid_AVX) && osSupportsAVX
181
182	if maxID < 7 {
183		return
184	}
185
186	eax7, ebx7, ecx7, edx7 := cpuid(7, 0)
187	X86.HasBMI1 = isSet(ebx7, cpuid_BMI1)
188	X86.HasAVX2 = isSet(ebx7, cpuid_AVX2) && osSupportsAVX
189	X86.HasBMI2 = isSet(ebx7, cpuid_BMI2)
190	X86.HasERMS = isSet(ebx7, cpuid_ERMS)
191	X86.HasRDSEED = isSet(ebx7, cpuid_RDSEED)
192	X86.HasADX = isSet(ebx7, cpuid_ADX)
193
194	X86.HasAVX512 = isSet(ebx7, cpuid_AVX512F) && osSupportsAVX512 // Because avx-512 foundation is the core required extension
195	if X86.HasAVX512 {
196		X86.HasAVX512F = true
197		X86.HasAVX512CD = isSet(ebx7, cpuid_AVX512CD)
198		X86.HasAVX512ER = isSet(ebx7, cpuid_AVX512ER)
199		X86.HasAVX512PF = isSet(ebx7, cpuid_AVX512PF)
200		X86.HasAVX512VL = isSet(ebx7, cpuid_AVX512VL)
201		X86.HasAVX512BW = isSet(ebx7, cpuid_AVX512BW)
202		X86.HasAVX512DQ = isSet(ebx7, cpuid_AVX512DQ)
203		X86.HasAVX512IFMA = isSet(ebx7, cpuid_AVX512IFMA)
204		X86.HasAVX512VBMI = isSet(ecx7, cpuid_AVX512_VBMI)
205		X86.HasAVX5124VNNIW = isSet(edx7, cpuid_AVX5124VNNIW)
206		X86.HasAVX5124FMAPS = isSet(edx7, cpuid_AVX5124FMAPS)
207		X86.HasAVX512VPOPCNTDQ = isSet(ecx7, cpuid_AVX512VPOPCNTDQ)
208		X86.HasAVX512VPCLMULQDQ = isSet(ecx7, cpuid_AVX512VPCLMULQDQ)
209		X86.HasAVX512VNNI = isSet(ecx7, cpuid_AVX512VNNI)
210		X86.HasAVX512GFNI = isSet(ecx7, cpuid_AVX512GFNI)
211		X86.HasAVX512VAES = isSet(ecx7, cpuid_AVX512VAES)
212		X86.HasAVX512VBMI2 = isSet(ecx7, cpuid_AVX512VBMI2)
213		X86.HasAVX512BITALG = isSet(ecx7, cpuid_AVX512BITALG)
214	}
215
216	X86.HasAMXTile = isSet(edx7, cpuid_AMXTile)
217	X86.HasAMXInt8 = isSet(edx7, cpuid_AMXInt8)
218	X86.HasAMXBF16 = isSet(edx7, cpuid_AMXBF16)
219
220	// These features depend on the second level of extended features.
221	if eax7 >= 1 {
222		eax71, _, _, edx71 := cpuid(7, 1)
223		if X86.HasAVX512 {
224			X86.HasAVX512BF16 = isSet(eax71, cpuid_AVX512BF16)
225		}
226		if X86.HasAVX {
227			X86.HasAVXIFMA = isSet(eax71, cpuid_AVXIFMA)
228			X86.HasAVXVNNI = isSet(eax71, cpuid_AVXVNNI)
229			X86.HasAVXVNNIInt8 = isSet(edx71, cpuid_AVXVNNIInt8)
230		}
231	}
232}
233
// isSet reports whether hwc and value have at least one set bit in common.
// With a single-bit value this tests whether that bit is set in hwc.
func isSet(hwc uint32, value uint32) bool {
	common := hwc & value
	return common > 0
}