1// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
2
3// Package cpuid provides information about the CPU running the current program.
4//
5// CPU features are detected on startup, and kept for fast access through the life of the application.
6// Currently x86 / x64 (AMD64) as well as arm64 is supported.
7//
8// You can access the CPU information by accessing the shared CPU variable of the cpuid library.
9//
10// Package home: https://github.com/klauspost/cpuid
11package cpuid
12
13import (
14 "flag"
15 "fmt"
16 "math"
17 "math/bits"
18 "os"
19 "runtime"
20 "strings"
21)
22
// AMD reference: https://www.amd.com/system/files/TechDocs/25481.pdf
// and Processor Programming Reference (PPR)
25
// Vendor is a representation of a CPU vendor.
type Vendor int

const (
	VendorUnknown Vendor = iota

	// x86 CPU and hypervisor vendors.
	Intel
	AMD
	VIA
	Transmeta
	NSC
	KVM  // Kernel-based Virtual Machine
	MSVM // Microsoft Hyper-V or Windows Virtual PC
	VMware
	XenHVM
	Bhyve
	Hygon
	SiS
	RDC

	// ARM CPU vendors.
	Ampere
	ARM
	Broadcom
	Cavium
	DEC
	Fujitsu
	Infineon
	Motorola
	NVIDIA
	AMCC
	Qualcomm
	Marvell

	// Additional hypervisor/emulator vendors.
	QEMU
	QNX
	ACRN
	SRE
	Apple

	// lastVendor marks the end of the vendor list. Keep last.
	lastVendor
)
66
67//go:generate stringer -type=FeatureID,Vendor
68
// FeatureID is the ID of a specific cpu feature.
type FeatureID int

const (
	// Keep index -1 as unknown
	UNKNOWN = -1

	// x86 features
	ADX                FeatureID = iota // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
	AESNI                               // Advanced Encryption Standard New Instructions
	AMD3DNOW                            // AMD 3DNOW
	AMD3DNOWEXT                         // AMD 3DNowExt
	AMXBF16                             // Tile computational operations on BFLOAT16 numbers
	AMXFP16                             // Tile computational operations on FP16 numbers
	AMXINT8                             // Tile computational operations on 8-bit integers
	AMXFP8                              // Tile computational operations on FP8 numbers
	AMXTILE                             // Tile architecture
	AMXTF32                             // Matrix Multiplication of TF32 Tiles into Packed Single Precision Tile
	AMXCOMPLEX                          // Tile computational operations on complex numbers
	AMXTRANSPOSE                        // Tile multiply where the first operand is transposed
	APX_F                               // Intel APX
	AVX                                 // AVX functions
	AVX10                               // If set the Intel AVX10 Converged Vector ISA is supported
	AVX10_128                           // If set indicates that AVX10 128-bit vector support is present
	AVX10_256                           // If set indicates that AVX10 256-bit vector support is present
	AVX10_512                           // If set indicates that AVX10 512-bit vector support is present
	AVX2                                // AVX2 functions
	AVX512BF16                          // AVX-512 BFLOAT16 Instructions
	AVX512BITALG                        // AVX-512 Bit Algorithms
	AVX512BW                            // AVX-512 Byte and Word Instructions
	AVX512CD                            // AVX-512 Conflict Detection Instructions
	AVX512DQ                            // AVX-512 Doubleword and Quadword Instructions
	AVX512ER                            // AVX-512 Exponential and Reciprocal Instructions
	AVX512F                             // AVX-512 Foundation
	AVX512FP16                          // AVX-512 FP16 Instructions
	AVX512IFMA                          // AVX-512 Integer Fused Multiply-Add Instructions
	AVX512PF                            // AVX-512 Prefetch Instructions
	AVX512VBMI                          // AVX-512 Vector Bit Manipulation Instructions
	AVX512VBMI2                         // AVX-512 Vector Bit Manipulation Instructions, Version 2
	AVX512VL                            // AVX-512 Vector Length Extensions
	AVX512VNNI                          // AVX-512 Vector Neural Network Instructions
	AVX512VP2INTERSECT                  // AVX-512 Intersect for D/Q
	AVX512VPOPCNTDQ                     // AVX-512 Vector Population Count Doubleword and Quadword
	AVXIFMA                             // AVX-IFMA instructions
	AVXNECONVERT                        // AVX-NE-CONVERT instructions
	AVXSLOW                             // Indicates the CPU performs 2 128 bit operations instead of one
	AVXVNNI                             // AVX (VEX encoded) VNNI neural network instructions
	AVXVNNIINT8                         // AVX-VNNI-INT8 instructions
	AVXVNNIINT16                        // AVX-VNNI-INT16 instructions
	BHI_CTRL                            // Branch History Injection and Intra-mode Branch Target Injection / CVE-2022-0001, CVE-2022-0002 / INTEL-SA-00598
	BMI1                                // Bit Manipulation Instruction Set 1
	BMI2                                // Bit Manipulation Instruction Set 2
	CETIBT                              // Intel CET Indirect Branch Tracking
	CETSS                               // Intel CET Shadow Stack
	CLDEMOTE                            // Cache Line Demote
	CLMUL                               // Carry-less Multiplication
	CLZERO                              // CLZERO instruction supported
	CMOV                                // i686 CMOV
	CMPCCXADD                           // CMPCCXADD instructions
	CMPSB_SCADBS_SHORT                  // Fast short CMPSB and SCASB
	CMPXCHG8                            // CMPXCHG8 instruction
	CPBOOST                             // Core Performance Boost
	CPPC                                // AMD: Collaborative Processor Performance Control
	CX16                                // CMPXCHG16B Instruction
	EFER_LMSLE_UNS                      // AMD: =Core::X86::Msr::EFER[LMSLE] is not supported, and MBZ
	ENQCMD                              // Enqueue Command
	ERMS                                // Enhanced REP MOVSB/STOSB
	F16C                                // Half-precision floating-point conversion
	FLUSH_L1D                           // Flush L1D cache
	FMA3                                // Intel FMA 3. Does not imply AVX.
	FMA4                                // Bulldozer FMA4 functions
	FP128                               // AMD: When set, the internal FP/SIMD execution datapath is no more than 128-bits wide
	FP256                               // AMD: When set, the internal FP/SIMD execution datapath is no more than 256-bits wide
	FSRM                                // Fast Short Rep Mov
	FXSR                                // FXSAVE, FXRESTOR instructions, CR4 bit 9
	FXSROPT                             // FXSAVE/FXRSTOR optimizations
	GFNI                                // Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage.
	HLE                                 // Hardware Lock Elision
	HRESET                              // If set CPU supports history reset and the IA32_HRESET_ENABLE MSR
	HTT                                 // Hyperthreading (enabled)
	HWA                                 // Hardware assert supported. Indicates support for MSRC001_10
	HYBRID_CPU                          // This part has CPUs of more than one type.
	HYPERVISOR                          // This bit has been reserved by Intel & AMD for use by hypervisors
	IA32_ARCH_CAP                       // IA32_ARCH_CAPABILITIES MSR (Intel)
	IA32_CORE_CAP                       // IA32_CORE_CAPABILITIES MSR
	IBPB                                // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
	IBPB_BRTYPE                         // Indicates that MSR 49h (PRED_CMD) bit 0 (IBPB) flushes all branch type predictions from the CPU branch predictor
	IBRS                                // AMD: Indirect Branch Restricted Speculation
	IBRS_PREFERRED                      // AMD: IBRS is preferred over software solution
	IBRS_PROVIDES_SMP                   // AMD: IBRS provides Same Mode Protection
	IBS                                 // Instruction Based Sampling (AMD)
	IBSBRNTRGT                          // Instruction Based Sampling Feature (AMD)
	IBSFETCHSAM                         // Instruction Based Sampling Feature (AMD)
	IBSFFV                              // Instruction Based Sampling Feature (AMD)
	IBSOPCNT                            // Instruction Based Sampling Feature (AMD)
	IBSOPCNTEXT                         // Instruction Based Sampling Feature (AMD)
	IBSOPSAM                            // Instruction Based Sampling Feature (AMD)
	IBSRDWROPCNT                        // Instruction Based Sampling Feature (AMD)
	IBSRIPINVALIDCHK                    // Instruction Based Sampling Feature (AMD)
	IBS_FETCH_CTLX                      // AMD: IBS fetch control extended MSR supported
	IBS_OPDATA4                         // AMD: IBS op data 4 MSR supported
	IBS_OPFUSE                          // AMD: Indicates support for IbsOpFuse
	IBS_PREVENTHOST                     // Disallowing IBS use by the host supported
	IBS_ZEN4                            // AMD: Fetch and Op IBS support IBS extensions added with Zen4
	IDPRED_CTRL                         // IPRED_DIS
	INT_WBINVD                          // WBINVD/WBNOINVD are interruptible.
	INVLPGB                             // INVLPGB and TLBSYNC instruction supported
	KEYLOCKER                           // Key locker
	KEYLOCKERW                          // Key locker wide
	LAHF                                // LAHF/SAHF in long mode
	LAM                                 // If set, CPU supports Linear Address Masking
	LBRVIRT                             // LBR virtualization
	LZCNT                               // LZCNT instruction
	MCAOVERFLOW                         // MCA overflow recovery support.
	MCDT_NO                             // Processor do not exhibit MXCSR Configuration Dependent Timing behavior and do not need to mitigate it.
	MCOMMIT                             // MCOMMIT instruction supported
	MD_CLEAR                            // VERW clears CPU buffers
	MMX                                 // standard MMX
	MMXEXT                              // SSE integer functions or AMD MMX ext
	MOVBE                               // MOVBE instruction (big-endian)
	MOVDIR64B                           // Move 64 Bytes as Direct Store
	MOVDIRI                             // Move Doubleword as Direct Store
	MOVSB_ZL                            // Fast Zero-Length MOVSB
	MOVU                                // AMD: MOVU SSE instructions are more efficient and should be preferred to SSE MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD
	MPX                                 // Intel MPX (Memory Protection Extensions)
	MSRIRC                              // Instruction Retired Counter MSR available
	MSRLIST                             // Read/Write List of Model Specific Registers
	MSR_PAGEFLUSH                       // Page Flush MSR available
	NRIPS                               // Indicates support for NRIP save on VMEXIT
	NX                                  // NX (No-Execute) bit
	OSXSAVE                             // XSAVE enabled by OS
	PCONFIG                             // PCONFIG for Intel Multi-Key Total Memory Encryption
	POPCNT                              // POPCNT instruction
	PPIN                                // AMD: Protected Processor Inventory Number support. Indicates that Protected Processor Inventory Number (PPIN) capability can be enabled
	PREFETCHI                           // PREFETCHIT0/1 instructions
	PSFD                                // Predictive Store Forward Disable
	RDPRU                               // RDPRU instruction supported
	RDRAND                              // RDRAND instruction is available
	RDSEED                              // RDSEED instruction is available
	RDTSCP                              // RDTSCP Instruction
	RRSBA_CTRL                          // Restricted RSB Alternate
	RTM                                 // Restricted Transactional Memory
	RTM_ALWAYS_ABORT                    // Indicates that the loaded microcode is forcing RTM abort.
	SBPB                                // Indicates support for the Selective Branch Predictor Barrier
	SERIALIZE                           // Serialize Instruction Execution
	SEV                                 // AMD Secure Encrypted Virtualization supported
	SEV_64BIT                           // AMD SEV guest execution only allowed from a 64-bit host
	SEV_ALTERNATIVE                     // AMD SEV Alternate Injection supported
	SEV_DEBUGSWAP                       // Full debug state swap supported for SEV-ES guests
	SEV_ES                              // AMD SEV Encrypted State supported
	SEV_RESTRICTED                      // AMD SEV Restricted Injection supported
	SEV_SNP                             // AMD SEV Secure Nested Paging supported
	SGX                                 // Software Guard Extensions
	SGXLC                               // Software Guard Extensions Launch Control
	SGXPQC                              // Software Guard Extensions 256-bit Encryption
	SHA                                 // Intel SHA Extensions
	SME                                 // AMD Secure Memory Encryption supported
	SME_COHERENT                        // AMD Hardware cache coherency across encryption domains enforced
	SM3_X86                             // SM3 instructions
	SM4_X86                             // SM4 instructions
	SPEC_CTRL_SSBD                      // Speculative Store Bypass Disable
	SRBDS_CTRL                          // SRBDS mitigation MSR available
	SRSO_MSR_FIX                        // Indicates that software may use MSR BP_CFG[BpSpecReduce] to mitigate SRSO.
	SRSO_NO                             // Indicates the CPU is not subject to the SRSO vulnerability
	SRSO_USER_KERNEL_NO                 // Indicates the CPU is not subject to the SRSO vulnerability across user/kernel boundaries
	SSE                                 // SSE functions
	SSE2                                // P4 SSE functions
	SSE3                                // Prescott SSE3 functions
	SSE4                                // Penryn SSE4.1 functions
	SSE42                               // Nehalem SSE4.2 functions
	SSE4A                               // AMD Barcelona microarchitecture SSE4a instructions
	SSSE3                               // Conroe SSSE3 functions
	STIBP                               // Single Thread Indirect Branch Predictors
	STIBP_ALWAYSON                      // AMD: Single Thread Indirect Branch Prediction Mode has Enhanced Performance and may be left Always On
	STOSB_SHORT                         // Fast short STOSB
	SUCCOR                              // Software uncorrectable error containment and recovery capability.
	SVM                                 // AMD Secure Virtual Machine
	SVMDA                               // Indicates support for the SVM decode assists.
	SVMFBASID                           // SVM, Indicates that TLB flush events, including CR3 writes and CR4.PGE toggles, flush only the current ASID's TLB entries. Also indicates support for the extended VMCBTLB_Control
	SVML                                // AMD SVM lock. Indicates support for SVM-Lock.
	SVMNP                               // AMD SVM nested paging
	SVMPF                               // SVM pause intercept filter. Indicates support for the pause intercept filter
	SVMPFT                              // SVM PAUSE filter threshold. Indicates support for the PAUSE filter cycle count threshold
	SYSCALL                             // System-Call Extension (SCE): SYSCALL and SYSRET instructions.
	SYSEE                               // SYSENTER and SYSEXIT instructions
	TBM                                 // AMD Trailing Bit Manipulation
	TDX_GUEST                           // Intel Trust Domain Extensions Guest
	TLB_FLUSH_NESTED                    // AMD: Flushing includes all the nested translations for guest translations
	TME                                 // Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE.
	TOPEXT                              // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX.
	TSA_L1_NO                           // AMD only: Not vulnerable to TSA-L1
	TSA_SQ_NO                           // AMD only: Not vulnerable to TSA-SQ
	TSA_VERW_CLEAR                      // If set, the memory form of the VERW instruction may be used to help mitigate TSA
	TSCRATEMSR                          // MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104
	TSXLDTRK                            // Intel TSX Suspend Load Address Tracking
	VAES                                // Vector AES. AVX(512) versions requires additional checks.
	VMCBCLEAN                           // VMCB clean bits. Indicates support for VMCB clean bits.
	VMPL                                // AMD VM Permission Levels supported
	VMSA_REGPROT                        // AMD VMSA Register Protection supported
	VMX                                 // Virtual Machine Extensions
	VPCLMULQDQ                          // Carry-Less Multiplication Quadword. Requires AVX for 3 register versions.
	VTE                                 // AMD Virtual Transparent Encryption supported
	WAITPKG                             // TPAUSE, UMONITOR, UMWAIT
	WBNOINVD                            // Write Back and Do Not Invalidate Cache
	WRMSRNS                             // Non-Serializing Write to Model Specific Register
	X87                                 // FPU
	XGETBV1                             // Supports XGETBV with ECX = 1
	XOP                                 // Bulldozer XOP functions
	XSAVE                               // XSAVE, XRESTOR, XSETBV, XGETBV
	XSAVEC                              // Supports XSAVEC and the compacted form of XRSTOR.
	XSAVEOPT                            // XSAVEOPT available
	XSAVES                              // Supports XSAVES/XRSTORS and IA32_XSS

	// ARM features:
	AESARM   // AES instructions
	ARMCPUID // Some CPU ID registers readable at user-level
	ASIMD    // Advanced SIMD
	ASIMDDP  // SIMD Dot Product
	ASIMDHP  // Advanced SIMD half-precision floating point
	ASIMDRDM // Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH)
	ATOMICS  // Large System Extensions (LSE)
	CRC32    // CRC32/CRC32C instructions
	DCPOP    // Data cache clean to Point of Persistence (DC CVAP)
	EVTSTRM  // Generic timer
	FCMA     // Floating point complex number addition and multiplication
	FHM      // FMLAL and FMLSL instructions
	FP       // Single-precision and double-precision floating point
	FPHP     // Half-precision floating point
	GPA      // Generic Pointer Authentication
	JSCVT    // Javascript-style double->int convert (FJCVTZS)
	LRCPC    // Weaker release consistency (LDAPR, etc)
	PMULL    // Polynomial Multiply instructions (PMULL/PMULL2)
	RNDR     // Random Number instructions
	TLB      // Outer Shareable and TLB range maintenance instructions
	TS       // Flag manipulation instructions
	SHA1     // SHA-1 instructions (SHA1C, etc)
	SHA2     // SHA-2 instructions (SHA256H, etc)
	SHA3     // SHA-3 instructions (EOR3, RAXI, XAR, BCAX)
	SHA512   // SHA512 instructions
	SM3      // SM3 instructions
	SM4      // SM4 instructions
	SVE      // Scalable Vector Extension

	// PMU
	PMU_FIXEDCOUNTER_CYCLES
	PMU_FIXEDCOUNTER_REFCYCLES
	PMU_FIXEDCOUNTER_INSTRUCTIONS
	PMU_FIXEDCOUNTER_TOPDOWN_SLOTS

	// Keep it last. It automatically defines the size of []flagSet
	lastID

	firstID FeatureID = UNKNOWN + 1
)
323
// CPUInfo contains information about the detected system CPU.
type CPUInfo struct {
	BrandName              string // Brand name reported by the CPU
	VendorID               Vendor // Comparable CPU vendor ID
	VendorString           string // Raw vendor string.
	HypervisorVendorID     Vendor // Hypervisor vendor
	HypervisorVendorString string // Raw hypervisor vendor string
	featureSet             flagSet // Features of the CPU
	PhysicalCores          int    // Number of physical processor cores in your CPU. Will be 0 if undetectable.
	ThreadsPerCore         int    // Number of threads per physical core. Will be 1 if undetectable.
	LogicalCores           int    // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
	Family                 int    // CPU family number
	Model                  int    // CPU model number
	Stepping               int    // CPU stepping info
	CacheLine              int    // Cache line size in bytes. Will be 0 if undetectable.
	Hz                     int64  // Clock speed, if known, 0 otherwise. Will attempt to contain base clock speed.
	BoostFreq              int64  // Max clock speed, if known, 0 otherwise
	Cache                  struct {
		L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected
		L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected
		L2  int // L2 Cache (per core or shared). Will be -1 if undetected
		L3  int // L3 Cache (per core, per ccx or shared). Will be -1 if undetected
	}
	SGX              SGXSupport
	AMDMemEncryption AMDMemEncryptionSupport
	AVX10Level       uint8
	PMU              PerformanceMonitoringInfo // holds information about the PMU

	maxFunc   uint32 // highest supported standard CPUID function
	maxExFunc uint32 // highest supported extended CPUID function
}
355
// PerformanceMonitoringInfo holds information about CPU performance monitoring capabilities.
// This is primarily populated from CPUID leaf 0xAh on x86
type PerformanceMonitoringInfo struct {
	// VersionID (x86 only): Version ID of architectural performance monitoring.
	// A value of 0 means architectural performance monitoring is not supported or information is unavailable.
	VersionID uint8
	// NumGPCounters: Number of General-Purpose Performance Monitoring Counters per logical processor.
	// On ARM, this is derived from PMCR_EL0.N (number of event counters).
	NumGPCounters uint8
	// GPPMCWidth: Bit width of General-Purpose Performance Monitoring Counters.
	// On ARM, typically 64 for PMU event counters.
	GPPMCWidth uint8
	// NumFixedPMC: Number of Fixed-Function Performance Counters.
	// Valid on x86 if VersionID > 1. On ARM, this typically includes at least the cycle counter (PMCCNTR_EL0).
	NumFixedPMC uint8
	// FixedPMCWidth: Bit width of Fixed-Function Performance Counters.
	// Valid on x86 if VersionID > 1. On ARM, the cycle counter (PMCCNTR_EL0) is 64-bit.
	FixedPMCWidth uint8
	// Raw register output from CPUID leaf 0xAh.
	RawEBX uint32
	RawEAX uint32
	RawEDX uint32
}
379
380var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)
381var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32)
382var xgetbv func(index uint32) (eax, edx uint32)
383var rdtscpAsm func() (eax, ebx, ecx, edx uint32)
384var darwinHasAVX512 = func() bool { return false }
385
386// CPU contains information about the CPU as detected on startup,
387// or when Detect last was called.
388//
389// Use this as the primary entry point to you data.
390var CPU CPUInfo
391
// init wires up the architecture-specific detection functions and runs
// a first detection pass so CPU is populated before main starts.
func init() {
	initCPU()
	Detect()
}
396
// Detect will re-detect current CPU info.
// This will replace the content of the exported CPU variable.
//
// Unless you expect the CPU to change while you are running your program
// you should not need to call this function.
// If you call this, you must ensure that no other goroutine is accessing the
// exported CPU variable.
func Detect() {
	// Set defaults
	CPU.ThreadsPerCore = 1
	CPU.Cache.L1I = -1
	CPU.Cache.L1D = -1
	CPU.Cache.L2 = -1
	CPU.Cache.L3 = -1
	// ARM detection is opt-in via the -cpu.arm flag (see Flags), since it
	// can potentially crash; "safe" detection is the default.
	safe := true
	if detectArmFlag != nil {
		safe = !*detectArmFlag
	}
	addInfo(&CPU, safe)
	if displayFeats != nil && *displayFeats {
		fmt.Println("cpu features:", strings.Join(CPU.FeatureSet(), ","))
		// Exit with non-zero so tests will print value.
		os.Exit(1)
	}
	if disableFlag != nil {
		// Unset any features listed in the -cpu.disable flag (comma separated).
		s := strings.Split(*disableFlag, ",")
		for _, feat := range s {
			feat := ParseFeature(strings.TrimSpace(feat))
			if feat != UNKNOWN {
				CPU.featureSet.unset(feat)
			}
		}
	}
}
431
// DetectARM will detect ARM64 features.
// This is NOT done automatically since it can potentially crash
// if the OS does not handle the command.
// If in the future this can be done safely this function may not
// do anything.
func DetectARM() {
	// false = unsafe detection allowed (see addInfo's "safe" parameter).
	addInfo(&CPU, false)
}
440
441var detectArmFlag *bool
442var displayFeats *bool
443var disableFlag *string
444
// Flags will enable flags.
// This must be called *before* flag.Parse AND
// Detect must be called after the flags have been parsed.
// Note that this means that any detection used in init() functions
// will not contain these flags.
func Flags() {
	disableFlag = flag.String("cpu.disable", "", "disable cpu features; comma separated list")
	displayFeats = flag.Bool("cpu.features", false, "lists cpu features and exits")
	detectArmFlag = flag.Bool("cpu.arm", false, "allow ARM features to be detected; can potentially crash")
}
455
456// Supports returns whether the CPU supports all of the requested features.
457func (c CPUInfo) Supports(ids ...FeatureID) bool {
458 for _, id := range ids {
459 if !c.featureSet.inSet(id) {
460 return false
461 }
462 }
463 return true
464}
465
466// Has allows for checking a single feature.
467// Should be inlined by the compiler.
468func (c *CPUInfo) Has(id FeatureID) bool {
469 return c.featureSet.inSet(id)
470}
471
472// AnyOf returns whether the CPU supports one or more of the requested features.
473func (c CPUInfo) AnyOf(ids ...FeatureID) bool {
474 for _, id := range ids {
475 if c.featureSet.inSet(id) {
476 return true
477 }
478 }
479 return false
480}
481
482// Features contains several features combined for a fast check using
483// CpuInfo.HasAll
484type Features *flagSet
485
486// CombineFeatures allows to combine several features for a close to constant time lookup.
487func CombineFeatures(ids ...FeatureID) Features {
488 var v flagSet
489 for _, id := range ids {
490 v.set(id)
491 }
492 return &v
493}
494
// HasAll returns whether the CPU supports every feature in the
// set previously built with CombineFeatures.
func (c *CPUInfo) HasAll(f Features) bool {
	return c.featureSet.hasSetP(f)
}
498
499// https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
500var oneOfLevel = CombineFeatures(SYSEE, SYSCALL)
501var level1Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2)
502var level2Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3)
503var level3Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE)
504var level4Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE, AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL)
505
506// X64Level returns the microarchitecture level detected on the CPU.
507// If features are lacking or non x64 mode, 0 is returned.
508// See https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
509func (c CPUInfo) X64Level() int {
510 if !c.featureSet.hasOneOf(oneOfLevel) {
511 return 0
512 }
513 if c.featureSet.hasSetP(level4Features) {
514 return 4
515 }
516 if c.featureSet.hasSetP(level3Features) {
517 return 3
518 }
519 if c.featureSet.hasSetP(level2Features) {
520 return 2
521 }
522 if c.featureSet.hasSetP(level1Features) {
523 return 1
524 }
525 return 0
526}
527
528// Disable will disable one or several features.
529func (c *CPUInfo) Disable(ids ...FeatureID) bool {
530 for _, id := range ids {
531 c.featureSet.unset(id)
532 }
533 return true
534}
535
// Enable will enable one or several features even if they were undetected.
// This is of course not recommended for obvious reasons.
func (c *CPUInfo) Enable(ids ...FeatureID) bool {
	for _, id := range ids {
		c.featureSet.set(id)
	}
	return true
}
544
// IsVendor returns true if the detected CPU vendor matches v.
func (c CPUInfo) IsVendor(v Vendor) bool {
	return c.VendorID == v
}
549
550// FeatureSet returns all available features as strings.
551func (c CPUInfo) FeatureSet() []string {
552 s := make([]string, 0, c.featureSet.nEnabled())
553 s = append(s, c.featureSet.Strings()...)
554 return s
555}
556
// RTCounter returns the 64-bit time-stamp counter
// Uses the RDTSCP instruction. The value 0 is returned
// if the CPU does not support the instruction.
func (c CPUInfo) RTCounter() uint64 {
	if !c.Has(RDTSCP) {
		return 0
	}
	// EAX holds the low 32 bits, EDX the high 32 bits of the TSC.
	a, _, _, d := rdtscpAsm()
	return uint64(a) | (uint64(d) << 32)
}
567
// Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP.
// This variable is OS dependent, but on Linux contains information
// about the current cpu/core the code is running on.
// If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned.
func (c CPUInfo) Ia32TscAux() uint32 {
	if !c.Has(RDTSCP) {
		return 0
	}
	// RDTSCP places IA32_TSC_AUX in ECX.
	_, _, ecx, _ := rdtscpAsm()
	return ecx
}
579
// SveLengths returns arm SVE vector and predicate lengths in bits.
// Will return 0, 0 if SVE is not enabled or otherwise unable to detect.
func (c CPUInfo) SveLengths() (vl, pl uint64) {
	if !c.Has(SVE) {
		return 0, 0
	}
	return getVectorLength()
}
588
// LogicalCPU will return the Logical CPU the code is currently executing on.
// This is likely to change when the OS re-schedules the running thread
// to another CPU.
// If the current core cannot be detected, -1 will be returned.
func (c CPUInfo) LogicalCPU() int {
	if c.maxFunc < 1 {
		return -1
	}
	// CPUID leaf 1: EBX[31:24] contains the initial APIC ID.
	_, ebx, _, _ := cpuid(1)
	return int(ebx >> 24)
}
600
// frequencies tries to compute the clock speed of the CPU. If leaf 15 is
// supported, use it, otherwise parse the brand string. Yes, really.
func (c *CPUInfo) frequencies() {
	c.Hz, c.BoostFreq = 0, 0
	mfi := maxFunctionID()
	if mfi >= 0x15 {
		// Leaf 0x15: TSC/crystal ratio in EBX/EAX, crystal frequency (Hz) in ECX.
		eax, ebx, ecx, _ := cpuid(0x15)
		if eax != 0 && ebx != 0 && ecx != 0 {
			c.Hz = (int64(ecx) * int64(ebx)) / int64(eax)
		}
	}
	if mfi >= 0x16 {
		// Leaf 0x16: base frequency (MHz) in EAX, max frequency (MHz) in EBX.
		a, b, _, _ := cpuid(0x16)
		// Base...
		if a&0xffff > 0 {
			c.Hz = int64(a&0xffff) * 1_000_000
		}
		// Boost...
		if b&0xffff > 0 {
			c.BoostFreq = int64(b&0xffff) * 1_000_000
		}
	}
	if c.Hz > 0 {
		return
	}

	// computeHz determines the official rated speed of a CPU from its brand
	// string. This insanity is *actually the official documented way to do
	// this according to Intel*, prior to leaf 0x15 existing. The official
	// documentation only shows this working for exactly `x.xx` or `xxxx`
	// cases, e.g., `2.50GHz` or `1300MHz`; this parser will accept other
	// sizes.
	model := c.BrandName
	hz := strings.LastIndex(model, "Hz")
	if hz < 3 {
		return
	}
	// The character before "Hz" selects the unit multiplier.
	var multiplier int64
	switch model[hz-1] {
	case 'M':
		multiplier = 1000 * 1000
	case 'G':
		multiplier = 1000 * 1000 * 1000
	case 'T':
		multiplier = 1000 * 1000 * 1000 * 1000
	}
	if multiplier == 0 {
		return
	}
	// Scan digits right-to-left from the unit letter back to a space,
	// accumulating the value; a single '.' records the decimal divisor.
	freq := int64(0)
	divisor := int64(0)
	decimalShift := int64(1)
	var i int
	for i = hz - 2; i >= 0 && model[i] != ' '; i-- {
		if model[i] >= '0' && model[i] <= '9' {
			freq += int64(model[i]-'0') * decimalShift
			decimalShift *= 10
		} else if model[i] == '.' {
			if divisor != 0 {
				// More than one decimal point: malformed, give up.
				return
			}
			divisor = decimalShift
		} else {
			return
		}
	}
	// we didn't find a space
	if i < 0 {
		return
	}
	if divisor != 0 {
		c.Hz = (freq * multiplier) / divisor
		return
	}
	c.Hz = freq * multiplier
}
677
678// VM Will return true if the cpu id indicates we are in
679// a virtual machine.
680func (c CPUInfo) VM() bool {
681 return CPU.featureSet.inSet(HYPERVISOR)
682}
683
684// flags contains detected cpu features and characteristics
685type flags uint64
686
687// log2(bits_in_uint64)
688const flagBitsLog2 = 6
689const flagBits = 1 << flagBitsLog2
690const flagMask = flagBits - 1
691
692// flagSet contains detected cpu features and characteristics in an array of flags
693type flagSet [(lastID + flagMask) / flagBits]flags
694
695func (s *flagSet) inSet(feat FeatureID) bool {
696 return s[feat>>flagBitsLog2]&(1<<(feat&flagMask)) != 0
697}
698
699func (s *flagSet) set(feat FeatureID) {
700 s[feat>>flagBitsLog2] |= 1 << (feat & flagMask)
701}
702
703// setIf will set a feature if boolean is true.
704func (s *flagSet) setIf(cond bool, features ...FeatureID) {
705 if cond {
706 for _, offset := range features {
707 s[offset>>flagBitsLog2] |= 1 << (offset & flagMask)
708 }
709 }
710}
711
712func (s *flagSet) unset(offset FeatureID) {
713 bit := flags(1 << (offset & flagMask))
714 s[offset>>flagBitsLog2] = s[offset>>flagBitsLog2] & ^bit
715}
716
717// or with another flagset.
718func (s *flagSet) or(other flagSet) {
719 for i, v := range other[:] {
720 s[i] |= v
721 }
722}
723
724// hasSet returns whether all features are present.
725func (s *flagSet) hasSet(other flagSet) bool {
726 for i, v := range other[:] {
727 if s[i]&v != v {
728 return false
729 }
730 }
731 return true
732}
733
// hasSetP returns whether all features in other are present.
func (s *flagSet) hasSetP(other *flagSet) bool {
	for i, v := range other[:] {
		if s[i]&v != v {
			return false
		}
	}
	return true
}
743
744// hasOneOf returns whether one or more features are present.
745func (s *flagSet) hasOneOf(other *flagSet) bool {
746 for i, v := range other[:] {
747 if s[i]&v != 0 {
748 return true
749 }
750 }
751 return false
752}
753
754// nEnabled will return the number of enabled flags.
755func (s *flagSet) nEnabled() (n int) {
756 for _, v := range s[:] {
757 n += bits.OnesCount64(uint64(v))
758 }
759 return n
760}
761
762func flagSetWith(feat ...FeatureID) flagSet {
763 var res flagSet
764 for _, f := range feat {
765 res.set(f)
766 }
767 return res
768}
769
770// ParseFeature will parse the string and return the ID of the matching feature.
771// Will return UNKNOWN if not found.
772func ParseFeature(s string) FeatureID {
773 s = strings.ToUpper(s)
774 for i := firstID; i < lastID; i++ {
775 if i.String() == s {
776 return i
777 }
778 }
779 return UNKNOWN
780}
781
782// Strings returns an array of the detected features for FlagsSet.
783func (s flagSet) Strings() []string {
784 if len(s) == 0 {
785 return []string{""}
786 }
787 r := make([]string, 0)
788 for i := firstID; i < lastID; i++ {
789 if s.inSet(i) {
790 r = append(r, i.String())
791 }
792 }
793 return r
794}
795
// maxExtendedFunction returns the highest supported extended CPUID
// function (CPUID leaf 0x80000000, EAX).
func maxExtendedFunction() uint32 {
	eax, _, _, _ := cpuid(0x80000000)
	return eax
}
800
// maxFunctionID returns the highest supported standard CPUID
// function (CPUID leaf 0, EAX).
func maxFunctionID() uint32 {
	a, _, _, _ := cpuid(0)
	return a
}
805
// brandName returns the CPU brand string assembled from extended
// CPUID leaves 0x80000002..0x80000004, or "unknown" if unsupported.
func brandName() string {
	if maxExtendedFunction() >= 0x80000004 {
		v := make([]uint32, 0, 48)
		for i := uint32(0); i < 3; i++ {
			a, b, c, d := cpuid(0x80000002 + i)
			v = append(v, a, b, c, d)
		}
		return strings.Trim(string(valAsString(v...)), " ")
	}
	return "unknown"
}
817
// threadsPerCore returns the number of hardware threads (SMT) per
// physical core. Returns 1 when undetectable or for non-Intel/AMD CPUs.
func threadsPerCore() int {
	mfi := maxFunctionID()
	vend, _ := vendorID()

	if mfi < 0x4 || (vend != Intel && vend != AMD) {
		return 1
	}

	if mfi < 0xb {
		// No topology leaf 0xB available; fall back to leaves 1 and 4 (Intel only).
		if vend != Intel {
			return 1
		}
		_, b, _, d := cpuid(1)
		// EDX bit 28: HTT flag.
		if (d & (1 << 28)) != 0 {
			// v will contain logical core count
			v := (b >> 16) & 255
			if v > 1 {
				a4, _, _, _ := cpuid(4)
				// physical cores
				v2 := (a4 >> 26) + 1
				if v2 > 0 {
					return int(v) / int(v2)
				}
			}
		}
		return 1
	}
	// Leaf 0xB sub-leaf 0: EBX[15:0] = logical processors at the SMT level.
	_, b, _, _ := cpuidex(0xb, 0)
	if b&0xffff == 0 {
		if vend == AMD {
			// if >= Zen 2 0x8000001e EBX 15-8 bits means threads per core.
			// The number of threads per core is ThreadsPerCore+1
			// See PPR for AMD Family 17h Models 00h-0Fh (page 82)
			fam, _, _ := familyModel()
			_, _, _, d := cpuid(1)
			if (d&(1<<28)) != 0 && fam >= 23 {
				if maxExtendedFunction() >= 0x8000001e {
					_, b, _, _ := cpuid(0x8000001e)
					return int((b>>8)&0xff) + 1
				}
				// Fallback: HTT set on family >= 17h implies 2-way SMT.
				return 2
			}
		}
		return 1
	}
	return int(b & 0xffff)
}
865
// logicalCores returns the number of logical processors in the package,
// or 0 when undetectable / the vendor is unsupported.
func logicalCores() int {
	mfi := maxFunctionID()
	v, _ := vendorID()
	switch v {
	case Intel:
		// Use this on old Intel processors
		if mfi < 0xb {
			if mfi < 1 {
				return 0
			}
			// CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID)
			// that can be assigned to logical processors in a physical package.
			// The value may not be the same as the number of logical processors that are present in the hardware of a physical package.
			_, ebx, _, _ := cpuid(1)
			logical := (ebx >> 16) & 0xff
			return int(logical)
		}
		// Leaf 0xB sub-leaf 1 (core level): EBX[15:0] = logical processors.
		_, b, _, _ := cpuidex(0xb, 1)
		return int(b & 0xffff)
	case AMD, Hygon:
		// CPUID.1:EBX[23:16] = logical processor count.
		_, b, _, _ := cpuid(1)
		return int((b >> 16) & 0xff)
	default:
		return 0
	}
}
892
893func familyModel() (family, model, stepping int) {
894 if maxFunctionID() < 0x1 {
895 return 0, 0, 0
896 }
897 eax, _, _, _ := cpuid(1)
898 // If BaseFamily[3:0] is less than Fh then ExtendedFamily[7:0] is reserved and Family is equal to BaseFamily[3:0].
899 family = int((eax >> 8) & 0xf)
900 extFam := family == 0x6 // Intel is 0x6, needs extended model.
901 if family == 0xf {
902 // Add ExtFamily
903 family += int((eax >> 20) & 0xff)
904 extFam = true
905 }
906 // If BaseFamily[3:0] is less than 0Fh then ExtendedModel[3:0] is reserved and Model is equal to BaseModel[3:0].
907 model = int((eax >> 4) & 0xf)
908 if extFam {
909 // Add ExtModel
910 model += int((eax >> 12) & 0xf0)
911 }
912 stepping = int(eax & 0xf)
913 return family, model, stepping
914}
915
916func physicalCores() int {
917 v, _ := vendorID()
918 switch v {
919 case Intel:
920 lc := logicalCores()
921 tpc := threadsPerCore()
922 if lc > 0 && tpc > 0 {
923 return lc / tpc
924 }
925 return 0
926 case AMD, Hygon:
927 lc := logicalCores()
928 tpc := threadsPerCore()
929 if lc > 0 && tpc > 0 {
930 return lc / tpc
931 }
932
933 // The following is inaccurate on AMD EPYC 7742 64-Core Processor
934 if maxExtendedFunction() >= 0x80000008 {
935 _, _, c, _ := cpuid(0x80000008)
936 if c&0xff > 0 {
937 return int(c&0xff) + 1
938 }
939 }
940 }
941 return 0
942}
943
// Except from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID
//
// vendorMapping maps the identification string returned by CPUID leaf 0
// (hardware vendors) or leaf 0x40000000 (hypervisors) to a Vendor value.
// Keys shorter than 12 bytes (e.g. "KVMKVMKVM") still match because
// valAsString truncates the decoded string at the first zero byte.
var vendorMapping = map[string]Vendor{
	"AMDisbetter!": AMD,
	"AuthenticAMD": AMD,
	"CentaurHauls": VIA,
	"GenuineIntel": Intel,
	"TransmetaCPU": Transmeta,
	"GenuineTMx86": Transmeta,
	"Geode by NSC": NSC,
	"VIA VIA VIA ": VIA,
	"KVMKVMKVM":    KVM,
	"Linux KVM Hv": KVM,
	"TCGTCGTCGTCG": QEMU,
	"Microsoft Hv": MSVM,
	"VMwareVMware": VMware,
	"XenVMMXenVMM": XenHVM,
	"bhyve bhyve ": Bhyve,
	"HygonGenuine": Hygon,
	"Vortex86 SoC": SiS,
	"SiS SiS SiS ": SiS,
	"RiseRiseRise": SiS,
	"Genuine RDC":  RDC,
	"QNXQVMBSQG":   QNX,
	"ACRNACRNACRN": ACRN,
	"SRESRESRESRE": SRE,
	"Apple VZ":     Apple,
}
971
972func vendorID() (Vendor, string) {
973 _, b, c, d := cpuid(0)
974 v := string(valAsString(b, d, c))
975 vend, ok := vendorMapping[v]
976 if !ok {
977 return VendorUnknown, v
978 }
979 return vend, v
980}
981
982func hypervisorVendorID() (Vendor, string) {
983 // https://lwn.net/Articles/301888/
984 _, b, c, d := cpuid(0x40000000)
985 v := string(valAsString(b, c, d))
986 vend, ok := vendorMapping[v]
987 if !ok {
988 return VendorUnknown, v
989 }
990 return vend, v
991}
992
993func cacheLine() int {
994 if maxFunctionID() < 0x1 {
995 return 0
996 }
997
998 _, ebx, _, _ := cpuid(1)
999 cache := (ebx & 0xff00) >> 5 // cflush size
1000 if cache == 0 && maxExtendedFunction() >= 0x80000006 {
1001 _, _, ecx, _ := cpuid(0x80000006)
1002 cache = ecx & 0xff // cacheline size
1003 }
1004 // TODO: Read from Cache and TLB Information
1005 return int(cache)
1006}
1007
1008func (c *CPUInfo) cacheSize() {
1009 c.Cache.L1D = -1
1010 c.Cache.L1I = -1
1011 c.Cache.L2 = -1
1012 c.Cache.L3 = -1
1013 vendor, _ := vendorID()
1014 switch vendor {
1015 case Intel:
1016 if maxFunctionID() < 4 {
1017 return
1018 }
1019 c.Cache.L1I, c.Cache.L1D, c.Cache.L2, c.Cache.L3 = 0, 0, 0, 0
1020 for i := uint32(0); ; i++ {
1021 eax, ebx, ecx, _ := cpuidex(4, i)
1022 cacheType := eax & 15
1023 if cacheType == 0 {
1024 break
1025 }
1026 cacheLevel := (eax >> 5) & 7
1027 coherency := int(ebx&0xfff) + 1
1028 partitions := int((ebx>>12)&0x3ff) + 1
1029 associativity := int((ebx>>22)&0x3ff) + 1
1030 sets := int(ecx) + 1
1031 size := associativity * partitions * coherency * sets
1032 switch cacheLevel {
1033 case 1:
1034 if cacheType == 1 {
1035 // 1 = Data Cache
1036 c.Cache.L1D = size
1037 } else if cacheType == 2 {
1038 // 2 = Instruction Cache
1039 c.Cache.L1I = size
1040 } else {
1041 if c.Cache.L1D < 0 {
1042 c.Cache.L1I = size
1043 }
1044 if c.Cache.L1I < 0 {
1045 c.Cache.L1I = size
1046 }
1047 }
1048 case 2:
1049 c.Cache.L2 = size
1050 case 3:
1051 c.Cache.L3 = size
1052 }
1053 }
1054 case AMD, Hygon:
1055 // Untested.
1056 if maxExtendedFunction() < 0x80000005 {
1057 return
1058 }
1059 _, _, ecx, edx := cpuid(0x80000005)
1060 c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024)
1061 c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024)
1062
1063 if maxExtendedFunction() < 0x80000006 {
1064 return
1065 }
1066 _, _, ecx, _ = cpuid(0x80000006)
1067 c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024)
1068
1069 // CPUID Fn8000_001D_EAX_x[N:0] Cache Properties
1070 if maxExtendedFunction() < 0x8000001D || !c.Has(TOPEXT) {
1071 return
1072 }
1073
1074 // Xen Hypervisor is buggy and returns the same entry no matter ECX value.
1075 // Hack: When we encounter the same entry 100 times we break.
1076 nSame := 0
1077 var last uint32
1078 for i := uint32(0); i < math.MaxUint32; i++ {
1079 eax, ebx, ecx, _ := cpuidex(0x8000001D, i)
1080
1081 level := (eax >> 5) & 7
1082 cacheNumSets := ecx + 1
1083 cacheLineSize := 1 + (ebx & 2047)
1084 cachePhysPartitions := 1 + ((ebx >> 12) & 511)
1085 cacheNumWays := 1 + ((ebx >> 22) & 511)
1086
1087 typ := eax & 15
1088 size := int(cacheNumSets * cacheLineSize * cachePhysPartitions * cacheNumWays)
1089 if typ == 0 {
1090 return
1091 }
1092
1093 // Check for the same value repeated.
1094 comb := eax ^ ebx ^ ecx
1095 if comb == last {
1096 nSame++
1097 if nSame == 100 {
1098 return
1099 }
1100 }
1101 last = comb
1102
1103 switch level {
1104 case 1:
1105 switch typ {
1106 case 1:
1107 // Data cache
1108 c.Cache.L1D = size
1109 case 2:
1110 // Inst cache
1111 c.Cache.L1I = size
1112 default:
1113 if c.Cache.L1D < 0 {
1114 c.Cache.L1I = size
1115 }
1116 if c.Cache.L1I < 0 {
1117 c.Cache.L1I = size
1118 }
1119 }
1120 case 2:
1121 c.Cache.L2 = size
1122 case 3:
1123 c.Cache.L3 = size
1124 }
1125 }
1126 }
1127}
1128
// SGXEPCSection describes a single SGX Enclave Page Cache (EPC)
// memory section as enumerated via CPUID leaf 0x12 sub-leaves.
type SGXEPCSection struct {
	BaseAddress uint64 // base address of the section
	EPCSize     uint64 // size of the section
}
1133
// SGXSupport describes Intel Software Guard Extensions capabilities
// as gathered from CPUID leaf 0x12.
type SGXSupport struct {
	Available           bool            // SGX reported as available
	LaunchControl       bool            // SGX launch control reported as available
	SGX1Supported       bool            // SGX1 support (leaf 0x12 EAX bit 0)
	SGX2Supported       bool            // SGX2 support (leaf 0x12 EAX bit 1)
	MaxEnclaveSizeNot64 int64           // maximum enclave size outside 64-bit mode (power of two)
	MaxEnclaveSize64    int64           // maximum enclave size in 64-bit mode (power of two)
	EPCSections         []SGXEPCSection // enumerated EPC sections
}
1143
1144func hasSGX(available, lc bool) (rval SGXSupport) {
1145 rval.Available = available
1146
1147 if !available {
1148 return
1149 }
1150
1151 rval.LaunchControl = lc
1152
1153 a, _, _, d := cpuidex(0x12, 0)
1154 rval.SGX1Supported = a&0x01 != 0
1155 rval.SGX2Supported = a&0x02 != 0
1156 rval.MaxEnclaveSizeNot64 = 1 << (d & 0xFF) // pow 2
1157 rval.MaxEnclaveSize64 = 1 << ((d >> 8) & 0xFF) // pow 2
1158 rval.EPCSections = make([]SGXEPCSection, 0)
1159
1160 for subleaf := uint32(2); subleaf < 2+8; subleaf++ {
1161 eax, ebx, ecx, edx := cpuidex(0x12, subleaf)
1162 leafType := eax & 0xf
1163
1164 if leafType == 0 {
1165 // Invalid subleaf, stop iterating
1166 break
1167 } else if leafType == 1 {
1168 // EPC Section subleaf
1169 baseAddress := uint64(eax&0xfffff000) + (uint64(ebx&0x000fffff) << 32)
1170 size := uint64(ecx&0xfffff000) + (uint64(edx&0x000fffff) << 32)
1171
1172 section := SGXEPCSection{BaseAddress: baseAddress, EPCSize: size}
1173 rval.EPCSections = append(rval.EPCSections, section)
1174 }
1175 }
1176
1177 return
1178}
1179
// AMDMemEncryptionSupport describes AMD memory-encryption (SME/SEV)
// capabilities gathered from CPUID leaf 0x8000001F.
// NOTE(review): CBitPossition and NumEntryptedGuests are misspelled,
// but the exported names are kept as-is for API compatibility.
type AMDMemEncryptionSupport struct {
	Available          bool   // memory encryption reported as available
	CBitPossition      uint32 // C-bit position (leaf 0x8000001F EBX bits 5:0)
	NumVMPL            uint32 // number of VM permission levels (EBX bits 15:12)
	PhysAddrReduction  uint32 // physical address bit reduction (EBX bits 11:6)
	NumEntryptedGuests uint32 // number of encrypted guests supported (ECX)
	MinSevNoEsAsid     uint32 // minimum SEV ASID without SEV-ES (EDX)
}
1188
1189func hasAMDMemEncryption(available bool) (rval AMDMemEncryptionSupport) {
1190 rval.Available = available
1191 if !available {
1192 return
1193 }
1194
1195 _, b, c, d := cpuidex(0x8000001f, 0)
1196
1197 rval.CBitPossition = b & 0x3f
1198 rval.PhysAddrReduction = (b >> 6) & 0x3F
1199 rval.NumVMPL = (b >> 12) & 0xf
1200 rval.NumEntryptedGuests = c
1201 rval.MinSevNoEsAsid = d
1202
1203 return
1204}
1205
1206func support() flagSet {
1207 var fs flagSet
1208 mfi := maxFunctionID()
1209 vend, _ := vendorID()
1210 if mfi < 0x1 {
1211 return fs
1212 }
1213 family, model, _ := familyModel()
1214
1215 _, _, c, d := cpuid(1)
1216 fs.setIf((d&(1<<0)) != 0, X87)
1217 fs.setIf((d&(1<<8)) != 0, CMPXCHG8)
1218 fs.setIf((d&(1<<11)) != 0, SYSEE)
1219 fs.setIf((d&(1<<15)) != 0, CMOV)
1220 fs.setIf((d&(1<<23)) != 0, MMX)
1221 fs.setIf((d&(1<<24)) != 0, FXSR)
1222 fs.setIf((d&(1<<25)) != 0, FXSROPT)
1223 fs.setIf((d&(1<<25)) != 0, SSE)
1224 fs.setIf((d&(1<<26)) != 0, SSE2)
1225 fs.setIf((c&1) != 0, SSE3)
1226 fs.setIf((c&(1<<5)) != 0, VMX)
1227 fs.setIf((c&(1<<9)) != 0, SSSE3)
1228 fs.setIf((c&(1<<19)) != 0, SSE4)
1229 fs.setIf((c&(1<<20)) != 0, SSE42)
1230 fs.setIf((c&(1<<25)) != 0, AESNI)
1231 fs.setIf((c&(1<<1)) != 0, CLMUL)
1232 fs.setIf(c&(1<<22) != 0, MOVBE)
1233 fs.setIf(c&(1<<23) != 0, POPCNT)
1234 fs.setIf(c&(1<<30) != 0, RDRAND)
1235
1236 // This bit has been reserved by Intel & AMD for use by hypervisors,
1237 // and indicates the presence of a hypervisor.
1238 fs.setIf(c&(1<<31) != 0, HYPERVISOR)
1239 fs.setIf(c&(1<<29) != 0, F16C)
1240 fs.setIf(c&(1<<13) != 0, CX16)
1241
1242 if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 {
1243 fs.setIf(threadsPerCore() > 1, HTT)
1244 }
1245 if vend == AMD && (d&(1<<28)) != 0 && mfi >= 4 {
1246 fs.setIf(threadsPerCore() > 1, HTT)
1247 }
1248 fs.setIf(c&1<<26 != 0, XSAVE)
1249 fs.setIf(c&1<<27 != 0, OSXSAVE)
1250 // Check XGETBV/XSAVE (26), OXSAVE (27) and AVX (28) bits
1251 const avxCheck = 1<<26 | 1<<27 | 1<<28
1252 if c&avxCheck == avxCheck {
1253 // Check for OS support
1254 eax, _ := xgetbv(0)
1255 if (eax & 0x6) == 0x6 {
1256 fs.set(AVX)
1257 switch vend {
1258 case Intel:
1259 // Older than Haswell.
1260 fs.setIf(family == 6 && model < 60, AVXSLOW)
1261 case AMD:
1262 // Older than Zen 2
1263 fs.setIf(family < 23 || (family == 23 && model < 49), AVXSLOW)
1264 }
1265 }
1266 }
1267 // FMA3 can be used with SSE registers, so no OS support is strictly needed.
1268 // fma3 and OSXSAVE needed.
1269 const fma3Check = 1<<12 | 1<<27
1270 fs.setIf(c&fma3Check == fma3Check, FMA3)
1271
1272 // Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
1273 if mfi >= 7 {
1274 _, ebx, ecx, edx := cpuidex(7, 0)
1275 if fs.inSet(AVX) && (ebx&0x00000020) != 0 {
1276 fs.set(AVX2)
1277 }
1278 // CPUID.(EAX=7, ECX=0).EBX
1279 if (ebx & 0x00000008) != 0 {
1280 fs.set(BMI1)
1281 fs.setIf((ebx&0x00000100) != 0, BMI2)
1282 }
1283 fs.setIf(ebx&(1<<2) != 0, SGX)
1284 fs.setIf(ebx&(1<<4) != 0, HLE)
1285 fs.setIf(ebx&(1<<9) != 0, ERMS)
1286 fs.setIf(ebx&(1<<11) != 0, RTM)
1287 fs.setIf(ebx&(1<<14) != 0, MPX)
1288 fs.setIf(ebx&(1<<18) != 0, RDSEED)
1289 fs.setIf(ebx&(1<<19) != 0, ADX)
1290 fs.setIf(ebx&(1<<29) != 0, SHA)
1291
1292 // CPUID.(EAX=7, ECX=0).ECX
1293 fs.setIf(ecx&(1<<5) != 0, WAITPKG)
1294 fs.setIf(ecx&(1<<7) != 0, CETSS)
1295 fs.setIf(ecx&(1<<8) != 0, GFNI)
1296 fs.setIf(ecx&(1<<9) != 0, VAES)
1297 fs.setIf(ecx&(1<<10) != 0, VPCLMULQDQ)
1298 fs.setIf(ecx&(1<<13) != 0, TME)
1299 fs.setIf(ecx&(1<<25) != 0, CLDEMOTE)
1300 fs.setIf(ecx&(1<<23) != 0, KEYLOCKER)
1301 fs.setIf(ecx&(1<<27) != 0, MOVDIRI)
1302 fs.setIf(ecx&(1<<28) != 0, MOVDIR64B)
1303 fs.setIf(ecx&(1<<29) != 0, ENQCMD)
1304 fs.setIf(ecx&(1<<30) != 0, SGXLC)
1305
1306 // CPUID.(EAX=7, ECX=0).EDX
1307 fs.setIf(edx&(1<<4) != 0, FSRM)
1308 fs.setIf(edx&(1<<9) != 0, SRBDS_CTRL)
1309 fs.setIf(edx&(1<<10) != 0, MD_CLEAR)
1310 fs.setIf(edx&(1<<11) != 0, RTM_ALWAYS_ABORT)
1311 fs.setIf(edx&(1<<14) != 0, SERIALIZE)
1312 fs.setIf(edx&(1<<15) != 0, HYBRID_CPU)
1313 fs.setIf(edx&(1<<16) != 0, TSXLDTRK)
1314 fs.setIf(edx&(1<<18) != 0, PCONFIG)
1315 fs.setIf(edx&(1<<20) != 0, CETIBT)
1316 fs.setIf(edx&(1<<26) != 0, IBPB)
1317 fs.setIf(edx&(1<<27) != 0, STIBP)
1318 fs.setIf(edx&(1<<28) != 0, FLUSH_L1D)
1319 fs.setIf(edx&(1<<29) != 0, IA32_ARCH_CAP)
1320 fs.setIf(edx&(1<<30) != 0, IA32_CORE_CAP)
1321 fs.setIf(edx&(1<<31) != 0, SPEC_CTRL_SSBD)
1322
1323 // CPUID.(EAX=7, ECX=1).EAX
1324 eax1, _, _, edx1 := cpuidex(7, 1)
1325 fs.setIf(fs.inSet(AVX) && eax1&(1<<4) != 0, AVXVNNI)
1326 fs.setIf(eax1&(1<<1) != 0, SM3_X86)
1327 fs.setIf(eax1&(1<<2) != 0, SM4_X86)
1328 fs.setIf(eax1&(1<<7) != 0, CMPCCXADD)
1329 fs.setIf(eax1&(1<<10) != 0, MOVSB_ZL)
1330 fs.setIf(eax1&(1<<11) != 0, STOSB_SHORT)
1331 fs.setIf(eax1&(1<<12) != 0, CMPSB_SCADBS_SHORT)
1332 fs.setIf(eax1&(1<<22) != 0, HRESET)
1333 fs.setIf(eax1&(1<<23) != 0, AVXIFMA)
1334 fs.setIf(eax1&(1<<26) != 0, LAM)
1335
1336 // CPUID.(EAX=7, ECX=1).EDX
1337 fs.setIf(edx1&(1<<4) != 0, AVXVNNIINT8)
1338 fs.setIf(edx1&(1<<5) != 0, AVXNECONVERT)
1339 fs.setIf(edx1&(1<<6) != 0, AMXTRANSPOSE)
1340 fs.setIf(edx1&(1<<7) != 0, AMXTF32)
1341 fs.setIf(edx1&(1<<8) != 0, AMXCOMPLEX)
1342 fs.setIf(edx1&(1<<10) != 0, AVXVNNIINT16)
1343 fs.setIf(edx1&(1<<14) != 0, PREFETCHI)
1344 fs.setIf(edx1&(1<<19) != 0, AVX10)
1345 fs.setIf(edx1&(1<<21) != 0, APX_F)
1346
1347 // Only detect AVX-512 features if XGETBV is supported
1348 if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
1349 // Check for OS support
1350 eax, _ := xgetbv(0)
1351
1352 // Verify that XCR0[7:5] = โ111bโ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and
1353 // ZMM16-ZMM31 state are enabled by OS)
1354 /// and that XCR0[2:1] = โ11bโ (XMM state and YMM state are enabled by OS).
1355 hasAVX512 := (eax>>5)&7 == 7 && (eax>>1)&3 == 3
1356 if runtime.GOOS == "darwin" {
1357 hasAVX512 = fs.inSet(AVX) && darwinHasAVX512()
1358 }
1359 if hasAVX512 {
1360 fs.setIf(ebx&(1<<16) != 0, AVX512F)
1361 fs.setIf(ebx&(1<<17) != 0, AVX512DQ)
1362 fs.setIf(ebx&(1<<21) != 0, AVX512IFMA)
1363 fs.setIf(ebx&(1<<26) != 0, AVX512PF)
1364 fs.setIf(ebx&(1<<27) != 0, AVX512ER)
1365 fs.setIf(ebx&(1<<28) != 0, AVX512CD)
1366 fs.setIf(ebx&(1<<30) != 0, AVX512BW)
1367 fs.setIf(ebx&(1<<31) != 0, AVX512VL)
1368 // ecx
1369 fs.setIf(ecx&(1<<1) != 0, AVX512VBMI)
1370 fs.setIf(ecx&(1<<3) != 0, AMXFP8)
1371 fs.setIf(ecx&(1<<6) != 0, AVX512VBMI2)
1372 fs.setIf(ecx&(1<<11) != 0, AVX512VNNI)
1373 fs.setIf(ecx&(1<<12) != 0, AVX512BITALG)
1374 fs.setIf(ecx&(1<<14) != 0, AVX512VPOPCNTDQ)
1375 // edx
1376 fs.setIf(edx&(1<<8) != 0, AVX512VP2INTERSECT)
1377 fs.setIf(edx&(1<<22) != 0, AMXBF16)
1378 fs.setIf(edx&(1<<23) != 0, AVX512FP16)
1379 fs.setIf(edx&(1<<24) != 0, AMXTILE)
1380 fs.setIf(edx&(1<<25) != 0, AMXINT8)
1381 // eax1 = CPUID.(EAX=7, ECX=1).EAX
1382 fs.setIf(eax1&(1<<5) != 0, AVX512BF16)
1383 fs.setIf(eax1&(1<<19) != 0, WRMSRNS)
1384 fs.setIf(eax1&(1<<21) != 0, AMXFP16)
1385 fs.setIf(eax1&(1<<27) != 0, MSRLIST)
1386 }
1387 }
1388
1389 // CPUID.(EAX=7, ECX=2)
1390 _, _, _, edx = cpuidex(7, 2)
1391 fs.setIf(edx&(1<<0) != 0, PSFD)
1392 fs.setIf(edx&(1<<1) != 0, IDPRED_CTRL)
1393 fs.setIf(edx&(1<<2) != 0, RRSBA_CTRL)
1394 fs.setIf(edx&(1<<4) != 0, BHI_CTRL)
1395 fs.setIf(edx&(1<<5) != 0, MCDT_NO)
1396
1397 if fs.inSet(SGX) {
1398 eax, _, _, _ := cpuidex(0x12, 0)
1399 fs.setIf(eax&(1<<12) != 0, SGXPQC)
1400 }
1401
1402 // Add keylocker features.
1403 if fs.inSet(KEYLOCKER) && mfi >= 0x19 {
1404 _, ebx, _, _ := cpuidex(0x19, 0)
1405 fs.setIf(ebx&5 == 5, KEYLOCKERW) // Bit 0 and 2 (1+4)
1406 }
1407
1408 // Add AVX10 features.
1409 if fs.inSet(AVX10) && mfi >= 0x24 {
1410 _, ebx, _, _ := cpuidex(0x24, 0)
1411 fs.setIf(ebx&(1<<16) != 0, AVX10_128)
1412 fs.setIf(ebx&(1<<17) != 0, AVX10_256)
1413 fs.setIf(ebx&(1<<18) != 0, AVX10_512)
1414 }
1415
1416 }
1417
1418 // Processor Extended State Enumeration Sub-leaf (EAX = 0DH, ECX = 1)
1419 // EAX
1420 // Bit 00: XSAVEOPT is available.
1421 // Bit 01: Supports XSAVEC and the compacted form of XRSTOR if set.
1422 // Bit 02: Supports XGETBV with ECX = 1 if set.
1423 // Bit 03: Supports XSAVES/XRSTORS and IA32_XSS if set.
1424 // Bits 31 - 04: Reserved.
1425 // EBX
1426 // Bits 31 - 00: The size in bytes of the XSAVE area containing all states enabled by XCRO | IA32_XSS.
1427 // ECX
1428 // Bits 31 - 00: Reports the supported bits of the lower 32 bits of the IA32_XSS MSR. IA32_XSS[n] can be set to 1 only if ECX[n] is 1.
1429 // EDX?
1430 // Bits 07 - 00: Used for XCR0. Bit 08: PT state. Bit 09: Used for XCR0. Bits 12 - 10: Reserved. Bit 13: HWP state. Bits 31 - 14: Reserved.
1431 if mfi >= 0xd {
1432 if fs.inSet(XSAVE) {
1433 eax, _, _, _ := cpuidex(0xd, 1)
1434 fs.setIf(eax&(1<<0) != 0, XSAVEOPT)
1435 fs.setIf(eax&(1<<1) != 0, XSAVEC)
1436 fs.setIf(eax&(1<<2) != 0, XGETBV1)
1437 fs.setIf(eax&(1<<3) != 0, XSAVES)
1438 }
1439 }
1440 if maxExtendedFunction() >= 0x80000001 {
1441 _, _, c, d := cpuid(0x80000001)
1442 if (c & (1 << 5)) != 0 {
1443 fs.set(LZCNT)
1444 fs.set(POPCNT)
1445 }
1446 // ECX
1447 fs.setIf((c&(1<<0)) != 0, LAHF)
1448 fs.setIf((c&(1<<2)) != 0, SVM)
1449 fs.setIf((c&(1<<6)) != 0, SSE4A)
1450 fs.setIf((c&(1<<10)) != 0, IBS)
1451 fs.setIf((c&(1<<22)) != 0, TOPEXT)
1452
1453 // EDX
1454 fs.setIf(d&(1<<11) != 0, SYSCALL)
1455 fs.setIf(d&(1<<20) != 0, NX)
1456 fs.setIf(d&(1<<22) != 0, MMXEXT)
1457 fs.setIf(d&(1<<23) != 0, MMX)
1458 fs.setIf(d&(1<<24) != 0, FXSR)
1459 fs.setIf(d&(1<<25) != 0, FXSROPT)
1460 fs.setIf(d&(1<<27) != 0, RDTSCP)
1461 fs.setIf(d&(1<<30) != 0, AMD3DNOWEXT)
1462 fs.setIf(d&(1<<31) != 0, AMD3DNOW)
1463
1464 /* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
1465 * used unless the OS has AVX support. */
1466 if fs.inSet(AVX) {
1467 fs.setIf((c&(1<<11)) != 0, XOP)
1468 fs.setIf((c&(1<<16)) != 0, FMA4)
1469 }
1470
1471 }
1472 if maxExtendedFunction() >= 0x80000007 {
1473 _, b, _, d := cpuid(0x80000007)
1474 fs.setIf((b&(1<<0)) != 0, MCAOVERFLOW)
1475 fs.setIf((b&(1<<1)) != 0, SUCCOR)
1476 fs.setIf((b&(1<<2)) != 0, HWA)
1477 fs.setIf((d&(1<<9)) != 0, CPBOOST)
1478 }
1479
1480 if maxExtendedFunction() >= 0x80000008 {
1481 _, b, _, _ := cpuid(0x80000008)
1482 fs.setIf(b&(1<<28) != 0, PSFD)
1483 fs.setIf(b&(1<<27) != 0, CPPC)
1484 fs.setIf(b&(1<<24) != 0, SPEC_CTRL_SSBD)
1485 fs.setIf(b&(1<<23) != 0, PPIN)
1486 fs.setIf(b&(1<<21) != 0, TLB_FLUSH_NESTED)
1487 fs.setIf(b&(1<<20) != 0, EFER_LMSLE_UNS)
1488 fs.setIf(b&(1<<19) != 0, IBRS_PROVIDES_SMP)
1489 fs.setIf(b&(1<<18) != 0, IBRS_PREFERRED)
1490 fs.setIf(b&(1<<17) != 0, STIBP_ALWAYSON)
1491 fs.setIf(b&(1<<15) != 0, STIBP)
1492 fs.setIf(b&(1<<14) != 0, IBRS)
1493 fs.setIf((b&(1<<13)) != 0, INT_WBINVD)
1494 fs.setIf(b&(1<<12) != 0, IBPB)
1495 fs.setIf((b&(1<<9)) != 0, WBNOINVD)
1496 fs.setIf((b&(1<<8)) != 0, MCOMMIT)
1497 fs.setIf((b&(1<<4)) != 0, RDPRU)
1498 fs.setIf((b&(1<<3)) != 0, INVLPGB)
1499 fs.setIf((b&(1<<1)) != 0, MSRIRC)
1500 fs.setIf((b&(1<<0)) != 0, CLZERO)
1501 }
1502
1503 if fs.inSet(SVM) && maxExtendedFunction() >= 0x8000000A {
1504 _, _, _, edx := cpuid(0x8000000A)
1505 fs.setIf((edx>>0)&1 == 1, SVMNP)
1506 fs.setIf((edx>>1)&1 == 1, LBRVIRT)
1507 fs.setIf((edx>>2)&1 == 1, SVML)
1508 fs.setIf((edx>>3)&1 == 1, NRIPS)
1509 fs.setIf((edx>>4)&1 == 1, TSCRATEMSR)
1510 fs.setIf((edx>>5)&1 == 1, VMCBCLEAN)
1511 fs.setIf((edx>>6)&1 == 1, SVMFBASID)
1512 fs.setIf((edx>>7)&1 == 1, SVMDA)
1513 fs.setIf((edx>>10)&1 == 1, SVMPF)
1514 fs.setIf((edx>>12)&1 == 1, SVMPFT)
1515 }
1516
1517 if maxExtendedFunction() >= 0x8000001a {
1518 eax, _, _, _ := cpuid(0x8000001a)
1519 fs.setIf((eax>>0)&1 == 1, FP128)
1520 fs.setIf((eax>>1)&1 == 1, MOVU)
1521 fs.setIf((eax>>2)&1 == 1, FP256)
1522 }
1523
1524 if maxExtendedFunction() >= 0x8000001b && fs.inSet(IBS) {
1525 eax, _, _, _ := cpuid(0x8000001b)
1526 fs.setIf((eax>>0)&1 == 1, IBSFFV)
1527 fs.setIf((eax>>1)&1 == 1, IBSFETCHSAM)
1528 fs.setIf((eax>>2)&1 == 1, IBSOPSAM)
1529 fs.setIf((eax>>3)&1 == 1, IBSRDWROPCNT)
1530 fs.setIf((eax>>4)&1 == 1, IBSOPCNT)
1531 fs.setIf((eax>>5)&1 == 1, IBSBRNTRGT)
1532 fs.setIf((eax>>6)&1 == 1, IBSOPCNTEXT)
1533 fs.setIf((eax>>7)&1 == 1, IBSRIPINVALIDCHK)
1534 fs.setIf((eax>>8)&1 == 1, IBS_OPFUSE)
1535 fs.setIf((eax>>9)&1 == 1, IBS_FETCH_CTLX)
1536 fs.setIf((eax>>10)&1 == 1, IBS_OPDATA4) // Doc says "Fixed,0. IBS op data 4 MSR supported", but assuming they mean 1.
1537 fs.setIf((eax>>11)&1 == 1, IBS_ZEN4)
1538 }
1539
1540 if maxExtendedFunction() >= 0x8000001f && vend == AMD {
1541 a, _, _, _ := cpuid(0x8000001f)
1542 fs.setIf((a>>0)&1 == 1, SME)
1543 fs.setIf((a>>1)&1 == 1, SEV)
1544 fs.setIf((a>>2)&1 == 1, MSR_PAGEFLUSH)
1545 fs.setIf((a>>3)&1 == 1, SEV_ES)
1546 fs.setIf((a>>4)&1 == 1, SEV_SNP)
1547 fs.setIf((a>>5)&1 == 1, VMPL)
1548 fs.setIf((a>>10)&1 == 1, SME_COHERENT)
1549 fs.setIf((a>>11)&1 == 1, SEV_64BIT)
1550 fs.setIf((a>>12)&1 == 1, SEV_RESTRICTED)
1551 fs.setIf((a>>13)&1 == 1, SEV_ALTERNATIVE)
1552 fs.setIf((a>>14)&1 == 1, SEV_DEBUGSWAP)
1553 fs.setIf((a>>15)&1 == 1, IBS_PREVENTHOST)
1554 fs.setIf((a>>16)&1 == 1, VTE)
1555 fs.setIf((a>>24)&1 == 1, VMSA_REGPROT)
1556 }
1557
1558 if maxExtendedFunction() >= 0x80000021 && vend == AMD {
1559 a, _, c, _ := cpuid(0x80000021)
1560 fs.setIf((a>>31)&1 == 1, SRSO_MSR_FIX)
1561 fs.setIf((a>>30)&1 == 1, SRSO_USER_KERNEL_NO)
1562 fs.setIf((a>>29)&1 == 1, SRSO_NO)
1563 fs.setIf((a>>28)&1 == 1, IBPB_BRTYPE)
1564 fs.setIf((a>>27)&1 == 1, SBPB)
1565 fs.setIf((c>>1)&1 == 1, TSA_L1_NO)
1566 fs.setIf((c>>2)&1 == 1, TSA_SQ_NO)
1567 fs.setIf((a>>5)&1 == 1, TSA_VERW_CLEAR)
1568 }
1569 if vend == AMD {
1570 if family < 0x19 {
1571 // AMD CPUs that are older than Family 19h are not vulnerable to TSA but do not set TSA_L1_NO or TSA_SQ_NO.
1572 // Source: https://www.amd.com/content/dam/amd/en/documents/resources/bulletin/technical-guidance-for-mitigating-transient-scheduler-attacks.pdf
1573 fs.set(TSA_L1_NO)
1574 fs.set(TSA_SQ_NO)
1575 } else if family == 0x1a {
1576 // AMD Family 1Ah models 00h-4Fh and 60h-7Fh are also not vulnerable to TSA but do not set TSA_L1_NO or TSA_SQ_NO.
1577 // Future AMD CPUs will set these CPUID bits if appropriate. CPUs will be designed to set these CPUID bits if appropriate.
1578 notVuln := model <= 0x4f || (model >= 0x60 && model <= 0x7f)
1579 fs.setIf(notVuln, TSA_L1_NO, TSA_SQ_NO)
1580 }
1581 }
1582
1583 if mfi >= 0x20 {
1584 // Microsoft has decided to purposefully hide the information
1585 // of the guest TEE when VMs are being created using Hyper-V.
1586 //
1587 // This leads us to check for the Hyper-V cpuid features
1588 // (0x4000000C), and then for the `ebx` value set.
1589 //
1590 // For Intel TDX, `ebx` is set as `0xbe3`, being 3 the part
1591 // we're mostly interested about,according to:
1592 // https://github.com/torvalds/linux/blob/d2f51b3516dade79269ff45eae2a7668ae711b25/arch/x86/include/asm/hyperv-tlfs.h#L169-L174
1593 _, ebx, _, _ := cpuid(0x4000000C)
1594 fs.setIf(ebx == 0xbe3, TDX_GUEST)
1595 }
1596
1597 if mfi >= 0x21 {
1598 // Intel Trusted Domain Extensions Guests have their own cpuid leaf (0x21).
1599 _, ebx, ecx, edx := cpuid(0x21)
1600 identity := string(valAsString(ebx, edx, ecx))
1601 fs.setIf(identity == "IntelTDX ", TDX_GUEST)
1602 }
1603
1604 return fs
1605}
1606
1607func (c *CPUInfo) supportAVX10() uint8 {
1608 if c.maxFunc >= 0x24 && c.featureSet.inSet(AVX10) {
1609 _, ebx, _, _ := cpuidex(0x24, 0)
1610 return uint8(ebx)
1611 }
1612 return 0
1613}
1614
// valAsString decodes the given 32-bit register values into a byte
// string, little-endian (lowest byte first), stopping at the first
// zero byte.
func valAsString(values ...uint32) []byte {
	out := make([]byte, 0, 4*len(values))
	for _, v := range values {
		for shift := uint(0); shift < 32; shift += 8 {
			ch := byte(v >> shift)
			if ch == 0 {
				// NUL terminates the string.
				return out
			}
			out = append(out, ch)
		}
	}
	return out
}
1636
1637func parseLeaf0AH(c *CPUInfo, eax, ebx, edx uint32) (info PerformanceMonitoringInfo) {
1638 info.VersionID = uint8(eax & 0xFF)
1639 info.NumGPCounters = uint8((eax >> 8) & 0xFF)
1640 info.GPPMCWidth = uint8((eax >> 16) & 0xFF)
1641
1642 info.RawEBX = ebx
1643 info.RawEAX = eax
1644 info.RawEDX = edx
1645
1646 if info.VersionID > 1 { // This information is only valid if VersionID > 1
1647 info.NumFixedPMC = uint8(edx & 0x1F) // Bits 4:0
1648 info.FixedPMCWidth = uint8((edx >> 5) & 0xFF) // Bits 12:5
1649 }
1650 if info.VersionID > 0 {
1651 // first 4 fixed events are always instructions retired, cycles, ref cycles and topdown slots
1652 if ebx == 0x0 && info.NumFixedPMC == 3 {
1653 c.featureSet.set(PMU_FIXEDCOUNTER_INSTRUCTIONS)
1654 c.featureSet.set(PMU_FIXEDCOUNTER_CYCLES)
1655 c.featureSet.set(PMU_FIXEDCOUNTER_REFCYCLES)
1656 }
1657 if ebx == 0x0 && info.NumFixedPMC == 4 {
1658 c.featureSet.set(PMU_FIXEDCOUNTER_INSTRUCTIONS)
1659 c.featureSet.set(PMU_FIXEDCOUNTER_CYCLES)
1660 c.featureSet.set(PMU_FIXEDCOUNTER_REFCYCLES)
1661 c.featureSet.set(PMU_FIXEDCOUNTER_TOPDOWN_SLOTS)
1662 }
1663 if ebx != 0x0 {
1664 if ((ebx >> 0) & 1) == 0 {
1665 c.featureSet.set(PMU_FIXEDCOUNTER_INSTRUCTIONS)
1666 }
1667 if ((ebx >> 1) & 1) == 0 {
1668 c.featureSet.set(PMU_FIXEDCOUNTER_CYCLES)
1669 }
1670 if ((ebx >> 2) & 1) == 0 {
1671 c.featureSet.set(PMU_FIXEDCOUNTER_REFCYCLES)
1672 }
1673 if ((ebx >> 3) & 1) == 0 {
1674 c.featureSet.set(PMU_FIXEDCOUNTER_TOPDOWN_SLOTS)
1675 }
1676 }
1677 }
1678 return info
1679}