From 198c65dddcd9a65722cca49e2755cd2fb632f521 Mon Sep 17 00:00:00 2001 From: Cody Tapscott Date: Tue, 6 May 2025 19:46:21 -0400 Subject: [PATCH 1/2] Remove `tzcnt` from audited BMI1 instructions As it turns out [^1], compilers will emit this instruction in some circumstances even when BMI1 is not technically available on your architecture, since, by happy accident, older CPUs decode this instruction in a backward-compatible way for non-zero inputs. [^1]: https://stackoverflow.com/questions/61422827/does-x64-support-imply-bmi1-support --- src/auditor/instructions.json | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/auditor/instructions.json b/src/auditor/instructions.json index 8aa7bf9ad..f4863c01f 100644 --- a/src/auditor/instructions.json +++ b/src/auditor/instructions.json @@ -1035,8 +1035,7 @@ "bextr", "blsi", "blsmsk", - "blsr", - "tzcnt" + "blsr" ], "adcx": [] } From 665e5f16c69a969224310f1be838937039d9e9a4 Mon Sep 17 00:00:00 2001 From: Cody Tapscott Date: Tue, 6 May 2025 20:07:09 -0400 Subject: [PATCH 2/2] Add fix to `generate_instructions_list.jl` (+ re-run!) --- contrib/generate_instructions_list.jl | 7 ++ src/auditor/instructions.json | 109 +++++++++++++++++++++++++- 2 files changed, 114 insertions(+), 2 deletions(-) diff --git a/contrib/generate_instructions_list.jl b/contrib/generate_instructions_list.jl index 8536caa66..673fed433 100644 --- a/contrib/generate_instructions_list.jl +++ b/contrib/generate_instructions_list.jl @@ -57,6 +57,13 @@ function generate_dict() end end end + if name == "bmi1" + # `tzcnt` can be legally emitted by compilers in some cases even when + # the BMI1 feature is not available, so do not audit it. 
+ # + # see: https://stackoverflow.com/questions/61422827/does-x64-support-imply-bmi1-support + deleteat!(instructions, findfirst(==("tzcnt"), instructions)) + end dict[name] = instructions end free(xml) diff --git a/src/auditor/instructions.json b/src/auditor/instructions.json index f4863c01f..5b757ca7b 100644 --- a/src/auditor/instructions.json +++ b/src/auditor/instructions.json @@ -200,7 +200,6 @@ "pmulld", "popcnt", "ptest", - "rex crc32", "roundpd", "roundps", "roundsd", @@ -301,6 +300,8 @@ "v4fmaddss", "v4fnmaddps", "v4fnmaddss", + "vaddph", + "vaddsh", "vaesdec", "vaesdeclast", "vaesenc", @@ -319,35 +320,72 @@ "vbroadcasti32x8", "vbroadcasti64x2", "vbroadcasti64x4", + "vcmpph", + "vcmpsh", + "vcomish", "vcompresspd", "vcompressps", + "vcvtdq2ph", "vcvtne2ps2bf16", "vcvtneps2bf16", + "vcvtpd2ph", "vcvtpd2qq", "vcvtpd2udq", "vcvtpd2uqq", + "vcvtph2dq", + "vcvtph2pd", + "vcvtph2psx", + "vcvtph2qq", + "vcvtph2udq", + "vcvtph2uqq", + "vcvtph2uw", + "vcvtph2w", + "vcvtps2phx", "vcvtps2qq", "vcvtps2udq", "vcvtps2uqq", "vcvtqq2pd", + "vcvtqq2ph", "vcvtqq2ps", + "vcvtsd2sh", "vcvtsd2usi", + "vcvtsh2sd", + "vcvtsh2si", + "vcvtsh2ss", + "vcvtsh2usi", + "vcvtsi2sh", + "vcvtss2sh", "vcvtss2usi", "vcvttpd2qq", "vcvttpd2udq", "vcvttpd2uqq", + "vcvttph2dq", + "vcvttph2qq", + "vcvttph2udq", + "vcvttph2uqq", + "vcvttph2uw", + "vcvttph2w", "vcvttps2qq", "vcvttps2udq", "vcvttps2uqq", "vcvttsd2usi", + "vcvttsh2si", + "vcvttsh2usi", "vcvttss2usi", "vcvtudq2pd", + "vcvtudq2ph", "vcvtudq2ps", "vcvtuqq2pd", + "vcvtuqq2ph", "vcvtuqq2ps", "vcvtusi2sd", + "vcvtusi2sh", "vcvtusi2ss", + "vcvtuw2ph", + "vcvtw2ph", "vdbpsadbw", + "vdivph", + "vdivsh", "vdpbf16ps", "vexp2pd", "vexp2ps", @@ -361,13 +399,53 @@ "vextracti32x8", "vextracti64x2", "vextracti64x4", + "vfcmaddcph", + "vfcmaddcsh", + "vfcmulcph", + "vfcmulcsh", "vfixupimmpd", "vfixupimmps", "vfixupimmsd", "vfixupimmss", + "vfmadd132ph", + "vfmadd132sh", + "vfmadd213ph", + "vfmadd213sh", + "vfmadd231ph", + "vfmadd231sh", + 
"vfmaddcph", + "vfmaddcsh", + "vfmaddsub132ph", + "vfmaddsub213ph", + "vfmaddsub231ph", + "vfmsub132ph", + "vfmsub132sh", + "vfmsub213ph", + "vfmsub213sh", + "vfmsub231ph", + "vfmsub231sh", + "vfmsubadd132ph", + "vfmsubadd213ph", + "vfmsubadd231ph", + "vfmulcph", + "vfmulcsh", + "vfnmadd132ph", + "vfnmadd132sh", + "vfnmadd213ph", + "vfnmadd213sh", + "vfnmadd231ph", + "vfnmadd231sh", + "vfnmsub132ph", + "vfnmsub132sh", + "vfnmsub213ph", + "vfnmsub213sh", + "vfnmsub231ph", + "vfnmsub231sh", "vfpclasspd", + "vfpclassph", "vfpclassps", "vfpclasssd", + "vfpclasssh", "vfpclassss", "vgatherdpd", "vgatherdps", @@ -382,12 +460,16 @@ "vgatherqpd", "vgatherqps", "vgetexppd", + "vgetexpph", "vgetexpps", "vgetexpsd", + "vgetexpsh", "vgetexpss", "vgetmantpd", + "vgetmantph", "vgetmantps", "vgetmantsd", + "vgetmantsh", "vgetmantss", "vgf2p8affineinvqb", "vgf2p8affineqb", @@ -400,12 +482,20 @@ "vinserti32x8", "vinserti64x2", "vinserti64x4", + "vmaxph", + "vmaxsh", + "vminph", + "vminsh", "vmovdqa32", "vmovdqa64", "vmovdqu16", "vmovdqu32", "vmovdqu64", "vmovdqu8", + "vmovsh", + "vmovw", + "vmulph", + "vmulsh", "vp2intersectd", "vp2intersectq", "vp4dpwssd", @@ -557,13 +647,19 @@ "vrcp28ps", "vrcp28sd", "vrcp28ss", + "vrcpph", + "vrcpsh", "vreducepd", + "vreduceph", "vreduceps", "vreducesd", + "vreducesh", "vreducess", "vrndscalepd", + "vrndscaleph", "vrndscaleps", "vrndscalesd", + "vrndscalesh", "vrndscaless", "vrsqrt14pd", "vrsqrt14ps", @@ -573,9 +669,13 @@ "vrsqrt28ps", "vrsqrt28sd", "vrsqrt28ss", + "vrsqrtph", + "vrsqrtsh", "vscalefpd", + "vscalefph", "vscalefps", "vscalefsd", + "vscalefsh", "vscalefss", "vscatterdpd", "vscatterdps", @@ -592,7 +692,12 @@ "vshuff32x4", "vshuff64x2", "vshufi32x4", - "vshufi64x2" + "vshufi64x2", + "vsqrtph", + "vsqrtsh", + "vsubph", + "vsubsh", + "vucomish" ], "f16c": [ "vcvtph2ps",