From e30369bc1c683aeab6ea74bc37b4ae77b03f79b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Wed, 14 Feb 2024 23:00:54 +0200 Subject: [PATCH] aarch64: Use regular hwcaps flags instead of HWCAP_CPUID for CPU feature detection on Linux MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This makes the code much simpler (especially for adding support for other instruction set extensions), avoids needing inline assembly for this feature, and generally is more of the canonical way to do this. The CPU feature detection was added in 493fcde50a84cb23854335bcb0e55c6f383d55db, using HWCAP_CPUID. The argument for using that was that HWCAP_CPUID was added much earlier in the kernel (in Linux v4.11), while the HWCAP flags for individual features always come later. Using HWCAP_CPUID thus allows detecting support for new CPU extensions before the kernel exposes information about them via hwcap flags. However, in practice, there's probably little advantage in this. E.g. HWCAP2_I8MM was added in Linux v5.10 - long after HWCAP_CPUID, but there are probably very few practical cases where one would run a kernel older than that on a CPU that supports those instructions. Additionally, we provide our own definitions of the flag values to check (as they are fixed constants anyway), with names not conflicting with the ones from system headers. This reduces the number of ifdefs needed, and allows detecting those features even if building with userland headers that are lacking the definitions of those flags. Also, slightly older versions of QEMU, e.g. 6.2 in Ubuntu 22.04, do expose support for these features via HWCAP flags, but the emulated cpuid registers are missing the bits for exposing e.g. I8MM. (This issue is fixed in later versions of QEMU though.) 
Signed-off-by: Martin Storsjö --- libavutil/aarch64/cpu.c | 28 +++++++--------------------- 1 file changed, 7 insertions(+), 21 deletions(-) diff --git a/libavutil/aarch64/cpu.c b/libavutil/aarch64/cpu.c index f27fef3992..7a05391343 100644 --- a/libavutil/aarch64/cpu.c +++ b/libavutil/aarch64/cpu.c @@ -24,34 +24,20 @@ #include #include -#define get_cpu_feature_reg(reg, val) \ - __asm__("mrs %0, " #reg : "=r" (val)) +#define HWCAP_AARCH64_ASIMDDP (1 << 20) +#define HWCAP2_AARCH64_I8MM (1 << 13) static int detect_flags(void) { int flags = 0; -#if defined(HWCAP_CPUID) && HAVE_INLINE_ASM unsigned long hwcap = getauxval(AT_HWCAP); - // We can check for DOTPROD and I8MM using HWCAP_ASIMDDP and - // HWCAP2_I8MM too, avoiding to read the CPUID registers (which triggers - // a trap, handled by the kernel). However the HWCAP_* defines for these - // extensions are added much later than HWCAP_CPUID, so the userland - // headers might lack support for them even if the binary later is run - // on hardware that does support it (and where the kernel might support - // HWCAP_CPUID). - // See https://www.kernel.org/doc/html/latest/arm64/cpu-feature-registers.html - if (hwcap & HWCAP_CPUID) { - uint64_t tmp; + unsigned long hwcap2 = getauxval(AT_HWCAP2); - get_cpu_feature_reg(ID_AA64ISAR0_EL1, tmp); - if (((tmp >> 44) & 0xf) == 0x1) - flags |= AV_CPU_FLAG_DOTPROD; - get_cpu_feature_reg(ID_AA64ISAR1_EL1, tmp); - if (((tmp >> 52) & 0xf) == 0x1) - flags |= AV_CPU_FLAG_I8MM; - } -#endif + if (hwcap & HWCAP_AARCH64_ASIMDDP) + flags |= AV_CPU_FLAG_DOTPROD; + if (hwcap2 & HWCAP2_AARCH64_I8MM) + flags |= AV_CPU_FLAG_I8MM; return flags; }