avcodec/x86/lossless_videodsp: Remove obsolete MMX(EXT) functions

The only systems which benefit from these are truly
ancient 32-bit x86s, as all other systems use at least the SSE2 versions
(this includes all x64 CPUs, which is why this code is restricted
to x86-32).

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Andreas Rheinhardt 2022-06-20 07:31:42 +02:00
parent 230ea38de1
commit fed07efcde
2 changed files with 4 additions and 74 deletions
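For context on the commit message: SSE2 is part of the x86-64 baseline ISA, so in any runtime-dispatch scheme of the shape sketched below, the MMX branch can only ever be reached by a 32-bit build on a pre-SSE2 CPU. This is a minimal sketch; the names, flag constants, and DSPContext type are hypothetical stand-ins for av_get_cpu_flags() and the EXTERNAL_MMX()/EXTERNAL_SSE2() macros the real init code uses.

#include <stdint.h>
#include <stddef.h>

/* Hypothetical stand-ins for FFmpeg's CPU-flag machinery. */
#define FLAG_MMX  (1 << 0)
#define FLAG_SSE2 (1 << 1)

typedef struct {
    void (*add_bytes)(uint8_t *dst, uint8_t *src, ptrdiff_t w);
} DSPContext;

static void add_bytes_mmx(uint8_t *dst, uint8_t *src, ptrdiff_t w)
{ (void)dst; (void)src; (void)w; /* 8 bytes per iteration  */ }

static void add_bytes_sse2(uint8_t *dst, uint8_t *src, ptrdiff_t w)
{ (void)dst; (void)src; (void)w; /* 16 bytes per iteration */ }

static void init_dsp(DSPContext *c, int cpu_flags)
{
#if ARCH_X86_32                    /* set by the build system */
    if (cpu_flags & FLAG_MMX)      /* only pre-SSE2 32-bit CPUs stop here */
        c->add_bytes = add_bytes_mmx;
#endif
    if (cpu_flags & FLAG_SSE2)     /* true on every x86-64 CPU */
        c->add_bytes = add_bytes_sse2;
}

Because the SSE2 assignment always runs last on 64-bit hardware, deleting the 32-bit-only branches changes nothing there; this is exactly the structure of ff_llviddsp_init_x86 in the second file below.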

libavcodec/x86/lossless_videodsp.asm

@@ -38,11 +38,11 @@ pb_zzzzzzzz67676767: db -1,-1,-1,-1,-1,-1,-1,-1, 6, 7, 6, 7, 6, 7, 6, 7
 SECTION .text

 ;------------------------------------------------------------------------------
-; void ff_add_median_pred_mmxext(uint8_t *dst, const uint8_t *top,
-;                                const uint8_t *diff, int w,
-;                                int *left, int *left_top)
+; void ff_add_median_pred(uint8_t *dst, const uint8_t *top,
+;                         const uint8_t *diff, int w,
+;                         int *left, int *left_top)
 ;------------------------------------------------------------------------------
-%macro MEDIAN_PRED 0
+INIT_XMM sse2
 cglobal add_median_pred, 6,6,8, dst, top, diff, w, left, left_top
     movu    m0, [topq]
     mova    m2, m0
@@ -100,14 +100,6 @@ cglobal add_median_pred, 6,6,8, dst, top, diff, w, left, left_top
     movzx   r2d, byte [topq-1]
     mov     [left_topq], r2d
     RET
-%endmacro
-
-%if ARCH_X86_32
-INIT_MMX mmxext
-MEDIAN_PRED
-%endif
-INIT_XMM sse2
-MEDIAN_PRED

 %macro ADD_LEFT_LOOP 2 ; %1 = dst_is_aligned, %2 = src_is_aligned
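Independent of SIMD flavour, add_median_pred's contract is the HuffYUV median predictor: each output byte is the residual plus the median of left, top, and left + top - topleft. Below is a plain-C sketch of those semantics, modelled on the C fallback in libavcodec/lossless_videodsp.c; the median3 helper name is ours (FFmpeg's real helper is the mid_pred() macro).

#include <stdint.h>
#include <stddef.h>

/* Median of three values, the heart of the HuffYUV predictor. */
static int median3(int a, int b, int c)
{
    int mx = a > b ? a : b;
    int mn = a > b ? b : a;
    return c > mx ? mx : (c < mn ? mn : c);
}

static void add_median_pred_ref(uint8_t *dst, const uint8_t *top,
                                const uint8_t *diff, ptrdiff_t w,
                                int *left, int *left_top)
{
    int l  = *left     & 0xFF;
    int tl = *left_top & 0xFF;
    for (ptrdiff_t i = 0; i < w; i++) {
        /* predict from left, top and top-left, then add the residual */
        l      = (median3(l, top[i], (l + top[i] - tl) & 0xFF) + diff[i]) & 0xFF;
        tl     = top[i];
        dst[i] = (uint8_t)l;
    }
    *left     = l;
    *left_top = tl;
}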
@@ -240,10 +232,6 @@ cglobal add_bytes, 3,4,2, dst, src, w, size
     REP_RET
 %endmacro

-%if ARCH_X86_32
-INIT_MMX mmx
-ADD_BYTES
-%endif
 INIT_XMM sse2
 ADD_BYTES
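ADD_BYTES itself is trivial: a wrapping per-byte sum, which the SSE2 body performs 16 lanes at a time with paddb (the removed MMX version managed 8). A scalar sketch of the same contract:

#include <stdint.h>
#include <stddef.h>

/* Scalar equivalent of the ADD_BYTES macro: dst[i] += src[i] with
 * modulo-256 wraparound, exactly what paddb does lane-wise. */
static void add_bytes_ref(uint8_t *dst, uint8_t *src, ptrdiff_t w)
{
    for (ptrdiff_t i = 0; i < w; i++)
        dst[i] += src[i];
}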

libavcodec/x86/lossless_videodsp_init.c

@@ -19,17 +19,12 @@
  */

 #include "config.h"
-#include "libavutil/x86/asm.h"
 #include "../lossless_videodsp.h"
 #include "libavutil/x86/cpu.h"

-void ff_add_bytes_mmx(uint8_t *dst, uint8_t *src, ptrdiff_t w);
 void ff_add_bytes_sse2(uint8_t *dst, uint8_t *src, ptrdiff_t w);
 void ff_add_bytes_avx2(uint8_t *dst, uint8_t *src, ptrdiff_t w);
-void ff_add_median_pred_mmxext(uint8_t *dst, const uint8_t *top,
-                               const uint8_t *diff, ptrdiff_t w,
-                               int *left, int *left_top);
 void ff_add_median_pred_sse2(uint8_t *dst, const uint8_t *top,
                              const uint8_t *diff, ptrdiff_t w,
                              int *left, int *left_top);
@@ -47,63 +42,10 @@ int ff_add_left_pred_int16_unaligned_ssse3(uint16_t *dst, const uint16_t *src, u
 void ff_add_gradient_pred_ssse3(uint8_t *src, const ptrdiff_t stride, const ptrdiff_t width);
 void ff_add_gradient_pred_avx2(uint8_t *src, const ptrdiff_t stride, const ptrdiff_t width);

-#if HAVE_INLINE_ASM && HAVE_7REGS && ARCH_X86_32
-static void add_median_pred_cmov(uint8_t *dst, const uint8_t *top,
-                                 const uint8_t *diff, ptrdiff_t w,
-                                 int *left, int *left_top)
-{
-    x86_reg w2 = -w;
-    x86_reg x;
-    int l  = *left     & 0xff;
-    int tl = *left_top & 0xff;
-    int t;
-    __asm__ volatile (
-        "mov    %7, %3              \n"
-        "1:                         \n"
-        "movzbl (%3, %4), %2        \n"
-        "mov    %2, %k3             \n"
-        "sub    %b1, %b3            \n"
-        "add    %b0, %b3            \n"
-        "mov    %2, %1              \n"
-        "cmp    %0, %2              \n"
-        "cmovg  %0, %2              \n"
-        "cmovg  %1, %0              \n"
-        "cmp    %k3, %0             \n"
-        "cmovg  %k3, %0             \n"
-        "mov    %7, %3              \n"
-        "cmp    %2, %0              \n"
-        "cmovl  %2, %0              \n"
-        "add    (%6, %4), %b0       \n"
-        "mov    %b0, (%5, %4)       \n"
-        "inc    %4                  \n"
-        "jl     1b                  \n"
-        : "+&q"(l), "+&q"(tl), "=&r"(t), "=&q"(x), "+&r"(w2)
-        : "r"(dst + w), "r"(diff + w), "rm"(top + w)
-    );
-    *left     = l;
-    *left_top = tl;
-}
-#endif
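The cmov chain in the removed routine is the clamp formulation of the same median: the gradient prediction l + t - tl, clamped into [min(l,t), max(l,t)], equals the median of the three values, and each clamp step costs one cmp/cmov pair instead of a hard-to-predict branch. A C sketch of that shape (hypothetical helper, not FFmpeg's mid_pred()):

/* Median of {l, t, l + t - tl} via clamping, mirroring the cmov
 * sequence above; each ternary maps onto a cmp/cmov pair. */
static inline int median_via_clamp(int l, int t, int tl)
{
    int p  = l + t - tl;            /* gradient prediction       */
    int mn = l < t ? l : t;
    int mx = l < t ? t : l;
    p = p < mn ? mn : p;            /* clamp from below          */
    p = p > mx ? mx : p;            /* clamp from above          */
    return p;                       /* == median of the three    */
}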
 void ff_llviddsp_init_x86(LLVidDSPContext *c)
 {
     int cpu_flags = av_get_cpu_flags();

-#if HAVE_INLINE_ASM && HAVE_7REGS && ARCH_X86_32
-    if (cpu_flags & AV_CPU_FLAG_CMOV)
-        c->add_median_pred = add_median_pred_cmov;
-#endif
-
-    if (ARCH_X86_32 && EXTERNAL_MMX(cpu_flags)) {
-        c->add_bytes = ff_add_bytes_mmx;
-    }
-
-    if (ARCH_X86_32 && EXTERNAL_MMXEXT(cpu_flags)) {
-        /* slower than cmov version on AMD */
-        if (!(cpu_flags & AV_CPU_FLAG_3DNOW))
-            c->add_median_pred = ff_add_median_pred_mmxext;
-    }
-
     if (EXTERNAL_SSE2(cpu_flags)) {
         c->add_bytes       = ff_add_bytes_sse2;
         c->add_median_pred = ff_add_median_pred_sse2;