1
0
Fork 0

lavu/fixed_dsp: R-V V scalarproduct

This commit is contained in:
Rémi Denis-Courmont 2023-10-03 20:46:04 +03:00
parent 71716406e6
commit eb73d178ea
2 changed files with 27 additions and 1 deletions

View File

@ -25,6 +25,7 @@
#include "libavutil/cpu.h"
#include "libavutil/fixed_dsp.h"
int ff_scalarproduct_fixed_rvv(const int *v1, const int *v2, int len);
void ff_butterflies_fixed_rvv(int *v1, int *v2, int len);
av_cold void ff_fixed_dsp_init_riscv(AVFixedDSPContext *fdsp)
@ -32,7 +33,10 @@ av_cold void ff_fixed_dsp_init_riscv(AVFixedDSPContext *fdsp)
#if HAVE_RVV
int flags = av_get_cpu_flags();
if ((flags & AV_CPU_FLAG_RVV_I32) && (flags & AV_CPU_FLAG_RVB_ADDR))
if ((flags & AV_CPU_FLAG_RVV_I32) && (flags & AV_CPU_FLAG_RVB_ADDR)) {
if (flags & AV_CPU_FLAG_RVV_I64)
fdsp->scalarproduct_fixed = ff_scalarproduct_fixed_rvv;
fdsp->butterflies_fixed = ff_butterflies_fixed_rvv;
}
#endif
}

View File

@ -20,6 +20,28 @@
#include "asm.S"
// Fixed-point scalar product of two 32-bit integer vectors:
//   return (2^30 + sum(v1[i] * v2[i], i = 0..len-1)) >> 31
// The products are accumulated in 64-bit vector lanes; the 2^30 bias makes
// the final arithmetic shift by 31 round to nearest instead of truncating.
// Per the C prototype (const int *v1, const int *v2, int len), the arguments
// are expected in a0 = v1, a1 = v2, a2 = len (standard RISC-V calling
// convention), and the result is returned in a0.
// NOTE(review): a2 is consumed as the vsetvli application vector length,
// which is an unsigned quantity -- presumably callers only pass len >= 0;
// confirm.
func ff_scalarproduct_fixed_rvv, zve64x
// t1 = 2^30, the rounding bias (one half in units of 2^31).
li t1, 1 << 30
// Select 64-bit elements, LMUL=8, with vl = VLMAX (rs1 is x0, rd is not).
vsetvli t0, zero, e64, m8, ta, ma
// Clear every lane of the 64-bit accumulator group starting at v8.
vmv.v.x v8, zero
// Element 0 of v0 holds the bias; it later seeds the reduction.
vmv.s.x v0, t1
1:
// t0 = number of 32-bit elements handled in this pass (at most a2).
// Tail-undisturbed (tu) matters here: accumulator lanes past vl must keep
// the partial sums from earlier passes when a pass is shorter.
vsetvli t0, a2, e32, m4, tu, ma
// Load t0 elements from the first input.
vle32.v v16, (a0)
// a2 = elements still to process after this pass.
sub a2, a2, t0
// Load t0 elements from the second input.
vle32.v v20, (a1)
// a0 += t0 * 4: step the first pointer past the consumed 32-bit elements.
sh2add a0, t0, a0
// Widening signed multiply-accumulate: 64-bit v8[i] += v16[i] * v20[i].
vwmacc.vv v8, v16, v20
// a1 += t0 * 4: step the second pointer likewise.
sh2add a1, t0, a1
// Repeat until no elements remain.
bnez a2, 1b
// Back to 64-bit elements at VLMAX so the reduction covers every
// accumulator lane, not just those touched by the last pass.
vsetvli t0, zero, e64, m8, ta, ma
// v0[0] = v0[0] (the bias) + sum of all lanes of v8.
vredsum.vs v0, v8, v0
// Move the 64-bit total from v0[0] into a0.
vmv.x.s a0, v0
// Arithmetic shift right by 31 drops the fractional bits (rounded via bias).
// NOTE(review): this needs a0 to hold the whole 64-bit sum, i.e. XLEN >= 64;
// confirm how (or whether) RV32 builds are excluded.
srai a0, a0, 31
ret
endfunc
// (a0) = (a0) + (a1), (a1) = (a0) - (a1) [0..a2-1]
func ff_butterflies_fixed_rvv, zve32x
1: