1
0
Fork 0

lavc/me_cmp: R-V V pix_abs_y2

C908:
pix_abs_0_2_c: 904.0
pix_abs_0_2_rvv_i32: 172.2
pix_abs_1_2_c: 460.0
pix_abs_1_2_rvv_i32: 168.2

Signed-off-by: Rémi Denis-Courmont <remi@remlab.net>
This commit is contained in:
sunyuechi 2024-02-06 21:46:07 +08:00 committed by Rémi Denis-Courmont
parent f1ec475f66
commit 37463d7979
2 changed files with 56 additions and 0 deletions

View File

@ -34,6 +34,10 @@ int ff_pix_abs16_x2_rvv(MpegEncContext *v, const uint8_t *pix1, const uint8_t *p
ptrdiff_t stride, int h);
/* Assembly routine whose body is defined outside this view; it is installed
 * as c->pix_abs[1][1] by ff_me_cmp_init_riscv(). */
int ff_pix_abs8_x2_rvv(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
ptrdiff_t stride, int h);
/*
 * Vertically interpolated sum of absolute differences, implemented in
 * RISC-V vector assembly (16 and 8 pixels per row respectively).
 *
 * For each of the h rows the assembly accumulates
 *     |pix1[x] - ((pix2[x] + pix2[x + stride] + 1) >> 1)|
 * over the row's pixels, then advances pix1 and pix2 by stride bytes.
 * Because row y is averaged with row y + 1, h + 1 rows of pix2 are read.
 *
 * The v argument is not read by the assembly loop. h must be at least 1:
 * the loop decrements h before testing it against zero.
 * The accumulated total is presumably returned through the pix_abs_ret
 * macro, which is defined outside this view - TODO confirm.
 */
int ff_pix_abs16_y2_rvv(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
ptrdiff_t stride, int h);
int ff_pix_abs8_y2_rvv(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
ptrdiff_t stride, int h);
av_cold void ff_me_cmp_init_riscv(MECmpContext *c, AVCodecContext *avctx)
{
@ -47,6 +51,8 @@ av_cold void ff_me_cmp_init_riscv(MECmpContext *c, AVCodecContext *avctx)
c->sad[1] = ff_pix_abs8_rvv;
c->pix_abs[0][1] = ff_pix_abs16_x2_rvv;
c->pix_abs[1][1] = ff_pix_abs8_x2_rvv;
c->pix_abs[0][2] = ff_pix_abs16_y2_rvv;
c->pix_abs[1][2] = ff_pix_abs8_y2_rvv;
}
#endif
}

View File

@ -115,3 +115,53 @@ func ff_pix_abs8_x2_rvv, zve32x
pix_abs_ret
endfunc
/*
 * ff_pix_abs16_y2_rvv: 16-pixel-wide SAD between pix1 and the vertical
 * rounded average of two consecutive pix2 rows.
 *
 * Register use, assuming the standard RISC-V calling convention applied to
 * the C prototype (v, pix1, pix2, stride, h):
 *   a1 = pix1, a2 = pix2, a3 = stride, a4 = h (row counter), t1 = pix2 + stride
 *   v0[0] = 32-bit running sum
 *
 * The row counter is decremented before it is tested, so h must be >= 1.
 * Rows 0..h of pix2 are read (h + 1 rows in total).
 * NOTE(review): every pix2 row except the first and last is loaded twice,
 * once through t1 and once through a2 on the following iteration.
 */
func ff_pix_abs16_y2_rvv, zve32x
csrwi vxrm, 0                           /* rounding mode 0 (round-to-nearest-up) for vaaddu */
vsetivli zero, 1, e32, m1, ta, ma       /* one 32-bit element: set up the accumulator */
add t1, a2, a3                          /* t1 = address of the row below a2 */
vmv.s.x v0, zero                        /* v0[0] = 0 */
1:
vsetivli zero, 16, e8, m1, tu, ma       /* 16 byte-sized elements per row */
vle8.v v12, (a2)                        /* v12 = current pix2 row */
vle8.v v24, (t1)                        /* v24 = next pix2 row */
addi a4, a4, -1                         /* one row fewer to go */
vle8.v v4, (a1)                         /* v4 = current pix1 row */
vaaddu.vv v12, v12, v24                 /* v12 = (v12 + v24 + 1) >> 1 */
vwsubu.vv v16, v4, v12                  /* v16 = pix1 - avg, widened to 16 bits */
vwsubu.vv v20, v12, v4                  /* v20 = avg - pix1, widened to 16 bits */
add a1, a1, a3                          /* advance pix1 by one stride */
vsetvli zero, zero, e16, m2, tu, ma     /* same vl, now operating on the 16-bit differences */
add a2, a2, a3                          /* advance pix2 by one stride */
vmax.vv v16, v16, v20                   /* signed max(d, -d) = |d| */
add t1, t1, a3                          /* advance the next-row pointer */
vwredsum.vs v0, v16, v0                 /* v0[0] += sum of v16, widened to 32 bits */
bnez a4, 1b                             /* loop until the row counter reaches zero */
/* pix_abs_ret is a macro defined outside this view; presumably it moves
 * v0[0] into a0 and returns - TODO confirm. */
pix_abs_ret
endfunc
/*
 * ff_pix_abs8_y2_rvv: 8-pixel-wide SAD between pix1 and the vertical
 * rounded average of two consecutive pix2 rows.
 *
 * Same structure as a 16-wide kernel but with vl = 8 and half the register
 * group size: bytes are handled at LMUL = 1/2 and the widened 16-bit
 * differences at LMUL = 1.
 *
 * Register use, assuming the standard RISC-V calling convention applied to
 * the C prototype (v, pix1, pix2, stride, h):
 *   a1 = pix1, a2 = pix2, a3 = stride, a4 = h (row counter), t1 = pix2 + stride
 *   v0[0] = 32-bit running sum
 *
 * The row counter is decremented before it is tested, so h must be >= 1.
 * Rows 0..h of pix2 are read (h + 1 rows in total).
 */
func ff_pix_abs8_y2_rvv, zve32x
csrwi vxrm, 0                           /* rounding mode 0 (round-to-nearest-up) for vaaddu */
vsetivli zero, 1, e32, m1, ta, ma       /* one 32-bit element: set up the accumulator */
add t1, a2, a3                          /* t1 = address of the row below a2 */
vmv.s.x v0, zero                        /* v0[0] = 0 */
1:
vsetivli zero, 8, e8, mf2, tu, ma       /* 8 byte-sized elements per row */
vle8.v v12, (a2)                        /* v12 = current pix2 row */
vle8.v v24, (t1)                        /* v24 = next pix2 row */
addi a4, a4, -1                         /* one row fewer to go */
vle8.v v4, (a1)                         /* v4 = current pix1 row */
vaaddu.vv v12, v12, v24                 /* v12 = (v12 + v24 + 1) >> 1 */
vwsubu.vv v16, v4, v12                  /* v16 = pix1 - avg, widened to 16 bits */
vwsubu.vv v20, v12, v4                  /* v20 = avg - pix1, widened to 16 bits */
add a1, a1, a3                          /* advance pix1 by one stride */
vsetvli zero, zero, e16, m1, tu, ma     /* same vl, now operating on the 16-bit differences */
add a2, a2, a3                          /* advance pix2 by one stride */
vmax.vv v16, v16, v20                   /* signed max(d, -d) = |d| */
add t1, t1, a3                          /* advance the next-row pointer */
vwredsum.vs v0, v16, v0                 /* v0[0] += sum of v16, widened to 32 bits */
bnez a4, 1b                             /* loop until the row counter reaches zero */
/* pix_abs_ret is a macro defined outside this view; presumably it moves
 * v0[0] into a0 and returns - TODO confirm. */
pix_abs_ret
endfunc