1
0
Fork 0

lavc/idctdsp: improve R-V V add_pixels_clamped

This commit is contained in:
Rémi Denis-Courmont 2023-10-27 22:12:53 +03:00
parent 600c6f1b55
commit d48810f3a5
1 changed files with 14 additions and 14 deletions

View File

@ -23,7 +23,6 @@
func ff_put_pixels_clamped_rvv, zve32x
vsetivli zero, 8, e16, m1, ta, ma
vlseg8e16.v v24, (a0)
1:
/* RVV only has signed-signed and unsigned-unsigned clipping.
* We need two steps for signed-to-unsigned clipping. */
vsetvli t0, zero, e16, m8, ta, ma
@ -54,17 +53,18 @@ func ff_put_signed_pixels_clamped_rvv, zve64x
ret
endfunc
/*
 * ff_add_pixels_clamped_rvv, segment-load variant.
 *
 * Adds 8-bit values read from (a1), with a byte stride of a2, to 16-bit
 * values read contiguously from (a0), then jumps to local label 1 for the
 * clamping and the store.
 *
 * NOTE(review): presumably a0 = 8x8 block of 16-bit coefficients,
 * a1 = 8-bit pixel rows, a2 = line stride in bytes - confirm against the
 * C prototype.
 */
func ff_add_pixels_clamped_rvv, zve32x
/* vl = 8 elements, SEW = 8 bits, LMUL = 1/2. */
vsetivli zero, 8, e8, mf2, ta, ma
/* Unit-stride segment load: 8 segments of eight 16-bit fields;
 * field k of every segment goes to register v(24 + k). */
vlseg8e16.v v24, (a0)
/* Strided segment load: 8 segments of eight 8-bit fields, a2 bytes apart;
 * field k of every segment goes to register v(16 + k). */
vlsseg8e8.v v16, (a1), a2
/* For each of the 8 register pairs: 16-bit destination =
 * 16-bit source + zero-extended 8-bit source. */
vwaddu.wv v24, v24, v16
vwaddu.wv v25, v25, v17
vwaddu.wv v26, v26, v18
vwaddu.wv v27, v27, v19
vwaddu.wv v28, v28, v20
vwaddu.wv v29, v29, v21
vwaddu.wv v30, v30, v22
vwaddu.wv v31, v31, v23
/* Continue at the nearest preceding local label "1", which sits right
 * after the 16-bit load in ff_put_pixels_clamped_rvv, so the sums in
 * v24-v31 take the place of that function's loaded values. */
j 1b
/*
 * ff_add_pixels_clamped_rvv, 64-bit strided load/store variant.
 *
 * Reads 8 strided 64-bit elements from (a1) (stride a2 bytes) and 64
 * contiguous 16-bit elements from (a0), adds them, clamps every sum to
 * the unsigned 8-bit range and writes the result back to (a1) with the
 * same stride.
 *
 * NOTE(review): presumably a0 = 8x8 block of 16-bit coefficients,
 * a1 = 8-bit pixel rows (each 64-bit element being one row of 8 pixels),
 * a2 = line stride in bytes - confirm against the C prototype.
 * NOTE(review): vl = 8 at e8/mf2 and vl = 64 at e8/m4 both need
 * VLEN >= 128; presumably guaranteed by the run-time CPU check that
 * selects this function - confirm.
 */
func ff_add_pixels_clamped_rvv, zve64x
/* vl = 8, SEW = 8, LMUL = 1/2. With this setting a 64-bit access has
 * EMUL = (64 / 8) * 1/2 = 4, i.e. it fills a 4-register group. */
vsetivli zero, 8, e8, mf2, ta, ma
/* t0 = 64, the element count used for the arithmetic below. */
li t0, 8 * 8
/* Load 8 64-bit elements, a2 bytes apart, into the v16 group. */
vlse64.v v16, (a1), a2
/* Switch to vl = t0 (64) elements of 8 bits, LMUL = 4, so the bytes just
 * loaded are now addressed as individual 8-bit elements. */
vsetvli zero, t0, e8, m4, ta, ma
/* 16-bit load under SEW = 8 / LMUL = 4 has EMUL = 8: fills v24-v31. */
vle16.v v24, (a0)
/* v24 group (16-bit) += zero-extended 8-bit elements of the v16 group. */
vwaddu.wv v24, v24, v16
/* Same vl, now SEW = 16 / LMUL = 8, to operate on the 16-bit sums. */
vsetvli zero, zero, e16, m8, ta, ma
/* Signed max with x0: negative sums become 0. */
vmax.vx v24, v24, zero
/* Back to SEW = 8 / LMUL = 4 for the narrowing step. */
vsetvli zero, zero, e8, m4, ta, ma
/* Narrowing unsigned clip with shift 0: saturate the (now non-negative)
 * 16-bit sums to 8 bits, result in the v16 group. */
vnclipu.wi v16, v24, 0
/* Restore the vl = 8 / e8 / mf2 setting used for the 64-bit load. */
vsetivli zero, 8, e8, mf2, ta, ma
/* Store the 8 64-bit elements back to (a1), a2 bytes apart. */
vsse64.v v16, (a1), a2
ret
endfunc