1
0

avcodec: Remove DCT, FFT, MDCT and RDFT

They were replaced by TX from libavutil; the tremendous work
to get to this point (both creating TX as well as porting
the users of the components removed in this commit) was
completely performed by Lynne alone.

Removing the subsystems from configure may break some command lines,
because the --disable-fft etc. options are no longer recognized.

Co-authored-by: Lynne <dev@lynne.ee>
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
This commit is contained in:
Andreas Rheinhardt 2023-09-28 19:57:36 +02:00
parent d9464f3e34
commit 6f7bf64dbc
52 changed files with 2 additions and 8799 deletions

11
configure vendored
View File

@ -136,13 +136,9 @@ Component options:
--disable-w32threads disable Win32 threads [autodetect]
--disable-os2threads disable OS/2 threads [autodetect]
--disable-network disable network support [no]
--disable-dct disable DCT code
--disable-dwt disable DWT code
--disable-error-resilience disable error resilience code
--disable-lsp disable LSP code
--disable-mdct disable MDCT code
--disable-rdft disable RDFT code
--disable-fft disable FFT code
--disable-faan disable floating point AAN (I)DCT code
--disable-pixelutils disable pixel utils in libavutil
@ -2004,17 +2000,13 @@ PROGRAM_LIST="
"
SUBSYSTEM_LIST="
dct
dwt
error_resilience
faan
fast_unaligned
fft
lsp
mdct
pixelutils
network
rdft
"
# COMPONENT_LIST needs to come last to ensure correct dependency checking
@ -2766,7 +2758,6 @@ cbs_h266_select="cbs"
cbs_jpeg_select="cbs"
cbs_mpeg2_select="cbs"
cbs_vp9_select="cbs"
dct_select="rdft"
deflate_wrapper_deps="zlib"
dirac_parse_select="golomb"
dovi_rpu_select="golomb"
@ -2786,7 +2777,6 @@ frame_thread_encoder_deps="encoders threads"
inflate_wrapper_deps="zlib"
intrax8_select="blockdsp wmv2dsp"
iso_media_select="mpeg4audio"
mdct_select="fft"
me_cmp_select="idctdsp"
mpeg_er_select="error_resilience"
mpegaudio_select="mpegaudiodsp mpegaudioheader"
@ -2796,7 +2786,6 @@ mpegvideoenc_select="aandcttables fdctdsp me_cmp mpegvideo pixblockdsp"
msmpeg4dec_select="h263_decoder"
msmpeg4enc_select="h263_encoder"
vc1dsp_select="h264chroma qpeldsp startcode"
rdft_select="fft"
# decoders / encoders
aac_decoder_select="adts_header mpeg4audio sinewin"

View File

@ -48,11 +48,6 @@ Files that have MIPS copyright notice in them:
float_dsp_mips.c
libm_mips.h
softfloat_tables.h
* libavcodec/
fft_fixed_32.c
fft_init_table.c
fft_table.h
mdct_fixed_32.c
* libavcodec/mips/
aacdec_fixed.c
aacsbr_fixed.c
@ -70,9 +65,6 @@ Files that have MIPS copyright notice in them:
compute_antialias_float.h
lsp_mips.h
dsputil_mips.c
fft_mips.c
fft_table.h
fft_init_table.c
fmtconvert_mips.c
iirfilter_mips.c
mpegaudiodsp_mips_fixed.c

View File

@ -32,6 +32,7 @@ OBJS = ac3_parser.o \
allcodecs.o \
avcodec.o \
avdct.o \
avfft.o \
avpacket.o \
bitstream.o \
bitstream_filters.o \
@ -81,7 +82,6 @@ OBJS-$(CONFIG_CBS_JPEG) += cbs_jpeg.o
OBJS-$(CONFIG_CBS_MPEG2) += cbs_mpeg2.o
OBJS-$(CONFIG_CBS_VP9) += cbs_vp9.o
OBJS-$(CONFIG_CRYSTALHD) += crystalhd.o
OBJS-$(CONFIG_DCT) += dct.o dct32_fixed.o dct32_float.o
OBJS-$(CONFIG_DEFLATE_WRAPPER) += zlib_wrapper.o
OBJS-$(CONFIG_DOVI_RPU) += dovi_rpu.o
OBJS-$(CONFIG_ERROR_RESILIENCE) += error_resilience.o
@ -90,9 +90,6 @@ OBJS-$(CONFIG_EXIF) += exif.o tiff_common.o
OBJS-$(CONFIG_FAANDCT) += faandct.o
OBJS-$(CONFIG_FAANIDCT) += faanidct.o
OBJS-$(CONFIG_FDCTDSP) += fdctdsp.o jfdctfst.o jfdctint.o
FFT-OBJS-$(CONFIG_HARDCODED_TABLES) += cos_tables.o
OBJS-$(CONFIG_FFT) += avfft.o fft_float.o fft_fixed_32.o \
fft_init_table.o $(FFT-OBJS-yes)
OBJS-$(CONFIG_FMTCONVERT) += fmtconvert.o
OBJS-$(CONFIG_GOLOMB) += golomb.o
OBJS-$(CONFIG_H263DSP) += h263dsp.o
@ -125,7 +122,6 @@ OBJS-$(CONFIG_LLVIDENCDSP) += lossless_videoencdsp.o
OBJS-$(CONFIG_LPC) += lpc.o
OBJS-$(CONFIG_LSP) += lsp.o
OBJS-$(CONFIG_LZF) += lzf.o
OBJS-$(CONFIG_MDCT) += mdct_float.o mdct_fixed_32.o
OBJS-$(CONFIG_ME_CMP) += me_cmp.o
OBJS-$(CONFIG_MEDIACODEC) += mediacodecdec_common.o mediacodec_surface.o mediacodec_wrapper.o mediacodec_sw_buffer.o
OBJS-$(CONFIG_MPEG_ER) += mpeg_er.o
@ -157,7 +153,6 @@ OBJS-$(CONFIG_QSV) += qsv.o
OBJS-$(CONFIG_QSVDEC) += qsvdec.o
OBJS-$(CONFIG_QSVENC) += qsvenc.o
OBJS-$(CONFIG_RANGECODER) += rangecoder.o
OBJS-$(CONFIG_RDFT) += rdft.o
OBJS-$(CONFIG_RV34DSP) += rv34dsp.o
OBJS-$(CONFIG_SINEWIN) += sinewin.o
OBJS-$(CONFIG_SNAPPY) += snappy.o
@ -1326,8 +1321,6 @@ TESTPROGS = avcodec \
TESTPROGS-$(CONFIG_AV1_VAAPI_ENCODER) += av1_levels
TESTPROGS-$(CONFIG_CABAC) += cabac
TESTPROGS-$(CONFIG_DCT) += avfft
TESTPROGS-$(CONFIG_FFT) += fft fft-fixed32
TESTPROGS-$(CONFIG_GOLOMB) += golomb
TESTPROGS-$(CONFIG_IDCTDSP) += dct
TESTPROGS-$(CONFIG_IIRFILTER) += iirfilter
@ -1347,7 +1340,6 @@ HOSTPROGS = aacps_tablegen \
aacps_fixed_tablegen \
cbrt_tablegen \
cbrt_fixed_tablegen \
cos_tablegen \
dv_tablegen \
motionpixels_tablegen \
mpegaudio_tablegen \
@ -1362,12 +1354,6 @@ CLEANFILES = *_tables.c *_tables.h *_tablegen$(HOSTEXESUF)
$(SUBDIR)tests/dct$(EXESUF): $(SUBDIR)dctref.o $(SUBDIR)aandcttab.o
$(SUBDIR)dv_tablegen$(HOSTEXESUF): $(SUBDIR)dvdata_host.o
TRIG_TABLES = cos cos_fixed sin
TRIG_TABLES := $(TRIG_TABLES:%=$(SUBDIR)%_tables.c)
$(TRIG_TABLES): $(SUBDIR)%_tables.c: $(SUBDIR)cos_tablegen$(HOSTEXESUF)
$(M)./$< $* > $@
ifdef CONFIG_SMALL
$(SUBDIR)%_tablegen$(HOSTEXESUF): HOSTCFLAGS += -DCONFIG_SMALL=1
else

View File

@ -1,5 +1,4 @@
# subsystems
OBJS-$(CONFIG_FFT) += aarch64/fft_init_aarch64.o
OBJS-$(CONFIG_FMTCONVERT) += aarch64/fmtconvert_init.o
OBJS-$(CONFIG_H264CHROMA) += aarch64/h264chroma_init_aarch64.o
OBJS-$(CONFIG_H264DSP) += aarch64/h264dsp_init_aarch64.o
@ -36,7 +35,6 @@ ARMV8-OBJS-$(CONFIG_VIDEODSP) += aarch64/videodsp.o
# subsystems
NEON-OBJS-$(CONFIG_AAC_DECODER) += aarch64/sbrdsp_neon.o
NEON-OBJS-$(CONFIG_FFT) += aarch64/fft_neon.o
NEON-OBJS-$(CONFIG_FMTCONVERT) += aarch64/fmtconvert_neon.o
NEON-OBJS-$(CONFIG_H264CHROMA) += aarch64/h264cmc_neon.o
NEON-OBJS-$(CONFIG_H264DSP) += aarch64/h264dsp_neon.o \
@ -47,7 +45,6 @@ NEON-OBJS-$(CONFIG_H264QPEL) += aarch64/h264qpel_neon.o \
NEON-OBJS-$(CONFIG_HPELDSP) += aarch64/hpeldsp_neon.o
NEON-OBJS-$(CONFIG_IDCTDSP) += aarch64/idctdsp_neon.o \
aarch64/simple_idct_neon.o
NEON-OBJS-$(CONFIG_MDCT) += aarch64/mdct_neon.o
NEON-OBJS-$(CONFIG_ME_CMP) += aarch64/me_cmp_neon.o
NEON-OBJS-$(CONFIG_MPEGAUDIODSP) += aarch64/mpegaudiodsp_neon.o
NEON-OBJS-$(CONFIG_PIXBLOCKDSP) += aarch64/pixblockdsp_neon.o

View File

@ -1,25 +0,0 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_AARCH64_ASM_OFFSETS_H
#define AVCODEC_AARCH64_ASM_OFFSETS_H
/* FFTContext */
#define IMDCT_HALF 0x48
#endif /* AVCODEC_AARCH64_ASM_OFFSETS_H */

View File

@ -1,52 +0,0 @@
/*
* Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/aarch64/cpu.h"
#include "libavcodec/fft.h"
void ff_fft_permute_neon(FFTContext *s, FFTComplex *z);
void ff_fft_calc_neon(FFTContext *s, FFTComplex *z);
void ff_imdct_calc_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_imdct_half_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_mdct_calc_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
av_cold void ff_fft_init_aarch64(FFTContext *s)
{
int cpu_flags = av_get_cpu_flags();
if (have_neon(cpu_flags)) {
if (s->nbits < 17) {
s->fft_permute = ff_fft_permute_neon;
s->fft_calc = ff_fft_calc_neon;
}
#if CONFIG_MDCT
s->imdct_calc = ff_imdct_calc_neon;
s->imdct_half = ff_imdct_half_neon;
s->mdct_calc = ff_mdct_calc_neon;
s->mdct_permutation = FF_MDCT_PERM_INTERLEAVE;
#endif
}
}

View File

@ -1,447 +0,0 @@
/*
* ARM NEON optimised FFT
*
* Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
* Copyright (c) 2009 Naotoshi Nojiri
* Copyright (c) 2014 Janne Grunau <janne-libav@jannau.net>
*
* This algorithm (though not any of the implementation details) is
* based on libdjbfft by D. J. Bernstein.
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/aarch64/asm.S"
#define M_SQRT1_2 0.70710678118654752440
.macro transpose d0, d1, s0, s1
trn1 \d0, \s0, \s1
trn2 \d1, \s0, \s1
.endm
function fft4_neon
AARCH64_VALID_JUMP_TARGET
ld1 {v0.2s,v1.2s,v2.2s,v3.2s}, [x0]
fadd v4.2s, v0.2s, v1.2s // r0+r1,i0+i1
fsub v6.2s, v0.2s, v1.2s // r0-r1,i0-i1
ext v16.8b, v2.8b, v3.8b, #4
ext v17.8b, v3.8b, v2.8b, #4
fadd v5.2s, v2.2s, v3.2s // i2+i3,r2+r3
fsub v7.2s, v16.2s, v17.2s // r3-r2,i2-i3
fadd v0.2s, v4.2s, v5.2s
fsub v2.2s, v4.2s, v5.2s
fadd v1.2s, v6.2s, v7.2s
fsub v3.2s, v6.2s, v7.2s
st1 {v0.2s,v1.2s,v2.2s,v3.2s}, [x0]
ret
endfunc
function fft8_neon
AARCH64_VALID_JUMP_TARGET
mov x1, x0
ld1 {v0.2s, v1.2s, v2.2s, v3.2s}, [x0], #32
ld1 {v16.2s,v17.2s,v18.2s,v19.2s}, [x0]
ext v22.8b, v2.8b, v3.8b, #4
ext v23.8b, v3.8b, v2.8b, #4
fadd v4.2s, v16.2s, v17.2s // r4+r5,i4+i5
fadd v5.2s, v18.2s, v19.2s // r6+r7,i6+i7
fsub v17.2s, v16.2s, v17.2s // r4-r5,i4-i5
fsub v19.2s, v18.2s, v19.2s // r6-r7,i6-i7
rev64 v27.2s, v28.2s // ???
fadd v20.2s, v0.2s, v1.2s // r0+r1,i0+i1
fadd v21.2s, v2.2s, v3.2s // r2+r3,i2+i3
fmul v26.2s, v17.2s, v28.2s // -a2r*w,a2i*w
ext v6.8b, v4.8b, v5.8b, #4
ext v7.8b, v5.8b, v4.8b, #4
fmul v27.2s, v19.2s, v27.2s // a3r*w,-a3i*w
fsub v23.2s, v22.2s, v23.2s // i2-i3,r3-r2
fsub v22.2s, v0.2s, v1.2s // r0-r1,i0-i1
fmul v24.2s, v17.2s, v28.s[1] // a2r*w,a2i*w
fmul v25.2s, v19.2s, v28.s[1] // a3r*w,a3i*w
fadd v0.2s, v20.2s, v21.2s
fsub v2.2s, v20.2s, v21.2s
fadd v1.2s, v22.2s, v23.2s
rev64 v26.2s, v26.2s
rev64 v27.2s, v27.2s
fsub v3.2s, v22.2s, v23.2s
fsub v6.2s, v6.2s, v7.2s
fadd v24.2s, v24.2s, v26.2s // a2r+a2i,a2i-a2r t1,t2
fadd v25.2s, v25.2s, v27.2s // a3r-a3i,a3i+a3r t5,t6
fadd v7.2s, v4.2s, v5.2s
fsub v18.2s, v2.2s, v6.2s
ext v26.8b, v24.8b, v25.8b, #4
ext v27.8b, v25.8b, v24.8b, #4
fadd v2.2s, v2.2s, v6.2s
fsub v16.2s, v0.2s, v7.2s
fadd v5.2s, v25.2s, v24.2s
fsub v4.2s, v26.2s, v27.2s
fadd v0.2s, v0.2s, v7.2s
fsub v17.2s, v1.2s, v5.2s
fsub v19.2s, v3.2s, v4.2s
fadd v3.2s, v3.2s, v4.2s
fadd v1.2s, v1.2s, v5.2s
st1 {v16.2s,v17.2s,v18.2s,v19.2s}, [x0]
st1 {v0.2s, v1.2s, v2.2s, v3.2s}, [x1]
ret
endfunc
function fft16_neon
AARCH64_VALID_JUMP_TARGET
mov x1, x0
ld1 {v0.2s, v1.2s, v2.2s, v3.2s}, [x0], #32
ld1 {v16.2s,v17.2s,v18.2s,v19.2s}, [x0], #32
ext v22.8b, v2.8b, v3.8b, #4
ext v23.8b, v3.8b, v2.8b, #4
fadd v4.2s, v16.2s, v17.2s // r4+r5,i4+i5
fadd v5.2s, v18.2s, v19.2s // r6+r7,i6+i7
fsub v17.2s, v16.2s, v17.2s // r4-r5,i4-i5
fsub v19.2s, v18.2s, v19.2s // r6-r7,i6-i7
rev64 v27.2s, v28.2s // ???
fadd v20.2s, v0.2s, v1.2s // r0+r1,i0+i1
fadd v21.2s, v2.2s, v3.2s // r2+r3,i2+i3
fmul v26.2s, v17.2s, v28.2s // -a2r*w,a2i*w
ext v6.8b, v4.8b, v5.8b, #4
ext v7.8b, v5.8b, v4.8b, #4
fmul v27.2s, v19.2s, v27.2s // a3r*w,-a3i*w
fsub v23.2s, v22.2s, v23.2s // i2-i3,r3-r2
fsub v22.2s, v0.2s, v1.2s // r0-r1,i0-i1
fmul v24.2s, v17.2s, v28.s[1] // a2r*w,a2i*w
fmul v25.2s, v19.2s, v28.s[1] // a3r*w,a3i*w
fadd v0.2s, v20.2s, v21.2s
fsub v2.2s, v20.2s, v21.2s
fadd v1.2s, v22.2s, v23.2s
rev64 v26.2s, v26.2s
rev64 v27.2s, v27.2s
fsub v3.2s, v22.2s, v23.2s
fsub v6.2s, v6.2s, v7.2s
fadd v24.2s, v24.2s, v26.2s // a2r+a2i,a2i-a2r t1,t2
fadd v25.2s, v25.2s, v27.2s // a3r-a3i,a3i+a3r t5,t6
fadd v7.2s, v4.2s, v5.2s
fsub v18.2s, v2.2s, v6.2s
ld1 {v20.4s,v21.4s}, [x0], #32
ld1 {v22.4s,v23.4s}, [x0], #32
ext v26.8b, v24.8b, v25.8b, #4
ext v27.8b, v25.8b, v24.8b, #4
fadd v2.2s, v2.2s, v6.2s
fsub v16.2s, v0.2s, v7.2s
fadd v5.2s, v25.2s, v24.2s
fsub v4.2s, v26.2s, v27.2s
transpose v24.2d, v25.2d, v20.2d, v22.2d
transpose v26.2d, v27.2d, v21.2d, v23.2d
fadd v0.2s, v0.2s, v7.2s
fsub v17.2s, v1.2s, v5.2s
fsub v19.2s, v3.2s, v4.2s
fadd v3.2s, v3.2s, v4.2s
fadd v1.2s, v1.2s, v5.2s
ext v20.16b, v21.16b, v21.16b, #4
ext v21.16b, v23.16b, v23.16b, #4
zip1 v0.2d, v0.2d, v1.2d // {z[0], z[1]}
zip1 v1.2d, v2.2d, v3.2d // {z[2], z[3]}
zip1 v2.2d, v16.2d, v17.2d // {z[o1], z[o1+1]}
zip1 v3.2d, v18.2d, v19.2d // {z[o1+2],z[o1+3]}
// 2 x fft4
transpose v22.2d, v23.2d, v20.2d, v21.2d
fadd v4.4s, v24.4s, v25.4s
fadd v5.4s, v26.4s, v27.4s
fsub v6.4s, v24.4s, v25.4s
fsub v7.4s, v22.4s, v23.4s
ld1 {v23.4s}, [x14]
fadd v24.4s, v4.4s, v5.4s // {z[o2+0],z[o2+1]}
fsub v26.4s, v4.4s, v5.4s // {z[o2+2],z[o2+3]}
fadd v25.4s, v6.4s, v7.4s // {z[o3+0],z[o3+1]}
fsub v27.4s, v6.4s, v7.4s // {z[o3+2],z[o3+3]}
//fft_pass_neon_16
rev64 v7.4s, v25.4s
fmul v25.4s, v25.4s, v23.s[1]
fmul v7.4s, v7.4s, v29.4s
fmla v25.4s, v7.4s, v23.s[3] // {t1a,t2a,t5a,t6a}
zip1 v20.4s, v24.4s, v25.4s
zip2 v21.4s, v24.4s, v25.4s
fneg v22.4s, v20.4s
fadd v4.4s, v21.4s, v20.4s
fsub v6.4s, v20.4s, v21.4s // just the second half
fadd v5.4s, v21.4s, v22.4s // just the first half
tbl v4.16b, {v4.16b}, v30.16b // trans4_float
tbl v5.16b, {v5.16b,v6.16b}, v31.16b // trans8_float
fsub v20.4s, v0.4s, v4.4s // {z[o2],z[o2+1]}
fadd v16.4s, v0.4s, v4.4s // {z[0], z[1]}
fsub v22.4s, v2.4s, v5.4s // {z[o3],z[o3+1]}
fadd v18.4s, v2.4s, v5.4s // {z[o1],z[o1+1]}
//second half
rev64 v6.4s, v26.4s
fmul v26.4s, v26.4s, v23.s[2]
rev64 v7.4s, v27.4s
fmul v27.4s, v27.4s, v23.s[3]
fmul v6.4s, v6.4s, v29.4s
fmul v7.4s, v7.4s, v29.4s
fmla v26.4s, v6.4s, v23.s[2] // {t1,t2,t5,t6}
fmla v27.4s, v7.4s, v23.s[1] // {t1a,t2a,t5a,t6a}
zip1 v24.4s, v26.4s, v27.4s
zip2 v25.4s, v26.4s, v27.4s
fneg v26.4s, v24.4s
fadd v4.4s, v25.4s, v24.4s
fsub v6.4s, v24.4s, v25.4s // just the second half
fadd v5.4s, v25.4s, v26.4s // just the first half
tbl v4.16b, {v4.16b}, v30.16b // trans4_float
tbl v5.16b, {v5.16b,v6.16b}, v31.16b // trans8_float
fadd v17.4s, v1.4s, v4.4s // {z[2], z[3]}
fsub v21.4s, v1.4s, v4.4s // {z[o2+2],z[o2+3]}
fadd v19.4s, v3.4s, v5.4s // {z[o1+2],z[o1+3]}
fsub v23.4s, v3.4s, v5.4s // {z[o3+2],z[o3+3]}
st1 {v16.4s,v17.4s}, [x1], #32
st1 {v18.4s,v19.4s}, [x1], #32
st1 {v20.4s,v21.4s}, [x1], #32
st1 {v22.4s,v23.4s}, [x1], #32
ret
endfunc
const trans4_float, align=4
.byte 0, 1, 2, 3
.byte 8, 9, 10, 11
.byte 4, 5, 6, 7
.byte 12, 13, 14, 15
endconst
const trans8_float, align=4
.byte 24, 25, 26, 27
.byte 0, 1, 2, 3
.byte 28, 29, 30, 31
.byte 4, 5, 6, 7
endconst
function fft_pass_neon
sub x6, x2, #1 // n - 1, loop counter
lsl x5, x2, #3 // 2 * n * sizeof FFTSample
lsl x1, x2, #4 // 2 * n * sizeof FFTComplex
add x5, x4, x5 // wim
add x3, x1, x2, lsl #5 // 4 * n * sizeof FFTComplex
add x2, x0, x2, lsl #5 // &z[o2]
add x3, x0, x3 // &z[o3]
add x1, x0, x1 // &z[o1]
ld1 {v20.4s},[x2] // {z[o2],z[o2+1]}
ld1 {v22.4s},[x3] // {z[o3],z[o3+1]}
ld1 {v4.2s}, [x4], #8 // {wre[0],wre[1]}
trn2 v25.2d, v20.2d, v22.2d
sub x5, x5, #4 // wim--
trn1 v24.2d, v20.2d, v22.2d
ld1 {v5.s}[0], [x5], x7 // d5[0] = wim[-1]
rev64 v7.4s, v25.4s
fmul v25.4s, v25.4s, v4.s[1]
ld1 {v16.4s}, [x0] // {z[0],z[1]}
fmul v7.4s, v7.4s, v29.4s
ld1 {v17.4s}, [x1] // {z[o1],z[o1+1]}
prfm pldl1keep, [x2, #16]
prfm pldl1keep, [x3, #16]
fmla v25.4s, v7.4s, v5.s[0] // {t1a,t2a,t5a,t6a}
prfm pldl1keep, [x0, #16]
prfm pldl1keep, [x1, #16]
zip1 v20.4s, v24.4s, v25.4s
zip2 v21.4s, v24.4s, v25.4s
fneg v22.4s, v20.4s
fadd v4.4s, v21.4s, v20.4s
fsub v6.4s, v20.4s, v21.4s // just the second half
fadd v5.4s, v21.4s, v22.4s // just the first half
tbl v4.16b, {v4.16b}, v30.16b // trans4_float
tbl v5.16b, {v5.16b,v6.16b}, v31.16b // trans8_float
fadd v20.4s, v16.4s, v4.4s
fsub v22.4s, v16.4s, v4.4s
fadd v21.4s, v17.4s, v5.4s
st1 {v20.4s}, [x0], #16 // {z[0], z[1]}
fsub v23.4s, v17.4s, v5.4s
st1 {v21.4s}, [x1], #16 // {z[o1],z[o1+1]}
st1 {v22.4s}, [x2], #16 // {z[o2],z[o2+1]}
st1 {v23.4s}, [x3], #16 // {z[o3],z[o3+1]}
1:
ld1 {v20.4s},[x2] // {z[o2],z[o2+1]}
ld1 {v22.4s},[x3] // {z[o3],z[o3+1]}
ld1 {v4.2s}, [x4], #8 // {wre[0],wre[1]}
transpose v26.2d, v27.2d, v20.2d, v22.2d
ld1 {v5.2s}, [x5], x7 // {wim[-1],wim[0]}
rev64 v6.4s, v26.4s
fmul v26.4s, v26.4s, v4.s[0]
rev64 v7.4s, v27.4s
fmul v27.4s, v27.4s, v4.s[1]
fmul v6.4s, v6.4s, v29.4s
fmul v7.4s, v7.4s, v29.4s
ld1 {v16.4s},[x0] // {z[0],z[1]}
fmla v26.4s, v6.4s, v5.s[1] // {t1,t2,t5,t6}
fmla v27.4s, v7.4s, v5.s[0] // {t1a,t2a,t5a,t6a}
ld1 {v17.4s},[x1] // {z[o1],z[o1+1]}
subs x6, x6, #1 // n--
zip1 v20.4s, v26.4s, v27.4s
zip2 v21.4s, v26.4s, v27.4s
fneg v22.4s, v20.4s
fadd v4.4s, v21.4s, v20.4s
fsub v6.4s, v20.4s, v21.4s // just the second half
fadd v5.4s, v21.4s, v22.4s // just the first half
tbl v4.16b, {v4.16b}, v30.16b // trans4_float
tbl v5.16b, {v5.16b,v6.16b}, v31.16b // trans8_float
fadd v20.4s, v16.4s, v4.4s
fsub v22.4s, v16.4s, v4.4s
fadd v21.4s, v17.4s, v5.4s
st1 {v20.4s}, [x0], #16 // {z[0], z[1]}
fsub v23.4s, v17.4s, v5.4s
st1 {v21.4s}, [x1], #16 // {z[o1],z[o1+1]}
st1 {v22.4s}, [x2], #16 // {z[o2],z[o2+1]}
st1 {v23.4s}, [x3], #16 // {z[o3],z[o3+1]}
b.ne 1b
ret
endfunc
.macro def_fft n, n2, n4
function fft\n\()_neon, align=6
AARCH64_VALID_JUMP_TARGET
AARCH64_SIGN_LINK_REGISTER
stp x28, x30, [sp, #-16]!
add x28, x0, #\n4*2*8
bl fft\n2\()_neon
mov x0, x28
bl fft\n4\()_neon
add x0, x28, #\n4*1*8
bl fft\n4\()_neon
sub x0, x28, #\n4*2*8
ldp x28, x30, [sp], #16
AARCH64_VALIDATE_LINK_REGISTER
movrel x4, X(ff_cos_\n)
mov x2, #\n4>>1
b fft_pass_neon
endfunc
.endm
def_fft 32, 16, 8
def_fft 64, 32, 16
def_fft 128, 64, 32
def_fft 256, 128, 64
def_fft 512, 256, 128
def_fft 1024, 512, 256
def_fft 2048, 1024, 512
def_fft 4096, 2048, 1024
def_fft 8192, 4096, 2048
def_fft 16384, 8192, 4096
def_fft 32768, 16384, 8192
def_fft 65536, 32768, 16384
function ff_fft_calc_neon, export=1
prfm pldl1keep, [x1]
movrel x10, trans4_float
ldr w2, [x0]
movrel x11, trans8_float
sub w2, w2, #2
movrel x3, fft_tab_neon
ld1 {v30.16b}, [x10]
mov x7, #-8
movrel x12, pmmp
ldr x3, [x3, x2, lsl #3]
movrel x13, mppm
movrel x14, X(ff_cos_16)
ld1 {v31.16b}, [x11]
mov x0, x1
ld1 {v29.4s}, [x12] // pmmp
ld1 {v28.4s}, [x13]
br x3
endfunc
function ff_fft_permute_neon, export=1
mov x6, #1
ldr w2, [x0] // nbits
ldr x3, [x0, #16] // tmp_buf
ldr x0, [x0, #8] // revtab
lsl x6, x6, x2
mov x2, x6
1:
ld1 {v0.2s,v1.2s}, [x1], #16
ldr w4, [x0], #4
uxth w5, w4
lsr w4, w4, #16
add x5, x3, x5, lsl #3
add x4, x3, x4, lsl #3
st1 {v0.2s}, [x5]
st1 {v1.2s}, [x4]
subs x6, x6, #2
b.gt 1b
sub x1, x1, x2, lsl #3
1:
ld1 {v0.4s,v1.4s}, [x3], #32
st1 {v0.4s,v1.4s}, [x1], #32
subs x2, x2, #4
b.gt 1b
ret
endfunc
const fft_tab_neon, relocate=1
.quad fft4_neon
.quad fft8_neon
.quad fft16_neon
.quad fft32_neon
.quad fft64_neon
.quad fft128_neon
.quad fft256_neon
.quad fft512_neon
.quad fft1024_neon
.quad fft2048_neon
.quad fft4096_neon
.quad fft8192_neon
.quad fft16384_neon
.quad fft32768_neon
.quad fft65536_neon
endconst
const pmmp, align=4
.float +1.0, -1.0, -1.0, +1.0
endconst
const mppm, align=4
.float -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2
endconst

View File

@ -1,326 +0,0 @@
/*
* AArch64 NEON optimised MDCT
* Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
* Copyright (c) 2014 Janne Grunau <janne-libav@jannau.net>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/aarch64/asm.S"
function ff_imdct_half_neon, export=1
stp x19, x20, [sp, #-32]!
AARCH64_SIGN_LINK_REGISTER
str x30, [sp, #16]
mov x12, #1
ldr w14, [x0, #28] // mdct_bits
ldr x4, [x0, #32] // tcos
ldr x3, [x0, #8] // revtab
lsl x12, x12, x14 // n = 1 << nbits
lsr x14, x12, #2 // n4 = n >> 2
add x7, x2, x12, lsl #1
mov x12, #-16
sub x7, x7, #16
ld2 {v16.2s,v17.2s}, [x7], x12 // d16=x,n1 d17=x,n0
ld2 {v0.2s,v1.2s}, [x2], #16 // d0 =m0,x d1 =m1,x
rev64 v17.2s, v17.2s
ld2 {v2.2s,v3.2s}, [x4], #16 // d2=c0,c1 d3=s0,s2
fmul v6.2s, v17.2s, v2.2s
fmul v7.2s, v0.2s, v2.2s
1:
subs x14, x14, #2
ldr w6, [x3], #4
fmul v4.2s, v0.2s, v3.2s
fmul v5.2s, v17.2s, v3.2s
fsub v4.2s, v6.2s, v4.2s
fadd v5.2s, v5.2s, v7.2s
ubfm x8, x6, #16, #31
ubfm x6, x6, #0, #15
add x8, x1, x8, lsl #3
add x6, x1, x6, lsl #3
b.eq 2f
ld2 {v16.2s,v17.2s}, [x7], x12
ld2 {v0.2s,v1.2s}, [x2], #16
rev64 v17.2s, v17.2s
ld2 {v2.2s,v3.2s}, [x4], #16 // d2=c0,c1 d3=s0,s2
fmul v6.2s, v17.2s, v2.2s
fmul v7.2s, v0.2s, v2.2s
st2 {v4.s,v5.s}[0], [x6]
st2 {v4.s,v5.s}[1], [x8]
b 1b
2:
st2 {v4.s,v5.s}[0], [x6]
st2 {v4.s,v5.s}[1], [x8]
mov x19, x0
mov x20, x1
bl X(ff_fft_calc_neon)
mov x12, #1
ldr w14, [x19, #28] // mdct_bits
ldr x4, [x19, #32] // tcos
lsl x12, x12, x14 // n = 1 << nbits
lsr x14, x12, #3 // n8 = n >> 3
add x4, x4, x14, lsl #3
add x6, x20, x14, lsl #3
sub x1, x4, #16
sub x3, x6, #16
mov x7, #-16
mov x8, x6
mov x0, x3
ld2 {v0.2s,v1.2s}, [x3], x7 // d0 =i1,r1 d1 =i0,r0
ld2 {v20.2s,v21.2s},[x6], #16 // d20=i2,r2 d21=i3,r3
ld2 {v16.2s,v17.2s},[x1], x7 // d16=c1,c0 d18=s1,s0
3:
subs x14, x14, #2
fmul v7.2s, v0.2s, v17.2s
ld2 {v18.2s,v19.2s},[x4], #16 // d17=c2,c3 d19=s2,s3
fmul v4.2s, v1.2s, v17.2s
fmul v6.2s, v21.2s, v19.2s
fmul v5.2s, v20.2s, v19.2s
fmul v22.2s, v1.2s, v16.2s
fmul v23.2s, v21.2s, v18.2s
fmul v24.2s, v0.2s, v16.2s
fmul v25.2s, v20.2s, v18.2s
fadd v7.2s, v7.2s, v22.2s
fadd v5.2s, v5.2s, v23.2s
fsub v4.2s, v4.2s, v24.2s
fsub v6.2s, v6.2s, v25.2s
b.eq 4f
ld2 {v0.2s,v1.2s}, [x3], x7
ld2 {v20.2s,v21.2s},[x6], #16
ld2 {v16.2s,v17.2s},[x1], x7 // d16=c1,c0 d18=s1,s0
rev64 v5.2s, v5.2s
rev64 v7.2s, v7.2s
st2 {v4.2s,v5.2s}, [x0], x7
st2 {v6.2s,v7.2s}, [x8], #16
b 3b
4:
rev64 v5.2s, v5.2s
rev64 v7.2s, v7.2s
st2 {v4.2s,v5.2s}, [x0]
st2 {v6.2s,v7.2s}, [x8]
ldr x30, [sp, #16]
AARCH64_VALIDATE_LINK_REGISTER
ldp x19, x20, [sp], #32
ret
endfunc
function ff_imdct_calc_neon, export=1
stp x19, x20, [sp, #-32]!
AARCH64_SIGN_LINK_REGISTER
str x30, [sp, #16]
ldr w3, [x0, #28] // mdct_bits
mov x19, #1
mov x20, x1
lsl x19, x19, x3
add x1, x1, x19
bl X(ff_imdct_half_neon)
add x0, x20, x19, lsl #2
add x1, x20, x19, lsl #1
sub x0, x0, #8
sub x2, x1, #16
mov x3, #-16
mov x6, #-8
1:
ld1 {v0.4s}, [x2], x3
prfum pldl1keep, [x0, #-16]
rev64 v0.4s, v0.4s
ld1 {v2.2s,v3.2s}, [x1], #16
fneg v4.4s, v0.4s
prfum pldl1keep, [x2, #-16]
rev64 v2.2s, v2.2s
rev64 v3.2s, v3.2s
ext v4.16b, v4.16b, v4.16b, #8
st1 {v2.2s}, [x0], x6
st1 {v3.2s}, [x0], x6
st1 {v4.4s}, [x20], #16
subs x19, x19, #16
b.gt 1b
ldr x30, [sp, #16]
AARCH64_VALIDATE_LINK_REGISTER
ldp x19, x20, [sp], #32
ret
endfunc
function ff_mdct_calc_neon, export=1
stp x19, x20, [sp, #-32]!
AARCH64_SIGN_LINK_REGISTER
str x30, [sp, #16]
mov x12, #1
ldr w14, [x0, #28] // mdct_bits
ldr x4, [x0, #32] // tcos
ldr x3, [x0, #8] // revtab
lsl x14, x12, x14 // n = 1 << nbits
add x7, x2, x14 // in4u
sub x9, x7, #16 // in4d
add x2, x7, x14, lsl #1 // in3u
add x8, x9, x14, lsl #1 // in3d
add x5, x4, x14, lsl #1
sub x5, x5, #16
sub x3, x3, #4
mov x12, #-16
lsr x13, x14, #1
ld2 {v16.2s,v17.2s}, [x9], x12 // in0u0,in0u1 in4d1,in4d0
ld2 {v18.2s,v19.2s}, [x8], x12 // in2u0,in2u1 in3d1,in3d0
ld2 {v0.2s, v1.2s}, [x7], #16 // in4u0,in4u1 in2d1,in2d0
rev64 v17.2s, v17.2s // in4d0,in4d1 in3d0,in3d1
rev64 v19.2s, v19.2s // in4d0,in4d1 in3d0,in3d1
ld2 {v2.2s, v3.2s}, [x2], #16 // in3u0,in3u1 in1d1,in1d0
fsub v0.2s, v17.2s, v0.2s // in4d-in4u I
ld2 {v20.2s,v21.2s}, [x4], #16 // c0,c1 s0,s1
rev64 v1.2s, v1.2s // in2d0,in2d1 in1d0,in1d1
rev64 v3.2s, v3.2s // in2d0,in2d1 in1d0,in1d1
ld2 {v30.2s,v31.2s}, [x5], x12 // c2,c3 s2,s3
fadd v2.2s, v2.2s, v19.2s // in3u+in3d -R
fsub v16.2s, v16.2s, v1.2s // in0u-in2d R
fadd v18.2s, v18.2s, v3.2s // in2u+in1d -I
1:
fmul v7.2s, v0.2s, v21.2s // I*s
ldr w10, [x3, x13]
fmul v6.2s, v2.2s, v20.2s // -R*c
ldr w6, [x3, #4]!
fmul v4.2s, v2.2s, v21.2s // -R*s
fmul v5.2s, v0.2s, v20.2s // I*c
fmul v24.2s, v16.2s, v30.2s // R*c
fmul v25.2s, v18.2s, v31.2s // -I*s
fmul v22.2s, v16.2s, v31.2s // R*s
fmul v23.2s, v18.2s, v30.2s // I*c
subs x14, x14, #16
subs x13, x13, #8
fsub v6.2s, v6.2s, v7.2s // -R*c-I*s
fadd v7.2s, v4.2s, v5.2s // -R*s+I*c
fsub v24.2s, v25.2s, v24.2s // I*s-R*c
fadd v25.2s, v22.2s, v23.2s // R*s-I*c
b.eq 1f
mov x12, #-16
ld2 {v16.2s,v17.2s}, [x9], x12 // in0u0,in0u1 in4d1,in4d0
ld2 {v18.2s,v19.2s}, [x8], x12 // in2u0,in2u1 in3d1,in3d0
fneg v7.2s, v7.2s // R*s-I*c
ld2 {v0.2s, v1.2s}, [x7], #16 // in4u0,in4u1 in2d1,in2d0
rev64 v17.2s, v17.2s // in4d0,in4d1 in3d0,in3d1
rev64 v19.2s, v19.2s // in4d0,in4d1 in3d0,in3d1
ld2 {v2.2s, v3.2s}, [x2], #16 // in3u0,in3u1 in1d1,in1d0
fsub v0.2s, v17.2s, v0.2s // in4d-in4u I
ld2 {v20.2s,v21.2s}, [x4], #16 // c0,c1 s0,s1
rev64 v1.2s, v1.2s // in2d0,in2d1 in1d0,in1d1
rev64 v3.2s, v3.2s // in2d0,in2d1 in1d0,in1d1
ld2 {v30.2s,v31.2s}, [x5], x12 // c2,c3 s2,s3
fadd v2.2s, v2.2s, v19.2s // in3u+in3d -R
fsub v16.2s, v16.2s, v1.2s // in0u-in2d R
fadd v18.2s, v18.2s, v3.2s // in2u+in1d -I
ubfm x12, x6, #16, #31
ubfm x6, x6, #0, #15
add x12, x1, x12, lsl #3
add x6, x1, x6, lsl #3
st2 {v6.s,v7.s}[0], [x6]
st2 {v6.s,v7.s}[1], [x12]
ubfm x6, x10, #16, #31
ubfm x10, x10, #0, #15
add x6 , x1, x6, lsl #3
add x10, x1, x10, lsl #3
st2 {v24.s,v25.s}[0], [x10]
st2 {v24.s,v25.s}[1], [x6]
b 1b
1:
fneg v7.2s, v7.2s // R*s-I*c
ubfm x12, x6, #16, #31
ubfm x6, x6, #0, #15
add x12, x1, x12, lsl #3
add x6, x1, x6, lsl #3
st2 {v6.s,v7.s}[0], [x6]
st2 {v6.s,v7.s}[1], [x12]
ubfm x6, x10, #16, #31
ubfm x10, x10, #0, #15
add x6 , x1, x6, lsl #3
add x10, x1, x10, lsl #3
st2 {v24.s,v25.s}[0], [x10]
st2 {v24.s,v25.s}[1], [x6]
mov x19, x0
mov x20, x1
bl X(ff_fft_calc_neon)
mov x12, #1
ldr w14, [x19, #28] // mdct_bits
ldr x4, [x19, #32] // tcos
lsl x12, x12, x14 // n = 1 << nbits
lsr x14, x12, #3 // n8 = n >> 3
add x4, x4, x14, lsl #3
add x6, x20, x14, lsl #3
sub x1, x4, #16
sub x3, x6, #16
mov x7, #-16
mov x8, x6
mov x0, x3
ld2 {v0.2s,v1.2s}, [x3], x7 // d0 =r1,i1 d1 =r0,i0
ld2 {v20.2s,v21.2s}, [x6], #16 // d20=r2,i2 d21=r3,i3
ld2 {v16.2s,v17.2s}, [x1], x7 // c1,c0 s1,s0
1:
subs x14, x14, #2
fmul v7.2s, v0.2s, v17.2s // r1*s1,r0*s0
ld2 {v18.2s,v19.2s}, [x4], #16 // c2,c3 s2,s3
fmul v4.2s, v1.2s, v17.2s // i1*s1,i0*s0
fmul v6.2s, v21.2s, v19.2s // i2*s2,i3*s3
fmul v5.2s, v20.2s, v19.2s // r2*s2,r3*s3
fmul v24.2s, v0.2s, v16.2s // r1*c1,r0*c0
fmul v25.2s, v20.2s, v18.2s // r2*c2,r3*c3
fmul v22.2s, v21.2s, v18.2s // i2*c2,i3*c3
fmul v23.2s, v1.2s, v16.2s // i1*c1,i0*c0
fadd v4.2s, v4.2s, v24.2s // i1*s1+r1*c1,i0*s0+r0*c0
fadd v6.2s, v6.2s, v25.2s // i2*s2+r2*c2,i3*s3+r3*c3
fsub v5.2s, v22.2s, v5.2s // i2*c2-r2*s2,i3*c3-r3*s3
fsub v7.2s, v23.2s, v7.2s // i1*c1-r1*s1,i0*c0-r0*s0
fneg v4.2s, v4.2s
fneg v6.2s, v6.2s
b.eq 1f
ld2 {v0.2s, v1.2s}, [x3], x7
ld2 {v20.2s,v21.2s}, [x6], #16
ld2 {v16.2s,v17.2s}, [x1], x7 // c1,c0 s1,s0
rev64 v5.2s, v5.2s
rev64 v7.2s, v7.2s
st2 {v4.2s,v5.2s}, [x0], x7
st2 {v6.2s,v7.2s}, [x8], #16
b 1b
1:
rev64 v5.2s, v5.2s
rev64 v7.2s, v7.2s
st2 {v4.2s,v5.2s}, [x0]
st2 {v6.2s,v7.2s}, [x8]
ldr x30, [sp, #16]
AARCH64_VALIDATE_LINK_REGISTER
ldp x19, x20, [sp], #32
ret
endfunc

View File

@ -23,15 +23,8 @@
#include "libavutil/aarch64/cpu.h"
#include "libavutil/attributes.h"
#include "libavutil/internal.h"
#include "libavcodec/fft.h"
#include "libavcodec/synth_filter.h"
#include "asm-offsets.h"
#if HAVE_NEON
AV_CHECK_OFFSET(FFTContext, imdct_half, IMDCT_HALF);
#endif
void ff_synth_filter_float_neon(AVTXContext *imdct,
float *synth_buf_ptr, int *synth_buf_offset,
float synth_buf2[32], const float window[512],

View File

@ -19,8 +19,6 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "asm-offsets.h"
#include "libavutil/aarch64/asm.S"
.macro inner_loop

View File

@ -5,7 +5,6 @@ OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_init_arm.o \
arm/ac3dsp_arm.o
OBJS-$(CONFIG_AUDIODSP) += arm/audiodsp_init_arm.o
OBJS-$(CONFIG_BLOCKDSP) += arm/blockdsp_init_arm.o
OBJS-$(CONFIG_FFT) += arm/fft_init_arm.o
OBJS-$(CONFIG_FMTCONVERT) += arm/fmtconvert_init_arm.o
OBJS-$(CONFIG_G722DSP) += arm/g722dsp_init_arm.o
OBJS-$(CONFIG_H264CHROMA) += arm/h264chroma_init_arm.o
@ -25,7 +24,6 @@ OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_arm.o
OBJS-$(CONFIG_MPEGVIDEOENC) += arm/mpegvideoencdsp_init_arm.o
OBJS-$(CONFIG_NEON_CLOBBER_TEST) += arm/neontest.o
OBJS-$(CONFIG_PIXBLOCKDSP) += arm/pixblockdsp_init_arm.o
OBJS-$(CONFIG_RDFT) += arm/rdft_init_arm.o
OBJS-$(CONFIG_RV34DSP) += arm/rv34dsp_init_arm.o
OBJS-$(CONFIG_VC1DSP) += arm/vc1dsp_init_arm.o
OBJS-$(CONFIG_VIDEODSP) += arm/videodsp_init_arm.o
@ -90,9 +88,7 @@ ARMV6-OBJS-$(CONFIG_TRUEHD_DECODER) += arm/mlpdsp_armv6.o
# VFP optimizations
# subsystems
VFP-OBJS-$(CONFIG_FFT) += arm/fft_vfp.o
VFP-OBJS-$(CONFIG_FMTCONVERT) += arm/fmtconvert_vfp.o
VFP-OBJS-$(CONFIG_MDCT) += arm/mdct_vfp.o
# decoders/encoders
VFP-OBJS-$(CONFIG_DCA_DECODER) += arm/synth_filter_vfp.o
@ -107,7 +103,6 @@ NEON-OBJS-$(CONFIG_AUDIODSP) += arm/audiodsp_init_neon.o \
arm/int_neon.o
NEON-OBJS-$(CONFIG_BLOCKDSP) += arm/blockdsp_init_neon.o \
arm/blockdsp_neon.o
NEON-OBJS-$(CONFIG_FFT) += arm/fft_neon.o
NEON-OBJS-$(CONFIG_FMTCONVERT) += arm/fmtconvert_neon.o
NEON-OBJS-$(CONFIG_G722DSP) += arm/g722dsp_neon.o
NEON-OBJS-$(CONFIG_H264CHROMA) += arm/h264cmc_neon.o
@ -121,10 +116,8 @@ NEON-OBJS-$(CONFIG_HPELDSP) += arm/hpeldsp_init_neon.o \
NEON-OBJS-$(CONFIG_IDCTDSP) += arm/idctdsp_init_neon.o \
arm/idctdsp_neon.o \
arm/simple_idct_neon.o
NEON-OBJS-$(CONFIG_MDCT) += arm/mdct_neon.o
NEON-OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_neon.o
NEON-OBJS-$(CONFIG_PIXBLOCKDSP) += arm/pixblockdsp_neon.o
NEON-OBJS-$(CONFIG_RDFT) += arm/rdft_neon.o
NEON-OBJS-$(CONFIG_VC1DSP) += arm/vc1dsp_init_neon.o \
arm/vc1dsp_neon.o
NEON-OBJS-$(CONFIG_VP3DSP) += arm/vp3dsp_neon.o

View File

@ -1,63 +0,0 @@
/*
* Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/arm/cpu.h"
#include "libavcodec/fft.h"
void ff_fft_calc_vfp(FFTContext *s, FFTComplex *z);
void ff_fft_permute_neon(FFTContext *s, FFTComplex *z);
void ff_fft_calc_neon(FFTContext *s, FFTComplex *z);
void ff_imdct_half_vfp(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_imdct_calc_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_imdct_half_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_mdct_calc_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
av_cold void ff_fft_init_arm(FFTContext *s)
{
int cpu_flags = av_get_cpu_flags();
if (have_vfp_vm(cpu_flags)) {
s->fft_calc = ff_fft_calc_vfp;
#if CONFIG_MDCT
s->imdct_half = ff_imdct_half_vfp;
#endif
}
if (have_neon(cpu_flags)) {
#if CONFIG_FFT
if (s->nbits < 17) {
s->fft_permute = ff_fft_permute_neon;
s->fft_calc = ff_fft_calc_neon;
}
#endif
#if CONFIG_MDCT
s->imdct_calc = ff_imdct_calc_neon;
s->imdct_half = ff_imdct_half_neon;
s->mdct_calc = ff_mdct_calc_neon;
s->mdct_permutation = FF_MDCT_PERM_INTERLEAVE;
#endif
}
}

375