FFmpeg
rv40dsp_init_arm.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2011 Janne Grunau <janne-libav@jannau.net>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include <stdint.h>
22 
23 #include "libavutil/attributes.h"
24 #include "libavcodec/avcodec.h"
25 #include "libavcodec/rv34dsp.h"
26 #include "libavutil/arm/cpu.h"
27 
/*
 * Prototype generators for the NEON RV40 quarter-pel functions.
 *
 * DECL_QPEL3(type, w, pos) expands to one prototype, without a trailing
 * semicolon:
 *   void ff_<type>_rv40_qpel<w>_mc<pos>_neon(uint8_t *dst,
 *                                            const uint8_t *src,
 *                                            ptrdiff_t stride)
 * DECL_QPEL2(w, pos) declares the "put" and the "avg" variant for one
 * width/position pair.
 * DECL_QPEL_XY(x, y) declares both variants for widths 16 and 8; the
 * position suffix is formed by pasting x and y together (x first).
 *
 * None of these macros supplies the final semicolon; the user of the macro
 * writes it.
 */
28 #define DECL_QPEL3(type, w, pos) \
29 void ff_ ## type ## _rv40_qpel ## w ## _mc ## pos ## _neon(uint8_t *dst, \
30  const uint8_t *src, \
31  ptrdiff_t stride)
32 
33 #define DECL_QPEL2(w, pos) \
34  DECL_QPEL3(put, w, pos); \
35  DECL_QPEL3(avg, w, pos)
36 
37 #define DECL_QPEL_XY(x, y) \
38  DECL_QPEL2(16, x ## y); \
39  DECL_QPEL2(8, x ## y)
40 
/*
 * DECL_QPEL_Y(y) declares the put/avg, 16- and 8-wide prototypes for the
 * four positions x = 0..3 combined with the given y. Like the other DECL_*
 * macros it does not end in a semicolon; the invocation supplies it. (A
 * trailing "; \" here would continue the definition onto the next source
 * line and make every use expand to an empty declaration at file scope.)
 *
 * The four invocations below cover y = 0..3, i.e. all sixteen mc<x><y>
 * suffixes from mc00 to mc33.
 */
41 #define DECL_QPEL_Y(y) \
42  DECL_QPEL_XY(0, y); \
43  DECL_QPEL_XY(1, y); \
44  DECL_QPEL_XY(2, y); \
45  DECL_QPEL_XY(3, y)
46 
47 DECL_QPEL_Y(0);
48 DECL_QPEL_Y(1);
49 DECL_QPEL_Y(2);
50 DECL_QPEL_Y(3);
51 
/*
 * NEON chroma motion-compensation entry points (put/avg, mc8/mc4). Only the
 * prototypes appear here; no definition is visible in this file. The init
 * code further down stores them in c->put_chroma_pixels_tab[] and
 * c->avg_chroma_pixels_tab[] (index 0 = mc8, index 1 = mc4).
 * NOTE(review): the parameters are unnamed; presumably two pixel pointers
 * followed by a stride, a height and an x/y position -- confirm against the
 * implementation before relying on this.
 */
52 void ff_put_rv40_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
53 void ff_put_rv40_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
54 
55 void ff_avg_rv40_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
56 void ff_avg_rv40_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
57 
/*
 * NEON weighting functions, stored by the init code in
 * c->rv40_weight_pixels_tab[0][0] (16) and [0][1] (8).
 * NOTE(review): parameters are unnamed; the three pointers are presumably one
 * destination and two sources, followed by two weights and a stride --
 * confirm against the implementation.
 */
58 void ff_rv40_weight_func_16_neon(uint8_t *, uint8_t *, uint8_t *, int, int, ptrdiff_t);
59 void ff_rv40_weight_func_8_neon(uint8_t *, uint8_t *, uint8_t *, int, int, ptrdiff_t);
60 
/*
 * NEON loop-filter strength functions, one for the "h" and one for the "v"
 * direction. Both take the same arguments and return an int. The init code
 * further down stores them in c->rv40_loop_filter_strength[0] (h) and [1] (v).
 * NOTE(review): p1 and q1 are int pointers and are presumably written by the
 * callee; the meaning of the return value is not visible here -- confirm
 * against the implementation.
 */
61 int ff_rv40_h_loop_filter_strength_neon(uint8_t *src, ptrdiff_t stride,
62  int beta, int beta2, int edge,
63  int *p1, int *q1);
64 int ff_rv40_v_loop_filter_strength_neon(uint8_t *src, ptrdiff_t stride,
65  int beta, int beta2, int edge,
66  int *p1, int *q1);
67 
/*
 * NEON weak loop filters for the "h" and "v" directions, with identical
 * signatures. The init code stores them in c->rv40_weak_loop_filter[0] (h)
 * and [1] (v). Definitions are not part of this file.
 */
68 void ff_rv40_h_weak_loop_filter_neon(uint8_t *src, ptrdiff_t stride, int filter_p1,
69  int filter_q1, int alpha, int beta,
70  int lim_p0q0, int lim_q1, int lim_p1);
71 void ff_rv40_v_weak_loop_filter_neon(uint8_t *src, ptrdiff_t stride, int filter_p1,
72  int filter_q1, int alpha, int beta,
73  int lim_p0q0, int lim_q1, int lim_p1);
74 
/*
 * Install the NEON implementations into an RV34DSPContext.
 *
 * Only the table slots listed below are written; every other field of *c is
 * left as the caller set it up.
 *
 * @param c DSP context whose function-pointer tables are overwritten
 */
75 static av_cold void rv40dsp_init_neon(RV34DSPContext *c)
76 {
/*
 * Quarter-pel tables: first index 0 = 16-wide, 1 = 8-wide functions.
 * The second index is x + 4 * y for the function suffix mc<x><y>
 * (e.g. 1 -> mc10, 4 -> mc01, 15 -> mc33). Slots 0, 2 and 8
 * (mc00, mc20, mc02) are not assigned here.
 */
77  c->put_pixels_tab[0][ 1] = ff_put_rv40_qpel16_mc10_neon;
78  c->put_pixels_tab[0][ 3] = ff_put_rv40_qpel16_mc30_neon;
79  c->put_pixels_tab[0][ 4] = ff_put_rv40_qpel16_mc01_neon;
80  c->put_pixels_tab[0][ 5] = ff_put_rv40_qpel16_mc11_neon;
81  c->put_pixels_tab[0][ 6] = ff_put_rv40_qpel16_mc21_neon;
82  c->put_pixels_tab[0][ 7] = ff_put_rv40_qpel16_mc31_neon;
83  c->put_pixels_tab[0][ 9] = ff_put_rv40_qpel16_mc12_neon;
84  c->put_pixels_tab[0][10] = ff_put_rv40_qpel16_mc22_neon;
85  c->put_pixels_tab[0][11] = ff_put_rv40_qpel16_mc32_neon;
86  c->put_pixels_tab[0][12] = ff_put_rv40_qpel16_mc03_neon;
87  c->put_pixels_tab[0][13] = ff_put_rv40_qpel16_mc13_neon;
88  c->put_pixels_tab[0][14] = ff_put_rv40_qpel16_mc23_neon;
89  c->put_pixels_tab[0][15] = ff_put_rv40_qpel16_mc33_neon;
90  c->avg_pixels_tab[0][ 1] = ff_avg_rv40_qpel16_mc10_neon;
91  c->avg_pixels_tab[0][ 3] = ff_avg_rv40_qpel16_mc30_neon;
92  c->avg_pixels_tab[0][ 4] = ff_avg_rv40_qpel16_mc01_neon;
93  c->avg_pixels_tab[0][ 5] = ff_avg_rv40_qpel16_mc11_neon;
94  c->avg_pixels_tab[0][ 6] = ff_avg_rv40_qpel16_mc21_neon;
95  c->avg_pixels_tab[0][ 7] = ff_avg_rv40_qpel16_mc31_neon;
96  c->avg_pixels_tab[0][ 9] = ff_avg_rv40_qpel16_mc12_neon;
97  c->avg_pixels_tab[0][10] = ff_avg_rv40_qpel16_mc22_neon;
98  c->avg_pixels_tab[0][11] = ff_avg_rv40_qpel16_mc32_neon;
99  c->avg_pixels_tab[0][12] = ff_avg_rv40_qpel16_mc03_neon;
100  c->avg_pixels_tab[0][13] = ff_avg_rv40_qpel16_mc13_neon;
101  c->avg_pixels_tab[0][14] = ff_avg_rv40_qpel16_mc23_neon;
102  c->avg_pixels_tab[0][15] = ff_avg_rv40_qpel16_mc33_neon;
103  c->put_pixels_tab[1][ 1] = ff_put_rv40_qpel8_mc10_neon;
104  c->put_pixels_tab[1][ 3] = ff_put_rv40_qpel8_mc30_neon;
105  c->put_pixels_tab[1][ 4] = ff_put_rv40_qpel8_mc01_neon;
106  c->put_pixels_tab[1][ 5] = ff_put_rv40_qpel8_mc11_neon;
107  c->put_pixels_tab[1][ 6] = ff_put_rv40_qpel8_mc21_neon;
108  c->put_pixels_tab[1][ 7] = ff_put_rv40_qpel8_mc31_neon;
109  c->put_pixels_tab[1][ 9] = ff_put_rv40_qpel8_mc12_neon;
110  c->put_pixels_tab[1][10] = ff_put_rv40_qpel8_mc22_neon;
111  c->put_pixels_tab[1][11] = ff_put_rv40_qpel8_mc32_neon;
112  c->put_pixels_tab[1][12] = ff_put_rv40_qpel8_mc03_neon;
113  c->put_pixels_tab[1][13] = ff_put_rv40_qpel8_mc13_neon;
114  c->put_pixels_tab[1][14] = ff_put_rv40_qpel8_mc23_neon;
115  c->put_pixels_tab[1][15] = ff_put_rv40_qpel8_mc33_neon;
116  c->avg_pixels_tab[1][ 1] = ff_avg_rv40_qpel8_mc10_neon;
117  c->avg_pixels_tab[1][ 3] = ff_avg_rv40_qpel8_mc30_neon;
118  c->avg_pixels_tab[1][ 4] = ff_avg_rv40_qpel8_mc01_neon;
119  c->avg_pixels_tab[1][ 5] = ff_avg_rv40_qpel8_mc11_neon;
120  c->avg_pixels_tab[1][ 6] = ff_avg_rv40_qpel8_mc21_neon;
121  c->avg_pixels_tab[1][ 7] = ff_avg_rv40_qpel8_mc31_neon;
122  c->avg_pixels_tab[1][ 9] = ff_avg_rv40_qpel8_mc12_neon;
123  c->avg_pixels_tab[1][10] = ff_avg_rv40_qpel8_mc22_neon;
124  c->avg_pixels_tab[1][11] = ff_avg_rv40_qpel8_mc32_neon;
125  c->avg_pixels_tab[1][12] = ff_avg_rv40_qpel8_mc03_neon;
126  c->avg_pixels_tab[1][13] = ff_avg_rv40_qpel8_mc13_neon;
127  c->avg_pixels_tab[1][14] = ff_avg_rv40_qpel8_mc23_neon;
128  c->avg_pixels_tab[1][15] = ff_avg_rv40_qpel8_mc33_neon;
129 
/* Chroma motion compensation: index 0 = mc8, index 1 = mc4. */
130  c->put_chroma_pixels_tab[0] = ff_put_rv40_chroma_mc8_neon;
131  c->put_chroma_pixels_tab[1] = ff_put_rv40_chroma_mc4_neon;
132  c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_neon;
133  c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_neon;
134 
/* Weighting: only row 0 of the table is replaced (16- and 8-wide). */
135  c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_16_neon;
136  c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_8_neon;
137 
/* Loop filter: index 0 = "h" variant, index 1 = "v" variant. */
138  c->rv40_loop_filter_strength[0] = ff_rv40_h_loop_filter_strength_neon;
139  c->rv40_loop_filter_strength[1] = ff_rv40_v_loop_filter_strength_neon;
140  c->rv40_weak_loop_filter[0] = ff_rv40_h_weak_loop_filter_neon;
141  c->rv40_weak_loop_filter[1] = ff_rv40_v_weak_loop_filter_neon;
142 }
143 
/*
 * ARM entry point for RV40 DSP initialisation.
 *
 * Queries the CPU flags once and installs the NEON function pointers only
 * when have_neon() reports NEON support; otherwise *c is left untouched.
 *
 * @param c DSP context to update
 */
144 av_cold void ff_rv40dsp_init_arm(RV34DSPContext *c)
145 {
146  int cpu_flags = av_get_cpu_flags();
147 
148  if (have_neon(cpu_flags))
149  rv40dsp_init_neon(c);
150 }
stride
int stride
Definition: mace.c:144
q1
static const uint8_t q1[256]
Definition: twofish.c:96
ff_rv40_h_loop_filter_strength_neon
int ff_rv40_h_loop_filter_strength_neon(uint8_t *src, ptrdiff_t stride, int beta, int beta2, int edge, int *p1, int *q1)
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:95
cpu_flags
static atomic_int cpu_flags
Definition: cpu.c:50
ff_avg_rv40_chroma_mc4_neon
void ff_avg_rv40_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int)
av_cold
#define av_cold
Definition: attributes.h:90
ff_rv40_h_weak_loop_filter_neon
void ff_rv40_h_weak_loop_filter_neon(uint8_t *src, ptrdiff_t stride, int filter_p1, int filter_q1, int alpha, int beta, int lim_p0q0, int lim_q1, int lim_p1)
ff_rv40_v_weak_loop_filter_neon
void ff_rv40_v_weak_loop_filter_neon(uint8_t *src, ptrdiff_t stride, int filter_p1, int filter_q1, int alpha, int beta, int lim_p0q0, int lim_q1, int lim_p1)
ff_put_rv40_chroma_mc4_neon
void ff_put_rv40_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int)
src
#define src
Definition: vp8dsp.c:255
cpu.h
c
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit-picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can lead, and in some cases has led, to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c[...] (text truncated in the generated tooltip)
Definition: undefined.txt:32
RV34DSPContext
Definition: rv34dsp.h:57
DECL_QPEL_Y
#define DECL_QPEL_Y(y)
Definition: rv40dsp_init_arm.c:41
rv34dsp.h
ff_rv40_v_loop_filter_strength_neon
int ff_rv40_v_loop_filter_strength_neon(uint8_t *src, ptrdiff_t stride, int beta, int beta2, int edge, int *p1, int *q1)
ff_rv40_weight_func_16_neon
void ff_rv40_weight_func_16_neon(uint8_t *, uint8_t *, uint8_t *, int, int, ptrdiff_t)
have_neon
#define have_neon(flags)
Definition: cpu.h:26
ff_rv40_weight_func_8_neon
void ff_rv40_weight_func_8_neon(uint8_t *, uint8_t *, uint8_t *, int, int, ptrdiff_t)
attributes.h
rv40dsp_init_neon
static av_cold void rv40dsp_init_neon(RV34DSPContext *c)
Definition: rv40dsp_init_arm.c:75
ff_avg_rv40_chroma_mc8_neon
void ff_avg_rv40_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int)
avcodec.h
ff_put_rv40_chroma_mc8_neon
void ff_put_rv40_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int)
alpha
static const int16_t alpha[]
Definition: ilbcdata.h:55
ff_rv40dsp_init_arm
av_cold void ff_rv40dsp_init_arm(RV34DSPContext *c)
Definition: rv40dsp_init_arm.c:144