FFmpeg
wmv2dsp.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with FFmpeg; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
19 #include "config.h"
20 #include "libavutil/attributes.h"
21 #include "libavutil/common.h"
22 #include "idctdsp.h"
23 #include "mathops.h"
24 #include "qpeldsp.h"
25 #include "wmv2dsp.h"
26 
/* Fixed-point IDCT weights; W0 is the plain 2048 scale factor. */
#define W0 2048
#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
#define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
#define W7 565  /* 2048*sqrt (2)*cos (7*pi/16) */

/**
 * In-place 8-point inverse transform of one row of coefficients.
 *
 * All eight inputs are read before any output is stored, so the
 * transform can safely overwrite its own input.
 *
 * @param b pointer to 8 consecutive coefficients; replaced by the
 *          transformed values (rounded and scaled down by 8 bits)
 */
static void wmv2_idct_row(short * b)
{
    const int rnd = 1 << 7; /* rounding term for the final >> 8 */

    /* step 1: weighted combinations of the coefficient pairs */
    const int t1 = W1 * b[1] + W7 * b[7];
    const int t7 = W7 * b[1] - W1 * b[7];
    const int t5 = W5 * b[5] + W3 * b[3];
    const int t3 = W3 * b[5] - W5 * b[3];
    const int t2 = W2 * b[2] + W6 * b[6];
    const int t6 = W6 * b[2] - W2 * b[6];
    const int t0 = W0 * b[0] + W0 * b[4];
    const int t4 = W0 * b[0] - W0 * b[4];

    /* step 2: scale by 181 / 256 (about 1 / sqrt(2)); the product is formed
     * in unsigned arithmetic and converted back to int before the shift */
    const int rot_a = (int)(181U * (t1 - t5 + t7 - t3) + 128) >> 8; // 1, 3, 5, 7
    const int rot_b = (int)(181U * (t1 - t5 - t7 + t3) + 128) >> 8;

    /* even-part sums and differences shared by mirrored outputs */
    const int sum02  = t0 + t2;
    const int diff02 = t0 - t2;
    const int sum46  = t4 + t6;
    const int diff46 = t4 - t6;

    /* step 3: butterfly, outputs k and 7 - k differ only in the odd part */
    b[0] = (sum02  + t1 + t5 + rnd) >> 8;
    b[1] = (sum46  + rot_a   + rnd) >> 8;
    b[2] = (diff46 + rot_b   + rnd) >> 8;
    b[3] = (diff02 + t7 + t3 + rnd) >> 8;
    b[4] = (diff02 - t7 - t3 + rnd) >> 8;
    b[5] = (diff46 - rot_b   + rnd) >> 8;
    b[6] = (sum46  - rot_a   + rnd) >> 8;
    b[7] = (sum02  - t1 - t5 + rnd) >> 8;
}
65 
66 static void wmv2_idct_col(short * b)
67 {
68  int s1, s2;
69  int a0, a1, a2, a3, a4, a5, a6, a7;
70 
71  /* step 1, with extended precision */
72  a1 = (W1 * b[8 * 1] + W7 * b[8 * 7] + 4) >> 3;
73  a7 = (W7 * b[8 * 1] - W1 * b[8 * 7] + 4) >> 3;
74  a5 = (W5 * b[8 * 5] + W3 * b[8 * 3] + 4) >> 3;
75  a3 = (W3 * b[8 * 5] - W5 * b[8 * 3] + 4) >> 3;
76  a2 = (W2 * b[8 * 2] + W6 * b[8 * 6] + 4) >> 3;
77  a6 = (W6 * b[8 * 2] - W2 * b[8 * 6] + 4) >> 3;
78  a0 = (W0 * b[8 * 0] + W0 * b[8 * 4] ) >> 3;
79  a4 = (W0 * b[8 * 0] - W0 * b[8 * 4] ) >> 3;
80 
81  /* step 2 */
82  s1 = (int)(181U * (a1 - a5 + a7 - a3) + 128) >> 8;
83  s2 = (int)(181U * (a1 - a5 - a7 + a3) + 128) >> 8;
84 
85  /* step 3 */
86  b[8 * 0] = (a0 + a2 + a1 + a5 + (1 << 13)) >> 14;
87  b[8 * 1] = (a4 + a6 + s1 + (1 << 13)) >> 14;
88  b[8 * 2] = (a4 - a6 + s2 + (1 << 13)) >> 14;
89  b[8 * 3] = (a0 - a2 + a7 + a3 + (1 << 13)) >> 14;
90 
91  b[8 * 4] = (a0 - a2 - a7 - a3 + (1 << 13)) >> 14;
92  b[8 * 5] = (a4 - a6 - s2 + (1 << 13)) >> 14;
93  b[8 * 6] = (a4 + a6 - s1 + (1 << 13)) >> 14;
94  b[8 * 7] = (a0 + a2 - a1 - a5 + (1 << 13)) >> 14;
95 }
96 
/**
 * Inverse-transform an 8x8 coefficient block in place and add the result
 * to the destination pixels, clipping each sum with av_clip_uint8().
 *
 * @param dest      top-left destination pixel
 * @param line_size distance in bytes between destination rows
 * @param block     64 coefficients in row-major order; overwritten with
 *                  the transformed values
 */
static void wmv2_idct_add_c(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    int row, col;

    /* rows first, then columns */
    for (row = 0; row < 8; row++)
        wmv2_idct_row(block + 8 * row);
    for (col = 0; col < 8; col++)
        wmv2_idct_col(block + col);

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            dest[col] = av_clip_uint8(dest[col] + block[col]);
        dest  += line_size;
        block += 8;
    }
}
119 
/**
 * Inverse-transform an 8x8 coefficient block in place and store the
 * result to the destination pixels, clipping each value with
 * av_clip_uint8().
 *
 * @param dest      top-left destination pixel
 * @param line_size distance in bytes between destination rows
 * @param block     64 coefficients in row-major order; overwritten with
 *                  the transformed values
 */
static void wmv2_idct_put_c(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    int row, col;

    /* rows first, then columns */
    for (row = 0; row < 8; row++)
        wmv2_idct_row(block + 8 * row);
    for (col = 0; col < 8; col++)
        wmv2_idct_col(block + col);

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            dest[col] = av_clip_uint8(block[col]);
        dest  += line_size;
        block += 8;
    }
}
142 
143 static void wmv2_mspel8_h_lowpass(uint8_t *dst, const uint8_t *src,
144  int dstStride, int srcStride, int h)
145 {
146  const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;
147  int i;
148 
149  for (i = 0; i < h; i++) {
150  dst[0] = cm[(9 * (src[0] + src[1]) - (src[-1] + src[2]) + 8) >> 4];
151  dst[1] = cm[(9 * (src[1] + src[2]) - (src[0] + src[3]) + 8) >> 4];
152  dst[2] = cm[(9 * (src[2] + src[3]) - (src[1] + src[4]) + 8) >> 4];
153  dst[3] = cm[(9 * (src[3] + src[4]) - (src[2] + src[5]) + 8) >> 4];
154  dst[4] = cm[(9 * (src[4] + src[5]) - (src[3] + src[6]) + 8) >> 4];
155  dst[5] = cm[(9 * (src[5] + src[6]) - (src[4] + src[7]) + 8) >> 4];
156  dst[6] = cm[(9 * (src[6] + src[7]) - (src[5] + src[8]) + 8) >> 4];
157  dst[7] = cm[(9 * (src[7] + src[8]) - (src[6] + src[9]) + 8) >> 4];
158  dst += dstStride;
159  src += srcStride;
160  }
161 }
162 
163 static void wmv2_mspel8_v_lowpass(uint8_t *dst, const uint8_t *src,
164  int dstStride, int srcStride, int w)
165 {
166  const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;
167  int i;
168 
169  for (i = 0; i < w; i++) {
170  const int src_1 = src[-srcStride];
171  const int src0 = src[0];
172  const int src1 = src[srcStride];
173  const int src2 = src[2 * srcStride];
174  const int src3 = src[3 * srcStride];
175  const int src4 = src[4 * srcStride];
176  const int src5 = src[5 * srcStride];
177  const int src6 = src[6 * srcStride];
178  const int src7 = src[7 * srcStride];
179  const int src8 = src[8 * srcStride];
180  const int src9 = src[9 * srcStride];
181  dst[0 * dstStride] = cm[(9 * (src0 + src1) - (src_1 + src2) + 8) >> 4];
182  dst[1 * dstStride] = cm[(9 * (src1 + src2) - (src0 + src3) + 8) >> 4];
183  dst[2 * dstStride] = cm[(9 * (src2 + src3) - (src1 + src4) + 8) >> 4];
184  dst[3 * dstStride] = cm[(9 * (src3 + src4) - (src2 + src5) + 8) >> 4];
185  dst[4 * dstStride] = cm[(9 * (src4 + src5) - (src3 + src6) + 8) >> 4];
186  dst[5 * dstStride] = cm[(9 * (src5 + src6) - (src4 + src7) + 8) >> 4];
187  dst[6 * dstStride] = cm[(9 * (src6 + src7) - (src5 + src8) + 8) >> 4];
188  dst[7 * dstStride] = cm[(9 * (src7 + src8) - (src6 + src9) + 8) >> 4];
189  src++;
190  dst++;
191  }
192 }
193 
/**
 * 8x8 motion compensation for sub-pel position (1, 0).
 *
 * Filters the source horizontally into a temporary 8x8 buffer and hands
 * that buffer together with the unfiltered source to ff_put_pixels8_l2_8()
 * (presumably a two-source average; confirm against qpeldsp).
 *
 * @param dst    destination block
 * @param src    source block; the horizontal filter also reads src[-1]
 *               and src[8], src[9] on each row
 * @param stride distance between rows of both dst and src
 */
static void put_mspel8_mc10_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
{
    uint8_t half[64];

    /* half must be filled before it is combined with src below */
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    ff_put_pixels8_l2_8(dst, src, half, stride, stride, 8, 8);
}
201 
/**
 * 8x8 motion compensation for sub-pel position (2, 0): the horizontally
 * filtered source is written straight to the destination.
 *
 * @param dst    destination block
 * @param src    source block; the horizontal filter also reads src[-1]
 *               and src[8], src[9] on each row
 * @param stride distance between rows of both dst and src
 */
static void put_mspel8_mc20_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
{
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}
206 
/**
 * 8x8 motion compensation for sub-pel position (3, 0).
 *
 * Filters the source horizontally into a temporary 8x8 buffer and hands
 * that buffer together with the source shifted one pixel to the right to
 * ff_put_pixels8_l2_8() (presumably a two-source average; confirm against
 * qpeldsp).
 *
 * @param dst    destination block
 * @param src    source block; the horizontal filter also reads src[-1]
 *               and src[8], src[9] on each row
 * @param stride distance between rows of both dst and src
 */
static void put_mspel8_mc30_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
{
    uint8_t half[64];

    /* half must be filled before it is combined with src + 1 below */
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    ff_put_pixels8_l2_8(dst, src + 1, half, stride, stride, 8, 8);
}
214 
/**
 * 8x8 motion compensation for sub-pel position (0, 2): the vertically
 * filtered source is written straight to the destination.
 *
 * @param dst    destination block
 * @param src    source block; the vertical filter also reads one row above
 *               and two rows below the block
 * @param stride distance between rows of both dst and src
 */
static void put_mspel8_mc02_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
{
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}
219 
/**
 * 8x8 motion compensation for sub-pel position (1, 2).
 *
 * Builds a vertically filtered block and a horizontally-then-vertically
 * filtered block, then passes both to ff_put_pixels8_l2_8() (presumably a
 * two-source average; confirm against qpeldsp).
 *
 * @param dst    destination block
 * @param src    source block; rows -1 .. 9 and columns -1 .. 9 around it
 *               are read by the filters
 * @param stride distance between rows of both dst and src
 */
static void put_mspel8_mc12_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
{
    uint8_t h_rows[88]; /* 11 horizontally filtered rows, starting one row above src */
    uint8_t v_only[64];
    uint8_t hv[64];

    wmv2_mspel8_h_lowpass(h_rows, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(v_only, src, 8, stride, 8);
    /* skip the extra top row so the vertical filter is centred on the block */
    wmv2_mspel8_v_lowpass(hv, h_rows + 8, 8, 8, 8);
    ff_put_pixels8_l2_8(dst, v_only, hv, stride, 8, 8, 8);
}
231 
/**
 * 8x8 motion compensation for sub-pel position (3, 2).
 *
 * Same as the (1, 2) case except that the vertically filtered block is
 * taken from the source shifted one pixel to the right.
 *
 * @param dst    destination block
 * @param src    source block; rows -1 .. 9 and columns -1 .. 9 around it
 *               are read by the filters
 * @param stride distance between rows of both dst and src
 */
static void put_mspel8_mc32_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
{
    uint8_t h_rows[88]; /* 11 horizontally filtered rows, starting one row above src */
    uint8_t v_only[64];
    uint8_t hv[64];

    wmv2_mspel8_h_lowpass(h_rows, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(v_only, src + 1, 8, stride, 8);
    /* skip the extra top row so the vertical filter is centred on the block */
    wmv2_mspel8_v_lowpass(hv, h_rows + 8, 8, 8, 8);
    ff_put_pixels8_l2_8(dst, v_only, hv, stride, 8, 8, 8);
}
243 
/**
 * 8x8 motion compensation for sub-pel position (2, 2): horizontal filter
 * into a temporary buffer, then vertical filter into the destination.
 *
 * @param dst    destination block
 * @param src    source block; rows -1 .. 9 and columns -1 .. 9 around it
 *               are read by the horizontal pass
 * @param stride distance between rows of both dst and src
 */
static void put_mspel8_mc22_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
{
    uint8_t h_rows[88]; /* 11 horizontally filtered rows, starting one row above src */

    wmv2_mspel8_h_lowpass(h_rows, src - stride, 8, stride, 11);
    /* skip the extra top row so the vertical filter is centred on the block */
    wmv2_mspel8_v_lowpass(dst, h_rows + 8, stride, 8, 8);
}
251 
253 {
254  c->idct_add = wmv2_idct_add_c;
255  c->idct_put = wmv2_idct_put_c;
256  c->idct_perm = FF_IDCT_PERM_NONE;
257 
258  c->put_mspel_pixels_tab[0] = ff_put_pixels8x8_c;
259  c->put_mspel_pixels_tab[1] = put_mspel8_mc10_c;
260  c->put_mspel_pixels_tab[2] = put_mspel8_mc20_c;
261  c->put_mspel_pixels_tab[3] = put_mspel8_mc30_c;
262  c->put_mspel_pixels_tab[4] = put_mspel8_mc02_c;
263  c->put_mspel_pixels_tab[5] = put_mspel8_mc12_c;
264  c->put_mspel_pixels_tab[6] = put_mspel8_mc22_c;
265  c->put_mspel_pixels_tab[7] = put_mspel8_mc32_c;
266 
267  if (ARCH_MIPS)
269 }
stride
int stride
Definition: mace.c:144
wmv2_idct_col
static void wmv2_idct_col(short *b)
Definition: wmv2dsp.c:66
wmv2_idct_add_c
static void wmv2_idct_add_c(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
Definition: wmv2dsp.c:97
w
uint8_t w
Definition: llviddspenc.c:38
put_mspel8_mc10_c
static void put_mspel8_mc10_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: wmv2dsp.c:194
b
#define b
Definition: input.c:40
ff_put_pixels8_l2_8
void ff_put_pixels8_l2_8(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
Definition: qpeldsp.c:730
half
static uint8_t half(int a, int b)
Definition: mobiclip.c:540
W6
#define W6
Definition: wmv2dsp.c:33
wmv2_mspel8_h_lowpass
static void wmv2_mspel8_h_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
Definition: wmv2dsp.c:143
ff_crop_tab
#define ff_crop_tab
Definition: motionpixels_tablegen.c:26
put_mspel8_mc22_c
static void put_mspel8_mc22_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: wmv2dsp.c:244
put_mspel8_mc02_c
static void put_mspel8_mc02_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: wmv2dsp.c:215
U
#define U(x)
Definition: vp56_arith.h:37
ff_wmv2dsp_init_mips
av_cold void ff_wmv2dsp_init_mips(WMV2DSPContext *c)
Definition: wmv2dsp_init_mips.c:26
a1
#define a1
Definition: regdef.h:47
W0
#define W0
Definition: wmv2dsp.c:27
ff_put_pixels8x8_c
void ff_put_pixels8x8_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: qpeldsp.c:703
av_cold
#define av_cold
Definition: attributes.h:90
s1
#define s1
Definition: regdef.h:38
wmv2_idct_put_c
static void wmv2_idct_put_c(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
Definition: wmv2dsp.c:120
a4
#define a4
Definition: regdef.h:50
W5
#define W5
Definition: wmv2dsp.c:32
put_mspel8_mc32_c
static void put_mspel8_mc32_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: wmv2dsp.c:232
wmv2dsp.h
src
#define src
Definition: vp8dsp.c:255
mathops.h
qpeldsp.h
ff_wmv2dsp_init
av_cold void ff_wmv2dsp_init(WMV2DSPContext *c)
Definition: wmv2dsp.c:252
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
put_mspel8_mc20_c
static void put_mspel8_mc20_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: wmv2dsp.c:202
s2
#define s2
Definition: regdef.h:39
FF_IDCT_PERM_NONE
@ FF_IDCT_PERM_NONE
Definition: idctdsp.h:38
W1
#define W1
Definition: wmv2dsp.c:28
W7
#define W7
Definition: wmv2dsp.c:34
put_mspel8_mc12_c
static void put_mspel8_mc12_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: wmv2dsp.c:220
attributes.h
put_mspel8_mc30_c
static void put_mspel8_mc30_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: wmv2dsp.c:207
a0
#define a0
Definition: regdef.h:46
src0
#define src0
Definition: h264pred.c:139
WMV2DSPContext
Definition: wmv2dsp.h:26
src1
#define src1
Definition: h264pred.c:140
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:271
a2
#define a2
Definition: regdef.h:48
common.h
idctdsp.h
wmv2_idct_row
static void wmv2_idct_row(short *b)
Definition: wmv2dsp.c:36
a5
#define a5
Definition: regdef.h:51
W3
#define W3
Definition: wmv2dsp.c:30
cm
#define cm
Definition: dvbsubdec.c:38
av_clip_uint8
#define av_clip_uint8
Definition: common.h:102
wmv2_mspel8_v_lowpass
static void wmv2_mspel8_v_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int w)
Definition: wmv2dsp.c:163
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
W2
#define W2
Definition: wmv2dsp.c:29
h
h
Definition: vp9dsp_template.c:2038
MAX_NEG_CROP
#define MAX_NEG_CROP
Definition: mathops.h:31
int
int
Definition: ffmpeg_filter.c:153
a3
#define a3
Definition: regdef.h:49