FFmpeg
vc1dsp.c
Go to the documentation of this file.
1 /*
2  * VC-1 and WMV3 decoder - DSP functions
3  * Copyright (c) 2006 Konstantin Shishkov
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * VC-1 and WMV3 decoder
25  */
26 
27 #include "config_components.h"
28 
29 #include "libavutil/avassert.h"
30 #include "libavutil/common.h"
31 #include "libavutil/intreadwrite.h"
32 #include "h264chroma.h"
33 #include "qpeldsp.h"
34 #include "rnd_avg.h"
35 #include "vc1dsp.h"
36 #include "startcode.h"
37 #include "vc1_common.h"
38 
/**
 * Apply the overlap transform across a horizontal edge, in place.
 *
 * For each of 8 neighbouring columns the four pixels at rows -2, -1, 0 and +1
 * relative to src are adjusted. The rounding bias alternates from one column
 * to the next, starting at 1.
 *
 * @param src    pixel at row 0 of the first column
 * @param stride distance between vertically adjacent pixels
 */
static void vc1_v_overlap_c(uint8_t *src, ptrdiff_t stride)
{
    int col;

    for (col = 0; col < 8; col++) {
        uint8_t *px = src + col;
        /* 1 on even columns, 0 on odd ones */
        const int bias  = !(col & 1);
        const int p0    = px[-2 * stride];
        const int p1    = px[-stride];
        const int p2    = px[0];
        const int p3    = px[stride];
        const int outer = (p0 - p3 + 3 + bias) >> 3;
        const int inner = (p0 - p3 + p1 - p2 + 4 - bias) >> 3;

        px[-2 * stride] = p0 - outer;
        px[-stride]     = av_clip_uint8(p1 - inner);
        px[0]           = av_clip_uint8(p2 + inner);
        px[stride]      = p3 + outer;
    }
}
62 
/**
 * Apply the overlap transform across a vertical edge, in place.
 *
 * For each of 8 consecutive rows the four pixels at columns -2, -1, 0 and +1
 * relative to src are adjusted. The rounding bias alternates from one row to
 * the next, starting at 1.
 *
 * @param src    pixel at column 0 of the first row
 * @param stride distance between vertically adjacent pixels
 */
static void vc1_h_overlap_c(uint8_t *src, ptrdiff_t stride)
{
    int row;

    for (row = 0; row < 8; row++) {
        uint8_t *px = src + row * stride;
        /* 1 on even rows, 0 on odd ones */
        const int bias  = !(row & 1);
        const int p0    = px[-2];
        const int p1    = px[-1];
        const int p2    = px[0];
        const int p3    = px[1];
        const int outer = (p0 - p3 + 3 + bias) >> 3;
        const int inner = (p0 - p3 + p1 - p2 + 4 - bias) >> 3;

        px[-2] = p0 - outer;
        px[-1] = av_clip_uint8(p1 - inner);
        px[0]  = av_clip_uint8(p2 + inner);
        px[1]  = p3 + outer;
    }
}
86 
/**
 * Overlap transform on 16-bit coefficients across the boundary between two
 * vertically stacked 8x8 blocks, in place.
 *
 * The last two rows of top (elements 48.. and 56..) and the first two rows of
 * bottom (elements 0.. and 8..) are updated for all 8 columns. The two
 * rounding constants (4 and 3) trade places from one column to the next.
 *
 * @param top    upper block, 8 coefficients per row
 * @param bottom lower block, 8 coefficients per row
 */
static void vc1_v_s_overlap_c(int16_t *top, int16_t *bottom)
{
    int col;

    for (col = 0; col < 8; col++) {
        /* even columns round with (4, 3), odd columns with (3, 4) */
        const int bias_a = (col & 1) ? 3 : 4;
        const int bias_b = 7 - bias_a;
        const int t0     = top[48 + col];
        const int t1     = top[56 + col];
        const int b0     = bottom[col];
        const int b1     = bottom[8 + col];
        const int outer  = t0 - b1;
        const int inner  = t0 - b1 + t1 - b0;

        top[48 + col]   = ((t0 * 8) - outer + bias_a) >> 3;
        top[56 + col]   = ((t1 * 8) - inner + bias_b) >> 3;
        bottom[col]     = ((b0 * 8) + inner + bias_a) >> 3;
        bottom[8 + col] = ((b1 * 8) + outer + bias_b) >> 3;
    }
}
112 
/**
 * Overlap transform on 16-bit coefficients across the boundary between two
 * horizontally adjacent blocks, in place.
 *
 * On each of 8 rows, elements 6 and 7 of left and elements 0 and 1 of right
 * are updated.
 *
 * @param left         left block
 * @param right        right block
 * @param left_stride  distance between successive rows of left
 * @param right_stride distance between successive rows of right
 * @param flags        bit 1 selects the initial rounding pair ((3, 4) when
 *                     set, (4, 3) otherwise); bit 0 makes the pair swap after
 *                     every row
 */
static void vc1_h_s_overlap_c(int16_t *left, int16_t *right, ptrdiff_t left_stride, ptrdiff_t right_stride, int flags)
{
    const int alternate = flags & 1;
    int bias_a = (flags & 2) ? 3 : 4;
    int bias_b = 7 - bias_a;
    int row;

    for (row = 0; row < 8; row++) {
        const int l0    = left[6];
        const int l1    = left[7];
        const int r0    = right[0];
        const int r1    = right[1];
        const int outer = l0 - r1;
        const int inner = l0 - r1 + l1 - r0;

        left[6]  = ((l0 * 8) - outer + bias_a) >> 3;
        left[7]  = ((l1 * 8) - inner + bias_b) >> 3;
        right[0] = ((r0 * 8) + inner + bias_a) >> 3;
        right[1] = ((r1 * 8) + outer + bias_b) >> 3;

        left  += left_stride;
        right += right_stride;

        if (alternate) {
            /* the two constants always sum to 7, so this is a swap */
            const int swap = bias_a;
            bias_a = bias_b;
            bias_b = swap;
        }
    }
}
141 
142 /**
143  * VC-1 in-loop deblocking filter for one line
144  * @param src source block type
145  * @param stride block stride
146  * @param pq block quantizer
147  * @return whether other 3 pairs should be filtered or not
148  * @see 8.6
149  */
150 static av_always_inline int vc1_filter_line(uint8_t *src, ptrdiff_t stride, int pq)
151 {
152  int a0 = (2 * (src[-2 * stride] - src[1 * stride]) -
153  5 * (src[-1 * stride] - src[0 * stride]) + 4) >> 3;
154  int a0_sign = a0 >> 31; /* Store sign */
155 
156  a0 = (a0 ^ a0_sign) - a0_sign; /* a0 = FFABS(a0); */
157  if (a0 < pq) {
158  int a1 = FFABS((2 * (src[-4 * stride] - src[-1 * stride]) -
159  5 * (src[-3 * stride] - src[-2 * stride]) + 4) >> 3);
160  int a2 = FFABS((2 * (src[ 0 * stride] - src[ 3 * stride]) -
161  5 * (src[ 1 * stride] - src[ 2 * stride]) + 4) >> 3);
162  if (a1 < a0 || a2 < a0) {
163  int clip = src[-1 * stride] - src[0 * stride];
164  int clip_sign = clip >> 31;
165 
166  clip = ((clip ^ clip_sign) - clip_sign) >> 1;
167  if (clip) {
168  int a3 = FFMIN(a1, a2);
169  int d = 5 * (a3 - a0);
170  int d_sign = (d >> 31);
171 
172  d = ((d ^ d_sign) - d_sign) >> 3;
173  d_sign ^= a0_sign;
174 
175  if (d_sign ^ clip_sign)
176  d = 0;
177  else {
178  d = FFMIN(d, clip);
179  d = (d ^ d_sign) - d_sign; /* Restore sign */
180  src[-1 * stride] = av_clip_uint8(src[-1 * stride] - d);
181  src[ 0 * stride] = av_clip_uint8(src[ 0 * stride] + d);
182  }
183  return 1;
184  }
185  }
186  }
187  return 0;
188 }
189 
/**
 * VC-1 in-loop deblocking filter for an edge.
 *
 * The edge is processed in groups of 4 lines. Within each group the third
 * line is filtered first; the remaining three are filtered only if that one
 * reported that filtering applies.
 *
 * @param src    first pixel after the edge on the first line
 * @param step   distance between successive lines along the edge (1 for the
 *               vertical filters, the picture stride for the horizontal ones).
 *               Declared ptrdiff_t so a stride passed here is not narrowed.
 * @param stride distance between successive pixels across the edge
 * @param len    edge length to filter, a multiple of 4 (callers in this file
 *               pass 4, 8 or 16)
 * @param pq     block quantizer
 * @see 8.6
 */
static inline void vc1_loop_filter(uint8_t *src, ptrdiff_t step, ptrdiff_t stride,
                                   int len, int pq)
{
    int i;

    for (i = 0; i < len; i += 4) {
        /* Line 2 of the group acts as the gate for lines 0, 1 and 3. */
        if (vc1_filter_line(src + 2 * step, stride, pq)) {
            vc1_filter_line(src + 0 * step, stride, pq);
            vc1_filter_line(src + 1 * step, stride, pq);
            vc1_filter_line(src + 3 * step, stride, pq);
        }
        src += step * 4;
    }
}
215 
/* Deblock 4 adjacent columns; filter taps are spaced stride apart. */
static void vc1_v_loop_filter4_c(uint8_t *src, ptrdiff_t stride, int pq)
{
    const int edge_len = 4;

    vc1_loop_filter(src, 1, stride, edge_len, pq);
}
220 
/* Deblock 4 consecutive rows; filter taps are adjacent pixels in each row. */
static void vc1_h_loop_filter4_c(uint8_t *src, ptrdiff_t stride, int pq)
{
    const int edge_len = 4;

    vc1_loop_filter(src, stride, 1, edge_len, pq);
}
225 
/* Deblock 8 adjacent columns; filter taps are spaced stride apart. */
static void vc1_v_loop_filter8_c(uint8_t *src, ptrdiff_t stride, int pq)
{
    const int edge_len = 8;

    vc1_loop_filter(src, 1, stride, edge_len, pq);
}
230 
/* Deblock 8 consecutive rows; filter taps are adjacent pixels in each row. */
static void vc1_h_loop_filter8_c(uint8_t *src, ptrdiff_t stride, int pq)
{
    const int edge_len = 8;

    vc1_loop_filter(src, stride, 1, edge_len, pq);
}
235 
/* Deblock 16 adjacent columns; filter taps are spaced stride apart. */
static void vc1_v_loop_filter16_c(uint8_t *src, ptrdiff_t stride, int pq)
{
    const int edge_len = 16;

    vc1_loop_filter(src, 1, stride, edge_len, pq);
}
240 
/* Deblock 16 consecutive rows; filter taps are adjacent pixels in each row. */
static void vc1_h_loop_filter16_c(uint8_t *src, ptrdiff_t stride, int pq)
{
    const int edge_len = 16;

    vc1_loop_filter(src, stride, 1, edge_len, pq);
}
245 
/**
 * Inverse transform of an 8x8 block, DC-only variant.
 *
 * Only block[0] is read. It is scaled in two rounding steps and the result is
 * added, with clipping to 0..255, to every pixel of the 8x8 area at dest.
 *
 * @param dest   top-left pixel of the destination area
 * @param stride distance between vertically adjacent destination pixels
 * @param block  coefficients; only element 0 is used
 */
static void vc1_inv_trans_8x8_dc_c(uint8_t *dest, ptrdiff_t stride, int16_t *block)
{
    int row, col;
    int offset = block[0];

    offset = (3 * offset +  1) >> 1;
    offset = (3 * offset + 16) >> 5;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            dest[col] = av_clip_uint8(dest[col] + offset);
        dest += stride;
    }
}
267 
/**
 * Full 8x8 inverse transform, in place.
 *
 * Two 8-point passes are run through a temporary buffer. The first reads
 * block with an element spacing of 8 and writes consecutive temp elements
 * (rounding +4, shift 3). The second reads temp with a spacing of 8 and
 * writes block with a spacing of 8 (rounding +64, shift 7, plus an extra +1
 * on the last four outputs of each 8-point group).
 *
 * @param block 64 coefficients on input, 64 residual values on output
 */
static void vc1_inv_trans_8x8_c(int16_t block[64])
{
    int16_t temp[64];
    const int16_t *in;
    int16_t *out;
    int n;

    /* first pass: block -> temp */
    in  = block;
    out = temp;
    for (n = 0; n < 8; n++, in++, out += 8) {
        const int sum04  = 12 * (in[ 0] + in[32]) + 4;
        const int diff04 = 12 * (in[ 0] - in[32]) + 4;
        const int mix_a  = 16 * in[16] +  6 * in[48];
        const int mix_b  =  6 * in[16] - 16 * in[48];

        const int e0 = sum04  + mix_a;
        const int e1 = diff04 + mix_b;
        const int e2 = diff04 - mix_b;
        const int e3 = sum04  - mix_a;

        const int o0 = 16 * in[ 8] + 15 * in[24] +  9 * in[40] +  4 * in[56];
        const int o1 = 15 * in[ 8] -  4 * in[24] - 16 * in[40] -  9 * in[56];
        const int o2 =  9 * in[ 8] - 16 * in[24] +  4 * in[40] + 15 * in[56];
        const int o3 =  4 * in[ 8] -  9 * in[24] + 15 * in[40] - 16 * in[56];

        out[0] = (e0 + o0) >> 3;
        out[1] = (e1 + o1) >> 3;
        out[2] = (e2 + o2) >> 3;
        out[3] = (e3 + o3) >> 3;
        out[4] = (e3 - o3) >> 3;
        out[5] = (e2 - o2) >> 3;
        out[6] = (e1 - o1) >> 3;
        out[7] = (e0 - o0) >> 3;
    }

    /* second pass: temp -> block */
    in  = temp;
    out = block;
    for (n = 0; n < 8; n++, in++, out++) {
        const int sum04  = 12 * (in[ 0] + in[32]) + 64;
        const int diff04 = 12 * (in[ 0] - in[32]) + 64;
        const int mix_a  = 16 * in[16] +  6 * in[48];
        const int mix_b  =  6 * in[16] - 16 * in[48];

        const int e0 = sum04  + mix_a;
        const int e1 = diff04 + mix_b;
        const int e2 = diff04 - mix_b;
        const int e3 = sum04  - mix_a;

        const int o0 = 16 * in[ 8] + 15 * in[24] +  9 * in[40] +  4 * in[56];
        const int o1 = 15 * in[ 8] -  4 * in[24] - 16 * in[40] -  9 * in[56];
        const int o2 =  9 * in[ 8] - 16 * in[24] +  4 * in[40] + 15 * in[56];
        const int o3 =  4 * in[ 8] -  9 * in[24] + 15 * in[40] - 16 * in[56];

        out[ 0] = (e0 + o0) >> 7;
        out[ 8] = (e1 + o1) >> 7;
        out[16] = (e2 + o2) >> 7;
        out[24] = (e3 + o3) >> 7;
        /* the mirrored half carries an additional +1 before the shift */
        out[32] = (e3 - o3 + 1) >> 7;
        out[40] = (e2 - o2 + 1) >> 7;
        out[48] = (e1 - o1 + 1) >> 7;
        out[56] = (e0 - o0 + 1) >> 7;
    }
}
336 
/**
 * Inverse transform of an 8x4 part of a block, DC-only variant.
 *
 * Only block[0] is read. It is scaled in two rounding steps and the result is
 * added, with clipping to 0..255, to an area 8 pixels wide and 4 rows high.
 *
 * @param dest   top-left pixel of the destination area
 * @param stride distance between vertically adjacent destination pixels
 * @param block  coefficients; only element 0 is used
 */
static void vc1_inv_trans_8x4_dc_c(uint8_t *dest, ptrdiff_t stride, int16_t *block)
{
    int row, col;
    int offset = block[0];

    offset = ( 3 * offset +  1) >> 1;
    offset = (17 * offset + 64) >> 7;

    for (row = 0; row < 4; row++) {
        for (col = 0; col < 8; col++)
            dest[col] = av_clip_uint8(dest[col] + offset);
        dest += stride;
    }
}
358 
/**
 * Inverse transform of an 8x4 part of a block, added to the destination.
 *
 * First an 8-point pass is applied in place to each of the first 4 rows of
 * block (8 coefficients per row). Then a 4-point pass runs down each of the 8
 * columns and its output is added to dest with clipping to 0..255.
 *
 * @param dest   top-left pixel of the 8x4 destination area
 * @param stride distance between vertically adjacent destination pixels
 * @param block  coefficients; the first 32 elements are overwritten with
 *               intermediate values
 */
static void vc1_inv_trans_8x4_c(uint8_t *dest, ptrdiff_t stride, int16_t *block)
{
    int16_t *row = block;
    const int16_t *col;
    int n;

    /* 8-point pass along each row, in place */
    for (n = 0; n < 4; n++, row += 8) {
        const int sum04  = 12 * (row[0] + row[4]) + 4;
        const int diff04 = 12 * (row[0] - row[4]) + 4;
        const int mix_a  = 16 * row[2] +  6 * row[6];
        const int mix_b  =  6 * row[2] - 16 * row[6];

        const int e0 = sum04  + mix_a;
        const int e1 = diff04 + mix_b;
        const int e2 = diff04 - mix_b;
        const int e3 = sum04  - mix_a;

        const int o0 = 16 * row[1] + 15 * row[3] +  9 * row[5] +  4 * row[7];
        const int o1 = 15 * row[1] -  4 * row[3] - 16 * row[5] -  9 * row[7];
        const int o2 =  9 * row[1] - 16 * row[3] +  4 * row[5] + 15 * row[7];
        const int o3 =  4 * row[1] -  9 * row[3] + 15 * row[5] - 16 * row[7];

        row[0] = (e0 + o0) >> 3;
        row[1] = (e1 + o1) >> 3;
        row[2] = (e2 + o2) >> 3;
        row[3] = (e3 + o3) >> 3;
        row[4] = (e3 - o3) >> 3;
        row[5] = (e2 - o2) >> 3;
        row[6] = (e1 - o1) >> 3;
        row[7] = (e0 - o0) >> 3;
    }

    /* 4-point pass down each column, accumulated into dest */
    col = block;
    for (n = 0; n < 8; n++, col++, dest++) {
        const int sum02  = 17 * (col[ 0] + col[16]) + 64;
        const int diff02 = 17 * (col[ 0] - col[16]) + 64;
        const int mix_a  = 22 * col[ 8] + 10 * col[24];
        const int mix_b  = 22 * col[24] - 10 * col[ 8];

        dest[0 * stride] = av_clip_uint8(dest[0 * stride] + ((sum02  + mix_a) >> 7));
        dest[1 * stride] = av_clip_uint8(dest[1 * stride] + ((diff02 - mix_b) >> 7));
        dest[2 * stride] = av_clip_uint8(dest[2 * stride] + ((diff02 + mix_b) >> 7));
        dest[3 * stride] = av_clip_uint8(dest[3 * stride] + ((sum02  - mix_a) >> 7));
    }
}
413 
/**
 * Inverse transform of a 4x8 part of a block, DC-only variant.
 *
 * Only block[0] is read. It is scaled in two rounding steps and the result is
 * added, with clipping to 0..255, to an area 4 pixels wide and 8 rows high.
 *
 * @param dest   top-left pixel of the destination area
 * @param stride distance between vertically adjacent destination pixels
 * @param block  coefficients; only element 0 is used
 */
static void vc1_inv_trans_4x8_dc_c(uint8_t *dest, ptrdiff_t stride, int16_t *block)
{
    int row, col;
    int offset = block[0];

    offset = (17 * offset +  4) >> 3;
    offset = (12 * offset + 64) >> 7;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 4; col++)
            dest[col] = av_clip_uint8(dest[col] + offset);
        dest += stride;
    }
}
431 
/**
 * Inverse transform of a 4x8 part of a block, added to the destination.
 *
 * First a 4-point pass is applied in place to the first 4 elements of each of
 * the 8 rows of block (rows are 8 elements apart). Then an 8-point pass runs
 * down each of the 4 columns and its output is added to dest with clipping to
 * 0..255.
 *
 * @param dest   top-left pixel of the 4x8 destination area
 * @param stride distance between vertically adjacent destination pixels
 * @param block  coefficients; the first 4 elements of every row are
 *               overwritten with intermediate values
 */
static void vc1_inv_trans_4x8_c(uint8_t *dest, ptrdiff_t stride, int16_t *block)
{
    int16_t *row = block;
    const int16_t *col;
    int n;

    /* 4-point pass along each row, in place */
    for (n = 0; n < 8; n++, row += 8) {
        const int sum02  = 17 * (row[0] + row[2]) + 4;
        const int diff02 = 17 * (row[0] - row[2]) + 4;
        const int mix_a  = 22 * row[1] + 10 * row[3];
        const int mix_b  = 22 * row[3] - 10 * row[1];

        row[0] = (sum02  + mix_a) >> 3;
        row[1] = (diff02 - mix_b) >> 3;
        row[2] = (diff02 + mix_b) >> 3;
        row[3] = (sum02  - mix_a) >> 3;
    }

    /* 8-point pass down each column, accumulated into dest */
    col = block;
    for (n = 0; n < 4; n++, col++, dest++) {
        const int sum04  = 12 * (col[ 0] + col[32]) + 64;
        const int diff04 = 12 * (col[ 0] - col[32]) + 64;
        const int mix_a  = 16 * col[16] +  6 * col[48];
        const int mix_b  =  6 * col[16] - 16 * col[48];

        const int e0 = sum04  + mix_a;
        const int e1 = diff04 + mix_b;
        const int e2 = diff04 - mix_b;
        const int e3 = sum04  - mix_a;

        const int o0 = 16 * col[ 8] + 15 * col[24] +  9 * col[40] +  4 * col[56];
        const int o1 = 15 * col[ 8] -  4 * col[24] - 16 * col[40] -  9 * col[56];
        const int o2 =  9 * col[ 8] - 16 * col[24] +  4 * col[40] + 15 * col[56];
        const int o3 =  4 * col[ 8] -  9 * col[24] + 15 * col[40] - 16 * col[56];

        dest[0 * stride] = av_clip_uint8(dest[0 * stride] + ((e0 + o0)     >> 7));
        dest[1 * stride] = av_clip_uint8(dest[1 * stride] + ((e1 + o1)     >> 7));
        dest[2 * stride] = av_clip_uint8(dest[2 * stride] + ((e2 + o2)     >> 7));
        dest[3 * stride] = av_clip_uint8(dest[3 * stride] + ((e3 + o3)     >> 7));
        /* the mirrored half carries an additional +1 before the shift */
        dest[4 * stride] = av_clip_uint8(dest[4 * stride] + ((e3 - o3 + 1) >> 7));
        dest[5 * stride] = av_clip_uint8(dest[5 * stride] + ((e2 - o2 + 1) >> 7));
        dest[6 * stride] = av_clip_uint8(dest[6 * stride] + ((e1 - o1 + 1) >> 7));
        dest[7 * stride] = av_clip_uint8(dest[7 * stride] + ((e0 - o0 + 1) >> 7));
    }
}
486 
/**
 * Inverse transform of a 4x4 part of a block, DC-only variant.
 *
 * Only block[0] is read. It is scaled in two rounding steps and the result is
 * added, with clipping to 0..255, to a 4x4 pixel area.
 *
 * @param dest   top-left pixel of the destination area
 * @param stride distance between vertically adjacent destination pixels
 * @param block  coefficients; only element 0 is used
 */
static void vc1_inv_trans_4x4_dc_c(uint8_t *dest, ptrdiff_t stride, int16_t *block)
{
    int row, col;
    int offset = block[0];

    offset = (17 * offset +  4) >> 3;
    offset = (17 * offset + 64) >> 7;

    for (row = 0; row < 4; row++) {
        for (col = 0; col < 4; col++)
            dest[col] = av_clip_uint8(dest[col] + offset);
        dest += stride;
    }
}
504 
/**
 * Inverse transform of a 4x4 part of a block, added to the destination.
 *
 * A 4-point pass is applied in place to the first 4 elements of each of the
 * first 4 rows of block (rows are 8 elements apart). The same 4-point
 * butterfly, with different rounding and shift, then runs down each of the 4
 * columns and its output is added to dest with clipping to 0..255.
 *
 * @param dest   top-left pixel of the 4x4 destination area
 * @param stride distance between vertically adjacent destination pixels
 * @param block  coefficients; the 4x4 corner is overwritten with intermediate
 *               values
 */
static void vc1_inv_trans_4x4_c(uint8_t *dest, ptrdiff_t stride, int16_t *block)
{
    int16_t *row = block;
    const int16_t *col;
    int n;

    /* pass along each row, in place */
    for (n = 0; n < 4; n++, row += 8) {
        const int sum02  = 17 * (row[0] + row[2]) + 4;
        const int diff02 = 17 * (row[0] - row[2]) + 4;
        const int mix_a  = 22 * row[1] + 10 * row[3];
        const int mix_b  = 22 * row[3] - 10 * row[1];

        row[0] = (sum02  + mix_a) >> 3;
        row[1] = (diff02 - mix_b) >> 3;
        row[2] = (diff02 + mix_b) >> 3;
        row[3] = (sum02  - mix_a) >> 3;
    }

    /* pass down each column, accumulated into dest */
    col = block;
    for (n = 0; n < 4; n++, col++, dest++) {
        const int sum02  = 17 * (col[0] + col[16]) + 64;
        const int diff02 = 17 * (col[0] - col[16]) + 64;
        const int mix_a  = 22 * col[8] + 10 * col[24];
        const int mix_b  = 22 * col[24] - 10 * col[8];

        dest[0 * stride] = av_clip_uint8(dest[0 * stride] + ((sum02  + mix_a) >> 7));
        dest[1 * stride] = av_clip_uint8(dest[1 * stride] + ((diff02 - mix_b) >> 7));
        dest[2 * stride] = av_clip_uint8(dest[2 * stride] + ((diff02 + mix_b) >> 7));
        dest[3 * stride] = av_clip_uint8(dest[3 * stride] + ((sum02  - mix_a) >> 7));
    }
}
544 
545 /* motion compensation functions */
546 
547 /* Filter in case of 2 filters */
548 #define VC1_MSPEL_FILTER_16B(DIR, TYPE) \
549 static av_always_inline int vc1_mspel_ ## DIR ## _filter_16bits(const TYPE *src, \
550  int stride, \
551  int mode) \
552 { \
553  switch(mode) { \
554  case 0: /* no shift - should not occur */ \
555  return 0; \
556  case 1: /* 1/4 shift */ \
557  return -4 * src[-stride] + 53 * src[0] + \
558  18 * src[stride] - 3 * src[stride * 2]; \
559  case 2: /* 1/2 shift */ \
560  return -1 * src[-stride] + 9 * src[0] + \
561  9 * src[stride] - 1 * src[stride * 2]; \
562  case 3: /* 3/4 shift */ \
563  return -3 * src[-stride] + 18 * src[0] + \
564  53 * src[stride] - 4 * src[stride * 2]; \
565  } \
566  return 0; /* should not occur */ \
567 }
568 
569 VC1_MSPEL_FILTER_16B(ver, uint8_t)
570 VC1_MSPEL_FILTER_16B(hor, int16_t)
571 
572 /* Filter used to interpolate fractional pel values */
573 static av_always_inline int vc1_mspel_filter(const uint8_t *src, int stride,
574  int mode, int r)
575 {
576  switch (mode) {
577  case 0: // no shift
578  return src[0];
579  case 1: // 1/4 shift
580  return (-4 * src[-stride] + 53 * src[0] +
581  18 * src[stride] - 3 * src[stride * 2] + 32 - r) >> 6;
582  case 2: // 1/2 shift
583  return (-1 * src[-stride] + 9 * src[0] +
584  9 * src[stride] - 1 * src[stride * 2] + 8 - r) >> 4;
585  case 3: // 3/4 shift
586  return (-3 * src[-stride] + 18 * src[0] +
587  53 * src[stride] - 4 * src[stride * 2] + 32 - r) >> 6;
588  }
589  return 0; // should not occur
590 }
591 
/**
 * Generate the motion compensation workers (bicubic interpolation) for one
 * store operation.
 *
 * OP(dst_lvalue, value)   stores one interpolated sample.
 * OP4(dst_lvalue, value)  stores one 32-bit word, i.e. four samples at once.
 * OPNAME                  is pasted in front of every generated name.
 *
 * Generated functions:
 *  - OPNAME##vc1_mspel_mc     8x8 block at fractional position (hmode, vmode)
 *  - OPNAME##vc1_mspel_mc_16  16x16 block at fractional position (hmode, vmode)
 *  - OPNAME##pixels8x8_c      8x8 block, whole samples (rnd is unused)
 *  - OPNAME##pixels16x16_c    16x16 block, whole samples (rnd is unused)
 *
 * In the two interpolating functions dst and src share the same stride. When
 * both modes are non-zero the vertical filter runs first into a 16-bit
 * buffer that is 3 columns wider than the block (one to the left, two to the
 * right), and the horizontal filter then runs over that buffer.
 */
#define VC1_MSPEL_MC(OP, OP4, OPNAME)                                         \
static av_always_inline void OPNAME ## vc1_mspel_mc(uint8_t *dst,             \
                                                    const uint8_t *src,       \
                                                    ptrdiff_t stride,         \
                                                    int hmode,                \
                                                    int vmode,                \
                                                    int rnd)                  \
{                                                                             \
    int col, row;                                                             \
                                                                              \
    if (!vmode) {                                                             \
        /* Horizontal filter only */                                          \
        for (row = 0; row < 8; row++) {                                       \
            for (col = 0; col < 8; col++)                                     \
                OP(dst[col], vc1_mspel_filter(src + col, 1, hmode, rnd));     \
            dst += stride;                                                    \
            src += stride;                                                    \
        }                                                                     \
        return;                                                               \
    }                                                                         \
                                                                              \
    if (!hmode) {                                                             \
        /* Vertical filter only, 8 lines straight to dst */                   \
        const int bias = 1 - rnd;                                             \
                                                                              \
        for (row = 0; row < 8; row++) {                                       \
            for (col = 0; col < 8; col++)                                     \
                OP(dst[col], vc1_mspel_filter(src + col, stride, vmode, bias)); \
            src += stride;                                                    \
            dst += stride;                                                    \
        }                                                                     \
        return;                                                               \
    }                                                                         \
                                                                              \
    {                                                                         \
        /* Both filters: vertical into tmp, then horizontal into dst */       \
        static const int shift_value[] = { 0, 5, 1, 5 };                      \
        const int shift = (shift_value[hmode] + shift_value[vmode]) >> 1;     \
        int16_t tmp[11 * 8];                                                  \
        int16_t *line = tmp;                                                  \
        int bias = (1 << (shift - 1)) + rnd - 1;                              \
                                                                              \
        src -= 1;                                                             \
        for (row = 0; row < 8; row++) {                                       \
            for (col = 0; col < 11; col++)                                    \
                line[col] = (vc1_mspel_ver_filter_16bits(src + col, stride, vmode) + bias) >> shift; \
            src  += stride;                                                   \
            line += 11;                                                       \
        }                                                                     \
                                                                              \
        bias = 64 - rnd;                                                      \
        line = tmp + 1;                                                       \
        for (row = 0; row < 8; row++) {                                       \
            for (col = 0; col < 8; col++)                                     \
                OP(dst[col], (vc1_mspel_hor_filter_16bits(line + col, 1, hmode) + bias) >> 7); \
            dst  += stride;                                                   \
            line += 11;                                                       \
        }                                                                     \
    }                                                                         \
}                                                                             \
static av_always_inline void OPNAME ## vc1_mspel_mc_16(uint8_t *dst,          \
                                                       const uint8_t *src,    \
                                                       ptrdiff_t stride,      \
                                                       int hmode,             \
                                                       int vmode,             \
                                                       int rnd)               \
{                                                                             \
    int col, row;                                                             \
                                                                              \
    if (!vmode) {                                                             \
        /* Horizontal filter only */                                          \
        for (row = 0; row < 16; row++) {                                      \
            for (col = 0; col < 16; col++)                                    \
                OP(dst[col], vc1_mspel_filter(src + col, 1, hmode, rnd));     \
            dst += stride;                                                    \
            src += stride;                                                    \
        }                                                                     \
        return;                                                               \
    }                                                                         \
                                                                              \
    if (!hmode) {                                                             \
        /* Vertical filter only, 16 lines straight to dst */                  \
        const int bias = 1 - rnd;                                             \
                                                                              \
        for (row = 0; row < 16; row++) {                                      \
            for (col = 0; col < 16; col++)                                    \
                OP(dst[col], vc1_mspel_filter(src + col, stride, vmode, bias)); \
            src += stride;                                                    \
            dst += stride;                                                    \
        }                                                                     \
        return;                                                               \
    }                                                                         \
                                                                              \
    {                                                                         \
        /* Both filters: vertical into tmp, then horizontal into dst */       \
        static const int shift_value[] = { 0, 5, 1, 5 };                      \
        const int shift = (shift_value[hmode] + shift_value[vmode]) >> 1;     \
        int16_t tmp[19 * 16];                                                 \
        int16_t *line = tmp;                                                  \
        int bias = (1 << (shift - 1)) + rnd - 1;                              \
                                                                              \
        src -= 1;                                                             \
        for (row = 0; row < 16; row++) {                                      \
            for (col = 0; col < 19; col++)                                    \
                line[col] = (vc1_mspel_ver_filter_16bits(src + col, stride, vmode) + bias) >> shift; \
            src  += stride;                                                   \
            line += 19;                                                       \
        }                                                                     \
                                                                              \
        bias = 64 - rnd;                                                      \
        line = tmp + 1;                                                       \
        for (row = 0; row < 16; row++) {                                      \
            for (col = 0; col < 16; col++)                                    \
                OP(dst[col], (vc1_mspel_hor_filter_16bits(line + col, 1, hmode) + bias) >> 7); \
            dst  += stride;                                                   \
            line += 19;                                                       \
        }                                                                     \
    }                                                                         \
}                                                                             \
static void OPNAME ## pixels8x8_c(uint8_t *block, const uint8_t *pixels,      \
                                  ptrdiff_t line_size, int rnd)               \
{                                                                             \
    int row, off;                                                             \
                                                                              \
    for (row = 0; row < 8; row++) {                                           \
        for (off = 0; off < 8; off += 4)                                      \
            OP4(*(uint32_t *)(block + off), AV_RN32(pixels + off));           \
        pixels += line_size;                                                  \
        block  += line_size;                                                  \
    }                                                                         \
}                                                                             \
static void OPNAME ## pixels16x16_c(uint8_t *block, const uint8_t *pixels,    \
                                    ptrdiff_t line_size, int rnd)             \
{                                                                             \
    int row, off;                                                             \
                                                                              \
    for (row = 0; row < 16; row++) {                                          \
        for (off = 0; off < 16; off += 4)                                     \
            OP4(*(uint32_t *)(block + off), AV_RN32(pixels + off));           \
        pixels += line_size;                                                  \
        block  += line_size;                                                  \
    }                                                                         \
}
730 
/*
 * Store operations handed to VC1_MSPEL_MC.
 * op_put / op_avg act on a single sample: the new value b is clipped to
 * 0..255 and either stored in a or averaged with a, rounding up.
 * op4_put / op4_avg act on a whole 32-bit word.
 */
#define op_put(a, b)  (a) = av_clip_uint8(b)
#define op_avg(a, b)  (a) = ((a) + av_clip_uint8(b) + 1) >> 1
#define op4_avg(a, b) (a) = rnd_avg32(a, b)
#define op4_put(a, b) (a) = (b)
735 
738 
/*
 * Pixel functions: thin entry points into the vc1_mspel_mc workers.
 *
 * PUT_VC1_MSPEL(X, Y) defines four functions for the fractional position
 * X (passed as hmode) and Y (passed as vmode): put and avg flavours for the
 * 8x8 worker and for the 16x16 worker. X and Y are also pasted into the
 * function names, so they must be single literal tokens.
 */
#define PUT_VC1_MSPEL(X, Y)                                                   \
static void put_vc1_mspel_mc ## X ## Y ## _c(uint8_t *dst,                    \
                                             const uint8_t *src,              \
                                             ptrdiff_t stride, int rnd)       \
{                                                                             \
    put_vc1_mspel_mc(dst, src, stride, X, Y, rnd);                            \
}                                                                             \
static void put_vc1_mspel_mc ## X ## Y ## _16_c(uint8_t *dst,                 \
                                                const uint8_t *src,           \
                                                ptrdiff_t stride, int rnd)    \
{                                                                             \
    put_vc1_mspel_mc_16(dst, src, stride, X, Y, rnd);                         \
}                                                                             \
static void avg_vc1_mspel_mc ## X ## Y ## _c(uint8_t *dst,                    \
                                             const uint8_t *src,              \
                                             ptrdiff_t stride, int rnd)       \
{                                                                             \
    avg_vc1_mspel_mc(dst, src, stride, X, Y, rnd);                            \
}                                                                             \
static void avg_vc1_mspel_mc ## X ## Y ## _16_c(uint8_t *dst,                 \
                                                const uint8_t *src,           \
                                                ptrdiff_t stride, int rnd)    \
{                                                                             \
    avg_vc1_mspel_mc_16(dst, src, stride, X, Y, rnd);                         \
}

/* Every (hmode, vmode) combination except (0, 0). */
PUT_VC1_MSPEL(1, 0)
PUT_VC1_MSPEL(2, 0)
PUT_VC1_MSPEL(3, 0)

PUT_VC1_MSPEL(0, 1)
PUT_VC1_MSPEL(1, 1)
PUT_VC1_MSPEL(2, 1)
PUT_VC1_MSPEL(3, 1)

PUT_VC1_MSPEL(0, 2)
PUT_VC1_MSPEL(1, 2)
PUT_VC1_MSPEL(2, 2)
PUT_VC1_MSPEL(3, 2)

PUT_VC1_MSPEL(0, 3)
PUT_VC1_MSPEL(1, 3)
PUT_VC1_MSPEL(2, 3)
PUT_VC1_MSPEL(3, 3)
785 
/*
 * Bilinear chroma sample at column offset a of the current row.
 *
 * Relies on the enclosing scope for the weights A, B, C, D and for src and
 * stride. The weighted sum of the 2x2 neighbourhood is biased by 32 - 4 and
 * shifted right by 6. The argument is parenthesized so that an expression
 * such as (i & 3) indexes the intended samples.
 */
#define chroma_mc(a)                                                          \
    ((A * src[(a)] + B * src[(a) + 1] +                                       \
      C * src[stride + (a)] + D * src[stride + (a) + 1] + 32 - 4) >> 6)
/**
 * Bilinear chroma motion compensation, 8 pixels wide, no-rounding variant;
 * the result overwrites dst.
 *
 * @param dst    destination
 * @param src    source; one extra column and one extra row beyond the block
 *               are read
 * @param stride distance between successive rows, shared by dst and src
 * @param h      number of rows to produce
 * @param x      horizontal fraction in eighths, asserted to be 0..7
 * @param y      vertical fraction in eighths, asserted to be 0..7
 */
static void put_no_rnd_vc1_chroma_mc8_c(uint8_t *dst /* align 8 */,
                                        const uint8_t *src /* align 1 */,
                                        ptrdiff_t stride, int h, int x, int y)
{
    /* weights of the top-left, top-right, bottom-left, bottom-right samples */
    const int A = (8 - x) * (8 - y);
    const int B = (x)     * (8 - y);
    const int C = (8 - x) * (y);
    const int D = (x)     * (y);
    int row, col;

    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            dst[col] = chroma_mc(col);
        dst += stride;
        src += stride;
    }
}
814 
/**
 * Bilinear chroma motion compensation, 4 pixels wide, no-rounding variant;
 * the result overwrites dst.
 *
 * @param dst    destination
 * @param src    source; one extra column and one extra row beyond the block
 *               are read
 * @param stride distance between successive rows, shared by dst and src
 * @param h      number of rows to produce
 * @param x      horizontal fraction in eighths, asserted to be 0..7
 * @param y      vertical fraction in eighths, asserted to be 0..7
 */
static void put_no_rnd_vc1_chroma_mc4_c(uint8_t *dst, const uint8_t *src,
                                        ptrdiff_t stride, int h, int x, int y)
{
    /* weights of the top-left, top-right, bottom-left, bottom-right samples */
    const int A = (8 - x) * (8 - y);
    const int B = (x)     * (8 - y);
    const int C = (8 - x) * (y);
    const int D = (x)     * (y);
    int row, col;

    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);

    for (row = 0; row < h; row++) {
        for (col = 0; col < 4; col++)
            dst[col] = chroma_mc(col);
        dst += stride;
        src += stride;
    }
}
835 
/* Average of two values, rounding up. */
#define avg2(a, b) (((a) + (b) + 1) >> 1)

/**
 * Bilinear chroma motion compensation, 8 pixels wide, no-rounding variant;
 * the result is averaged with what dst already holds.
 *
 * @param dst    destination, read and written
 * @param src    source; one extra column and one extra row beyond the block
 *               are read
 * @param stride distance between successive rows, shared by dst and src
 * @param h      number of rows to produce
 * @param x      horizontal fraction in eighths, asserted to be 0..7
 * @param y      vertical fraction in eighths, asserted to be 0..7
 */
static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst /* align 8 */,
                                        const uint8_t *src /* align 1 */,
                                        ptrdiff_t stride, int h, int x, int y)
{
    /* weights of the top-left, top-right, bottom-left, bottom-right samples */
    const int A = (8 - x) * (8 - y);
    const int B = (x)     * (8 - y);
    const int C = (8 - x) * (y);
    const int D = (x)     * (y);
    int row, col;

    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            dst[col] = avg2(dst[col], chroma_mc(col));
        dst += stride;
        src += stride;
    }
}
862 
/**
 * Bilinear chroma motion compensation, 4 pixels wide, no-rounding variant;
 * the result is averaged with what dst already holds.
 *
 * @param dst    destination, read and written
 * @param src    source; one extra column and one extra row beyond the block
 *               are read
 * @param stride distance between successive rows, shared by dst and src
 * @param h      number of rows to produce
 * @param x      horizontal fraction in eighths, asserted to be 0..7
 * @param y      vertical fraction in eighths, asserted to be 0..7
 */
static void avg_no_rnd_vc1_chroma_mc4_c(uint8_t *dst /* align 8 */,
                                        const uint8_t *src /* align 1 */,
                                        ptrdiff_t stride, int h, int x, int y)
{
    /* weights of the top-left, top-right, bottom-left, bottom-right samples */
    const int A = (8 - x) * (8 - y);
    const int B = (x)     * (8 - y);
    const int C = (8 - x) * (y);
    const int D = (x)     * (y);
    int row, col;

    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);

    for (row = 0; row < h; row++) {
        for (col = 0; col < 4; col++)
            dst[col] = avg2(dst[col], chroma_mc(col));
        dst += stride;
        src += stride;
    }
}
884 
885 #if CONFIG_WMV3IMAGE_DECODER || CONFIG_VC1IMAGE_DECODER
886 
/**
 * Horizontal sprite resampling with linear interpolation.
 *
 * The source position is a fixed-point number with 16 fractional bits. Each
 * output sample blends src[pos >> 16] and its right neighbour by the
 * fractional part, after which the position moves on by advance.
 *
 * @param dst     output samples, count of them are written
 * @param src     input line
 * @param offset  initial source position (16 fractional bits)
 * @param advance position increment per output sample (16 fractional bits)
 * @param count   number of output samples
 */
static void sprite_h_c(uint8_t *dst, const uint8_t *src, int offset,
                       int advance, int count)
{
    for (; count; count--, offset += advance) {
        const uint8_t *pair = src + (offset >> 16);
        const int frac      = offset & 0xFFFF;
        const int left      = pair[0];
        const int right     = pair[1];

        *dst++ = left + ((right - left) * frac >> 16);
    }
}
897 
898 static av_always_inline void sprite_v_template(uint8_t *dst,
899  const uint8_t *src1a,
900  const uint8_t *src1b,
901  int offset1,
902  int two_sprites,
903  const uint8_t *src2a,
904  const uint8_t *src2b,
905  int offset2,
906  int alpha, int scaled,
907  int width)
908 {
909  int a1, b1, a2, b2;
910  while (width--) {
911  a1 = *src1a++;
912  if (scaled) {
913  b1 = *src1b++;
914  a1 = a1 + ((b1 - a1) * offset1 >> 16);
915  }
916  if (two_sprites) {
917  a2 = *src2a++;
918  if (scaled > 1) {
919  b2 = *src2b++;
920  a2 = a2 + ((b2 - a2) * offset2 >> 16);
921  }
922  a1 = a1 + ((a2 - a1) * alpha >> 16);
923  }
924  *dst++ = a1;
925  }
926 }
927 
/* Vertical pass for one sprite: interpolate between rows src1a and src1b. */
static void sprite_v_single_c(uint8_t *dst, const uint8_t *src1a,
                              const uint8_t *src1b,
                              int offset, int width)
{
    sprite_v_template(dst,
                      src1a, src1b, offset,
                      0,                    /* no second sprite */
                      NULL, NULL, 0, 0,
                      1,                    /* sprite 1 interpolated */
                      width);
}
934 
/* Vertical pass for two sprites, neither interpolated: blend by alpha only. */
static void sprite_v_double_noscale_c(uint8_t *dst, const uint8_t *src1a,
                                      const uint8_t *src2a,
                                      int alpha, int width)
{
    sprite_v_template(dst,
                      src1a, NULL, 0,
                      1,                    /* second sprite present */
                      src2a, NULL, 0, alpha,
                      0,                    /* no interpolation */
                      width);
}
941 
/* Vertical pass for two sprites where only sprite 1 is interpolated. */
static void sprite_v_double_onescale_c(uint8_t *dst,
                                       const uint8_t *src1a,
                                       const uint8_t *src1b,
                                       int offset1,
                                       const uint8_t *src2a,
                                       int alpha, int width)
{
    sprite_v_template(dst,
                      src1a, src1b, offset1,
                      1,                    /* second sprite present */
                      src2a, NULL, 0, alpha,
                      1,                    /* only sprite 1 interpolated */
                      width);
}
952 
/* Vertical pass for two sprites, both interpolated before blending. */
static void sprite_v_double_twoscale_c(uint8_t *dst,
                                       const uint8_t *src1a,
                                       const uint8_t *src1b,
                                       int offset1,
                                       const uint8_t *src2a,
                                       const uint8_t *src2b,
                                       int offset2,
                                       int alpha,
                                       int width)
{
    sprite_v_template(dst,
                      src1a, src1b, offset1,
                      1,                    /* second sprite present */
                      src2a, src2b, offset2, alpha,
                      2,                    /* both sprites interpolated */
                      width);
}
966 
967 #endif /* CONFIG_WMV3IMAGE_DECODER || CONFIG_VC1IMAGE_DECODER */
/*
 * Install the four C entry points for fractional position (X, Y) at index
 * X + 4 * Y of the put/avg tables of dsp: table [1] receives the 8x8
 * functions, table [0] the 16x16 ones.
 *
 * X and Y are pasted into the function names, so they must be single literal
 * tokens. The body is wrapped in do { } while (0) so that the macro behaves
 * as one statement, e.g. under an unbraced if.
 */
#define FN_ASSIGN(X, Y)                                                                   \
    do {                                                                                  \
        dsp->put_vc1_mspel_pixels_tab[1][(X) + 4 * (Y)] = put_vc1_mspel_mc##X##Y##_c;     \
        dsp->put_vc1_mspel_pixels_tab[0][(X) + 4 * (Y)] = put_vc1_mspel_mc##X##Y##_16_c;  \
        dsp->avg_vc1_mspel_pixels_tab[1][(X) + 4 * (Y)] = avg_vc1_mspel_mc##X##Y##_c;     \
        dsp->avg_vc1_mspel_pixels_tab[0][(X) + 4 * (Y)] = avg_vc1_mspel_mc##X##Y##_16_c;  \
    } while (0)
973 
975 {
984 
989 
996 
997  dsp->put_vc1_mspel_pixels_tab[0][0] = put_pixels16x16_c;
998  dsp->avg_vc1_mspel_pixels_tab[0][0] = avg_pixels16x16_c;
999  dsp->put_vc1_mspel_pixels_tab[1][0] = put_pixels8x8_c;
1000  dsp->avg_vc1_mspel_pixels_tab[1][0] = avg_pixels8x8_c;
1001  FN_ASSIGN(0, 1);
1002  FN_ASSIGN(0, 2);
1003  FN_ASSIGN(0, 3);
1004 
1005  FN_ASSIGN(1, 0);
1006  FN_ASSIGN(1, 1);
1007  FN_ASSIGN(1, 2);
1008  FN_ASSIGN(1, 3);
1009 
1010  FN_ASSIGN(2, 0);
1011  FN_ASSIGN(2, 1);
1012  FN_ASSIGN(2, 2);
1013  FN_ASSIGN(2, 3);
1014 
1015  FN_ASSIGN(3, 0);
1016  FN_ASSIGN(3, 1);
1017  FN_ASSIGN(3, 2);
1018  FN_ASSIGN(3, 3);
1019 
1024 
1025 #if CONFIG_WMV3IMAGE_DECODER || CONFIG_VC1IMAGE_DECODER
1026  dsp->sprite_h = sprite_h_c;
1027  dsp->sprite_v_single = sprite_v_single_c;
1028  dsp->sprite_v_double_noscale = sprite_v_double_noscale_c;
1029  dsp->sprite_v_double_onescale = sprite_v_double_onescale_c;
1030  dsp->sprite_v_double_twoscale = sprite_v_double_twoscale_c;
1031 #endif /* CONFIG_WMV3IMAGE_DECODER || CONFIG_VC1IMAGE_DECODER */
1032 
1035 
1036 #if ARCH_AARCH64
1038 #elif ARCH_ARM
1039  ff_vc1dsp_init_arm(dsp);
1040 #elif ARCH_PPC
1041  ff_vc1dsp_init_ppc(dsp);
1042 #elif ARCH_RISCV
1043  ff_vc1dsp_init_riscv(dsp);
1044 #elif ARCH_X86
1045  ff_vc1dsp_init_x86(dsp);
1046 #elif ARCH_MIPS
1047  ff_vc1dsp_init_mips(dsp);
1048 #elif ARCH_LOONGARCH
1050 #endif
1051 }
VC1DSPContext::sprite_v_double_noscale
void(* sprite_v_double_noscale)(uint8_t *dst, const uint8_t *src1a, const uint8_t *src2a, int alpha, int width)
Definition: vc1dsp.h:69
A
#define A(x)
Definition: vpx_arith.h:28
op_put
#define op_put(a, b)
Definition: vc1dsp.c:731
VC1_MSPEL_MC
#define VC1_MSPEL_MC(OP, OP4, OPNAME)
Definition: vc1dsp.c:593
VC1DSPContext::vc1_v_loop_filter16
void(* vc1_v_loop_filter16)(uint8_t *src, ptrdiff_t stride, int pq)
Definition: vc1dsp.h:53
r
const char * r
Definition: vf_curves.c:126
vc1dsp.h
ff_vc1dsp_init_aarch64
av_cold void ff_vc1dsp_init_aarch64(VC1DSPContext *dsp)
Definition: vc1dsp_init_aarch64.c:113
op4_put
#define op4_put(a, b)
Definition: vc1dsp.c:734
VC1DSPContext::vc1_inv_trans_4x4
void(* vc1_inv_trans_4x4)(uint8_t *dest, ptrdiff_t stride, int16_t *block)
Definition: vc1dsp.h:40
VC1DSPContext::avg_vc1_mspel_pixels_tab
vc1op_pixels_func avg_vc1_mspel_pixels_tab[2][16]
Definition: vc1dsp.h:60
VC1_MSPEL_FILTER_16B
#define VC1_MSPEL_FILTER_16B(DIR, TYPE)
Definition: vc1dsp.c:548
VC1DSPContext::avg_no_rnd_vc1_chroma_pixels_tab
h264_chroma_mc_func avg_no_rnd_vc1_chroma_pixels_tab[3]
Definition: vc1dsp.h:64
step
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But a word about which is also called distortion Distortion can be quantified by almost any quality measurement one chooses the sum of squared differences is used but more complex methods that consider psychovisual effects can be used as well It makes no difference in this discussion First step
Definition: rate_distortion.txt:58
vc1_v_overlap_c
static void vc1_v_overlap_c(uint8_t *src, ptrdiff_t stride)
Definition: vc1dsp.c:40
b
#define b
Definition: input.c:41
ff_startcode_find_candidate_c
int ff_startcode_find_candidate_c(const uint8_t *buf, int size)
Definition: startcode.c:32
t1
#define t1
Definition: regdef.h:29
VC1DSPContext::vc1_inv_trans_8x8_dc
void(* vc1_inv_trans_8x8_dc)(uint8_t *dest, ptrdiff_t stride, int16_t *block)
Definition: vc1dsp.h:41
chroma_mc
#define chroma_mc(a)
Definition: vc1dsp.c:786
ff_vc1dsp_init_mips
av_cold void ff_vc1dsp_init_mips(VC1DSPContext *dsp)
Definition: vc1dsp_init_mips.c:31
vc1_loop_filter
static void vc1_loop_filter(uint8_t *src, int step, ptrdiff_t stride, int len, int pq)
VC-1 in-loop deblocking filter.
Definition: vc1dsp.c:199
VC1DSPContext::put_no_rnd_vc1_chroma_pixels_tab
h264_chroma_mc_func put_no_rnd_vc1_chroma_pixels_tab[3]
Definition: vc1dsp.h:63
VC1DSPContext::vc1_inv_trans_4x4_dc
void(* vc1_inv_trans_4x4_dc)(uint8_t *dest, ptrdiff_t stride, int16_t *block)
Definition: vc1dsp.h:44
vc1_inv_trans_8x8_c
static void vc1_inv_trans_8x8_c(int16_t block[64])
Definition: vc1dsp.c:268
vc1_inv_trans_4x8_c
static void vc1_inv_trans_4x8_c(uint8_t *dest, ptrdiff_t stride, int16_t *block)
Definition: vc1dsp.c:432
vc1_mspel_filter
static av_always_inline int vc1_mspel_filter(const uint8_t *src, int stride, int mode, int r)
Definition: vc1dsp.c:573
D
D(D(float, sse)
Definition: rematrix_init.c:29
VC1DSPContext::vc1_h_overlap
void(* vc1_h_overlap)(uint8_t *src, ptrdiff_t stride)
Definition: vc1dsp.h:46
VC1DSPContext::vc1_v_loop_filter4
void(* vc1_v_loop_filter4)(uint8_t *src, ptrdiff_t stride, int pq)
Definition: vc1dsp.h:49
ff_vc1dsp_init_arm
av_cold void ff_vc1dsp_init_arm(VC1DSPContext *dsp)
Definition: vc1dsp_init_arm.c:27
b1
static double b1(void *priv, double x, double y)
Definition: vf_xfade.c:2035
PUT_VC1_MSPEL
#define PUT_VC1_MSPEL(a, b)
Definition: vc1dsp.c:741
VC1DSPContext::vc1_h_loop_filter4
void(* vc1_h_loop_filter4)(uint8_t *src, ptrdiff_t stride, int pq)
Definition: vc1dsp.h:50
avg_no_rnd_vc1_chroma_mc4_c
static void avg_no_rnd_vc1_chroma_mc4_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int h, int x, int y)
Definition: vc1dsp.c:863
a1
#define a1
Definition: regdef.h:47
C
s EdgeDetect Foobar g libavfilter vf_edgedetect c libavfilter vf_foobar c edit libavfilter and add an entry for foobar following the pattern of the other filters edit libavfilter allfilters and add an entry for foobar following the pattern of the other filters configure make j< whatever > ffmpeg ffmpeg i you should get a foobar png with Lena edge detected That s your new playground is ready Some little details about what s going which in turn will define variables for the build system and the C
Definition: writing_filters.txt:58
avassert.h
put_no_rnd_vc1_chroma_mc4_c
static void put_no_rnd_vc1_chroma_mc4_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int h, int x, int y)
Definition: vc1dsp.c:815
rnd
#define rnd()
Definition: checkasm.h:163
VC1DSPContext::vc1_inv_trans_8x4_dc
void(* vc1_inv_trans_8x4_dc)(uint8_t *dest, ptrdiff_t stride, int16_t *block)
Definition: vc1dsp.h:42
av_cold
#define av_cold
Definition: attributes.h:90
VC1DSPContext::sprite_v_double_twoscale
void(* sprite_v_double_twoscale)(uint8_t *dst, const uint8_t *src1a, const uint8_t *src1b, int offset1, const uint8_t *src2a, const uint8_t *src2b, int offset2, int alpha, int width)
Definition: vc1dsp.h:72
clip
clip
Definition: af_crystalizer.c:121
VC1DSPContext::vc1_h_loop_filter16
void(* vc1_h_loop_filter16)(uint8_t *src, ptrdiff_t stride, int pq)
Definition: vc1dsp.h:54
width
#define width
intreadwrite.h
vc1_v_loop_filter8_c
static void vc1_v_loop_filter8_c(uint8_t *src, ptrdiff_t stride, int pq)
Definition: vc1dsp.c:226
VC1DSPContext::sprite_h
void(* sprite_h)(uint8_t *dst, const uint8_t *src, int offset, int advance, int count)
Definition: vc1dsp.h:67
t7
#define t7
Definition: regdef.h:35
B
#define B
Definition: huffyuv.h:42
vc1_filter_line
static av_always_inline int vc1_filter_line(uint8_t *src, ptrdiff_t stride, int pq)
VC-1 in-loop deblocking filter for one line.
Definition: vc1dsp.c:150
vc1_h_loop_filter4_c
static void vc1_h_loop_filter4_c(uint8_t *src, ptrdiff_t stride, int pq)
Definition: vc1dsp.c:221
vc1_inv_trans_4x4_dc_c
static void vc1_inv_trans_4x4_dc_c(uint8_t *dest, ptrdiff_t stride, int16_t *block)
Definition: vc1dsp.c:488
VC1DSPContext::vc1_v_overlap
void(* vc1_v_overlap)(uint8_t *src, ptrdiff_t stride)
Definition: vc1dsp.h:45
FFABS
#define FFABS(a)
Absolute value, Note, INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
Definition: common.h:72
vc1_inv_trans_4x8_dc_c
static void vc1_inv_trans_4x8_dc_c(uint8_t *dest, ptrdiff_t stride, int16_t *block)
Definition: vc1dsp.c:415
vc1_inv_trans_8x8_dc_c
static void vc1_inv_trans_8x8_dc_c(uint8_t *dest, ptrdiff_t stride, int16_t *block)
Definition: vc1dsp.c:247
NULL
#define NULL
Definition: coverity.c:32
VC1DSPContext::vc1_inv_trans_8x4
void(* vc1_inv_trans_8x4)(uint8_t *dest, ptrdiff_t stride, int16_t *block)
Definition: vc1dsp.h:38
t5
#define t5
Definition: regdef.h:33
vc1_unescape_buffer
static av_always_inline int vc1_unescape_buffer(const uint8_t *src, int size, uint8_t *dst)
Definition: vc1_common.h:70
t6
#define t6
Definition: regdef.h:34
qpeldsp.h
VC1DSPContext::vc1_inv_trans_4x8_dc
void(* vc1_inv_trans_4x8_dc)(uint8_t *dest, ptrdiff_t stride, int16_t *block)
Definition: vc1dsp.h:43
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
vc1_h_loop_filter8_c
static void vc1_h_loop_filter8_c(uint8_t *src, ptrdiff_t stride, int pq)
Definition: vc1dsp.c:231
put_no_rnd_vc1_chroma_mc8_c
static void put_no_rnd_vc1_chroma_mc8_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int h, int x, int y)
Definition: vc1dsp.c:789
ff_vc1dsp_init_x86
void ff_vc1dsp_init_x86(VC1DSPContext *dsp)
Definition: vc1dsp_init.c:101
ff_vc1dsp_init
av_cold void ff_vc1dsp_init(VC1DSPContext *dsp)
Definition: vc1dsp.c:974
startcode.h
dc
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32 - hcoeff[1] - hcoeff[2] - ... a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2} an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||......... intra?||||:Block01 :yes no||||:Block02 :....... 
..........||||:Block03 ::y DC ::ref index:||||:Block04 ::cb DC ::motion x :||||......... :cr DC ::motion y :||||....... ..........|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------ ------------ ------------|||Y subbands||Cb subbands||Cr subbands||||--- ---||--- ---||--- ---|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------ ------------ ------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction ------------|\ Dequantization ------------------- \||Reference frames|\ IDWT|------- -------|Motion \|||Frame 0||Frame 1||Compensation . OBMC v -------|------- -------|--------------. \------> Frame n output Frame Frame<----------------------------------/|...|------------------- Range Coder:============Binary Range Coder:------------------- The implemented range coder is an adapted version based upon "Range encoding: an algorithm for removing redundancy from a digitised message." by G. N. N. Martin. The symbols encoded by the Snow range coder are bits(0|1). The associated probabilities are not fix but change depending on the symbol mix seen so far. 
bit seen|new state ---------+----------------------------------------------- 0|256 - state_transition_table[256 - old_state];1|state_transition_table[old_state];state_transition_table={ 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:------------------------- FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector 
Prediction:=========================1. the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled top and top right vectors is used as motion vector prediction the used motion vector is the sum of the predictor and(mvx_diff, mvy_diff) *mv_scale Intra DC Prediction block[y][x] dc[1]
Definition: snow.txt:400
h264chroma.h
op4_avg
#define op4_avg(a, b)
Definition: vc1dsp.c:733
t8
#define t8
Definition: regdef.h:53
b2
static double b2(void *priv, double x, double y)
Definition: vf_xfade.c:2036
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
VC1DSPContext::sprite_v_single
void(* sprite_v_single)(uint8_t *dst, const uint8_t *src1a, const uint8_t *src1b, int offset, int width)
Definition: vc1dsp.h:68
op_avg
#define op_avg(a, b)
Definition: vc1dsp.c:732
a0
#define a0
Definition: regdef.h:46
vc1_common.h
VC1DSPContext::vc1_inv_trans_8x8
void(* vc1_inv_trans_8x8)(int16_t *b)
Definition: vc1dsp.h:37
VC1DSPContext::startcode_find_candidate
int(* startcode_find_candidate)(const uint8_t *buf, int size)
Search buf from the start for up to size bytes.
Definition: vc1dsp.h:82
av_assert2
#define av_assert2(cond)
assert() equivalent, that does lie in speed critical code.
Definition: avassert.h:67
VC1DSPContext::sprite_v_double_onescale
void(* sprite_v_double_onescale)(uint8_t *dst, const uint8_t *src1a, const uint8_t *src1b, int offset1, const uint8_t *src2a, int alpha, int width)
Definition: vc1dsp.h:70
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:255
VC1DSPContext::vc1_unescape_buffer
int(* vc1_unescape_buffer)(const uint8_t *src, int size, uint8_t *dst)
Definition: vc1dsp.h:85
t4
#define t4
Definition: regdef.h:32
t3
#define t3
Definition: regdef.h:31
vc1_h_s_overlap_c
static void vc1_h_s_overlap_c(int16_t *left, int16_t *right, ptrdiff_t left_stride, ptrdiff_t right_stride, int flags)
Definition: vc1dsp.c:113
a2
#define a2
Definition: regdef.h:48
common.h
vc1_inv_trans_4x4_c
static void vc1_inv_trans_4x4_c(uint8_t *dest, ptrdiff_t stride, int16_t *block)
Definition: vc1dsp.c:505
FN_ASSIGN
#define FN_ASSIGN(X, Y)
Definition: vc1dsp.c:968
av_always_inline
#define av_always_inline
Definition: attributes.h:49
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
len
int len
Definition: vorbis_enc_data.h:426
vc1_v_s_overlap_c
static void vc1_v_s_overlap_c(int16_t *top, int16_t *bottom)
Definition: vc1dsp.c:87
ff_vc1dsp_init_riscv
av_cold void ff_vc1dsp_init_riscv(VC1DSPContext *dsp)
Definition: vc1dsp_init.c:33
stride
#define stride
Definition: h264pred_template.c:537
rnd_avg.h
VC1DSPContext
Definition: vc1dsp.h:35
VC1DSPContext::vc1_h_loop_filter8
void(* vc1_h_loop_filter8)(uint8_t *src, ptrdiff_t stride, int pq)
Definition: vc1dsp.h:52
VC1DSPContext::put_vc1_mspel_pixels_tab
vc1op_pixels_func put_vc1_mspel_pixels_tab[2][16]
Definition: vc1dsp.h:59
left
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32 - hcoeff[1] - hcoeff[2] - ... a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2} an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||......... intra?||||:Block01 :yes no||||:Block02 :....... 
..........||||:Block03 ::y DC ::ref index:||||:Block04 ::cb DC ::motion x :||||......... :cr DC ::motion y :||||....... ..........|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------ ------------ ------------|||Y subbands||Cb subbands||Cr subbands||||--- ---||--- ---||--- ---|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------ ------------ ------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction ------------|\ Dequantization ------------------- \||Reference frames|\ IDWT|------- -------|Motion \|||Frame 0||Frame 1||Compensation . OBMC v -------|------- -------|--------------. \------> Frame n output Frame Frame<----------------------------------/|...|------------------- Range Coder:============Binary Range Coder:------------------- The implemented range coder is an adapted version based upon "Range encoding: an algorithm for removing redundancy from a digitised message." by G. N. N. Martin. The symbols encoded by the Snow range coder are bits(0|1). The associated probabilities are not fix but change depending on the symbol mix seen so far. 
bit seen|new state ---------+----------------------------------------------- 0|256 - state_transition_table[256 - old_state];1|state_transition_table[old_state];state_transition_table={ 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:------------------------- FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector 
Prediction:=========================1. the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled left
Definition: snow.txt:386
avg2
#define avg2(a, b)
Definition: vc1dsp.c:836
t2
#define t2
Definition: regdef.h:30
mode
mode
Definition: ebur128.h:83
VC1DSPContext::vc1_v_loop_filter8
void(* vc1_v_loop_filter8)(uint8_t *src, ptrdiff_t stride, int pq)
Definition: vc1dsp.h:51
vc1_h_overlap_c
static void vc1_h_overlap_c(uint8_t *src, ptrdiff_t stride)
Definition: vc1dsp.c:64
temp
else temp
Definition: vf_mcdeint.c:263
av_clip_uint8
#define av_clip_uint8
Definition: common.h:104
avg_no_rnd_vc1_chroma_mc8_c
static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int h, int x, int y)
Definition: vc1dsp.c:837
ff_vc1dsp_init_ppc
av_cold void ff_vc1dsp_init_ppc(VC1DSPContext *dsp)
Definition: vc1dsp_altivec.c:354
alpha
static const int16_t alpha[]
Definition: ilbcdata.h:55
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
d
d
Definition: ffmpeg_filter.c:409
flags
#define flags(name, subs,...)
Definition: cbs_av1.c:474
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
vc1_v_loop_filter4_c
static void vc1_v_loop_filter4_c(uint8_t *src, ptrdiff_t stride, int pq)
Definition: vc1dsp.c:216
h
h
Definition: vp9dsp_template.c:2038
vc1_v_loop_filter16_c
static void vc1_v_loop_filter16_c(uint8_t *src, ptrdiff_t stride, int pq)
Definition: vc1dsp.c:236
vc1_inv_trans_8x4_c
static void vc1_inv_trans_8x4_c(uint8_t *dest, ptrdiff_t stride, int16_t *block)
Definition: vc1dsp.c:359
VC1DSPContext::vc1_inv_trans_4x8
void(* vc1_inv_trans_4x8)(uint8_t *dest, ptrdiff_t stride, int16_t *block)
Definition: vc1dsp.h:39
vc1_inv_trans_8x4_dc_c
static void vc1_inv_trans_8x4_dc_c(uint8_t *dest, ptrdiff_t stride, int16_t *block)
Definition: vc1dsp.c:338
ff_vc1dsp_init_loongarch
av_cold void ff_vc1dsp_init_loongarch(VC1DSPContext *dsp)
Definition: vc1dsp_init_loongarch.c:37
a3
#define a3
Definition: regdef.h:49
VC1DSPContext::vc1_v_s_overlap
void(* vc1_v_s_overlap)(int16_t *top, int16_t *bottom)
Definition: vc1dsp.h:47
VC1DSPContext::vc1_h_s_overlap
void(* vc1_h_s_overlap)(int16_t *left, int16_t *right, ptrdiff_t left_stride, ptrdiff_t right_stride, int flags)
Definition: vc1dsp.h:48
vc1_h_loop_filter16_c
static void vc1_h_loop_filter16_c(uint8_t *src, ptrdiff_t stride, int pq)
Definition: vc1dsp.c:241