FFmpeg
vf_fspp.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
3  * Copyright (C) 2005 Nikolaj Poroshin <porosh3@psu.ru>
4  * Copyright (c) 2014 Arwa Arif <arwaarif1994@gmail.com>
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License along
19  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
20  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21  */
22 
23 /**
24  * @file
25  * Fast Simple Post-processing filter
26  * This implementation is based on an algorithm described in
27  * "Aria Nosratinia Embedded Post-Processing for
28  * Enhancement of Compressed Images (1999)"
29  * (http://www.utdallas.edu/~aria/papers/vlsisp99.pdf)
30  * Further, with splitting (I)DCT into horizontal/vertical passes, one of
31  * them can be performed once per block, not per pixel. This allows for much
32  * higher speed.
33  *
34  * Originally written by Michael Niedermayer and Nikolaj for the MPlayer
35  * project, and ported by Arwa Arif for FFmpeg.
36  */
37 
38 #include "libavutil/emms.h"
39 #include "libavutil/imgutils.h"
40 #include "libavutil/mem.h"
41 #include "libavutil/mem_internal.h"
42 #include "libavutil/opt.h"
43 #include "libavutil/pixdesc.h"
44 #include "internal.h"
45 #include "qp_table.h"
46 #include "vf_fspp.h"
47 #include "video.h"
48 
49 #define OFFSET(x) offsetof(FSPPContext, x)
50 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
51 static const AVOption fspp_options[] = {
52  { "quality", "set quality", OFFSET(log2_count), AV_OPT_TYPE_INT, {.i64 = 4}, 4, MAX_LEVEL, FLAGS },
53  { "qp", "force a constant quantizer parameter", OFFSET(qp), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 64, FLAGS },
54  { "strength", "set filter strength", OFFSET(strength), AV_OPT_TYPE_INT, {.i64 = 0}, -15, 32, FLAGS },
55  { "use_bframe_qp", "use B-frames' QP", OFFSET(use_bframe_qp), AV_OPT_TYPE_BOOL,{.i64 = 0}, 0, 1, FLAGS },
56  { NULL }
57 };
58 
60 
61 DECLARE_ALIGNED(32, static const uint8_t, dither)[8][8] = {
62  { 0, 48, 12, 60, 3, 51, 15, 63, },
63  { 32, 16, 44, 28, 35, 19, 47, 31, },
64  { 8, 56, 4, 52, 11, 59, 7, 55, },
65  { 40, 24, 36, 20, 43, 27, 39, 23, },
66  { 2, 50, 14, 62, 1, 49, 13, 61, },
67  { 34, 18, 46, 30, 33, 17, 45, 29, },
68  { 10, 58, 6, 54, 9, 57, 5, 53, },
69  { 42, 26, 38, 22, 41, 25, 37, 21, },
70 };
71 
/*
 * Base threshold per DCT coefficient, one row per group of eight.
 * filter_frame() rescales these by (16 + strength) / 71 before use.
 *
 * The values (maximum 296) must not be too high: that makes the result depend
 * too strongly on the quantizer, or possibly overflows (to be checked), which
 * shows up as flashing.
 */
static const short custom_threshold[64] = {
     71, 296, 295, 237,  71,  40,  38,  19,
    245, 193, 185, 121, 102,  73,  53,  27,
    158, 129, 141, 107,  97,  73,  50,  26,
    102, 116, 109,  98,  82,  66,  45,  23,
     71,  94,  95,  81,  70,  56,  38,  20,
     56,  77,  74,  66,  56,  44,  30,  15,
     38,  53,  50,  45,  38,  30,  21,  11,
     20,  27,  26,  23,  20,  15,  11,   5
};
85 
86 //This func reads from 1 slice, 1 and clears 0 & 1
87 static void store_slice_c(uint8_t *dst, int16_t *src,
88  ptrdiff_t dst_stride, ptrdiff_t src_stride,
89  ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
90 {
91  int y, x;
92 #define STORE(pos) \
93  temp = (src[x + pos] + (d[pos] >> log2_scale)) >> (6 - log2_scale); \
94  src[x + pos] = src[x + pos - 8 * src_stride] = 0; \
95  if (temp & 0x100) temp = ~(temp >> 31); \
96  dst[x + pos] = temp;
97 
98  for (y = 0; y < height; y++) {
99  const uint8_t *d = dither[y];
100  for (x = 0; x < width; x += 8) {
101  int temp;
102  STORE(0);
103  STORE(1);
104  STORE(2);
105  STORE(3);
106  STORE(4);
107  STORE(5);
108  STORE(6);
109  STORE(7);
110  }
111  src += src_stride;
112  dst += dst_stride;
113  }
114 }
115 
116 //This func reads from 2 slices, 0 & 2 and clears 2-nd
117 static void store_slice2_c(uint8_t *dst, int16_t *src,
118  ptrdiff_t dst_stride, ptrdiff_t src_stride,
119  ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
120 {
121  int y, x;
122 #define STORE2(pos) \
123  temp = (src[x + pos] + src[x + pos + 16 * src_stride] + (d[pos] >> log2_scale)) >> (6 - log2_scale); \
124  src[x + pos + 16 * src_stride] = 0; \
125  if (temp & 0x100) temp = ~(temp >> 31); \
126  dst[x + pos] = temp;
127 
128  for (y = 0; y < height; y++) {
129  const uint8_t *d = dither[y];
130  for (x = 0; x < width; x += 8) {
131  int temp;
132  STORE2(0);
133  STORE2(1);
134  STORE2(2);
135  STORE2(3);
136  STORE2(4);
137  STORE2(5);
138  STORE2(6);
139  STORE2(7);
140  }
141  src += src_stride;
142  dst += dst_stride;
143  }
144 }
145 
/*
 * Scale the 64-entry threshold matrix by a quantizer.
 *
 * thr_adr_noq  input: 64 unscaled thresholds
 * thr_adr      output: 64 thresholds, each q * input (stored as int16_t)
 * q            quantizer to multiply by
 */
static void mul_thrmat_c(int16_t *thr_adr_noq, int16_t *thr_adr, int q)
{
    int a;

    for (a = 0; a < 64; a++)
        thr_adr[a] = q * thr_adr_noq[a];
}
152 
153 static void filter(FSPPContext *p, uint8_t *dst, uint8_t *src,
154  int dst_stride, int src_stride,
155  int width, int height,
156  uint8_t *qp_store, int qp_stride, int is_luma)
157 {
158  int x, x0, y, es, qy, t;
159 
160  const int stride = is_luma ? p->temp_stride : (width + 16);
161  const int step = 6 - p->log2_count;
162  const int qpsh = 4 - p->hsub * !is_luma;
163  const int qpsv = 4 - p->vsub * !is_luma;
164 
165  DECLARE_ALIGNED(32, int32_t, block_align)[4 * 8 * BLOCKSZ + 4 * 8 * BLOCKSZ];
166  int16_t *block = (int16_t *)block_align;
167  int16_t *block3 = (int16_t *)(block_align + 4 * 8 * BLOCKSZ);
168 
169  memset(block3, 0, 4 * 8 * BLOCKSZ);
170 
171  if (!src || !dst) return;
172 
173  for (y = 0; y < height; y++) {
174  int index = 8 + 8 * stride + y * stride;
175  memcpy(p->src + index, src + y * src_stride, width);
176  for (x = 0; x < 8; x++) {
177  p->src[index - x - 1] = p->src[index + x ];
178  p->src[index + width + x ] = p->src[index + width - x - 1];
179  }
180  }
181 
182  for (y = 0; y < 8; y++) {
183  memcpy(p->src + ( 7 - y ) * stride, p->src + ( y + 8 ) * stride, stride);
184  memcpy(p->src + (height + 8 + y) * stride, p->src + (height - y + 7) * stride, stride);
185  }
186  //FIXME (try edge emu)
187 
188  for (y = 8; y < 24; y++)
189  memset(p->temp + 8 + y * stride, 0, width * sizeof(int16_t));
190 
191  for (y = step; y < height + 8; y += step) { //step= 1,2
192  const int y1 = y - 8 + step; //l5-7 l4-6;
193  qy = y - 4;
194 
195  if (qy > height - 1) qy = height - 1;
196  if (qy < 0) qy = 0;
197 
198  qy = (qy >> qpsv) * qp_stride;
199  p->row_fdct(block, p->src + y * stride + 2 - (y&1), stride, 2);
200 
201  for (x0 = 0; x0 < width + 8 - 8 * (BLOCKSZ - 1); x0 += 8 * (BLOCKSZ - 1)) {
202  p->row_fdct(block + 8 * 8, p->src + y * stride + 8 + x0 + 2 - (y&1), stride, 2 * (BLOCKSZ - 1));
203 
204  if (p->qp)
205  p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block + 0 * 8, block3 + 0 * 8, 8 * (BLOCKSZ - 1)); //yes, this is a HOTSPOT
206  else
207  for (x = 0; x < 8 * (BLOCKSZ - 1); x += 8) {
208  t = x + x0 - 2; //correct t=x+x0-2-(y&1), but its the same
209 
210  if (t < 0) t = 0; //t always < width-2
211 
212  t = qp_store[qy + (t >> qpsh)];
213  t = ff_norm_qscale(t, p->qscale_type);
214 
215  if (t != p->prev_q) p->prev_q = t, p->mul_thrmat((int16_t *)(&p->threshold_mtx_noq[0]), (int16_t *)(&p->threshold_mtx[0]), t);
216  p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block + x * 8, block3 + x * 8, 8); //yes, this is a HOTSPOT
217  }
218  p->row_idct(block3 + 0 * 8, p->temp + (y & 15) * stride + x0 + 2 - (y & 1), stride, 2 * (BLOCKSZ - 1));
219  memmove(block, block + (BLOCKSZ - 1) * 64, 8 * 8 * sizeof(int16_t)); //cycling
220  memmove(block3, block3 + (BLOCKSZ - 1) * 64, 6 * 8 * sizeof(int16_t));
221  }
222 
223  es = width + 8 - x0; // 8, ...
224  if (es > 8)
225  p->row_fdct(block + 8 * 8, p->src + y * stride + 8 + x0 + 2 - (y & 1), stride, (es - 4) >> 2);
226 
227  p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block, block3, es&(~1));
228  if (es > 3)
229  p->row_idct(block3 + 0 * 8, p->temp + (y & 15) * stride + x0 + 2 - (y & 1), stride, es >> 2);
230 
231  if (!(y1 & 7) && y1) {
232  if (y1 & 8)
233  p->store_slice(dst + (y1 - 8) * dst_stride, p->temp + 8 + 8 * stride,
234  dst_stride, stride, width, 8, 5 - p->log2_count);
235  else
236  p->store_slice2(dst + (y1 - 8) * dst_stride, p->temp + 8 + 0 * stride,
237  dst_stride, stride, width, 8, 5 - p->log2_count);
238  }
239  }
240 
241  if (y & 7) { // height % 8 != 0
242  if (y & 8)
243  p->store_slice(dst + ((y - 8) & ~7) * dst_stride, p->temp + 8 + 8 * stride,
244  dst_stride, stride, width, y&7, 5 - p->log2_count);
245  else
246  p->store_slice2(dst + ((y - 8) & ~7) * dst_stride, p->temp + 8 + 0 * stride,
247  dst_stride, stride, width, y&7, 5 - p->log2_count);
248  }
249 }
250 
251 static void column_fidct_c(int16_t *thr_adr, int16_t *data, int16_t *output, int cnt)
252 {
253  int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
254  int_simd16_t tmp10, tmp11, tmp12, tmp13;
255  int_simd16_t z1,z2,z3,z4,z5, z10, z11, z12, z13;
256  int_simd16_t d0, d1, d2, d3, d4, d5, d6, d7;
257 
258  int16_t *dataptr;
259  int16_t *wsptr;
260  int16_t *threshold;
261  int ctr;
262 
263  dataptr = data;
264  wsptr = output;
265 
266  for (; cnt > 0; cnt -= 2) { //start positions
267  threshold = (int16_t *)thr_adr;//threshold_mtx
268  for (ctr = DCTSIZE; ctr > 0; ctr--) {
269  // Process columns from input, add to output.
270  tmp0 = dataptr[DCTSIZE * 0] + dataptr[DCTSIZE * 7];
271  tmp7 = dataptr[DCTSIZE * 0] - dataptr[DCTSIZE * 7];
272 
273  tmp1 = dataptr[DCTSIZE * 1] + dataptr[DCTSIZE * 6];
274  tmp6 = dataptr[DCTSIZE * 1] - dataptr[DCTSIZE * 6];
275 
276  tmp2 = dataptr[DCTSIZE * 2] + dataptr[DCTSIZE * 5];
277  tmp5 = dataptr[DCTSIZE * 2] - dataptr[DCTSIZE * 5];
278 
279  tmp3 = dataptr[DCTSIZE * 3] + dataptr[DCTSIZE * 4];
280  tmp4 = dataptr[DCTSIZE * 3] - dataptr[DCTSIZE * 4];
281 
282  // Even part of FDCT
283 
284  tmp10 = tmp0 + tmp3;
285  tmp13 = tmp0 - tmp3;
286  tmp11 = tmp1 + tmp2;
287  tmp12 = tmp1 - tmp2;
288 
289  d0 = tmp10 + tmp11;
290  d4 = tmp10 - tmp11;
291 
292  z1 = MULTIPLY16H((tmp12 + tmp13) << 2, FIX_0_707106781);
293  d2 = tmp13 + z1;
294  d6 = tmp13 - z1;
295 
296  // Even part of IDCT
297 
298  THRESHOLD(tmp0, d0, threshold[0 * 8]);
299  THRESHOLD(tmp1, d2, threshold[2 * 8]);
300  THRESHOLD(tmp2, d4, threshold[4 * 8]);
301  THRESHOLD(tmp3, d6, threshold[6 * 8]);
302  tmp0 += 2;
303  tmp10 = (tmp0 + tmp2) >> 2;
304  tmp11 = (tmp0 - tmp2) >> 2;
305 
306  tmp13 = (tmp1 + tmp3) >>2; //+2 ! (psnr decides)
307  tmp12 = MULTIPLY16H((tmp1 - tmp3), FIX_1_414213562_A) - tmp13; //<<2
308 
309  tmp0 = tmp10 + tmp13; //->temps
310  tmp3 = tmp10 - tmp13; //->temps
311  tmp1 = tmp11 + tmp12; //->temps
312  tmp2 = tmp11 - tmp12; //->temps
313 
314  // Odd part of FDCT
315 
316  tmp10 = tmp4 + tmp5;
317  tmp11 = tmp5 + tmp6;
318  tmp12 = tmp6 + tmp7;
319 
320  z5 = MULTIPLY16H((tmp10 - tmp12) << 2, FIX_0_382683433);
321  z2 = MULTIPLY16H(tmp10 << 2, FIX_0_541196100) + z5;
322  z4 = MULTIPLY16H(tmp12 << 2, FIX_1_306562965) + z5;
323  z3 = MULTIPLY16H(tmp11 << 2, FIX_0_707106781);
324 
325  z11 = tmp7 + z3;
326  z13 = tmp7 - z3;
327 
328  d5 = z13 + z2;
329  d3 = z13 - z2;
330  d1 = z11 + z4;
331  d7 = z11 - z4;
332 
333  // Odd part of IDCT
334 
335  THRESHOLD(tmp4, d1, threshold[1 * 8]);
336  THRESHOLD(tmp5, d3, threshold[3 * 8]);
337  THRESHOLD(tmp6, d5, threshold[5 * 8]);
338  THRESHOLD(tmp7, d7, threshold[7 * 8]);
339 
340  //Simd version uses here a shortcut for the tmp5,tmp6,tmp7 == 0
341  z13 = tmp6 + tmp5;
342  z10 = (tmp6 - tmp5) << 1;
343  z11 = tmp4 + tmp7;
344  z12 = (tmp4 - tmp7) << 1;
345 
346  tmp7 = (z11 + z13) >> 2; //+2 !
347  tmp11 = MULTIPLY16H((z11 - z13) << 1, FIX_1_414213562);
348  z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065);
349  tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;
350  tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - !!
351 
352  tmp6 = tmp12 - tmp7;
353  tmp5 = tmp11 - tmp6;
354  tmp4 = tmp10 + tmp5;
355 
356  wsptr[DCTSIZE * 0] += (tmp0 + tmp7);
357  wsptr[DCTSIZE * 1] += (tmp1 + tmp6);
358  wsptr[DCTSIZE * 2] += (tmp2 + tmp5);
359  wsptr[DCTSIZE * 3] += (tmp3 - tmp4);
360  wsptr[DCTSIZE * 4] += (tmp3 + tmp4);
361  wsptr[DCTSIZE * 5] += (tmp2 - tmp5);
362  wsptr[DCTSIZE * 6] = (tmp1 - tmp6);
363  wsptr[DCTSIZE * 7] = (tmp0 - tmp7);
364  //
365  dataptr++; //next column
366  wsptr++;
367  threshold++;
368  }
369  dataptr += 8; //skip each second start pos
370  wsptr += 8;
371  }
372 }
373 
374 static void row_idct_c(int16_t *workspace, int16_t *output_adr, ptrdiff_t output_stride, int cnt)
375 {
376  int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
377  int_simd16_t tmp10, tmp11, tmp12, tmp13;
378  int_simd16_t z5, z10, z11, z12, z13;
379  int16_t *outptr;
380  int16_t *wsptr;
381 
382  cnt *= 4;
383  wsptr = workspace;
384  outptr = output_adr;
385  for (; cnt > 0; cnt--) {
386  // Even part
387  //Simd version reads 4x4 block and transposes it
388  tmp10 = wsptr[2] + wsptr[3];
389  tmp11 = wsptr[2] - wsptr[3];
390 
391  tmp13 = wsptr[0] + wsptr[1];
392  tmp12 = (MULTIPLY16H(wsptr[0] - wsptr[1], FIX_1_414213562_A) << 2) - tmp13;//this shift order to avoid overflow
393 
394  tmp0 = tmp10 + tmp13; //->temps
395  tmp3 = tmp10 - tmp13; //->temps
396  tmp1 = tmp11 + tmp12;
397  tmp2 = tmp11 - tmp12;
398 
399  // Odd part
400  //Also transpose, with previous:
401  // ---- ---- ||||
402  // ---- ---- idct ||||
403  // ---- ---- ---> ||||
404  // ---- ---- ||||
405  z13 = wsptr[4] + wsptr[5];
406  z10 = wsptr[4] - wsptr[5];
407  z11 = wsptr[6] + wsptr[7];
408  z12 = wsptr[6] - wsptr[7];
409 
410  tmp7 = z11 + z13;
411  tmp11 = MULTIPLY16H(z11 - z13, FIX_1_414213562);
412 
413  z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065);
414  tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;
415  tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - FIX_
416 
417  tmp6 = (tmp12 << 3) - tmp7;
418  tmp5 = (tmp11 << 3) - tmp6;
419  tmp4 = (tmp10 << 3) + tmp5;
420 
421  // Final output stage: descale and write column
422  outptr[0 * output_stride] += DESCALE(tmp0 + tmp7, 3);
423  outptr[1 * output_stride] += DESCALE(tmp1 + tmp6, 3);
424  outptr[2 * output_stride] += DESCALE(tmp2 + tmp5, 3);
425  outptr[3 * output_stride] += DESCALE(tmp3 - tmp4, 3);
426  outptr[4 * output_stride] += DESCALE(tmp3 + tmp4, 3);
427  outptr[5 * output_stride] += DESCALE(tmp2 - tmp5, 3);
428  outptr[6 * output_stride] += DESCALE(tmp1 - tmp6, 3); //no += ?
429  outptr[7 * output_stride] += DESCALE(tmp0 - tmp7, 3); //no += ?
430  outptr++;
431 
432  wsptr += DCTSIZE; // advance pointer to next row
433  }
434 }
435 
436 static void row_fdct_c(int16_t *data, const uint8_t *pixels, ptrdiff_t line_size, int cnt)
437 {
438  int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
439  int_simd16_t tmp10, tmp11, tmp12, tmp13;
440  int_simd16_t z1, z2, z3, z4, z5, z11, z13;
441  int16_t *dataptr;
442 
443  cnt *= 4;
444  // Pass 1: process rows.
445 
446  dataptr = data;
447  for (; cnt > 0; cnt--) {
448  tmp0 = pixels[line_size * 0] + pixels[line_size * 7];
449  tmp7 = pixels[line_size * 0] - pixels[line_size * 7];
450  tmp1 = pixels[line_size * 1] + pixels[line_size * 6];
451  tmp6 = pixels[line_size * 1] - pixels[line_size * 6];
452  tmp2 = pixels[line_size * 2] + pixels[line_size * 5];
453  tmp5 = pixels[line_size * 2] - pixels[line_size * 5];
454  tmp3 = pixels[line_size * 3] + pixels[line_size * 4];
455  tmp4 = pixels[line_size * 3] - pixels[line_size * 4];
456 
457  // Even part
458 
459  tmp10 = tmp0 + tmp3;
460  tmp13 = tmp0 - tmp3;
461  tmp11 = tmp1 + tmp2;
462  tmp12 = tmp1 - tmp2;
463  //Even columns are written first, this leads to different order of columns
464  //in column_fidct(), but they are processed independently, so all ok.
465  //Later in the row_idct() columns readed at the same order.
466  dataptr[2] = tmp10 + tmp11;
467  dataptr[3] = tmp10 - tmp11;
468 
469  z1 = MULTIPLY16H((tmp12 + tmp13) << 2, FIX_0_707106781);
470  dataptr[0] = tmp13 + z1;
471  dataptr[1] = tmp13 - z1;
472 
473  // Odd part
474 
475  tmp10 = (tmp4 + tmp5) << 2;
476  tmp11 = (tmp5 + tmp6) << 2;
477  tmp12 = (tmp6 + tmp7) << 2;
478 
479  z5 = MULTIPLY16H(tmp10 - tmp12, FIX_0_382683433);
480  z2 = MULTIPLY16H(tmp10, FIX_0_541196100) + z5;
481  z4 = MULTIPLY16H(tmp12, FIX_1_306562965) + z5;
482  z3 = MULTIPLY16H(tmp11, FIX_0_707106781);
483 
484  z11 = tmp7 + z3;
485  z13 = tmp7 - z3;
486 
487  dataptr[4] = z13 + z2;
488  dataptr[5] = z13 - z2;
489  dataptr[6] = z11 + z4;
490  dataptr[7] = z11 - z4;
491 
492  pixels++; // advance pointer to next column
493  dataptr += DCTSIZE;
494  }
495 }
496 
/*
 * Pixel-format list of the filter (presumably the formats it accepts —
 * TODO confirm against the filter definition).
 * NOTE(review): this copy shows no entries between the braces; listing lines
 * 498-504 are absent. Restore them from the upstream file before building.
 */
497 static const enum AVPixelFormat pix_fmts[] = {
505 };
506 
508 {
509  AVFilterContext *ctx = inlink->dst;
510  FSPPContext *fspp = ctx->priv;
511  const int h = FFALIGN(inlink->h + 16, 16);
513 
514  fspp->hsub = desc->log2_chroma_w;
515  fspp->vsub = desc->log2_chroma_h;
516 
517  fspp->temp_stride = FFALIGN(inlink->w + 16, 16);
518  fspp->temp = av_malloc_array(fspp->temp_stride, h * sizeof(*fspp->temp));
519  fspp->src = av_malloc_array(fspp->temp_stride, h * sizeof(*fspp->src));
520 
521  if (!fspp->temp || !fspp->src)
522  return AVERROR(ENOMEM);
523 
524  fspp->store_slice = store_slice_c;
526  fspp->mul_thrmat = mul_thrmat_c;
528  fspp->row_idct = row_idct_c;
529  fspp->row_fdct = row_fdct_c;
530 
531 #if ARCH_X86
532  ff_fspp_init_x86(fspp);
533 #endif
534 
535  return 0;
536 }
537 
539 {
540  AVFilterContext *ctx = inlink->dst;
541  FSPPContext *fspp = ctx->priv;
542  AVFilterLink *outlink = ctx->outputs[0];
543  AVFrame *out = in;
544 
545  int qp_stride = 0;
546  int8_t *qp_table = NULL;
547  int i, bias;
548  int ret = 0;
549  int custom_threshold_m[64];
550 
551  bias = (1 << 4) + fspp->strength;
552 
553  for (i = 0; i < 64; i++) //FIXME: tune custom_threshold[] and remove this !
554  custom_threshold_m[i] = (int)(custom_threshold[i] * (bias / 71.0) + 0.5);
555 
556  for (i = 0; i < 8; i++) {
557  fspp->threshold_mtx_noq[2 * i] = (uint64_t)custom_threshold_m[i * 8 + 2]
558  |(((uint64_t)custom_threshold_m[i * 8 + 6]) << 16)
559  |(((uint64_t)custom_threshold_m[i * 8 + 0]) << 32)
560  |(((uint64_t)custom_threshold_m[i * 8 + 4]) << 48);
561 
562  fspp->threshold_mtx_noq[2 * i + 1] = (uint64_t)custom_threshold_m[i * 8 + 5]
563  |(((uint64_t)custom_threshold_m[i * 8 + 3]) << 16)
564  |(((uint64_t)custom_threshold_m[i * 8 + 1]) << 32)
565  |(((uint64_t)custom_threshold_m[i * 8 + 7]) << 48);
566  }
567 
568  if (fspp->qp)
569  fspp->prev_q = fspp->qp, fspp->mul_thrmat((int16_t *)(&fspp->threshold_mtx_noq[0]), (int16_t *)(&fspp->threshold_mtx[0]), fspp->qp);
570 
571  /* if we are not in a constant user quantizer mode and we don't want to use
572  * the quantizers from the B-frames (B-frames often have a higher QP), we
573  * need to save the qp table from the last non B-frame; this is what the
574  * following code block does */
575  if (!fspp->qp && (fspp->use_bframe_qp || in->pict_type != AV_PICTURE_TYPE_B)) {
576  ret = ff_qp_table_extract(in, &qp_table, &qp_stride, NULL, &fspp->qscale_type);
577  if (ret < 0) {
578  av_frame_free(&in);
579  return ret;
580  }
581 
582  if (!fspp->use_bframe_qp && in->pict_type != AV_PICTURE_TYPE_B) {
583  av_freep(&fspp->non_b_qp_table);
584  fspp->non_b_qp_table = qp_table;
585  fspp->non_b_qp_stride = qp_stride;
586  }
587  }
588 
589  if (fspp->log2_count && !ctx->is_disabled) {
590  if (!fspp->use_bframe_qp && fspp->non_b_qp_table) {
591  qp_table = fspp->non_b_qp_table;
592  qp_stride = fspp->non_b_qp_stride;
593  }
594 
595  if (qp_table || fspp->qp) {
596  const int cw = AV_CEIL_RSHIFT(inlink->w, fspp->hsub);
597  const int ch = AV_CEIL_RSHIFT(inlink->h, fspp->vsub);
598 
599  /* get a new frame if in-place is not possible or if the dimensions
600  * are not multiple of 8 */
601  if (!av_frame_is_writable(in) || (inlink->w & 7) || (inlink->h & 7)) {
602  const int aligned_w = FFALIGN(inlink->w, 8);
603  const int aligned_h = FFALIGN(inlink->h, 8);
604 
605  out = ff_get_video_buffer(outlink, aligned_w, aligned_h);
606  if (!out) {
607  av_frame_free(&in);
608  ret = AVERROR(ENOMEM);
609  goto finish;
610  }
612  out->width = in->width;
613  out->height = in->height;
614  }
615 
616  filter(fspp, out->data[0], in->data[0], out->linesize[0], in->linesize[0],
617  inlink->w, inlink->h, qp_table, qp_stride, 1);
618  filter(fspp, out->data[1], in->data[1], out->linesize[1], in->linesize[1],
619  cw, ch, qp_table, qp_stride, 0);
620  filter(fspp, out->data[2], in->data[2], out->linesize[2], in->linesize[2],
621  cw, ch, qp_table, qp_stride, 0);
622  emms_c();
623  }
624  }
625 
626  if (in != out) {
627  if (in->data[3])
628  av_image_copy_plane(out->data[3], out->linesize[3],
629  in ->data[3], in ->linesize[3],
630  inlink->w, inlink->h);
631  av_frame_free(&in);
632  }
633  ret = ff_filter_frame(outlink, out);
634 finish:
635  if (qp_table != fspp->non_b_qp_table)
636  av_freep(&qp_table);
637  return ret;
638 }
639 
641 {
642  FSPPContext *fspp = ctx->priv;
643  av_freep(&fspp->temp);
644  av_freep(&fspp->src);
645  av_freep(&fspp->non_b_qp_table);
646 }
647 
648 static const AVFilterPad fspp_inputs[] = {
649  {
650  .name = "default",
651  .type = AVMEDIA_TYPE_VIDEO,
652  .config_props = config_input,
653  .filter_frame = filter_frame,
654  },
655 };
656 
/*
 * Field initializers of the filter definition named "fspp".
 * NOTE(review): the opening declaration line and several initializer lines
 * (listing lines 657, 662-664 and 666) are absent from this copy. The symbol
 * index names the object `const AVFilter ff_vf_fspp`; restore the missing
 * lines from the upstream file before building.
 */
658  .name = "fspp",
659  .description = NULL_IF_CONFIG_SMALL("Apply Fast Simple Post-processing filter."),
660  .priv_size = sizeof(FSPPContext),
661  .uninit = uninit,
665  .priv_class = &fspp_class,
667 };
ff_get_video_buffer
AVFrame * ff_get_video_buffer(AVFilterLink *link, int w, int h)
Request a picture buffer with a specific set of permissions.
Definition: video.c:112
MULTIPLY16H
#define MULTIPLY16H(x, k)
Definition: vf_fspp.h:37
AVPixelFormat
AVPixelFormat
Pixel format.
Definition: pixfmt.h:71
FIX_0_707106781
#define FIX_0_707106781
Definition: jfdctfst.c:117
FIX_0_541196100
#define FIX_0_541196100
Definition: jfdctfst.c:116
FSPPContext::column_fidct
void(* column_fidct)(int16_t *thr_adr, int16_t *data, int16_t *output, int cnt)
Definition: vf_fspp.h:83
store_slice2_c
static void store_slice2_c(uint8_t *dst, int16_t *src, ptrdiff_t dst_stride, ptrdiff_t src_stride, ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
Definition: vf_fspp.c:117
vf_fspp.h
qp_table.h
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
opt.h
FSPPContext::hsub
int hsub
Definition: vf_fspp.h:61
STORE
#define STORE(pos)
mem_internal.h
out
FILE * out
Definition: movenc.c:55
FSPPContext::threshold_mtx_noq
uint64_t threshold_mtx_noq[8 *2]
Definition: vf_fspp.h:56
ff_filter_frame
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1015
av_pix_fmt_desc_get
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:2965
FILTER_PIXFMTS_ARRAY
#define FILTER_PIXFMTS_ARRAY(array)
Definition: internal.h:162
output
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce output
Definition: filter_design.txt:225
inlink
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is inlink
Definition: filter_design.txt:212
FLAGS
#define FLAGS
Definition: vf_fspp.c:50
FSPPContext::store_slice
void(* store_slice)(uint8_t *dst, int16_t *src, ptrdiff_t dst_stride, ptrdiff_t src_stride, ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
Definition: vf_fspp.h:73
av_frame_free
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:160
FSPPContext::vsub
int vsub
Definition: vf_fspp.h:62
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:374
pixdesc.h
step
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But a word about which is also called distortion Distortion can be quantified by almost any quality measurement one chooses the sum of squared differences is used but more complex methods that consider psychovisual effects can be used as well It makes no difference in this discussion First step
Definition: rate_distortion.txt:58
AVFrame::width
int width
Definition: frame.h:446
AVOption
AVOption.
Definition: opt.h:346
data
const char data[16]
Definition: mxf.c:148
AV_PIX_FMT_YUV440P
@ AV_PIX_FMT_YUV440P
planar YUV 4:4:0 (1 Cr & Cb sample per 1x2 Y samples)
Definition: pixfmt.h:106
FSPPContext::src
uint8_t * src
Definition: vf_fspp.h:67
AVFilter::name
const char * name
Filter name.
Definition: avfilter.h:170
ff_norm_qscale
static int ff_norm_qscale(int qscale, enum AVVideoEncParamsType type)
Normalize the qscale factor FIXME Add support for other values of enum AVVideoEncParamsType besides A...
Definition: qp_table.h:39
video.h
FIX_1_082392200
#define FIX_1_082392200
Definition: 4xm.c:160
FSPPContext::row_idct
void(* row_idct)(int16_t *workspace, int16_t *output_adr, ptrdiff_t output_stride, int cnt)
Definition: vf_fspp.h:86
AVFrame::data
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:395
av_image_copy_plane
void av_image_copy_plane(uint8_t *dst, int dst_linesize, const uint8_t *src, int src_linesize, int bytewidth, int height)
Copy image plane from src to dst.
Definition: imgutils.c:374
FIX_2_613125930
#define FIX_2_613125930
Definition: 4xm.c:163
row_fdct_c
static void row_fdct_c(int16_t *data, const uint8_t *pixels, ptrdiff_t line_size, int cnt)
Definition: vf_fspp.c:436
finish
static void finish(void)
Definition: movenc.c:373
BLOCKSZ
#define BLOCKSZ
Definition: vf_fspp.h:29
FIX_0_382683433
#define FIX_0_382683433
Definition: jfdctfst.c:115
fspp_inputs
static const AVFilterPad fspp_inputs[]
Definition: vf_fspp.c:648
custom_threshold
static const short custom_threshold[64]
Definition: vf_fspp.c:72
mul_thrmat_c
static void mul_thrmat_c(int16_t *thr_adr_noq, int16_t *thr_adr, int q)
Definition: vf_fspp.c:146
AVFilterPad
A filter pad used for either input or output.
Definition: internal.h:33
FSPPContext::qscale_type
enum AVVideoEncParamsType qscale_type
Definition: vf_fspp.h:65
av_cold
#define av_cold
Definition: attributes.h:90
ff_vf_fspp
const AVFilter ff_vf_fspp
Definition: vf_fspp.c:657
ff_video_default_filterpad
const AVFilterPad ff_video_default_filterpad[1]
An AVFilterPad array whose only entry has name "default" and is of type AVMEDIA_TYPE_VIDEO.
Definition: video.c:37
column_fidct_c
static void column_fidct_c(int16_t *thr_adr, int16_t *data, int16_t *output, int cnt)
Definition: vf_fspp.c:251
AV_PIX_FMT_YUVJ422P
@ AV_PIX_FMT_YUVJ422P
planar YUV 4:2:2, 16bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV422P and setting col...
Definition: pixfmt.h:86
emms_c
#define emms_c()
Definition: emms.h:63
width
#define width
AV_CEIL_RSHIFT
#define AV_CEIL_RSHIFT(a, b)
Definition: common.h:59
FSPPContext::row_fdct
void(* row_fdct)(int16_t *data, const uint8_t *pixels, ptrdiff_t line_size, int cnt)
Definition: vf_fspp.h:89
DCTSIZE
#define DCTSIZE
Definition: jfdctfst.c:73
FSPPContext::non_b_qp_table
int8_t * non_b_qp_table
Definition: vf_fspp.h:69
FSPPContext::non_b_qp_stride
int non_b_qp_stride
Definition: vf_fspp.h:70
ctx
AVFormatContext * ctx
Definition: movenc.c:49
config_input
static int config_input(AVFilterLink *inlink)
Definition: vf_fspp.c:507
store_slice_c
static void store_slice_c(uint8_t *dst, int16_t *src, ptrdiff_t dst_stride, ptrdiff_t src_stride, ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
Definition: vf_fspp.c:87
AV_PIX_FMT_YUV420P
@ AV_PIX_FMT_YUV420P
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:73
FSPPContext::log2_count
int log2_count
Definition: vf_fspp.h:59
FILTER_INPUTS
#define FILTER_INPUTS(array)
Definition: internal.h:182
AV_PIX_FMT_YUVJ444P
@ AV_PIX_FMT_YUVJ444P
planar YUV 4:4:4, 24bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV444P and setting col...
Definition: pixfmt.h:87
NULL
#define NULL
Definition: coverity.c:32
av_frame_copy_props
int av_frame_copy_props(AVFrame *dst, const AVFrame *src)
Copy only "metadata" fields from src to dst.
Definition: frame.c:709
bias
static int bias(int x, int c)
Definition: vqcdec.c:115
FSPPContext::qp
int qp
Definition: vf_fspp.h:64
AV_PIX_FMT_YUVJ420P
@ AV_PIX_FMT_YUVJ420P
planar YUV 4:2:0, 12bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV420P and setting col...
Definition: pixfmt.h:85
FIX_1_306562965
#define FIX_1_306562965
Definition: jfdctfst.c:118
STORE2
#define STORE2(pos)
AV_PIX_FMT_GRAY8
@ AV_PIX_FMT_GRAY8
Y , 8bpp.
Definition: pixfmt.h:81
index
int index
Definition: gxfenc.c:90
pix_fmts
static enum AVPixelFormat pix_fmts[]
Definition: vf_fspp.c:497
MAX_LEVEL
#define MAX_LEVEL
Definition: rl.h:36
AVFrame::pict_type
enum AVPictureType pict_type
Picture type of the frame.
Definition: frame.h:476
NULL_IF_CONFIG_SMALL
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:94
DECLARE_ALIGNED
#define DECLARE_ALIGNED(n, t, v)
Definition: mem_internal.h:109
for
for(k=2;k<=8;++k)
Definition: h264pred_template.c:425
FSPPContext::strength
int strength
Definition: vf_fspp.h:60
uninit
static av_cold void uninit(AVFilterContext *ctx)
Definition: vf_fspp.c:640
av_frame_is_writable
int av_frame_is_writable(AVFrame *frame)
Check if the frame data is writable.
Definition: frame.c:645
OFFSET
#define OFFSET(x)
Definition: vf_fspp.c:49
height
#define height
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
fspp_options
static const AVOption fspp_options[]
Definition: vf_fspp.c:51
row_idct_c
static void row_idct_c(int16_t *workspace, int16_t *output_adr, ptrdiff_t output_stride, int cnt)
Definition: vf_fspp.c:374
FSPPContext::threshold_mtx
uint64_t threshold_mtx[8 *2]
Definition: vf_fspp.h:57
FIX_1_847759065
#define FIX_1_847759065
Definition: 4xm.c:162
internal.h
emms.h
FSPPContext::temp_stride
int temp_stride
Definition: vf_fspp.h:63
FSPPContext::use_bframe_qp
int use_bframe_qp
Definition: vf_fspp.h:71
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
av_malloc_array
#define av_malloc_array(a, b)
Definition: tableprint_vlc.h:31
AV_PIX_FMT_YUVJ440P
@ AV_PIX_FMT_YUVJ440P
planar YUV 4:4:0 full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV440P and setting color_range
Definition: pixfmt.h:107
FIX_1_414213562
#define FIX_1_414213562
Definition: 4xm.c:161
AVFilterPad::name
const char * name
Pad name.
Definition: internal.h:39
DESCALE
#define DESCALE(x, n)
Definition: jfdctfst.c:134
stride
#define stride
Definition: h264pred_template.c:537
AVFilter
Filter definition.
Definition: avfilter.h:166
ret
ret
Definition: filter_design.txt:187
ff_qp_table_extract
int ff_qp_table_extract(AVFrame *frame, int8_t **table, int *table_w, int *table_h, enum AVVideoEncParamsType *qscale_type)
Extract a libpostproc-compatible QP table - an 8-bit QP value per 16x16 macroblock,...
Definition: qp_table.c:27
AVFrame::height
int height
Definition: frame.h:446
FSPPContext
Definition: vf_fspp.h:54
ff_fspp_init_x86
void ff_fspp_init_x86(FSPPContext *fspp)
Definition: vf_fspp_init.c:37
FIX_1_414213562_A
static const int16_t FIX_1_414213562_A
Definition: vf_fspp.h:48
AV_PICTURE_TYPE_B
@ AV_PICTURE_TYPE_B
Bi-dir predicted.
Definition: avutil.h:281
AV_PIX_FMT_NONE
@ AV_PIX_FMT_NONE
Definition: pixfmt.h:72
AV_OPT_TYPE_INT
@ AV_OPT_TYPE_INT
Definition: opt.h:235
filter
static void filter(FSPPContext *p, uint8_t *dst, uint8_t *src, int dst_stride, int src_stride, int width, int height, uint8_t *qp_store, int qp_stride, int is_luma)
Definition: vf_fspp.c:153
FSPPContext::mul_thrmat
void(* mul_thrmat)(int16_t *thr_adr_noq, int16_t *thr_adr, int q)
Definition: vf_fspp.h:81
temp
else temp
Definition: vf_mcdeint.c:263
AV_PIX_FMT_YUV444P
@ AV_PIX_FMT_YUV444P
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
Definition: pixfmt.h:78
AVFilterContext
An instance of a filter.
Definition: avfilter.h:407
FSPPContext::store_slice2
void(* store_slice2)(uint8_t *dst, int16_t *src, ptrdiff_t dst_stride, ptrdiff_t src_stride, ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
Definition: vf_fspp.h:77
AV_PIX_FMT_GBRP
@ AV_PIX_FMT_GBRP
planar GBR 4:4:4 24bpp
Definition: pixfmt.h:165
desc
const char * desc
Definition: libsvtav1.c:79
AVMEDIA_TYPE_VIDEO
@ AVMEDIA_TYPE_VIDEO
Definition: avutil.h:201
AV_PIX_FMT_YUV422P
@ AV_PIX_FMT_YUV422P
planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
Definition: pixfmt.h:77
mem.h
AVPixFmtDescriptor
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes...
Definition: pixdesc.h:69
FSPPContext::temp
int16_t * temp
Definition: vf_fspp.h:68
filter_frame
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
Definition: vf_fspp.c:538
FFALIGN
#define FFALIGN(x, a)
Definition: macros.h:78
AV_OPT_TYPE_BOOL
@ AV_OPT_TYPE_BOOL
Definition: opt.h:251
FILTER_OUTPUTS
#define FILTER_OUTPUTS(array)
Definition: internal.h:183
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:34
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
AV_PIX_FMT_YUV411P
@ AV_PIX_FMT_YUV411P
planar YUV 4:1:1, 12bpp, (1 Cr & Cb sample per 4x1 Y samples)
Definition: pixfmt.h:80
d
d
Definition: ffmpeg_filter.c:424
FSPPContext::prev_q
int prev_q
Definition: vf_fspp.h:66
int32_t
int32_t
Definition: audioconvert.c:56
AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL
#define AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL
Same as AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC, except that the filter will have its filter_frame() c...
Definition: avfilter.h:155
imgutils.h
AVFrame::linesize
int linesize[AV_NUM_DATA_POINTERS]
For video, a positive or negative value, which is typically indicating the size in bytes of each pict...
Definition: frame.h:419
AV_PIX_FMT_YUV410P
@ AV_PIX_FMT_YUV410P
planar YUV 4:1:0, 9bpp, (1 Cr & Cb sample per 4x4 Y samples)
Definition: pixfmt.h:79
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
h
h
Definition: vp9dsp_template.c:2038
AVFILTER_DEFINE_CLASS
AVFILTER_DEFINE_CLASS(fspp)
int_simd16_t
int32_t int_simd16_t
Definition: vf_fspp.h:43
THRESHOLD
#define THRESHOLD(r, x, t)
Definition: vf_fspp.h:38
dither
static const uint8_t dither[8][8]
Definition: vf_fspp.c:61