FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
hevcdsp_template.c
Go to the documentation of this file.
1 /*
2  * HEVC video decoder
3  *
4  * Copyright (C) 2012 - 2013 Guillaume Martres
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
23 #include "get_bits.h"
24 #include "hevc.h"
25 
26 #include "bit_depth_template.c"
27 #include "hevcdsp.h"
28 
29 
30 static void FUNC(put_pcm)(uint8_t *_dst, ptrdiff_t stride, int size,
31  GetBitContext *gb, int pcm_bit_depth)
32 {
33  int x, y;
34  pixel *dst = (pixel *)_dst;
35 
36  stride /= sizeof(pixel);
37 
38  for (y = 0; y < size; y++) {
39  for (x = 0; x < size; x++)
40  dst[x] = get_bits(gb, pcm_bit_depth) << (BIT_DEPTH - pcm_bit_depth);
41  dst += stride;
42  }
43 }
44 
45 static void FUNC(transquant_bypass4x4)(uint8_t *_dst, int16_t *coeffs,
46  ptrdiff_t stride)
47 {
48  int x, y;
49  pixel *dst = (pixel *)_dst;
50 
51  stride /= sizeof(pixel);
52 
53  for (y = 0; y < 4; y++) {
54  for (x = 0; x < 4; x++) {
55  dst[x] = av_clip_pixel(dst[x] + *coeffs);
56  coeffs++;
57  }
58  dst += stride;
59  }
60 }
61 
62 static void FUNC(transquant_bypass8x8)(uint8_t *_dst, int16_t *coeffs,
63  ptrdiff_t stride)
64 {
65  int x, y;
66  pixel *dst = (pixel *)_dst;
67 
68  stride /= sizeof(pixel);
69 
70  for (y = 0; y < 8; y++) {
71  for (x = 0; x < 8; x++) {
72  dst[x] = av_clip_pixel(dst[x] + *coeffs);
73  coeffs++;
74  }
75  dst += stride;
76  }
77 }
78 
79 static void FUNC(transquant_bypass16x16)(uint8_t *_dst, int16_t *coeffs,
80  ptrdiff_t stride)
81 {
82  int x, y;
83  pixel *dst = (pixel *)_dst;
84 
85  stride /= sizeof(pixel);
86 
87  for (y = 0; y < 16; y++) {
88  for (x = 0; x < 16; x++) {
89  dst[x] = av_clip_pixel(dst[x] + *coeffs);
90  coeffs++;
91  }
92  dst += stride;
93  }
94 }
95 
96 static void FUNC(transquant_bypass32x32)(uint8_t *_dst, int16_t *coeffs,
97  ptrdiff_t stride)
98 {
99  int x, y;
100  pixel *dst = (pixel *)_dst;
101 
102  stride /= sizeof(pixel);
103 
104  for (y = 0; y < 32; y++) {
105  for (x = 0; x < 32; x++) {
106  dst[x] = av_clip_pixel(dst[x] + *coeffs);
107  coeffs++;
108  }
109  dst += stride;
110  }
111 }
112 
113 static void FUNC(transform_skip)(uint8_t *_dst, int16_t *coeffs,
114  ptrdiff_t stride)
115 {
116  pixel *dst = (pixel *)_dst;
117  int shift = 13 - BIT_DEPTH;
118 #if BIT_DEPTH <= 13
119  int offset = 1 << (shift - 1);
120 #else
121  int offset = 0;
122 #endif
123  int x, y;
124 
125  stride /= sizeof(pixel);
126 
127  for (y = 0; y < 4 * 4; y += 4) {
128  for (x = 0; x < 4; x++)
129  dst[x] = av_clip_pixel(dst[x] + ((coeffs[y + x] + offset) >> shift));
130  dst += stride;
131  }
132 }
133 
/*
 * Store policies handed to the TR_* macros as their "assign" argument.
 * SCALE and ADD_AND_SCALE read the variables `add` and `shift` from the
 * scope in which they are expanded.
 */

/* plain store, no rounding */
#define SET(dst, x) (dst) = (x)

/* round, shift down and saturate to int16 */
#define SCALE(dst, x) (dst) = av_clip_int16(((x) + add) >> shift)

/* round, shift down, saturate to int16, then accumulate into a pixel */
#define ADD_AND_SCALE(dst, x)                                                 \
    (dst) = av_clip_pixel((dst) + av_clip_int16(((x) + add) >> shift))
138 
/*
 * One 4-point pass of the alternative 4x4 luma transform.
 *
 * Reads src[0..3 * step] and stores the four results to dst[0..3 * step]
 * through `assign`. All inputs are loaded before the first store, so dst
 * and src may refer to the same samples.
 */
#define TR_4x4_LUMA(dst, src, step, assign)                                   \
    do {                                                                      \
        const int in0 = src[0 * step];                                        \
        const int in1 = src[1 * step];                                        \
        const int in2 = src[2 * step];                                        \
        const int in3 = src[3 * step];                                        \
        const int c0  = in0 + in2;                                            \
        const int c1  = in2 + in3;                                            \
        const int c2  = in0 - in3;                                            \
        const int c3  = 74 * in1;                                             \
        const int c4  = 74 * (in0 - in2 + in3);                               \
                                                                              \
        assign(dst[0 * step], 29 * c0 + 55 * c1 + c3);                        \
        assign(dst[1 * step], 55 * c2 - 29 * c1 + c3);                        \
        assign(dst[2 * step], c4);                                            \
        assign(dst[3 * step], 55 * c0 + 29 * c2 - c3);                        \
    } while (0)
153 
154 static void FUNC(transform_4x4_luma_add)(uint8_t *_dst, int16_t *coeffs,
155  ptrdiff_t stride)
156 {
157  int i;
158  pixel *dst = (pixel *)_dst;
159  int shift = 7;
160  int add = 1 << (shift - 1);
161  int16_t *src = coeffs;
162 
163  stride /= sizeof(pixel);
164 
165  for (i = 0; i < 4; i++) {
166  TR_4x4_LUMA(src, src, 4, SCALE);
167  src++;
168  }
169 
170  shift = 20 - BIT_DEPTH;
171  add = 1 << (shift - 1);
172  for (i = 0; i < 4; i++) {
173  TR_4x4_LUMA(dst, coeffs, 1, ADD_AND_SCALE);
174  coeffs += 4;
175  dst += stride;
176  }
177 }
178 
179 #undef TR_4x4_LUMA
180 
/*
 * 4-point inverse transform butterfly.
 *
 * Inputs are src[0..3 * sstep]; the even pair (0, 2) and the odd pair (1, 3)
 * are weighted with entries of the `transform` table (rows 0/16 and 8/24,
 * columns 0 and 1) and recombined. Results go to dst[0..3 * dstep] through
 * `assign`. All inputs are loaded before the first store.
 */
#define TR_4(dst, src, dstep, sstep, assign)                                  \
    do {                                                                      \
        const int in0 = src[0 * sstep];                                       \
        const int in1 = src[1 * sstep];                                       \
        const int in2 = src[2 * sstep];                                       \
        const int in3 = src[3 * sstep];                                       \
        const int e0  = transform[8 * 0][0] * in0 + transform[8 * 2][0] * in2;\
        const int e1  = transform[8 * 0][1] * in0 + transform[8 * 2][1] * in2;\
        const int o0  = transform[8 * 1][0] * in1 + transform[8 * 3][0] * in3;\
        const int o1  = transform[8 * 1][1] * in1 + transform[8 * 3][1] * in3;\
                                                                              \
        assign(dst[0 * dstep], e0 + o0);                                      \
        assign(dst[1 * dstep], e1 + o1);                                      \
        assign(dst[2 * dstep], e1 - o1);                                      \
        assign(dst[3 * dstep], e0 - o0);                                      \
    } while (0)
197 
198 static void FUNC(transform_4x4_add)(uint8_t *_dst, int16_t *coeffs,
199  ptrdiff_t stride)
200 {
201  int i;
202  pixel *dst = (pixel *)_dst;
203  int shift = 7;
204  int add = 1 << (shift - 1);
205  int16_t *src = coeffs;
206 
207  stride /= sizeof(pixel);
208 
209  for (i = 0; i < 4; i++) {
210  TR_4(src, src, 4, 4, SCALE);
211  src++;
212  }
213 
214  shift = 20 - BIT_DEPTH;
215  add = 1 << (shift - 1);
216  for (i = 0; i < 4; i++) {
217  TR_4(dst, coeffs, 1, 1, ADD_AND_SCALE);
218  coeffs += 4;
219  dst += stride;
220  }
221 }
222 
/*
 * 8/16/32-point inverse transforms, each built on the next smaller one.
 *
 * For an N-point transform the odd-indexed inputs are combined with the
 * `transform` table into N/2 "odd" sums, the even-indexed inputs are fed to
 * the N/2-point transform (source step doubled, plain SET store) to give
 * N/2 "even" values, and output k / (N-1-k) is even[k] +/- odd[k].
 *
 * The loop counters i and j are declared inside each macro and shadow any
 * outer ones, so the dst/src arguments must not be expressions that mention
 * i or j.
 */
#define TR_8(dst, src, dstep, sstep, assign)                                  \
    do {                                                                      \
        int i, j;                                                             \
        int e_8[4];                                                           \
        int o_8[4] = { 0 };                                                   \
        for (i = 0; i < 4; i++) {                                             \
            for (j = 1; j < 8; j += 2) {                                      \
                o_8[i] += transform[4 * j][i] * src[j * sstep];               \
            }                                                                 \
        }                                                                     \
        TR_4(e_8, src, 1, 2 * sstep, SET);                                    \
                                                                              \
        for (i = 0; i < 4; i++) {                                             \
            assign(dst[i * dstep],       e_8[i] + o_8[i]);                    \
            assign(dst[(7 - i) * dstep], e_8[i] - o_8[i]);                    \
        }                                                                     \
    } while (0)

#define TR_16(dst, src, dstep, sstep, assign)                                 \
    do {                                                                      \
        int i, j;                                                             \
        int e_16[8];                                                          \
        int o_16[8] = { 0 };                                                  \
        for (i = 0; i < 8; i++) {                                             \
            for (j = 1; j < 16; j += 2) {                                     \
                o_16[i] += transform[2 * j][i] * src[j * sstep];              \
            }                                                                 \
        }                                                                     \
        TR_8(e_16, src, 1, 2 * sstep, SET);                                   \
                                                                              \
        for (i = 0; i < 8; i++) {                                             \
            assign(dst[i * dstep],        e_16[i] + o_16[i]);                 \
            assign(dst[(15 - i) * dstep], e_16[i] - o_16[i]);                 \
        }                                                                     \
    } while (0)

#define TR_32(dst, src, dstep, sstep, assign)                                 \
    do {                                                                      \
        int i, j;                                                             \
        int e_32[16];                                                         \
        int o_32[16] = { 0 };                                                 \
        for (i = 0; i < 16; i++) {                                            \
            for (j = 1; j < 32; j += 2) {                                     \
                o_32[i] += transform[j][i] * src[j * sstep];                  \
            }                                                                 \
        }                                                                     \
        TR_16(e_32, src, 1, 2 * sstep, SET);                                  \
                                                                              \
        for (i = 0; i < 16; i++) {                                            \
            assign(dst[i * dstep],        e_32[i] + o_32[i]);                 \
            assign(dst[(31 - i) * dstep], e_32[i] - o_32[i]);                 \
        }                                                                     \
    } while (0)
270 
271 
272 
273 static void FUNC(transform_8x8_add)(uint8_t *_dst, int16_t *coeffs,
274  ptrdiff_t stride)
275 {
276  int i;
277  pixel *dst = (pixel *)_dst;
278  int shift = 7;
279  int add = 1 << (shift - 1);
280  int16_t *src = coeffs;
281 
282  stride /= sizeof(pixel);
283 
284  for (i = 0; i < 8; i++) {
285  TR_8(src, src, 8, 8, SCALE);
286  src++;
287  }
288 
289  shift = 20 - BIT_DEPTH;
290  add = 1 << (shift - 1);
291  for (i = 0; i < 8; i++) {
292  TR_8(dst, coeffs, 1, 1, ADD_AND_SCALE);
293  coeffs += 8;
294  dst += stride;
295  }
296 }
297 
298 static void FUNC(transform_16x16_add)(uint8_t *_dst, int16_t *coeffs,
299  ptrdiff_t stride)
300 {
301  int i;
302  pixel *dst = (pixel *)_dst;
303  int shift = 7;
304  int add = 1 << (shift - 1);
305  int16_t *src = coeffs;
306 
307  stride /= sizeof(pixel);
308 
309  for (i = 0; i < 16; i++) {
310  TR_16(src, src, 16, 16, SCALE);
311  src++;
312  }
313 
314  shift = 20 - BIT_DEPTH;
315  add = 1 << (shift - 1);
316  for (i = 0; i < 16; i++) {
317  TR_16(dst, coeffs, 1, 1, ADD_AND_SCALE);
318  coeffs += 16;
319  dst += stride;
320  }
321 }
322 
323 static void FUNC(transform_32x32_add)(uint8_t *_dst, int16_t *coeffs,
324  ptrdiff_t stride)
325 {
326  int i;
327  pixel *dst = (pixel *)_dst;
328  int shift = 7;
329  int add = 1 << (shift - 1);
330  int16_t *src = coeffs;
331 
332  stride /= sizeof(pixel);
333 
334  for (i = 0; i < 32; i++) {
335  TR_32(src, src, 32, 32, SCALE);
336  src++;
337  }
338  src = coeffs;
339  shift = 20 - BIT_DEPTH;
340  add = 1 << (shift - 1);
341  for (i = 0; i < 32; i++) {
342  TR_32(dst, coeffs, 1, 1, ADD_AND_SCALE);
343  coeffs += 32;
344  dst += stride;
345  }
346 }
347 
348 static void FUNC(sao_band_filter)(uint8_t *_dst, uint8_t *_src,
349  ptrdiff_t stride, SAOParams *sao,
350  int *borders, int width, int height,
351  int c_idx, int class)
352 {
353  pixel *dst = (pixel *)_dst;
354  pixel *src = (pixel *)_src;
355  int offset_table[32] = { 0 };
356  int k, y, x;
357  int chroma = !!c_idx;
358  int shift = BIT_DEPTH - 5;
359  int *sao_offset_val = sao->offset_val[c_idx];
360  int sao_left_class = sao->band_position[c_idx];
361  int init_y = 0, init_x = 0;
362 
363  stride /= sizeof(pixel);
364 
365  switch (class) {
366  case 0:
367  if (!borders[2])
368  width -= (8 >> chroma) + 2;
369  if (!borders[3])
370  height -= (4 >> chroma) + 2;
371  break;
372  case 1:
373  init_y = -(4 >> chroma) - 2;
374  if (!borders[2])
375  width -= (8 >> chroma) + 2;
376  height = (4 >> chroma) + 2;
377  break;
378  case 2:
379  init_x = -(8 >> chroma) - 2;
380  width = (8 >> chroma) + 2;
381  if (!borders[3])
382  height -= (4 >> chroma) + 2;
383  break;
384  case 3:
385  init_y = -(4 >> chroma) - 2;
386  init_x = -(8 >> chroma) - 2;
387  width = (8 >> chroma) + 2;
388  height = (4 >> chroma) + 2;
389  break;
390  }
391 
392  dst = dst + (init_y * stride + init_x);
393  src = src + (init_y * stride + init_x);
394  for (k = 0; k < 4; k++)
395  offset_table[(k + sao_left_class) & 31] = sao_offset_val[k + 1];
396  for (y = 0; y < height; y++) {
397  for (x = 0; x < width; x++)
398  dst[x] = av_clip_pixel(src[x] + offset_table[src[x] >> shift]);
399  dst += stride;
400  src += stride;
401  }
402 }
403 
405  ptrdiff_t stride, SAOParams *sao,
406  int *borders, int width, int height,
407  int c_idx)
408 {
409  FUNC(sao_band_filter)(dst, src, stride, sao, borders,
410  width, height, c_idx, 0);
411 }
412 
414  ptrdiff_t stride, SAOParams *sao,
415  int *borders, int width, int height,
416  int c_idx)
417 {
418  FUNC(sao_band_filter)(dst, src, stride, sao, borders,
419  width, height, c_idx, 1);
420 }
421 
423  ptrdiff_t stride, SAOParams *sao,
424  int *borders, int width, int height,
425  int c_idx)
426 {
427  FUNC(sao_band_filter)(dst, src, stride, sao, borders,
428  width, height, c_idx, 2);
429 }
430 
431 static void FUNC(sao_band_filter_3)(uint8_t *_dst, uint8_t *_src,
432  ptrdiff_t stride, SAOParams *sao,
433  int *borders, int width, int height,
434  int c_idx)
435 {
436  FUNC(sao_band_filter)(_dst, _src, stride, sao, borders,
437  width, height, c_idx, 3);
438 }
439 
440 static void FUNC(sao_edge_filter_0)(uint8_t *_dst, uint8_t *_src,
441  ptrdiff_t stride, SAOParams *sao,
442  int *borders, int _width, int _height,
443  int c_idx, uint8_t vert_edge,
444  uint8_t horiz_edge, uint8_t diag_edge)
445 {
446  int x, y;
447  pixel *dst = (pixel *)_dst;
448  pixel *src = (pixel *)_src;
449  int chroma = !!c_idx;
450  int *sao_offset_val = sao->offset_val[c_idx];
451  int sao_eo_class = sao->eo_class[c_idx];
452  int init_x = 0, init_y = 0, width = _width, height = _height;
453 
454  static const int8_t pos[4][2][2] = {
455  { { -1, 0 }, { 1, 0 } }, // horizontal
456  { { 0, -1 }, { 0, 1 } }, // vertical
457  { { -1, -1 }, { 1, 1 } }, // 45 degree
458  { { 1, -1 }, { -1, 1 } }, // 135 degree
459  };
460  static const uint8_t edge_idx[] = { 1, 2, 0, 3, 4 };
461 
462 #define CMP(a, b) ((a) > (b) ? 1 : ((a) == (b) ? 0 : -1))
463 
464  stride /= sizeof(pixel);
465 
466  if (!borders[2])
467  width -= (8 >> chroma) + 2;
468  if (!borders[3])
469  height -= (4 >> chroma) + 2;
470 
471  dst = dst + (init_y * stride + init_x);
472  src = src + (init_y * stride + init_x);
473  init_y = init_x = 0;
474  if (sao_eo_class != SAO_EO_VERT) {
475  if (borders[0]) {
476  int offset_val = sao_offset_val[0];
477  int y_stride = 0;
478  for (y = 0; y < height; y++) {
479  dst[y_stride] = av_clip_pixel(src[y_stride] + offset_val);
480  y_stride += stride;
481  }
482  init_x = 1;
483  }
484  if (borders[2]) {
485  int offset_val = sao_offset_val[0];
486  int x_stride = width - 1;
487  for (x = 0; x < height; x++) {
488  dst[x_stride] = av_clip_pixel(src[x_stride] + offset_val);
489  x_stride += stride;
490  }
491  width--;
492  }
493  }
494  if (sao_eo_class != SAO_EO_HORIZ) {
495  if (borders[1]) {
496  int offset_val = sao_offset_val[0];
497  for (x = init_x; x < width; x++)
498  dst[x] = av_clip_pixel(src[x] + offset_val);
499  init_y = 1;
500  }
501  if (borders[3]) {
502  int offset_val = sao_offset_val[0];
503  int y_stride = stride * (height - 1);
504  for (x = init_x; x < width; x++)
505  dst[x + y_stride] = av_clip_pixel(src[x + y_stride] + offset_val);
506  height--;
507  }
508  }
509  {
510  int y_stride = init_y * stride;
511  int pos_0_0 = pos[sao_eo_class][0][0];
512  int pos_0_1 = pos[sao_eo_class][0][1];
513  int pos_1_0 = pos[sao_eo_class][1][0];
514  int pos_1_1 = pos[sao_eo_class][1][1];
515 
516  int y_stride_0_1 = (init_y + pos_0_1) * stride;
517  int y_stride_1_1 = (init_y + pos_1_1) * stride;
518  for (y = init_y; y < height; y++) {
519  for (x = init_x; x < width; x++) {
520  int diff0 = CMP(src[x + y_stride], src[x + pos_0_0 + y_stride_0_1]);
521  int diff1 = CMP(src[x + y_stride], src[x + pos_1_0 + y_stride_1_1]);
522  int offset_val = edge_idx[2 + diff0 + diff1];
523  dst[x + y_stride] = av_clip_pixel(src[x + y_stride] + sao_offset_val[offset_val]);
524  }
525  y_stride += stride;
526  y_stride_0_1 += stride;
527  y_stride_1_1 += stride;
528  }
529  }
530 
531  {
532  // Restore pixels that can't be modified
533  int save_upper_left = !diag_edge && sao_eo_class == SAO_EO_135D && !borders[0] && !borders[1];
534  if (vert_edge && sao_eo_class != SAO_EO_VERT)
535  for (y = init_y+save_upper_left; y< height; y++)
536  dst[y*stride] = src[y*stride];
537  if(horiz_edge && sao_eo_class != SAO_EO_HORIZ)
538  for(x = init_x+save_upper_left; x<width; x++)
539  dst[x] = src[x];
540  if(diag_edge && sao_eo_class == SAO_EO_135D)
541  dst[0] = src[0];
542  }
543 
544 #undef CMP
545 }
546 
547 static void FUNC(sao_edge_filter_1)(uint8_t *_dst, uint8_t *_src,
548  ptrdiff_t stride, SAOParams *sao,
549  int *borders, int _width, int _height,
550  int c_idx, uint8_t vert_edge,
551  uint8_t horiz_edge, uint8_t diag_edge)
552 {
553  int x, y;
554  pixel *dst = (pixel *)_dst;
555  pixel *src = (pixel *)_src;
556  int chroma = !!c_idx;
557  int *sao_offset_val = sao->offset_val[c_idx];
558  int sao_eo_class = sao->eo_class[c_idx];
559  int init_x = 0, init_y = 0, width = _width, height = _height;
560 
561  static const int8_t pos[4][2][2] = {
562  { { -1, 0 }, { 1, 0 } }, // horizontal
563  { { 0, -1 }, { 0, 1 } }, // vertical
564  { { -1, -1 }, { 1, 1 } }, // 45 degree
565  { { 1, -1 }, { -1, 1 } }, // 135 degree
566  };
567  static const uint8_t edge_idx[] = { 1, 2, 0, 3, 4 };
568 
569 #define CMP(a, b) ((a) > (b) ? 1 : ((a) == (b) ? 0 : -1))
570 
571  stride /= sizeof(pixel);
572 
573  init_y = -(4 >> chroma) - 2;
574  if (!borders[2])
575  width -= (8 >> chroma) + 2;
576  height = (4 >> chroma) + 2;
577 
578  dst = dst + (init_y * stride + init_x);
579  src = src + (init_y * stride + init_x);
580  init_y = init_x = 0;
581  if (sao_eo_class != SAO_EO_VERT) {
582  if (borders[0]) {
583  int offset_val = sao_offset_val[0];
584  int y_stride = 0;
585  for (y = 0; y < height; y++) {
586  dst[y_stride] = av_clip_pixel(src[y_stride] + offset_val);
587  y_stride += stride;
588  }
589  init_x = 1;
590  }
591  if (borders[2]) {
592  int offset_val = sao_offset_val[0];
593  int x_stride = width - 1;
594  for (x = 0; x < height; x++) {
595  dst[x_stride] = av_clip_pixel(src[x_stride] + offset_val);
596  x_stride += stride;
597  }
598  width--;
599  }
600  }
601  {
602  int y_stride = init_y * stride;
603  int pos_0_0 = pos[sao_eo_class][0][0];
604  int pos_0_1 = pos[sao_eo_class][0][1];
605  int pos_1_0 = pos[sao_eo_class][1][0];
606  int pos_1_1 = pos[sao_eo_class][1][1];
607 
608  int y_stride_0_1 = (init_y + pos_0_1) * stride;
609  int y_stride_1_1 = (init_y + pos_1_1) * stride;
610  for (y = init_y; y < height; y++) {
611  for (x = init_x; x < width; x++) {
612  int diff0 = CMP(src[x + y_stride], src[x + pos_0_0 + y_stride_0_1]);
613  int diff1 = CMP(src[x + y_stride], src[x + pos_1_0 + y_stride_1_1]);
614  int offset_val = edge_idx[2 + diff0 + diff1];
615  dst[x + y_stride] = av_clip_pixel(src[x + y_stride] + sao_offset_val[offset_val]);
616  }
617  y_stride += stride;
618  y_stride_0_1 += stride;
619  y_stride_1_1 += stride;
620  }
621  }
622 
623  {
624  // Restore pixels that can't be modified
625  int save_lower_left = !diag_edge && sao_eo_class == SAO_EO_45D && !borders[0];
626  if(vert_edge && sao_eo_class != SAO_EO_VERT)
627  for(y = init_y; y< height-save_lower_left; y++)
628  dst[y*stride] = src[y*stride];
629  if(horiz_edge && sao_eo_class != SAO_EO_HORIZ)
630  for(x = init_x+save_lower_left; x<width; x++)
631  dst[(height-1)*stride+x] = src[(height-1)*stride+x];
632  if(diag_edge && sao_eo_class == SAO_EO_45D)
633  dst[stride*(height-1)] = src[stride*(height-1)];
634  }
635 
636 #undef CMP
637 }
638 
639 static void FUNC(sao_edge_filter_2)(uint8_t *_dst, uint8_t *_src,
640  ptrdiff_t stride, SAOParams *sao,
641  int *borders, int _width, int _height,
642  int c_idx, uint8_t vert_edge,
643  uint8_t horiz_edge, uint8_t diag_edge)
644 {
645  int x, y;
646  pixel *dst = (pixel *)_dst;
647  pixel *src = (pixel *)_src;
648  int chroma = !!c_idx;
649  int *sao_offset_val = sao->offset_val[c_idx];
650  int sao_eo_class = sao->eo_class[c_idx];
651  int init_x = 0, init_y = 0, width = _width, height = _height;
652 
653  static const int8_t pos[4][2][2] = {
654  { { -1, 0 }, { 1, 0 } }, // horizontal
655  { { 0, -1 }, { 0, 1 } }, // vertical
656  { { -1, -1 }, { 1, 1 } }, // 45 degree
657  { { 1, -1 }, { -1, 1 } }, // 135 degree
658  };
659  static const uint8_t edge_idx[] = { 1, 2, 0, 3, 4 };
660 
661 #define CMP(a, b) ((a) > (b) ? 1 : ((a) == (b) ? 0 : -1))
662 
663  stride /= sizeof(pixel);
664 
665  init_x = -(8 >> chroma) - 2;
666  width = (8 >> chroma) + 2;
667  if (!borders[3])
668  height -= (4 >> chroma) + 2;
669 
670  dst = dst + (init_y * stride + init_x);
671  src = src + (init_y * stride + init_x);
672  init_y = init_x = 0;
673  if (sao_eo_class != SAO_EO_HORIZ) {
674  if (borders[1]) {
675  int offset_val = sao_offset_val[0];
676  for (x = init_x; x < width; x++)
677  dst[x] = av_clip_pixel(src[x] + offset_val);
678  init_y = 1;
679  }
680  if (borders[3]) {
681  int offset_val = sao_offset_val[0];
682  int y_stride = stride * (height - 1);
683  for (x = init_x; x < width; x++)
684  dst[x + y_stride] = av_clip_pixel(src[x + y_stride] + offset_val);
685  height--;
686  }
687  }
688  {
689  int y_stride = init_y * stride;
690  int pos_0_0 = pos[sao_eo_class][0][0];
691  int pos_0_1 = pos[sao_eo_class][0][1];
692  int pos_1_0 = pos[sao_eo_class][1][0];
693  int pos_1_1 = pos[sao_eo_class][1][1];
694 
695  int y_stride_0_1 = (init_y + pos_0_1) * stride;
696  int y_stride_1_1 = (init_y + pos_1_1) * stride;
697  for (y = init_y; y < height; y++) {
698  for (x = init_x; x < width; x++) {
699  int diff0 = CMP(src[x + y_stride], src[x + pos_0_0 + y_stride_0_1]);
700  int diff1 = CMP(src[x + y_stride], src[x + pos_1_0 + y_stride_1_1]);
701  int offset_val = edge_idx[2 + diff0 + diff1];
702  dst[x + y_stride] = av_clip_pixel(src[x + y_stride] + sao_offset_val[offset_val]);
703  }
704  y_stride += stride;
705  y_stride_0_1 += stride;
706  y_stride_1_1 += stride;
707  }
708  }
709 
710  {
711  // Restore pixels that can't be modified
712  int save_upper_right = !diag_edge && sao_eo_class == SAO_EO_45D && !borders[1];
713  if(vert_edge && sao_eo_class != SAO_EO_VERT)
714  for(y = init_y+save_upper_right; y< height; y++)
715  dst[y*stride+width-1] = src[y*stride+width-1];
716  if(horiz_edge && sao_eo_class != SAO_EO_HORIZ)
717  for(x = init_x; x<width-save_upper_right; x++)
718  dst[x] = src[x];
719  if(diag_edge && sao_eo_class == SAO_EO_45D)
720  dst[width-1] = src[width-1];
721  }
722 #undef CMP
723 }
724 
725 static void FUNC(sao_edge_filter_3)(uint8_t *_dst, uint8_t *_src,
726  ptrdiff_t stride, SAOParams *sao,
727  int *borders, int _width, int _height,
728  int c_idx, uint8_t vert_edge,
729  uint8_t horiz_edge, uint8_t diag_edge)
730 {
731  int x, y;
732  pixel *dst = (pixel *)_dst;
733  pixel *src = (pixel *)_src;
734  int chroma = !!c_idx;
735  int *sao_offset_val = sao->offset_val[c_idx];
736  int sao_eo_class = sao->eo_class[c_idx];
737  int init_x = 0, init_y = 0, width = _width, height = _height;
738 
739  static const int8_t pos[4][2][2] = {
740  { { -1, 0 }, { 1, 0 } }, // horizontal
741  { { 0, -1 }, { 0, 1 } }, // vertical
742  { { -1, -1 }, { 1, 1 } }, // 45 degree
743  { { 1, -1 }, { -1, 1 } }, // 135 degree
744  };
745  static const uint8_t edge_idx[] = { 1, 2, 0, 3, 4 };
746 
747 #define CMP(a, b) ((a) > (b) ? 1 : ((a) == (b) ? 0 : -1))
748 
749  stride /= sizeof(pixel);
750 
751  init_y = -(4 >> chroma) - 2;
752  init_x = -(8 >> chroma) - 2;
753  width = (8 >> chroma) + 2;
754  height = (4 >> chroma) + 2;
755 
756 
757  dst = dst + (init_y * stride + init_x);
758  src = src + (init_y * stride + init_x);
759  init_y = init_x = 0;
760 
761  {
762  int y_stride = init_y * stride;
763  int pos_0_0 = pos[sao_eo_class][0][0];
764  int pos_0_1 = pos[sao_eo_class][0][1];
765  int pos_1_0 = pos[sao_eo_class][1][0];
766  int pos_1_1 = pos[sao_eo_class][1][1];
767 
768  int y_stride_0_1 = (init_y + pos_0_1) * stride;
769  int y_stride_1_1 = (init_y + pos_1_1) * stride;
770 
771  for (y = init_y; y < height; y++) {
772  for (x = init_x; x < width; x++) {
773  int diff0 = CMP(src[x + y_stride], src[x + pos_0_0 + y_stride_0_1]);
774  int diff1 = CMP(src[x + y_stride], src[x + pos_1_0 + y_stride_1_1]);
775  int offset_val = edge_idx[2 + diff0 + diff1];
776  dst[x + y_stride] = av_clip_pixel(src[x + y_stride] + sao_offset_val[offset_val]);
777  }
778  y_stride += stride;
779  y_stride_0_1 += stride;
780  y_stride_1_1 += stride;
781  }
782  }
783 
784  {
785  // Restore pixels that can't be modified
786  int save_lower_right = !diag_edge && sao_eo_class == SAO_EO_135D;
787  if(vert_edge && sao_eo_class != SAO_EO_VERT)
788  for(y = init_y; y< height-save_lower_right; y++)
789  dst[y*stride+width-1] = src[y*stride+width-1];
790  if(horiz_edge && sao_eo_class != SAO_EO_HORIZ)
791  for(x = init_x; x<width-save_lower_right; x++)
792  dst[(height-1)*stride+x] = src[(height-1)*stride+x];
793  if(diag_edge && sao_eo_class == SAO_EO_135D)
794  dst[stride*(height-1)+width-1] = src[stride*(height-1)+width-1];
795  }
796 #undef CMP
797 }
798 
/* The transform helper macros are private to the functions above. */
#undef TR_32
#undef TR_16
#undef TR_8
#undef TR_4
#undef ADD_AND_SCALE
#undef SCALE
#undef SET
806 
807 static void FUNC(put_hevc_qpel_pixels)(int16_t *dst, ptrdiff_t dststride,
808  uint8_t *_src, ptrdiff_t _srcstride,
809  int width, int height, int16_t* mcbuffer)
810 {
811  int x, y;
812  pixel *src = (pixel *)_src;
813  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
814 
815  for (y = 0; y < height; y++) {
816  for (x = 0; x < width; x++)
817  dst[x] = src[x] << (14 - BIT_DEPTH);
818  src += srcstride;
819  dst += dststride;
820  }
821 }
822 
/*
 * Luma interpolation kernels. Each expands to the weighted sum of the
 * samples around src[x], spaced `stride` elements apart; the variable `x`
 * is taken from the scope of the expansion.
 *
 *   QPEL_FILTER_1: taps -1, 4, -10, 58, 17, -5, 1     at x-3 .. x+3
 *   QPEL_FILTER_2: taps -1, 4, -11, 40, 40, -11, 4, -1 at x-3 .. x+4
 *   QPEL_FILTER_3: taps  1, -5, 17, 58, -10, 4, -1    at x-2 .. x+4
 */
#define QPEL_FILTER_1(src, stride)                                            \
    (     -src[x - 3 * stride] +                                              \
      4 *  src[x - 2 * stride] -                                              \
     10 *  src[x -     stride] +                                              \
     58 *  src[x             ] +                                              \
     17 *  src[x +     stride] -                                              \
      5 *  src[x + 2 * stride] +                                              \
      1 *  src[x + 3 * stride])

#define QPEL_FILTER_2(src, stride)                                            \
    (     -src[x - 3 * stride] +                                              \
      4 *  src[x - 2 * stride] -                                              \
     11 *  src[x -     stride] +                                              \
     40 *  src[x             ] +                                              \
     40 *  src[x +     stride] -                                              \
     11 *  src[x + 2 * stride] +                                              \
      4 *  src[x + 3 * stride] -                                              \
      1 *  src[x + 4 * stride])

#define QPEL_FILTER_3(src, stride)                                            \
    ( 1 *  src[x - 2 * stride] -                                              \
      5 *  src[x -     stride] +                                              \
     17 *  src[x             ] +                                              \
     58 *  src[x +     stride] -                                              \
     10 *  src[x + 2 * stride] +                                              \
      4 *  src[x + 3 * stride] -                                              \
      1 *  src[x + 4 * stride])
850 
851 
/*
 * Generates put_hevc_qpel_h<H>(): horizontal-only luma interpolation with
 * kernel QPEL_FILTER_<H>. Each filtered sum is shifted right by
 * (BIT_DEPTH - 8) and stored in the 16-bit destination. _srcstride is in
 * bytes, dststride in int16_t elements; mcbuffer is unused.
 */
#define PUT_HEVC_QPEL_H(H)                                                    \
static void FUNC(put_hevc_qpel_h ## H)(int16_t *dst, ptrdiff_t dststride,     \
                                       uint8_t *_src, ptrdiff_t _srcstride,   \
                                       int width, int height,                 \
                                       int16_t* mcbuffer)                     \
{                                                                             \
    const ptrdiff_t srcstride = _srcstride / sizeof(pixel);                   \
    pixel *line = (pixel*)_src;                                               \
    int x, row;                                                               \
                                                                              \
    for (row = 0; row < height; row++) {                                      \
        for (x = 0; x < width; x++) {                                         \
            dst[x] = QPEL_FILTER_ ## H(line, 1) >> (BIT_DEPTH - 8);           \
        }                                                                     \
        line += srcstride;                                                    \
        dst  += dststride;                                                    \
    }                                                                         \
}
869 
/*
 * Generates put_hevc_qpel_v<V>(): vertical-only luma interpolation with
 * kernel QPEL_FILTER_<V>, taps spaced one source line apart. Each filtered
 * sum is shifted right by (BIT_DEPTH - 8) and stored in the 16-bit
 * destination. _srcstride is in bytes, dststride in int16_t elements;
 * mcbuffer is unused.
 */
#define PUT_HEVC_QPEL_V(V)                                                    \
static void FUNC(put_hevc_qpel_v ## V)(int16_t *dst, ptrdiff_t dststride,     \
                                       uint8_t *_src, ptrdiff_t _srcstride,   \
                                       int width, int height,                 \
                                       int16_t* mcbuffer)                     \
{                                                                             \
    const ptrdiff_t srcstride = _srcstride / sizeof(pixel);                   \
    pixel *line = (pixel*)_src;                                               \
    int x, row;                                                               \
                                                                              \
    for (row = 0; row < height; row++) {                                      \
        for (x = 0; x < width; x++) {                                         \
            dst[x] = QPEL_FILTER_ ## V(line, srcstride) >> (BIT_DEPTH - 8);   \
        }                                                                     \
        line += srcstride;                                                    \
        dst  += dststride;                                                    \
    }                                                                         \
}
887 
/*
 * Generates put_hevc_qpel_h<H>v<V>(): separable luma interpolation.
 *
 * Pass 1 filters horizontally with QPEL_FILTER_<H> into a temporary buffer
 * whose lines are MAX_PB_SIZE wide, starting ff_hevc_qpel_extra_before[V]
 * source lines above the block and covering
 * height + ff_hevc_qpel_extra[V] lines; sums are shifted right by
 * (BIT_DEPTH - 8).
 * Pass 2 filters that buffer vertically with QPEL_FILTER_<V> and shifts the
 * sums right by 6 into dst.
 *
 * _srcstride is in bytes, dststride in int16_t elements; mcbuffer is unused.
 */
#define PUT_HEVC_QPEL_HV(H, V)                                                \
static void FUNC(put_hevc_qpel_h ## H ## v ## V)(int16_t *dst,                \
                                                 ptrdiff_t dststride,         \
                                                 uint8_t *_src,               \
                                                 ptrdiff_t _srcstride,        \
                                                 int width, int height,       \
                                                 int16_t* mcbuffer)           \
{                                                                             \
    const ptrdiff_t srcstride = _srcstride / sizeof(pixel);                   \
    int16_t tmp_array[(MAX_PB_SIZE + 7) * MAX_PB_SIZE];                       \
    int16_t *tmp = tmp_array;                                                 \
    pixel *line  = (pixel*)_src;                                              \
    int x, row;                                                               \
                                                                              \
    line -= ff_hevc_qpel_extra_before[V] * srcstride;                         \
                                                                              \
    for (row = 0; row < height + ff_hevc_qpel_extra[V]; row++) {              \
        for (x = 0; x < width; x++) {                                         \
            tmp[x] = QPEL_FILTER_ ## H(line, 1) >> (BIT_DEPTH - 8);           \
        }                                                                     \
        line += srcstride;                                                    \
        tmp  += MAX_PB_SIZE;                                                  \
    }                                                                         \
                                                                              \
    tmp = tmp_array + ff_hevc_qpel_extra_before[V] * MAX_PB_SIZE;             \
                                                                              \
    for (row = 0; row < height; row++) {                                      \
        for (x = 0; x < width; x++) {                                         \
            dst[x] = QPEL_FILTER_ ## V(tmp, MAX_PB_SIZE) >> 6;                \
        }                                                                     \
        tmp += MAX_PB_SIZE;                                                   \
        dst += dststride;                                                     \
    }                                                                         \
}
921 
928 PUT_HEVC_QPEL_HV(1, 1)
929 PUT_HEVC_QPEL_HV(1, 2)
930 PUT_HEVC_QPEL_HV(1, 3)
931 PUT_HEVC_QPEL_HV(2, 1)
932 PUT_HEVC_QPEL_HV(2, 2)
933 PUT_HEVC_QPEL_HV(2, 3)
934 PUT_HEVC_QPEL_HV(3, 1)
935 PUT_HEVC_QPEL_HV(3, 2)
936 PUT_HEVC_QPEL_HV(3, 3)
937 
938 static void FUNC(put_hevc_epel_pixels)(int16_t *dst, ptrdiff_t dststride,
939  uint8_t *_src, ptrdiff_t _srcstride,
940  int width, int height, int mx, int my,
941  int16_t* mcbuffer)
942 {
943  int x, y;
944  pixel *src = (pixel *)_src;
945  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
946 
947  for (y = 0; y < height; y++) {
948  for (x = 0; x < width; x++)
949  dst[x] = src[x] << (14 - BIT_DEPTH);
950  src += srcstride;
951  dst += dststride;
952  }
953 }
954 
/*
 * 4-tap interpolation kernel: weighted sum of the samples at x - stride,
 * x, x + stride and x + 2 * stride. The weights filter_0..filter_3 and the
 * index x are taken from the scope of the expansion.
 */
#define EPEL_FILTER(src, stride)                                              \
    (filter_0 * src[x -     stride] +                                         \
     filter_1 * src[x             ] +                                         \
     filter_2 * src[x +     stride] +                                         \
     filter_3 * src[x + 2 * stride])
960 
961 static void FUNC(put_hevc_epel_h)(int16_t *dst, ptrdiff_t dststride,
962  uint8_t *_src, ptrdiff_t _srcstride,
963  int width, int height, int mx, int my,
964  int16_t* mcbuffer)
965 {
966  int x, y;
967  pixel *src = (pixel *)_src;
968  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
969  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
970  int8_t filter_0 = filter[0];
971  int8_t filter_1 = filter[1];
972  int8_t filter_2 = filter[2];
973  int8_t filter_3 = filter[3];
974  for (y = 0; y < height; y++) {
975  for (x = 0; x < width; x++)
976  dst[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
977  src += srcstride;
978  dst += dststride;
979  }
980 }
981 
982 static void FUNC(put_hevc_epel_v)(int16_t *dst, ptrdiff_t dststride,
983  uint8_t *_src, ptrdiff_t _srcstride,
984  int width, int height, int mx, int my,
985  int16_t* mcbuffer)
986 {
987  int x, y;
988  pixel *src = (pixel *)_src;
989  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
990  const int8_t *filter = ff_hevc_epel_filters[my - 1];
991  int8_t filter_0 = filter[0];
992  int8_t filter_1 = filter[1];
993  int8_t filter_2 = filter[2];
994  int8_t filter_3 = filter[3];
995 
996  for (y = 0; y < height; y++) {
997  for (x = 0; x < width; x++)
998  dst[x] = EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8);
999  src += srcstride;
1000  dst += dststride;
1001  }
1002 }
1003 
1004 static void FUNC(put_hevc_epel_hv)(int16_t *dst, ptrdiff_t dststride,
1005  uint8_t *_src, ptrdiff_t _srcstride,
1006  int width, int height, int mx, int my,
1007  int16_t* mcbuffer)
1008 {
1009  int x, y;
1010  pixel *src = (pixel *)_src;
1011  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1012  const int8_t *filter_h = ff_hevc_epel_filters[mx - 1];
1013  const int8_t *filter_v = ff_hevc_epel_filters[my - 1];
1014  int8_t filter_0 = filter_h[0];
1015  int8_t filter_1 = filter_h[1];
1016  int8_t filter_2 = filter_h[2];
1017  int8_t filter_3 = filter_h[3];
1018  int16_t tmp_array[(MAX_PB_SIZE + 3) * MAX_PB_SIZE];
1019  int16_t *tmp = tmp_array;
1020 
1021  src -= EPEL_EXTRA_BEFORE * srcstride;
1022 
1023  for (y = 0; y < height + EPEL_EXTRA; y++) {
1024  for (x = 0; x < width; x++)
1025  tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1026  src += srcstride;
1027  tmp += MAX_PB_SIZE;
1028  }
1029 
1030  tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1031  filter_0 = filter_v[0];
1032  filter_1 = filter_v[1];
1033  filter_2 = filter_v[2];
1034  filter_3 = filter_v[3];
1035  for (y = 0; y < height; y++) {
1036  for (x = 0; x < width; x++)
1037  dst[x] = EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6;
1038  tmp += MAX_PB_SIZE;
1039  dst += dststride;
1040  }
1041 }
1042 
1043 static void FUNC(put_unweighted_pred)(uint8_t *_dst, ptrdiff_t _dststride,
1044  int16_t *src, ptrdiff_t srcstride,
1045  int width, int height)
1046 {
1047  int x, y;
1048  pixel *dst = (pixel *)_dst;
1049  ptrdiff_t dststride = _dststride / sizeof(pixel);
1050 
1051  int shift = 14 - BIT_DEPTH;
1052 #if BIT_DEPTH < 14
1053  int offset = 1 << (shift - 1);
1054 #else
1055  int offset = 0;
1056 #endif
1057  for (y = 0; y < height; y++) {
1058  for (x = 0; x < width; x++)
1059  dst[x] = av_clip_pixel((src[x] + offset) >> shift);
1060  dst += dststride;
1061  src += srcstride;
1062  }
1063 }
1064 
1065 static void FUNC(put_weighted_pred_avg)(uint8_t *_dst, ptrdiff_t _dststride,
1066  int16_t *src1, int16_t *src2,
1067  ptrdiff_t srcstride,
1068  int width, int height)
1069 {
1070  int x, y;
1071  pixel *dst = (pixel *)_dst;
1072  ptrdiff_t dststride = _dststride / sizeof(pixel);
1073 
1074  int shift = 14 + 1 - BIT_DEPTH;
1075 #if BIT_DEPTH < 14
1076  int offset = 1 << (shift - 1);
1077 #else
1078  int offset = 0;
1079 #endif
1080 
1081  for (y = 0; y < height; y++) {
1082  for (x = 0; x < width; x++)
1083  dst[x] = av_clip_pixel((src1[x] + src2[x] + offset) >> shift);
1084  dst += dststride;
1085  src1 += srcstride;
1086  src2 += srcstride;
1087  }
1088 }
1089 
1090 static void FUNC(weighted_pred)(uint8_t denom, int16_t wlxFlag, int16_t olxFlag,
1091  uint8_t *_dst, ptrdiff_t _dststride,
1092  int16_t *src, ptrdiff_t srcstride,
1093  int width, int height)
1094 {
1095  int shift, log2Wd, wx, ox, x, y, offset;
1096  pixel *dst = (pixel *)_dst;
1097  ptrdiff_t dststride = _dststride / sizeof(pixel);
1098 
1099  shift = 14 - BIT_DEPTH;
1100  log2Wd = denom + shift;
1101  offset = 1 << (log2Wd - 1);
1102  wx = wlxFlag;
1103  ox = olxFlag * (1 << (BIT_DEPTH - 8));
1104 
1105  for (y = 0; y < height; y++) {
1106  for (x = 0; x < width; x++) {
1107  if (log2Wd >= 1) {
1108  dst[x] = av_clip_pixel(((src[x] * wx + offset) >> log2Wd) + ox);
1109  } else {
1110  dst[x] = av_clip_pixel(src[x] * wx + ox);
1111  }
1112  }
1113  dst += dststride;
1114  src += srcstride;
1115  }
1116 }
1117 
1118 static void FUNC(weighted_pred_avg)(uint8_t denom,
1119  int16_t wl0Flag, int16_t wl1Flag,
1120  int16_t ol0Flag, int16_t ol1Flag,
1121  uint8_t *_dst, ptrdiff_t _dststride,
1122  int16_t *src1, int16_t *src2,
1123  ptrdiff_t srcstride,
1124  int width, int height)
1125 {
1126  int shift, log2Wd, w0, w1, o0, o1, x, y;
1127  pixel *dst = (pixel *)_dst;
1128  ptrdiff_t dststride = _dststride / sizeof(pixel);
1129 
1130  shift = 14 - BIT_DEPTH;
1131  log2Wd = denom + shift;
1132  w0 = wl0Flag;
1133  w1 = wl1Flag;
1134  o0 = ol0Flag * (1 << (BIT_DEPTH - 8));
1135  o1 = ol1Flag * (1 << (BIT_DEPTH - 8));
1136 
1137  for (y = 0; y < height; y++) {
1138  for (x = 0; x < width; x++)
1139  dst[x] = av_clip_pixel((src1[x] * w0 + src2[x] * w1 +
1140  ((o0 + o1 + 1) << log2Wd)) >> (log2Wd + 1));
1141  dst += dststride;
1142  src1 += srcstride;
1143  src2 += srcstride;
1144  }
1145 }
1146 
/*
 * Sample accessors used by the loop filters below. They expand in terms of
 * the locals `pix`, `xstride` and `ystride` of the function using them.
 *
 * Line zero: P3..P0 are the four samples at negative xstride offsets from
 * pix, Q0..Q3 the four samples at pix and at positive xstride offsets.
 */
#define P3 (pix[-4 * xstride])
#define P2 (pix[-3 * xstride])
#define P1 (pix[-2 * xstride])
#define P0 (pix[-xstride])
#define Q0 (pix[0])
#define Q1 (pix[xstride])
#define Q2 (pix[2 * xstride])
#define Q3 (pix[3 * xstride])

/*
 * Line three: the same eight positions, 3 * ystride further on.
 * Used only for the deblocking decision.
 */
#define TP3 (pix[3 * ystride - 4 * xstride])
#define TP2 (pix[3 * ystride - 3 * xstride])
#define TP1 (pix[3 * ystride - 2 * xstride])
#define TP0 (pix[3 * ystride - xstride])
#define TQ0 (pix[3 * ystride])
#define TQ1 (pix[3 * ystride + xstride])
#define TQ2 (pix[3 * ystride + 2 * xstride])
#define TQ3 (pix[3 * ystride + 3 * xstride])
1166 
1168  ptrdiff_t _xstride, ptrdiff_t _ystride,
1169  int *_beta, int *_tc,
1170  uint8_t *_no_p, uint8_t *_no_q)
1171 {
1172  int d, j;
1173  pixel *pix = (pixel *)_pix;
1174  ptrdiff_t xstride = _xstride / sizeof(pixel);
1175  ptrdiff_t ystride = _ystride / sizeof(pixel);
1176 
1177  for (j = 0; j < 2; j++) {
1178  const int dp0 = abs(P2 - 2 * P1 + P0);
1179  const int dq0 = abs(Q2 - 2 * Q1 + Q0);
1180  const int dp3 = abs(TP2 - 2 * TP1 + TP0);
1181  const int dq3 = abs(TQ2 - 2 * TQ1 + TQ0);
1182  const int d0 = dp0 + dq0;
1183  const int d3 = dp3 + dq3;
1184  const int beta = _beta[j] << (BIT_DEPTH - 8);
1185  const int tc = _tc[j] << (BIT_DEPTH - 8);
1186  const int no_p = _no_p[j];
1187  const int no_q = _no_q[j];
1188 
1189  if (d0 + d3 >= beta) {
1190  pix += 4 * ystride;
1191  continue;
1192  } else {
1193  const int beta_3 = beta >> 3;
1194  const int beta_2 = beta >> 2;
1195  const int tc25 = ((tc * 5 + 1) >> 1);
1196 
1197  if (abs(P3 - P0) + abs(Q3 - Q0) < beta_3 && abs(P0 - Q0) < tc25 &&
1198  abs(TP3 - TP0) + abs(TQ3 - TQ0) < beta_3 && abs(TP0 - TQ0) < tc25 &&
1199  (d0 << 1) < beta_2 && (d3 << 1) < beta_2) {
1200  // strong filtering
1201  const int tc2 = tc << 1;
1202  for (d = 0; d < 4; d++) {
1203  const int p3 = P3;
1204  const int p2 = P2;
1205  const int p1 = P1;
1206  const int p0 = P0;
1207  const int q0 = Q0;
1208  const int q1 = Q1;
1209  const int q2 = Q2;
1210  const int q3 = Q3;
1211  if (!no_p) {
1212  P0 = p0 + av_clip(((p2 + 2 * p1 + 2 * p0 + 2 * q0 + q1 + 4) >> 3) - p0, -tc2, tc2);
1213  P1 = p1 + av_clip(((p2 + p1 + p0 + q0 + 2) >> 2) - p1, -tc2, tc2);
1214  P2 = p2 + av_clip(((2 * p3 + 3 * p2 + p1 + p0 + q0 + 4) >> 3) - p2, -tc2, tc2);
1215  }
1216  if (!no_q) {
1217  Q0 = q0 + av_clip(((p1 + 2 * p0 + 2 * q0 + 2 * q1 + q2 + 4) >> 3) - q0, -tc2, tc2);
1218  Q1 = q1 + av_clip(((p0 + q0 + q1 + q2 + 2) >> 2) - q1, -tc2, tc2);
1219  Q2 = q2 + av_clip(((2 * q3 + 3 * q2 + q1 + q0 + p0 + 4) >> 3) - q2, -tc2, tc2);
1220  }
1221  pix += ystride;
1222  }
1223  } else { // normal filtering
1224  int nd_p = 1;
1225  int nd_q = 1;
1226  const int tc_2 = tc >> 1;
1227  if (dp0 + dp3 < ((beta + (beta >> 1)) >> 3))
1228  nd_p = 2;
1229  if (dq0 + dq3 < ((beta + (beta >> 1)) >> 3))
1230  nd_q = 2;
1231 
1232  for (d = 0; d < 4; d++) {
1233  const int p2 = P2;
1234  const int p1 = P1;
1235  const int p0 = P0;
1236  const int q0 = Q0;
1237  const int q1 = Q1;
1238  const int q2 = Q2;
1239  int delta0 = (9 * (q0 - p0) - 3 * (q1 - p1) + 8) >> 4;
1240  if (abs(delta0) < 10 * tc) {
1241  delta0 = av_clip(delta0, -tc, tc);
1242  if (!no_p)
1243  P0 = av_clip_pixel(p0 + delta0);
1244  if (!no_q)
1245  Q0 = av_clip_pixel(q0 - delta0);
1246  if (!no_p && nd_p > 1) {
1247  const int deltap1 = av_clip((((p2 + p0 + 1) >> 1) - p1 + delta0) >> 1, -tc_2, tc_2);
1248  P1 = av_clip_pixel(p1 + deltap1);
1249  }
1250  if (!no_q && nd_q > 1) {
1251  const int deltaq1 = av_clip((((q2 + q0 + 1) >> 1) - q1 - delta0) >> 1, -tc_2, tc_2);
1252  Q1 = av_clip_pixel(q1 + deltaq1);
1253  }
1254  }
1255  pix += ystride;
1256  }
1257  }
1258  }
1259  }
1260 }
1261 
1262 static void FUNC(hevc_loop_filter_chroma)(uint8_t *_pix, ptrdiff_t _xstride,
1263  ptrdiff_t _ystride, int *_tc,
1264  uint8_t *_no_p, uint8_t *_no_q)
1265 {
1266  int d, j, no_p, no_q;
1267  pixel *pix = (pixel *)_pix;
1268  ptrdiff_t xstride = _xstride / sizeof(pixel);
1269  ptrdiff_t ystride = _ystride / sizeof(pixel);
1270 
1271  for (j = 0; j < 2; j++) {
1272  const int tc = _tc[j] << (BIT_DEPTH - 8);
1273  if (tc <= 0) {
1274  pix += 4 * ystride;
1275  continue;
1276  }
1277  no_p = _no_p[j];
1278  no_q = _no_q[j];
1279 
1280  for (d = 0; d < 4; d++) {
1281  int delta0;
1282  const int p1 = P1;
1283  const int p0 = P0;
1284  const int q0 = Q0;
1285  const int q1 = Q1;
1286  delta0 = av_clip((((q0 - p0) * 4) + p1 - q1 + 4) >> 3, -tc, tc);
1287  if (!no_p)
1288  P0 = av_clip_pixel(p0 + delta0);
1289  if (!no_q)
1290  Q0 = av_clip_pixel(q0 - delta0);
1291  pix += ystride;
1292  }
1293  }
1294 }
1295 
1296 static void FUNC(hevc_h_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
1297  int *tc, uint8_t *no_p,
1298  uint8_t *no_q)
1299 {
1300  FUNC(hevc_loop_filter_chroma)(pix, stride, sizeof(pixel), tc, no_p, no_q);
1301 }
1302 
1303 static void FUNC(hevc_v_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
1304  int *tc, uint8_t *no_p,
1305  uint8_t *no_q)
1306 {
1307  FUNC(hevc_loop_filter_chroma)(pix, sizeof(pixel), stride, tc, no_p, no_q);
1308 }
1309 
1310 static void FUNC(hevc_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
1311  int *beta, int *tc, uint8_t *no_p,
1312  uint8_t *no_q)
1313 {
1314  FUNC(hevc_loop_filter_luma)(pix, stride, sizeof(pixel),
1315  beta, tc, no_p, no_q);
1316 }
1317 
1318 static void FUNC(hevc_v_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
1319  int *beta, int *tc, uint8_t *no_p,
1320  uint8_t *no_q)
1321 {
1322  FUNC(hevc_loop_filter_luma)(pix, sizeof(pixel), stride,
1323  beta, tc, no_p, no_q);
1324 }
1325 
/* Retire the loop-filter sample accessors so the short names do not leak
 * into code that follows. */

/* line zero */
#undef Q3
#undef Q2
#undef Q1
#undef Q0
#undef P0
#undef P1
#undef P2
#undef P3

/* line three */
#undef TQ3
#undef TQ2
#undef TQ1
#undef TQ0
#undef TP0
#undef TP1
#undef TP2
#undef TP3