FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
hevcdsp_template.c
Go to the documentation of this file.
1 /*
2  * HEVC video decoder
3  *
4  * Copyright (C) 2012 - 2013 Guillaume Martres
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
23 #include "get_bits.h"
24 #include "hevc.h"
25 
26 #include "bit_depth_template.c"
27 #include "hevcdsp.h"
28 
29 
30 static void FUNC(put_pcm)(uint8_t *_dst, ptrdiff_t stride, int width, int height,
31  GetBitContext *gb, int pcm_bit_depth)
32 {
33  int x, y;
34  pixel *dst = (pixel *)_dst;
35 
36  stride /= sizeof(pixel);
37 
38  for (y = 0; y < height; y++) {
39  for (x = 0; x < width; x++)
40  dst[x] = get_bits(gb, pcm_bit_depth) << (BIT_DEPTH - pcm_bit_depth);
41  dst += stride;
42  }
43 }
44 
45 static av_always_inline void FUNC(transquant_bypass)(uint8_t *_dst, int16_t *coeffs,
46  ptrdiff_t stride, int size)
47 {
48  int x, y;
49  pixel *dst = (pixel *)_dst;
50 
51  stride /= sizeof(pixel);
52 
53  for (y = 0; y < size; y++) {
54  for (x = 0; x < size; x++) {
55  dst[x] = av_clip_pixel(dst[x] + *coeffs);
56  coeffs++;
57  }
58  dst += stride;
59  }
60 }
61 
62 static void FUNC(transform_add4x4)(uint8_t *_dst, int16_t *coeffs,
63  ptrdiff_t stride)
64 {
65  FUNC(transquant_bypass)(_dst, coeffs, stride, 4);
66 }
67 
68 static void FUNC(transform_add8x8)(uint8_t *_dst, int16_t *coeffs,
69  ptrdiff_t stride)
70 {
71  FUNC(transquant_bypass)(_dst, coeffs, stride, 8);
72 }
73 
74 static void FUNC(transform_add16x16)(uint8_t *_dst, int16_t *coeffs,
75  ptrdiff_t stride)
76 {
77  FUNC(transquant_bypass)(_dst, coeffs, stride, 16);
78 }
79 
80 static void FUNC(transform_add32x32)(uint8_t *_dst, int16_t *coeffs,
81  ptrdiff_t stride)
82 {
83  FUNC(transquant_bypass)(_dst, coeffs, stride, 32);
84 }
85 
86 
87 static void FUNC(transform_rdpcm)(int16_t *_coeffs, int16_t log2_size, int mode)
88 {
89  int16_t *coeffs = (int16_t *) _coeffs;
90  int x, y;
91  int size = 1 << log2_size;
92 
93  if (mode) {
94  coeffs += size;
95  for (y = 0; y < size - 1; y++) {
96  for (x = 0; x < size; x++)
97  coeffs[x] += coeffs[x - size];
98  coeffs += size;
99  }
100  } else {
101  for (y = 0; y < size; y++) {
102  for (x = 1; x < size; x++)
103  coeffs[x] += coeffs[x - 1];
104  coeffs += size;
105  }
106  }
107 }
108 
109 static void FUNC(transform_skip)(int16_t *_coeffs, int16_t log2_size)
110 {
111  int shift = 15 - BIT_DEPTH - log2_size;
112  int x, y;
113  int size = 1 << log2_size;
114  int16_t *coeffs = _coeffs;
115 
116 
117  if (shift > 0) {
118  int offset = 1 << (shift - 1);
119  for (y = 0; y < size; y++) {
120  for (x = 0; x < size; x++) {
121  *coeffs = (*coeffs + offset) >> shift;
122  coeffs++;
123  }
124  }
125  } else {
126  for (y = 0; y < size; y++) {
127  for (x = 0; x < size; x++) {
128  *coeffs = *coeffs << -shift;
129  coeffs++;
130  }
131  }
132  }
133 }
134 
/*
 * Store helpers handed to the TR_* macros as their "assign" argument.
 * SCALE and ADD_AND_SCALE pick up the variables `add` and `shift` from the
 * scope in which they are expanded.
 */

/* Plain store. */
#define SET(dst, x)   (dst) = (x)

/* Round with `add`, shift right by `shift`, clip to int16_t, then store. */
#define SCALE(dst, x) (dst) = av_clip_int16(((x) + add) >> shift)

/* As SCALE, but add the result to dst and clip the sum with av_clip_pixel. */
#define ADD_AND_SCALE(dst, x)                                           \
    (dst) = av_clip_pixel((dst) + av_clip_int16(((x) + add) >> shift))
139 
/*
 * 4-point transform used by transform_4x4_luma.
 *
 * Reads src[0], src[step], src[2 * step], src[3 * step] and passes the four
 * results to `assign` for the matching dst elements.
 *
 * The store order matters when dst and src are the same array (as in
 * transform_4x4_luma): dst[2 * step] is produced first because its
 * expression still reads src[0 * step], src[2 * step] and src[3 * step]
 * directly, while the remaining outputs only use the c0..c3 temporaries.
 */
#define TR_4x4_LUMA(dst, src, step, assign)                             \
    do {                                                                \
        int c0 = src[0 * step] + src[2 * step];                         \
        int c1 = src[2 * step] + src[3 * step];                         \
        int c2 = src[0 * step] - src[3 * step];                         \
        int c3 = 74 * src[1 * step];                                    \
                                                                        \
        assign(dst[2 * step], 74 * (src[0 * step] -                     \
                                    src[2 * step] +                     \
                                    src[3 * step]));                    \
        assign(dst[0 * step], 29 * c0 + 55 * c1 + c3);                  \
        assign(dst[1 * step], 55 * c2 - 29 * c1 + c3);                  \
        assign(dst[3 * step], 55 * c0 + 29 * c2 - c3);                  \
    } while (0)
154 
155 static void FUNC(transform_4x4_luma)(int16_t *coeffs)
156 {
157  int i;
158  int shift = 7;
159  int add = 1 << (shift - 1);
160  int16_t *src = coeffs;
161 
162  for (i = 0; i < 4; i++) {
163  TR_4x4_LUMA(src, src, 4, SCALE);
164  src++;
165  }
166 
167  shift = 20 - BIT_DEPTH;
168  add = 1 << (shift - 1);
169  for (i = 0; i < 4; i++) {
170  TR_4x4_LUMA(coeffs, coeffs, 1, SCALE);
171  coeffs += 4;
172  }
173 }
174 
175 #undef TR_4x4_LUMA
176 
/*
 * 4-point transform stage.
 *
 * Reads src[0], src[sstep], src[2 * sstep], src[3 * sstep] and passes the
 * four results to `assign` for dst[0], dst[dstep], dst[2 * dstep],
 * dst[3 * dstep].  Every input is captured in e0/e1/o0/o1 before the first
 * store, so dst and src may be the same array.
 *
 * `end` is accepted so that all TR_* macros share one parameter list; this
 * macro never expands it.
 */
#define TR_4(dst, src, dstep, sstep, assign, end)                       \
    do {                                                                \
        const int e0 = 64 * src[0 * sstep] + 64 * src[2 * sstep];       \
        const int e1 = 64 * src[0 * sstep] - 64 * src[2 * sstep];       \
        const int o0 = 83 * src[1 * sstep] + 36 * src[3 * sstep];       \
        const int o1 = 36 * src[1 * sstep] - 83 * src[3 * sstep];       \
                                                                        \
        assign(dst[0 * dstep], e0 + o0);                                \
        assign(dst[1 * dstep], e1 + o1);                                \
        assign(dst[2 * dstep], e1 - o1);                                \
        assign(dst[3 * dstep], e0 - o0);                                \
    } while (0)
189 
/*
 * 8-point transform stage.
 *
 * The odd part o_8[] accumulates transform[4 * j][i] * src[j * sstep] over
 * the odd j below `end`; the even part e_8[] comes from TR_4 applied to
 * every second input (source step 2 * sstep).  Outputs i and 7 - i are the
 * sum and difference of the two parts, handed to `assign`.
 *
 * Expects a table named `transform` at the expansion site.  The macro
 * declares its own i and j, which hide any outer variables of those names.
 */
#define TR_8(dst, src, dstep, sstep, assign, end)                       \
    do {                                                                \
        int i, j;                                                       \
        int e_8[4];                                                     \
        int o_8[4] = { 0 };                                             \
        for (i = 0; i < 4; i++)                                         \
            for (j = 1; j < end; j += 2)                                \
                o_8[i] += transform[4 * j][i] * src[j * sstep];         \
        TR_4(e_8, src, 1, 2 * sstep, SET, 4);                           \
                                                                        \
        for (i = 0; i < 4; i++) {                                       \
            assign(dst[i * dstep], e_8[i] + o_8[i]);                    \
            assign(dst[(7 - i) * dstep], e_8[i] - o_8[i]);              \
        }                                                               \
    } while (0)
205 
/*
 * 16-point transform stage.
 *
 * The odd part o_16[] accumulates transform[2 * j][i] * src[j * sstep] over
 * the odd j below `end`; the even part e_16[] comes from TR_8 applied to
 * every second input (source step 2 * sstep, end fixed at 8).  Outputs i
 * and 15 - i are the sum and difference of the two parts.
 *
 * Expects a table named `transform` at the expansion site.  The macro
 * declares its own i and j, which hide any outer variables of those names.
 */
#define TR_16(dst, src, dstep, sstep, assign, end)                      \
    do {                                                                \
        int i, j;                                                       \
        int e_16[8];                                                    \
        int o_16[8] = { 0 };                                            \
        for (i = 0; i < 8; i++)                                         \
            for (j = 1; j < end; j += 2)                                \
                o_16[i] += transform[2 * j][i] * src[j * sstep];        \
        TR_8(e_16, src, 1, 2 * sstep, SET, 8);                          \
                                                                        \
        for (i = 0; i < 8; i++) {                                       \
            assign(dst[i * dstep], e_16[i] + o_16[i]);                  \
            assign(dst[(15 - i) * dstep], e_16[i] - o_16[i]);           \
        }                                                               \
    } while (0)
221 
/*
 * 32-point transform stage.
 *
 * The odd part o_32[] accumulates transform[j][i] * src[j * sstep] over the
 * odd j below `end`; the even part e_32[] comes from TR_16 applied to every
 * second input (source step 2 * sstep) with its own limit set to end/2.
 * Outputs i and 31 - i are the sum and difference of the two parts.
 *
 * Expects a table named `transform` at the expansion site.  The macro
 * declares its own i and j, which hide any outer variables of those names.
 */
#define TR_32(dst, src, dstep, sstep, assign, end)                      \
    do {                                                                \
        int i, j;                                                       \
        int e_32[16];                                                   \
        int o_32[16] = { 0 };                                           \
        for (i = 0; i < 16; i++)                                        \
            for (j = 1; j < end; j += 2)                                \
                o_32[i] += transform[j][i] * src[j * sstep];            \
        TR_16(e_32, src, 1, 2 * sstep, SET, end/2);                     \
                                                                        \
        for (i = 0; i < 16; i++) {                                      \
            assign(dst[i * dstep], e_32[i] + o_32[i]);                  \
            assign(dst[(31 - i) * dstep], e_32[i] - o_32[i]);           \
        }                                                               \
    } while (0)
237 
/*
 * Per-size local declarations for the IDCT() function template.
 * They read `col_limit` from the enclosing function.
 *
 * The 4x4 variant declares only limit2: TR_4 never expands its `end`
 * argument, so the `limit` name passed to it in the second pass of IDCT()
 * does not need to exist.  The larger sizes declare both limits.
 */
#define IDCT_VAR4(H)                                                    \
    int limit2 = FFMIN(col_limit + 4, H)
#define IDCT_VAR8(H)                                                    \
    int limit  = FFMIN(col_limit, H);                                   \
    int limit2 = FFMIN(col_limit + 4, H)
#define IDCT_VAR16(H) IDCT_VAR8(H)
#define IDCT_VAR32(H) IDCT_VAR8(H)
245 
/*
 * Function template: defines FUNC(idct_HxH)(coeffs, col_limit), an in-place
 * two-pass transform of an H x H coefficient block using TR_H.
 *
 * Pass 1 runs TR_H down each of the H columns (element step H) with
 * shift 7, bounded by limit2.  While limit2 is below H it is reduced by 4
 * at every column index that is a non-zero multiple of 4.
 * Pass 2 runs TR_H along each of the H rows (element step 1) with
 * shift 20 - BIT_DEPTH, bounded by limit.
 *
 * Both passes store through SCALE, which uses the local `add` and `shift`.
 */
#define IDCT(H)                                                         \
static void FUNC(idct_##H ##x ##H )(                                    \
                   int16_t *coeffs, int col_limit) {                    \
    int i;                                                              \
    int      shift = 7;                                                 \
    int      add   = 1 << (shift - 1);                                  \
    int16_t *src   = coeffs;                                            \
    IDCT_VAR ##H(H);                                                    \
                                                                        \
    for (i = 0; i < H; i++, src++) {                                    \
        TR_ ## H(src, src, H, H, SCALE, limit2);                        \
        if (i != 0 && i % 4 == 0 && limit2 < H)                         \
            limit2 -= 4;                                                \
    }                                                                   \
                                                                        \
    shift = 20 - BIT_DEPTH;                                             \
    add   = 1 << (shift - 1);                                           \
    for (i = 0; i < H; i++, coeffs += H) {                              \
        TR_ ## H(coeffs, coeffs, 1, 1, SCALE, limit);                   \
    }                                                                   \
}
269 
/*
 * Function template: defines FUNC(idct_HxH_dc)(coeffs), which fills the
 * whole H x H block with one value derived from coeffs[0]:
 *   (((coeffs[0] + 1) >> 1) + add) >> shift,  shift = 14 - BIT_DEPTH,
 *   add = 1 << (shift - 1).
 */
#define IDCT_DC(H)                                                      \
static void FUNC(idct_##H ##x ##H ##_dc)(                               \
                   int16_t *coeffs) {                                   \
    int n;                                                              \
    int shift = 14 - BIT_DEPTH;                                         \
    int add   = 1 << (shift - 1);                                       \
    int coeff = (((coeffs[0] + 1) >> 1) + add) >> shift;                \
                                                                        \
    for (n = 0; n < H * H; n++)                                         \
        coeffs[n] = coeff;                                              \
}
284 
285 IDCT( 4)
286 IDCT( 8)
287 IDCT(16)
288 IDCT(32)
289 
290 IDCT_DC( 4)
291 IDCT_DC( 8)
292 IDCT_DC(16)
293 IDCT_DC(32)
294 
295 #undef TR_4
296 #undef TR_8
297 #undef TR_16
298 #undef TR_32
299 
300 #undef SET
301 #undef SCALE
302 #undef ADD_AND_SCALE
303 
304 static void FUNC(sao_band_filter_0)(uint8_t *_dst, uint8_t *_src,
305  ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao,
306  int *borders, int width, int height,
307  int c_idx)
308 {
309  pixel *dst = (pixel *)_dst;
310  pixel *src = (pixel *)_src;
311  int offset_table[32] = { 0 };
312  int k, y, x;
313  int shift = BIT_DEPTH - 5;
314  int16_t *sao_offset_val = sao->offset_val[c_idx];
315  int sao_left_class = sao->band_position[c_idx];
316 
317  stride_dst /= sizeof(pixel);
318  stride_src /= sizeof(pixel);
319 
320  for (k = 0; k < 4; k++)
321  offset_table[(k + sao_left_class) & 31] = sao_offset_val[k + 1];
322  for (y = 0; y < height; y++) {
323  for (x = 0; x < width; x++)
324  dst[x] = av_clip_pixel(src[x] + offset_table[src[x] >> shift]);
325  dst += stride_dst;
326  src += stride_src;
327  }
328 }
329 
/* Three-way integer comparison: 1 if a > b, 0 if a == b, -1 if a < b. */
#define CMP(a, b) (((a) > (b)) - ((a) < (b)))
331 
332 static void FUNC(sao_edge_filter)(uint8_t *_dst, uint8_t *_src,
333  ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao,
334  int width, int height,
335  int c_idx, int init_x, int init_y) {
336 
337  static const uint8_t edge_idx[] = { 1, 2, 0, 3, 4 };
338  static const int8_t pos[4][2][2] = {
339  { { -1, 0 }, { 1, 0 } }, // horizontal
340  { { 0, -1 }, { 0, 1 } }, // vertical
341  { { -1, -1 }, { 1, 1 } }, // 45 degree
342  { { 1, -1 }, { -1, 1 } }, // 135 degree
343  };
344  int16_t *sao_offset_val = sao->offset_val[c_idx];
345  int sao_eo_class = sao->eo_class[c_idx];
346  pixel *dst = (pixel *)_dst;
347  pixel *src = (pixel *)_src;
348 
349  int y_stride_src = init_y * stride_src;
350  int y_stride_dst = init_y * stride_dst;
351  int pos_0_0 = pos[sao_eo_class][0][0];
352  int pos_0_1 = pos[sao_eo_class][0][1];
353  int pos_1_0 = pos[sao_eo_class][1][0];
354  int pos_1_1 = pos[sao_eo_class][1][1];
355  int x, y;
356 
357  int y_stride_0_1 = (init_y + pos_0_1) * stride_src;
358  int y_stride_1_1 = (init_y + pos_1_1) * stride_src;
359  for (y = init_y; y < height; y++) {
360  for (x = init_x; x < width; x++) {
361  int diff0 = CMP(src[x + y_stride_src], src[x + pos_0_0 + y_stride_0_1]);
362  int diff1 = CMP(src[x + y_stride_src], src[x + pos_1_0 + y_stride_1_1]);
363  int offset_val = edge_idx[2 + diff0 + diff1];
364  dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + sao_offset_val[offset_val]);
365  }
366  y_stride_src += stride_src;
367  y_stride_dst += stride_dst;
368  y_stride_0_1 += stride_src;
369  y_stride_1_1 += stride_src;
370  }
371 }
372 
373 static void FUNC(sao_edge_filter_0)(uint8_t *_dst, uint8_t *_src,
374  ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao,
375  int *borders, int _width, int _height,
376  int c_idx, uint8_t *vert_edge,
377  uint8_t *horiz_edge, uint8_t *diag_edge)
378 {
379  int x, y;
380  pixel *dst = (pixel *)_dst;
381  pixel *src = (pixel *)_src;
382  int16_t *sao_offset_val = sao->offset_val[c_idx];
383  int sao_eo_class = sao->eo_class[c_idx];
384  int init_x = 0, init_y = 0, width = _width, height = _height;
385 
386  stride_dst /= sizeof(pixel);
387  stride_src /= sizeof(pixel);
388 
389  if (sao_eo_class != SAO_EO_VERT) {
390  if (borders[0]) {
391  int offset_val = sao_offset_val[0];
392  for (y = 0; y < height; y++) {
393  dst[y * stride_dst] = av_clip_pixel(src[y * stride_src] + offset_val);
394  }
395  init_x = 1;
396  }
397  if (borders[2]) {
398  int offset_val = sao_offset_val[0];
399  int offset = width - 1;
400  for (x = 0; x < height; x++) {
401  dst[x * stride_dst + offset] = av_clip_pixel(src[x * stride_src + offset] + offset_val);
402  }
403  width--;
404  }
405  }
406  if (sao_eo_class != SAO_EO_HORIZ) {
407  if (borders[1]) {
408  int offset_val = sao_offset_val[0];
409  for (x = init_x; x < width; x++)
410  dst[x] = av_clip_pixel(src[x] + offset_val);
411  init_y = 1;
412  }
413  if (borders[3]) {
414  int offset_val = sao_offset_val[0];
415  int y_stride_dst = stride_dst * (height - 1);
416  int y_stride_src = stride_src * (height - 1);
417  for (x = init_x; x < width; x++)
418  dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + offset_val);
419  height--;
420  }
421  }
422 
423  FUNC(sao_edge_filter)((uint8_t *)dst, (uint8_t *)src, stride_dst, stride_src, sao, width, height, c_idx, init_x, init_y);
424 }
425 
426 static void FUNC(sao_edge_filter_1)(uint8_t *_dst, uint8_t *_src,
427  ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao,
428  int *borders, int _width, int _height,
429  int c_idx, uint8_t *vert_edge,
430  uint8_t *horiz_edge, uint8_t *diag_edge)
431 {
432  int x, y;
433  pixel *dst = (pixel *)_dst;
434  pixel *src = (pixel *)_src;
435  int16_t *sao_offset_val = sao->offset_val[c_idx];
436  int sao_eo_class = sao->eo_class[c_idx];
437  int init_x = 0, init_y = 0, width = _width, height = _height;
438 
439  stride_dst /= sizeof(pixel);
440  stride_src /= sizeof(pixel);
441 
442  if (sao_eo_class != SAO_EO_VERT) {
443  if (borders[0]) {
444  int offset_val = sao_offset_val[0];
445  for (y = 0; y < height; y++) {
446  dst[y * stride_dst] = av_clip_pixel(src[y * stride_src] + offset_val);
447  }
448  init_x = 1;
449  }
450  if (borders[2]) {
451  int offset_val = sao_offset_val[0];
452  int offset = width - 1;
453  for (x = 0; x < height; x++) {
454  dst[x * stride_dst + offset] = av_clip_pixel(src[x * stride_src + offset] + offset_val);
455  }
456  width--;
457  }
458  }
459  if (sao_eo_class != SAO_EO_HORIZ) {
460  if (borders[1]) {
461  int offset_val = sao_offset_val[0];
462  for (x = init_x; x < width; x++)
463  dst[x] = av_clip_pixel(src[x] + offset_val);
464  init_y = 1;
465  }
466  if (borders[3]) {
467  int offset_val = sao_offset_val[0];
468  int y_stride_dst = stride_dst * (height - 1);
469  int y_stride_src = stride_src * (height - 1);
470  for (x = init_x; x < width; x++)
471  dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + offset_val);
472  height--;
473  }
474  }
475 
476  FUNC(sao_edge_filter)((uint8_t *)dst, (uint8_t *)src, stride_dst, stride_src, sao, width, height, c_idx, init_x, init_y);
477 
478  {
479  int save_upper_left = !diag_edge[0] && sao_eo_class == SAO_EO_135D && !borders[0] && !borders[1];
480  int save_upper_right = !diag_edge[1] && sao_eo_class == SAO_EO_45D && !borders[1] && !borders[2];
481  int save_lower_right = !diag_edge[2] && sao_eo_class == SAO_EO_135D && !borders[2] && !borders[3];
482  int save_lower_left = !diag_edge[3] && sao_eo_class == SAO_EO_45D && !borders[0] && !borders[3];
483 
484  // Restore pixels that can't be modified
485  if(vert_edge[0] && sao_eo_class != SAO_EO_VERT) {
486  for(y = init_y+save_upper_left; y< height-save_lower_left; y++)
487  dst[y*stride_dst] = src[y*stride_src];
488  }
489  if(vert_edge[1] && sao_eo_class != SAO_EO_VERT) {
490  for(y = init_y+save_upper_right; y< height-save_lower_right; y++)
491  dst[y*stride_dst+width-1] = src[y*stride_src+width-1];
492  }
493 
494  if(horiz_edge[0] && sao_eo_class != SAO_EO_HORIZ) {
495  for(x = init_x+save_upper_left; x < width-save_upper_right; x++)
496  dst[x] = src[x];
497  }
498  if(horiz_edge[1] && sao_eo_class != SAO_EO_HORIZ) {
499  for(x = init_x+save_lower_left; x < width-save_lower_right; x++)
500  dst[(height-1)*stride_dst+x] = src[(height-1)*stride_src+x];
501  }
502  if(diag_edge[0] && sao_eo_class == SAO_EO_135D)
503  dst[0] = src[0];
504  if(diag_edge[1] && sao_eo_class == SAO_EO_45D)
505  dst[width-1] = src[width-1];
506  if(diag_edge[2] && sao_eo_class == SAO_EO_135D)
507  dst[stride_dst*(height-1)+width-1] = src[stride_src*(height-1)+width-1];
508  if(diag_edge[3] && sao_eo_class == SAO_EO_45D)
509  dst[stride_dst*(height-1)] = src[stride_src*(height-1)];
510 
511  }
512 }
513 
514 #undef CMP
515 
516 ////////////////////////////////////////////////////////////////////////////////
517 //
518 ////////////////////////////////////////////////////////////////////////////////
519 static void FUNC(put_hevc_pel_pixels)(int16_t *dst,
520  uint8_t *_src, ptrdiff_t _srcstride,
521  int height, intptr_t mx, intptr_t my, int width)
522 {
523  int x, y;
524  pixel *src = (pixel *)_src;
525  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
526 
527  for (y = 0; y < height; y++) {
528  for (x = 0; x < width; x++)
529  dst[x] = src[x] << (14 - BIT_DEPTH);
530  src += srcstride;
531  dst += MAX_PB_SIZE;
532  }
533 }
534 
535 static void FUNC(put_hevc_pel_uni_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
536  int height, intptr_t mx, intptr_t my, int width)
537 {
538  int y;
539  pixel *src = (pixel *)_src;
540  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
541  pixel *dst = (pixel *)_dst;
542  ptrdiff_t dststride = _dststride / sizeof(pixel);
543 
544  for (y = 0; y < height; y++) {
545  memcpy(dst, src, width * sizeof(pixel));
546  src += srcstride;
547  dst += dststride;
548  }
549 }
550 
551 static void FUNC(put_hevc_pel_bi_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
552  int16_t *src2,
553  int height, intptr_t mx, intptr_t my, int width)
554 {
555  int x, y;
556  pixel *src = (pixel *)_src;
557  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
558  pixel *dst = (pixel *)_dst;
559  ptrdiff_t dststride = _dststride / sizeof(pixel);
560 
561  int shift = 14 + 1 - BIT_DEPTH;
562 #if BIT_DEPTH < 14
563  int offset = 1 << (shift - 1);
564 #else
565  int offset = 0;
566 #endif
567 
568  for (y = 0; y < height; y++) {
569  for (x = 0; x < width; x++)
570  dst[x] = av_clip_pixel(((src[x] << (14 - BIT_DEPTH)) + src2[x] + offset) >> shift);
571  src += srcstride;
572  dst += dststride;
573  src2 += MAX_PB_SIZE;
574  }
575 }
576 
577 static void FUNC(put_hevc_pel_uni_w_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
578  int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
579 {
580  int x, y;
581  pixel *src = (pixel *)_src;
582  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
583  pixel *dst = (pixel *)_dst;
584  ptrdiff_t dststride = _dststride / sizeof(pixel);
585  int shift = denom + 14 - BIT_DEPTH;
586 #if BIT_DEPTH < 14
587  int offset = 1 << (shift - 1);
588 #else
589  int offset = 0;
590 #endif
591 
592  ox = ox * (1 << (BIT_DEPTH - 8));
593  for (y = 0; y < height; y++) {
594  for (x = 0; x < width; x++)
595  dst[x] = av_clip_pixel((((src[x] << (14 - BIT_DEPTH)) * wx + offset) >> shift) + ox);
596  src += srcstride;
597  dst += dststride;
598  }
599 }
600 
601 static void FUNC(put_hevc_pel_bi_w_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
602  int16_t *src2,
603  int height, int denom, int wx0, int wx1,
604  int ox0, int ox1, intptr_t mx, intptr_t my, int width)
605 {
606  int x, y;
607  pixel *src = (pixel *)_src;
608  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
609  pixel *dst = (pixel *)_dst;
610  ptrdiff_t dststride = _dststride / sizeof(pixel);
611 
612  int shift = 14 + 1 - BIT_DEPTH;
613  int log2Wd = denom + shift - 1;
614 
615  ox0 = ox0 * (1 << (BIT_DEPTH - 8));
616  ox1 = ox1 * (1 << (BIT_DEPTH - 8));
617  for (y = 0; y < height; y++) {
618  for (x = 0; x < width; x++) {
619  dst[x] = av_clip_pixel(( (src[x] << (14 - BIT_DEPTH)) * wx1 + src2[x] * wx0 + ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
620  }
621  src += srcstride;
622  dst += dststride;
623  src2 += MAX_PB_SIZE;
624  }
625 }
626 
627 ////////////////////////////////////////////////////////////////////////////////
628 //
629 ////////////////////////////////////////////////////////////////////////////////
/*
 * 8-tap filter around position x of src, with taps `stride` elements apart
 * (offsets -3 .. +4).
 *
 * Uses the variables `filter` (8 coefficients) and `x` from the scope in
 * which it is expanded.  The macro arguments are parenthesized so that
 * expression arguments (for example a + b as stride) expand correctly.
 */
#define QPEL_FILTER(src, stride)                                        \
    (filter[0] * (src)[x - 3 * (stride)] +                              \
     filter[1] * (src)[x - 2 * (stride)] +                              \
     filter[2] * (src)[x -     (stride)] +                              \
     filter[3] * (src)[x               ] +                              \
     filter[4] * (src)[x +     (stride)] +                              \
     filter[5] * (src)[x + 2 * (stride)] +                              \
     filter[6] * (src)[x + 3 * (stride)] +                              \
     filter[7] * (src)[x + 4 * (stride)])
639 
640 static void FUNC(put_hevc_qpel_h)(int16_t *dst,
641  uint8_t *_src, ptrdiff_t _srcstride,
642  int height, intptr_t mx, intptr_t my, int width)
643 {
644  int x, y;
645  pixel *src = (pixel*)_src;
646  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
647  const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
648  for (y = 0; y < height; y++) {
649  for (x = 0; x < width; x++)
650  dst[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
651  src += srcstride;
652  dst += MAX_PB_SIZE;
653  }
654 }
655 
656 static void FUNC(put_hevc_qpel_v)(int16_t *dst,
657  uint8_t *_src, ptrdiff_t _srcstride,
658  int height, intptr_t mx, intptr_t my, int width)
659 {
660  int x, y;
661  pixel *src = (pixel*)_src;
662  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
663  const int8_t *filter = ff_hevc_qpel_filters[my - 1];
664  for (y = 0; y < height; y++) {
665  for (x = 0; x < width; x++)
666  dst[x] = QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8);
667  src += srcstride;
668  dst += MAX_PB_SIZE;
669  }
670 }
671 
672 static void FUNC(put_hevc_qpel_hv)(int16_t *dst,
673  uint8_t *_src,
674  ptrdiff_t _srcstride,
675  int height, intptr_t mx,
676  intptr_t my, int width)
677 {
678  int x, y;
679  const int8_t *filter;
680  pixel *src = (pixel*)_src;
681  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
682  int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
683  int16_t *tmp = tmp_array;
684 
685  src -= QPEL_EXTRA_BEFORE * srcstride;
686  filter = ff_hevc_qpel_filters[mx - 1];
687  for (y = 0; y < height + QPEL_EXTRA; y++) {
688  for (x = 0; x < width; x++)
689  tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
690  src += srcstride;
691  tmp += MAX_PB_SIZE;
692  }
693 
694  tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
695  filter = ff_hevc_qpel_filters[my - 1];
696  for (y = 0; y < height; y++) {
697  for (x = 0; x < width; x++)
698  dst[x] = QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6;
699  tmp += MAX_PB_SIZE;
700  dst += MAX_PB_SIZE;
701  }
702 }
703 
704 static void FUNC(put_hevc_qpel_uni_h)(uint8_t *_dst, ptrdiff_t _dststride,
705  uint8_t *_src, ptrdiff_t _srcstride,
706  int height, intptr_t mx, intptr_t my, int width)
707 {
708  int x, y;
709  pixel *src = (pixel*)_src;
710  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
711  pixel *dst = (pixel *)_dst;
712  ptrdiff_t dststride = _dststride / sizeof(pixel);
713  const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
714  int shift = 14 - BIT_DEPTH;
715 
716 #if BIT_DEPTH < 14
717  int offset = 1 << (shift - 1);
718 #else
719  int offset = 0;
720 #endif
721 
722  for (y = 0; y < height; y++) {
723  for (x = 0; x < width; x++)
724  dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift);
725  src += srcstride;
726  dst += dststride;
727  }
728 }
729 
730 static void FUNC(put_hevc_qpel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
731  int16_t *src2,
732  int height, intptr_t mx, intptr_t my, int width)
733 {
734  int x, y;
735  pixel *src = (pixel*)_src;
736  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
737  pixel *dst = (pixel *)_dst;
738  ptrdiff_t dststride = _dststride / sizeof(pixel);
739 
740  const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
741 
742  int shift = 14 + 1 - BIT_DEPTH;
743 #if BIT_DEPTH < 14
744  int offset = 1 << (shift - 1);
745 #else
746  int offset = 0;
747 #endif
748 
749  for (y = 0; y < height; y++) {
750  for (x = 0; x < width; x++)
751  dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
752  src += srcstride;
753  dst += dststride;
754  src2 += MAX_PB_SIZE;
755  }
756 }
757 
758 static void FUNC(put_hevc_qpel_uni_v)(uint8_t *_dst, ptrdiff_t _dststride,
759  uint8_t *_src, ptrdiff_t _srcstride,
760  int height, intptr_t mx, intptr_t my, int width)
761 {
762  int x, y;
763  pixel *src = (pixel*)_src;
764  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
765  pixel *dst = (pixel *)_dst;
766  ptrdiff_t dststride = _dststride / sizeof(pixel);
767  const int8_t *filter = ff_hevc_qpel_filters[my - 1];
768  int shift = 14 - BIT_DEPTH;
769 
770 #if BIT_DEPTH < 14
771  int offset = 1 << (shift - 1);
772 #else
773  int offset = 0;
774 #endif
775 
776  for (y = 0; y < height; y++) {
777  for (x = 0; x < width; x++)
778  dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + offset) >> shift);
779  src += srcstride;
780  dst += dststride;
781  }
782 }
783 
784 
785 static void FUNC(put_hevc_qpel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
786  int16_t *src2,
787  int height, intptr_t mx, intptr_t my, int width)
788 {
789  int x, y;
790  pixel *src = (pixel*)_src;
791  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
792  pixel *dst = (pixel *)_dst;
793  ptrdiff_t dststride = _dststride / sizeof(pixel);
794 
795  const int8_t *filter = ff_hevc_qpel_filters[my - 1];
796 
797  int shift = 14 + 1 - BIT_DEPTH;
798 #if BIT_DEPTH < 14
799  int offset = 1 << (shift - 1);
800 #else
801  int offset = 0;
802 #endif
803 
804  for (y = 0; y < height; y++) {
805  for (x = 0; x < width; x++)
806  dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
807  src += srcstride;
808  dst += dststride;
809  src2 += MAX_PB_SIZE;
810  }
811 }
812 
813 static void FUNC(put_hevc_qpel_uni_hv)(uint8_t *_dst, ptrdiff_t _dststride,
814  uint8_t *_src, ptrdiff_t _srcstride,
815  int height, intptr_t mx, intptr_t my, int width)
816 {
817  int x, y;
818  const int8_t *filter;
819  pixel *src = (pixel*)_src;
820  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
821  pixel *dst = (pixel *)_dst;
822  ptrdiff_t dststride = _dststride / sizeof(pixel);
823  int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
824  int16_t *tmp = tmp_array;
825  int shift = 14 - BIT_DEPTH;
826 
827 #if BIT_DEPTH < 14
828  int offset = 1 << (shift - 1);
829 #else
830  int offset = 0;
831 #endif
832 
833  src -= QPEL_EXTRA_BEFORE * srcstride;
834  filter = ff_hevc_qpel_filters[mx - 1];
835  for (y = 0; y < height + QPEL_EXTRA; y++) {
836  for (x = 0; x < width; x++)
837  tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
838  src += srcstride;
839  tmp += MAX_PB_SIZE;
840  }
841 
842  tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
843  filter = ff_hevc_qpel_filters[my - 1];
844 
845  for (y = 0; y < height; y++) {
846  for (x = 0; x < width; x++)
847  dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift);
848  tmp += MAX_PB_SIZE;
849  dst += dststride;
850  }
851 }
852 
853 static void FUNC(put_hevc_qpel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
854  int16_t *src2,
855  int height, intptr_t mx, intptr_t my, int width)
856 {
857  int x, y;
858  const int8_t *filter;
859  pixel *src = (pixel*)_src;
860  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
861  pixel *dst = (pixel *)_dst;
862  ptrdiff_t dststride = _dststride / sizeof(pixel);
863  int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
864  int16_t *tmp = tmp_array;
865  int shift = 14 + 1 - BIT_DEPTH;
866 #if BIT_DEPTH < 14
867  int offset = 1 << (shift - 1);
868 #else
869  int offset = 0;
870 #endif
871 
872  src -= QPEL_EXTRA_BEFORE * srcstride;
873  filter = ff_hevc_qpel_filters[mx - 1];
874  for (y = 0; y < height + QPEL_EXTRA; y++) {
875  for (x = 0; x < width; x++)
876  tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
877  src += srcstride;
878  tmp += MAX_PB_SIZE;
879  }
880 
881  tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
882  filter = ff_hevc_qpel_filters[my - 1];
883 
884  for (y = 0; y < height; y++) {
885  for (x = 0; x < width; x++)
886  dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift);
887  tmp += MAX_PB_SIZE;
888  dst += dststride;
889  src2 += MAX_PB_SIZE;
890  }
891 }
892 
893 static void FUNC(put_hevc_qpel_uni_w_h)(uint8_t *_dst, ptrdiff_t _dststride,
894  uint8_t *_src, ptrdiff_t _srcstride,
895  int height, int denom, int wx, int ox,
896  intptr_t mx, intptr_t my, int width)
897 {
898  int x, y;
899  pixel *src = (pixel*)_src;
900  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
901  pixel *dst = (pixel *)_dst;
902  ptrdiff_t dststride = _dststride / sizeof(pixel);
903  const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
904  int shift = denom + 14 - BIT_DEPTH;
905 #if BIT_DEPTH < 14
906  int offset = 1 << (shift - 1);
907 #else
908  int offset = 0;
909 #endif
910 
911  ox = ox * (1 << (BIT_DEPTH - 8));
912  for (y = 0; y < height; y++) {
913  for (x = 0; x < width; x++)
914  dst[x] = av_clip_pixel((((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
915  src += srcstride;
916  dst += dststride;
917  }
918 }
919 
920 static void FUNC(put_hevc_qpel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
921  int16_t *src2,
922  int height, int denom, int wx0, int wx1,
923  int ox0, int ox1, intptr_t mx, intptr_t my, int width)
924 {
925  int x, y;
926  pixel *src = (pixel*)_src;
927  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
928  pixel *dst = (pixel *)_dst;
929  ptrdiff_t dststride = _dststride / sizeof(pixel);
930 
931  const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
932 
933  int shift = 14 + 1 - BIT_DEPTH;
934  int log2Wd = denom + shift - 1;
935 
936  ox0 = ox0 * (1 << (BIT_DEPTH - 8));
937  ox1 = ox1 * (1 << (BIT_DEPTH - 8));
938  for (y = 0; y < height; y++) {
939  for (x = 0; x < width; x++)
940  dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
941  ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
942  src += srcstride;
943  dst += dststride;
944  src2 += MAX_PB_SIZE;
945  }
946 }
947 
948 static void FUNC(put_hevc_qpel_uni_w_v)(uint8_t *_dst, ptrdiff_t _dststride,
949  uint8_t *_src, ptrdiff_t _srcstride,
950  int height, int denom, int wx, int ox,
951  intptr_t mx, intptr_t my, int width)
952 {
953  int x, y;
954  pixel *src = (pixel*)_src;
955  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
956  pixel *dst = (pixel *)_dst;
957  ptrdiff_t dststride = _dststride / sizeof(pixel);
958  const int8_t *filter = ff_hevc_qpel_filters[my - 1];
959  int shift = denom + 14 - BIT_DEPTH;
960 #if BIT_DEPTH < 14
961  int offset = 1 << (shift - 1);
962 #else
963  int offset = 0;
964 #endif
965 
966  ox = ox * (1 << (BIT_DEPTH - 8));
967  for (y = 0; y < height; y++) {
968  for (x = 0; x < width; x++)
969  dst[x] = av_clip_pixel((((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
970  src += srcstride;
971  dst += dststride;
972  }
973 }
974 
975 static void FUNC(put_hevc_qpel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
976  int16_t *src2,
977  int height, int denom, int wx0, int wx1,
978  int ox0, int ox1, intptr_t mx, intptr_t my, int width)
979 {
980  int x, y;
981  pixel *src = (pixel*)_src;
982  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
983  pixel *dst = (pixel *)_dst;
984  ptrdiff_t dststride = _dststride / sizeof(pixel);
985 
986  const int8_t *filter = ff_hevc_qpel_filters[my - 1];
987 
988  int shift = 14 + 1 - BIT_DEPTH;
989  int log2Wd = denom + shift - 1;
990 
991  ox0 = ox0 * (1 << (BIT_DEPTH - 8));
992  ox1 = ox1 * (1 << (BIT_DEPTH - 8));
993  for (y = 0; y < height; y++) {
994  for (x = 0; x < width; x++)
995  dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
996  ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
997  src += srcstride;
998  dst += dststride;
999  src2 += MAX_PB_SIZE;
1000  }
1001 }
1002 
1003 static void FUNC(put_hevc_qpel_uni_w_hv)(uint8_t *_dst, ptrdiff_t _dststride,
1004  uint8_t *_src, ptrdiff_t _srcstride,
1005  int height, int denom, int wx, int ox,
1006  intptr_t mx, intptr_t my, int width)
1007 {
1008  int x, y;
1009  const int8_t *filter;
1010  pixel *src = (pixel*)_src;
1011  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1012  pixel *dst = (pixel *)_dst;
1013  ptrdiff_t dststride = _dststride / sizeof(pixel);
1014  int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
1015  int16_t *tmp = tmp_array;
1016  int shift = denom + 14 - BIT_DEPTH;
1017 #if BIT_DEPTH < 14
1018  int offset = 1 << (shift - 1);
1019 #else
1020  int offset = 0;
1021 #endif
1022 
1023  src -= QPEL_EXTRA_BEFORE * srcstride;
1024  filter = ff_hevc_qpel_filters[mx - 1];
1025  for (y = 0; y < height + QPEL_EXTRA; y++) {
1026  for (x = 0; x < width; x++)
1027  tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1028  src += srcstride;
1029  tmp += MAX_PB_SIZE;
1030  }
1031 
1032  tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1033  filter = ff_hevc_qpel_filters[my - 1];
1034 
1035  ox = ox * (1 << (BIT_DEPTH - 8));
1036  for (y = 0; y < height; y++) {
1037  for (x = 0; x < width; x++)
1038  dst[x] = av_clip_pixel((((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox);
1039  tmp += MAX_PB_SIZE;
1040  dst += dststride;
1041  }
1042 }
1043 
1044 static void FUNC(put_hevc_qpel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1045  int16_t *src2,
1046  int height, int denom, int wx0, int wx1,
1047  int ox0, int ox1, intptr_t mx, intptr_t my, int width)
1048 {
1049  int x, y;
1050  const int8_t *filter;
1051  pixel *src = (pixel*)_src;
1052  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1053  pixel *dst = (pixel *)_dst;
1054  ptrdiff_t dststride = _dststride / sizeof(pixel);
1055  int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
1056  int16_t *tmp = tmp_array;
1057  int shift = 14 + 1 - BIT_DEPTH;
1058  int log2Wd = denom + shift - 1;
1059 
1060  src -= QPEL_EXTRA_BEFORE * srcstride;
1061  filter = ff_hevc_qpel_filters[mx - 1];
1062  for (y = 0; y < height + QPEL_EXTRA; y++) {
1063  for (x = 0; x < width; x++)
1064  tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1065  src += srcstride;
1066  tmp += MAX_PB_SIZE;
1067  }
1068 
1069  tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1070  filter = ff_hevc_qpel_filters[my - 1];
1071 
1072  ox0 = ox0 * (1 << (BIT_DEPTH - 8));
1073  ox1 = ox1 * (1 << (BIT_DEPTH - 8));
1074  for (y = 0; y < height; y++) {
1075  for (x = 0; x < width; x++)
1076  dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 +
1077  ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
1078  tmp += MAX_PB_SIZE;
1079  dst += dststride;
1080  src2 += MAX_PB_SIZE;
1081  }
1082 }
1083 
1084 ////////////////////////////////////////////////////////////////////////////////
1085 //
1086 ////////////////////////////////////////////////////////////////////////////////
/*
 * 4-tap filter around sample x of src, with taps `stride` elements apart
 * (offsets -1, 0, +1 and +2 strides). Relies on `filter` (the coefficient
 * array) and `x` (the current index) being in scope at the point of use.
 * Both macro parameters are parenthesised so expression arguments expand
 * correctly.
 */
#define EPEL_FILTER(src, stride)                \
    (filter[0] * (src)[x - (stride)] +          \
     filter[1] * (src)[x] +                     \
     filter[2] * (src)[x + (stride)] +          \
     filter[3] * (src)[x + 2 * (stride)])
1092 
1093 static void FUNC(put_hevc_epel_h)(int16_t *dst,
1094  uint8_t *_src, ptrdiff_t _srcstride,
1095  int height, intptr_t mx, intptr_t my, int width)
1096 {
1097  int x, y;
1098  pixel *src = (pixel *)_src;
1099  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1100  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1101  for (y = 0; y < height; y++) {
1102  for (x = 0; x < width; x++)
1103  dst[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1104  src += srcstride;
1105  dst += MAX_PB_SIZE;
1106  }
1107 }
1108 
1109 static void FUNC(put_hevc_epel_v)(int16_t *dst,
1110  uint8_t *_src, ptrdiff_t _srcstride,
1111  int height, intptr_t mx, intptr_t my, int width)
1112 {
1113  int x, y;
1114  pixel *src = (pixel *)_src;
1115  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1116  const int8_t *filter = ff_hevc_epel_filters[my - 1];
1117 
1118  for (y = 0; y < height; y++) {
1119  for (x = 0; x < width; x++)
1120  dst[x] = EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8);
1121  src += srcstride;
1122  dst += MAX_PB_SIZE;
1123  }
1124 }
1125 
1126 static void FUNC(put_hevc_epel_hv)(int16_t *dst,
1127  uint8_t *_src, ptrdiff_t _srcstride,
1128  int height, intptr_t mx, intptr_t my, int width)
1129 {
1130  int x, y;
1131  pixel *src = (pixel *)_src;
1132  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1133  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1134  int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
1135  int16_t *tmp = tmp_array;
1136 
1137  src -= EPEL_EXTRA_BEFORE * srcstride;
1138 
1139  for (y = 0; y < height + EPEL_EXTRA; y++) {
1140  for (x = 0; x < width; x++)
1141  tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1142  src += srcstride;
1143  tmp += MAX_PB_SIZE;
1144  }
1145 
1146  tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1147  filter = ff_hevc_epel_filters[my - 1];
1148 
1149  for (y = 0; y < height; y++) {
1150  for (x = 0; x < width; x++)
1151  dst[x] = EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6;
1152  tmp += MAX_PB_SIZE;
1153  dst += MAX_PB_SIZE;
1154  }
1155 }
1156 
1157 static void FUNC(put_hevc_epel_uni_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1158  int height, intptr_t mx, intptr_t my, int width)
1159 {
1160  int x, y;
1161  pixel *src = (pixel *)_src;
1162  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1163  pixel *dst = (pixel *)_dst;
1164  ptrdiff_t dststride = _dststride / sizeof(pixel);
1165  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1166  int shift = 14 - BIT_DEPTH;
1167 #if BIT_DEPTH < 14
1168  int offset = 1 << (shift - 1);
1169 #else
1170  int offset = 0;
1171 #endif
1172 
1173  for (y = 0; y < height; y++) {
1174  for (x = 0; x < width; x++)
1175  dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift);
1176  src += srcstride;
1177  dst += dststride;
1178  }
1179 }
1180 
1181 static void FUNC(put_hevc_epel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1182  int16_t *src2,
1183  int height, intptr_t mx, intptr_t my, int width)
1184 {
1185  int x, y;
1186  pixel *src = (pixel *)_src;
1187  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1188  pixel *dst = (pixel *)_dst;
1189  ptrdiff_t dststride = _dststride / sizeof(pixel);
1190  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1191  int shift = 14 + 1 - BIT_DEPTH;
1192 #if BIT_DEPTH < 14
1193  int offset = 1 << (shift - 1);
1194 #else
1195  int offset = 0;
1196 #endif
1197 
1198  for (y = 0; y < height; y++) {
1199  for (x = 0; x < width; x++) {
1200  dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
1201  }
1202  dst += dststride;
1203  src += srcstride;
1204  src2 += MAX_PB_SIZE;
1205  }
1206 }
1207 
1208 static void FUNC(put_hevc_epel_uni_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1209  int height, intptr_t mx, intptr_t my, int width)
1210 {
1211  int x, y;
1212  pixel *src = (pixel *)_src;
1213  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1214  pixel *dst = (pixel *)_dst;
1215  ptrdiff_t dststride = _dststride / sizeof(pixel);
1216  const int8_t *filter = ff_hevc_epel_filters[my - 1];
1217  int shift = 14 - BIT_DEPTH;
1218 #if BIT_DEPTH < 14
1219  int offset = 1 << (shift - 1);
1220 #else
1221  int offset = 0;
1222 #endif
1223 
1224  for (y = 0; y < height; y++) {
1225  for (x = 0; x < width; x++)
1226  dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + offset) >> shift);
1227  src += srcstride;
1228  dst += dststride;
1229  }
1230 }
1231 
1232 static void FUNC(put_hevc_epel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1233  int16_t *src2,
1234  int height, intptr_t mx, intptr_t my, int width)
1235 {
1236  int x, y;
1237  pixel *src = (pixel *)_src;
1238  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1239  const int8_t *filter = ff_hevc_epel_filters[my - 1];
1240  pixel *dst = (pixel *)_dst;
1241  ptrdiff_t dststride = _dststride / sizeof(pixel);
1242  int shift = 14 + 1 - BIT_DEPTH;
1243 #if BIT_DEPTH < 14
1244  int offset = 1 << (shift - 1);
1245 #else
1246  int offset = 0;
1247 #endif
1248 
1249  for (y = 0; y < height; y++) {
1250  for (x = 0; x < width; x++)
1251  dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
1252  dst += dststride;
1253  src += srcstride;
1254  src2 += MAX_PB_SIZE;
1255  }
1256 }
1257 
1258 static void FUNC(put_hevc_epel_uni_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1259  int height, intptr_t mx, intptr_t my, int width)
1260 {
1261  int x, y;
1262  pixel *src = (pixel *)_src;
1263  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1264  pixel *dst = (pixel *)_dst;
1265  ptrdiff_t dststride = _dststride / sizeof(pixel);
1266  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1267  int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
1268  int16_t *tmp = tmp_array;
1269  int shift = 14 - BIT_DEPTH;
1270 #if BIT_DEPTH < 14
1271  int offset = 1 << (shift - 1);
1272 #else
1273  int offset = 0;
1274 #endif
1275 
1276  src -= EPEL_EXTRA_BEFORE * srcstride;
1277 
1278  for (y = 0; y < height + EPEL_EXTRA; y++) {
1279  for (x = 0; x < width; x++)
1280  tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1281  src += srcstride;
1282  tmp += MAX_PB_SIZE;
1283  }
1284 
1285  tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1286  filter = ff_hevc_epel_filters[my - 1];
1287 
1288  for (y = 0; y < height; y++) {
1289  for (x = 0; x < width; x++)
1290  dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift);
1291  tmp += MAX_PB_SIZE;
1292  dst += dststride;
1293  }
1294 }
1295 
1296 static void FUNC(put_hevc_epel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1297  int16_t *src2,
1298  int height, intptr_t mx, intptr_t my, int width)
1299 {
1300  int x, y;
1301  pixel *src = (pixel *)_src;
1302  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1303  pixel *dst = (pixel *)_dst;
1304  ptrdiff_t dststride = _dststride / sizeof(pixel);
1305  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1306  int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
1307  int16_t *tmp = tmp_array;
1308  int shift = 14 + 1 - BIT_DEPTH;
1309 #if BIT_DEPTH < 14
1310  int offset = 1 << (shift - 1);
1311 #else
1312  int offset = 0;
1313 #endif
1314 
1315  src -= EPEL_EXTRA_BEFORE * srcstride;
1316 
1317  for (y = 0; y < height + EPEL_EXTRA; y++) {
1318  for (x = 0; x < width; x++)
1319  tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1320  src += srcstride;
1321  tmp += MAX_PB_SIZE;
1322  }
1323 
1324  tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1325  filter = ff_hevc_epel_filters[my - 1];
1326 
1327  for (y = 0; y < height; y++) {
1328  for (x = 0; x < width; x++)
1329  dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift);
1330  tmp += MAX_PB_SIZE;
1331  dst += dststride;
1332  src2 += MAX_PB_SIZE;
1333  }
1334 }
1335 
1336 static void FUNC(put_hevc_epel_uni_w_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1337  int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
1338 {
1339  int x, y;
1340  pixel *src = (pixel *)_src;
1341  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1342  pixel *dst = (pixel *)_dst;
1343  ptrdiff_t dststride = _dststride / sizeof(pixel);
1344  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1345  int shift = denom + 14 - BIT_DEPTH;
1346 #if BIT_DEPTH < 14
1347  int offset = 1 << (shift - 1);
1348 #else
1349  int offset = 0;
1350 #endif
1351 
1352  ox = ox * (1 << (BIT_DEPTH - 8));
1353  for (y = 0; y < height; y++) {
1354  for (x = 0; x < width; x++) {
1355  dst[x] = av_clip_pixel((((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
1356  }
1357  dst += dststride;
1358  src += srcstride;
1359  }
1360 }
1361 
1362 static void FUNC(put_hevc_epel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1363  int16_t *src2,
1364  int height, int denom, int wx0, int wx1,
1365  int ox0, int ox1, intptr_t mx, intptr_t my, int width)
1366 {
1367  int x, y;
1368  pixel *src = (pixel *)_src;
1369  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1370  pixel *dst = (pixel *)_dst;
1371  ptrdiff_t dststride = _dststride / sizeof(pixel);
1372  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1373  int shift = 14 + 1 - BIT_DEPTH;
1374  int log2Wd = denom + shift - 1;
1375 
1376  ox0 = ox0 * (1 << (BIT_DEPTH - 8));
1377  ox1 = ox1 * (1 << (BIT_DEPTH - 8));
1378  for (y = 0; y < height; y++) {
1379  for (x = 0; x < width; x++)
1380  dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
1381  ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
1382  src += srcstride;
1383  dst += dststride;
1384  src2 += MAX_PB_SIZE;
1385  }
1386 }
1387 
1388 static void FUNC(put_hevc_epel_uni_w_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1389  int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
1390 {
1391  int x, y;
1392  pixel *src = (pixel *)_src;
1393  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1394  pixel *dst = (pixel *)_dst;
1395  ptrdiff_t dststride = _dststride / sizeof(pixel);
1396  const int8_t *filter = ff_hevc_epel_filters[my - 1];
1397  int shift = denom + 14 - BIT_DEPTH;
1398 #if BIT_DEPTH < 14
1399  int offset = 1 << (shift - 1);
1400 #else
1401  int offset = 0;
1402 #endif
1403 
1404  ox = ox * (1 << (BIT_DEPTH - 8));
1405  for (y = 0; y < height; y++) {
1406  for (x = 0; x < width; x++) {
1407  dst[x] = av_clip_pixel((((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
1408  }
1409  dst += dststride;
1410  src += srcstride;
1411  }
1412 }
1413 
1414 static void FUNC(put_hevc_epel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1415  int16_t *src2,
1416  int height, int denom, int wx0, int wx1,
1417  int ox0, int ox1, intptr_t mx, intptr_t my, int width)
1418 {
1419  int x, y;
1420  pixel *src = (pixel *)_src;
1421  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1422  const int8_t *filter = ff_hevc_epel_filters[my - 1];
1423  pixel *dst = (pixel *)_dst;
1424  ptrdiff_t dststride = _dststride / sizeof(pixel);
1425  int shift = 14 + 1 - BIT_DEPTH;
1426  int log2Wd = denom + shift - 1;
1427 
1428  ox0 = ox0 * (1 << (BIT_DEPTH - 8));
1429  ox1 = ox1 * (1 << (BIT_DEPTH - 8));
1430  for (y = 0; y < height; y++) {
1431  for (x = 0; x < width; x++)
1432  dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
1433  ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
1434  src += srcstride;
1435  dst += dststride;
1436  src2 += MAX_PB_SIZE;
1437  }
1438 }
1439 
1440 static void FUNC(put_hevc_epel_uni_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1441  int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
1442 {
1443  int x, y;
1444  pixel *src = (pixel *)_src;
1445  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1446  pixel *dst = (pixel *)_dst;
1447  ptrdiff_t dststride = _dststride / sizeof(pixel);
1448  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1449  int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
1450  int16_t *tmp = tmp_array;
1451  int shift = denom + 14 - BIT_DEPTH;
1452 #if BIT_DEPTH < 14
1453  int offset = 1 << (shift - 1);
1454 #else
1455  int offset = 0;
1456 #endif
1457 
1458  src -= EPEL_EXTRA_BEFORE * srcstride;
1459 
1460  for (y = 0; y < height + EPEL_EXTRA; y++) {
1461  for (x = 0; x < width; x++)
1462  tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1463  src += srcstride;
1464  tmp += MAX_PB_SIZE;
1465  }
1466 
1467  tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1468  filter = ff_hevc_epel_filters[my - 1];
1469 
1470  ox = ox * (1 << (BIT_DEPTH - 8));
1471  for (y = 0; y < height; y++) {
1472  for (x = 0; x < width; x++)
1473  dst[x] = av_clip_pixel((((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox);
1474  tmp += MAX_PB_SIZE;
1475  dst += dststride;
1476  }
1477 }
1478 
1479 static void FUNC(put_hevc_epel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1480  int16_t *src2,
1481  int height, int denom, int wx0, int wx1,
1482  int ox0, int ox1, intptr_t mx, intptr_t my, int width)
1483 {
1484  int x, y;
1485  pixel *src = (pixel *)_src;
1486  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1487  pixel *dst = (pixel *)_dst;
1488  ptrdiff_t dststride = _dststride / sizeof(pixel);
1489  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1490  int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
1491  int16_t *tmp = tmp_array;
1492  int shift = 14 + 1 - BIT_DEPTH;
1493  int log2Wd = denom + shift - 1;
1494 
1495  src -= EPEL_EXTRA_BEFORE * srcstride;
1496 
1497  for (y = 0; y < height + EPEL_EXTRA; y++) {
1498  for (x = 0; x < width; x++)
1499  tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1500  src += srcstride;
1501  tmp += MAX_PB_SIZE;
1502  }
1503 
1504  tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1505  filter = ff_hevc_epel_filters[my - 1];
1506 
1507  ox0 = ox0 * (1 << (BIT_DEPTH - 8));
1508  ox1 = ox1 * (1 << (BIT_DEPTH - 8));
1509  for (y = 0; y < height; y++) {
1510  for (x = 0; x < width; x++)
1511  dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 +
1512  ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1));
1513  tmp += MAX_PB_SIZE;
1514  dst += dststride;
1515  src2 += MAX_PB_SIZE;
1516  }
1517 }// line zero
/*
 * Sample accessors for the loop filters. They expect `pix`, `xstride` and
 * `ystride` to be in scope: P3..P0 are the four samples before pix in steps
 * of xstride, Q0..Q3 the four samples starting at pix.
 */
#define P3 pix[-4 * xstride]
#define P2 pix[-3 * xstride]
#define P1 pix[-2 * xstride]
#define P0 pix[-xstride]
#define Q0 pix[0]
#define Q1 pix[xstride]
#define Q2 pix[2 * xstride]
#define Q3 pix[3 * xstride]

/* Same samples three lines (3 * ystride) further on; used only for the
 * deblocking decision. */
#define TP3 pix[3 * ystride - 4 * xstride]
#define TP2 pix[3 * ystride - 3 * xstride]
#define TP1 pix[3 * ystride - 2 * xstride]
#define TP0 pix[3 * ystride - xstride]
#define TQ0 pix[3 * ystride]
#define TQ1 pix[3 * ystride + xstride]
#define TQ2 pix[3 * ystride + 2 * xstride]
#define TQ3 pix[3 * ystride + 3 * xstride]
1536 
1538  ptrdiff_t _xstride, ptrdiff_t _ystride,
1539  int beta, int *_tc,
1540  uint8_t *_no_p, uint8_t *_no_q)
1541 {
1542  int d, j;
1543  pixel *pix = (pixel *)_pix;
1544  ptrdiff_t xstride = _xstride / sizeof(pixel);
1545  ptrdiff_t ystride = _ystride / sizeof(pixel);
1546 
1547  beta <<= BIT_DEPTH - 8;
1548 
1549  for (j = 0; j < 2; j++) {
1550  const int dp0 = abs(P2 - 2 * P1 + P0);
1551  const int dq0 = abs(Q2 - 2 * Q1 + Q0);
1552  const int dp3 = abs(TP2 - 2 * TP1 + TP0);
1553  const int dq3 = abs(TQ2 - 2 * TQ1 + TQ0);
1554  const int d0 = dp0 + dq0;
1555  const int d3 = dp3 + dq3;
1556  const int tc = _tc[j] << (BIT_DEPTH - 8);
1557  const int no_p = _no_p[j];
1558  const int no_q = _no_q[j];
1559 
1560  if (d0 + d3 >= beta) {
1561  pix += 4 * ystride;
1562  continue;
1563  } else {
1564  const int beta_3 = beta >> 3;
1565  const int beta_2 = beta >> 2;
1566  const int tc25 = ((tc * 5 + 1) >> 1);
1567 
1568  if (abs(P3 - P0) + abs(Q3 - Q0) < beta_3 && abs(P0 - Q0) < tc25 &&
1569  abs(TP3 - TP0) + abs(TQ3 - TQ0) < beta_3 && abs(TP0 - TQ0) < tc25 &&
1570  (d0 << 1) < beta_2 && (d3 << 1) < beta_2) {
1571  // strong filtering
1572  const int tc2 = tc << 1;
1573  for (d = 0; d < 4; d++) {
1574  const int p3 = P3;
1575  const int p2 = P2;
1576  const int p1 = P1;
1577  const int p0 = P0;
1578  const int q0 = Q0;
1579  const int q1 = Q1;
1580  const int q2 = Q2;
1581  const int q3 = Q3;
1582  if (!no_p) {
1583  P0 = p0 + av_clip(((p2 + 2 * p1 + 2 * p0 + 2 * q0 + q1 + 4) >> 3) - p0, -tc2, tc2);
1584  P1 = p1 + av_clip(((p2 + p1 + p0 + q0 + 2) >> 2) - p1, -tc2, tc2);
1585  P2 = p2 + av_clip(((2 * p3 + 3 * p2 + p1 + p0 + q0 + 4) >> 3) - p2, -tc2, tc2);
1586  }
1587  if (!no_q) {
1588  Q0 = q0 + av_clip(((p1 + 2 * p0 + 2 * q0 + 2 * q1 + q2 + 4) >> 3) - q0, -tc2, tc2);
1589  Q1 = q1 + av_clip(((p0 + q0 + q1 + q2 + 2) >> 2) - q1, -tc2, tc2);
1590  Q2 = q2 + av_clip(((2 * q3 + 3 * q2 + q1 + q0 + p0 + 4) >> 3) - q2, -tc2, tc2);
1591  }
1592  pix += ystride;
1593  }
1594  } else { // normal filtering
1595  int nd_p = 1;
1596  int nd_q = 1;
1597  const int tc_2 = tc >> 1;
1598  if (dp0 + dp3 < ((beta + (beta >> 1)) >> 3))
1599  nd_p = 2;
1600  if (dq0 + dq3 < ((beta + (beta >> 1)) >> 3))
1601  nd_q = 2;
1602 
1603  for (d = 0; d < 4; d++) {
1604  const int p2 = P2;
1605  const int p1 = P1;
1606  const int p0 = P0;
1607  const int q0 = Q0;
1608  const int q1 = Q1;
1609  const int q2 = Q2;
1610  int delta0 = (9 * (q0 - p0) - 3 * (q1 - p1) + 8) >> 4;
1611  if (abs(delta0) < 10 * tc) {
1612  delta0 = av_clip(delta0, -tc, tc);
1613  if (!no_p)
1614  P0 = av_clip_pixel(p0 + delta0);
1615  if (!no_q)
1616  Q0 = av_clip_pixel(q0 - delta0);
1617  if (!no_p && nd_p > 1) {
1618  const int deltap1 = av_clip((((p2 + p0 + 1) >> 1) - p1 + delta0) >> 1, -tc_2, tc_2);
1619  P1 = av_clip_pixel(p1 + deltap1);
1620  }
1621  if (!no_q && nd_q > 1) {
1622  const int deltaq1 = av_clip((((q2 + q0 + 1) >> 1) - q1 - delta0) >> 1, -tc_2, tc_2);
1623  Q1 = av_clip_pixel(q1 + deltaq1);
1624  }
1625  }
1626  pix += ystride;
1627  }
1628  }
1629  }
1630  }
1631 }
1632 
1633 static void FUNC(hevc_loop_filter_chroma)(uint8_t *_pix, ptrdiff_t _xstride,
1634  ptrdiff_t _ystride, int *_tc,
1635  uint8_t *_no_p, uint8_t *_no_q)
1636 {
1637  int d, j, no_p, no_q;
1638  pixel *pix = (pixel *)_pix;
1639  ptrdiff_t xstride = _xstride / sizeof(pixel);
1640  ptrdiff_t ystride = _ystride / sizeof(pixel);
1641 
1642  for (j = 0; j < 2; j++) {
1643  const int tc = _tc[j] << (BIT_DEPTH - 8);
1644  if (tc <= 0) {
1645  pix += 4 * ystride;
1646  continue;
1647  }
1648  no_p = _no_p[j];
1649  no_q = _no_q[j];
1650 
1651  for (d = 0; d < 4; d++) {
1652  int delta0;
1653  const int p1 = P1;
1654  const int p0 = P0;
1655  const int q0 = Q0;
1656  const int q1 = Q1;
1657  delta0 = av_clip((((q0 - p0) * 4) + p1 - q1 + 4) >> 3, -tc, tc);
1658  if (!no_p)
1659  P0 = av_clip_pixel(p0 + delta0);
1660  if (!no_q)
1661  Q0 = av_clip_pixel(q0 - delta0);
1662  pix += ystride;
1663  }
1664  }
1665 }
1666 
1667 static void FUNC(hevc_h_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
1668  int32_t *tc, uint8_t *no_p,
1669  uint8_t *no_q)
1670 {
1671  FUNC(hevc_loop_filter_chroma)(pix, stride, sizeof(pixel), tc, no_p, no_q);
1672 }
1673 
1674 static void FUNC(hevc_v_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
1675  int32_t *tc, uint8_t *no_p,
1676  uint8_t *no_q)
1677 {
1678  FUNC(hevc_loop_filter_chroma)(pix, sizeof(pixel), stride, tc, no_p, no_q);
1679 }
1680 
1681 static void FUNC(hevc_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
1682  int beta, int32_t *tc, uint8_t *no_p,
1683  uint8_t *no_q)
1684 {
1685  FUNC(hevc_loop_filter_luma)(pix, stride, sizeof(pixel),
1686  beta, tc, no_p, no_q);
1687 }
1688 
1689 static void FUNC(hevc_v_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
1690  int beta, int32_t *tc, uint8_t *no_p,
1691  uint8_t *no_q)
1692 {
1693  FUNC(hevc_loop_filter_luma)(pix, sizeof(pixel), stride,
1694  beta, tc, no_p, no_q);
1695 }
1696 
/* The loop-filter sample accessors are not needed past this point. */

/* line-zero samples */
#undef P0
#undef P1
#undef P2
#undef P3
#undef Q0
#undef Q1
#undef Q2
#undef Q3

/* line-three samples */
#undef TP0
#undef TP1
#undef TP2
#undef TP3
#undef TQ0
#undef TQ1
#undef TQ2
#undef TQ3