FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
hevcdsp_template.c
Go to the documentation of this file.
1 /*
2  * HEVC video decoder
3  *
4  * Copyright (C) 2012 - 2013 Guillaume Martres
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
23 #include "get_bits.h"
24 #include "hevcdec.h"
25 
26 #include "bit_depth_template.c"
27 #include "hevcdsp.h"
28 
29 static void FUNC(put_pcm)(uint8_t *_dst, ptrdiff_t stride, int width, int height,
30  GetBitContext *gb, int pcm_bit_depth)
31 {
32  int x, y;
33  pixel *dst = (pixel *)_dst;
34 
35  stride /= sizeof(pixel);
36 
37  for (y = 0; y < height; y++) {
38  for (x = 0; x < width; x++)
39  dst[x] = get_bits(gb, pcm_bit_depth) << (BIT_DEPTH - pcm_bit_depth);
40  dst += stride;
41  }
42 }
43 
44 static av_always_inline void FUNC(add_residual)(uint8_t *_dst, int16_t *res,
45  ptrdiff_t stride, int size)
46 {
47  int x, y;
48  pixel *dst = (pixel *)_dst;
49 
50  stride /= sizeof(pixel);
51 
52  for (y = 0; y < size; y++) {
53  for (x = 0; x < size; x++) {
54  dst[x] = av_clip_pixel(dst[x] + *res);
55  res++;
56  }
57  dst += stride;
58  }
59 }
60 
61 static void FUNC(add_residual4x4)(uint8_t *_dst, int16_t *res,
62  ptrdiff_t stride)
63 {
64  FUNC(add_residual)(_dst, res, stride, 4);
65 }
66 
67 static void FUNC(add_residual8x8)(uint8_t *_dst, int16_t *res,
68  ptrdiff_t stride)
69 {
70  FUNC(add_residual)(_dst, res, stride, 8);
71 }
72 
73 static void FUNC(add_residual16x16)(uint8_t *_dst, int16_t *res,
74  ptrdiff_t stride)
75 {
76  FUNC(add_residual)(_dst, res, stride, 16);
77 }
78 
79 static void FUNC(add_residual32x32)(uint8_t *_dst, int16_t *res,
80  ptrdiff_t stride)
81 {
82  FUNC(add_residual)(_dst, res, stride, 32);
83 }
84 
85 static void FUNC(transform_rdpcm)(int16_t *_coeffs, int16_t log2_size, int mode)
86 {
87  int16_t *coeffs = (int16_t *) _coeffs;
88  int x, y;
89  int size = 1 << log2_size;
90 
91  if (mode) {
92  coeffs += size;
93  for (y = 0; y < size - 1; y++) {
94  for (x = 0; x < size; x++)
95  coeffs[x] += coeffs[x - size];
96  coeffs += size;
97  }
98  } else {
99  for (y = 0; y < size; y++) {
100  for (x = 1; x < size; x++)
101  coeffs[x] += coeffs[x - 1];
102  coeffs += size;
103  }
104  }
105 }
106 
107 static void FUNC(dequant)(int16_t *coeffs, int16_t log2_size)
108 {
109  int shift = 15 - BIT_DEPTH - log2_size;
110  int x, y;
111  int size = 1 << log2_size;
112 
113  if (shift > 0) {
114  int offset = 1 << (shift - 1);
115  for (y = 0; y < size; y++) {
116  for (x = 0; x < size; x++) {
117  *coeffs = (*coeffs + offset) >> shift;
118  coeffs++;
119  }
120  }
121  } else {
122  for (y = 0; y < size; y++) {
123  for (x = 0; x < size; x++) {
124  *coeffs = *(uint16_t*)coeffs << -shift;
125  coeffs++;
126  }
127  }
128  }
129 }
130 
/* Store strategies handed to the TR_* macros as their `assign` argument. */

/* SET: plain store of the computed value. */
#define SET(out, val)   (out) = (val)
/* SCALE: add the rounding term, shift right and clip to int16_t.
 * Picks up `add` and `shift` from the scope it is expanded in. */
#define SCALE(out, val) (out) = av_clip_int16(((val) + add) >> shift)
133 
/*
 * One 4-point pass of the 4x4 luma transform.
 *
 * Reads src[0], src[step], src[2 * step], src[3 * step] and hands the four
 * results to `assign` for dst at the same positions. Every input is read
 * before anything is stored, so dst and src may be the same array.
 */
#define TR_4x4_LUMA(dst, src, step, assign)                                 \
    do {                                                                    \
        const int s0 = src[0 * step];                                       \
        const int s1 = src[1 * step];                                       \
        const int s2 = src[2 * step];                                       \
        const int s3 = src[3 * step];                                       \
        const int c0 = s0 + s2;                                             \
        const int c1 = s2 + s3;                                             \
        const int c2 = s0 - s3;                                             \
        const int c3 = 74 * s1;                                             \
        const int t2 = 74 * (s0 - s2 + s3);                                 \
                                                                            \
        assign(dst[0 * step], 29 * c0 + 55 * c1 + c3);                      \
        assign(dst[1 * step], 55 * c2 - 29 * c1 + c3);                      \
        assign(dst[2 * step], t2);                                          \
        assign(dst[3 * step], 55 * c0 + 29 * c2 - c3);                      \
    } while (0)
148 
149 static void FUNC(transform_4x4_luma)(int16_t *coeffs)
150 {
151  int i;
152  int shift = 7;
153  int add = 1 << (shift - 1);
154  int16_t *src = coeffs;
155 
156  for (i = 0; i < 4; i++) {
157  TR_4x4_LUMA(src, src, 4, SCALE);
158  src++;
159  }
160 
161  shift = 20 - BIT_DEPTH;
162  add = 1 << (shift - 1);
163  for (i = 0; i < 4; i++) {
164  TR_4x4_LUMA(coeffs, coeffs, 1, SCALE);
165  coeffs += 4;
166  }
167 }
168 
169 #undef TR_4x4_LUMA
170 
/*
 * 4-point transform butterfly.
 *
 * Inputs are src[0], src[sstep], src[2 * sstep], src[3 * sstep]; outputs
 * go through `assign` to dst[0], dst[dstep], dst[2 * dstep], dst[3 * dstep].
 * All inputs are read before the first store, so dst may alias src.
 * `end` is accepted for signature parity with the larger TR_* macros and
 * is not used.
 */
#define TR_4(dst, src, dstep, sstep, assign, end)                           \
    do {                                                                    \
        const int tr4_a  = 64 * src[0 * sstep];                             \
        const int tr4_b  = 64 * src[2 * sstep];                             \
        const int tr4_c  = src[1 * sstep];                                  \
        const int tr4_d  = src[3 * sstep];                                  \
        const int tr4_e0 = tr4_a + tr4_b;                                   \
        const int tr4_e1 = tr4_a - tr4_b;                                   \
        const int tr4_o0 = 83 * tr4_c + 36 * tr4_d;                         \
        const int tr4_o1 = 36 * tr4_c - 83 * tr4_d;                         \
                                                                            \
        assign(dst[0 * dstep], tr4_e0 + tr4_o0);                            \
        assign(dst[1 * dstep], tr4_e1 + tr4_o1);                            \
        assign(dst[2 * dstep], tr4_e1 - tr4_o1);                            \
        assign(dst[3 * dstep], tr4_e0 - tr4_o0);                            \
    } while (0)
183 
/*
 * 8-point transform stage.
 *
 * The even half is produced by TR_4 from every second input; the odd half
 * accumulates transform[4 * j][i] * src[j * sstep] over the odd j below
 * `end`. Both halves are complete before anything is stored, so dst may
 * alias src. `transform` is a table defined outside this macro.
 */
#define TR_8(dst, src, dstep, sstep, assign, end)                           \
    do {                                                                    \
        int e_8[4];                                                         \
        int o_8[4] = { 0 };                                                 \
        int out8, tap8;                                                     \
                                                                            \
        TR_4(e_8, src, 1, 2 * sstep, SET, 4);                               \
        for (tap8 = 1; tap8 < end; tap8 += 2)                               \
            for (out8 = 0; out8 < 4; out8++)                                \
                o_8[out8] += transform[4 * tap8][out8] * src[tap8 * sstep]; \
                                                                            \
        for (out8 = 0; out8 < 4; out8++) {                                  \
            assign(dst[out8 * dstep],       e_8[out8] + o_8[out8]);         \
            assign(dst[(7 - out8) * dstep], e_8[out8] - o_8[out8]);         \
        }                                                                   \
    } while (0)
199 
/*
 * 16-point transform stage.
 *
 * The even half is produced by TR_8 from every second input; the odd half
 * accumulates transform[2 * j][i] * src[j * sstep] over the odd j below
 * `end`. Both halves are complete before anything is stored, so dst may
 * alias src. `transform` is a table defined outside this macro.
 */
#define TR_16(dst, src, dstep, sstep, assign, end)                          \
    do {                                                                    \
        int e_16[8];                                                        \
        int o_16[8] = { 0 };                                                \
        int out16, tap16;                                                   \
                                                                            \
        TR_8(e_16, src, 1, 2 * sstep, SET, 8);                              \
        for (tap16 = 1; tap16 < end; tap16 += 2)                            \
            for (out16 = 0; out16 < 8; out16++)                             \
                o_16[out16] += transform[2 * tap16][out16] *                \
                               src[tap16 * sstep];                          \
                                                                            \
        for (out16 = 0; out16 < 8; out16++) {                               \
            assign(dst[out16 * dstep],        e_16[out16] + o_16[out16]);   \
            assign(dst[(15 - out16) * dstep], e_16[out16] - o_16[out16]);   \
        }                                                                   \
    } while (0)
215 
/*
 * 32-point transform stage.
 *
 * The even half is produced by TR_16 (with its limit set to end / 2) from
 * every second input; the odd half accumulates
 * transform[j][i] * src[j * sstep] over the odd j below `end`. Both halves
 * are complete before anything is stored, so dst may alias src.
 * `transform` is a table defined outside this macro.
 */
#define TR_32(dst, src, dstep, sstep, assign, end)                          \
    do {                                                                    \
        int e_32[16];                                                       \
        int o_32[16] = { 0 };                                               \
        int out32, tap32;                                                   \
                                                                            \
        TR_16(e_32, src, 1, 2 * sstep, SET, end / 2);                       \
        for (tap32 = 1; tap32 < end; tap32 += 2)                            \
            for (out32 = 0; out32 < 16; out32++)                            \
                o_32[out32] += transform[tap32][out32] *                    \
                               src[tap32 * sstep];                          \
                                                                            \
        for (out32 = 0; out32 < 16; out32++) {                              \
            assign(dst[out32 * dstep],        e_32[out32] + o_32[out32]);   \
            assign(dst[(31 - out32) * dstep], e_32[out32] - o_32[out32]);   \
        }                                                                   \
    } while (0)
231 
/*
 * Per-size local declarations used by IDCT(): `limit2` bounds the odd-tap
 * loops of the first pass and `limit` those of the second pass. Both are
 * derived from the enclosing function's col_limit and capped at the block
 * size. The 4x4 variant only declares `limit2`.
 */
#define IDCT_VAR4(N)                                                        \
    int limit2 = FFMIN(col_limit + 4, N)
#define IDCT_VAR8(N)                                                        \
    int limit  = FFMIN(col_limit,     N);                                   \
    int limit2 = FFMIN(col_limit + 4, N)
#define IDCT_VAR16(N)                                                       \
    int limit  = FFMIN(col_limit,     N);                                   \
    int limit2 = FFMIN(col_limit + 4, N)
#define IDCT_VAR32(N)                                                       \
    int limit  = FFMIN(col_limit,     N);                                   \
    int limit2 = FFMIN(col_limit + 4, N)
239 
/*
 * Generates FUNC(idct_HxH)(coeffs, col_limit): a two-pass in-place
 * transform of an H x H block.
 *
 * Pass 1 runs TR_H on elements spaced H apart (shift 7), once per starting
 * offset 0 .. H-1; after offsets 4, 8, 12, ... its tap limit `limit2` is
 * lowered by 4 as long as it is still below H. Pass 2 runs TR_H on each
 * run of H consecutive elements (shift 20 - BIT_DEPTH) with tap limit
 * `limit`. Results are stored through SCALE, which reads the locals `add`
 * and `shift`.
 */
#define IDCT(H)                                                             \
static void FUNC(idct_ ## H ## x ## H )(int16_t *coeffs,                    \
                                        int col_limit)                      \
{                                                                           \
    int16_t *col = coeffs;                                                  \
    int16_t *row = coeffs;                                                  \
    int shift    = 7;                                                       \
    int add      = 1 << (shift - 1);                                        \
    int n;                                                                  \
    IDCT_VAR ## H(H);                                                       \
                                                                            \
    for (n = 0; n < H; n++, col++) {                                        \
        TR_ ## H(col, col, H, H, SCALE, limit2);                            \
        if (n > 0 && (n & 3) == 0 && limit2 < H)                            \
            limit2 -= 4;                                                    \
    }                                                                       \
                                                                            \
    shift = 20 - BIT_DEPTH;                                                 \
    add   = 1 << (shift - 1);                                               \
    for (n = 0; n < H; n++, row += H) {                                     \
        TR_ ## H(row, row, 1, 1, SCALE, limit);                             \
    }                                                                       \
}
264 
/*
 * Generates FUNC(idct_HxH_dc)(coeffs): computes one value from coeffs[0]
 * (halved with rounding, then rounded and shifted by 14 - BIT_DEPTH) and
 * writes it to all H * H entries of the block.
 */
#define IDCT_DC(H)                                                          \
static void FUNC(idct_ ## H ## x ## H ## _dc)(int16_t *coeffs)              \
{                                                                           \
    const int shift = 14 - BIT_DEPTH;                                       \
    const int add   = 1 << (shift - 1);                                     \
    const int dc    = (((coeffs[0] + 1) >> 1) + add) >> shift;              \
    int n;                                                                  \
                                                                            \
    for (n = 0; n < H * H; n++)                                             \
        coeffs[n] = dc;                                                     \
}
279 
280 IDCT( 4)
281 IDCT( 8)
282 IDCT(16)
283 IDCT(32)
284 
285 IDCT_DC( 4)
286 IDCT_DC( 8)
287 IDCT_DC(16)
288 IDCT_DC(32)
289 
290 #undef TR_4
291 #undef TR_8
292 #undef TR_16
293 #undef TR_32
294 
295 #undef SET
296 #undef SCALE
297 
298 static void FUNC(sao_band_filter)(uint8_t *_dst, uint8_t *_src,
299  ptrdiff_t stride_dst, ptrdiff_t stride_src,
300  int16_t *sao_offset_val, int sao_left_class,
301  int width, int height)
302 {
303  pixel *dst = (pixel *)_dst;
304  pixel *src = (pixel *)_src;
305  int offset_table[32] = { 0 };
306  int k, y, x;
307  int shift = BIT_DEPTH - 5;
308 
309  stride_dst /= sizeof(pixel);
310  stride_src /= sizeof(pixel);
311 
312  for (k = 0; k < 4; k++)
313  offset_table[(k + sao_left_class) & 31] = sao_offset_val[k + 1];
314  for (y = 0; y < height; y++) {
315  for (x = 0; x < width; x++)
316  dst[x] = av_clip_pixel(src[x] + offset_table[src[x] >> shift]);
317  dst += stride_dst;
318  src += stride_src;
319  }
320 }
321 
/* Three-way comparison: 1 if lhs > rhs, -1 if lhs < rhs, 0 if equal.
 * Both arguments are evaluated twice. */
#define CMP(lhs, rhs) (((lhs) > (rhs)) - ((rhs) > (lhs)))
323 
324 static void FUNC(sao_edge_filter)(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val,
325  int eo, int width, int height) {
326 
327  static const uint8_t edge_idx[] = { 1, 2, 0, 3, 4 };
328  static const int8_t pos[4][2][2] = {
329  { { -1, 0 }, { 1, 0 } }, // horizontal
330  { { 0, -1 }, { 0, 1 } }, // vertical
331  { { -1, -1 }, { 1, 1 } }, // 45 degree
332  { { 1, -1 }, { -1, 1 } }, // 135 degree
333  };
334  pixel *dst = (pixel *)_dst;
335  pixel *src = (pixel *)_src;
336  int a_stride, b_stride;
337  int x, y;
338  ptrdiff_t stride_src = (2*MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE) / sizeof(pixel);
339  stride_dst /= sizeof(pixel);
340 
341  a_stride = pos[eo][0][0] + pos[eo][0][1] * stride_src;
342  b_stride = pos[eo][1][0] + pos[eo][1][1] * stride_src;
343  for (y = 0; y < height; y++) {
344  for (x = 0; x < width; x++) {
345  int diff0 = CMP(src[x], src[x + a_stride]);
346  int diff1 = CMP(src[x], src[x + b_stride]);
347  int offset_val = edge_idx[2 + diff0 + diff1];
348  dst[x] = av_clip_pixel(src[x] + sao_offset_val[offset_val]);
349  }
350  src += stride_src;
351  dst += stride_dst;
352  }
353 }
354 
355 static void FUNC(sao_edge_restore_0)(uint8_t *_dst, uint8_t *_src,
356  ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao,
357  int *borders, int _width, int _height,
358  int c_idx, uint8_t *vert_edge,
359  uint8_t *horiz_edge, uint8_t *diag_edge)
360 {
361  int x, y;
362  pixel *dst = (pixel *)_dst;
363  pixel *src = (pixel *)_src;
364  int16_t *sao_offset_val = sao->offset_val[c_idx];
365  int sao_eo_class = sao->eo_class[c_idx];
366  int init_x = 0, width = _width, height = _height;
367 
368  stride_dst /= sizeof(pixel);
369  stride_src /= sizeof(pixel);
370 
371  if (sao_eo_class != SAO_EO_VERT) {
372  if (borders[0]) {
373  int offset_val = sao_offset_val[0];
374  for (y = 0; y < height; y++) {
375  dst[y * stride_dst] = av_clip_pixel(src[y * stride_src] + offset_val);
376  }
377  init_x = 1;
378  }
379  if (borders[2]) {
380  int offset_val = sao_offset_val[0];
381  int offset = width - 1;
382  for (x = 0; x < height; x++) {
383  dst[x * stride_dst + offset] = av_clip_pixel(src[x * stride_src + offset] + offset_val);
384  }
385  width--;
386  }
387  }
388  if (sao_eo_class != SAO_EO_HORIZ) {
389  if (borders[1]) {
390  int offset_val = sao_offset_val[0];
391  for (x = init_x; x < width; x++)
392  dst[x] = av_clip_pixel(src[x] + offset_val);
393  }
394  if (borders[3]) {
395  int offset_val = sao_offset_val[0];
396  ptrdiff_t y_stride_dst = stride_dst * (height - 1);
397  ptrdiff_t y_stride_src = stride_src * (height - 1);
398  for (x = init_x; x < width; x++)
399  dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + offset_val);
400  height--;
401  }
402  }
403 }
404 
405 static void FUNC(sao_edge_restore_1)(uint8_t *_dst, uint8_t *_src,
406  ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao,
407  int *borders, int _width, int _height,
408  int c_idx, uint8_t *vert_edge,
409  uint8_t *horiz_edge, uint8_t *diag_edge)
410 {
411  int x, y;
412  pixel *dst = (pixel *)_dst;
413  pixel *src = (pixel *)_src;
414  int16_t *sao_offset_val = sao->offset_val[c_idx];
415  int sao_eo_class = sao->eo_class[c_idx];
416  int init_x = 0, init_y = 0, width = _width, height = _height;
417 
418  stride_dst /= sizeof(pixel);
419  stride_src /= sizeof(pixel);
420 
421  if (sao_eo_class != SAO_EO_VERT) {
422  if (borders[0]) {
423  int offset_val = sao_offset_val[0];
424  for (y = 0; y < height; y++) {
425  dst[y * stride_dst] = av_clip_pixel(src[y * stride_src] + offset_val);
426  }
427  init_x = 1;
428  }
429  if (borders[2]) {
430  int offset_val = sao_offset_val[0];
431  int offset = width - 1;
432  for (x = 0; x < height; x++) {
433  dst[x * stride_dst + offset] = av_clip_pixel(src[x * stride_src + offset] + offset_val);
434  }
435  width--;
436  }
437  }
438  if (sao_eo_class != SAO_EO_HORIZ) {
439  if (borders[1]) {
440  int offset_val = sao_offset_val[0];
441  for (x = init_x; x < width; x++)
442  dst[x] = av_clip_pixel(src[x] + offset_val);
443  init_y = 1;
444  }
445  if (borders[3]) {
446  int offset_val = sao_offset_val[0];
447  ptrdiff_t y_stride_dst = stride_dst * (height - 1);
448  ptrdiff_t y_stride_src = stride_src * (height - 1);
449  for (x = init_x; x < width; x++)
450  dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + offset_val);
451  height--;
452  }
453  }
454 
455  {
456  int save_upper_left = !diag_edge[0] && sao_eo_class == SAO_EO_135D && !borders[0] && !borders[1];
457  int save_upper_right = !diag_edge[1] && sao_eo_class == SAO_EO_45D && !borders[1] && !borders[2];
458  int save_lower_right = !diag_edge[2] && sao_eo_class == SAO_EO_135D && !borders[2] && !borders[3];
459  int save_lower_left = !diag_edge[3] && sao_eo_class == SAO_EO_45D && !borders[0] && !borders[3];
460 
461  // Restore pixels that can't be modified
462  if(vert_edge[0] && sao_eo_class != SAO_EO_VERT) {
463  for(y = init_y+save_upper_left; y< height-save_lower_left; y++)
464  dst[y*stride_dst] = src[y*stride_src];
465  }
466  if(vert_edge[1] && sao_eo_class != SAO_EO_VERT) {
467  for(y = init_y+save_upper_right; y< height-save_lower_right; y++)
468  dst[y*stride_dst+width-1] = src[y*stride_src+width-1];
469  }
470 
471  if(horiz_edge[0] && sao_eo_class != SAO_EO_HORIZ) {
472  for(x = init_x+save_upper_left; x < width-save_upper_right; x++)
473  dst[x] = src[x];
474  }
475  if(horiz_edge[1] && sao_eo_class != SAO_EO_HORIZ) {
476  for(x = init_x+save_lower_left; x < width-save_lower_right; x++)
477  dst[(height-1)*stride_dst+x] = src[(height-1)*stride_src+x];
478  }
479  if(diag_edge[0] && sao_eo_class == SAO_EO_135D)
480  dst[0] = src[0];
481  if(diag_edge[1] && sao_eo_class == SAO_EO_45D)
482  dst[width-1] = src[width-1];
483  if(diag_edge[2] && sao_eo_class == SAO_EO_135D)
484  dst[stride_dst*(height-1)+width-1] = src[stride_src*(height-1)+width-1];
485  if(diag_edge[3] && sao_eo_class == SAO_EO_45D)
486  dst[stride_dst*(height-1)] = src[stride_src*(height-1)];
487 
488  }
489 }
490 
491 #undef CMP
492 
493 ////////////////////////////////////////////////////////////////////////////////
494 //
495 ////////////////////////////////////////////////////////////////////////////////
496 static void FUNC(put_hevc_pel_pixels)(int16_t *dst,
497  uint8_t *_src, ptrdiff_t _srcstride,
498  int height, intptr_t mx, intptr_t my, int width)
499 {
500  int x, y;
501  pixel *src = (pixel *)_src;
502  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
503 
504  for (y = 0; y < height; y++) {
505  for (x = 0; x < width; x++)
506  dst[x] = src[x] << (14 - BIT_DEPTH);
507  src += srcstride;
508  dst += MAX_PB_SIZE;
509  }
510 }
511 
512 static void FUNC(put_hevc_pel_uni_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
513  int height, intptr_t mx, intptr_t my, int width)
514 {
515  int y;
516  pixel *src = (pixel *)_src;
517  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
518  pixel *dst = (pixel *)_dst;
519  ptrdiff_t dststride = _dststride / sizeof(pixel);
520 
521  for (y = 0; y < height; y++) {
522  memcpy(dst, src, width * sizeof(pixel));
523  src += srcstride;
524  dst += dststride;
525  }
526 }
527 
528 static void FUNC(put_hevc_pel_bi_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
529  int16_t *src2,
530  int height, intptr_t mx, intptr_t my, int width)
531 {
532  int x, y;
533  pixel *src = (pixel *)_src;
534  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
535  pixel *dst = (pixel *)_dst;
536  ptrdiff_t dststride = _dststride / sizeof(pixel);
537 
538  int shift = 14 + 1 - BIT_DEPTH;
539 #if BIT_DEPTH < 14
540  int offset = 1 << (shift - 1);
541 #else
542  int offset = 0;
543 #endif
544 
545  for (y = 0; y < height; y++) {
546  for (x = 0; x < width; x++)
547  dst[x] = av_clip_pixel(((src[x] << (14 - BIT_DEPTH)) + src2[x] + offset) >> shift);
548  src += srcstride;
549  dst += dststride;
550  src2 += MAX_PB_SIZE;
551  }
552 }
553 
554 static void FUNC(put_hevc_pel_uni_w_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
555  int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
556 {
557  int x, y;
558  pixel *src = (pixel *)_src;
559  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
560  pixel *dst = (pixel *)_dst;
561  ptrdiff_t dststride = _dststride / sizeof(pixel);
562  int shift = denom + 14 - BIT_DEPTH;
563 #if BIT_DEPTH < 14
564  int offset = 1 << (shift - 1);
565 #else
566  int offset = 0;
567 #endif
568 
569  ox = ox * (1 << (BIT_DEPTH - 8));
570  for (y = 0; y < height; y++) {
571  for (x = 0; x < width; x++)
572  dst[x] = av_clip_pixel((((src[x] << (14 - BIT_DEPTH)) * wx + offset) >> shift) + ox);
573  src += srcstride;
574  dst += dststride;
575  }
576 }
577 
578 static void FUNC(put_hevc_pel_bi_w_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
579  int16_t *src2,
580  int height, int denom, int wx0, int wx1,
581  int ox0, int ox1, intptr_t mx, intptr_t my, int width)
582 {
583  int x, y;
584  pixel *src = (pixel *)_src;
585  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
586  pixel *dst = (pixel *)_dst;
587  ptrdiff_t dststride = _dststride / sizeof(pixel);
588 
589  int shift = 14 + 1 - BIT_DEPTH;
590  int log2Wd = denom + shift - 1;
591 
592  ox0 = ox0 * (1 << (BIT_DEPTH - 8));
593  ox1 = ox1 * (1 << (BIT_DEPTH - 8));
594  for (y = 0; y < height; y++) {
595  for (x = 0; x < width; x++) {
596  dst[x] = av_clip_pixel(( (src[x] << (14 - BIT_DEPTH)) * wx1 + src2[x] * wx0 + (ox0 + ox1 + 1) * (1 << log2Wd)) >> (log2Wd + 1));
597  }
598  src += srcstride;
599  dst += dststride;
600  src2 += MAX_PB_SIZE;
601  }
602 }
603 
604 ////////////////////////////////////////////////////////////////////////////////
605 //
606 ////////////////////////////////////////////////////////////////////////////////
/*
 * 8-tap filter around position x: taps filter[0..7] are applied to
 * src[x - 3 * stride] ... src[x + 4 * stride].
 * Uses `filter` and `x` from the scope it is expanded in.
 */
#define QPEL_FILTER(src, stride)                                            \
    (filter[0] * src[x - 3 * stride] +                                      \
     filter[1] * src[x - 2 * stride] +                                      \
     filter[2] * src[x -     stride] +                                      \
     filter[3] * src[x             ] +                                      \
     filter[4] * src[x +     stride] +                                      \
     filter[5] * src[x + 2 * stride] +                                      \
     filter[6] * src[x + 3 * stride] +                                      \
     filter[7] * src[x + 4 * stride])
616 
617 static void FUNC(put_hevc_qpel_h)(int16_t *dst,
618  uint8_t *_src, ptrdiff_t _srcstride,
619  int height, intptr_t mx, intptr_t my, int width)
620 {
621  int x, y;
622  pixel *src = (pixel*)_src;
623  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
624  const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
625  for (y = 0; y < height; y++) {
626  for (x = 0; x < width; x++)
627  dst[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
628  src += srcstride;
629  dst += MAX_PB_SIZE;
630  }
631 }
632 
633 static void FUNC(put_hevc_qpel_v)(int16_t *dst,
634  uint8_t *_src, ptrdiff_t _srcstride,
635  int height, intptr_t mx, intptr_t my, int width)
636 {
637  int x, y;
638  pixel *src = (pixel*)_src;
639  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
640  const int8_t *filter = ff_hevc_qpel_filters[my - 1];
641  for (y = 0; y < height; y++) {
642  for (x = 0; x < width; x++)
643  dst[x] = QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8);
644  src += srcstride;
645  dst += MAX_PB_SIZE;
646  }
647 }
648 
649 static void FUNC(put_hevc_qpel_hv)(int16_t *dst,
650  uint8_t *_src,
651  ptrdiff_t _srcstride,
652  int height, intptr_t mx,
653  intptr_t my, int width)
654 {
655  int x, y;
656  const int8_t *filter;
657  pixel *src = (pixel*)_src;
658  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
659  int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
660  int16_t *tmp = tmp_array;
661 
662  src -= QPEL_EXTRA_BEFORE * srcstride;
663  filter = ff_hevc_qpel_filters[mx - 1];
664  for (y = 0; y < height + QPEL_EXTRA; y++) {
665  for (x = 0; x < width; x++)
666  tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
667  src += srcstride;
668  tmp += MAX_PB_SIZE;
669  }
670 
671  tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
672  filter = ff_hevc_qpel_filters[my - 1];
673  for (y = 0; y < height; y++) {
674  for (x = 0; x < width; x++)
675  dst[x] = QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6;
676  tmp += MAX_PB_SIZE;
677  dst += MAX_PB_SIZE;
678  }
679 }
680 
681 static void FUNC(put_hevc_qpel_uni_h)(uint8_t *_dst, ptrdiff_t _dststride,
682  uint8_t *_src, ptrdiff_t _srcstride,
683  int height, intptr_t mx, intptr_t my, int width)
684 {
685  int x, y;
686  pixel *src = (pixel*)_src;
687  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
688  pixel *dst = (pixel *)_dst;
689  ptrdiff_t dststride = _dststride / sizeof(pixel);
690  const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
691  int shift = 14 - BIT_DEPTH;
692 
693 #if BIT_DEPTH < 14
694  int offset = 1 << (shift - 1);
695 #else
696  int offset = 0;
697 #endif
698 
699  for (y = 0; y < height; y++) {
700  for (x = 0; x < width; x++)
701  dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift);
702  src += srcstride;
703  dst += dststride;
704  }
705 }
706 
707 static void FUNC(put_hevc_qpel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
708  int16_t *src2,
709  int height, intptr_t mx, intptr_t my, int width)
710 {
711  int x, y;
712  pixel *src = (pixel*)_src;
713  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
714  pixel *dst = (pixel *)_dst;
715  ptrdiff_t dststride = _dststride / sizeof(pixel);
716 
717  const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
718 
719  int shift = 14 + 1 - BIT_DEPTH;
720 #if BIT_DEPTH < 14
721  int offset = 1 << (shift - 1);
722 #else
723  int offset = 0;
724 #endif
725 
726  for (y = 0; y < height; y++) {
727  for (x = 0; x < width; x++)
728  dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
729  src += srcstride;
730  dst += dststride;
731  src2 += MAX_PB_SIZE;
732  }
733 }
734 
735 static void FUNC(put_hevc_qpel_uni_v)(uint8_t *_dst, ptrdiff_t _dststride,
736  uint8_t *_src, ptrdiff_t _srcstride,
737  int height, intptr_t mx, intptr_t my, int width)
738 {
739  int x, y;
740  pixel *src = (pixel*)_src;
741  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
742  pixel *dst = (pixel *)_dst;
743  ptrdiff_t dststride = _dststride / sizeof(pixel);
744  const int8_t *filter = ff_hevc_qpel_filters[my - 1];
745  int shift = 14 - BIT_DEPTH;
746 
747 #if BIT_DEPTH < 14
748  int offset = 1 << (shift - 1);
749 #else
750  int offset = 0;
751 #endif
752 
753  for (y = 0; y < height; y++) {
754  for (x = 0; x < width; x++)
755  dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + offset) >> shift);
756  src += srcstride;
757  dst += dststride;
758  }
759 }
760 
761 
762 static void FUNC(put_hevc_qpel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
763  int16_t *src2,
764  int height, intptr_t mx, intptr_t my, int width)
765 {
766  int x, y;
767  pixel *src = (pixel*)_src;
768  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
769  pixel *dst = (pixel *)_dst;
770  ptrdiff_t dststride = _dststride / sizeof(pixel);
771 
772  const int8_t *filter = ff_hevc_qpel_filters[my - 1];
773 
774  int shift = 14 + 1 - BIT_DEPTH;
775 #if BIT_DEPTH < 14
776  int offset = 1 << (shift - 1);
777 #else
778  int offset = 0;
779 #endif
780 
781  for (y = 0; y < height; y++) {
782  for (x = 0; x < width; x++)
783  dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
784  src += srcstride;
785  dst += dststride;
786  src2 += MAX_PB_SIZE;
787  }
788 }
789 
790 static void FUNC(put_hevc_qpel_uni_hv)(uint8_t *_dst, ptrdiff_t _dststride,
791  uint8_t *_src, ptrdiff_t _srcstride,
792  int height, intptr_t mx, intptr_t my, int width)
793 {
794  int x, y;
795  const int8_t *filter;
796  pixel *src = (pixel*)_src;
797  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
798  pixel *dst = (pixel *)_dst;
799  ptrdiff_t dststride = _dststride / sizeof(pixel);
800  int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
801  int16_t *tmp = tmp_array;
802  int shift = 14 - BIT_DEPTH;
803 
804 #if BIT_DEPTH < 14
805  int offset = 1 << (shift - 1);
806 #else
807  int offset = 0;
808 #endif
809 
810  src -= QPEL_EXTRA_BEFORE * srcstride;
811  filter = ff_hevc_qpel_filters[mx - 1];
812  for (y = 0; y < height + QPEL_EXTRA; y++) {
813  for (x = 0; x < width; x++)
814  tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
815  src += srcstride;
816  tmp += MAX_PB_SIZE;
817  }
818 
819  tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
820  filter = ff_hevc_qpel_filters[my - 1];
821 
822  for (y = 0; y < height; y++) {
823  for (x = 0; x < width; x++)
824  dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift);
825  tmp += MAX_PB_SIZE;
826  dst += dststride;
827  }
828 }
829 
830 static void FUNC(put_hevc_qpel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
831  int16_t *src2,
832  int height, intptr_t mx, intptr_t my, int width)
833 {
834  int x, y;
835  const int8_t *filter;
836  pixel *src = (pixel*)_src;
837  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
838  pixel *dst = (pixel *)_dst;
839  ptrdiff_t dststride = _dststride / sizeof(pixel);
840  int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
841  int16_t *tmp = tmp_array;
842  int shift = 14 + 1 - BIT_DEPTH;
843 #if BIT_DEPTH < 14
844  int offset = 1 << (shift - 1);
845 #else
846  int offset = 0;
847 #endif
848 
849  src -= QPEL_EXTRA_BEFORE * srcstride;
850  filter = ff_hevc_qpel_filters[mx - 1];
851  for (y = 0; y < height + QPEL_EXTRA; y++) {
852  for (x = 0; x < width; x++)
853  tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
854  src += srcstride;
855  tmp += MAX_PB_SIZE;
856  }
857 
858  tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
859  filter = ff_hevc_qpel_filters[my - 1];
860 
861  for (y = 0; y < height; y++) {
862  for (x = 0; x < width; x++)
863  dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift);
864  tmp += MAX_PB_SIZE;
865  dst += dststride;
866  src2 += MAX_PB_SIZE;
867  }
868 }
869 
870 static void FUNC(put_hevc_qpel_uni_w_h)(uint8_t *_dst, ptrdiff_t _dststride,
871  uint8_t *_src, ptrdiff_t _srcstride,
872  int height, int denom, int wx, int ox,
873  intptr_t mx, intptr_t my, int width)
874 {
875  int x, y;
876  pixel *src = (pixel*)_src;
877  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
878  pixel *dst = (pixel *)_dst;
879  ptrdiff_t dststride = _dststride / sizeof(pixel);
880  const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
881  int shift = denom + 14 - BIT_DEPTH;
882 #if BIT_DEPTH < 14
883  int offset = 1 << (shift - 1);
884 #else
885  int offset = 0;
886 #endif
887 
888  ox = ox * (1 << (BIT_DEPTH - 8));
889  for (y = 0; y < height; y++) {
890  for (x = 0; x < width; x++)
891  dst[x] = av_clip_pixel((((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
892  src += srcstride;
893  dst += dststride;
894  }
895 }
896 
897 static void FUNC(put_hevc_qpel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
898  int16_t *src2,
899  int height, int denom, int wx0, int wx1,
900  int ox0, int ox1, intptr_t mx, intptr_t my, int width)
901 {
902  int x, y;
903  pixel *src = (pixel*)_src;
904  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
905  pixel *dst = (pixel *)_dst;
906  ptrdiff_t dststride = _dststride / sizeof(pixel);
907 
908  const int8_t *filter = ff_hevc_qpel_filters[mx - 1];
909 
910  int shift = 14 + 1 - BIT_DEPTH;
911  int log2Wd = denom + shift - 1;
912 
913  ox0 = ox0 * (1 << (BIT_DEPTH - 8));
914  ox1 = ox1 * (1 << (BIT_DEPTH - 8));
915  for (y = 0; y < height; y++) {
916  for (x = 0; x < width; x++)
917  dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
918  ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
919  src += srcstride;
920  dst += dststride;
921  src2 += MAX_PB_SIZE;
922  }
923 }
924 
925 static void FUNC(put_hevc_qpel_uni_w_v)(uint8_t *_dst, ptrdiff_t _dststride,
926  uint8_t *_src, ptrdiff_t _srcstride,
927  int height, int denom, int wx, int ox,
928  intptr_t mx, intptr_t my, int width)
929 {
930  int x, y;
931  pixel *src = (pixel*)_src;
932  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
933  pixel *dst = (pixel *)_dst;
934  ptrdiff_t dststride = _dststride / sizeof(pixel);
935  const int8_t *filter = ff_hevc_qpel_filters[my - 1];
936  int shift = denom + 14 - BIT_DEPTH;
937 #if BIT_DEPTH < 14
938  int offset = 1 << (shift - 1);
939 #else
940  int offset = 0;
941 #endif
942 
943  ox = ox * (1 << (BIT_DEPTH - 8));
944  for (y = 0; y < height; y++) {
945  for (x = 0; x < width; x++)
946  dst[x] = av_clip_pixel((((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
947  src += srcstride;
948  dst += dststride;
949  }
950 }
951 
952 static void FUNC(put_hevc_qpel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
953  int16_t *src2,
954  int height, int denom, int wx0, int wx1,
955  int ox0, int ox1, intptr_t mx, intptr_t my, int width)
956 {
957  int x, y;
958  pixel *src = (pixel*)_src;
959  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
960  pixel *dst = (pixel *)_dst;
961  ptrdiff_t dststride = _dststride / sizeof(pixel);
962 
963  const int8_t *filter = ff_hevc_qpel_filters[my - 1];
964 
965  int shift = 14 + 1 - BIT_DEPTH;
966  int log2Wd = denom + shift - 1;
967 
968  ox0 = ox0 * (1 << (BIT_DEPTH - 8));
969  ox1 = ox1 * (1 << (BIT_DEPTH - 8));
970  for (y = 0; y < height; y++) {
971  for (x = 0; x < width; x++)
972  dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
973  ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
974  src += srcstride;
975  dst += dststride;
976  src2 += MAX_PB_SIZE;
977  }
978 }
979 
980 static void FUNC(put_hevc_qpel_uni_w_hv)(uint8_t *_dst, ptrdiff_t _dststride,
981  uint8_t *_src, ptrdiff_t _srcstride,
982  int height, int denom, int wx, int ox,
983  intptr_t mx, intptr_t my, int width)
984 {
985  int x, y;
986  const int8_t *filter;
987  pixel *src = (pixel*)_src;
988  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
989  pixel *dst = (pixel *)_dst;
990  ptrdiff_t dststride = _dststride / sizeof(pixel);
991  int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
992  int16_t *tmp = tmp_array;
993  int shift = denom + 14 - BIT_DEPTH;
994 #if BIT_DEPTH < 14
995  int offset = 1 << (shift - 1);
996 #else
997  int offset = 0;
998 #endif
999 
1000  src -= QPEL_EXTRA_BEFORE * srcstride;
1001  filter = ff_hevc_qpel_filters[mx - 1];
1002  for (y = 0; y < height + QPEL_EXTRA; y++) {
1003  for (x = 0; x < width; x++)
1004  tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1005  src += srcstride;
1006  tmp += MAX_PB_SIZE;
1007  }
1008 
1009  tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1010  filter = ff_hevc_qpel_filters[my - 1];
1011 
1012  ox = ox * (1 << (BIT_DEPTH - 8));
1013  for (y = 0; y < height; y++) {
1014  for (x = 0; x < width; x++)
1015  dst[x] = av_clip_pixel((((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox);
1016  tmp += MAX_PB_SIZE;
1017  dst += dststride;
1018  }
1019 }
1020 
1021 static void FUNC(put_hevc_qpel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1022  int16_t *src2,
1023  int height, int denom, int wx0, int wx1,
1024  int ox0, int ox1, intptr_t mx, intptr_t my, int width)
1025 {
1026  int x, y;
1027  const int8_t *filter;
1028  pixel *src = (pixel*)_src;
1029  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1030  pixel *dst = (pixel *)_dst;
1031  ptrdiff_t dststride = _dststride / sizeof(pixel);
1032  int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
1033  int16_t *tmp = tmp_array;
1034  int shift = 14 + 1 - BIT_DEPTH;
1035  int log2Wd = denom + shift - 1;
1036 
1037  src -= QPEL_EXTRA_BEFORE * srcstride;
1038  filter = ff_hevc_qpel_filters[mx - 1];
1039  for (y = 0; y < height + QPEL_EXTRA; y++) {
1040  for (x = 0; x < width; x++)
1041  tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1042  src += srcstride;
1043  tmp += MAX_PB_SIZE;
1044  }
1045 
1046  tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1047  filter = ff_hevc_qpel_filters[my - 1];
1048 
1049  ox0 = ox0 * (1 << (BIT_DEPTH - 8));
1050  ox1 = ox1 * (1 << (BIT_DEPTH - 8));
1051  for (y = 0; y < height; y++) {
1052  for (x = 0; x < width; x++)
1053  dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 +
1054  ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
1055  tmp += MAX_PB_SIZE;
1056  dst += dststride;
1057  src2 += MAX_PB_SIZE;
1058  }
1059 }
1060 
1061 ////////////////////////////////////////////////////////////////////////////////
1062 //
1063 ////////////////////////////////////////////////////////////////////////////////
/*
 * 4-tap filter around sample x of `src`, stepping by `stride` samples.
 * Reads `filter` (4 coefficients) and `x` from the enclosing scope.
 * Both arguments are parenthesized so that expressions such as `s + 1` or
 * `buf + 1` expand with the intended precedence.
 */
#define EPEL_FILTER(src, stride)                                               \
    (filter[0] * (src)[x -     (stride)] +                                     \
     filter[1] * (src)[x]                +                                     \
     filter[2] * (src)[x +     (stride)] +                                     \
     filter[3] * (src)[x + 2 * (stride)])
1069 
1070 static void FUNC(put_hevc_epel_h)(int16_t *dst,
1071  uint8_t *_src, ptrdiff_t _srcstride,
1072  int height, intptr_t mx, intptr_t my, int width)
1073 {
1074  int x, y;
1075  pixel *src = (pixel *)_src;
1076  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1077  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1078  for (y = 0; y < height; y++) {
1079  for (x = 0; x < width; x++)
1080  dst[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1081  src += srcstride;
1082  dst += MAX_PB_SIZE;
1083  }
1084 }
1085 
1086 static void FUNC(put_hevc_epel_v)(int16_t *dst,
1087  uint8_t *_src, ptrdiff_t _srcstride,
1088  int height, intptr_t mx, intptr_t my, int width)
1089 {
1090  int x, y;
1091  pixel *src = (pixel *)_src;
1092  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1093  const int8_t *filter = ff_hevc_epel_filters[my - 1];
1094 
1095  for (y = 0; y < height; y++) {
1096  for (x = 0; x < width; x++)
1097  dst[x] = EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8);
1098  src += srcstride;
1099  dst += MAX_PB_SIZE;
1100  }
1101 }
1102 
1103 static void FUNC(put_hevc_epel_hv)(int16_t *dst,
1104  uint8_t *_src, ptrdiff_t _srcstride,
1105  int height, intptr_t mx, intptr_t my, int width)
1106 {
1107  int x, y;
1108  pixel *src = (pixel *)_src;
1109  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1110  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1111  int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
1112  int16_t *tmp = tmp_array;
1113 
1114  src -= EPEL_EXTRA_BEFORE * srcstride;
1115 
1116  for (y = 0; y < height + EPEL_EXTRA; y++) {
1117  for (x = 0; x < width; x++)
1118  tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1119  src += srcstride;
1120  tmp += MAX_PB_SIZE;
1121  }
1122 
1123  tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1124  filter = ff_hevc_epel_filters[my - 1];
1125 
1126  for (y = 0; y < height; y++) {
1127  for (x = 0; x < width; x++)
1128  dst[x] = EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6;
1129  tmp += MAX_PB_SIZE;
1130  dst += MAX_PB_SIZE;
1131  }
1132 }
1133 
1134 static void FUNC(put_hevc_epel_uni_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1135  int height, intptr_t mx, intptr_t my, int width)
1136 {
1137  int x, y;
1138  pixel *src = (pixel *)_src;
1139  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1140  pixel *dst = (pixel *)_dst;
1141  ptrdiff_t dststride = _dststride / sizeof(pixel);
1142  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1143  int shift = 14 - BIT_DEPTH;
1144 #if BIT_DEPTH < 14
1145  int offset = 1 << (shift - 1);
1146 #else
1147  int offset = 0;
1148 #endif
1149 
1150  for (y = 0; y < height; y++) {
1151  for (x = 0; x < width; x++)
1152  dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift);
1153  src += srcstride;
1154  dst += dststride;
1155  }
1156 }
1157 
1158 static void FUNC(put_hevc_epel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1159  int16_t *src2,
1160  int height, intptr_t mx, intptr_t my, int width)
1161 {
1162  int x, y;
1163  pixel *src = (pixel *)_src;
1164  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1165  pixel *dst = (pixel *)_dst;
1166  ptrdiff_t dststride = _dststride / sizeof(pixel);
1167  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1168  int shift = 14 + 1 - BIT_DEPTH;
1169 #if BIT_DEPTH < 14
1170  int offset = 1 << (shift - 1);
1171 #else
1172  int offset = 0;
1173 #endif
1174 
1175  for (y = 0; y < height; y++) {
1176  for (x = 0; x < width; x++) {
1177  dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
1178  }
1179  dst += dststride;
1180  src += srcstride;
1181  src2 += MAX_PB_SIZE;
1182  }
1183 }
1184 
1185 static void FUNC(put_hevc_epel_uni_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1186  int height, intptr_t mx, intptr_t my, int width)
1187 {
1188  int x, y;
1189  pixel *src = (pixel *)_src;
1190  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1191  pixel *dst = (pixel *)_dst;
1192  ptrdiff_t dststride = _dststride / sizeof(pixel);
1193  const int8_t *filter = ff_hevc_epel_filters[my - 1];
1194  int shift = 14 - BIT_DEPTH;
1195 #if BIT_DEPTH < 14
1196  int offset = 1 << (shift - 1);
1197 #else
1198  int offset = 0;
1199 #endif
1200 
1201  for (y = 0; y < height; y++) {
1202  for (x = 0; x < width; x++)
1203  dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + offset) >> shift);
1204  src += srcstride;
1205  dst += dststride;
1206  }
1207 }
1208 
1209 static void FUNC(put_hevc_epel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1210  int16_t *src2,
1211  int height, intptr_t mx, intptr_t my, int width)
1212 {
1213  int x, y;
1214  pixel *src = (pixel *)_src;
1215  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1216  const int8_t *filter = ff_hevc_epel_filters[my - 1];
1217  pixel *dst = (pixel *)_dst;
1218  ptrdiff_t dststride = _dststride / sizeof(pixel);
1219  int shift = 14 + 1 - BIT_DEPTH;
1220 #if BIT_DEPTH < 14
1221  int offset = 1 << (shift - 1);
1222 #else
1223  int offset = 0;
1224 #endif
1225 
1226  for (y = 0; y < height; y++) {
1227  for (x = 0; x < width; x++)
1228  dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
1229  dst += dststride;
1230  src += srcstride;
1231  src2 += MAX_PB_SIZE;
1232  }
1233 }
1234 
1235 static void FUNC(put_hevc_epel_uni_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1236  int height, intptr_t mx, intptr_t my, int width)
1237 {
1238  int x, y;
1239  pixel *src = (pixel *)_src;
1240  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1241  pixel *dst = (pixel *)_dst;
1242  ptrdiff_t dststride = _dststride / sizeof(pixel);
1243  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1244  int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
1245  int16_t *tmp = tmp_array;
1246  int shift = 14 - BIT_DEPTH;
1247 #if BIT_DEPTH < 14
1248  int offset = 1 << (shift - 1);
1249 #else
1250  int offset = 0;
1251 #endif
1252 
1253  src -= EPEL_EXTRA_BEFORE * srcstride;
1254 
1255  for (y = 0; y < height + EPEL_EXTRA; y++) {
1256  for (x = 0; x < width; x++)
1257  tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1258  src += srcstride;
1259  tmp += MAX_PB_SIZE;
1260  }
1261 
1262  tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1263  filter = ff_hevc_epel_filters[my - 1];
1264 
1265  for (y = 0; y < height; y++) {
1266  for (x = 0; x < width; x++)
1267  dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift);
1268  tmp += MAX_PB_SIZE;
1269  dst += dststride;
1270  }
1271 }
1272 
1273 static void FUNC(put_hevc_epel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1274  int16_t *src2,
1275  int height, intptr_t mx, intptr_t my, int width)
1276 {
1277  int x, y;
1278  pixel *src = (pixel *)_src;
1279  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1280  pixel *dst = (pixel *)_dst;
1281  ptrdiff_t dststride = _dststride / sizeof(pixel);
1282  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1283  int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
1284  int16_t *tmp = tmp_array;
1285  int shift = 14 + 1 - BIT_DEPTH;
1286 #if BIT_DEPTH < 14
1287  int offset = 1 << (shift - 1);
1288 #else
1289  int offset = 0;
1290 #endif
1291 
1292  src -= EPEL_EXTRA_BEFORE * srcstride;
1293 
1294  for (y = 0; y < height + EPEL_EXTRA; y++) {
1295  for (x = 0; x < width; x++)
1296  tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1297  src += srcstride;
1298  tmp += MAX_PB_SIZE;
1299  }
1300 
1301  tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1302  filter = ff_hevc_epel_filters[my - 1];
1303 
1304  for (y = 0; y < height; y++) {
1305  for (x = 0; x < width; x++)
1306  dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift);
1307  tmp += MAX_PB_SIZE;
1308  dst += dststride;
1309  src2 += MAX_PB_SIZE;
1310  }
1311 }
1312 
1313 static void FUNC(put_hevc_epel_uni_w_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1314  int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
1315 {
1316  int x, y;
1317  pixel *src = (pixel *)_src;
1318  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1319  pixel *dst = (pixel *)_dst;
1320  ptrdiff_t dststride = _dststride / sizeof(pixel);
1321  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1322  int shift = denom + 14 - BIT_DEPTH;
1323 #if BIT_DEPTH < 14
1324  int offset = 1 << (shift - 1);
1325 #else
1326  int offset = 0;
1327 #endif
1328 
1329  ox = ox * (1 << (BIT_DEPTH - 8));
1330  for (y = 0; y < height; y++) {
1331  for (x = 0; x < width; x++) {
1332  dst[x] = av_clip_pixel((((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
1333  }
1334  dst += dststride;
1335  src += srcstride;
1336  }
1337 }
1338 
1339 static void FUNC(put_hevc_epel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1340  int16_t *src2,
1341  int height, int denom, int wx0, int wx1,
1342  int ox0, int ox1, intptr_t mx, intptr_t my, int width)
1343 {
1344  int x, y;
1345  pixel *src = (pixel *)_src;
1346  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1347  pixel *dst = (pixel *)_dst;
1348  ptrdiff_t dststride = _dststride / sizeof(pixel);
1349  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1350  int shift = 14 + 1 - BIT_DEPTH;
1351  int log2Wd = denom + shift - 1;
1352 
1353  ox0 = ox0 * (1 << (BIT_DEPTH - 8));
1354  ox1 = ox1 * (1 << (BIT_DEPTH - 8));
1355  for (y = 0; y < height; y++) {
1356  for (x = 0; x < width; x++)
1357  dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
1358  ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
1359  src += srcstride;
1360  dst += dststride;
1361  src2 += MAX_PB_SIZE;
1362  }
1363 }
1364 
1365 static void FUNC(put_hevc_epel_uni_w_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1366  int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
1367 {
1368  int x, y;
1369  pixel *src = (pixel *)_src;
1370  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1371  pixel *dst = (pixel *)_dst;
1372  ptrdiff_t dststride = _dststride / sizeof(pixel);
1373  const int8_t *filter = ff_hevc_epel_filters[my - 1];
1374  int shift = denom + 14 - BIT_DEPTH;
1375 #if BIT_DEPTH < 14
1376  int offset = 1 << (shift - 1);
1377 #else
1378  int offset = 0;
1379 #endif
1380 
1381  ox = ox * (1 << (BIT_DEPTH - 8));
1382  for (y = 0; y < height; y++) {
1383  for (x = 0; x < width; x++) {
1384  dst[x] = av_clip_pixel((((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
1385  }
1386  dst += dststride;
1387  src += srcstride;
1388  }
1389 }
1390 
1391 static void FUNC(put_hevc_epel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1392  int16_t *src2,
1393  int height, int denom, int wx0, int wx1,
1394  int ox0, int ox1, intptr_t mx, intptr_t my, int width)
1395 {
1396  int x, y;
1397  pixel *src = (pixel *)_src;
1398  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1399  const int8_t *filter = ff_hevc_epel_filters[my - 1];
1400  pixel *dst = (pixel *)_dst;
1401  ptrdiff_t dststride = _dststride / sizeof(pixel);
1402  int shift = 14 + 1 - BIT_DEPTH;
1403  int log2Wd = denom + shift - 1;
1404 
1405  ox0 = ox0 * (1 << (BIT_DEPTH - 8));
1406  ox1 = ox1 * (1 << (BIT_DEPTH - 8));
1407  for (y = 0; y < height; y++) {
1408  for (x = 0; x < width; x++)
1409  dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
1410  ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
1411  src += srcstride;
1412  dst += dststride;
1413  src2 += MAX_PB_SIZE;
1414  }
1415 }
1416 
1417 static void FUNC(put_hevc_epel_uni_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1418  int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
1419 {
1420  int x, y;
1421  pixel *src = (pixel *)_src;
1422  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1423  pixel *dst = (pixel *)_dst;
1424  ptrdiff_t dststride = _dststride / sizeof(pixel);
1425  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1426  int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
1427  int16_t *tmp = tmp_array;
1428  int shift = denom + 14 - BIT_DEPTH;
1429 #if BIT_DEPTH < 14
1430  int offset = 1 << (shift - 1);
1431 #else
1432  int offset = 0;
1433 #endif
1434 
1435  src -= EPEL_EXTRA_BEFORE * srcstride;
1436 
1437  for (y = 0; y < height + EPEL_EXTRA; y++) {
1438  for (x = 0; x < width; x++)
1439  tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1440  src += srcstride;
1441  tmp += MAX_PB_SIZE;
1442  }
1443 
1444  tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1445  filter = ff_hevc_epel_filters[my - 1];
1446 
1447  ox = ox * (1 << (BIT_DEPTH - 8));
1448  for (y = 0; y < height; y++) {
1449  for (x = 0; x < width; x++)
1450  dst[x] = av_clip_pixel((((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox);
1451  tmp += MAX_PB_SIZE;
1452  dst += dststride;
1453  }
1454 }
1455 
1456 static void FUNC(put_hevc_epel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
1457  int16_t *src2,
1458  int height, int denom, int wx0, int wx1,
1459  int ox0, int ox1, intptr_t mx, intptr_t my, int width)
1460 {
1461  int x, y;
1462  pixel *src = (pixel *)_src;
1463  ptrdiff_t srcstride = _srcstride / sizeof(pixel);
1464  pixel *dst = (pixel *)_dst;
1465  ptrdiff_t dststride = _dststride / sizeof(pixel);
1466  const int8_t *filter = ff_hevc_epel_filters[mx - 1];
1467  int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
1468  int16_t *tmp = tmp_array;
1469  int shift = 14 + 1 - BIT_DEPTH;
1470  int log2Wd = denom + shift - 1;
1471 
1472  src -= EPEL_EXTRA_BEFORE * srcstride;
1473 
1474  for (y = 0; y < height + EPEL_EXTRA; y++) {
1475  for (x = 0; x < width; x++)
1476  tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
1477  src += srcstride;
1478  tmp += MAX_PB_SIZE;
1479  }
1480 
1481  tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
1482  filter = ff_hevc_epel_filters[my - 1];
1483 
1484  ox0 = ox0 * (1 << (BIT_DEPTH - 8));
1485  ox1 = ox1 * (1 << (BIT_DEPTH - 8));
1486  for (y = 0; y < height; y++) {
1487  for (x = 0; x < width; x++)
1488  dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 +
1489  ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
1490  tmp += MAX_PB_SIZE;
1491  dst += dststride;
1492  src2 += MAX_PB_SIZE;
1493  }
1494 }
1495 
/*
 * Sample accessors relative to `pix`; they read `pix`, `xstride` and
 * `ystride` from the enclosing scope. P* lie before pix along xstride,
 * Q* at and after it.
 */
// first line of the segment
#define P3 pix[-4 * xstride]
#define P2 pix[-3 * xstride]
#define P1 pix[-2 * xstride]
#define P0 pix[-xstride]
#define Q0 pix[0]
#define Q1 pix[xstride]
#define Q2 pix[2 * xstride]
#define Q3 pix[3 * xstride]

// fourth line of the segment (3 * ystride further); used only for the
// deblocking decision
#define TP3 pix[3 * ystride - 4 * xstride]
#define TP2 pix[3 * ystride - 3 * xstride]
#define TP1 pix[3 * ystride - 2 * xstride]
#define TP0 pix[3 * ystride - xstride]
#define TQ0 pix[3 * ystride]
#define TQ1 pix[3 * ystride + xstride]
#define TQ2 pix[3 * ystride + 2 * xstride]
#define TQ3 pix[3 * ystride + 3 * xstride]
1515 
1517  ptrdiff_t _xstride, ptrdiff_t _ystride,
1518  int beta, int *_tc,
1519  uint8_t *_no_p, uint8_t *_no_q)
1520 {
1521  int d, j;
1522  pixel *pix = (pixel *)_pix;
1523  ptrdiff_t xstride = _xstride / sizeof(pixel);
1524  ptrdiff_t ystride = _ystride / sizeof(pixel);
1525 
1526  beta <<= BIT_DEPTH - 8;
1527 
1528  for (j = 0; j < 2; j++) {
1529  const int dp0 = abs(P2 - 2 * P1 + P0);
1530  const int dq0 = abs(Q2 - 2 * Q1 + Q0);
1531  const int dp3 = abs(TP2 - 2 * TP1 + TP0);
1532  const int dq3 = abs(TQ2 - 2 * TQ1 + TQ0);
1533  const int d0 = dp0 + dq0;
1534  const int d3 = dp3 + dq3;
1535  const int tc = _tc[j] << (BIT_DEPTH - 8);
1536  const int no_p = _no_p[j];
1537  const int no_q = _no_q[j];
1538 
1539  if (d0 + d3 >= beta) {
1540  pix += 4 * ystride;
1541  continue;
1542  } else {
1543  const int beta_3 = beta >> 3;
1544  const int beta_2 = beta >> 2;
1545  const int tc25 = ((tc * 5 + 1) >> 1);
1546 
1547  if (abs(P3 - P0) + abs(Q3 - Q0) < beta_3 && abs(P0 - Q0) < tc25 &&
1548  abs(TP3 - TP0) + abs(TQ3 - TQ0) < beta_3 && abs(TP0 - TQ0) < tc25 &&
1549  (d0 << 1) < beta_2 && (d3 << 1) < beta_2) {
1550  // strong filtering
1551  const int tc2 = tc << 1;
1552  for (d = 0; d < 4; d++) {
1553  const int p3 = P3;
1554  const int p2 = P2;
1555  const int p1 = P1;
1556  const int p0 = P0;
1557  const int q0 = Q0;
1558  const int q1 = Q1;
1559  const int q2 = Q2;
1560  const int q3 = Q3;
1561  if (!no_p) {
1562  P0 = p0 + av_clip(((p2 + 2 * p1 + 2 * p0 + 2 * q0 + q1 + 4) >> 3) - p0, -tc2, tc2);
1563  P1 = p1 + av_clip(((p2 + p1 + p0 + q0 + 2) >> 2) - p1, -tc2, tc2);
1564  P2 = p2 + av_clip(((2 * p3 + 3 * p2 + p1 + p0 + q0 + 4) >> 3) - p2, -tc2, tc2);
1565  }
1566  if (!no_q) {
1567  Q0 = q0 + av_clip(((p1 + 2 * p0 + 2 * q0 + 2 * q1 + q2 + 4) >> 3) - q0, -tc2, tc2);
1568  Q1 = q1 + av_clip(((p0 + q0 + q1 + q2 + 2) >> 2) - q1, -tc2, tc2);
1569  Q2 = q2 + av_clip(((2 * q3 + 3 * q2 + q1 + q0 + p0 + 4) >> 3) - q2, -tc2, tc2);
1570  }
1571  pix += ystride;
1572  }
1573  } else { // normal filtering
1574  int nd_p = 1;
1575  int nd_q = 1;
1576  const int tc_2 = tc >> 1;
1577  if (dp0 + dp3 < ((beta + (beta >> 1)) >> 3))
1578  nd_p = 2;
1579  if (dq0 + dq3 < ((beta + (beta >> 1)) >> 3))
1580  nd_q = 2;
1581 
1582  for (d = 0; d < 4; d++) {
1583  const int p2 = P2;
1584  const int p1 = P1;
1585  const int p0 = P0;
1586  const int q0 = Q0;
1587  const int q1 = Q1;
1588  const int q2 = Q2;
1589  int delta0 = (9 * (q0 - p0) - 3 * (q1 - p1) + 8) >> 4;
1590  if (abs(delta0) < 10 * tc) {
1591  delta0 = av_clip(delta0, -tc, tc);
1592  if (!no_p)
1593  P0 = av_clip_pixel(p0 + delta0);
1594  if (!no_q)
1595  Q0 = av_clip_pixel(q0 - delta0);
1596  if (!no_p && nd_p > 1) {
1597  const int deltap1 = av_clip((((p2 + p0 + 1) >> 1) - p1 + delta0) >> 1, -tc_2, tc_2);
1598  P1 = av_clip_pixel(p1 + deltap1);
1599  }
1600  if (!no_q && nd_q > 1) {
1601  const int deltaq1 = av_clip((((q2 + q0 + 1) >> 1) - q1 - delta0) >> 1, -tc_2, tc_2);
1602  Q1 = av_clip_pixel(q1 + deltaq1);
1603  }
1604  }
1605  pix += ystride;
1606  }
1607  }
1608  }
1609  }
1610 }
1611 
1612 static void FUNC(hevc_loop_filter_chroma)(uint8_t *_pix, ptrdiff_t _xstride,
1613  ptrdiff_t _ystride, int *_tc,
1614  uint8_t *_no_p, uint8_t *_no_q)
1615 {
1616  int d, j, no_p, no_q;
1617  pixel *pix = (pixel *)_pix;
1618  ptrdiff_t xstride = _xstride / sizeof(pixel);
1619  ptrdiff_t ystride = _ystride / sizeof(pixel);
1620 
1621  for (j = 0; j < 2; j++) {
1622  const int tc = _tc[j] << (BIT_DEPTH - 8);
1623  if (tc <= 0) {
1624  pix += 4 * ystride;
1625  continue;
1626  }
1627  no_p = _no_p[j];
1628  no_q = _no_q[j];
1629 
1630  for (d = 0; d < 4; d++) {
1631  int delta0;
1632  const int p1 = P1;
1633  const int p0 = P0;
1634  const int q0 = Q0;
1635  const int q1 = Q1;
1636  delta0 = av_clip((((q0 - p0) * 4) + p1 - q1 + 4) >> 3, -tc, tc);
1637  if (!no_p)
1638  P0 = av_clip_pixel(p0 + delta0);
1639  if (!no_q)
1640  Q0 = av_clip_pixel(q0 - delta0);
1641  pix += ystride;
1642  }
1643  }
1644 }
1645 
1646 static void FUNC(hevc_h_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
1647  int32_t *tc, uint8_t *no_p,
1648  uint8_t *no_q)
1649 {
1650  FUNC(hevc_loop_filter_chroma)(pix, stride, sizeof(pixel), tc, no_p, no_q);
1651 }
1652 
1653 static void FUNC(hevc_v_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
1654  int32_t *tc, uint8_t *no_p,
1655  uint8_t *no_q)
1656 {
1657  FUNC(hevc_loop_filter_chroma)(pix, sizeof(pixel), stride, tc, no_p, no_q);
1658 }
1659 
1660 static void FUNC(hevc_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
1661  int beta, int32_t *tc, uint8_t *no_p,
1662  uint8_t *no_q)
1663 {
1664  FUNC(hevc_loop_filter_luma)(pix, stride, sizeof(pixel),
1665  beta, tc, no_p, no_q);
1666 }
1667 
1668 static void FUNC(hevc_v_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
1669  int beta, int32_t *tc, uint8_t *no_p,
1670  uint8_t *no_q)
1671 {
1672  FUNC(hevc_loop_filter_luma)(pix, sizeof(pixel), stride,
1673  beta, tc, no_p, no_q);
1674 }
1675 
/* Drop the deblocking sample accessors so they do not leak past this point. */
#undef Q3
#undef Q2
#undef Q1
#undef Q0
#undef P0
#undef P1
#undef P2
#undef P3

#undef TQ3
#undef TQ2
#undef TQ1
#undef TQ0
#undef TP0
#undef TP1
#undef TP2
#undef TP3
static void FUNC() put_hevc_qpel_bi_w_hv(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2, int height, int denom, int wx0, int wx1, int ox0, int ox1, intptr_t mx, intptr_t my, int width)
static void FUNC() add_residual16x16(uint8_t *_dst, int16_t *res, ptrdiff_t stride)
static void FUNC() put_hevc_pel_uni_pixels(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
static void FUNC() put_hevc_qpel_bi_hv(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
static int shift(int a, int b)
Definition: sonic.c:82
static void FUNC() put_hevc_epel_bi_hv(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
static void FUNC() put_hevc_qpel_uni_w_h(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
static void FUNC() put_hevc_epel_uni_w_h(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
#define IDCT_DC(H)
static unsigned int get_bits(GetBitContext *s, int n)
Read 1-25 bits.
Definition: get_bits.h:381
static void FUNC() put_hevc_pel_bi_w_pixels(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2, int height, int denom, int wx0, int wx1, int ox0, int ox1, intptr_t mx, intptr_t my, int width)
#define P1
static av_always_inline void FUNC() add_residual(uint8_t *_dst, int16_t *res, ptrdiff_t stride, int size)
#define Q0
#define BIT_DEPTH
#define tc
Definition: regdef.h:69
static const uint8_t q1[256]
Definition: twofish.c:96
static void FUNC() put_hevc_qpel_uni_w_hv(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
#define src
Definition: vp8dsp.c:254
static void FUNC() add_residual4x4(uint8_t *_dst, int16_t *res, ptrdiff_t stride)
static void FUNC() put_hevc_qpel_uni_h(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
static void FUNC() add_residual32x32(uint8_t *_dst, int16_t *res, ptrdiff_t stride)
static void FUNC() put_hevc_qpel_h(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
#define av_clip_pixel(a)
#define Q3
static void filter(int16_t *output, ptrdiff_t out_stride, int16_t *low, ptrdiff_t low_stride, int16_t *high, ptrdiff_t high_stride, int len, int clip)
Definition: cfhd.c:153
#define TQ1
#define P0
uint8_t
#define QPEL_FILTER(src, stride)
static void FUNC() sao_edge_restore_1(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao, int *borders, int _width, int _height, int c_idx, uint8_t *vert_edge, uint8_t *horiz_edge, uint8_t *diag_edge)
static void FUNC() put_hevc_qpel_bi_h(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
#define QPEL_EXTRA_BEFORE
Definition: hevcdec.h:65
#define TQ2
static void FUNC() put_hevc_qpel_uni_w_v(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
#define height
static void FUNC() hevc_h_loop_filter_luma(uint8_t *pix, ptrdiff_t stride, int beta, int32_t *tc, uint8_t *no_p, uint8_t *no_q)
const int8_t ff_hevc_epel_filters[7][4]
Definition: hevcdsp.c:94
bitstream reader API header.
ptrdiff_t size
Definition: opengl_enc.c:101
#define TP3
const int8_t ff_hevc_qpel_filters[3][16]
Definition: hevcdsp.c:104
#define P3
static void FUNC() put_hevc_qpel_v(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
static void FUNC() put_hevc_epel_hv(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
static const uint8_t offset[127][2]
Definition: vf_spp.c:92
static const uint8_t q0[256]
Definition: twofish.c:77
static void FUNC() put_hevc_qpel_uni_hv(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
#define TP2
static void FUNC() add_residual8x8(uint8_t *_dst, int16_t *res, ptrdiff_t stride)
#define EPEL_FILTER(src, stride)
static void FUNC() put_hevc_pel_bi_pixels(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
static void FUNC() put_hevc_epel_bi_v(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
static void FUNC() sao_band_filter(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int16_t *sao_offset_val, int sao_left_class, int width, int height)
static void FUNC() put_hevc_epel_uni_w_hv(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
#define EPEL_EXTRA_BEFORE
Definition: hevcdec.h:62
#define width
static void FUNC(ff_hevc_idct_4x4, BIT_DEPTH)
int32_t
#define IDCT(H)
#define SCALE(dst, x)
static void FUNC() put_hevc_qpel_bi_v(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
static void FUNC() hevc_v_loop_filter_luma(uint8_t *pix, ptrdiff_t stride, int beta, int32_t *tc, uint8_t *no_p, uint8_t *no_q)
static void FUNC() put_hevc_epel_bi_w_h(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2, int height, int denom, int wx0, int wx1, int ox0, int ox1, intptr_t mx, intptr_t my, int width)
static void FUNC() put_hevc_epel_uni_w_v(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
static void FUNC() put_hevc_pel_pixels(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
#define Q1
#define TP0
static void FUNC() hevc_loop_filter_chroma(uint8_t *_pix, ptrdiff_t _xstride, ptrdiff_t _ystride, int *_tc, uint8_t *_no_p, uint8_t *_no_q)
static void FUNC() hevc_h_loop_filter_chroma(uint8_t *pix, ptrdiff_t stride, int32_t *tc, uint8_t *no_p, uint8_t *no_q)
static void FUNC() put_hevc_qpel_hv(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
static void FUNC() put_hevc_qpel_bi_w_v(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2, int height, int denom, int wx0, int wx1, int ox0, int ox1, intptr_t mx, intptr_t my, int width)
static void FUNC() put_hevc_epel_uni_h(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
static void FUNC() dequant(int16_t *coeffs, int16_t log2_size)
uint8_t pixel
Definition: tiny_ssim.c:42
#define MAX_PB_SIZE
Definition: hevcdsp.h:30
static void FUNC() put_hevc_epel_uni_v(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
static void FUNC() put_pcm(uint8_t *_dst, ptrdiff_t stride, int width, int height, GetBitContext *gb, int pcm_bit_depth)
static void FUNC() put_hevc_epel_uni_hv(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
#define QPEL_EXTRA
Definition: hevcdec.h:67
static void FUNC() transform_rdpcm(int16_t *_coeffs, int16_t log2_size, int mode)
#define CMP(a, b)
static void FUNC() put_hevc_epel_v(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
GLint GLenum GLboolean GLsizei stride
Definition: opengl_enc.c:105
#define TP1
static void FUNC() put_hevc_qpel_bi_w_h(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2, int height, int denom, int wx0, int wx1, int ox0, int ox1, intptr_t mx, intptr_t my, int width)
static void FUNC() hevc_v_loop_filter_chroma(uint8_t *pix, ptrdiff_t stride, int32_t *tc, uint8_t *no_p, uint8_t *no_q)
int16_t offset_val[3][5]
SaoOffsetVal.
Definition: hevcdsp.h:40
static void FUNC() sao_edge_restore_0(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao, int *borders, int _width, int _height, int c_idx, uint8_t *vert_edge, uint8_t *horiz_edge, uint8_t *diag_edge)
#define AV_INPUT_BUFFER_PADDING_SIZE
Required number of additionally allocated bytes at the end of the input bitstream for decoding...
Definition: avcodec.h:782
#define TR_4x4_LUMA(dst, src, step, assign)
#define P2
static const uint8_t offset_table[]
Definition: escape130.c:41
static void FUNC() put_hevc_pel_uni_w_pixels(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
static void FUNC() put_hevc_epel_bi_h(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
#define TQ3
static void FUNC() sao_edge_filter(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val, int eo, int width, int height)
static void FUNC() transform_4x4_luma(int16_t *coeffs)
static void FUNC() hevc_loop_filter_luma(uint8_t *_pix, ptrdiff_t _xstride, ptrdiff_t _ystride, int beta, int *_tc, uint8_t *_no_p, uint8_t *_no_q)
static void FUNC() put_hevc_epel_bi_w_hv(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2, int height, int denom, int wx0, int wx1, int ox0, int ox1, intptr_t mx, intptr_t my, int width)
static void FUNC() put_hevc_epel_bi_w_v(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2, int height, int denom, int wx0, int wx1, int ox0, int ox1, intptr_t mx, intptr_t my, int width)
#define EPEL_EXTRA
Definition: hevcdec.h:64
#define av_always_inline
Definition: attributes.h:39
#define TQ0
#define stride
static void FUNC() put_hevc_epel_h(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
static void FUNC() put_hevc_qpel_uni_v(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
#define Q2
mode
Use these values in ebur128_init (or'ed).
Definition: ebur128.h:83
static uint8_t tmp[11]
Definition: aes_ctr.c:26