FFmpeg
h264pred_template.c
Go to the documentation of this file.
1 /*
2  * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3  * Copyright (c) 2003-2011 Michael Niedermayer <michaelni@gmx.at>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * H.264 / AVC / MPEG-4 part10 prediction functions.
25  * @author Michael Niedermayer <michaelni@gmx.at>
26  */
27 
28 #include "libavutil/intreadwrite.h"
29 
30 #include "mathops.h"
31 
32 #include "bit_depth_template.c"
33 
34 static void FUNCC(pred4x4_vertical)(uint8_t *_src, const uint8_t *topright,
35  ptrdiff_t _stride)
36 {
37  pixel *src = (pixel*)_src;
38  int stride = _stride>>(sizeof(pixel)-1);
39  const pixel4 a= AV_RN4PA(src-stride);
40 
41  AV_WN4PA(src+0*stride, a);
42  AV_WN4PA(src+1*stride, a);
43  AV_WN4PA(src+2*stride, a);
44  AV_WN4PA(src+3*stride, a);
45 }
46 
47 static void FUNCC(pred4x4_horizontal)(uint8_t *_src, const uint8_t *topright,
48  ptrdiff_t _stride)
49 {
50  pixel *src = (pixel*)_src;
51  int stride = _stride>>(sizeof(pixel)-1);
56 }
57 
58 static void FUNCC(pred4x4_dc)(uint8_t *_src, const uint8_t *topright,
59  ptrdiff_t _stride)
60 {
61  pixel *src = (pixel*)_src;
62  int stride = _stride>>(sizeof(pixel)-1);
63  const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
64  + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
65  const pixel4 a = PIXEL_SPLAT_X4(dc);
66 
67  AV_WN4PA(src+0*stride, a);
68  AV_WN4PA(src+1*stride, a);
69  AV_WN4PA(src+2*stride, a);
70  AV_WN4PA(src+3*stride, a);
71 }
72 
73 static void FUNCC(pred4x4_left_dc)(uint8_t *_src, const uint8_t *topright,
74  ptrdiff_t _stride)
75 {
76  pixel *src = (pixel*)_src;
77  int stride = _stride>>(sizeof(pixel)-1);
78  const int dc= ( src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;
79  const pixel4 a = PIXEL_SPLAT_X4(dc);
80 
81  AV_WN4PA(src+0*stride, a);
82  AV_WN4PA(src+1*stride, a);
83  AV_WN4PA(src+2*stride, a);
84  AV_WN4PA(src+3*stride, a);
85 }
86 
87 static void FUNCC(pred4x4_top_dc)(uint8_t *_src, const uint8_t *topright,
88  ptrdiff_t _stride)
89 {
90  pixel *src = (pixel*)_src;
91  int stride = _stride>>(sizeof(pixel)-1);
92  const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;
93  const pixel4 a = PIXEL_SPLAT_X4(dc);
94 
95  AV_WN4PA(src+0*stride, a);
96  AV_WN4PA(src+1*stride, a);
97  AV_WN4PA(src+2*stride, a);
98  AV_WN4PA(src+3*stride, a);
99 }
100 
101 static void FUNCC(pred4x4_128_dc)(uint8_t *_src, const uint8_t *topright,
102  ptrdiff_t _stride)
103 {
104  pixel *src = (pixel*)_src;
105  int stride = _stride>>(sizeof(pixel)-1);
106  const pixel4 a = PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1));
107 
108  AV_WN4PA(src+0*stride, a);
109  AV_WN4PA(src+1*stride, a);
110  AV_WN4PA(src+2*stride, a);
111  AV_WN4PA(src+3*stride, a);
112 }
113 
114 
/*
 * Edge loaders for the 4x4 directional predictors.
 *
 * Each macro declares four `const unsigned` locals in the enclosing scope
 * and expects `src` and `stride` (or `topright` for the top-right loader)
 * to be visible there. Every local is tagged av_unused because not every
 * predictor reads all four.
 */

/* t4..t7: the four samples read through the `topright` pointer */
#define LOAD_TOP_RIGHT_EDGE                                 \
    const unsigned av_unused t4 = topright[0];              \
    const unsigned av_unused t5 = topright[1];              \
    const unsigned av_unused t6 = topright[2];              \
    const unsigned av_unused t7 = topright[3];

/* l4..l7: left-column samples of rows 4..7 (below the 4x4 block) */
#define LOAD_DOWN_LEFT_EDGE                                 \
    const unsigned av_unused l4 = src[-1+4*stride];         \
    const unsigned av_unused l5 = src[-1+5*stride];         \
    const unsigned av_unused l6 = src[-1+6*stride];         \
    const unsigned av_unused l7 = src[-1+7*stride];

/* l0..l3: left-column samples of rows 0..3 */
#define LOAD_LEFT_EDGE                                      \
    const unsigned av_unused l0 = src[-1+0*stride];         \
    const unsigned av_unused l1 = src[-1+1*stride];         \
    const unsigned av_unused l2 = src[-1+2*stride];         \
    const unsigned av_unused l3 = src[-1+3*stride];

/* t0..t3: the four samples of the row directly above the block */
#define LOAD_TOP_EDGE                                       \
    const unsigned av_unused t0 = src[ 0-1*stride];         \
    const unsigned av_unused t1 = src[ 1-1*stride];         \
    const unsigned av_unused t2 = src[ 2-1*stride];         \
    const unsigned av_unused t3 = src[ 3-1*stride];

139 static void FUNCC(pred4x4_down_right)(uint8_t *_src, const uint8_t *topright,
140  ptrdiff_t _stride)
141 {
142  pixel *src = (pixel*)_src;
143  int stride = _stride>>(sizeof(pixel)-1);
144  const int lt= src[-1-1*stride];
147 
148  src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
149  src[0+2*stride]=
150  src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
151  src[0+1*stride]=
152  src[1+2*stride]=
153  src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
154  src[0+0*stride]=
155  src[1+1*stride]=
156  src[2+2*stride]=
157  src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
158  src[1+0*stride]=
159  src[2+1*stride]=
160  src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
161  src[2+0*stride]=
162  src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
163  src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
164 }
165 
166 static void FUNCC(pred4x4_down_left)(uint8_t *_src, const uint8_t *_topright,
167  ptrdiff_t _stride)
168 {
169  pixel *src = (pixel*)_src;
170  const pixel *topright = (const pixel*)_topright;
171  int stride = _stride>>(sizeof(pixel)-1);
174 // LOAD_LEFT_EDGE
175 
176  src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
177  src[1+0*stride]=
178  src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
179  src[2+0*stride]=
180  src[1+1*stride]=
181  src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
182  src[3+0*stride]=
183  src[2+1*stride]=
184  src[1+2*stride]=
185  src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
186  src[3+1*stride]=
187  src[2+2*stride]=
188  src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
189  src[3+2*stride]=
190  src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
191  src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
192 }
193 
194 static void FUNCC(pred4x4_vertical_right)(uint8_t *_src,
195  const uint8_t *topright,
196  ptrdiff_t _stride)
197 {
198  pixel *src = (pixel*)_src;
199  int stride = _stride>>(sizeof(pixel)-1);
200  const int lt= src[-1-1*stride];
203 
204  src[0+0*stride]=
205  src[1+2*stride]=(lt + t0 + 1)>>1;
206  src[1+0*stride]=
207  src[2+2*stride]=(t0 + t1 + 1)>>1;
208  src[2+0*stride]=
209  src[3+2*stride]=(t1 + t2 + 1)>>1;
210  src[3+0*stride]=(t2 + t3 + 1)>>1;
211  src[0+1*stride]=
212  src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
213  src[1+1*stride]=
214  src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
215  src[2+1*stride]=
216  src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
217  src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
218  src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
219  src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
220 }
221 
222 static void FUNCC(pred4x4_vertical_left)(uint8_t *_src,
223  const uint8_t *_topright,
224  ptrdiff_t _stride)
225 {
226  pixel *src = (pixel*)_src;
227  const pixel *topright = (const pixel*)_topright;
228  int stride = _stride>>(sizeof(pixel)-1);
231 
232  src[0+0*stride]=(t0 + t1 + 1)>>1;
233  src[1+0*stride]=
234  src[0+2*stride]=(t1 + t2 + 1)>>1;
235  src[2+0*stride]=
236  src[1+2*stride]=(t2 + t3 + 1)>>1;
237  src[3+0*stride]=
238  src[2+2*stride]=(t3 + t4+ 1)>>1;
239  src[3+2*stride]=(t4 + t5+ 1)>>1;
240  src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
241  src[1+1*stride]=
242  src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
243  src[2+1*stride]=
244  src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
245  src[3+1*stride]=
246  src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
247  src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
248 }
249 
250 static void FUNCC(pred4x4_horizontal_up)(uint8_t *_src, const uint8_t *topright,
251  ptrdiff_t _stride)
252 {
253  pixel *src = (pixel*)_src;
254  int stride = _stride>>(sizeof(pixel)-1);
256 
257  src[0+0*stride]=(l0 + l1 + 1)>>1;
258  src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
259  src[2+0*stride]=
260  src[0+1*stride]=(l1 + l2 + 1)>>1;
261  src[3+0*stride]=
262  src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
263  src[2+1*stride]=
264  src[0+2*stride]=(l2 + l3 + 1)>>1;
265  src[3+1*stride]=
266  src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
267  src[3+2*stride]=
268  src[1+3*stride]=
269  src[0+3*stride]=
270  src[2+2*stride]=
271  src[2+3*stride]=
272  src[3+3*stride]=l3;
273 }
274 
275 static void FUNCC(pred4x4_horizontal_down)(uint8_t *_src,
276  const uint8_t *topright,
277  ptrdiff_t _stride)
278 {
279  pixel *src = (pixel*)_src;
280  int stride = _stride>>(sizeof(pixel)-1);
281  const int lt= src[-1-1*stride];
284 
285  src[0+0*stride]=
286  src[2+1*stride]=(lt + l0 + 1)>>1;
287  src[1+0*stride]=
288  src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
289  src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
290  src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
291  src[0+1*stride]=
292  src[2+2*stride]=(l0 + l1 + 1)>>1;
293  src[1+1*stride]=
294  src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
295  src[0+2*stride]=
296  src[2+3*stride]=(l1 + l2+ 1)>>1;
297  src[1+2*stride]=
298  src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
299  src[0+3*stride]=(l2 + l3 + 1)>>1;
300  src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
301 }
302 
303 static void FUNCC(pred16x16_vertical)(uint8_t *_src, ptrdiff_t _stride)
304 {
305  int i;
306  pixel *src = (pixel*)_src;
307  int stride = _stride>>(sizeof(pixel)-1);
308  const pixel4 a = AV_RN4PA(((pixel4*)(src-stride))+0);
309  const pixel4 b = AV_RN4PA(((pixel4*)(src-stride))+1);
310  const pixel4 c = AV_RN4PA(((pixel4*)(src-stride))+2);
311  const pixel4 d = AV_RN4PA(((pixel4*)(src-stride))+3);
312 
313  for(i=0; i<16; i++){
314  AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
315  AV_WN4PA(((pixel4*)(src+i*stride))+1, b);
316  AV_WN4PA(((pixel4*)(src+i*stride))+2, c);
317  AV_WN4PA(((pixel4*)(src+i*stride))+3, d);
318  }
319 }
320 
321 static void FUNCC(pred16x16_horizontal)(uint8_t *_src, ptrdiff_t stride)
322 {
323  int i;
324  pixel *src = (pixel*)_src;
325  stride >>= sizeof(pixel)-1;
326 
327  for(i=0; i<16; i++){
328  const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]);
329 
330  AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
331  AV_WN4PA(((pixel4*)(src+i*stride))+1, a);
332  AV_WN4PA(((pixel4*)(src+i*stride))+2, a);
333  AV_WN4PA(((pixel4*)(src+i*stride))+3, a);
334  }
335 }
336 
/*
 * Fill 16 rows of 16 pixels with the pixel4 value v.
 * Relies on `i`, `src` and `stride` being declared in the enclosing scope
 * and advances `src` by 16 rows as a side effect.
 */
#define PREDICT_16x16_DC(v)             \
    for (i = 0; i < 16; i++) {          \
        AV_WN4PA(src+ 0, v);            \
        AV_WN4PA(src+ 4, v);            \
        AV_WN4PA(src+ 8, v);            \
        AV_WN4PA(src+12, v);            \
        src += stride;                  \
    }
345 
346 static void FUNCC(pred16x16_dc)(uint8_t *_src, ptrdiff_t stride)
347 {
348  int i, dc=0;
349  pixel *src = (pixel*)_src;
350  pixel4 dcsplat;
351  stride >>= sizeof(pixel)-1;
352 
353  for(i=0;i<16; i++){
354  dc+= src[-1+i*stride];
355  }
356 
357  for(i=0;i<16; i++){
358  dc+= src[i-stride];
359  }
360 
361  dcsplat = PIXEL_SPLAT_X4((dc+16)>>5);
362  PREDICT_16x16_DC(dcsplat);
363 }
364 
365 static void FUNCC(pred16x16_left_dc)(uint8_t *_src, ptrdiff_t stride)
366 {
367  int i, dc=0;
368  pixel *src = (pixel*)_src;
369  pixel4 dcsplat;
370  stride >>= sizeof(pixel)-1;
371 
372  for(i=0;i<16; i++){
373  dc+= src[-1+i*stride];
374  }
375 
376  dcsplat = PIXEL_SPLAT_X4((dc+8)>>4);
377  PREDICT_16x16_DC(dcsplat);
378 }
379 
380 static void FUNCC(pred16x16_top_dc)(uint8_t *_src, ptrdiff_t stride)
381 {
382  int i, dc=0;
383  pixel *src = (pixel*)_src;
384  pixel4 dcsplat;
385  stride >>= sizeof(pixel)-1;
386 
387  for(i=0;i<16; i++){
388  dc+= src[i-stride];
389  }
390 
391  dcsplat = PIXEL_SPLAT_X4((dc+8)>>4);
392  PREDICT_16x16_DC(dcsplat);
393 }
394 
395 #define PRED16x16_X(n, v) \
396 static void FUNCC(pred16x16_##n##_dc)(uint8_t *_src, ptrdiff_t stride)\
397 {\
398  int i;\
399  pixel *src = (pixel*)_src;\
400  stride >>= sizeof(pixel)-1;\
401  PREDICT_16x16_DC(PIXEL_SPLAT_X4(v));\
402 }
403 
404 PRED16x16_X(128, (1<<(BIT_DEPTH-1))+0)
405 #if BIT_DEPTH == 8
406 PRED16x16_X(127, (1<<(BIT_DEPTH-1))-1)
407 PRED16x16_X(129, (1<<(BIT_DEPTH-1))+1)
408 #endif
409 
410 static inline void FUNCC(pred16x16_plane_compat)(uint8_t *_src,
411  ptrdiff_t _stride,
412  const int svq3,
413  const int rv40)
414 {
415  int i, j, k;
416  int a;
417  INIT_CLIP
419  int stride = _stride>>(sizeof(pixel)-1);
420  const pixel * const src0 = src +7-stride;
421  const pixel * src1 = src +8*stride-1;
422  const pixel * src2 = src1-2*stride; // == src+6*stride-1;
423  int H = src0[1] - src0[-1];
424  int V = src1[0] - src2[ 0];
425  for(k=2; k<=8; ++k) {
426  src1 += stride; src2 -= stride;
427  H += k*(src0[k] - src0[-k]);
428  V += k*(src1[0] - src2[ 0]);
429  }
430  if(svq3){
431  H = ( 5*(H/4) ) / 16;
432  V = ( 5*(V/4) ) / 16;
433 
434  /* required for 100% accuracy */
435  i = H; H = V; V = i;
436  }else if(rv40){
437  H = ( H + (H>>2) ) >> 4;
438  V = ( V + (V>>2) ) >> 4;
439  }else{
440  H = ( 5*H+32 ) >> 6;
441  V = ( 5*V+32 ) >> 6;
442  }
443 
444  a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
445  for(j=16; j>0; --j) {
446  int b = a;
447  a += V;
448  for(i=-16; i<0; i+=4) {
449  src[16+i] = CLIP((b ) >> 5);
450  src[17+i] = CLIP((b+ H) >> 5);
451  src[18+i] = CLIP((b+2*H) >> 5);
452  src[19+i] = CLIP((b+3*H) >> 5);
453  b += 4*H;
454  }
455  src += stride;
456  }
457 }
458 
459 static void FUNCC(pred16x16_plane)(uint8_t *src, ptrdiff_t stride)
460 {
461  FUNCC(pred16x16_plane_compat)(src, stride, 0, 0);
462 }
463 
464 static void FUNCC(pred8x8_vertical)(uint8_t *_src, ptrdiff_t _stride)
465 {
466  int i;
467  pixel *src = (pixel*)_src;
468  int stride = _stride>>(sizeof(pixel)-1);
469  const pixel4 a= AV_RN4PA(((pixel4*)(src-stride))+0);
470  const pixel4 b= AV_RN4PA(((pixel4*)(src-stride))+1);
471 
472  for(i=0; i<8; i++){
473  AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
474  AV_WN4PA(((pixel4*)(src+i*stride))+1, b);
475  }
476 }
477 
478 static void FUNCC(pred8x16_vertical)(uint8_t *_src, ptrdiff_t _stride)
479 {
480  int i;
481  pixel *src = (pixel*)_src;
482  int stride = _stride>>(sizeof(pixel)-1);
483  const pixel4 a= AV_RN4PA(((pixel4*)(src-stride))+0);
484  const pixel4 b= AV_RN4PA(((pixel4*)(src-stride))+1);
485 
486  for(i=0; i<16; i++){
487  AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
488  AV_WN4PA(((pixel4*)(src+i*stride))+1, b);
489  }
490 }
491 
492 static void FUNCC(pred8x8_horizontal)(uint8_t *_src, ptrdiff_t stride)
493 {
494  int i;
495  pixel *src = (pixel*)_src;
496  stride >>= sizeof(pixel)-1;
497 
498  for(i=0; i<8; i++){
499  const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]);
500  AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
501  AV_WN4PA(((pixel4*)(src+i*stride))+1, a);
502  }
503 }
504 
505 static void FUNCC(pred8x16_horizontal)(uint8_t *_src, ptrdiff_t stride)
506 {
507  int i;
508  pixel *src = (pixel*)_src;
509  stride >>= sizeof(pixel)-1;
510  for(i=0; i<16; i++){
511  const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]);
512  AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
513  AV_WN4PA(((pixel4*)(src+i*stride))+1, a);
514  }
515 }
516 
517 #define PRED8x8_X(n, v)\
518 static void FUNCC(pred8x8_##n##_dc)(uint8_t *_src, ptrdiff_t stride)\
519 {\
520  int i;\
521  const pixel4 a = PIXEL_SPLAT_X4(v);\
522  pixel *src = (pixel*)_src;\
523  stride >>= sizeof(pixel)-1;\
524  for(i=0; i<8; i++){\
525  AV_WN4PA(((pixel4*)(src+i*stride))+0, a);\
526  AV_WN4PA(((pixel4*)(src+i*stride))+1, a);\
527  }\
528 }
529 
530 PRED8x8_X(128, (1<<(BIT_DEPTH-1))+0)
531 #if BIT_DEPTH == 8
532 PRED8x8_X(127, (1<<(BIT_DEPTH-1))-1)
533 PRED8x8_X(129, (1<<(BIT_DEPTH-1))+1)
534 #endif
535 
536 static void FUNCC(pred8x16_128_dc)(uint8_t *_src, ptrdiff_t stride)
537 {
540 }
541 
542 static void FUNCC(pred8x8_left_dc)(uint8_t *_src, ptrdiff_t stride)
543 {
544  int i;
545  int dc0, dc2;
546  pixel4 dc0splat, dc2splat;
547  pixel *src = (pixel*)_src;
548  stride >>= sizeof(pixel)-1;
549 
550  dc0=dc2=0;
551  for(i=0;i<4; i++){
552  dc0+= src[-1+i*stride];
553  dc2+= src[-1+(i+4)*stride];
554  }
555  dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
556  dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);
557 
558  for(i=0; i<4; i++){
559  AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
560  AV_WN4PA(((pixel4*)(src+i*stride))+1, dc0splat);
561  }
562  for(i=4; i<8; i++){
563  AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat);
564  AV_WN4PA(((pixel4*)(src+i*stride))+1, dc2splat);
565  }
566 }
567 
568 static void FUNCC(pred8x16_left_dc)(uint8_t *_src, ptrdiff_t stride)
569 {
572 }
573 
574 static void FUNCC(pred8x8_top_dc)(uint8_t *_src, ptrdiff_t stride)
575 {
576  int i;
577  int dc0, dc1;
578  pixel4 dc0splat, dc1splat;
579  pixel *src = (pixel*)_src;
580  stride >>= sizeof(pixel)-1;
581 
582  dc0=dc1=0;
583  for(i=0;i<4; i++){
584  dc0+= src[i-stride];
585  dc1+= src[4+i-stride];
586  }
587  dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
588  dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
589 
590  for(i=0; i<4; i++){
591  AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
592  AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
593  }
594  for(i=4; i<8; i++){
595  AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
596  AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
597  }
598 }
599 
600 static void FUNCC(pred8x16_top_dc)(uint8_t *_src, ptrdiff_t stride)
601 {
602  int i;
603  int dc0, dc1;
604  pixel4 dc0splat, dc1splat;
605  pixel *src = (pixel*)_src;
606  stride >>= sizeof(pixel)-1;
607 
608  dc0=dc1=0;
609  for(i=0;i<4; i++){
610  dc0+= src[i-stride];
611  dc1+= src[4+i-stride];
612  }
613  dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
614  dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
615 
616  for(i=0; i<16; i++){
617  AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
618  AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
619  }
620 }
621 
622 static void FUNCC(pred8x8_dc)(uint8_t *_src, ptrdiff_t stride)
623 {
624  int i;
625  int dc0, dc1, dc2;
626  pixel4 dc0splat, dc1splat, dc2splat, dc3splat;
627  pixel *src = (pixel*)_src;
628  stride >>= sizeof(pixel)-1;
629 
630  dc0=dc1=dc2=0;
631  for(i=0;i<4; i++){
632  dc0+= src[-1+i*stride] + src[i-stride];
633  dc1+= src[4+i-stride];
634  dc2+= src[-1+(i+4)*stride];
635  }
636  dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3);
637  dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
638  dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);
639  dc3splat = PIXEL_SPLAT_X4((dc1 + dc2 + 4)>>3);
640 
641  for(i=0; i<4; i++){
642  AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
643  AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
644  }
645  for(i=4; i<8; i++){
646  AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat);
647  AV_WN4PA(((pixel4*)(src+i*stride))+1, dc3splat);
648  }
649 }
650 
651 static void FUNCC(pred8x16_dc)(uint8_t *_src, ptrdiff_t stride)
652 {
653  int i;
654  int dc0, dc1, dc2, dc3, dc4;
655  pixel4 dc0splat, dc1splat, dc2splat, dc3splat, dc4splat, dc5splat, dc6splat, dc7splat;
656  pixel *src = (pixel*)_src;
657  stride >>= sizeof(pixel)-1;
658 
659  dc0=dc1=dc2=dc3=dc4=0;
660  for(i=0;i<4; i++){
661  dc0+= src[-1+i*stride] + src[i-stride];
662  dc1+= src[4+i-stride];
663  dc2+= src[-1+(i+4)*stride];
664  dc3+= src[-1+(i+8)*stride];
665  dc4+= src[-1+(i+12)*stride];
666  }
667  dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3);
668  dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
669  dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);
670  dc3splat = PIXEL_SPLAT_X4((dc1 + dc2 + 4)>>3);
671  dc4splat = PIXEL_SPLAT_X4((dc3 + 2)>>2);
672  dc5splat = PIXEL_SPLAT_X4((dc1 + dc3 + 4)>>3);
673  dc6splat = PIXEL_SPLAT_X4((dc4 + 2)>>2);
674  dc7splat = PIXEL_SPLAT_X4((dc1 + dc4 + 4)>>3);
675 
676  for(i=0; i<4; i++){
677  AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
678  AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
679  }
680  for(i=4; i<8; i++){
681  AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat);
682  AV_WN4PA(((pixel4*)(src+i*stride))+1, dc3splat);
683  }
684  for(i=8; i<12; i++){
685  AV_WN4PA(((pixel4*)(src+i*stride))+0, dc4splat);
686  AV_WN4PA(((pixel4*)(src+i*stride))+1, dc5splat);
687  }
688  for(i=12; i<16; i++){
689  AV_WN4PA(((pixel4*)(src+i*stride))+0, dc6splat);
690  AV_WN4PA(((pixel4*)(src+i*stride))+1, dc7splat);
691  }
692 }
693 
// The following functions should not be optimized!
695 static void FUNC(pred8x8_mad_cow_dc_l0t)(uint8_t *src, ptrdiff_t stride)
696 {
699 }
700 
701 static void FUNC(pred8x16_mad_cow_dc_l0t)(uint8_t *src, ptrdiff_t stride)
702 {
705 }
706 
707 static void FUNC(pred8x8_mad_cow_dc_0lt)(uint8_t *src, ptrdiff_t stride)
708 {
711 }
712 
713 static void FUNC(pred8x16_mad_cow_dc_0lt)(uint8_t *src, ptrdiff_t stride)
714 {
717 }
718 
719 static void FUNC(pred8x8_mad_cow_dc_l00)(uint8_t *src, ptrdiff_t stride)
720 {
723  FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride);
724 }
725 
726 static void FUNC(pred8x16_mad_cow_dc_l00)(uint8_t *src, ptrdiff_t stride)
727 {
730  FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride);
731 }
732 
733 static void FUNC(pred8x8_mad_cow_dc_0l0)(uint8_t *src, ptrdiff_t stride)
734 {
737  FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride);
738 }
739 
740 static void FUNC(pred8x16_mad_cow_dc_0l0)(uint8_t *src, ptrdiff_t stride)
741 {
744  FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride);
745 }
746 
747 static void FUNCC(pred8x8_plane)(uint8_t *_src, ptrdiff_t _stride)
748 {
749  int j, k;
750  int a;
751  INIT_CLIP
752  pixel *src = (pixel*)_src;
753  int stride = _stride>>(sizeof(pixel)-1);
754  const pixel * const src0 = src +3-stride;
755  const pixel * src1 = src +4*stride-1;
756  const pixel * src2 = src1-2*stride; // == src+2*stride-1;
757  int H = src0[1] - src0[-1];
758  int V = src1[0] - src2[ 0];
759  for(k=2; k<=4; ++k) {
760  src1 += stride; src2 -= stride;
761  H += k*(src0[k] - src0[-k]);
762  V += k*(src1[0] - src2[ 0]);
763  }
764  H = ( 17*H+16 ) >> 5;
765  V = ( 17*V+16 ) >> 5;
766 
767  a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
768  for(j=8; j>0; --j) {
769  int b = a;
770  a += V;
771  src[0] = CLIP((b ) >> 5);
772  src[1] = CLIP((b+ H) >> 5);
773  src[2] = CLIP((b+2*H) >> 5);
774  src[3] = CLIP((b+3*H) >> 5);
775  src[4] = CLIP((b+4*H) >> 5);
776  src[5] = CLIP((b+5*H) >> 5);
777  src[6] = CLIP((b+6*H) >> 5);
778  src[7] = CLIP((b+7*H) >> 5);
779  src += stride;
780  }
781 }
782 
783 static void FUNCC(pred8x16_plane)(uint8_t *_src, ptrdiff_t _stride)
784 {
785  int j, k;
786  int a;
787  INIT_CLIP
788  pixel *src = (pixel*)_src;
789  int stride = _stride>>(sizeof(pixel)-1);
790  const pixel * const src0 = src +3-stride;
791  const pixel * src1 = src +8*stride-1;
792  const pixel * src2 = src1-2*stride; // == src+6*stride-1;
793  int H = src0[1] - src0[-1];
794  int V = src1[0] - src2[ 0];
795 
796  for (k = 2; k <= 4; ++k) {
797  src1 += stride; src2 -= stride;
798  H += k*(src0[k] - src0[-k]);
799  V += k*(src1[0] - src2[ 0]);
800  }
801  for (; k <= 8; ++k) {
802  src1 += stride; src2 -= stride;
803  V += k*(src1[0] - src2[0]);
804  }
805 
806  H = (17*H+16) >> 5;
807  V = (5*V+32) >> 6;
808 
809  a = 16*(src1[0] + src2[8] + 1) - 7*V - 3*H;
810  for(j=16; j>0; --j) {
811  int b = a;
812  a += V;
813  src[0] = CLIP((b ) >> 5);
814  src[1] = CLIP((b+ H) >> 5);
815  src[2] = CLIP((b+2*H) >> 5);
816  src[3] = CLIP((b+3*H) >> 5);
817  src[4] = CLIP((b+4*H) >> 5);
818  src[5] = CLIP((b+5*H) >> 5);
819  src[6] = CLIP((b+6*H) >> 5);
820  src[7] = CLIP((b+7*H) >> 5);
821  src += stride;
822  }
823 }
824 
/*
 * Helpers for the 8x8 ("8x8l") predictors. All of them expect `src` and
 * `stride` in the enclosing scope; the edge loaders additionally read the
 * `has_topleft` / `has_topright` flags.
 */

/* sample at column x, row y relative to the block's top-left sample */
#define SRC(x,y) src[(x)+(y)*stride]

/* l<y>: (1,2,1)/4 filtered left neighbour of row y */
#define PL(y)                                                                \
    const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;

/* l0..l7: filtered left column; l0 substitutes SRC(-1,0) for the corner
 * when has_topleft is zero, l7 repeats the last sample */
#define PREDICT_8x8_LOAD_LEFT                                                \
    const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0))                   \
                     + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2;                    \
    PL(1) PL(2) PL(3) PL(4) PL(5) PL(6)                                      \
    const int l7 av_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2

/* t<x>: (1,2,1)/4 filtered top neighbour of column x */
#define PT(x)                                                                \
    const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;

/* t0..t7: filtered top row; t0 substitutes SRC(0,-1) for the corner when
 * has_topleft is zero, t7 substitutes SRC(7,-1) for SRC(8,-1) when
 * has_topright is zero */
#define PREDICT_8x8_LOAD_TOP                                                 \
    const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1))                   \
                     + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2;                    \
    PT(1) PT(2) PT(3) PT(4) PT(5) PT(6)                                      \
    const int t7 av_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1))         \
                               + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2

/* assignment form of PT() for the pre-declared top-right variables */
#define PTR(x)                                                               \
    t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;

/* t8..t15: filtered top-right row, or copies of SRC(7,-1) when
 * has_topright is zero */
#define PREDICT_8x8_LOAD_TOPRIGHT                                            \
    int t8, t9, t10, t11, t12, t13, t14, t15;                                \
    if(has_topright) {                                                       \
        PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14)                \
        t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2;                          \
    } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);

/* lt: (1,2,1)/4 filtered top-left corner */
#define PREDICT_8x8_LOAD_TOPLEFT                                             \
    const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2

/* fill eight rows of eight pixels with the pixel4 value v; declares `y`
 * and advances `src` by eight rows */
#define PREDICT_8x8_DC(v)                                                    \
    int y;                                                                   \
    for( y = 0; y < 8; y++ ) {                                               \
        AV_WN4PA(((pixel4*)src)+0, v);                                       \
        AV_WN4PA(((pixel4*)src)+1, v);                                       \
        src += stride;                                                       \
    }
862 
863 static void FUNCC(pred8x8l_128_dc)(uint8_t *_src, int has_topleft,
864  int has_topright, ptrdiff_t _stride)
865 {
866  pixel *src = (pixel*)_src;
867  int stride = _stride>>(sizeof(pixel)-1);
868 
870 }
871 static void FUNCC(pred8x8l_left_dc)(uint8_t *_src, int has_topleft,
872  int has_topright, ptrdiff_t _stride)
873 {
874  pixel *src = (pixel*)_src;
875  int stride = _stride>>(sizeof(pixel)-1);
876 
878  const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3);
880 }
881 static void FUNCC(pred8x8l_top_dc)(uint8_t *_src, int has_topleft,
882  int has_topright, ptrdiff_t _stride)
883 {
884  pixel *src = (pixel*)_src;
885  int stride = _stride>>(sizeof(pixel)-1);
886 
888  const pixel4 dc = PIXEL_SPLAT_X4((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3);
890 }
891 static void FUNCC(pred8x8l_dc)(uint8_t *_src, int has_topleft,
892  int has_topright, ptrdiff_t _stride)
893 {
894  pixel *src = (pixel*)_src;
895  int stride = _stride>>(sizeof(pixel)-1);
896 
899  const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7
900  +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4);
902 }
903 static void FUNCC(pred8x8l_horizontal)(uint8_t *_src, int has_topleft,
904  int has_topright, ptrdiff_t _stride)
905 {
906  pixel *src = (pixel*)_src;
907  int stride = _stride>>(sizeof(pixel)-1);
908  pixel4 a;
909 
911 #define ROW(y) a = PIXEL_SPLAT_X4(l##y); \
912  AV_WN4PA(src+y*stride, a); \
913  AV_WN4PA(src+y*stride+4, a);
914  ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
915 #undef ROW
916 }
917 static void FUNCC(pred8x8l_vertical)(uint8_t *_src, int has_topleft,
918  int has_topright, ptrdiff_t _stride)
919 {
920  int y;
921  pixel *src = (pixel*)_src;
922  int stride = _stride>>(sizeof(pixel)-1);
923  pixel4 a, b;
924 
926  src[0] = t0;
927  src[1] = t1;
928  src[2] = t2;
929  src[3] = t3;
930  src[4] = t4;
931  src[5] = t5;
932  src[6] = t6;
933  src[7] = t7;
934  a = AV_RN4PA(((pixel4*)src)+0);
935  b = AV_RN4PA(((pixel4*)src)+1);
936  for( y = 1; y < 8; y++ ) {
937  AV_WN4PA(((pixel4*)(src+y*stride))+0, a);
938  AV_WN4PA(((pixel4*)(src+y*stride))+1, b);
939  }
940 }
941 static void FUNCC(pred8x8l_down_left)(uint8_t *_src, int has_topleft,
942  int has_topright, ptrdiff_t _stride)
943 {
944  pixel *src = (pixel*)_src;
945  int stride = _stride>>(sizeof(pixel)-1);
948  SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
949  SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
950  SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
951  SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
952  SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
953  SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
954  SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
955  SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
956  SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
957  SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
958  SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
959  SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
960  SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
961  SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
962  SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
963 }
964 static void FUNCC(pred8x8l_down_right)(uint8_t *_src, int has_topleft,
965  int has_topright, ptrdiff_t _stride)
966 {
967  pixel *src = (pixel*)_src;
968  int stride = _stride>>(sizeof(pixel)-1);
972  SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
973  SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
974  SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
975  SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
976  SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
977  SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
978  SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
979  SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
980  SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
981  SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
982  SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
983  SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
984  SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
985  SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
986  SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
987 }
988 static void FUNCC(pred8x8l_vertical_right)(uint8_t *_src, int has_topleft,
989  int has_topright, ptrdiff_t _stride)
990 {
991  pixel *src = (pixel*)_src;
992  int stride = _stride>>(sizeof(pixel)-1);
996  SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
997  SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
998  SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
999  SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
1000  SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
1001  SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
1002  SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
1003  SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
1004  SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
1005  SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
1006  SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
1007  SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
1008  SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
1009  SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
1010  SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
1011  SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
1012  SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
1013  SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
1014  SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
1015  SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
1016  SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
1017  SRC(7,0)= (t6 + t7 + 1) >> 1;
1018 }
1019 static void FUNCC(pred8x8l_horizontal_down)(uint8_t *_src, int has_topleft,
1020  int has_topright, ptrdiff_t _stride)
1021 {
1022  pixel *src = (pixel*)_src;
1023  int stride = _stride>>(sizeof(pixel)-1);
1027  SRC(0,7)= (l6 + l7 + 1) >> 1;
1028  SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
1029  SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
1030  SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
1031  SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
1032  SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
1033  SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
1034  SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
1035  SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
1036  SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
1037  SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
1038  SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
1039  SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
1040  SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
1041  SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
1042  SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
1043  SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
1044  SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
1045  SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
1046  SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
1047  SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
1048  SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
1049 }
1050 static void FUNCC(pred8x8l_vertical_left)(uint8_t *_src, int has_topleft,
1051  int has_topright, ptrdiff_t _stride)
1052 {
1053  pixel *src = (pixel*)_src;
1054  int stride = _stride>>(sizeof(pixel)-1);
1057  SRC(0,0)= (t0 + t1 + 1) >> 1;
1058  SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
1059  SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
1060  SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
1061  SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
1062  SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
1063  SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
1064  SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
1065  SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
1066  SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
1067  SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
1068  SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
1069  SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
1070  SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
1071  SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
1072  SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
1073  SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
1074  SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
1075  SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
1076  SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
1077  SRC(7,6)= (t10 + t11 + 1) >> 1;
1078  SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
1079 }
1080 static void FUNCC(pred8x8l_horizontal_up)(uint8_t *_src, int has_topleft,
1081  int has_topright, ptrdiff_t _stride)
1082 {
1083  pixel *src = (pixel*)_src;
1084  int stride = _stride>>(sizeof(pixel)-1);
1086  SRC(0,0)= (l0 + l1 + 1) >> 1;
1087  SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
1088  SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
1089  SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
1090  SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
1091  SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
1092  SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
1093  SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
1094  SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
1095  SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
1096  SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
1097  SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
1098  SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
1099  SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
1100  SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
1101  SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
1102  SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
1103  SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
1104 }
1105 
1106 static void FUNCC(pred8x8l_vertical_filter_add)(uint8_t *_src, int16_t *_block, int has_topleft,
1107  int has_topright, ptrdiff_t _stride)
1108 {
1109  int i;
1110  pixel *src = (pixel*)_src;
1111  const dctcoef *block = (const dctcoef*)_block;
1112  pixel pix[8];
1113  int stride = _stride>>(sizeof(pixel)-1);
1115 
1116  pix[0] = t0;
1117  pix[1] = t1;
1118  pix[2] = t2;
1119  pix[3] = t3;
1120  pix[4] = t4;
1121  pix[5] = t5;
1122  pix[6] = t6;
1123  pix[7] = t7;
1124 
1125  for(i=0; i<8; i++){
1126  pixel v = pix[i];
1127  src[0*stride]= v += block[0];
1128  src[1*stride]= v += block[8];
1129  src[2*stride]= v += block[16];
1130  src[3*stride]= v += block[24];
1131  src[4*stride]= v += block[32];
1132  src[5*stride]= v += block[40];
1133  src[6*stride]= v += block[48];
1134  src[7*stride]= v + block[56];
1135  src++;
1136  block++;
1137  }
1138 
1139  memset(_block, 0, sizeof(dctcoef) * 64);
1140 }
1141 
1142 static void FUNCC(pred8x8l_horizontal_filter_add)(uint8_t *_src, int16_t *_block, int has_topleft,
1143  int has_topright, ptrdiff_t _stride)
1144 {
1145  int i;
1146  pixel *src = (pixel*)_src;
1147  const dctcoef *block = (const dctcoef*)_block;
1148  pixel pix[8];
1149  int stride = _stride>>(sizeof(pixel)-1);
1151 
1152  pix[0] = l0;
1153  pix[1] = l1;
1154  pix[2] = l2;
1155  pix[3] = l3;
1156  pix[4] = l4;
1157  pix[5] = l5;
1158  pix[6] = l6;
1159  pix[7] = l7;
1160 
1161  for(i=0; i<8; i++){
1162  pixel v = pix[i];
1163  src[0]= v += block[0];
1164  src[1]= v += block[1];
1165  src[2]= v += block[2];
1166  src[3]= v += block[3];
1167  src[4]= v += block[4];
1168  src[5]= v += block[5];
1169  src[6]= v += block[6];
1170  src[7]= v + block[7];
1171  src+= stride;
1172  block+= 8;
1173  }
1174 
1175  memset(_block, 0, sizeof(dctcoef) * 64);
1176 }
1177 
/* Retire the helper macros referenced by the pred8x8l_* functions above so
 * their short names (SRC, PL, PT, PTR, ...) do not leak into the code that
 * follows. */
1178 #undef PREDICT_8x8_LOAD_LEFT
1179 #undef PREDICT_8x8_LOAD_TOP
1180 #undef PREDICT_8x8_LOAD_TOPLEFT
1181 #undef PREDICT_8x8_LOAD_TOPRIGHT
1182 #undef PREDICT_8x8_DC
1183 #undef PTR
1184 #undef PT
1185 #undef PL
1186 #undef SRC
1187 
1188 static void FUNCC(pred4x4_vertical_add)(uint8_t *_pix, int16_t *_block,
1189  ptrdiff_t stride)
1190 {
1191  int i;
1192  pixel *pix = (pixel*)_pix;
1193  const dctcoef *block = (const dctcoef*)_block;
1194  stride >>= sizeof(pixel)-1;
1195  pix -= stride;
1196  for(i=0; i<4; i++){
1197  pixel v = pix[0];
1198  pix[1*stride]= v += block[0];
1199  pix[2*stride]= v += block[4];
1200  pix[3*stride]= v += block[8];
1201  pix[4*stride]= v + block[12];
1202  pix++;
1203  block++;
1204  }
1205 
1206  memset(_block, 0, sizeof(dctcoef) * 16);
1207 }
1208 
1209 static void FUNCC(pred4x4_horizontal_add)(uint8_t *_pix, int16_t *_block,
1210  ptrdiff_t stride)
1211 {
1212  int i;
1213  pixel *pix = (pixel*)_pix;
1214  const dctcoef *block = (const dctcoef*)_block;
1215  stride >>= sizeof(pixel)-1;
1216  for(i=0; i<4; i++){
1217  pixel v = pix[-1];
1218  pix[0]= v += block[0];
1219  pix[1]= v += block[1];
1220  pix[2]= v += block[2];
1221  pix[3]= v + block[3];
1222  pix+= stride;
1223  block+= 4;
1224  }
1225 
1226  memset(_block, 0, sizeof(dctcoef) * 16);
1227 }
1228 
1229 static void FUNCC(pred8x8l_vertical_add)(uint8_t *_pix, int16_t *_block,
1230  ptrdiff_t stride)
1231 {
1232  int i;
1233  pixel *pix = (pixel*)_pix;
1234  const dctcoef *block = (const dctcoef*)_block;
1235  stride >>= sizeof(pixel)-1;
1236  pix -= stride;
1237  for(i=0; i<8; i++){
1238  pixel v = pix[0];
1239  pix[1*stride]= v += block[0];
1240  pix[2*stride]= v += block[8];
1241  pix[3*stride]= v += block[16];
1242  pix[4*stride]= v += block[24];
1243  pix[5*stride]= v += block[32];
1244  pix[6*stride]= v += block[40];
1245  pix[7*stride]= v += block[48];
1246  pix[8*stride]= v + block[56];
1247  pix++;
1248  block++;
1249  }
1250 
1251  memset(_block, 0, sizeof(dctcoef) * 64);
1252 }
1253 
1254 static void FUNCC(pred8x8l_horizontal_add)(uint8_t *_pix, int16_t *_block,
1255  ptrdiff_t stride)
1256 {
1257  int i;
1258  pixel *pix = (pixel*)_pix;
1259  const dctcoef *block = (const dctcoef*)_block;
1260  stride >>= sizeof(pixel)-1;
1261  for(i=0; i<8; i++){
1262  pixel v = pix[-1];
1263  pix[0]= v += block[0];
1264  pix[1]= v += block[1];
1265  pix[2]= v += block[2];
1266  pix[3]= v += block[3];
1267  pix[4]= v += block[4];
1268  pix[5]= v += block[5];
1269  pix[6]= v += block[6];
1270  pix[7]= v + block[7];
1271  pix+= stride;
1272  block+= 8;
1273  }
1274 
1275  memset(_block, 0, sizeof(dctcoef) * 64);
1276 }
1277 
1278 static void FUNCC(pred16x16_vertical_add)(uint8_t *pix, const int *block_offset,
1279  int16_t *block,
1280  ptrdiff_t stride)
1281 {
1282  int i;
1283  for(i=0; i<16; i++)
1284  FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
1285 }
1286 
1287 static void FUNCC(pred16x16_horizontal_add)(uint8_t *pix,
1288  const int *block_offset,
1289  int16_t *block,
1290  ptrdiff_t stride)
1291 {
1292  int i;
1293  for(i=0; i<16; i++)
1294  FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
1295 }
1296 
1297 static void FUNCC(pred8x8_vertical_add)(uint8_t *pix, const int *block_offset,
1298  int16_t *block, ptrdiff_t stride)
1299 {
1300  int i;
1301  for(i=0; i<4; i++)
1302  FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
1303 }
1304 
1305 static void FUNCC(pred8x16_vertical_add)(uint8_t *pix, const int *block_offset,
1306  int16_t *block, ptrdiff_t stride)
1307 {
1308  int i;
1309  for(i=0; i<4; i++)
1310  FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
1311  for(i=4; i<8; i++)
1312  FUNCC(pred4x4_vertical_add)(pix + block_offset[i+4], block + i*16*sizeof(pixel), stride);
1313 }
1314 
1315 static void FUNCC(pred8x8_horizontal_add)(uint8_t *pix, const int *block_offset,
1316  int16_t *block,
1317  ptrdiff_t stride)
1318 {
1319  int i;
1320  for(i=0; i<4; i++)
1321  FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
1322 }
1323 
1324 static void FUNCC(pred8x16_horizontal_add)(uint8_t *pix,
1325  const int *block_offset,
1326  int16_t *block, ptrdiff_t stride)
1327 {
1328  int i;
1329  for(i=0; i<4; i++)
1330  FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
1331  for(i=4; i<8; i++)
1332  FUNCC(pred4x4_horizontal_add)(pix + block_offset[i+4], block + i*16*sizeof(pixel), stride);
1333 }
pred8x8_vertical_add
static void FUNCC() pred8x8_vertical_add(uint8_t *pix, const int *block_offset, int16_t *block, ptrdiff_t stride)
Definition: h264pred_template.c:1297
pred8x8_mad_cow_dc_0l0
static void FUNC() pred8x8_mad_cow_dc_0l0(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_template.c:733
pred8x8_horizontal_add
static void FUNCC() pred8x8_horizontal_add(uint8_t *pix, const int *block_offset, int16_t *block, ptrdiff_t stride)
Definition: h264pred_template.c:1315
rv40
ptrdiff_t const int const int rv40
Definition: h264pred_template.c:414
pred8x8_plane
static void FUNCC() pred8x8_plane(uint8_t *_src, ptrdiff_t _stride)
Definition: h264pred_template.c:747
pred16x16_horizontal_add
static void FUNCC() pred16x16_horizontal_add(uint8_t *pix, const int *block_offset, int16_t *block, ptrdiff_t stride)
Definition: h264pred_template.c:1287
pred8x8_mad_cow_dc_0lt
static void FUNC() pred8x8_mad_cow_dc_0lt(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_template.c:707
pred16x16_plane
static void FUNCC() pred16x16_plane(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_template.c:459
PREDICT_8x8_LOAD_LEFT
#define PREDICT_8x8_LOAD_LEFT
Definition: h264pred_template.c:828
pred8x8_top_dc
static void FUNCC() pred8x8_top_dc(uint8_t *_src, ptrdiff_t stride)
Definition: h264pred_template.c:574
PREDICT_8x8_LOAD_TOPRIGHT
#define PREDICT_8x8_LOAD_TOPRIGHT
Definition: h264pred_template.c:845
pred8x16_dc
static void FUNCC() pred8x16_dc(uint8_t *_src, ptrdiff_t stride)
Definition: h264pred_template.c:651
src1
const pixel * src1
Definition: h264pred_template.c:421
pred8x16_mad_cow_dc_0l0
static void FUNC() pred8x16_mad_cow_dc_0l0(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_template.c:740
pred8x16_horizontal_add
static void FUNCC() pred8x16_horizontal_add(uint8_t *pix, const int *block_offset, int16_t *block, ptrdiff_t stride)
Definition: h264pred_template.c:1324
pred8x8l_top_dc
static void FUNCC() pred8x8l_top_dc(uint8_t *_src, int has_topleft, int has_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:881
b
#define b
Definition: input.c:41
_stride
ptrdiff_t _stride
Definition: h264pred_template.c:411
svq3
ptrdiff_t const int svq3
Definition: h264pred_template.c:412
LOAD_TOP_EDGE
#define LOAD_TOP_EDGE
Definition: h264pred_template.c:133
pred16x16_left_dc
static void FUNCC() pred16x16_left_dc(uint8_t *_src, ptrdiff_t stride)
Definition: h264pred_template.c:365
pred8x8l_vertical_filter_add
static void FUNCC() pred8x8l_vertical_filter_add(uint8_t *_src, int16_t *_block, int has_topleft, int has_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:1106
PREDICT_16x16_DC
#define PREDICT_16x16_DC(v)
Definition: h264pred_template.c:337
_src
uint8_t ptrdiff_t const uint8_t * _src
Definition: dsp.h:52
pixel4
#define pixel4
Definition: bit_depth_template.c:83
LOAD_LEFT_EDGE
#define LOAD_LEFT_EDGE
Definition: h264pred_template.c:127
dctcoef
#define dctcoef
Definition: bit_depth_template.c:84
pred4x4_left_dc
static void FUNCC() pred4x4_left_dc(uint8_t *_src, const uint8_t *topright, ptrdiff_t _stride)
Definition: h264pred_template.c:73
pred4x4_horizontal_up
static void FUNCC() pred4x4_horizontal_up(uint8_t *_src, const uint8_t *topright, ptrdiff_t _stride)
Definition: h264pred_template.c:250
INIT_CLIP
#define INIT_CLIP
Definition: bit_depth_template.c:87
pred4x4_top_dc
static void FUNCC() pred4x4_top_dc(uint8_t *_src, const uint8_t *topright, ptrdiff_t _stride)
Definition: h264pred_template.c:87
pred4x4_down_right
static void FUNCC() pred4x4_down_right(uint8_t *_src, const uint8_t *topright, ptrdiff_t _stride)
Definition: h264pred_template.c:139
pred8x8_vertical
static void FUNCC() pred8x8_vertical(uint8_t *_src, ptrdiff_t _stride)
Definition: h264pred_template.c:464
t15
static int t15(InterplayACMContext *s, unsigned ind, unsigned col)
Definition: interplayacm.c:339
pred16x16_top_dc
static void FUNCC() pred16x16_top_dc(uint8_t *_src, ptrdiff_t stride)
Definition: h264pred_template.c:380
pred8x8l_left_dc
static void FUNCC() pred8x8l_left_dc(uint8_t *_src, int has_topleft, int has_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:871
intreadwrite.h
pred8x16_mad_cow_dc_l00
static void FUNC() pred8x16_mad_cow_dc_l00(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_template.c:726
a
int a
Definition: h264pred_template.c:416
AV_WN4PA
#define AV_WN4PA
Definition: bit_depth_template.c:95
pred4x4_horizontal
static void FUNCC() pred4x4_horizontal(uint8_t *_src, const uint8_t *topright, ptrdiff_t _stride)
Definition: h264pred_template.c:47
pred8x8_mad_cow_dc_l0t
static void FUNC() pred8x8_mad_cow_dc_l0t(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_template.c:695
pred8x16_vertical_add
static void FUNCC() pred8x16_vertical_add(uint8_t *pix, const int *block_offset, int16_t *block, ptrdiff_t stride)
Definition: h264pred_template.c:1305
PIXEL_SPLAT_X4
#define PIXEL_SPLAT_X4(x)
Definition: bit_depth_template.c:96
PREDICT_8x8_DC
#define PREDICT_8x8_DC(v)
Definition: h264pred_template.c:855
pred4x4_horizontal_add
static void FUNCC() pred4x4_horizontal_add(uint8_t *_pix, int16_t *_block, ptrdiff_t stride)
Definition: h264pred_template.c:1209
pred8x16_mad_cow_dc_0lt
static void FUNC() pred8x16_mad_cow_dc_0lt(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_template.c:713
LOAD_TOP_RIGHT_EDGE
#define LOAD_TOP_RIGHT_EDGE
Definition: h264pred_template.c:115
pred8x8l_horizontal_add
static void FUNCC() pred8x8l_horizontal_add(uint8_t *_pix, int16_t *_block, ptrdiff_t stride)
Definition: h264pred_template.c:1254
NULL
#define NULL
Definition: coverity.c:32
pred8x8_dc
static void FUNCC() pred8x8_dc(uint8_t *_src, ptrdiff_t stride)
Definition: h264pred_template.c:622
pixel
uint8_t pixel
Definition: tiny_ssim.c:41
pred8x8l_horizontal_up
static void FUNCC() pred8x8l_horizontal_up(uint8_t *_src, int has_topleft, int has_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:1080
pred8x8l_down_right
static void FUNCC() pred8x8l_down_right(uint8_t *_src, int has_topleft, int has_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:964
pred16x16_horizontal
static void FUNCC() pred16x16_horizontal(uint8_t *_src, ptrdiff_t stride)
Definition: h264pred_template.c:321
pred4x4_dc
static void FUNCC() pred4x4_dc(uint8_t *_src, const uint8_t *topright, ptrdiff_t _stride)
Definition: h264pred_template.c:58
mathops.h
bit_depth_template.c
pred8x8_mad_cow_dc_l00
static void FUNC() pred8x8_mad_cow_dc_l00(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_template.c:719
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
pred4x4_horizontal_down
static void FUNCC() pred4x4_horizontal_down(uint8_t *_src, const uint8_t *topright, ptrdiff_t _stride)
Definition: h264pred_template.c:275
FUNCC
#define FUNCC(a)
Definition: bit_depth_template.c:105
pred16x16_vertical_add
static void FUNCC() pred16x16_vertical_add(uint8_t *pix, const int *block_offset, int16_t *block, ptrdiff_t stride)
Definition: h264pred_template.c:1278
pred8x8_horizontal
static void FUNCC() pred8x8_horizontal(uint8_t *_src, ptrdiff_t stride)
Definition: h264pred_template.c:492
pred8x8_left_dc
static void FUNCC() pred8x8_left_dc(uint8_t *_src, ptrdiff_t stride)
Definition: h264pred_template.c:542
pred8x8l_horizontal
static void FUNCC() pred8x8l_horizontal(uint8_t *_src, int has_topleft, int has_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:903
dc
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32 - hcoeff[1] - hcoeff[2] - ... a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2} an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||......... intra?||||:Block01 :yes no||||:Block02 :....... 
..........||||:Block03 ::y DC ::ref index:||||:Block04 ::cb DC ::motion x :||||......... :cr DC ::motion y :||||....... ..........|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------ ------------ ------------|||Y subbands||Cb subbands||Cr subbands||||--- ---||--- ---||--- ---|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------ ------------ ------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction ------------|\ Dequantization ------------------- \||Reference frames|\ IDWT|------- -------|Motion \|||Frame 0||Frame 1||Compensation . OBMC v -------|------- -------|--------------. \------> Frame n output Frame Frame<----------------------------------/|...|------------------- Range Coder:============Binary Range Coder:------------------- The implemented range coder is an adapted version based upon "Range encoding: an algorithm for removing redundancy from a digitised message." by G. N. N. Martin. The symbols encoded by the Snow range coder are bits(0|1). The associated probabilities are not fix but change depending on the symbol mix seen so far. 
bit seen|new state ---------+----------------------------------------------- 0|256 - state_transition_table[256 - old_state];1|state_transition_table[old_state];state_transition_table={ 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:------------------------- FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector 
Prediction:=========================1. the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled top and top right vectors is used as motion vector prediction the used motion vector is the sum of the predictor and(mvx_diff, mvy_diff) *mv_scale Intra DC Prediction block[y][x] dc[1]
Definition: snow.txt:400
PREDICT_8x8_LOAD_TOP
#define PREDICT_8x8_LOAD_TOP
Definition: h264pred_template.c:836
AV_RN4PA
#define AV_RN4PA
Definition: bit_depth_template.c:92
PRED8x8_X
#define PRED8x8_X(n, v)
Definition: h264pred_template.c:517
pred8x8l_down_left
static void FUNCC() pred8x8l_down_left(uint8_t *_src, int has_topleft, int has_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:941
SRC
#define SRC(x, y)
Definition: h264pred_template.c:825
pred8x16_plane
static void FUNCC() pred8x16_plane(uint8_t *_src, ptrdiff_t _stride)
Definition: h264pred_template.c:783
pred4x4_down_left
static void FUNCC() pred4x4_down_left(uint8_t *_src, const uint8_t *_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:166
ROW
#define ROW(y)
pred8x8l_dc
static void FUNCC() pred8x8l_dc(uint8_t *_src, int has_topleft, int has_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:891
pred8x8l_vertical_right
static void FUNCC() pred8x8l_vertical_right(uint8_t *_src, int has_topleft, int has_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:988
pred4x4_vertical_right
static void FUNCC() pred4x4_vertical_right(uint8_t *_src, const uint8_t *topright, ptrdiff_t _stride)
Definition: h264pred_template.c:194
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
pred8x16_top_dc
static void FUNCC() pred8x16_top_dc(uint8_t *_src, ptrdiff_t stride)
Definition: h264pred_template.c:600
src2
const pixel * src2
Definition: h264pred_template.c:422
pred8x16_left_dc
static void FUNCC() pred8x16_left_dc(uint8_t *_src, ptrdiff_t stride)
Definition: h264pred_template.c:568
PRED16x16_X
#define PRED16x16_X(n, v)
Definition: h264pred_template.c:395
pred8x8_128_dc
FUNCC() pred8x8_128_dc(_src+8 *stride, stride)
pred16x16_vertical
static void FUNCC() pred16x16_vertical(uint8_t *_src, ptrdiff_t _stride)
Definition: h264pred_template.c:303
pred4x4_128_dc
static void FUNCC() pred4x4_128_dc(uint8_t *_src, const uint8_t *topright, ptrdiff_t _stride)
Definition: h264pred_template.c:101
stride
int stride
Definition: h264pred_template.c:419
pred8x16_mad_cow_dc_l0t
static void FUNC() pred8x16_mad_cow_dc_l0t(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_template.c:701
FUNC
#define FUNC(a)
Definition: bit_depth_template.c:104
pred8x8l_vertical_add
static void FUNCC() pred8x8l_vertical_add(uint8_t *_pix, int16_t *_block, ptrdiff_t stride)
Definition: h264pred_template.c:1229
pred8x8l_horizontal_filter_add
static void FUNCC() pred8x8l_horizontal_filter_add(uint8_t *_src, int16_t *_block, int has_topleft, int has_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:1142
pred8x8l_128_dc
static void FUNCC() pred8x8l_128_dc(uint8_t *_src, int has_topleft, int has_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:863
pred4x4_vertical
static void FUNCC() pred4x4_vertical(uint8_t *_src, const uint8_t *topright, ptrdiff_t _stride)
Definition: h264pred_template.c:34
pred16x16_dc
static void FUNCC() pred16x16_dc(uint8_t *_src, ptrdiff_t stride)
Definition: h264pred_template.c:346
pred4x4_vertical_left
static void FUNCC() pred4x4_vertical_left(uint8_t *_src, const uint8_t *_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:222
src0
const pixel *const src0
Definition: h264pred_template.c:420
H
int H
Definition: h264pred_template.c:423
BIT_DEPTH
#define BIT_DEPTH
Definition: dsp_init.c:38
CLIP
@ CLIP
Definition: qdrw.c:37
pred8x8l_horizontal_down
static void FUNCC() pred8x8l_horizontal_down(uint8_t *_src, int has_topleft, int has_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:1019
pred8x8l_vertical_left
static void FUNCC() pred8x8l_vertical_left(uint8_t *_src, int has_topleft, int has_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:1050
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
pred8x8l_vertical
static void FUNCC() pred8x8l_vertical(uint8_t *_src, int has_topleft, int has_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:917
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
pred8x16_vertical
static void FUNCC() pred8x16_vertical(uint8_t *_src, ptrdiff_t _stride)
Definition: h264pred_template.c:478
pred8x16_horizontal
static void FUNCC() pred8x16_horizontal(uint8_t *_src, ptrdiff_t stride)
Definition: h264pred_template.c:505
PREDICT_8x8_LOAD_TOPLEFT
#define PREDICT_8x8_LOAD_TOPLEFT
Definition: h264pred_template.c:852
V
int V
Definition: h264pred_template.c:424
pred4x4_vertical_add
static void FUNCC() pred4x4_vertical_add(uint8_t *_pix, int16_t *_block, ptrdiff_t stride)
Definition: h264pred_template.c:1188