Go to the documentation of this file.
   34                        const uint8_t *
left, 
const uint8_t *_top)
 
   48                        const uint8_t *
left, 
const uint8_t *_top)
 
   57     for (y = 0; y < 8; y++) {
 
   65                          const uint8_t *
left, 
const uint8_t *_top)
 
   76     for (y = 0; y < 16; y++) {
 
   86                          const uint8_t *
left, 
const uint8_t *_top)
 
  101     for (y = 0; y < 32; y++) {
 
  115                       const uint8_t *_left, 
const uint8_t *top)
 
  128                       const uint8_t *_left, 
const uint8_t *top)
 
  135     for (y = 0; y < 8; y++) {
 
  145                         const uint8_t *_left, 
const uint8_t *top)
 
  152     for (y = 0; y < 16; y++) {
 
  164                         const uint8_t *_left, 
const uint8_t *top)
 
  171     for (y = 0; y < 32; y++) {
 
  189                      const uint8_t *_left, 
const uint8_t *_top)
 
  197     for (y = 0; y < 4; y++) {
 
  198         int l_m_tl = 
left[3 - y] - tl;
 
  209                      const uint8_t *_left, 
const uint8_t *_top)
 
  217     for (y = 0; y < 8; y++) {
 
  218         int l_m_tl = 
left[7 - y] - tl;
 
  233                        const uint8_t *_left, 
const uint8_t *_top)
 
  241     for (y = 0; y < 16; y++) {
 
  242         int l_m_tl = 
left[15 - y] - tl;
 
  265                        const uint8_t *_left, 
const uint8_t *_top)
 
  273     for (y = 0; y < 32; y++) {
 
  274         int l_m_tl = 
left[31 - y] - tl;
 
  315                      const uint8_t *_left, 
const uint8_t *_top)
 
  321                                 top[0] + top[1] + top[2] + top[3] + 4) >> 3);
 
  331                      const uint8_t *_left, 
const uint8_t *_top)
 
  338           left[6] + 
left[7] + top[0] + top[1] + top[2] + top[3] +
 
  339           top[4] + top[5] + top[6] + top[7] + 8) >> 4);
 
  343     for (y = 0; y < 8; y++) {
 
  351                        const uint8_t *_left, 
const uint8_t *_top)
 
  359           left[13] + 
left[14] + 
left[15] + top[0] + top[1] + top[2] + top[3] +
 
  360           top[4] + top[5] + top[6] + top[7] + top[8] + top[9] + top[10] +
 
  361           top[11] + top[12] + top[13] + top[14] + top[15] + 16) >> 5);
 
  365     for (y = 0; y < 16; y++) {
 
  375                        const uint8_t *_left, 
const uint8_t *_top)
 
  386           left[31] + top[0] + top[1] + top[2] + top[3] + top[4] + top[5] +
 
  387           top[6] + top[7] + top[8] + top[9] + top[10] + top[11] + top[12] +
 
  388           top[13] + top[14] + top[15] + top[16] + top[17] + top[18] + top[19] +
 
  389           top[20] + top[21] + top[22] + top[23] + top[24] + top[25] + top[26] +
 
  390           top[27] + top[28] + top[29] + top[30] + top[31] + 32) >> 6);
 
  394     for (y = 0; y < 32; y++) {
 
  408                           const uint8_t *_left, 
const uint8_t *top)
 
  422                           const uint8_t *_left, 
const uint8_t *top)
 
  432     for (y = 0; y < 8; y++) {
 
  440                             const uint8_t *_left, 
const uint8_t *top)
 
  451     for (y = 0; y < 16; y++) {
 
  461                             const uint8_t *_left, 
const uint8_t *top)
 
  475     for (y = 0; y < 32; y++) {
 
  489                          const uint8_t *
left, 
const uint8_t *_top)
 
  503                          const uint8_t *
left, 
const uint8_t *_top)
 
  508         ((top[0] + top[1] + top[2] + top[3] +
 
  509           top[4] + top[5] + top[6] + top[7] + 4) >> 3);
 
  513     for (y = 0; y < 8; y++) {
 
  521                            const uint8_t *
left, 
const uint8_t *_top)
 
  526         ((top[0] + top[1] + top[2] + top[3] + top[4] + top[5] +
 
  527           top[6] + top[7] + top[8] + top[9] + top[10] + top[11] +
 
  528           top[12] + top[13] + top[14] + top[15] + 8) >> 4);
 
  532     for (y = 0; y < 16; y++) {
 
  542                            const uint8_t *
left, 
const uint8_t *_top)
 
  547         ((top[0] + top[1] + top[2] + top[3] + top[4] + top[5] +
 
  548           top[6] + top[7] + top[8] + top[9] + top[10] + top[11] +
 
  549           top[12] + top[13] + top[14] + top[15] + top[16] + top[17] +
 
  550           top[18] + top[19] + top[20] + top[21] + top[22] + top[23] +
 
  551           top[24] + top[25] + top[26] + top[27] + top[28] + top[29] +
 
  552           top[30] + top[31] + 16) >> 5);
 
  556     for (y = 0; y < 32; y++) {
 
  572                          const uint8_t *
left, 
const uint8_t *top)
 
  585                          const uint8_t *
left, 
const uint8_t *top)
 
  592     for (y = 0; y < 8; y++) {
 
  600                            const uint8_t *
left, 
const uint8_t *top)
 
  607     for (y = 0; y < 16; y++) {
 
  617                            const uint8_t *
left, 
const uint8_t *top)
 
  624     for (y = 0; y < 32; y++) {
 
  638                          const uint8_t *
left, 
const uint8_t *top)
 
  650                          const uint8_t *
left, 
const uint8_t *top)
 
  657     for (y = 0; y < 8; y++) {
 
  665                            const uint8_t *
left, 
const uint8_t *top)
 
  672     for (y = 0; y < 16; y++) {
 
  682                            const uint8_t *
left, 
const uint8_t *top)
 
  689     for (y = 0; y < 32; y++) {
 
  703                          const uint8_t *
left, 
const uint8_t *top)
 
  716                          const uint8_t *
left, 
const uint8_t *top)
 
  723     for (y = 0; y < 8; y++) {
 
  731                            const uint8_t *
left, 
const uint8_t *top)
 
  738     for (y = 0; y < 16; y++) {
 
  748                            const uint8_t *
left, 
const uint8_t *top)
 
  755     for (y = 0; y < 32; y++) {
 
  771 #define memset_bpc memset 
  775     for (n = 0; n < 
len; n++) {
 
  781 #define DST(x, y) dst[(x) + (y) * stride] 
  784                                 const uint8_t *
left, 
const uint8_t *_top)
 
  788     int a0 = top[0], 
a1 = top[1], 
a2 = top[2], 
a3 = top[3],
 
  789         a4 = top[4], 
a5 = top[5], a6 = top[6], a7 = top[7];
 
  796     DST(3,1) = 
DST(2,2) = 
DST(1,3) = (
a4 + 
a5 * 2 + a6 + 2) >> 2;
 
  797     DST(3,2) = 
DST(2,3) = (
a5 + a6 * 2 + a7 + 2) >> 2;
 
  801 #define def_diag_downleft(size) \ 
  802 static void diag_downleft_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \ 
  803                                               const uint8_t *left, const uint8_t *_top) \ 
  805     pixel *dst = (pixel *) _dst; \ 
  806     const pixel *top = (const pixel *) _top; \ 
  810     stride /= sizeof(pixel); \ 
  811     for (i = 0; i < size - 2; i++) \ 
  812         v[i] = (top[i] + top[i + 1] * 2 + top[i + 2] + 2) >> 2; \ 
  813     v[size - 2] = (top[size - 2] + top[size - 1] * 3 + 2) >> 2; \ 
  815     for (j = 0; j < size; j++) { \ 
  816         memcpy(dst + j*stride, v + j, (size - 1 - j) * sizeof(pixel)); \ 
  817         memset_bpc(dst + j*stride + size - 1 - j, top[size - 1], j + 1); \ 
  825 static 
void diag_downright_4x4_c(uint8_t *_dst, ptrdiff_t 
stride,
 
  826                                  const uint8_t *_left, const uint8_t *_top)
 
  831     int tl = top[-1], 
a0 = top[0], 
a1 = top[1], 
a2 = top[2], 
a3 = top[3],
 
  835     DST(0,3) = (l1 + l2 * 2 + l3 + 2) >> 2;
 
  836     DST(0,2) = 
DST(1,3) = (l0 + l1 * 2 + l2 + 2) >> 2;
 
  837     DST(0,1) = 
DST(1,2) = 
DST(2,3) = (tl + l0 * 2 + l1 + 2) >> 2;
 
  838     DST(0,0) = 
DST(1,1) = 
DST(2,2) = 
DST(3,3) = (l0 + tl * 2 + 
a0 + 2) >> 2;
 
  839     DST(1,0) = 
DST(2,1) = 
DST(3,2) = (tl + 
a0 * 2 + 
a1 + 2) >> 2;
 
  844 #define def_diag_downright(size) \ 
  845 static void diag_downright_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \ 
  846                                                const uint8_t *_left, const uint8_t *_top) \ 
  848     pixel *dst = (pixel *) _dst; \ 
  849     const pixel *top = (const pixel *) _top; \ 
  850     const pixel *left = (const pixel *) _left; \ 
  852     pixel v[size + size - 1]; \ 
  854     stride /= sizeof(pixel); \ 
  855     for (i = 0; i < size - 2; i++) { \ 
  856         v[i           ] = (left[i] + left[i + 1] * 2 + left[i + 2] + 2) >> 2; \ 
  857         v[size + 1 + i] = (top[i]  + top[i + 1]  * 2 + top[i + 2]  + 2) >> 2; \ 
  859     v[size - 2] = (left[size - 2] + left[size - 1] * 2 + top[-1] + 2) >> 2; \ 
  860     v[size - 1] = (left[size - 1] + top[-1] * 2 + top[ 0] + 2) >> 2; \ 
  861     v[size    ] = (top[-1] + top[0]  * 2 + top[ 1] + 2) >> 2; \ 
  863     for (j = 0; j < size; j++) \ 
  864         memcpy(dst + j*stride, v + size - 1 - j, size * sizeof(pixel)); \ 
  871 static 
void vert_right_4x4_c(uint8_t *_dst, ptrdiff_t 
stride,
 
  872                              const uint8_t *_left, const uint8_t *_top)
 
  877     int tl = top[-1], 
a0 = top[0], 
a1 = top[1], 
a2 = top[2], 
a3 = top[3],
 
  881     DST(0,3) = (l0 + l1 * 2 + l2 + 2) >> 2;
 
  882     DST(0,2) = (tl + l0 * 2 + l1 + 2) >> 2;
 
  883     DST(0,0) = 
DST(1,2) = (tl + 
a0 + 1) >> 1;
 
  884     DST(0,1) = 
DST(1,3) = (l0 + tl * 2 + 
a0 + 2) >> 2;
 
  886     DST(1,1) = 
DST(2,3) = (tl + 
a0 * 2 + 
a1 + 2) >> 2;
 
  893 #define def_vert_right(size) \ 
  894 static void vert_right_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \ 
  895                                            const uint8_t *_left, const uint8_t *_top) \ 
  897     pixel *dst = (pixel *) _dst; \ 
  898     const pixel *top = (const pixel *) _top; \ 
  899     const pixel *left = (const pixel *) _left; \ 
  901     pixel ve[size + size/2 - 1], vo[size + size/2 - 1]; \ 
  903     stride /= sizeof(pixel); \ 
  904     for (i = 0; i < size/2 - 2; i++) { \ 
  905         vo[i] = (left[i*2 + 3] + left[i*2 + 2] * 2 + left[i*2 + 1] + 2) >> 2; \ 
  906         ve[i] = (left[i*2 + 4] + left[i*2 + 3] * 2 + left[i*2 + 2] + 2) >> 2; \ 
  908     vo[size/2 - 2] = (left[size - 1] + left[size - 2] * 2 + left[size - 3] + 2) >> 2; \ 
  909     ve[size/2 - 2] = (top[-1] + left[size - 1] * 2 + left[size - 2] + 2) >> 2; \ 
  911     ve[size/2 - 1] = (top[-1] + top[0] + 1) >> 1; \ 
  912     vo[size/2 - 1] = (left[size - 1] + top[-1] * 2 + top[0] + 2) >> 2; \ 
  913     for (i = 0; i < size - 1; i++) { \ 
  914         ve[size/2 + i] = (top[i] + top[i + 1] + 1) >> 1; \ 
  915         vo[size/2 + i] = (top[i - 1] + top[i] * 2 + top[i + 1] + 2) >> 2; \ 
  918     for (j = 0; j < size / 2; j++) { \ 
  919         memcpy(dst +  j*2     *stride, ve + size/2 - 1 - j, size * sizeof(pixel)); \ 
  920         memcpy(dst + (j*2 + 1)*stride, vo + size/2 - 1 - j, size * sizeof(pixel)); \ 
  928 static 
void hor_down_4x4_c(uint8_t *_dst, ptrdiff_t 
stride,
 
  929                            const uint8_t *_left, const uint8_t *_top)
 
  935         tl = top[-1], 
a0 = top[0], 
a1 = top[1], 
a2 = top[2];
 
  938     DST(2,0) = (tl + 
a0 * 2 + 
a1 + 2) >> 2;
 
  940     DST(0,0) = 
DST(2,1) = (tl + l0 + 1) >> 1;
 
  941     DST(1,0) = 
DST(3,1) = (
a0 + tl * 2 + l0 + 2) >> 2;
 
  942     DST(0,1) = 
DST(2,2) = (l0 + l1 + 1) >> 1;
 
  943     DST(1,1) = 
DST(3,2) = (tl + l0 * 2 + l1 + 2) >> 2;
 
  944     DST(0,2) = 
DST(2,3) = (l1 + l2 + 1) >> 1;
 
  945     DST(1,2) = 
DST(3,3) = (l0 + l1 * 2 + l2 + 2) >> 2;
 
  946     DST(0,3) = (l2 + l3 + 1) >> 1;
 
  947     DST(1,3) = (l1 + l2 * 2 + l3 + 2) >> 2;
 
  950 #define def_hor_down(size) \ 
  951 static void hor_down_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \ 
  952                                          const uint8_t *_left, const uint8_t *_top) \ 
  954     pixel *dst = (pixel *) _dst; \ 
  955     const pixel *top = (const pixel *) _top; \ 
  956     const pixel *left = (const pixel *) _left; \ 
  958     pixel v[size * 3 - 2]; \ 
  960     stride /= sizeof(pixel); \ 
  961     for (i = 0; i < size - 2; i++) { \ 
  962         v[i*2       ] = (left[i + 1] + left[i + 0] + 1) >> 1; \ 
  963         v[i*2    + 1] = (left[i + 2] + left[i + 1] * 2 + left[i + 0] + 2) >> 2; \ 
  964         v[size*2 + i] = (top[i - 1] + top[i] * 2 + top[i + 1] + 2) >> 2; \ 
  966     v[size*2 - 2] = (top[-1] + left[size - 1] + 1) >> 1; \ 
  967     v[size*2 - 4] = (left[size - 1] + left[size - 2] + 1) >> 1; \ 
  968     v[size*2 - 1] = (top[0]  + top[-1] * 2 + left[size - 1] + 2) >> 2; \ 
  969     v[size*2 - 3] = (top[-1] + left[size - 1] * 2 + left[size - 2] + 2) >> 2; \ 
  971     for (j = 0; j < size; j++) \ 
  972         memcpy(dst + j*stride, v + size*2 - 2 - j*2, size * sizeof(pixel)); \ 
  979 static 
void vert_left_4x4_c(uint8_t *_dst, ptrdiff_t 
stride,
 
  980                             const uint8_t *
left, const uint8_t *_top)
 
  984     int a0 = top[0], 
a1 = top[1], 
a2 = top[2], 
a3 = top[3],
 
  985         a4 = top[4], 
a5 = top[5], a6 = top[6];
 
  997     DST(3,3) = (
a4 + 
a5 * 2 + a6 + 2) >> 2;
 
 1000 #define def_vert_left(size) \ 
 1001 static void vert_left_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \ 
 1002                                           const uint8_t *left, const uint8_t *_top) \ 
 1004     pixel *dst = (pixel *) _dst; \ 
 1005     const pixel *top = (const pixel *) _top; \ 
 1007     pixel ve[size - 1], vo[size - 1]; \ 
 1009     stride /= sizeof(pixel); \ 
 1010     for (i = 0; i < size - 2; i++) { \ 
 1011         ve[i] = (top[i] + top[i + 1] + 1) >> 1; \ 
 1012         vo[i] = (top[i] + top[i + 1] * 2 + top[i + 2] + 2) >> 2; \ 
 1014     ve[size - 2] = (top[size - 2] + top[size - 1] + 1) >> 1; \ 
 1015     vo[size - 2] = (top[size - 2] + top[size - 1] * 3 + 2) >> 2; \ 
 1017     for (j = 0; j < size / 2; j++) { \ 
 1018         memcpy(dst +  j*2      * stride, ve + j, (size - j - 1) * sizeof(pixel)); \ 
 1019         memset_bpc(dst +  j*2      * stride + size - j - 1, top[size - 1], j + 1); \ 
 1020         memcpy(dst + (j*2 + 1) * stride, vo + j, (size - j - 1) * sizeof(pixel)); \ 
 1021         memset_bpc(dst + (j*2 + 1) * stride + size - j - 1, top[size - 1], j + 1); \ 
 1029 static 
void hor_up_4x4_c(uint8_t *_dst, ptrdiff_t 
stride,
 
 1030                          const uint8_t *_left, const uint8_t *top)
 
 1037     DST(0,0) = (l0 + l1 + 1) >> 1;
 
 1038     DST(1,0) = (l0 + l1 * 2 + l2 + 2) >> 2;
 
 1039     DST(0,1) = 
DST(2,0) = (l1 + l2 + 1) >> 1;
 
 1040     DST(1,1) = 
DST(3,0) = (l1 + l2 * 2 + l3 + 2) >> 2;
 
 1041     DST(0,2) = 
DST(2,1) = (l2 + l3 + 1) >> 1;
 
 1042     DST(1,2) = 
DST(3,1) = (l2 + l3 * 3 + 2) >> 2;
 
 1046 #define def_hor_up(size) \ 
 1047 static void hor_up_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \ 
 1048                                        const uint8_t *_left, const uint8_t *top) \ 
 1050     pixel *dst = (pixel *) _dst; \ 
 1051     const pixel *left = (const pixel *) _left; \ 
 1053     pixel v[size*2 - 2]; \ 
 1055     stride /= sizeof(pixel); \ 
 1056     for (i = 0; i < size - 2; i++) { \ 
 1057         v[i*2    ] = (left[i] + left[i + 1] + 1) >> 1; \ 
 1058         v[i*2 + 1] = (left[i] + left[i + 1] * 2 + left[i + 2] + 2) >> 2; \ 
 1060     v[size*2 - 4] = (left[size - 2] + left[size - 1] + 1) >> 1; \ 
 1061     v[size*2 - 3] = (left[size - 2] + left[size - 1] * 3 + 2) >> 2; \ 
 1063     for (j = 0; j < size / 2; j++) \ 
 1064         memcpy(dst + j*stride, v + j*2, size * sizeof(pixel)); \ 
 1065     for (j = size / 2; j < size; j++) { \ 
 1066         memcpy(dst + j*stride, v + j*2, (size*2 - 2 - j*2) * sizeof(pixel)); \ 
 1067         memset_bpc(dst + j*stride + size*2 - 2 - j*2, left[size - 1], \ 
 1088 #define init_intra_pred_bd_aware(tx, sz) \ 
 1089     dsp->intra_pred[tx][TM_VP8_PRED]          = tm_##sz##_c; \ 
 1090     dsp->intra_pred[tx][DC_128_PRED]          = dc_128_##sz##_c; \ 
 1091     dsp->intra_pred[tx][DC_127_PRED]          = dc_127_##sz##_c; \ 
 1092     dsp->intra_pred[tx][DC_129_PRED]          = dc_129_##sz##_c 
 1095     ff_vp9dsp_intrapred_init_10(dsp);
 
 1096 #define init_intra_pred(tx, sz) \ 
 1097     init_intra_pred_bd_aware(tx, sz) 
 1099     #define init_intra_pred(tx, sz) \ 
 1100     dsp->intra_pred[tx][VERT_PRED]            = vert_##sz##_c; \ 
 1101     dsp->intra_pred[tx][HOR_PRED]             = hor_##sz##_c; \ 
 1102     dsp->intra_pred[tx][DC_PRED]              = dc_##sz##_c; \ 
 1103     dsp->intra_pred[tx][DIAG_DOWN_LEFT_PRED]  = diag_downleft_##sz##_c; \ 
 1104     dsp->intra_pred[tx][DIAG_DOWN_RIGHT_PRED] = diag_downright_##sz##_c; \ 
 1105     dsp->intra_pred[tx][VERT_RIGHT_PRED]      = vert_right_##sz##_c; \ 
 1106     dsp->intra_pred[tx][HOR_DOWN_PRED]        = hor_down_##sz##_c; \ 
 1107     dsp->intra_pred[tx][VERT_LEFT_PRED]       = vert_left_##sz##_c; \ 
 1108     dsp->intra_pred[tx][HOR_UP_PRED]          = hor_up_##sz##_c; \ 
 1109     dsp->intra_pred[tx][LEFT_DC_PRED]         = dc_left_##sz##_c; \ 
 1110     dsp->intra_pred[tx][TOP_DC_PRED]          = dc_top_##sz##_c; \ 
 1111     init_intra_pred_bd_aware(tx, sz) 
 1119 #undef init_intra_pred 
 1120 #undef init_intra_pred_bd_aware 
 1123 #define itxfm_wrapper(type_a, type_b, sz, bits, has_dconly) \ 
 1124 static void type_a##_##type_b##_##sz##x##sz##_add_c(uint8_t *_dst, \ 
 1126                                                     int16_t *_block, int eob) \ 
 1129     pixel *dst = (pixel *) _dst; \ 
 1130     dctcoef *block = (dctcoef *) _block, tmp[sz * sz], out[sz]; \ 
 1132     stride /= sizeof(pixel); \ 
 1133     if (has_dconly && eob == 1) { \ 
 1134         const int t  = ((((dctint) block[0] * 11585 + (1 << 13)) >> 14) \ 
 1135                                             * 11585 + (1 << 13)) >> 14; \ 
 1137         for (i = 0; i < sz; i++) { \ 
 1138             for (j = 0; j < sz; j++) \ 
 1139                 dst[j * stride] = av_clip_pixel(dst[j * stride] + \ 
 1141                                                  (int)(t + (1U << (bits - 1))) >> bits : \ 
 1148     for (i = 0; i < sz; i++) \ 
 1149         type_a##sz##_1d(block + i, sz, tmp + i * sz, 0); \ 
 1150     memset(block, 0, sz * sz * sizeof(*block)); \ 
 1151     for (i = 0; i < sz; i++) { \ 
 1152         type_b##sz##_1d(tmp + i, sz, out, 1); \ 
 1153         for (j = 0; j < sz; j++) \ 
 1154             dst[j * stride] = av_clip_pixel(dst[j * stride] + \ 
 1156                                              (int)(out[j] + (1U << (bits - 1))) >> bits : \ 
 1162 #define itxfm_wrap(sz, bits) \ 
 1163 itxfm_wrapper(idct,  idct,  sz, bits, 1) \ 
 1164 itxfm_wrapper(iadst, idct,  sz, bits, 0) \ 
 1165 itxfm_wrapper(idct,  iadst, sz, bits, 0) \ 
 1166 itxfm_wrapper(iadst, iadst, sz, bits, 0) 
 1168 #define IN(x) ((dctint) in[(x) * stride]) 
 1175     t0 = ((
IN(0) + 
IN(2)) * 11585 + (1 << 13)) >> 14;
 
 1176     t1 = ((
IN(0) - 
IN(2)) * 11585 + (1 << 13)) >> 14;
 
 1177     t2 = (
IN(1) *  6270 - 
IN(3) * 15137 + (1 << 13)) >> 14;
 
 1178     t3 = (
IN(1) * 15137 + 
IN(3) *  6270 + (1 << 13)) >> 14;
 
 1191     t0 =  5283 * 
IN(0) + 15212 * 
IN(2) +  9929 * 
IN(3);
 
 1192     t1 =  9929 * 
IN(0) -  5283 * 
IN(2) - 15212 * 
IN(3);
 
 1193     t2 = 13377 * (
IN(0) - 
IN(2) + 
IN(3));
 
 1196     out[0] = (
t0 + 
t3      + (1 << 13)) >> 14;
 
 1197     out[1] = (
t1 + 
t3      + (1 << 13)) >> 14;
 
 1198     out[2] = (
t2           + (1 << 13)) >> 14;
 
 1199     out[3] = (
t0 + 
t1 - 
t3 + (1 << 13)) >> 14;
 
 1207     dctint t0, t0a, 
t1, t1a, 
t2, t2a, 
t3, t3a, 
t4, t4a, 
t5, t5a, 
t6, t6a, 
t7, t7a;
 
 1209     t0a = ((
IN(0) + 
IN(4)) * 11585 + (1 << 13)) >> 14;
 
 1210     t1a = ((
IN(0) - 
IN(4)) * 11585 + (1 << 13)) >> 14;
 
 1211     t2a = (
IN(2) *  6270 - 
IN(6) * 15137 + (1 << 13)) >> 14;
 
 1212     t3a = (
IN(2) * 15137 + 
IN(6) *  6270 + (1 << 13)) >> 14;
 
 1213     t4a = (
IN(1) *  3196 - 
IN(7) * 16069 + (1 << 13)) >> 14;
 
 1214     t5a = (
IN(5) * 13623 - 
IN(3) *  9102 + (1 << 13)) >> 14;
 
 1215     t6a = (
IN(5) *  9102 + 
IN(3) * 13623 + (1 << 13)) >> 14;
 
 1216     t7a = (
IN(1) * 16069 + 
IN(7) *  3196 + (1 << 13)) >> 14;
 
 1227     t5  = ((t6a - t5a) * 11585 + (1 << 13)) >> 14;
 
 1228     t6  = ((t6a + t5a) * 11585 + (1 << 13)) >> 14;
 
 1243     dctint t0, t0a, 
t1, t1a, 
t2, t2a, 
t3, t3a, 
t4, t4a, 
t5, t5a, 
t6, t6a, 
t7, t7a;
 
 1245     t0a = 16305 * 
IN(7) +  1606 * 
IN(0);
 
 1246     t1a =  1606 * 
IN(7) - 16305 * 
IN(0);
 
 1247     t2a = 14449 * 
IN(5) +  7723 * 
IN(2);
 
 1248     t3a =  7723 * 
IN(5) - 14449 * 
IN(2);
 
 1249     t4a = 10394 * 
IN(3) + 12665 * 
IN(4);
 
 1250     t5a = 12665 * 
IN(3) - 10394 * 
IN(4);
 
 1251     t6a =  4756 * 
IN(1) + 15679 * 
IN(6);
 
 1252     t7a = 15679 * 
IN(1) -  4756 * 
IN(6);
 
 1254     t0 = (t0a + t4a + (1 << 13)) >> 14;
 
 1255     t1 = (t1a + t5a + (1 << 13)) >> 14;
 
 1256     t2 = (t2a + t6a + (1 << 13)) >> 14;
 
 1257     t3 = (t3a + t7a + (1 << 13)) >> 14;
 
 1258     t4 = (t0a - t4a + (1 << 13)) >> 14;
 
 1259     t5 = (t1a - t5a + (1 << 13)) >> 14;
 
 1260     t6 = (t2a - t6a + (1 << 13)) >> 14;
 
 1261     t7 = (t3a - t7a + (1 << 13)) >> 14;
 
 1263     t4a = 15137
U * 
t4 +  6270
U * 
t5;
 
 1264     t5a =  6270
U * 
t4 - 15137
U * 
t5;
 
 1265     t6a = 15137
U * 
t7 -  6270
U * 
t6;
 
 1266     t7a =  6270
U * 
t7 + 15137
U * 
t6;
 
 1273     out[1] = -((
dctint)((1
U << 13) + t4a + t6a) >> 14);
 
 1274     out[6] =   (
dctint)((1
U << 13) + t5a + t7a) >> 14;
 
 1275     t6     =   (
dctint)((1
U << 13) + t4a - t6a) >> 14;
 
 1276     t7     =   (
dctint)((1
U << 13) + t5a - t7a) >> 14;
 
 1289     dctint t0, 
t1, 
t2, 
t3, 
t4, 
t5, 
t6, 
t7, 
t8, 
t9, 
t10, 
t11, 
t12, t13, t14, 
t15;
 
 1290     dctint t0a, t1a, t2a, t3a, t4a, t5a, t6a, t7a;
 
 1291     dctint t8a, t9a, t10a, t11a, t12a, t13a, t14a, t15a;
 
 1293     t0a  = (
dctint)((
IN(0) + 
IN(8)) * 11585
U + (1 << 13)) >> 14;
 
 1294     t1a  = (
dctint)((
IN(0) - 
IN(8)) * 11585
U + (1 << 13)) >> 14;
 
 1295     t2a  = (
dctint)(
IN(4)  *  6270
U - 
IN(12) * 15137
U + (1 << 13)) >> 14;
 
 1296     t3a  = (
dctint)(
IN(4)  * 15137
U + 
IN(12) *  6270
U + (1 << 13)) >> 14;
 
 1297     t4a  = (
dctint)(
IN(2)  *  3196
U - 
IN(14) * 16069
U + (1 << 13)) >> 14;
 
 1298     t7a  = (
dctint)(
IN(2)  * 16069
U + 
IN(14) *  3196
U + (1 << 13)) >> 14;
 
 1299     t5a  = (
dctint)(
IN(10) * 13623
U - 
IN(6)  *  9102
U + (1 << 13)) >> 14;
 
 1300     t6a  = (
dctint)(
IN(10) *  9102
U + 
IN(6)  * 13623
U + (1 << 13)) >> 14;
 
 1301     t8a  = (
dctint)(
IN(1)  *  1606
U - 
IN(15) * 16305
U + (1 << 13)) >> 14;
 
 1302     t15a = (
dctint)(
IN(1)  * 16305
U + 
IN(15) *  1606
U + (1 << 13)) >> 14;
 
 1303     t9a  = (
dctint)(
IN(9)  * 12665
U - 
IN(7)  * 10394
U + (1 << 13)) >> 14;
 
 1304     t14a = (
dctint)(
IN(9)  * 10394
U + 
IN(7)  * 12665
U + (1 << 13)) >> 14;
 
 1305     t10a = (
dctint)(
IN(5)  *  7723
U - 
IN(11) * 14449
U + (1 << 13)) >> 14;
 
 1306     t13a = (
dctint)(
IN(5)  * 14449
U + 
IN(11) *  7723
U + (1 << 13)) >> 14;
 
 1307     t11a = (
dctint)(
IN(13) * 15679
U - 
IN(3)  *  4756
U + (1 << 13)) >> 14;
 
 1308     t12a = (
dctint)(
IN(13) *  4756
U + 
IN(3)  * 15679
U + (1 << 13)) >> 14;
 
 1327     t5a  = (
dctint)((
t6 - 
t5) * 11585
U + (1 << 13)) >> 14;
 
 1328     t6a  = (
dctint)((
t6 + 
t5) * 11585
U + (1 << 13)) >> 14;
 
 1329     t9a  = (
dctint)(  t14 *  6270
U - 
t9  * 15137
U  + (1 << 13)) >> 14;
 
 1330     t14a = (
dctint)(  t14 * 15137
U + 
t9  *  6270
U  + (1 << 13)) >> 14;
 
 1331     t10a = (
dctint)(-(t13 * 15137
U + 
t10 *  6270
U) + (1 << 13)) >> 14;
 
 1332     t13a = (
dctint)(  t13 *  6270
U - 
t10 * 15137
U  + (1 << 13)) >> 14;
 
 1351     t10a = (
dctint)((t13  - 
t10)  * 11585
U + (1 << 13)) >> 14;
 
 1352     t13a = (
dctint)((t13  + 
t10)  * 11585
U + (1 << 13)) >> 14;
 
 1353     t11  = (
dctint)((t12a - t11a) * 11585
U + (1 << 13)) >> 14;
 
 1354     t12  = (
dctint)((t12a + t11a) * 11585
U + (1 << 13)) >> 14;
 
 1356     out[ 0] = t0a + t15a;
 
 1357     out[ 1] = t1a + t14;
 
 1358     out[ 2] = t2a + t13a;
 
 1361     out[ 5] = 
t5  + t10a;
 
 1366     out[10] = 
t5  - t10a;
 
 1369     out[13] = t2a - t13a;
 
 1370     out[14] = t1a - t14;
 
 1371     out[15] = t0a - t15a;
 
 1377     dctint t0, 
t1, 
t2, 
t3, 
t4, 
t5, 
t6, 
t7, 
t8, 
t9, 
t10, 
t11, 
t12, t13, t14, 
t15;
 
 1378     dctint t0a, t1a, t2a, t3a, t4a, t5a, t6a, t7a;
 
 1379     dctint t8a, t9a, t10a, t11a, t12a, t13a, t14a, t15a;
 
 1381     t0  = 
IN(15) * 16364
U + 
IN(0)  *   804
U;
 
 1382     t1  = 
IN(15) *   804
U - 
IN(0)  * 16364
U;
 
 1383     t2  = 
IN(13) * 15893
U + 
IN(2)  *  3981
U;
 
 1384     t3  = 
IN(13) *  3981
U - 
IN(2)  * 15893
U;
 
 1385     t4  = 
IN(11) * 14811
U + 
IN(4)  *  7005
U;
 
 1386     t5  = 
IN(11) *  7005
U - 
IN(4)  * 14811
U;
 
 1387     t6  = 
IN(9)  * 13160
U + 
IN(6)  *  9760
U;
 
 1388     t7  = 
IN(9)  *  9760
U - 
IN(6)  * 13160
U;
 
 1389     t8  = 
IN(7)  * 11003
U + 
IN(8)  * 12140
U;
 
 1390     t9  = 
IN(7)  * 12140
U - 
IN(8)  * 11003
U;
 
 1394     t13 = 
IN(3)  * 15426
U - 
IN(12) *  5520
U;
 
 1395     t14 = 
IN(1)  *  2404
U + 
IN(14) * 16207
U;
 
 1403     t5a  = (
dctint)((1
U << 13) + 
t5 + t13) >> 14;
 
 1404     t6a  = (
dctint)((1
U << 13) + 
t6 + t14) >> 14;
 
 1411     t13a = (
dctint)((1
U << 13) + 
t5 - t13) >> 14;
 
 1412     t14a = (
dctint)((1
U << 13) + 
t6 - t14) >> 14;
 
 1415     t8   = t8a  * 16069
U + t9a  *  3196
U;
 
 1416     t9   = t8a  *  3196
U - t9a  * 16069
U;
 
 1417     t10  = t10a *  9102
U + t11a * 13623
U;
 
 1418     t11  = t10a * 13623
U - t11a *  9102
U;
 
 1419     t12  = t13a * 16069
U - t12a *  3196
U;
 
 1420     t13  = t13a *  3196
U + t12a * 16069
U;
 
 1421     t14  = t15a *  9102
U - t14a * 13623
U;
 
 1422     t15  = t15a * 13623
U + t14a *  9102
U;
 
 1433     t9a  = (
dctint)((1
U << 13) + 
t9  + t13) >> 14;
 
 1434     t10a = (
dctint)((1
U << 13) + 
t10 + t14) >> 14;
 
 1437     t13a = (
dctint)((1
U << 13) + 
t9  - t13) >> 14;
 
 1438     t14a = (
dctint)((1
U << 13) + 
t10 - t14) >> 14;
 
 1441     t4a  = 
t4 * 15137
U + 
t5 *  6270
U;
 
 1442     t5a  = 
t4 *  6270
U - 
t5 * 15137
U;
 
 1443     t6a  = 
t7 * 15137
U - 
t6 *  6270
U;
 
 1444     t7a  = 
t7 *  6270
U + 
t6 * 15137
U;
 
 1445     t12  = t12a * 15137
U + t13a *  6270
U;
 
 1446     t13  = t12a *  6270
U - t13a * 15137
U;
 
 1447     t14  = t15a * 15137
U - t14a *  6270
U;
 
 1448     t15  = t15a *  6270
U + t14a * 15137
U;
 
 1454     out[ 3] = -((
dctint)((1
U << 13) + t4a + t6a) >> 14);
 
 1455     out[12] =   (
dctint)((1
U << 13) + t5a + t7a) >> 14;
 
 1456     t6      =   (
dctint)((1
U << 13) + t4a - t6a) >> 14;
 
 1457     t7      =   (
dctint)((1
U << 13) + t5a - t7a) >> 14;
 
 1458     out[ 1] = -(t8a + t10a);
 
 1459     out[14] =   t9a + t11a;
 
 1464     t14a    =   (
dctint)((1
U << 13) + 
t12 - t14) >> 14;
 
 1465     t15a    =   (
dctint)((1
U << 13) + t13 - 
t15) >> 14;
 
 1467     out[ 7] = (
dctint)(-(t2a  + t3a)  * 11585
U  + (1 << 13)) >> 14;
 
 1468     out[ 8] = (
dctint)( (t2a  - t3a)  * 11585
U  + (1 << 13)) >> 14;
 
 1473     out[ 5] = (
dctint)(-(t14a + t15a) * 11585
U  + (1 << 13)) >> 14;
 
 1474     out[10] = (
dctint)( (t14a - t15a) * 11585
U  + (1 << 13)) >> 14;
 
 1528     dctint t13 = t12a - t13a;
 
 1529     dctint t14 = t15a - t14a;
 
 1531     dctint t16 = t16a + t17a;
 
 1532     dctint t17 = t16a - t17a;
 
 1533     dctint t18 = t19a - t18a;
 
 1534     dctint t19 = t19a + t18a;
 
 1535     dctint t20 = t20a + t21a;
 
 1536     dctint t21 = t20a - t21a;
 
 1537     dctint t22 = t23a - t22a;
 
 1538     dctint t23 = t23a + t22a;
 
 1539     dctint t24 = t24a + t25a;
 
 1540     dctint t25 = t24a - t25a;
 
 1541     dctint t26 = t27a - t26a;
 
 1543     dctint t28 = t28a + t29a;
 
 1544     dctint t29 = t28a - t29a;
 
 1545     dctint t30 = t31a - t30a;
 
 1546     dctint t31 = t31a + t30a;
 
 1548     t5a  = (
dctint)((
t6 - 
t5) * 11585
U             + (1 << 13)) >> 14;
 
 1549     t6a  = (
dctint)((
t6 + 
t5) * 11585
U             + (1 << 13)) >> 14;
 
 1550     t9a  = (
dctint)(  t14 *  6270
U - 
t9  * 15137
U  + (1 << 13)) >> 14;
 
 1551     t14a = (
dctint)(  t14 * 15137
U + 
t9  *  6270
U  + (1 << 13)) >> 14;
 
 1552     t10a = (
dctint)(-(t13 * 15137
U + 
t10 *  6270
U) + (1 << 13)) >> 14;
 
 1553     t13a = (
dctint)(  t13 *  6270
U - 
t10 * 15137
U  + (1 << 13)) >> 14;
 
 1554     t17a = (
dctint)(  t30 *  3196
U - t17 * 16069
U  + (1 << 13)) >> 14;
 
 1555     t30a = (
dctint)(  t30 * 16069
U + t17 *  3196
U  + (1 << 13)) >> 14;
 
 1556     t18a = (
dctint)(-(t29 * 16069
U + t18 *  3196
U) + (1 << 13)) >> 14;
 
 1557     t29a = (
dctint)(  t29 *  3196
U - t18 * 16069
U  + (1 << 13)) >> 14;
 
 1558     t21a = (
dctint)(  t26 * 13623
U - t21 *  9102
U  + (1 << 13)) >> 14;
 
 1559     t26a = (
dctint)(  t26 *  9102
U + t21 * 13623
U  + (1 << 13)) >> 14;
 
 1560     t22a = (
dctint)(-(t25 *  9102
U + t22 * 13623
U) + (1 << 13)) >> 14;
 
 1561     t25a = (
dctint)(  t25 * 13623
U - t22 *  9102
U  + (1 << 13)) >> 14;
 
 1596     t10a = (
dctint)((t13  - 
t10)  * 11585
U           + (1 << 13)) >> 14;
 
 1597     t13a = (
dctint)((t13  + 
t10)  * 11585
U           + (1 << 13)) >> 14;
 
 1598     t11  = (
dctint)((t12a - t11a) * 11585
U           + (1 << 13)) >> 14;
 
 1599     t12  = (
dctint)((t12a + t11a) * 11585
U           + (1 << 13)) >> 14;
 
 1600     t18a = (
dctint)(  t29  *  6270
U - t18  * 15137
U  + (1 << 13)) >> 14;
 
 1601     t29a = (
dctint)(  t29  * 15137
U + t18  *  6270
U  + (1 << 13)) >> 14;
 
 1602     t19  = (
dctint)(  t28a *  6270
U - t19a * 15137
U  + (1 << 13)) >> 14;
 
 1603     t28  = (
dctint)(  t28a * 15137
U + t19a *  6270
U  + (1 << 13)) >> 14;
 
 1604     t20  = (
dctint)(-(t27a * 15137
U + t20a *  6270
U) + (1 << 13)) >> 14;
 
 1605     t27  = (
dctint)(  t27a *  6270
U - t20a * 15137
U  + (1 << 13)) >> 14;
 
 1606     t21a = (
dctint)(-(t26  * 15137
U + t21  *  6270
U) + (1 << 13)) >> 14;
 
 1607     t26a = (
dctint)(  t26  *  6270
U - t21  * 15137
U  + (1 << 13)) >> 14;
 
 1642     t20  = (
dctint)((t27a - t20a) * 11585
U + (1 << 13)) >> 14;
 
 1643     t27  = (
dctint)((t27a + t20a) * 11585
U + (1 << 13)) >> 14;
 
 1644     t21a = (
dctint)((t26  - t21 ) * 11585
U + (1 << 13)) >> 14;
 
 1645     t26a = (
dctint)((t26  + t21 ) * 11585
U + (1 << 13)) >> 14;
 
 1646     t22  = (
dctint)((t25a - t22a) * 11585
U + (1 << 13)) >> 14;
 
 1647     t25  = (
dctint)((t25a + t22a) * 11585
U + (1 << 13)) >> 14;
 
 1648     t23a = (
dctint)((t24  - t23 ) * 11585
U + (1 << 13)) >> 14;
 
 1649     t24a = (
dctint)((t24  + t23 ) * 11585
U + (1 << 13)) >> 14;
 
 1652     out[ 1] = 
t1   + t30a;
 
 1654     out[ 3] = 
t3   + t28a;
 
 1656     out[ 5] = t5a  + t26a;
 
 1657     out[ 6] = t6a  + t25;
 
 1658     out[ 7] = 
t7   + t24a;
 
 1659     out[ 8] = 
t8   + t23a;
 
 1660     out[ 9] = t9a  + t22;
 
 1662     out[11] = t11a + t20;
 
 1663     out[12] = t12a + t19a;
 
 1664     out[13] = t13  + t18;
 
 1665     out[14] = t14a + t17a;
 
 1668     out[17] = t14a - t17a;
 
 1669     out[18] = t13  - t18;
 
 1670     out[19] = t12a - t19a;
 
 1671     out[20] = t11a - t20;
 
 1673     out[22] = t9a  - t22;
 
 1674     out[23] = 
t8   - t23a;
 
 1675     out[24] = 
t7   - t24a;
 
 1676     out[25] = t6a  - t25;
 
 1677     out[26] = t5a  - t26a;
 
 1679     out[28] = 
t3   - t28a;
 
 1681     out[30] = 
t1   - t30a;
 
 1721 #undef itxfm_wrapper 
 1726 #define init_itxfm(tx, sz) \ 
 1727     dsp->itxfm_add[tx][DCT_DCT]   = idct_idct_##sz##_add_c; \ 
 1728     dsp->itxfm_add[tx][DCT_ADST]  = iadst_idct_##sz##_add_c; \ 
 1729     dsp->itxfm_add[tx][ADST_DCT]  = idct_iadst_##sz##_add_c; \ 
 1730     dsp->itxfm_add[tx][ADST_ADST] = iadst_iadst_##sz##_add_c 
 1732 #define init_idct(tx, nm) \ 
 1733     dsp->itxfm_add[tx][DCT_DCT]   = \ 
 1734     dsp->itxfm_add[tx][ADST_DCT]  = \ 
 1735     dsp->itxfm_add[tx][DCT_ADST]  = \ 
 1736     dsp->itxfm_add[tx][ADST_ADST] = nm##_add_c 
 1749                                          ptrdiff_t stridea, ptrdiff_t strideb,
 
 1757     for (
i = 0; 
i < 8; 
i++, dst += stridea) {
 
 1759         int p3 = dst[strideb * -4], p2 = dst[strideb * -3];
 
 1760         int p1 = dst[strideb * -2], p0 = dst[strideb * -1];
 
 1761         int q0 = dst[strideb * +0], 
q1 = dst[strideb * +1];
 
 1762         int q2 = dst[strideb * +2], q3 = dst[strideb * +3];
 
 1764         int fm = 
FFABS(p3 - p2) <= I && 
FFABS(p2 - p1) <= I &&
 
 1768         int flat8out, flat8in;
 
 1774             p7 = dst[strideb * -8];
 
 1775             p6 = dst[strideb * -7];
 
 1776             p5 = dst[strideb * -6];
 
 1777             p4 = dst[strideb * -5];
 
 1778             q4 = dst[strideb * +4];
 
 1779             q5 = dst[strideb * +5];
 
 1780             q6 = dst[strideb * +6];
 
 1781             q7 = dst[strideb * +7];
 
 1783             flat8out = 
FFABS(p7 - p0) <= 
F && 
FFABS(p6 - p0) <= 
F &&
 
 1790             flat8in = 
FFABS(p3 - p0) <= 
F && 
FFABS(p2 - p0) <= 
F &&
 
 1794         if (wd >= 16 && flat8out && flat8in) {
 
 1795             dst[strideb * -7] = (p7 + p7 + p7 + p7 + p7 + p7 + p7 + p6 * 2 +
 
 1796                                  p5 + p4 + p3 + p2 + p1 + p0 + 
q0 + 8) >> 4;
 
 1797             dst[strideb * -6] = (p7 + p7 + p7 + p7 + p7 + p7 + p6 + p5 * 2 +
 
 1798                                  p4 + p3 + p2 + p1 + p0 + 
q0 + 
q1 + 8) >> 4;
 
 1799             dst[strideb * -5] = (p7 + p7 + p7 + p7 + p7 + p6 + p5 + p4 * 2 +
 
 1800                                  p3 + p2 + p1 + p0 + 
q0 + 
q1 + q2 + 8) >> 4;
 
 1801             dst[strideb * -4] = (p7 + p7 + p7 + p7 + p6 + p5 + p4 + p3 * 2 +
 
 1802                                  p2 + p1 + p0 + 
q0 + 
q1 + q2 + q3 + 8) >> 4;
 
 1803             dst[strideb * -3] = (p7 + p7 + p7 + p6 + p5 + p4 + p3 + p2 * 2 +
 
 1804                                  p1 + p0 + 
q0 + 
q1 + q2 + q3 + q4 + 8) >> 4;
 
 1805             dst[strideb * -2] = (p7 + p7 + p6 + p5 + p4 + p3 + p2 + p1 * 2 +
 
 1806                                  p0 + 
q0 + 
q1 + q2 + q3 + q4 + q5 + 8) >> 4;
 
 1807             dst[strideb * -1] = (p7 + p6 + p5 + p4 + p3 + p2 + p1 + p0 * 2 +
 
 1808                                  q0 + 
q1 + q2 + q3 + q4 + q5 + q6 + 8) >> 4;
 
 1809             dst[strideb * +0] = (p6 + p5 + p4 + p3 + p2 + p1 + p0 + 
q0 * 2 +
 
 1810                                  q1 + q2 + q3 + q4 + q5 + q6 + q7 + 8) >> 4;
 
 1811             dst[strideb * +1] = (p5 + p4 + p3 + p2 + p1 + p0 + 
q0 + 
q1 * 2 +
 
 1812                                  q2 + q3 + q4 + q5 + q6 + q7 + q7 + 8) >> 4;
 
 1813             dst[strideb * +2] = (p4 + p3 + p2 + p1 + p0 + 
q0 + 
q1 + q2 * 2 +
 
 1814                                  q3 + q4 + q5 + q6 + q7 + q7 + q7 + 8) >> 4;
 
 1815             dst[strideb * +3] = (p3 + p2 + p1 + p0 + 
q0 + 
q1 + q2 + q3 * 2 +
 
 1816                                  q4 + q5 + q6 + q7 + q7 + q7 + q7 + 8) >> 4;
 
 1817             dst[strideb * +4] = (p2 + p1 + p0 + 
q0 + 
q1 + q2 + q3 + q4 * 2 +
 
 1818                                  q5 + q6 + q7 + q7 + q7 + q7 + q7 + 8) >> 4;
 
 1819             dst[strideb * +5] = (p1 + p0 + 
q0 + 
q1 + q2 + q3 + q4 + q5 * 2 +
 
 1820                                  q6 + q7 + q7 + q7 + q7 + q7 + q7 + 8) >> 4;
 
 1821             dst[strideb * +6] = (p0 + 
q0 + 
q1 + q2 + q3 + q4 + q5 + q6 * 2 +
 
 1822                                  q7 + q7 + q7 + q7 + q7 + q7 + q7 + 8) >> 4;
 
 1823         } 
else if (wd >= 8 && flat8in) {
 
 1824             dst[strideb * -3] = (p3 + p3 + p3 + 2 * p2 + p1 + p0 + 
q0 + 4) >> 3;
 
 1825             dst[strideb * -2] = (p3 + p3 + p2 + 2 * p1 + p0 + 
q0 + 
q1 + 4) >> 3;
 
 1826             dst[strideb * -1] = (p3 + p2 + p1 + 2 * p0 + 
q0 + 
q1 + q2 + 4) >> 3;
 
 1827             dst[strideb * +0] = (p2 + p1 + p0 + 2 * 
q0 + 
q1 + q2 + q3 + 4) >> 3;
 
 1828             dst[strideb * +1] = (p1 + p0 + 
q0 + 2 * 
q1 + q2 + q3 + q3 + 4) >> 3;
 
 1829             dst[strideb * +2] = (p0 + 
q0 + 
q1 + 2 * q2 + q3 + q3 + q3 + 4) >> 3;
 
 1859 #define lf_8_fn(dir, wd, stridea, strideb) \ 
 1860 static void loop_filter_##dir##_##wd##_8_c(uint8_t *_dst, \ 
 1862                                            int E, int I, int H) \ 
 1864     pixel *dst = (pixel *) _dst; \ 
 1865     stride /= sizeof(pixel); \ 
 1866     loop_filter(dst, E, I, H, stridea, strideb, wd); \ 
 1869 #define lf_8_fns(wd) \ 
 1870 lf_8_fn(h, wd, stride, 1) \ 
 1871 lf_8_fn(v, wd, 1, stride) 
 /*
  * lf_8_fns(wd), defined just above: instantiates lf_8_fn twice for the
  * given filter width wd. The "h" variant passes (stridea, strideb) =
  * (stride, 1); the "v" variant passes (1, stride).
  */
 1880 #define lf_16_fn(dir, stridea) \ 
 1881 static void loop_filter_##dir##_16_16_c(uint8_t *dst, \ 
 1883                                         int E, int I, int H) \ 
 1885     loop_filter_##dir##_16_8_c(dst, stride, E, I, H); \ 
 1886     loop_filter_##dir##_16_8_c(dst + 8 * stridea, stride, E, I, H); \ 
 1894 #define lf_mix_fn(dir, wd1, wd2, stridea) \ 
 1895 static void loop_filter_##dir##_##wd1##wd2##_16_c(uint8_t *dst, \ 
 1897                                                   int E, int I, int H) \ 
 1899     loop_filter_##dir##_##wd1##_8_c(dst, stride, E & 0xff, I & 0xff, H & 0xff); \ 
 1900     loop_filter_##dir##_##wd2##_8_c(dst + 8 * stridea, stride, E >> 8, I >> 8, H >> 8); \ 
 1903 #define lf_mix_fns(wd1, wd2) \ 
 1904 lf_mix_fn(h, wd1, wd2, stride) \ 
 1905 lf_mix_fn(v, wd1, wd2, sizeof(pixel)) 
 /*
  * lf_mix_fns(wd1, wd2), defined just above: instantiates lf_mix_fn for
  * both directions. lf_mix_fn offsets its second call by 8 * stridea on a
  * uint8_t pointer, so the "h" variant passes stride as stridea and the
  * "v" variant passes sizeof(pixel).
  */
 1940                                     const uint8_t *
src, ptrdiff_t src_stride,
 
 1952                                    const uint8_t *_src, ptrdiff_t src_stride,
 
 1958     dst_stride /= 
sizeof(
pixel);
 
 1959     src_stride /= 
sizeof(
pixel);
 
 1963         for (x = 0; x < 
w; x += 4)
 
 1971 #define fpel_fn(type, sz) \ 
 1972 static void type##sz##_c(uint8_t *dst, ptrdiff_t dst_stride, \ 
 1973                          const uint8_t *src, ptrdiff_t src_stride, \ 
 1974                          int h, int mx, int my) \ 
 1976     type##_c(dst, dst_stride, src, src_stride, sz, h); \ 
 1979 #define copy_avg_fn(sz) \ 
 1994 #define FILTER_8TAP(src, x, F, stride) \ 
 1995     av_clip_pixel((F[0] * src[x + -3 * stride] + \ 
 1996                    F[1] * src[x + -2 * stride] + \ 
 1997                    F[2] * src[x + -1 * stride] + \ 
 1998                    F[3] * src[x + +0 * stride] + \ 
 1999                    F[4] * src[x + +1 * stride] + \ 
 2000                    F[5] * src[x + +2 * stride] + \ 
 2001                    F[6] * src[x + +3 * stride] + \ 
 2002                    F[7] * src[x + +4 * stride] + 64) >> 7) 
 /*
  * FILTER_8TAP(src, x, F, stride), defined just above: 8-tap filter
  * evaluated at position x. Tap k (0..7) multiplies F[k] by
  * src[x + (k - 3) * stride], so the window covers three samples before x
  * and four after it. 64 is added before the right shift by 7 (rounding),
  * and the result is passed through av_clip_pixel().
  * NOTE(review): the arguments are substituted without parentheses, so an
  * argument such as `a + b` for stride or x would bind incorrectly; the
  * uses visible in this file pass plain identifiers or literals.
  */
 2004 static av_always_inline void do_8tap_1d_c(uint8_t *_dst, ptrdiff_t dst_stride,
 
 2005                                           const uint8_t *_src, ptrdiff_t src_stride,
 
 2006                                           int w, 
int h, ptrdiff_t ds,
 
 2012     dst_stride /= 
sizeof(
pixel);
 
 2013     src_stride /= 
sizeof(
pixel);
 
 2017         for (x = 0; x < 
w; x++)
 
 2029 #define filter_8tap_1d_fn(opn, opa, dir, ds) \ 
 2030 static av_noinline void opn##_8tap_1d_##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \ 
 2031                                                 const uint8_t *src, ptrdiff_t src_stride, \ 
 2032                                                 int w, int h, const int16_t *filter) \ 
 2034     do_8tap_1d_c(dst, dst_stride, src, src_stride, w, h, ds, filter, opa); \ 
 2042 #undef filter_8tap_1d_fn 
 2045                                           const uint8_t *_src, ptrdiff_t src_stride,
 
 2046                                           int w, 
int h, 
const int16_t *filterx,
 
 2047                                           const int16_t *filtery, 
int avg)
 
 2054     dst_stride /= 
sizeof(
pixel);
 
 2055     src_stride /= 
sizeof(
pixel);
 
 2056     src -= src_stride * 3;
 
 2060         for (x = 0; x < 
w; x++)
 
 2067     tmp_ptr = 
tmp + 64 * 3;
 
 2071         for (x = 0; x < 
w; x++)
 
 2073                 dst[x] = (dst[x] + 
FILTER_8TAP(tmp_ptr, x, filtery, 64) + 1) >> 1;
 
 2083 #define filter_8tap_2d_fn(opn, opa) \ 
 2084 static av_noinline void opn##_8tap_2d_hv_c(uint8_t *dst, ptrdiff_t dst_stride, \ 
 2085                                            const uint8_t *src, ptrdiff_t src_stride, \ 
 2086                                            int w, int h, const int16_t *filterx, \ 
 2087                                            const int16_t *filtery) \ 
 2089     do_8tap_2d_c(dst, dst_stride, src, src_stride, w, h, filterx, filtery, opa); \ 
 2095 #undef filter_8tap_2d_fn 
 2097 #define filter_fn_1d(sz, dir, dir_m, type, type_idx, avg) \ 
 2098 static void avg##_8tap_##type##_##sz##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \ 
 2099                                               const uint8_t *src, ptrdiff_t src_stride, \ 
 2100                                               int h, int mx, int my) \ 
 2102     avg##_8tap_1d_##dir##_c(dst, dst_stride, src, src_stride, sz, h, \ 
 2103                             ff_vp9_subpel_filters[type_idx][dir_m]); \ 
 2106 #define filter_fn_2d(sz, type, type_idx, avg) \ 
 2107 static void avg##_8tap_##type##_##sz##hv_c(uint8_t *dst, ptrdiff_t dst_stride, \ 
 2108                                            const uint8_t *src, ptrdiff_t src_stride, \ 
 2109                                            int h, int mx, int my) \ 
 2111     avg##_8tap_2d_hv_c(dst, dst_stride, src, src_stride, sz, h, \ 
 2112                        ff_vp9_subpel_filters[type_idx][mx], \ 
 2113                        ff_vp9_subpel_filters[type_idx][my]); \ 
 2118 #define FILTER_BILIN(src, x, mxy, stride) \ 
 2119     (src[x] + ((mxy * (src[x + stride] - src[x]) + 8) >> 4)) 
 /*
  * FILTER_BILIN(src, x, mxy, stride), defined just above: two-sample
  * interpolation between src[x] and src[x + stride]. The difference is
  * scaled by mxy, 8 is added before the right shift by 4 (rounding), and
  * the result is added to src[x].
  * NOTE(review): the arguments are substituted without parentheses, so an
  * expression argument for mxy or stride (e.g. `a + b`) would bind
  * incorrectly; the uses visible in this file pass identifiers or literals.
  */
 2121 static av_always_inline void do_bilin_1d_c(uint8_t *_dst, ptrdiff_t dst_stride,
 
 2122                                            const uint8_t *_src, ptrdiff_t src_stride,
 
 2123                                            int w, 
int h, ptrdiff_t ds, 
int mxy, 
int avg)
 
 2128     dst_stride /= 
sizeof(
pixel);
 
 2129     src_stride /= 
sizeof(
pixel);
 
 2133         for (x = 0; x < 
w; x++)
 
 2145 #define bilin_1d_fn(opn, opa, dir, ds) \ 
 2146 static av_noinline void opn##_bilin_1d_##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \ 
 2147                                                  const uint8_t *src, ptrdiff_t src_stride, \ 
 2148                                                  int w, int h, int mxy) \ 
 2150     do_bilin_1d_c(dst, dst_stride, src, src_stride, w, h, ds, mxy, opa); \ 
 2161                                            const uint8_t *_src, ptrdiff_t src_stride,
 
 2162                                            int w, 
int h, 
int mx, 
int my, 
int avg)
 
 2169     dst_stride /= 
sizeof(
pixel);
 
 2170     src_stride /= 
sizeof(
pixel);
 
 2174         for (x = 0; x < 
w; x++)
 
 2185         for (x = 0; x < 
w; x++)
 
 2187                 dst[x] = (dst[x] + 
FILTER_BILIN(tmp_ptr, x, my, 64) + 1) >> 1;
 
 2197 #define bilin_2d_fn(opn, opa) \ 
 2198 static av_noinline void opn##_bilin_2d_hv_c(uint8_t *dst, ptrdiff_t dst_stride, \ 
 2199                                             const uint8_t *src, ptrdiff_t src_stride, \ 
 2200                                             int w, int h, int mx, int my) \ 
 2202     do_bilin_2d_c(dst, dst_stride, src, src_stride, w, h, mx, my, opa); \ 
 2210 #define bilinf_fn_1d(sz, dir, dir_m, avg) \ 
 2211 static void avg##_bilin_##sz##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \ 
 2212                                       const uint8_t *src, ptrdiff_t src_stride, \ 
 2213                                       int h, int mx, int my) \ 
 2215     avg##_bilin_1d_##dir##_c(dst, dst_stride, src, src_stride, sz, h, dir_m); \ 
 2218 #define bilinf_fn_2d(sz, avg) \ 
 2219 static void avg##_bilin_##sz##hv_c(uint8_t *dst, ptrdiff_t dst_stride, \ 
 2220                                    const uint8_t *src, ptrdiff_t src_stride, \ 
 2221                                    int h, int mx, int my) \ 
 2223     avg##_bilin_2d_hv_c(dst, dst_stride, src, src_stride, sz, h, mx, my); \ 
 2228 #define bilinf_fn_1d(a, b, c, d) 
 2229 #define bilinf_fn_2d(a, b) 
 /*
  * The two definitions just above make bilinf_fn_1d and bilinf_fn_2d expand
  * to nothing. The same macro names are also defined with function bodies
  * earlier in the file; presumably a preprocessor conditional that this
  * listing omits selects one set or the other -- confirm against the full
  * source.
  */
 /*
  * filter_fn(sz, avg): instantiate the whole family of fixed-size filter
  * wrappers for block size sz and operation avg. For each of the regular,
  * smooth and sharp 8-tap filters it expands filter_fn_1d for the h
  * direction (fraction mx) and the v direction (fraction my) plus
  * filter_fn_2d, then expands bilinf_fn_1d (h and v) and bilinf_fn_2d.
  */
 2233 #define filter_fn(sz, avg) \ 
 2234 filter_fn_1d(sz, h, mx, regular, FILTER_8TAP_REGULAR, avg) \ 
 2235 filter_fn_1d(sz, v, my, regular, FILTER_8TAP_REGULAR, avg) \ 
 2236 filter_fn_2d(sz,        regular, FILTER_8TAP_REGULAR, avg) \ 
 2237 filter_fn_1d(sz, h, mx, smooth,  FILTER_8TAP_SMOOTH,  avg) \ 
 2238 filter_fn_1d(sz, v, my, smooth,  FILTER_8TAP_SMOOTH,  avg) \ 
 2239 filter_fn_2d(sz,        smooth,  FILTER_8TAP_SMOOTH,  avg) \ 
 2240 filter_fn_1d(sz, h, mx, sharp,   FILTER_8TAP_SHARP,   avg) \ 
 2241 filter_fn_1d(sz, v, my, sharp,   FILTER_8TAP_SHARP,   avg) \ 
 2242 filter_fn_2d(sz,        sharp,   FILTER_8TAP_SHARP,   avg) \ 
 2243 bilinf_fn_1d(sz, h, mx,                               avg) \ 
 2244 bilinf_fn_1d(sz, v, my,                               avg) \ 
 2245 bilinf_fn_2d(sz,                                      avg) 
 2247 #define filter_fn_set(avg) \ 
 2248 filter_fn(64, avg) \ 
 2249 filter_fn(32, avg) \ 
 2250 filter_fn(16, avg) \ 
 2258 #undef filter_fn_set 
 2273     ff_vp9dsp_mc_init_10(dsp);
 
 /*
  * init_fpel(idx1, idx2, sz, type): store the function type##sz##_c in
  * dsp->mc[idx1][filter][idx2][0][0] for all four filter indices (smooth,
  * regular, sharp, bilinear), i.e. the same function is used regardless
  * of filter for the [0][0] entry.
  * Requires a `dsp` pointer in scope; the trailing semicolon is supplied
  * by the user of the macro.
  */
 2276 #define init_fpel(idx1, idx2, sz, type) \ 
 2277     dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][0][0] = type##sz##_c; \ 
 2278     dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][0][0] = type##sz##_c; \ 
 2279     dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][0][0] = type##sz##_c; \ 
 2280     dsp->mc[idx1][FILTER_BILINEAR    ][idx2][0][0] = type##sz##_c 
 /*
  * init_copy_avg(idx, sz): run init_fpel twice for size sz at table index
  * idx -- slot 0 gets the `copy` functions, slot 1 gets the `avg` functions.
  */
 2282 #define init_copy_avg(idx, sz) \ 
 2283     init_fpel(idx, 0, sz, copy); \ 
 2284     init_fpel(idx, 1, sz, avg) 
 2292 #undef init_copy_avg 
 /*
  * init_subpel1_bd_aware(idx1, idx2, idxh, idxv, sz, dir, type): fill
  * dsp->mc[idx1][filter][idx2][idxh][idxv] for the three 8-tap filters
  * (smooth, regular, sharp) with the functions named
  * type##_8tap_<filter>_##sz##dir##_c. The bilinear slot is not touched.
  * Requires a `dsp` pointer in scope.
  */
 2297 #define init_subpel1_bd_aware(idx1, idx2, idxh, idxv, sz, dir, type) \ 
 2298     dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][idxh][idxv] = type##_8tap_smooth_##sz##dir##_c; \ 
 2299     dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][idxh][idxv] = type##_8tap_regular_##sz##dir##_c; \ 
 2300     dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][idxh][idxv] = type##_8tap_sharp_##sz##dir##_c 
 /*
  * Variant of init_subpel1 that is a plain alias for init_subpel1_bd_aware
  * (8-tap slots only). A second definition of the same name follows;
  * presumably a preprocessor conditional omitted from this listing picks
  * one of the two -- confirm against the full source.
  */
 2303 #define init_subpel1 init_subpel1_bd_aware 
 /*
  * Variant of init_subpel1 that first expands init_subpel1_bd_aware for the
  * three 8-tap filters and then also fills the FILTER_BILINEAR slot with
  * type##_bilin_##sz##dir##_c.
  */
 2305 #define init_subpel1(idx1, idx2, idxh, idxv, sz, dir, type) \ 
 2306     init_subpel1_bd_aware(idx1, idx2, idxh, idxv, sz, dir, type); \ 
 2307     dsp->mc[idx1][FILTER_BILINEAR    ][idx2][idxh][idxv] = type##_bilin_##sz##dir##_c 
 /*
  * init_subpel2(idx, idxh, idxv, dir, type): expand init_subpel1 once per
  * block size, pairing first-dimension index 0..4 with sizes 64, 32, 16,
  * 8 and 4 in that order.
  */
 2310 #define init_subpel2(idx, idxh, idxv, dir, type) \ 
 2311     init_subpel1(0, idx, idxh, idxv, 64, dir, type); \ 
 2312     init_subpel1(1, idx, idxh, idxv, 32, dir, type); \ 
 2313     init_subpel1(2, idx, idxh, idxv, 16, dir, type); \ 
 2314     init_subpel1(3, idx, idxh, idxv,  8, dir, type); \ 
 2315     init_subpel1(4, idx, idxh, idxv,  4, dir, type) 
 /*
  * init_subpel3(idx, type): expand init_subpel2 for the three direction
  * variants: hv at (idxh, idxv) = (1, 1), v at (0, 1) and h at (1, 0).
  */
 2317 #define init_subpel3(idx, type) \ 
 2318     init_subpel2(idx, 1, 1, hv, type); \ 
 2319     init_subpel2(idx, 0, 1, v, type); \ 
 2320     init_subpel2(idx, 1, 0, h, type) 
 2328 #undef init_subpel1_bd_aware 
 2332                                               const uint8_t *_src, ptrdiff_t src_stride,
 
 2333                                               int w, 
int h, 
int mx, 
int my,
 
 2334                                               int dx, 
int dy, 
int avg,
 
 2337     int tmp_h = (((
h - 1) * dy + my) >> 4) + 8;
 
 2342     dst_stride /= 
sizeof(
pixel);
 
 2343     src_stride /= 
sizeof(
pixel);
 
 2344     src -= src_stride * 3;
 
 2347         int imx = mx, ioff = 0;
 
 2349         for (x = 0; x < 
w; x++) {
 
 2360     tmp_ptr = 
tmp + 64 * 3;
 
 2365         for (x = 0; x < 
w; x++)
 
 2373         tmp_ptr += (my >> 4) * 64;
 
 2379 #define scaled_filter_8tap_fn(opn, opa) \ 
 2380 static av_noinline void opn##_scaled_8tap_c(uint8_t *dst, ptrdiff_t dst_stride, \ 
 2381                                             const uint8_t *src, ptrdiff_t src_stride, \ 
 2382                                             int w, int h, int mx, int my, int dx, int dy, \ 
 2383                                             const int16_t (*filters)[8]) \ 
 2385     do_scaled_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ 
 2392 #undef scaled_filter_8tap_fn 
 2396 #define scaled_filter_fn(sz, type, type_idx, avg) \ 
 2397 static void avg##_scaled_##type##_##sz##_c(uint8_t *dst, ptrdiff_t dst_stride, \ 
 2398                                            const uint8_t *src, ptrdiff_t src_stride, \ 
 2399                                            int h, int mx, int my, int dx, int dy) \ 
 2401     avg##_scaled_8tap_c(dst, dst_stride, src, src_stride, sz, h, mx, my, dx, dy, \ 
 2402                         ff_vp9_subpel_filters[type_idx]); \ 
 2407 static av_always_inline void do_scaled_bilin_c(uint8_t *_dst, ptrdiff_t dst_stride,
 
 2408                                                const uint8_t *_src, ptrdiff_t src_stride,
 
 2409                                                int w, 
int h, 
int mx, 
int my,
 
 2410                                                int dx, 
int dy, 
int avg)
 
 2413     int tmp_h = (((
h - 1) * dy + my) >> 4) + 2;
 
 2417     dst_stride /= 
sizeof(
pixel);
 
 2418     src_stride /= 
sizeof(
pixel);
 
 2421         int imx = mx, ioff = 0;
 
 2423         for (x = 0; x < 
w; x++) {
 
 2438         for (x = 0; x < 
w; x++)
 
 2440                 dst[x] = (dst[x] + 
FILTER_BILIN(tmp_ptr, x, my, 64) + 1) >> 1;
 
 2446         tmp_ptr += (my >> 4) * 64;
 
 2452 #define scaled_bilin_fn(opn, opa) \ 
 2453 static av_noinline void opn##_scaled_bilin_c(uint8_t *dst, ptrdiff_t dst_stride, \ 
 2454                                              const uint8_t *src, ptrdiff_t src_stride, \ 
 2455                                              int w, int h, int mx, int my, int dx, int dy) \ 
 2457     do_scaled_bilin_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, opa); \ 
 2463 #undef scaled_bilin_fn 
 2467 #define scaled_bilinf_fn(sz, avg) \ 
 2468 static void avg##_scaled_bilin_##sz##_c(uint8_t *dst, ptrdiff_t dst_stride, \ 
 2469                                         const uint8_t *src, ptrdiff_t src_stride, \ 
 2470                                         int h, int mx, int my, int dx, int dy) \ 
 2472     avg##_scaled_bilin_c(dst, dst_stride, src, src_stride, sz, h, mx, my, dx, dy); \ 
 2477 #define scaled_bilinf_fn(a, b) 
 /*
  * The definition just above makes scaled_bilinf_fn expand to nothing. The
  * same name is defined with a function body earlier in the file;
  * presumably a preprocessor conditional omitted from this listing selects
  * between them -- confirm against the full source.
  */
 /*
  * scaled_filter_fns(sz, avg): for block size sz and operation avg, expand
  * scaled_filter_fn for the regular, smooth and sharp 8-tap filters, then
  * scaled_bilinf_fn.
  */
 2481 #define scaled_filter_fns(sz, avg) \ 
 2482 scaled_filter_fn(sz,        regular, FILTER_8TAP_REGULAR, avg) \ 
 2483 scaled_filter_fn(sz,        smooth,  FILTER_8TAP_SMOOTH,  avg) \ 
 2484 scaled_filter_fn(sz,        sharp,   FILTER_8TAP_SHARP,   avg) \ 
 2485 scaled_bilinf_fn(sz,                                      avg) 
 /*
  * scaled_filter_fn_set(avg): expand scaled_filter_fns for each block size
  * 64, 32, 16, 8 and 4 with the given operation.
  */
 2487 #define scaled_filter_fn_set(avg) \ 
 2488 scaled_filter_fns(64, avg) \ 
 2489 scaled_filter_fns(32, avg) \ 
 2490 scaled_filter_fns(16, avg) \ 
 2491 scaled_filter_fns(8,  avg) \ 
 2492 scaled_filter_fns(4,  avg) 
 2497 #undef scaled_filter_fns 
 2498 #undef scaled_filter_fn_set 
 2499 #undef scaled_filter_fn 
 2500 #undef scaled_bilinf_fn 
 /*
  * init_scaled_bd_aware(idx1, idx2, sz, type): fill
  * dsp->smc[idx1][filter][idx2] for the three 8-tap filters (smooth,
  * regular, sharp) with type##_scaled_<filter>_##sz##_c. The bilinear slot
  * is not touched. Requires a `dsp` pointer in scope.
  */
 2510 #define init_scaled_bd_aware(idx1, idx2, sz, type) \ 
 2511     dsp->smc[idx1][FILTER_8TAP_SMOOTH ][idx2] = type##_scaled_smooth_##sz##_c; \ 
 2512     dsp->smc[idx1][FILTER_8TAP_REGULAR][idx2] = type##_scaled_regular_##sz##_c; \ 
 2513     dsp->smc[idx1][FILTER_8TAP_SHARP  ][idx2] = type##_scaled_sharp_##sz##_c 
 2516     ff_vp9dsp_scaled_mc_init_10(dsp);
 
 /*
  * Variant of init_scaled that simply forwards its four arguments to
  * init_scaled_bd_aware (8-tap slots only). A second definition of the
  * same name follows; presumably a preprocessor conditional omitted from
  * this listing picks one of the two -- confirm against the full source.
  */
 2517 #define init_scaled(a,b,c,d) init_scaled_bd_aware(a,b,c,d) 
 /*
  * Variant of init_scaled that expands init_scaled_bd_aware for the three
  * 8-tap filters and then also fills the FILTER_BILINEAR slot with
  * type##_scaled_bilin_##sz##_c.
  */
 2519 #define init_scaled(idx1, idx2, sz, type) \ 
 2520     init_scaled_bd_aware(idx1, idx2, sz, type); \ 
 2521     dsp->smc[idx1][FILTER_BILINEAR    ][idx2] = type##_scaled_bilin_##sz##_c 
 /*
  * init_scaled_put_avg(idx, sz): run init_scaled twice for size sz at table
  * index idx -- slot 0 gets the `put` functions, slot 1 gets the `avg`
  * functions.
  */
 2524 #define init_scaled_put_avg(idx, sz) \ 
 2525     init_scaled(idx, 0, sz, put); \ 
 2526     init_scaled(idx, 1, sz, avg) 
 2534 #undef init_scaled_put_avg 
 2536 #undef init_scaled_bd_aware 
 2541     FUNC(ff_vp9dsp_intrapred_init)(dsp);
 
 2542     vp9dsp_itxfm_init(dsp);
 
 2543     vp9dsp_loopfilter_init(dsp);
 
 2544     FUNC(ff_vp9dsp_mc_init)(dsp);
 
 2545     FUNC(ff_vp9dsp_scaled_mc_init)(dsp);
 
  
static const uint8_t q1[256]
#define FILTER_8TAP(src, x, F, stride)
#define init_intra_pred(tx, sz)
static void dc_top_32x32_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *_top)
static av_always_inline void idct4_1d(const dctcoef *in, ptrdiff_t stride, dctcoef *out, int pass)
#define init_idct(tx, nm)
#define lf_16_fn(dir, stridea)
#define init_copy_avg(idx, sz)
void(* loop_filter_8[3][2])(uint8_t *dst, ptrdiff_t stride, int mb_lim, int lim, int hev_thr)
#define lf_mix_fns(wd1, wd2)
static void hor_8x8_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *_left, const uint8_t *top)
static void tm_32x32_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *_left, const uint8_t *_top)
filter_frame: For filters that do not use the activate() callback, this method is called when a frame is pushed to the filter's input. It can be called at any time except in a reentrant way. If the input frame is enough to produce output, then the filter should push the output frames on the output link immediately. As an exception to the previous rule, if the input frame is enough to produce several output frames, then the filter needs output only at least one per link. The additional frames can be left buffered in the filter
#define init_itxfm(tx, sz)
#define def_diag_downleft(size)
static void memset_bpc(uint16_t *dst, int val, int len)
static void diag_downleft_4x4_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *_top)
void(* loop_filter_mix2[2][2][2])(uint8_t *dst, ptrdiff_t stride, int mb_lim, int lim, int hev_thr)
static av_always_inline void loop_filter(pixel *dst, int E, int I, int H, ptrdiff_t stridea, ptrdiff_t strideb, int wd)
static void dc_left_32x32_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *_left, const uint8_t *top)
static double val(void *priv, double ch)
static av_always_inline void iadst8_1d(const dctcoef *in, ptrdiff_t stride, dctcoef *out, int pass)
static av_always_inline void do_bilin_2d_c(uint8_t *_dst, ptrdiff_t dst_stride, const uint8_t *_src, ptrdiff_t src_stride, int w, int h, int mx, int my, int avg)
static void idct(int16_t block[64])
static av_always_inline void copy_c(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *src, ptrdiff_t src_stride, int w, int h)
#define itxfm_wrapper(type_a, type_b, sz, bits, has_dconly)
#define filters(fmt, inverse, clip, i, c)
static void tm_4x4_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *_left, const uint8_t *_top)
static int t15(InterplayACMContext *s, unsigned ind, unsigned col)
static void vert_4x4_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *_top)
static void dc_129_16x16_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *top)
static void dc_127_4x4_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *top)
#define scaled_bilin_fn(opn, opa)
static void dc_128_32x32_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *top)
#define PIXEL_SPLAT_X4(x)
static void tm_16x16_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *_left, const uint8_t *_top)
static const uint8_t q0[256]
static void vert_32x32_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *_top)
#define FFABS(a)
Absolute value. Note: INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
static av_always_inline void do_scaled_8tap_c(uint8_t *_dst, ptrdiff_t dst_stride, const uint8_t *_src, ptrdiff_t src_stride, int w, int h, int mx, int my, int dx, int dy, int avg, const int16_t(*filters)[8])
static void tm_8x8_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *_left, const uint8_t *_top)
#define FILTER_BILIN(src, x, mxy, stride)
static av_always_inline void avg_c(uint8_t *_dst, ptrdiff_t dst_stride, const uint8_t *_src, ptrdiff_t src_stride, int w, int h)
static void dc_127_32x32_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *top)
static void dc_16x16_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *_left, const uint8_t *_top)
static void hor_32x32_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *_left, const uint8_t *top)
static void dc_127_8x8_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *top)
static void dc_left_8x8_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *_left, const uint8_t *top)
static void dc_128_8x8_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *top)
static void dc_4x4_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *_left, const uint8_t *_top)
#define filter_8tap_1d_fn(opn, opa, dir, ds)
#define init_scaled_put_avg(idx, sz)
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32 - hcoeff[1] - hcoeff[2] - ... a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2} an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||......... intra?||||:Block01 :yes no||||:Block02 :....... 
..........||||:Block03 ::y DC ::ref index:||||:Block04 ::cb DC ::motion x :||||......... :cr DC ::motion y :||||....... ..........|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------ ------------ ------------|||Y subbands||Cb subbands||Cr subbands||||--- ---||--- ---||--- ---|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------ ------------ ------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction ------------|\ Dequantization ------------------- \||Reference frames|\ IDWT|------- -------|Motion \|||Frame 0||Frame 1||Compensation . OBMC v -------|------- -------|--------------. \------> Frame n output Frame Frame<----------------------------------/|...|------------------- Range Coder:============Binary Range Coder:------------------- The implemented range coder is an adapted version based upon "Range encoding: an algorithm for removing redundancy from a digitised message." by G. N. N. Martin. The symbols encoded by the Snow range coder are bits(0|1). The associated probabilities are not fix but change depending on the symbol mix seen so far. 
bit seen|new state ---------+----------------------------------------------- 0|256 - state_transition_table[256 - old_state];1|state_transition_table[old_state];state_transition_table={ 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:------------------------- FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector 
Prediction:=========================1. the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled top and top right vectors is used as motion vector prediction the used motion vector is the sum of the predictor and(mvx_diff, mvy_diff) *mv_scale Intra DC Prediction block[y][x] dc[1]
#define def_hor_down(size)
static void dc_8x8_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *_left, const uint8_t *_top)
static int t27(InterplayACMContext *s, unsigned ind, unsigned col)
static void dc_129_8x8_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *top)
static void av_always_inline idct16_1d(float *dst, const float *src, int dst_stridea, int dst_strideb, int src_stridea, int src_strideb, int add)
av_cold void FUNC() ff_vp9dsp_init(VP9DSPContext *dsp)
static av_always_inline void iadst16_1d(const dctcoef *in, ptrdiff_t stride, dctcoef *out, int pass)
static void dc_top_16x16_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *_top)
static void dc_128_4x4_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *top)
static void dc_top_8x8_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *_top)
#define i(width, name, range_min, range_max)
static av_always_inline void iadst4_1d(const dctcoef *in, ptrdiff_t stride, dctcoef *out, int pass)
#define scaled_filter_fn_set(avg)
static void dc_129_4x4_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *top)
#define def_diag_downright(size)
static void hor_4x4_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *_left, const uint8_t *top)
static void dc_left_4x4_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *_left, const uint8_t *top)
static void dc_128_16x16_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *top)
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32 - hcoeff[1] - hcoeff[2] - ... a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2} an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||......... intra?||||:Block01 :yes no||||:Block02 :....... 
..........||||:Block03 ::y DC ::ref index:||||:Block04 ::cb DC ::motion x :||||......... :cr DC ::motion y :||||....... ..........|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------ ------------ ------------|||Y subbands||Cb subbands||Cr subbands||||--- ---||--- ---||--- ---|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------ ------------ ------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction ------------|\ Dequantization ------------------- \||Reference frames|\ IDWT|------- -------|Motion \|||Frame 0||Frame 1||Compensation . OBMC v -------|------- -------|--------------. \------> Frame n output Frame Frame<----------------------------------/|...|------------------- Range Coder:============Binary Range Coder:------------------- The implemented range coder is an adapted version based upon "Range encoding: an algorithm for removing redundancy from a digitised message." by G. N. N. Martin. The symbols encoded by the Snow range coder are bits(0|1). The associated probabilities are not fix but change depending on the symbol mix seen so far. 
bit seen|new state ---------+----------------------------------------------- 0|256 - state_transition_table[256 - old_state];1|state_transition_table[old_state];state_transition_table={ 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:------------------------- FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector 
Prediction: ========================= 1. the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames: scaled_mv = (mv * (256 * (current_reference + 1) / (mv.reference + 1)) + 128) >> 8 2. the median of the scaled left
static void dc_left_16x16_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *_left, const uint8_t *top)
static void vert_8x8_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *_top)
static void av_always_inline idct8_1d(float *dst, const float *src, int dst_stridea, int dst_strideb, int src_stridea, int src_strideb, int add)
#define bilin_1d_fn(opn, opa, dir, ds)
#define filter_8tap_2d_fn(opn, opa)
#define filter_fn_set(avg)
#define itxfm_wrap(sz, bits)
static av_always_inline int hev(uint8_t *p, ptrdiff_t stride, int thresh)
static void dc_top_4x4_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *_top)
#define scaled_filter_8tap_fn(opn, opa)
static void dc_127_16x16_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *top)
#define init_subpel3(idx, type)
#define def_vert_left(size)
void(* loop_filter_16[2])(uint8_t *dst, ptrdiff_t stride, int mb_lim, int lim, int hev_thr)
static void dc_32x32_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *_left, const uint8_t *_top)
#define def_vert_right(size)
static void hor_16x16_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *_left, const uint8_t *top)
static av_always_inline void do_8tap_2d_c(uint8_t *_dst, ptrdiff_t dst_stride, const uint8_t *_src, ptrdiff_t src_stride, int w, int h, const int16_t *filterx, const int16_t *filtery, int avg)
static void vert_16x16_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *_top)
#define bilin_2d_fn(opn, opa)
static void dc_129_32x32_c(uint8_t *_dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *top)