#define VP9_SYNCCODE 0x498342

#define REF_FRAME_MVPAIR 1
#define REF_FRAME_SEGMAP 2

#define MAX_SEGMENT 8

        unsigned coef[4][2][2][6][6][3];
        unsigned eob[4][2][2][6][6][2];

        { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
        { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },

        { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
        { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
    s->cols = (w + 7) >> 3;
    s->rows = (h + 7) >> 3;
#define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
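/* assign() carves fixed-size per-superblock-column arrays out of one larger
 * allocation: p is the moving cursor, and each call advances it by
 * sb_cols * n elements of the target type. Illustrative use (array names
 * and sizes are examples, not the decoder's exact layout):
 *
 *     assign(s->intra_pred_data[0], uint8_t *, 64);
 *     assign(s->above_mode_ctx,     uint8_t *, 16);
 */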
    int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;

    chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
    chroma_eobs   = 16 * 16 >> (s->ss_h + s->ss_v);

                                    16 * 16 + 2 * chroma_eobs) * sbs);

                                   16 * 16 + 2 * chroma_eobs);
    return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
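    /* Inverse recentering around the predictor m: v > 2 * m keeps its raw
     * value, otherwise odd v lands (v + 1) / 2 below m and even v lands
     * v / 2 above it, so small decoded values stay near m. E.g. m = 100:
     * v = 0 -> 100, v = 1 -> 99, v = 2 -> 101, v = 3 -> 98. */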
    static const int inv_map_table[255] = {
          7,  20,  33,  46,  59,  72,  85,  98, 111, 124, 137, 150, 163, 176,
        189, 202, 215, 228, 241, 254,   1,   2,   3,   4,   5,   6,   8,   9,
         10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  21,  22,  23,  24,
         25,  26,  27,  28,  29,  30,  31,  32,  34,  35,  36,  37,  38,  39,
         40,  41,  42,  43,  44,  45,  47,  48,  49,  50,  51,  52,  53,  54,
         55,  56,  57,  58,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
         70,  71,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,
         86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  99, 100,
        101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
        116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
        131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
        146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
        161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
        177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
        192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
        207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
        222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
        237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
    s->bpp = 8 + bits * 2;
        res = pix_fmt_rgb[bits];
    static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
            res = pix_fmt_for_ss[bits][1][1];
    int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
    for (i = 0; i < 3; i++) {

                   "Ref pixfmt (%s) did not match current frame (%s)",

        } else if (refw == w && refh == h) {

            if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {

                       "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",

            s->mvscale[i][0] = (refw << 14) / w;
            s->mvscale[i][1] = (refh << 14) / h;
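            /* 14-bit fixed-point scale factors from reference to current
             * frame size: 1 << 14 means "same size". E.g. a 960-wide
             * reference for a 1920-wide frame gives
             * (960 << 14) / 1920 = 8192, i.e. a scale of 0.5. */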
    for (i = 0; i < 4; i++)

    for (i = 0; i < 2; i++)

    for (i = 0; i < 7; i++)

    for (i = 0; i < 3; i++)

               "Reference segmap (temp=%d,update=%d) enabled on size-change!\n",

    for (i = 0; i < 8; i++) {
    int qyac, qydc, quvac, quvdc, lflvl, sh;

    qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);

    qyac = av_clip_uintp2(qyac, 8);

                    av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
                for (j = 1; j < 4; j++) {
        av_log(ctx, AV_LOG_ERROR,
               "Failed to initialize decoder for %dx%d @ %d\n", w, h, fmt);
    for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
    max = FFMAX(0, max - 1);
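    /* Upper bound for the tile-column count: halve the span while every
     * tile still covers at least four 64x64 superblock columns. E.g.
     * sb_cols = 30: the loop exits at max = 3 (30 >> 3 < 4), and
     * FFMAX(0, max - 1) = 2 allows at most 1 << 2 = 4 tile columns. */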
    if (size2 > size - (data2 - data)) {
    for (i = 0; i < 2; i++)

    for (i = 0; i < 2; i++)
        for (j = 0; j < 2; j++)

    for (i = 0; i < 2; i++)
        for (j = 0; j < 3; j++)

    for (i = 0; i < 4; i++) {

        for (j = 0; j < 2; j++)
            for (k = 0; k < 2; k++)
                for (l = 0; l < 6; l++)
                    for (m = 0; m < 6; m++) {

                        if (m >= 3 && l == 0)

                        for (n = 0; n < 3; n++) {

        for (j = 0; j < 2; j++)
            for (k = 0; k < 2; k++)
                for (l = 0; l < 6; l++)
                    for (m = 0; m < 6; m++) {

    for (i = 0; i < 3; i++)

    for (i = 0; i < 7; i++)
        for (j = 0; j < 3; j++)

    for (i = 0; i < 4; i++)
        for (j = 0; j < 2; j++)

    for (i = 0; i < 4; i++)

    for (i = 0; i < 5; i++)

    for (i = 0; i < 5; i++) {

    for (i = 0; i < 5; i++)

    for (i = 0; i < 4; i++)
        for (j = 0; j < 9; j++)

    for (i = 0; i < 4; i++)
        for (j = 0; j < 4; j++)
            for (k = 0; k < 3; k++)

    for (i = 0; i < 3; i++)

    for (i = 0; i < 2; i++) {

        for (j = 0; j < 10; j++)

        for (j = 0; j < 10; j++)

    for (i = 0; i < 2; i++) {
        for (j = 0; j < 2; j++)
            for (k = 0; k < 3; k++)

        for (j = 0; j < 3; j++)

    for (i = 0; i < 2; i++) {

    return (data2 - data) + size2;
                         VP56mv *pmv, int ref, int z, int idx, int sb)
    static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
        [BS_64x64] = {{  3, -1 }, { -1,  3 }, {  4, -1 }, { -1,  4 },
                      { -1, -1 }, {  0, -1 }, { -1,  0 }, {  6, -1 }},
        [BS_64x32] = {{  0, -1 }, { -1,  0 }, {  4, -1 }, { -1,  2 },
                      { -1, -1 }, {  0, -3 }, { -3,  0 }, {  2, -1 }},
        [BS_32x64] = {{ -1,  0 }, {  0, -1 }, { -1,  4 }, {  2, -1 },
                      { -1, -1 }, { -3,  0 }, {  0, -3 }, { -1,  2 }},
        [BS_32x32] = {{  1, -1 }, { -1,  1 }, {  2, -1 }, { -1,  2 },
                      { -1, -1 }, {  0, -3 }, { -3,  0 }, { -3, -3 }},
        [BS_32x16] = {{  0, -1 }, { -1,  0 }, {  2, -1 }, { -1, -1 },
                      { -1,  1 }, {  0, -3 }, { -3,  0 }, { -3, -3 }},
        [BS_16x32] = {{ -1,  0 }, {  0, -1 }, { -1,  2 }, { -1, -1 },
                      {  1, -1 }, { -3,  0 }, {  0, -3 }, { -3, -3 }},
        [BS_16x16] = {{  0, -1 }, { -1,  0 }, {  1, -1 }, { -1,  1 },
                      { -1, -1 }, {  0, -3 }, { -3,  0 }, { -3, -3 }},
        [BS_16x8]  = {{  0, -1 }, { -1,  0 }, {  1, -1 }, { -1, -1 },
                      {  0, -2 }, { -2,  0 }, { -2, -1 }, { -1, -2 }},
        [BS_8x16]  = {{ -1,  0 }, {  0, -1 }, { -1,  1 }, { -1, -1 },
                      { -2,  0 }, {  0, -2 }, { -1, -2 }, { -2, -1 }},
        [BS_8x8]   = {{  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
                      { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
        [BS_8x4]   = {{  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
                      { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
        [BS_4x8]   = {{  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
                      { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
        [BS_4x4]   = {{  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
                      { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
    };
    int row = s->row, col = s->col, row7 = s->row7;
    const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
#define INVALID_MV 0x80008000U
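/* 0x8000 is not a representable MV component here, so two of them packed
 * into one 32-bit word form a sentinel that can never collide with a real
 * motion vector read via AV_RN32A(). */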
#define RETURN_DIRECT_MV(mv) \

        uint32_t m = AV_RN32A(&mv); \

        } else if (mem == INVALID_MV) { \

        } else if (m != mem) { \
        if (sb == 2 || sb == 1) {

        } else if (sb == 3) {
#define RETURN_MV(mv) \

            av_assert2(idx == 1); \
            av_assert2(mem != INVALID_MV); \
            if (mem_sub8x8 == INVALID_MV) { \
                clamp_mv(&tmp, &mv, s); \
                m = AV_RN32A(&tmp); \

                mem_sub8x8 = AV_RN32A(&mv); \
            } else if (mem_sub8x8 != AV_RN32A(&mv)) { \
                clamp_mv(&tmp, &mv, s); \
                m = AV_RN32A(&tmp); \
            uint32_t m = AV_RN32A(&mv); \

                clamp_mv(pmv, &mv, s); \

            } else if (mem == INVALID_MV) { \

            } else if (m != mem) { \
                clamp_mv(pmv, &mv, s); \
        if (mv->ref[0] == ref) {

        } else if (mv->ref[1] == ref) {

        if (mv->ref[0] == ref) {

        } else if (mv->ref[1] == ref) {
    for (; i < 8; i++) {
        int c = p[i][0] + col, r = p[i][1] + row;

            if (mv->ref[0] == ref) {

            } else if (mv->ref[1] == ref) {

            if (mv->ref[0] == ref) {

            } else if (mv->ref[1] == ref) {
#define RETURN_SCALE_MV(mv, scale) \

        VP56mv mv_temp = { -mv.x, -mv.y }; \
        RETURN_MV(mv_temp); \
    for (i = 0; i < 8; i++) {
        int c = p[i][0] + col, r = p[i][1] + row;

            if (mv->ref[0] != ref && mv->ref[0] >= 0) {

            if (mv->ref[1] != ref && mv->ref[1] >= 0 &&

            if (mv->ref[0] != ref && mv->ref[0] >= 0) {

            if (mv->ref[1] != ref && mv->ref[1] >= 0 &&

#undef RETURN_SCALE_MV
        for (n = 0, m = 0; m < c; m++) {

        n = (n << 3) | (bit << 1);

    return sign ? -(n + 1) : (n + 1);
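    /* MV components are coded as sign plus magnitude; the magnitude is
     * biased by 1 because a zero component is already signalled by the
     * joint MV type, so this path only ever returns nonzero values. */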
                     mode == NEWMV ? -1 : sb);

        if ((mode == NEWMV || sb == -1) &&

        if (mode == NEWMV) {

                     mode == NEWMV ? -1 : sb);
        if ((mode == NEWMV || sb == -1) &&

        if (mode == NEWMV) {
        int v16 = v * 0x0101;

        uint32_t v32 = v * 0x01010101;

        uint64_t v64 = v * 0x0101010101010101ULL;

            uint32_t v32 = v * 0x01010101;
        0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf

        0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
    int row = s->row, col = s->col, row7 = s->row7;
    enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];

    int vref, filter_id;
        for (y = 0; y < h4; y++) {
            int idx_base = (y + row) * 8 * s->sb_cols + col;
            for (x = 0; x < w4; x++)
                pred = FFMIN(pred, refsegmap[idx_base + x]);
    if (have_a && have_l) {

    } else if (have_l) {
            l[0] = a[1] = b->mode[1];

            l[0] = a[1] = b->mode[1] = b->mode[0];

            l[1] = a[1] = b->mode[3];

            l[1] = a[1] = b->mode[3] = b->mode[2];

            l[1] = a[1] = b->mode[3] = b->mode[1];
1631 }
else if (b->
intra) {
        static const uint8_t size_group[10] = {
            3, 3, 3, 3, 2, 2, 2, 1, 1, 1
        };
        int sz = size_group[b->bs];
        static const uint8_t inter_mode_ctx_lut[14][14] = {
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
        };
1723 }
else if (have_l) {
            if (refl == refa && refa == s->varcompref[1]) {

                c = (refa == refl) ? 3 : 1;

                    c = (refl == refa) ? 4 : 2;
1790 }
else if (have_l) {
1916 }
else if (have_l) {
1930 b->
ref[0] = 1 + bit;
        static const uint8_t off[10] = {
            3, 0, 0, 1, 0, 0, 0, 0, 0, 0
        };
#if HAVE_FAST_64BIT
#define SPLAT_CTX(var, val, n) \
    switch (n) { \
    case 1:  var = val;                                    break; \
    case 2:  AV_WN16A(&var, val * 0x0101);                 break; \
    case 4:  AV_WN32A(&var, val * 0x01010101);             break; \
    case 8:  AV_WN64A(&var, val * 0x0101010101010101ULL); break; \
    default: { \
        uint64_t v64 = val * 0x0101010101010101ULL; \
        AV_WN64A(              &var,     v64); \
        AV_WN64A(&((uint8_t *) &var)[8], v64); \
        break; \
    } \
    }
#else
#define SPLAT_CTX(var, val, n) \
    switch (n) { \
    case 1:  var = val;                        break; \
    case 2:  AV_WN16A(&var, val * 0x0101);     break; \
    case 4:  AV_WN32A(&var, val * 0x01010101); break; \
    case 8: { \
        uint32_t v32 = val * 0x01010101; \
        AV_WN32A(              &var,     v32); \
        AV_WN32A(&((uint8_t *) &var)[4], v32); \
        break; \
    } \
    default: { \
        uint32_t v32 = val * 0x01010101; \
        AV_WN32A(              &var,      v32); \
        AV_WN32A(&((uint8_t *) &var)[4],  v32); \
        AV_WN32A(&((uint8_t *) &var)[8],  v32); \
        AV_WN32A(&((uint8_t *) &var)[12], v32); \
        break; \
    } \
    }
#endif
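/* Both variants splat one byte value across n context bytes with as few
 * aligned stores as possible: multiplying by 0x0101... replicates the low
 * byte into every byte of the word, e.g. 3 * 0x01010101 = 0x03030303. */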
#define SET_CTXS(dir, off, n) \
    do { \
        SPLAT_CTX(s->dir##_skip_ctx[off],      b->skip,          n); \
        SPLAT_CTX(s->dir##_txfm_ctx[off],      b->tx,            n); \
        SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
        if (!s->keyframe && !s->intraonly) { \
            SPLAT_CTX(s->dir##_intra_ctx[off], b->intra,   n); \
            SPLAT_CTX(s->dir##_comp_ctx[off],  b->comp,    n); \
            SPLAT_CTX(s->dir##_mode_ctx[off],  b->mode[3], n); \
            if (!b->intra) { \
                SPLAT_CTX(s->dir##_ref_ctx[off], vref, n); \
                if (s->filtermode == FILTER_SWITCHABLE) { \
                    SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \
                } \
            } \
        } \
    } while (0)
    case 1: SET_CTXS(above, col, 1); break;
    case 2: SET_CTXS(above, col, 2); break;
    case 4: SET_CTXS(above, col, 4); break;
    case 8: SET_CTXS(above, col, 8); break;

    case 1: SET_CTXS(left, row7, 1); break;
    case 2: SET_CTXS(left, row7, 2); break;
    case 4: SET_CTXS(left, row7, 4); break;
    case 8: SET_CTXS(left, row7, 8); break;
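    /* w4/h4 count 4x4 units (1..8 per 64x64 superblock edge), so each case
     * splats the block's decoding state across exactly the above/left
     * context entries it spans, starting at col (above) and row7 (left). */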
        for (n = 0; n < w4 * 2; n++) {

        for (n = 0; n < h4 * 2; n++) {

        for (y = 0; y < h4; y++) {
            int x, o = (row + y) * s->sb_cols * 8 + col;

                for (x = 0; x < w4; x++) {

            } else if (b->comp) {
                for (x = 0; x < w4; x++) {
                    mv[x].ref[0] = b->ref[0];
                    mv[x].ref[1] = b->ref[1];

                for (x = 0; x < w4; x++) {
                    mv[x].ref[0] = b->ref[0];
                                                       int is_tx32x32, int is8bitsperpixel, int bpp, unsigned (*cnt)[6][3],
                                                       unsigned (*eob)[6][2], uint8_t (*p)[6][11],
                                                       int nnz, const int16_t *scan, const int16_t (*nb)[2],
                                                       const int16_t *band_counts, const int16_t *qmul)

    int i = 0, band = 0, band_left = band_counts[band];

            cnt[band][nnz][0]++;

                band_left = band_counts[++band];

            nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;

            if (++i == n_coeffs)

            cnt[band][nnz][1]++;

            cnt[band][nnz][2]++;

                cache[rc] = val = 2;
            if (!is8bitsperpixel) {
#define STORE_COEF(c, i, v) do { \
    if (is8bitsperpixel) { \
        c[i] = v; \
    } else { \
        AV_WN32A(&c[i * 2], v); \
    } \
} while (0)
            band_left = band_counts[++band];

        nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
    } while (++i < n_coeffs);
                                unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
                                uint8_t (*p)[6][11], int nnz, const int16_t *scan,
                                const int16_t (*nb)[2], const int16_t *band_counts,
                                const int16_t *qmul)

                                   nnz, scan, nb, band_counts, qmul);
                                unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
                                uint8_t (*p)[6][11], int nnz, const int16_t *scan,
                                const int16_t (*nb)[2], const int16_t *band_counts,
                                const int16_t *qmul)

                                   nnz, scan, nb, band_counts, qmul);
                                unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
                                uint8_t (*p)[6][11], int nnz, const int16_t *scan,
                                const int16_t (*nb)[2], const int16_t *band_counts,
                                const int16_t *qmul)

                                   nnz, scan, nb, band_counts, qmul);
                                unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
                                uint8_t (*p)[6][11], int nnz, const int16_t *scan,
                                const int16_t (*nb)[2], const int16_t *band_counts,
                                const int16_t *qmul)

                                   nnz, scan, nb, band_counts, qmul);
    int row = s->row, col = s->col;

    int end_x = FFMIN(2 * (s->cols - col), w4);
    int end_y = FFMIN(2 * (s->rows - row), h4);
    int n, pl, x, y, res;

    const int16_t * const *yscans = vp9_scans[tx];

    static const int16_t band_counts[4][8] = {
        { 1, 2, 3, 4,  3,   16 - 13 },
        { 1, 2, 3, 4, 11,   64 - 21 },
        { 1, 2, 3, 4, 11,  256 - 21 },
        { 1, 2, 3, 4, 11, 1024 - 21 },
    };
    const int16_t *y_band_counts  = band_counts[b->tx];
    const int16_t *uv_band_counts = band_counts[b->uvtx];
    int bytesperpixel = is8bitsperpixel ? 1 : 2;
    int total_coeff = 0;
#define MERGE(la, end, step, rd) \
    for (n = 0; n < end; n += step) \
        la[n] = !!rd(&la[n])
#define MERGE_CTX(step, rd) \
    do { \
        MERGE(l, end_y, step, rd); \
        MERGE(a, end_x, step, rd); \
    } while (0)
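/* MERGE collapses the per-4x4 nonzero contexts of a larger transform into
 * single flags: rd() reads 2/4/8 context bytes at once and !! reduces the
 * group to "any nonzero". MERGE_CTX applies it to both the left and above
 * edges. */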
#define DECODE_Y_COEF_LOOP(step, mode_index, v) \
    for (n = 0, y = 0; y < end_y; y += step) { \
        for (x = 0; x < end_x; x += step, n += step * step) { \
            enum TxfmType txtp = vp9_intra_txfm_type[b->mode[mode_index]]; \
            res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
                  (s, s->block + 16 * n * bytesperpixel, 16 * step * step, \
                   c, e, p, a[x] + l[y], yscans[txtp], \
                   ynbs[txtp], y_band_counts, qmul[0]); \
            a[x] = l[y] = !!res; \
            total_coeff |= !!res; \

                AV_WN16A(&s->eob[n], res); \
#define SPLAT(la, end, step, cond) \

        for (n = 1; n < end; n += step) \
            la[n] = la[n - 1]; \
    } else if (step == 4) { \

            for (n = 0; n < end; n += step) \
                AV_WN32A(&la[n], la[n] * 0x01010101); \

            for (n = 0; n < end; n += step) \
                memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \

            if (HAVE_FAST_64BIT) { \
                for (n = 0; n < end; n += step) \
                    AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \

                for (n = 0; n < end; n += step) { \
                    uint32_t v32 = la[n] * 0x01010101; \
                    AV_WN32A(&la[n], v32); \
                    AV_WN32A(&la[n + 4], v32); \

            for (n = 0; n < end; n += step) \
                memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
#define SPLAT_CTX(step) \
    do { \
        SPLAT(a, end_x, step, end_x == w4); \
        SPLAT(l, end_y, step, end_y == h4); \
    } while (0)
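/* The reverse of MERGE_CTX: after a block is decoded, its single
 * nonzero-coefficient flag is replicated back over every 4x4 context
 * position it covers. cond (the block reaching its full width/height)
 * selects the word-splat fast path; otherwise a bounded memset avoids
 * writing past the clipped edge. */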
#define DECODE_UV_COEF_LOOP(step, v) \
    for (n = 0, y = 0; y < end_y; y += step) { \
        for (x = 0; x < end_x; x += step, n += step * step) { \
            res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
                  (s, s->uvblock[pl] + 16 * n * bytesperpixel, \
                   16 * step * step, c, e, p, a[x] + l[y], \
                   uvscan, uvnb, uv_band_counts, qmul[1]); \
            a[x] = l[y] = !!res; \
            total_coeff |= !!res; \

                AV_WN16A(&s->uveob[pl][n], res); \

                s->uveob[pl][n] = res; \
    for (pl = 0; pl < 2; pl++) {
                                         uint8_t *dst_edge, ptrdiff_t stride_edge,
                                         uint8_t *dst_inner, ptrdiff_t stride_inner,
                                         uint8_t *l, int col, int x, int w,

                                         int p, int ss_h, int ss_v, int bytesperpixel)

    int have_top = row > 0 || y > 0;

    int have_right = x < w - 1;
    static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
    static const struct {

        [DC_PRED]         = { .needs_top = 1, .needs_left = 1 },

        [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
        [HOR_DOWN_PRED]   = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },

        [HOR_UP_PRED]     = { .needs_left = 1, .invert_left = 1 },
        [TM_VP8_PRED]     = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
    mode = mode_conv[mode][have_left][have_top];
    if (edges[mode].needs_top) {

        int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !ss_h) - x) * 4;
        int n_px_need_tr = 0;
        if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
        top = !(row & 7) && !y ?

              y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];

        topleft = !(row & 7) && !y ?

                  y == 0 || x == 0 ? &dst_edge[-stride_edge] :
                  &dst_inner[-stride_inner];
            (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
            (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
            n_px_need + n_px_need_tr <= n_px_have) {
            if (n_px_need <= n_px_have) {
                memcpy(*a, top, n_px_need * bytesperpixel);
#define memset_bpp(c, i1, v, i2, num) do { \
    if (bytesperpixel == 1) { \
        memset(&(c)[(i1)], (v)[(i2)], (num)); \
    } else { \
        int n, val = AV_RN16A(&(v)[(i2) * 2]); \
        for (n = 0; n < (num); n++) { \
            AV_WN16A(&(c)[((i1) + n) * 2], val); \
        } \
    } \
} while (0)

                memcpy(*a, top, n_px_have * bytesperpixel);
                memset_bpp(*a, n_px_have, (*a), n_px_have - 1, n_px_need - n_px_have);
#define memset_val(c, val, num) do { \
    if (bytesperpixel == 1) { \
        memset((c), (val), (num)); \
    } else { \
        int n; \
        for (n = 0; n < (num); n++) { \
            AV_WN16A(&(c)[n * 2], (val)); \
        } \
    } \
} while (0)

            memset_val(*a, (128 << (bpp - 8)) - 1, n_px_need);
        if (edges[mode].needs_topleft) {
            if (have_left && have_top) {
#define assign_bpp(c, i1, v, i2) do { \
    if (bytesperpixel == 1) { \
        (c)[(i1)] = (v)[(i2)]; \
    } else { \
        AV_COPY16(&(c)[(i1) * 2], &(v)[(i2) * 2]); \
    } \
} while (0)
#define assign_val(c, i, v) do { \
    if (bytesperpixel == 1) { \
        (c)[(i)] = (v); \
    } else { \
        AV_WN16A(&(c)[(i) * 2], (v)); \
    } \
} while (0)

                assign_val((*a), -1, (128 << (bpp - 8)) + (have_top ? +1 : -1));
        if (tx == TX_4X4 && edges[mode].needs_topright) {
            if (have_top && have_right &&
                n_px_need + n_px_need_tr <= n_px_have) {
                memcpy(&(*a)[4 * bytesperpixel], &top[4 * bytesperpixel], 4 * bytesperpixel);
    if (edges[mode].needs_left) {

            int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !ss_v) - y) * 4;
            uint8_t *dst = x == 0 ? dst_edge : dst_inner;
            ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;

            if (edges[mode].invert_left) {
                if (n_px_need <= n_px_have) {
                    for (i = 0; i < n_px_need; i++)

                    for (i = 0; i < n_px_have; i++)

                    memset_bpp(l, n_px_have, l, n_px_have - 1, n_px_need - n_px_have);

                if (n_px_need <= n_px_have) {
                    for (i = 0; i < n_px_need; i++)
                        assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);

                    for (i = 0; i < n_px_have; i++)
                        assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
                    memset_bpp(l, 0, l, n_px_need - n_px_have, n_px_need - n_px_have);

            memset_val(l, (128 << (bpp - 8)) + 1, 4 << tx);
                                       ptrdiff_t uv_off, int bytesperpixel)
    int row = s->row, col = s->col;
    int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
    int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
    int end_x = FFMIN(2 * (s->cols - col), w4);
    int end_y = FFMIN(2 * (s->rows - row), h4);

    int uvstep1d = 1 << b->uvtx, p;
    for (n = 0, y = 0; y < end_y; y += step1d) {
        uint8_t *ptr = dst, *ptr_r = dst_r;
        for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d * bytesperpixel,
                               ptr_r += 4 * step1d * bytesperpixel, n += step) {

                            col, x, w4, row, y, b->tx, 0, 0, 0, bytesperpixel);

                                   s->block + 16 * n * bytesperpixel, eob);
    step = 1 << (b->uvtx * 2);
    for (p = 0; p < 2; p++) {
        dst = s->dst[1 + p];

        for (n = 0, y = 0; y < end_y; y += uvstep1d) {
            uint8_t *ptr = dst, *ptr_r = dst_r;
            for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d * bytesperpixel,
                                   ptr_r += 4 * uvstep1d * bytesperpixel, n += step) {

                                ptr, s->uv_stride, l, col, x, w4, row, y,

                                       s->uvblock[p] + 16 * n * bytesperpixel, eob);
                                            uint8_t *dst, ptrdiff_t dst_stride,
                                            const uint8_t *ref, ptrdiff_t ref_stride,

                                            ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
                                            int px, int py, int pw, int ph,
                                            int bw, int bh, int w, int h, int bytesperpixel,
                                            const uint16_t *scale, const uint8_t *step)
#define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14)
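/* scale[] holds the 14-bit mvscale ratios computed when the reference and
 * current frame sizes differ, so scale_mv() maps a coordinate or vector
 * into reference-frame units; the int64_t cast avoids overflow before the
 * >> 14 normalization. */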
    int refbw_m1, refbh_m1;

    mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 3, (s->cols * 8 - x + px + 3) << 3);
    mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 3, (s->rows * 8 - y + py + 3) << 3);

    ref += y * ref_stride + x * bytesperpixel;

    refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
    refbh_m1 = ((bh - 1) * step[1] + my) >> 4;

    th = (y + refbh_m1 + 4 + 7) >> 6;

    if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {

                                 ref - 3 * ref_stride - 3 * bytesperpixel,

                                 refbw_m1 + 8, refbh_m1 + 8,
                                 x - 3, y - 3, w, h);

    smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
                                              ptrdiff_t dst_stride,
                                              const uint8_t *ref_u, ptrdiff_t src_stride_u,
                                              const uint8_t *ref_v, ptrdiff_t src_stride_v,

                                              ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
                                              int px, int py, int pw, int ph,
                                              int bw, int bh, int w, int h, int bytesperpixel,
                                              const uint16_t *scale, const uint8_t *step)
    int refbw_m1, refbh_m1;

        mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 4, (s->cols * 4 - x + px + 3) << 4);

        mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 3, (s->cols * 8 - x + px + 3) << 3);

        mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 4, (s->rows * 4 - y + py + 3) << 4);

        mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 3, (s->rows * 8 - y + py + 3) << 3);

    ref_u += y * src_stride_u + x * bytesperpixel;
    ref_v += y * src_stride_v + x * bytesperpixel;

    refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
    refbh_m1 = ((bh - 1) * step[1] + my) >> 4;

    th = (y + refbh_m1 + 4 + 7) >> (6 - s->ss_v);

    if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {

                                 ref_u - 3 * src_stride_u - 3 * bytesperpixel,

                                 refbw_m1 + 8, refbh_m1 + 8,
                                 x - 3, y - 3, w, h);

        smc(dst_u, dst_stride, ref_u, 288, bh, mx, my, step[0], step[1]);

                                 ref_v - 3 * src_stride_v - 3 * bytesperpixel,

                                 refbw_m1 + 8, refbh_m1 + 8,
                                 x - 3, y - 3, w, h);

        smc(dst_v, dst_stride, ref_v, 288, bh, mx, my, step[0], step[1]);

        smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
        smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);
#define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
                    px, py, pw, ph, bw, bh, w, h, i) \
    mc_luma_scaled(s, s->dsp.s##mc, dst, dst_ls, src, src_ls, tref, row, col, \
                   mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
                   s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
#define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
                      row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
    mc_chroma_scaled(s, s->dsp.s##mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
                     row, col, mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
                     s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
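/* The actual predictor bodies come from a shared template that is included
 * twice below, once per sample size; inside it, every mc_luma_dir() /
 * mc_chroma_dir() call expands to the scaled wrappers defined above. */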
#define FN(x) x##_scaled_8bpp
#define BYTES_PER_PIXEL 1

#undef BYTES_PER_PIXEL
#define FN(x) x##_scaled_16bpp
#define BYTES_PER_PIXEL 2

#undef mc_chroma_dir

#undef BYTES_PER_PIXEL
                                              uint8_t *dst, ptrdiff_t dst_stride,
                                              const uint8_t *ref, ptrdiff_t ref_stride,

                                              ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
                                              int bw, int bh, int w, int h, int bytesperpixel)
    int mx = mv->x, my = mv->y, th;

    ref += y * ref_stride + x * bytesperpixel;
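    /* +4 and +7: a subpel filter reaches up to 4 rows past the block, and
     * the last 7 pixels of a superblock row can still be changed by the
     * next row's strongest loopfilter, so frame-threading must wait for
     * one extra 64-pixel row (hence the >> 6 below). */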
    th = (y + bh + 4 * !!my + 7) >> 6;

    if (x < !!mx * 3 || y < !!my * 3 ||
        x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {

                                 ref - !!my * 3 * ref_stride - !!mx * 3 * bytesperpixel,

                                 bw + !!mx * 7, bh + !!my * 7,
                                 x - !!mx * 3, y - !!my * 3, w, h);

    mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
                                                ptrdiff_t dst_stride,
                                                const uint8_t *ref_u, ptrdiff_t src_stride_u,
                                                const uint8_t *ref_v, ptrdiff_t src_stride_v,

                                                ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
                                                int bw, int bh, int w, int h, int bytesperpixel)

    int mx = mv->x << !s->ss_h, my = mv->y << !s->ss_v, th;
    ref_u += y * src_stride_u + x * bytesperpixel;
    ref_v += y * src_stride_v + x * bytesperpixel;

    th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);

    if (x < !!mx * 3 || y < !!my * 3 ||
        x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {

                                 ref_u - !!my * 3 * src_stride_u - !!mx * 3 * bytesperpixel,

                                 bw + !!mx * 7, bh + !!my * 7,
                                 x - !!mx * 3, y - !!my * 3, w, h);
        ref_u = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
        mc[!!mx][!!my](dst_u, dst_stride, ref_u, 160, bh, mx, my);

                                 ref_v - !!my * 3 * src_stride_v - !!mx * 3 * bytesperpixel,

                                 bw + !!mx * 7, bh + !!my * 7,
                                 x - !!mx * 3, y - !!my * 3, w, h);
        ref_v = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
        mc[!!mx][!!my](dst_v, dst_stride, ref_v, 160, bh, mx, my);
    } else {
        mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
        mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
#define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
                    px, py, pw, ph, bw, bh, w, h, i) \
    mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
                     mv, bw, bh, w, h, bytesperpixel)
#define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
                      row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
    mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
                       row, col, mv, bw, bh, w, h, bytesperpixel)
#define FN(x) x##_8bpp
#define BYTES_PER_PIXEL 1

#undef BYTES_PER_PIXEL
#define FN(x) x##_16bpp
#define BYTES_PER_PIXEL 2

#undef mc_luma_dir
#undef mc_chroma_dir

#undef BYTES_PER_PIXEL
    int row = s->row, col = s->col;

        if (bytesperpixel == 1) {
            inter_pred_scaled_8bpp(ctx);

            inter_pred_scaled_16bpp(ctx);

        if (bytesperpixel == 1) {
            inter_pred_8bpp(ctx);

            inter_pred_16bpp(ctx);
    int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
    int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
    int end_x = FFMIN(2 * (s->cols - col), w4);
    int end_y = FFMIN(2 * (s->rows - row), h4);

    int uvstep1d = 1 << b->uvtx, p;
        for (n = 0, y = 0; y < end_y; y += step1d) {

            for (x = 0; x < end_x; x += step1d,
                 ptr += 4 * step1d * bytesperpixel, n += step) {

                                           s->block + 16 * n * bytesperpixel, eob);

        step = 1 << (b->uvtx * 2);
        for (p = 0; p < 2; p++) {
            dst = s->dst[p + 1];
            for (n = 0, y = 0; y < end_y; y += uvstep1d) {

                for (x = 0; x < end_x; x += uvstep1d,
                     ptr += 4 * uvstep1d * bytesperpixel, n += step) {

                                               s->uvblock[p] + 16 * n * bytesperpixel, eob);
                                        int row_and_7, int col_and_7,
                                        int w, int h, int col_end, int row_end,

    static const unsigned wide_filter_col_mask[2] = { 0x11, 0x01 };
    static const unsigned wide_filter_row_mask[2] = { 0x03, 0x07 };
    if (tx == TX_4X4 && (ss_v | ss_h)) {

    if (tx == TX_4X4 && !skip_inter) {
        int t = 1 << col_and_7, m_col = (t << w) - t, y;

        int m_row_8 = m_col & wide_filter_col_mask[ss_h], m_row_4 = m_col - m_row_8;

        for (y = row_and_7; y < h + row_and_7; y++) {
            int col_mask_id = 2 - !(y & wide_filter_row_mask[ss_v]);

            mask[0][y][1] |= m_row_8;
            mask[0][y][2] |= m_row_4;

            if ((ss_h & ss_v) && (col_end & 1) && (y & 1)) {
                mask[1][y][col_mask_id] |= (t << (w - 1)) - t;
            } else {
                mask[1][y][col_mask_id] |= m_col;
            }

            mask[0][y][3] |= m_col;

            if (ss_h && (col_end & 1))
                mask[1][y][3] |= (t << (w - 1)) - t;
            else
                mask[1][y][3] |= m_col;
        int y, t = 1 << col_and_7, m_col = (t << w) - t;

        int mask_id = (tx == TX_8X8);
        static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
        int l2 = tx + ss_h - 1, step1d;
        int m_row = m_col & masks[l2];

        if (ss_h && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
            int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
            int m_row_8 = m_row - m_row_16;

            for (y = row_and_7; y < h + row_and_7; y++) {
                mask[0][y][0] |= m_row_16;
                mask[0][y][1] |= m_row_8;
            }
        } else {
            for (y = row_and_7; y < h + row_and_7; y++)
                mask[0][y][mask_id] |= m_row;
        }

        if (ss_v && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
            for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
                mask[1][y][0] |= m_col;
            if (y - row_and_7 == h - 1)
                mask[1][y][1] |= m_col;
        } else {
            for (y = row_and_7; y < h + row_and_7; y += step1d)
                mask[1][y][mask_id] |= m_col;
        }
    } else if (tx != TX_4X4) {
        int mask_id;

        mask_id = (tx == TX_8X8) || (h == ss_v);
        mask[1][row_and_7][mask_id] |= m_col;
        mask_id = (tx == TX_8X8) || (w == ss_h);
        for (y = row_and_7; y < h + row_and_7; y++)
            mask[0][y][mask_id] |= t;
    } else {
        int t8 = t & wide_filter_col_mask[ss_h], t4 = t - t8;

        for (y = row_and_7; y < h + row_and_7; y++) {

        mask[1][row_and_7][2 - !(row_and_7 & wide_filter_row_mask[ss_v])] |= m_col;
                     struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
    s->min_mv.x = -(128 + col * 64);
    s->min_mv.y = -(128 + row * 64);

    b->uvtx = b->tx - ((s->ss_h && w4 * 2 == (1 << b->tx)) ||
                       (s->ss_v && h4 * 2 == (1 << b->tx)));

    if (bytesperpixel == 1) {
#define SPLAT_ZERO_CTX(v, n) \
    switch (n) { \
    case 1:  v = 0;          break; \
    case 2:  AV_ZERO16(&v);  break; \
    case 4:  AV_ZERO32(&v);  break; \
    case 8:  AV_ZERO64(&v);  break; \
    case 16: AV_ZERO128(&v); break; \
    }
#define SPLAT_ZERO_YUV(dir, var, off, n, dir2) \
    do { \
        SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
        if (s->ss_##dir2) { \
            SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
            SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
        } else { \
            SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off * 2], n * 2); \
            SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off * 2], n * 2); \
        } \
    } while (0)
    s->block += w4 * h4 * 64 * bytesperpixel;

    s->eob += 4 * w4 * h4;
        emu[0] = (col + w4) * 8 > f->linesize[0] ||
                 (row + h4) > s->rows;
        emu[1] = (col + w4) * 4 > f->linesize[1] ||
                 (row + h4) > s->rows;
        s->dst[0] = f->data[0] + yoff;

        s->dst[1] = f->data[1] + uvoff;
        s->dst[2] = f->data[2] + uvoff;
            for (n = 0; o < w; n++) {

                                         s->tmp_y + o, 128, h, 0, 0);
                o += bw * bytesperpixel;

            for (n = s->ss_h; o < w; n++) {

                                         s->tmp_uv[0] + o, 128, h, 0, 0);

                                         s->tmp_uv[1] + o, 128, h, 0, 0);
                o += bw * bytesperpixel;
3351 o += bw * bytesperpixel;
3364 mask_edges(lflvl->
mask[0], 0, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
3369 b->
uvtx, skip_inter);
            limit >>= (sharp + 3) >> 2;
            limit = FFMIN(limit, 9 - sharp);

        limit = FFMAX(limit, 1);
        s->block += w4 * h4 * 64 * bytesperpixel;

        s->eob += 4 * w4 * h4;
                      ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)

    ptrdiff_t hbs = 4 >> bl;

        decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
    } else if (col + hbs < s->cols) {
        if (row + hbs < s->rows) {

            decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);

            decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
            yoff  += hbs * 8 * y_stride;
            uvoff += hbs * 8 * uv_stride >> s->ss_v;
            decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);

            decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
            yoff  += hbs * 8 * bytesperpixel;
            uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
            decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);

            decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
            decode_sb(ctx, row, col + hbs, lflvl,
                      yoff + 8 * hbs * bytesperpixel,
                      uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
            yoff  += hbs * 8 * y_stride;
            uvoff += hbs * 8 * uv_stride >> s->ss_v;
            decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
            decode_sb(ctx, row + hbs, col + hbs, lflvl,
                      yoff + 8 * hbs * bytesperpixel,
                      uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);

            decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
            decode_sb(ctx, row, col + hbs, lflvl,
                      yoff + 8 * hbs * bytesperpixel,
                      uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);

            decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3458 }
else if (row + hbs < s->rows) {
        decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
        yoff  += hbs * 8 * y_stride;
        uvoff += hbs * 8 * uv_stride >> s->ss_v;
        decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);

        decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);

    decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
                          ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)

    ptrdiff_t hbs = 4 >> bl;
        decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
    } else if (s->b->bl == bl) {
        decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);

            yoff  += hbs * 8 * y_stride;
            uvoff += hbs * 8 * uv_stride >> s->ss_v;
            decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);

            yoff  += hbs * 8 * bytesperpixel;
            uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
            decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
        if (col + hbs < s->cols) {
            if (row + hbs < s->rows) {
                decode_sb_mem(ctx, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
                              uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
                yoff  += hbs * 8 * y_stride;
                uvoff += hbs * 8 * uv_stride >> s->ss_v;
                decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
                decode_sb_mem(ctx, row + hbs, col + hbs, lflvl,
                              yoff + 8 * hbs * bytesperpixel,
                              uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);

                yoff  += hbs * 8 * bytesperpixel;
                uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
                decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
3517 }
else if (row + hbs < s->rows) {
3518 yoff += hbs * 8 * y_stride;
3519 uvoff += hbs * 8 * uv_stride >> s->
ss_v;
3520 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
    for (y = 0; y < 8; y += 2 << ss_v, dst += 16 * ls, lvl += 16 << ss_v) {
        uint8_t *ptr = dst, *l = lvl, *hmask1 = mask[y], *hmask2 = mask[y + 1 + ss_v];
        unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
        unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
        unsigned hm = hm1 | hm2 | hm13 | hm23;

        for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8 * bytesperpixel >> ss_h) {

                int L = *l, H = L >> 4;

                if (hmask1[0] & x) {
                    if (hmask2[0] & x) {

                } else if (hm2 & x) {

                        [0](ptr, ls, E, I, H);

                        [0](ptr, ls, E, I, H);

            } else if (hm2 & x) {
                int L = l[8 << ss_v], H = L >> 4;

                    [0](ptr + 8 * ls, ls, E, I, H);

                int L = *l, H = L >> 4;

            } else if (hm23 & x) {
                int L = l[8 << ss_v], H = L >> 4;
    for (y = 0; y < 8; y++, dst += 8 * ls >> ss_v) {

        unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];

        for (x = 1; vm & ~(x - 1); x <<= (2 << ss_h), ptr += 16 * bytesperpixel, l += 2 << ss_h) {

                int L = *l, H = L >> 4;

                if (vmask[0] & (x << (1 + ss_h))) {

                } else if (vm & (x << (1 + ss_h))) {

                    [!!(vmask[1] & (x << (1 + ss_h)))]
                    [1](ptr, ls, E, I, H);

                    [1](ptr, ls, E, I, H);

            } else if (vm & (x << (1 + ss_h))) {
                int L = l[1 + ss_h], H = L >> 4;

                    [1](ptr + 8 * bytesperpixel, ls, E, I, H);

                int L = *l, H = L >> 4;

                if (vm3 & (x << (1 + ss_h))) {

            } else if (vm3 & (x << (1 + ss_h))) {
                int L = l[1 + ss_h], H = L >> 4;
                          int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)

    for (p = 0; p < 2; p++) {
        dst = f->data[1 + p] + uvoff;
    int sb_start = ( idx      * n) >> log2_n;
    int sb_end   = ((idx + 1) * n) >> log2_n;
    *start = FFMIN(sb_start, n) << 3;
    *end   = FFMIN(sb_end,   n) << 3;
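    /* Evenly splits n superblocks across 1 << log2_n tiles and converts
     * the bounds to 8-pixel block units (one 64x64 superblock is 8 of
     * them). E.g. n = 9, log2_n = 1: tile 0 gets blocks [0, 32) and
     * tile 1 gets [32, 72). */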
                                        int max_count, int update_factor)

    unsigned ct = ct0 + ct1, p2, p1;

    p2 = ((ct0 << 8) + (ct >> 1)) / ct;
    p2 = av_clip(p2, 1, 255);
    ct = FFMIN(ct, max_count);
    update_factor = FASTDIV(update_factor * ct, max_count);
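    /* equivalent to (p1 * (256 - update_factor) + p2 * update_factor + 128) >> 8 */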
    *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
    for (i = 0; i < 4; i++)
        for (j = 0; j < 2; j++)
            for (k = 0; k < 2; k++)
                for (l = 0; l < 6; l++)
                    for (m = 0; m < 6; m++) {

                        if (l == 0 && m >= 3)

        adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
    for (i = 0; i < 3; i++)

    for (i = 0; i < 4; i++)

    for (i = 0; i < 5; i++)

    for (i = 0; i < 5; i++)

    for (i = 0; i < 5; i++) {

        adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
        adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);

    for (i = 0; i < 4; i++)
        for (j = 0; j < 4; j++) {

            adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
            adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
    for (i = 0; i < 2; i++) {

        adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
    for (i = 0; i < 4; i++) {

        adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);

    for (i = 0; i < 7; i++) {

        adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
        adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);

        adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
        adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
    for (i = 0; i < 2; i++) {
        unsigned *c, (*c2)[2], sum;

        sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];

        adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);

        adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);

        adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);

        for (j = 0; j < 10; j++)
            adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);

        for (j = 0; j < 2; j++) {

            adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
            adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);

            adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
            adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
    for (i = 0; i < 4; i++) {

        sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];

    for (i = 0; i < 10; i++) {

        sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
    for (i = 0; i < 3; i++) {

    for (i = 0; i < 8; i++) {
    int res, tile_row, tile_col, i, ref, row, col;

    ptrdiff_t yoff, uvoff, ls_y, ls_uv;

    } else if (res == 0) {

    for (i = 0; i < 8; i++) {

    if (!retain_segmap_ref) {

    for (i = 0; i < 8; i++) {

               "Failed to allocate block buffers\n");
        for (i = 0; i < 4; i++) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++)
            if (tile_size > size) {
             row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {

            ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;

                memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));

                 col < s->tiling.tile_col_end;
                 col += 8, yoff2 += 64 * bytesperpixel,
                 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {

                    memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));

                memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));

        if (row + 8 < s->rows) {
                   f->data[0] + yoff + 63 * ls_y,
                   8 * s->cols * bytesperpixel);

                   f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
                   8 * s->cols * bytesperpixel >> s->ss_h);

                   f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
                   8 * s->cols * bytesperpixel >> s->ss_h);
        lflvl_ptr = s->lflvl;
        for (col = 0; col < s->cols;
             col += 8, yoff2 += 64 * bytesperpixel,
             uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
4221 }
while (s->
pass++ == 1);
    for (i = 0; i < 8; i++) {
    for (i = 0; i < 3; i++)

    for (i = 0; i < 8; i++)

    for (i = 0; i < 3; i++) {

    for (i = 0; i < 8; i++) {
        (!ssrc->intra_pred_data[0] || s->cols != ssrc->cols || s->rows != ssrc->rows)) {
    for (i = 0; i < 3; i++) {

        if (ssrc->frames[i].tf.f->data[0]) {

    for (i = 0; i < 8; i++) {

        if (ssrc->next_refs[i].f->data[0]) {
    s->ss_v = ssrc->ss_v;
    s->ss_h = ssrc->ss_h;

    if (ssrc->segmentation.enabled) {