/* Constant equal to MAX_PB_SIZE + 32.
 * NOTE(review): the use site is not visible in this excerpt; the name suggests
 * an offset into a temporary PROF buffer -- confirm against the full file. */
30 #define PROF_TEMP_OFFSET (MAX_PB_SIZE + 32)
36 *x_off -=
pps->subpic_x[subpic_idx] >>
sps->hshift[!is_luma];
37 *y_off -=
pps->subpic_y[subpic_idx] >>
sps->vshift[!is_luma];
43 *pic_width =
pps->subpic_width[subpic_idx] >>
sps->hshift[!is_luma];
44 *pic_height =
pps->subpic_height[subpic_idx] >>
sps->vshift[!is_luma];
48 int x_off,
int y_off,
const int block_w,
const int block_h,
const int is_luma)
57 int pic_width, pic_height;
62 if (x_off < extra_before || y_off < extra_before ||
63 x_off >= pic_width - block_w - extra_after ||
64 y_off >= pic_height - block_h - extra_after) {
66 int offset = extra_before * *src_stride + (extra_before <<
fc->ps.sps->pixel_shift);
67 int buf_offset = extra_before * edge_emu_stride + (extra_before <<
fc->ps.sps->pixel_shift);
69 fc->vdsp.emulated_edge_mc(dst, *
src -
offset, edge_emu_stride, *src_stride,
70 block_w + extra, block_h + extra, x_off - extra_before, y_off - extra_before,
71 pic_width, pic_height);
73 *
src = dst + buf_offset;
74 *src_stride = edge_emu_stride;
81 int x_sb,
int y_sb,
int x_off,
int y_off,
const int block_w,
const int block_h,
const int is_luma)
90 int pic_width, pic_height;
96 if (x_off < extra_before || y_off < extra_before ||
97 x_off >= pic_width - block_w - extra_after ||
98 y_off >= pic_height - block_h - extra_after||
99 (x_off != x_sb || y_off != y_sb)) {
100 const int ps =
fc->ps.sps->pixel_shift;
102 const int offset = extra_before * *src_stride + (extra_before << ps);
103 const int buf_offset = extra_before * edge_emu_stride + (extra_before << ps);
105 const int start_x =
FFMIN(
FFMAX(x_sb - extra_before, 0), pic_width - 1);
106 const int start_y =
FFMIN(
FFMAX(y_sb - extra_before, 0), pic_height - 1);
107 const int width =
FFMAX(
FFMIN(pic_width, x_sb + block_w + extra_after) - start_x, 1);
108 const int height =
FFMAX(
FFMIN(pic_height, y_sb + block_h + extra_after) - start_y, 1);
110 fc->vdsp.emulated_edge_mc(dst, *
src -
offset, edge_emu_stride, *src_stride, block_w + extra, block_h + extra,
111 x_off - start_x - extra_before, y_off - start_y - extra_before,
width,
height);
113 *
src = dst + buf_offset;
114 *src_stride = edge_emu_stride;
119 int x_off,
int y_off,
const int block_w,
const int block_h)
125 int pic_width, pic_height;
140 *
src = dst + buf_offset;
141 *src_stride = edge_emu_stride;
/* Shorthand for calling emulated_edge() with a trailing argument of 1.
 * `lc`, `block_w` and `block_h` are not macro parameters: they are picked up
 * from the scope where the macro is expanded.
 * NOTE(review): the trailing 1 appears to be the is_luma flag -- confirm
 * against emulated_edge()'s signature. */
146 #define EMULATED_EDGE_LUMA(dst, src, src_stride, x_off, y_off) \
147 emulated_edge(lc, dst, src, src_stride, x_off, y_off, block_w, block_h, 1)
/* Shorthand for calling emulated_edge() with a trailing argument of 0.
 * `lc`, `block_w` and `block_h` are not macro parameters: they are picked up
 * from the scope where the macro is expanded.
 * NOTE(review): the trailing 0 appears to be the is_luma flag (chroma) --
 * confirm against emulated_edge()'s signature. */
149 #define EMULATED_EDGE_CHROMA(dst, src, src_stride, x_off, y_off) \
150 emulated_edge(lc, dst, src, src_stride, x_off, y_off, block_w, block_h, 0)
/* Shorthand for calling emulated_edge_dmvr() with a trailing argument of 1.
 * Takes both the (x_sb, y_sb) and the (x_off, y_off) positions and forwards
 * them unchanged. `lc`, `block_w` and `block_h` come from the expansion scope.
 * NOTE(review): the trailing 1 appears to be the is_luma flag -- confirm
 * against emulated_edge_dmvr()'s signature. */
152 #define EMULATED_EDGE_DMVR_LUMA(dst, src, src_stride, x_sb, y_sb, x_off, y_off) \
153 emulated_edge_dmvr(lc, dst, src, src_stride, x_sb, y_sb, x_off, y_off, block_w, block_h, 1)
/* Shorthand for calling emulated_edge_dmvr() with a trailing argument of 0.
 * Takes both the (x_sb, y_sb) and the (x_off, y_off) positions and forwards
 * them unchanged. `lc`, `block_w` and `block_h` come from the expansion scope.
 * NOTE(review): the trailing 0 appears to be the is_luma flag (chroma) --
 * confirm against emulated_edge_dmvr()'s signature. */
155 #define EMULATED_EDGE_DMVR_CHROMA(dst, src, src_stride, x_sb, y_sb, x_off, y_off) \
156 emulated_edge_dmvr(lc, dst, src, src_stride, x_sb, y_sb, x_off, y_off, block_w, block_h, 0)
/* Shorthand for calling emulated_edge_bilinear().
 * Unlike the other EMULATED_EDGE_* macros, the block size is taken from
 * `pred_w` / `pred_h` (not block_w / block_h); these and `lc` must be in scope
 * where the macro is expanded. */
158 #define EMULATED_EDGE_BILINEAR(dst, src, src_stride, x_off, y_off) \
159 emulated_edge_bilinear(lc, dst, src, src_stride, x_off, y_off, pred_w, pred_h)
168 const int weight_flag = (
IS_P(sh->
r) &&
pps->r->pps_weighted_pred_flag) ||
169 (
IS_B(sh->
r) &&
pps->r->pps_weighted_bipred_flag);
174 *denom =
w->log2_denom[c_idx > 0];
175 *wx =
w->weight[lx][c_idx][mvf->
ref_idx[lx]];
176 *ox =
w->offset[lx][c_idx][mvf->
ref_idx[lx]];
188 const int bcw_idx = mvf->
bcw_idx;
189 const int weight_flag = (
IS_P(sh->
r) &&
pps->r->pps_weighted_pred_flag) ||
190 (
IS_B(sh->
r) &&
pps->r->pps_weighted_bipred_flag && !dmvr_flag);
191 if ((!weight_flag && !bcw_idx) || (bcw_idx && lc->
cu->
ciip_flag))
203 *denom =
w->log2_denom[c_idx > 0];
213 int x_off,
int y_off,
const int block_w,
const int block_h)
216 const uint8_t *
src =
ref->data[0];
217 ptrdiff_t src_stride =
ref->linesize[0];
218 const int idx =
av_log2(block_w) - 1;
219 const int mx =
mv->x & 0xf;
220 const int my =
mv->y & 0xf;
226 src += y_off * src_stride + (x_off * (1 <<
fc->ps.sps->pixel_shift));
230 fc->vvcdsp.inter.put[
LUMA][idx][!!my][!!mx](dst,
src, src_stride, block_h, hf, vf, block_w);
234 int x_off,
int y_off,
const int block_w,
const int block_h,
const int c_idx)
237 const uint8_t *
src =
ref->data[c_idx];
238 ptrdiff_t src_stride =
ref->linesize[c_idx];
239 int hs =
fc->ps.sps->hshift[c_idx];
240 int vs =
fc->ps.sps->vshift[c_idx];
241 const int idx =
av_log2(block_w) - 1;
247 x_off +=
mv->x >> (4 + hs);
248 y_off +=
mv->y >> (4 + vs);
249 src += y_off * src_stride + (x_off * (1 <<
fc->ps.sps->pixel_shift));
252 fc->vvcdsp.inter.put[
CHROMA][idx][!!my][!!mx](dst,
src, src_stride, block_h, hf, vf, block_w);
256 const AVFrame *
ref,
const MvField *mvf,
int x_off,
int y_off,
const int block_w,
const int block_h,
257 const int hf_idx,
const int vf_idx)
261 const Mv *
mv = mvf->
mv + lx;
262 const uint8_t *
src =
ref->data[0];
263 ptrdiff_t src_stride =
ref->linesize[0];
264 const int idx =
av_log2(block_w) - 1;
265 const int mx =
mv->x & 0xf;
266 const int my =
mv->y & 0xf;
273 src += y_off * src_stride + (x_off * (1 <<
fc->ps.sps->pixel_shift));
278 fc->vvcdsp.inter.put_uni_w[
LUMA][idx][!!my][!!mx](dst, dst_stride,
src, src_stride,
279 block_h, denom, wx, ox, hf, vf, block_w);
281 fc->vvcdsp.inter.put_uni[
LUMA][idx][!!my][!!mx](dst, dst_stride,
src, src_stride,
282 block_h, hf, vf, block_w);
287 const AVFrame *ref0,
const Mv *mv0,
const int x_off,
const int y_off,
const int block_w,
const int block_h,
288 const AVFrame *ref1,
const Mv *mv1,
const MvField *mvf,
const int hf_idx,
const int vf_idx,
289 const MvField *orig_mv,
const int sb_bdof_flag)
293 const int idx =
av_log2(block_w) - 1;
296 int denom, w0, w1, o0, o1;
299 for (
int i =
L0;
i <=
L1;
i++) {
301 const int mx =
mv->x & 0xf;
302 const int my =
mv->y & 0xf;
303 const int ox = x_off + (
mv->x >> 4);
304 const int oy = y_off + (
mv->y >> 4);
305 ptrdiff_t src_stride =
ref[
i]->linesize[0];
306 const uint8_t *
src =
ref[
i]->data[0] + oy * src_stride + (ox * (1 <<
fc->ps.sps->pixel_shift));
311 const int x_sb = x_off + (orig_mv->
mv[
i].
x >> 4);
312 const int y_sb = y_off + (orig_mv->
mv[
i].
y >> 4);
318 fc->vvcdsp.inter.put[
LUMA][idx][!!my][!!mx](
tmp[
i],
src, src_stride, block_h, hf, vf, block_w);
320 fc->vvcdsp.inter.bdof_fetch_samples(
tmp[
i],
src, src_stride, mx, my, block_w, block_h);
324 fc->vvcdsp.inter.apply_bdof(dst, dst_stride,
tmp[
L0],
tmp[
L1], block_w, block_h);
325 else if (weight_flag)
326 fc->vvcdsp.inter.w_avg(dst, dst_stride,
tmp[
L0],
tmp[
L1], block_w, block_h, denom, w0, w1, o0, o1);
328 fc->vvcdsp.inter.avg(dst, dst_stride,
tmp[
L0],
tmp[
L1], block_w, block_h);
332 const uint8_t *
src, ptrdiff_t src_stride,
int x_off,
int y_off,
333 const int block_w,
const int block_h,
const MvField *mvf,
const int c_idx,
334 const int hf_idx,
const int vf_idx)
338 const int hs =
fc->ps.sps->hshift[1];
339 const int vs =
fc->ps.sps->vshift[1];
340 const int idx =
av_log2(block_w) - 1;
341 const Mv *
mv = &mvf->
mv[lx];
348 x_off +=
mv->x >> (4 + hs);
349 y_off +=
mv->y >> (4 + vs);
350 src += y_off * src_stride + (x_off * (1 <<
fc->ps.sps->pixel_shift));
355 fc->vvcdsp.inter.put_uni_w[
CHROMA][idx][!!my][!!mx](dst, dst_stride,
src, src_stride,
356 block_h, denom, wx, ox, hf, vf, block_w);
358 fc->vvcdsp.inter.put_uni[
CHROMA][idx][!!my][!!mx](dst, dst_stride,
src, src_stride,
359 block_h, hf, vf, block_w);
364 const AVFrame *ref0,
const AVFrame *ref1,
const int x_off,
const int y_off,
365 const int block_w,
const int block_h,
const MvField *mvf,
const int c_idx,
366 const int hf_idx,
const int vf_idx,
const MvField *orig_mv,
const int dmvr_flag,
const int ciip_flag)
369 const int hs =
fc->ps.sps->hshift[1];
370 const int vs =
fc->ps.sps->vshift[1];
371 const int idx =
av_log2(block_w) - 1;
374 int denom, w0, w1, o0, o1;
375 const int weight_flag =
derive_weight(&denom, &w0, &w1, &o0, &o1, lc, mvf, c_idx, dmvr_flag);
377 for (
int i =
L0;
i <=
L1;
i++) {
381 const int ox = x_off + (
mv->x >> (4 + hs));
382 const int oy = y_off + (
mv->y >> (4 + vs));
383 ptrdiff_t src_stride =
ref[
i]->linesize[c_idx];
384 const uint8_t *
src =
ref[
i]->data[c_idx] + oy * src_stride + (ox * (1 <<
fc->ps.sps->pixel_shift));
388 const int x_sb = x_off + (orig_mv->
mv[
i].
x >> (4 + hs));
389 const int y_sb = y_off + (orig_mv->
mv[
i].
y >> (4 + vs));
394 fc->vvcdsp.inter.put[
CHROMA][idx][!!my][!!mx](
tmp[
i],
src, src_stride, block_h, hf, vf, block_w);
397 fc->vvcdsp.inter.w_avg(dst, dst_stride,
tmp[
L0],
tmp[
L1], block_w, block_h, denom, w0, w1, o0, o1);
399 fc->vvcdsp.inter.avg(dst, dst_stride,
tmp[
L0],
tmp[
L1], block_w, block_h);
403 const AVFrame *
ref,
const MvField *mvf,
int x_off,
int y_off,
const int block_w,
const int block_h,
404 const int cb_prof_flag,
const int16_t *diff_mv_x,
const int16_t *diff_mv_y)
407 const uint8_t *
src =
ref->data[0];
408 ptrdiff_t src_stride =
ref->linesize[0];
410 const int idx =
av_log2(block_w) - 1;
412 const Mv *
mv = mvf->
mv + lx;
413 const int mx =
mv->x & 0xf;
414 const int my =
mv->y & 0xf;
422 src += y_off * src_stride + (x_off * (1 <<
fc->ps.sps->pixel_shift));
427 fc->vvcdsp.inter.fetch_samples(prof_tmp,
src, src_stride, mx, my);
429 fc->vvcdsp.inter.apply_prof_uni(dst, dst_stride, prof_tmp, diff_mv_x, diff_mv_y);
431 fc->vvcdsp.inter.apply_prof_uni_w(dst, dst_stride, prof_tmp, diff_mv_x, diff_mv_y, denom, wx, ox);
434 fc->vvcdsp.inter.put_uni[
LUMA][idx][!!my][!!mx](dst, dst_stride,
src, src_stride, block_h, hf, vf, block_w);
436 fc->vvcdsp.inter.put_uni_w[
LUMA][idx][!!my][!!mx](dst, dst_stride,
src, src_stride, block_h, denom, wx, ox, hf, vf, block_w);
442 const int block_w,
const int block_h)
449 const int idx =
av_log2(block_w) - 1;
450 int denom, w0, w1, o0, o1;
451 const int weight_flag =
derive_weight(&denom, &w0, &w1, &o0, &o1, lc, mvf,
LUMA, 0);
453 for (
int i =
L0;
i <=
L1;
i++) {
455 const int mx =
mv->x & 0xf;
456 const int my =
mv->y & 0xf;
457 const int ox = x_off + (
mv->x >> 4);
458 const int oy = y_off + (
mv->y >> 4);
459 ptrdiff_t src_stride =
ref[
i]->linesize[0];
460 const uint8_t *
src =
ref[
i]->data[0] + oy * src_stride + (ox * (1 <<
fc->ps.sps->pixel_shift));
466 fc->vvcdsp.inter.put[
LUMA][idx][!!my][!!mx](
tmp[
i],
src, src_stride, block_h, hf, vf, block_w);
469 fc->vvcdsp.inter.fetch_samples(prof_tmp,
src, src_stride, mx, my);
475 fc->vvcdsp.inter.w_avg(dst, dst_stride,
tmp[
L0],
tmp[
L1], block_w, block_h, denom, w0, w1, o0, o1);
477 fc->vvcdsp.inter.avg(dst, dst_stride,
tmp[
L0],
tmp[
L1], block_w, block_h);
485 if (
mv->pred_flag &
mask) {
487 ref[lx] = rpl[lx].
ref[
mv->ref_idx[lx]];
/* Address of a sample in plane `c_idx` of fc->frame.
 * The row index is (y >> vshift[c_idx]) * linesize[c_idx]; the byte column is
 * (x >> hshift[c_idx]) << pixel_shift. Requires `fc` in the expansion scope.
 * NOTE(review): because x and y are shifted by the plane's subsampling
 * shifts, they are presumably given in luma sample units -- confirm. */
495 #define POS(c_idx, x, y) \
496 &fc->frame->data[c_idx][((y) >> fc->ps.sps->vshift[c_idx]) * fc->frame->linesize[c_idx] + \
497 (((x) >> fc->ps.sps->hshift[c_idx]) << fc->ps.sps->pixel_shift)]
514 const int c_end =
fc->ps.sps->r->sps_chroma_format_idc ? 3 : 1;
518 for (
int c_idx = 0; c_idx < c_end; c_idx++) {
519 const int hs =
fc->ps.sps->hshift[c_idx];
520 const int vs =
fc->ps.sps->vshift[c_idx];
521 const int x = lc->
cu->
x0 >> hs;
522 const int y = lc->
cu->
y0 >> vs;
526 ptrdiff_t dst_stride =
fc->frame->linesize[c_idx];
528 int step_x = 1 << hs;
532 }
else if (mirror_type == 1) {
540 for (
int i = 0;
i < 2;
i++) {
542 const int lx =
mv->pred_flag -
PF_L0;
565 const int min_pu_width =
fc->ps.pps->min_pu_width;
579 const int x0,
const int y0,
const int sbw,
const int sbh,
const MvField *orig_mv,
const int sb_bdof_flag)
584 uint8_t *dst =
POS(0, x0, y0);
585 const ptrdiff_t dst_stride =
fc->frame->linesize[0];
586 uint8_t *inter = ciip_flag ? (uint8_t *)lc->
ciip_tmp1 : dst;
587 const ptrdiff_t inter_stride = ciip_flag ? (
MAX_PB_SIZE *
sizeof(uint16_t)) : dst_stride;
594 const int lx =
mv->pred_flag -
PF_L0;
596 mv, x0, y0, sbw, sbh, hf_idx, vf_idx);
599 &
mv->mv[0], x0, y0, sbw, sbh,
ref[1]->frame, &
mv->mv[1],
mv,
600 hf_idx, vf_idx, orig_mv, sb_bdof_flag);
605 fc->vvcdsp.intra.intra_pred(lc, x0, y0, sbw, sbh, 0);
607 fc->vvcdsp.lmcs.filter(inter, inter_stride, sbw, sbh, &
fc->ps.lmcs.fwd_lut);
608 fc->vvcdsp.inter.put_ciip(dst, dst_stride, sbw, sbh, inter, inter_stride, intra_weight);
614 const int x0,
const int y0,
const int sbw,
const int sbh,
const MvField *orig_mv,
const int dmvr_flag)
617 const int hs =
fc->ps.sps->hshift[1];
618 const int vs =
fc->ps.sps->vshift[1];
619 const int x0_c = x0 >> hs;
620 const int y0_c = y0 >> vs;
621 const int w_c = sbw >> hs;
622 const int h_c = sbh >> vs;
625 uint8_t* dst1 =
POS(1, x0, y0);
626 uint8_t* dst2 =
POS(2, x0, y0);
627 const ptrdiff_t dst1_stride =
fc->frame->linesize[1];
628 const ptrdiff_t dst2_stride =
fc->frame->linesize[2];
630 uint8_t *inter1 = do_ciip ? (uint8_t *)lc->
ciip_tmp1 : dst1;
631 const ptrdiff_t inter1_stride = do_ciip ? (
MAX_PB_SIZE *
sizeof(uint16_t)) : dst1_stride;
633 uint8_t *inter2 = do_ciip ? (uint8_t *)lc->
ciip_tmp2 : dst2;
634 const ptrdiff_t inter2_stride = do_ciip ? (
MAX_PB_SIZE *
sizeof(uint16_t)) : dst2_stride;
637 const int hf_idx = 0;
638 const int vf_idx = 0;
645 const int lx =
mv->pred_flag -
PF_L0;
650 x0_c, y0_c,
w_c, h_c,
mv,
CB, hf_idx, vf_idx);
652 x0_c, y0_c,
w_c, h_c,
mv,
CR, hf_idx, vf_idx);
658 x0_c, y0_c,
w_c, h_c,
mv,
CB, hf_idx, vf_idx, orig_mv, dmvr_flag, lc->
cu->
ciip_flag);
661 x0_c, y0_c,
w_c, h_c,
mv,
CR, hf_idx, vf_idx, orig_mv, dmvr_flag, lc->
cu->
ciip_flag);
666 fc->vvcdsp.intra.intra_pred(lc, x0, y0, sbw, sbh, 1);
667 fc->vvcdsp.intra.intra_pred(lc, x0, y0, sbw, sbh, 2);
668 fc->vvcdsp.inter.put_ciip(dst1, dst1_stride,
w_c, h_c, inter1, inter1_stride, intra_weight);
669 fc->vvcdsp.inter.put_ciip(dst2, dst2_stride,
w_c, h_c, inter2, inter2_stride, intra_weight);
677 const int sad_minus = sad[-
stride];
678 const int sad_center = sad[0];
679 const int sad_plus = sad[
stride];
681 int denom = (( sad_minus + sad_plus) - (sad_center << 1 ) ) << 3;
685 if (sad_minus == sad_center)
687 else if (sad_plus == sad_center)
690 int num = ( sad_minus - sad_plus ) * (1 << 4);
698 while (counter > 0) {
699 counter = counter - 1;
700 quotient = quotient << 1;
701 if ( num >= denom ) {
703 quotient = quotient + 1;
705 denom = (denom >> 1);
/* NOTE(review): presumably the side length of the sad[dy][dx] grid used by the
 * DMVR search below, i.e. 2 * sr_range + 1 with sr_range = 2 -- confirm
 * against the array declaration, which is not visible in this excerpt. */
716 #define SAD_ARRAY_SIZE 5
719 const AVFrame *ref0,
const AVFrame *ref1,
const int x_off,
const int y_off,
const int block_w,
const int block_h)
722 const int sr_range = 2;
726 int min_dx, min_dy, min_sad, dx, dy;
729 min_dx = min_dy = dx = dy = 2;
731 for (
int i =
L0;
i <=
L1;
i++) {
732 const int pred_w = block_w + 2 * sr_range;
733 const int pred_h = block_h + 2 * sr_range;
735 const int mx =
mv->x & 0xf;
736 const int my =
mv->y & 0xf;
737 const int ox = x_off + (
mv->x >> 4) - sr_range;
738 const int oy = y_off + (
mv->y >> 4) - sr_range;
739 ptrdiff_t src_stride =
ref[
i]->linesize[
LUMA];
740 const uint8_t *
src =
ref[
i]->data[
LUMA] + oy * src_stride + (ox * (1 <<
fc->ps.sps->pixel_shift));
742 fc->vvcdsp.inter.dmvr[!!my][!!mx](
tmp[
i],
src, src_stride,
pred_h, mx, my, pred_w);
745 min_sad =
fc->vvcdsp.inter.sad(
tmp[
L0],
tmp[
L1], dx, dy, block_w, block_h);
746 min_sad -= min_sad >> 2;
747 sad[dy][dx] = min_sad;
749 if (min_sad >= block_w * block_h) {
754 if (dx != sr_range || dy != sr_range) {
755 sad[dy][dx] =
fc->vvcdsp.inter.sad(lc->
tmp, lc->
tmp1, dx, dy, block_w, block_h);
756 if (sad[dy][dx] < min_sad) {
757 min_sad = sad[dy][dx];
764 dmv[0] = (min_dx - sr_range) * (1 << 4);
765 dmv[1] = (min_dy - sr_range) * (1 << 4);
766 if (min_dx != 0 && min_dx != 4 && min_dy != 0 && min_dy != 4) {
771 for (
int i =
L0;
i <=
L1;
i++) {
773 mv->x += (1 - 2 *
i) * dmv[0];
774 mv->y += (1 - 2 *
i) * dmv[1];
778 if (min_sad < 2 * block_w * block_h) {
792 fc->ref->tab_dmvr_mvf[idx] = *mvf;
798 const int x0,
const int y0,
const int sbw,
const int sbh)
822 int sbw, sbh, sb_bdof_flag = 0;
830 for (
int sby = 0; sby <
mi->num_sb_y; sby++) {
831 for (
int sbx = 0; sbx <
mi->num_sb_x; sbx++) {
832 const int x0 = cu->
x0 + sbx * sbw;
833 const int y0 = cu->
y0 + sby * sbh;
840 if (
fc->ps.sps->r->sps_chroma_format_idc)
847 const int x0,
const int y0,
const int sbw,
const int sbh)
849 const int hs =
fc->ps.sps->hshift[1];
850 const int vs =
fc->ps.sps->vshift[1];
856 mvc->
mv[0].
x += (
unsigned int)mv2->
mv[0].
x;
857 mvc->
mv[0].
y += (
unsigned int)mv2->
mv[0].
y;
858 mvc->
mv[1].
x += (
unsigned int)mv2->
mv[1].
x;
859 mvc->
mv[1].
y += (
unsigned int)mv2->
mv[1].
y;
870 const int x0 = cu->
x0;
871 const int y0 = cu->
y0;
874 const int hs =
fc->ps.sps->hshift[1];
875 const int vs =
fc->ps.sps->vshift[1];
877 for (
int sby = 0; sby <
mi->num_sb_y; sby++) {
878 for (
int sbx = 0; sbx <
mi->num_sb_x; sbx++) {
879 const int x = x0 + sbx * sbw;
880 const int y = y0 + sby * sbh;
882 uint8_t *dst0 =
POS(0, x, y);
890 const int lx =
mi->pred_flag -
PF_L0;
898 if (
fc->ps.sps->r->sps_chroma_format_idc) {
925 uint8_t* dst0 =
POS(0, cu->
x0, cu->
y0);
938 const CTU *ctu =
fc->tab.ctus + rs;