FFmpeg
vvc_mc.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2023-2024 Nuo Mi
3  * Copyright (c) 2023-2024 Wu Jianhua
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License along
18  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20  */
21 
22 #include <string.h>
23 
24 #include "checkasm.h"
25 #include "libavcodec/vvc/ctu.h"
26 #include "libavcodec/vvc/data.h"
27 #include "libavcodec/vvc/dsp.h"
28 
29 #include "libavutil/common.h"
30 #include "libavutil/intreadwrite.h"
31 #include "libavutil/mem_internal.h"
32 
/*
 * 32-bit masks applied to random words so that the two 16-bit samples packed
 * in each word stay within the range of a given bit depth.
 * Indexed by (bit_depth - 8) >> 1, i.e. entries for 8, 10, 12, 14 and 16 bits.
 * The 8-bit entry keeps every bit, so all four bytes of the word stay random.
 */
static const uint32_t pixel_mask[] = {
    0xffffffff, /*  8-bit */
    0x03ff03ff, /* 10-bit */
    0x0fff0fff, /* 12-bit */
    0x3fff3fff, /* 14-bit */
    0xffffffff, /* 16-bit */
};

/* Power-of-two block dimensions; not referenced by the checks visible in this file. */
static const int sizes[] = { 2, 4, 8, 16, 32, 64, 128 };
35 
/* Bytes per sample; expects a variable named bit_depth at the point of use. */
#define SIZEOF_PIXEL ((bit_depth + 7) / 8)
/* Row pitch, in bytes, handed to the functions under test. */
#define PIXEL_STRIDE (MAX_CTU_SIZE * 2)
/* Margin kept before / after the block in the source buffers. */
#define EXTRA_BEFORE 3
#define EXTRA_AFTER 4
/*
 * Every replacement list is fully parenthesized so the macros can be used
 * inside larger expressions (e.g. 100 / SRC_EXTRA) without changing meaning.
 */
#define SRC_EXTRA ((EXTRA_BEFORE + EXTRA_AFTER) * 2)
#define SRC_BUF_SIZE ((PIXEL_STRIDE + SRC_EXTRA) * (PIXEL_STRIDE + SRC_EXTRA))
#define DST_BUF_SIZE (MAX_CTU_SIZE * MAX_CTU_SIZE * 2)
/* Offset of the block's first sample inside a source buffer, leaving the leading margin free. */
#define SRC_OFFSET ((PIXEL_STRIDE + EXTRA_BEFORE * 2) * EXTRA_BEFORE)
44 
/*
 * Fill buf0 and buf1 with the same sequence of random 32-bit words, each
 * ANDed with mask, so reference and tested code see identical input.
 *
 * size is counted in elements of *buf0; the index advances by however many
 * elements make up one 32-bit store (4 / sizeof(*buf0)).
 * Macro arguments are parenthesized so that expressions such as casts or
 * products can be passed safely.
 * NOTE(review): AV_WN32A presumably needs 4-byte aligned addresses - callers
 * pass LOCAL_ALIGNED_32 buffers; confirm for any new caller.
 */
#define randomize_buffers(buf0, buf1, size, mask)                   \
    do {                                                            \
        int k;                                                      \
        for (k = 0; k < (size); k += 4 / sizeof(*(buf0))) {         \
            uint32_t r = rnd() & (mask);                            \
            AV_WN32A((buf0) + k, r);                                \
            AV_WN32A((buf1) + k, r);                                \
        }                                                           \
    } while (0)
54 
/*
 * Fill buf0 and buf1 with identical random samples limited to the current
 * bit depth. bit_depth is not a parameter: it is taken from the scope in
 * which the macro is expanded and selects the pixel_mask entry.
 */
#define randomize_pixels(buf0, buf1, size)                              \
    do {                                                                \
        const uint32_t depth_mask = pixel_mask[(bit_depth - 8) >> 1];   \
        randomize_buffers(buf0, buf1, size, depth_mask);                \
    } while (0)
60 
/*
 * Fill buf0 and buf1 with identical random data for the averaging checks.
 * Every 16-bit half of each random word is limited to its low 14 bits
 * (0x3fff), independent of the bit depth being tested.
 */
#define randomize_avg_src(buf0, buf1, size)                 \
    do {                                                    \
        const uint32_t avg_mask = 0x3fff3fff;               \
        randomize_buffers(buf0, buf1, size, avg_mask);      \
    } while (0)
66 
67 static void check_put_vvc_luma(void)
68 {
69  LOCAL_ALIGNED_32(int16_t, dst0, [DST_BUF_SIZE / 2]);
70  LOCAL_ALIGNED_32(int16_t, dst1, [DST_BUF_SIZE / 2]);
71  LOCAL_ALIGNED_32(uint8_t, src0, [SRC_BUF_SIZE]);
72  LOCAL_ALIGNED_32(uint8_t, src1, [SRC_BUF_SIZE]);
74 
75  declare_func(void, int16_t *dst, const uint8_t *src, const ptrdiff_t src_stride,
76  const int height, const int8_t *hf, const int8_t *vf, const int width);
77 
78  for (int bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
81  for (int i = 0; i < 2; i++) {
82  for (int j = 0; j < 2; j++) {
83  for (int h = 4; h <= MAX_CTU_SIZE; h *= 2) {
84  for (int w = 4; w <= MAX_CTU_SIZE; w *= 2) {
85  const int idx = av_log2(w) - 1;
86  const int mx = rnd() % 16;
87  const int my = rnd() % 16;
88  const int8_t *hf = ff_vvc_inter_luma_filters[rnd() % 3][mx];
89  const int8_t *vf = ff_vvc_inter_luma_filters[rnd() % 3][my];
90  const char *type;
91  switch ((j << 1) | i) {
92  case 0: type = "put_luma_pixels"; break; // 0 0
93  case 1: type = "put_luma_h"; break; // 0 1
94  case 2: type = "put_luma_v"; break; // 1 0
95  case 3: type = "put_luma_hv"; break; // 1 1
96  }
97  if (check_func(c.inter.put[LUMA][idx][j][i], "%s_%d_%dx%d", type, bit_depth, w, h)) {
98  memset(dst0, 0, DST_BUF_SIZE);
99  memset(dst1, 0, DST_BUF_SIZE);
100  call_ref(dst0, src0 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w);
101  call_new(dst1, src1 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w);
102  if (memcmp(dst0, dst1, DST_BUF_SIZE))
103  fail();
104  if (w == h)
105  bench_new(dst1, src1 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w);
106  }
107  }
108  }
109  }
110  }
111  }
112  report("put_luma");
113 }
114 
115 static void check_put_vvc_luma_uni(void)
116 {
117  LOCAL_ALIGNED_32(uint8_t, dst0, [DST_BUF_SIZE]);
118  LOCAL_ALIGNED_32(uint8_t, dst1, [DST_BUF_SIZE]);
119  LOCAL_ALIGNED_32(uint8_t, src0, [SRC_BUF_SIZE]);
120  LOCAL_ALIGNED_32(uint8_t, src1, [SRC_BUF_SIZE]);
121 
123  declare_func(void, uint8_t *dst, ptrdiff_t dststride,
124  const uint8_t *src, ptrdiff_t srcstride, int height,
125  const int8_t *hf, const int8_t *vf, int width);
126 
127  for (int bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
130  for (int i = 0; i < 2; i++) {
131  for (int j = 0; j < 2; j++) {
132  for (int h = 4; h <= MAX_CTU_SIZE; h *= 2) {
133  for (int w = 4; w <= MAX_CTU_SIZE; w *= 2) {
134  const int idx = av_log2(w) - 1;
135  const int mx = rnd() % VVC_INTER_LUMA_FACTS;
136  const int my = rnd() % VVC_INTER_LUMA_FACTS;
137  const int8_t *hf = ff_vvc_inter_luma_filters[rnd() % VVC_INTER_LUMA_FILTER_TYPES][mx];
138  const int8_t *vf = ff_vvc_inter_luma_filters[rnd() % VVC_INTER_LUMA_FILTER_TYPES][my];
139  const char *type;
140 
141  switch ((j << 1) | i) {
142  case 0: type = "put_uni_pixels"; break; // 0 0
143  case 1: type = "put_uni_h"; break; // 0 1
144  case 2: type = "put_uni_v"; break; // 1 0
145  case 3: type = "put_uni_hv"; break; // 1 1
146  }
147 
148  if (check_func(c.inter.put_uni[LUMA][idx][j][i], "%s_luma_%d_%dx%d", type, bit_depth, w, h)) {
149  memset(dst0, 0, DST_BUF_SIZE);
150  memset(dst1, 0, DST_BUF_SIZE);
151  call_ref(dst0, PIXEL_STRIDE, src0 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w);
152  call_new(dst1, PIXEL_STRIDE, src1 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w);
153  if (memcmp(dst0, dst1, DST_BUF_SIZE))
154  fail();
155  if (w == h)
156  bench_new(dst1, PIXEL_STRIDE, src1 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w);
157  }
158  }
159  }
160  }
161  }
162  }
163  report("put_uni_luma");
164 }
165 
166 static void check_put_vvc_chroma(void)
167 {
168  LOCAL_ALIGNED_32(int16_t, dst0, [DST_BUF_SIZE / 2]);
169  LOCAL_ALIGNED_32(int16_t, dst1, [DST_BUF_SIZE / 2]);
170  LOCAL_ALIGNED_32(uint8_t, src0, [SRC_BUF_SIZE]);
171  LOCAL_ALIGNED_32(uint8_t, src1, [SRC_BUF_SIZE]);
173 
174  declare_func(void, int16_t *dst, const uint8_t *src, const ptrdiff_t src_stride,
175  const int height, const int8_t *hf, const int8_t *vf, const int width);
176 
177  for (int bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
180  for (int i = 0; i < 2; i++) {
181  for (int j = 0; j < 2; j++) {
182  for (int h = 2; h <= MAX_CTU_SIZE; h *= 2) {
183  for (int w = 2; w <= MAX_CTU_SIZE; w *= 2) {
184  const int idx = av_log2(w) - 1;
185  const int mx = rnd() % VVC_INTER_CHROMA_FACTS;
186  const int my = rnd() % VVC_INTER_CHROMA_FACTS;
189  const char *type;
190  switch ((j << 1) | i) {
191  case 0: type = "put_chroma_pixels"; break; // 0 0
192  case 1: type = "put_chroma_h"; break; // 0 1
193  case 2: type = "put_chroma_v"; break; // 1 0
194  case 3: type = "put_chroma_hv"; break; // 1 1
195  }
196  if (check_func(c.inter.put[CHROMA][idx][j][i], "%s_%d_%dx%d", type, bit_depth, w, h)) {
197  memset(dst0, 0, DST_BUF_SIZE);
198  memset(dst1, 0, DST_BUF_SIZE);
199  call_ref(dst0, src0 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w);
200  call_new(dst1, src1 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w);
201  if (memcmp(dst0, dst1, DST_BUF_SIZE))
202  fail();
203  if (w == h)
204  bench_new(dst1, src1 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w);
205  }
206  }
207  }
208  }
209  }
210  }
211  report("put_chroma");
212 }
213 
214 static void check_put_vvc_chroma_uni(void)
215 {
216  LOCAL_ALIGNED_32(uint8_t, dst0, [DST_BUF_SIZE]);
217  LOCAL_ALIGNED_32(uint8_t, dst1, [DST_BUF_SIZE]);
218  LOCAL_ALIGNED_32(uint8_t, src0, [SRC_BUF_SIZE]);
219  LOCAL_ALIGNED_32(uint8_t, src1, [SRC_BUF_SIZE]);
220 
222  declare_func(void, uint8_t *dst, ptrdiff_t dststride,
223  const uint8_t *src, ptrdiff_t srcstride, int height,
224  const int8_t *hf, const int8_t *vf, int width);
225 
226  for (int bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
229  for (int i = 0; i < 2; i++) {
230  for (int j = 0; j < 2; j++) {
231  for (int h = 4; h <= MAX_CTU_SIZE; h *= 2) {
232  for (int w = 4; w <= MAX_CTU_SIZE; w *= 2) {
233  const int idx = av_log2(w) - 1;
234  const int mx = rnd() % VVC_INTER_CHROMA_FACTS;
235  const int my = rnd() % VVC_INTER_CHROMA_FACTS;
238  const char *type;
239 
240  switch ((j << 1) | i) {
241  case 0: type = "put_uni_pixels"; break; // 0 0
242  case 1: type = "put_uni_h"; break; // 0 1
243  case 2: type = "put_uni_v"; break; // 1 0
244  case 3: type = "put_uni_hv"; break; // 1 1
245  }
246 
247  if (check_func(c.inter.put_uni[CHROMA][idx][j][i], "%s_chroma_%d_%dx%d", type, bit_depth, w, h)) {
248  memset(dst0, 0, DST_BUF_SIZE);
249  memset(dst1, 0, DST_BUF_SIZE);
250  call_ref(dst0, PIXEL_STRIDE, src0 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w);
251  call_new(dst1, PIXEL_STRIDE, src1 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w);
252  if (memcmp(dst0, dst1, DST_BUF_SIZE))
253  fail();
254  if (w == h)
255  bench_new(dst1, PIXEL_STRIDE, src1 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w);
256  }
257  }
258  }
259  }
260  }
261  }
262  report("put_uni_chroma");
263 }
264 
265 #define AVG_SRC_BUF_SIZE (MAX_CTU_SIZE * MAX_CTU_SIZE)
266 #define AVG_DST_BUF_SIZE (MAX_PB_SIZE * MAX_PB_SIZE * 2)
267 
268 static void check_avg(void)
269 {
270  LOCAL_ALIGNED_32(int16_t, src00, [AVG_SRC_BUF_SIZE]);
271  LOCAL_ALIGNED_32(int16_t, src01, [AVG_SRC_BUF_SIZE]);
272  LOCAL_ALIGNED_32(int16_t, src10, [AVG_SRC_BUF_SIZE]);
273  LOCAL_ALIGNED_32(int16_t, src11, [AVG_SRC_BUF_SIZE]);
274  LOCAL_ALIGNED_32(uint8_t, dst0, [AVG_DST_BUF_SIZE]);
275  LOCAL_ALIGNED_32(uint8_t, dst1, [AVG_DST_BUF_SIZE]);
277 
278  for (int bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
279  randomize_avg_src((uint8_t*)src00, (uint8_t*)src10, AVG_SRC_BUF_SIZE * sizeof(int16_t));
280  randomize_avg_src((uint8_t*)src01, (uint8_t*)src11, AVG_SRC_BUF_SIZE * sizeof(int16_t));
282  for (int h = 2; h <= MAX_CTU_SIZE; h *= 2) {
283  for (int w = 2; w <= MAX_CTU_SIZE; w *= 2) {
284  {
285  declare_func(void, uint8_t *dst, ptrdiff_t dst_stride,
286  const int16_t *src0, const int16_t *src1, int width, int height);
287  if (check_func(c.inter.avg, "avg_%d_%dx%d", bit_depth, w, h)) {
288  memset(dst0, 0, AVG_DST_BUF_SIZE);
289  memset(dst1, 0, AVG_DST_BUF_SIZE);
290  call_ref(dst0, MAX_CTU_SIZE * SIZEOF_PIXEL, src00, src01, w, h);
291  call_new(dst1, MAX_CTU_SIZE * SIZEOF_PIXEL, src10, src11, w, h);
292  if (memcmp(dst0, dst1, DST_BUF_SIZE))
293  fail();
294  if (w == h)
295  bench_new(dst0, MAX_CTU_SIZE * SIZEOF_PIXEL, src00, src01, w, h);
296  }
297  }
298  {
299  declare_func(void, uint8_t *dst, ptrdiff_t dst_stride,
300  const int16_t *src0, const int16_t *src1, int width, int height,
301  int denom, int w0, int w1, int o0, int o1);
302  {
303  const int denom = rnd() % 8;
304  const int w0 = rnd() % 256 - 128;
305  const int w1 = rnd() % 256 - 128;
306  const int o0 = rnd() % 256 - 128;
307  const int o1 = rnd() % 256 - 128;
308  if (check_func(c.inter.w_avg, "w_avg_%d_%dx%d", bit_depth, w, h)) {
309  memset(dst0, 0, AVG_DST_BUF_SIZE);
310  memset(dst1, 0, AVG_DST_BUF_SIZE);
311 
312  call_ref(dst0, MAX_CTU_SIZE * SIZEOF_PIXEL, src00, src01, w, h, denom, w0, w1, o0, o1);
313  call_new(dst1, MAX_CTU_SIZE * SIZEOF_PIXEL, src10, src11, w, h, denom, w0, w1, o0, o1);
314  if (memcmp(dst0, dst1, DST_BUF_SIZE))
315  fail();
316  if (w == h)
317  bench_new(dst0, MAX_CTU_SIZE * SIZEOF_PIXEL, src00, src01, w, h, denom, w0, w1, o0, o1);
318  }
319  }
320  }
321  }
322  }
323  }
324  report("avg");
325 }
326 
327 static void check_vvc_sad(void)
328 {
329  const int bit_depth = 10;
331  LOCAL_ALIGNED_32(uint16_t, src0, [MAX_CTU_SIZE * MAX_CTU_SIZE * 4]);
332  LOCAL_ALIGNED_32(uint16_t, src1, [MAX_CTU_SIZE * MAX_CTU_SIZE * 4]);
333  declare_func(int, const int16_t *src0, const int16_t *src1, int dx, int dy, int block_w, int block_h);
334 
337  for (int h = 8; h <= 16; h *= 2) {
338  for (int w = 8; w <= 16; w *= 2) {
339  for(int offy = 0; offy <= 4; offy++) {
340  for(int offx = 0; offx <= 4; offx++) {
341  if (w * h < 128)
342  continue;
343 
344  if (check_func(c.inter.sad, "sad_%dx%d", w, h)) {
345  int result0;
346  int result1;
347 
348  result0 = call_ref(src0 + PIXEL_STRIDE * 2 + 2, src1 + PIXEL_STRIDE * 2 + 2, offx, offy, w, h);
349  result1 = call_new(src0 + PIXEL_STRIDE * 2 + 2, src1 + PIXEL_STRIDE * 2 + 2, offx, offy, w, h);
350 
351  if (result1 != result0)
352  fail();
353  if(offx == 0 && offy == 0)
354  bench_new(src0 + PIXEL_STRIDE * 2 + 2, src1 + PIXEL_STRIDE * 2 + 2, offx, offy, w, h);
355  }
356  }
357  }
358  }
359  }
360 
361  report("sad");
362 }
363 
/**
 * Entry point of the VVC motion-compensation checks: runs the SAD check,
 * the four "put" checks (luma/chroma, plain and uni) and the averaging check.
 */
void checkasm_check_vvc_mc(void)
{
    check_vvc_sad();
    check_put_vvc_luma();
    check_put_vvc_luma_uni();
    check_put_vvc_chroma();
    check_put_vvc_chroma_uni();
    check_avg();
}
DST_BUF_SIZE
#define DST_BUF_SIZE
Definition: vvc_mc.c:42
ff_vvc_inter_chroma_filters
const int8_t ff_vvc_inter_chroma_filters[VVC_INTER_CHROMA_FILTER_TYPES][VVC_INTER_CHROMA_FACTS][VVC_INTER_CHROMA_TAPS]
Definition: data.c:1877
LUMA
#define LUMA
Definition: filter.c:31
mem_internal.h
src1
const pixel * src1
Definition: h264pred_template.c:421
SIZEOF_PIXEL
#define SIZEOF_PIXEL
Definition: vvc_mc.c:36
check_vvc_sad
static void check_vvc_sad(void)
Definition: vvc_mc.c:327
data.h
w
uint8_t w
Definition: llviddspenc.c:38
check_put_vvc_chroma_uni
static void check_put_vvc_chroma_uni(void)
Definition: vvc_mc.c:214
check_func
#define check_func(func,...)
Definition: checkasm.h:177
call_ref
#define call_ref(...)
Definition: checkasm.h:192
bit_depth
static void bit_depth(AudioStatsContext *s, const uint64_t *const mask, uint8_t *depth)
Definition: af_astats.c:246
check_put_vvc_luma_uni
static void check_put_vvc_luma_uni(void)
Definition: vvc_mc.c:115
fail
#define fail()
Definition: checkasm.h:186
checkasm.h
PIXEL_STRIDE
#define PIXEL_STRIDE
Definition: vvc_mc.c:37
type
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf type
Definition: writing_filters.txt:86
rnd
#define rnd()
Definition: checkasm.h:170
dsp.h
width
#define width
intreadwrite.h
VVC_INTER_CHROMA_FACTS
#define VVC_INTER_CHROMA_FACTS
Definition: data.h:53
AVG_DST_BUF_SIZE
#define AVG_DST_BUF_SIZE
Definition: vvc_mc.c:266
check_avg
static void check_avg(void)
Definition: vvc_mc.c:268
call_new
#define call_new(...)
Definition: checkasm.h:295
LOCAL_ALIGNED_32
#define LOCAL_ALIGNED_32(t, v,...)
Definition: mem_internal.h:156
MAX_CTU_SIZE
#define MAX_CTU_SIZE
Definition: ctu.h:31
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
ff_vvc_inter_luma_filters
const int8_t ff_vvc_inter_luma_filters[VVC_INTER_LUMA_FILTER_TYPES][VVC_INTER_LUMA_FACTS][VVC_INTER_LUMA_TAPS]
Definition: data.c:1735
pixel_mask
static const uint32_t pixel_mask[]
Definition: vvc_mc.c:33
VVC_INTER_LUMA_FILTER_TYPES
#define VVC_INTER_LUMA_FILTER_TYPES
Definition: data.h:48
ff_vvc_dsp_init
void ff_vvc_dsp_init(VVCDSPContext *vvcdsp, int bit_depth)
Definition: dsp.c:98
height
#define height
randomize_avg_src
#define randomize_avg_src(buf0, buf1, size)
Definition: vvc_mc.c:61
VVC_INTER_LUMA_FACTS
#define VVC_INTER_LUMA_FACTS
Definition: data.h:51
sizes
static const int sizes[]
Definition: vvc_mc.c:34
report
#define report
Definition: checkasm.h:189
bench_new
#define bench_new(...)
Definition: checkasm.h:366
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
check_put_vvc_luma
static void check_put_vvc_luma(void)
Definition: vvc_mc.c:67
common.h
SRC_OFFSET
#define SRC_OFFSET
Definition: vvc_mc.c:43
VVC_INTER_CHROMA_FILTER_TYPES
#define VVC_INTER_CHROMA_FILTER_TYPES
Definition: data.h:49
check_put_vvc_chroma
static void check_put_vvc_chroma(void)
Definition: vvc_mc.c:166
CHROMA
@ CHROMA
Definition: vf_waveform.c:49
randomize_pixels
#define randomize_pixels(buf0, buf1, size)
Definition: vvc_mc.c:55
src0
const pixel *const src0
Definition: h264pred_template.c:420
declare_func
#define declare_func(ret,...)
Definition: checkasm.h:181
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
checkasm_check_vvc_mc
void checkasm_check_vvc_mc(void)
Definition: vvc_mc.c:364
h
h
Definition: vp9dsp_template.c:2038
ctu.h
av_log2
int av_log2(unsigned v)
Definition: intmath.c:26
AVG_SRC_BUF_SIZE
#define AVG_SRC_BUF_SIZE
Definition: vvc_mc.c:265
VVCDSPContext
Definition: dsp.h:171
SRC_BUF_SIZE
#define SRC_BUF_SIZE
Definition: vvc_mc.c:41