FFmpeg
vvc_mc.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2023-2024 Nuo Mi
3  * Copyright (c) 2023-2024 Wu Jianhua
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License along
18  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20  */
21 
22 #include <string.h>
23 
24 #include "checkasm.h"
25 #include "libavcodec/vvc/ctu.h"
26 #include "libavcodec/vvc/data.h"
27 #include "libavcodec/vvc/dsp.h"
28 
29 #include "libavutil/common.h"
30 #include "libavutil/intreadwrite.h"
31 #include "libavutil/mem_internal.h"
32 
/*
 * Masks applied to each 32-bit random word when filling pixel buffers,
 * looked up with (bit_depth - 8) >> 1.
 */
static const uint32_t pixel_mask[] = { 0xffffffff, 0x03ff03ff, 0x0fff0fff, 0x3fff3fff, 0xffffffff };
static const int sizes[] = { 2, 4, 8, 16, 32, 64, 128 };

/* Bytes per pixel; expects a variable named bit_depth in scope where it is used. */
#define SIZEOF_PIXEL ((bit_depth + 7) / 8)
#define PIXEL_STRIDE (MAX_CTU_SIZE * 2)
#define EXTRA_BEFORE 3
#define EXTRA_AFTER 4
/* Fully parenthesized so these expand correctly inside larger expressions. */
#define SRC_EXTRA ((EXTRA_BEFORE + EXTRA_AFTER) * 2)
#define SRC_BUF_SIZE ((PIXEL_STRIDE + SRC_EXTRA) * (PIXEL_STRIDE + SRC_EXTRA))
#define DST_BUF_SIZE (MAX_CTU_SIZE * MAX_CTU_SIZE * 2)
/* Byte offset into a source buffer that is passed as the block origin. */
#define SRC_OFFSET ((PIXEL_STRIDE + EXTRA_BEFORE * 2) * EXTRA_BEFORE)
44 
/*
 * Fill buf0 and buf1 with identical masked random data.
 * Each iteration draws one value from rnd(), ANDs it with mask and stores
 * the same 32-bit word at offset k of both buffers; k advances by 4 up to
 * size. Arguments are parenthesized so expressions can be passed safely.
 */
#define randomize_buffers(buf0, buf1, size, mask)   \
    do {                                            \
        int k;                                      \
        for (k = 0; k < (size); k += 4) {           \
            uint32_t r = rnd() & (mask);            \
            AV_WN32A((buf0) + k, r);                \
            AV_WN32A((buf1) + k, r);                \
        }                                           \
    } while (0)

/*
 * Fill two pixel buffers with identical random samples, masked according
 * to the bit_depth variable in scope at the point of use.
 */
#define randomize_pixels(buf0, buf1, size)                      \
    do {                                                        \
        uint32_t mask = pixel_mask[(bit_depth - 8) >> 1];       \
        randomize_buffers(buf0, buf1, size, mask);              \
    } while (0)

/*
 * Fill two buffers with identical random data where every 16-bit half of
 * each 32-bit word is masked with 0x3fff.
 */
#define randomize_avg_src(buf0, buf1, size)                     \
    do {                                                        \
        uint32_t mask = 0x3fff3fff;                             \
        randomize_buffers(buf0, buf1, size, mask);              \
    } while (0)
66 
67 static void check_put_vvc_luma(void)
68 {
69  LOCAL_ALIGNED_32(int16_t, dst0, [DST_BUF_SIZE / 2]);
70  LOCAL_ALIGNED_32(int16_t, dst1, [DST_BUF_SIZE / 2]);
71  LOCAL_ALIGNED_32(uint8_t, src0, [SRC_BUF_SIZE]);
72  LOCAL_ALIGNED_32(uint8_t, src1, [SRC_BUF_SIZE]);
74 
75  declare_func(void, int16_t *dst, const uint8_t *src, const ptrdiff_t src_stride,
76  const int height, const int8_t *hf, const int8_t *vf, const int width);
77 
78  for (int bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
81  for (int i = 0; i < 2; i++) {
82  for (int j = 0; j < 2; j++) {
83  for (int h = 4; h <= MAX_CTU_SIZE; h *= 2) {
84  for (int w = 4; w <= MAX_CTU_SIZE; w *= 2) {
85  const int idx = av_log2(w) - 1;
86  const int mx = rnd() % 16;
87  const int my = rnd() % 16;
88  const int8_t *hf = ff_vvc_inter_luma_filters[rnd() % 3][mx];
89  const int8_t *vf = ff_vvc_inter_luma_filters[rnd() % 3][my];
90  const char *type;
91  switch ((j << 1) | i) {
92  case 0: type = "put_luma_pixels"; break; // 0 0
93  case 1: type = "put_luma_h"; break; // 0 1
94  case 2: type = "put_luma_v"; break; // 1 0
95  case 3: type = "put_luma_hv"; break; // 1 1
96  }
97  if (check_func(c.inter.put[LUMA][idx][j][i], "%s_%d_%dx%d", type, bit_depth, w, h)) {
98  memset(dst0, 0, DST_BUF_SIZE);
99  memset(dst1, 0, DST_BUF_SIZE);
100  call_ref(dst0, src0 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w);
101  call_new(dst1, src1 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w);
102  if (memcmp(dst0, dst1, DST_BUF_SIZE))
103  fail();
104  if (w == h)
105  bench_new(dst1, src1 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w);
106  }
107  }
108  }
109  }
110  }
111  }
112  report("put_luma");
113 }
114 
115 static void check_put_vvc_luma_uni(void)
116 {
117  LOCAL_ALIGNED_32(uint8_t, dst0, [DST_BUF_SIZE]);
118  LOCAL_ALIGNED_32(uint8_t, dst1, [DST_BUF_SIZE]);
119  LOCAL_ALIGNED_32(uint8_t, src0, [SRC_BUF_SIZE]);
120  LOCAL_ALIGNED_32(uint8_t, src1, [SRC_BUF_SIZE]);
121 
123  declare_func(void, uint8_t *dst, ptrdiff_t dststride,
124  uint8_t *src, ptrdiff_t srcstride, int height, const int8_t *hf, const int8_t *vf, int width);
125 
126  for (int bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
129  for (int i = 0; i < 2; i++) {
130  for (int j = 0; j < 2; j++) {
131  for (int h = 4; h <= MAX_CTU_SIZE; h *= 2) {
132  for (int w = 4; w <= MAX_CTU_SIZE; w *= 2) {
133  const int idx = av_log2(w) - 1;
134  const int mx = rnd() % VVC_INTER_LUMA_FACTS;
135  const int my = rnd() % VVC_INTER_LUMA_FACTS;
136  const int8_t *hf = ff_vvc_inter_luma_filters[rnd() % VVC_INTER_FILTER_TYPES][mx];
137  const int8_t *vf = ff_vvc_inter_luma_filters[rnd() % VVC_INTER_FILTER_TYPES][my];
138  const char *type;
139 
140  switch ((j << 1) | i) {
141  case 0: type = "put_uni_pixels"; break; // 0 0
142  case 1: type = "put_uni_h"; break; // 0 1
143  case 2: type = "put_uni_v"; break; // 1 0
144  case 3: type = "put_uni_hv"; break; // 1 1
145  }
146 
147  if (check_func(c.inter.put_uni[LUMA][idx][j][i], "%s_luma_%d_%dx%d", type, bit_depth, w, h)) {
148  memset(dst0, 0, DST_BUF_SIZE);
149  memset(dst1, 0, DST_BUF_SIZE);
150  call_ref(dst0, PIXEL_STRIDE, src0 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w);
151  call_new(dst1, PIXEL_STRIDE, src1 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w);
152  if (memcmp(dst0, dst1, DST_BUF_SIZE))
153  fail();
154  if (w == h)
155  bench_new(dst1, PIXEL_STRIDE, src1 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w);
156  }
157  }
158  }
159  }
160  }
161  }
162  report("put_uni_luma");
163 }
164 
165 static void check_put_vvc_chroma(void)
166 {
167  LOCAL_ALIGNED_32(int16_t, dst0, [DST_BUF_SIZE / 2]);
168  LOCAL_ALIGNED_32(int16_t, dst1, [DST_BUF_SIZE / 2]);
169  LOCAL_ALIGNED_32(uint8_t, src0, [SRC_BUF_SIZE]);
170  LOCAL_ALIGNED_32(uint8_t, src1, [SRC_BUF_SIZE]);
172 
173  declare_func(void, int16_t *dst, const uint8_t *src, const ptrdiff_t src_stride,
174  const int height, const int8_t *hf, const int8_t *vf, const int width);
175 
176  for (int bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
179  for (int i = 0; i < 2; i++) {
180  for (int j = 0; j < 2; j++) {
181  for (int h = 2; h <= MAX_CTU_SIZE; h *= 2) {
182  for (int w = 2; w <= MAX_CTU_SIZE; w *= 2) {
183  const int idx = av_log2(w) - 1;
184  const int mx = rnd() % VVC_INTER_CHROMA_FACTS;
185  const int my = rnd() % VVC_INTER_CHROMA_FACTS;
186  const int8_t *hf = ff_vvc_inter_chroma_filters[rnd() % VVC_INTER_FILTER_TYPES][mx];
187  const int8_t *vf = ff_vvc_inter_chroma_filters[rnd() % VVC_INTER_FILTER_TYPES][my];
188  const char *type;
189  switch ((j << 1) | i) {
190  case 0: type = "put_chroma_pixels"; break; // 0 0
191  case 1: type = "put_chroma_h"; break; // 0 1
192  case 2: type = "put_chroma_v"; break; // 1 0
193  case 3: type = "put_chroma_hv"; break; // 1 1
194  }
195  if (check_func(c.inter.put[CHROMA][idx][j][i], "%s_%d_%dx%d", type, bit_depth, w, h)) {
196  memset(dst0, 0, DST_BUF_SIZE);
197  memset(dst1, 0, DST_BUF_SIZE);
198  call_ref(dst0, src0 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w);
199  call_new(dst1, src1 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w);
200  if (memcmp(dst0, dst1, DST_BUF_SIZE))
201  fail();
202  if (w == h)
203  bench_new(dst1, src1 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w);
204  }
205  }
206  }
207  }
208  }
209  }
210  report("put_chroma");
211 }
212 
213 static void check_put_vvc_chroma_uni(void)
214 {
215  LOCAL_ALIGNED_32(uint8_t, dst0, [DST_BUF_SIZE]);
216  LOCAL_ALIGNED_32(uint8_t, dst1, [DST_BUF_SIZE]);
217  LOCAL_ALIGNED_32(uint8_t, src0, [SRC_BUF_SIZE]);
218  LOCAL_ALIGNED_32(uint8_t, src1, [SRC_BUF_SIZE]);
219 
221  declare_func(void, uint8_t *dst, ptrdiff_t dststride,
222  uint8_t *src, ptrdiff_t srcstride, int height, const int8_t *hf, const int8_t *vf, int width);
223 
224  for (int bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
227  for (int i = 0; i < 2; i++) {
228  for (int j = 0; j < 2; j++) {
229  for (int h = 4; h <= MAX_CTU_SIZE; h *= 2) {
230  for (int w = 4; w <= MAX_CTU_SIZE; w *= 2) {
231  const int idx = av_log2(w) - 1;
232  const int mx = rnd() % VVC_INTER_CHROMA_FACTS;
233  const int my = rnd() % VVC_INTER_CHROMA_FACTS;
234  const int8_t *hf = ff_vvc_inter_chroma_filters[rnd() % VVC_INTER_FILTER_TYPES][mx];
235  const int8_t *vf = ff_vvc_inter_chroma_filters[rnd() % VVC_INTER_FILTER_TYPES][my];
236  const char *type;
237 
238  switch ((j << 1) | i) {
239  case 0: type = "put_uni_pixels"; break; // 0 0
240  case 1: type = "put_uni_h"; break; // 0 1
241  case 2: type = "put_uni_v"; break; // 1 0
242  case 3: type = "put_uni_hv"; break; // 1 1
243  }
244 
245  if (check_func(c.inter.put_uni[CHROMA][idx][j][i], "%s_chroma_%d_%dx%d", type, bit_depth, w, h)) {
246  memset(dst0, 0, DST_BUF_SIZE);
247  memset(dst1, 0, DST_BUF_SIZE);
248  call_ref(dst0, PIXEL_STRIDE, src0 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w);
249  call_new(dst1, PIXEL_STRIDE, src1 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w);
250  if (memcmp(dst0, dst1, DST_BUF_SIZE))
251  fail();
252  if (w == h)
253  bench_new(dst1, PIXEL_STRIDE, src1 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w);
254  }
255  }
256  }
257  }
258  }
259  }
260  report("put_uni_chroma");
261 }
262 
263 #define AVG_SRC_BUF_SIZE (MAX_CTU_SIZE * MAX_CTU_SIZE)
264 #define AVG_DST_BUF_SIZE (MAX_PB_SIZE * MAX_PB_SIZE * 2)
265 
266 static void check_avg(void)
267 {
268  LOCAL_ALIGNED_32(int16_t, src00, [AVG_SRC_BUF_SIZE]);
269  LOCAL_ALIGNED_32(int16_t, src01, [AVG_SRC_BUF_SIZE]);
270  LOCAL_ALIGNED_32(int16_t, src10, [AVG_SRC_BUF_SIZE]);
271  LOCAL_ALIGNED_32(int16_t, src11, [AVG_SRC_BUF_SIZE]);
272  LOCAL_ALIGNED_32(uint8_t, dst0, [AVG_DST_BUF_SIZE]);
273  LOCAL_ALIGNED_32(uint8_t, dst1, [AVG_DST_BUF_SIZE]);
275 
276  for (int bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
277  randomize_avg_src((uint8_t*)src00, (uint8_t*)src10, AVG_SRC_BUF_SIZE * sizeof(int16_t));
278  randomize_avg_src((uint8_t*)src01, (uint8_t*)src11, AVG_SRC_BUF_SIZE * sizeof(int16_t));
280  for (int h = 2; h <= MAX_CTU_SIZE; h *= 2) {
281  for (int w = 2; w <= MAX_CTU_SIZE; w *= 2) {
282  {
283  declare_func(void, uint8_t *dst, ptrdiff_t dst_stride,
284  const int16_t *src0, const int16_t *src1, int width, int height);
285  if (check_func(c.inter.avg, "avg_%d_%dx%d", bit_depth, w, h)) {
286  memset(dst0, 0, AVG_DST_BUF_SIZE);
287  memset(dst1, 0, AVG_DST_BUF_SIZE);
288  call_ref(dst0, MAX_CTU_SIZE * SIZEOF_PIXEL, src00, src01, w, h);
289  call_new(dst1, MAX_CTU_SIZE * SIZEOF_PIXEL, src10, src11, w, h);
290  if (memcmp(dst0, dst1, DST_BUF_SIZE))
291  fail();
292  if (w == h)
293  bench_new(dst0, MAX_CTU_SIZE * SIZEOF_PIXEL, src00, src01, w, h);
294  }
295  }
296  {
297  declare_func(void, uint8_t *dst, ptrdiff_t dst_stride,
298  const int16_t *src0, const int16_t *src1, int width, int height,
299  int denom, int w0, int w1, int o0, int o1);
300  {
301  const int denom = rnd() % 8;
302  const int w0 = rnd() % 256 - 128;
303  const int w1 = rnd() % 256 - 128;
304  const int o0 = rnd() % 256 - 128;
305  const int o1 = rnd() % 256 - 128;
306  if (check_func(c.inter.w_avg, "w_avg_%d_%dx%d", bit_depth, w, h)) {
307  memset(dst0, 0, AVG_DST_BUF_SIZE);
308  memset(dst1, 0, AVG_DST_BUF_SIZE);
309 
310  call_ref(dst0, MAX_CTU_SIZE * SIZEOF_PIXEL, src00, src01, w, h, denom, w0, w1, o0, o1);
311  call_new(dst1, MAX_CTU_SIZE * SIZEOF_PIXEL, src10, src11, w, h, denom, w0, w1, o0, o1);
312  if (memcmp(dst0, dst1, DST_BUF_SIZE))
313  fail();
314  if (w == h)
315  bench_new(dst0, MAX_CTU_SIZE * SIZEOF_PIXEL, src00, src01, w, h, denom, w0, w1, o0, o1);
316  }
317  }
318  }
319  }
320  }
321  }
322  report("avg");
323 }
324 
/* checkasm entry point: runs every VVC motion-compensation check defined in this file. */
void checkasm_check_vvc_mc(void)
{
    check_put_vvc_luma();
    check_put_vvc_luma_uni();
    check_put_vvc_chroma();
    check_put_vvc_chroma_uni();
    check_avg();
}
DST_BUF_SIZE
#define DST_BUF_SIZE
Definition: vvc_mc.c:42
mem_internal.h
dsp.h
src1
const pixel * src1
Definition: h264pred_template.c:421
ff_vvc_inter_luma_filters
const int8_t ff_vvc_inter_luma_filters[VVC_INTER_FILTER_TYPES][VVC_INTER_LUMA_FACTS][VVC_INTER_LUMA_TAPS]
Definition: data.c:1735
SIZEOF_PIXEL
#define SIZEOF_PIXEL
Definition: vvc_mc.c:36
w
uint8_t w
Definition: llviddspenc.c:38
check_put_vvc_chroma_uni
static void check_put_vvc_chroma_uni(void)
Definition: vvc_mc.c:213
check_func
#define check_func(func,...)
Definition: checkasm.h:170
call_ref
#define call_ref(...)
Definition: checkasm.h:185
bit_depth
static void bit_depth(AudioStatsContext *s, const uint64_t *const mask, uint8_t *depth)
Definition: af_astats.c:246
check_put_vvc_luma_uni
static void check_put_vvc_luma_uni(void)
Definition: vvc_mc.c:115
fail
#define fail()
Definition: checkasm.h:179
checkasm.h
PIXEL_STRIDE
#define PIXEL_STRIDE
Definition: vvc_mc.c:37
type
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf type
Definition: writing_filters.txt:86
ff_vvc_inter_chroma_filters
const int8_t ff_vvc_inter_chroma_filters[VVC_INTER_FILTER_TYPES][VVC_INTER_CHROMA_FACTS][VVC_INTER_CHROMA_TAPS]
Definition: data.c:1798
rnd
#define rnd()
Definition: checkasm.h:163
width
#define width
intreadwrite.h
ff_vvc_dsp_init
void ff_vvc_dsp_init(VVCDSPContext *vvcdsp, int bit_depth)
Definition: dsp.c:98
AVG_DST_BUF_SIZE
#define AVG_DST_BUF_SIZE
Definition: vvc_mc.c:264
check_avg
static void check_avg(void)
Definition: vvc_mc.c:266
call_new
#define call_new(...)
Definition: checkasm.h:288
LOCAL_ALIGNED_32
#define LOCAL_ALIGNED_32(t, v,...)
Definition: mem_internal.h:156
VVC_INTER_FILTER_TYPES
#define VVC_INTER_FILTER_TYPES
Definition: data.h:46
MAX_CTU_SIZE
#define MAX_CTU_SIZE
Definition: ctu.h:31
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
pixel_mask
static const uint32_t pixel_mask[]
Definition: vvc_mc.c:33
height
#define height
randomize_avg_src
#define randomize_avg_src(buf0, buf1, size)
Definition: vvc_mc.c:61
VVC_INTER_CHROMA_FACTS
#define VVC_INTER_CHROMA_FACTS
Definition: data.h:49
sizes
static const int sizes[]
Definition: vvc_mc.c:34
report
#define report
Definition: checkasm.h:182
bench_new
#define bench_new(...)
Definition: checkasm.h:358
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
check_put_vvc_luma
static void check_put_vvc_luma(void)
Definition: vvc_mc.c:67
common.h
SRC_OFFSET
#define SRC_OFFSET
Definition: vvc_mc.c:43
check_put_vvc_chroma
static void check_put_vvc_chroma(void)
Definition: vvc_mc.c:165
data.h
CHROMA
@ CHROMA
Definition: vf_waveform.c:49
randomize_pixels
#define randomize_pixels(buf0, buf1, size)
Definition: vvc_mc.c:55
src0
const pixel *const src0
Definition: h264pred_template.c:420
VVC_INTER_LUMA_FACTS
#define VVC_INTER_LUMA_FACTS
Definition: data.h:47
declare_func
#define declare_func(ret,...)
Definition: checkasm.h:174
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
checkasm_check_vvc_mc
void checkasm_check_vvc_mc(void)
Definition: vvc_mc.c:325
h
h
Definition: vp9dsp_template.c:2038
ctu.h
av_log2
int av_log2(unsigned v)
Definition: intmath.c:26
AVG_SRC_BUF_SIZE
#define AVG_SRC_BUF_SIZE
Definition: vvc_mc.c:263
LUMA
#define LUMA
Definition: hevc_filter.c:31
VVCDSPContext
Definition: dsp.h:158
SRC_BUF_SIZE
#define SRC_BUF_SIZE
Definition: vvc_mc.c:41