FFmpeg
hpeldsp.c
Go to the documentation of this file.
/*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
18 
19 #include <assert.h>
20 #include <stddef.h>
21 
22 #include "checkasm.h"
23 #include "libavutil/intreadwrite.h"
24 #include "libavutil/macros.h"
25 #include "libavutil/mem_internal.h"
26 #include "libavcodec/avcodec.h"
27 #include "libavcodec/hpeldsp.h"
28 
// Upper bounds for the randomly chosen test parameters: block width,
// number of output lines and absolute stride, all in bytes/lines.
#define MAX_BLOCK_SIZE 16
#define MAX_HEIGHT 16
#define MAX_STRIDE 64
// Size of each destination buffer: MAX_HEIGHT lines at the largest stride,
// the last line being only one block wide.
// BUF_SIZE is bigger than necessary in order to test strides > block width.
#define BUF_SIZE ((MAX_HEIGHT - 1) * MAX_STRIDE + MAX_BLOCK_SIZE)
// Size of each source buffer.
// Due to hpel interpolation the input needs to have one more line than
// the output and the last line needs one more element.
// The input is not subject to alignment requirements; making the input buffer
// bigger (by MAX_BLOCK_SIZE - 1) allows us to use a random misalignment.
#define INPUT_BUF_SIZE (MAX_HEIGHT * MAX_STRIDE + MAX_BLOCK_SIZE + 1 + (MAX_BLOCK_SIZE - 1))
39 
/*
 * Fill two byte arrays with identical pseudo-random contents.
 *
 * buf0 and buf1 must be real arrays (sizeof is applied to them) of one-byte
 * elements, have the same size, and that size must be a multiple of four;
 * all of this is checked at compile time. Every 32-bit value returned by
 * rnd() is stored at the same position in both arrays with AV_WN32A
 * (NOTE(review): presumably the aligned 32-bit store from intreadwrite.h,
 * so the arrays are expected to be at least 4-byte aligned - confirm).
 *
 * The arguments are parenthesized wherever they are used in an expression
 * and the internal locals carry a trailing underscore so that they do not
 * clash with identifiers appearing in the arguments.
 */
#define randomize_buffers(buf0, buf1)                                        \
    do {                                                                     \
        static_assert(sizeof(buf0) == sizeof(buf1), "Incompatible buffers"); \
        static_assert(!(sizeof(buf0) % 4), "Tail handling needed");          \
        static_assert(sizeof((buf0)[0]) == 1 && sizeof((buf1)[0]) == 1,      \
                      "Pointer arithmetic needs to be adapted");             \
        for (size_t rb_pos_ = 0; rb_pos_ < sizeof(buf0); rb_pos_ += 4) {     \
            uint32_t rb_val_ = rnd();                                        \
            AV_WN32A((buf0) + rb_pos_, rb_val_);                             \
            AV_WN32A((buf1) + rb_pos_, rb_val_);                             \
        }                                                                    \
    } while (0)
52 
53 
55 {
56  DECLARE_ALIGNED(MAX_BLOCK_SIZE, uint8_t, srcbuf0)[INPUT_BUF_SIZE];
57  DECLARE_ALIGNED(MAX_BLOCK_SIZE, uint8_t, srcbuf1)[INPUT_BUF_SIZE];
58  DECLARE_ALIGNED(MAX_BLOCK_SIZE, uint8_t, dstbuf0)[BUF_SIZE];
59  DECLARE_ALIGNED(MAX_BLOCK_SIZE, uint8_t, dstbuf1)[BUF_SIZE];
60  HpelDSPContext hdsp;
61  static const struct {
62  const char *name;
63  size_t offset;
64  unsigned nb_blocksizes;
65  } tests[] = {
66 #define TEST(NAME, NB) { .name = #NAME, .offset = offsetof(HpelDSPContext, NAME), .nb_blocksizes = NB }
67  TEST(put_pixels_tab, 4),
68  TEST(avg_pixels_tab, 4),
69  TEST(put_no_rnd_pixels_tab, 2), // put_no_rnd_pixels_tab only has two usable blocksizes
70  TEST(avg_no_rnd_pixels_tab, 1),
71  };
72  declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int h);
73 
75 
76  for (size_t i = 0; i < FF_ARRAY_ELEMS(tests); ++i) {
77  op_pixels_func (*func_tab)[4] = (op_pixels_func (*)[4])((char*)&hdsp + tests[i].offset);
78  for (unsigned j = 0; j < tests[i].nb_blocksizes; ++j) {
79  const unsigned blocksize = MAX_BLOCK_SIZE >> j;
80  // h must always be a multiple of four, except when width is two or four.
81  const unsigned h_mult = blocksize <= 4 ? 2 : 4;
82 
83  for (unsigned dxy = 0; dxy < 4; ++dxy) {
84  if (check_func(func_tab[j][dxy], "%s[%u][%u]", tests[i].name, j, dxy)) {
85  // Don't always use output that is 16-aligned.
86  size_t dst_offset = (rnd() % (MAX_BLOCK_SIZE / blocksize)) * blocksize;
87  size_t src_offset = rnd() % MAX_BLOCK_SIZE;
88  ptrdiff_t stride = (rnd() % (MAX_STRIDE / blocksize) + 1) * blocksize;
89  int h = (rnd() % (MAX_HEIGHT / h_mult) + 1) * h_mult;
90  const uint8_t *src0 = srcbuf0 + src_offset, *src1 = srcbuf1 + src_offset;
91  uint8_t *dst0 = dstbuf0 + dst_offset, *dst1 = dstbuf1 + dst_offset;
92 
93  if (rnd() & 1) {
94  // Flip stride.
95  dst1 += (h - 1) * stride;
96  dst0 += (h - 1) * stride;
97  // Due to interpolation potentially h + 1 lines are read
98  // from src, hence h * stride.
99  src0 += h * stride;
100  src1 += h * stride;
101  stride = -stride;
102  }
103 
104  randomize_buffers(srcbuf0, srcbuf1);
105  randomize_buffers(dstbuf0, dstbuf1);
106  call_ref(dst0, src0, stride, h);
107  call_new(dst1, src1, stride, h);
108  if (memcmp(srcbuf0, srcbuf1, sizeof(srcbuf0)) || memcmp(dstbuf0, dstbuf1, sizeof(dstbuf0)))
109  fail();
110  bench_new(dst0, src0, stride, h);
111  }
112  }
113  }
114  }
115 }
declare_func_emms
#define declare_func_emms(cpu_flags, ret,...)
Definition: checkasm.h:199
name
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf default minimum maximum flags name is the option name
Definition: writing_filters.txt:88
mem_internal.h
src1
const pixel * src1
Definition: h264pred_template.c:420
checkasm_check_hpeldsp
void checkasm_check_hpeldsp(void)
Definition: hpeldsp.c:54
check_func
#define check_func(func,...)
Definition: checkasm.h:193
call_ref
#define call_ref(...)
Definition: checkasm.h:208
BUF_SIZE
#define BUF_SIZE
Definition: hpeldsp.c:33
func_tab
static const struct @67 func_tab[]
macros.h
fail
#define fail()
Definition: checkasm.h:202
checkasm.h
rnd
#define rnd()
Definition: checkasm.h:186
FF_ARRAY_ELEMS
#define FF_ARRAY_ELEMS(a)
Definition: sinewin_tablegen.c:29
intreadwrite.h
TEST
#define TEST(NAME, NB)
MAX_HEIGHT
#define MAX_HEIGHT
Definition: hpeldsp.c:30
call_new
#define call_new(...)
Definition: checkasm.h:311
MAX_BLOCK_SIZE
#define MAX_BLOCK_SIZE
Definition: hpeldsp.c:29
INPUT_BUF_SIZE
#define INPUT_BUF_SIZE
Definition: hpeldsp.c:38
op_pixels_func
void(* op_pixels_func)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Average and put pixel Widths can be 16, 8, 4 or 2.
Definition: hpeldsp.h:39
HpelDSPContext
Half-pel DSP context.
Definition: hpeldsp.h:46
DECLARE_ALIGNED
#define DECLARE_ALIGNED(n, t, v)
Definition: mem_internal.h:104
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:87
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
randomize_buffers
#define randomize_buffers(buf0, buf1)
Definition: hpeldsp.c:40
tests
const TestCase tests[]
Definition: fifo_muxer.c:363
bench_new
#define bench_new(...)
Definition: checkasm.h:396
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
avcodec.h
stride
#define stride
Definition: h264pred_template.c:536
AV_CPU_FLAG_MMX
#define AV_CPU_FLAG_MMX
standard MMX
Definition: cpu.h:30
MAX_STRIDE
#define MAX_STRIDE
Definition: hpeldsp.c:31
AV_CPU_FLAG_MMXEXT
#define AV_CPU_FLAG_MMXEXT
SSE integer functions or AMD MMX ext.
Definition: cpu.h:31
src0
const pixel *const src0
Definition: h264pred_template.c:419
AV_CODEC_FLAG_BITEXACT
#define AV_CODEC_FLAG_BITEXACT
Use only bitexact stuff (except (I)DCT).
Definition: avcodec.h:322
hpeldsp.h
h
h
Definition: vp9dsp_template.c:2070
src
#define src
Definition: vp8dsp.c:248
ff_hpeldsp_init
av_cold void ff_hpeldsp_init(HpelDSPContext *c, int flags)
Definition: hpeldsp.c:337