FFmpeg
sw_scale.c
Go to the documentation of this file.
1 /*
2  *
3  * This file is part of FFmpeg.
4  *
5  * FFmpeg is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  *
10  * FFmpeg is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License along
16  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
17  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18  */
19 
#include <string.h>

#include "libavutil/common.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/mem_internal.h"

#include "libswscale/swscale.h"
#include "libswscale/swscale_internal.h"

#include "checkasm.h"
30 
/*
 * Fill a byte buffer with values from rnd(), advancing four bytes per store.
 *
 * buf:  byte pointer to the start of the buffer.
 * size: number of bytes to fill. Stores are issued at offsets 0, 4, 8, ...
 *       while the offset is below size, so when size is not a multiple of 4
 *       the last store runs past size and the buffer must be padded
 *       accordingly.
 *
 * Both arguments are parenthesized so that casts, sums and conditional
 * expressions can be passed safely, and the loop counter has a name that is
 * unlikely to capture an identifier used inside an argument.
 */
#define randomize_buffers(buf, size)                          \
    do {                                                      \
        int rb_idx_;                                          \
        for (rb_idx_ = 0; rb_idx_ < (size); rb_idx_ += 4)     \
            AV_WN32((buf) + rb_idx_, rnd());                  \
    } while (0)
37 
// Scalar reference for the vertical scaler under test. It uses the same
// approximate algorithm as the SIMD functions: each product
// src[j][i + offset] * filter[j] is reduced to its upper 16 bits and the
// running sum is kept in a 16-bit accumulator.
//
// filter     - filterSize coefficients
// filterSize - number of taps, and number of row pointers in src
// src        - row pointers; row j is read at indices [offset, offset + dstW)
// dest       - receives dstW output bytes
// dstW       - number of output samples to produce (nothing is written
//              when it is <= 0)
// dither     - only dither[0] is read; it seeds the accumulator
// offset     - index of the first sample read from each source row
static void ref_function(const int16_t *filter, int filterSize,
                         const int16_t **src, uint8_t *dest, int dstW,
                         const uint8_t *dither, int offset)
{
    int i, j;
    // Starting value of the accumulator for every output sample.
    const int bias = ((filterSize - 1) * 8 + dither[0]) >> 4;

    for (i = 0; i < dstW; i++) {
        // Intentionally 16 bits wide: the sum wraps exactly like a 16-bit
        // accumulator would.
        int16_t val = bias;

        for (j = 0; j < filterSize; j++) {
            const int product = (int)src[j][i + offset] * (int)filter[j];

            // Keep the high 16 bits of the 32-bit product. An explicit shift
            // is used instead of type punning so the result does not depend
            // on host byte order. This relies on >> of a negative int being
            // an arithmetic shift, as it is on the compilers FFmpeg targets.
            val += (int16_t)(product >> 16);
        }
        dest[i] = av_clip_uint8(val >> 3);
    }
}
60 
61 static void check_yuv2yuvX(void)
62 {
63  struct SwsContext *ctx;
64  int fsi, osi, isi, i, j;
65  int dstW;
66 #define LARGEST_FILTER 16
67 #define FILTER_SIZES 4
68  static const int filter_sizes[FILTER_SIZES] = {1, 4, 8, 16};
69 #define LARGEST_INPUT_SIZE 512
70 #define INPUT_SIZES 6
71  static const int input_sizes[INPUT_SIZES] = {8, 24, 128, 144, 256, 512};
72 
73  declare_func_emms(AV_CPU_FLAG_MMX, void, const int16_t *filter,
74  int filterSize, const int16_t **src, uint8_t *dest,
75  int dstW, const uint8_t *dither, int offset);
76 
77  const int16_t **src;
78  LOCAL_ALIGNED_8(int16_t, src_pixels, [LARGEST_FILTER * LARGEST_INPUT_SIZE]);
79  LOCAL_ALIGNED_8(int16_t, filter_coeff, [LARGEST_FILTER]);
80  LOCAL_ALIGNED_8(uint8_t, dst0, [LARGEST_INPUT_SIZE]);
81  LOCAL_ALIGNED_8(uint8_t, dst1, [LARGEST_INPUT_SIZE]);
83  union VFilterData{
84  const int16_t *src;
85  uint16_t coeff[8];
86  } *vFilterData;
87  uint8_t d_val = rnd();
88  memset(dither, d_val, LARGEST_INPUT_SIZE);
89  randomize_buffers((uint8_t*)src_pixels, LARGEST_FILTER * LARGEST_INPUT_SIZE * sizeof(int16_t));
90  randomize_buffers((uint8_t*)filter_coeff, LARGEST_FILTER * sizeof(int16_t));
92  if (sws_init_context(ctx, NULL, NULL) < 0)
93  fail();
94 
96  for(isi = 0; isi < INPUT_SIZES; ++isi){
97  dstW = input_sizes[isi];
98  for(osi = 0; osi < 64; osi += 16){
99  for(fsi = 0; fsi < FILTER_SIZES; ++fsi){
100  src = av_malloc(sizeof(int16_t*) * filter_sizes[fsi]);
101  vFilterData = av_malloc((filter_sizes[fsi] + 2) * sizeof(union VFilterData));
102  memset(vFilterData, 0, (filter_sizes[fsi] + 2) * sizeof(union VFilterData));
103  for(i = 0; i < filter_sizes[fsi]; ++i){
104  src[i] = &src_pixels[i * LARGEST_INPUT_SIZE];
105  vFilterData[i].src = src[i];
106  for(j = 0; j < 4; ++j)
107  vFilterData[i].coeff[j + 4] = filter_coeff[i];
108  }
109  if (check_func(ctx->yuv2planeX, "yuv2yuvX_%d_%d_%d", filter_sizes[fsi], osi, dstW)){
110  memset(dst0, 0, LARGEST_INPUT_SIZE * sizeof(dst0[0]));
111  memset(dst1, 0, LARGEST_INPUT_SIZE * sizeof(dst1[0]));
112 
113  // The reference function is not the scalar function selected when mmx
114  // is deactivated as the SIMD functions do not give the same result as
115  // the scalar ones due to rounding. The SIMD functions are activated by
116  // the flag SWS_ACCURATE_RND
117  ref_function(&filter_coeff[0], filter_sizes[fsi], src, dst0, dstW - osi, dither, osi);
118  // There's no point in calling new for the reference function
119  if(ctx->use_mmx_vfilter){
120  call_new((const int16_t*)vFilterData, filter_sizes[fsi], src, dst1, dstW - osi, dither, osi);
121  if (memcmp(dst0, dst1, LARGEST_INPUT_SIZE * sizeof(dst0[0])))
122  fail();
123  if(dstW == LARGEST_INPUT_SIZE)
124  bench_new((const int16_t*)vFilterData, filter_sizes[fsi], src, dst1, dstW - osi, dither, osi);
125  }
126  }
127  av_freep(&src);
128  av_freep(&vFilterData);
129  }
130  }
131  }
133 #undef FILTER_SIZES
134 }
135 
136 #undef SRC_PIXELS
137 #define SRC_PIXELS 128
138 
139 static void check_hscale(void)
140 {
141 #define MAX_FILTER_WIDTH 40
142 #define FILTER_SIZES 5
143  static const int filter_sizes[FILTER_SIZES] = { 4, 8, 16, 32, 40 };
144 
145 #define HSCALE_PAIRS 2
146  static const int hscale_pairs[HSCALE_PAIRS][2] = {
147  { 8, 14 },
148  { 8, 18 },
149  };
150 
151  int i, j, fsi, hpi, width;
152  struct SwsContext *ctx;
153 
154  // padded
155  LOCAL_ALIGNED_32(uint8_t, src, [FFALIGN(SRC_PIXELS + MAX_FILTER_WIDTH - 1, 4)]);
156  LOCAL_ALIGNED_32(uint32_t, dst0, [SRC_PIXELS]);
157  LOCAL_ALIGNED_32(uint32_t, dst1, [SRC_PIXELS]);
158 
159  // padded
161  LOCAL_ALIGNED_32(int32_t, filterPos, [SRC_PIXELS]);
162 
163  // The dst parameter here is either int16_t or int32_t but we use void* to
164  // just cover both cases.
165  declare_func_emms(AV_CPU_FLAG_MMX, void, void *c, void *dst, int dstW,
166  const uint8_t *src, const int16_t *filter,
167  const int32_t *filterPos, int filterSize);
168 
170  if (sws_init_context(ctx, NULL, NULL) < 0)
171  fail();
172 
174 
175  for (hpi = 0; hpi < HSCALE_PAIRS; hpi++) {
176  for (fsi = 0; fsi < FILTER_SIZES; fsi++) {
177  width = filter_sizes[fsi];
178 
179  ctx->srcBpc = hscale_pairs[hpi][0];
180  ctx->dstBpc = hscale_pairs[hpi][1];
181  ctx->hLumFilterSize = ctx->hChrFilterSize = width;
182 
183  for (i = 0; i < SRC_PIXELS; i++) {
184  filterPos[i] = i;
185 
186  // These filter cofficients are chosen to try break two corner
187  // cases, namely:
188  //
189  // - Negative filter coefficients. The filters output signed
190  // values, and it should be possible to end up with negative
191  // output values.
192  //
193  // - Positive clipping. The hscale filter function has clipping
194  // at (1<<15) - 1
195  //
196  // The coefficients sum to the 1.0 point for the hscale
197  // functions (1 << 14).
198 
199  for (j = 0; j < width; j++) {
200  filter[i * width + j] = -((1 << 14) / (width - 1));
201  }
202  filter[i * width + (rnd() % width)] = ((1 << 15) - 1);
203  }
204 
205  for (i = 0; i < MAX_FILTER_WIDTH; i++) {
206  // These values should be unused in SIMD implementations but
207  // may still be read, random coefficients here should help show
208  // issues where they are used in error.
209 
210  filter[SRC_PIXELS * width + i] = rnd();
211  }
213 
214  if (check_func(ctx->hcScale, "hscale_%d_to_%d_width%d", ctx->srcBpc, ctx->dstBpc + 1, width)) {
215  memset(dst0, 0, SRC_PIXELS * sizeof(dst0[0]));
216  memset(dst1, 0, SRC_PIXELS * sizeof(dst1[0]));
217 
218  call_ref(NULL, dst0, SRC_PIXELS, src, filter, filterPos, width);
219  call_new(NULL, dst1, SRC_PIXELS, src, filter, filterPos, width);
220  if (memcmp(dst0, dst1, SRC_PIXELS * sizeof(dst0[0])))
221  fail();
222  bench_new(NULL, dst0, SRC_PIXELS, src, filter, filterPos, width);
223  }
224  }
225  }
227 }
228 
// checkasm entry point for this file: runs each swscale check and reports
// its results under the given name.
void checkasm_check_sw_scale(void)
{
    check_hscale();
    report("hscale");
    check_yuv2yuvX();
    report("yuv2yuvX");
}
FILTER_SIZES
#define FILTER_SIZES
declare_func_emms
#define declare_func_emms(cpu_flags, ret,...)
Definition: checkasm.h:133
SwsContext::dstW
int dstW
Width of destination luma/alpha planes.
Definition: swscale_internal.h:481
mem_internal.h
check_yuv2yuvX
static void check_yuv2yuvX(void)
Definition: sw_scale.c:61
check_func
#define check_func(func,...)
Definition: checkasm.h:127
filter
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce then the filter should push the output frames on the output link immediately As an exception to the previous rule if the input frame is enough to produce several output frames then the filter needs output only at least one per link The additional frames can be left buffered in the filter
Definition: filter_design.txt:228
call_ref
#define call_ref(...)
Definition: checkasm.h:142
av_malloc
#define av_malloc(s)
Definition: tableprint_vlc.h:31
fail
#define fail()
Definition: checkasm.h:136
checkasm.h
val
static double val(void *priv, double ch)
Definition: aeval.c:75
check_hscale
static void check_hscale(void)
Definition: sw_scale.c:139
rnd
#define rnd()
Definition: checkasm.h:120
width
#define width
intreadwrite.h
LARGEST_FILTER
#define LARGEST_FILTER
ctx
AVFormatContext * ctx
Definition: movenc.c:48
LOCAL_ALIGNED_8
#define LOCAL_ALIGNED_8(t, v,...)
Definition: mem_internal.h:124
HSCALE_PAIRS
#define HSCALE_PAIRS
SRC_PIXELS
#define SRC_PIXELS
Definition: sw_scale.c:137
call_new
#define call_new(...)
Definition: checkasm.h:214
NULL
#define NULL
Definition: coverity.c:32
LOCAL_ALIGNED_32
#define LOCAL_ALIGNED_32(t, v,...)
Definition: mem_internal.h:136
src
#define src
Definition: vp8dsp.c:255
sws_alloc_context
struct SwsContext * sws_alloc_context(void)
Allocate an empty SwsContext.
Definition: utils.c:1087
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
ff_sws_init_scale
void ff_sws_init_scale(SwsContext *c)
Definition: swscale.c:582
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
report
#define report
Definition: checkasm.h:139
i
int i
Definition: input.c:406
bench_new
#define bench_new(...)
Definition: checkasm.h:277
common.h
LARGEST_INPUT_SIZE
#define LARGEST_INPUT_SIZE
swscale_internal.h
INPUT_SIZES
#define INPUT_SIZES
randomize_buffers
#define randomize_buffers(buf, size)
Definition: sw_scale.c:31
sws_init_context
av_warn_unused_result int sws_init_context(struct SwsContext *sws_context, SwsFilter *srcFilter, SwsFilter *dstFilter)
Initialize the swscaler context sws_context.
Definition: utils.c:1173
AV_CPU_FLAG_MMX
#define AV_CPU_FLAG_MMX
standard MMX
Definition: cpu.h:29
sws_freeContext
void sws_freeContext(struct SwsContext *swsContext)
Free the swscaler context swsContext.
Definition: utils.c:2244
ref_function
static void ref_function(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW, const uint8_t *dither, int offset)
Definition: sw_scale.c:40
av_clip_uint8
#define av_clip_uint8
Definition: common.h:128
MAX_FILTER_WIDTH
#define MAX_FILTER_WIDTH
FFALIGN
#define FFALIGN(x, a)
Definition: macros.h:48
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:35
d
d
Definition: ffmpeg_filter.c:156
int32_t
int32_t
Definition: audioconvert.c:56
coeff
static const double coeff[2][5]
Definition: vf_owdenoise.c:73
checkasm_check_sw_scale
void checkasm_check_sw_scale(void)
Definition: sw_scale.c:229
SwsContext
Definition: swscale_internal.h:283
int
int
Definition: ffmpeg_filter.c:156
swscale.h
dither
static const uint8_t dither[8][8]
Definition: vf_fspp.c:58