FFmpeg
vf_vmafmotion.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017 Ronald S. Bultje <rsbultje@gmail.com>
3  * Copyright (c) 2017 Ashish Pratap Singh <ashk43712@gmail.com>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * Calculate VMAF Motion score.
25  */
26 
27 #include "libavutil/file_open.h"
28 #include "libavutil/opt.h"
29 #include "libavutil/pixdesc.h"
30 #include "avfilter.h"
31 #include "formats.h"
32 #include "internal.h"
33 #include "vmaf_motion.h"
34 
35 #define BIT_SHIFT 15
36 
/*
 * Symmetric 5-tap smoothing kernel, kept as floating-point weights that sum
 * to approximately 1. The weights are scaled by (1 << BIT_SHIFT) and rounded
 * to integers before they are handed to the convolution routines.
 */
static const float FILTER_5[5] = {
    0.054488685, 0.244201342, 0.402619947, 0.244201342, 0.054488685
};
44 
45 typedef struct VMAFMotionContext {
46  const AVClass *class;
48  FILE *stats_file;
51 
52 #define OFFSET(x) offsetof(VMAFMotionContext, x)
53 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
54 
55 static const AVOption vmafmotion_options[] = {
56  {"stats_file", "Set file where to store per-frame difference information", OFFSET(stats_file_str), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
57  { NULL }
58 };
59 
60 AVFILTER_DEFINE_CLASS(vmafmotion);
61 
/**
 * Sum of absolute differences between two planes of 16-bit samples.
 *
 * @param img1          first plane
 * @param img2          second plane
 * @param w             number of samples compared in each row
 * @param h             number of rows compared
 * @param _img1_stride  distance between consecutive rows of img1, in bytes
 * @param _img2_stride  distance between consecutive rows of img2, in bytes
 * @return the accumulated |img1 - img2| over all w * h sample positions
 */
static uint64_t image_sad(const uint16_t *img1, const uint16_t *img2, int w,
                          int h, ptrdiff_t _img1_stride, ptrdiff_t _img2_stride)
{
    /* strides are passed in bytes; rows are walked in units of samples */
    const ptrdiff_t step1 = _img1_stride / sizeof(*img1);
    const ptrdiff_t step2 = _img2_stride / sizeof(*img2);
    const uint16_t *row1  = img1;
    const uint16_t *row2  = img2;
    uint64_t total = 0;
    int rows_left  = h;

    while (rows_left-- > 0) {
        int x = 0;

        while (x < w) {
            int delta = row1[x] - row2[x];
            total += abs(delta);
            x++;
        }
        row1 += step1;
        row2 += step2;
    }

    return total;
}
80 
81 static void convolution_x(const uint16_t *filter, int filt_w, const uint16_t *src,
82  uint16_t *dst, int w, int h, ptrdiff_t _src_stride,
83  ptrdiff_t _dst_stride)
84 {
85  ptrdiff_t src_stride = _src_stride / sizeof(*src);
86  ptrdiff_t dst_stride = _dst_stride / sizeof(*dst);
87  int radius = filt_w / 2;
88  int borders_left = radius;
89  int borders_right = w - (filt_w - radius);
90  int i, j, k;
91  int sum = 0;
92 
93  for (i = 0; i < h; i++) {
94  for (j = 0; j < borders_left; j++) {
95  sum = 0;
96  for (k = 0; k < filt_w; k++) {
97  int j_tap = FFABS(j - radius + k);
98  if (j_tap >= w) {
99  j_tap = w - (j_tap - w + 1);
100  }
101  sum += filter[k] * src[i * src_stride + j_tap];
102  }
103  dst[i * dst_stride + j] = sum >> BIT_SHIFT;
104  }
105 
106  for (j = borders_left; j < borders_right; j++) {
107  int sum = 0;
108  for (k = 0; k < filt_w; k++) {
109  sum += filter[k] * src[i * src_stride + j - radius + k];
110  }
111  dst[i * dst_stride + j] = sum >> BIT_SHIFT;
112  }
113 
114  for (j = borders_right; j < w; j++) {
115  sum = 0;
116  for (k = 0; k < filt_w; k++) {
117  int j_tap = FFABS(j - radius + k);
118  if (j_tap >= w) {
119  j_tap = w - (j_tap - w + 1);
120  }
121  sum += filter[k] * src[i * src_stride + j_tap];
122  }
123  dst[i * dst_stride + j] = sum >> BIT_SHIFT;
124  }
125  }
126 }
127 
/*
 * Generates convolution_y_<bits>bit(): the vertical pass of the separable
 * filter for input samples stored as `type`.
 *
 * Every output sample is the filter-weighted sum of filt_w input samples of
 * the same column, shifted right by `bits`, and is written to dst as
 * uint16_t. Rows whose window would leave the plane use folded tap rows:
 * negative row numbers go through FFABS(), row numbers at or beyond h are
 * folded back to h - (row - h + 1). Both strides are given in bytes.
 */
#define conv_y_fn(type, bits) \
static void convolution_y_##bits##bit(const uint16_t *filter, int filt_w, \
                                      const uint8_t *_src, uint16_t *dst, \
                                      int w, int h, ptrdiff_t _src_stride, \
                                      ptrdiff_t _dst_stride) \
{ \
    const type *src = (const type *) _src; \
    const ptrdiff_t src_step = _src_stride / sizeof(*src); \
    const ptrdiff_t dst_step = _dst_stride / sizeof(*dst); \
    const int half       = filt_w / 2; \
    const int inner_from = half; \
    const int inner_to   = h - (filt_w - half); \
    int y, x, tap; \
    \
    /* top edge */ \
    for (y = 0; y < inner_from; y++) { \
        for (x = 0; x < w; x++) { \
            int acc = 0; \
            for (tap = 0; tap < filt_w; tap++) { \
                int pos = FFABS(y - half + tap); \
                if (pos >= h) \
                    pos = h - (pos - h + 1); \
                acc += filter[tap] * src[pos * src_step + x]; \
            } \
            dst[y * dst_step + x] = acc >> bits; \
        } \
    } \
    \
    /* interior: the whole window lies inside the plane */ \
    for (y = inner_from; y < inner_to; y++) { \
        for (x = 0; x < w; x++) { \
            int acc = 0; \
            for (tap = 0; tap < filt_w; tap++) \
                acc += filter[tap] * src[(y - half + tap) * src_step + x]; \
            dst[y * dst_step + x] = acc >> bits; \
        } \
    } \
    \
    /* bottom edge */ \
    for (y = inner_to; y < h; y++) { \
        for (x = 0; x < w; x++) { \
            int acc = 0; \
            for (tap = 0; tap < filt_w; tap++) { \
                int pos = FFABS(y - half + tap); \
                if (pos >= h) \
                    pos = h - (pos - h + 1); \
                acc += filter[tap] * src[pos * src_step + x]; \
            } \
            dst[y * dst_step + x] = acc >> bits; \
        } \
    } \
}

conv_y_fn(uint8_t, 8)
conv_y_fn(uint16_t, 10)
182 
183 static void vmafmotiondsp_init(VMAFMotionDSPContext *dsp, int bpp) {
184  dsp->convolution_x = convolution_x;
185  dsp->convolution_y = bpp == 10 ? convolution_y_10bit : convolution_y_8bit;
186  dsp->sad = image_sad;
187 }
188 
190 {
191  double score;
192 
193  s->vmafdsp.convolution_y(s->filter, 5, ref->data[0], s->temp_data,
194  s->width, s->height, ref->linesize[0], s->stride);
195  s->vmafdsp.convolution_x(s->filter, 5, s->temp_data, s->blur_data[0],
196  s->width, s->height, s->stride, s->stride);
197 
198  if (!s->nb_frames) {
199  score = 0.0;
200  } else {
201  uint64_t sad = s->vmafdsp.sad(s->blur_data[1], s->blur_data[0],
202  s->width, s->height, s->stride, s->stride);
203  // the output score is always normalized to 8 bits
204  score = (double) (sad * 1.0 / (s->width * s->height << (BIT_SHIFT - 8)));
205  }
206 
207  FFSWAP(uint16_t *, s->blur_data[0], s->blur_data[1]);
208  s->nb_frames++;
209  s->motion_sum += score;
210 
211  return score;
212 }
213 
214 static void set_meta(AVDictionary **metadata, const char *key, float d)
215 {
216  char value[128];
217  snprintf(value, sizeof(value), "%0.2f", d);
218  av_dict_set(metadata, key, value, 0);
219 }
220 
222 {
223  VMAFMotionContext *s = ctx->priv;
224  double score;
225 
226  score = ff_vmafmotion_process(&s->data, ref);
227  set_meta(&ref->metadata, "lavfi.vmafmotion.score", score);
228  if (s->stats_file) {
229  fprintf(s->stats_file,
230  "n:%"PRId64" motion:%0.2lf\n", s->data.nb_frames, score);
231  }
232 }
233 
234 
236  int w, int h, enum AVPixelFormat fmt)
237 {
238  size_t data_sz;
239  int i;
241 
242  if (w < 3 || h < 3)
243  return AVERROR(EINVAL);
244 
245  s->width = w;
246  s->height = h;
247  s->stride = FFALIGN(w * sizeof(uint16_t), 32);
248 
249  data_sz = (size_t) s->stride * h;
250  if (!(s->blur_data[0] = av_malloc(data_sz)) ||
251  !(s->blur_data[1] = av_malloc(data_sz)) ||
252  !(s->temp_data = av_malloc(data_sz))) {
253  return AVERROR(ENOMEM);
254  }
255 
256  for (i = 0; i < 5; i++) {
257  s->filter[i] = lrint(FILTER_5[i] * (1 << BIT_SHIFT));
258  }
259 
260  vmafmotiondsp_init(&s->vmafdsp, desc->comp[0].depth);
261 
262  return 0;
263 }
264 
266 {
267  AVFilterFormats *fmts_list = NULL;
268  int format, ret;
269 
270  for (format = 0; av_pix_fmt_desc_get(format); format++) {
273  (desc->flags & AV_PIX_FMT_FLAG_PLANAR || desc->nb_components == 1) &&
274  (!(desc->flags & AV_PIX_FMT_FLAG_BE) == !HAVE_BIGENDIAN || desc->comp[0].depth == 8) &&
275  (desc->comp[0].depth == 8 || desc->comp[0].depth == 10) &&
276  (ret = ff_add_format(&fmts_list, format)) < 0)
277  return ret;
278  }
279 
280  return ff_set_common_formats(ctx, fmts_list);
281 }
282 
284 {
285  AVFilterContext *ctx = inlink->dst;
286  VMAFMotionContext *s = ctx->priv;
287 
288  return ff_vmafmotion_init(&s->data, ctx->inputs[0]->w,
289  ctx->inputs[0]->h, ctx->inputs[0]->format);
290 }
291 
293 {
294  av_free(s->blur_data[0]);
295  av_free(s->blur_data[1]);
296  av_free(s->temp_data);
297 
298  return s->nb_frames > 0 ? s->motion_sum / s->nb_frames : 0.0;
299 }
300 
302 {
303  AVFilterContext *ctx = inlink->dst;
305  return ff_filter_frame(ctx->outputs[0], ref);
306 }
307 
309 {
310  VMAFMotionContext *s = ctx->priv;
311 
312  if (s->stats_file_str) {
313  if (!strcmp(s->stats_file_str, "-")) {
314  s->stats_file = stdout;
315  } else {
316  s->stats_file = avpriv_fopen_utf8(s->stats_file_str, "w");
317  if (!s->stats_file) {
318  int err = AVERROR(errno);
319  char buf[128];
320  av_strerror(err, buf, sizeof(buf));
321  av_log(ctx, AV_LOG_ERROR, "Could not open stats file %s: %s\n",
322  s->stats_file_str, buf);
323  return err;
324  }
325  }
326  }
327 
328  return 0;
329 }
330 
332 {
333  VMAFMotionContext *s = ctx->priv;
334  double avg_motion = ff_vmafmotion_uninit(&s->data);
335 
336  if (s->data.nb_frames > 0) {
337  av_log(ctx, AV_LOG_INFO, "VMAF Motion avg: %.3f\n", avg_motion);
338  }
339 
340  if (s->stats_file && s->stats_file != stdout)
341  fclose(s->stats_file);
342 }
343 
344 static const AVFilterPad vmafmotion_inputs[] = {
345  {
346  .name = "reference",
347  .type = AVMEDIA_TYPE_VIDEO,
348  .filter_frame = filter_frame,
349  .config_props = config_input_ref,
350  },
351 };
352 
353 static const AVFilterPad vmafmotion_outputs[] = {
354  {
355  .name = "default",
356  .type = AVMEDIA_TYPE_VIDEO,
357  },
358 };
359 
361  .name = "vmafmotion",
362  .description = NULL_IF_CONFIG_SMALL("Calculate the VMAF Motion score."),
363  .init = init,
364  .uninit = uninit,
365  .priv_size = sizeof(VMAFMotionContext),
366  .priv_class = &vmafmotion_class,
371 };
AVPixelFormat
AVPixelFormat
Pixel format.
Definition: pixfmt.h:64
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
opt.h
ff_filter_frame
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:969
av_pix_fmt_desc_get
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:2888
inlink
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is inlink
Definition: filter_design.txt:212
OFFSET
#define OFFSET(x)
Definition: vf_vmafmotion.c:52
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:330
pixdesc.h
w
uint8_t w
Definition: llviddspenc.c:38
AVOption
AVOption.
Definition: opt.h:251
FILTER_QUERY_FUNC
#define FILTER_QUERY_FUNC(func)
Definition: internal.h:171
filter
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce then the filter should push the output frames on the output link immediately As an exception to the previous rule if the input frame is enough to produce several output frames then the filter needs output only at least one per link The additional frames can be left buffered in the filter
Definition: filter_design.txt:228
AVDictionary
Definition: dict.c:32
AVFilter::name
const char * name
Filter name.
Definition: avfilter.h:165
ff_vmafmotion_process
double ff_vmafmotion_process(VMAFMotionData *s, AVFrame *ref)
Definition: vf_vmafmotion.c:189
BIT_SHIFT
#define BIT_SHIFT
Definition: vf_vmafmotion.c:35
av_malloc
#define av_malloc(s)
Definition: tableprint_vlc.h:30
AVFilterFormats
A list of supported formats for one end of a filter link.
Definition: formats.h:64
formats.h
VMAFMotionContext::stats_file
FILE * stats_file
Definition: vf_vmafmotion.c:48
filter_frame
static int filter_frame(AVFilterLink *inlink, AVFrame *ref)
Definition: vf_vmafmotion.c:301
av_strerror
int av_strerror(int errnum, char *errbuf, size_t errbuf_size)
Put a description of the AVERROR code errnum in errbuf.
Definition: error.c:108
AV_PIX_FMT_FLAG_HWACCEL
#define AV_PIX_FMT_FLAG_HWACCEL
Pixel format is an HW accelerated format.
Definition: pixdesc.h:128
convolution_x
static void convolution_x(const uint16_t *filter, int filt_w, const uint16_t *src, uint16_t *dst, int w, int h, ptrdiff_t _src_stride, ptrdiff_t _dst_stride)
Definition: vf_vmafmotion.c:81
config_input_ref
static int config_input_ref(AVFilterLink *inlink)
Definition: vf_vmafmotion.c:283
AVFilterPad
A filter pad used for either input or output.
Definition: internal.h:49
lrint
#define lrint
Definition: tablegen.h:53
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:180
av_cold
#define av_cold
Definition: attributes.h:90
ff_set_common_formats
int ff_set_common_formats(AVFilterContext *ctx, AVFilterFormats *formats)
A helper for query_formats() which sets all links to the same list of formats.
Definition: formats.c:749
VMAFMotionData
Definition: vmaf_motion.h:42
s
#define s(width, name)
Definition: cbs_vp9.c:256
format
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample format(the sample packing is implied by the sample format) and sample rate. The lists are not just lists
ff_vmafmotion_init
int ff_vmafmotion_init(VMAFMotionData *s, int w, int h, enum AVPixelFormat fmt)
Definition: vf_vmafmotion.c:235
ctx
AVFormatContext * ctx
Definition: movenc.c:48
key
const char * key
Definition: hwcontext_opencl.c:174
FILTER_INPUTS
#define FILTER_INPUTS(array)
Definition: internal.h:194
file_open.h
img1
static uint8_t img1[WIDTH *HEIGHT]
Definition: motion.c:43
FFABS
#define FFABS(a)
Absolute value, Note, INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
Definition: common.h:64
if
if(ret)
Definition: filter_design.txt:179
AVClass
Describe the class of an AVClass context structure.
Definition: log.h:66
NULL
#define NULL
Definition: coverity.c:32
AVFILTER_DEFINE_CLASS
AVFILTER_DEFINE_CLASS(vmafmotion)
vmafmotion_outputs
static const AVFilterPad vmafmotion_outputs[]
Definition: vf_vmafmotion.c:353
ff_add_format
int ff_add_format(AVFilterFormats **avff, int64_t fmt)
Add fmt to the list of media formats contained in *avff.
Definition: formats.c:449
double
double
Definition: af_crystalizer.c:132
abs
#define abs(x)
Definition: cuda_runtime.h:35
ff_vmafmotion_uninit
double ff_vmafmotion_uninit(VMAFMotionData *s)
Definition: vf_vmafmotion.c:292
NULL_IF_CONFIG_SMALL
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:115
AV_PIX_FMT_FLAG_RGB
#define AV_PIX_FMT_FLAG_RGB
The pixel format contains RGB-like data (as opposed to YUV/grayscale).
Definition: pixdesc.h:136
img2
static uint8_t img2[WIDTH *HEIGHT]
Definition: motion.c:44
AV_PIX_FMT_FLAG_BITSTREAM
#define AV_PIX_FMT_FLAG_BITSTREAM
All values of a component are bit-wise packed end to end.
Definition: pixdesc.h:124
init
static av_cold int init(AVFilterContext *ctx)
Definition: vf_vmafmotion.c:308
ff_vf_vmafmotion
const AVFilter ff_vf_vmafmotion
Definition: vf_vmafmotion.c:360
AV_LOG_INFO
#define AV_LOG_INFO
Standard information.
Definition: log.h:191
internal.h
image_sad
static uint64_t image_sad(const uint16_t *img1, const uint16_t *img2, int w, int h, ptrdiff_t _img1_stride, ptrdiff_t _img2_stride)
Definition: vf_vmafmotion.c:62
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:269
VMAFMotionContext
Definition: vf_vmafmotion.c:45
value
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf default value
Definition: writing_filters.txt:86
AV_PIX_FMT_FLAG_BE
#define AV_PIX_FMT_FLAG_BE
Pixel format is big-endian.
Definition: pixdesc.h:116
AVFilterPad::name
const char * name
Pad name.
Definition: internal.h:55
avpriv_fopen_utf8
FILE * avpriv_fopen_utf8(const char *path, const char *mode)
Open a file using a UTF-8 filename.
Definition: file_open.c:159
AVFilter
Filter definition.
Definition: avfilter.h:161
ret
ret
Definition: filter_design.txt:187
FFSWAP
#define FFSWAP(type, a, b)
Definition: macros.h:52
vmaf_motion.h
VMAFMotionContext::data
VMAFMotionData data
Definition: vf_vmafmotion.c:47
query_formats
static int query_formats(AVFilterContext *ctx)
Definition: vf_vmafmotion.c:265
FILTER_5
static const float FILTER_5[5]
Definition: vf_vmafmotion.c:37
avfilter.h
AVFILTER_FLAG_METADATA_ONLY
#define AVFILTER_FLAG_METADATA_ONLY
The filter is a "metadata" filter - it does not modify the frame data in any way.
Definition: avfilter.h:133
VMAFMotionDSPContext
Definition: vmaf_motion.h:29
ref
static int ref[MAX_W *MAX_W]
Definition: jpeg2000dwt.c:112
AV_PIX_FMT_FLAG_PLANAR
#define AV_PIX_FMT_FLAG_PLANAR
At least one pixel component is not in the first data plane.
Definition: pixdesc.h:132
do_vmafmotion
static void do_vmafmotion(AVFilterContext *ctx, AVFrame *ref)
Definition: vf_vmafmotion.c:221
vmafmotion_inputs
static const AVFilterPad vmafmotion_inputs[]
Definition: vf_vmafmotion.c:344
AVFilterContext
An instance of a filter.
Definition: avfilter.h:392
FLAGS
#define FLAGS
Definition: vf_vmafmotion.c:53
desc
const char * desc
Definition: libsvtav1.c:83
AVMEDIA_TYPE_VIDEO
@ AVMEDIA_TYPE_VIDEO
Definition: avutil.h:201
conv_y_fn
#define conv_y_fn(type, bits)
Definition: vf_vmafmotion.c:128
vmafmotion_options
static const AVOption vmafmotion_options[]
Definition: vf_vmafmotion.c:55
AVPixFmtDescriptor
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes...
Definition: pixdesc.h:69
av_free
#define av_free(p)
Definition: tableprint_vlc.h:33
FFALIGN
#define FFALIGN(x, a)
Definition: macros.h:78
FILTER_OUTPUTS
#define FILTER_OUTPUTS(array)
Definition: internal.h:195
av_dict_set
int av_dict_set(AVDictionary **pm, const char *key, const char *value, int flags)
Set the given entry in *pm, overwriting an existing entry.
Definition: dict.c:86
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
uninit
static av_cold void uninit(AVFilterContext *ctx)
Definition: vf_vmafmotion.c:331
VMAFMotionContext::stats_file_str
char * stats_file_str
Definition: vf_vmafmotion.c:49
d
d
Definition: ffmpeg_filter.c:156
flags
#define flags(name, subs,...)
Definition: cbs_av1.c:561
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
h
h
Definition: vp9dsp_template.c:2038
AV_OPT_TYPE_STRING
@ AV_OPT_TYPE_STRING
Definition: opt.h:229
AV_PIX_FMT_FLAG_PAL
#define AV_PIX_FMT_FLAG_PAL
Pixel format has a palette in data[1], values are indexes in this palette.
Definition: pixdesc.h:120
snprintf
#define snprintf
Definition: snprintf.h:34
set_meta
static void set_meta(AVDictionary **metadata, const char *key, float d)
Definition: vf_vmafmotion.c:214