FFmpeg
vf_thumbnail.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2011 Smartjog S.A.S, Clément Bœsch <clement.boesch@smartjog.com>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 /**
22  * @file
23  * Potential thumbnail lookup filter to reduce the risk of an inappropriate
24  * selection (such as a black frame) we could get with an absolute seek.
25  *
26  * Simplified version of algorithm by Vadim Zaliva <lord@crocodile.org>.
27  * @see http://notbrainsurgery.livejournal.com/29773.html
28  */
29 
30 #include "libavutil/opt.h"
31 #include "libavutil/pixdesc.h"
32 #include "avfilter.h"
33 #include "internal.h"
34 
35 #define HIST_SIZE (3*256)
36 
37 struct thumb_frame {
38  AVFrame *buf; ///< cached frame
39  int histogram[HIST_SIZE]; ///< RGB color distribution histogram of the frame
40 };
41 
42 typedef struct ThumbContext {
43  const AVClass *class;
44  int n; ///< current frame
45  int n_frames; ///< number of frames for analysis
46  struct thumb_frame *frames; ///< the n_frames frames
47  AVRational tb; ///< copy of the input timebase to ease access
48 
49  int planewidth[4];
50  int planeheight[4];
51 } ThumbContext;
52 
53 #define OFFSET(x) offsetof(ThumbContext, x)
54 #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
55 
56 static const AVOption thumbnail_options[] = {
57  { "n", "set the frames batch size", OFFSET(n_frames), AV_OPT_TYPE_INT, {.i64=100}, 2, INT_MAX, FLAGS },
58  { NULL }
59 };
60 
62 
64 {
65  ThumbContext *s = ctx->priv;
66 
67  s->frames = av_calloc(s->n_frames, sizeof(*s->frames));
68  if (!s->frames) {
69  av_log(ctx, AV_LOG_ERROR,
70  "Allocation failure, try to lower the number of frames\n");
71  return AVERROR(ENOMEM);
72  }
73  av_log(ctx, AV_LOG_VERBOSE, "batch size: %d frames\n", s->n_frames);
74  return 0;
75 }
76 
77 /**
78  * @brief Compute Sum-square deviation to estimate "closeness".
79  * @param hist color distribution histogram
80  * @param median average color distribution histogram
81  * @return sum of squared errors
82  */
83 static double frame_sum_square_err(const int *hist, const double *median)
84 {
85  int i;
86  double err, sum_sq_err = 0;
87 
88  for (i = 0; i < HIST_SIZE; i++) {
89  err = median[i] - (double)hist[i];
90  sum_sq_err += err*err;
91  }
92  return sum_sq_err;
93 }
94 
96 {
97  AVFrame *picref;
98  ThumbContext *s = ctx->priv;
99  int i, j, best_frame_idx = 0;
100  int nb_frames = s->n;
101  double avg_hist[HIST_SIZE] = {0}, sq_err, min_sq_err = -1;
102 
103  // average histogram of the N frames
104  for (j = 0; j < FF_ARRAY_ELEMS(avg_hist); j++) {
105  for (i = 0; i < nb_frames; i++)
106  avg_hist[j] += (double)s->frames[i].histogram[j];
107  avg_hist[j] /= nb_frames;
108  }
109 
110  // find the frame closer to the average using the sum of squared errors
111  for (i = 0; i < nb_frames; i++) {
112  sq_err = frame_sum_square_err(s->frames[i].histogram, avg_hist);
113  if (i == 0 || sq_err < min_sq_err)
114  best_frame_idx = i, min_sq_err = sq_err;
115  }
116 
117  // free and reset everything (except the best frame buffer)
118  for (i = 0; i < nb_frames; i++) {
119  memset(s->frames[i].histogram, 0, sizeof(s->frames[i].histogram));
120  if (i != best_frame_idx)
121  av_frame_free(&s->frames[i].buf);
122  }
123  s->n = 0;
124 
125  // raise the chosen one
126  picref = s->frames[best_frame_idx].buf;
127  av_log(ctx, AV_LOG_INFO, "frame id #%d (pts_time=%f) selected "
128  "from a set of %d images\n", best_frame_idx,
129  picref->pts * av_q2d(s->tb), nb_frames);
130  s->frames[best_frame_idx].buf = NULL;
131 
132  return picref;
133 }
134 
136 {
137  int i, j;
138  AVFilterContext *ctx = inlink->dst;
139  ThumbContext *s = ctx->priv;
140  AVFilterLink *outlink = ctx->outputs[0];
141  int *hist = s->frames[s->n].histogram;
142  const uint8_t *p = frame->data[0];
143 
144  // keep a reference of each frame
145  s->frames[s->n].buf = frame;
146 
147  // update current frame histogram
148  switch (inlink->format) {
149  case AV_PIX_FMT_RGB24:
150  case AV_PIX_FMT_BGR24:
151  for (j = 0; j < inlink->h; j++) {
152  for (i = 0; i < inlink->w; i++) {
153  hist[0*256 + p[i*3 ]]++;
154  hist[1*256 + p[i*3 + 1]]++;
155  hist[2*256 + p[i*3 + 2]]++;
156  }
157  p += frame->linesize[0];
158  }
159  break;
160  case AV_PIX_FMT_RGB0:
161  case AV_PIX_FMT_BGR0:
162  case AV_PIX_FMT_RGBA:
163  case AV_PIX_FMT_BGRA:
164  for (j = 0; j < inlink->h; j++) {
165  for (i = 0; i < inlink->w; i++) {
166  hist[0*256 + p[i*4 ]]++;
167  hist[1*256 + p[i*4 + 1]]++;
168  hist[2*256 + p[i*4 + 2]]++;
169  }
170  p += frame->linesize[0];
171  }
172  break;
173  case AV_PIX_FMT_0RGB:
174  case AV_PIX_FMT_0BGR:
175  case AV_PIX_FMT_ARGB:
176  case AV_PIX_FMT_ABGR:
177  for (j = 0; j < inlink->h; j++) {
178  for (i = 0; i < inlink->w; i++) {
179  hist[0*256 + p[i*4 + 1]]++;
180  hist[1*256 + p[i*4 + 2]]++;
181  hist[2*256 + p[i*4 + 3]]++;
182  }
183  p += frame->linesize[0];
184  }
185  break;
186  default:
187  for (int plane = 0; plane < 3; plane++) {
188  const uint8_t *p = frame->data[plane];
189  for (j = 0; j < s->planeheight[plane]; j++) {
190  for (i = 0; i < s->planewidth[plane]; i++)
191  hist[256*plane + p[i]]++;
192  p += frame->linesize[plane];
193  }
194  }
195  break;
196  }
197 
198  // no selection until the buffer of N frames is filled up
199  s->n++;
200  if (s->n < s->n_frames)
201  return 0;
202 
203  return ff_filter_frame(outlink, get_best_frame(ctx));
204 }
205 
207 {
208  int i;
209  ThumbContext *s = ctx->priv;
210  for (i = 0; i < s->n_frames && s->frames && s->frames[i].buf; i++)
211  av_frame_free(&s->frames[i].buf);
212  av_freep(&s->frames);
213 }
214 
216 {
217  AVFilterContext *ctx = link->src;
218  ThumbContext *s = ctx->priv;
219  int ret = ff_request_frame(ctx->inputs[0]);
220 
221  if (ret == AVERROR_EOF && s->n) {
222  ret = ff_filter_frame(link, get_best_frame(ctx));
223  if (ret < 0)
224  return ret;
225  ret = AVERROR_EOF;
226  }
227  if (ret < 0)
228  return ret;
229  return 0;
230 }
231 
233 {
234  AVFilterContext *ctx = inlink->dst;
235  ThumbContext *s = ctx->priv;
237 
238  s->tb = inlink->time_base;
239  s->planewidth[1] = s->planewidth[2] = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
240  s->planewidth[0] = s->planewidth[3] = inlink->w;
241  s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
242  s->planeheight[0] = s->planeheight[3] = inlink->h;
243 
244  return 0;
245 }
246 
248 {
249  static const enum AVPixelFormat pix_fmts[] = {
264  };
265  AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
266  if (!fmts_list)
267  return AVERROR(ENOMEM);
268  return ff_set_common_formats(ctx, fmts_list);
269 }
270 
271 static const AVFilterPad thumbnail_inputs[] = {
272  {
273  .name = "default",
274  .type = AVMEDIA_TYPE_VIDEO,
275  .config_props = config_props,
276  .filter_frame = filter_frame,
277  },
278  { NULL }
279 };
280 
281 static const AVFilterPad thumbnail_outputs[] = {
282  {
283  .name = "default",
284  .type = AVMEDIA_TYPE_VIDEO,
285  .request_frame = request_frame,
286  },
287  { NULL }
288 };
289 
291  .name = "thumbnail",
292  .description = NULL_IF_CONFIG_SMALL("Select the most representative frame in a given sequence of consecutive frames."),
293  .priv_size = sizeof(ThumbContext),
294  .init = init,
295  .uninit = uninit,
297  .inputs = thumbnail_inputs,
298  .outputs = thumbnail_outputs,
299  .priv_class = &thumbnail_class,
301 };
#define NULL
Definition: coverity.c:32
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:2573
This structure describes decoded (raw) audio or video data.
Definition: frame.h:318
int planeheight[4]
Definition: vf_thumbnail.c:50
AVOption.
Definition: opt.h:248
const char * desc
Definition: libsvtav1.c:79
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
Definition: pixfmt.h:71
Main libavfilter public API header.
packed RGB 8:8:8, 24bpp, RGBRGB...
Definition: pixfmt.h:68
static av_cold int init(AVFilterContext *ctx)
Definition: vf_thumbnail.c:63
planar GBR 4:4:4 24bpp
Definition: pixfmt.h:168
#define FF_ARRAY_ELEMS(a)
packed BGR 8:8:8, 32bpp, XBGRXBGR... X=unused/undefined
Definition: pixfmt.h:239
int n
current frame
Definition: vf_thumbnail.c:44
uint8_t log2_chroma_w
Amount to shift the luma width right to find the chroma width.
Definition: pixdesc.h:92
void * av_calloc(size_t nmemb, size_t size)
Non-inlined equivalent of av_mallocz_array().
Definition: mem.c:245
AVFilterFormats * ff_make_format_list(const int *fmts)
Create a list of supported formats.
Definition: formats.c:287
#define AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC
Some filters support a generic "enable" expression option that can be used to enable or disable a fil...
Definition: avfilter.h:126
const char * name
Pad name.
Definition: internal.h:60
AVFilterLink ** inputs
array of pointers to input links
Definition: avfilter.h:349
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1094
AVFrame * buf
cached frame
Definition: vf_thumbnail.c:38
planar YUV 4:2:0, 20bpp, (1 Cr & Cb sample per 2x2 Y & A samples)
Definition: pixfmt.h:101
uint8_t
#define av_cold
Definition: attributes.h:88
packed RGB 8:8:8, 32bpp, RGBXRGBX... X=unused/undefined
Definition: pixfmt.h:238
AVOptions.
#define OFFSET(x)
Definition: vf_thumbnail.c:53
int64_t pts
Presentation timestamp in time_base units (time when frame should be shown to user).
Definition: frame.h:411
packed ABGR 8:8:8:8, 32bpp, ABGRABGR...
Definition: pixfmt.h:94
struct thumb_frame * frames
the n_frames frames
Definition: vf_thumbnail.c:46
static double av_q2d(AVRational a)
Convert an AVRational to a double.
Definition: rational.h:104
planar YUV 4:4:0 full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV440P and setting color_range...
Definition: pixfmt.h:100
planar YUV 4:2:2, 16bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV422P and setting col...
Definition: pixfmt.h:79
#define AVERROR_EOF
End of file.
Definition: error.h:55
#define AV_LOG_VERBOSE
Detailed information.
Definition: log.h:210
#define av_log(a,...)
A filter pad used for either input or output.
Definition: internal.h:54
planar YUV 4:2:2 24bpp, (1 Cr & Cb sample per 2x1 Y & A samples)
Definition: pixfmt.h:176
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:194
int ff_set_common_formats(AVFilterContext *ctx, AVFilterFormats *formats)
A helper for query_formats() which sets all links to the same list of formats.
Definition: formats.c:588
uint8_t log2_chroma_h
Amount to shift the luma height right to find the chroma height.
Definition: pixdesc.h:101
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:204
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
Definition: internal.h:117
packed BGRA 8:8:8:8, 32bpp, BGRABGRA...
Definition: pixfmt.h:95
void * priv
private data for use by the filter
Definition: avfilter.h:356
static const AVFilterPad thumbnail_outputs[]
Definition: vf_thumbnail.c:281
packed ARGB 8:8:8:8, 32bpp, ARGBARGB...
Definition: pixfmt.h:92
packed RGBA 8:8:8:8, 32bpp, RGBARGBA...
Definition: pixfmt.h:93
planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
Definition: pixfmt.h:70
int planewidth[4]
Definition: vf_thumbnail.c:49
static int query_formats(AVFilterContext *ctx)
Definition: vf_thumbnail.c:247
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several inputs
planar YUV 4:2:0, 12bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV420P and setting col...
Definition: pixfmt.h:78
#define FLAGS
Definition: vf_thumbnail.c:54
AVFormatContext * ctx
Definition: movenc.c:48
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several the filter must be ready for frames arriving randomly on any input any filter with several inputs will most likely require some kind of queuing mechanism It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced request_frame For filters that do not use the this method is called when a frame is wanted on an output For a it should directly call filter_frame on the corresponding output For a if there are queued frames already one of these frames should be pushed If the filter should request a frame on one of its repeatedly until at least one frame has been pushed Return or at least make progress towards producing a frame
#define s(width, name)
Definition: cbs_vp9.c:257
packed RGB 8:8:8, 24bpp, BGRBGR...
Definition: pixfmt.h:69
static AVFrame * get_best_frame(AVFilterContext *ctx)
Definition: vf_thumbnail.c:95
static const AVFilterPad outputs[]
Definition: af_acontrast.c:203
static int request_frame(AVFilterLink *link)
Definition: vf_thumbnail.c:215
static int thumbnail(AVFilterContext *ctx, int *histogram, AVFrame *in)
#define AV_LOG_INFO
Standard information.
Definition: log.h:205
AVFilter ff_vf_thumbnail
Definition: vf_thumbnail.c:290
int linesize[AV_NUM_DATA_POINTERS]
For video, size in bytes of each picture line.
Definition: frame.h:349
planar YUV 4:4:4 32bpp, (1 Cr & Cb sample per 1x1 Y & A samples)
Definition: pixfmt.h:177
static int config_props(AVFilterLink *inlink)
Definition: vf_thumbnail.c:232
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes...
Definition: pixdesc.h:81
AVRational tb
copy of the input timebase to ease access
Definition: vf_thumbnail.c:47
planar YUV 4:1:0, 9bpp, (1 Cr & Cb sample per 4x4 Y samples)
Definition: pixfmt.h:72
Describe the class of an AVClass context structure.
Definition: log.h:67
Filter definition.
Definition: avfilter.h:145
int n_frames
number of frames for analysis
Definition: vf_thumbnail.c:45
Rational number (pair of numerator and denominator).
Definition: rational.h:58
packed BGR 8:8:8, 32bpp, BGRXBGRX... X=unused/undefined
Definition: pixfmt.h:240
const char * name
Filter name.
Definition: avfilter.h:149
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a link
static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
Definition: vf_thumbnail.c:135
AVFilterLink ** outputs
array of pointers to output links
Definition: avfilter.h:353
static av_cold void uninit(AVFilterContext *ctx)
Definition: vf_thumbnail.c:206
static enum AVPixelFormat pix_fmts[]
Definition: libkvazaar.c:303
static double frame_sum_square_err(const int *hist, const double *median)
Compute Sum-square deviation to estimate "closeness".
Definition: vf_thumbnail.c:83
#define flags(name, subs,...)
Definition: cbs_av1.c:561
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:332
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is inlink
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:66
#define HIST_SIZE
Definition: vf_thumbnail.c:35
planar GBRA 4:4:4:4 32bpp
Definition: pixfmt.h:215
planar YUV 4:4:4, 24bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV444P and setting col...
Definition: pixfmt.h:80
planar YUV 4:1:1, 12bpp, (1 Cr & Cb sample per 4x1 Y samples)
Definition: pixfmt.h:73
AVFILTER_DEFINE_CLASS(thumbnail)
static const AVOption thumbnail_options[]
Definition: vf_thumbnail.c:56
A list of supported formats for one end of a filter link.
Definition: formats.h:65
planar YUV 4:1:1, 12bpp, (1 Cr & Cb sample per 4x1 Y samples) full scale (JPEG), deprecated in favor ...
Definition: pixfmt.h:258
An instance of a filter.
Definition: avfilter.h:341
#define av_freep(p)
planar YUV 4:4:0 (1 Cr & Cb sample per 1x2 Y samples)
Definition: pixfmt.h:99
int histogram[HIST_SIZE]
RGB color distribution histogram of the frame.
Definition: vf_thumbnail.c:39
int ff_request_frame(AVFilterLink *link)
Request an input frame from the filter at the other end of the link.
Definition: avfilter.c:408
internal API functions
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
packed RGB 8:8:8, 32bpp, XRGBXRGB... X=unused/undefined
Definition: pixfmt.h:237
AVPixelFormat
Pixel format.
Definition: pixfmt.h:64
static const AVFilterPad thumbnail_inputs[]
Definition: vf_thumbnail.c:271
int i
Definition: input.c:407
#define AV_CEIL_RSHIFT(a, b)
Definition: common.h:58