FFmpeg
vf_thumbnail.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2011 Smartjog S.A.S, Clément Bœsch <clement.boesch@smartjog.com>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 /**
22  * @file
23  * Potential thumbnail lookup filter to reduce the risk of an inappropriate
24  * selection (such as a black frame) we could get with an absolute seek.
25  *
26  * Simplified version of algorithm by Vadim Zaliva <lord@crocodile.org>.
27  * @see http://notbrainsurgery.livejournal.com/29773.html
28  */
29 
30 #include "libavutil/opt.h"
31 #include "libavutil/pixdesc.h"
32 #include "avfilter.h"
33 #include "internal.h"
34 
35 #define HIST_SIZE (3*256)
36 
37 struct thumb_frame {
38  AVFrame *buf; ///< cached frame
39  int histogram[HIST_SIZE]; ///< RGB color distribution histogram of the frame
40 };
41 
42 typedef struct ThumbContext {
43  const AVClass *class;
44  int n; ///< current frame
45  int n_frames; ///< number of frames for analysis
46  struct thumb_frame *frames; ///< the n_frames frames
47  AVRational tb; ///< copy of the input timebase to ease access
48 
49  int planewidth[4];
50  int planeheight[4];
51 } ThumbContext;
52 
53 #define OFFSET(x) offsetof(ThumbContext, x)
54 #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
55 
56 static const AVOption thumbnail_options[] = {
57  { "n", "set the frames batch size", OFFSET(n_frames), AV_OPT_TYPE_INT, {.i64=100}, 2, INT_MAX, FLAGS },
58  { NULL }
59 };
60 
62 
64 {
65  ThumbContext *s = ctx->priv;
66 
67  s->frames = av_calloc(s->n_frames, sizeof(*s->frames));
68  if (!s->frames) {
70  "Allocation failure, try to lower the number of frames\n");
71  return AVERROR(ENOMEM);
72  }
73  av_log(ctx, AV_LOG_VERBOSE, "batch size: %d frames\n", s->n_frames);
74  return 0;
75 }
76 
77 /**
78  * @brief Compute Sum-square deviation to estimate "closeness".
79  * @param hist color distribution histogram
80  * @param median average color distribution histogram
81  * @return sum of squared errors
82  */
83 static double frame_sum_square_err(const int *hist, const double *median)
84 {
85  int i;
86  double err, sum_sq_err = 0;
87 
88  for (i = 0; i < HIST_SIZE; i++) {
89  err = median[i] - (double)hist[i];
90  sum_sq_err += err*err;
91  }
92  return sum_sq_err;
93 }
94 
96 {
97  AVFrame *picref;
98  ThumbContext *s = ctx->priv;
99  int i, j, best_frame_idx = 0;
100  int nb_frames = s->n;
101  double avg_hist[HIST_SIZE] = {0}, sq_err, min_sq_err = -1;
102 
103  // average histogram of the N frames
104  for (j = 0; j < FF_ARRAY_ELEMS(avg_hist); j++) {
105  for (i = 0; i < nb_frames; i++)
106  avg_hist[j] += (double)s->frames[i].histogram[j];
107  avg_hist[j] /= nb_frames;
108  }
109 
110  // find the frame closer to the average using the sum of squared errors
111  for (i = 0; i < nb_frames; i++) {
112  sq_err = frame_sum_square_err(s->frames[i].histogram, avg_hist);
113  if (i == 0 || sq_err < min_sq_err)
114  best_frame_idx = i, min_sq_err = sq_err;
115  }
116 
117  // free and reset everything (except the best frame buffer)
118  for (i = 0; i < nb_frames; i++) {
119  memset(s->frames[i].histogram, 0, sizeof(s->frames[i].histogram));
120  if (i != best_frame_idx)
121  av_frame_free(&s->frames[i].buf);
122  }
123  s->n = 0;
124 
125  // raise the chosen one
126  picref = s->frames[best_frame_idx].buf;
127  av_log(ctx, AV_LOG_INFO, "frame id #%d (pts_time=%f) selected "
128  "from a set of %d images\n", best_frame_idx,
129  picref->pts * av_q2d(s->tb), nb_frames);
130  s->frames[best_frame_idx].buf = NULL;
131 
132  return picref;
133 }
134 
136 {
137  int i, j;
138  AVFilterContext *ctx = inlink->dst;
139  ThumbContext *s = ctx->priv;
140  AVFilterLink *outlink = ctx->outputs[0];
141  int *hist = s->frames[s->n].histogram;
142  const uint8_t *p = frame->data[0];
143 
144  // keep a reference of each frame
145  s->frames[s->n].buf = frame;
146 
147  // update current frame histogram
148  switch (inlink->format) {
149  case AV_PIX_FMT_RGB24:
150  case AV_PIX_FMT_BGR24:
151  for (j = 0; j < inlink->h; j++) {
152  for (i = 0; i < inlink->w; i++) {
153  hist[0*256 + p[i*3 ]]++;
154  hist[1*256 + p[i*3 + 1]]++;
155  hist[2*256 + p[i*3 + 2]]++;
156  }
157  p += frame->linesize[0];
158  }
159  break;
160  case AV_PIX_FMT_RGB0:
161  case AV_PIX_FMT_BGR0:
162  case AV_PIX_FMT_RGBA:
163  case AV_PIX_FMT_BGRA:
164  for (j = 0; j < inlink->h; j++) {
165  for (i = 0; i < inlink->w; i++) {
166  hist[0*256 + p[i*4 ]]++;
167  hist[1*256 + p[i*4 + 1]]++;
168  hist[2*256 + p[i*4 + 2]]++;
169  }
170  p += frame->linesize[0];
171  }
172  break;
173  case AV_PIX_FMT_0RGB:
174  case AV_PIX_FMT_0BGR:
175  case AV_PIX_FMT_ARGB:
176  case AV_PIX_FMT_ABGR:
177  for (j = 0; j < inlink->h; j++) {
178  for (i = 0; i < inlink->w; i++) {
179  hist[0*256 + p[i*4 + 1]]++;
180  hist[1*256 + p[i*4 + 2]]++;
181  hist[2*256 + p[i*4 + 3]]++;
182  }
183  p += frame->linesize[0];
184  }
185  break;
186  default:
187  for (int plane = 0; plane < 3; plane++) {
188  const uint8_t *p = frame->data[plane];
189  for (j = 0; j < s->planeheight[plane]; j++) {
190  for (i = 0; i < s->planewidth[plane]; i++)
191  hist[256*plane + p[i]]++;
192  p += frame->linesize[plane];
193  }
194  }
195  break;
196  }
197 
198  // no selection until the buffer of N frames is filled up
199  s->n++;
200  if (s->n < s->n_frames)
201  return 0;
202 
203  return ff_filter_frame(outlink, get_best_frame(ctx));
204 }
205 
207 {
208  int i;
209  ThumbContext *s = ctx->priv;
210  for (i = 0; i < s->n_frames && s->frames && s->frames[i].buf; i++)
211  av_frame_free(&s->frames[i].buf);
212  av_freep(&s->frames);
213 }
214 
216 {
217  AVFilterContext *ctx = link->src;
218  ThumbContext *s = ctx->priv;
219  int ret = ff_request_frame(ctx->inputs[0]);
220 
221  if (ret == AVERROR_EOF && s->n) {
223  if (ret < 0)
224  return ret;
225  ret = AVERROR_EOF;
226  }
227  if (ret < 0)
228  return ret;
229  return 0;
230 }
231 
233 {
234  AVFilterContext *ctx = inlink->dst;
235  ThumbContext *s = ctx->priv;
237 
238  s->tb = inlink->time_base;
239  s->planewidth[1] = s->planewidth[2] = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
240  s->planewidth[0] = s->planewidth[3] = inlink->w;
241  s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
242  s->planeheight[0] = s->planeheight[3] = inlink->h;
243 
244  return 0;
245 }
246 
247 static const enum AVPixelFormat pix_fmts[] = {
262 };
263 
264 static const AVFilterPad thumbnail_inputs[] = {
265  {
266  .name = "default",
267  .type = AVMEDIA_TYPE_VIDEO,
268  .config_props = config_props,
269  .filter_frame = filter_frame,
270  },
271 };
272 
273 static const AVFilterPad thumbnail_outputs[] = {
274  {
275  .name = "default",
276  .type = AVMEDIA_TYPE_VIDEO,
277  .request_frame = request_frame,
278  },
279 };
280 
282  .name = "thumbnail",
283  .description = NULL_IF_CONFIG_SMALL("Select the most representative frame in a given sequence of consecutive frames."),
284  .priv_size = sizeof(ThumbContext),
285  .init = init,
286  .uninit = uninit,
290  .priv_class = &thumbnail_class,
292 };
ThumbContext
Definition: vf_thumbnail.c:42
AVPixelFormat
AVPixelFormat
Pixel format.
Definition: pixfmt.h:64
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
opt.h
AVFILTER_DEFINE_CLASS
AVFILTER_DEFINE_CLASS(thumbnail)
ff_filter_frame
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:999
av_pix_fmt_desc_get
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:2662
AVERROR_EOF
#define AVERROR_EOF
End of file.
Definition: error.h:57
thumb_frame::histogram
int histogram[HIST_SIZE]
RGB color distribution histogram of the frame.
Definition: vf_thumbnail.c:39
FILTER_PIXFMTS_ARRAY
#define FILTER_PIXFMTS_ARRAY(array)
Definition: internal.h:170
thumb_frame::buf
AVFrame * buf
cached frame
Definition: vf_thumbnail.c:38
inlink
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is inlink
Definition: filter_design.txt:212
av_frame_free
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:111
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:325
pixdesc.h
AVFrame::pts
int64_t pts
Presentation timestamp in time_base units (time when frame should be shown to user).
Definition: frame.h:432
AVOption
AVOption.
Definition: opt.h:251
ff_request_frame
int ff_request_frame(AVFilterLink *link)
Request an input frame from the filter at the other end of the link.
Definition: avfilter.c:400
AV_LOG_VERBOSE
#define AV_LOG_VERBOSE
Detailed information.
Definition: log.h:196
AV_PIX_FMT_BGR24
@ AV_PIX_FMT_BGR24
packed RGB 8:8:8, 24bpp, BGRBGR...
Definition: pixfmt.h:69
AV_PIX_FMT_BGRA
@ AV_PIX_FMT_BGRA
packed BGRA 8:8:8:8, 32bpp, BGRABGRA...
Definition: pixfmt.h:95
AV_PIX_FMT_YUV440P
@ AV_PIX_FMT_YUV440P
planar YUV 4:4:0 (1 Cr & Cb sample per 1x2 Y samples)
Definition: pixfmt.h:99
AVFilter::name
const char * name
Filter name.
Definition: avfilter.h:175
OFFSET
#define OFFSET(x)
Definition: vf_thumbnail.c:53
thumb_frame
Definition: vf_thumbnail.c:37
pix_fmts
static enum AVPixelFormat pix_fmts[]
Definition: vf_thumbnail.c:247
ThumbContext::frames
struct thumb_frame * frames
the n_frames frames
Definition: vf_thumbnail.c:46
AV_PIX_FMT_GBRAP
@ AV_PIX_FMT_GBRAP
planar GBRA 4:4:4:4 32bpp
Definition: pixfmt.h:205
AVFilterPad
A filter pad used for either input or output.
Definition: internal.h:49
AV_PIX_FMT_YUVJ411P
@ AV_PIX_FMT_YUVJ411P
planar YUV 4:1:1, 12bpp, (1 Cr & Cb sample per 4x1 Y samples) full scale (JPEG), deprecated in favor ...
Definition: pixfmt.h:248
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:180
FF_ARRAY_ELEMS
#define FF_ARRAY_ELEMS(a)
Definition: sinewin_tablegen.c:29
av_cold
#define av_cold
Definition: attributes.h:90
AV_PIX_FMT_YUVJ422P
@ AV_PIX_FMT_YUVJ422P
planar YUV 4:2:2, 16bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV422P and setting col...
Definition: pixfmt.h:79
ThumbContext::planeheight
int planeheight[4]
Definition: vf_thumbnail.c:50
s
#define s(width, name)
Definition: cbs_vp9.c:256
AV_PIX_FMT_YUVA420P
@ AV_PIX_FMT_YUVA420P
planar YUV 4:2:0, 20bpp, (1 Cr & Cb sample per 2x2 Y & A samples)
Definition: pixfmt.h:101
AV_CEIL_RSHIFT
#define AV_CEIL_RSHIFT(a, b)
Definition: common.h:50
av_q2d
static double av_q2d(AVRational a)
Convert an AVRational to a double.
Definition: rational.h:104
ThumbContext::tb
AVRational tb
copy of the input timebase to ease access
Definition: vf_thumbnail.c:47
ctx
AVFormatContext * ctx
Definition: movenc.c:48
AV_PIX_FMT_YUV420P
@ AV_PIX_FMT_YUV420P
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:66
ThumbContext::n_frames
int n_frames
number of frames for analysis
Definition: vf_thumbnail.c:45
FILTER_INPUTS
#define FILTER_INPUTS(array)
Definition: internal.h:190
get_best_frame
static AVFrame * get_best_frame(AVFilterContext *ctx)
Definition: vf_thumbnail.c:95
link
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a link
Definition: filter_design.txt:23
AV_PIX_FMT_YUVJ444P
@ AV_PIX_FMT_YUVJ444P
planar YUV 4:4:4, 24bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV444P and setting col...
Definition: pixfmt.h:80
AV_PIX_FMT_RGBA
@ AV_PIX_FMT_RGBA
packed RGBA 8:8:8:8, 32bpp, RGBARGBA...
Definition: pixfmt.h:93
AVClass
Describe the class of an AVClass context structure.
Definition: log.h:66
NULL
#define NULL
Definition: coverity.c:32
AVRational
Rational number (pair of numerator and denominator).
Definition: rational.h:58
FLAGS
#define FLAGS
Definition: vf_thumbnail.c:54
AV_PIX_FMT_YUVJ420P
@ AV_PIX_FMT_YUVJ420P
planar YUV 4:2:0, 12bpp, full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV420P and setting col...
Definition: pixfmt.h:78
double
double
Definition: af_crystalizer.c:132
config_props
static int config_props(AVFilterLink *inlink)
Definition: vf_thumbnail.c:232
AV_PIX_FMT_BGR0
@ AV_PIX_FMT_BGR0
packed BGR 8:8:8, 32bpp, BGRXBGRX... X=unused/undefined
Definition: pixfmt.h:230
AV_PIX_FMT_ABGR
@ AV_PIX_FMT_ABGR
packed ABGR 8:8:8:8, 32bpp, ABGRABGR...
Definition: pixfmt.h:94
AV_PIX_FMT_RGB24
@ AV_PIX_FMT_RGB24
packed RGB 8:8:8, 24bpp, RGBRGB...
Definition: pixfmt.h:68
NULL_IF_CONFIG_SMALL
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:117
request_frame
static int request_frame(AVFilterLink *link)
Definition: vf_thumbnail.c:215
thumbnail_outputs
static const AVFilterPad thumbnail_outputs[]
Definition: vf_thumbnail.c:273
uninit
static av_cold void uninit(AVFilterContext *ctx)
Definition: vf_thumbnail.c:206
AV_PIX_FMT_YUVA444P
@ AV_PIX_FMT_YUVA444P
planar YUV 4:4:4 32bpp, (1 Cr & Cb sample per 1x1 Y & A samples)
Definition: pixfmt.h:167
ThumbContext::n
int n
current frame
Definition: vf_thumbnail.c:44
AV_PIX_FMT_RGB0
@ AV_PIX_FMT_RGB0
packed RGB 8:8:8, 32bpp, RGBXRGBX... X=unused/undefined
Definition: pixfmt.h:228
AV_LOG_INFO
#define AV_LOG_INFO
Standard information.
Definition: log.h:191
internal.h
AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC
#define AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC
Some filters support a generic "enable" expression option that can be used to enable or disable a fil...
Definition: avfilter.h:152
AV_PIX_FMT_ARGB
@ AV_PIX_FMT_ARGB
packed ARGB 8:8:8:8, 32bpp, ARGBARGB...
Definition: pixfmt.h:92
thumbnail
static int thumbnail(AVFilterContext *ctx, int *histogram, AVFrame *in)
Definition: vf_thumbnail_cuda.c:199
filter_frame
static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
Definition: vf_thumbnail.c:135
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:269
frame_sum_square_err
static double frame_sum_square_err(const int *hist, const double *median)
Compute Sum-square deviation to estimate "closeness".
Definition: vf_thumbnail.c:83
AV_PIX_FMT_YUVJ440P
@ AV_PIX_FMT_YUVJ440P
planar YUV 4:4:0 full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV440P and setting color_range
Definition: pixfmt.h:100
AVFilterPad::name
const char * name
Pad name.
Definition: internal.h:55
av_calloc
void * av_calloc(size_t nmemb, size_t size)
Definition: mem.c:272
AVFilter
Filter definition.
Definition: avfilter.h:171
ret
ret
Definition: filter_design.txt:187
HIST_SIZE
#define HIST_SIZE
Definition: vf_thumbnail.c:35
AV_PIX_FMT_0BGR
@ AV_PIX_FMT_0BGR
packed BGR 8:8:8, 32bpp, XBGRXBGR... X=unused/undefined
Definition: pixfmt.h:229
frame
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several the filter must be ready for frames arriving randomly on any input any filter with several inputs will most likely require some kind of queuing mechanism It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced request_frame For filters that do not use the this method is called when a frame is wanted on an output For a it should directly call filter_frame on the corresponding output For a if there are queued frames already one of these frames should be pushed If the filter should request a frame on one of its repeatedly until at least one frame has been pushed Return or at least make progress towards producing a frame
Definition: filter_design.txt:264
AV_PIX_FMT_NONE
@ AV_PIX_FMT_NONE
Definition: pixfmt.h:65
AV_OPT_TYPE_INT
@ AV_OPT_TYPE_INT
Definition: opt.h:225
thumbnail_inputs
static const AVFilterPad thumbnail_inputs[]
Definition: vf_thumbnail.c:264
avfilter.h
AV_PIX_FMT_YUV444P
@ AV_PIX_FMT_YUV444P
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
Definition: pixfmt.h:71
AVFilterContext
An instance of a filter.
Definition: avfilter.h:408
AV_PIX_FMT_GBRP
@ AV_PIX_FMT_GBRP
planar GBR 4:4:4 24bpp
Definition: pixfmt.h:158
desc
const char * desc
Definition: libsvtav1.c:83
AVMEDIA_TYPE_VIDEO
@ AVMEDIA_TYPE_VIDEO
Definition: avutil.h:201
AV_PIX_FMT_YUV422P
@ AV_PIX_FMT_YUV422P
planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
Definition: pixfmt.h:70
AVPixFmtDescriptor
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes...
Definition: pixdesc.h:69
FILTER_OUTPUTS
#define FILTER_OUTPUTS(array)
Definition: internal.h:191
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:34
AV_PIX_FMT_YUV411P
@ AV_PIX_FMT_YUV411P
planar YUV 4:1:1, 12bpp, (1 Cr & Cb sample per 4x1 Y samples)
Definition: pixfmt.h:73
AV_PIX_FMT_0RGB
@ AV_PIX_FMT_0RGB
packed RGB 8:8:8, 32bpp, XRGBXRGB... X=unused/undefined
Definition: pixfmt.h:227
AV_PIX_FMT_YUV410P
@ AV_PIX_FMT_YUV410P
planar YUV 4:1:0, 9bpp, (1 Cr & Cb sample per 4x4 Y samples)
Definition: pixfmt.h:72
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
init
static av_cold int init(AVFilterContext *ctx)
Definition: vf_thumbnail.c:63
thumbnail_options
static const AVOption thumbnail_options[]
Definition: vf_thumbnail.c:56
ff_vf_thumbnail
const AVFilter ff_vf_thumbnail
Definition: vf_thumbnail.c:281
ThumbContext::planewidth
int planewidth[4]
Definition: vf_thumbnail.c:49
AV_PIX_FMT_YUVA422P
@ AV_PIX_FMT_YUVA422P
planar YUV 4:2:2 24bpp, (1 Cr & Cb sample per 2x1 Y & A samples)
Definition: pixfmt.h:166