55 #define OFFSET(x) offsetof(DnnProcessingContext, x) 56 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM 60 #if (CONFIG_LIBTENSORFLOW == 1) 63 #if (CONFIG_LIBOPENVINO == 1) 111 av_log(ctx,
AV_LOG_WARNING,
"this backend does not support async execution, roll back to sync.\n");
114 #if !HAVE_PTHREAD_CANCEL 138 #define LOG_FORMAT_CHANNEL_MISMATCH() \ 139 av_log(ctx, AV_LOG_ERROR, \ 140 "the frame's format %s does not match " \ 141 "the model input channel %d\n", \ 142 av_get_pix_fmt_name(fmt), \ 143 model_input->channels); 151 if (model_input->
height != -1 && model_input->
height != inlink->
h) {
153 model_input->
height, inlink->
h);
156 if (model_input->
width != -1 && model_input->
width != inlink->
w) {
158 model_input->
width, inlink->
w);
231 if (inlink->
w != outlink->
w || inlink->
h != outlink->
h) {
283 for (
int i = 1;
i < 3; ++
i) {
287 bytewidth, uv_height);
396 *out_pts = out_frame->
pts +
pts;
458 int64_t out_pts =
pts;
511 .
name =
"dnn_processing",
517 .
inputs = dnn_processing_inputs,
518 .
outputs = dnn_processing_outputs,
519 .priv_class = &dnn_processing_class,
int ff_inlink_consume_frame(AVFilterLink *link, AVFrame **rframe)
Take a frame from the link's FIFO and update the link's stats.
int av_image_get_linesize(enum AVPixelFormat pix_fmt, int width, int plane)
Compute the size of an image line with format pix_fmt and width width for the plane plane...
static enum AVPixelFormat pix_fmt
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
This structure describes decoded (raw) audio or video data.
static int prepare_uv_scale(AVFilterLink *outlink)
static const AVOption dnn_processing_options[]
8 bits gray, 8 bits alpha
void(* free_model)(DNNModel **model)
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
#define AV_LOG_WARNING
Something somehow does not look correct.
packed RGB 8:8:8, 24bpp, RGBRGB...
int h
agreed upon image height
struct SwsContext * sws_uv_scale
static const AVFilterPad dnn_processing_outputs[]
static int flush_frame(AVFilterLink *outlink, int64_t pts, int64_t *out_pts)
int av_usleep(unsigned usec)
Sleep for a period of time.
AVFrame * ff_get_video_buffer(AVFilterLink *link, int w, int h)
Request a picture buffer with a specific set of permissions.
static av_cold void uninit(AVFilterContext *ctx)
uint8_t log2_chroma_w
Amount to shift the luma width right to find the chroma width.
static void ff_outlink_set_status(AVFilterLink *link, int status, int64_t pts)
Set the status field of a link from the source filter.
const char * name
Pad name.
AVFilterLink ** inputs
array of pointers to input links
#define av_assert0(cond)
assert() equivalent, that is always enabled.
AVFilter ff_vf_dnn_processing
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
DNNReturnType(* execute_model)(const DNNModel *model, const char *input_name, AVFrame *in_frame, const char **output_names, uint32_t nb_output, AVFrame *out_frame)
DNNReturnType(* execute_model_async)(const DNNModel *model, const char *input_name, AVFrame *in_frame, const char **output_names, uint32_t nb_output, AVFrame *out_frame)
int64_t pts
Presentation timestamp in time_base units (time when frame should be shown to user).
static int check_modelinput_inlink(const DNNData *model_input, const AVFilterLink *inlink)
struct SwsContext * sws_getContext(int srcW, int srcH, enum AVPixelFormat srcFormat, int dstW, int dstH, enum AVPixelFormat dstFormat, int flags, SwsFilter *srcFilter, SwsFilter *dstFilter, const double *param)
Allocate and return an SwsContext.
#define AVERROR_EOF
End of file.
static int config_input(AVFilterLink *inlink)
#define FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink)
Forward the status on an output link to an input link.
A filter pad used for either input or output.
A link between two filters.
int ff_inlink_acknowledge_status(AVFilterLink *link, int *rstatus, int64_t *rpts)
Test and acknowledge the change of status on the link.
DNNReturnType(* get_input)(void *model, DNNData *input, const char *input_name)
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
uint8_t log2_chroma_h
Amount to shift the luma height right to find the chroma height.
DNNModel *(* load_model)(const char *model_filename, const char *options, AVFilterContext *filter_ctx)
static av_cold int init(AVFilterContext *context)
#define AV_PIX_FMT_FLAG_RGB
The pixel format contains RGB-like data (as opposed to YUV/grayscale).
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
void * priv
private data for use by the filter
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (firs...
simple assert() macros that are a bit more flexible than ISO C assert().
static FilteringContext * filter_ctx
planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
int w
agreed upon image width
uint64_t flags
Combination of AV_PIX_FMT_FLAG_...
uint8_t nb_components
The number of components each pixel has, (1-4)
static int copy_uv_planes(DnnProcessingContext *ctx, AVFrame *out, const AVFrame *in)
void sws_freeContext(struct SwsContext *swsContext)
Free the swscaler context swsContext.
DNNReturnType(* flush)(const DNNModel *model)
packed RGB 8:8:8, 24bpp, BGRBGR...
AVFilterContext * src
source filter
static const AVFilterPad dnn_processing_inputs[]
AVFILTER_DEFINE_CLASS(dnn_processing)
DNN inference engine interface.
static const AVFilterPad outputs[]
int format
agreed upon media format
int format
format of the frame, -1 if unknown or unset Values correspond to enum AVPixelFormat for video frames...
static int activate(AVFilterContext *filter_ctx)
#define AV_PIX_FMT_GRAYF32
int linesize[AV_NUM_DATA_POINTERS]
For video, size in bytes of each picture line.
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes...
int attribute_align_arg sws_scale(struct SwsContext *c, const uint8_t *const srcSlice[], const int srcStride[], int srcSliceY, int srcSliceH, uint8_t *const dst[], const int dstStride[])
swscale wrapper, so we don't need to export the SwsContext.
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several inputs
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t,*(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t,*(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t,*(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t,*(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31))))#define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac){}void ff_audio_convert_free(AudioConvert **ac){if(!*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);}AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, int apply_map){AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method!=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2){ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc){av_free(ac);return NULL;}return ac;}in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar){ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar?ac->channels:1;}else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;}int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in){int use_generic=1;int len=in->nb_samples;int p;if(ac->dc){av_log(ac->avr, AV_LOG_TRACE,"%d samples - audio_convert: %s to %s (dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> in
planar YUV 4:1:0, 9bpp, (1 Cr & Cb sample per 4x4 Y samples)
Describe the class of an AVClass context structure.
static int query_formats(AVFilterContext *context)
static int activate_async(AVFilterContext *filter_ctx)
they must not be accessed directly The fifo field contains the frames that are queued in the input for processing by the filter The status_in and status_out fields contains the queued status(EOF or error) of the link
static av_always_inline int isPlanarYUV(enum AVPixelFormat pix_fmt)
static int activate_sync(AVFilterContext *filter_ctx)
const char * name
Filter name.
DNNAsyncStatusType(* get_async_result)(const DNNModel *model, AVFrame **in, AVFrame **out)
AVFilterLink ** outputs
array of pointers to output links
static enum AVPixelFormat pix_fmts[]
DNNModule * ff_get_dnn_module(DNNBackendType backend_type)
void avpriv_report_missing_feature(void *avc, const char *msg,...) av_printf_format(2
Log a generic warning message about a missing feature.
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is inlink
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
DNNReturnType(* get_output)(void *model, const char *input_name, int input_width, int input_height, const char *output_name, int *output_width, int *output_height)
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
planar YUV 4:1:1, 12bpp, (1 Cr & Cb sample per 4x1 Y samples)
FF_FILTER_FORWARD_WANTED(outlink, inlink)
AVFilterContext * dst
dest filter
#define LOG_FORMAT_CHANNEL_MISMATCH()
and forward the result(frame or status change) to the corresponding input.If nothing is possible
DNNBackendType backend_type
void av_image_copy_plane(uint8_t *dst, int dst_linesize, const uint8_t *src, int src_linesize, int bytewidth, int height)
Copy image plane from src to dst.
const char * av_get_pix_fmt_name(enum AVPixelFormat pix_fmt)
Return the short name for a pixel format, NULL in case pix_fmt is unknown.
static int config_output(AVFilterLink *outlink)
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later.That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another.Frame references ownership and permissions
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf default minimum maximum flags name is the option keep it simple and lowercase description are in without and describe what they for example set the foo of the bar offset is the offset of the field in your local context
AVPixelFormat
Pixel format.
int av_frame_copy_props(AVFrame *dst, const AVFrame *src)
Copy only "metadata" fields from src to dst.
#define AV_CEIL_RSHIFT(a, b)
#define check(x, y, S, v)