FFmpeg
dnn_backend_native.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2018 Sergey Lavrushkin
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 /**
22  * @file
23  * DNN native backend implementation.
24  */
25 
26 #include "dnn_backend_native.h"
27 #include "libavutil/avassert.h"
30 #include "dnn_io_proc.h"
31 
32 #define OFFSET(x) offsetof(NativeContext, x)
33 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM
34 static const AVOption dnn_native_options[] = {
35  { "conv2d_threads", "threads num for conv2d layer", OFFSET(options.conv2d_threads), AV_OPT_TYPE_INT, { .i64 = 0 }, INT_MIN, INT_MAX, FLAGS },
36  { NULL },
37 };
38 
39 static const AVClass dnn_native_class = {
40  .class_name = "dnn_native",
41  .item_name = av_default_item_name,
42  .option = dnn_native_options,
43  .version = LIBAVUTIL_VERSION_INT,
44  .category = AV_CLASS_CATEGORY_FILTER,
45 };
46 
47 static DNNReturnType execute_model_native(const DNNModel *model, const char *input_name, AVFrame *in_frame,
48  const char **output_names, uint32_t nb_output, AVFrame *out_frame,
49  int do_ioproc);
50 
51 static DNNReturnType get_input_native(void *model, DNNData *input, const char *input_name)
52 {
53  NativeModel *native_model = model;
54  NativeContext *ctx = &native_model->ctx;
55 
56  for (int i = 0; i < native_model->operands_num; ++i) {
57  DnnOperand *oprd = &native_model->operands[i];
58  if (strcmp(oprd->name, input_name) == 0) {
59  if (oprd->type != DOT_INPUT) {
60  av_log(ctx, AV_LOG_ERROR, "Found \"%s\" in model, but it is not input node\n", input_name);
61  return DNN_ERROR;
62  }
63  input->dt = oprd->data_type;
64  av_assert0(oprd->dims[0] == 1);
65  input->height = oprd->dims[1];
66  input->width = oprd->dims[2];
67  input->channels = oprd->dims[3];
68  return DNN_SUCCESS;
69  }
70  }
71 
72  // do not find the input operand
73  av_log(ctx, AV_LOG_ERROR, "Could not find \"%s\" in model\n", input_name);
74  return DNN_ERROR;
75 }
76 
77 static DNNReturnType get_output_native(void *model, const char *input_name, int input_width, int input_height,
78  const char *output_name, int *output_width, int *output_height)
79 {
81  NativeModel *native_model = model;
82  NativeContext *ctx = &native_model->ctx;
83  AVFrame *in_frame = av_frame_alloc();
84  AVFrame *out_frame = NULL;
85 
86  if (!in_frame) {
87  av_log(ctx, AV_LOG_ERROR, "Could not allocate memory for input frame\n");
88  return DNN_ERROR;
89  }
90 
91  out_frame = av_frame_alloc();
92 
93  if (!out_frame) {
94  av_log(ctx, AV_LOG_ERROR, "Could not allocate memory for output frame\n");
95  av_frame_free(&in_frame);
96  return DNN_ERROR;
97  }
98 
99  in_frame->width = input_width;
100  in_frame->height = input_height;
101 
102  ret = execute_model_native(native_model->model, input_name, in_frame, &output_name, 1, out_frame, 0);
103  *output_width = out_frame->width;
104  *output_height = out_frame->height;
105 
106  av_frame_free(&out_frame);
107  av_frame_free(&in_frame);
108  return ret;
109 }
110 
111 // Loads model and its parameters that are stored in a binary file with following structure:
112 // layers_num,layer_type,layer_parameterss,layer_type,layer_parameters...
113 // For CONV layer: activation_function, input_num, output_num, kernel_size, kernel, biases
114 // For DEPTH_TO_SPACE layer: block_size
115 DNNModel *ff_dnn_load_model_native(const char *model_filename, DNNFunctionType func_type, const char *options, AVFilterContext *filter_ctx)
116 {
117 #define DNN_NATIVE_MAGIC "FFMPEGDNNNATIVE"
118  DNNModel *model = NULL;
119  // sizeof - 1 to skip the terminating '\0' which is not written in the file
120  char buf[sizeof(DNN_NATIVE_MAGIC) - 1];
121  int version, header_size, major_version_expected = 1;
122  NativeModel *native_model = NULL;
123  AVIOContext *model_file_context;
124  int file_size, dnn_size, parsed_size;
125  int32_t layer;
126  DNNLayerType layer_type;
127 
128  if (avio_open(&model_file_context, model_filename, AVIO_FLAG_READ) < 0){
129  return NULL;
130  }
131  file_size = avio_size(model_file_context);
132 
133  model = av_mallocz(sizeof(DNNModel));
134  if (!model){
135  goto fail;
136  }
137 
138  /**
139  * check file header with string and version
140  */
141  if (avio_read(model_file_context, buf, sizeof(buf)) != sizeof(buf) ||
142  memcmp(buf, DNN_NATIVE_MAGIC, sizeof(buf)))
143  goto fail;
144  dnn_size = sizeof(buf);
145 
146  version = (int32_t)avio_rl32(model_file_context);
147  dnn_size += 4;
148  if (version != major_version_expected) {
149  goto fail;
150  }
151 
152  // currently no need to check minor version
153  version = (int32_t)avio_rl32(model_file_context);
154  dnn_size += 4;
155  header_size = dnn_size;
156 
157  native_model = av_mallocz(sizeof(NativeModel));
158  if (!native_model){
159  goto fail;
160  }
161  model->model = native_model;
162 
163  native_model->ctx.class = &dnn_native_class;
164  model->options = options;
165  if (av_opt_set_from_string(&native_model->ctx, model->options, NULL, "=", "&") < 0)
166  goto fail;
167  native_model->model = model;
168 
169 #if !HAVE_PTHREAD_CANCEL
170  if (native_model->ctx.options.conv2d_threads > 1){
171  av_log(&native_model->ctx, AV_LOG_WARNING, "'conv2d_threads' option was set but it is not supported "
172  "on this build (pthread support is required)\n");
173  }
174 #endif
175 
176  avio_seek(model_file_context, file_size - 8, SEEK_SET);
177  native_model->layers_num = (int32_t)avio_rl32(model_file_context);
178  native_model->operands_num = (int32_t)avio_rl32(model_file_context);
179  dnn_size += 8;
180  avio_seek(model_file_context, header_size, SEEK_SET);
181 
182  native_model->layers = av_mallocz(native_model->layers_num * sizeof(Layer));
183  if (!native_model->layers){
184  goto fail;
185  }
186 
187  native_model->operands = av_mallocz(native_model->operands_num * sizeof(DnnOperand));
188  if (!native_model->operands){
189  goto fail;
190  }
191 
192  for (layer = 0; layer < native_model->layers_num; ++layer){
193  layer_type = (int32_t)avio_rl32(model_file_context);
194  dnn_size += 4;
195 
196  if (layer_type >= DLT_COUNT) {
197  goto fail;
198  }
199 
200  native_model->layers[layer].type = layer_type;
201  parsed_size = ff_layer_funcs[layer_type].pf_load(&native_model->layers[layer], model_file_context, file_size, native_model->operands_num);
202  if (!parsed_size) {
203  goto fail;
204  }
205  dnn_size += parsed_size;
206  }
207 
208  for (int32_t i = 0; i < native_model->operands_num; ++i){
209  DnnOperand *oprd;
210  int32_t name_len;
211  int32_t operand_index = (int32_t)avio_rl32(model_file_context);
212  dnn_size += 4;
213 
214  if (operand_index >= native_model->operands_num) {
215  goto fail;
216  }
217 
218  oprd = &native_model->operands[operand_index];
219  name_len = (int32_t)avio_rl32(model_file_context);
220  dnn_size += 4;
221 
222  avio_get_str(model_file_context, name_len, oprd->name, sizeof(oprd->name));
223  dnn_size += name_len;
224 
225  oprd->type = (int32_t)avio_rl32(model_file_context);
226  dnn_size += 4;
227 
228  oprd->data_type = (int32_t)avio_rl32(model_file_context);
229  dnn_size += 4;
230 
231  for (int32_t dim = 0; dim < 4; ++dim) {
232  oprd->dims[dim] = (int32_t)avio_rl32(model_file_context);
233  dnn_size += 4;
234  }
235  if (oprd->type == DOT_INPUT && oprd->dims[0] != 1)
236  goto fail;
237 
238  oprd->isNHWC = 1;
239  }
240 
241  avio_closep(&model_file_context);
242 
243  if (dnn_size != file_size){
244  ff_dnn_free_model_native(&model);
245  return NULL;
246  }
247 
248  model->get_input = &get_input_native;
249  model->get_output = &get_output_native;
250  model->filter_ctx = filter_ctx;
251  model->func_type = func_type;
252 
253  return model;
254 
255 fail:
256  ff_dnn_free_model_native(&model);
257  avio_closep(&model_file_context);
258  return NULL;
259 }
260 
261 static DNNReturnType execute_model_native(const DNNModel *model, const char *input_name, AVFrame *in_frame,
262  const char **output_names, uint32_t nb_output, AVFrame *out_frame,
263  int do_ioproc)
264 {
265  NativeModel *native_model = model->model;
266  NativeContext *ctx = &native_model->ctx;
267  int32_t layer;
269  DnnOperand *oprd = NULL;
270 
271  if (native_model->layers_num <= 0 || native_model->operands_num <= 0) {
272  av_log(ctx, AV_LOG_ERROR, "No operands or layers in model\n");
273  return DNN_ERROR;
274  }
275 
276  for (int i = 0; i < native_model->operands_num; ++i) {
277  oprd = &native_model->operands[i];
278  if (strcmp(oprd->name, input_name) == 0) {
279  if (oprd->type != DOT_INPUT) {
280  av_log(ctx, AV_LOG_ERROR, "Found \"%s\" in model, but it is not input node\n", input_name);
281  return DNN_ERROR;
282  }
283  break;
284  }
285  oprd = NULL;
286  }
287  if (!oprd) {
288  av_log(ctx, AV_LOG_ERROR, "Could not find \"%s\" in model\n", input_name);
289  return DNN_ERROR;
290  }
291 
292  oprd->dims[1] = in_frame->height;
293  oprd->dims[2] = in_frame->width;
294 
295  av_freep(&oprd->data);
297  if (oprd->length <= 0) {
298  av_log(ctx, AV_LOG_ERROR, "The input data length overflow\n");
299  return DNN_ERROR;
300  }
301  oprd->data = av_malloc(oprd->length);
302  if (!oprd->data) {
303  av_log(ctx, AV_LOG_ERROR, "Failed to malloc memory for input data\n");
304  return DNN_ERROR;
305  }
306 
307  input.height = oprd->dims[1];
308  input.width = oprd->dims[2];
309  input.channels = oprd->dims[3];
310  input.data = oprd->data;
311  input.dt = oprd->data_type;
312  if (do_ioproc) {
313  if (native_model->model->frame_pre_proc != NULL) {
314  native_model->model->frame_pre_proc(in_frame, &input, native_model->model->filter_ctx);
315  } else {
316  ff_proc_from_frame_to_dnn(in_frame, &input, native_model->model->func_type, ctx);
317  }
318  }
319 
320  if (nb_output != 1) {
321  // currently, the filter does not need multiple outputs,
322  // so we just pending the support until we really need it.
323  avpriv_report_missing_feature(ctx, "multiple outputs");
324  return DNN_ERROR;
325  }
326 
327  for (layer = 0; layer < native_model->layers_num; ++layer){
328  DNNLayerType layer_type = native_model->layers[layer].type;
329  if (ff_layer_funcs[layer_type].pf_exec(native_model->operands,
330  native_model->layers[layer].input_operand_indexes,
331  native_model->layers[layer].output_operand_index,
332  native_model->layers[layer].params,
333  &native_model->ctx) == DNN_ERROR) {
334  av_log(ctx, AV_LOG_ERROR, "Failed to execute model\n");
335  return DNN_ERROR;
336  }
337  }
338 
339  for (uint32_t i = 0; i < nb_output; ++i) {
340  DnnOperand *oprd = NULL;
341  const char *output_name = output_names[i];
342  for (int j = 0; j < native_model->operands_num; ++j) {
343  if (strcmp(native_model->operands[j].name, output_name) == 0) {
344  oprd = &native_model->operands[j];
345  break;
346  }
347  }
348 
349  if (oprd == NULL) {
350  av_log(ctx, AV_LOG_ERROR, "Could not find output in model\n");
351  return DNN_ERROR;
352  }
353 
354  output.data = oprd->data;
355  output.height = oprd->dims[1];
356  output.width = oprd->dims[2];
357  output.channels = oprd->dims[3];
358  output.dt = oprd->data_type;
359 
360  if (do_ioproc) {
361  if (native_model->model->frame_post_proc != NULL) {
362  native_model->model->frame_post_proc(out_frame, &output, native_model->model->filter_ctx);
363  } else {
364  ff_proc_from_dnn_to_frame(out_frame, &output, ctx);
365  }
366  } else {
367  out_frame->width = output.width;
368  out_frame->height = output.height;
369  }
370  }
371 
372  return DNN_SUCCESS;
373 }
374 
375 DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, const char *input_name, AVFrame *in_frame,
376  const char **output_names, uint32_t nb_output, AVFrame *out_frame)
377 {
378  NativeModel *native_model = model->model;
379  NativeContext *ctx = &native_model->ctx;
380 
381  if (!in_frame) {
382  av_log(ctx, AV_LOG_ERROR, "in frame is NULL when execute model.\n");
383  return DNN_ERROR;
384  }
385 
386  if (!out_frame) {
387  av_log(ctx, AV_LOG_ERROR, "out frame is NULL when execute model.\n");
388  return DNN_ERROR;
389  }
390 
391  return execute_model_native(model, input_name, in_frame, output_names, nb_output, out_frame, 1);
392 }
393 
395 {
396  int32_t result = 1;
397  for (int i = 0; i < 4; ++i)
398  result *= oprd->dims[i];
399 
400  return result;
401 }
402 
404 {
405  // currently, we just support DNN_FLOAT
406  uint64_t len = sizeof(float);
407  for (int i = 0; i < 4; i++) {
408  len *= oprd->dims[i];
409  if (len > INT32_MAX)
410  return 0;
411  }
412  return len;
413 }
414 
416 {
417  NativeModel *native_model;
418  ConvolutionalParams *conv_params;
419  int32_t layer;
420 
421  if (*model)
422  {
423  if ((*model)->model) {
424  native_model = (*model)->model;
425  if (native_model->layers) {
426  for (layer = 0; layer < native_model->layers_num; ++layer){
427  if (native_model->layers[layer].type == DLT_CONV2D){
428  conv_params = (ConvolutionalParams *)native_model->layers[layer].params;
429  av_freep(&conv_params->kernel);
430  av_freep(&conv_params->biases);
431  }
432  av_freep(&native_model->layers[layer].params);
433  }
434  av_freep(&native_model->layers);
435  }
436 
437  if (native_model->operands) {
438  for (uint32_t operand = 0; operand < native_model->operands_num; ++operand)
439  av_freep(&native_model->operands[operand].data);
440  av_freep(&native_model->operands);
441  }
442 
443  av_freep(&native_model);
444  }
445  av_freep(model);
446  }
447 }
int avio_open(AVIOContext **s, const char *url, int flags)
Create and initialize a AVIOContext for accessing the resource indicated by url.
Definition: aviobuf.c:1137
void * model
Definition: dnn_interface.h:71
#define NULL
Definition: coverity.c:32
Bytestream IO Context.
Definition: avio.h:161
int8_t isNHWC
NHWC if 1, otherwise NCHW.
int64_t avio_size(AVIOContext *s)
Get the filesize.
Definition: aviobuf.c:342
version
Definition: libkvazaar.c:320
This structure describes decoded (raw) audio or video data.
Definition: frame.h:318
AVOption.
Definition: opt.h:248
#define AV_LOG_WARNING
Something somehow does not look correct.
Definition: log.h:200
#define LIBAVUTIL_VERSION_INT
Definition: version.h:85
int channels
Definition: dnn_interface.h:60
DNN inference functions interface for native backend.
int64_t avio_seek(AVIOContext *s, int64_t offset, int whence)
fseek() equivalent for AVIOContext.
Definition: aviobuf.c:253
#define AVIO_FLAG_READ
read-only
Definition: avio.h:674
const char * av_default_item_name(void *ptr)
Return the context name.
Definition: log.c:235
void * av_mallocz(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if availab...
Definition: mem.c:237
DNNOperandType type
input/output/intermediate operand of the network
const char * options
Definition: dnn_interface.h:73
DnnOperand * operands
int32_t input_operand_indexes[4]
a layer can have multiple inputs and one output.
int av_opt_set_from_string(void *ctx, const char *opts, const char *const *shorthand, const char *key_val_sep, const char *pairs_sep)
Parse the key-value pairs list in opts.
Definition: opt.c:1559
const char * class_name
The name of the class; usually it is the same name as the context structure type to which the AVClass...
Definition: log.h:72
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:37
#define av_malloc(s)
AVFrame * av_frame_alloc(void)
Allocate an AVFrame and set its fields to default values.
Definition: frame.c:191
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce output
DNNReturnType ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx)
Definition: dnn_io_proc.c:26
#define DNN_NATIVE_MAGIC
const AVClass * class
DNNDataType data_type
support different kinds of data type such as float, half float, int8 etc, first support float now...
int32_t ff_calculate_operand_dims_count(const DnnOperand *oprd)
NativeContext ctx
#define av_log(a,...)
int avio_read(AVIOContext *s, unsigned char *buf, int size)
Read size bytes from AVIOContext into buf.
Definition: aviobuf.c:633
DNNModel * model
DNNReturnType(* get_input)(void *model, DNNData *input, const char *input_name)
Definition: dnn_interface.h:80
int width
Definition: frame.h:376
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:194
static DNNReturnType execute_model_native(const DNNModel *model, const char *input_name, AVFrame *in_frame, const char **output_names, uint32_t nb_output, AVFrame *out_frame, int do_ioproc)
int height
Definition: dnn_interface.h:60
FramePrePostProc frame_pre_proc
Definition: dnn_interface.h:86
unsigned int avio_rl32(AVIOContext *s)
Definition: aviobuf.c:750
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:204
void * data
Definition: dnn_interface.h:59
void * data
data pointer with data length in bytes.
simple assert() macros that are a bit more flexible than ISO C assert().
static FilteringContext * filter_ctx
Definition: transcoding.c:48
int32_t dims[4]
there are two memory layouts, NHWC or NCHW, so we use dims, dims[0] is Number.
#define fail()
Definition: checkasm.h:133
static DNNReturnType get_output_native(void *model, const char *input_name, int input_width, int input_height, const char *output_name, int *output_width, int *output_height)
char name[128]
to avoid possible memory leak, do not use char *name
int32_t
AVFormatContext * ctx
Definition: movenc.c:48
#define OFFSET(x)
DNNReturnType
Definition: dnn_interface.h:33
void ff_dnn_free_model_native(DNNModel **model)
static const AVClass dnn_native_class
static const AVOption dnn_native_options[]
static DNNReturnType get_input_native(void *model, DNNData *input, const char *input_name)
DNN input&output process between AVFrame and DNNData.
Describe the class of an AVClass context structure.
Definition: log.h:67
DNNFunctionType func_type
Definition: dnn_interface.h:77
DNNFunctionType
Definition: dnn_interface.h:51
DNNReturnType ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, DNNFunctionType func_type, void *log_ctx)
Definition: dnn_io_proc.c:207
DNNModel * ff_dnn_load_model_native(const char *model_filename, DNNFunctionType func_type, const char *options, AVFilterContext *filter_ctx)
int dim
and forward the test the status of outputs and forward it to the corresponding return FFERROR_NOT_READY If the filters stores internally one or a few frame for some input
DNNLayerType type
DNNLayerType
the enum value of DNNLayerType should not be changed, the same values are used in convert_from_tensor...
void avpriv_report_missing_feature(void *avc, const char *msg,...) av_printf_format(2
Log a generic warning message about a missing feature.
#define FLAGS
const OptionDef options[]
Definition: ffmpeg_opt.c:3427
DNNReturnType(* get_output)(void *model, const char *input_name, int input_width, int input_height, const char *output_name, int *output_width, int *output_height)
Definition: dnn_interface.h:82
NativeOptions options
int len
void * params
const LayerFunc ff_layer_funcs[DLT_COUNT]
DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, const char *input_name, AVFrame *in_frame, const char **output_names, uint32_t nb_output, AVFrame *out_frame)
An instance of a filter.
Definition: avfilter.h:341
and forward the result(frame or status change) to the corresponding input.If nothing is possible
int height
Definition: frame.h:376
#define av_freep(p)
FramePrePostProc frame_post_proc
Definition: dnn_interface.h:89
int avio_get_str(AVIOContext *pb, int maxlen, char *buf, int buflen)
Read a string from pb into buf.
Definition: aviobuf.c:860
AVFilterContext * filter_ctx
Definition: dnn_interface.h:75
DNNDataType dt
Definition: dnn_interface.h:62
int avio_closep(AVIOContext **s)
Close the resource accessed by the AVIOContext *s, free it and set the pointer pointing to it to NULL...
Definition: aviobuf.c:1192
int32_t ff_calculate_operand_data_length(const DnnOperand *oprd)
int i
Definition: input.c:407
int32_t output_operand_index
LAYER_LOAD_FUNC pf_load