FFmpeg
dnn_backend_native.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2018 Sergey Lavrushkin
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 /**
22  * @file
23  * DNN native backend implementation.
24  */
25 
26 #include "dnn_backend_native.h"
27 #include "libavutil/avassert.h"
30 #include "dnn_io_proc.h"
31 
32 #define OFFSET(x) offsetof(NativeContext, x)
33 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM
34 static const AVOption dnn_native_options[] = {
35  { "conv2d_threads", "threads num for conv2d layer", OFFSET(options.conv2d_threads), AV_OPT_TYPE_INT, { .i64 = 0 }, INT_MIN, INT_MAX, FLAGS },
36  { NULL },
37 };
38 
39 static const AVClass dnn_native_class = {
40  .class_name = "dnn_native",
41  .item_name = av_default_item_name,
42  .option = dnn_native_options,
43  .version = LIBAVUTIL_VERSION_INT,
44  .category = AV_CLASS_CATEGORY_FILTER,
45 };
46 
47 static DNNReturnType execute_model_native(const DNNModel *model, const char *input_name, AVFrame *in_frame,
48  const char **output_names, uint32_t nb_output, AVFrame *out_frame,
49  int do_ioproc);
50 
51 static DNNReturnType get_input_native(void *model, DNNData *input, const char *input_name)
52 {
53  NativeModel *native_model = model;
54  NativeContext *ctx = &native_model->ctx;
55 
56  for (int i = 0; i < native_model->operands_num; ++i) {
57  DnnOperand *oprd = &native_model->operands[i];
58  if (strcmp(oprd->name, input_name) == 0) {
59  if (oprd->type != DOT_INPUT) {
60  av_log(ctx, AV_LOG_ERROR, "Found \"%s\" in model, but it is not input node\n", input_name);
61  return DNN_ERROR;
62  }
63  input->dt = oprd->data_type;
64  av_assert0(oprd->dims[0] == 1);
65  input->height = oprd->dims[1];
66  input->width = oprd->dims[2];
67  input->channels = oprd->dims[3];
68  return DNN_SUCCESS;
69  }
70  }
71 
72  // do not find the input operand
73  av_log(ctx, AV_LOG_ERROR, "Could not find \"%s\" in model\n", input_name);
74  return DNN_ERROR;
75 }
76 
77 static DNNReturnType get_output_native(void *model, const char *input_name, int input_width, int input_height,
78  const char *output_name, int *output_width, int *output_height)
79 {
81  NativeModel *native_model = model;
82  NativeContext *ctx = &native_model->ctx;
83  AVFrame *in_frame = av_frame_alloc();
84  AVFrame *out_frame = NULL;
85 
86  if (!in_frame) {
87  av_log(ctx, AV_LOG_ERROR, "Could not allocate memory for input frame\n");
88  return DNN_ERROR;
89  }
90 
91  out_frame = av_frame_alloc();
92 
93  if (!out_frame) {
94  av_log(ctx, AV_LOG_ERROR, "Could not allocate memory for output frame\n");
95  av_frame_free(&in_frame);
96  return DNN_ERROR;
97  }
98 
99  in_frame->width = input_width;
100  in_frame->height = input_height;
101 
102  ret = execute_model_native(native_model->model, input_name, in_frame, &output_name, 1, out_frame, 0);
103  *output_width = out_frame->width;
104  *output_height = out_frame->height;
105 
106  av_frame_free(&out_frame);
107  av_frame_free(&in_frame);
108  return ret;
109 }
110 
111 // Loads model and its parameters that are stored in a binary file with following structure:
112 // layers_num,layer_type,layer_parameterss,layer_type,layer_parameters...
113 // For CONV layer: activation_function, input_num, output_num, kernel_size, kernel, biases
114 // For DEPTH_TO_SPACE layer: block_size
115 DNNModel *ff_dnn_load_model_native(const char *model_filename, DNNFunctionType func_type, const char *options, AVFilterContext *filter_ctx)
116 {
117  DNNModel *model = NULL;
118  char header_expected[] = "FFMPEGDNNNATIVE";
119  char *buf;
120  size_t size;
121  int version, header_size, major_version_expected = 1;
122  NativeModel *native_model = NULL;
123  AVIOContext *model_file_context;
124  int file_size, dnn_size, parsed_size;
125  int32_t layer;
126  DNNLayerType layer_type;
127 
128  if (avio_open(&model_file_context, model_filename, AVIO_FLAG_READ) < 0){
129  return NULL;
130  }
131  file_size = avio_size(model_file_context);
132 
133  model = av_mallocz(sizeof(DNNModel));
134  if (!model){
135  goto fail;
136  }
137 
138  /**
139  * check file header with string and version
140  */
141  size = sizeof(header_expected);
142  buf = av_malloc(size);
143  if (!buf) {
144  goto fail;
145  }
146 
147  // size - 1 to skip the ending '\0' which is not saved in file
148  avio_get_str(model_file_context, size - 1, buf, size);
149  dnn_size = size - 1;
150  if (strncmp(buf, header_expected, size) != 0) {
151  av_freep(&buf);
152  goto fail;
153  }
154  av_freep(&buf);
155 
156  version = (int32_t)avio_rl32(model_file_context);
157  dnn_size += 4;
158  if (version != major_version_expected) {
159  goto fail;
160  }
161 
162  // currently no need to check minor version
163  version = (int32_t)avio_rl32(model_file_context);
164  dnn_size += 4;
165  header_size = dnn_size;
166 
167  native_model = av_mallocz(sizeof(NativeModel));
168  if (!native_model){
169  goto fail;
170  }
171 
172  native_model->ctx.class = &dnn_native_class;
173  model->options = options;
174  if (av_opt_set_from_string(&native_model->ctx, model->options, NULL, "=", "&") < 0)
175  goto fail;
176  model->model = (void *)native_model;
177  native_model->model = model;
178 
179 #if !HAVE_PTHREAD_CANCEL
180  if (native_model->ctx.options.conv2d_threads > 1){
181  av_log(&native_model->ctx, AV_LOG_WARNING, "'conv2d_threads' option was set but it is not supported "
182  "on this build (pthread support is required)\n");
183  }
184 #endif
185 
186  avio_seek(model_file_context, file_size - 8, SEEK_SET);
187  native_model->layers_num = (int32_t)avio_rl32(model_file_context);
188  native_model->operands_num = (int32_t)avio_rl32(model_file_context);
189  dnn_size += 8;
190  avio_seek(model_file_context, header_size, SEEK_SET);
191 
192  native_model->layers = av_mallocz(native_model->layers_num * sizeof(Layer));
193  if (!native_model->layers){
194  goto fail;
195  }
196 
197  native_model->operands = av_mallocz(native_model->operands_num * sizeof(DnnOperand));
198  if (!native_model->operands){
199  goto fail;
200  }
201 
202  for (layer = 0; layer < native_model->layers_num; ++layer){
203  layer_type = (int32_t)avio_rl32(model_file_context);
204  dnn_size += 4;
205 
206  if (layer_type >= DLT_COUNT) {
207  goto fail;
208  }
209 
210  native_model->layers[layer].type = layer_type;
211  parsed_size = ff_layer_funcs[layer_type].pf_load(&native_model->layers[layer], model_file_context, file_size, native_model->operands_num);
212  if (!parsed_size) {
213  goto fail;
214  }
215  dnn_size += parsed_size;
216  }
217 
218  for (int32_t i = 0; i < native_model->operands_num; ++i){
219  DnnOperand *oprd;
220  int32_t name_len;
221  int32_t operand_index = (int32_t)avio_rl32(model_file_context);
222  dnn_size += 4;
223 
224  if (operand_index >= native_model->operands_num) {
225  goto fail;
226  }
227 
228  oprd = &native_model->operands[operand_index];
229  name_len = (int32_t)avio_rl32(model_file_context);
230  dnn_size += 4;
231 
232  avio_get_str(model_file_context, name_len, oprd->name, sizeof(oprd->name));
233  dnn_size += name_len;
234 
235  oprd->type = (int32_t)avio_rl32(model_file_context);
236  dnn_size += 4;
237 
238  oprd->data_type = (int32_t)avio_rl32(model_file_context);
239  dnn_size += 4;
240 
241  for (int32_t dim = 0; dim < 4; ++dim) {
242  oprd->dims[dim] = (int32_t)avio_rl32(model_file_context);
243  dnn_size += 4;
244  }
245 
246  oprd->isNHWC = 1;
247  }
248 
249  avio_closep(&model_file_context);
250 
251  if (dnn_size != file_size){
252  ff_dnn_free_model_native(&model);
253  return NULL;
254  }
255 
256  model->get_input = &get_input_native;
257  model->get_output = &get_output_native;
258  model->filter_ctx = filter_ctx;
259  model->func_type = func_type;
260 
261  return model;
262 
263 fail:
264  ff_dnn_free_model_native(&model);
265  avio_closep(&model_file_context);
266  return NULL;
267 }
268 
269 static DNNReturnType execute_model_native(const DNNModel *model, const char *input_name, AVFrame *in_frame,
270  const char **output_names, uint32_t nb_output, AVFrame *out_frame,
271  int do_ioproc)
272 {
273  NativeModel *native_model = model->model;
274  NativeContext *ctx = &native_model->ctx;
275  int32_t layer;
277  DnnOperand *oprd = NULL;
278 
279  if (native_model->layers_num <= 0 || native_model->operands_num <= 0) {
280  av_log(ctx, AV_LOG_ERROR, "No operands or layers in model\n");
281  return DNN_ERROR;
282  }
283 
284  for (int i = 0; i < native_model->operands_num; ++i) {
285  oprd = &native_model->operands[i];
286  if (strcmp(oprd->name, input_name) == 0) {
287  if (oprd->type != DOT_INPUT) {
288  av_log(ctx, AV_LOG_ERROR, "Found \"%s\" in model, but it is not input node\n", input_name);
289  return DNN_ERROR;
290  }
291  break;
292  }
293  oprd = NULL;
294  }
295  if (!oprd) {
296  av_log(ctx, AV_LOG_ERROR, "Could not find \"%s\" in model\n", input_name);
297  return DNN_ERROR;
298  }
299 
300  oprd->dims[1] = in_frame->height;
301  oprd->dims[2] = in_frame->width;
302 
303  av_freep(&oprd->data);
305  if (oprd->length <= 0) {
306  av_log(ctx, AV_LOG_ERROR, "The input data length overflow\n");
307  return DNN_ERROR;
308  }
309  oprd->data = av_malloc(oprd->length);
310  if (!oprd->data) {
311  av_log(ctx, AV_LOG_ERROR, "Failed to malloc memory for input data\n");
312  return DNN_ERROR;
313  }
314 
315  input.height = oprd->dims[1];
316  input.width = oprd->dims[2];
317  input.channels = oprd->dims[3];
318  input.data = oprd->data;
319  input.dt = oprd->data_type;
320  if (do_ioproc) {
321  if (native_model->model->pre_proc != NULL) {
322  native_model->model->pre_proc(in_frame, &input, native_model->model->filter_ctx);
323  } else {
324  ff_proc_from_frame_to_dnn(in_frame, &input, native_model->model->func_type, ctx);
325  }
326  }
327 
328  if (nb_output != 1) {
329  // currently, the filter does not need multiple outputs,
330  // so we just pending the support until we really need it.
331  avpriv_report_missing_feature(ctx, "multiple outputs");
332  return DNN_ERROR;
333  }
334 
335  for (layer = 0; layer < native_model->layers_num; ++layer){
336  DNNLayerType layer_type = native_model->layers[layer].type;
337  if (ff_layer_funcs[layer_type].pf_exec(native_model->operands,
338  native_model->layers[layer].input_operand_indexes,
339  native_model->layers[layer].output_operand_index,
340  native_model->layers[layer].params,
341  &native_model->ctx) == DNN_ERROR) {
342  av_log(ctx, AV_LOG_ERROR, "Failed to execuet model\n");
343  return DNN_ERROR;
344  }
345  }
346 
347  for (uint32_t i = 0; i < nb_output; ++i) {
348  DnnOperand *oprd = NULL;
349  const char *output_name = output_names[i];
350  for (int j = 0; j < native_model->operands_num; ++j) {
351  if (strcmp(native_model->operands[j].name, output_name) == 0) {
352  oprd = &native_model->operands[j];
353  break;
354  }
355  }
356 
357  if (oprd == NULL) {
358  av_log(ctx, AV_LOG_ERROR, "Could not find output in model\n");
359  return DNN_ERROR;
360  }
361 
362  output.data = oprd->data;
363  output.height = oprd->dims[1];
364  output.width = oprd->dims[2];
365  output.channels = oprd->dims[3];
366  output.dt = oprd->data_type;
367 
368  if (do_ioproc) {
369  if (native_model->model->post_proc != NULL) {
370  native_model->model->post_proc(out_frame, &output, native_model->model->filter_ctx);
371  } else {
372  ff_proc_from_dnn_to_frame(out_frame, &output, ctx);
373  }
374  } else {
375  out_frame->width = output.width;
376  out_frame->height = output.height;
377  }
378  }
379 
380  return DNN_SUCCESS;
381 }
382 
383 DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, const char *input_name, AVFrame *in_frame,
384  const char **output_names, uint32_t nb_output, AVFrame *out_frame)
385 {
386  NativeModel *native_model = model->model;
387  NativeContext *ctx = &native_model->ctx;
388 
389  if (!in_frame) {
390  av_log(ctx, AV_LOG_ERROR, "in frame is NULL when execute model.\n");
391  return DNN_ERROR;
392  }
393 
394  if (!out_frame) {
395  av_log(ctx, AV_LOG_ERROR, "out frame is NULL when execute model.\n");
396  return DNN_ERROR;
397  }
398 
399  return execute_model_native(model, input_name, in_frame, output_names, nb_output, out_frame, 1);
400 }
401 
403 {
404  int32_t result = 1;
405  for (int i = 0; i < 4; ++i)
406  result *= oprd->dims[i];
407 
408  return result;
409 }
410 
412 {
413  // currently, we just support DNN_FLOAT
414  uint64_t len = sizeof(float);
415  for (int i = 0; i < 4; i++) {
416  len *= oprd->dims[i];
417  if (len > INT32_MAX)
418  return 0;
419  }
420  return len;
421 }
422 
424 {
425  NativeModel *native_model;
426  ConvolutionalParams *conv_params;
427  int32_t layer;
428 
429  if (*model)
430  {
431  if ((*model)->model) {
432  native_model = (*model)->model;
433  if (native_model->layers) {
434  for (layer = 0; layer < native_model->layers_num; ++layer){
435  if (native_model->layers[layer].type == DLT_CONV2D){
436  conv_params = (ConvolutionalParams *)native_model->layers[layer].params;
437  av_freep(&conv_params->kernel);
438  av_freep(&conv_params->biases);
439  }
440  av_freep(&native_model->layers[layer].params);
441  }
442  av_freep(&native_model->layers);
443  }
444 
445  if (native_model->operands) {
446  for (uint32_t operand = 0; operand < native_model->operands_num; ++operand)
447  av_freep(&native_model->operands[operand].data);
448  av_freep(&native_model->operands);
449  }
450 
451  av_freep(&native_model);
452  }
453  av_freep(model);
454  }
455 }
int avio_open(AVIOContext **s, const char *url, int flags)
Create and initialize an AVIOContext for accessing the resource indicated by url.
Definition: aviobuf.c:1141
void * model
Definition: dnn_interface.h:68
#define NULL
Definition: coverity.c:32
Bytestream IO Context.
Definition: avio.h:161
int8_t isNHWC
NHWC if 1, otherwise NCHW.
int64_t avio_size(AVIOContext *s)
Get the filesize.
Definition: aviobuf.c:346
version
Definition: libkvazaar.c:320
This structure describes decoded (raw) audio or video data.
Definition: frame.h:314
AVOption.
Definition: opt.h:248
#define AV_LOG_WARNING
Something somehow does not look correct.
Definition: log.h:200
#define LIBAVUTIL_VERSION_INT
Definition: version.h:85
int channels
Definition: dnn_interface.h:60
DNN inference functions interface for native backend.
int64_t avio_seek(AVIOContext *s, int64_t offset, int whence)
fseek() equivalent for AVIOContext.
Definition: aviobuf.c:253
#define AVIO_FLAG_READ
read-only
Definition: avio.h:674
const char * av_default_item_name(void *ptr)
Return the context name.
Definition: log.c:235
int(* post_proc)(AVFrame *frame_out, DNNData *model_output, AVFilterContext *filter_ctx)
Definition: dnn_interface.h:86
void * av_mallocz(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if availab...
Definition: mem.c:237
DNNOperandType type
input/output/intermediate operand of the network
const char * options
Definition: dnn_interface.h:70
DnnOperand * operands
int32_t input_operand_indexes[4]
a layer can have multiple inputs and one output.
int av_opt_set_from_string(void *ctx, const char *opts, const char *const *shorthand, const char *key_val_sep, const char *pairs_sep)
Parse the key-value pairs list in opts.
Definition: opt.c:1559
const char * class_name
The name of the class; usually it is the same name as the context structure type to which the AVClass...
Definition: log.h:72
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:37
#define av_malloc(s)
AVFrame * av_frame_alloc(void)
Allocate an AVFrame and set its fields to default values.
Definition: frame.c:190
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce output
int(* pre_proc)(AVFrame *frame_in, DNNData *model_input, AVFilterContext *filter_ctx)
Definition: dnn_interface.h:83
DNNReturnType ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx)
Definition: dnn_io_proc.c:26
const AVClass * class
DNNDataType data_type
support different kinds of data type such as float, half float, int8 etc, first support float now...
int32_t ff_calculate_operand_dims_count(const DnnOperand *oprd)
ptrdiff_t size
Definition: opengl_enc.c:100
NativeContext ctx
#define av_log(a,...)
DNNModel * model
DNNReturnType(* get_input)(void *model, DNNData *input, const char *input_name)
Definition: dnn_interface.h:77
int width
Definition: frame.h:372
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:194
static DNNReturnType execute_model_native(const DNNModel *model, const char *input_name, AVFrame *in_frame, const char **output_names, uint32_t nb_output, AVFrame *out_frame, int do_ioproc)
int height
Definition: dnn_interface.h:60
unsigned int avio_rl32(AVIOContext *s)
Definition: aviobuf.c:754
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:203
void * data
Definition: dnn_interface.h:59
void * data
data pointer with data length in bytes.
simple assert() macros that are a bit more flexible than ISO C assert().
static FilteringContext * filter_ctx
Definition: transcoding.c:47
int32_t dims[4]
there are two memory layouts, NHWC or NCHW, so we use dims, dims[0] is Number.
#define fail()
Definition: checkasm.h:133
static DNNReturnType get_output_native(void *model, const char *input_name, int input_width, int input_height, const char *output_name, int *output_width, int *output_height)
char name[128]
to avoid possible memory leak, do not use char *name
int32_t
AVFormatContext * ctx
Definition: movenc.c:48
#define OFFSET(x)
DNNReturnType
Definition: dnn_interface.h:33
void ff_dnn_free_model_native(DNNModel **model)
static const AVClass dnn_native_class
static const AVOption dnn_native_options[]
static DNNReturnType get_input_native(void *model, DNNData *input, const char *input_name)
DNN input&output process between AVFrame and DNNData.
Describe the class of an AVClass context structure.
Definition: log.h:67
DNNFunctionType func_type
Definition: dnn_interface.h:74
DNNFunctionType
Definition: dnn_interface.h:51
DNNReturnType ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, DNNFunctionType func_type, void *log_ctx)
Definition: dnn_io_proc.c:207
DNNModel * ff_dnn_load_model_native(const char *model_filename, DNNFunctionType func_type, const char *options, AVFilterContext *filter_ctx)
int dim
and forward the test the status of outputs and forward it to the corresponding return FFERROR_NOT_READY If the filters stores internally one or a few frame for some input
DNNLayerType type
DNNLayerType
the enum value of DNNLayerType should not be changed, the same values are used in convert_from_tensor...
void avpriv_report_missing_feature(void *avc, const char *msg,...) av_printf_format(2
Log a generic warning message about a missing feature.
#define FLAGS
const OptionDef options[]
Definition: ffmpeg_opt.c:3424
DNNReturnType(* get_output)(void *model, const char *input_name, int input_width, int input_height, const char *output_name, int *output_width, int *output_height)
Definition: dnn_interface.h:79
NativeOptions options
int len
void * params
const LayerFunc ff_layer_funcs[DLT_COUNT]
DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, const char *input_name, AVFrame *in_frame, const char **output_names, uint32_t nb_output, AVFrame *out_frame)
An instance of a filter.
Definition: avfilter.h:341
and forward the result(frame or status change) to the corresponding input.If nothing is possible
int height
Definition: frame.h:372
#define av_freep(p)
int avio_get_str(AVIOContext *pb, int maxlen, char *buf, int buflen)
Read a string from pb into buf.
Definition: aviobuf.c:864
AVFilterContext * filter_ctx
Definition: dnn_interface.h:72
DNNDataType dt
Definition: dnn_interface.h:62
int avio_closep(AVIOContext **s)
Close the resource accessed by the AVIOContext *s, free it and set the pointer pointing to it to NULL...
Definition: aviobuf.c:1196
int32_t ff_calculate_operand_data_length(const DnnOperand *oprd)
int i
Definition: input.c:407
int32_t output_operand_index
LAYER_LOAD_FUNC pf_load