32 #include "../internal.h" 37 #include <tensorflow/c/c_api.h> 56 #define OFFSET(x) offsetof(TFContext, x) 57 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM 66 const char **output_names, uint32_t nb_output,
AVFrame *out_frame,
77 unsigned char *graph_data =
NULL;
79 long size, bytes_read;
92 bytes_read =
avio_read(model_file_context, graph_data, size);
94 if (bytes_read != size){
99 graph_buf = TF_NewBuffer();
100 graph_buf->data = graph_data;
101 graph_buf->length =
size;
115 size =
sizeof(float);
125 return TF_AllocateTensor(dt, input_dims, 4,
126 input_dims[1] * input_dims[2] * input_dims[3] * size);
137 tf_output.oper = TF_GraphOperationByName(tf_model->
graph, input_name);
138 if (!tf_output.oper) {
144 input->
dt = TF_OperationOutputType(tf_output);
146 status = TF_NewStatus();
147 TF_GraphGetTensorShape(tf_model->
graph, tf_output, dims, 4, status);
148 if (TF_GetCode(status) != TF_OK){
149 TF_DeleteStatus(status);
150 av_log(ctx,
AV_LOG_ERROR,
"Failed to get input tensor shape: number of dimension incorrect\n");
153 TF_DeleteStatus(status);
158 input->
width = dims[2];
165 const char *output_name,
int *output_width,
int *output_height)
185 in_frame->
width = input_width;
186 in_frame->
height = input_height;
189 *output_width = out_frame->
width;
190 *output_height = out_frame->
height;
200 TF_Buffer *graph_def;
201 TF_ImportGraphDefOptions *graph_opts;
202 TF_SessionOptions *sess_opts;
203 const TF_Operation *init_op;
205 int sess_config_length = 0;
234 if (sess_config_length % 2 != 0) {
236 "please re-generate the config.\n",
241 sess_config_length -= 2;
242 sess_config_length /= 2;
244 sess_config =
av_malloc(sess_config_length);
250 for (
int i = 0;
i < sess_config_length;
i++) {
251 int index = 2 + (sess_config_length - 1 -
i) * 2;
254 sess_config[
i] = strtol(tmp,
NULL, 16);
264 tf_model->
graph = TF_NewGraph();
265 tf_model->
status = TF_NewStatus();
266 graph_opts = TF_NewImportGraphDefOptions();
267 TF_GraphImportGraphDef(tf_model->
graph, graph_def, graph_opts, tf_model->
status);
268 TF_DeleteImportGraphDefOptions(graph_opts);
269 TF_DeleteBuffer(graph_def);
270 if (TF_GetCode(tf_model->
status) != TF_OK){
271 TF_DeleteGraph(tf_model->
graph);
272 TF_DeleteStatus(tf_model->
status);
278 init_op = TF_GraphOperationByName(tf_model->
graph,
"init");
279 sess_opts = TF_NewSessionOptions();
282 TF_SetConfig(sess_opts, sess_config, sess_config_length,tf_model->
status);
284 if (TF_GetCode(tf_model->
status) != TF_OK) {
292 TF_DeleteSessionOptions(sess_opts);
293 if (TF_GetCode(tf_model->
status) != TF_OK)
305 if (TF_GetCode(tf_model->
status) != TF_OK)
315 #define NAME_BUFFER_SIZE 256 322 TF_OperationDescription *op_desc;
324 int64_t strides[] = {1, 1, 1, 1};
335 op_desc = TF_NewOperation(tf_model->
graph,
"Const", name_buffer);
336 TF_SetAttrType(op_desc,
"dtype", TF_FLOAT);
342 tensor = TF_AllocateTensor(TF_FLOAT, dims, dims_len, size *
sizeof(
float));
343 memcpy(TF_TensorData(tensor), params->
kernel, size *
sizeof(
float));
344 TF_SetAttrTensor(op_desc,
"value", tensor, tf_model->
status);
345 if (TF_GetCode(tf_model->
status) != TF_OK){
346 av_log(ctx,
AV_LOG_ERROR,
"Failed to set value for kernel of conv layer %d\n", layer);
349 op = TF_FinishOperation(op_desc, tf_model->
status);
350 if (TF_GetCode(tf_model->
status) != TF_OK){
356 op_desc = TF_NewOperation(tf_model->
graph,
"Transpose", name_buffer);
358 TF_AddInput(op_desc, input);
359 input.oper = transpose_op;
360 TF_AddInput(op_desc, input);
361 TF_SetAttrType(op_desc,
"T", TF_FLOAT);
362 TF_SetAttrType(op_desc,
"Tperm", TF_INT32);
363 op = TF_FinishOperation(op_desc, tf_model->
status);
364 if (TF_GetCode(tf_model->
status) != TF_OK){
370 op_desc = TF_NewOperation(tf_model->
graph,
"Conv2D", name_buffer);
371 input.oper = *cur_op;
372 TF_AddInput(op_desc, input);
374 TF_AddInput(op_desc, input);
375 TF_SetAttrType(op_desc,
"T", TF_FLOAT);
376 TF_SetAttrIntList(op_desc,
"strides", strides, 4);
377 TF_SetAttrString(op_desc,
"padding",
"VALID", 5);
378 *cur_op = TF_FinishOperation(op_desc, tf_model->
status);
379 if (TF_GetCode(tf_model->
status) != TF_OK){
385 op_desc = TF_NewOperation(tf_model->
graph,
"Const", name_buffer);
386 TF_SetAttrType(op_desc,
"dtype", TF_FLOAT);
389 tensor = TF_AllocateTensor(TF_FLOAT, dims, dims_len, params->
output_num *
sizeof(
float));
390 memcpy(TF_TensorData(tensor), params->
biases, params->
output_num *
sizeof(
float));
391 TF_SetAttrTensor(op_desc,
"value", tensor, tf_model->
status);
392 if (TF_GetCode(tf_model->
status) != TF_OK){
393 av_log(ctx,
AV_LOG_ERROR,
"Failed to set value for conv_biases of conv layer %d\n", layer);
396 op = TF_FinishOperation(op_desc, tf_model->
status);
397 if (TF_GetCode(tf_model->
status) != TF_OK){
403 op_desc = TF_NewOperation(tf_model->
graph,
"BiasAdd", name_buffer);
404 input.oper = *cur_op;
405 TF_AddInput(op_desc, input);
407 TF_AddInput(op_desc, input);
408 TF_SetAttrType(op_desc,
"T", TF_FLOAT);
409 *cur_op = TF_FinishOperation(op_desc, tf_model->
status);
410 if (TF_GetCode(tf_model->
status) != TF_OK){
418 op_desc = TF_NewOperation(tf_model->
graph,
"Relu", name_buffer);
421 op_desc = TF_NewOperation(tf_model->
graph,
"Tanh", name_buffer);
424 op_desc = TF_NewOperation(tf_model->
graph,
"Sigmoid", name_buffer);
430 input.oper = *cur_op;
431 TF_AddInput(op_desc, input);
432 TF_SetAttrType(op_desc,
"T", TF_FLOAT);
433 *cur_op = TF_FinishOperation(op_desc, tf_model->
status);
434 if (TF_GetCode(tf_model->
status) != TF_OK){
435 av_log(ctx,
AV_LOG_ERROR,
"Failed to add activation function to conv layer %d\n", layer);
446 TF_OperationDescription *op_desc;
451 op_desc = TF_NewOperation(tf_model->
graph,
"DepthToSpace", name_buffer);
452 input.oper = *cur_op;
454 TF_AddInput(op_desc, input);
455 TF_SetAttrType(op_desc,
"T", TF_FLOAT);
456 TF_SetAttrInt(op_desc,
"block_size", params->
block_size);
457 *cur_op = TF_FinishOperation(op_desc, tf_model->
status);
458 if (TF_GetCode(tf_model->
status) != TF_OK){
472 TF_OperationDescription *op_desc;
475 int64_t pads_shape[] = {4, 2};
480 op_desc = TF_NewOperation(tf_model->
graph,
"Const", name_buffer);
481 TF_SetAttrType(op_desc,
"dtype", TF_INT32);
482 tensor = TF_AllocateTensor(TF_INT32, pads_shape, 2, 4 * 2 *
sizeof(
int32_t));
483 pads = (
int32_t *)TF_TensorData(tensor);
492 TF_SetAttrTensor(op_desc,
"value", tensor, tf_model->
status);
493 if (TF_GetCode(tf_model->
status) != TF_OK){
497 op = TF_FinishOperation(op_desc, tf_model->
status);
498 if (TF_GetCode(tf_model->
status) != TF_OK){
503 op_desc = TF_NewOperation(tf_model->
graph,
"MirrorPad",
"mirror_pad");
504 input.oper = *cur_op;
506 TF_AddInput(op_desc, input);
508 TF_AddInput(op_desc, input);
509 TF_SetAttrType(op_desc,
"T", TF_FLOAT);
510 TF_SetAttrType(op_desc,
"Tpaddings", TF_INT32);
511 TF_SetAttrString(op_desc,
"mode",
"SYMMETRIC", 9);
512 *cur_op = TF_FinishOperation(op_desc, tf_model->
status);
513 if (TF_GetCode(tf_model->
status) != TF_OK){
527 TF_OperationDescription *op_desc;
534 op_desc = TF_NewOperation(tf_model->
graph,
"Const", name_buffer);
535 TF_SetAttrType(op_desc,
"dtype", TF_FLOAT);
536 tensor = TF_AllocateTensor(TF_FLOAT,
NULL, 0, TF_DataTypeSize(TF_FLOAT));
537 y = (
float *)TF_TensorData(tensor);
539 TF_SetAttrTensor(op_desc,
"value", tensor, tf_model->
status);
540 if (TF_GetCode(tf_model->
status) != TF_OK){
544 op = TF_FinishOperation(op_desc, tf_model->
status);
545 if (TF_GetCode(tf_model->
status) != TF_OK){
551 op_desc = TF_NewOperation(tf_model->
graph,
"Maximum", name_buffer);
552 input.oper = *cur_op;
554 TF_AddInput(op_desc, input);
556 TF_AddInput(op_desc, input);
557 TF_SetAttrType(op_desc,
"T", TF_FLOAT);
558 *cur_op = TF_FinishOperation(op_desc, tf_model->
status);
559 if (TF_GetCode(tf_model->
status) != TF_OK){
571 TF_OperationDescription *op_desc;
573 TF_Operation *transpose_op;
577 int64_t transpose_perm_shape[] = {4};
578 int64_t input_shape[] = {1, -1, -1, -1};
589 native_model = model->
model;
590 tf_model->
graph = TF_NewGraph();
591 tf_model->
status = TF_NewStatus();
593 #define CLEANUP_ON_ERROR(tf_model) \ 595 TF_DeleteGraph(tf_model->graph); \ 596 TF_DeleteStatus(tf_model->status); \ 597 av_log(ctx, AV_LOG_ERROR, "Failed to set value or add operator to layer\n"); \ 601 op_desc = TF_NewOperation(tf_model->
graph,
"Placeholder",
"x");
602 TF_SetAttrType(op_desc,
"dtype", TF_FLOAT);
603 TF_SetAttrShape(op_desc,
"shape", input_shape, 4);
604 op = TF_FinishOperation(op_desc, tf_model->
status);
605 if (TF_GetCode(tf_model->
status) != TF_OK){
609 op_desc = TF_NewOperation(tf_model->
graph,
"Const",
"transpose_perm");
610 TF_SetAttrType(op_desc,
"dtype", TF_INT32);
611 tensor = TF_AllocateTensor(TF_INT32, transpose_perm_shape, 1, 4 *
sizeof(
int32_t));
612 transpose_perm = (
int32_t *)TF_TensorData(tensor);
613 transpose_perm[0] = 1;
614 transpose_perm[1] = 2;
615 transpose_perm[2] = 3;
616 transpose_perm[3] = 0;
617 TF_SetAttrTensor(op_desc,
"value", tensor, tf_model->
status);
618 if (TF_GetCode(tf_model->
status) != TF_OK){
621 transpose_op = TF_FinishOperation(op_desc, tf_model->
status);
623 for (layer = 0; layer < native_model->
layers_num; ++layer){
653 op_desc = TF_NewOperation(tf_model->
graph,
"Identity",
"y");
656 TF_AddInput(op_desc, input);
657 TF_FinishOperation(op_desc, tf_model->
status);
658 if (TF_GetCode(tf_model->
status) != TF_OK){
682 tf_model->
ctx.
class = &dnn_tensorflow_class;
683 tf_model->
model = model;
703 model->
model = tf_model;
714 const char **output_names, uint32_t nb_output,
AVFrame *out_frame,
717 TF_Output *tf_outputs;
721 TF_Tensor **output_tensors;
723 TF_Tensor *input_tensor;
730 tf_input.oper = TF_GraphOperationByName(tf_model->
graph, input_name);
741 input.
data = (
float *)TF_TensorData(input_tensor);
751 if (nb_output != 1) {
759 if (tf_outputs ==
NULL) {
765 if (!output_tensors) {
771 for (
int i = 0;
i < nb_output; ++
i) {
772 tf_outputs[
i].oper = TF_GraphOperationByName(tf_model->
graph, output_names[
i]);
773 if (!tf_outputs[i].oper) {
779 tf_outputs[
i].index = 0;
783 &tf_input, &input_tensor, 1,
784 tf_outputs, output_tensors, nb_output,
786 if (TF_GetCode(tf_model->
status) != TF_OK) {
793 for (uint32_t
i = 0;
i < nb_output; ++
i) {
794 output.
height = TF_Dim(output_tensors[
i], 1);
795 output.
width = TF_Dim(output_tensors[i], 2);
796 output.
channels = TF_Dim(output_tensors[i], 3);
797 output.
data = TF_TensorData(output_tensors[i]);
798 output.
dt = TF_TensorType(output_tensors[i]);
812 for (uint32_t
i = 0;
i < nb_output; ++
i) {
813 if (output_tensors[
i]) {
814 TF_DeleteTensor(output_tensors[i]);
817 TF_DeleteTensor(input_tensor);
824 const char **output_names, uint32_t nb_output,
AVFrame *out_frame)
839 return execute_model_tf(model, input_name, in_frame, output_names, nb_output, out_frame, 1);
847 tf_model = (*model)->
model;
848 if (tf_model->
graph){
849 TF_DeleteGraph(tf_model->
graph);
856 TF_DeleteStatus(tf_model->
status);
int avio_open(AVIOContext **s, const char *url, int flags)
Create and initialize an AVIOContext for accessing the resource indicated by url.
int64_t avio_size(AVIOContext *s)
Get the filesize.
This structure describes decoded (raw) audio or video data.
ptrdiff_t const GLvoid * data
union DnnLayerMaximumParams::@202 val
static TF_Buffer * read_graph(const char *model_filename)
void av_opt_set_defaults(void *s)
Set the values of all AVOption fields to their default values.
DNNActivationFunc activation
DNN inference functions interface for native backend.
#define AVIO_FLAG_READ
read-only
static const AVOption dnn_tensorflow_options[]
int(* post_proc)(AVFrame *frame_out, DNNData *model_output, AVFilterContext *filter_ctx)
void * av_mallocz(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if available on the CPU) and zero all the bytes of the block.
int av_opt_set_from_string(void *ctx, const char *opts, const char *const *shorthand, const char *key_val_sep, const char *pairs_sep)
Parse the key-value pairs list in opts.
#define av_assert0(cond)
assert() equivalent, that is always enabled.
AVFrame * av_frame_alloc(void)
Allocate an AVFrame and set its fields to default values.
filter_frame: For filters that do not use the activate() callback, this method is called when a frame is pushed to the filter's input. It can be called at any time except in a reentrant way. If the input frame is enough to produce output, ...
DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, const char *input_name, AVFrame *in_frame, const char **output_names, uint32_t nb_output, AVFrame *out_frame)
int(* pre_proc)(AVFrame *frame_in, DNNData *model_input, AVFilterContext *filter_ctx)
DNNReturnType ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx)
static DNNReturnType get_output_tf(void *model, const char *input_name, int input_width, int input_height, const char *output_name, int *output_width, int *output_height)
DNN inference functions interface for TensorFlow backend.
int avio_read(AVIOContext *s, unsigned char *buf, int size)
Read size bytes from AVIOContext into buf.
DNNReturnType(* get_input)(void *model, DNNData *input, const char *input_name)
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
static DNNReturnType add_maximum_layer(TFModel *tf_model, TF_Operation **cur_op, DnnLayerMaximumParams *params, const int layer)
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g. extended_data.
static DNNReturnType load_tf_model(TFModel *tf_model, const char *model_filename)
simple assert() macros that are a bit more flexible than ISO C assert().
static FilteringContext * filter_ctx
static DNNReturnType get_input_tf(void *model, DNNData *input, const char *input_name)
static DNNReturnType load_native_model(TFModel *tf_model, const char *model_filename)
DNNModel * ff_dnn_load_model_tf(const char *model_filename, DNNFunctionType func_type, const char *options, AVFilterContext *filter_ctx)
DNN inference functions interface for native backend.
DNN inference functions interface for native backend.
void ff_dnn_free_model_native(DNNModel **model)
static DNNReturnType add_conv_layer(TFModel *tf_model, TF_Operation *transpose_op, TF_Operation **cur_op, ConvolutionalParams *params, const int layer)
static TF_Tensor * allocate_input_tensor(const DNNData *input)
static void free_buffer(void *data, size_t length)
static DNNReturnType add_depth_to_space_layer(TFModel *tf_model, TF_Operation **cur_op, DepthToSpaceParams *params, const int layer)
static DNNReturnType execute_model_tf(const DNNModel *model, const char *input_name, AVFrame *in_frame, const char **output_names, uint32_t nb_output, AVFrame *out_frame, int do_ioproc)
DNN input&output process between AVFrame and DNNData.
Describe the class of an AVClass context structure.
DNNFunctionType func_type
layer pad (equivalent to tf.pad) for native backend.
DNNReturnType ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, DNNFunctionType func_type, void *log_ctx)
they must not be accessed directly. The fifo field contains the frames that are queued in the input for processing by the filter. The status_in and status_out fields contain the queued status (EOF or error) of the link.
DNNModel * ff_dnn_load_model_native(const char *model_filename, DNNFunctionType func_type, const char *options, AVFilterContext *filter_ctx)
and forward the test the status of outputs and forward it to the corresponding return FFERROR_NOT_READY If the filters stores internally one or a few frame for some input
AVFILTER_DEFINE_CLASS(dnn_tensorflow)
void avpriv_report_missing_feature(void *avc, const char *msg,...) av_printf_format(2
Log a generic warning message about a missing feature.
static int op(uint8_t **dst, const uint8_t *dst_end, GetByteContext *gb, int pixel, int count, int *x, int width, int linesize)
Perform decode operation.
const OptionDef options[]
DNNReturnType(* get_output)(void *model, const char *input_name, int input_width, int input_height, const char *output_name, int *output_width, int *output_height)
void ff_dnn_free_model_tf(DNNModel **model)
static void transpose_perm(int16_t *out, int16_t *in, int num_vect, const uint8_t line_len[2], int length_div)
Interpret the input data as in the following table:
these buffered frames must be flushed immediately if a new input produces new output. The filter must not call request_frame to get more. It must just process the frame or queue it. The task of requesting more frames is left to the filter's request_frame method or the application. If a filter has several inputs, the filter must be ready for frames arriving randomly on any input; any filter with several inputs will most likely require some kind of queuing mechanism. It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced. request_frame: For filters that do not use the activate() callback, this method is called when a frame is wanted on an output. For a source, it should directly call filter_frame on the corresponding output. For a filter, if there are queued frames already ready, one of these frames should be pushed. If not, the filter should request a frame on one of its inputs, repeatedly until at least one frame has been pushed. Return values: if request_frame could produce a frame, or at least make progress towards producing a frame, it should return 0.
static DNNReturnType add_pad_layer(TFModel *tf_model, TF_Operation **cur_op, LayerPadParams *params, const int layer)
#define av_malloc_array(a, b)
AVFilterContext * filter_ctx
int avio_closep(AVIOContext **s)
Close the resource accessed by the AVIOContext *s, free it and set the pointer pointing to it to NULL.
#define CLEANUP_ON_ERROR(tf_model)
void * av_mallocz_array(size_t nmemb, size_t size)