FFmpeg: dnn_backend_native_layer_conv2d.c
/*
 * Copyright (c) 2018 Sergey Lavrushkin
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/avassert.h"
#include "libavutil/thread.h"
#include "libavutil/cpu.h"
#include "dnn_backend_native_layer_conv2d.h"

#define CLAMP_TO_EDGE(x, w) ((x) < 0 ? 0 : ((x) >= (w) ? (w - 1) : (x)))
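
/*
 * Illustrative note (not in the original source): CLAMP_TO_EDGE maps a
 * coordinate onto the nearest valid index in [0, w-1], e.g. with w = 4:
 *     CLAMP_TO_EDGE(-2, 4) -> 0,  CLAMP_TO_EDGE(1, 4) -> 1,  CLAMP_TO_EDGE(5, 4) -> 3
 * This is what implements the SAME_CLAMP_TO_EDGE padding mode below.
 */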

//struct to pass parameters
typedef struct ThreadCommonParam{
    DnnOperand *operands;
    const int32_t *input_operand_indexes;
    int32_t output_operand_index;
    const void *parameters;
    NativeContext *ctx;
    float *output_data;
} ThreadCommonParam;

typedef struct ThreadParam{
    ThreadCommonParam *thread_common_param;
    int thread_start, thread_end;
#if HAVE_PTHREAD_CANCEL
    pthread_t thread;
#endif
} ThreadParam;
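
/*
 * Illustrative note (not in the original source): each worker thread writes
 * the half-open output row range [thread_start, thread_end); its first
 * destination element is
 *     output_data + output_num * (width - 2*pad_size) * (thread_start - pad_size)
 * which is the pointer offset computed in dnn_execute_layer_conv2d_thread() below.
 */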

int ff_dnn_load_layer_conv2d(Layer *layer, AVIOContext *model_file_context, int file_size, int operands_num)
{
    ConvolutionalParams *conv_params;
    int kernel_size;
    int dnn_size = 0;
    conv_params = av_malloc(sizeof(*conv_params));
    if (!conv_params)
        return 0;

    conv_params->dilation = (int32_t)avio_rl32(model_file_context);
    conv_params->padding_method = (int32_t)avio_rl32(model_file_context);
    conv_params->activation = (int32_t)avio_rl32(model_file_context);
    conv_params->input_num = (int32_t)avio_rl32(model_file_context);
    conv_params->output_num = (int32_t)avio_rl32(model_file_context);
    conv_params->kernel_size = (int32_t)avio_rl32(model_file_context);
    conv_params->has_bias = (int32_t)avio_rl32(model_file_context);
    dnn_size += 28; // seven 4-byte little-endian fields read above

    kernel_size = conv_params->input_num * conv_params->output_num *
                  conv_params->kernel_size * conv_params->kernel_size;
    dnn_size += kernel_size * 4;
    if (conv_params->has_bias)
        dnn_size += conv_params->output_num * 4;

    if (dnn_size > file_size || conv_params->input_num <= 0 ||
        conv_params->output_num <= 0 || conv_params->kernel_size <= 0){
        av_freep(&conv_params);
        return 0;
    }

    conv_params->kernel = av_malloc_array(kernel_size, sizeof(*conv_params->kernel));
    if (!conv_params->kernel) {
        av_freep(&conv_params);
        return 0;
    }
    for (int i = 0; i < kernel_size; ++i) {
        conv_params->kernel[i] = av_int2float(avio_rl32(model_file_context));
    }

    conv_params->biases = NULL;
    if (conv_params->has_bias) {
        conv_params->biases = av_malloc_array(conv_params->output_num, sizeof(*conv_params->biases));
        if (!conv_params->biases){
            av_freep(&conv_params->kernel);
            av_freep(&conv_params);
            return 0;
        }
        for (int i = 0; i < conv_params->output_num; ++i){
            conv_params->biases[i] = av_int2float(avio_rl32(model_file_context));
        }
    }

    layer->params = conv_params;

    layer->input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
    layer->output_operand_index = (int32_t)avio_rl32(model_file_context);
    dnn_size += 8;

    if (layer->input_operand_indexes[0] >= operands_num || layer->output_operand_index >= operands_num) {
        return 0;
    }

    return dnn_size;
}
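
/*
 * Layout sketch (illustrative, not in the original source): the record parsed
 * above consists of little-endian 32-bit words, so the dnn_size returned is
 *     7 * 4                                       bytes of header fields,
 *     input_num * output_num * kernel_size^2 * 4  bytes of kernel weights,
 *     output_num * 4                              bytes of biases (if has_bias),
 *     2 * 4                                       bytes of operand indexes.
 */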

static void * dnn_execute_layer_conv2d_thread(void *threadarg)
{
    //pass parameters
    ThreadParam *thread_param = threadarg;
    ThreadCommonParam *thread_common_param = thread_param->thread_common_param;
    DnnOperand *operands = thread_common_param->operands;
    int32_t input_operand_index = thread_common_param->input_operand_indexes[0];
    int height = operands[input_operand_index].dims[1];
    int width = operands[input_operand_index].dims[2];
    int channel = operands[input_operand_index].dims[3];
    const float *input = operands[input_operand_index].data;
    const ConvolutionalParams *conv_params = thread_common_param->parameters;

    int radius = conv_params->kernel_size >> 1;
    int src_linesize = width * conv_params->input_num;
    int filter_linesize = conv_params->kernel_size * conv_params->input_num;
    int filter_size = conv_params->kernel_size * filter_linesize;
    //VALID padding shrinks the output; the SAME variants keep the input size
    int pad_size = (conv_params->padding_method == VALID) ? (conv_params->kernel_size - 1) / 2 * conv_params->dilation : 0;

    float *output = thread_common_param->output_data;
    //advance to the first output row this thread is responsible for
    output += (conv_params->output_num) * (width - 2 * pad_size) * (thread_param->thread_start - pad_size);

    av_assert0(channel == conv_params->input_num);

    for (int y = thread_param->thread_start; y < thread_param->thread_end; ++y) {
        for (int x = pad_size; x < width - pad_size; ++x) {
            for (int n_filter = 0; n_filter < conv_params->output_num; ++n_filter) {
                if (conv_params->has_bias)
                    output[n_filter] = conv_params->biases[n_filter];
                else
                    output[n_filter] = 0.f;

                for (int ch = 0; ch < conv_params->input_num; ++ch) {
                    for (int kernel_y = 0; kernel_y < conv_params->kernel_size; ++kernel_y) {
                        for (int kernel_x = 0; kernel_x < conv_params->kernel_size; ++kernel_x) {
                            float input_pel;
                            if (conv_params->padding_method == SAME_CLAMP_TO_EDGE) {
                                int y_pos = CLAMP_TO_EDGE(y + (kernel_y - radius) * conv_params->dilation, height);
                                int x_pos = CLAMP_TO_EDGE(x + (kernel_x - radius) * conv_params->dilation, width);
                                input_pel = input[y_pos * src_linesize + x_pos * conv_params->input_num + ch];
                            } else {
                                //zero padding: out-of-frame samples contribute nothing
                                int y_pos = y + (kernel_y - radius) * conv_params->dilation;
                                int x_pos = x + (kernel_x - radius) * conv_params->dilation;
                                input_pel = (x_pos < 0 || x_pos >= width || y_pos < 0 || y_pos >= height) ? 0.0 :
                                            input[y_pos * src_linesize + x_pos * conv_params->input_num + ch];
                            }

                            output[n_filter] += input_pel * conv_params->kernel[n_filter * filter_size + kernel_y * filter_linesize +
                                                                                kernel_x * conv_params->input_num + ch];
                        }
                    }
                }
                switch (conv_params->activation){
                case RELU:
                    output[n_filter] = FFMAX(output[n_filter], 0.0);
                    break;
                case TANH:
                    output[n_filter] = 2.0f / (1.0f + exp(-2.0f * output[n_filter])) - 1.0f;
                    break;
                case SIGMOID:
                    output[n_filter] = 1.0f / (1.0f + exp(-output[n_filter]));
                    break;
                case NONE:
                    break;
                case LEAKY_RELU:
                    output[n_filter] = FFMAX(output[n_filter], 0.0) + 0.2 * FFMIN(output[n_filter], 0.0);
                }
            }
            output += conv_params->output_num;
        }
    }
    return NULL;
}
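
/*
 * Worked example (illustrative, not in the original source): for a 3x3 kernel
 * with dilation 1, radius = 1 and, under VALID padding, pad_size = 1, so a
 * 100x100 input shrinks to a 98x98 output; under the SAME modes pad_size = 0
 * and the spatial size is preserved. Data is NHWC throughout: dims[1] is
 * height, dims[2] is width and dims[3] is the channel count.
 */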

int ff_dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_indexes,
                                int32_t output_operand_index, const void *parameters, NativeContext *ctx)
{
#if HAVE_PTHREAD_CANCEL
    int thread_num = (ctx->options.conv2d_threads <= 0 || ctx->options.conv2d_threads > av_cpu_count())
        ? (av_cpu_count() + 1) : (ctx->options.conv2d_threads);
    int ret = 0, thread_stride;
    ThreadParam *thread_param;
#else
    ThreadParam thread_param = { 0 };
#endif
    ThreadCommonParam thread_common_param;
    const ConvolutionalParams *conv_params = parameters;
    int height = operands[input_operand_indexes[0]].dims[1];
    int width = operands[input_operand_indexes[0]].dims[2];
    int pad_size = (conv_params->padding_method == VALID) ? (conv_params->kernel_size - 1) / 2 * conv_params->dilation : 0;
    DnnOperand *output_operand = &operands[output_operand_index];
    void *tmp;

    output_operand->dims[0] = operands[input_operand_indexes[0]].dims[0];
    output_operand->dims[1] = height - pad_size * 2;
    output_operand->dims[2] = width - pad_size * 2;
    output_operand->dims[3] = conv_params->output_num;
    output_operand->data_type = operands[input_operand_indexes[0]].data_type;
    output_operand->length = ff_calculate_operand_data_length(output_operand);
    if (output_operand->length <= 0) {
        av_log(ctx, AV_LOG_ERROR, "The output data length overflow\n");
        return AVERROR(EINVAL);
    }
    tmp = av_realloc(output_operand->data, output_operand->length);
    if (!tmp) {
        av_log(ctx, AV_LOG_ERROR, "Failed to reallocate memory for output\n");
        return AVERROR(ENOMEM);
    }
    output_operand->data = tmp;
    thread_common_param.output_data = output_operand->data;
    thread_common_param.operands = operands;
    thread_common_param.input_operand_indexes = input_operand_indexes;
    thread_common_param.output_operand_index = output_operand_index;
    thread_common_param.parameters = parameters;
    thread_common_param.ctx = ctx;

#if HAVE_PTHREAD_CANCEL
    thread_param = av_malloc_array(thread_num, sizeof(*thread_param));
    if (!thread_param)
        return AVERROR(ENOMEM);
    //split the output rows evenly; the last thread picks up the remainder
    thread_stride = (height - pad_size * 2) / thread_num;
    //create threads
    for (int i = 0; i < thread_num; i++){
        int thread_ret = 0;
        thread_param[i].thread_common_param = &thread_common_param;
        thread_param[i].thread_start = thread_stride * i + pad_size;
        thread_param[i].thread_end = (i == thread_num - 1) ? (height - pad_size) : (thread_param[i].thread_start + thread_stride);
        thread_ret = pthread_create(&thread_param[i].thread, NULL,
                                    dnn_execute_layer_conv2d_thread, &thread_param[i]);
        if (thread_ret) {
            thread_num = i;
            ret = AVERROR(thread_ret);
            break;
        }
    }

    for (int i = 0; i < thread_num; i++){
        pthread_join(thread_param[i].thread, NULL);
    }

    //release memory
    av_freep(&thread_param);

    return ret;
#else
    //no pthread support: run the whole row range on the calling thread
    thread_param.thread_common_param = &thread_common_param;
    thread_param.thread_start = pad_size;
    thread_param.thread_end = height - pad_size;
    dnn_execute_layer_conv2d_thread(&thread_param);

    return 0;
#endif
}
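
/*
 * Usage sketch (illustrative, not part of FFmpeg): assuming the caller has an
 * AVIOContext for the model file and operands[0] already holds NHWC input
 * data, a single conv2d layer could be loaded and run roughly like this
 * (the zero-initialized NativeContext is a hypothetical minimal context):
 *
 *     Layer layer = { 0 };
 *     DnnOperand operands[2] = { 0 };   // operands[0] filled in by the caller
 *     NativeContext ctx = { 0 };
 *     int dnn_size = ff_dnn_load_layer_conv2d(&layer, model_file_context, file_size, 2);
 *     if (dnn_size > 0) {
 *         int ret = ff_dnn_execute_layer_conv2d(operands, layer.input_operand_indexes,
 *                                               layer.output_operand_index, layer.params, &ctx);
 *         // on success, operands[layer.output_operand_index].data holds the result
 *     }
 */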