FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
hwcontext_cuda.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with FFmpeg; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
19 #include "buffer.h"
20 #include "common.h"
21 #include "hwcontext.h"
22 #include "hwcontext_internal.h"
23 #include "hwcontext_cuda.h"
24 #include "mem.h"
25 #include "pixdesc.h"
26 #include "pixfmt.h"
27 
28 #define CUDA_FRAME_ALIGNMENT 256
29 
/**
 * Per-frames-context private state (sized via HWContextType.frames_priv_size).
 *
 * Holds the chroma subsampling shifts of the software pixel format; the
 * transfer functions use shift_height to compute the row count of the
 * chroma planes.
 */
typedef struct CUDAFramesContext {
    int shift_width, shift_height;
} CUDAFramesContext;
33 
34 static const enum AVPixelFormat supported_formats[] = {
38 };
39 
40 static void cuda_buffer_free(void *opaque, uint8_t *data)
41 {
42  AVHWFramesContext *ctx = opaque;
43  AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
44 
46 
47  cuCtxPushCurrent(hwctx->cuda_ctx);
48 
49  cuMemFree((CUdeviceptr)data);
50 
51  cuCtxPopCurrent(&dummy);
52 }
53 
54 static AVBufferRef *cuda_pool_alloc(void *opaque, int size)
55 {
56  AVHWFramesContext *ctx = opaque;
57  AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
58 
59  AVBufferRef *ret = NULL;
62  CUresult err;
63 
64  err = cuCtxPushCurrent(hwctx->cuda_ctx);
65  if (err != CUDA_SUCCESS) {
66  av_log(ctx, AV_LOG_ERROR, "Error setting current CUDA context\n");
67  return NULL;
68  }
69 
70  err = cuMemAlloc(&data, size);
71  if (err != CUDA_SUCCESS)
72  goto fail;
73 
74  ret = av_buffer_create((uint8_t*)data, size, cuda_buffer_free, ctx, 0);
75  if (!ret) {
76  cuMemFree(data);
77  goto fail;
78  }
79 
80 fail:
81  cuCtxPopCurrent(&dummy);
82  return ret;
83 }
84 
86 {
87  CUDAFramesContext *priv = ctx->internal->priv;
88  int aligned_width = FFALIGN(ctx->width, CUDA_FRAME_ALIGNMENT);
89  int i;
90 
91  for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) {
92  if (ctx->sw_format == supported_formats[i])
93  break;
94  }
96  av_log(ctx, AV_LOG_ERROR, "Pixel format '%s' is not supported\n",
98  return AVERROR(ENOSYS);
99  }
100 
102 
103  if (!ctx->pool) {
104  int size;
105 
106  switch (ctx->sw_format) {
107  case AV_PIX_FMT_NV12:
108  case AV_PIX_FMT_YUV420P:
109  size = aligned_width * ctx->height * 3 / 2;
110  break;
111  case AV_PIX_FMT_YUV444P:
112  size = aligned_width * ctx->height * 3;
113  break;
114  }
115 
117  if (!ctx->internal->pool_internal)
118  return AVERROR(ENOMEM);
119  }
120 
121  return 0;
122 }
123 
125 {
126  int aligned_width = FFALIGN(ctx->width, CUDA_FRAME_ALIGNMENT);
127 
128  frame->buf[0] = av_buffer_pool_get(ctx->pool);
129  if (!frame->buf[0])
130  return AVERROR(ENOMEM);
131 
132  switch (ctx->sw_format) {
133  case AV_PIX_FMT_NV12:
134  frame->data[0] = frame->buf[0]->data;
135  frame->data[1] = frame->data[0] + aligned_width * ctx->height;
136  frame->linesize[0] = aligned_width;
137  frame->linesize[1] = aligned_width;
138  break;
139  case AV_PIX_FMT_YUV420P:
140  frame->data[0] = frame->buf[0]->data;
141  frame->data[2] = frame->data[0] + aligned_width * ctx->height;
142  frame->data[1] = frame->data[2] + aligned_width * ctx->height / 4;
143  frame->linesize[0] = aligned_width;
144  frame->linesize[1] = aligned_width / 2;
145  frame->linesize[2] = aligned_width / 2;
146  break;
147  case AV_PIX_FMT_YUV444P:
148  frame->data[0] = frame->buf[0]->data;
149  frame->data[1] = frame->data[0] + aligned_width * ctx->height;
150  frame->data[2] = frame->data[1] + aligned_width * ctx->height;
151  frame->linesize[0] = aligned_width;
152  frame->linesize[1] = aligned_width;
153  frame->linesize[2] = aligned_width;
154  break;
155  default:
156  av_frame_unref(frame);
157  return AVERROR_BUG;
158  }
159 
160  frame->format = AV_PIX_FMT_CUDA;
161  frame->width = ctx->width;
162  frame->height = ctx->height;
163 
164  return 0;
165 }
166 
169  enum AVPixelFormat **formats)
170 {
171  enum AVPixelFormat *fmts;
172 
173  fmts = av_malloc_array(2, sizeof(*fmts));
174  if (!fmts)
175  return AVERROR(ENOMEM);
176 
177  fmts[0] = ctx->sw_format;
178  fmts[1] = AV_PIX_FMT_NONE;
179 
180  *formats = fmts;
181 
182  return 0;
183 }
184 
186  const AVFrame *src)
187 {
188  CUDAFramesContext *priv = ctx->internal->priv;
189  AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
190 
192  CUresult err;
193  int i;
194 
195  err = cuCtxPushCurrent(device_hwctx->cuda_ctx);
196  if (err != CUDA_SUCCESS)
197  return AVERROR_UNKNOWN;
198 
199  for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
200  CUDA_MEMCPY2D cpy = {
201  .srcMemoryType = CU_MEMORYTYPE_DEVICE,
202  .dstMemoryType = CU_MEMORYTYPE_HOST,
203  .srcDevice = (CUdeviceptr)src->data[i],
204  .dstHost = dst->data[i],
205  .srcPitch = src->linesize[i],
206  .dstPitch = dst->linesize[i],
207  .WidthInBytes = FFMIN(src->linesize[i], dst->linesize[i]),
208  .Height = src->height >> (i ? priv->shift_height : 0),
209  };
210 
211  err = cuMemcpy2D(&cpy);
212  if (err != CUDA_SUCCESS) {
213  av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
214  return AVERROR_UNKNOWN;
215  }
216  }
217 
218  cuCtxPopCurrent(&dummy);
219 
220  return 0;
221 }
222 
224  const AVFrame *src)
225 {
226  CUDAFramesContext *priv = ctx->internal->priv;
227  AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
228 
230  CUresult err;
231  int i;
232 
233  err = cuCtxPushCurrent(device_hwctx->cuda_ctx);
234  if (err != CUDA_SUCCESS)
235  return AVERROR_UNKNOWN;
236 
237  for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
238  CUDA_MEMCPY2D cpy = {
239  .srcMemoryType = CU_MEMORYTYPE_HOST,
240  .dstMemoryType = CU_MEMORYTYPE_DEVICE,
241  .srcHost = src->data[i],
242  .dstDevice = (CUdeviceptr)dst->data[i],
243  .srcPitch = src->linesize[i],
244  .dstPitch = dst->linesize[i],
245  .WidthInBytes = FFMIN(src->linesize[i], dst->linesize[i]),
246  .Height = src->height >> (i ? priv->shift_height : 0),
247  };
248 
249  err = cuMemcpy2D(&cpy);
250  if (err != CUDA_SUCCESS) {
251  av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
252  return AVERROR_UNKNOWN;
253  }
254  }
255 
256  cuCtxPopCurrent(&dummy);
257 
258  return 0;
259 }
260 
262 {
263  AVCUDADeviceContext *hwctx = ctx->hwctx;
264  cuCtxDestroy(hwctx->cuda_ctx);
265 }
266 
267 static int cuda_device_create(AVHWDeviceContext *ctx, const char *device,
268  AVDictionary *opts, int flags)
269 {
270  AVCUDADeviceContext *hwctx = ctx->hwctx;
271  CUdevice cu_device;
273  CUresult err;
274  int device_idx = 0;
275 
276  if (device)
277  device_idx = strtol(device, NULL, 0);
278 
279  err = cuInit(0);
280  if (err != CUDA_SUCCESS) {
281  av_log(ctx, AV_LOG_ERROR, "Could not initialize the CUDA driver API\n");
282  return AVERROR_UNKNOWN;
283  }
284 
285  err = cuDeviceGet(&cu_device, device_idx);
286  if (err != CUDA_SUCCESS) {
287  av_log(ctx, AV_LOG_ERROR, "Could not get the device number %d\n", device_idx);
288  return AVERROR_UNKNOWN;
289  }
290 
291  err = cuCtxCreate(&hwctx->cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, cu_device);
292  if (err != CUDA_SUCCESS) {
293  av_log(ctx, AV_LOG_ERROR, "Error creating a CUDA context\n");
294  return AVERROR_UNKNOWN;
295  }
296 
297  cuCtxPopCurrent(&dummy);
298 
299  ctx->free = cuda_device_free;
300 
301  return 0;
302 }
303 
306  .name = "CUDA",
307 
308  .device_hwctx_size = sizeof(AVCUDADeviceContext),
309  .frames_priv_size = sizeof(CUDAFramesContext),
310 
311  .device_create = cuda_device_create,
312  .frames_init = cuda_frames_init,
313  .frames_get_buffer = cuda_get_buffer,
314  .transfer_get_formats = cuda_transfer_get_formats,
315  .transfer_data_to = cuda_transfer_data_to,
316  .transfer_data_from = cuda_transfer_data_from,
317 
318  .pix_fmts = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE },
319 };
This struct aggregates all the (hardware/vendor-specific) "high-level" state, i.e. state that is not tied to a concrete processing configuration.
Definition: hwcontext.h:54
#define NULL
Definition: coverity.c:32
This structure describes decoded (raw) audio or video data.
Definition: frame.h:184
ptrdiff_t const GLvoid * data
Definition: opengl_enc.c:101
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
Definition: pixfmt.h:67
Memory handling functions.
AVBufferRef * buf[AV_NUM_DATA_POINTERS]
AVBuffer references backing the data for this frame.
Definition: frame.h:367
int CUdevice
Definition: nvenc.h:44
static enum AVSampleFormat formats[]
Definition: avresample.c:163
int width
The allocated dimensions of the frames in this pool.
Definition: hwcontext.h:222
static int cuda_frames_init(AVHWFramesContext *ctx)
AVBufferPool * pool_internal
enum AVHWDeviceType type
uint8_t
static enum AVPixelFormat supported_formats[]
static AVFrame * frame
void * hwctx
The format-specific data, allocated and freed by libavutil along with this context.
Definition: hwcontext.h:85
static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst, const AVFrame *src)
ptrdiff_t size
Definition: opengl_enc.c:101
#define FFALIGN(x, a)
Definition: macros.h:48
#define av_log(a,...)
An API-specific header for AV_HWDEVICE_TYPE_CUDA.
static void cuda_buffer_free(void *opaque, uint8_t *data)
int width
width and height of the video frame
Definition: frame.h:236
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:176
void(* free)(struct AVHWDeviceContext *ctx)
This field may be set by the caller before calling av_hwdevice_ctx_init().
Definition: hwcontext.h:97
#define AVERROR(e)
Definition: error.h:43
int av_pix_fmt_get_chroma_sub_sample(enum AVPixelFormat pix_fmt, int *h_shift, int *v_shift)
Utility function to access log2_chroma_w log2_chroma_h from the pixel format AVPixFmtDescriptor.
Definition: pixdesc.c:2294
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (first byte U and the following byte V)
Definition: pixfmt.h:90
AVBufferRef * av_buffer_create(uint8_t *data, int size, void(*free)(void *opaque, uint8_t *data), void *opaque, int flags)
Create an AVBuffer from an existing array.
Definition: buffer.c:28
#define fail()
Definition: checkasm.h:83
AVDictionary * opts
Definition: movenc.c:50
static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
#define FFMIN(a, b)
Definition: common.h:96
AVHWDeviceContext * device_ctx
The parent AVHWDeviceContext.
Definition: hwcontext.h:142
static int cuda_transfer_get_formats(AVHWFramesContext *ctx, enum AVHWFrameTransferDirection dir, enum AVPixelFormat **formats)
static AVBufferRef * cuda_pool_alloc(void *opaque, int size)
static void cuda_device_free(AVHWDeviceContext *ctx)
AVFormatContext * ctx
Definition: movenc.c:48
CUresult
Definition: nvenc.h:41
int dummy
Definition: motion.c:64
#define src
Definition: vp9dsp.c:530
HW acceleration through CUDA.
Definition: pixfmt.h:249
AVBufferPool * av_buffer_pool_init2(int size, void *opaque, AVBufferRef *(*alloc)(void *opaque, int size), void(*pool_free)(void *opaque))
Allocate and initialize a buffer pool with a more complex allocator.
Definition: buffer.c:217
#define FF_ARRAY_ELEMS(a)
#define CUDA_FRAME_ALIGNMENT
int format
format of the frame, -1 if unknown or unset. Values correspond to enum AVPixelFormat for video frames, enum AVSampleFormat for audio.
Definition: frame.h:248
static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst, const AVFrame *src)
int linesize[AV_NUM_DATA_POINTERS]
For video, size in bytes of each picture line.
Definition: frame.h:215
const HWContextType ff_hwcontext_type_cuda
uint8_t * data
The data buffer.
Definition: buffer.h:89
#define AVERROR_BUG
Internal bug, also see AVERROR_BUG2.
Definition: error.h:50
void * CUdeviceptr
Definition: nvenc.h:46
This struct is allocated as AVHWDeviceContext.hwctx.
This struct describes a set or pool of "hardware" frames (i.e. those with data not located in normal system memory).
Definition: hwcontext.h:117
refcounted data buffer API
AVHWFramesInternal * internal
Private data used internally by libavutil.
Definition: hwcontext.h:127
void av_frame_unref(AVFrame *frame)
Unreference all the buffers referenced by frame and reset the frame fields.
Definition: frame.c:493
static int flags
Definition: cpu.c:47
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:198
A reference to a data buffer.
Definition: buffer.h:81
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:62
common internal and external API header
#define AVERROR_UNKNOWN
Unknown error, typically from an external library.
Definition: error.h:71
AVHWFrameTransferDirection
Definition: hwcontext.h:328
pixel format definitions
AVBufferPool * pool
A pool from which the frames are allocated by av_hwframe_get_buffer().
Definition: hwcontext.h:183
static int cuda_device_create(AVHWDeviceContext *ctx, const char *device, AVDictionary *opts, int flags)
int height
Definition: frame.h:236
AVBufferRef * av_buffer_pool_get(AVBufferPool *pool)
Allocate a new AVBuffer, reusing an old buffer from the pool when available.
Definition: buffer.c:380
#define av_malloc_array(a, b)
const char * av_get_pix_fmt_name(enum AVPixelFormat pix_fmt)
Return the short name for a pixel format, NULL in case pix_fmt is unknown.
Definition: pixdesc.c:2182
void * CUcontext
Definition: nvenc.h:45
enum AVPixelFormat sw_format
The pixel format identifying the actual data layout of the hardware frames.
Definition: hwcontext.h:215
AVPixelFormat
Pixel format.
Definition: pixfmt.h:60