FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
hwcontext_cuda.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with FFmpeg; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
19 #include "buffer.h"
20 #include "common.h"
21 #include "hwcontext.h"
22 #include "hwcontext_internal.h"
23 #include "hwcontext_cuda.h"
24 #include "mem.h"
25 #include "pixdesc.h"
26 #include "pixfmt.h"
27 
28 typedef struct CUDAFramesContext {
31 
32 static const enum AVPixelFormat supported_formats[] = {
36 };
37 
38 static void cuda_buffer_free(void *opaque, uint8_t *data)
39 {
40  AVHWFramesContext *ctx = opaque;
41  AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
42 
44 
45  cuCtxPushCurrent(hwctx->cuda_ctx);
46 
47  cuMemFree((CUdeviceptr)data);
48 
49  cuCtxPopCurrent(&dummy);
50 }
51 
52 static AVBufferRef *cuda_pool_alloc(void *opaque, int size)
53 {
54  AVHWFramesContext *ctx = opaque;
55  AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
56 
57  AVBufferRef *ret = NULL;
60  CUresult err;
61 
62  err = cuCtxPushCurrent(hwctx->cuda_ctx);
63  if (err != CUDA_SUCCESS) {
64  av_log(ctx, AV_LOG_ERROR, "Error setting current CUDA context\n");
65  return NULL;
66  }
67 
68  err = cuMemAlloc(&data, size);
69  if (err != CUDA_SUCCESS)
70  goto fail;
71 
72  ret = av_buffer_create((uint8_t*)data, size, cuda_buffer_free, ctx, 0);
73  if (!ret) {
74  cuMemFree(data);
75  goto fail;
76  }
77 
78 fail:
79  cuCtxPopCurrent(&dummy);
80  return ret;
81 }
82 
84 {
85  CUDAFramesContext *priv = ctx->internal->priv;
86  int i;
87 
88  for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) {
89  if (ctx->sw_format == supported_formats[i])
90  break;
91  }
93  av_log(ctx, AV_LOG_ERROR, "Pixel format '%s' is not supported\n",
95  return AVERROR(ENOSYS);
96  }
97 
99 
100  if (!ctx->pool) {
101  int size;
102 
103  switch (ctx->sw_format) {
104  case AV_PIX_FMT_NV12:
105  case AV_PIX_FMT_YUV420P:
106  size = ctx->width * ctx->height * 3 / 2;
107  break;
108  case AV_PIX_FMT_YUV444P:
109  size = ctx->width * ctx->height * 3;
110  break;
111  }
112 
114  if (!ctx->internal->pool_internal)
115  return AVERROR(ENOMEM);
116  }
117 
118  return 0;
119 }
120 
122 {
123  frame->buf[0] = av_buffer_pool_get(ctx->pool);
124  if (!frame->buf[0])
125  return AVERROR(ENOMEM);
126 
127  switch (ctx->sw_format) {
128  case AV_PIX_FMT_NV12:
129  frame->data[0] = frame->buf[0]->data;
130  frame->data[1] = frame->data[0] + ctx->width * ctx->height;
131  frame->linesize[0] = ctx->width;
132  frame->linesize[1] = ctx->width;
133  break;
134  case AV_PIX_FMT_YUV420P:
135  frame->data[0] = frame->buf[0]->data;
136  frame->data[2] = frame->data[0] + ctx->width * ctx->height;
137  frame->data[1] = frame->data[2] + ctx->width * ctx->height / 4;
138  frame->linesize[0] = ctx->width;
139  frame->linesize[1] = ctx->width / 2;
140  frame->linesize[2] = ctx->width / 2;
141  break;
142  case AV_PIX_FMT_YUV444P:
143  frame->data[0] = frame->buf[0]->data;
144  frame->data[1] = frame->data[0] + ctx->width * ctx->height;
145  frame->data[2] = frame->data[1] + ctx->width * ctx->height;
146  frame->linesize[0] = ctx->width;
147  frame->linesize[1] = ctx->width;
148  frame->linesize[2] = ctx->width;
149  break;
150  default:
151  av_frame_unref(frame);
152  return AVERROR_BUG;
153  }
154 
155  frame->format = AV_PIX_FMT_CUDA;
156  frame->width = ctx->width;
157  frame->height = ctx->height;
158 
159  return 0;
160 }
161 
164  enum AVPixelFormat **formats)
165 {
166  enum AVPixelFormat *fmts;
167 
168  fmts = av_malloc_array(2, sizeof(*fmts));
169  if (!fmts)
170  return AVERROR(ENOMEM);
171 
172  fmts[0] = ctx->sw_format;
173  fmts[1] = AV_PIX_FMT_NONE;
174 
175  *formats = fmts;
176 
177  return 0;
178 }
179 
181  const AVFrame *src)
182 {
183  CUDAFramesContext *priv = ctx->internal->priv;
184  AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
185 
187  CUresult err;
188  int i;
189 
190  err = cuCtxPushCurrent(device_hwctx->cuda_ctx);
191  if (err != CUDA_SUCCESS)
192  return AVERROR_UNKNOWN;
193 
194  for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
195  CUDA_MEMCPY2D cpy = {
196  .srcMemoryType = CU_MEMORYTYPE_DEVICE,
197  .dstMemoryType = CU_MEMORYTYPE_HOST,
198  .srcDevice = (CUdeviceptr)src->data[i],
199  .dstHost = dst->data[i],
200  .srcPitch = src->linesize[i],
201  .dstPitch = dst->linesize[i],
202  .WidthInBytes = FFMIN(src->linesize[i], dst->linesize[i]),
203  .Height = src->height >> (i ? priv->shift_height : 0),
204  };
205 
206  err = cuMemcpy2D(&cpy);
207  if (err != CUDA_SUCCESS) {
208  av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
209  return AVERROR_UNKNOWN;
210  }
211  }
212 
213  cuCtxPopCurrent(&dummy);
214 
215  return 0;
216 }
217 
219  const AVFrame *src)
220 {
221  CUDAFramesContext *priv = ctx->internal->priv;
222  AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
223 
225  CUresult err;
226  int i;
227 
228  err = cuCtxPushCurrent(device_hwctx->cuda_ctx);
229  if (err != CUDA_SUCCESS)
230  return AVERROR_UNKNOWN;
231 
232  for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
233  CUDA_MEMCPY2D cpy = {
234  .srcMemoryType = CU_MEMORYTYPE_HOST,
235  .dstMemoryType = CU_MEMORYTYPE_DEVICE,
236  .srcHost = src->data[i],
237  .dstDevice = (CUdeviceptr)dst->data[i],
238  .srcPitch = src->linesize[i],
239  .dstPitch = dst->linesize[i],
240  .WidthInBytes = FFMIN(src->linesize[i], dst->linesize[i]),
241  .Height = src->height >> (i ? priv->shift_height : 0),
242  };
243 
244  err = cuMemcpy2D(&cpy);
245  if (err != CUDA_SUCCESS) {
246  av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
247  return AVERROR_UNKNOWN;
248  }
249  }
250 
251  cuCtxPopCurrent(&dummy);
252 
253  return 0;
254 }
255 
257 {
258  AVCUDADeviceContext *hwctx = ctx->hwctx;
259  cuCtxDestroy(hwctx->cuda_ctx);
260 }
261 
262 static int cuda_device_create(AVHWDeviceContext *ctx, const char *device,
263  AVDictionary *opts, int flags)
264 {
265  AVCUDADeviceContext *hwctx = ctx->hwctx;
266  CUdevice cu_device;
268  CUresult err;
269  int device_idx = 0;
270 
271  if (device)
272  device_idx = strtol(device, NULL, 0);
273 
274  err = cuInit(0);
275  if (err != CUDA_SUCCESS) {
276  av_log(ctx, AV_LOG_ERROR, "Could not initialize the CUDA driver API\n");
277  return AVERROR_UNKNOWN;
278  }
279 
280  err = cuDeviceGet(&cu_device, device_idx);
281  if (err != CUDA_SUCCESS) {
282  av_log(ctx, AV_LOG_ERROR, "Could not get the device number %d\n", device_idx);
283  return AVERROR_UNKNOWN;
284  }
285 
286  err = cuCtxCreate(&hwctx->cuda_ctx, 0, cu_device);
287  if (err != CUDA_SUCCESS) {
288  av_log(ctx, AV_LOG_ERROR, "Error creating a CUDA context\n");
289  return AVERROR_UNKNOWN;
290  }
291 
292  cuCtxPopCurrent(&dummy);
293 
294  ctx->free = cuda_device_free;
295 
296  return 0;
297 }
298 
301  .name = "CUDA",
302 
303  .device_hwctx_size = sizeof(AVCUDADeviceContext),
304  .frames_priv_size = sizeof(CUDAFramesContext),
305 
306  .device_create = cuda_device_create,
307  .frames_init = cuda_frames_init,
308  .frames_get_buffer = cuda_get_buffer,
309  .transfer_get_formats = cuda_transfer_get_formats,
310  .transfer_data_to = cuda_transfer_data_to,
311  .transfer_data_from = cuda_transfer_data_from,
312 
313  .pix_fmts = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE },
314 };
This struct aggregates all the (hardware/vendor-specific) "high-level" state, i.e.
Definition: hwcontext.h:53
#define NULL
Definition: coverity.c:32
This structure describes decoded (raw) audio or video data.
Definition: frame.h:184
ptrdiff_t const GLvoid * data
Definition: opengl_enc.c:101
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
Definition: pixfmt.h:67
memory handling functions
AVBufferRef * buf[AV_NUM_DATA_POINTERS]
AVBuffer references backing the data for this frame.
Definition: frame.h:363
int CUdevice
Definition: nvenc.h:44
static enum AVSampleFormat formats[]
Definition: avresample.c:163
int width
The allocated dimensions of the frames in this pool.
Definition: hwcontext.h:221
static int cuda_frames_init(AVHWFramesContext *ctx)
AVBufferPool * pool_internal
enum AVHWDeviceType type
uint8_t
static enum AVPixelFormat supported_formats[]
static AVFrame * frame
void * hwctx
The format-specific data, allocated and freed by libavutil along with this context.
Definition: hwcontext.h:84
static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst, const AVFrame *src)
ptrdiff_t size
Definition: opengl_enc.c:101
#define av_log(a,...)
An API-specific header for AV_HWDEVICE_TYPE_CUDA.
static void cuda_buffer_free(void *opaque, uint8_t *data)
int width
width and height of the video frame
Definition: frame.h:236
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:176
void(* free)(struct AVHWDeviceContext *ctx)
This field may be set by the caller before calling av_hwdevice_ctx_init().
Definition: hwcontext.h:96
#define AVERROR(e)
Definition: error.h:43
int av_pix_fmt_get_chroma_sub_sample(enum AVPixelFormat pix_fmt, int *h_shift, int *v_shift)
Utility function to access log2_chroma_w log2_chroma_h from the pixel format AVPixFmtDescriptor.
Definition: pixdesc.c:2250
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (firs...
Definition: pixfmt.h:90
AVBufferRef * av_buffer_create(uint8_t *data, int size, void(*free)(void *opaque, uint8_t *data), void *opaque, int flags)
Create an AVBuffer from an existing array.
Definition: buffer.c:28
#define fail()
Definition: checkasm.h:81
AVDictionary * opts
Definition: movenc.c:50
static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
#define FFMIN(a, b)
Definition: common.h:96
AVHWDeviceContext * device_ctx
The parent AVHWDeviceContext.
Definition: hwcontext.h:141
static int cuda_transfer_get_formats(AVHWFramesContext *ctx, enum AVHWFrameTransferDirection dir, enum AVPixelFormat **formats)
static AVBufferRef * cuda_pool_alloc(void *opaque, int size)
static void cuda_device_free(AVHWDeviceContext *ctx)
AVFormatContext * ctx
Definition: movenc.c:48
CUresult
Definition: nvenc.h:41
int dummy
Definition: motion.c:64
#define src
Definition: vp9dsp.c:530
HW acceleration through CUDA.
Definition: pixfmt.h:248
AVBufferPool * av_buffer_pool_init2(int size, void *opaque, AVBufferRef *(*alloc)(void *opaque, int size), void(*pool_free)(void *opaque))
Allocate and initialize a buffer pool with a more complex allocator.
Definition: buffer.c:217
#define FF_ARRAY_ELEMS(a)
int format
format of the frame, -1 if unknown or unset Values correspond to enum AVPixelFormat for video frames...
Definition: frame.h:248
static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst, const AVFrame *src)
int linesize[AV_NUM_DATA_POINTERS]
For video, size in bytes of each picture line.
Definition: frame.h:215
const HWContextType ff_hwcontext_type_cuda
uint8_t * data
The data buffer.
Definition: buffer.h:89
#define AVERROR_BUG
Internal bug, also see AVERROR_BUG2.
Definition: error.h:50
void * CUdeviceptr
Definition: nvenc.h:46
This struct is allocated as AVHWDeviceContext.hwctx.
This struct describes a set or pool of "hardware" frames (i.e.
Definition: hwcontext.h:116
refcounted data buffer API
AVHWFramesInternal * internal
Private data used internally by libavutil.
Definition: hwcontext.h:126
void av_frame_unref(AVFrame *frame)
Unreference all the buffers referenced by frame and reset the frame fields.
Definition: frame.c:484
static int flags
Definition: cpu.c:47
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:198
A reference to a data buffer.
Definition: buffer.h:81
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:62
common internal and external API header
#define AVERROR_UNKNOWN
Unknown error, typically from an external library.
Definition: error.h:71
AVHWFrameTransferDirection
Definition: hwcontext.h:327
pixel format definitions
AVBufferPool * pool
A pool from which the frames are allocated by av_hwframe_get_buffer().
Definition: hwcontext.h:182
static int cuda_device_create(AVHWDeviceContext *ctx, const char *device, AVDictionary *opts, int flags)
int height
Definition: frame.h:236
AVBufferRef * av_buffer_pool_get(AVBufferPool *pool)
Allocate a new AVBuffer, reusing an old buffer from the pool when available.
Definition: buffer.c:380
#define av_malloc_array(a, b)
const char * av_get_pix_fmt_name(enum AVPixelFormat pix_fmt)
Return the short name for a pixel format, NULL in case pix_fmt is unknown.
Definition: pixdesc.c:2138
void * CUcontext
Definition: nvenc.h:45
enum AVPixelFormat sw_format
The pixel format identifying the actual data layout of the hardware frames.
Definition: hwcontext.h:214
AVPixelFormat
Pixel format.
Definition: pixfmt.h:60