FFmpeg
slice.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2015 Pedro Arthur <bygrandao@gmail.com>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/mem.h"
22 #include "swscale_internal.h"
23 
24 static void free_lines(SwsSlice *s)
25 {
26  int i;
27  for (i = 0; i < 2; ++i) {
28  int n = s->plane[i].available_lines;
29  int j;
30  for (j = 0; j < n; ++j) {
31  av_freep(&s->plane[i].line[j]);
32  if (s->is_ring)
33  s->plane[i].line[j+n] = NULL;
34  }
35  }
36 
37  for (i = 0; i < 4; ++i)
38  memset(s->plane[i].line, 0, sizeof(uint8_t*) * s->plane[i].available_lines * (s->is_ring ? 3 : 1));
39  s->should_free_lines = 0;
40 }
41 
42 /*
43  slice lines contains extra bytes for vectorial code thus @size
44  is the allocated memory size and @width is the number of pixels
45 */
46 static int alloc_lines(SwsSlice *s, int size, int width)
47 {
48  int i;
49  int idx[2] = {3, 2};
50 
51  s->should_free_lines = 1;
52  s->width = width;
53 
54  for (i = 0; i < 2; ++i) {
55  int n = s->plane[i].available_lines;
56  int j;
57  int ii = idx[i];
58 
59  av_assert0(n == s->plane[ii].available_lines);
60  for (j = 0; j < n; ++j) {
61  // chroma plane line U and V are expected to be contiguous in memory
62  // by mmx vertical scaler code
63  s->plane[i].line[j] = av_malloc(size * 2 + 32);
64  if (!s->plane[i].line[j]) {
65  free_lines(s);
66  return AVERROR(ENOMEM);
67  }
68  s->plane[ii].line[j] = s->plane[i].line[j] + size + 16;
69  if (s->is_ring) {
70  s->plane[i].line[j+n] = s->plane[i].line[j];
71  s->plane[ii].line[j+n] = s->plane[ii].line[j];
72  }
73  }
74  }
75 
76  return 0;
77 }
78 
79 static int alloc_slice(SwsSlice *s, enum AVPixelFormat fmt, int lumLines, int chrLines, int h_sub_sample, int v_sub_sample, int ring)
80 {
81  int i;
82  int size[4] = { lumLines,
83  chrLines,
84  chrLines,
85  lumLines };
86 
87  s->h_chr_sub_sample = h_sub_sample;
88  s->v_chr_sub_sample = v_sub_sample;
89  s->fmt = fmt;
90  s->is_ring = ring;
91  s->should_free_lines = 0;
92 
93  for (i = 0; i < 4; ++i) {
94  int n = size[i] * ( ring == 0 ? 1 : 3);
95  s->plane[i].line = av_calloc(n, sizeof(*s->plane[i].line));
96  if (!s->plane[i].line)
97  return AVERROR(ENOMEM);
98 
99  s->plane[i].tmp = ring ? s->plane[i].line + size[i] * 2 : NULL;
100  s->plane[i].available_lines = size[i];
101  s->plane[i].sliceY = 0;
102  s->plane[i].sliceH = 0;
103  }
104  return 0;
105 }
106 
107 static void free_slice(SwsSlice *s)
108 {
109  int i;
110  if (s) {
111  if (s->should_free_lines)
112  free_lines(s);
113  for (i = 0; i < 4; ++i) {
114  av_freep(&s->plane[i].line);
115  s->plane[i].tmp = NULL;
116  }
117  }
118 }
119 
120 int ff_rotate_slice(SwsSlice *s, int lum, int chr)
121 {
122  int i;
123  if (lum) {
124  for (i = 0; i < 4; i+=3) {
125  int n = s->plane[i].available_lines;
126  int l = lum - s->plane[i].sliceY;
127 
128  if (l >= n * 2) {
129  s->plane[i].sliceY += n;
130  s->plane[i].sliceH -= n;
131  }
132  }
133  }
134  if (chr) {
135  for (i = 1; i < 3; ++i) {
136  int n = s->plane[i].available_lines;
137  int l = chr - s->plane[i].sliceY;
138 
139  if (l >= n * 2) {
140  s->plane[i].sliceY += n;
141  s->plane[i].sliceH -= n;
142  }
143  }
144  }
145  return 0;
146 }
147 
148 int ff_init_slice_from_src(SwsSlice * s, uint8_t *src[4], int stride[4], int srcW, int lumY, int lumH, int chrY, int chrH, int relative)
149 {
150  int i = 0;
151 
152  const int start[4] = {lumY,
153  chrY,
154  chrY,
155  lumY};
156 
157  const int end[4] = {lumY +lumH,
158  chrY + chrH,
159  chrY + chrH,
160  lumY + lumH};
161 
162  s->width = srcW;
163 
164  for (i = 0; i < 4 && src[i] != NULL; ++i) {
165  uint8_t *const src_i = src[i] + (relative ? 0 : start[i]) * stride[i];
166  int j;
167  int first = s->plane[i].sliceY;
168  int n = s->plane[i].available_lines;
169  int lines = end[i] - start[i];
170  int tot_lines = end[i] - first;
171 
172  if (start[i] >= first && n >= tot_lines) {
173  s->plane[i].sliceH = FFMAX(tot_lines, s->plane[i].sliceH);
174  for (j = 0; j < lines; j+= 1)
175  s->plane[i].line[start[i] - first + j] = src_i + j * stride[i];
176  } else {
177  s->plane[i].sliceY = start[i];
178  lines = lines > n ? n : lines;
179  s->plane[i].sliceH = lines;
180  for (j = 0; j < lines; j+= 1)
181  s->plane[i].line[j] = src_i + j * stride[i];
182  }
183 
184  }
185 
186  return 0;
187 }
188 
189 static void fill_ones(SwsSlice *s, int n, int bpc)
190 {
191  int i, j, k, size, end;
192 
193  for (i = 0; i < 4; ++i) {
194  size = s->plane[i].available_lines;
195  for (j = 0; j < size; ++j) {
196  if (bpc == 16) {
197  end = (n>>1) + 1;
198  for (k = 0; k < end; ++k)
199  ((int32_t*)(s->plane[i].line[j]))[k] = 1<<18;
200  } else if (bpc == 32) {
201  end = (n>>2) + 1;
202  for (k = 0; k < end; ++k)
203  ((int64_t*)(s->plane[i].line[j]))[k] = 1LL<<34;
204  } else {
205  end = n + 1;
206  for (k = 0; k < end; ++k)
207  ((int16_t*)(s->plane[i].line[j]))[k] = 1<<14;
208  }
209  }
210  }
211 }
212 
213 /*
214  Calculates the minimum ring buffer size, it should be able to store vFilterSize
215  more n lines where n is the max difference between each adjacent slice which
216  outputs a line.
217  The n lines are needed only when there is not enough src lines to output a single
218  dst line, then we should buffer these lines to process them on the next call to scale.
219 */
220 static void get_min_buffer_size(SwsContext *c, int *out_lum_size, int *out_chr_size)
221 {
222  int lumY;
223  int dstH = c->dstH;
224  int chrDstH = c->chrDstH;
225  int *lumFilterPos = c->vLumFilterPos;
226  int *chrFilterPos = c->vChrFilterPos;
227  int lumFilterSize = c->vLumFilterSize;
228  int chrFilterSize = c->vChrFilterSize;
229  int chrSubSample = c->chrSrcVSubSample;
230 
231  *out_lum_size = lumFilterSize;
232  *out_chr_size = chrFilterSize;
233 
234  for (lumY = 0; lumY < dstH; lumY++) {
235  int chrY = (int64_t)lumY * chrDstH / dstH;
236  int nextSlice = FFMAX(lumFilterPos[lumY] + lumFilterSize - 1,
237  ((chrFilterPos[chrY] + chrFilterSize - 1)
238  << chrSubSample));
239 
240  nextSlice >>= chrSubSample;
241  nextSlice <<= chrSubSample;
242  (*out_lum_size) = FFMAX((*out_lum_size), nextSlice - lumFilterPos[lumY]);
243  (*out_chr_size) = FFMAX((*out_chr_size), (nextSlice >> chrSubSample) - chrFilterPos[chrY]);
244  }
245 }
246 
247 
248 
250 {
251  int i;
252  int index;
253  int num_ydesc;
254  int num_cdesc;
255  int num_vdesc = isPlanarYUV(c->dstFormat) && !isGray(c->dstFormat) ? 2 : 1;
256  int need_lum_conv = c->lumToYV12 || c->readLumPlanar || c->alpToYV12 || c->readAlpPlanar;
257  int need_chr_conv = c->chrToYV12 || c->readChrPlanar;
258  int need_gamma = c->is_internal_gamma;
259  int srcIdx, dstIdx;
260  int dst_stride = FFALIGN(c->dstW * sizeof(int16_t) + 66, 16);
261 
262  uint32_t * pal = usePal(c->srcFormat) ? c->pal_yuv : (uint32_t*)c->input_rgb2yuv_table;
263  int res = 0;
264 
265  int lumBufSize;
266  int chrBufSize;
267 
268  get_min_buffer_size(c, &lumBufSize, &chrBufSize);
269  lumBufSize = FFMAX(lumBufSize, c->vLumFilterSize + MAX_LINES_AHEAD);
270  chrBufSize = FFMAX(chrBufSize, c->vChrFilterSize + MAX_LINES_AHEAD);
271 
272  if (c->dstBpc == 16)
273  dst_stride <<= 1;
274 
275  if (c->dstBpc == 32)
276  dst_stride <<= 2;
277 
278  num_ydesc = need_lum_conv ? 2 : 1;
279  num_cdesc = need_chr_conv ? 2 : 1;
280 
281  c->numSlice = FFMAX(num_ydesc, num_cdesc) + 2;
282  c->numDesc = num_ydesc + num_cdesc + num_vdesc + (need_gamma ? 2 : 0);
283  c->descIndex[0] = num_ydesc + (need_gamma ? 1 : 0);
284  c->descIndex[1] = num_ydesc + num_cdesc + (need_gamma ? 1 : 0);
285 
286  if (isFloat16(c->srcFormat)) {
287  c->h2f_tables = av_malloc(sizeof(*c->h2f_tables));
288  if (!c->h2f_tables)
289  return AVERROR(ENOMEM);
290  ff_init_half2float_tables(c->h2f_tables);
291  c->input_opaque = c->h2f_tables;
292  }
293 
294  c->desc = av_calloc(c->numDesc, sizeof(*c->desc));
295  if (!c->desc)
296  return AVERROR(ENOMEM);
297  c->slice = av_calloc(c->numSlice, sizeof(*c->slice));
298  if (!c->slice) {
299  res = AVERROR(ENOMEM);
300  goto cleanup;
301  }
302 
303  res = alloc_slice(&c->slice[0], c->srcFormat, c->srcH, c->chrSrcH, c->chrSrcHSubSample, c->chrSrcVSubSample, 0);
304  if (res < 0) goto cleanup;
305  for (i = 1; i < c->numSlice-2; ++i) {
306  res = alloc_slice(&c->slice[i], c->srcFormat, lumBufSize, chrBufSize, c->chrSrcHSubSample, c->chrSrcVSubSample, 0);
307  if (res < 0) goto cleanup;
308  res = alloc_lines(&c->slice[i], FFALIGN(c->srcW*2+78, 16), c->srcW);
309  if (res < 0) goto cleanup;
310  }
311  // horizontal scaler output
312  res = alloc_slice(&c->slice[i], c->srcFormat, lumBufSize, chrBufSize, c->chrDstHSubSample, c->chrDstVSubSample, 1);
313  if (res < 0) goto cleanup;
314  res = alloc_lines(&c->slice[i], dst_stride, c->dstW);
315  if (res < 0) goto cleanup;
316 
317  fill_ones(&c->slice[i], dst_stride>>1, c->dstBpc);
318 
319  // vertical scaler output
320  ++i;
321  res = alloc_slice(&c->slice[i], c->dstFormat, c->dstH, c->chrDstH, c->chrDstHSubSample, c->chrDstVSubSample, 0);
322  if (res < 0) goto cleanup;
323 
324  index = 0;
325  srcIdx = 0;
326  dstIdx = 1;
327 
328  if (need_gamma) {
329  res = ff_init_gamma_convert(c->desc + index, c->slice + srcIdx, c->inv_gamma);
330  if (res < 0) goto cleanup;
331  ++index;
332  }
333 
334  if (need_lum_conv) {
335  res = ff_init_desc_fmt_convert(&c->desc[index], &c->slice[srcIdx], &c->slice[dstIdx], pal);
336  if (res < 0) goto cleanup;
337  c->desc[index].alpha = c->needAlpha;
338  ++index;
339  srcIdx = dstIdx;
340  }
341 
342 
343  dstIdx = FFMAX(num_ydesc, num_cdesc);
344  res = ff_init_desc_hscale(&c->desc[index], &c->slice[srcIdx], &c->slice[dstIdx], c->hLumFilter, c->hLumFilterPos, c->hLumFilterSize, c->lumXInc);
345  if (res < 0) goto cleanup;
346  c->desc[index].alpha = c->needAlpha;
347 
348 
349  ++index;
350  {
351  srcIdx = 0;
352  dstIdx = 1;
353  if (need_chr_conv) {
354  res = ff_init_desc_cfmt_convert(&c->desc[index], &c->slice[srcIdx], &c->slice[dstIdx], pal);
355  if (res < 0) goto cleanup;
356  ++index;
357  srcIdx = dstIdx;
358  }
359 
360  dstIdx = FFMAX(num_ydesc, num_cdesc);
361  if (c->needs_hcscale)
362  res = ff_init_desc_chscale(&c->desc[index], &c->slice[srcIdx], &c->slice[dstIdx], c->hChrFilter, c->hChrFilterPos, c->hChrFilterSize, c->chrXInc);
363  else
364  res = ff_init_desc_no_chr(&c->desc[index], &c->slice[srcIdx], &c->slice[dstIdx]);
365  if (res < 0) goto cleanup;
366  }
367 
368  ++index;
369  {
370  srcIdx = c->numSlice - 2;
371  dstIdx = c->numSlice - 1;
372  res = ff_init_vscale(c, c->desc + index, c->slice + srcIdx, c->slice + dstIdx);
373  if (res < 0) goto cleanup;
374  }
375 
376  ++index;
377  if (need_gamma) {
378  res = ff_init_gamma_convert(c->desc + index, c->slice + dstIdx, c->gamma);
379  if (res < 0) goto cleanup;
380  }
381 
382  return 0;
383 
384 cleanup:
386  return res;
387 }
388 
390 {
391  int i;
392  if (c->desc) {
393  for (i = 0; i < c->numDesc; ++i)
394  av_freep(&c->desc[i].instance);
395  av_freep(&c->desc);
396  }
397 
398  if (c->slice) {
399  for (i = 0; i < c->numSlice; ++i)
400  free_slice(&c->slice[i]);
401  av_freep(&c->slice);
402  }
403  av_freep(&c->h2f_tables);
404  return 0;
405 }
ff_init_desc_cfmt_convert
int ff_init_desc_cfmt_convert(SwsFilterDescriptor *desc, SwsSlice *src, SwsSlice *dst, uint32_t *pal)
initializes chr pixel format conversion descriptor
Definition: hscale.c:236
AVPixelFormat
AVPixelFormat
Pixel format.
Definition: pixfmt.h:71
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
fill_ones
static void fill_ones(SwsSlice *s, int n, int bpc)
Definition: slice.c:189
get_min_buffer_size
static void get_min_buffer_size(SwsContext *c, int *out_lum_size, int *out_chr_size)
Definition: slice.c:220
ff_rotate_slice
int ff_rotate_slice(SwsSlice *s, int lum, int chr)
Definition: slice.c:120
int64_t
long long int64_t
Definition: coverity.c:34
cleanup
static av_cold void cleanup(FlashSV2Context *s)
Definition: flashsv2enc.c:130
ff_init_desc_hscale
int ff_init_desc_hscale(SwsFilterDescriptor *desc, SwsSlice *src, SwsSlice *dst, uint16_t *filter, int *filter_pos, int filter_size, int xInc)
initializes lum horizontal scaling descriptor
Definition: hscale.c:145
isGray
#define isGray(x)
Definition: swscale.c:42
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
av_malloc
#define av_malloc(s)
Definition: tableprint_vlc.h:30
ff_init_desc_no_chr
int ff_init_desc_no_chr(SwsFilterDescriptor *desc, SwsSlice *src, SwsSlice *dst)
Definition: hscale.c:282
ff_init_filters
int ff_init_filters(SwsContext *c)
Definition: slice.c:249
first
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But first
Definition: rate_distortion.txt:12
s
#define s(width, name)
Definition: cbs_vp9.c:198
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:40
NULL
#define NULL
Definition: coverity.c:32
isFloat16
static av_always_inline int isFloat16(enum AVPixelFormat pix_fmt)
Definition: swscale_internal.h:850
ff_init_desc_chscale
int ff_init_desc_chscale(SwsFilterDescriptor *desc, SwsSlice *src, SwsSlice *dst, uint16_t *filter, int *filter_pos, int filter_size, int xInc)
initializes chr horizontal scaling descriptor
Definition: hscale.c:251
index
int index
Definition: gxfenc.c:90
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
free_lines
static void free_lines(SwsSlice *s)
Definition: slice.c:24
alloc_lines
static int alloc_lines(SwsSlice *s, int size, int width)
Definition: slice.c:46
usePal
static av_always_inline int usePal(enum AVPixelFormat pix_fmt)
Definition: swscale_internal.h:897
size
int size
Definition: twinvq_data.h:10344
free_slice
static void free_slice(SwsSlice *s)
Definition: slice.c:107
alloc_slice
static int alloc_slice(SwsSlice *s, enum AVPixelFormat fmt, int lumLines, int chrLines, int h_sub_sample, int v_sub_sample, int ring)
Definition: slice.c:79
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
ff_init_gamma_convert
int ff_init_gamma_convert(SwsFilterDescriptor *desc, SwsSlice *src, uint16_t *table)
initializes gamma conversion descriptor
Definition: gamma.c:59
ff_free_filters
int ff_free_filters(SwsContext *c)
Definition: slice.c:389
swscale_internal.h
SwsSlice
Struct which defines a slice of an image to be scaled or an output for a scaled slice.
Definition: swscale_internal.h:1068
av_calloc
void * av_calloc(size_t nmemb, size_t size)
Definition: mem.c:264
stride
#define stride
Definition: h264pred_template.c:537
ff_init_slice_from_src
int ff_init_slice_from_src(SwsSlice *s, uint8_t *src[4], int stride[4], int srcW, int lumY, int lumH, int chrY, int chrH, int relative)
Definition: slice.c:148
ff_init_half2float_tables
void ff_init_half2float_tables(Half2FloatTables *t)
Definition: half2float.c:39
isPlanarYUV
static av_always_inline int isPlanarYUV(enum AVPixelFormat pix_fmt)
Definition: vf_dnn_processing.c:162
mem.h
ff_init_vscale
int ff_init_vscale(SwsContext *c, SwsFilterDescriptor *desc, SwsSlice *src, SwsSlice *dst)
initializes vertical scaling descriptors
Definition: vscale.c:214
FFALIGN
#define FFALIGN(x, a)
Definition: macros.h:78
lum
static double lum(void *priv, double x, double y, int plane)
Definition: vf_fftfilt.c:107
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:34
ff_init_desc_fmt_convert
int ff_init_desc_fmt_convert(SwsFilterDescriptor *desc, SwsSlice *src, SwsSlice *dst, uint32_t *pal)
initializes lum pixel format conversion descriptor
Definition: hscale.c:128
int32_t
int32_t
Definition: audioconvert.c:56
width
#define width
Definition: dsp.h:85
SwsContext
Definition: swscale_internal.h:299
MAX_LINES_AHEAD
#define MAX_LINES_AHEAD
Definition: swscale_internal.h:1142
src
#define src
Definition: vp8dsp.c:248