FFmpeg
yuv2yuv_altivec.c
Go to the documentation of this file.
1 /*
2  * AltiVec-enhanced yuv-to-yuv conversion routines.
3  *
4  * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5  * based on the equivalent C code in swscale.c
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 #include <inttypes.h>
25 
26 #include "config.h"
27 #include "libavutil/attributes.h"
28 #include "libavutil/cpu.h"
29 #include "libswscale/swscale.h"
31 
32 #if HAVE_ALTIVEC
34 
35 static int yv12toyuy2_unscaled_altivec(SwsInternal *c, const uint8_t *const src[],
36  const int srcStride[], int srcSliceY,
37  int srcSliceH, uint8_t *const dstParam[],
38  const int dstStride_a[])
39 {
40  uint8_t *dst = dstParam[0] + dstStride_a[0] * srcSliceY;
41  // yv12toyuy2(src[0], src[1], src[2], dst, c->opts.src_w, srcSliceH,
42  // srcStride[0], srcStride[1], dstStride[0]);
43  const uint8_t *ysrc = src[0];
44  const uint8_t *usrc = src[1];
45  const uint8_t *vsrc = src[2];
46  const int width = c->opts.src_w;
47  const int height = srcSliceH;
48  const int lumStride = srcStride[0];
49  const int chromStride = srcStride[1];
50  const int dstStride = dstStride_a[0];
51  const vector unsigned char yperm = vec_lvsl(0, ysrc);
52  const int vertLumPerChroma = 2;
53  register unsigned int y;
54 
55  /* This code assumes:
56  *
57  * 1) dst is 16 bytes-aligned
58  * 2) dstStride is a multiple of 16
59  * 3) width is a multiple of 16
60  * 4) lum & chrom stride are multiples of 8
61  */
62 
63  for (y = 0; y < height; y++) {
64  int i;
65  for (i = 0; i < width - 31; i += 32) {
66  const unsigned int j = i >> 1;
67  vector unsigned char v_yA = vec_ld(i, ysrc);
68  vector unsigned char v_yB = vec_ld(i + 16, ysrc);
69  vector unsigned char v_yC = vec_ld(i + 32, ysrc);
70  vector unsigned char v_y1 = vec_perm(v_yA, v_yB, yperm);
71  vector unsigned char v_y2 = vec_perm(v_yB, v_yC, yperm);
72  vector unsigned char v_uA = vec_ld(j, usrc);
73  vector unsigned char v_uB = vec_ld(j + 16, usrc);
74  vector unsigned char v_u = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc));
75  vector unsigned char v_vA = vec_ld(j, vsrc);
76  vector unsigned char v_vB = vec_ld(j + 16, vsrc);
77  vector unsigned char v_v = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc));
78  vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
79  vector unsigned char v_uv_b = vec_mergel(v_u, v_v);
80  vector unsigned char v_yuy2_0 = vec_mergeh(v_y1, v_uv_a);
81  vector unsigned char v_yuy2_1 = vec_mergel(v_y1, v_uv_a);
82  vector unsigned char v_yuy2_2 = vec_mergeh(v_y2, v_uv_b);
83  vector unsigned char v_yuy2_3 = vec_mergel(v_y2, v_uv_b);
84  vec_st(v_yuy2_0, (i << 1), dst);
85  vec_st(v_yuy2_1, (i << 1) + 16, dst);
86  vec_st(v_yuy2_2, (i << 1) + 32, dst);
87  vec_st(v_yuy2_3, (i << 1) + 48, dst);
88  }
89  if (i < width) {
90  const unsigned int j = i >> 1;
91  vector unsigned char v_y1 = vec_ld(i, ysrc);
92  vector unsigned char v_u = vec_ld(j, usrc);
93  vector unsigned char v_v = vec_ld(j, vsrc);
94  vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
95  vector unsigned char v_yuy2_0 = vec_mergeh(v_y1, v_uv_a);
96  vector unsigned char v_yuy2_1 = vec_mergel(v_y1, v_uv_a);
97  vec_st(v_yuy2_0, (i << 1), dst);
98  vec_st(v_yuy2_1, (i << 1) + 16, dst);
99  }
100  if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
101  usrc += chromStride;
102  vsrc += chromStride;
103  }
104  ysrc += lumStride;
105  dst += dstStride;
106  }
107 
108  return srcSliceH;
109 }
110 
111 static int yv12touyvy_unscaled_altivec(SwsInternal *c, const uint8_t *const src[],
112  const int srcStride[], int srcSliceY,
113  int srcSliceH, uint8_t *const dstParam[],
114  const int dstStride_a[])
115 {
116  uint8_t *dst = dstParam[0] + dstStride_a[0] * srcSliceY;
117  // yv12toyuy2(src[0], src[1], src[2], dst, c->opts.src_w, srcSliceH,
118  // srcStride[0], srcStride[1], dstStride[0]);
119  const uint8_t *ysrc = src[0];
120  const uint8_t *usrc = src[1];
121  const uint8_t *vsrc = src[2];
122  const int width = c->opts.src_w;
123  const int height = srcSliceH;
124  const int lumStride = srcStride[0];
125  const int chromStride = srcStride[1];
126  const int dstStride = dstStride_a[0];
127  const int vertLumPerChroma = 2;
128  const vector unsigned char yperm = vec_lvsl(0, ysrc);
129  register unsigned int y;
130 
131  /* This code assumes:
132  *
133  * 1) dst is 16 bytes-aligned
134  * 2) dstStride is a multiple of 16
135  * 3) width is a multiple of 16
136  * 4) lum & chrom stride are multiples of 8
137  */
138 
139  for (y = 0; y < height; y++) {
140  int i;
141  for (i = 0; i < width - 31; i += 32) {
142  const unsigned int j = i >> 1;
143  vector unsigned char v_yA = vec_ld(i, ysrc);
144  vector unsigned char v_yB = vec_ld(i + 16, ysrc);
145  vector unsigned char v_yC = vec_ld(i + 32, ysrc);
146  vector unsigned char v_y1 = vec_perm(v_yA, v_yB, yperm);
147  vector unsigned char v_y2 = vec_perm(v_yB, v_yC, yperm);
148  vector unsigned char v_uA = vec_ld(j, usrc);
149  vector unsigned char v_uB = vec_ld(j + 16, usrc);
150  vector unsigned char v_u = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc));
151  vector unsigned char v_vA = vec_ld(j, vsrc);
152  vector unsigned char v_vB = vec_ld(j + 16, vsrc);
153  vector unsigned char v_v = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc));
154  vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
155  vector unsigned char v_uv_b = vec_mergel(v_u, v_v);
156  vector unsigned char v_uyvy_0 = vec_mergeh(v_uv_a, v_y1);
157  vector unsigned char v_uyvy_1 = vec_mergel(v_uv_a, v_y1);
158  vector unsigned char v_uyvy_2 = vec_mergeh(v_uv_b, v_y2);
159  vector unsigned char v_uyvy_3 = vec_mergel(v_uv_b, v_y2);
160  vec_st(v_uyvy_0, (i << 1), dst);
161  vec_st(v_uyvy_1, (i << 1) + 16, dst);
162  vec_st(v_uyvy_2, (i << 1) + 32, dst);
163  vec_st(v_uyvy_3, (i << 1) + 48, dst);
164  }
165  if (i < width) {
166  const unsigned int j = i >> 1;
167  vector unsigned char v_y1 = vec_ld(i, ysrc);
168  vector unsigned char v_u = vec_ld(j, usrc);
169  vector unsigned char v_v = vec_ld(j, vsrc);
170  vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
171  vector unsigned char v_uyvy_0 = vec_mergeh(v_uv_a, v_y1);
172  vector unsigned char v_uyvy_1 = vec_mergel(v_uv_a, v_y1);
173  vec_st(v_uyvy_0, (i << 1), dst);
174  vec_st(v_uyvy_1, (i << 1) + 16, dst);
175  }
176  if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
177  usrc += chromStride;
178  vsrc += chromStride;
179  }
180  ysrc += lumStride;
181  dst += dstStride;
182  }
183  return srcSliceH;
184 }
185 
186 #endif /* HAVE_ALTIVEC */
187 
189 {
190 #if HAVE_ALTIVEC
192  return;
193 
194  if (!(c->opts.src_w & 15) && !(c->opts.flags & SWS_BITEXACT) &&
195  c->opts.src_format == AV_PIX_FMT_YUV420P) {
196  enum AVPixelFormat dstFormat = c->opts.dst_format;
197 
198  // unscaled YV12 -> packed YUV, we want speed
199  if (dstFormat == AV_PIX_FMT_YUYV422)
200  c->convert_unscaled = yv12toyuy2_unscaled_altivec;
201  else if (dstFormat == AV_PIX_FMT_UYVY422)
202  c->convert_unscaled = yv12touyvy_unscaled_altivec;
203  }
204 #endif /* HAVE_ALTIVEC */
205 }
AVPixelFormat
AVPixelFormat
Pixel format.
Definition: pixfmt.h:71
ff_get_unscaled_swscale_ppc
av_cold void ff_get_unscaled_swscale_ppc(SwsInternal *c)
Definition: yuv2yuv_altivec.c:188
SWS_BITEXACT
@ SWS_BITEXACT
Definition: swscale.h:158
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:109
av_cold
#define av_cold
Definition: attributes.h:106
AV_PIX_FMT_YUV420P
@ AV_PIX_FMT_YUV420P
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:73
AV_PIX_FMT_YUYV422
@ AV_PIX_FMT_YUYV422
packed YUV 4:2:2, 16bpp, Y0 Cb Y1 Cr
Definition: pixfmt.h:74
AV_CPU_FLAG_ALTIVEC
#define AV_CPU_FLAG_ALTIVEC
standard
Definition: cpu.h:64
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
height
#define height
Definition: dsp.h:89
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:87
cpu.h
i
#define i(width, name, range_min, range_max)
Definition: cbs_h264.c:63
attributes.h
swscale_internal.h
SwsInternal
Definition: swscale_internal.h:334
AV_PIX_FMT_UYVY422
@ AV_PIX_FMT_UYVY422
packed YUV 4:2:2, 16bpp, Cb Y0 Cr Y1
Definition: pixfmt.h:88
util_altivec.h
width
#define width
Definition: dsp.h:89
src
#define src
Definition: vp8dsp.c:248
swscale.h