FFmpeg
yuv2yuv_altivec.c
Go to the documentation of this file.
1 /*
2  * AltiVec-enhanced yuv-to-yuv conversion routines.
3  *
4  * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5  * based on the equivalent C code in swscale.c
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 #include <inttypes.h>
25 
26 #include "config.h"
27 #include "libavutil/attributes.h"
28 #include "libavutil/cpu.h"
29 #include "libswscale/swscale.h"
31 
32 #if HAVE_ALTIVEC
33 
34 static int yv12toyuy2_unscaled_altivec(SwsContext *c, const uint8_t *src[],
35  int srcStride[], int srcSliceY,
36  int srcSliceH, uint8_t *dstParam[],
37  int dstStride_a[])
38 {
39  uint8_t *dst = dstParam[0] + dstStride_a[0] * srcSliceY;
40  // yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH,
41  // srcStride[0], srcStride[1], dstStride[0]);
42  const uint8_t *ysrc = src[0];
43  const uint8_t *usrc = src[1];
44  const uint8_t *vsrc = src[2];
45  const int width = c->srcW;
46  const int height = srcSliceH;
47  const int lumStride = srcStride[0];
48  const int chromStride = srcStride[1];
49  const int dstStride = dstStride_a[0];
50  const vector unsigned char yperm = vec_lvsl(0, ysrc);
51  const int vertLumPerChroma = 2;
52  register unsigned int y;
53 
54  /* This code assumes:
55  *
56  * 1) dst is 16 bytes-aligned
57  * 2) dstStride is a multiple of 16
58  * 3) width is a multiple of 16
59  * 4) lum & chrom stride are multiples of 8
60  */
61 
62  for (y = 0; y < height; y++) {
63  int i;
64  for (i = 0; i < width - 31; i += 32) {
65  const unsigned int j = i >> 1;
66  vector unsigned char v_yA = vec_ld(i, ysrc);
67  vector unsigned char v_yB = vec_ld(i + 16, ysrc);
68  vector unsigned char v_yC = vec_ld(i + 32, ysrc);
69  vector unsigned char v_y1 = vec_perm(v_yA, v_yB, yperm);
70  vector unsigned char v_y2 = vec_perm(v_yB, v_yC, yperm);
71  vector unsigned char v_uA = vec_ld(j, usrc);
72  vector unsigned char v_uB = vec_ld(j + 16, usrc);
73  vector unsigned char v_u = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc));
74  vector unsigned char v_vA = vec_ld(j, vsrc);
75  vector unsigned char v_vB = vec_ld(j + 16, vsrc);
76  vector unsigned char v_v = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc));
77  vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
78  vector unsigned char v_uv_b = vec_mergel(v_u, v_v);
79  vector unsigned char v_yuy2_0 = vec_mergeh(v_y1, v_uv_a);
80  vector unsigned char v_yuy2_1 = vec_mergel(v_y1, v_uv_a);
81  vector unsigned char v_yuy2_2 = vec_mergeh(v_y2, v_uv_b);
82  vector unsigned char v_yuy2_3 = vec_mergel(v_y2, v_uv_b);
83  vec_st(v_yuy2_0, (i << 1), dst);
84  vec_st(v_yuy2_1, (i << 1) + 16, dst);
85  vec_st(v_yuy2_2, (i << 1) + 32, dst);
86  vec_st(v_yuy2_3, (i << 1) + 48, dst);
87  }
88  if (i < width) {
89  const unsigned int j = i >> 1;
90  vector unsigned char v_y1 = vec_ld(i, ysrc);
91  vector unsigned char v_u = vec_ld(j, usrc);
92  vector unsigned char v_v = vec_ld(j, vsrc);
93  vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
94  vector unsigned char v_yuy2_0 = vec_mergeh(v_y1, v_uv_a);
95  vector unsigned char v_yuy2_1 = vec_mergel(v_y1, v_uv_a);
96  vec_st(v_yuy2_0, (i << 1), dst);
97  vec_st(v_yuy2_1, (i << 1) + 16, dst);
98  }
99  if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
100  usrc += chromStride;
101  vsrc += chromStride;
102  }
103  ysrc += lumStride;
104  dst += dstStride;
105  }
106 
107  return srcSliceH;
108 }
109 
110 static int yv12touyvy_unscaled_altivec(SwsContext *c, const uint8_t *src[],
111  int srcStride[], int srcSliceY,
112  int srcSliceH, uint8_t *dstParam[],
113  int dstStride_a[])
114 {
115  uint8_t *dst = dstParam[0] + dstStride_a[0] * srcSliceY;
116  // yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH,
117  // srcStride[0], srcStride[1], dstStride[0]);
118  const uint8_t *ysrc = src[0];
119  const uint8_t *usrc = src[1];
120  const uint8_t *vsrc = src[2];
121  const int width = c->srcW;
122  const int height = srcSliceH;
123  const int lumStride = srcStride[0];
124  const int chromStride = srcStride[1];
125  const int dstStride = dstStride_a[0];
126  const int vertLumPerChroma = 2;
127  const vector unsigned char yperm = vec_lvsl(0, ysrc);
128  register unsigned int y;
129 
130  /* This code assumes:
131  *
132  * 1) dst is 16 bytes-aligned
133  * 2) dstStride is a multiple of 16
134  * 3) width is a multiple of 16
135  * 4) lum & chrom stride are multiples of 8
136  */
137 
138  for (y = 0; y < height; y++) {
139  int i;
140  for (i = 0; i < width - 31; i += 32) {
141  const unsigned int j = i >> 1;
142  vector unsigned char v_yA = vec_ld(i, ysrc);
143  vector unsigned char v_yB = vec_ld(i + 16, ysrc);
144  vector unsigned char v_yC = vec_ld(i + 32, ysrc);
145  vector unsigned char v_y1 = vec_perm(v_yA, v_yB, yperm);
146  vector unsigned char v_y2 = vec_perm(v_yB, v_yC, yperm);
147  vector unsigned char v_uA = vec_ld(j, usrc);
148  vector unsigned char v_uB = vec_ld(j + 16, usrc);
149  vector unsigned char v_u = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc));
150  vector unsigned char v_vA = vec_ld(j, vsrc);
151  vector unsigned char v_vB = vec_ld(j + 16, vsrc);
152  vector unsigned char v_v = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc));
153  vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
154  vector unsigned char v_uv_b = vec_mergel(v_u, v_v);
155  vector unsigned char v_uyvy_0 = vec_mergeh(v_uv_a, v_y1);
156  vector unsigned char v_uyvy_1 = vec_mergel(v_uv_a, v_y1);
157  vector unsigned char v_uyvy_2 = vec_mergeh(v_uv_b, v_y2);
158  vector unsigned char v_uyvy_3 = vec_mergel(v_uv_b, v_y2);
159  vec_st(v_uyvy_0, (i << 1), dst);
160  vec_st(v_uyvy_1, (i << 1) + 16, dst);
161  vec_st(v_uyvy_2, (i << 1) + 32, dst);
162  vec_st(v_uyvy_3, (i << 1) + 48, dst);
163  }
164  if (i < width) {
165  const unsigned int j = i >> 1;
166  vector unsigned char v_y1 = vec_ld(i, ysrc);
167  vector unsigned char v_u = vec_ld(j, usrc);
168  vector unsigned char v_v = vec_ld(j, vsrc);
169  vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
170  vector unsigned char v_uyvy_0 = vec_mergeh(v_uv_a, v_y1);
171  vector unsigned char v_uyvy_1 = vec_mergel(v_uv_a, v_y1);
172  vec_st(v_uyvy_0, (i << 1), dst);
173  vec_st(v_uyvy_1, (i << 1) + 16, dst);
174  }
175  if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
176  usrc += chromStride;
177  vsrc += chromStride;
178  }
179  ysrc += lumStride;
180  dst += dstStride;
181  }
182  return srcSliceH;
183 }
184 
185 #endif /* HAVE_ALTIVEC */
186 
188 {
189 #if HAVE_ALTIVEC
191  return;
192 
193  if (!(c->srcW & 15) && !(c->flags & SWS_BITEXACT) &&
194  c->srcFormat == AV_PIX_FMT_YUV420P) {
195  enum AVPixelFormat dstFormat = c->dstFormat;
196 
197  // unscaled YV12 -> packed YUV, we want speed
198  if (dstFormat == AV_PIX_FMT_YUYV422)
199  c->convert_unscaled = yv12toyuy2_unscaled_altivec;
200  else if (dstFormat == AV_PIX_FMT_UYVY422)
201  c->convert_unscaled = yv12touyvy_unscaled_altivec;
202  }
203 #endif /* HAVE_ALTIVEC */
204 }
AVPixelFormat
AVPixelFormat
Pixel format.
Definition: pixfmt.h:71
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:103
SWS_BITEXACT
#define SWS_BITEXACT
Definition: swscale.h:91
av_cold
#define av_cold
Definition: attributes.h:90
width
#define width
AV_PIX_FMT_YUV420P
@ AV_PIX_FMT_YUV420P
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:73
AV_PIX_FMT_YUYV422
@ AV_PIX_FMT_YUYV422
packed YUV 4:2:2, 16bpp, Y0 Cb Y1 Cr
Definition: pixfmt.h:74
ff_get_unscaled_swscale_ppc
av_cold void ff_get_unscaled_swscale_ppc(SwsContext *c)
Definition: yuv2yuv_altivec.c:187
AV_CPU_FLAG_ALTIVEC
#define AV_CPU_FLAG_ALTIVEC
standard
Definition: cpu.h:60
c
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit picking, but optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs. Optimizing code based on wrong assumptions can and has in some cases led to effects beyond the output of computations. The signed integer overflow problem in speed critical code: Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
cpu.h
srcSliceH
return srcSliceH
Definition: yuv2rgb_template.c:87
height
#define height
attributes.h
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:255
swscale_internal.h
AV_PIX_FMT_UYVY422
@ AV_PIX_FMT_UYVY422
packed YUV 4:2:2, 16bpp, Cb Y0 Cr Y1
Definition: pixfmt.h:88
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
SwsContext
Definition: swscale_internal.h:299
swscale.h