FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
pixblockdsp.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2002 Brian Foley
3  * Copyright (c) 2002 Dieter Shirley
4  * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
23 #include "config.h"
24 #if HAVE_ALTIVEC_H
25 #include <altivec.h>
26 #endif
27 
28 #include "libavutil/attributes.h"
29 #include "libavutil/cpu.h"
30 #include "libavutil/ppc/cpu.h"
33 #include "libavcodec/avcodec.h"
34 #include "libavcodec/pixblockdsp.h"
35 
36 #if HAVE_ALTIVEC
37 
38 #if HAVE_VSX
/**
 * Copy an 8x8 block of 8-bit pixels into 64 16-bit values (VSX path).
 *
 * @param block     destination for the 64 int16_t results, stored one
 *                  16-byte vector per row
 * @param pixels    first source pixel of the top row; read with an
 *                  unaligned VSX load
 * @param line_size distance in bytes from one source row to the next
 */
static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
                               ptrdiff_t line_size)
{
    /* Selectors 0x00..0x07 pick the first eight bytes of the first vec_perm
     * operand, 0x10..0x17 pick bytes of the second one. Alternating them
     * interleaves every pixel with a byte from the all-zero vector.
     * NOTE(review): the pixel lands in the first byte of each 16-bit lane,
     * which is a zero-extension only for little-endian lane layout. */
    const vector unsigned char interleave =
        (vector unsigned char) { 0x00, 0x10, 0x01, 0x11, 0x02, 0x12, 0x03, 0x13,
                                 0x04, 0x14, 0x05, 0x15, 0x06, 0x16, 0x07, 0x17 };
    const vector unsigned char zeros =
        (const vector unsigned char) vec_splat_u8(0);
    vector signed short *dst = (vector signed short *) block;
    int row;

    for (row = 0; row < 8; row++, pixels += line_size) {
        /* Sixteen bytes are fetched although only the first eight are
         * consumed; the rest are discarded by the permute. */
        vector unsigned char src = vec_vsx_ld(0, pixels);

        /* Widen the eight wanted pixels to 16 bits each. */
        vector signed short widened =
            (vector signed short) vec_perm(src, zeros, interleave);

        /* Each output row occupies 16 bytes of the destination block. */
        vec_vsx_st(widened, row * 16, dst);
    }
}
65 #else
/**
 * Copy an 8x8 block of 8-bit pixels into 64 16-bit values
 * (AltiVec path used when VSX is not available).
 *
 * @param block     destination for the 64 int16_t results; it is written
 *                  with vec_st and is therefore assumed to be 16-byte aligned
 * @param pixels    first source pixel of the top row; any alignment
 * @param line_size distance in bytes from one source row to the next
 */
static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
                               ptrdiff_t line_size)
{
    int i;
    const vector unsigned char zero =
        (const vector unsigned char) vec_splat_u8(0);

    for (i = 0; i < 8; i++) {
        /* The realignment mask is derived from the low address bits of the
         * row being read, so it must be rebuilt for every row: line_size is
         * not required to be a multiple of 16, and a mask computed once from
         * the first row would then shuffle later rows incorrectly. */
        vector unsigned char perm = vec_lvsl(0, pixels);

        /* Read potentially unaligned pixels.
         * Only 8 pixels are wanted, so the second aligned load targets the
         * line holding the last wanted byte (offset 7); whatever else the
         * two loads bring in is ignored. */
        vector unsigned char pixl  = vec_ld(0, pixels);
        vector unsigned char pixr  = vec_ld(7, pixels);
        vector unsigned char bytes = vec_perm(pixl, pixr, perm);

        /* Convert the bytes into shorts by pairing each pixel with a
         * leading zero byte. */
        vector signed short shorts =
            (vector signed short) vec_mergeh(zero, bytes);

        /* Save the data to the block, we assume the block is 16-byte
         * aligned. Each row takes 16 bytes. */
        vec_st(shorts, i * 16, (vector signed short *) block);

        pixels += line_size;
    }
}
92 
93 #endif /* HAVE_VSX */
94 
95 #if HAVE_VSX
/* Load 16 bytes from a possibly unaligned address and interleave the first
 * eight of them with zero bytes, giving eight 16-bit lanes. */
static inline vector signed short diff_pixels_widen_row_vsx(const uint8_t *src,
                                                            vector unsigned char zero)
{
    vector unsigned char raw = vec_vsx_ld(0, src);

    return (vector signed short) vec_mergeh(raw, zero);
}

/**
 * Store the element-wise difference s1 - s2 of two 8x8 blocks of 8-bit
 * pixels as 64 16-bit values (VSX path).
 *
 * @param block  destination for the 64 int16_t differences, one 16-byte
 *               vector per row
 * @param s1     top-left pixel of the minuend block
 * @param s2     top-left pixel of the subtrahend block
 * @param stride distance in bytes between rows, applied to both sources
 */
static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
                                const uint8_t *s2, int stride)
{
    const vector unsigned char zero =
        (const vector unsigned char) vec_splat_u8(0);
    int row;

    for (row = 0; row < 8; row++) {
        /* Sixteen pixels are fetched from each source although only eight
         * are used; the surplus is dropped while widening. */
        vector signed short lhs = diff_pixels_widen_row_vsx(s1, zero);
        vector signed short rhs = diff_pixels_widen_row_vsx(s2, zero);

        /* Subtract lane by lane and write one finished row. */
        vec_vsx_st(vec_sub(lhs, rhs), 0, (vector signed short *) block);

        /* Step both sources down one row and the output on by 8 values. */
        s1    += stride;
        s2    += stride;
        block += 8;
    }
}
157 #else
/* Read 8 pixels from a possibly unaligned address and pair each with a
 * leading zero byte, giving eight 16-bit lanes.
 *
 * The vec_lvsl mask is built from the address actually being read, so the
 * result is correct for any row alignment. The second aligned load uses
 * offset 7 because only 8 bytes are consumed: it always covers the last
 * wanted byte and never touches a 16-byte line that holds none of them. */
static inline vector signed short diff_pixels_load_row_altivec(const uint8_t *src,
                                                               vector unsigned char zero)
{
    vector unsigned char perm  = vec_lvsl(0, src);
    vector unsigned char pixl  = vec_ld(0, src);
    vector unsigned char pixr  = vec_ld(7, src);
    vector unsigned char bytes = vec_perm(pixl, pixr, perm);

    return (vector signed short) vec_mergeh(zero, bytes);
}

/**
 * Store the element-wise difference s1 - s2 of two 8x8 blocks of 8-bit
 * pixels as 64 16-bit values (AltiVec path used when VSX is not available).
 *
 * @param block  destination for the 64 int16_t differences; it is written
 *               with vec_st and is therefore assumed to be 16-byte aligned
 * @param s1     top-left pixel of the minuend block; any alignment
 * @param s2     top-left pixel of the subtrahend block; any alignment
 * @param stride distance in bytes between rows, applied to both sources;
 *               it does not have to be a multiple of 16
 */
static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
                                const uint8_t *s2, int stride)
{
    int i;
    const vector unsigned char zero =
        (const vector unsigned char) vec_splat_u8(0);
    vector signed short shorts1, shorts2;

    for (i = 0; i < 8; i++) {
        /* Widen one row of each source; the realignment masks are rebuilt
         * per row inside the helper instead of being taken once from the
         * first row, which was only valid for strides that are multiples
         * of 16. */
        shorts1 = diff_pixels_load_row_altivec(s1, zero);
        shorts2 = diff_pixels_load_row_altivec(s2, zero);

        /* Do the subtraction. */
        shorts1 = vec_sub(shorts1, shorts2);

        /* Save the data to the block, we assume the block is 16-byte
         * aligned. */
        vec_st(shorts1, 0, (vector signed short *) block);

        s1    += stride;
        s2    += stride;
        block += 8;
    }
}
229 
230 #endif /* HAVE_VSX */
231 
232 #endif /* HAVE_ALTIVEC */
233 
235  AVCodecContext *avctx,
236  unsigned high_bit_depth)
237 {
238 #if HAVE_ALTIVEC
240  return;
241 
242  c->diff_pixels = diff_pixels_altivec;
243 
244  if (!high_bit_depth) {
245  c->get_pixels = get_pixels_altivec;
246  }
247 #endif /* HAVE_ALTIVEC */
248 }