36 static void get_pixels_altivec(int16_t *restrict
block,
const uint8_t *pixels,
40 vector
unsigned char perm =
41 (vector
unsigned char) {0x00,0x10, 0x01,0x11,0x02,0x12,0x03,0x13,\
42 0x04,0x14,0x05,0x15,0x06,0x16,0x07,0x17};
43 const vector
unsigned char zero =
44 (
const vector
unsigned char) vec_splat_u8(0);
46 for (i = 0; i < 8; i++) {
50 vector
unsigned char bytes = vec_vsx_ld(0, pixels);
54 vector
signed short shorts = (vector
signed short) vec_perm(bytes, zero, perm);
57 vec_vsx_st(shorts, i * 16, (vector
signed short *) block);
63 static void get_pixels_altivec(int16_t *restrict block,
const uint8_t *pixels,
69 for (i = 0; i < 8; i++) {
70 vec_u8 perm = vec_lvsl(0, pixels);
74 vec_u8 pixl = vec_ld(0, pixels);
75 vec_u8 pixr = vec_ld(7, pixels);
76 vec_u8 bytes = vec_perm(pixl, pixr, perm);
82 vec_st(shorts, i * 16, (
vec_s16 *)block);
91 static void diff_pixels_altivec(int16_t *restrict block,
const uint8_t *
s1,
95 const vector
unsigned char zero =
96 (
const vector
unsigned char) vec_splat_u8(0);
97 vector
signed short shorts1, shorts2;
99 for (i = 0; i < 4; i++) {
103 vector
unsigned char bytes = vec_vsx_ld(0, s1);
106 shorts1 = (vector
signed short) vec_mergeh(bytes, zero);
109 bytes =vec_vsx_ld(0, s2);
112 shorts2 = (vector
signed short) vec_mergeh(bytes, zero);
115 shorts1 = vec_sub(shorts1, shorts2);
118 vec_vsx_st(shorts1, 0, (vector
signed short *) block);
130 bytes = vec_vsx_ld(0, s1);
133 shorts1 = (vector
signed short) vec_mergeh(bytes, zero);
136 bytes = vec_vsx_ld(0, s2);
139 shorts2 = (vector
signed short) vec_mergeh(bytes, zero);
142 shorts1 = vec_sub(shorts1, shorts2);
145 vec_vsx_st(shorts1, 0, (vector
signed short *) block);
153 static void diff_pixels_altivec(int16_t *restrict block,
const uint8_t *s1,
154 const uint8_t *s2, ptrdiff_t stride)
161 for (i = 0; i < 4; i++) {
165 perm = vec_lvsl(0, s1);
166 vec_u8 pixl = vec_ld(0, s1);
167 vec_u8 pixr = vec_ld(15, s1);
168 vec_u8 bytes = vec_perm(pixl, pixr, perm);
171 shorts1 = (
vec_s16)vec_mergeh(zero, bytes);
174 perm = vec_lvsl(0, s2);
175 pixl = vec_ld(0, s2);
176 pixr = vec_ld(15, s2);
177 bytes = vec_perm(pixl, pixr, perm);
180 shorts2 = (
vec_s16)vec_mergeh(zero, bytes);
183 shorts1 = vec_sub(shorts1, shorts2);
186 vec_st(shorts1, 0, (
vec_s16 *)block);
198 perm = vec_lvsl(0, s1);
199 pixl = vec_ld(0, s1);
200 pixr = vec_ld(15, s1);
201 bytes = vec_perm(pixl, pixr, perm);
204 shorts1 = (
vec_s16)vec_mergeh(zero, bytes);
207 perm = vec_lvsl(0, s2);
208 pixl = vec_ld(0, s2);
209 pixr = vec_ld(15, s2);
210 bytes = vec_perm(pixl, pixr, perm);
213 shorts2 = (
vec_s16)vec_mergeh(zero, bytes);
216 shorts1 = vec_sub(shorts1, shorts2);
219 vec_st(shorts1, 0, (
vec_s16 *)block);
232 static void get_pixels_vsx(int16_t *restrict block,
const uint8_t *pixels,
236 for (i = 0; i < 8; i++) {
237 vec_s16 shorts = vsx_ld_u8_s16(0, pixels);
239 vec_vsx_st(shorts, i * 16, block);
245 static void diff_pixels_vsx(int16_t *restrict block,
const uint8_t *s1,
246 const uint8_t *s2, ptrdiff_t stride)
250 for (i = 0; i < 8; i++) {
251 shorts1 = vsx_ld_u8_s16(0, s1);
252 shorts2 = vsx_ld_u8_s16(0, s2);
254 shorts1 = vec_sub(shorts1, shorts2);
256 vec_vsx_st(shorts1, 0, block);
267 unsigned high_bit_depth)
275 if (!high_bit_depth) {
Macro definitions for various function/variable attributes.
The exact code depends on how similar the blocks are and how related they are to the block
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, and accessing outside allocated space. Undefined behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit-picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can and has in some cases led to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c… [truncated]
void(* diff_pixels)(int16_t *av_restrict block, const uint8_t *s1, const uint8_t *s2, ptrdiff_t stride)
#define PPC_ALTIVEC(flags)
Libavcodec external API header.
main external API structure.
av_cold void ff_pixblockdsp_init_ppc(PixblockDSPContext *c, AVCodecContext *avctx, unsigned high_bit_depth)
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Contains misc utility macros and inline functions.
void(* get_pixels)(int16_t *av_restrict block, const uint8_t *pixels, ptrdiff_t stride)
GLint GLenum GLboolean GLsizei stride
it's the only field you need to keep, assuming you have a context. There is some magic you don't need to care about around this; just let it […] vf […] default, minimum, maximum, flags. name is the option name; keep it simple and lowercase. Descriptions are short […]