FFmpeg
hpeldsp_alpha.c
Go to the documentation of this file.
1 /*
2  * Alpha optimized DSP utils
3  * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #include "libavutil/attributes.h"
23 #include "libavcodec/hpeldsp.h"
24 #include "hpeldsp_alpha.h"
25 #include "asm.h"
26 
/**
 * Lane-wise average of a and b, rounding down.
 *
 * The bits the operands share (a & b) are added to half of the bits in
 * which they differ. The differing bits are masked with BYTE_VEC(0xfe)
 * before the shift; BYTE_VEC is declared in asm.h and presumably
 * replicates its argument into every byte, so that no bit is shifted
 * from one byte into its neighbour -- TODO confirm against asm.h.
 */
static inline uint64_t avg2_no_rnd(uint64_t a, uint64_t b)
{
    const uint64_t shared    = a & b;
    const uint64_t differing = (a ^ b) & BYTE_VEC(0xfe);

    return shared + (differing >> 1);
}
31 
/**
 * Lane-wise average of a and b, rounding up.
 *
 * Starts from the union of the set bits (a | b) and subtracts half of
 * the bits in which the operands differ. The differing bits are masked
 * with BYTE_VEC(0xfe) before the shift; BYTE_VEC is declared in asm.h
 * and presumably replicates its argument into every byte -- TODO confirm
 * against asm.h.
 */
static inline uint64_t avg2(uint64_t a, uint64_t b)
{
    const uint64_t either    = a | b;
    const uint64_t differing = (a ^ b) & BYTE_VEC(0xfe);

    return either - (differing >> 1);
}
36 
#if 0
/*
 * Reference form of the four-input average. It is compiled out: the XY2
 * routines use the same scheme but carry the partial sums of one row over
 * to the next iteration instead of recomputing them.
 *
 * Each input is split into its bits above the low two (pre-shifted right
 * by two and summed) and its low two bits (summed, biased by
 * BYTE_VEC(0x02), then shifted right by two and masked).
 */
static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4)
{
    const uint64_t low_mask = BYTE_VEC(0x03);
    const uint64_t high_sum = ((l1 & ~low_mask) >> 2)
                            + ((l2 & ~low_mask) >> 2)
                            + ((l3 & ~low_mask) >> 2)
                            + ((l4 & ~low_mask) >> 2);
    const uint64_t low_sum  = (l1 & low_mask)
                            + (l2 & low_mask)
                            + (l3 & low_mask)
                            + (l4 & low_mask);

    return high_sum + (((low_sum + BYTE_VEC(0x02)) >> 2) & low_mask);
}
#endif
54 
/*
 * Full-pel operation. For each row: fetch one word from pixels with LOAD,
 * pass it to STORE together with block, then advance both pointers by
 * line_size.
 *
 * Expects pixels, block, line_size and h to be in scope. The body runs
 * before h is decremented and tested, so h must be at least 1 on entry;
 * h is 0 when the loop ends.
 */
#define OP(LOAD, STORE)                                 \
    do {                                                \
        const uint64_t row = LOAD(pixels);              \
                                                        \
        STORE(row, block);                              \
        block  += line_size;                            \
        pixels += line_size;                            \
    } while (--h)
61 
/*
 * Horizontal half-pel operation. For each row: fetch one word from pixels
 * with LOAD, build a second word that is the first shifted down by one
 * byte with pixels[8] placed in the top byte, and pass AVG2 of the two to
 * STORE together with block.
 *
 * Expects pixels, block, line_size and h to be in scope, and AVG2 to be
 * defined where the macro is expanded. Reads one byte past the loaded
 * word (pixels[8]). h must be at least 1 on entry.
 */
#define OP_X2(LOAD, STORE)                                          \
    do {                                                            \
        const uint64_t cur   = LOAD(pixels);                        \
        const uint64_t right = ((uint64_t) pixels[8] << 56)         \
                             | (cur >> 8);                          \
                                                                    \
        STORE(AVG2(cur, right), block);                             \
        block  += line_size;                                        \
        pixels += line_size;                                        \
    } while (--h)
72 
/*
 * Vertical half-pel operation. The first row is loaded once up front;
 * each iteration then advances pixels by line_size, loads the next row,
 * passes AVG2 of the two rows to STORE together with block, and keeps
 * the newly loaded row for the following iteration.
 *
 * Expects pixels, block, line_size and h to be in scope, and AVG2 to be
 * defined where the macro is expanded. Loads h + 1 rows in total.
 * h must be at least 1 on entry.
 */
#define OP_Y2(LOAD, STORE)                                  \
    do {                                                    \
        uint64_t above = LOAD(pixels);                      \
                                                            \
        do {                                                \
            uint64_t below;                                 \
                                                            \
            pixels += line_size;                            \
            below   = LOAD(pixels);                         \
            STORE(AVG2(above, below), block);               \
            above   = below;                                \
            block  += line_size;                            \
        } while (--h);                                      \
    } while (0)
86 
/*
 * Diagonal half-pel operation: averages each word with its one-byte-shifted
 * neighbour (built from pixels[8], as in OP_X2) and with the same pair
 * from the next row.
 *
 * Every row is reduced to two partial sums: "lo", the sum of the low two
 * bits of the word and its shifted neighbour, and "hi", the sum of the
 * remaining bits of each after shifting right by two. The value passed to
 * STORE combines the partial sums of two consecutive rows, adding
 * AVG4_ROUNDER to the low part before it is shifted and masked. The
 * partial sums of the newer row are kept for the next iteration.
 *
 * Expects pixels, block, line_size and h to be in scope, and BYTE_VEC and
 * AVG4_ROUNDER to be available where the macro is expanded. Loads h + 1
 * rows and reads pixels[8] on each. h must be at least 1 on entry.
 */
#define OP_XY2(LOAD, STORE)                                                 \
    do {                                                                    \
        uint64_t cur   = LOAD(pixels);                                      \
        uint64_t right = ((uint64_t) pixels[8] << 56) | (cur >> 8);         \
        uint64_t lo    = (cur   & BYTE_VEC(0x03))                           \
                       + (right & BYTE_VEC(0x03));                          \
        uint64_t hi    = ((cur   & ~BYTE_VEC(0x03)) >> 2)                   \
                       + ((right & ~BYTE_VEC(0x03)) >> 2);                  \
                                                                            \
        do {                                                                \
            uint64_t next_lo, next_hi;                                      \
            uint64_t rounded;                                               \
                                                                            \
            pixels += line_size;                                            \
            cur     = LOAD(pixels);                                         \
            right   = ((uint64_t) pixels[8] << 56) | (cur >> 8);            \
            next_lo = (cur   & BYTE_VEC(0x03))                              \
                    + (right & BYTE_VEC(0x03));                             \
            next_hi = ((cur   & ~BYTE_VEC(0x03)) >> 2)                      \
                    + ((right & ~BYTE_VEC(0x03)) >> 2);                     \
            rounded = ((lo + next_lo + AVG4_ROUNDER) >> 2)                  \
                    & BYTE_VEC(0x03);                                       \
            STORE(rounded + hi + next_hi, block);                           \
                                                                            \
            lo      = next_lo;                                              \
            hi      = next_hi;                                              \
            block  += line_size;                                            \
        } while (--h);                                                      \
    } while (0)
117 
/*
 * MAKE_OP(OPNAME, SUFF, OPKIND, STORE) defines two static functions:
 *
 *   OPNAME_pixelsSUFF_axp    expands OPKIND once. The low three bits of
 *                            the pixels address pick the loader: ldq when
 *                            they are all zero, uldq otherwise.
 *   OPNAME_pixels16SUFF_axp  calls the function above twice with the same
 *                            line_size and h, the second time with block
 *                            and pixels advanced by 8.
 *
 * SUFF may be empty. STORE is forwarded to OPKIND unchanged.
 */
#define MAKE_OP(OPNAME, SUFF, OPKIND, STORE)                                \
static void OPNAME ## _pixels ## SUFF ## _axp                               \
        (uint8_t *restrict block, const uint8_t *restrict pixels,           \
         ptrdiff_t line_size, int h)                                        \
{                                                                           \
    if (((size_t) pixels & 0x7) == 0) {                                     \
        OPKIND(ldq, STORE);                                                 \
    } else {                                                                \
        OPKIND(uldq, STORE);                                                \
    }                                                                       \
}                                                                           \
                                                                            \
static void OPNAME ## _pixels16 ## SUFF ## _axp                             \
        (uint8_t *restrict block, const uint8_t *restrict pixels,           \
         ptrdiff_t line_size, int h)                                        \
{                                                                           \
    OPNAME ## _pixels ## SUFF ## _axp(block,     pixels,     line_size, h); \
    OPNAME ## _pixels ## SUFF ## _axp(block + 8, pixels + 8, line_size, h); \
}
137 
/*
 * PIXOP(OPNAME, STORE_OP) expands MAKE_OP four times for one OPNAME:
 * with an empty suffix (OP), and with the _x2, _y2 and _xy2 suffixes
 * (OP_X2, OP_Y2, OP_XY2). STORE_OP is passed through to each expansion.
 */
#define PIXOP(OPNAME, STORE_OP)                 \
    MAKE_OP(OPNAME,     , OP,     STORE_OP)     \
    MAKE_OP(OPNAME, _x2,  OP_X2,  STORE_OP)     \
    MAKE_OP(OPNAME, _y2,  OP_Y2,  STORE_OP)     \
    MAKE_OP(OPNAME, _xy2, OP_XY2, STORE_OP)
143 
144 /* Rounding primitives. */
145 #define AVG2 avg2
146 #define AVG4 avg4
147 #define AVG4_ROUNDER BYTE_VEC(0x02)
148 #define STORE(l, b) stq(l, b)
149 PIXOP(put, STORE);
150 
151 #undef STORE
152 #define STORE(l, b) stq(AVG2(l, ldq(b)), b);
153 PIXOP(avg, STORE);
154 
155 /* Not rounding primitives. */
156 #undef AVG2
157 #undef AVG4
158 #undef AVG4_ROUNDER
159 #undef STORE
160 #define AVG2 avg2_no_rnd
161 #define AVG4 avg4_no_rnd
162 #define AVG4_ROUNDER BYTE_VEC(0x01)
163 #define STORE(l, b) stq(l, b)
164 PIXOP(put_no_rnd, STORE);
165 
166 #undef STORE
167 #define STORE(l, b) stq(AVG2(l, ldq(b)), b);
168 PIXOP(avg_no_rnd, STORE);
169 
/**
 * 16-byte-wide wrapper around put_pixels_axp_asm().
 *
 * Calls put_pixels_axp_asm() twice with the same line_size and h: once on
 * block/pixels as given and once with both pointers advanced by 8.
 *
 * @param block     destination pointer, forwarded to put_pixels_axp_asm()
 * @param pixels    source pointer, forwarded to put_pixels_axp_asm()
 * @param line_size forwarded unchanged
 * @param h         forwarded unchanged
 */
static void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels,
                                 ptrdiff_t line_size, int h)
{
    put_pixels_axp_asm(block,     pixels,     line_size, h);
    put_pixels_axp_asm(block + 8, pixels + 8, line_size, h);
}
176 
178 {
179  c->put_pixels_tab[0][0] = put_pixels16_axp_asm;
180  c->put_pixels_tab[0][1] = put_pixels16_x2_axp;
181  c->put_pixels_tab[0][2] = put_pixels16_y2_axp;
182  c->put_pixels_tab[0][3] = put_pixels16_xy2_axp;
183 
184  c->put_no_rnd_pixels_tab[0][0] = put_pixels16_axp_asm;
185  c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_axp;
186  c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_axp;
187  c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_axp;
188 
189  c->avg_pixels_tab[0][0] = avg_pixels16_axp;
190  c->avg_pixels_tab[0][1] = avg_pixels16_x2_axp;
191  c->avg_pixels_tab[0][2] = avg_pixels16_y2_axp;
192  c->avg_pixels_tab[0][3] = avg_pixels16_xy2_axp;
193 
194  c->avg_no_rnd_pixels_tab[0] = avg_no_rnd_pixels16_axp;
195  c->avg_no_rnd_pixels_tab[1] = avg_no_rnd_pixels16_x2_axp;
196  c->avg_no_rnd_pixels_tab[2] = avg_no_rnd_pixels16_y2_axp;
197  c->avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels16_xy2_axp;
198 
199  c->put_pixels_tab[1][0] = put_pixels_axp_asm;
200  c->put_pixels_tab[1][1] = put_pixels_x2_axp;
201  c->put_pixels_tab[1][2] = put_pixels_y2_axp;
202  c->put_pixels_tab[1][3] = put_pixels_xy2_axp;
203 
204  c->put_no_rnd_pixels_tab[1][0] = put_pixels_axp_asm;
205  c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels_x2_axp;
206  c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels_y2_axp;
207  c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels_xy2_axp;
208 
209  c->avg_pixels_tab[1][0] = avg_pixels_axp;
210  c->avg_pixels_tab[1][1] = avg_pixels_x2_axp;
211  c->avg_pixels_tab[1][2] = avg_pixels_y2_axp;
212  c->avg_pixels_tab[1][3] = avg_pixels_xy2_axp;
213 }
avg2
static uint64_t avg2(uint64_t a, uint64_t b)
Definition: hpeldsp_alpha.c:32
pixels
int pixels
Definition: avisynth_c.h:390
b
#define b
Definition: input.c:41
asm.h
avg2_no_rnd
static uint64_t avg2_no_rnd(uint64_t a, uint64_t b)
Definition: hpeldsp_alpha.c:27
av_cold
#define av_cold
Definition: attributes.h:84
put_pixels_axp_asm
void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
ff_hpeldsp_init_alpha
av_cold void ff_hpeldsp_init_alpha(HpelDSPContext *c, int flags)
Definition: hpeldsp_alpha.c:177
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
HpelDSPContext
Half-pel DSP context.
Definition: hpeldsp.h:45
avg
#define avg(a, b, c, d)
Definition: colorspacedsp_template.c:28
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
attributes.h
hpeldsp_alpha.h
uint8_t
uint8_t
Definition: audio_convert.c:194
STORE
#define STORE(l, b)
Definition: hpeldsp_alpha.c:167
PIXOP
#define PIXOP(OPNAME, STORE)
Definition: hpeldsp_alpha.c:138
put_pixels16_axp_asm
static void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_alpha.c:170
avg4
#define avg4(a, b, c, d)
Definition: me_cmp.c:146
BYTE_VEC
static uint64_t BYTE_VEC(uint64_t x)
Definition: asm.h:42
hpeldsp.h
flags
#define flags(name, subs,...)
Definition: cbs_av1.c:565
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
h
h
Definition: vp9dsp_template.c:2038