32 vector
signed int vo1, vo2, vo3, vo4;
33 vector
unsigned short vs1, vs2;
34 vector
unsigned char vf;
35 vector
unsigned int altivec_vectorShiftInt19 =
36 vec_add(vec_splat_u32(10), vec_splat_u32(9));
38 for (i = 0; i < 16; i++)
42 vo2 = vec_ld(16,
val);
43 vo3 = vec_ld(32,
val);
44 vo4 = vec_ld(48,
val);
46 for (j = 0; j < filterSize; j++) {
47 unsigned int joffset=j<<1;
48 unsigned int xoffset=x<<1;
50 vector
signed short l1,vLumFilter;
51 LOAD_FILTER(vLumFilter,
filter);
52 vLumFilter = vec_splat(vLumFilter, 0);
53 LOAD_L1(l1,
src[j],perm);
54 yuv2planeX_8(vo1, vo2, l1,
src[j], x, perm, vLumFilter);
55 yuv2planeX_8(vo3, vo4, l1,
src[j], x + 8, perm, vLumFilter);
58 vo1 = vec_sra(vo1, altivec_vectorShiftInt19);
59 vo2 = vec_sra(vo2, altivec_vectorShiftInt19);
60 vo3 = vec_sra(vo3, altivec_vectorShiftInt19);
61 vo4 = vec_sra(vo4, altivec_vectorShiftInt19);
62 vs1 = vec_packsu(vo1, vo2);
63 vs2 = vec_packsu(vo3, vo4);
64 vf = vec_packsu(vs1, vs2);
75 for (i = x; i < dstW; i++) {
76 int t = dither[(i +
offset) & 7] << 12;
77 for (j = 0; j < filterSize; j++)
78 t += src[j][i] * filter[j];
87 int dst_u = -(uintptr_t)dest & 15;
92 for (i = dst_u; i < dstW - 15; i += 16)
101 const int32_t *filterPos,
int filterSize)
106 if (filterSize % 4) {
107 for (i = 0; i < dstW; i++) {
109 register int srcPos = filterPos[
i];
110 register int val = 0;
111 for (j = 0; j < filterSize; j++)
112 val += ((
int)
src[srcPos + j]) *
filter[filterSize * i + j];
113 dst[
i] =
FFMIN(val >> 7, (1 << 15) - 1);
116 switch (filterSize) {
118 for (i = 0; i < dstW; i++) {
119 register int srcPos = filterPos[
i];
121 vector
unsigned char src_vF = unaligned_load(srcPos,
src);
122 vector
signed short src_v, filter_v;
123 vector
signed int val_vEven, val_s;
125 (vector
signed short)(VEC_MERGEH((vector
unsigned char)vzero, src_vF));
127 src_v = vec_mergeh(src_v, (vector
signed short)vzero);
128 GET_VF4(i, filter_v,
filter);
129 val_vEven = vec_mule(src_v, filter_v);
130 val_s = vec_sums(val_vEven, vzero);
131 vec_st(val_s, 0, tempo);
132 dst[
i] =
FFMIN(tempo[3] >> 7, (1 << 15) - 1);
136 for (i = 0; i < dstW; i++) {
137 register int srcPos = filterPos[
i];
140 vector
signed short src_v, filter_v;
141 vector
signed int val_v, val_s;
142 FIRST_LOAD(src_v0, srcPos,
src, permS);
143 LOAD_SRCV8(srcPos, 0,
src, permS, src_v0, src_v1, src_vF);
145 (vector
signed short)(VEC_MERGEH((vector
unsigned char)vzero, src_vF));
146 filter_v = vec_ld(i << 4,
filter);
147 val_v = vec_msums(src_v, filter_v, (vector
signed int)vzero);
148 val_s = vec_sums(val_v, vzero);
149 vec_st(val_s, 0, tempo);
150 dst[
i] =
FFMIN(tempo[3] >> 7, (1 << 15) - 1);
155 for (i = 0; i < dstW; i++) {
156 register int srcPos = filterPos[
i];
158 vector
unsigned char src_vF = unaligned_load(srcPos,
src);
159 vector
signed short src_vA =
160 (vector
signed short)(VEC_MERGEH((vector
unsigned char)vzero, src_vF));
161 vector
signed short src_vB =
162 (vector
signed short)(VEC_MERGEL((vector
unsigned char)vzero, src_vF));
163 vector
signed short filter_v0 = vec_ld(i << 5,
filter);
164 vector
signed short filter_v1 = vec_ld((i << 5) + 16,
filter);
166 vector
signed int val_acc = vec_msums(src_vA, filter_v0, (vector
signed int)vzero);
167 vector
signed int val_v = vec_msums(src_vB, filter_v1, val_acc);
169 vector
signed int val_s = vec_sums(val_v, vzero);
171 VEC_ST(val_s, 0, tempo);
172 dst[
i] =
FFMIN(tempo[3] >> 7, (1 << 15) - 1);
177 for (i = 0; i < dstW; i++) {
179 register int srcPos = filterPos[
i];
181 vector
signed int val_s, val_v = (vector
signed int)vzero;
182 vector
signed short av_unused filter_v0R;
184 FIRST_LOAD(filter_v0R, offset,
filter, permF);
185 FIRST_LOAD(src_v0, srcPos,
src, permS);
187 for (j = 0; j < filterSize - 15; j += 16) {
188 vector
unsigned char av_unused src_v1, src_vF;
190 filter_v0, filter_v1, src_vA, src_vB;
191 vector
signed int val_acc;
192 LOAD_SRCV(srcPos, j,
src, permS, src_v0, src_v1, src_vF);
194 (vector
signed short)(VEC_MERGEH((vector
unsigned char)vzero, src_vF));
196 (vector
signed short)(VEC_MERGEL((vector
unsigned char)vzero, src_vF));
197 GET_VFD(i, j,
filter, filter_v0R, filter_v1R, permF, filter_v0, 0);
198 GET_VFD(i, j,
filter, filter_v1R, filter_v2R, permF, filter_v1, 16);
200 val_acc = vec_msums(src_vA, filter_v0, val_v);
201 val_v = vec_msums(src_vB, filter_v1, val_acc);
202 UPDATE_PTR(filter_v2R, filter_v0R, src_v1, src_v0);
205 if (j < filterSize - 7) {
207 vector
unsigned char av_unused src_v1, src_vF;
208 vector
signed short src_v,
av_unused filter_v1R, filter_v;
209 LOAD_SRCV8(srcPos, j,
src, permS, src_v0, src_v1, src_vF);
211 (vector
signed short)(VEC_MERGEH((vector
unsigned char)vzero, src_vF));
212 GET_VFD(i, j,
filter, filter_v0R, filter_v1R, permF, filter_v, 0);
213 val_v = vec_msums(src_v, filter_v, val_v);
215 val_s = vec_sums(val_v, vzero);
217 VEC_ST(val_s, 0, tempo);
218 dst[
i] =
FFMIN(tempo[3] >> 7, (1 << 15) - 1);
static void FUNC() hScale_real(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize)
static void yuv2planeX_u(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW, const uint8_t *dither, int offset, int x)
#define LOCAL_ALIGNED(a, t, v,...)
It's the only field you need to keep, assuming you have a context. There is some magic you don't need to care about around this field; just let it be. vf offset
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, or accessing outside allocated space. Undefined behavior must not occur in a C program; it is not safe even if the output of the undefined operations is unused. This unsafety may seem like nitpicking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can lead, and in some cases has led, to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c[…]
filter_frame: For filters that do not use the activate() callback, this method is called when a frame is pushed to the filter's input. It can be called at any time except in a reentrant way. If the input frame is enough to produce output, then the filter should push the output frames on the output link immediately. As an exception to the previous rule, if the input frame is enough to produce several output frames, then the filter needs to output at least one per link. The additional frames can be left buffered in the filter.
static const uint8_t dither[8][8]
static void FUNC() yuv2planeX_8_16(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, const uint8_t *dither, int offset, int x)
static void FUNC() yuv2planeX(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW, const uint8_t *dither, int offset)
It's the only field you need to keep, assuming you have a context. There is some magic you don't need to care about around this field; just let it be. vf default, minimum, maximum, flags. name is the option name; keep it simple and lowercase. Descriptions are short
static double val(void *priv, double ch)