28                                   const int16_t **
src, uint8_t *dest,
 
   33     vector 
signed int vo1, vo2, vo3, vo4;
 
   34     vector 
unsigned short vs1, vs2;
 
   35     vector 
unsigned char vf;
 
   36     vector 
unsigned int altivec_vectorShiftInt19 =
 
   37         vec_add(vec_splat_u32(10), vec_splat_u32(9));
 
   39     for (
i = 0; 
i < 16; 
i++)
 
   43     vo2 = vec_ld(16, 
val);
 
   44     vo3 = vec_ld(32, 
val);
 
   45     vo4 = vec_ld(48, 
val);
 
   47     for (j = 0; j < filterSize; j++) {
 
   48         unsigned int joffset=j<<1;
 
   49         unsigned int xoffset=x<<1;
 
   51         vector 
signed short l1,vLumFilter;
 
   52         LOAD_FILTER(vLumFilter,
filter);
 
   53         vLumFilter = vec_splat(vLumFilter, 0);
 
   55         yuv2planeX_8(vo1, vo2, l1, 
src[j], x,     
perm, vLumFilter);
 
   56         yuv2planeX_8(vo3, vo4, l1, 
src[j], x + 8, 
perm, vLumFilter);
 
   59     vo1 = vec_sra(vo1, altivec_vectorShiftInt19);
 
   60     vo2 = vec_sra(vo2, altivec_vectorShiftInt19);
 
   61     vo3 = vec_sra(vo3, altivec_vectorShiftInt19);
 
   62     vo4 = vec_sra(vo4, altivec_vectorShiftInt19);
 
   63     vs1 = vec_packsu(vo1, vo2);
 
   64     vs2 = vec_packsu(vo3, vo4);
 
   65     vf  = vec_packsu(vs1, vs2);
 
   71                                 const int16_t **
src, uint8_t *dest, 
int dstW,
 
   76     for (
i = x; 
i < dstW; 
i++) {
 
   78         for (j = 0; j < filterSize; j++)
 
   85                                const int16_t **
src, uint8_t *dest, 
int dstW,
 
   88     int dst_u = -(uintptr_t)dest & 15;
 
   93     for (
i = dst_u; 
i < dstW - 15; 
i += 16)
 
  101                                 const uint8_t *
src, 
const int16_t *
filter,
 
  102                                 const int32_t *filterPos, 
int filterSize)
 
  107     if (filterSize % 4) {
 
  108         for (
i = 0; 
i < dstW; 
i++) {
 
  110             register int srcPos = filterPos[
i];
 
  111             register int val    = 0;
 
  112             for (j = 0; j < filterSize; j++)
 
  117         switch (filterSize) {
 
  119             for (
i = 0; 
i < dstW; 
i++) {
 
  120                 register int srcPos = filterPos[
i];
 
  122                 vector 
unsigned char src_vF = unaligned_load(srcPos, 
src);
 
  123                 vector 
signed short src_v, filter_v;
 
  124                 vector 
signed int val_vEven, val_s;
 
  126                         (vector 
signed short)(VEC_MERGEH((vector 
unsigned char)vzero, src_vF));
 
  128                 src_v = vec_mergeh(src_v, (vector 
signed short)vzero);
 
  130                 val_vEven = vec_mule(src_v, filter_v);
 
  131                 val_s     = vec_sums(val_vEven, vzero);
 
  132                 vec_st(val_s, 0, tempo);
 
  133                 dst[
i] = 
FFMIN(tempo[3] >> 7, (1 << 15) - 1);
 
  137             for (
i = 0; 
i < dstW; 
i++) {
 
  138                 register int srcPos = filterPos[
i];
 
  141                 vector 
signed short src_v, filter_v;
 
  142                 vector 
signed int val_v, val_s;
 
  143                 FIRST_LOAD(src_v0, srcPos, 
src, permS);
 
  144                 LOAD_SRCV8(srcPos, 0, 
src, permS, src_v0, src_v1, src_vF);
 
  146                         (vector 
signed short)(VEC_MERGEH((vector 
unsigned char)vzero, src_vF));
 
  147                 filter_v = vec_ld(
i << 4, 
filter);
 
  148                 val_v = vec_msums(src_v, filter_v, (vector 
signed int)vzero);
 
  149                 val_s = vec_sums(val_v, vzero);
 
  150                 vec_st(val_s, 0, tempo);
 
  151                 dst[
i] = 
FFMIN(tempo[3] >> 7, (1 << 15) - 1);
 
  156             for (
i = 0; 
i < dstW; 
i++) {
 
  157                 register int srcPos = filterPos[
i];
 
  159                 vector 
unsigned char src_vF = unaligned_load(srcPos, 
src);
 
  160                 vector 
signed short src_vA = 
 
  161                                              (vector 
signed short)(VEC_MERGEH((vector 
unsigned char)vzero, src_vF));
 
  162                 vector 
signed short src_vB = 
 
  163                                              (vector 
signed short)(VEC_MERGEL((vector 
unsigned char)vzero, src_vF));
 
  164                 vector 
signed short filter_v0 = vec_ld(
i << 5, 
filter);
 
  165                 vector 
signed short filter_v1 = vec_ld((
i << 5) + 16, 
filter);
 
  167                 vector 
signed int val_acc = vec_msums(src_vA, filter_v0, (vector 
signed int)vzero);
 
  168                 vector 
signed int val_v   = vec_msums(src_vB, filter_v1, val_acc);
 
  170                 vector 
signed int val_s = vec_sums(val_v, vzero);
 
  172                 VEC_ST(val_s, 0, tempo);
 
  173                 dst[
i] = 
FFMIN(tempo[3] >> 7, (1 << 15) - 1);
 
  178             for (
i = 0; 
i < dstW; 
i++) {
 
  180                 register int srcPos = filterPos[
i];
 
  182                 vector 
signed int val_s, val_v = (vector 
signed int)vzero;
 
  183                 vector 
signed short av_unused filter_v0R;
 
  186                 FIRST_LOAD(src_v0, srcPos, 
src, permS);
 
  188                 for (j = 0; j < filterSize - 15; j += 16) {
 
  189                     vector 
unsigned char av_unused src_v1, src_vF;
 
  191                                         filter_v0, filter_v1, src_vA, src_vB;
 
  192                     vector 
signed int val_acc;
 
  193                     LOAD_SRCV(srcPos, j, 
src, permS, src_v0, src_v1, src_vF);
 
  195                                                  (vector 
signed short)(VEC_MERGEH((vector 
unsigned char)vzero, src_vF));
 
  197                                                  (vector 
signed short)(VEC_MERGEL((vector 
unsigned char)vzero, src_vF));
 
  198                     GET_VFD(
i, j, 
filter, filter_v0R, filter_v1R, permF, filter_v0, 0);
 
  199                     GET_VFD(
i, j, 
filter, filter_v1R, filter_v2R, permF, filter_v1, 16);
 
  201                     val_acc = vec_msums(src_vA, filter_v0, val_v);
 
  202                     val_v = vec_msums(src_vB, filter_v1, val_acc);
 
  203                     UPDATE_PTR(filter_v2R, filter_v0R, src_v1, src_v0);
 
  206                 if (j < filterSize - 7) {
 
  208                     vector 
unsigned char av_unused src_v1, src_vF;
 
  209                     vector 
signed short src_v, 
av_unused filter_v1R, filter_v;
 
  210                     LOAD_SRCV8(srcPos, j, 
src, permS, src_v0, src_v1, src_vF);
 
  212                             (vector 
signed short)(VEC_MERGEH((vector 
unsigned char)vzero, src_vF));
 
  213                     GET_VFD(
i, j, 
filter, filter_v0R, filter_v1R, permF, filter_v, 0);
 
  214                     val_v = vec_msums(src_v, filter_v, val_v);
 
  216                 val_s = vec_sums(val_v, vzero);
 
  218                 VEC_ST(val_s, 0, tempo);
 
  219                 dst[
i] = 
FFMIN(tempo[3] >> 7, (1 << 15) - 1);