29     v16u8 
src0, 
src1, src2, src3, ref0, ref1, ref2, ref3;
 
   32     for (ht_cnt = (height >> 2); ht_cnt--;) {
 
   33         LD_UB4(src, src_stride, src0, src1, src2, src3);
 
   34         src += (4 * src_stride);
 
   35         LD_UB4(ref, ref_stride, ref0, ref1, ref2, ref3);
 
   36         ref += (4 * ref_stride);
 
   38         PCKEV_D4_UB(src1, src0, src3, src2, ref1, ref0, ref3, ref2,
 
   39                     src0, src1, ref0, ref1);
 
   54     for (ht_cnt = (height >> 2); ht_cnt--;) {
 
   55         LD_UB2(src, src_stride, src0, src1);
 
   56         src += (2 * src_stride);
 
   57         LD_UB2(ref, ref_stride, ref0, ref1);
 
   58         ref += (2 * ref_stride);
 
   61         LD_UB2(src, src_stride, src0, src1);
 
   62         src += (2 * src_stride);
 
   63         LD_UB2(ref, ref_stride, ref0, ref1);
 
   64         ref += (2 * ref_stride);
 
   78     v16u8 
src0, 
src1, src2, src3, comp0, comp1;
 
   79     v16u8 ref0, ref1, ref2, ref3, ref4, ref5;
 
   82     for (ht_cnt = (height >> 3); ht_cnt--;) {
 
   83         LD_UB4(src, src_stride, src0, src1, src2, src3);
 
   84         src += (4 * src_stride);
 
   85         LD_UB4(ref, ref_stride, ref0, ref1, ref2, ref3);
 
   86         ref += (4 * ref_stride);
 
   90         SLDI_B2_UB(ref0, ref1, ref0, ref1, ref0, ref1, 1);
 
   91         SLDI_B2_UB(ref2, ref3, ref2, ref3, ref2, ref3, 1);
 
   96         LD_UB4(src, src_stride, src0, src1, src2, src3);
 
   97         src += (4 * src_stride);
 
   98         LD_UB4(ref, ref_stride, ref0, ref1, ref2, ref3);
 
   99         ref += (4 * ref_stride);
 
  103         SLDI_B2_UB(ref0, ref1, ref0, ref1, ref0, ref1, 1);
 
  104         SLDI_B2_UB(ref2, ref3, ref2, ref3, ref2, ref3, 1);
 
  120     v16u8 
src0, 
src1, src2, src3, comp0, comp1;
 
  121     v16u8 ref00, ref10, ref20, ref30, ref01, ref11, ref21, ref31;
 
  124     for (ht_cnt = (height >> 3); ht_cnt--;) {
 
  125         LD_UB4(src, src_stride, src0, src1, src2, src3);
 
  126         src += (4 * src_stride);
 
  127         LD_UB4(ref, ref_stride, ref00, ref10, ref20, ref30);
 
  128         LD_UB4(ref + 1, ref_stride, ref01, ref11, ref21, ref31);
 
  129         ref += (4 * ref_stride);
 
  131         AVER_UB2_UB(ref01, ref00, ref11, ref10, comp0, comp1);
 
  133         AVER_UB2_UB(ref21, ref20, ref31, ref30, comp0, comp1);
 
  136         LD_UB4(src, src_stride, src0, src1, src2, src3);
 
  137         src += (4 * src_stride);
 
  138         LD_UB4(ref, ref_stride, ref00, ref10, ref20, ref30);
 
  139         LD_UB4(ref + 1, ref_stride, ref01, ref11, ref21, ref31);
 
  140         ref += (4 * ref_stride);
 
  142         AVER_UB2_UB(ref01, ref00, ref11, ref10, comp0, comp1);
 
  144         AVER_UB2_UB(ref21, ref20, ref31, ref30, comp0, comp1);
 
  158     v16u8 
src0, 
src1, src2, src3, comp0, comp1;
 
  159     v16u8 ref0, ref1, ref2, ref3, ref4;
 
  162     for (ht_cnt = (height >> 3); ht_cnt--;) {
 
  163         LD_UB4(src, src_stride, src0, src1, src2, src3);
 
  164         src += (4 * src_stride);
 
  165         LD_UB5(ref, ref_stride, ref0, ref1, ref2, ref3, ref4);
 
  166         ref += (4 * ref_stride);
 
  174         LD_UB4(src, src_stride, src0, src1, src2, src3);
 
  175         src += (4 * src_stride);
 
  176         LD_UB5(ref, ref_stride, ref0, ref1, ref2, ref3, ref4);
 
  177         ref += (4 * ref_stride);
 
  196     v16u8 
src0, 
src1, src2, src3, comp0, comp1;
 
  197     v16u8 ref0, ref1, ref2, ref3, ref4;
 
  200     for (ht_cnt = (height >> 3); ht_cnt--;) {
 
  201         LD_UB5(ref, ref_stride, ref4, ref0, ref1, ref2, ref3);
 
  202         ref += (5 * ref_stride);
 
  203         LD_UB4(src, src_stride, src0, src1, src2, src3);
 
  204         src += (4 * src_stride);
 
  213         LD_UB4(ref, ref_stride, ref0, ref1, ref2, ref3);
 
  214         ref += (3 * ref_stride);
 
  215         LD_UB4(src, src_stride, src0, src1, src2, src3);
 
  216         src += (4 * src_stride);
 
  235     v16u8 ref0, ref1, ref2, ref3, ref4;
 
  236     v16i8 
mask = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
 
  237     v8u16 comp0, comp1, comp2, comp3;
 
  240     for (ht_cnt = (height >> 2); ht_cnt--;) {
 
  241         LD_UB5(ref, ref_stride, ref4, ref0, ref1, ref2, ref3);
 
  242         ref += (4 * ref_stride);
 
  243         LD_UB4(src, src_stride, src0, src1, src2, src3);
 
  244         src += (4 * src_stride);
 
  248         VSHF_B2_UB(ref4, ref4, ref0, ref0, mask, mask, temp0, temp1);
 
  249         comp0 = __msa_hadd_u_h(temp0, temp0);
 
  250         comp1 = __msa_hadd_u_h(temp1, temp1);
 
  252         comp0 = (v8u16) __msa_srari_h((v8i16) comp0, 2);
 
  253         comp0 = (v8u16) __msa_pckev_b((v16i8) comp0, (v16i8) comp0);
 
  255         temp0 = (v16u8) __msa_vshf_b(mask, (v16i8) ref1, (v16i8) ref1);
 
  256         comp2 = __msa_hadd_u_h(temp0, temp0);
 
  258         comp1 = (v8u16) __msa_srari_h((v8i16) comp1, 2);
 
  259         comp1 = (v8u16) __msa_pckev_b((v16i8) comp1, (v16i8) comp1);
 
  260         comp1 = (v8u16) __msa_pckev_d((v2i64) comp1, (v2i64) comp0);
 
  261         diff = (v16u8) __msa_asub_u_b(src0, (v16u8) comp1);
 
  262         sad += __msa_hadd_u_h(diff, diff);
 
  264         temp1 = (v16u8) __msa_vshf_b(mask, (v16i8) ref2, (v16i8) ref2);
 
  265         comp3 = __msa_hadd_u_h(temp1, temp1);
 
  267         comp2 = (v8u16) __msa_srari_h((v8i16) comp2, 2);
 
  268         comp2 = (v8u16) __msa_pckev_b((v16i8) comp2, (v16i8) comp2);
 
  270         temp0 = (v16u8) __msa_vshf_b(mask, (v16i8) ref3, (v16i8) ref3);
 
  271         comp0 = __msa_hadd_u_h(temp0, temp0);
 
  273         comp3 = (v8u16) __msa_srari_h((v8i16) comp3, 2);
 
  274         comp3 = (v8u16) __msa_pckev_b((v16i8) comp3, (v16i8) comp3);
 
  275         comp3 = (v8u16) __msa_pckev_d((v2i64) comp3, (v2i64) comp2);
 
  276         diff = (v16u8) __msa_asub_u_b(src1, (v16u8) comp3);
 
  277         sad += __msa_hadd_u_h(diff, diff);
 
  291     v16u8 temp0, temp1, temp2, temp3;
 
  292     v16u8 ref00, ref01, ref02, ref03, ref04, ref10, ref11, ref12, ref13, ref14;
 
  293     v8u16 comp0, comp1, comp2, comp3;
 
  296     for (ht_cnt = (height >> 3); ht_cnt--;) {
 
  297         LD_UB4(src, src_stride, src0, src1, src2, src3);
 
  298         src += (4 * src_stride);
 
  299         LD_UB5(ref, ref_stride, ref04, ref00, ref01, ref02, ref03);
 
  300         LD_UB5(ref + 1, ref_stride, ref14, ref10, ref11, ref12, ref13);
 
  301         ref += (5 * ref_stride);
 
  304         comp0 = __msa_hadd_u_h(temp0, temp0);
 
  305         comp1 = __msa_hadd_u_h(temp1, temp1);
 
  307         comp2 = __msa_hadd_u_h(temp2, temp2);
 
  308         comp3 = __msa_hadd_u_h(temp3, temp3);
 
  312         comp = (v16u8) __msa_pckev_b((v16i8) comp1, (v16i8) comp0);
 
  313         diff = __msa_asub_u_b(src0, comp);
 
  314         sad += __msa_hadd_u_h(diff, diff);
 
  317         comp0 = __msa_hadd_u_h(temp0, temp0);
 
  318         comp1 = __msa_hadd_u_h(temp1, temp1);
 
  322         comp = (v16u8) __msa_pckev_b((v16i8) comp3, (v16i8) comp2);
 
  323         diff = __msa_asub_u_b(src1, comp);
 
  324         sad += __msa_hadd_u_h(diff, diff);
 
  327         comp2 = __msa_hadd_u_h(temp2, temp2);
 
  328         comp3 = __msa_hadd_u_h(temp3, temp3);
 
  332         comp = (v16u8) __msa_pckev_b((v16i8) comp1, (v16i8) comp0);
 
  333         diff = __msa_asub_u_b(src2, comp);
 
  334         sad += __msa_hadd_u_h(diff, diff);
 
  337         comp0 = __msa_hadd_u_h(temp0, temp0);
 
  338         comp1 = __msa_hadd_u_h(temp1, temp1);
 
  342         comp = (v16u8) __msa_pckev_b((v16i8) comp3, (v16i8) comp2);
 
  343         diff = __msa_asub_u_b(src3, comp);
 
  344         sad += __msa_hadd_u_h(diff, diff);
 
  346         LD_UB4(src, src_stride, src0, src1, src2, src3);
 
  347         src += (4 * src_stride);
 
  348         LD_UB4(ref, ref_stride, ref00, ref01, ref02, ref03);
 
  349         LD_UB4(ref + 1, ref_stride, ref10, ref11, ref12, ref13);
 
  350         ref += (3 * ref_stride);
 
  353         comp2 = __msa_hadd_u_h(temp2, temp2);
 
  354         comp3 = __msa_hadd_u_h(temp3, temp3);
 
  358         comp = (v16u8) __msa_pckev_b((v16i8) comp1, (v16i8) comp0);
 
  359         diff = __msa_asub_u_b(src0, comp);
 
  360         sad += __msa_hadd_u_h(diff, diff);
 
  363         comp0 = __msa_hadd_u_h(temp0, temp0);
 
  364         comp1 = __msa_hadd_u_h(temp1, temp1);
 
  368         comp = (v16u8) __msa_pckev_b((v16i8) comp3, (v16i8) comp2);
 
  369         diff = __msa_asub_u_b(src1, comp);
 
  370         sad += __msa_hadd_u_h(diff, diff);
 
  373         comp2 = __msa_hadd_u_h(temp2, temp2);
 
  374         comp3 = __msa_hadd_u_h(temp3, temp3);
 
  378         comp = (v16u8) __msa_pckev_b((v16i8) comp1, (v16i8) comp0);
 
  379         diff = __msa_asub_u_b(src2, comp);
 
  380         sad += __msa_hadd_u_h(diff, diff);
 
  383         comp0 = __msa_hadd_u_h(temp0, temp0);
 
  384         comp1 = __msa_hadd_u_h(temp1, temp1);
 
  388         comp = (v16u8) __msa_pckev_b((v16i8) comp3, (v16i8) comp2);
 
  389         diff = __msa_asub_u_b(src3, comp);
 
  390         sad += __msa_hadd_u_h(diff, diff);
 
  396 #define CALC_MSE_B(src, ref, var)                                    \ 
  398     v16u8 src_l0_m, src_l1_m;                                        \ 
  399     v8i16 res_l0_m, res_l1_m;                                        \ 
  401     ILVRL_B2_UB(src, ref, src_l0_m, src_l1_m);                       \ 
  402     HSUB_UB2_SH(src_l0_m, src_l1_m, res_l0_m, res_l1_m);             \ 
  403     DPADD_SH2_SW(res_l0_m, res_l1_m, res_l0_m, res_l1_m, var, var);  \ 
  413     uint32_t ref0, ref1, ref2, ref3;
 
  418     for (ht_cnt = (height >> 2); ht_cnt--;) {
 
  419         LW4(src_ptr, src_stride, src0, src1, src2, src3);
 
  420         src_ptr += (4 * src_stride);
 
  421         LW4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
 
  422         ref_ptr += (4 * ref_stride);
 
  441     v16u8 ref0, ref1, ref2, ref3;
 
  444     for (ht_cnt = (height >> 2); ht_cnt--;) {
 
  445         LD_UB4(src_ptr, src_stride, src0, src1, src2, src3);
 
  446         src_ptr += (4 * src_stride);
 
  447         LD_UB4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
 
  448         ref_ptr += (4 * ref_stride);
 
  450         PCKEV_D4_UB(src1, src0, src3, src2, ref1, ref0, ref3, ref2,
 
  451                     src0, src1, ref0, ref1);
 
  470     for (ht_cnt = (height >> 2); ht_cnt--;) {
 
  471         src = 
LD_UB(src_ptr);
 
  472         src_ptr += src_stride;
 
  473         ref = 
LD_UB(ref_ptr);
 
  474         ref_ptr += ref_stride;
 
  477         src = 
LD_UB(src_ptr);
 
  478         src_ptr += src_stride;
 
  479         ref = 
LD_UB(ref_ptr);
 
  480         ref_ptr += ref_stride;
 
  483         src = 
LD_UB(src_ptr);
 
  484         src_ptr += src_stride;
 
  485         ref = 
LD_UB(ref_ptr);
 
  486         ref_ptr += ref_stride;
 
  489         src = 
LD_UB(src_ptr);
 
  490         src_ptr += src_stride;
 
  491         ref = 
LD_UB(ref_ptr);
 
  492         ref_ptr += ref_stride;
 
  504     v16u8 
src0, 
src1, src2, src3, src4, src5, src6, src7;
 
  505     v16u8 ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7;
 
  506     v8u16 diff0, diff1, diff2, diff3, diff4, diff5, diff6, diff7;
 
  507     v8u16 temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
 
  511     LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
 
  512     LD_UB8(ref, ref_stride, ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7);
 
  513     ILVR_B8_UH(src0, ref0, src1, ref1, src2, ref2, src3, ref3,
 
  514                src4, ref4, src5, ref5, src6, ref6, src7, ref7,
 
  515                diff0, diff1, diff2, diff3, diff4, diff5, diff6, diff7);
 
  516     HSUB_UB4_UH(diff0, diff1, diff2, diff3, diff0, diff1, diff2, diff3);
 
  517     HSUB_UB4_UH(diff4, diff5, diff6, diff7, diff4, diff5, diff6, diff7);
 
  519                        diff0, diff1, diff2, diff3, diff4, diff5, diff6, diff7);
 
  520     BUTTERFLY_8(diff0, diff2, diff4, diff6, diff7, diff5, diff3, diff1,
 
  521                 temp0, temp2, temp4, temp6, temp7, temp5, temp3, temp1);
 
  522     BUTTERFLY_8(temp0, temp1, temp4, temp5, temp7, temp6, temp3, temp2,
 
  523                 diff0, diff1, diff4, diff5, diff7, diff6, diff3, diff2);
 
  524     BUTTERFLY_8(diff0, diff1, diff2, diff3, diff7, diff6, diff5, diff4,
 
  525                 temp0, temp1, temp2, temp3, temp7, temp6, temp5, temp4);
 
  527                        temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7);
 
  528     BUTTERFLY_8(temp0, temp2, temp4, temp6, temp7, temp5, temp3, temp1,
 
  529                 diff0, diff2, diff4, diff6, diff7, diff5, diff3, diff1);
 
  530     BUTTERFLY_8(diff0, diff1, diff4, diff5, diff7, diff6, diff3, diff2,
 
  531                 temp0, temp1, temp4, temp5, temp7, temp6, temp3, temp2);
 
  532     ADD4(temp0, temp4, temp1, temp5, temp2, temp6, temp3, temp7,
 
  533          diff0, diff1, diff2, diff3);
 
  534     sum = __msa_asub_s_h((v8i16) temp3, (v8i16) temp7);
 
  535     sum += __msa_asub_s_h((v8i16) temp2, (v8i16) temp6);
 
  536     sum += __msa_asub_s_h((v8i16) temp1, (v8i16) temp5);
 
  537     sum += __msa_asub_s_h((v8i16) temp0, (v8i16) temp4);
 
  538     sum += __msa_add_a_h((v8i16) diff0, zero);
 
  539     sum += __msa_add_a_h((v8i16) diff1, zero);
 
  540     sum += __msa_add_a_h((v8i16) diff2, zero);
 
  541     sum += __msa_add_a_h((v8i16) diff3, zero);
 
  550     v16u8 
src0, 
src1, src2, src3, src4, src5, src6, src7;
 
  551     v8u16 diff0, diff1, diff2, diff3, diff4, diff5, diff6, diff7;
 
  552     v8u16 temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
 
  556     LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
 
  558                        src0, src1, src2, src3, src4, src5, src6, src7);
 
  559     ILVR_B8_UH(zero, src0, zero, src1, zero, src2, zero, src3,
 
  560                zero, src4, zero, src5, zero, src6, zero, src7,
 
  561                diff0, diff1, diff2, diff3, diff4, diff5, diff6, diff7);
 
  562     BUTTERFLY_8(diff0, diff2, diff4, diff6, diff7, diff5, diff3, diff1,
 
  563                 temp0, temp2, temp4, temp6, temp7, temp5, temp3, temp1);
 
  564     BUTTERFLY_8(temp0, temp1, temp4, temp5, temp7, temp6, temp3, temp2,
 
  565                 diff0, diff1, diff4, diff5, diff7, diff6, diff3, diff2);
 
  566     BUTTERFLY_8(diff0, diff1, diff2, diff3, diff7, diff6, diff5, diff4,
 
  567                 temp0, temp1, temp2, temp3, temp7, temp6, temp5, temp4);
 
  569                        temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7);
 
  570     BUTTERFLY_8(temp0, temp2, temp4, temp6, temp7, temp5, temp3, temp1,
 
  571                 diff0, diff2, diff4, diff6, diff7, diff5, diff3, diff1);
 
  572     BUTTERFLY_8(diff0, diff1, diff4, diff5, diff7, diff6, diff3, diff2,
 
  573                 temp0, temp1, temp4, temp5, temp7, temp6, temp3, temp2);
 
  574     ADD4(temp0, temp4, temp1, temp5, temp2, temp6, temp3, temp7,
 
  575          diff0, diff1, diff2, diff3);
 
  576     sum = __msa_asub_s_h((v8i16) temp3, (v8i16) temp7);
 
  577     sum += __msa_asub_s_h((v8i16) temp2, (v8i16) temp6);
 
  578     sum += __msa_asub_s_h((v8i16) temp1, (v8i16) temp5);
 
  579     sum += __msa_asub_s_h((v8i16) temp0, (v8i16) temp4);
 
  580     sum += __msa_add_a_h((v8i16) diff0, (v8i16) zero);
 
  581     sum += __msa_add_a_h((v8i16) diff1, (v8i16) zero);
 
  582     sum += __msa_add_a_h((v8i16) diff2, (v8i16) zero);
 
  583     sum += __msa_add_a_h((v8i16) diff3, (v8i16) zero);
 
  585     sum_res -= abs(temp0[0] + temp4[0]);
 
  669 #define WRAPPER8_16_SQ(name8, name16)                      \ 
  670 int name16(MpegEncContext *s, uint8_t *dst, uint8_t *src,  \ 
  671            ptrdiff_t stride, int h)                        \ 
  674     score += name8(s, dst, src, stride, 8);                \ 
  675     score += name8(s, dst + 8, src + 8, stride, 8);        \ 
  679         score +=name8(s, dst, src, stride, 8);             \ 
  680         score +=name8(s, dst + 8, src + 8, stride, 8);     \ 
int ff_pix_abs8_y2_msa(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t stride, int h)
int ff_pix_abs16_y2_msa(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t stride, int h)
int ff_sse8_msa(MpegEncContext *v, uint8_t *src, uint8_t *ref, ptrdiff_t stride, int height)
int ff_pix_abs16_xy2_msa(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t stride, int h)
#define SAD_UB2_UH(in0, in1, ref0, ref1)
int ff_hadamard8_diff16_msa(MpegEncContext *s, uint8_t *dst, uint8_t *src, ptrdiff_t stride, int h)
static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride)
int ff_hadamard8_diff8x8_msa(MpegEncContext *s, uint8_t *dst, uint8_t *src, ptrdiff_t stride, int h)
int ff_pix_abs8_xy2_msa(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t stride, int h)
int ff_pix_abs8_msa(MpegEncContext *v, uint8_t *src, uint8_t *ref, ptrdiff_t stride, int height)
int ff_pix_abs16_x2_msa(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t stride, int h)
int ff_hadamard8_intra8x8_msa(MpegEncContext *s, uint8_t *dst, uint8_t *src, ptrdiff_t stride, int h)
static int32_t hadamard_diff_8x8_msa(uint8_t *src, int32_t src_stride, uint8_t *ref, int32_t ref_stride)
#define CALC_MSE_B(src, ref, var)
#define ADD4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3)
int ff_pix_abs8_x2_msa(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t stride, int h)
static uint32_t sad_hv_bilinear_filter_16width_msa(uint8_t *src, int32_t src_stride, uint8_t *ref, int32_t ref_stride, int32_t height)
static const uint16_t mask[17]
#define TRANSPOSE8x8_UH_UH(...)
static uint32_t sad_vert_bilinear_filter_16width_msa(uint8_t *src, int32_t src_stride, uint8_t *ref, int32_t ref_stride, int32_t height)
static uint32_t sad_16width_msa(uint8_t *src, int32_t src_stride, uint8_t *ref, int32_t ref_stride, int32_t height)
static uint32_t sad_8width_msa(uint8_t *src, int32_t src_stride, uint8_t *ref, int32_t ref_stride, int32_t height)
static int32_t hadamard_intra_8x8_msa(uint8_t *src, int32_t src_stride, uint8_t *ref, int32_t ref_stride)
int ff_sse16_msa(MpegEncContext *v, uint8_t *src, uint8_t *ref, ptrdiff_t stride, int height)
int ff_sse4_msa(MpegEncContext *v, uint8_t *src, uint8_t *ref, ptrdiff_t stride, int height)
#define BUTTERFLY_8(in0, in1, in2, in3, in4, in5, in6, in7,out0, out1, out2, out3, out4, out5, out6, out7)
static uint32_t sad_horiz_bilinear_filter_8width_msa(uint8_t *src, int32_t src_stride, uint8_t *ref, int32_t ref_stride, int32_t height)
static uint32_t sse_8width_msa(uint8_t *src_ptr, int32_t src_stride, uint8_t *ref_ptr, int32_t ref_stride, int32_t height)
#define INSERT_W4_UB(...)
static uint32_t sse_16width_msa(uint8_t *src_ptr, int32_t src_stride, uint8_t *ref_ptr, int32_t ref_stride, int32_t height)
static void comp(unsigned char *dst, ptrdiff_t dst_stride, unsigned char *src, ptrdiff_t src_stride, int add)
#define TRANSPOSE8x8_UB_UB(...)
#define WRAPPER8_16_SQ(name8, name16)
static uint32_t sse_4width_msa(uint8_t *src_ptr, int32_t src_stride, uint8_t *ref_ptr, int32_t ref_stride, int32_t height)
GLint GLenum GLboolean GLsizei stride
#define LW4(psrc, stride, out0, out1, out2, out3)
static int ref[MAX_W *MAX_W]
int ff_pix_abs16_msa(MpegEncContext *v, uint8_t *src, uint8_t *ref, ptrdiff_t stride, int height)
static uint32_t sad_hv_bilinear_filter_8width_msa(uint8_t *src, int32_t src_stride, uint8_t *ref, int32_t ref_stride, int32_t height)
static av_always_inline int diff(const uint32_t a, const uint32_t b)
static uint32_t sad_vert_bilinear_filter_8width_msa(uint8_t *src, int32_t src_stride, uint8_t *ref, int32_t ref_stride, int32_t height)
int ff_hadamard8_intra16_msa(MpegEncContext *s, uint8_t *dst, uint8_t *src, ptrdiff_t stride, int h)
static uint32_t sad_horiz_bilinear_filter_16width_msa(uint8_t *src, int32_t src_stride, uint8_t *ref, int32_t ref_stride, int32_t height)