69 #if !HAVE_MIPS32R6 && !HAVE_MIPS64R6 
   /*
    * 81-entry lookup table, laid out as 9 rows of 9 values.
    * Each entry equals the number of non-zero digits of its index when the
    * index is written as four base-3 digits (81 = 3^4): index 0 -> 0,
    * index 1 -> 1, index 4 -> 2, index 80 -> 4.
    * NOTE(review): the name suggests this is the number of sign bits needed
    * for an unsigned-quad codebook index; the table's users are not visible
    * in this excerpt -- confirm against the callers.
    */
   76 static const uint8_t uquad_sign_bits[81] = {
 
   77     0, 1, 1, 1, 2, 2, 1, 2, 2,
 
   78     1, 2, 2, 2, 3, 3, 2, 3, 3,
 
   79     1, 2, 2, 2, 3, 3, 2, 3, 3,
 
   80     1, 2, 2, 2, 3, 3, 2, 3, 3,
 
   81     2, 3, 3, 3, 4, 4, 3, 4, 4,
 
   82     2, 3, 3, 3, 4, 4, 3, 4, 4,
 
   83     1, 2, 2, 2, 3, 3, 2, 3, 3,
 
   84     2, 3, 3, 3, 4, 4, 3, 4, 4,
 
   85     2, 3, 3, 3, 4, 4, 3, 4, 4
 
   /*
    * 64-entry lookup table, laid out as 8 rows of 8 values.
    * Entry [row * 8 + col] equals (row != 0) + (col != 0): 0 when both
    * components are zero, 1 when exactly one is non-zero, 2 when both are.
    * NOTE(review): presumably the sign-bit count for an unsigned pair whose
    * components range over 0..7; the table's users are not visible in this
    * excerpt -- confirm against the callers.
    */
   88 static const uint8_t upair7_sign_bits[64] = {
 
   89     0, 1, 1, 1, 1, 1, 1, 1,
 
   90     1, 2, 2, 2, 2, 2, 2, 2,
 
   91     1, 2, 2, 2, 2, 2, 2, 2,
 
   92     1, 2, 2, 2, 2, 2, 2, 2,
 
   93     1, 2, 2, 2, 2, 2, 2, 2,
 
   94     1, 2, 2, 2, 2, 2, 2, 2,
 
   95     1, 2, 2, 2, 2, 2, 2, 2,
 
   96     1, 2, 2, 2, 2, 2, 2, 2,
 
   /*
    * 169-entry lookup table, laid out as 13 rows of 13 values.
    * Entry [row * 13 + col] equals (row != 0) + (col != 0): 0 when both
    * components are zero, 1 when exactly one is non-zero, 2 when both are.
    * NOTE(review): presumably the sign-bit count for an unsigned pair whose
    * components range over 0..12; the table's users are not visible in this
    * excerpt -- confirm against the callers.
    */
   99 static const uint8_t upair12_sign_bits[169] = {
 
  100     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 
  101     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 
  102     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 
  103     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 
  104     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 
  105     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 
  106     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 
  107     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 
  108     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 
  109     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 
  110     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 
  111     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 
  112     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
 
  /*
   * 289-entry lookup table, laid out as 17 rows of 17 values.
   * Entry [row * 17 + col] equals (row != 0) + (col != 0): 0 when both
   * components are zero, 1 when exactly one is non-zero, 2 when both are.
   * NOTE(review): presumably the sign-bit count for the escape codebook
   * pair whose components range over 0..16; the table's users are not
   * visible in this excerpt -- confirm against the callers.
   */
  115 static const uint8_t esc_sign_bits[289] = {
 
  116     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 
  117     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 
  118     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 
  119     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 
  120     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 
  121     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 
  122     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 
  123     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 
  124     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 
  125     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 
  126     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 
  127     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 
  128     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 
  129     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 
  130     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 
  131     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 
  132     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
 
  138 static void quantize_and_encode_band_cost_SQUAD_mips(
struct AACEncContext *
s,
 
  140                                                      const float *scaled, 
int size, 
int scale_idx,
 
  141                                                      int cb, 
const float lambda, 
const float uplim,
 
  142                                                      int *
bits, 
float *energy, 
const float ROUNDING)
 
  147     int qc1, qc2, qc3, qc4;
 
  148     float qenergy = 0.0f;
 
  156     for (
i = 0; 
i < 
size; 
i += 4) {
 
  158         int *in_int = (
int *)&in[
i];
 
  169             ".set noreorder                 \n\t" 
  171             "slt    %[qc1], $zero,  %[qc1]  \n\t" 
  172             "slt    %[qc2], $zero,  %[qc2]  \n\t" 
  173             "slt    %[qc3], $zero,  %[qc3]  \n\t" 
  174             "slt    %[qc4], $zero,  %[qc4]  \n\t" 
  175             "lw     %[t0],  0(%[in_int])    \n\t" 
  176             "lw     %[t1],  4(%[in_int])    \n\t" 
  177             "lw     %[t2],  8(%[in_int])    \n\t" 
  178             "lw     %[t3],  12(%[in_int])   \n\t" 
  179             "srl    %[t0],  %[t0],  31      \n\t" 
  180             "srl    %[t1],  %[t1],  31      \n\t" 
  181             "srl    %[t2],  %[t2],  31      \n\t" 
  182             "srl    %[t3],  %[t3],  31      \n\t" 
  183             "subu   %[t4],  $zero,  %[qc1]  \n\t" 
  184             "subu   %[t5],  $zero,  %[qc2]  \n\t" 
  185             "subu   %[t6],  $zero,  %[qc3]  \n\t" 
  186             "subu   %[t7],  $zero,  %[qc4]  \n\t" 
  187             "movn   %[qc1], %[t4],  %[t0]   \n\t" 
  188             "movn   %[qc2], %[t5],  %[t1]   \n\t" 
  189             "movn   %[qc3], %[t6],  %[t2]   \n\t" 
  190             "movn   %[qc4], %[t7],  %[t3]   \n\t" 
  194             : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
 
  195               [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
 
  198             : [in_int]
"r"(in_int)
 
  211         put_bits(pb, p_bits[curidx], p_codes[curidx]);
 
  215             vec = &p_vec[curidx*4];
 
  227                 qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
 
  234 static void quantize_and_encode_band_cost_UQUAD_mips(
struct AACEncContext *
s,
 
  236                                                      const float *scaled, 
int size, 
int scale_idx,
 
  237                                                      int cb, 
const float lambda, 
const float uplim,
 
  238                                                      int *
bits, 
float *energy, 
const float ROUNDING)
 
  243     int qc1, qc2, qc3, qc4;
 
  244     float qenergy = 0.0f;
 
  252     for (
i = 0; 
i < 
size; 
i += 4) {
 
  253         int curidx, sign, count;
 
  254         int *in_int = (
int *)&in[
i];
 
  256         unsigned int v_codes;
 
  267             ".set noreorder                         \n\t" 
  269             "ori    %[t4],      $zero,      2       \n\t" 
  270             "ori    %[sign],    $zero,      0       \n\t" 
  271             "slt    %[t0],      %[t4],      %[qc1]  \n\t" 
  272             "slt    %[t1],      %[t4],      %[qc2]  \n\t" 
  273             "slt    %[t2],      %[t4],      %[qc3]  \n\t" 
  274             "slt    %[t3],      %[t4],      %[qc4]  \n\t" 
  275             "movn   %[qc1],     %[t4],      %[t0]   \n\t" 
  276             "movn   %[qc2],     %[t4],      %[t1]   \n\t" 
  277             "movn   %[qc3],     %[t4],      %[t2]   \n\t" 
  278             "movn   %[qc4],     %[t4],      %[t3]   \n\t" 
  279             "lw     %[t0],      0(%[in_int])        \n\t" 
  280             "lw     %[t1],      4(%[in_int])        \n\t" 
  281             "lw     %[t2],      8(%[in_int])        \n\t" 
  282             "lw     %[t3],      12(%[in_int])       \n\t" 
  283             "slt    %[t0],      %[t0],      $zero   \n\t" 
  284             "movn   %[sign],    %[t0],      %[qc1]  \n\t" 
  285             "slt    %[t1],      %[t1],      $zero   \n\t" 
  286             "slt    %[t2],      %[t2],      $zero   \n\t" 
  287             "slt    %[t3],      %[t3],      $zero   \n\t" 
  288             "sll    %[t0],      %[sign],    1       \n\t" 
  289             "or     %[t0],      %[t0],      %[t1]   \n\t" 
  290             "movn   %[sign],    %[t0],      %[qc2]  \n\t" 
  291             "slt    %[t4],      $zero,      %[qc1]  \n\t" 
  292             "slt    %[t1],      $zero,      %[qc2]  \n\t" 
  293             "slt    %[count],   $zero,      %[qc3]  \n\t" 
  294             "sll    %[t0],      %[sign],    1       \n\t" 
  295             "or     %[t0],      %[t0],      %[t2]   \n\t" 
  296             "movn   %[sign],    %[t0],      %[qc3]  \n\t" 
  297             "slt    %[t2],      $zero,      %[qc4]  \n\t" 
  298             "addu   %[count],   %[count],   %[t4]   \n\t" 
  299             "addu   %[count],   %[count],   %[t1]   \n\t" 
  300             "sll    %[t0],      %[sign],    1       \n\t" 
  301             "or     %[t0],      %[t0],      %[t3]   \n\t" 
  302             "movn   %[sign],    %[t0],      %[qc4]  \n\t" 
  303             "addu   %[count],   %[count],   %[t2]   \n\t" 
  307             : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
 
  308               [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
 
  309               [sign]
"=&r"(sign), [count]
"=&r"(count),
 
  312             : [in_int]
"r"(in_int)
 
  324         v_codes = (p_codes[curidx] << count) | (sign & ((1 << count) - 1));
 
  325         v_bits  = p_bits[curidx] + count;
 
  330             vec = &p_vec[curidx*4];
 
  331             e1 = copysignf(vec[0] * IQ, in[
i+0]);
 
  332             e2 = copysignf(vec[1] * IQ, in[
i+1]);
 
  333             e3 = copysignf(vec[2] * IQ, in[
i+2]);
 
  334             e4 = copysignf(vec[3] * IQ, in[
i+3]);
 
  342                 qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
 
  349 static void quantize_and_encode_band_cost_SPAIR_mips(
struct AACEncContext *
s,
 
  351                                                      const float *scaled, 
int size, 
int scale_idx,
 
  352                                                      int cb, 
const float lambda, 
const float uplim,
 
  353                                                      int *
bits, 
float *energy, 
const float ROUNDING)
 
  358     int qc1, qc2, qc3, qc4;
 
  359     float qenergy = 0.0f;
 
  367     for (
i = 0; 
i < 
size; 
i += 4) {
 
  369         int *in_int = (
int *)&in[
i];
 
  371         unsigned int v_codes;
 
  373         const float *vec1, *vec2;
 
  382             ".set noreorder                 \n\t" 
  384             "ori    %[t4],  $zero,  4       \n\t" 
  385             "slt    %[t0],  %[t4],  %[qc1]  \n\t" 
  386             "slt    %[t1],  %[t4],  %[qc2]  \n\t" 
  387             "slt    %[t2],  %[t4],  %[qc3]  \n\t" 
  388             "slt    %[t3],  %[t4],  %[qc4]  \n\t" 
  389             "movn   %[qc1], %[t4],  %[t0]   \n\t" 
  390             "movn   %[qc2], %[t4],  %[t1]   \n\t" 
  391             "movn   %[qc3], %[t4],  %[t2]   \n\t" 
  392             "movn   %[qc4], %[t4],  %[t3]   \n\t" 
  393             "lw     %[t0],  0(%[in_int])    \n\t" 
  394             "lw     %[t1],  4(%[in_int])    \n\t" 
  395             "lw     %[t2],  8(%[in_int])    \n\t" 
  396             "lw     %[t3],  12(%[in_int])   \n\t" 
  397             "srl    %[t0],  %[t0],  31      \n\t" 
  398             "srl    %[t1],  %[t1],  31      \n\t" 
  399             "srl    %[t2],  %[t2],  31      \n\t" 
  400             "srl    %[t3],  %[t3],  31      \n\t" 
  401             "subu   %[t4],  $zero,  %[qc1]  \n\t" 
  402             "subu   %[t5],  $zero,  %[qc2]  \n\t" 
  403             "subu   %[t6],  $zero,  %[qc3]  \n\t" 
  404             "subu   %[t7],  $zero,  %[qc4]  \n\t" 
  405             "movn   %[qc1], %[t4],  %[t0]   \n\t" 
  406             "movn   %[qc2], %[t5],  %[t1]   \n\t" 
  407             "movn   %[qc3], %[t6],  %[t2]   \n\t" 
  408             "movn   %[qc4], %[t7],  %[t3]   \n\t" 
  412             : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
 
  413               [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
 
  416             : [in_int]
"r"(in_int)
 
  426         v_codes = (p_codes[curidx] << p_bits[curidx2]) | (p_codes[curidx2]);
 
  427         v_bits  = p_bits[curidx] + p_bits[curidx2];
 
  432             vec1 = &p_vec[curidx*2 ];
 
  433             vec2 = &p_vec[curidx2*2];
 
  445                 qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
 
  452 static void quantize_and_encode_band_cost_UPAIR7_mips(
struct AACEncContext *
s,
 
  454                                                       const float *scaled, 
int size, 
int scale_idx,
 
  455                                                       int cb, 
const float lambda, 
const float uplim,
 
  456                                                       int *
bits, 
float *energy, 
const float ROUNDING)
 
  461     int qc1, qc2, qc3, qc4;
 
  462     float qenergy = 0.0f;
 
  470     for (
i = 0; 
i < 
size; 
i += 4) {
 
  471         int curidx1, curidx2, sign1, count1, sign2, count2;
 
  472         int *in_int = (
int *)&in[
i];
 
  474         unsigned int v_codes;
 
  476         const float *vec1, *vec2;
 
  485             ".set noreorder                         \n\t" 
  487             "ori    %[t4],      $zero,      7       \n\t" 
  488             "ori    %[sign1],   $zero,      0       \n\t" 
  489             "ori    %[sign2],   $zero,      0       \n\t" 
  490             "slt    %[t0],      %[t4],      %[qc1]  \n\t" 
  491             "slt    %[t1],      %[t4],      %[qc2]  \n\t" 
  492             "slt    %[t2],      %[t4],      %[qc3]  \n\t" 
  493             "slt    %[t3],      %[t4],      %[qc4]  \n\t" 
  494             "movn   %[qc1],     %[t4],      %[t0]   \n\t" 
  495             "movn   %[qc2],     %[t4],      %[t1]   \n\t" 
  496             "movn   %[qc3],     %[t4],      %[t2]   \n\t" 
  497             "movn   %[qc4],     %[t4],      %[t3]   \n\t" 
  498             "lw     %[t0],      0(%[in_int])        \n\t" 
  499             "lw     %[t1],      4(%[in_int])        \n\t" 
  500             "lw     %[t2],      8(%[in_int])        \n\t" 
  501             "lw     %[t3],      12(%[in_int])       \n\t" 
  502             "slt    %[t0],      %[t0],      $zero   \n\t" 
  503             "movn   %[sign1],   %[t0],      %[qc1]  \n\t" 
  504             "slt    %[t2],      %[t2],      $zero   \n\t" 
  505             "movn   %[sign2],   %[t2],      %[qc3]  \n\t" 
  506             "slt    %[t1],      %[t1],      $zero   \n\t" 
  507             "sll    %[t0],      %[sign1],   1       \n\t" 
  508             "or     %[t0],      %[t0],      %[t1]   \n\t" 
  509             "movn   %[sign1],   %[t0],      %[qc2]  \n\t" 
  510             "slt    %[t3],      %[t3],      $zero   \n\t" 
  511             "sll    %[t0],      %[sign2],   1       \n\t" 
  512             "or     %[t0],      %[t0],      %[t3]   \n\t" 
  513             "movn   %[sign2],   %[t0],      %[qc4]  \n\t" 
  514             "slt    %[count1],  $zero,      %[qc1]  \n\t" 
  515             "slt    %[t1],      $zero,      %[qc2]  \n\t" 
  516             "slt    %[count2],  $zero,      %[qc3]  \n\t" 
  517             "slt    %[t2],      $zero,      %[qc4]  \n\t" 
  518             "addu   %[count1],  %[count1],  %[t1]   \n\t" 
  519             "addu   %[count2],  %[count2],  %[t2]   \n\t" 
  523             : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
 
  524               [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
 
  525               [sign1]
"=&r"(sign1), [count1]
"=&r"(count1),
 
  526               [sign2]
"=&r"(sign2), [count2]
"=&r"(count2),
 
  529             : [in_int]
"r"(in_int)
 
  530             : 
"t0", 
"t1", 
"t2", 
"t3", 
"t4",
 
  537         v_codes = (p_codes[curidx1] << count1) | sign1;
 
  538         v_bits  = p_bits[curidx1] + count1;
 
  544         v_codes = (p_codes[curidx2] << count2) | sign2;
 
  545         v_bits  = p_bits[curidx2] + count2;
 
  550             vec1 = &p_vec[curidx1*2];
 
  551             vec2 = &p_vec[curidx2*2];
 
  552             e1 = copysignf(vec1[0] * IQ, in[
i+0]);
 
  553             e2 = copysignf(vec1[1] * IQ, in[
i+1]);
 
  554             e3 = copysignf(vec2[0] * IQ, in[
i+2]);
 
  555             e4 = copysignf(vec2[1] * IQ, in[
i+3]);
 
  563                 qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
 
  570 static void quantize_and_encode_band_cost_UPAIR12_mips(
struct AACEncContext *
s,
 
  572                                                        const float *scaled, 
int size, 
int scale_idx,
 
  573                                                        int cb, 
const float lambda, 
const float uplim,
 
  574                                                        int *
bits, 
float *energy, 
const float ROUNDING)
 
  579     int qc1, qc2, qc3, qc4;
 
  580     float qenergy = 0.0f;
 
  588     for (
i = 0; 
i < 
size; 
i += 4) {
 
  589         int curidx1, curidx2, sign1, count1, sign2, count2;
 
  590         int *in_int = (
int *)&in[
i];
 
  592         unsigned int v_codes;
 
  594         const float *vec1, *vec2;
 
  603             ".set noreorder                         \n\t" 
  605             "ori    %[t4],      $zero,      12      \n\t" 
  606             "ori    %[sign1],   $zero,      0       \n\t" 
  607             "ori    %[sign2],   $zero,      0       \n\t" 
  608             "slt    %[t0],      %[t4],      %[qc1]  \n\t" 
  609             "slt    %[t1],      %[t4],      %[qc2]  \n\t" 
  610             "slt    %[t2],      %[t4],      %[qc3]  \n\t" 
  611             "slt    %[t3],      %[t4],      %[qc4]  \n\t" 
  612             "movn   %[qc1],     %[t4],      %[t0]   \n\t" 
  613             "movn   %[qc2],     %[t4],      %[t1]   \n\t" 
  614             "movn   %[qc3],     %[t4],      %[t2]   \n\t" 
  615             "movn   %[qc4],     %[t4],      %[t3]   \n\t" 
  616             "lw     %[t0],      0(%[in_int])        \n\t" 
  617             "lw     %[t1],      4(%[in_int])        \n\t" 
  618             "lw     %[t2],      8(%[in_int])        \n\t" 
  619             "lw     %[t3],      12(%[in_int])       \n\t" 
  620             "slt    %[t0],      %[t0],      $zero   \n\t" 
  621             "movn   %[sign1],   %[t0],      %[qc1]  \n\t" 
  622             "slt    %[t2],      %[t2],      $zero   \n\t" 
  623             "movn   %[sign2],   %[t2],      %[qc3]  \n\t" 
  624             "slt    %[t1],      %[t1],      $zero   \n\t" 
  625             "sll    %[t0],      %[sign1],   1       \n\t" 
  626             "or     %[t0],      %[t0],      %[t1]   \n\t" 
  627             "movn   %[sign1],   %[t0],      %[qc2]  \n\t" 
  628             "slt    %[t3],      %[t3],      $zero   \n\t" 
  629             "sll    %[t0],      %[sign2],   1       \n\t" 
  630             "or     %[t0],      %[t0],      %[t3]   \n\t" 
  631             "movn   %[sign2],   %[t0],      %[qc4]  \n\t" 
  632             "slt    %[count1],  $zero,      %[qc1]  \n\t" 
  633             "slt    %[t1],      $zero,      %[qc2]  \n\t" 
  634             "slt    %[count2],  $zero,      %[qc3]  \n\t" 
  635             "slt    %[t2],      $zero,      %[qc4]  \n\t" 
  636             "addu   %[count1],  %[count1],  %[t1]   \n\t" 
  637             "addu   %[count2],  %[count2],  %[t2]   \n\t" 
  641             : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
 
  642               [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
 
  643               [sign1]
"=&r"(sign1), [count1]
"=&r"(count1),
 
  644               [sign2]
"=&r"(sign2), [count2]
"=&r"(count2),
 
  647             : [in_int]
"r"(in_int)
 
  654         v_codes = (p_codes[curidx1] << count1) | sign1;
 
  655         v_bits  = p_bits[curidx1] + count1;
 
  661         v_codes = (p_codes[curidx2] << count2) | sign2;
 
  662         v_bits  = p_bits[curidx2] + count2;
 
  667             vec1 = &p_vec[curidx1*2];
 
  668             vec2 = &p_vec[curidx2*2];
 
  669             e1 = copysignf(vec1[0] * IQ, in[
i+0]);
 
  670             e2 = copysignf(vec1[1] * IQ, in[
i+1]);
 
  671             e3 = copysignf(vec2[0] * IQ, in[
i+2]);
 
  672             e4 = copysignf(vec2[1] * IQ, in[
i+3]);
 
  680                 qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
 
  687 static void quantize_and_encode_band_cost_ESC_mips(
struct AACEncContext *
s,
 
  689                                                    const float *scaled, 
int size, 
int scale_idx,
 
  690                                                    int cb, 
const float lambda, 
const float uplim,
 
  691                                                    int *
bits, 
float *energy, 
const float ROUNDING)
 
  696     int qc1, qc2, qc3, qc4;
 
  697     float qenergy = 0.0f;
 
  707         for (
i = 0; 
i < 
size; 
i += 4) {
 
  708             int curidx, curidx2, sign1, count1, sign2, count2;
 
  709             int *in_int = (
int *)&in[
i];
 
  711             unsigned int v_codes;
 
  713             const float *vec1, *vec2;
 
  715             qc1 = scaled[
i  ] * Q34 + ROUNDING;
 
  716             qc2 = scaled[
i+1] * Q34 + ROUNDING;
 
  717             qc3 = scaled[
i+2] * Q34 + ROUNDING;
 
  718             qc4 = scaled[
i+3] * Q34 + ROUNDING;
 
  722                 ".set noreorder                             \n\t" 
  724                 "ori        %[t4],      $zero,      16      \n\t" 
  725                 "ori        %[sign1],   $zero,      0       \n\t" 
  726                 "ori        %[sign2],   $zero,      0       \n\t" 
  727                 "slt        %[t0],      %[t4],      %[qc1]  \n\t" 
  728                 "slt        %[t1],      %[t4],      %[qc2]  \n\t" 
  729                 "slt        %[t2],      %[t4],      %[qc3]  \n\t" 
  730                 "slt        %[t3],      %[t4],      %[qc4]  \n\t" 
  731                 "movn       %[qc1],     %[t4],      %[t0]   \n\t" 
  732                 "movn       %[qc2],     %[t4],      %[t1]   \n\t" 
  733                 "movn       %[qc3],     %[t4],      %[t2]   \n\t" 
  734                 "movn       %[qc4],     %[t4],      %[t3]   \n\t" 
  735                 "lw         %[t0],      0(%[in_int])        \n\t" 
  736                 "lw         %[t1],      4(%[in_int])        \n\t" 
  737                 "lw         %[t2],      8(%[in_int])        \n\t" 
  738                 "lw         %[t3],      12(%[in_int])       \n\t" 
  739                 "slt        %[t0],      %[t0],      $zero   \n\t" 
  740                 "movn       %[sign1],   %[t0],      %[qc1]  \n\t" 
  741                 "slt        %[t2],      %[t2],      $zero   \n\t" 
  742                 "movn       %[sign2],   %[t2],      %[qc3]  \n\t" 
  743                 "slt        %[t1],      %[t1],      $zero   \n\t" 
  744                 "sll        %[t0],      %[sign1],   1       \n\t" 
  745                 "or         %[t0],      %[t0],      %[t1]   \n\t" 
  746                 "movn       %[sign1],   %[t0],      %[qc2]  \n\t" 
  747                 "slt        %[t3],      %[t3],      $zero   \n\t" 
  748                 "sll        %[t0],      %[sign2],   1       \n\t" 
  749                 "or         %[t0],      %[t0],      %[t3]   \n\t" 
  750                 "movn       %[sign2],   %[t0],      %[qc4]  \n\t" 
  751                 "slt        %[count1],  $zero,      %[qc1]  \n\t" 
  752                 "slt        %[t1],      $zero,      %[qc2]  \n\t" 
  753                 "slt        %[count2],  $zero,      %[qc3]  \n\t" 
  754                 "slt        %[t2],      $zero,      %[qc4]  \n\t" 
  755                 "addu       %[count1],  %[count1],  %[t1]   \n\t" 
  756                 "addu       %[count2],  %[count2],  %[t2]   \n\t" 
  760                 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
 
  761                   [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
 
  762                   [sign1]
"=&r"(sign1), [count1]
"=&r"(count1),
 
  763                   [sign2]
"=&r"(sign2), [count2]
"=&r"(count2),
 
  766                 : [in_int]
"r"(in_int)
 
  775             v_codes = (p_codes[curidx] << count1) | sign1;
 
  776             v_bits  = p_bits[curidx] + count1;
 
  779             v_codes = (p_codes[curidx2] << count2) | sign2;
 
  780             v_bits  = p_bits[curidx2] + count2;
 
  785                 vec1 = &p_vectors[curidx*2 ];
 
  786                 vec2 = &p_vectors[curidx2*2];
 
  787                 e1 = copysignf(vec1[0] * IQ, in[
i+0]);
 
  788                 e2 = copysignf(vec1[1] * IQ, in[
i+1]);
 
  789                 e3 = copysignf(vec2[0] * IQ, in[
i+2]);
 
  790                 e4 = copysignf(vec2[1] * IQ, in[
i+3]);
 
  798                     qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
 
  802         for (
i = 0; 
i < 
size; 
i += 4) {
 
  803             int curidx, curidx2, sign1, count1, sign2, count2;
 
  804             int *in_int = (
int *)&in[
i];
 
  806             unsigned int v_codes;
 
  810             qc1 = scaled[
i  ] * Q34 + ROUNDING;
 
  811             qc2 = scaled[
i+1] * Q34 + ROUNDING;
 
  812             qc3 = scaled[
i+2] * Q34 + ROUNDING;
 
  813             qc4 = scaled[
i+3] * Q34 + ROUNDING;
 
  817                 ".set noreorder                             \n\t" 
  819                 "ori        %[t4],      $zero,      16      \n\t" 
  820                 "ori        %[sign1],   $zero,      0       \n\t" 
  821                 "ori        %[sign2],   $zero,      0       \n\t" 
  822                 "shll_s.w   %[c1],      %[qc1],     18      \n\t" 
  823                 "shll_s.w   %[c2],      %[qc2],     18      \n\t" 
  824                 "shll_s.w   %[c3],      %[qc3],     18      \n\t" 
  825                 "shll_s.w   %[c4],      %[qc4],     18      \n\t" 
  826                 "srl        %[c1],      %[c1],      18      \n\t" 
  827                 "srl        %[c2],      %[c2],      18      \n\t" 
  828                 "srl        %[c3],      %[c3],      18      \n\t" 
  829                 "srl        %[c4],      %[c4],      18      \n\t" 
  830                 "slt        %[t0],      %[t4],      %[qc1]  \n\t" 
  831                 "slt        %[t1],      %[t4],      %[qc2]  \n\t" 
  832                 "slt        %[t2],      %[t4],      %[qc3]  \n\t" 
  833                 "slt        %[t3],      %[t4],      %[qc4]  \n\t" 
  834                 "movn       %[qc1],     %[t4],      %[t0]   \n\t" 
  835                 "movn       %[qc2],     %[t4],      %[t1]   \n\t" 
  836                 "movn       %[qc3],     %[t4],      %[t2]   \n\t" 
  837                 "movn       %[qc4],     %[t4],      %[t3]   \n\t" 
  838                 "lw         %[t0],      0(%[in_int])        \n\t" 
  839                 "lw         %[t1],      4(%[in_int])        \n\t" 
  840                 "lw         %[t2],      8(%[in_int])        \n\t" 
  841                 "lw         %[t3],      12(%[in_int])       \n\t" 
  842                 "slt        %[t0],      %[t0],      $zero   \n\t" 
  843                 "movn       %[sign1],   %[t0],      %[qc1]  \n\t" 
  844                 "slt        %[t2],      %[t2],      $zero   \n\t" 
  845                 "movn       %[sign2],   %[t2],      %[qc3]  \n\t" 
  846                 "slt        %[t1],      %[t1],      $zero   \n\t" 
  847                 "sll        %[t0],      %[sign1],   1       \n\t" 
  848                 "or         %[t0],      %[t0],      %[t1]   \n\t" 
  849                 "movn       %[sign1],   %[t0],      %[qc2]  \n\t" 
  850                 "slt        %[t3],      %[t3],      $zero   \n\t" 
  851                 "sll        %[t0],      %[sign2],   1       \n\t" 
  852                 "or         %[t0],      %[t0],      %[t3]   \n\t" 
  853                 "movn       %[sign2],   %[t0],      %[qc4]  \n\t" 
  854                 "slt        %[count1],  $zero,      %[qc1]  \n\t" 
  855                 "slt        %[t1],      $zero,      %[qc2]  \n\t" 
  856                 "slt        %[count2],  $zero,      %[qc3]  \n\t" 
  857                 "slt        %[t2],      $zero,      %[qc4]  \n\t" 
  858                 "addu       %[count1],  %[count1],  %[t1]   \n\t" 
  859                 "addu       %[count2],  %[count2],  %[t2]   \n\t" 
  863                 : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
 
  864                   [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
 
  865                   [sign1]
"=&r"(sign1), [count1]
"=&r"(count1),
 
  866                   [sign2]
"=&r"(sign2), [count2]
"=&r"(count2),
 
  868                   [c3]
"=&r"(c3), [c4]
"=&r"(c4),
 
  871                 : [in_int]
"r"(in_int)
 
  881             v_codes = (p_codes[curidx] << count1) | sign1;
 
  882             v_bits  = p_bits[curidx] + count1;
 
  885             if (p_vectors[curidx*2  ] == 64.0
f) {
 
  887                 v_codes = (((1 << (
len - 3)) - 2) << 
len) | (
c1 & ((1 << 
len) - 1));
 
  890             if (p_vectors[curidx*2+1] == 64.0
f) {
 
  892                 v_codes = (((1 << (
len - 3)) - 2) << 
len) | (
c2 & ((1 << 
len) - 1));
 
  896             v_codes = (p_codes[curidx2] << count2) | sign2;
 
  897             v_bits  = p_bits[curidx2] + count2;
 
  900             if (p_vectors[curidx2*2  ] == 64.0
f) {
 
  902                 v_codes = (((1 << (
len - 3)) - 2) << 
len) | (c3 & ((1 << 
len) - 1));
 
  905             if (p_vectors[curidx2*2+1] == 64.0
f) {
 
  907                 v_codes = (((1 << (
len - 3)) - 2) << 
len) | (c4 & ((1 << 
len) - 1));
 
  912                 float e1, e2, e3, e4;
 
  915                 e3 = copysignf(c3 * 
cbrtf(c3) * IQ, in[
i+2]);
 
  916                 e4 = copysignf(c4 * 
cbrtf(c4) * IQ, in[
i+3]);
 
  924                     qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
 
  932 static void quantize_and_encode_band_cost_NONE_mips(
struct AACEncContext *
s,
 
  934                                                          const float *scaled, 
int size, 
int scale_idx,
 
  935                                                          int cb, 
const float lambda, 
const float uplim,
 
  936                                                          int *
bits, 
float *energy, 
const float ROUNDING) {
 
  940 static void quantize_and_encode_band_cost_ZERO_mips(
struct AACEncContext *
s,
 
  942                                                          const float *scaled, 
int size, 
int scale_idx,
 
  943                                                          int cb, 
const float lambda, 
const float uplim,
 
  944                                                          int *
bits, 
float *energy, 
const float ROUNDING) {
 
  949         for (
i = 0; 
i < 
size; 
i += 4) {
 
  962                                                          const float *scaled, 
int size, 
int scale_idx,
 
  963                                                          int cb, 
const float lambda, 
const float uplim,
 
  964                                                          int *
bits, 
float *energy, 
const float ROUNDING) = {
 
  965     quantize_and_encode_band_cost_ZERO_mips,
 
  966     quantize_and_encode_band_cost_SQUAD_mips,
 
  967     quantize_and_encode_band_cost_SQUAD_mips,
 
  968     quantize_and_encode_band_cost_UQUAD_mips,
 
  969     quantize_and_encode_band_cost_UQUAD_mips,
 
  970     quantize_and_encode_band_cost_SPAIR_mips,
 
  971     quantize_and_encode_band_cost_SPAIR_mips,
 
  972     quantize_and_encode_band_cost_UPAIR7_mips,
 
  973     quantize_and_encode_band_cost_UPAIR7_mips,
 
  974     quantize_and_encode_band_cost_UPAIR12_mips,
 
  975     quantize_and_encode_band_cost_UPAIR12_mips,
 
  976     quantize_and_encode_band_cost_ESC_mips,
 
  977     quantize_and_encode_band_cost_NONE_mips, 
 
  978     quantize_and_encode_band_cost_ZERO_mips,
 
  979     quantize_and_encode_band_cost_ZERO_mips,
 
  980     quantize_and_encode_band_cost_ZERO_mips,
 
  /*
   * Dispatch helper: selects the entry at index cb of
   * quantize_and_encode_band_cost_arr and calls it, forwarding all thirteen
   * arguments unchanged. The cb argument appears twice in the expansion
   * (once as the table index, once as a call argument), so it is evaluated
   * twice -- pass a side-effect-free expression.
   * No bounds check is applied to cb here.
   */
  983 #define quantize_and_encode_band_cost(                                       \ 
  984                                 s, pb, in, out, scaled, size, scale_idx, cb, \ 
  985                                 lambda, uplim, bits, energy, ROUNDING)       \ 
  986     quantize_and_encode_band_cost_arr[cb](                                   \ 
  987                                 s, pb, in, out, scaled, size, scale_idx, cb, \ 
  988                                 lambda, uplim, bits, energy, ROUNDING) 
  991                                           const float *in, 
float *
out, 
int size, 
int scale_idx,
 
  992                                           int cb, 
const float lambda, 
int rtz)
 
 1003                                         const float *scaled, 
int size, 
int scale_idx,
 
 1004                                         int cb, 
const float lambda, 
const float uplim,
 
 1012                                         const float *scaled, 
int size, 
int scale_idx,
 
 1013                                         int cb, 
const float lambda, 
const float uplim,
 
 /*
  * get_band_numbits_SQUAD_mips: walks the band four coefficients at a time
  * (i advances by 4 while i < size) and adds one p_bits[] table entry per
  * group to curbits.
  *
  * NOTE(review): this listing is missing source lines (remaining parameters,
  * locals, the opening/closing of the asm statement, the curidx computation
  * and the return statement). The comments below describe only what is
  * visible here.
  */
 1020 static float get_band_numbits_SQUAD_mips(
struct AACEncContext *
s,
 
 1022                                          const float *scaled, 
int size, 
int scale_idx,
 
 1023                                          int cb, 
const float lambda, 
const float uplim,
 
 1028     int qc1, qc2, qc3, qc4;
 
 1033     for (
i = 0; 
i < 
size; 
i += 4) {
 
         /* Integer view of in[i..i+3] so the asm can load the raw 32-bit words. */
 1035         int *in_int = (
int *)&in[
i];
 
 1045             ".set noreorder                 \n\t" 
             /* qcN = (0 < qcN): each qcN becomes 1 if it was positive, else 0. */
 1047             "slt    %[qc1], $zero,  %[qc1]  \n\t" 
 1048             "slt    %[qc2], $zero,  %[qc2]  \n\t" 
 1049             "slt    %[qc3], $zero,  %[qc3]  \n\t" 
 1050             "slt    %[qc4], $zero,  %[qc4]  \n\t" 
             /* Load the four raw input words and keep only bit 31 of each
              * (the sign bit, presumably: in[] looks like a float array, but
              * its declaration is not visible here). */
 1051             "lw     %[t0],  0(%[in_int])    \n\t" 
 1052             "lw     %[t1],  4(%[in_int])    \n\t" 
 1053             "lw     %[t2],  8(%[in_int])    \n\t" 
 1054             "lw     %[t3],  12(%[in_int])   \n\t" 
 1055             "srl    %[t0],  %[t0],  31      \n\t" 
 1056             "srl    %[t1],  %[t1],  31      \n\t" 
 1057             "srl    %[t2],  %[t2],  31      \n\t" 
 1058             "srl    %[t3],  %[t3],  31      \n\t" 
             /* t4..t7 = -qc1..-qc4; movn then replaces qcN by its negation
              * only when the matching bit 31 was set. */
 1059             "subu   %[t4],  $zero,  %[qc1]  \n\t" 
 1060             "subu   %[t5],  $zero,  %[qc2]  \n\t" 
 1061             "subu   %[t6],  $zero,  %[qc3]  \n\t" 
 1062             "subu   %[t7],  $zero,  %[qc4]  \n\t" 
 1063             "movn   %[qc1], %[t4],  %[t0]   \n\t" 
 1064             "movn   %[qc2], %[t5],  %[t1]   \n\t" 
 1065             "movn   %[qc3], %[t6],  %[t2]   \n\t" 
 1066             "movn   %[qc4], %[t7],  %[t3]   \n\t" 
             /* qc1..qc4 are read-write operands; in_int is an input pointer. */
 1070             : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
 
 1071               [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
 
 1074             : [in_int]
"r"(in_int)
 
         /* curidx is computed on lines that are not part of this listing. */
 1087         curbits += p_bits[curidx];
 
 /*
  * get_band_numbits_UQUAD_mips: walks the band four coefficients at a time
  * (i advances by 4 while i < size) and adds, per group, one p_bits[] entry
  * plus one uquad_sign_bits[] entry to curbits. No sign is applied to
  * qc1..qc4 in the visible asm; they are only clamped.
  *
  * NOTE(review): this listing is missing source lines (remaining parameters,
  * locals, the opening/closing of the asm statement, the curidx computation
  * and the return statement). The comments below describe only what is
  * visible here.
  */
 1092 static float get_band_numbits_UQUAD_mips(
struct AACEncContext *
s,
 
 1094                                          const float *scaled, 
int size, 
int scale_idx,
 
 1095                                          int cb, 
const float lambda, 
const float uplim,
 
 1101     int qc1, qc2, qc3, qc4;
 
 1105     for (
i = 0; 
i < 
size; 
i += 4) {
 
 1116             ".set noreorder                 \n\t" 
             /* t4 = 2 is the upper limit; tN = (2 < qcN) flags values above it. */
 1118             "ori    %[t4],  $zero,  2       \n\t" 
 1119             "slt    %[t0],  %[t4],  %[qc1]  \n\t" 
 1120             "slt    %[t1],  %[t4],  %[qc2]  \n\t" 
 1121             "slt    %[t2],  %[t4],  %[qc3]  \n\t" 
 1122             "slt    %[t3],  %[t4],  %[qc4]  \n\t" 
             /* Clamp: every flagged qcN is overwritten with 2. */
 1123             "movn   %[qc1], %[t4],  %[t0]   \n\t" 
 1124             "movn   %[qc2], %[t4],  %[t1]   \n\t" 
 1125             "movn   %[qc3], %[t4],  %[t2]   \n\t" 
 1126             "movn   %[qc4], %[t4],  %[t3]   \n\t" 
             /* qc1..qc4 are read-write operands of the asm statement. */
 1130             : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
 
 1131               [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
 
         /* curidx is computed on lines that are not part of this listing.
          * The same index is used for both table lookups. */
 1144         curbits += p_bits[curidx];
 
 1145         curbits += uquad_sign_bits[curidx];
 
 /*
  * get_band_numbits_SPAIR_mips: walks the band four coefficients at a time
  * (i advances by 4 while i < size). Each group yields two table indices,
  * curidx and curidx2, and the two matching p_bits[] entries are added to
  * curbits.
  *
  * NOTE(review): this listing is missing source lines (remaining parameters,
  * locals, the opening/closing of the asm statement, most of the index
  * computation and the return statement). The comments below describe only
  * what is visible here.
  */
 1150 static float get_band_numbits_SPAIR_mips(
struct AACEncContext *
s,
 
 1152                                          const float *scaled, 
int size, 
int scale_idx,
 
 1153                                          int cb, 
const float lambda, 
const float uplim,
 
 1158     int qc1, qc2, qc3, qc4;
 
 1163     for (
i = 0; 
i < 
size; 
i += 4) {
 
 1164         int curidx, curidx2;
 
         /* Integer view of in[i..i+3] so the asm can load the raw 32-bit words. */
 1165         int *in_int = (
int *)&in[
i];
 
 1175             ".set noreorder                 \n\t" 
             /* t4 = 4 is the upper limit; tN = (4 < qcN) flags values above it. */
 1177             "ori    %[t4],  $zero,  4       \n\t" 
 1178             "slt    %[t0],  %[t4],  %[qc1]  \n\t" 
 1179             "slt    %[t1],  %[t4],  %[qc2]  \n\t" 
 1180             "slt    %[t2],  %[t4],  %[qc3]  \n\t" 
 1181             "slt    %[t3],  %[t4],  %[qc4]  \n\t" 
             /* Clamp: every flagged qcN is overwritten with 4. */
 1182             "movn   %[qc1], %[t4],  %[t0]   \n\t" 
 1183             "movn   %[qc2], %[t4],  %[t1]   \n\t" 
 1184             "movn   %[qc3], %[t4],  %[t2]   \n\t" 
 1185             "movn   %[qc4], %[t4],  %[t3]   \n\t" 
             /* Load the four raw input words and keep only bit 31 of each
              * (the sign bit, presumably: in[] looks like a float array, but
              * its declaration is not visible here). t0..t3 are reused. */
 1186             "lw     %[t0],  0(%[in_int])    \n\t" 
 1187             "lw     %[t1],  4(%[in_int])    \n\t" 
 1188             "lw     %[t2],  8(%[in_int])    \n\t" 
 1189             "lw     %[t3],  12(%[in_int])   \n\t" 
 1190             "srl    %[t0],  %[t0],  31      \n\t" 
 1191             "srl    %[t1],  %[t1],  31      \n\t" 
 1192             "srl    %[t2],  %[t2],  31      \n\t" 
 1193             "srl    %[t3],  %[t3],  31      \n\t" 
             /* t4..t7 = -qc1..-qc4; movn then replaces qcN by its negation
              * only when the matching bit 31 was set. */
 1194             "subu   %[t4],  $zero,  %[qc1]  \n\t" 
 1195             "subu   %[t5],  $zero,  %[qc2]  \n\t" 
 1196             "subu   %[t6],  $zero,  %[qc3]  \n\t" 
 1197             "subu   %[t7],  $zero,  %[qc4]  \n\t" 
 1198             "movn   %[qc1], %[t4],  %[t0]   \n\t" 
 1199             "movn   %[qc2], %[t5],  %[t1]   \n\t" 
 1200             "movn   %[qc3], %[t6],  %[t2]   \n\t" 
 1201             "movn   %[qc4], %[t7],  %[t3]   \n\t" 
             /* qc1..qc4 are read-write operands; in_int is an input pointer. */
 1205             : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
 
 1206               [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
 
 1209             : [in_int]
"r"(in_int)
 
         /* Last visible step of the index computation: qc4 plus a bias of 40
          * is added to curidx2. Earlier steps for curidx/curidx2 are missing
          * from this listing. */
 1217         curidx2 += qc4 + 40;
 
 1219         curbits += p_bits[curidx] + p_bits[curidx2];
 
 /*
  * get_band_numbits_UPAIR7_mips: walks the band four coefficients at a time
  * (i advances by 4 while i < size). Each group yields two table indices,
  * curidx and curidx2; the visible terms added to curbits are p_bits[curidx],
  * upair7_sign_bits[curidx] and upair7_sign_bits[curidx2]. No sign is
  * applied to qc1..qc4 in the visible asm; they are only clamped.
  *
  * NOTE(review): this listing is missing source lines (remaining parameters,
  * locals, the opening/closing of the asm statement, the index computation,
  * one term of the curbits sum and the return statement). The comments below
  * describe only what is visible here.
  */
 1224 static float get_band_numbits_UPAIR7_mips(
struct AACEncContext *
s,
 
 1226                                           const float *scaled, 
int size, 
int scale_idx,
 
 1227                                           int cb, 
const float lambda, 
const float uplim,
 
 1232     int qc1, qc2, qc3, qc4;
 
 1237     for (
i = 0; 
i < 
size; 
i += 4) {
 
 1238         int curidx, curidx2;
 
 1248             ".set noreorder                 \n\t" 
             /* t4 = 7 is the upper limit; tN = (7 < qcN) flags values above it. */
 1250             "ori    %[t4],  $zero,  7       \n\t" 
 1251             "slt    %[t0],  %[t4],  %[qc1]  \n\t" 
 1252             "slt    %[t1],  %[t4],  %[qc2]  \n\t" 
 1253             "slt    %[t2],  %[t4],  %[qc3]  \n\t" 
 1254             "slt    %[t3],  %[t4],  %[qc4]  \n\t" 
             /* Clamp: every flagged qcN is overwritten with 7. */
 1255             "movn   %[qc1], %[t4],  %[t0]   \n\t" 
 1256             "movn   %[qc2], %[t4],  %[t1]   \n\t" 
 1257             "movn   %[qc3], %[t4],  %[t2]   \n\t" 
 1258             "movn   %[qc4], %[t4],  %[t3]   \n\t" 
             /* qc1..qc4 are read-write operands of the asm statement. */
 1262             : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
 
 1263               [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
 
         /* curidx and curidx2 are computed on lines that are not part of this
          * listing; one term of the sum below is missing as well. */
 1274         curbits += p_bits[curidx] +
 
 1275                    upair7_sign_bits[curidx] +
 
 1277                    upair7_sign_bits[curidx2];
 
 /*
  * get_band_numbits_UPAIR12_mips: walks the band four coefficients at a time
  * (i advances by 4 while i < size). Each group yields two table indices,
  * curidx and curidx2; the visible terms added to curbits are p_bits[curidx],
  * upair12_sign_bits[curidx] and upair12_sign_bits[curidx2]. No sign is
  * applied to qc1..qc4 in the visible asm; they are only clamped.
  *
  * NOTE(review): this listing is missing source lines (remaining parameters,
  * locals, the opening/closing of the asm statement, the index computation,
  * one term of the curbits sum and the return statement). The comments below
  * describe only what is visible here.
  */
 1282 static float get_band_numbits_UPAIR12_mips(
struct AACEncContext *
s,
 
 1284                                            const float *scaled, 
int size, 
int scale_idx,
 
 1285                                            int cb, 
const float lambda, 
const float uplim,
 
 1290     int qc1, qc2, qc3, qc4;
 
 1295     for (
i = 0; 
i < 
size; 
i += 4) {
 
 1296         int curidx, curidx2;
 
 1306             ".set noreorder                 \n\t" 
             /* t4 = 12 is the upper limit; tN = (12 < qcN) flags values above it. */
 1308             "ori    %[t4],  $zero,  12      \n\t" 
 1309             "slt    %[t0],  %[t4],  %[qc1]  \n\t" 
 1310             "slt    %[t1],  %[t4],  %[qc2]  \n\t" 
 1311             "slt    %[t2],  %[t4],  %[qc3]  \n\t" 
 1312             "slt    %[t3],  %[t4],  %[qc4]  \n\t" 
             /* Clamp: every flagged qcN is overwritten with 12. */
 1313             "movn   %[qc1], %[t4],  %[t0]   \n\t" 
 1314             "movn   %[qc2], %[t4],  %[t1]   \n\t" 
 1315             "movn   %[qc3], %[t4],  %[t2]   \n\t" 
 1316             "movn   %[qc4], %[t4],  %[t3]   \n\t" 
             /* qc1..qc4 are read-write operands of the asm statement. */
 1320             : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
 
 1321               [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
 
         /* curidx and curidx2 are computed on lines that are not part of this
          * listing; one term of the sum below is missing as well. */
 1332         curbits += p_bits[curidx] +
 
 1334                    upair12_sign_bits[curidx] +
 
 1335                    upair12_sign_bits[curidx2];
 
 1342                                        const float *scaled, 
int size, 
int scale_idx,
 
 1343                                        int cb, 
const float lambda, 
const float uplim,
 
 1348     int qc1, qc2, qc3, qc4;
 
 1353     for (
i = 0; 
i < 
size; 
i += 4) {
 
 1354         int curidx, curidx2;
 
 1355         int cond0, cond1, cond2, cond3;
 
 1366             ".set noreorder                             \n\t" 
 1368             "ori        %[t4],      $zero,  15          \n\t" 
 1369             "ori        %[t5],      $zero,  16          \n\t" 
 1370             "shll_s.w   %[c1],      %[qc1], 18          \n\t" 
 1371             "shll_s.w   %[c2],      %[qc2], 18          \n\t" 
 1372             "shll_s.w   %[c3],      %[qc3], 18          \n\t" 
 1373             "shll_s.w   %[c4],      %[qc4], 18          \n\t" 
 1374             "srl        %[c1],      %[c1],  18          \n\t" 
 1375             "srl        %[c2],      %[c2],  18          \n\t" 
 1376             "srl        %[c3],      %[c3],  18          \n\t" 
 1377             "srl        %[c4],      %[c4],  18          \n\t" 
 1378             "slt        %[cond0],   %[t4],  %[qc1]      \n\t" 
 1379             "slt        %[cond1],   %[t4],  %[qc2]      \n\t" 
 1380             "slt        %[cond2],   %[t4],  %[qc3]      \n\t" 
 1381             "slt        %[cond3],   %[t4],  %[qc4]      \n\t" 
 1382             "movn       %[qc1],     %[t5],  %[cond0]    \n\t" 
 1383             "movn       %[qc2],     %[t5],  %[cond1]    \n\t" 
 1384             "movn       %[qc3],     %[t5],  %[cond2]    \n\t" 
 1385             "movn       %[qc4],     %[t5],  %[cond3]    \n\t" 
 1386             "ori        %[t5],      $zero,  31          \n\t" 
 1387             "clz        %[c1],      %[c1]               \n\t" 
 1388             "clz        %[c2],      %[c2]               \n\t" 
 1389             "clz        %[c3],      %[c3]               \n\t" 
 1390             "clz        %[c4],      %[c4]               \n\t" 
 1391             "subu       %[c1],      %[t5],  %[c1]       \n\t" 
 1392             "subu       %[c2],      %[t5],  %[c2]       \n\t" 
 1393             "subu       %[c3],      %[t5],  %[c3]       \n\t" 
 1394             "subu       %[c4],      %[t5],  %[c4]       \n\t" 
 1395             "sll        %[c1],      %[c1],  1           \n\t" 
 1396             "sll        %[c2],      %[c2],  1           \n\t" 
 1397             "sll        %[c3],      %[c3],  1           \n\t" 
 1398             "sll        %[c4],      %[c4],  1           \n\t" 
 1399             "addiu      %[c1],      %[c1],  -3          \n\t" 
 1400             "addiu      %[c2],      %[c2],  -3          \n\t" 
 1401             "addiu      %[c3],      %[c3],  -3          \n\t" 
 1402             "addiu      %[c4],      %[c4],  -3          \n\t" 
 1403             "subu       %[cond0],   $zero,  %[cond0]    \n\t" 
 1404             "subu       %[cond1],   $zero,  %[cond1]    \n\t" 
 1405             "subu       %[cond2],   $zero,  %[cond2]    \n\t" 
 1406             "subu       %[cond3],   $zero,  %[cond3]    \n\t" 
 1407             "and        %[c1],      %[c1],  %[cond0]    \n\t" 
 1408             "and        %[c2],      %[c2],  %[cond1]    \n\t" 
 1409             "and        %[c3],      %[c3],  %[cond2]    \n\t" 
 1410             "and        %[c4],      %[c4],  %[cond3]    \n\t" 
 1414             : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
 
 1415               [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
 
 1416               [cond0]
"=&r"(cond0), [cond1]
"=&r"(cond1),
 
 1417               [cond2]
"=&r"(cond2), [cond3]
"=&r"(cond3),
 
 1419               [c3]
"=&r"(c3), [c4]
"=&r"(c4),
 
 1429         curbits += p_bits[curidx];
 
 1430         curbits += esc_sign_bits[curidx];
 
 1431         curbits += p_bits[curidx2];
 
 1432         curbits += esc_sign_bits[curidx2];
 
 /*
  * get_band_numbits_arr: constant table of pointers to the
  * get_band_numbits_*_mips functions, all sharing one signature.
  * The get_band_numbits() macro indexes this table with cb.
  *
  * Sixteen entries are visible, in this order: ZERO, SQUAD x2, UQUAD x2,
  * SPAIR x2, UPAIR7 x2, UPAIR12 x2, ESC, NONE, ZERO x3.
  *
  * NOTE(review): the end of the parameter list and the closing of the
  * initializer are missing from this listing; the entry order above assumes
  * no entry was dropped (the embedded line numbers 1447..1462 are
  * consecutive).
  */
 1442 static float (*
const get_band_numbits_arr[])(
struct AACEncContext *
s,
 
 1444                                              const float *scaled, 
int size, 
int scale_idx,
 
 1445                                              int cb, 
const float lambda, 
const float uplim,
 
 1447     get_band_numbits_ZERO_mips,
 
 1448     get_band_numbits_SQUAD_mips,
 
 1449     get_band_numbits_SQUAD_mips,
 
 1450     get_band_numbits_UQUAD_mips,
 
 1451     get_band_numbits_UQUAD_mips,
 
 1452     get_band_numbits_SPAIR_mips,
 
 1453     get_band_numbits_SPAIR_mips,
 
 1454     get_band_numbits_UPAIR7_mips,
 
 1455     get_band_numbits_UPAIR7_mips,
 
 1456     get_band_numbits_UPAIR12_mips,
 
 1457     get_band_numbits_UPAIR12_mips,
 
 1458     get_band_numbits_ESC_mips,
 
 1459     get_band_numbits_NONE_mips, 
 
 1460     get_band_numbits_ZERO_mips,
 
 1461     get_band_numbits_ZERO_mips,
 
 1462     get_band_numbits_ZERO_mips,
 
 /*
  * Dispatch helper: selects an entry of get_band_numbits_arr[] using cb as
  * the index and calls it with the full argument list, passing cb through
  * unchanged.
  * Note that cb is expanded twice (once as the index, once as an argument),
  * so callers should not pass an expression with side effects.
  */
 1465 #define get_band_numbits(                                  \ 
 1466                                 s, pb, in, scaled, size, scale_idx, cb, \ 
 1467                                 lambda, uplim, bits)                    \ 
 1468     get_band_numbits_arr[cb](                              \ 
 1469                                 s, pb, in, scaled, size, scale_idx, cb, \ 
 1470                                 lambda, uplim, bits) 
 1473                                      const float *scaled, 
int size, 
int scale_idx,
 
 1474                                      int cb, 
const float lambda, 
const float uplim,
 
 1475                                      int *
bits, 
float *energy, 
int rtz)
 
 1486                                      const float *scaled, 
int size, 
int scale_idx,
 
 1487                                      int cb, 
const float lambda, 
const float uplim,
 
 1488                                      int *
bits, 
float *energy)
 
 1493     for (
i = 0; 
i < 
size; 
i += 4) {
 
 1494         cost += in[
i  ] * in[
i  ];
 
 1495         cost += in[
i+1] * in[
i+1];
 
 1496         cost += in[
i+2] * in[
i+2];
 
 1497         cost += in[
i+3] * in[
i+3];
 
 1508                                      const float *scaled, 
int size, 
int scale_idx,
 
 1509                                      int cb, 
const float lambda, 
const float uplim,
 
 1510                                      int *
bits, 
float *energy)
 
 1518                                       const float *scaled, 
int size, 
int scale_idx,
 
 1519                                       int cb, 
const float lambda, 
const float uplim,
 
 1520                                       int *
bits, 
float *energy)
 
 1526     float qenergy = 0.0f;
 
 1527     int qc1, qc2, qc3, qc4;
 
 1533     for (
i = 0; 
i < 
size; 
i += 4) {
 
 1536         int   *in_int = (
int   *)&in[
i];
 
 1537         float *in_pos = (
float *)&in[
i];
 
 1538         float di0, di1, di2, di3;
 
 1548             ".set noreorder                             \n\t" 
 1550             "slt        %[qc1], $zero,  %[qc1]          \n\t" 
 1551             "slt        %[qc2], $zero,  %[qc2]          \n\t" 
 1552             "slt        %[qc3], $zero,  %[qc3]          \n\t" 
 1553             "slt        %[qc4], $zero,  %[qc4]          \n\t" 
 1554             "lw         %[t0],  0(%[in_int])            \n\t" 
 1555             "lw         %[t1],  4(%[in_int])            \n\t" 
 1556             "lw         %[t2],  8(%[in_int])            \n\t" 
 1557             "lw         %[t3],  12(%[in_int])           \n\t" 
 1558             "srl        %[t0],  %[t0],  31              \n\t" 
 1559             "srl        %[t1],  %[t1],  31              \n\t" 
 1560             "srl        %[t2],  %[t2],  31              \n\t" 
 1561             "srl        %[t3],  %[t3],  31              \n\t" 
 1562             "subu       %[t4],  $zero,  %[qc1]          \n\t" 
 1563             "subu       %[t5],  $zero,  %[qc2]          \n\t" 
 1564             "subu       %[t6],  $zero,  %[qc3]          \n\t" 
 1565             "subu       %[t7],  $zero,  %[qc4]          \n\t" 
 1566             "movn       %[qc1], %[t4],  %[t0]           \n\t" 
 1567             "movn       %[qc2], %[t5],  %[t1]           \n\t" 
 1568             "movn       %[qc3], %[t6],  %[t2]           \n\t" 
 1569             "movn       %[qc4], %[t7],  %[t3]           \n\t" 
 1573             : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
 
 1574               [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
 
 1577             : [in_int]
"r"(in_int)
 
 1590         curbits += p_bits[curidx];
 
 1591         vec     = &p_codes[curidx*4];
 
 1593         qenergy += vec[0]*vec[0] + vec[1]*vec[1]
 
 1594                 +  vec[2]*vec[2] + vec[3]*vec[3];
 
 1598             ".set noreorder                             \n\t" 
 1600             "lwc1       $f0,    0(%[in_pos])            \n\t" 
 1601             "lwc1       $f1,    0(%[vec])               \n\t" 
 1602             "lwc1       $f2,    4(%[in_pos])            \n\t" 
 1603             "lwc1       $f3,    4(%[vec])               \n\t" 
 1604             "lwc1       $f4,    8(%[in_pos])            \n\t" 
 1605             "lwc1       $f5,    8(%[vec])               \n\t" 
 1606             "lwc1       $f6,    12(%[in_pos])           \n\t" 
 1607             "lwc1       $f7,    12(%[vec])              \n\t" 
 1608             "nmsub.s    %[di0], $f0,    $f1,    %[IQ]   \n\t" 
 1609             "nmsub.s    %[di1], $f2,    $f3,    %[IQ]   \n\t" 
 1610             "nmsub.s    %[di2], $f4,    $f5,    %[IQ]   \n\t" 
 1611             "nmsub.s    %[di3], $f6,    $f7,    %[IQ]   \n\t" 
 1615             : [di0]
"=&f"(di0), [di1]
"=&f"(di1),
 
 1616               [di2]
"=&f"(di2), [di3]
"=&f"(di3)
 
 1617             : [in_pos]
"r"(in_pos), [vec]
"r"(vec),
 
 1619             : 
"$f0", 
"$f1", 
"$f2", 
"$f3",
 
 1620               "$f4", 
"$f5", 
"$f6", 
"$f7",
 
 1624         cost += di0 * di0 + di1 * di1
 
 1625                 + di2 * di2 + di3 * di3;
 
 1631         *energy = qenergy * (IQ*IQ);
 
 1632     return cost * 
lambda + curbits;
 
 1637                                       const float *scaled, 
int size, 
int scale_idx,
 
 1638                                       int cb, 
const float lambda, 
const float uplim,
 
 1639                                       int *
bits, 
float *energy)
 
 1645     float qenergy = 0.0f;
 
 1647     int qc1, qc2, qc3, qc4;
 
 1652     for (
i = 0; 
i < 
size; 
i += 4) {
 
 1655         float *in_pos = (
float *)&in[
i];
 
 1656         float di0, di1, di2, di3;
 
 1666             ".set noreorder                             \n\t" 
 1668             "ori        %[t4],  $zero,  2               \n\t" 
 1669             "slt        %[t0],  %[t4],  %[qc1]          \n\t" 
 1670             "slt        %[t1],  %[t4],  %[qc2]          \n\t" 
 1671             "slt        %[t2],  %[t4],  %[qc3]          \n\t" 
 1672             "slt        %[t3],  %[t4],  %[qc4]          \n\t" 
 1673             "movn       %[qc1], %[t4],  %[t0]           \n\t" 
 1674             "movn       %[qc2], %[t4],  %[t1]           \n\t" 
 1675             "movn       %[qc3], %[t4],  %[t2]           \n\t" 
 1676             "movn       %[qc4], %[t4],  %[t3]           \n\t" 
 1680             : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
 
 1681               [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
 
 1694         curbits += p_bits[curidx];
 
 1695         curbits += uquad_sign_bits[curidx];
 
 1696         vec     = &p_codes[curidx*4];
 
 1698         qenergy += vec[0]*vec[0] + vec[1]*vec[1]
 
 1699                 +  vec[2]*vec[2] + vec[3]*vec[3];
 
 1703             ".set noreorder                             \n\t" 
 1705             "lwc1       %[di0], 0(%[in_pos])            \n\t" 
 1706             "lwc1       %[di1], 4(%[in_pos])            \n\t" 
 1707             "lwc1       %[di2], 8(%[in_pos])            \n\t" 
 1708             "lwc1       %[di3], 12(%[in_pos])           \n\t" 
 1709             "abs.s      %[di0], %[di0]                  \n\t" 
 1710             "abs.s      %[di1], %[di1]                  \n\t" 
 1711             "abs.s      %[di2], %[di2]                  \n\t" 
 1712             "abs.s      %[di3], %[di3]                  \n\t" 
 1713             "lwc1       $f0,    0(%[vec])               \n\t" 
 1714             "lwc1       $f1,    4(%[vec])               \n\t" 
 1715             "lwc1       $f2,    8(%[vec])               \n\t" 
 1716             "lwc1       $f3,    12(%[vec])              \n\t" 
 1717             "nmsub.s    %[di0], %[di0], $f0,    %[IQ]   \n\t" 
 1718             "nmsub.s    %[di1], %[di1], $f1,    %[IQ]   \n\t" 
 1719             "nmsub.s    %[di2], %[di2], $f2,    %[IQ]   \n\t" 
 1720             "nmsub.s    %[di3], %[di3], $f3,    %[IQ]   \n\t" 
 1724             : [di0]
"=&f"(di0), [di1]
"=&f"(di1),
 
 1725               [di2]
"=&f"(di2), [di3]
"=&f"(di3)
 
 1726             : [in_pos]
"r"(in_pos), [vec]
"r"(vec),
 
 1728             : 
"$f0", 
"$f1", 
"$f2", 
"$f3",
 
 1732         cost += di0 * di0 + di1 * di1
 
 1733                 + di2 * di2 + di3 * di3;
 
 1739         *energy = qenergy * (IQ*IQ);
 
 1740     return cost * 
lambda + curbits;
 
 1745                                       const float *scaled, 
int size, 
int scale_idx,
 
 1746                                       int cb, 
const float lambda, 
const float uplim,
 
 1747                                       int *
bits, 
float *energy)
 
 1753     float qenergy = 0.0f;
 
 1754     int qc1, qc2, qc3, qc4;
 
 1760     for (
i = 0; 
i < 
size; 
i += 4) {
 
 1761         const float *vec, *vec2;
 
 1762         int curidx, curidx2;
 
 1763         int   *in_int = (
int   *)&in[
i];
 
 1764         float *in_pos = (
float *)&in[
i];
 
 1765         float di0, di1, di2, di3;
 
 1775             ".set noreorder                             \n\t" 
 1777             "ori        %[t4],  $zero,  4               \n\t" 
 1778             "slt        %[t0],  %[t4],  %[qc1]          \n\t" 
 1779             "slt        %[t1],  %[t4],  %[qc2]          \n\t" 
 1780             "slt        %[t2],  %[t4],  %[qc3]          \n\t" 
 1781             "slt        %[t3],  %[t4],  %[qc4]          \n\t" 
 1782             "movn       %[qc1], %[t4],  %[t0]           \n\t" 
 1783             "movn       %[qc2], %[t4],  %[t1]           \n\t" 
 1784             "movn       %[qc3], %[t4],  %[t2]           \n\t" 
 1785             "movn       %[qc4], %[t4],  %[t3]           \n\t" 
 1786             "lw         %[t0],  0(%[in_int])            \n\t" 
 1787             "lw         %[t1],  4(%[in_int])            \n\t" 
 1788             "lw         %[t2],  8(%[in_int])            \n\t" 
 1789             "lw         %[t3],  12(%[in_int])           \n\t" 
 1790             "srl        %[t0],  %[t0],  31              \n\t" 
 1791             "srl        %[t1],  %[t1],  31              \n\t" 
 1792             "srl        %[t2],  %[t2],  31              \n\t" 
 1793             "srl        %[t3],  %[t3],  31              \n\t" 
 1794             "subu       %[t4],  $zero,  %[qc1]          \n\t" 
 1795             "subu       %[t5],  $zero,  %[qc2]          \n\t" 
 1796             "subu       %[t6],  $zero,  %[qc3]          \n\t" 
 1797             "subu       %[t7],  $zero,  %[qc4]          \n\t" 
 1798             "movn       %[qc1], %[t4],  %[t0]           \n\t" 
 1799             "movn       %[qc2], %[t5],  %[t1]           \n\t" 
 1800             "movn       %[qc3], %[t6],  %[t2]           \n\t" 
 1801             "movn       %[qc4], %[t7],  %[t3]           \n\t" 
 1805             : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
 
 1806               [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
 
 1809             : [in_int]
"r"(in_int)
 
 1817         curidx2 += qc4 + 40;
 
 1819         curbits += p_bits[curidx];
 
 1820         curbits += p_bits[curidx2];
 
 1822         vec     = &p_codes[curidx*2];
 
 1823         vec2    = &p_codes[curidx2*2];
 
 1825         qenergy += vec[0]*vec[0] + vec[1]*vec[1]
 
 1826                 +  vec2[0]*vec2[0] + vec2[1]*vec2[1];
 
 1830             ".set noreorder                             \n\t" 
 1832             "lwc1       $f0,    0(%[in_pos])            \n\t" 
 1833             "lwc1       $f1,    0(%[vec])               \n\t" 
 1834             "lwc1       $f2,    4(%[in_pos])            \n\t" 
 1835             "lwc1       $f3,    4(%[vec])               \n\t" 
 1836             "lwc1       $f4,    8(%[in_pos])            \n\t" 
 1837             "lwc1       $f5,    0(%[vec2])              \n\t" 
 1838             "lwc1       $f6,    12(%[in_pos])           \n\t" 
 1839             "lwc1       $f7,    4(%[vec2])              \n\t" 
 1840             "nmsub.s    %[di0], $f0,    $f1,    %[IQ]   \n\t" 
 1841             "nmsub.s    %[di1], $f2,    $f3,    %[IQ]   \n\t" 
 1842             "nmsub.s    %[di2], $f4,    $f5,    %[IQ]   \n\t" 
 1843             "nmsub.s    %[di3], $f6,    $f7,    %[IQ]   \n\t" 
 1847             : [di0]
"=&f"(di0), [di1]
"=&f"(di1),
 
 1848               [di2]
"=&f"(di2), [di3]
"=&f"(di3)
 
 1849             : [in_pos]
"r"(in_pos), [vec]
"r"(vec),
 
 1850               [vec2]
"r"(vec2), [IQ]
"f"(IQ)
 
 1851             : 
"$f0", 
"$f1", 
"$f2", 
"$f3",
 
 1852               "$f4", 
"$f5", 
"$f6", 
"$f7",
 
 1856         cost += di0 * di0 + di1 * di1
 
 1857                 + di2 * di2 + di3 * di3;
 
 1863         *energy = qenergy * (IQ*IQ);
 
 1864     return cost * 
lambda + curbits;
 
 1869                                        const float *scaled, 
int size, 
int scale_idx,
 
 1870                                        int cb, 
const float lambda, 
const float uplim,
 
 1871                                        int *
bits, 
float *energy)
 
 1877     float qenergy = 0.0f;
 
 1878     int qc1, qc2, qc3, qc4;
 
 1884     for (
i = 0; 
i < 
size; 
i += 4) {
 
 1885         const float *vec, *vec2;
 
 1886         int curidx, curidx2, sign1, count1, sign2, count2;
 
 1887         int   *in_int = (
int   *)&in[
i];
 
 1888         float *in_pos = (
float *)&in[
i];
 
 1889         float di0, di1, di2, di3;
 
 1899             ".set noreorder                                     \n\t" 
 1901             "ori        %[t4],      $zero,      7               \n\t" 
 1902             "ori        %[sign1],   $zero,      0               \n\t" 
 1903             "ori        %[sign2],   $zero,      0               \n\t" 
 1904             "slt        %[t0],      %[t4],      %[qc1]          \n\t" 
 1905             "slt        %[t1],      %[t4],      %[qc2]          \n\t" 
 1906             "slt        %[t2],      %[t4],      %[qc3]          \n\t" 
 1907             "slt        %[t3],      %[t4],      %[qc4]          \n\t" 
 1908             "movn       %[qc1],     %[t4],      %[t0]           \n\t" 
 1909             "movn       %[qc2],     %[t4],      %[t1]           \n\t" 
 1910             "movn       %[qc3],     %[t4],      %[t2]           \n\t" 
 1911             "movn       %[qc4],     %[t4],      %[t3]           \n\t" 
 1912             "lw         %[t0],      0(%[in_int])                \n\t" 
 1913             "lw         %[t1],      4(%[in_int])                \n\t" 
 1914             "lw         %[t2],      8(%[in_int])                \n\t" 
 1915             "lw         %[t3],      12(%[in_int])               \n\t" 
 1916             "slt        %[t0],      %[t0],      $zero           \n\t" 
 1917             "movn       %[sign1],   %[t0],      %[qc1]          \n\t" 
 1918             "slt        %[t2],      %[t2],      $zero           \n\t" 
 1919             "movn       %[sign2],   %[t2],      %[qc3]          \n\t" 
 1920             "slt        %[t1],      %[t1],      $zero           \n\t" 
 1921             "sll        %[t0],      %[sign1],   1               \n\t" 
 1922             "or         %[t0],      %[t0],      %[t1]           \n\t" 
 1923             "movn       %[sign1],   %[t0],      %[qc2]          \n\t" 
 1924             "slt        %[t3],      %[t3],      $zero           \n\t" 
 1925             "sll        %[t0],      %[sign2],   1               \n\t" 
 1926             "or         %[t0],      %[t0],      %[t3]           \n\t" 
 1927             "movn       %[sign2],   %[t0],      %[qc4]          \n\t" 
 1928             "slt        %[count1],  $zero,      %[qc1]          \n\t" 
 1929             "slt        %[t1],      $zero,      %[qc2]          \n\t" 
 1930             "slt        %[count2],  $zero,      %[qc3]          \n\t" 
 1931             "slt        %[t2],      $zero,      %[qc4]          \n\t" 
 1932             "addu       %[count1],  %[count1],  %[t1]           \n\t" 
 1933             "addu       %[count2],  %[count2],  %[t2]           \n\t" 
 1937             : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
 
 1938               [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
 
 1939               [sign1]
"=&r"(sign1), [count1]
"=&r"(count1),
 
 1940               [sign2]
"=&r"(sign2), [count2]
"=&r"(count2),
 
 1943             : [in_int]
"r"(in_int)
 
 1953         curbits += p_bits[curidx];
 
 1954         curbits += upair7_sign_bits[curidx];
 
 1955         vec     = &p_codes[curidx*2];
 
 1957         curbits += p_bits[curidx2];
 
 1958         curbits += upair7_sign_bits[curidx2];
 
 1959         vec2    = &p_codes[curidx2*2];
 
 1961         qenergy += vec[0]*vec[0] + vec[1]*vec[1]
 
 1962                 +  vec2[0]*vec2[0] + vec2[1]*vec2[1];
 
 1966             ".set noreorder                                     \n\t" 
 1968             "lwc1       %[di0],     0(%[in_pos])                \n\t" 
 1969             "lwc1       %[di1],     4(%[in_pos])                \n\t" 
 1970             "lwc1       %[di2],     8(%[in_pos])                \n\t" 
 1971             "lwc1       %[di3],     12(%[in_pos])               \n\t" 
 1972             "abs.s      %[di0],     %[di0]                      \n\t" 
 1973             "abs.s      %[di1],     %[di1]                      \n\t" 
 1974             "abs.s      %[di2],     %[di2]                      \n\t" 
 1975             "abs.s      %[di3],     %[di3]                      \n\t" 
 1976             "lwc1       $f0,        0(%[vec])                   \n\t" 
 1977             "lwc1       $f1,        4(%[vec])                   \n\t" 
 1978             "lwc1       $f2,        0(%[vec2])                  \n\t" 
 1979             "lwc1       $f3,        4(%[vec2])                  \n\t" 
 1980             "nmsub.s    %[di0],     %[di0],     $f0,    %[IQ]   \n\t" 
 1981             "nmsub.s    %[di1],     %[di1],     $f1,    %[IQ]   \n\t" 
 1982             "nmsub.s    %[di2],     %[di2],     $f2,    %[IQ]   \n\t" 
 1983             "nmsub.s    %[di3],     %[di3],     $f3,    %[IQ]   \n\t" 
 1987             : [di0]
"=&f"(di0), [di1]
"=&f"(di1),
 
 1988               [di2]
"=&f"(di2), [di3]
"=&f"(di3)
 
 1989             : [in_pos]
"r"(in_pos), [vec]
"r"(vec),
 
 1990               [vec2]
"r"(vec2), [IQ]
"f"(IQ)
 
 1991             : 
"$f0", 
"$f1", 
"$f2", 
"$f3",
 
 1995         cost += di0 * di0 + di1 * di1
 
 1996                 + di2 * di2 + di3 * di3;
 
 2002         *energy = qenergy * (IQ*IQ);
 
 2003     return cost * 
lambda + curbits;
 
 /* Band-cost routine for an unsigned-pair codebook (it indexes upair12_sign_bits). */
 /* NOTE(review): the function name line and several interior lines are absent from this excerpt; */
 /* presumably this is get_band_cost_UPAIR12_mips, as listed in get_band_cost_arr - confirm. */
 2008                                         const float *scaled, 
int size, 
int scale_idx,
 
 2009                                         int cb, 
const float lambda, 
const float uplim,
 
 2010                                         int *
bits, 
float *energy)
 
 2016     float qenergy = 0.0f;
 
 2017     int qc1, qc2, qc3, qc4;
 
 /* Four input coefficients (two codebook pairs) are handled per iteration. */
 2023     for (
i = 0; 
i < 
size; 
i += 4) {
 
 2024         const float *vec, *vec2;
 
 2025         int curidx, curidx2;
 
 2026         int sign1, count1, sign2, count2;
 
 /* The same four inputs are viewed as raw integer words (sign tests) and as floats (error computation). */
 2027         int   *in_int = (
int   *)&in[
i];
 
 2028         float *in_pos = (
float *)&in[
i];
 
 2029         float di0, di1, di2, di3;
 
 /* Integer asm: clamp qc1..qc4 to at most 12; for each coefficient whose clamped value is */
 /* non-zero, shift its input sign (integer word < 0) into sign1 (pair 1) or sign2 (pair 2); */
 /* count1/count2 end up holding the number of positive quantized values in each pair. */
 2039             ".set noreorder                                     \n\t" 
 2041             "ori        %[t4],      $zero,      12              \n\t" 
 2042             "ori        %[sign1],   $zero,      0               \n\t" 
 2043             "ori        %[sign2],   $zero,      0               \n\t" 
 2044             "slt        %[t0],      %[t4],      %[qc1]          \n\t" 
 2045             "slt        %[t1],      %[t4],      %[qc2]          \n\t" 
 2046             "slt        %[t2],      %[t4],      %[qc3]          \n\t" 
 2047             "slt        %[t3],      %[t4],      %[qc4]          \n\t" 
 2048             "movn       %[qc1],     %[t4],      %[t0]           \n\t" 
 2049             "movn       %[qc2],     %[t4],      %[t1]           \n\t" 
 2050             "movn       %[qc3],     %[t4],      %[t2]           \n\t" 
 2051             "movn       %[qc4],     %[t4],      %[t3]           \n\t" 
 2052             "lw         %[t0],      0(%[in_int])                \n\t" 
 2053             "lw         %[t1],      4(%[in_int])                \n\t" 
 2054             "lw         %[t2],      8(%[in_int])                \n\t" 
 2055             "lw         %[t3],      12(%[in_int])               \n\t" 
 2056             "slt        %[t0],      %[t0],      $zero           \n\t" 
 2057             "movn       %[sign1],   %[t0],      %[qc1]          \n\t" 
 2058             "slt        %[t2],      %[t2],      $zero           \n\t" 
 2059             "movn       %[sign2],   %[t2],      %[qc3]          \n\t" 
 2060             "slt        %[t1],      %[t1],      $zero           \n\t" 
 2061             "sll        %[t0],      %[sign1],   1               \n\t" 
 2062             "or         %[t0],      %[t0],      %[t1]           \n\t" 
 2063             "movn       %[sign1],   %[t0],      %[qc2]          \n\t" 
 2064             "slt        %[t3],      %[t3],      $zero           \n\t" 
 2065             "sll        %[t0],      %[sign2],   1               \n\t" 
 2066             "or         %[t0],      %[t0],      %[t3]           \n\t" 
 2067             "movn       %[sign2],   %[t0],      %[qc4]          \n\t" 
 2068             "slt        %[count1],  $zero,      %[qc1]          \n\t" 
 2069             "slt        %[t1],      $zero,      %[qc2]          \n\t" 
 2070             "slt        %[count2],  $zero,      %[qc3]          \n\t" 
 2071             "slt        %[t2],      $zero,      %[qc4]          \n\t" 
 2072             "addu       %[count1],  %[count1],  %[t1]           \n\t" 
 2073             "addu       %[count2],  %[count2],  %[t2]           \n\t" 
 2077             : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
 
 2078               [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
 
 2079               [sign1]
"=&r"(sign1), [count1]
"=&r"(count1),
 
 2080               [sign2]
"=&r"(sign2), [count2]
"=&r"(count2),
 
 2083             : [in_int]
"r"(in_int)
 
 /* Bit cost of both pairs: codeword length from p_bits plus the sign-bit count from upair12_sign_bits. */
 2093         curbits += p_bits[curidx];
 
 2094         curbits += p_bits[curidx2];
 
 2095         curbits += upair12_sign_bits[curidx];
 
 2096         curbits += upair12_sign_bits[curidx2];
 
 /* Each codebook entry is a pair of floats, hence the *2 stride into p_codes. */
 2097         vec     = &p_codes[curidx*2];
 
 2098         vec2    = &p_codes[curidx2*2];
 
 /* Sum of squared codebook values for the four coefficients. */
 2100         qenergy += vec[0]*vec[0] + vec[1]*vec[1]
 
 2101                 +  vec2[0]*vec2[0] + vec2[1]*vec2[1];
 
 /* FPU asm: di_k = |in[k]| - codebook_value_k * IQ (nmsub.s yields fr - fs*ft); */
 /* $f0..$f3 are used as scratch and are listed as clobbers. */
 2105             ".set noreorder                                     \n\t" 
 2107             "lwc1       %[di0],     0(%[in_pos])                \n\t" 
 2108             "lwc1       %[di1],     4(%[in_pos])                \n\t" 
 2109             "lwc1       %[di2],     8(%[in_pos])                \n\t" 
 2110             "lwc1       %[di3],     12(%[in_pos])               \n\t" 
 2111             "abs.s      %[di0],     %[di0]                      \n\t" 
 2112             "abs.s      %[di1],     %[di1]                      \n\t" 
 2113             "abs.s      %[di2],     %[di2]                      \n\t" 
 2114             "abs.s      %[di3],     %[di3]                      \n\t" 
 2115             "lwc1       $f0,        0(%[vec])                   \n\t" 
 2116             "lwc1       $f1,        4(%[vec])                   \n\t" 
 2117             "lwc1       $f2,        0(%[vec2])                  \n\t" 
 2118             "lwc1       $f3,        4(%[vec2])                  \n\t" 
 2119             "nmsub.s    %[di0],     %[di0],     $f0,    %[IQ]   \n\t" 
 2120             "nmsub.s    %[di1],     %[di1],     $f1,    %[IQ]   \n\t" 
 2121             "nmsub.s    %[di2],     %[di2],     $f2,    %[IQ]   \n\t" 
 2122             "nmsub.s    %[di3],     %[di3],     $f3,    %[IQ]   \n\t" 
 2126             : [di0]
"=&f"(di0), [di1]
"=&f"(di1),
 
 2127               [di2]
"=&f"(di2), [di3]
"=&f"(di3)
 
 2128             : [in_pos]
"r"(in_pos), [vec]
"r"(vec),
 
 2129               [vec2]
"r"(vec2), [IQ]
"f"(IQ)
 
 2130             : 
"$f0", 
"$f1", 
"$f2", 
"$f3",
 
 /* Accumulate the squared quantization error of the four coefficients. */
 2134         cost += di0 * di0 + di1 * di1
 
 2135                 + di2 * di2 + di3 * di3;
 
 /* Quantized energy is scaled by IQ^2 before being stored through energy. */
 2141         *energy = qenergy * (IQ*IQ);
 
 /* Result: distortion weighted by lambda, plus the bit count. */
 2142     return cost * 
lambda + curbits;
 
 /* Band-cost routine for the escape codebook (it indexes esc_sign_bits and adds escape-code bits). */
 /* NOTE(review): the function name line and several interior lines are absent from this excerpt; */
 /* presumably this is get_band_cost_ESC_mips, as listed in get_band_cost_arr - confirm. */
 2147                                     const float *scaled, 
int size, 
int scale_idx,
 
 2148                                     int cb, 
const float lambda, 
const float uplim,
 
 2149                                     int *
bits, 
float *energy)
 
 /* Threshold compared against t1..t4 below; at or above it the error is measured against this value. */
 2153     const float CLIPPED_ESCAPE = 165140.0f * IQ;
 
 2156     float qenergy = 0.0f;
 
 2157     int qc1, qc2, qc3, qc4;
 
 /* Four input coefficients (two codebook pairs) are handled per iteration. */
 2163     for (
i = 0; 
i < 
size; 
i += 4) {
 
 2164         const float *vec, *vec2;
 
 2165         int curidx, curidx2;
 
 2167         float di1, di2, di3, di4;
 
 2168         int cond0, cond1, cond2, cond3;
 
 /* Integer asm: c1..c4 receive qc1..qc4 after a saturating left shift by 18 and a logical right */
 /* shift by 18 (per DSP ASE shll_s.w semantics this caps non-negative values at 0x1FFF); */
 /* condN is 1 when qcN > 15, and in that case qcN is replaced by 16. */
 2179             ".set noreorder                             \n\t" 
 2181             "ori        %[t6],      $zero,  15          \n\t" 
 2182             "ori        %[t7],      $zero,  16          \n\t" 
 2183             "shll_s.w   %[c1],      %[qc1], 18          \n\t" 
 2184             "shll_s.w   %[c2],      %[qc2], 18          \n\t" 
 2185             "shll_s.w   %[c3],      %[qc3], 18          \n\t" 
 2186             "shll_s.w   %[c4],      %[qc4], 18          \n\t" 
 2187             "srl        %[c1],      %[c1],  18          \n\t" 
 2188             "srl        %[c2],      %[c2],  18          \n\t" 
 2189             "srl        %[c3],      %[c3],  18          \n\t" 
 2190             "srl        %[c4],      %[c4],  18          \n\t" 
 2191             "slt        %[cond0],   %[t6],  %[qc1]      \n\t" 
 2192             "slt        %[cond1],   %[t6],  %[qc2]      \n\t" 
 2193             "slt        %[cond2],   %[t6],  %[qc3]      \n\t" 
 2194             "slt        %[cond3],   %[t6],  %[qc4]      \n\t" 
 2195             "movn       %[qc1],     %[t7],  %[cond0]    \n\t" 
 2196             "movn       %[qc2],     %[t7],  %[cond1]    \n\t" 
 2197             "movn       %[qc3],     %[t7],  %[cond2]    \n\t" 
 2198             "movn       %[qc4],     %[t7],  %[cond3]    \n\t" 
 2202             : [qc1]
"+r"(qc1), [qc2]
"+r"(qc2),
 
 2203               [qc3]
"+r"(qc3), [qc4]
"+r"(qc4),
 
 2204               [cond0]
"=&r"(cond0), [cond1]
"=&r"(cond1),
 
 2205               [cond2]
"=&r"(cond2), [cond3]
"=&r"(cond3),
 
 2207               [c3]
"=&r"(c3), [c4]
"=&r"(c4),
 
 /* Bit cost per pair: codeword length plus sign-bit count; entries are float pairs (stride 2). */
 2217         curbits += p_bits[curidx];
 
 2218         curbits += esc_sign_bits[curidx];
 
 2219         vec     = &p_codes[curidx*2];
 
 2221         curbits += p_bits[curidx2];
 
 2222         curbits += esc_sign_bits[curidx2];
 
 2223         vec2     = &p_codes[curidx2*2];
 
 /* Escape bits: -condN is an all-ones mask when condN == 1 and zero otherwise, */
 /* so av_log2(cN) * 2 - 3 is added only for coefficients that exceeded 15. */
 2225         curbits += (
av_log2(
c1) * 2 - 3) & (-cond0);
 
 2226         curbits += (
av_log2(
c2) * 2 - 3) & (-cond1);
 
 2227         curbits += (
av_log2(c3) * 2 - 3) & (-cond2);
 
 2228         curbits += (
av_log2(c4) * 2 - 3) & (-cond3);
 
 /* Per-coefficient error. NOTE(review): the enclosing if/else lines are absent from this excerpt; */
 /* the visible branches measure against CLIPPED_ESCAPE, c * cbrtf(c) * IQ, or the codebook value * IQ. */
 2236             if (
t1 >= CLIPPED_ESCAPE) {
 
 2237                 di1 = 
t1 - CLIPPED_ESCAPE;
 
 2238                 qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
 
 2244             di1 = 
t1 - (
V = vec[0] * IQ);
 
 2249             if (
t2 >= CLIPPED_ESCAPE) {
 
 2250                 di2 = 
t2 - CLIPPED_ESCAPE;
 
 2251                 qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
 
 2257             di2 = 
t2 - (
V = vec[1] * IQ);
 
 2262             if (
t3 >= CLIPPED_ESCAPE) {
 
 2263                 di3 = 
t3 - CLIPPED_ESCAPE;
 
 2264                 qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
 
 2266                 di3 = 
t3 - (
V = c3 * 
cbrtf(c3) * IQ);
 
 2270             di3 = 
t3 - (
V = vec2[0] * IQ);
 
 2275             if (
t4 >= CLIPPED_ESCAPE) {
 
 2276                 di4 = 
t4 - CLIPPED_ESCAPE;
 
 2277                 qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
 
 2279                 di4 = 
t4 - (
V = c4 * 
cbrtf(c4) * IQ);
 
 2283             di4 = 
t4 - (
V = vec2[1]*IQ);
 
 /* Accumulate the squared error of the four coefficients. */
 2287         cost += di1 * di1 + di2 * di2
 
 2288                 + di3 * di3 + di4 * di4;
 
 /* Result: distortion weighted by lambda, plus the bit count. */
 2293     return cost * 
lambda + curbits;
 
 /* Dispatch table of band-cost routines, indexed by codebook number (see the get_band_cost macro). */
 /* Layout of the 16 entries: 0 -> ZERO, 1-2 -> SQUAD, 3-4 -> UQUAD, 5-6 -> SPAIR, 7-8 -> UPAIR7, */
 /* 9-10 -> UPAIR12, 11 -> ESC, 12 -> NONE, 13-15 -> ZERO. */
 2296 static float (*
const get_band_cost_arr[])(
struct AACEncContext *
s,
 
 2298                                           const float *scaled, 
int size, 
int scale_idx,
 
 2299                                           int cb, 
const float lambda, 
const float uplim,
 
 2300                                           int *
bits, 
float *energy) = {
 
 2301     get_band_cost_ZERO_mips,
 
 2302     get_band_cost_SQUAD_mips,
 
 2303     get_band_cost_SQUAD_mips,
 
 2304     get_band_cost_UQUAD_mips,
 
 2305     get_band_cost_UQUAD_mips,
 
 2306     get_band_cost_SPAIR_mips,
 
 2307     get_band_cost_SPAIR_mips,
 
 2308     get_band_cost_UPAIR7_mips,
 
 2309     get_band_cost_UPAIR7_mips,
 
 2310     get_band_cost_UPAIR12_mips,
 
 2311     get_band_cost_UPAIR12_mips,
 
 2312     get_band_cost_ESC_mips,
 
 2313     get_band_cost_NONE_mips, 
 
 2314     get_band_cost_ZERO_mips,
 
 2315     get_band_cost_ZERO_mips,
 
 2316     get_band_cost_ZERO_mips,
 
 /* Selects the codebook-specific routine via get_band_cost_arr[cb] and forwards every argument unchanged. */
 /* cb is used as a raw array index with no range check here, so callers must pass a valid table index. */
 2319 #define get_band_cost(                                  \ 
 2320                                 s, pb, in, scaled, size, scale_idx, cb, \ 
 2321                                 lambda, uplim, bits, energy)            \ 
 2322     get_band_cost_arr[cb](                              \ 
 2323                                 s, pb, in, scaled, size, scale_idx, cb, \ 
 2324                                 lambda, uplim, bits, energy) 
 /* Thin wrapper around get_band_cost: passes NULL as the second (pb) argument and forwards the rest. */
 /* NOTE(review): the function name line is absent from this excerpt; the rtz parameter is not */
 /* referenced in the visible body. */
 2327                                 const float *scaled, 
int size, 
int scale_idx,
 
 2328                                 int cb, 
const float lambda, 
const float uplim,
 
 2329                                 int *
bits, 
float *energy, 
int rtz)
 
 2331     return get_band_cost(
s, 
NULL, in, scaled, 
size, scale_idx, 
cb, 
lambda, uplim, 
bits, energy);
 
 /* Fragment of a routine operating on two channel elements (sce0, sce1) with M[]/S[] work buffers. */
 /* NOTE(review): the function header and most of the body are absent from this excerpt; */
 /* presumably this is the mid/side stereo search - confirm against the full source. */
 2340     int start = 0, 
i, 
w, w2, 
g, sid_sf_boost, prev_mid, prev_side;
 
 2341     uint8_t nextband0[128], nextband1[128];
 
 2342     float M[128], 
S[128];
 
 /* L34, R34, M34 and S34 are four consecutive 128-float regions of s->scoefs. */
 2343     float *L34 = 
s->scoefs, *R34 = 
s->scoefs + 128, *M34 = 
s->scoefs + 128*2, *S34 = 
s->scoefs + 128*3;
 
 2344     const float lambda = 
s->lambda;
 
 /* Both running values start from the first scalefactor index of their channel. */
 2355     prev_mid = sce0->
sf_idx[0];
 
 2356     prev_side = sce1->
sf_idx[0];
 
 2364                 float Mmax = 0.0f, Smax = 0.0f;
 
 /* Visible tails of two expressions over sce1->coeffs: one is scaled by 0.5, the other subtracts. */
 2370                               + sce1->
coeffs[start+(
w+w2)*128+
i]) * 0.5;
 
 2372                               - sce1->
coeffs[start+(
w+w2)*128+
i];
 
 /* Track the largest M34 / S34 value seen. */
 2377                         Mmax = 
FFMAX(Mmax, M34[
i]);
 
 2378                         Smax = 
FFMAX(Smax, S34[
i]);
 
 /* Four candidate values of sid_sf_boost (0..3) are tried. */
 2382                 for (sid_sf_boost = 0; sid_sf_boost < 4; sid_sf_boost++) {
 
 2383                     float dist1 = 0.0f, dist2 = 0.0f;
 
 /* Codebook indices are forced to be at least 1. */
 2403                     midcb = 
FFMAX(1,midcb);
 
 2404                     sidcb = 
FFMAX(1,sidcb);
 
 /* Psychoacoustic band entries for the current channel and the next one, at window (w+w2), band g. */
 2407                         FFPsyBand *band0 = &
s->psy.ch[
s->cur_channel+0].psy_bands[(
w+w2)*16+
g];
 
 2408                         FFPsyBand *band1 = &
s->psy.ch[
s->cur_channel+1].psy_bands[(
w+w2)*16+
g];
 
 2413                                   + sce1->
coeffs[start+(
w+w2)*128+
i]) * 0.5;
 
 2415                                   - sce1->
coeffs[start+(
w+w2)*128+
i];
 
 2463                     } 
else if (
B1 > 
B0) {
 
 2486 #if !HAVE_MIPS32R6 && !HAVE_MIPS64R6 
 2488     int option = 
c->options.coder;