00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 
00025 
00026 
00027 
00028 
00029 
00030 
00031 
00032 
00033 
00034 
00035 
00036 
00037 
00038 
00039 
00040 
00041 
00042 
00043 
00044 
00045 
00046 
00047 
00048 
00049 
00050 #include "config.h"
00051 #include "libavcodec/fft.h"
00052 #include "fft_table.h"
00053 
00058 #if HAVE_INLINE_ASM
00059 static void ff_fft_calc_mips(FFTContext *s, FFTComplex *z)
00060 {
00061     int nbits, i, n, num_transforms, offset, step;
00062     int n4, n2, n34;
00063     FFTSample tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
00064     FFTComplex *tmpz;
00065     float w_re, w_im;
00066     float *w_re_ptr, *w_im_ptr;
00067     const int fft_size = (1 << s->nbits);
00068     int s_n = s->nbits;
00069     int tem1, tem2;
00070     float pom,  pom1,  pom2,  pom3;
00071     float temp, temp1, temp3, temp4;
00072     FFTComplex * tmpz_n2, * tmpz_n34, * tmpz_n4;
00073     FFTComplex * tmpz_n2_i, * tmpz_n34_i, * tmpz_n4_i, * tmpz_i;
00074 
00078     __asm__ volatile (
00079         "li   %[tem1], 16                                      \n\t"
00080         "sub  %[s_n],  %[tem1], %[s_n]                         \n\t"
00081         "li   %[tem2], 10923                                   \n\t"
00082         "srav %[tem2], %[tem2], %[s_n]                         \n\t"
00083         "ori  %[num_t],%[tem2], 1                              \n\t"
00084         : [num_t]"=r"(num_transforms), [s_n]"+r"(s_n),
00085           [tem1]"=&r"(tem1), [tem2]"=&r"(tem2)
00086     );
00087 
00088 
00089     for (n=0; n<num_transforms; n++) {
00090         offset = fft_offsets_lut[n] << 2;
00091         tmpz = z + offset;
00092 
00093         tmp1 = tmpz[0].re + tmpz[1].re;
00094         tmp5 = tmpz[2].re + tmpz[3].re;
00095         tmp2 = tmpz[0].im + tmpz[1].im;
00096         tmp6 = tmpz[2].im + tmpz[3].im;
00097         tmp3 = tmpz[0].re - tmpz[1].re;
00098         tmp8 = tmpz[2].im - tmpz[3].im;
00099         tmp4 = tmpz[0].im - tmpz[1].im;
00100         tmp7 = tmpz[2].re - tmpz[3].re;
00101 
00102         tmpz[0].re = tmp1 + tmp5;
00103         tmpz[2].re = tmp1 - tmp5;
00104         tmpz[0].im = tmp2 + tmp6;
00105         tmpz[2].im = tmp2 - tmp6;
00106         tmpz[1].re = tmp3 + tmp8;
00107         tmpz[3].re = tmp3 - tmp8;
00108         tmpz[1].im = tmp4 - tmp7;
00109         tmpz[3].im = tmp4 + tmp7;
00110 
00111     }
00112 
00113     if (fft_size < 8)
00114         return;
00115 
00116     num_transforms = (num_transforms >> 1) | 1;
00117 
00118     for (n=0; n<num_transforms; n++) {
00119         offset = fft_offsets_lut[n] << 3;
00120         tmpz = z + offset;
00121 
00122         __asm__ volatile (
00123             "lwc1  %[tmp1], 32(%[tmpz])                     \n\t"
00124             "lwc1  %[pom],  40(%[tmpz])                     \n\t"
00125             "lwc1  %[tmp3], 48(%[tmpz])                     \n\t"
00126             "lwc1  %[pom1], 56(%[tmpz])                     \n\t"
00127             "lwc1  %[tmp2], 36(%[tmpz])                     \n\t"
00128             "lwc1  %[pom2], 44(%[tmpz])                     \n\t"
00129             "lwc1  %[pom3], 60(%[tmpz])                     \n\t"
00130             "lwc1  %[tmp4], 52(%[tmpz])                     \n\t"
00131             "add.s %[tmp1], %[tmp1],    %[pom]              \n\t"  
00132             "add.s %[tmp3], %[tmp3],    %[pom1]             \n\t"  
00133             "add.s %[tmp2], %[tmp2],    %[pom2]             \n\t"  
00134             "lwc1  %[pom],  40(%[tmpz])                     \n\t"
00135             "add.s %[tmp4], %[tmp4],    %[pom3]             \n\t"  
00136             "add.s %[tmp5], %[tmp1],    %[tmp3]             \n\t"  
00137             "sub.s %[tmp7], %[tmp1],    %[tmp3]             \n\t"  
00138             "lwc1  %[tmp1], 32(%[tmpz])                     \n\t"
00139             "lwc1  %[pom1], 44(%[tmpz])                     \n\t"
00140             "add.s %[tmp6], %[tmp2],    %[tmp4]             \n\t"  
00141             "sub.s %[tmp8], %[tmp2],    %[tmp4]             \n\t"  
00142             "lwc1  %[tmp2], 36(%[tmpz])                     \n\t"
00143             "lwc1  %[pom2], 56(%[tmpz])                     \n\t"
00144             "lwc1  %[pom3], 60(%[tmpz])                     \n\t"
00145             "lwc1  %[tmp3], 48(%[tmpz])                     \n\t"
00146             "lwc1  %[tmp4], 52(%[tmpz])                     \n\t"
00147             "sub.s %[tmp1], %[tmp1],    %[pom]              \n\t"  
00148             "lwc1  %[pom],  0(%[tmpz])                      \n\t"
00149             "sub.s %[tmp2], %[tmp2],    %[pom1]             \n\t"  
00150             "sub.s %[tmp3], %[tmp3],    %[pom2]             \n\t"  
00151             "lwc1  %[pom2], 4(%[tmpz])                      \n\t"
00152             "sub.s %[pom1], %[pom],     %[tmp5]             \n\t"
00153             "sub.s %[tmp4], %[tmp4],    %[pom3]             \n\t"  
00154             "add.s %[pom3], %[pom],     %[tmp5]             \n\t"
00155             "sub.s %[pom],  %[pom2],    %[tmp6]             \n\t"
00156             "add.s %[pom2], %[pom2],    %[tmp6]             \n\t"
00157             "swc1  %[pom1], 32(%[tmpz])                     \n\t"  
00158             "swc1  %[pom3], 0(%[tmpz])                      \n\t"  
00159             "swc1  %[pom],  36(%[tmpz])                     \n\t"  
00160             "swc1  %[pom2], 4(%[tmpz])                      \n\t"  
00161             "lwc1  %[pom1], 16(%[tmpz])                     \n\t"
00162             "lwc1  %[pom3], 20(%[tmpz])                     \n\t"
00163             "li.s  %[pom],  0.7071067812                    \n\t"  
00164             "add.s %[temp1],%[tmp1],    %[tmp2]             \n\t"
00165             "sub.s %[temp], %[pom1],    %[tmp8]             \n\t"
00166             "add.s %[pom2], %[pom3],    %[tmp7]             \n\t"
00167             "sub.s %[temp3],%[tmp3],    %[tmp4]             \n\t"
00168             "sub.s %[temp4],%[tmp2],    %[tmp1]             \n\t"
00169             "swc1  %[temp], 48(%[tmpz])                     \n\t"  
00170             "swc1  %[pom2], 52(%[tmpz])                     \n\t"  
00171             "add.s %[pom1], %[pom1],    %[tmp8]             \n\t"
00172             "sub.s %[pom3], %[pom3],    %[tmp7]             \n\t"
00173             "add.s %[tmp3], %[tmp3],    %[tmp4]             \n\t"
00174             "mul.s %[tmp5], %[pom],     %[temp1]            \n\t"  
00175             "mul.s %[tmp7], %[pom],     %[temp3]            \n\t"  
00176             "mul.s %[tmp6], %[pom],     %[temp4]            \n\t"  
00177             "mul.s %[tmp8], %[pom],     %[tmp3]             \n\t"  
00178             "swc1  %[pom1], 16(%[tmpz])                     \n\t"  
00179             "swc1  %[pom3], 20(%[tmpz])                     \n\t"  
00180             "add.s %[tmp1], %[tmp5],    %[tmp7]             \n\t"  
00181             "sub.s %[tmp3], %[tmp5],    %[tmp7]             \n\t"  
00182             "add.s %[tmp2], %[tmp6],    %[tmp8]             \n\t"  
00183             "sub.s %[tmp4], %[tmp6],    %[tmp8]             \n\t"  
00184             "lwc1  %[temp], 8(%[tmpz])                      \n\t"
00185             "lwc1  %[temp1],12(%[tmpz])                     \n\t"
00186             "lwc1  %[pom],  24(%[tmpz])                     \n\t"
00187             "lwc1  %[pom2], 28(%[tmpz])                     \n\t"
00188             "sub.s %[temp4],%[temp],    %[tmp1]             \n\t"
00189             "sub.s %[temp3],%[temp1],   %[tmp2]             \n\t"
00190             "add.s %[temp], %[temp],    %[tmp1]             \n\t"
00191             "add.s %[temp1],%[temp1],   %[tmp2]             \n\t"
00192             "sub.s %[pom1], %[pom],     %[tmp4]             \n\t"
00193             "add.s %[pom3], %[pom2],    %[tmp3]             \n\t"
00194             "add.s %[pom],  %[pom],     %[tmp4]             \n\t"
00195             "sub.s %[pom2], %[pom2],    %[tmp3]             \n\t"
00196             "swc1  %[temp4],40(%[tmpz])                     \n\t"  
00197             "swc1  %[temp3],44(%[tmpz])                     \n\t"  
00198             "swc1  %[temp], 8(%[tmpz])                      \n\t"  
00199             "swc1  %[temp1],12(%[tmpz])                     \n\t"  
00200             "swc1  %[pom1], 56(%[tmpz])                     \n\t"  
00201             "swc1  %[pom3], 60(%[tmpz])                     \n\t"  
00202             "swc1  %[pom],  24(%[tmpz])                     \n\t"  
00203             "swc1  %[pom2], 28(%[tmpz])                     \n\t"  
00204             : [tmp1]"=&f"(tmp1), [pom]"=&f"(pom),   [pom1]"=&f"(pom1), [pom2]"=&f"(pom2),
00205               [tmp3]"=&f"(tmp3), [tmp2]"=&f"(tmp2), [tmp4]"=&f"(tmp4), [tmp5]"=&f"(tmp5),  [tmp7]"=&f"(tmp7),
00206               [tmp6]"=&f"(tmp6), [tmp8]"=&f"(tmp8), [pom3]"=&f"(pom3),[temp]"=&f"(temp), [temp1]"=&f"(temp1),
00207               [temp3]"=&f"(temp3), [temp4]"=&f"(temp4)
00208             : [tmpz]"r"(tmpz)
00209             : "memory"
00210         );
00211     }
00212 
00213     step = 1 << (MAX_LOG2_NFFT - 4);
00214     n4 = 4;
00215 
00216     for (nbits=4; nbits<=s->nbits; nbits++) {
00217         
00218 
00219 
00220         __asm__ volatile (
00221             "sra %[num_t], %[num_t], 1               \n\t"
00222             "ori %[num_t], %[num_t], 1               \n\t"
00223 
00224             : [num_t] "+r" (num_transforms)
00225         );
00226         n2  = 2 * n4;
00227         n34 = 3 * n4;
00228 
00229         for (n=0; n<num_transforms; n++) {
00230             offset = fft_offsets_lut[n] << nbits;
00231             tmpz = z + offset;
00232 
00233             tmpz_n2  = tmpz +  n2;
00234             tmpz_n4  = tmpz +  n4;
00235             tmpz_n34 = tmpz +  n34;
00236 
00237             __asm__ volatile (
00238                 "lwc1  %[pom1], 0(%[tmpz_n2])            \n\t"
00239                 "lwc1  %[pom],  0(%[tmpz_n34])           \n\t"
00240                 "lwc1  %[pom2], 4(%[tmpz_n2])            \n\t"
00241                 "lwc1  %[pom3], 4(%[tmpz_n34])           \n\t"
00242                 "lwc1  %[temp1],0(%[tmpz])               \n\t"
00243                 "lwc1  %[temp3],4(%[tmpz])               \n\t"
00244                 "add.s %[tmp5], %[pom1],      %[pom]     \n\t"   
00245                 "sub.s %[tmp1], %[pom1],      %[pom]     \n\t"   
00246                 "add.s %[tmp6], %[pom2],      %[pom3]    \n\t"   
00247                 "sub.s %[tmp2], %[pom2],      %[pom3]    \n\t"   
00248                 "sub.s %[temp], %[temp1],     %[tmp5]    \n\t"
00249                 "add.s %[temp1],%[temp1],     %[tmp5]    \n\t"
00250                 "sub.s %[temp4],%[temp3],     %[tmp6]    \n\t"
00251                 "add.s %[temp3],%[temp3],     %[tmp6]    \n\t"
00252                 "swc1  %[temp], 0(%[tmpz_n2])            \n\t"   
00253                 "swc1  %[temp1],0(%[tmpz])               \n\t"   
00254                 "lwc1  %[pom1], 0(%[tmpz_n4])            \n\t"
00255                 "swc1  %[temp4],4(%[tmpz_n2])            \n\t"   
00256                 "lwc1  %[temp], 4(%[tmpz_n4])            \n\t"
00257                 "swc1  %[temp3],4(%[tmpz])               \n\t"   
00258                 "sub.s %[pom],  %[pom1],      %[tmp2]    \n\t"
00259                 "add.s %[pom1], %[pom1],      %[tmp2]    \n\t"
00260                 "add.s %[temp1],%[temp],      %[tmp1]    \n\t"
00261                 "sub.s %[temp], %[temp],      %[tmp1]    \n\t"
00262                 "swc1  %[pom],  0(%[tmpz_n34])           \n\t"   
00263                 "swc1  %[pom1], 0(%[tmpz_n4])            \n\t"   
00264                 "swc1  %[temp1],4(%[tmpz_n34])           \n\t"   
00265                 "swc1  %[temp], 4(%[tmpz_n4])            \n\t"   
00266                 : [tmp5]"=&f"(tmp5),
00267                   [tmp1]"=&f"(tmp1), [pom]"=&f"(pom),        [pom1]"=&f"(pom1),        [pom2]"=&f"(pom2),
00268                   [tmp2]"=&f"(tmp2), [tmp6]"=&f"(tmp6),          [pom3]"=&f"(pom3),
00269                   [temp]"=&f"(temp), [temp1]"=&f"(temp1),     [temp3]"=&f"(temp3),       [temp4]"=&f"(temp4)
00270                 : [tmpz]"r"(tmpz), [tmpz_n2]"r"(tmpz_n2), [tmpz_n34]"r"(tmpz_n34), [tmpz_n4]"r"(tmpz_n4)
00271                 : "memory"
00272             );
00273 
00274             w_re_ptr = (float*)(ff_cos_65536 + step);
00275             w_im_ptr = (float*)(ff_cos_65536 + MAX_FFT_SIZE/4 - step);
00276 
00277             for (i=1; i<n4; i++) {
00278                 w_re = w_re_ptr[0];
00279                 w_im = w_im_ptr[0];
00280                 tmpz_n2_i = tmpz_n2  + i;
00281                 tmpz_n4_i = tmpz_n4  + i;
00282                 tmpz_n34_i= tmpz_n34 + i;
00283                 tmpz_i    = tmpz     + i;
00284 
00285                 __asm__ volatile (
00286                     "lwc1     %[temp],  0(%[tmpz_n2_i])               \n\t"
00287                     "lwc1     %[temp1], 4(%[tmpz_n2_i])               \n\t"
00288                     "lwc1     %[pom],   0(%[tmpz_n34_i])              \n\t"
00289                     "lwc1     %[pom1],  4(%[tmpz_n34_i])              \n\t"
00290                     "mul.s    %[temp3], %[w_im],    %[temp]           \n\t"
00291                     "mul.s    %[temp4], %[w_im],    %[temp1]          \n\t"
00292                     "mul.s    %[pom2],  %[w_im],    %[pom1]           \n\t"
00293                     "mul.s    %[pom3],  %[w_im],    %[pom]            \n\t"
00294                     "msub.s   %[tmp2],  %[temp3],   %[w_re], %[temp1] \n\t"  
00295                     "madd.s   %[tmp1],  %[temp4],   %[w_re], %[temp]  \n\t"  
00296                     "msub.s   %[tmp3],  %[pom2],    %[w_re], %[pom]   \n\t"  
00297                     "madd.s   %[tmp4],  %[pom3],    %[w_re], %[pom1]  \n\t"  
00298                     "lwc1     %[temp],  0(%[tmpz_i])                  \n\t"
00299                     "lwc1     %[pom],   4(%[tmpz_i])                  \n\t"
00300                     "add.s    %[tmp5],  %[tmp1],    %[tmp3]           \n\t"  
00301                     "sub.s    %[tmp1],  %[tmp1],    %[tmp3]           \n\t"  
00302                     "add.s    %[tmp6],  %[tmp2],    %[tmp4]           \n\t"  
00303                     "sub.s    %[tmp2],  %[tmp2],    %[tmp4]           \n\t"  
00304                     "sub.s    %[temp1], %[temp],    %[tmp5]           \n\t"
00305                     "add.s    %[temp],  %[temp],    %[tmp5]           \n\t"
00306                     "sub.s    %[pom1],  %[pom],     %[tmp6]           \n\t"
00307                     "add.s    %[pom],   %[pom],     %[tmp6]           \n\t"
00308                     "lwc1     %[temp3], 0(%[tmpz_n4_i])               \n\t"
00309                     "lwc1     %[pom2],  4(%[tmpz_n4_i])               \n\t"
00310                     "swc1     %[temp1], 0(%[tmpz_n2_i])               \n\t"  
00311                     "swc1     %[temp],  0(%[tmpz_i])                  \n\t"  
00312                     "swc1     %[pom1],  4(%[tmpz_n2_i])               \n\t"  
00313                     "swc1     %[pom] ,  4(%[tmpz_i])                  \n\t"  
00314                     "sub.s    %[temp4], %[temp3],   %[tmp2]           \n\t"
00315                     "add.s    %[pom3],  %[pom2],    %[tmp1]           \n\t"
00316                     "add.s    %[temp3], %[temp3],   %[tmp2]           \n\t"
00317                     "sub.s    %[pom2],  %[pom2],    %[tmp1]           \n\t"
00318                     "swc1     %[temp4], 0(%[tmpz_n34_i])              \n\t"  
00319                     "swc1     %[pom3],  4(%[tmpz_n34_i])              \n\t"  
00320                     "swc1     %[temp3], 0(%[tmpz_n4_i])               \n\t"  
00321                     "swc1     %[pom2],  4(%[tmpz_n4_i])               \n\t"  
00322                     : [tmp1]"=&f"(tmp1), [tmp2]"=&f" (tmp2), [temp]"=&f"(temp), [tmp3]"=&f"(tmp3),
00323                       [tmp4]"=&f"(tmp4), [tmp5]"=&f"(tmp5), [tmp6]"=&f"(tmp6),
00324                       [temp1]"=&f"(temp1), [temp3]"=&f"(temp3), [temp4]"=&f"(temp4),
00325                       [pom]"=&f"(pom), [pom1]"=&f"(pom1), [pom2]"=&f"(pom2), [pom3]"=&f"(pom3)
00326                     : [w_re]"f"(w_re), [w_im]"f"(w_im),
00327                       [tmpz_i]"r"(tmpz_i),[tmpz_n2_i]"r"(tmpz_n2_i),
00328                       [tmpz_n34_i]"r"(tmpz_n34_i), [tmpz_n4_i]"r"(tmpz_n4_i)
00329                     : "memory"
00330                 );
00331                 w_re_ptr += step;
00332                 w_im_ptr -= step;
00333             }
00334         }
00335         step >>= 1;
00336         n4   <<= 1;
00337     }
00338 }
00339 
00344 static void ff_imdct_half_mips(FFTContext *s, FFTSample *output, const FFTSample *input)
00345 {
00346     int k, n8, n4, n2, n, j;
00347     const uint16_t *revtab = s->revtab;
00348     const FFTSample *tcos = s->tcos;
00349     const FFTSample *tsin = s->tsin;
00350     const FFTSample *in1, *in2, *in3, *in4;
00351     FFTComplex *z = (FFTComplex *)output;
00352 
00353     int j1;
00354     const float *tcos1, *tsin1, *tcos2, *tsin2;
00355     float temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8,
00356         temp9, temp10, temp11, temp12, temp13, temp14, temp15, temp16;
00357     FFTComplex *z1, *z2;
00358 
00359     n = 1 << s->mdct_bits;
00360     n2 = n >> 1;
00361     n4 = n >> 2;
00362     n8 = n >> 3;
00363 
00364     
00365     in1 = input;
00366     in2 = input + n2 - 1;
00367     in3 = input + 2;
00368     in4 = input + n2 - 3;
00369 
00370     tcos1 = tcos;
00371     tsin1 = tsin;
00372 
00373     
00374     for(k = 0; k < n4; k += 2) {
00375         j  = revtab[k    ];
00376         j1 = revtab[k + 1];
00377 
00378         __asm__ volatile (
00379             "lwc1           %[temp1],       0(%[in2])                           \t\n"
00380             "lwc1           %[temp2],       0(%[tcos1])                         \t\n"
00381             "lwc1           %[temp3],       0(%[tsin1])                         \t\n"
00382             "lwc1           %[temp4],       0(%[in1])                           \t\n"
00383             "lwc1           %[temp5],       0(%[in4])                           \t\n"
00384             "mul.s          %[temp9],       %[temp1],   %[temp2]                \t\n"
00385             "mul.s          %[temp10],      %[temp1],   %[temp3]                \t\n"
00386             "lwc1           %[temp6],       4(%[tcos1])                         \t\n"
00387             "lwc1           %[temp7],       4(%[tsin1])                         \t\n"
00388             "nmsub.s        %[temp9],       %[temp9],   %[temp4],   %[temp3]    \t\n"
00389             "madd.s         %[temp10],      %[temp10],  %[temp4],   %[temp2]    \t\n"
00390             "mul.s          %[temp11],      %[temp5],   %[temp6]                \t\n"
00391             "mul.s          %[temp12],      %[temp5],   %[temp7]                \t\n"
00392             "lwc1           %[temp8],       0(%[in3])                           \t\n"
00393             "addiu          %[tcos1],       %[tcos1],   8                       \t\n"
00394             "addiu          %[tsin1],       %[tsin1],   8                       \t\n"
00395             "addiu          %[in1],         %[in1],     16                      \t\n"
00396             "nmsub.s        %[temp11],      %[temp11],  %[temp8],   %[temp7]    \t\n"
00397             "madd.s         %[temp12],      %[temp12],  %[temp8],   %[temp6]    \t\n"
00398             "addiu          %[in2],         %[in2],     -16                     \t\n"
00399             "addiu          %[in3],         %[in3],     16                      \t\n"
00400             "addiu          %[in4],         %[in4],     -16                     \t\n"
00401 
00402             : [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
00403               [temp3]"=&f"(temp3), [temp4]"=&f"(temp4),
00404               [temp5]"=&f"(temp5), [temp6]"=&f"(temp6),
00405               [temp7]"=&f"(temp7), [temp8]"=&f"(temp8),
00406               [temp9]"=&f"(temp9), [temp10]"=&f"(temp10),
00407               [temp11]"=&f"(temp11), [temp12]"=&f"(temp12),
00408               [tsin1]"+r"(tsin1), [tcos1]"+r"(tcos1),
00409               [in1]"+r"(in1), [in2]"+r"(in2),
00410               [in3]"+r"(in3), [in4]"+r"(in4)
00411         );
00412 
00413         z[j ].re = temp9;
00414         z[j ].im = temp10;
00415         z[j1].re = temp11;
00416         z[j1].im = temp12;
00417     }
00418 
00419     s->fft_calc(s, z);
00420 
00421     
00422     
00423     for(k = 0; k < n8; k += 2) {
00424         tcos1 = &tcos[n8 - k - 2];
00425         tsin1 = &tsin[n8 - k - 2];
00426         tcos2 = &tcos[n8 + k];
00427         tsin2 = &tsin[n8 + k];
00428         z1 = &z[n8 - k - 2];
00429         z2 = &z[n8 + k    ];
00430 
00431         __asm__ volatile (
00432             "lwc1       %[temp1],   12(%[z1])                           \t\n"
00433             "lwc1       %[temp2],   4(%[tsin1])                         \t\n"
00434             "lwc1       %[temp3],   4(%[tcos1])                         \t\n"
00435             "lwc1       %[temp4],   8(%[z1])                            \t\n"
00436             "lwc1       %[temp5],   4(%[z1])                            \t\n"
00437             "mul.s      %[temp9],   %[temp1],   %[temp2]                \t\n"
00438             "mul.s      %[temp10],  %[temp1],   %[temp3]                \t\n"
00439             "lwc1       %[temp6],   0(%[tsin1])                         \t\n"
00440             "lwc1       %[temp7],   0(%[tcos1])                         \t\n"
00441             "nmsub.s    %[temp9],   %[temp9],   %[temp4],   %[temp3]    \t\n"
00442             "madd.s     %[temp10],  %[temp10],  %[temp4],   %[temp2]    \t\n"
00443             "mul.s      %[temp11],  %[temp5],   %[temp6]                \t\n"
00444             "mul.s      %[temp12],  %[temp5],   %[temp7]                \t\n"
00445             "lwc1       %[temp8],   0(%[z1])                            \t\n"
00446             "lwc1       %[temp1],   4(%[z2])                            \t\n"
00447             "lwc1       %[temp2],   0(%[tsin2])                         \t\n"
00448             "lwc1       %[temp3],   0(%[tcos2])                         \t\n"
00449             "nmsub.s    %[temp11],  %[temp11],  %[temp8],   %[temp7]    \t\n"
00450             "madd.s     %[temp12],  %[temp12],  %[temp8],   %[temp6]    \t\n"
00451             "mul.s      %[temp13],  %[temp1],   %[temp2]                \t\n"
00452             "mul.s      %[temp14],  %[temp1],   %[temp3]                \t\n"
00453             "lwc1       %[temp4],   0(%[z2])                            \t\n"
00454             "lwc1       %[temp5],   12(%[z2])                           \t\n"
00455             "lwc1       %[temp6],   4(%[tsin2])                         \t\n"
00456             "lwc1       %[temp7],   4(%[tcos2])                         \t\n"
00457             "nmsub.s    %[temp13],  %[temp13],  %[temp4],   %[temp3]    \t\n"
00458             "madd.s     %[temp14],  %[temp14],  %[temp4],   %[temp2]    \t\n"
00459             "mul.s      %[temp15],  %[temp5],   %[temp6]                \t\n"
00460             "mul.s      %[temp16],  %[temp5],   %[temp7]                \t\n"
00461             "lwc1       %[temp8],   8(%[z2])                            \t\n"
00462             "nmsub.s    %[temp15],  %[temp15],  %[temp8],   %[temp7]    \t\n"
00463             "madd.s     %[temp16],  %[temp16],  %[temp8],   %[temp6]    \t\n"
00464             : [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
00465               [temp3]"=&f"(temp3), [temp4]"=&f"(temp4),
00466               [temp5]"=&f"(temp5), [temp6]"=&f"(temp6),
00467               [temp7]"=&f"(temp7), [temp8]"=&f"(temp8),
00468               [temp9]"=&f"(temp9), [temp10]"=&f"(temp10),
00469               [temp11]"=&f"(temp11), [temp12]"=&f"(temp12),
00470               [temp13]"=&f"(temp13), [temp14]"=&f"(temp14),
00471               [temp15]"=&f"(temp15), [temp16]"=&f"(temp16)
00472             : [z1]"r"(z1), [z2]"r"(z2),
00473               [tsin1]"r"(tsin1), [tcos1]"r"(tcos1),
00474               [tsin2]"r"(tsin2), [tcos2]"r"(tcos2)
00475         );
00476 
00477         z1[1].re = temp9;
00478         z1[1].im = temp14;
00479         z2[0].re = temp13;
00480         z2[0].im = temp10;
00481 
00482         z1[0].re = temp11;
00483         z1[0].im = temp16;
00484         z2[1].re = temp15;
00485         z2[1].im = temp12;
00486     }
00487 }
00488 #endif 
00489 
00495 static void ff_imdct_calc_mips(FFTContext *s, FFTSample *output, const FFTSample *input)
00496 {
00497     int k;
00498     int n = 1 << s->mdct_bits;
00499     int n2 = n >> 1;
00500     int n4 = n >> 2;
00501 
00502     ff_imdct_half_mips(s, output+n4, input);
00503 
00504     for(k = 0; k < n4; k+=4) {
00505         output[k] = -output[n2-k-1];
00506         output[k+1] = -output[n2-k-2];
00507         output[k+2] = -output[n2-k-3];
00508         output[k+3] = -output[n2-k-4];
00509 
00510         output[n-k-1] = output[n2+k];
00511         output[n-k-2] = output[n2+k+1];
00512         output[n-k-3] = output[n2+k+2];
00513         output[n-k-4] = output[n2+k+3];
00514     }
00515 }
00516 
00517 av_cold void ff_fft_init_mips(FFTContext *s)
00518 {
00519     int n=0;
00520 
00521     ff_fft_lut_init(fft_offsets_lut, 0, 1 << 16, &n);
00522 
00523 #if HAVE_INLINE_ASM
00524     s->fft_calc     = ff_fft_calc_mips;
00525 #endif
00526 #if CONFIG_MDCT
00527     s->imdct_calc   = ff_imdct_calc_mips;
00528     s->imdct_half   = ff_imdct_half_mips;
00529 #endif
00530 }