00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include "libavcodec/dsputil.h"
00024 #include "sh4.h"
00025
/*
 * Scaled cosine constants, IDCT_Ck = sqrt(2) * cos(k * pi / 16).
 * They are only needed while the two coefficient tables are being
 * initialised and are undefined again right after.
 */
#define IDCT_C1 1.38703984532214752434
#define IDCT_C2 1.30656296487637657577
#define IDCT_C3 1.17587560241935884520
#define IDCT_C4 1.00000000000000000000
#define IDCT_C5 0.78569495838710234903
#define IDCT_C6 0.54119610014619712324
#define IDCT_C7 0.27589937928294311353

/*
 * 4x4 coefficient matrix applied to the even-indexed inputs (0, 2, 4, 6).
 * Stored as 16 consecutive floats, 8-byte aligned.
 */
static const float even_table[16] __attribute__ ((aligned(8))) = {
     IDCT_C4,  IDCT_C4,  IDCT_C4,  IDCT_C4,
     IDCT_C2,  IDCT_C6, -IDCT_C6, -IDCT_C2,
     IDCT_C4, -IDCT_C4, -IDCT_C4,  IDCT_C4,
     IDCT_C6, -IDCT_C2,  IDCT_C2, -IDCT_C6
};

/*
 * 4x4 coefficient matrix applied to the odd-indexed inputs (1, 3, 5, 7).
 * Same storage layout and alignment as even_table.
 */
static const float odd_table[16] __attribute__ ((aligned(8))) = {
     IDCT_C1,  IDCT_C3,  IDCT_C5,  IDCT_C7,
     IDCT_C3, -IDCT_C7, -IDCT_C1, -IDCT_C5,
     IDCT_C5, -IDCT_C1,  IDCT_C7,  IDCT_C3,
     IDCT_C7, -IDCT_C5,  IDCT_C3, -IDCT_C1
};

#undef IDCT_C1
#undef IDCT_C2
#undef IDCT_C3
#undef IDCT_C4
#undef IDCT_C5
#undef IDCT_C6
#undef IDCT_C7
00055
#if 1

/*
 * load_matrix(table): load the 16 consecutive floats starting at `table`
 * into the register pairs xd0, xd2, ..., xd14 using eight post-incrementing
 * fmov instructions.  The fschg before and after toggles the FPU transfer
 * size (see the SH-4 manual) so that each fmov moves one 64-bit pair and
 * the previous setting is restored afterwards.  The pointer is copied into
 * a local because the asm advances it.
 */
#define load_matrix(table) \
    do { \
        const float *t = (table); \
        __asm__ volatile( \
        " fschg\n" \
        " fmov @%0+,xd0\n" \
        " fmov @%0+,xd2\n" \
        " fmov @%0+,xd4\n" \
        " fmov @%0+,xd6\n" \
        " fmov @%0+,xd8\n" \
        " fmov @%0+,xd10\n" \
        " fmov @%0+,xd12\n" \
        " fmov @%0+,xd14\n" \
        " fschg\n" \
        : "+r"(t) \
        ); \
    } while (0)

/*
 * ftrv(): issue the "ftrv xmtrx,fv0" instruction on the vector held in
 * fr0..fr3 (declared by DEFREG), which is updated in place.
 * The expansion intentionally carries no trailing semicolon: the call
 * site supplies it, so `ftrv();` is exactly one statement and stays safe
 * inside an unbraced if/else.
 */
#define ftrv() \
    __asm__ volatile("ftrv xmtrx,fv0" \
                     : "+f"(fr0),"+f"(fr1),"+f"(fr2),"+f"(fr3))

/* Declare the four vector components, pinned to hardware registers fr0..fr3. */
#define DEFREG \
    register float fr0 __asm__("fr0"); \
    register float fr1 __asm__("fr1"); \
    register float fr2 __asm__("fr2"); \
    register float fr3 __asm__("fr3")

#else

/*
 * Plain C stand-in for ftrv(): multiply the 4-vector fv in place by the
 * 4x4 matrix xf, where xf[4*j + i] is the weight of input j in output i.
 */
static void ftrv_(const float xf[],float fv[])
{
    float f0,f1,f2,f3;

    /* Snapshot the inputs first; fv is overwritten below. */
    f0 = fv[0];
    f1 = fv[1];
    f2 = fv[2];
    f3 = fv[3];
    fv[0] = xf[0]*f0 + xf[4]*f1 + xf[ 8]*f2 + xf[12]*f3;
    fv[1] = xf[1]*f0 + xf[5]*f1 + xf[ 9]*f2 + xf[13]*f3;
    fv[2] = xf[2]*f0 + xf[6]*f1 + xf[10]*f2 + xf[14]*f3;
    fv[3] = xf[3]*f0 + xf[7]*f1 + xf[11]*f2 + xf[15]*f3;
}

/* Plain C stand-in for load_matrix(): copy the 16 table entries into xf. */
static void load_matrix_(float xf[],const float table[])
{
    int i;
    for(i=0;i<16;i++) xf[i]=table[i];
}

#define ftrv() ftrv_(xf,fv)
#define load_matrix(table) load_matrix_(xf,table)

/* In the C fallback the vector and the matrix live in ordinary locals. */
#define DEFREG \
    float fv[4],xf[16]

#define fr0 fv[0]
#define fr1 fv[1]
#define fr2 fv[2]
#define fr3 fv[3]

#endif
00121
/*
 * DESCALE(x, n): scale x down by 2^n.
 *
 * Active variant: multiply by the float reciprocal 1/2^n; the result is a
 * float, and any narrowing happens at the assignment in the caller.
 * Disabled variant: convert to int, add half of 2^n and shift right by n.
 *
 * Both expansions are fully parenthesised so the macro can be used as an
 * operand of a larger expression (e.g. `a / DESCALE(b, n)`) and with
 * compound arguments without changing operator grouping.
 */
#if 1
#define DESCALE(x,n) ((x)*(1.0f/(1<<(n))))
#else
#define DESCALE(x,n) (((int)(x)+(1<<((n)-1)))>>(n))
#endif
00127
00128
00129
00130
00131 #if 1
00132
00133
00134 void idct_sh4(DCTELEM *block)
00135 {
00136 DEFREG;
00137
00138 int i;
00139 float tblock[8*8],*fblock;
00140 int ofs1,ofs2,ofs3;
00141 int fpscr;
00142
00143 fp_single_enter(fpscr);
00144
00145
00146
00147
00148 load_matrix(even_table);
00149
00150 fblock = tblock+4;
00151 i = 8;
00152 do {
00153 fr0 = block[0];
00154 fr1 = block[2];
00155 fr2 = block[4];
00156 fr3 = block[6];
00157 block+=8;
00158 ftrv();
00159 *--fblock = fr3;
00160 *--fblock = fr2;
00161 *--fblock = fr1;
00162 *--fblock = fr0;
00163 fblock+=8+4;
00164 } while(--i);
00165 block-=8*8;
00166 fblock-=8*8+4;
00167
00168 load_matrix(odd_table);
00169
00170 i = 8;
00171
00172 do {
00173 float t0,t1,t2,t3;
00174 fr0 = block[1];
00175 fr1 = block[3];
00176 fr2 = block[5];
00177 fr3 = block[7];
00178 block+=8;
00179 ftrv();
00180 t0 = *fblock++;
00181 t1 = *fblock++;
00182 t2 = *fblock++;
00183 t3 = *fblock++;
00184 fblock+=4;
00185 *--fblock = t0 - fr0;
00186 *--fblock = t1 - fr1;
00187 *--fblock = t2 - fr2;
00188 *--fblock = t3 - fr3;
00189 *--fblock = t3 + fr3;
00190 *--fblock = t2 + fr2;
00191 *--fblock = t1 + fr1;
00192 *--fblock = t0 + fr0;
00193 fblock+=8;
00194 } while(--i);
00195 block-=8*8;
00196 fblock-=8*8;
00197
00198
00199
00200
00201 load_matrix(even_table);
00202
00203 ofs1 = sizeof(float)*2*8;
00204 ofs2 = sizeof(float)*4*8;
00205 ofs3 = sizeof(float)*6*8;
00206
00207 i = 8;
00208
00209 #define OA(fblock,ofs) *(float*)((char*)fblock + ofs)
00210
00211 do {
00212 fr0 = OA(fblock, 0);
00213 fr1 = OA(fblock,ofs1);
00214 fr2 = OA(fblock,ofs2);
00215 fr3 = OA(fblock,ofs3);
00216 ftrv();
00217 OA(fblock,0 ) = fr0;
00218 OA(fblock,ofs1) = fr1;
00219 OA(fblock,ofs2) = fr2;
00220 OA(fblock,ofs3) = fr3;
00221 fblock++;
00222 } while(--i);
00223 fblock-=8;
00224
00225 load_matrix(odd_table);
00226
00227 i=8;
00228 do {
00229 float t0,t1,t2,t3;
00230 t0 = OA(fblock, 0);
00231 t1 = OA(fblock,ofs1);
00232 t2 = OA(fblock,ofs2);
00233 t3 = OA(fblock,ofs3);
00234 fblock+=8;
00235 fr0 = OA(fblock, 0);
00236 fr1 = OA(fblock,ofs1);
00237 fr2 = OA(fblock,ofs2);
00238 fr3 = OA(fblock,ofs3);
00239 fblock+=-8+1;
00240 ftrv();
00241 block[8*0] = DESCALE(t0 + fr0,3);
00242 block[8*7] = DESCALE(t0 - fr0,3);
00243 block[8*1] = DESCALE(t1 + fr1,3);
00244 block[8*6] = DESCALE(t1 - fr1,3);
00245 block[8*2] = DESCALE(t2 + fr2,3);
00246 block[8*5] = DESCALE(t2 - fr2,3);
00247 block[8*3] = DESCALE(t3 + fr3,3);
00248 block[8*4] = DESCALE(t3 - fr3,3);
00249 block++;
00250 } while(--i);
00251
00252 fp_single_leave(fpscr);
00253 }
00254 #else
00255 void idct_sh4(DCTELEM *block)
00256 {
00257 DEFREG;
00258
00259 int i;
00260 float tblock[8*8],*fblock;
00261
00262
00263
00264
00265 load_matrix(even_table);
00266
00267 fblock = tblock;
00268 i = 8;
00269 do {
00270 fr0 = block[0];
00271 fr1 = block[2];
00272 fr2 = block[4];
00273 fr3 = block[6];
00274 block+=8;
00275 ftrv();
00276 fblock[0] = fr0;
00277 fblock[2] = fr1;
00278 fblock[4] = fr2;
00279 fblock[6] = fr3;
00280 fblock+=8;
00281 } while(--i);
00282 block-=8*8;
00283 fblock-=8*8;
00284
00285 load_matrix(odd_table);
00286
00287 i = 8;
00288
00289 do {
00290 float t0,t1,t2,t3;
00291 fr0 = block[1];
00292 fr1 = block[3];
00293 fr2 = block[5];
00294 fr3 = block[7];
00295 block+=8;
00296 ftrv();
00297 t0 = fblock[0];
00298 t1 = fblock[2];
00299 t2 = fblock[4];
00300 t3 = fblock[6];
00301 fblock[0] = t0 + fr0;
00302 fblock[7] = t0 - fr0;
00303 fblock[1] = t1 + fr1;
00304 fblock[6] = t1 - fr1;
00305 fblock[2] = t2 + fr2;
00306 fblock[5] = t2 - fr2;
00307 fblock[3] = t3 + fr3;
00308 fblock[4] = t3 - fr3;
00309 fblock+=8;
00310 } while(--i);
00311 block-=8*8;
00312 fblock-=8*8;
00313
00314
00315
00316
00317 load_matrix(even_table);
00318
00319 i = 8;
00320
00321 do {
00322 fr0 = fblock[8*0];
00323 fr1 = fblock[8*2];
00324 fr2 = fblock[8*4];
00325 fr3 = fblock[8*6];
00326 ftrv();
00327 fblock[8*0] = fr0;
00328 fblock[8*2] = fr1;
00329 fblock[8*4] = fr2;
00330 fblock[8*6] = fr3;
00331 fblock++;
00332 } while(--i);
00333 fblock-=8;
00334
00335 load_matrix(odd_table);
00336
00337 i=8;
00338 do {
00339 float t0,t1,t2,t3;
00340 fr0 = fblock[8*1];
00341 fr1 = fblock[8*3];
00342 fr2 = fblock[8*5];
00343 fr3 = fblock[8*7];
00344 ftrv();
00345 t0 = fblock[8*0];
00346 t1 = fblock[8*2];
00347 t2 = fblock[8*4];
00348 t3 = fblock[8*6];
00349 fblock++;
00350 block[8*0] = DESCALE(t0 + fr0,3);
00351 block[8*7] = DESCALE(t0 - fr0,3);
00352 block[8*1] = DESCALE(t1 + fr1,3);
00353 block[8*6] = DESCALE(t1 - fr1,3);
00354 block[8*2] = DESCALE(t2 + fr2,3);
00355 block[8*5] = DESCALE(t2 - fr2,3);
00356 block[8*3] = DESCALE(t3 + fr3,3);
00357 block[8*4] = DESCALE(t3 - fr3,3);
00358 block++;
00359 } while(--i);
00360 }
00361 #endif