00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00026 #include "libavutil/x86/asm.h"
00027
00028
00029
00030
00031
00032
/*
 * Template configuration.
 *
 * The including file selects a variant by defining one of the TEMPLATE_PP_*
 * symbols. Each section below then:
 *   - defines RENAME() so that function names get a per-variant suffix, and
 *   - defines the symbol to 0 when it was not set, so that it can be tested
 *     with plain #if further down.
 * MMXEXT and 3DNOW additionally force TEMPLATE_PP_MMX to 1; SSE2 forces both
 * TEMPLATE_PP_MMX and TEMPLATE_PP_MMXEXT to 1.
 */
00033 #ifdef TEMPLATE_PP_C
00034 # define RENAME(a) a ## _C
00035 #else
00036 # define TEMPLATE_PP_C 0
00037 #endif
00038
00039 #ifdef TEMPLATE_PP_ALTIVEC
00040 # define RENAME(a) a ## _altivec
00041 #else
00042 # define TEMPLATE_PP_ALTIVEC 0
00043 #endif
00044
00045 #ifdef TEMPLATE_PP_MMX
00046 # define RENAME(a) a ## _MMX
00047 #else
00048 # define TEMPLATE_PP_MMX 0
00049 #endif
00050
00051 #ifdef TEMPLATE_PP_MMXEXT
00052 # undef TEMPLATE_PP_MMX
00053 # define TEMPLATE_PP_MMX 1
00054 # define RENAME(a) a ## _MMX2
00055 #else
00056 # define TEMPLATE_PP_MMXEXT 0
00057 #endif
00058
00059 #ifdef TEMPLATE_PP_3DNOW
00060 # undef TEMPLATE_PP_MMX
00061 # define TEMPLATE_PP_MMX 1
00062 # define RENAME(a) a ## _3DNow
00063 #else
00064 # define TEMPLATE_PP_3DNOW 0
00065 #endif
00066
00067 #ifdef TEMPLATE_PP_SSE2
00068 # undef TEMPLATE_PP_MMX
00069 # define TEMPLATE_PP_MMX 1
00070 # undef TEMPLATE_PP_MMXEXT
00071 # define TEMPLATE_PP_MMXEXT 1
00072 # define RENAME(a) a ## _SSE2
00073 #else
00074 # define TEMPLATE_PP_SSE2 0
00075 #endif
00076 // Drop the helper macros of a previously instantiated variant before redefining them.
00077 #undef REAL_PAVGB
00078 #undef PAVGB
00079 #undef PMINUB
00080 #undef PMAXUB
00081 // PAVGB(a,b): asm text for "b = byte-wise average of a and b" (pavgb on MMXEXT, pavgusb on 3DNow!).
00082 #if TEMPLATE_PP_MMXEXT
00083 #define REAL_PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
00084 #elif TEMPLATE_PP_3DNOW
00085 #define REAL_PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
00086 #endif
00087 #define PAVGB(a,b) REAL_PAVGB(a,b) // REAL_PAVGB stays undefined when neither MMXEXT nor 3DNOW is set; the extra level presumably lets arguments such as REGa be macro-expanded before stringizing
00088 // PMINUB(x,dst,tmp): dst = unsigned byte-wise min(x, dst). The MMXEXT form ignores tmp.
00089 #if TEMPLATE_PP_MMXEXT
00090 #define PMINUB(a,b,t) "pminub " #a ", " #b " \n\t"
00091 #elif TEMPLATE_PP_MMX
00092 #define PMINUB(b,a,t) \
00093 "movq " #a ", " #t " \n\t"\
00094 "psubusb " #b ", " #t " \n\t"\
00095 "psubb " #t ", " #a " \n\t"
00096 #endif
00097 // The plain-MMX PMINUB above names its parameters (b,a,t): t = max(a-b,0) saturated, then a -= t, so the 2nd argument ends up holding the minimum and the 3rd is clobbered.
00098 #if TEMPLATE_PP_MMXEXT
00099 #define PMAXUB(a,b) "pmaxub " #a ", " #b " \n\t"
00100 #elif TEMPLATE_PP_MMX
00101 #define PMAXUB(a,b) \
00102 "psubusb " #a ", " #b " \n\t"\
00103 "paddb " #a ", " #b " \n\t"
00104 #endif
00105
00106
00107 #if TEMPLATE_PP_MMX
00108
/**
 * Classify the 8 rows (8 bytes wide) that start 4 rows below src.
 *
 * Each of the 7 adjacent row pairs is compared byte-wise: a byte counts as
 * "equal" when (upper - lower + mmxDcOffset) > mmxDcThreshold in wrapping,
 * signed byte arithmetic; both constants are picked by c->nonBQP. The
 * per-column unsigned minimum and maximum over the 8 rows are tracked as well.
 *
 * @param src    pointer 4 rows above the first row that is examined
 * @param stride distance in bytes between consecutive rows
 * @param c      context; reads mmxDcOffset, mmxDcThreshold, nonBQP, pQPb and
 *               ppMode.flatnessThreshold
 * @return 2 if the number of "equal" bytes is not above
 *         c->ppMode.flatnessThreshold; otherwise 0 if at least one column has
 *         (max - min) greater than twice its byte of c->pQPb (saturating), and
 *         1 if no column does.
 *
 * NOTE(review): dcOk is non-zero exactly when some column EXCEEDS the range
 * limit, which reads as the opposite of its name; confirm the meaning of the
 * 0/1 return values against the callers.
 */
00111 static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){
00112 int numEq= 0, dcOk;
00113 src+= stride*4; // skip 4 rows; "row k" below means src + k*stride after this advance
00114 __asm__ volatile(
00115 "movq %0, %%mm7 \n\t" // mm7 = offset added to every row difference
00116 "movq %1, %%mm6 \n\t" // mm6 = threshold the biased difference is compared against
00117 : : "m" (c->mmxDcOffset[c->nonBQP]), "m" (c->mmxDcThreshold[c->nonBQP])
00118 );
00119 // mm6/mm7 are expected to survive into the next asm statement.
00120 __asm__ volatile(
00121 "lea (%2, %3), %%"REG_a" \n\t" // REG_a = address of row 1
00122 // Register roles: mm0 = accumulated compare masks (0xFF == -1 per hit),
00123 // mm3 = running per-column minimum, mm4 = running per-column maximum,
00124 // mm5 = scratch for the plain-MMX PMINUB.
00125 "movq (%2), %%mm0 \n\t" // row 0
00126 "movq (%%"REG_a"), %%mm1 \n\t" // row 1
00127 "movq %%mm0, %%mm3 \n\t"
00128 "movq %%mm0, %%mm4 \n\t"
00129 PMAXUB(%%mm1, %%mm4)
00130 PMINUB(%%mm1, %%mm3, %%mm5)
00131 "psubb %%mm1, %%mm0 \n\t" // row 0 - row 1
00132 "paddb %%mm7, %%mm0 \n\t"
00133 "pcmpgtb %%mm6, %%mm0 \n\t" // signed compare against the threshold
00134
00135 "movq (%%"REG_a",%3), %%mm2 \n\t" // row 2
00136 PMAXUB(%%mm2, %%mm4)
00137 PMINUB(%%mm2, %%mm3, %%mm5)
00138 "psubb %%mm2, %%mm1 \n\t" // row 1 - row 2
00139 "paddb %%mm7, %%mm1 \n\t"
00140 "pcmpgtb %%mm6, %%mm1 \n\t"
00141 "paddb %%mm1, %%mm0 \n\t"
00142
00143 "movq (%%"REG_a", %3, 2), %%mm1 \n\t" // row 3
00144 PMAXUB(%%mm1, %%mm4)
00145 PMINUB(%%mm1, %%mm3, %%mm5)
00146 "psubb %%mm1, %%mm2 \n\t" // row 2 - row 3
00147 "paddb %%mm7, %%mm2 \n\t"
00148 "pcmpgtb %%mm6, %%mm2 \n\t"
00149 "paddb %%mm2, %%mm0 \n\t"
00150
00151 "lea (%%"REG_a", %3, 4), %%"REG_a" \n\t" // REG_a = address of row 5
00152
00153 "movq (%2, %3, 4), %%mm2 \n\t" // row 4
00154 PMAXUB(%%mm2, %%mm4)
00155 PMINUB(%%mm2, %%mm3, %%mm5)
00156 "psubb %%mm2, %%mm1 \n\t" // row 3 - row 4
00157 "paddb %%mm7, %%mm1 \n\t"
00158 "pcmpgtb %%mm6, %%mm1 \n\t"
00159 "paddb %%mm1, %%mm0 \n\t"
00160
00161 "movq (%%"REG_a"), %%mm1 \n\t" // row 5
00162 PMAXUB(%%mm1, %%mm4)
00163 PMINUB(%%mm1, %%mm3, %%mm5)
00164 "psubb %%mm1, %%mm2 \n\t" // row 4 - row 5
00165 "paddb %%mm7, %%mm2 \n\t"
00166 "pcmpgtb %%mm6, %%mm2 \n\t"
00167 "paddb %%mm2, %%mm0 \n\t"
00168
00169 "movq (%%"REG_a", %3), %%mm2 \n\t" // row 6
00170 PMAXUB(%%mm2, %%mm4)
00171 PMINUB(%%mm2, %%mm3, %%mm5)
00172 "psubb %%mm2, %%mm1 \n\t" // row 5 - row 6
00173 "paddb %%mm7, %%mm1 \n\t"
00174 "pcmpgtb %%mm6, %%mm1 \n\t"
00175 "paddb %%mm1, %%mm0 \n\t"
00176
00177 "movq (%%"REG_a", %3, 2), %%mm1 \n\t" // row 7
00178 PMAXUB(%%mm1, %%mm4)
00179 PMINUB(%%mm1, %%mm3, %%mm5)
00180 "psubb %%mm1, %%mm2 \n\t" // row 6 - row 7
00181 "paddb %%mm7, %%mm2 \n\t"
00182 "pcmpgtb %%mm6, %%mm2 \n\t"
00183 "paddb %%mm2, %%mm0 \n\t"
00184 "psubusb %%mm3, %%mm4 \n\t" // mm4 = per-column (max - min)
00185 // Horizontal sum of the 8 mask bytes in mm0; only the low byte is used afterwards.
00186 " \n\t"
00187 #if TEMPLATE_PP_MMXEXT
00188 "pxor %%mm7, %%mm7 \n\t"
00189 "psadbw %%mm7, %%mm0 \n\t" // sum of absolute differences against 0 == sum of bytes
00190 #else
00191 "movq %%mm0, %%mm1 \n\t"
00192 "psrlw $8, %%mm0 \n\t" // fold byte pairs ...
00193 "paddb %%mm1, %%mm0 \n\t"
00194 "movq %%mm0, %%mm1 \n\t"
00195 "psrlq $16, %%mm0 \n\t" // ... then word pairs ...
00196 "paddb %%mm1, %%mm0 \n\t"
00197 "movq %%mm0, %%mm1 \n\t"
00198 "psrlq $32, %%mm0 \n\t" // ... then the two dwords; low byte = sum mod 256
00199 "paddb %%mm1, %%mm0 \n\t"
00200 #endif
00201 "movq %4, %%mm7 \n\t" // c->pQPb
00202 "paddusb %%mm7, %%mm7 \n\t" // doubled, saturating
00203 "psubusb %%mm7, %%mm4 \n\t" // 0 where (max - min) <= 2*pQPb byte
00204 "packssdw %%mm4, %%mm4 \n\t" // squeeze both dwords into the low 32 bits so one movd sees all 8 columns
00205 "movd %%mm0, %0 \n\t"
00206 "movd %%mm4, %1 \n\t"
00207
00208 : "=r" (numEq), "=r" (dcOk)
00209 : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb)
00210 : "%"REG_a
00211 );
00212
00213 numEq= (-numEq) &0xFF; // each hit added -1, so negate and keep the low byte to get the count
00214 if(numEq > c->ppMode.flatnessThreshold){
00215 if(dcOk) return 0;
00216 else return 1;
00217 }else{
00218 return 2;
00219 }
00220 }
00221 #endif //TEMPLATE_PP_MMX
00222
00227 #if !TEMPLATE_PP_ALTIVEC
/**
 * Vertical low-pass filter over an 8-byte-wide column block, applied in place.
 *
 * After advancing src by 3 rows, rows 1..8 (src + k*stride) are rewritten from
 * rows 0..9. The C fallback computes, per column, a 16-weight average built
 * from sliding 7-sample sums in which samples beyond the block edge are
 * replaced by "first" (above) or "last" (below). The MMXEXT/3DNow! path builds
 * its results from chained byte averages (PAVGB) instead.
 *
 * @param src    pointer 3 rows above row 0 of the filtered area
 * @param stride distance in bytes between consecutive rows
 * @param c      context; the asm path reads c->pQPb, the C path reads c->QP
 *
 * NOTE(review): the asm path selects "first"/"last" with |diff| <= pQPb byte,
 * the C path with |diff| < QP; if pQPb holds QP in every byte (not visible
 * here) the two differ when |diff| == QP. The PAVGB chain also rounds at every
 * step, so it is presumably only an approximation of the C arithmetic - confirm.
 */
00228 static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
00229 {
00230 #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
00231 src+= stride*3;
00232 __asm__ volatile(
00233 "movq %2, %%mm0 \n\t" // mm0 = c->pQPb
00234 "pxor %%mm4, %%mm4 \n\t" // mm4 = 0
00235 // --- "first": row 0 where |row0 - row1| <= pQPb, else row 1 ---
00236 "movq (%0), %%mm6 \n\t" // row 0
00237 "movq (%0, %1), %%mm5 \n\t" // row 1
00238 "movq %%mm5, %%mm1 \n\t"
00239 "movq %%mm6, %%mm2 \n\t"
00240 "psubusb %%mm6, %%mm5 \n\t"
00241 "psubusb %%mm1, %%mm2 \n\t"
00242 "por %%mm5, %%mm2 \n\t" // |row0 - row1|
00243 "psubusb %%mm0, %%mm2 \n\t"
00244 "pcmpeqb %%mm4, %%mm2 \n\t" // 0xFF where the difference is within pQPb
00245
00246 "pand %%mm2, %%mm6 \n\t"
00247 "pandn %%mm1, %%mm2 \n\t"
00248 "por %%mm2, %%mm6 \n\t" // mm6 = first
00249 // --- "last": row 9 where |row8 - row9| <= pQPb, else row 8 ---
00250 "movq (%0, %1, 8), %%mm5 \n\t" // row 8
00251 "lea (%0, %1, 4), %%"REG_a" \n\t" // REG_a = address of row 4
00252 "lea (%0, %1, 8), %%"REG_c" \n\t"
00253 "sub %1, %%"REG_c" \n\t" // REG_c = address of row 7
00254 "add %1, %0 \n\t" // %0 = address of row 1. NOTE(review): %0 is an input-only operand; it is restored by the final "sub %1, %0"
00255 "movq (%0, %1, 8), %%mm7 \n\t" // row 9
00256 "movq %%mm5, %%mm1 \n\t"
00257 "movq %%mm7, %%mm2 \n\t"
00258 "psubusb %%mm7, %%mm5 \n\t"
00259 "psubusb %%mm1, %%mm2 \n\t"
00260 "por %%mm5, %%mm2 \n\t" // |row8 - row9|
00261 "psubusb %%mm0, %%mm2 \n\t"
00262 "pcmpeqb %%mm4, %%mm2 \n\t"
00263
00264 "pand %%mm2, %%mm7 \n\t"
00265 "pandn %%mm1, %%mm2 \n\t"
00266 "por %%mm2, %%mm7 \n\t" // mm7 = last
00267
00268 // From here on the addressing is:
00269 //   (%0)          row 1    (%0, %1)       row 2    (%0, %1, 2)    row 3
00270 //   (REG_a)       row 4    (%0, %1, 4)    row 5    (REG_a, %1, 2) row 6
00271 //   (REG_c)       row 7    (REG_a, %1, 4) row 8
00272 // Each group below combines rows with PAVGB, stores one filtered row and
00273 // keeps the unfiltered value of any row that a later group still needs.
00274
00275 "movq (%0, %1), %%mm0 \n\t"
00276 "movq %%mm0, %%mm1 \n\t"
00277 PAVGB(%%mm6, %%mm0)
00278 PAVGB(%%mm6, %%mm0)
00279
00280 "movq (%0, %1, 4), %%mm2 \n\t"
00281 "movq %%mm2, %%mm5 \n\t"
00282 PAVGB((%%REGa), %%mm2)
00283 PAVGB((%0, %1, 2), %%mm2)
00284 "movq %%mm2, %%mm3 \n\t"
00285 "movq (%0), %%mm4 \n\t"
00286 PAVGB(%%mm4, %%mm3)
00287 PAVGB(%%mm0, %%mm3)
00288 "movq %%mm3, (%0) \n\t" // store row 1
00289
00290 "movq %%mm1, %%mm0 \n\t"
00291 PAVGB(%%mm6, %%mm0)
00292 "movq %%mm4, %%mm3 \n\t"
00293 PAVGB((%0,%1,2), %%mm3)
00294 PAVGB((%%REGa,%1,2), %%mm5)
00295 PAVGB((%%REGa), %%mm5)
00296 PAVGB(%%mm5, %%mm3)
00297 PAVGB(%%mm0, %%mm3)
00298 "movq %%mm3, (%0,%1) \n\t" // store row 2
00299
00300 PAVGB(%%mm4, %%mm6)
00301 "movq (%%"REG_c"), %%mm0 \n\t"
00302 PAVGB((%%REGa, %1, 2), %%mm0)
00303 "movq %%mm0, %%mm3 \n\t"
00304 PAVGB(%%mm1, %%mm0)
00305 PAVGB(%%mm6, %%mm0)
00306 PAVGB(%%mm2, %%mm0)
00307 "movq (%0, %1, 2), %%mm2 \n\t" // keep the unfiltered row 3 before overwriting it
00308 "movq %%mm0, (%0, %1, 2) \n\t" // store row 3
00309
00310 "movq (%%"REG_a", %1, 4), %%mm0 \n\t"
00311 PAVGB((%%REGc), %%mm0)
00312 PAVGB(%%mm0, %%mm6)
00313 PAVGB(%%mm1, %%mm4)
00314 PAVGB(%%mm2, %%mm1)
00315 PAVGB(%%mm1, %%mm6)
00316 PAVGB(%%mm5, %%mm6)
00317 "movq (%%"REG_a"), %%mm5 \n\t" // keep the unfiltered row 4 before overwriting it
00318 "movq %%mm6, (%%"REG_a") \n\t" // store row 4
00319
00320 "movq (%%"REG_a", %1, 4), %%mm6 \n\t"
00321 PAVGB(%%mm7, %%mm6)
00322 PAVGB(%%mm4, %%mm6)
00323 PAVGB(%%mm3, %%mm6)
00324 PAVGB(%%mm5, %%mm2)
00325 "movq (%0, %1, 4), %%mm4 \n\t" // keep the unfiltered row 5 before overwriting it
00326 PAVGB(%%mm4, %%mm2)
00327 PAVGB(%%mm2, %%mm6)
00328 "movq %%mm6, (%0, %1, 4) \n\t" // store row 5
00329
00330 PAVGB(%%mm7, %%mm1)
00331 PAVGB(%%mm4, %%mm5)
00332 PAVGB(%%mm5, %%mm0)
00333 "movq (%%"REG_a", %1, 2), %%mm6 \n\t" // keep the unfiltered row 6 before overwriting it
00334 PAVGB(%%mm6, %%mm1)
00335 PAVGB(%%mm0, %%mm1)
00336 "movq %%mm1, (%%"REG_a", %1, 2) \n\t" // store row 6
00337
00338 PAVGB((%%REGc), %%mm2)
00339 "movq (%%"REG_a", %1, 4), %%mm0 \n\t"
00340 PAVGB(%%mm0, %%mm6)
00341 PAVGB(%%mm7, %%mm6)
00342 PAVGB(%%mm2, %%mm6)
00343 "movq %%mm6, (%%"REG_c") \n\t" // store row 7
00344
00345 PAVGB(%%mm7, %%mm5)
00346 PAVGB(%%mm7, %%mm5)
00347
00348 PAVGB(%%mm3, %%mm0)
00349 PAVGB(%%mm0, %%mm5)
00350 "movq %%mm5, (%%"REG_a", %1, 4) \n\t" // store row 8
00351 "sub %1, %0 \n\t" // undo the earlier "add %1, %0"
00352
00353 :
00354 : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb)
00355 : "%"REG_a, "%"REG_c
00356 );
00357 #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
00358 const int l1= stride; // lk = byte offset of row k
00359 const int l2= stride + l1;
00360 const int l3= stride + l2;
00361 const int l4= stride + l3;
00362 const int l5= stride + l4;
00363 const int l6= stride + l5;
00364 const int l7= stride + l6;
00365 const int l8= stride + l7;
00366 const int l9= stride + l8;
00367 int x;
00368 src+= stride*3;
00369 for(x=0; x<BLOCK_SIZE; x++){ // one column per iteration
00370 const int first= FFABS(src[0] - src[l1]) < c->QP ? src[0] : src[l1]; // stand-in for the samples above row 1
00371 const int last= FFABS(src[l8] - src[l9]) < c->QP ? src[l9] : src[l8]; // stand-in for the samples below row 8
00372 // sums[k] is a sliding 7-sample sum; the +4 in sums[0] is inherited by every later sum, so sums[i] + sums[i+2] carries +8 for the rounding of the >>4 below.
00373 int sums[10];
00374 sums[0] = 4*first + src[l1] + src[l2] + src[l3] + 4;
00375 sums[1] = sums[0] - first + src[l4];
00376 sums[2] = sums[1] - first + src[l5];
00377 sums[3] = sums[2] - first + src[l6];
00378 sums[4] = sums[3] - first + src[l7];
00379 sums[5] = sums[4] - src[l1] + src[l8];
00380 sums[6] = sums[5] - src[l2] + last;
00381 sums[7] = sums[6] - src[l3] + last;
00382 sums[8] = sums[7] - src[l4] + last;
00383 sums[9] = sums[8] - src[l5] + last;
00384 // All sums were taken from unfiltered samples, and each statement reads only its own, not yet overwritten, row.
00385 src[l1]= (sums[0] + sums[2] + 2*src[l1])>>4;
00386 src[l2]= (sums[1] + sums[3] + 2*src[l2])>>4;
00387 src[l3]= (sums[2] + sums[4] + 2*src[l3])>>4;
00388 src[l4]= (sums[3] + sums[5] + 2*src[l4])>>4;
00389 src[l5]= (sums[4] + sums[6] + 2*src[l5])>>4;
00390 src[l6]= (sums[5] + sums[7] + 2*src[l6])>>4;
00391 src[l7]= (sums[6] + sums[8] + 2*src[l7])>>4;
00392 src[l8]= (sums[7] + sums[9] + 2*src[l8])>>4;
00393
00394 src++;
00395 }
00396 #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
00397 }
00398 #endif // !TEMPLATE_PP_ALTIVEC
00399
/**
 * In-place vertical filter across the edge between rows 4 and 5 of an
 * 8-byte-wide column block (rows counted from src after it is advanced by
 * 3 rows).
 *
 * Per column, with a = row3 - row4, b = row4 - row5, c = row5 - row6:
 *   d = max(|b| - (|a| + |c|)/2, 0)
 * If d is small compared with 2*QP, rows 2..7 are moved towards each other
 * across the edge by about d/8, d/4 and 3d/8 (rows 2/7, 3/6 and 4/5).
 *
 * @param src    pointer 3 rows above row 0
 * @param stride distance in bytes between consecutive rows
 * @param co     context; the asm path reads co->pQPb, the C path reads co->QP
 *
 * NOTE(review): the asm path accepts d <= 2*pQPb (saturating), subtracts the
 * external constant b01 from d and rounds each halving up, whereas the C path
 * uses d < 2*QP and arithmetic shifts - the two are presumably not bit-exact.
 */
00407 static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co)
00408 {
00409 #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
00410 src+= stride*3;
00411
00412 __asm__ volatile(
00413 "pxor %%mm7, %%mm7 \n\t" // mm7 = 0
00414 "lea (%0, %1), %%"REG_a" \n\t" // REG_a = address of row 1
00415 "lea (%%"REG_a", %1, 4), %%"REG_c" \n\t" // REG_c = address of row 5
00416 // Rows: (REG_a,%1) 2, (REG_a,%1,2) 3, (%0,%1,4) 4, (REG_c) 5, (REG_c,%1) 6, (REG_c,%1,2) 7
00417
00418 "movq (%%"REG_a", %1, 2), %%mm0 \n\t" // row 3
00419 "movq (%0, %1, 4), %%mm1 \n\t" // row 4
00420 "movq %%mm1, %%mm2 \n\t"
00421 "psubusb %%mm0, %%mm1 \n\t"
00422 "psubusb %%mm2, %%mm0 \n\t"
00423 "por %%mm1, %%mm0 \n\t" // |row3 - row4|
00424 "movq (%%"REG_c"), %%mm3 \n\t" // row 5
00425 "movq (%%"REG_c", %1), %%mm4 \n\t" // row 6
00426 "movq %%mm3, %%mm5 \n\t"
00427 "psubusb %%mm4, %%mm3 \n\t"
00428 "psubusb %%mm5, %%mm4 \n\t"
00429 "por %%mm4, %%mm3 \n\t" // |row5 - row6|
00430 PAVGB(%%mm3, %%mm0)
00431 "movq %%mm2, %%mm1 \n\t"
00432 "psubusb %%mm5, %%mm2 \n\t"
00433 "movq %%mm2, %%mm4 \n\t"
00434 "pcmpeqb %%mm7, %%mm2 \n\t" // mm2 = 0xFF where row4 <= row5 (direction mask)
00435 "psubusb %%mm1, %%mm5 \n\t"
00436 "por %%mm5, %%mm4 \n\t" // |row4 - row5|
00437 "psubusb %%mm0, %%mm4 \n\t" // d = max(|row4-row5| - avg(|row3-row4|, |row5-row6|), 0)
00438 "movq %%mm4, %%mm3 \n\t"
00439 "movq %2, %%mm0 \n\t" // co->pQPb
00440 "paddusb %%mm0, %%mm0 \n\t" // doubled, saturating
00441 "psubusb %%mm0, %%mm4 \n\t"
00442 "pcmpeqb %%mm7, %%mm4 \n\t" // 0xFF where d <= 2*pQPb
00443 "psubusb "MANGLE(b01)", %%mm3 \n\t" // b01 is defined outside this chunk
00444 "pand %%mm4, %%mm3 \n\t" // correction is zeroed where the test failed
00445
00446 PAVGB(%%mm7, %%mm3)
00447 "movq %%mm3, %%mm1 \n\t" // about d/2
00448 PAVGB(%%mm7, %%mm3)
00449 PAVGB(%%mm1, %%mm3)
00450 // mm3 is now about 3d/8. The pxor pairs below invert the row where the direction mask is set, so one saturating op acts as add or subtract as needed.
00451 "movq (%0, %1, 4), %%mm0 \n\t"
00452 "pxor %%mm2, %%mm0 \n\t"
00453 "psubusb %%mm3, %%mm0 \n\t"
00454 "pxor %%mm2, %%mm0 \n\t"
00455 "movq %%mm0, (%0, %1, 4) \n\t" // row 4
00456
00457 "movq (%%"REG_c"), %%mm0 \n\t"
00458 "pxor %%mm2, %%mm0 \n\t"
00459 "paddusb %%mm3, %%mm0 \n\t"
00460 "pxor %%mm2, %%mm0 \n\t"
00461 "movq %%mm0, (%%"REG_c") \n\t" // row 5
00462
00463 PAVGB(%%mm7, %%mm1)
00464 // mm1 is now about d/4
00465 "movq (%%"REG_a", %1, 2), %%mm0 \n\t"
00466 "pxor %%mm2, %%mm0 \n\t"
00467 "psubusb %%mm1, %%mm0 \n\t"
00468 "pxor %%mm2, %%mm0 \n\t"
00469 "movq %%mm0, (%%"REG_a", %1, 2) \n\t" // row 3
00470
00471 "movq (%%"REG_c", %1), %%mm0 \n\t"
00472 "pxor %%mm2, %%mm0 \n\t"
00473 "paddusb %%mm1, %%mm0 \n\t"
00474 "pxor %%mm2, %%mm0 \n\t"
00475 "movq %%mm0, (%%"REG_c", %1) \n\t" // row 6
00476
00477 PAVGB(%%mm7, %%mm1)
00478 // mm1 is now about d/8
00479 "movq (%%"REG_a", %1), %%mm0 \n\t"
00480 "pxor %%mm2, %%mm0 \n\t"
00481 "psubusb %%mm1, %%mm0 \n\t"
00482 "pxor %%mm2, %%mm0 \n\t"
00483 "movq %%mm0, (%%"REG_a", %1) \n\t" // row 2
00484
00485 "movq (%%"REG_c", %1, 2), %%mm0 \n\t"
00486 "pxor %%mm2, %%mm0 \n\t"
00487 "paddusb %%mm1, %%mm0 \n\t"
00488 "pxor %%mm2, %%mm0 \n\t"
00489 "movq %%mm0, (%%"REG_c", %1, 2) \n\t" // row 7
00490
00491 :
00492 : "r" (src), "r" ((x86_reg)stride), "m" (co->pQPb)
00493 : "%"REG_a, "%"REG_c
00494 );
00495 #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
00496
00497 const int l1= stride; // lk = byte offset of row k
00498 const int l2= stride + l1;
00499 const int l3= stride + l2;
00500 const int l4= stride + l3;
00501 const int l5= stride + l4;
00502 const int l6= stride + l5;
00503 const int l7= stride + l6;
00504
00505
00506 int x;
00507
00508 src+= stride*3;
00509 for(x=0; x<BLOCK_SIZE; x++){ // one column per iteration
00510 int a= src[l3] - src[l4];
00511 int b= src[l4] - src[l5]; // step across the edge
00512 int c= src[l5] - src[l6];
00513
00514 int d= FFABS(b) - ((FFABS(a) + FFABS(c))>>1); // how much the edge step exceeds the mean of its neighbours
00515 d= FFMAX(d, 0);
00516
00517 if(d < co->QP*2){
00518 int v = d * FFSIGN(-b); // signed correction, oriented by FFSIGN(-b)
00519
00520 src[l2] +=v>>3;
00521 src[l3] +=v>>2;
00522 src[l4] +=(3*v)>>3;
00523 src[l5] -=(3*v)>>3;
00524 src[l6] -=v>>2;
00525 src[l7] -=v>>3;
00526 }
00527 src++;
00528 }
00529 #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
00530 }
00531
00532 #if !TEMPLATE_PP_ALTIVEC
/**
 * In-place vertical filter that adjusts only the two rows on either side of
 * one horizontal edge of an 8-byte-wide column block.
 *
 * Reference behaviour (C fallback, rows counted from src + 3*stride): with
 *   middleEnergy = 5*(row5 - row4) + 2*(row3 - row6)
 *   leftEnergy   = 5*(row3 - row2) + 2*(row1 - row4)
 *   rightEnergy  = 5*(row7 - row6) + 2*(row5 - row8)
 * a column is filtered only if |middleEnergy| < 8*QP. The correction is
 *   d = (5 * max(|middleEnergy| - min(|leftEnergy|, |rightEnergy|), 0) + 32) >> 6
 * signed by FFSIGN(-middleEnergy) and clamped to the range between 0 and
 * q = (row4 - row5)/2; then row4 -= d and row5 += d.
 *
 * Three implementations are selected at compile time: a byte-average based
 * MMXEXT/3DNow! version, a 16-bit-word plain MMX version, and the C fallback.
 * The asm versions advance src by 4 rows instead of 3, so their "row k" is
 * the C version's row k+1; all three write the same two memory rows.
 *
 * @param src    pointer to the top of the area (see the row offsets above)
 * @param stride distance in bytes between consecutive rows
 * @param c      context; the asm paths read c->pQPb, the C path reads c->QP
 */
00533 static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext *c)
00534 {
00535 #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
00536
00537
00538
00539
00540
00541
00542
00543
00544
00545
00546
00547
00548
00549
00550 src+= stride*4;
00551 __asm__ volatile(
00552
00553 #if 0 //slightly more accurate and slightly slower
00554 "pxor %%mm7, %%mm7 \n\t"
00555 "lea (%0, %1), %%"REG_a" \n\t"
00556 "lea (%%"REG_a", %1, 4), %%"REG_c" \n\t"
00557
00558
00559
00560
00561
00562 "movq (%0, %1, 2), %%mm0 \n\t"
00563 "movq (%0), %%mm1 \n\t"
00564 "movq %%mm0, %%mm2 \n\t"
00565 PAVGB(%%mm7, %%mm0)
00566 PAVGB(%%mm1, %%mm0)
00567 PAVGB(%%mm2, %%mm0)
00568
00569 "movq (%%"REG_a"), %%mm1 \n\t"
00570 "movq (%%"REG_a", %1, 2), %%mm3 \n\t"
00571 "movq %%mm1, %%mm4 \n\t"
00572 PAVGB(%%mm7, %%mm1)
00573 PAVGB(%%mm3, %%mm1)
00574 PAVGB(%%mm4, %%mm1)
00575
00576 "movq %%mm0, %%mm4 \n\t"
00577 "psubusb %%mm1, %%mm0 \n\t"
00578 "psubusb %%mm4, %%mm1 \n\t"
00579 "por %%mm0, %%mm1 \n\t"
00580
00581
00582 "movq (%0, %1, 4), %%mm0 \n\t"
00583 "movq %%mm0, %%mm4 \n\t"
00584 PAVGB(%%mm7, %%mm0)
00585 PAVGB(%%mm2, %%mm0)
00586 PAVGB(%%mm4, %%mm0)
00587
00588 "movq (%%"REG_c"), %%mm2 \n\t"
00589 "movq %%mm3, %%mm5 \n\t"
00590 PAVGB(%%mm7, %%mm3)
00591 PAVGB(%%mm2, %%mm3)
00592 PAVGB(%%mm5, %%mm3)
00593
00594 "movq %%mm0, %%mm6 \n\t"
00595 "psubusb %%mm3, %%mm0 \n\t"
00596 "psubusb %%mm6, %%mm3 \n\t"
00597 "por %%mm0, %%mm3 \n\t"
00598 "pcmpeqb %%mm7, %%mm0 \n\t"
00599
00600
00601 "movq (%%"REG_c", %1), %%mm6 \n\t"
00602 "movq %%mm6, %%mm5 \n\t"
00603 PAVGB(%%mm7, %%mm6)
00604 PAVGB(%%mm4, %%mm6)
00605 PAVGB(%%mm5, %%mm6)
00606
00607 "movq (%%"REG_c", %1, 2), %%mm5 \n\t"
00608 "movq %%mm2, %%mm4 \n\t"
00609 PAVGB(%%mm7, %%mm2)
00610 PAVGB(%%mm5, %%mm2)
00611 PAVGB(%%mm4, %%mm2)
00612
00613 "movq %%mm6, %%mm4 \n\t"
00614 "psubusb %%mm2, %%mm6 \n\t"
00615 "psubusb %%mm4, %%mm2 \n\t"
00616 "por %%mm6, %%mm2 \n\t"
00617
00618
00619
00620 PMINUB(%%mm2, %%mm1, %%mm4)
00621 "movq %2, %%mm4 \n\t"
00622 "paddusb "MANGLE(b01)", %%mm4 \n\t"
00623 "pcmpgtb %%mm3, %%mm4 \n\t"
00624 "psubusb %%mm1, %%mm3 \n\t"
00625 "pand %%mm4, %%mm3 \n\t"
00626
00627 "movq %%mm3, %%mm1 \n\t"
00628
00629 PAVGB(%%mm7, %%mm3)
00630 PAVGB(%%mm7, %%mm3)
00631 "paddusb %%mm1, %%mm3 \n\t"
00632
00633
00634 "movq (%%"REG_a", %1, 2), %%mm6 \n\t"
00635 "movq (%0, %1, 4), %%mm5 \n\t"
00636 "movq (%0, %1, 4), %%mm4 \n\t"
00637 "psubusb %%mm6, %%mm5 \n\t"
00638 "psubusb %%mm4, %%mm6 \n\t"
00639 "por %%mm6, %%mm5 \n\t"
00640 "pcmpeqb %%mm7, %%mm6 \n\t"
00641 "pxor %%mm6, %%mm0 \n\t"
00642 "pand %%mm0, %%mm3 \n\t"
00643 PMINUB(%%mm5, %%mm3, %%mm0)
00644
00645 "psubusb "MANGLE(b01)", %%mm3 \n\t"
00646 PAVGB(%%mm7, %%mm3)
00647
00648 "movq (%%"REG_a", %1, 2), %%mm0 \n\t"
00649 "movq (%0, %1, 4), %%mm2 \n\t"
00650 "pxor %%mm6, %%mm0 \n\t"
00651 "pxor %%mm6, %%mm2 \n\t"
00652 "psubb %%mm3, %%mm0 \n\t"
00653 "paddb %%mm3, %%mm2 \n\t"
00654 "pxor %%mm6, %%mm0 \n\t"
00655 "pxor %%mm6, %%mm2 \n\t"
00656 "movq %%mm0, (%%"REG_a", %1, 2) \n\t"
00657 "movq %%mm2, (%0, %1, 4) \n\t"
00658 #endif //0
00659 // Active version; the "#if 0" block above is never compiled.
00660 "lea (%0, %1), %%"REG_a" \n\t" // REG_a = address of row 1
00661 "pcmpeqb %%mm6, %%mm6 \n\t" // mm6 = all ones, used to complement rows
00662 // Differences are formed as PAVGB(x, ~y), which equals 128 + (x - y)/2 up to rounding.
00663 // NOTE(review): b00, b01 and b80 are constants defined outside this chunk; by
00664 // name presumably 0x00, 0x01 and 0x80 in every byte - confirm.
00665
00666
00667 "movq (%%"REG_a", %1, 2), %%mm1 \n\t" // row 3
00668 "movq (%0, %1, 4), %%mm0 \n\t" // row 4
00669 "pxor %%mm6, %%mm1 \n\t" // ~row 3
00670 PAVGB(%%mm1, %%mm0)
00671
00672
00673 "movq (%%"REG_a", %1, 4), %%mm2 \n\t" // row 5
00674 "movq (%%"REG_a", %1), %%mm3 \n\t" // row 2
00675 "pxor %%mm6, %%mm2 \n\t" // ~row 5
00676 "movq %%mm2, %%mm5 \n\t"
00677 "movq "MANGLE(b80)", %%mm4 \n\t"
00678 "lea (%%"REG_a", %1, 4), %%"REG_c" \n\t" // REG_c = address of row 5
00679 PAVGB(%%mm3, %%mm2)
00680 PAVGB(%%mm0, %%mm4)
00681 PAVGB(%%mm2, %%mm4)
00682 PAVGB(%%mm0, %%mm4)
00683
00684
00685 "movq (%%"REG_a"), %%mm2 \n\t" // row 1
00686 "pxor %%mm6, %%mm2 \n\t" // ~row 1
00687 PAVGB(%%mm3, %%mm2)
00688 PAVGB((%0), %%mm1)
00689 "movq "MANGLE(b80)", %%mm3 \n\t"
00690 PAVGB(%%mm2, %%mm3)
00691 PAVGB(%%mm1, %%mm3)
00692 PAVGB(%%mm2, %%mm3)
00693
00694
00695 PAVGB((%%REGc, %1), %%mm5)
00696 "movq (%%"REG_c", %1, 2), %%mm1 \n\t" // row 7
00697 "pxor %%mm6, %%mm1 \n\t" // ~row 7
00698 PAVGB((%0, %1, 4), %%mm1)
00699 "movq "MANGLE(b80)", %%mm2 \n\t"
00700 PAVGB(%%mm5, %%mm2)
00701 PAVGB(%%mm1, %%mm2)
00702 PAVGB(%%mm5, %%mm2)
00703
00704
00705 "movq "MANGLE(b00)", %%mm1 \n\t"
00706 "movq "MANGLE(b00)", %%mm5 \n\t"
00707 "psubb %%mm2, %%mm1 \n\t"
00708 "psubb %%mm3, %%mm5 \n\t"
00709 PMAXUB(%%mm1, %%mm2)
00710 PMAXUB(%%mm5, %%mm3)
00711 PMINUB(%%mm2, %%mm3, %%mm1)
00712
00713
00714
00715 "movq "MANGLE(b00)", %%mm7 \n\t"
00716 "movq %2, %%mm2 \n\t" // c->pQPb
00717 PAVGB(%%mm6, %%mm2)
00718 "psubb %%mm6, %%mm2 \n\t"
00719
00720 "movq %%mm4, %%mm1 \n\t"
00721 "pcmpgtb %%mm7, %%mm1 \n\t"
00722 "pxor %%mm1, %%mm4 \n\t"
00723 "psubb %%mm1, %%mm4 \n\t"
00724 "pcmpgtb %%mm4, %%mm2 \n\t"
00725 "psubusb %%mm3, %%mm4 \n\t"
00726
00727
00728 "movq %%mm4, %%mm3 \n\t"
00729 "psubusb "MANGLE(b01)", %%mm4 \n\t"
00730 PAVGB(%%mm7, %%mm4)
00731 PAVGB(%%mm7, %%mm4)
00732 "paddb %%mm3, %%mm4 \n\t"
00733 "pand %%mm2, %%mm4 \n\t"
00734
00735 "movq "MANGLE(b80)", %%mm5 \n\t"
00736 "psubb %%mm0, %%mm5 \n\t"
00737 "paddsb %%mm6, %%mm5 \n\t"
00738 "pcmpgtb %%mm5, %%mm7 \n\t"
00739 "pxor %%mm7, %%mm5 \n\t"
00740
00741 PMINUB(%%mm5, %%mm4, %%mm3)
00742 "pxor %%mm1, %%mm7 \n\t"
00743
00744 "pand %%mm7, %%mm4 \n\t"
00745 "movq (%%"REG_a", %1, 2), %%mm0 \n\t" // row 3
00746 "movq (%0, %1, 4), %%mm2 \n\t" // row 4
00747 "pxor %%mm1, %%mm0 \n\t"
00748 "pxor %%mm1, %%mm2 \n\t"
00749 "paddb %%mm4, %%mm0 \n\t"
00750 "psubb %%mm4, %%mm2 \n\t"
00751 "pxor %%mm1, %%mm0 \n\t"
00752 "pxor %%mm1, %%mm2 \n\t"
00753 "movq %%mm0, (%%"REG_a", %1, 2) \n\t" // store row 3
00754 "movq %%mm2, (%0, %1, 4) \n\t" // store row 4
00755
00756 :
00757 : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb)
00758 : "%"REG_a, "%"REG_c
00759 );
00760
00761
00762
00763
00764
00765
00766
00767
00768
00769
00770
00771
00772
00773
00774
00775
00776
00777
00778
00779
00780
00781
00782
00783
00784
00785
00786
00787
00788
00789
00790
00791
00792
00793
00794
00795
00796
00797
00798
00799
00800
00801
00802
00803
00804
00805
00806
00807
00808
00809
00810
00811
00812
00813
00814
00815
00816 #elif TEMPLATE_PP_MMX
// Plain MMX version: rows are widened to 16-bit words ("L" = low 4 columns,
// "H" = high 4 columns) so the energies can be computed exactly.
// NOTE(review): this arm is reached only when TEMPLATE_PP_MMXEXT is 0 (the #if
// above would have matched otherwise), so the "#if TEMPLATE_PP_MMXEXT"
// pmaxsw/pminsw sub-branches below can never be compiled here.
00817 DECLARE_ALIGNED(8, uint64_t, tmp)[4]; // scratch: tmp[0..1] = left energy (L/H), tmp[2..3] = row3 - row4 (L/H)
00818 src+= stride*4;
00819 __asm__ volatile(
00820 "pxor %%mm7, %%mm7 \n\t" // mm7 = 0, used for byte -> word unpacking
00821
00822
00823
00824
00825 "movq (%0), %%mm0 \n\t" // row 0
00826 "movq %%mm0, %%mm1 \n\t"
00827 "punpcklbw %%mm7, %%mm0 \n\t"
00828 "punpckhbw %%mm7, %%mm1 \n\t"
00829
00830 "movq (%0, %1), %%mm2 \n\t" // row 1
00831 "lea (%0, %1, 2), %%"REG_a" \n\t" // REG_a = address of row 2
00832 "movq %%mm2, %%mm3 \n\t"
00833 "punpcklbw %%mm7, %%mm2 \n\t"
00834 "punpckhbw %%mm7, %%mm3 \n\t"
00835
00836 "movq (%%"REG_a"), %%mm4 \n\t" // row 2
00837 "movq %%mm4, %%mm5 \n\t"
00838 "punpcklbw %%mm7, %%mm4 \n\t"
00839 "punpckhbw %%mm7, %%mm5 \n\t"
00840
00841 "paddw %%mm0, %%mm0 \n\t" // 2*row0
00842 "paddw %%mm1, %%mm1 \n\t"
00843 "psubw %%mm4, %%mm2 \n\t" // row1 - row2
00844 "psubw %%mm5, %%mm3 \n\t"
00845 "psubw %%mm2, %%mm0 \n\t" // 2*row0 - row1 + row2
00846 "psubw %%mm3, %%mm1 \n\t"
00847
00848 "psllw $2, %%mm2 \n\t" // 4*(row1 - row2)
00849 "psllw $2, %%mm3 \n\t"
00850 "psubw %%mm2, %%mm0 \n\t" // 2*row0 - 5*row1 + 5*row2
00851 "psubw %%mm3, %%mm1 \n\t"
00852
00853 "movq (%%"REG_a", %1), %%mm2 \n\t" // row 3
00854 "movq %%mm2, %%mm3 \n\t"
00855 "punpcklbw %%mm7, %%mm2 \n\t"
00856 "punpckhbw %%mm7, %%mm3 \n\t"
00857
00858 "psubw %%mm2, %%mm0 \n\t"
00859 "psubw %%mm3, %%mm1 \n\t"
00860 "psubw %%mm2, %%mm0 \n\t" // left energy = 2*row0 - 5*row1 + 5*row2 - 2*row3
00861 "psubw %%mm3, %%mm1 \n\t"
00862 "movq %%mm0, (%3) \n\t" // save left energy in tmp[0..1]
00863 "movq %%mm1, 8(%3) \n\t"
00864
00865 "movq (%%"REG_a", %1, 2), %%mm0 \n\t" // row 4
00866 "movq %%mm0, %%mm1 \n\t"
00867 "punpcklbw %%mm7, %%mm0 \n\t"
00868 "punpckhbw %%mm7, %%mm1 \n\t"
00869
00870 "psubw %%mm0, %%mm2 \n\t" // row3 - row4
00871 "psubw %%mm1, %%mm3 \n\t"
00872 "movq %%mm2, 16(%3) \n\t" // save row3 - row4 in tmp[2..3]
00873 "movq %%mm3, 24(%3) \n\t"
00874 "paddw %%mm4, %%mm4 \n\t" // 2*row2
00875 "paddw %%mm5, %%mm5 \n\t"
00876 "psubw %%mm2, %%mm4 \n\t" // 2*row2 - row3 + row4
00877 "psubw %%mm3, %%mm5 \n\t"
00878
00879 "lea (%%"REG_a", %1), %0 \n\t" // %0 = address of row 3 (declared "+r", so modifying it is allowed)
00880 "psllw $2, %%mm2 \n\t"
00881 "psllw $2, %%mm3 \n\t"
00882 "psubw %%mm2, %%mm4 \n\t" // 2*row2 - 5*row3 + 5*row4
00883 "psubw %%mm3, %%mm5 \n\t"
00884
00885 "movq (%0, %1, 2), %%mm2 \n\t" // row 5
00886 "movq %%mm2, %%mm3 \n\t"
00887 "punpcklbw %%mm7, %%mm2 \n\t"
00888 "punpckhbw %%mm7, %%mm3 \n\t"
00889 "psubw %%mm2, %%mm4 \n\t"
00890 "psubw %%mm3, %%mm5 \n\t"
00891 "psubw %%mm2, %%mm4 \n\t" // middle energy = 2*row2 - 5*row3 + 5*row4 - 2*row5 (kept in mm4/mm5)
00892 "psubw %%mm3, %%mm5 \n\t"
00893
00894 "movq (%%"REG_a", %1, 4), %%mm6 \n\t" // row 6
00895 "punpcklbw %%mm7, %%mm6 \n\t"
00896 "psubw %%mm6, %%mm2 \n\t" // row5 - row6
00897 "movq (%%"REG_a", %1, 4), %%mm6 \n\t" // row 6 again for the high half
00898 "punpckhbw %%mm7, %%mm6 \n\t"
00899 "psubw %%mm6, %%mm3 \n\t"
00900
00901 "paddw %%mm0, %%mm0 \n\t" // 2*row4
00902 "paddw %%mm1, %%mm1 \n\t"
00903 "psubw %%mm2, %%mm0 \n\t"
00904 "psubw %%mm3, %%mm1 \n\t"
00905
00906 "psllw $2, %%mm2 \n\t"
00907 "psllw $2, %%mm3 \n\t"
00908 "psubw %%mm2, %%mm0 \n\t" // 2*row4 - 5*row5 + 5*row6
00909 "psubw %%mm3, %%mm1 \n\t"
00910
00911 "movq (%0, %1, 4), %%mm2 \n\t" // row 7
00912 "movq %%mm2, %%mm3 \n\t"
00913 "punpcklbw %%mm7, %%mm2 \n\t"
00914 "punpckhbw %%mm7, %%mm3 \n\t"
00915
00916 "paddw %%mm2, %%mm2 \n\t"
00917 "paddw %%mm3, %%mm3 \n\t"
00918 "psubw %%mm2, %%mm0 \n\t" // right energy = 2*row4 - 5*row5 + 5*row6 - 2*row7
00919 "psubw %%mm3, %%mm1 \n\t"
00920
00921 "movq (%3), %%mm2 \n\t" // reload left energy
00922 "movq 8(%3), %%mm3 \n\t"
00923 // Absolute values of right (mm0/mm1) and left (mm2/mm3) energy.
00924 #if TEMPLATE_PP_MMXEXT
00925 "movq %%mm7, %%mm6 \n\t"
00926 "psubw %%mm0, %%mm6 \n\t"
00927 "pmaxsw %%mm6, %%mm0 \n\t"
00928 "movq %%mm7, %%mm6 \n\t"
00929 "psubw %%mm1, %%mm6 \n\t"
00930 "pmaxsw %%mm6, %%mm1 \n\t"
00931 "movq %%mm7, %%mm6 \n\t"
00932 "psubw %%mm2, %%mm6 \n\t"
00933 "pmaxsw %%mm6, %%mm2 \n\t"
00934 "movq %%mm7, %%mm6 \n\t"
00935 "psubw %%mm3, %%mm6 \n\t"
00936 "pmaxsw %%mm6, %%mm3 \n\t"
00937 #else
00938 "movq %%mm7, %%mm6 \n\t"
00939 "pcmpgtw %%mm0, %%mm6 \n\t" // sign mask
00940 "pxor %%mm6, %%mm0 \n\t"
00941 "psubw %%mm6, %%mm0 \n\t" // (x ^ mask) - mask == |x|
00942 "movq %%mm7, %%mm6 \n\t"
00943 "pcmpgtw %%mm1, %%mm6 \n\t"
00944 "pxor %%mm6, %%mm1 \n\t"
00945 "psubw %%mm6, %%mm1 \n\t"
00946 "movq %%mm7, %%mm6 \n\t"
00947 "pcmpgtw %%mm2, %%mm6 \n\t"
00948 "pxor %%mm6, %%mm2 \n\t"
00949 "psubw %%mm6, %%mm2 \n\t"
00950 "movq %%mm7, %%mm6 \n\t"
00951 "pcmpgtw %%mm3, %%mm6 \n\t"
00952 "pxor %%mm6, %%mm3 \n\t"
00953 "psubw %%mm6, %%mm3 \n\t"
00954 #endif
00955 // mm0/mm1 = min(|left energy|, |right energy|)
00956 #if TEMPLATE_PP_MMXEXT
00957 "pminsw %%mm2, %%mm0 \n\t"
00958 "pminsw %%mm3, %%mm1 \n\t"
00959 #else
00960 "movq %%mm0, %%mm6 \n\t"
00961 "psubusw %%mm2, %%mm6 \n\t"
00962 "psubw %%mm6, %%mm0 \n\t"
00963 "movq %%mm1, %%mm6 \n\t"
00964 "psubusw %%mm3, %%mm6 \n\t"
00965 "psubw %%mm6, %%mm1 \n\t"
00966 #endif
00967
00968 "movd %2, %%mm2 \n\t" // low 4 bytes of c->pQPb
00969 "punpcklbw %%mm7, %%mm2 \n\t" // widened to 4 words
00970 // |middle energy|; its sign masks stay in mm6 (low half) and mm7 (high half)
00971 "movq %%mm7, %%mm6 \n\t"
00972 "pcmpgtw %%mm4, %%mm6 \n\t"
00973 "pxor %%mm6, %%mm4 \n\t"
00974 "psubw %%mm6, %%mm4 \n\t"
00975 "pcmpgtw %%mm5, %%mm7 \n\t" // mm7 stops being zero from here on
00976 "pxor %%mm7, %%mm5 \n\t"
00977 "psubw %%mm7, %%mm5 \n\t"
00978
00979 "psllw $3, %%mm2 \n\t" // 8 * QP word
00980 "movq %%mm2, %%mm3 \n\t"
00981 "pcmpgtw %%mm4, %%mm2 \n\t" // mask: 8*QP > |middle energy|
00982 "pcmpgtw %%mm5, %%mm3 \n\t"
00983 "pand %%mm2, %%mm4 \n\t"
00984 "pand %%mm3, %%mm5 \n\t"
00985
00986
00987 "psubusw %%mm0, %%mm4 \n\t" // d = max(|middle| - min(|left|, |right|), 0)
00988 "psubusw %%mm1, %%mm5 \n\t"
00989
00990 // w05/w20 are defined outside this chunk; by name presumably 5 and 0x20 per word, matching (5*d + 32) >> 6 in the C fallback.
00991 "movq "MANGLE(w05)", %%mm2 \n\t"
00992 "pmullw %%mm2, %%mm4 \n\t"
00993 "pmullw %%mm2, %%mm5 \n\t"
00994 "movq "MANGLE(w20)", %%mm2 \n\t"
00995 "paddw %%mm2, %%mm4 \n\t"
00996 "paddw %%mm2, %%mm5 \n\t"
00997 "psrlw $6, %%mm4 \n\t"
00998 "psrlw $6, %%mm5 \n\t"
00999
01000 "movq 16(%3), %%mm0 \n\t" // reload row3 - row4
01001 "movq 24(%3), %%mm1 \n\t"
01002
01003 "pxor %%mm2, %%mm2 \n\t"
01004 "pxor %%mm3, %%mm3 \n\t"
01005
01006 "pcmpgtw %%mm0, %%mm2 \n\t" // sign mask of (row3 - row4)
01007 "pcmpgtw %%mm1, %%mm3 \n\t"
01008 "pxor %%mm2, %%mm0 \n\t"
01009 "pxor %%mm3, %%mm1 \n\t"
01010 "psubw %%mm2, %%mm0 \n\t"
01011 "psubw %%mm3, %%mm1 \n\t"
01012 "psrlw $1, %%mm0 \n\t" // |row3 - row4| / 2, the clamp limit
01013 "psrlw $1, %%mm1 \n\t"
01014
01015 "pxor %%mm6, %%mm2 \n\t" // mask set where (row3 - row4) and the middle energy differ in sign
01016 "pxor %%mm7, %%mm3 \n\t"
01017 "pand %%mm2, %%mm4 \n\t" // keep d only there
01018 "pand %%mm3, %%mm5 \n\t"
01019 // d = min(d, clamp limit)
01020 #if TEMPLATE_PP_MMXEXT
01021 "pminsw %%mm0, %%mm4 \n\t"
01022 "pminsw %%mm1, %%mm5 \n\t"
01023 #else
01024 "movq %%mm4, %%mm2 \n\t"
01025 "psubusw %%mm0, %%mm2 \n\t"
01026 "psubw %%mm2, %%mm4 \n\t"
01027 "movq %%mm5, %%mm2 \n\t"
01028 "psubusw %%mm1, %%mm2 \n\t"
01029 "psubw %%mm2, %%mm5 \n\t"
01030 #endif
01031 "pxor %%mm6, %%mm4 \n\t" // negate d where the middle energy was negative
01032 "pxor %%mm7, %%mm5 \n\t"
01033 "psubw %%mm6, %%mm4 \n\t"
01034 "psubw %%mm7, %%mm5 \n\t"
01035 "packsswb %%mm5, %%mm4 \n\t" // back to 8 signed bytes
01036 "movq (%0), %%mm0 \n\t" // row 3 (%0 was moved to row 3 above)
01037 "paddb %%mm4, %%mm0 \n\t"
01038 "movq %%mm0, (%0) \n\t"
01039 "movq (%0, %1), %%mm0 \n\t" // row 4
01040 "psubb %%mm4, %%mm0 \n\t"
01041 "movq %%mm0, (%0, %1) \n\t"
01042
01043 : "+r" (src)
01044 : "r" ((x86_reg)stride), "m" (c->pQPb), "r"(tmp)
01045 : "%"REG_a
01046 );
01047 #else // neither MMXEXT/3DNOW nor MMX: plain C fallback
01048 const int l1= stride; // lk = byte offset of row k
01049 const int l2= stride + l1;
01050 const int l3= stride + l2;
01051 const int l4= stride + l3;
01052 const int l5= stride + l4;
01053 const int l6= stride + l5;
01054 const int l7= stride + l6;
01055 const int l8= stride + l7;
01056
01057 int x;
01058 src+= stride*3; // one row less than the asm paths, because the offsets here start at l1
01059 for(x=0; x<BLOCK_SIZE; x++){ // one column per iteration
01060 const int middleEnergy= 5*(src[l5] - src[l4]) + 2*(src[l3] - src[l6]);
01061 if(FFABS(middleEnergy) < 8*c->QP){
01062 const int q=(src[l4] - src[l5])/2; // largest correction allowed, signed
01063 const int leftEnergy= 5*(src[l3] - src[l2]) + 2*(src[l1] - src[l4]);
01064 const int rightEnergy= 5*(src[l7] - src[l6]) + 2*(src[l5] - src[l8]);
01065
01066 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
01067 d= FFMAX(d, 0);
01068
01069 d= (5*d + 32) >> 6; // scale by 5/64 with rounding
01070 d*= FFSIGN(-middleEnergy);
01071 // clamp d to the range between 0 and q
01072 if(q>0){
01073 d= d<0 ? 0 : d;
01074 d= d>q ? q : d;
01075 }else{
01076 d= d>0 ? 0 : d;
01077 d= d<q ? q : d;
01078 }
01079
01080 src[l4]-= d;
01081 src[l5]+= d;
01082 }
01083 src++;
01084 }
01085 #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
01086 }
01087 #endif // !TEMPLATE_PP_ALTIVEC
01088
01089 #if !TEMPLATE_PP_ALTIVEC
01090 static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c)
01091 {
01092 #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
01093 DECLARE_ALIGNED(8, uint64_t, tmp)[3];
01094 __asm__ volatile(
01095 "pxor %%mm6, %%mm6 \n\t"
01096 "pcmpeqb %%mm7, %%mm7 \n\t"
01097 "movq %2, %%mm0 \n\t"
01098 "punpcklbw %%mm6, %%mm0 \n\t"
01099 "psrlw $1, %%mm0 \n\t"
01100 "psubw %%mm7, %%mm0 \n\t"
01101 "packuswb %%mm0, %%mm0 \n\t"
01102 "movq %%mm0, %3 \n\t"
01103
01104 "lea (%0, %1), %%"REG_a" \n\t"
01105 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
01106
01107
01108
01109
01110 #undef REAL_FIND_MIN_MAX
01111 #undef FIND_MIN_MAX
01112 #if TEMPLATE_PP_MMXEXT
01113 #define REAL_FIND_MIN_MAX(addr)\
01114 "movq " #addr ", %%mm0 \n\t"\
01115 "pminub %%mm0, %%mm7 \n\t"\
01116 "pmaxub %%mm0, %%mm6 \n\t"
01117 #else
01118 #define REAL_FIND_MIN_MAX(addr)\
01119 "movq " #addr ", %%mm0 \n\t"\
01120 "movq %%mm7, %%mm1 \n\t"\
01121 "psubusb %%mm0, %%mm6 \n\t"\
01122 "paddb %%mm0, %%mm6 \n\t"\
01123 "psubusb %%mm0, %%mm1 \n\t"\
01124 "psubb %%mm1, %%mm7 \n\t"
01125 #endif
01126 #define FIND_MIN_MAX(addr) REAL_FIND_MIN_MAX(addr)
01127
01128 FIND_MIN_MAX((%%REGa))
01129 FIND_MIN_MAX((%%REGa, %1))
01130 FIND_MIN_MAX((%%REGa, %1, 2))
01131 FIND_MIN_MAX((%0, %1, 4))
01132 FIND_MIN_MAX((%%REGd))
01133 FIND_MIN_MAX((%%REGd, %1))
01134 FIND_MIN_MAX((%%REGd, %1, 2))
01135 FIND_MIN_MAX((%0, %1, 8))
01136
01137 "movq %%mm7, %%mm4 \n\t"
01138 "psrlq $8, %%mm7 \n\t"
01139 #if TEMPLATE_PP_MMXEXT
01140 "pminub %%mm4, %%mm7 \n\t"
01141 "pshufw $0xF9, %%mm7, %%mm4 \n\t"
01142 "pminub %%mm4, %%mm7 \n\t"
01143 "pshufw $0xFE, %%mm7, %%mm4 \n\t"
01144 "pminub %%mm4, %%mm7 \n\t"
01145 #else
01146 "movq %%mm7, %%mm1 \n\t"
01147 "psubusb %%mm4, %%mm1 \n\t"
01148 "psubb %%mm1, %%mm7 \n\t"
01149 "movq %%mm7, %%mm4 \n\t"
01150 "psrlq $16, %%mm7 \n\t"
01151 "movq %%mm7, %%mm1 \n\t"
01152 "psubusb %%mm4, %%mm1 \n\t"
01153 "psubb %%mm1, %%mm7 \n\t"
01154 "movq %%mm7, %%mm4 \n\t"
01155 "psrlq $32, %%mm7 \n\t"
01156 "movq %%mm7, %%mm1 \n\t"
01157 "psubusb %%mm4, %%mm1 \n\t"
01158 "psubb %%mm1, %%mm7 \n\t"
01159 #endif
01160
01161
01162 "movq %%mm6, %%mm4 \n\t"
01163 "psrlq $8, %%mm6 \n\t"
01164 #if TEMPLATE_PP_MMXEXT
01165 "pmaxub %%mm4, %%mm6 \n\t"
01166 "pshufw $0xF9, %%mm6, %%mm4 \n\t"
01167 "pmaxub %%mm4, %%mm6 \n\t"
01168 "pshufw $0xFE, %%mm6, %%mm4 \n\t"
01169 "pmaxub %%mm4, %%mm6 \n\t"
01170 #else
01171 "psubusb %%mm4, %%mm6 \n\t"
01172 "paddb %%mm4, %%mm6 \n\t"
01173 "movq %%mm6, %%mm4 \n\t"
01174 "psrlq $16, %%mm6 \n\t"
01175 "psubusb %%mm4, %%mm6 \n\t"
01176 "paddb %%mm4, %%mm6 \n\t"
01177 "movq %%mm6, %%mm4 \n\t"
01178 "psrlq $32, %%mm6 \n\t"
01179 "psubusb %%mm4, %%mm6 \n\t"
01180 "paddb %%mm4, %%mm6 \n\t"
01181 #endif
01182 "movq %%mm6, %%mm0 \n\t"
01183 "psubb %%mm7, %%mm6 \n\t"
01184 "push %4 \n\t"
01185 "movd %%mm6, %k4 \n\t"
01186 "cmpb "MANGLE(deringThreshold)", %b4 \n\t"
01187 "pop %4 \n\t"
01188 " jb 1f \n\t"
01189 PAVGB(%%mm0, %%mm7)
01190 "punpcklbw %%mm7, %%mm7 \n\t"
01191 "punpcklbw %%mm7, %%mm7 \n\t"
01192 "punpcklbw %%mm7, %%mm7 \n\t"
01193 "movq %%mm7, (%4) \n\t"
01194
01195 "movq (%0), %%mm0 \n\t"
01196 "movq %%mm0, %%mm1 \n\t"
01197 "movq %%mm0, %%mm2 \n\t"
01198 "psllq $8, %%mm1 \n\t"
01199 "psrlq $8, %%mm2 \n\t"
01200 "movd -4(%0), %%mm3 \n\t"
01201 "movd 8(%0), %%mm4 \n\t"
01202 "psrlq $24, %%mm3 \n\t"
01203 "psllq $56, %%mm4 \n\t"
01204 "por %%mm3, %%mm1 \n\t"
01205 "por %%mm4, %%mm2 \n\t"
01206 "movq %%mm1, %%mm3 \n\t"
01207 PAVGB(%%mm2, %%mm1)
01208 PAVGB(%%mm0, %%mm1)
01209 "psubusb %%mm7, %%mm0 \n\t"
01210 "psubusb %%mm7, %%mm2 \n\t"
01211 "psubusb %%mm7, %%mm3 \n\t"
01212 "pcmpeqb "MANGLE(b00)", %%mm0 \n\t"
01213 "pcmpeqb "MANGLE(b00)", %%mm2 \n\t"
01214 "pcmpeqb "MANGLE(b00)", %%mm3 \n\t"
01215 "paddb %%mm2, %%mm0 \n\t"
01216 "paddb %%mm3, %%mm0 \n\t"
01217
01218 "movq (%%"REG_a"), %%mm2 \n\t"
01219 "movq %%mm2, %%mm3 \n\t"
01220 "movq %%mm2, %%mm4 \n\t"
01221 "psllq $8, %%mm3 \n\t"
01222 "psrlq $8, %%mm4 \n\t"
01223 "movd -4(%%"REG_a"), %%mm5 \n\t"
01224 "movd 8(%%"REG_a"), %%mm6 \n\t"
01225 "psrlq $24, %%mm5 \n\t"
01226 "psllq $56, %%mm6 \n\t"
01227 "por %%mm5, %%mm3 \n\t"
01228 "por %%mm6, %%mm4 \n\t"
01229 "movq %%mm3, %%mm5 \n\t"
01230 PAVGB(%%mm4, %%mm3)
01231 PAVGB(%%mm2, %%mm3)
01232 "psubusb %%mm7, %%mm2 \n\t"
01233 "psubusb %%mm7, %%mm4 \n\t"
01234 "psubusb %%mm7, %%mm5 \n\t"
01235 "pcmpeqb "MANGLE(b00)", %%mm2 \n\t"
01236 "pcmpeqb "MANGLE(b00)", %%mm4 \n\t"
01237 "pcmpeqb "MANGLE(b00)", %%mm5 \n\t"
01238 "paddb %%mm4, %%mm2 \n\t"
01239 "paddb %%mm5, %%mm2 \n\t"
01240
01241 #define REAL_DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1) \
01242 "movq " #src ", " #sx " \n\t" \
01243 "movq " #sx ", " #lx " \n\t" \
01244 "movq " #sx ", " #t0 " \n\t" \
01245 "psllq $8, " #lx " \n\t"\
01246 "psrlq $8, " #t0 " \n\t"\
01247 "movd -4" #src ", " #t1 " \n\t"\
01248 "psrlq $24, " #t1 " \n\t"\
01249 "por " #t1 ", " #lx " \n\t" \
01250 "movd 8" #src ", " #t1 " \n\t"\
01251 "psllq $56, " #t1 " \n\t"\
01252 "por " #t1 ", " #t0 " \n\t" \
01253 "movq " #lx ", " #t1 " \n\t" \
01254 PAVGB(t0, lx) \
01255 PAVGB(sx, lx) \
01256 PAVGB(lx, pplx) \
01257 "movq " #lx ", 8(%4) \n\t"\
01258 "movq (%4), " #lx " \n\t"\
01259 "psubusb " #lx ", " #t1 " \n\t"\
01260 "psubusb " #lx ", " #t0 " \n\t"\
01261 "psubusb " #lx ", " #sx " \n\t"\
01262 "movq "MANGLE(b00)", " #lx " \n\t"\
01263 "pcmpeqb " #lx ", " #t1 " \n\t" \
01264 "pcmpeqb " #lx ", " #t0 " \n\t" \
01265 "pcmpeqb " #lx ", " #sx " \n\t" \
01266 "paddb " #t1 ", " #t0 " \n\t"\
01267 "paddb " #t0 ", " #sx " \n\t"\
01268 \
01269 PAVGB(plx, pplx) \
01270 "movq " #dst ", " #t0 " \n\t" \
01271 "movq " #t0 ", " #t1 " \n\t" \
01272 "psubusb %3, " #t0 " \n\t"\
01273 "paddusb %3, " #t1 " \n\t"\
01274 PMAXUB(t0, pplx)\
01275 PMINUB(t1, pplx, t0)\
01276 "paddb " #sx ", " #ppsx " \n\t"\
01277 "paddb " #psx ", " #ppsx " \n\t"\
01278 "#paddb "MANGLE(b02)", " #ppsx " \n\t"\
01279 "pand "MANGLE(b08)", " #ppsx " \n\t"\
01280 "pcmpeqb " #lx ", " #ppsx " \n\t"\
01281 "pand " #ppsx ", " #pplx " \n\t"\
01282 "pandn " #dst ", " #ppsx " \n\t"\
01283 "por " #pplx ", " #ppsx " \n\t"\
01284 "movq " #ppsx ", " #dst " \n\t"\
01285 "movq 8(%4), " #lx " \n\t"
01286
01287 #define DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1) \
01288 REAL_DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1)
01289
01290
01291
01292
01293
01294
01295
01296
01297
01298
01299
01300
01301
01302
01303
01304
01305 DERING_CORE((%%REGa) ,(%%REGa, %1) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
01306 DERING_CORE((%%REGa, %1) ,(%%REGa, %1, 2),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
01307 DERING_CORE((%%REGa, %1, 2),(%0, %1, 4) ,%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7)
01308 DERING_CORE((%0, %1, 4) ,(%%REGd) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
01309 DERING_CORE((%%REGd) ,(%%REGd, %1) ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
01310 DERING_CORE((%%REGd, %1) ,(%%REGd, %1, 2),%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7)
01311 DERING_CORE((%%REGd, %1, 2),(%0, %1, 8) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
01312 DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
01313
01314 "1: \n\t"
01315 : : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb), "m"(c->pQPb2), "q"(tmp)
01316 : "%"REG_a, "%"REG_d
01317 );
01318 #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
01319 int y;
01320 int min=255;
01321 int max=0;
01322 int avg;
01323 uint8_t *p;
01324 int s[10];
01325 const int QP2= c->QP/2 + 1;
01326
01327 for(y=1; y<9; y++){
01328 int x;
01329 p= src + stride*y;
01330 for(x=1; x<9; x++){
01331 p++;
01332 if(*p > max) max= *p;
01333 if(*p < min) min= *p;
01334 }
01335 }
01336 avg= (min + max + 1)>>1;
01337
01338 if(max - min <deringThreshold) return;
01339
01340 for(y=0; y<10; y++){
01341 int t = 0;
01342
01343 if(src[stride*y + 0] > avg) t+= 1;
01344 if(src[stride*y + 1] > avg) t+= 2;
01345 if(src[stride*y + 2] > avg) t+= 4;
01346 if(src[stride*y + 3] > avg) t+= 8;
01347 if(src[stride*y + 4] > avg) t+= 16;
01348 if(src[stride*y + 5] > avg) t+= 32;
01349 if(src[stride*y + 6] > avg) t+= 64;
01350 if(src[stride*y + 7] > avg) t+= 128;
01351 if(src[stride*y + 8] > avg) t+= 256;
01352 if(src[stride*y + 9] > avg) t+= 512;
01353
01354 t |= (~t)<<16;
01355 t &= (t<<1) & (t>>1);
01356 s[y] = t;
01357 }
01358
01359 for(y=1; y<9; y++){
01360 int t = s[y-1] & s[y] & s[y+1];
01361 t|= t>>16;
01362 s[y-1]= t;
01363 }
01364
01365 for(y=1; y<9; y++){
01366 int x;
01367 int t = s[y-1];
01368
01369 p= src + stride*y;
01370 for(x=1; x<9; x++){
01371 p++;
01372 if(t & (1<<x)){
01373 int f= (*(p-stride-1)) + 2*(*(p-stride)) + (*(p-stride+1))
01374 +2*(*(p -1)) + 4*(*p ) + 2*(*(p +1))
01375 +(*(p+stride-1)) + 2*(*(p+stride)) + (*(p+stride+1));
01376 f= (f + 8)>>4;
01377
01378 #ifdef DEBUG_DERING_THRESHOLD
01379 __asm__ volatile("emms\n\t":);
01380 {
01381 static long long numPixels=0;
01382 if(x!=1 && x!=8 && y!=1 && y!=8) numPixels++;
01383
01384
01385
01386 if(max-min < 20){
01387 static int numSkipped=0;
01388 static int errorSum=0;
01389 static int worstQP=0;
01390 static int worstRange=0;
01391 static int worstDiff=0;
01392 int diff= (f - *p);
01393 int absDiff= FFABS(diff);
01394 int error= diff*diff;
01395
01396 if(x==1 || x==8 || y==1 || y==8) continue;
01397
01398 numSkipped++;
01399 if(absDiff > worstDiff){
01400 worstDiff= absDiff;
01401 worstQP= QP;
01402 worstRange= max-min;
01403 }
01404 errorSum+= error;
01405
01406 if(1024LL*1024LL*1024LL % numSkipped == 0){
01407 av_log(c, AV_LOG_INFO, "sum:%1.3f, skip:%d, wQP:%d, "
01408 "wRange:%d, wDiff:%d, relSkip:%1.3f\n",
01409 (float)errorSum/numSkipped, numSkipped, worstQP, worstRange,
01410 worstDiff, (float)numSkipped/numPixels);
01411 }
01412 }
01413 }
01414 #endif
01415 if (*p + QP2 < f) *p= *p + QP2;
01416 else if(*p - QP2 > f) *p= *p - QP2;
01417 else *p=f;
01418 }
01419 }
01420 }
01421 #ifdef DEBUG_DERING_THRESHOLD
01422 if(max-min < 20){
01423 for(y=1; y<9; y++){
01424 int x;
01425 int t = 0;
01426 p= src + stride*y;
01427 for(x=1; x<9; x++){
01428 p++;
01429 *p = FFMIN(*p + 20, 255);
01430 }
01431 }
01432
01433 }
01434 #endif
01435 #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
01436 }
01437 #endif //TEMPLATE_PP_ALTIVEC
01438
/**
 * Deinterlace an 8-byte-wide block by linear interpolation.
 *
 * src is first advanced by 4*stride. Relative to that position, rows 1, 3, 5
 * and 7 are overwritten with the per-byte average of the row directly above
 * and the row directly below; rows 0, 2, 4, 6 and 8 are only read.
 *
 * @param src    start of the block (modified in place)
 * @param stride byte offset from one row to the next
 */
01445 static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int stride)
01446 {
01447 #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
01448 src+= 4*stride;
01449 __asm__ volatile(
// REG_a = src + stride (row 1), REG_c = REG_a + 4*stride (row 5)
01450 "lea (%0, %1), %%"REG_a" \n\t"
01451 "lea (%%"REG_a", %1, 4), %%"REG_c" \n\t"
01452
01453
01454
// row 1 = avg(row 0, row 2)
01455 "movq (%0), %%mm0 \n\t"
01456 "movq (%%"REG_a", %1), %%mm1 \n\t"
01457 PAVGB(%%mm1, %%mm0)
01458 "movq %%mm0, (%%"REG_a") \n\t"
// row 3 = avg(row 2, row 4)
01459 "movq (%0, %1, 4), %%mm0 \n\t"
01460 PAVGB(%%mm0, %%mm1)
01461 "movq %%mm1, (%%"REG_a", %1, 2) \n\t"
// row 5 = avg(row 4, row 6)
01462 "movq (%%"REG_c", %1), %%mm1 \n\t"
01463 PAVGB(%%mm1, %%mm0)
01464 "movq %%mm0, (%%"REG_c") \n\t"
// row 7 = avg(row 6, row 8)
01465 "movq (%0, %1, 8), %%mm0 \n\t"
01466 PAVGB(%%mm0, %%mm1)
01467 "movq %%mm1, (%%"REG_c", %1, 2) \n\t"
01468
01469 : : "r" (src), "r" ((x86_reg)stride)
01470 : "%"REG_a, "%"REG_c
01471 );
01472 #else
// Scalar fallback: handles the 8 columns as two packed 32-bit words.
// NOTE(review): the uint32_t casts assume unaligned, type-punned 32-bit
// accesses through a uint8_t pointer are acceptable on the target - confirm.
01473 int a, b, x;
01474 src+= 4*stride;
01475
01476 for(x=0; x<2; x++){
// (a|b) - (((a^b)&0xFEFEFEFE)>>1) is the per-byte average of a and b,
// rounded up; the mask stops the shift leaking bits across byte lanes.
01477 a= *(uint32_t*)&src[stride*0];
01478 b= *(uint32_t*)&src[stride*2];
01479 *(uint32_t*)&src[stride*1]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
01480 a= *(uint32_t*)&src[stride*4];
01481 *(uint32_t*)&src[stride*3]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
01482 b= *(uint32_t*)&src[stride*6];
01483 *(uint32_t*)&src[stride*5]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
01484 a= *(uint32_t*)&src[stride*8];
01485 *(uint32_t*)&src[stride*7]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
// advance to the second group of 4 columns
01486 src += 4;
01487 }
01488 #endif
01489 }
01490
/**
 * Deinterlace an 8-byte-wide block with a 4-tap (-1 9 9 -1)/16 vertical filter.
 *
 * src is first advanced by 3*stride. Relative to that position, rows 3, 5, 7
 * and 9 are overwritten; each is computed from the rows at distance 1 and 3
 * above and below it (rows 0, 2, 4, 6, 8, 10 and 12 are read).
 *
 * The SIMD paths evaluate avg(b,d) - ((avg(a,e) - avg(b,d)) >> 3), which is the
 * same filter as the scalar path up to rounding.
 *
 * @param src    start of the block (modified in place)
 * @param stride byte offset from one row to the next
 */
01498 static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride)
01499 {
01500 #if TEMPLATE_PP_SSE2 || TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
01501 src+= stride*3;
01502 __asm__ volatile(
// REG_a = src + stride, REG_d = src + 5*stride, REG_c = src + 10*stride
01503 "lea (%0, %1), %%"REG_a" \n\t"
01504 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
01505 "lea (%%"REG_d", %1, 4), %%"REG_c" \n\t"
01506 "add %1, %%"REG_c" \n\t"
01507 #if TEMPLATE_PP_SSE2
// xmm7 stays zero and is used to widen bytes to words
01508 "pxor %%xmm7, %%xmm7 \n\t"
// REAL_DEINT_CUBIC(a,b,c,d,e): a/e are the outer taps, b/d the inner taps and
// c the destination row. Only the low 8 bytes of each xmm register are used.
01509 #define REAL_DEINT_CUBIC(a,b,c,d,e)\
01510 "movq " #a ", %%xmm0 \n\t"\
01511 "movq " #b ", %%xmm1 \n\t"\
01512 "movq " #d ", %%xmm2 \n\t"\
01513 "movq " #e ", %%xmm3 \n\t"\
01514 "pavgb %%xmm2, %%xmm1 \n\t"\
01515 "pavgb %%xmm3, %%xmm0 \n\t"\
01516 "punpcklbw %%xmm7, %%xmm0 \n\t"\
01517 "punpcklbw %%xmm7, %%xmm1 \n\t"\
01518 "psubw %%xmm1, %%xmm0 \n\t"\
01519 "psraw $3, %%xmm0 \n\t"\
01520 "psubw %%xmm0, %%xmm1 \n\t"\
01521 "packuswb %%xmm1, %%xmm1 \n\t"\
01522 "movlps %%xmm1, " #c " \n\t"
01523 #else //TEMPLATE_PP_SSE2
// mm7 stays zero and is used to widen bytes to words
01524 "pxor %%mm7, %%mm7 \n\t"
01525
01526
01527
// MMX variant of the same macro: the 8 bytes are split into low/high halves
// (mm0/mm2 and mm1/mm3) so the word arithmetic covers all 8 columns.
01528 #define REAL_DEINT_CUBIC(a,b,c,d,e)\
01529 "movq " #a ", %%mm0 \n\t"\
01530 "movq " #b ", %%mm1 \n\t"\
01531 "movq " #d ", %%mm2 \n\t"\
01532 "movq " #e ", %%mm3 \n\t"\
01533 PAVGB(%%mm2, %%mm1) \
01534 PAVGB(%%mm3, %%mm0) \
01535 "movq %%mm0, %%mm2 \n\t"\
01536 "punpcklbw %%mm7, %%mm0 \n\t"\
01537 "punpckhbw %%mm7, %%mm2 \n\t"\
01538 "movq %%mm1, %%mm3 \n\t"\
01539 "punpcklbw %%mm7, %%mm1 \n\t"\
01540 "punpckhbw %%mm7, %%mm3 \n\t"\
01541 "psubw %%mm1, %%mm0 \n\t" \
01542 "psubw %%mm3, %%mm2 \n\t" \
01543 "psraw $3, %%mm0 \n\t" \
01544 "psraw $3, %%mm2 \n\t" \
01545 "psubw %%mm0, %%mm1 \n\t" \
01546 "psubw %%mm2, %%mm3 \n\t" \
01547 "packuswb %%mm3, %%mm1 \n\t"\
01548 "movq %%mm1, " #c " \n\t"
01549 #endif //TEMPLATE_PP_SSE2
// The extra macro level lets the arguments be expanded before they are
// stringified by REAL_DEINT_CUBIC.
01550 #define DEINT_CUBIC(a,b,c,d,e) REAL_DEINT_CUBIC(a,b,c,d,e)
01551
// Destination rows, in order: 3, 5, 7, 9
01552 DEINT_CUBIC((%0) , (%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4) , (%%REGd, %1))
01553 DEINT_CUBIC((%%REGa, %1), (%0, %1, 4) , (%%REGd) , (%%REGd, %1), (%0, %1, 8))
01554 DEINT_CUBIC((%0, %1, 4) , (%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGc))
01555 DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc) , (%%REGc, %1, 2))
01556
01557 : : "r" (src), "r" ((x86_reg)stride)
01558 :
01559 #if TEMPLATE_PP_SSE2
01560 XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm7",)
01561 #endif
01562 "%"REG_a, "%"REG_d, "%"REG_c
01563 );
01564 #undef REAL_DEINT_CUBIC
01565 #else //TEMPLATE_PP_SSE2 || TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
// Scalar fallback: one column per iteration, 8 columns.
// CLIP is defined elsewhere - presumably clamps to the 0..255 byte range.
01566 int x;
01567 src+= stride*3;
01568 for(x=0; x<8; x++){
01569 src[stride*3] = CLIP((-src[0] + 9*src[stride*2] + 9*src[stride*4] - src[stride*6])>>4);
01570 src[stride*5] = CLIP((-src[stride*2] + 9*src[stride*4] + 9*src[stride*6] - src[stride*8])>>4);
01571 src[stride*7] = CLIP((-src[stride*4] + 9*src[stride*6] + 9*src[stride*8] - src[stride*10])>>4);
01572 src[stride*9] = CLIP((-src[stride*6] + 9*src[stride*8] + 9*src[stride*10] - src[stride*12])>>4);
01573 src++;
01574 }
01575 #endif //TEMPLATE_PP_SSE2 || TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
01576 }
01577
/**
 * Deinterlace an 8-byte-wide block by filtering rows 1, 3, 5 and 7 with a
 * 5-tap (-1 4 2 4 -1)/8 vertical filter.
 *
 * src is first advanced by 4*stride; row numbers below are relative to that.
 * Rows 0..9 are read, rows 1, 3, 5 and 7 are overwritten.
 *
 * @param src    start of the block (modified in place)
 * @param stride byte offset from one row to the next
 * @param tmp    8 bytes of state carried between calls: read on entry as the
 *               topmost filter tap for row 1, and overwritten on exit
 */
01585 static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp)
01586 {
01587 #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
01588 src+= stride*4;
01589 __asm__ volatile(
// REG_a = src + stride (row 1), REG_d = src + 5*stride (row 5)
01590 "lea (%0, %1), %%"REG_a" \n\t"
01591 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
// mm7 = 0 for byte->word widening; mm0 = saved row from tmp
01592 "pxor %%mm7, %%mm7 \n\t"
01593 "movq (%2), %%mm0 \n\t"
01594
01595
01596
// REAL_DEINT_FF(a,b,c,d): b is the row being filtered (and the destination),
// a and c are its neighbours, d is the row two below. mm0 supplies the row two
// above on entry; on exit mm0 holds the unfiltered b for the next expansion.
// Result: (4*avg(a,c) - avg(mm0,d) + b) >> 2, saturated to a byte.
01597 #define REAL_DEINT_FF(a,b,c,d)\
01598 "movq " #a ", %%mm1 \n\t"\
01599 "movq " #b ", %%mm2 \n\t"\
01600 "movq " #c ", %%mm3 \n\t"\
01601 "movq " #d ", %%mm4 \n\t"\
01602 PAVGB(%%mm3, %%mm1) \
01603 PAVGB(%%mm4, %%mm0) \
01604 "movq %%mm0, %%mm3 \n\t"\
01605 "punpcklbw %%mm7, %%mm0 \n\t"\
01606 "punpckhbw %%mm7, %%mm3 \n\t"\
01607 "movq %%mm1, %%mm4 \n\t"\
01608 "punpcklbw %%mm7, %%mm1 \n\t"\
01609 "punpckhbw %%mm7, %%mm4 \n\t"\
01610 "psllw $2, %%mm1 \n\t"\
01611 "psllw $2, %%mm4 \n\t"\
01612 "psubw %%mm0, %%mm1 \n\t"\
01613 "psubw %%mm3, %%mm4 \n\t"\
01614 "movq %%mm2, %%mm5 \n\t"\
01615 "movq %%mm2, %%mm0 \n\t"\
01616 "punpcklbw %%mm7, %%mm2 \n\t"\
01617 "punpckhbw %%mm7, %%mm5 \n\t"\
01618 "paddw %%mm2, %%mm1 \n\t"\
01619 "paddw %%mm5, %%mm4 \n\t"\
01620 "psraw $2, %%mm1 \n\t"\
01621 "psraw $2, %%mm4 \n\t"\
01622 "packuswb %%mm4, %%mm1 \n\t"\
01623 "movq %%mm1, " #b " \n\t"\
01624
// Extra macro level so arguments are expanded before being stringified.
01625 #define DEINT_FF(a,b,c,d) REAL_DEINT_FF(a,b,c,d)
01626
// Filtered rows, in order: 1, 3, 5, 7
01627 DEINT_FF((%0) , (%%REGa) , (%%REGa, %1), (%%REGa, %1, 2))
01628 DEINT_FF((%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4) , (%%REGd) )
01629 DEINT_FF((%0, %1, 4) , (%%REGd) , (%%REGd, %1), (%%REGd, %1, 2))
01630 DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4))
01631
// save the unfiltered row 7 for the next call
01632 "movq %%mm0, (%2) \n\t"
01633 : : "r" (src), "r" ((x86_reg)stride), "r"(tmp)
01634 : "%"REG_a, "%"REG_d
01635 );
01636 #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
// Scalar fallback: one column per iteration, 8 columns.
// CLIP is defined elsewhere - presumably clamps to the 0..255 byte range.
// NOTE(review): the SIMD path above uses the unfiltered odd row (1, 3, 5, 7)
// both as the 2x centre tap and as the outer tap of the following row, and
// saves row 7 to tmp. This path instead reloads t1/t2 from rows 4, 6 and 8 and
// saves row 8 to tmp, so the two paths do not appear to produce the same
// result - confirm which one is intended before relying on either.
01637 int x;
01638 src+= stride*4;
01639 for(x=0; x<8; x++){
01640 int t1= tmp[x];
01641 int t2= src[stride*1];
01642
01643 src[stride*1]= CLIP((-t1 + 4*src[stride*0] + 2*t2 + 4*src[stride*2] - src[stride*3] + 4)>>3);
01644 t1= src[stride*4];
01645 src[stride*3]= CLIP((-t2 + 4*src[stride*2] + 2*t1 + 4*src[stride*4] - src[stride*5] + 4)>>3);
01646 t2= src[stride*6];
01647 src[stride*5]= CLIP((-t1 + 4*src[stride*4] + 2*t2 + 4*src[stride*6] - src[stride*7] + 4)>>3);
01648 t1= src[stride*8];
01649 src[stride*7]= CLIP((-t2 + 4*src[stride*6] + 2*t1 + 4*src[stride*8] - src[stride*9] + 4)>>3);
01650 tmp[x]= t1;
01651
01652 src++;
01653 }
01654 #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
01655 }
01656
/**
 * Deinterlace an 8-byte-wide block by filtering every row with a 5-tap
 * (-1 2 6 2 -1)/8 vertical lowpass filter.
 *
 * src is first advanced by 4*stride; row numbers below are relative to that.
 * Rows 0..7 are overwritten and rows 0..9 are read. Each output uses the
 * unfiltered values of the two rows above it, which are carried in registers
 * or locals rather than re-read from the already modified block.
 *
 * @param src    start of the block (modified in place)
 * @param stride byte offset from one row to the next
 * @param tmp    8 bytes of state: on entry the row two above row 0, on exit
 *               the unfiltered row 6
 * @param tmp2   8 bytes of state: on entry the row directly above row 0, on
 *               exit the unfiltered row 7
 */
01664 static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, uint8_t *tmp, uint8_t *tmp2)
01665 {
01666 #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
01667 src+= stride*4;
01668 __asm__ volatile(
// REG_a = src + stride (row 1), REG_d = src + 5*stride (row 5)
01669 "lea (%0, %1), %%"REG_a" \n\t"
01670 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
// mm7 = 0 for byte->word widening; mm0/mm1 = the two saved rows
01671 "pxor %%mm7, %%mm7 \n\t"
01672 "movq (%2), %%mm0 \n\t"
01673 "movq (%3), %%mm1 \n\t"
01674
01675
01676
// REAL_DEINT_L5(t1,t2,a,b,c): a is the row being filtered (and destination),
// t2/b are the rows one above/below, t1/c the rows two above/below.
// Result: (3*a + 2*avg(t2,b) - avg(t1,c)) >> 2, saturated to a byte.
// t1 is overwritten with the unfiltered a, so swapping t1/t2 between
// consecutive expansions keeps the "two above"/"one above" roles correct.
01677 #define REAL_DEINT_L5(t1,t2,a,b,c)\
01678 "movq " #a ", %%mm2 \n\t"\
01679 "movq " #b ", %%mm3 \n\t"\
01680 "movq " #c ", %%mm4 \n\t"\
01681 PAVGB(t2, %%mm3) \
01682 PAVGB(t1, %%mm4) \
01683 "movq %%mm2, %%mm5 \n\t"\
01684 "movq %%mm2, " #t1 " \n\t"\
01685 "punpcklbw %%mm7, %%mm2 \n\t"\
01686 "punpckhbw %%mm7, %%mm5 \n\t"\
01687 "movq %%mm2, %%mm6 \n\t"\
01688 "paddw %%mm2, %%mm2 \n\t"\
01689 "paddw %%mm6, %%mm2 \n\t"\
01690 "movq %%mm5, %%mm6 \n\t"\
01691 "paddw %%mm5, %%mm5 \n\t"\
01692 "paddw %%mm6, %%mm5 \n\t"\
01693 "movq %%mm3, %%mm6 \n\t"\
01694 "punpcklbw %%mm7, %%mm3 \n\t"\
01695 "punpckhbw %%mm7, %%mm6 \n\t"\
01696 "paddw %%mm3, %%mm3 \n\t"\
01697 "paddw %%mm6, %%mm6 \n\t"\
01698 "paddw %%mm3, %%mm2 \n\t"\
01699 "paddw %%mm6, %%mm5 \n\t"\
01700 "movq %%mm4, %%mm6 \n\t"\
01701 "punpcklbw %%mm7, %%mm4 \n\t"\
01702 "punpckhbw %%mm7, %%mm6 \n\t"\
01703 "psubw %%mm4, %%mm2 \n\t"\
01704 "psubw %%mm6, %%mm5 \n\t"\
01705 "psraw $2, %%mm2 \n\t"\
01706 "psraw $2, %%mm5 \n\t"\
01707 "packuswb %%mm5, %%mm2 \n\t"\
01708 "movq %%mm2, " #a " \n\t"\
01709
// Extra macro level so arguments are expanded before being stringified.
01710 #define DEINT_L5(t1,t2,a,b,c) REAL_DEINT_L5(t1,t2,a,b,c)
01711
// Filtered rows, in order: 0, 1, 2, 3, 4, 5, 6, 7
01712 DEINT_L5(%%mm0, %%mm1, (%0) , (%%REGa) , (%%REGa, %1) )
01713 DEINT_L5(%%mm1, %%mm0, (%%REGa) , (%%REGa, %1) , (%%REGa, %1, 2))
01714 DEINT_L5(%%mm0, %%mm1, (%%REGa, %1) , (%%REGa, %1, 2), (%0, %1, 4) )
01715 DEINT_L5(%%mm1, %%mm0, (%%REGa, %1, 2), (%0, %1, 4) , (%%REGd) )
01716 DEINT_L5(%%mm0, %%mm1, (%0, %1, 4) , (%%REGd) , (%%REGd, %1) )
01717 DEINT_L5(%%mm1, %%mm0, (%%REGd) , (%%REGd, %1) , (%%REGd, %1, 2))
01718 DEINT_L5(%%mm0, %%mm1, (%%REGd, %1) , (%%REGd, %1, 2), (%0, %1, 8) )
01719 DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4))
01720
// save unfiltered rows 6 and 7 for the next call
01721 "movq %%mm0, (%2) \n\t"
01722 "movq %%mm1, (%3) \n\t"
01723 : : "r" (src), "r" ((x86_reg)stride), "r"(tmp), "r"(tmp2)
01724 : "%"REG_a, "%"REG_d
01725 );
01726 #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
// Scalar fallback: one column per iteration, 8 columns.
// t1, t2 and t3 rotate through the roles "two above", "one above" and
// "current", always holding unfiltered values read before the row is written.
// CLIP is defined elsewhere - presumably clamps to the 0..255 byte range.
01727 int x;
01728 src+= stride*4;
01729 for(x=0; x<8; x++){
01730 int t1= tmp[x];
01731 int t2= tmp2[x];
01732 int t3= src[0];
01733
01734 src[stride*0]= CLIP((-(t1 + src[stride*2]) + 2*(t2 + src[stride*1]) + 6*t3 + 4)>>3);
01735 t1= src[stride*1];
01736 src[stride*1]= CLIP((-(t2 + src[stride*3]) + 2*(t3 + src[stride*2]) + 6*t1 + 4)>>3);
01737 t2= src[stride*2];
01738 src[stride*2]= CLIP((-(t3 + src[stride*4]) + 2*(t1 + src[stride*3]) + 6*t2 + 4)>>3);
01739 t3= src[stride*3];
01740 src[stride*3]= CLIP((-(t1 + src[stride*5]) + 2*(t2 + src[stride*4]) + 6*t3 + 4)>>3);
01741 t1= src[stride*4];
01742 src[stride*4]= CLIP((-(t2 + src[stride*6]) + 2*(t3 + src[stride*5]) + 6*t1 + 4)>>3);
01743 t2= src[stride*5];
01744 src[stride*5]= CLIP((-(t3 + src[stride*7]) + 2*(t1 + src[stride*6]) + 6*t2 + 4)>>3);
01745 t3= src[stride*6];
01746 src[stride*6]= CLIP((-(t1 + src[stride*8]) + 2*(t2 + src[stride*7]) + 6*t3 + 4)>>3);
01747 t1= src[stride*7];
01748 src[stride*7]= CLIP((-(t2 + src[stride*9]) + 2*(t3 + src[stride*8]) + 6*t1 + 4)>>3);
01749
// t3 = unfiltered row 6, t1 = unfiltered row 7
01750 tmp[x]= t3;
01751 tmp2[x]= t1;
01752
01753 src++;
01754 }
01755 #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
01756 }
01757
/**
 * Deinterlace an 8-byte-wide block by blending every row with its vertical
 * neighbours, approximately (above + 2*current + below)/4.
 *
 * src is first advanced by 4*stride; row numbers below are relative to that.
 * Rows 0..7 are overwritten and rows 0..8 are read. Each output is built from
 * two chained averages: avg(avg(above, below), current), always using the
 * unfiltered neighbour values.
 *
 * @param src    start of the block (modified in place)
 * @param stride byte offset from one row to the next
 * @param tmp    8 bytes of state: on entry the row above row 0, on exit the
 *               unfiltered row 7
 */
01765 static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uint8_t *tmp)
01766 {
01767 #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
01768 src+= 4*stride;
01769 __asm__ volatile(
// REG_a = src + stride (row 1), REG_d = src + 5*stride (row 5)
01770 "lea (%0, %1), %%"REG_a" \n\t"
01771 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
01772
01773
01774
// mm0, mm1 and mm2 rotate as a three-row window of unfiltered rows.
// row 0 = avg(avg(tmp, row 1), row 0)
01775 "movq (%2), %%mm0 \n\t"
01776 "movq (%%"REG_a"), %%mm1 \n\t"
01777 PAVGB(%%mm1, %%mm0)
01778 "movq (%0), %%mm2 \n\t"
01779 PAVGB(%%mm2, %%mm0)
01780 "movq %%mm0, (%0) \n\t"
// row 1
01781 "movq (%%"REG_a", %1), %%mm0 \n\t"
01782 PAVGB(%%mm0, %%mm2)
01783 PAVGB(%%mm1, %%mm2)
01784 "movq %%mm2, (%%"REG_a") \n\t"
// row 2
01785 "movq (%%"REG_a", %1, 2), %%mm2 \n\t"
01786 PAVGB(%%mm2, %%mm1)
01787 PAVGB(%%mm0, %%mm1)
01788 "movq %%mm1, (%%"REG_a", %1) \n\t"
// row 3
01789 "movq (%0, %1, 4), %%mm1 \n\t"
01790 PAVGB(%%mm1, %%mm0)
01791 PAVGB(%%mm2, %%mm0)
01792 "movq %%mm0, (%%"REG_a", %1, 2) \n\t"
// row 4
01793 "movq (%%"REG_d"), %%mm0 \n\t"
01794 PAVGB(%%mm0, %%mm2)
01795 PAVGB(%%mm1, %%mm2)
01796 "movq %%mm2, (%0, %1, 4) \n\t"
// row 5
01797 "movq (%%"REG_d", %1), %%mm2 \n\t"
01798 PAVGB(%%mm2, %%mm1)
01799 PAVGB(%%mm0, %%mm1)
01800 "movq %%mm1, (%%"REG_d") \n\t"
// row 6
01801 "movq (%%"REG_d", %1, 2), %%mm1 \n\t"
01802 PAVGB(%%mm1, %%mm0)
01803 PAVGB(%%mm2, %%mm0)
01804 "movq %%mm0, (%%"REG_d", %1) \n\t"
// row 7, then save the unfiltered row 7 (still in mm1) to tmp
01805 "movq (%0, %1, 8), %%mm0 \n\t"
01806 PAVGB(%%mm0, %%mm2)
01807 PAVGB(%%mm1, %%mm2)
01808 "movq %%mm2, (%%"REG_d", %1, 2) \n\t"
01809 "movq %%mm1, (%2) \n\t"
01810
01811 : : "r" (src), "r" ((x86_reg)stride), "r" (tmp)
01812 : "%"REG_a, "%"REG_d
01813 );
01814 #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
// Scalar fallback: handles the 8 columns as two packed 32-bit words.
// (p&q) + (((p^q)&0xFEFEFEFE)>>1) is the per-byte average rounded down,
// (p|q) - (((p^q)&0xFEFEFEFE)>>1) is the per-byte average rounded up.
// a, b and c rotate as the three-row window of unfiltered rows.
// NOTE(review): the uint32_t casts assume unaligned, type-punned 32-bit
// accesses through a uint8_t pointer are acceptable on the target - confirm.
01815 int a, b, c, x;
01816 src+= 4*stride;
01817
01818 for(x=0; x<2; x++){
// row 0
01819 a= *(uint32_t*)&tmp[stride*0];
01820 b= *(uint32_t*)&src[stride*0];
01821 c= *(uint32_t*)&src[stride*1];
01822 a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1);
01823 *(uint32_t*)&src[stride*0]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
01824
// row 1
01825 a= *(uint32_t*)&src[stride*2];
01826 b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1);
01827 *(uint32_t*)&src[stride*1]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1);
01828
// row 2
01829 b= *(uint32_t*)&src[stride*3];
01830 c= (b&c) + (((b^c)&0xFEFEFEFEUL)>>1);
01831 *(uint32_t*)&src[stride*2]= (c|a) - (((c^a)&0xFEFEFEFEUL)>>1);
01832
// row 3
01833 c= *(uint32_t*)&src[stride*4];
01834 a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1);
01835 *(uint32_t*)&src[stride*3]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
01836
// row 4
01837 a= *(uint32_t*)&src[stride*5];
01838 b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1);
01839 *(uint32_t*)&src[stride*4]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1);
01840
// row 5
01841 b= *(uint32_t*)&src[stride*6];
01842 c= (b&c) + (((b^c)&0xFEFEFEFEUL)>>1);
01843 *(uint32_t*)&src[stride*5]= (c|a) - (((c^a)&0xFEFEFEFEUL)>>1);
01844
// row 6
01845 c= *(uint32_t*)&src[stride*7];
01846 a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1);
01847 *(uint32_t*)&src[stride*6]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
01848
// row 7
01849 a= *(uint32_t*)&src[stride*8];
01850 b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1);
01851 *(uint32_t*)&src[stride*7]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1);
01852
// c still holds the unfiltered row 7; save it and move to the next 4 columns
01853 *(uint32_t*)&tmp[stride*0]= c;
01854 src += 4;
01855 tmp += 4;
01856 }
01857 #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
01858 }
01859
/**
 * Deinterlace an 8-byte-wide block with a vertical 3-tap median filter.
 *
 * src is first advanced by 4*stride. Relative to that position, rows 1, 3, 5
 * and 7 are overwritten with the per-byte median of the row itself and the
 * rows directly above and below; rows 0..8 are read.
 *
 * @param src    start of the block (modified in place)
 * @param stride byte offset from one row to the next
 */
01866 static inline void RENAME(deInterlaceMedian)(uint8_t src[], int stride)
01867 {
01868 #if TEMPLATE_PP_MMX
01869 src+= 4*stride;
01870 #if TEMPLATE_PP_MMXEXT
01871 __asm__ volatile(
// REG_a = src + stride (row 1), REG_d = src + 5*stride (row 5)
01872 "lea (%0, %1), %%"REG_a" \n\t"
01873 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
01874
01875
01876
// Each group computes min(max(p,q), max(min(p,q), r)), i.e. the median of
// three rows, using byte-wise pmaxub/pminub.
// row 1 = median(row 0, row 1, row 2)
01877 "movq (%0), %%mm0 \n\t"
01878 "movq (%%"REG_a", %1), %%mm2 \n\t"
01879 "movq (%%"REG_a"), %%mm1 \n\t"
01880 "movq %%mm0, %%mm3 \n\t"
01881 "pmaxub %%mm1, %%mm0 \n\t"
01882 "pminub %%mm3, %%mm1 \n\t"
01883 "pmaxub %%mm2, %%mm1 \n\t"
01884 "pminub %%mm1, %%mm0 \n\t"
01885 "movq %%mm0, (%%"REG_a") \n\t"
01886
// row 3 = median(row 2, row 3, row 4); row 2 is still in mm2
01887 "movq (%0, %1, 4), %%mm0 \n\t"
01888 "movq (%%"REG_a", %1, 2), %%mm1 \n\t"
01889 "movq %%mm2, %%mm3 \n\t"
01890 "pmaxub %%mm1, %%mm2 \n\t"
01891 "pminub %%mm3, %%mm1 \n\t"
01892 "pmaxub %%mm0, %%mm1 \n\t"
01893 "pminub %%mm1, %%mm2 \n\t"
01894 "movq %%mm2, (%%"REG_a", %1, 2) \n\t"
01895
// row 5 = median(row 4, row 5, row 6); row 4 is still in mm0
01896 "movq (%%"REG_d"), %%mm2 \n\t"
01897 "movq (%%"REG_d", %1), %%mm1 \n\t"
01898 "movq %%mm2, %%mm3 \n\t"
01899 "pmaxub %%mm0, %%mm2 \n\t"
01900 "pminub %%mm3, %%mm0 \n\t"
01901 "pmaxub %%mm1, %%mm0 \n\t"
01902 "pminub %%mm0, %%mm2 \n\t"
01903 "movq %%mm2, (%%"REG_d") \n\t"
01904
// row 7 = median(row 6, row 7, row 8); row 6 is still in mm1
01905 "movq (%%"REG_d", %1, 2), %%mm2 \n\t"
01906 "movq (%0, %1, 8), %%mm0 \n\t"
01907 "movq %%mm2, %%mm3 \n\t"
01908 "pmaxub %%mm0, %%mm2 \n\t"
01909 "pminub %%mm3, %%mm0 \n\t"
01910 "pmaxub %%mm1, %%mm0 \n\t"
01911 "pminub %%mm0, %%mm2 \n\t"
01912 "movq %%mm2, (%%"REG_d", %1, 2) \n\t"
01913
01914
01915 : : "r" (src), "r" ((x86_reg)stride)
01916 : "%"REG_a, "%"REG_d
01917 );
01918
01919 #else // MMX without MMX2
01920 __asm__ volatile(
// REG_a = src + stride (row 1), REG_d = src + 5*stride (row 5)
01921 "lea (%0, %1), %%"REG_a" \n\t"
01922 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
01923
01924
// mm7 = 0, compared against the saturated differences below
01925 "pxor %%mm7, %%mm7 \n\t"
01926
// REAL_MEDIAN(a,b,c): loads three rows and stores the result to b.
// Without pminub/pmaxub it builds per-byte comparison masks from saturated
// subtractions (psubusb + pcmpeqb against zero), ORs them onto the three
// inputs and ANDs the results together - presumably the same mask-select
// median as the scalar fallback; verify before modifying.
01927 #define REAL_MEDIAN(a,b,c)\
01928 "movq " #a ", %%mm0 \n\t"\
01929 "movq " #b ", %%mm2 \n\t"\
01930 "movq " #c ", %%mm1 \n\t"\
01931 "movq %%mm0, %%mm3 \n\t"\
01932 "movq %%mm1, %%mm4 \n\t"\
01933 "movq %%mm2, %%mm5 \n\t"\
01934 "psubusb %%mm1, %%mm3 \n\t"\
01935 "psubusb %%mm2, %%mm4 \n\t"\
01936 "psubusb %%mm0, %%mm5 \n\t"\
01937 "pcmpeqb %%mm7, %%mm3 \n\t"\
01938 "pcmpeqb %%mm7, %%mm4 \n\t"\
01939 "pcmpeqb %%mm7, %%mm5 \n\t"\
01940 "movq %%mm3, %%mm6 \n\t"\
01941 "pxor %%mm4, %%mm3 \n\t"\
01942 "pxor %%mm5, %%mm4 \n\t"\
01943 "pxor %%mm6, %%mm5 \n\t"\
01944 "por %%mm3, %%mm1 \n\t"\
01945 "por %%mm4, %%mm2 \n\t"\
01946 "por %%mm5, %%mm0 \n\t"\
01947 "pand %%mm2, %%mm0 \n\t"\
01948 "pand %%mm1, %%mm0 \n\t"\
01949 "movq %%mm0, " #b " \n\t"
// Extra macro level so arguments are expanded before being stringified.
01950 #define MEDIAN(a,b,c) REAL_MEDIAN(a,b,c)
01951
// Destination rows, in order: 1, 3, 5, 7
01952 MEDIAN((%0) , (%%REGa) , (%%REGa, %1))
01953 MEDIAN((%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4))
01954 MEDIAN((%0, %1, 4) , (%%REGd) , (%%REGd, %1))
01955 MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8))
01956
01957 : : "r" (src), "r" ((x86_reg)stride)
01958 : "%"REG_a, "%"REG_d
01959 );
01960 #endif //TEMPLATE_PP_MMXEXT
01961 #else //TEMPLATE_PP_MMX
// Scalar fallback: walk each of the 8 columns, producing 4 outputs per column.
01962 int x, y;
01963 src+= 4*stride;
01964
01965 for(x=0; x<8; x++){
01966 uint8_t *colsrc = src;
01967 for (y=0; y<4; y++){
01968 int a, b, c, d, e, f;
01969 a = colsrc[0 ];
01970 b = colsrc[stride ];
01971 c = colsrc[stride*2];
// d, e, f are sign masks: -1 when the difference is negative, else 0.
// NOTE(review): this assumes a 32-bit int and an arithmetic right shift of
// negative values (implementation-defined in C) - confirm for the target.
01972 d = (a-b)>>31;
01973 e = (b-c)>>31;
01974 f = (c-a)>>31;
// OR-ing a value with -1 removes it from the AND, so exactly the median of
// a, b, c survives; the store truncates back to a byte.
01975 colsrc[stride ] = (a|(d^f)) & (b|(d^e)) & (c|(e^f));
// step down two rows to the next odd row
01976 colsrc += stride*2;
01977 }
01978 src++;
01979 }
01980 #endif //TEMPLATE_PP_MMX
01981 }
01982
01983 #if TEMPLATE_PP_MMX
01984
/**
 * Transpose an 8x8 byte block from src and scatter the result into two
 * destination buffers that use a 16-byte row pitch.
 *
 * Source column k (8 bytes, one from each of the 8 source rows) becomes an
 * 8-byte output row. Columns 0..4 are written to dst1 at byte offset
 * 128 + 16*k, and columns 3..7 are written to dst2 at byte offset 16*k, so
 * columns 3 and 4 go to both buffers.
 *
 * @param dst1      first destination; bytes 128..203 are written
 * @param dst2      second destination; bytes 48..119 are written
 * @param src       top-left byte of the 8x8 source block
 * @param srcStride byte offset from one source row to the next
 */
01987 static inline void RENAME(transpose1)(uint8_t *dst1, uint8_t *dst2, uint8_t *src, int srcStride)
01988 {
01989 __asm__(
// REG_a = src + srcStride (source row 1)
01990 "lea (%0, %1), %%"REG_a" \n\t"
01991
01992
// First half: source rows 0..3. Interleave rows 0/1 and rows 2/3 by bytes ...
01993 "movq (%0), %%mm0 \n\t"
01994 "movq (%%"REG_a"), %%mm1 \n\t"
01995 "movq %%mm0, %%mm2 \n\t"
01996 "punpcklbw %%mm1, %%mm0 \n\t"
01997 "punpckhbw %%mm1, %%mm2 \n\t"
01998
01999 "movq (%%"REG_a", %1), %%mm1 \n\t"
02000 "movq (%%"REG_a", %1, 2), %%mm3 \n\t"
02001 "movq %%mm1, %%mm4 \n\t"
02002 "punpcklbw %%mm3, %%mm1 \n\t"
02003 "punpckhbw %%mm3, %%mm4 \n\t"
02004
// ... then by words, so each 32-bit lane holds one source column's bytes from
// rows 0..3: mm0 = columns 0,1  mm3 = columns 2,3  mm2 = columns 4,5
// mm1 = columns 6,7
02005 "movq %%mm0, %%mm3 \n\t"
02006 "punpcklwd %%mm1, %%mm0 \n\t"
02007 "punpckhwd %%mm1, %%mm3 \n\t"
02008 "movq %%mm2, %%mm1 \n\t"
02009 "punpcklwd %%mm4, %%mm2 \n\t"
02010 "punpckhwd %%mm4, %%mm1 \n\t"
02011
// Store the first 4 bytes of each output row (%2 = dst1, %3 = dst2).
02012 "movd %%mm0, 128(%2) \n\t"
02013 "psrlq $32, %%mm0 \n\t"
02014 "movd %%mm0, 144(%2) \n\t"
02015 "movd %%mm3, 160(%2) \n\t"
02016 "psrlq $32, %%mm3 \n\t"
02017 "movd %%mm3, 176(%2) \n\t"
02018 "movd %%mm3, 48(%3) \n\t"
02019 "movd %%mm2, 192(%2) \n\t"
02020 "movd %%mm2, 64(%3) \n\t"
02021 "psrlq $32, %%mm2 \n\t"
02022 "movd %%mm2, 80(%3) \n\t"
02023 "movd %%mm1, 96(%3) \n\t"
02024 "psrlq $32, %%mm1 \n\t"
02025 "movd %%mm1, 112(%3) \n\t"
02026
// Second half: advance REG_a to source row 5 and repeat for rows 4..7.
02027 "lea (%%"REG_a", %1, 4), %%"REG_a" \n\t"
02028
02029 "movq (%0, %1, 4), %%mm0 \n\t"
02030 "movq (%%"REG_a"), %%mm1 \n\t"
02031 "movq %%mm0, %%mm2 \n\t"
02032 "punpcklbw %%mm1, %%mm0 \n\t"
02033 "punpckhbw %%mm1, %%mm2 \n\t"
02034
02035 "movq (%%"REG_a", %1), %%mm1 \n\t"
02036 "movq (%%"REG_a", %1, 2), %%mm3 \n\t"
02037 "movq %%mm1, %%mm4 \n\t"
02038 "punpcklbw %%mm3, %%mm1 \n\t"
02039 "punpckhbw %%mm3, %%mm4 \n\t"
02040
02041 "movq %%mm0, %%mm3 \n\t"
02042 "punpcklwd %%mm1, %%mm0 \n\t"
02043 "punpckhwd %%mm1, %%mm3 \n\t"
02044 "movq %%mm2, %%mm1 \n\t"
02045 "punpcklwd %%mm4, %%mm2 \n\t"
02046 "punpckhwd %%mm4, %%mm1 \n\t"
02047
// Store the last 4 bytes of each output row (same offsets as above, plus 4).
02048 "movd %%mm0, 132(%2) \n\t"
02049 "psrlq $32, %%mm0 \n\t"
02050 "movd %%mm0, 148(%2) \n\t"
02051 "movd %%mm3, 164(%2) \n\t"
02052 "psrlq $32, %%mm3 \n\t"
02053 "movd %%mm3, 180(%2) \n\t"
02054 "movd %%mm3, 52(%3) \n\t"
02055 "movd %%mm2, 196(%2) \n\t"
02056 "movd %%mm2, 68(%3) \n\t"
02057 "psrlq $32, %%mm2 \n\t"
02058 "movd %%mm2, 84(%3) \n\t"
02059 "movd %%mm1, 100(%3) \n\t"
02060 "psrlq $32, %%mm1 \n\t"
02061 "movd %%mm1, 116(%3) \n\t"
02062
02063
02064 :: "r" (src), "r" ((x86_reg)srcStride), "r" (dst1), "r" (dst2)
02065 : "%"REG_a
02066 );
02067 }
02068
/**
 * Transpose an 8x8 byte block from a buffer with a 16-byte row pitch into dst.
 *
 * Eight 8-byte source rows are read at byte offsets 0, 16, ..., 112 of src.
 * Source rows 0..3 supply bytes 0..3 of every destination row and source rows
 * 4..7 supply bytes 4..7; destination row k receives source column k.
 *
 * @param dst       top-left byte of the 8x8 destination block
 * @param dstStride byte offset from one destination row to the next
 * @param src       source buffer with 16-byte row pitch
 */
02072 static inline void RENAME(transpose2)(uint8_t *dst, int dstStride, uint8_t *src)
02073 {
02074 __asm__(
// REG_a = dst + dstStride (row 1), REG_d = dst + 5*dstStride (row 5)
02075 "lea (%0, %1), %%"REG_a" \n\t"
02076 "lea (%%"REG_a",%1,4), %%"REG_d" \n\t"
02077
02078
// First half: source rows 0..3. Interleave by bytes, then by words, so each
// 32-bit lane holds one source column's bytes from those four rows.
02079 "movq (%2), %%mm0 \n\t"
02080 "movq 16(%2), %%mm1 \n\t"
02081 "movq %%mm0, %%mm2 \n\t"
02082 "punpcklbw %%mm1, %%mm0 \n\t"
02083 "punpckhbw %%mm1, %%mm2 \n\t"
02084
02085 "movq 32(%2), %%mm1 \n\t"
02086 "movq 48(%2), %%mm3 \n\t"
02087 "movq %%mm1, %%mm4 \n\t"
02088 "punpcklbw %%mm3, %%mm1 \n\t"
02089 "punpckhbw %%mm3, %%mm4 \n\t"
02090
02091 "movq %%mm0, %%mm3 \n\t"
02092 "punpcklwd %%mm1, %%mm0 \n\t"
02093 "punpckhwd %%mm1, %%mm3 \n\t"
02094 "movq %%mm2, %%mm1 \n\t"
02095 "punpcklwd %%mm4, %%mm2 \n\t"
02096 "punpckhwd %%mm4, %%mm1 \n\t"
02097
// Write bytes 0..3 of destination rows 0..7.
02098 "movd %%mm0, (%0) \n\t"
02099 "psrlq $32, %%mm0 \n\t"
02100 "movd %%mm0, (%%"REG_a") \n\t"
02101 "movd %%mm3, (%%"REG_a", %1) \n\t"
02102 "psrlq $32, %%mm3 \n\t"
02103 "movd %%mm3, (%%"REG_a", %1, 2) \n\t"
02104 "movd %%mm2, (%0, %1, 4) \n\t"
02105 "psrlq $32, %%mm2 \n\t"
02106 "movd %%mm2, (%%"REG_d") \n\t"
02107 "movd %%mm1, (%%"REG_d", %1) \n\t"
02108 "psrlq $32, %%mm1 \n\t"
02109 "movd %%mm1, (%%"REG_d", %1, 2) \n\t"
02110
02111
// Second half: source rows 4..7 (byte offsets 64..112), same interleave.
02112 "movq 64(%2), %%mm0 \n\t"
02113 "movq 80(%2), %%mm1 \n\t"
02114 "movq %%mm0, %%mm2 \n\t"
02115 "punpcklbw %%mm1, %%mm0 \n\t"
02116 "punpckhbw %%mm1, %%mm2 \n\t"
02117
02118 "movq 96(%2), %%mm1 \n\t"
02119 "movq 112(%2), %%mm3 \n\t"
02120 "movq %%mm1, %%mm4 \n\t"
02121 "punpcklbw %%mm3, %%mm1 \n\t"
02122 "punpckhbw %%mm3, %%mm4 \n\t"
02123
02124 "movq %%mm0, %%mm3 \n\t"
02125 "punpcklwd %%mm1, %%mm0 \n\t"
02126 "punpckhwd %%mm1, %%mm3 \n\t"
02127 "movq %%mm2, %%mm1 \n\t"
02128 "punpcklwd %%mm4, %%mm2 \n\t"
02129 "punpckhwd %%mm4, %%mm1 \n\t"
02130
// Write bytes 4..7 of destination rows 0..7.
02131 "movd %%mm0, 4(%0) \n\t"
02132 "psrlq $32, %%mm0 \n\t"
02133 "movd %%mm0, 4(%%"REG_a") \n\t"
02134 "movd %%mm3, 4(%%"REG_a", %1) \n\t"
02135 "psrlq $32, %%mm3 \n\t"
02136 "movd %%mm3, 4(%%"REG_a", %1, 2) \n\t"
02137 "movd %%mm2, 4(%0, %1, 4) \n\t"
02138 "psrlq $32, %%mm2 \n\t"
02139 "movd %%mm2, 4(%%"REG_d") \n\t"
02140 "movd %%mm1, 4(%%"REG_d", %1) \n\t"
02141 "psrlq $32, %%mm1 \n\t"
02142 "movd %%mm1, 4(%%"REG_d", %1, 2) \n\t"
02143
02144 :: "r" (dst), "r" ((x86_reg)dstStride), "r" (src)
02145 : "%"REG_a, "%"REG_d
02146 );
02147 }
02148 #endif //TEMPLATE_PP_MMX
02149
02150
02151 #if !TEMPLATE_PP_ALTIVEC
02152 static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
02153 uint8_t *tempBlurred, uint32_t *tempBlurredPast, int *maxNoise)
02154 {
02155
02156 tempBlurredPast[127]= maxNoise[0];
02157 tempBlurredPast[128]= maxNoise[1];
02158 tempBlurredPast[129]= maxNoise[2];
02159
02160 #define FAST_L2_DIFF
02161
02162 #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
02163 __asm__ volatile(
02164 "lea (%2, %2, 2), %%"REG_a" \n\t"
02165 "lea (%2, %2, 4), %%"REG_d" \n\t"
02166 "lea (%%"REG_d", %2, 2), %%"REG_c" \n\t"
02167
02168
02169
02170 #ifdef L1_DIFF //needs mmx2
02171 "movq (%0), %%mm0 \n\t"
02172 "psadbw (%1), %%mm0 \n\t"
02173 "movq (%0, %2), %%mm1 \n\t"
02174 "psadbw (%1, %2), %%mm1 \n\t"
02175 "movq (%0, %2, 2), %%mm2 \n\t"
02176 "psadbw (%1, %2, 2), %%mm2 \n\t"
02177 "movq (%0, %%"REG_a"), %%mm3 \n\t"
02178 "psadbw (%1, %%"REG_a"), %%mm3 \n\t"
02179
02180 "movq (%0, %2, 4), %%mm4 \n\t"
02181 "paddw %%mm1, %%mm0 \n\t"
02182 "psadbw (%1, %2, 4), %%mm4 \n\t"
02183 "movq (%0, %%"REG_d"), %%mm5 \n\t"
02184 "paddw %%mm2, %%mm0 \n\t"
02185 "psadbw (%1, %%"REG_d"), %%mm5 \n\t"
02186 "movq (%0, %%"REG_a", 2), %%mm6 \n\t"
02187 "paddw %%mm3, %%mm0 \n\t"
02188 "psadbw (%1, %%"REG_a", 2), %%mm6 \n\t"
02189 "movq (%0, %%"REG_c"), %%mm7 \n\t"
02190 "paddw %%mm4, %%mm0 \n\t"
02191 "psadbw (%1, %%"REG_c"), %%mm7 \n\t"
02192 "paddw %%mm5, %%mm6 \n\t"
02193 "paddw %%mm7, %%mm6 \n\t"
02194 "paddw %%mm6, %%mm0 \n\t"
02195 #else //L1_DIFF
02196 #if defined (FAST_L2_DIFF)
02197 "pcmpeqb %%mm7, %%mm7 \n\t"
02198 "movq "MANGLE(b80)", %%mm6 \n\t"
02199 "pxor %%mm0, %%mm0 \n\t"
02200 #define REAL_L2_DIFF_CORE(a, b)\
02201 "movq " #a ", %%mm5 \n\t"\
02202 "movq " #b ", %%mm2 \n\t"\
02203 "pxor %%mm7, %%mm2 \n\t"\
02204 PAVGB(%%mm2, %%mm5)\
02205 "paddb %%mm6, %%mm5 \n\t"\
02206 "movq %%mm5, %%mm2 \n\t"\
02207 "psllw $8, %%mm5 \n\t"\
02208 "pmaddwd %%mm5, %%mm5 \n\t"\
02209 "pmaddwd %%mm2, %%mm2 \n\t"\
02210 "paddd %%mm2, %%mm5 \n\t"\
02211 "psrld $14, %%mm5 \n\t"\
02212 "paddd %%mm5, %%mm0 \n\t"
02213
02214 #else //defined (FAST_L2_DIFF)
02215 "pxor %%mm7, %%mm7 \n\t"
02216 "pxor %%mm0, %%mm0 \n\t"
02217 #define REAL_L2_DIFF_CORE(a, b)\
02218 "movq " #a ", %%mm5 \n\t"\
02219 "movq " #b ", %%mm2 \n\t"\
02220 "movq %%mm5, %%mm1 \n\t"\
02221 "movq %%mm2, %%mm3 \n\t"\
02222 "punpcklbw %%mm7, %%mm5 \n\t"\
02223 "punpckhbw %%mm7, %%mm1 \n\t"\
02224 "punpcklbw %%mm7, %%mm2 \n\t"\
02225 "punpckhbw %%mm7, %%mm3 \n\t"\
02226 "psubw %%mm2, %%mm5 \n\t"\
02227 "psubw %%mm3, %%mm1 \n\t"\
02228 "pmaddwd %%mm5, %%mm5 \n\t"\
02229 "pmaddwd %%mm1, %%mm1 \n\t"\
02230 "paddd %%mm1, %%mm5 \n\t"\
02231 "paddd %%mm5, %%mm0 \n\t"
02232
02233 #endif //defined (FAST_L2_DIFF)
02234
02235 #define L2_DIFF_CORE(a, b) REAL_L2_DIFF_CORE(a, b)
02236
02237 L2_DIFF_CORE((%0) , (%1))
02238 L2_DIFF_CORE((%0, %2) , (%1, %2))
02239 L2_DIFF_CORE((%0, %2, 2) , (%1, %2, 2))
02240 L2_DIFF_CORE((%0, %%REGa) , (%1, %%REGa))
02241 L2_DIFF_CORE((%0, %2, 4) , (%1, %2, 4))
02242 L2_DIFF_CORE((%0, %%REGd) , (%1, %%REGd))
02243 L2_DIFF_CORE((%0, %%REGa,2), (%1, %%REGa,2))
02244 L2_DIFF_CORE((%0, %%REGc) , (%1, %%REGc))
02245
02246 #endif //L1_DIFF
02247
02248 "movq %%mm0, %%mm4 \n\t"
02249 "psrlq $32, %%mm0 \n\t"
02250 "paddd %%mm0, %%mm4 \n\t"
02251 "movd %%mm4, %%ecx \n\t"
02252 "shll $2, %%ecx \n\t"
02253 "mov %3, %%"REG_d" \n\t"
02254 "addl -4(%%"REG_d"), %%ecx \n\t"
02255 "addl 4(%%"REG_d"), %%ecx \n\t"
02256 "addl -1024(%%"REG_d"), %%ecx \n\t"
02257 "addl $4, %%ecx \n\t"
02258 "addl 1024(%%"REG_d"), %%ecx \n\t"
02259 "shrl $3, %%ecx \n\t"
02260 "movl %%ecx, (%%"REG_d") \n\t"
02261
02262
02263
02264
02265 "cmpl 512(%%"REG_d"), %%ecx \n\t"
02266 " jb 2f \n\t"
02267 "cmpl 516(%%"REG_d"), %%ecx \n\t"
02268 " jb 1f \n\t"
02269
02270 "lea (%%"REG_a", %2, 2), %%"REG_d" \n\t"
02271 "lea (%%"REG_d", %2, 2), %%"REG_c" \n\t"
02272 "movq (%0), %%mm0 \n\t"
02273 "movq (%0, %2), %%mm1 \n\t"
02274 "movq (%0, %2, 2), %%mm2 \n\t"
02275 "movq (%0, %%"REG_a"), %%mm3 \n\t"
02276 "movq (%0, %2, 4), %%mm4 \n\t"
02277 "movq (%0, %%"REG_d"), %%mm5 \n\t"
02278 "movq (%0, %%"REG_a", 2), %%mm6 \n\t"
02279 "movq (%0, %%"REG_c"), %%mm7 \n\t"
02280 "movq %%mm0, (%1) \n\t"
02281 "movq %%mm1, (%1, %2) \n\t"
02282 "movq %%mm2, (%1, %2, 2) \n\t"
02283 "movq %%mm3, (%1, %%"REG_a") \n\t"
02284 "movq %%mm4, (%1, %2, 4) \n\t"
02285 "movq %%mm5, (%1, %%"REG_d") \n\t"
02286 "movq %%mm6, (%1, %%"REG_a", 2) \n\t"
02287 "movq %%mm7, (%1, %%"REG_c") \n\t"
02288 "jmp 4f \n\t"
02289
02290 "1: \n\t"
02291 "lea (%%"REG_a", %2, 2), %%"REG_d" \n\t"
02292 "lea (%%"REG_d", %2, 2), %%"REG_c" \n\t"
02293 "movq (%0), %%mm0 \n\t"
02294 PAVGB((%1), %%mm0)
02295 "movq (%0, %2), %%mm1 \n\t"
02296 PAVGB((%1, %2), %%mm1)
02297 "movq (%0, %2, 2), %%mm2 \n\t"
02298 PAVGB((%1, %2, 2), %%mm2)
02299 "movq (%0, %%"REG_a"), %%mm3 \n\t"
02300 PAVGB((%1, %%REGa), %%mm3)
02301 "movq (%0, %2, 4), %%mm4 \n\t"
02302 PAVGB((%1, %2, 4), %%mm4)
02303 "movq (%0, %%"REG_d"), %%mm5 \n\t"
02304 PAVGB((%1, %%REGd), %%mm5)
02305 "movq (%0, %%"REG_a", 2), %%mm6 \n\t"
02306 PAVGB((%1, %%REGa, 2), %%mm6)
02307 "movq (%0, %%"REG_c"), %%mm7 \n\t"
02308 PAVGB((%1, %%REGc), %%mm7)
02309 "movq %%mm0, (%1) \n\t"
02310 "movq %%mm1, (%1, %2) \n\t"
02311 "movq %%mm2, (%1, %2, 2) \n\t"
02312 "movq %%mm3, (%1, %%"REG_a") \n\t"
02313 "movq %%mm4, (%1, %2, 4) \n\t"
02314 "movq %%mm5, (%1, %%"REG_d") \n\t"
02315 "movq %%mm6, (%1, %%"REG_a", 2) \n\t"
02316 "movq %%mm7, (%1, %%"REG_c") \n\t"
02317 "movq %%mm0, (%0) \n\t"
02318 "movq %%mm1, (%0, %2) \n\t"
02319 "movq %%mm2, (%0, %2, 2) \n\t"
02320 "movq %%mm3, (%0, %%"REG_a") \n\t"
02321 "movq %%mm4, (%0, %2, 4) \n\t"
02322 "movq %%mm5, (%0, %%"REG_d") \n\t"
02323 "movq %%mm6, (%0, %%"REG_a", 2) \n\t"
02324 "movq %%mm7, (%0, %%"REG_c") \n\t"
02325 "jmp 4f \n\t"
02326
02327 "2: \n\t"
02328 "cmpl 508(%%"REG_d"), %%ecx \n\t"
02329 " jb 3f \n\t"
02330
02331 "lea (%%"REG_a", %2, 2), %%"REG_d" \n\t"
02332 "lea (%%"REG_d", %2, 2), %%"REG_c" \n\t"
02333 "movq (%0), %%mm0 \n\t"
02334 "movq (%0, %2), %%mm1 \n\t"
02335 "movq (%0, %2, 2), %%mm2 \n\t"
02336 "movq (%0, %%"REG_a"), %%mm3 \n\t"
02337 "movq (%1), %%mm4 \n\t"
02338 "movq (%1, %2), %%mm5 \n\t"
02339 "movq (%1, %2, 2), %%mm6 \n\t"
02340 "movq (%1, %%"REG_a"), %%mm7 \n\t"
02341 PAVGB(%%mm4, %%mm0)
02342 PAVGB(%%mm5, %%mm1)
02343 PAVGB(%%mm6, %%mm2)
02344 PAVGB(%%mm7, %%mm3)
02345 PAVGB(%%mm4, %%mm0)
02346 PAVGB(%%mm5, %%mm1)
02347 PAVGB(%%mm6, %%mm2)
02348 PAVGB(%%mm7, %%mm3)
02349 "movq %%mm0, (%1) \n\t"
02350 "movq %%mm1, (%1, %2) \n\t"
02351 "movq %%mm2, (%1, %2, 2) \n\t"
02352 "movq %%mm3, (%1, %%"REG_a") \n\t"
02353 "movq %%mm0, (%0) \n\t"
02354 "movq %%mm1, (%0, %2) \n\t"
02355 "movq %%mm2, (%0, %2, 2) \n\t"
02356 "movq %%mm3, (%0, %%"REG_a") \n\t"
02357
02358 "movq (%0, %2, 4), %%mm0 \n\t"
02359 "movq (%0, %%"REG_d"), %%mm1 \n\t"
02360 "movq (%0, %%"REG_a", 2), %%mm2 \n\t"
02361 "movq (%0, %%"REG_c"), %%mm3 \n\t"
02362 "movq (%1, %2, 4), %%mm4 \n\t"
02363 "movq (%1, %%"REG_d"), %%mm5 \n\t"
02364 "movq (%1, %%"REG_a", 2), %%mm6 \n\t"
02365 "movq (%1, %%"REG_c"), %%mm7 \n\t"
02366 PAVGB(%%mm4, %%mm0)
02367 PAVGB(%%mm5, %%mm1)
02368 PAVGB(%%mm6, %%mm2)
02369 PAVGB(%%mm7, %%mm3)
02370 PAVGB(%%mm4, %%mm0)
02371 PAVGB(%%mm5, %%mm1)
02372 PAVGB(%%mm6, %%mm2)
02373 PAVGB(%%mm7, %%mm3)
02374 "movq %%mm0, (%1, %2, 4) \n\t"
02375 "movq %%mm1, (%1, %%"REG_d") \n\t"
02376 "movq %%mm2, (%1, %%"REG_a", 2) \n\t"
02377 "movq %%mm3, (%1, %%"REG_c") \n\t"
02378 "movq %%mm0, (%0, %2, 4) \n\t"
02379 "movq %%mm1, (%0, %%"REG_d") \n\t"
02380 "movq %%mm2, (%0, %%"REG_a", 2) \n\t"
02381 "movq %%mm3, (%0, %%"REG_c") \n\t"
02382 "jmp 4f \n\t"
02383
02384 "3: \n\t"
02385 "lea (%%"REG_a", %2, 2), %%"REG_d" \n\t"
02386 "lea (%%"REG_d", %2, 2), %%"REG_c" \n\t"
02387 "movq (%0), %%mm0 \n\t"
02388 "movq (%0, %2), %%mm1 \n\t"
02389 "movq (%0, %2, 2), %%mm2 \n\t"
02390 "movq (%0, %%"REG_a"), %%mm3 \n\t"
02391 "movq (%1), %%mm4 \n\t"
02392 "movq (%1, %2), %%mm5 \n\t"
02393 "movq (%1, %2, 2), %%mm6 \n\t"
02394 "movq (%1, %%"REG_a"), %%mm7 \n\t"
02395 PAVGB(%%mm4, %%mm0)
02396 PAVGB(%%mm5, %%mm1)
02397 PAVGB(%%mm6, %%mm2)
02398 PAVGB(%%mm7, %%mm3)
02399 PAVGB(%%mm4, %%mm0)
02400 PAVGB(%%mm5, %%mm1)
02401 PAVGB(%%mm6, %%mm2)
02402 PAVGB(%%mm7, %%mm3)
02403 PAVGB(%%mm4, %%mm0)
02404 PAVGB(%%mm5, %%mm1)
02405 PAVGB(%%mm6, %%mm2)
02406 PAVGB(%%mm7, %%mm3)
02407 "movq %%mm0, (%1) \n\t"
02408 "movq %%mm1, (%1, %2) \n\t"
02409 "movq %%mm2, (%1, %2, 2) \n\t"
02410 "movq %%mm3, (%1, %%"REG_a") \n\t"
02411 "movq %%mm0, (%0) \n\t"
02412 "movq %%mm1, (%0, %2) \n\t"
02413 "movq %%mm2, (%0, %2, 2) \n\t"
02414 "movq %%mm3, (%0, %%"REG_a") \n\t"
02415
02416 "movq (%0, %2, 4), %%mm0 \n\t"
02417 "movq (%0, %%"REG_d"), %%mm1 \n\t"
02418 "movq (%0, %%"REG_a", 2), %%mm2 \n\t"
02419 "movq (%0, %%"REG_c"), %%mm3 \n\t"
02420 "movq (%1, %2, 4), %%mm4 \n\t"
02421 "movq (%1, %%"REG_d"), %%mm5 \n\t"
02422 "movq (%1, %%"REG_a", 2), %%mm6 \n\t"
02423 "movq (%1, %%"REG_c"), %%mm7 \n\t"
02424 PAVGB(%%mm4, %%mm0)
02425 PAVGB(%%mm5, %%mm1)
02426 PAVGB(%%mm6, %%mm2)
02427 PAVGB(%%mm7, %%mm3)
02428 PAVGB(%%mm4, %%mm0)
02429 PAVGB(%%mm5, %%mm1)
02430 PAVGB(%%mm6, %%mm2)
02431 PAVGB(%%mm7, %%mm3)
02432 PAVGB(%%mm4, %%mm0)
02433 PAVGB(%%mm5, %%mm1)
02434 PAVGB(%%mm6, %%mm2)
02435 PAVGB(%%mm7, %%mm3)
02436 "movq %%mm0, (%1, %2, 4) \n\t"
02437 "movq %%mm1, (%1, %%"REG_d") \n\t"
02438 "movq %%mm2, (%1, %%"REG_a", 2) \n\t"
02439 "movq %%mm3, (%1, %%"REG_c") \n\t"
02440 "movq %%mm0, (%0, %2, 4) \n\t"
02441 "movq %%mm1, (%0, %%"REG_d") \n\t"
02442 "movq %%mm2, (%0, %%"REG_a", 2) \n\t"
02443 "movq %%mm3, (%0, %%"REG_c") \n\t"
02444
02445 "4: \n\t"
02446
02447 :: "r" (src), "r" (tempBlurred), "r"((x86_reg)stride), "m" (tempBlurredPast)
02448 : "%"REG_a, "%"REG_d, "%"REG_c, "memory"
02449 );
02450 #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
02451 {
02452 int y;
02453 int d=0;
02454
02455 int i;
02456
02457 for(y=0; y<8; y++){
02458 int x;
02459 for(x=0; x<8; x++){
02460 int ref= tempBlurred[ x + y*stride ];
02461 int cur= src[ x + y*stride ];
02462 int d1=ref - cur;
02463
02464
02465
02466 d+= d1*d1;
02467
02468 }
02469 }
02470 i=d;
02471 d= (
02472 4*d
02473 +(*(tempBlurredPast-256))
02474 +(*(tempBlurredPast-1))+ (*(tempBlurredPast+1))
02475 +(*(tempBlurredPast+256))
02476 +4)>>3;
02477 *tempBlurredPast=i;
02478
02479
02480
02481
02482
02483
02484
02485
02486
02487 if(d > maxNoise[1]){
02488 if(d < maxNoise[2]){
02489 for(y=0; y<8; y++){
02490 int x;
02491 for(x=0; x<8; x++){
02492 int ref= tempBlurred[ x + y*stride ];
02493 int cur= src[ x + y*stride ];
02494 tempBlurred[ x + y*stride ]=
02495 src[ x + y*stride ]=
02496 (ref + cur + 1)>>1;
02497 }
02498 }
02499 }else{
02500 for(y=0; y<8; y++){
02501 int x;
02502 for(x=0; x<8; x++){
02503 tempBlurred[ x + y*stride ]= src[ x + y*stride ];
02504 }
02505 }
02506 }
02507 }else{
02508 if(d < maxNoise[0]){
02509 for(y=0; y<8; y++){
02510 int x;
02511 for(x=0; x<8; x++){
02512 int ref= tempBlurred[ x + y*stride ];
02513 int cur= src[ x + y*stride ];
02514 tempBlurred[ x + y*stride ]=
02515 src[ x + y*stride ]=
02516 (ref*7 + cur + 4)>>3;
02517 }
02518 }
02519 }else{
02520 for(y=0; y<8; y++){
02521 int x;
02522 for(x=0; x<8; x++){
02523 int ref= tempBlurred[ x + y*stride ];
02524 int cur= src[ x + y*stride ];
02525 tempBlurred[ x + y*stride ]=
02526 src[ x + y*stride ]=
02527 (ref*3 + cur + 2)>>2;
02528 }
02529 }
02530 }
02531 }
02532 }
02533 #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
02534 }
02535 #endif //TEMPLATE_PP_ALTIVEC
02536
02537 #if TEMPLATE_PP_MMX
02538
/**
 * Deblocking filter over one 8-byte-wide edge (only built when TEMPLATE_PP_MMX).
 *
 * src is first advanced by 3*step. Relative to that position the code reads ten
 * lines spaced `step` bytes apart ("line 0" .. "line 9" in the comments below),
 * 8 bytes per line, and treats every byte column independently:
 *  - eq_mask: 0xFF in columns where the number of adjacent-line pairs that pass
 *    the mmxDcOffset/mmxDcThreshold difference test is greater than
 *    c->ppMode.flatnessThreshold (signed byte compare);
 *  - dc_mask: 0xFF in columns where max-min over lines 1..8 is below 2*c->pQPb.
 * Columns set in both masks are low-pass filtered on lines 1..8. Columns that
 * are clear in eq_mask get a clipped correction added to line 4 and subtracted
 * from line 5.
 *
 * @param src    pointer into the picture; modified in place
 * @param step   byte distance between consecutive lines
 * @param stride not referenced anywhere in this body
 * @param c      context; this function reads mmxDcOffset[], mmxDcThreshold[],
 *               nonBQP, pQPb and ppMode.flatnessThreshold
 */
02541 static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int stride, PPContext *c){
02542 int64_t dc_mask, eq_mask, both_masks;
// Scratch for the running sums of the low-pass filter; the asm below only
// touches the first 160 bytes (ten 16-byte entries).
02543 int64_t sums[10*8*2];
02544 src+= step*3;
02545
// Preload mm7 = offset and mm6 = threshold for the current nonBQP.
// The next asm statement relies on mm6/mm7 still holding these values.
02546 __asm__ volatile(
02547 "movq %0, %%mm7 \n\t"
02548 "movq %1, %%mm6 \n\t"
02549 : : "m" (c->mmxDcOffset[c->nonBQP]), "m" (c->mmxDcThreshold[c->nonBQP])
02550 );
02551
// Pass 1: build eq_mask (%0) and dc_mask (%1).
// Operands: %2 = src, %3 = step, %4 = c->pQPb, %5 = flatnessThreshold.
// REG_a starts at line 1 and is moved to line 5 half way through.
// mm0 accumulates the compare results (each hit adds 0xFF, i.e. -1),
// mm4 tracks the running maximum and mm3 the running minimum of lines 1..8.
02552 __asm__ volatile(
02553 "lea (%2, %3), %%"REG_a" \n\t"
02554
02555
02556
// line 0 vs line 1: (line0 - line1) + offset, signed-compared against threshold.
// Presumably equivalent to |difference| <= some small offset, given how
// postProcess fills the two tables -- TODO confirm.
02557 "movq (%2), %%mm0 \n\t"
02558 "movq (%%"REG_a"), %%mm1 \n\t"
02559 "movq %%mm1, %%mm3 \n\t"
02560 "movq %%mm1, %%mm4 \n\t"
02561 "psubb %%mm1, %%mm0 \n\t"
02562 "paddb %%mm7, %%mm0 \n\t"
02563 "pcmpgtb %%mm6, %%mm0 \n\t"
02564
// line 1 vs line 2
02565 "movq (%%"REG_a",%3), %%mm2 \n\t"
02566 PMAXUB(%%mm2, %%mm4)
02567 PMINUB(%%mm2, %%mm3, %%mm5)
02568 "psubb %%mm2, %%mm1 \n\t"
02569 "paddb %%mm7, %%mm1 \n\t"
02570 "pcmpgtb %%mm6, %%mm1 \n\t"
02571 "paddb %%mm1, %%mm0 \n\t"
02572
// line 2 vs line 3
02573 "movq (%%"REG_a", %3, 2), %%mm1 \n\t"
02574 PMAXUB(%%mm1, %%mm4)
02575 PMINUB(%%mm1, %%mm3, %%mm5)
02576 "psubb %%mm1, %%mm2 \n\t"
02577 "paddb %%mm7, %%mm2 \n\t"
02578 "pcmpgtb %%mm6, %%mm2 \n\t"
02579 "paddb %%mm2, %%mm0 \n\t"
02580
// REG_a now points at line 5
02581 "lea (%%"REG_a", %3, 4), %%"REG_a" \n\t"
02582
// line 3 vs line 4
02583 "movq (%2, %3, 4), %%mm2 \n\t"
02584 PMAXUB(%%mm2, %%mm4)
02585 PMINUB(%%mm2, %%mm3, %%mm5)
02586 "psubb %%mm2, %%mm1 \n\t"
02587 "paddb %%mm7, %%mm1 \n\t"
02588 "pcmpgtb %%mm6, %%mm1 \n\t"
02589 "paddb %%mm1, %%mm0 \n\t"
02590
// line 4 vs line 5
02591 "movq (%%"REG_a"), %%mm1 \n\t"
02592 PMAXUB(%%mm1, %%mm4)
02593 PMINUB(%%mm1, %%mm3, %%mm5)
02594 "psubb %%mm1, %%mm2 \n\t"
02595 "paddb %%mm7, %%mm2 \n\t"
02596 "pcmpgtb %%mm6, %%mm2 \n\t"
02597 "paddb %%mm2, %%mm0 \n\t"
02598
// line 5 vs line 6
02599 "movq (%%"REG_a", %3), %%mm2 \n\t"
02600 PMAXUB(%%mm2, %%mm4)
02601 PMINUB(%%mm2, %%mm3, %%mm5)
02602 "psubb %%mm2, %%mm1 \n\t"
02603 "paddb %%mm7, %%mm1 \n\t"
02604 "pcmpgtb %%mm6, %%mm1 \n\t"
02605 "paddb %%mm1, %%mm0 \n\t"
02606
// line 6 vs line 7
02607 "movq (%%"REG_a", %3, 2), %%mm1 \n\t"
02608 PMAXUB(%%mm1, %%mm4)
02609 PMINUB(%%mm1, %%mm3, %%mm5)
02610 "psubb %%mm1, %%mm2 \n\t"
02611 "paddb %%mm7, %%mm2 \n\t"
02612 "pcmpgtb %%mm6, %%mm2 \n\t"
02613 "paddb %%mm2, %%mm0 \n\t"
02614
// line 7 vs line 8
02615 "movq (%2, %3, 8), %%mm2 \n\t"
02616 PMAXUB(%%mm2, %%mm4)
02617 PMINUB(%%mm2, %%mm3, %%mm5)
02618 "psubb %%mm2, %%mm1 \n\t"
02619 "paddb %%mm7, %%mm1 \n\t"
02620 "pcmpgtb %%mm6, %%mm1 \n\t"
02621 "paddb %%mm1, %%mm0 \n\t"
02622
// line 8 vs line 9 (line 9 is not included in the min/max), then mm4 = max - min
02623 "movq (%%"REG_a", %3, 4), %%mm1 \n\t"
02624 "psubb %%mm1, %%mm2 \n\t"
02625 "paddb %%mm7, %%mm2 \n\t"
02626 "pcmpgtb %%mm6, %%mm2 \n\t"
02627 "paddb %%mm2, %%mm0 \n\t"
02628 "psubusb %%mm3, %%mm4 \n\t"
02629
// dc_mask: mm7 = saturate(2*pQPb) - range saturates to 0 where range >= 2*pQPb;
// the first pcmpeqb against zero flags those columns, the second inverts the
// result, leaving 0xFF where range < 2*pQPb.
02630 "pxor %%mm6, %%mm6 \n\t"
02631 "movq %4, %%mm7 \n\t"
02632 "paddusb %%mm7, %%mm7 \n\t"
02633 "psubusb %%mm4, %%mm7 \n\t"
02634 "pcmpeqb %%mm6, %%mm7 \n\t"
02635 "pcmpeqb %%mm6, %%mm7 \n\t"
02636 "movq %%mm7, %1 \n\t"
02637
// eq_mask: the three punpcklbw replicate the lowest byte of flatnessThreshold
// into all 8 bytes; mm6 = 0 - mm0 turns the accumulated -1s into a positive
// count, which is then compared (signed) against the threshold.
02638 "movq %5, %%mm7 \n\t"
02639 "punpcklbw %%mm7, %%mm7 \n\t"
02640 "punpcklbw %%mm7, %%mm7 \n\t"
02641 "punpcklbw %%mm7, %%mm7 \n\t"
02642 "psubb %%mm0, %%mm6 \n\t"
02643 "pcmpgtb %%mm7, %%mm6 \n\t"
02644 "movq %%mm6, %0 \n\t"
02645
02646 : "=m" (eq_mask), "=m" (dc_mask)
02647 : "r" (src), "r" ((x86_reg)step), "m" (c->pQPb), "m"(c->ppMode.flatnessThreshold)
02648 : "%"REG_a
02649 );
02650
02651 both_masks = dc_mask & eq_mask;
02652
// Low-pass path: only entered when at least one column is set in both masks.
02653 if(both_masks){
02654 x86_reg offset= -8*step;
02655 int64_t *temp_sums= sums;
02656
// Pass 2a: fill sums[] with ten running sums, 16 bytes each (low four columns
// as words, then high four columns as words).
// Operands: %0 = src (walked and restored), %1 = step, %2 = c->pQPb,
// %3 = sums, %4 = the original src value used to rewind %0.
// NOTE(review): this statement stores through %3 but lists no "memory"
// clobber -- confirm that is intentional.
02657 __asm__ volatile(
02658 "movq %2, %%mm0 \n\t"
02659 "pxor %%mm4, %%mm4 \n\t"
02660
// "first" (mm6): per column, line 0 where |line0 - line1| < pQPb, else line 1.
// The xor/and/xor sequence is a branch-free select between the two lines.
02661 "movq (%0), %%mm6 \n\t"
02662 "movq (%0, %1), %%mm5 \n\t"
02663 "movq %%mm5, %%mm1 \n\t"
02664 "movq %%mm6, %%mm2 \n\t"
02665 "psubusb %%mm6, %%mm5 \n\t"
02666 "psubusb %%mm1, %%mm2 \n\t"
02667 "por %%mm5, %%mm2 \n\t"
02668 "psubusb %%mm2, %%mm0 \n\t"
02669 "pcmpeqb %%mm4, %%mm0 \n\t"
02670
02671 "pxor %%mm6, %%mm1 \n\t"
02672 "pand %%mm0, %%mm1 \n\t"
02673 "pxor %%mm1, %%mm6 \n\t"
02674
02675
// "last" (mm7): per column, line 9 where |line8 - line9| < pQPb, else line 8.
// %0 is advanced to line 1 in between the two loads.
02676 "movq (%0, %1, 8), %%mm5 \n\t"
02677 "add %1, %0 \n\t"
02678 "movq (%0, %1, 8), %%mm7 \n\t"
02679 "movq %%mm5, %%mm1 \n\t"
02680 "movq %%mm7, %%mm2 \n\t"
02681 "psubusb %%mm7, %%mm5 \n\t"
02682 "psubusb %%mm1, %%mm2 \n\t"
02683 "por %%mm5, %%mm2 \n\t"
02684 "movq %2, %%mm0 \n\t"
02685 "psubusb %%mm2, %%mm0 \n\t"
02686 "pcmpeqb %%mm4, %%mm0 \n\t"
02687
02688 "pxor %%mm7, %%mm1 \n\t"
02689 "pand %%mm0, %%mm1 \n\t"
02690 "pxor %%mm1, %%mm7 \n\t"
02691
// Widen "first" to words: mm5 = low four columns, mm6 = high four columns.
02692 "movq %%mm6, %%mm5 \n\t"
02693 "punpckhbw %%mm4, %%mm6 \n\t"
02694 "punpcklbw %%mm4, %%mm5 \n\t"
02695
02696
// Accumulator mm0/mm1 = 4*first + w04 (w04 is defined outside this block;
// presumably four words of 4, acting as a rounding term -- TODO confirm).
02697 "movq %%mm5, %%mm0 \n\t"
02698 "movq %%mm6, %%mm1 \n\t"
02699 "psllw $2, %%mm0 \n\t"
02700 "psllw $2, %%mm1 \n\t"
02701 "paddw "MANGLE(w04)", %%mm0 \n\t"
02702 "paddw "MANGLE(w04)", %%mm1 \n\t"
02703
// NEXT: add the line at %0 (widened to words) to the accumulator, then step %0.
02704 #define NEXT\
02705 "movq (%0), %%mm2 \n\t"\
02706 "movq (%0), %%mm3 \n\t"\
02707 "add %1, %0 \n\t"\
02708 "punpcklbw %%mm4, %%mm2 \n\t"\
02709 "punpckhbw %%mm4, %%mm3 \n\t"\
02710 "paddw %%mm2, %%mm0 \n\t"\
02711 "paddw %%mm3, %%mm1 \n\t"
02712
// PREV: subtract the line at %0 (widened to words) from the accumulator, then step %0.
02713 #define PREV\
02714 "movq (%0), %%mm2 \n\t"\
02715 "movq (%0), %%mm3 \n\t"\
02716 "add %1, %0 \n\t"\
02717 "punpcklbw %%mm4, %%mm2 \n\t"\
02718 "punpckhbw %%mm4, %%mm3 \n\t"\
02719 "psubw %%mm2, %%mm0 \n\t"\
02720 "psubw %%mm3, %%mm1 \n\t"
02721
02722
// Entry 0: 4*first + lines 1..3 (+ w04).
02723 NEXT
02724 NEXT
02725 NEXT
02726 "movq %%mm0, (%3) \n\t"
02727 "movq %%mm1, 8(%3) \n\t"
02728
// Entries 1..4: each adds the next line and drops one copy of "first".
02729 NEXT
02730 "psubw %%mm5, %%mm0 \n\t"
02731 "psubw %%mm6, %%mm1 \n\t"
02732 "movq %%mm0, 16(%3) \n\t"
02733 "movq %%mm1, 24(%3) \n\t"
02734
02735 NEXT
02736 "psubw %%mm5, %%mm0 \n\t"
02737 "psubw %%mm6, %%mm1 \n\t"
02738 "movq %%mm0, 32(%3) \n\t"
02739 "movq %%mm1, 40(%3) \n\t"
02740
02741 NEXT
02742 "psubw %%mm5, %%mm0 \n\t"
02743 "psubw %%mm6, %%mm1 \n\t"
02744 "movq %%mm0, 48(%3) \n\t"
02745 "movq %%mm1, 56(%3) \n\t"
02746
02747 NEXT
02748 "psubw %%mm5, %%mm0 \n\t"
02749 "psubw %%mm6, %%mm1 \n\t"
02750 "movq %%mm0, 64(%3) \n\t"
02751 "movq %%mm1, 72(%3) \n\t"
02752
// Widen "last" to words: mm6 = low four columns, mm7 = high four columns.
02753 "movq %%mm7, %%mm6 \n\t"
02754 "punpckhbw %%mm4, %%mm7 \n\t"
02755 "punpcklbw %%mm4, %%mm6 \n\t"
02756
// Add line 8, then rewind %0 to line 1 so PREV can drop the oldest lines.
02757 NEXT
02758 "mov %4, %0 \n\t"
02759 "add %1, %0 \n\t"
// Entry 5: lines 2..8 (+ w04).
02760 PREV
02761 "movq %%mm0, 80(%3) \n\t"
02762 "movq %%mm1, 88(%3) \n\t"
02763
// Entries 6..9: each drops the oldest line and adds one copy of "last".
02764 PREV
02765 "paddw %%mm6, %%mm0 \n\t"
02766 "paddw %%mm7, %%mm1 \n\t"
02767 "movq %%mm0, 96(%3) \n\t"
02768 "movq %%mm1, 104(%3) \n\t"
02769
02770 PREV
02771 "paddw %%mm6, %%mm0 \n\t"
02772 "paddw %%mm7, %%mm1 \n\t"
02773 "movq %%mm0, 112(%3) \n\t"
02774 "movq %%mm1, 120(%3) \n\t"
02775
02776 PREV
02777 "paddw %%mm6, %%mm0 \n\t"
02778 "paddw %%mm7, %%mm1 \n\t"
02779 "movq %%mm0, 128(%3) \n\t"
02780 "movq %%mm1, 136(%3) \n\t"
02781
02782 PREV
02783 "paddw %%mm6, %%mm0 \n\t"
02784 "paddw %%mm7, %%mm1 \n\t"
02785 "movq %%mm0, 144(%3) \n\t"
02786 "movq %%mm1, 152(%3) \n\t"
02787
// Restore %0 to the value src had on entry to this statement.
02788 "mov %4, %0 \n\t"
02789
02790 : "+&r"(src)
02791 : "r" ((x86_reg)step), "m" (c->pQPb), "r"(sums), "g"(src)
02792 );
02793
// src now points at line 1.
02794 src+= step;
02795
// Pass 2b: write the filtered lines.
// Operands: %0 = offset (starts at -8*step), %1 = temp_sums, %2 = step,
// %3 = src - offset, %4 = both_masks. (%0, %3) therefore starts at src.
// mm6 = both_masks, mm5 = ~both_masks, mm7 = 0.
02796 __asm__ volatile(
02797 "movq %4, %%mm6 \n\t"
02798 "pcmpeqb %%mm5, %%mm5 \n\t"
02799 "pxor %%mm6, %%mm5 \n\t"
02800 "pxor %%mm7, %%mm7 \n\t"
02801
// Per line: (entry[k] + entry[k+2] + 2*pixel) >> 4, packed back to bytes, then
// merged with the untouched pixel (mm4) so only columns in both_masks change.
02802 "1: \n\t"
02803 "movq (%1), %%mm0 \n\t"
02804 "movq 8(%1), %%mm1 \n\t"
02805 "paddw 32(%1), %%mm0 \n\t"
02806 "paddw 40(%1), %%mm1 \n\t"
02807 "movq (%0, %3), %%mm2 \n\t"
02808 "movq %%mm2, %%mm3 \n\t"
02809 "movq %%mm2, %%mm4 \n\t"
02810 "punpcklbw %%mm7, %%mm2 \n\t"
02811 "punpckhbw %%mm7, %%mm3 \n\t"
02812 "paddw %%mm2, %%mm0 \n\t"
02813 "paddw %%mm3, %%mm1 \n\t"
02814 "paddw %%mm2, %%mm0 \n\t"
02815 "paddw %%mm3, %%mm1 \n\t"
02816 "psrlw $4, %%mm0 \n\t"
02817 "psrlw $4, %%mm1 \n\t"
02818 "packuswb %%mm1, %%mm0 \n\t"
02819 "pand %%mm6, %%mm0 \n\t"
02820 "pand %%mm5, %%mm4 \n\t"
02821 "por %%mm4, %%mm0 \n\t"
02822 "movq %%mm0, (%0, %3) \n\t"
// Advance to the next sums entry and the next line; loop while offset is still negative.
02823 "add $16, %1 \n\t"
02824 "add %2, %0 \n\t"
02825 " js 1b \n\t"
02826
02827 : "+r"(offset), "+r"(temp_sums)
02828 : "r" ((x86_reg)step), "r"(src - offset), "m"(both_masks)
02829 );
02830 }else
02831 src+= step;
02832
// Default path: skipped only when every column is flagged in eq_mask.
// Below, L0..L7 denote the eight lines starting at temp_src (lines 1..8 above).
02833 if(eq_mask != -1LL){
02834 uint8_t *temp_src= src;
// tmp[0..1] holds the "left" term, tmp[2..3] holds L3 - L4 (both as words).
02835 DECLARE_ALIGNED(8, uint64_t, tmp)[4];
// Operands: %0 = temp_src (moved to L3 part way through), %1 = step,
// %2 = c->pQPb, %3 = eq_mask, %4 = tmp. mm7 = 0 for byte->word unpacking.
02836 __asm__ volatile(
02837 "pxor %%mm7, %%mm7 \n\t"
02838
02839
02840
// mm0/mm1 = L0, mm2/mm3 = L1, mm4/mm5 = L2 (low/high words); REG_a -> L2.
02841 "movq (%0), %%mm0 \n\t"
02842 "movq %%mm0, %%mm1 \n\t"
02843 "punpcklbw %%mm7, %%mm0 \n\t"
02844 "punpckhbw %%mm7, %%mm1 \n\t"
02845
02846 "movq (%0, %1), %%mm2 \n\t"
02847 "lea (%0, %1, 2), %%"REG_a" \n\t"
02848 "movq %%mm2, %%mm3 \n\t"
02849 "punpcklbw %%mm7, %%mm2 \n\t"
02850 "punpckhbw %%mm7, %%mm3 \n\t"
02851
02852 "movq (%%"REG_a"), %%mm4 \n\t"
02853 "movq %%mm4, %%mm5 \n\t"
02854 "punpcklbw %%mm7, %%mm4 \n\t"
02855 "punpckhbw %%mm7, %%mm5 \n\t"
02856
// Left term: 2*L0 - 5*L1 + 5*L2 - 2*L3, built as 2*L0 - (L1-L2) - 4*(L1-L2) - 2*L3.
02857 "paddw %%mm0, %%mm0 \n\t"
02858 "paddw %%mm1, %%mm1 \n\t"
02859 "psubw %%mm4, %%mm2 \n\t"
02860 "psubw %%mm5, %%mm3 \n\t"
02861 "psubw %%mm2, %%mm0 \n\t"
02862 "psubw %%mm3, %%mm1 \n\t"
02863
02864 "psllw $2, %%mm2 \n\t"
02865 "psllw $2, %%mm3 \n\t"
02866 "psubw %%mm2, %%mm0 \n\t"
02867 "psubw %%mm3, %%mm1 \n\t"
02868
02869 "movq (%%"REG_a", %1), %%mm2 \n\t"
02870 "movq %%mm2, %%mm3 \n\t"
02871 "punpcklbw %%mm7, %%mm2 \n\t"
02872 "punpckhbw %%mm7, %%mm3 \n\t"
02873
02874 "psubw %%mm2, %%mm0 \n\t"
02875 "psubw %%mm3, %%mm1 \n\t"
02876 "psubw %%mm2, %%mm0 \n\t"
02877 "psubw %%mm3, %%mm1 \n\t"
02878 "movq %%mm0, (%4) \n\t"
02879 "movq %%mm1, 8(%4) \n\t"
02880
// mm0/mm1 = L4; mm2/mm3 = L3 - L4, saved to tmp[2..3].
02881 "movq (%%"REG_a", %1, 2), %%mm0 \n\t"
02882 "movq %%mm0, %%mm1 \n\t"
02883 "punpcklbw %%mm7, %%mm0 \n\t"
02884 "punpckhbw %%mm7, %%mm1 \n\t"
02885
02886 "psubw %%mm0, %%mm2 \n\t"
02887 "psubw %%mm1, %%mm3 \n\t"
02888 "movq %%mm2, 16(%4) \n\t"
02889 "movq %%mm3, 24(%4) \n\t"
// Middle term (mm4/mm5): 2*L2 - 5*L3 + 5*L4 - 2*L5.
02890 "paddw %%mm4, %%mm4 \n\t"
02891 "paddw %%mm5, %%mm5 \n\t"
02892 "psubw %%mm2, %%mm4 \n\t"
02893 "psubw %%mm3, %%mm5 \n\t"
02894
// %0 now points at L3 and stays there for the final stores.
02895 "lea (%%"REG_a", %1), %0 \n\t"
02896 "psllw $2, %%mm2 \n\t"
02897 "psllw $2, %%mm3 \n\t"
02898 "psubw %%mm2, %%mm4 \n\t"
02899 "psubw %%mm3, %%mm5 \n\t"
02900
02901 "movq (%0, %1, 2), %%mm2 \n\t"
02902 "movq %%mm2, %%mm3 \n\t"
02903 "punpcklbw %%mm7, %%mm2 \n\t"
02904 "punpckhbw %%mm7, %%mm3 \n\t"
02905 "psubw %%mm2, %%mm4 \n\t"
02906 "psubw %%mm3, %%mm5 \n\t"
02907 "psubw %%mm2, %%mm4 \n\t"
02908 "psubw %%mm3, %%mm5 \n\t"
02909
// Right term (mm0/mm1): 2*L4 - 5*L5 + 5*L6 - 2*L7. L6 is loaded twice because
// only mm6 is free as a temporary here.
02910 "movq (%%"REG_a", %1, 4), %%mm6 \n\t"
02911 "punpcklbw %%mm7, %%mm6 \n\t"
02912 "psubw %%mm6, %%mm2 \n\t"
02913 "movq (%%"REG_a", %1, 4), %%mm6 \n\t"
02914 "punpckhbw %%mm7, %%mm6 \n\t"
02915 "psubw %%mm6, %%mm3 \n\t"
02916
02917 "paddw %%mm0, %%mm0 \n\t"
02918 "paddw %%mm1, %%mm1 \n\t"
02919 "psubw %%mm2, %%mm0 \n\t"
02920 "psubw %%mm3, %%mm1 \n\t"
02921
02922 "psllw $2, %%mm2 \n\t"
02923 "psllw $2, %%mm3 \n\t"
02924 "psubw %%mm2, %%mm0 \n\t"
02925 "psubw %%mm3, %%mm1 \n\t"
02926
02927 "movq (%0, %1, 4), %%mm2 \n\t"
02928 "movq %%mm2, %%mm3 \n\t"
02929 "punpcklbw %%mm7, %%mm2 \n\t"
02930 "punpckhbw %%mm7, %%mm3 \n\t"
02931
02932 "paddw %%mm2, %%mm2 \n\t"
02933 "paddw %%mm3, %%mm3 \n\t"
02934 "psubw %%mm2, %%mm0 \n\t"
02935 "psubw %%mm3, %%mm1 \n\t"
02936
// Reload the left term from tmp.
02937 "movq (%4), %%mm2 \n\t"
02938 "movq 8(%4), %%mm3 \n\t"
02939
// Absolute value of the right (mm0/mm1) and left (mm2/mm3) terms:
// max(x, 0-x) with pmaxsw, or the sign-mask xor/sub idiom on plain MMX.
02940 #if TEMPLATE_PP_MMXEXT
02941 "movq %%mm7, %%mm6 \n\t"
02942 "psubw %%mm0, %%mm6 \n\t"
02943 "pmaxsw %%mm6, %%mm0 \n\t"
02944 "movq %%mm7, %%mm6 \n\t"
02945 "psubw %%mm1, %%mm6 \n\t"
02946 "pmaxsw %%mm6, %%mm1 \n\t"
02947 "movq %%mm7, %%mm6 \n\t"
02948 "psubw %%mm2, %%mm6 \n\t"
02949 "pmaxsw %%mm6, %%mm2 \n\t"
02950 "movq %%mm7, %%mm6 \n\t"
02951 "psubw %%mm3, %%mm6 \n\t"
02952 "pmaxsw %%mm6, %%mm3 \n\t"
02953 #else
02954 "movq %%mm7, %%mm6 \n\t"
02955 "pcmpgtw %%mm0, %%mm6 \n\t"
02956 "pxor %%mm6, %%mm0 \n\t"
02957 "psubw %%mm6, %%mm0 \n\t"
02958 "movq %%mm7, %%mm6 \n\t"
02959 "pcmpgtw %%mm1, %%mm6 \n\t"
02960 "pxor %%mm6, %%mm1 \n\t"
02961 "psubw %%mm6, %%mm1 \n\t"
02962 "movq %%mm7, %%mm6 \n\t"
02963 "pcmpgtw %%mm2, %%mm6 \n\t"
02964 "pxor %%mm6, %%mm2 \n\t"
02965 "psubw %%mm6, %%mm2 \n\t"
02966 "movq %%mm7, %%mm6 \n\t"
02967 "pcmpgtw %%mm3, %%mm6 \n\t"
02968 "pxor %%mm6, %%mm3 \n\t"
02969 "psubw %%mm6, %%mm3 \n\t"
02970 #endif
02971
// mm0/mm1 = min(|left|, |right|).
02972 #if TEMPLATE_PP_MMXEXT
02973 "pminsw %%mm2, %%mm0 \n\t"
02974 "pminsw %%mm3, %%mm1 \n\t"
02975 #else
02976 "movq %%mm0, %%mm6 \n\t"
02977 "psubusw %%mm2, %%mm6 \n\t"
02978 "psubw %%mm6, %%mm0 \n\t"
02979 "movq %%mm1, %%mm6 \n\t"
02980 "psubusw %%mm3, %%mm6 \n\t"
02981 "psubw %%mm6, %%mm1 \n\t"
02982 #endif
02983
// mm2 = low four bytes of pQPb widened to words.
02984 "movd %2, %%mm2 \n\t"
02985 "punpcklbw %%mm7, %%mm2 \n\t"
02986
// mm6/mm7 = sign masks of the middle term, mm4/mm5 = |middle|.
// mm7 stops being zero from here on.
02987 "movq %%mm7, %%mm6 \n\t"
02988 "pcmpgtw %%mm4, %%mm6 \n\t"
02989 "pxor %%mm6, %%mm4 \n\t"
02990 "psubw %%mm6, %%mm4 \n\t"
02991 "pcmpgtw %%mm5, %%mm7 \n\t"
02992 "pxor %%mm7, %%mm5 \n\t"
02993 "psubw %%mm7, %%mm5 \n\t"
02994
// Zero |middle| wherever it is not below 8*QP.
02995 "psllw $3, %%mm2 \n\t"
02996 "movq %%mm2, %%mm3 \n\t"
02997 "pcmpgtw %%mm4, %%mm2 \n\t"
02998 "pcmpgtw %%mm5, %%mm3 \n\t"
02999 "pand %%mm2, %%mm4 \n\t"
03000 "pand %%mm3, %%mm5 \n\t"
03001
03002
// |middle| - min(|left|, |right|), saturating at 0.
03003 "psubusw %%mm0, %%mm4 \n\t"
03004 "psubusw %%mm1, %%mm5 \n\t"
03005
03006
// Scale: (x * w05 + w20) >> 6. Both constants are defined outside this block;
// presumably 5 and 32 per word -- TODO confirm.
03007 "movq "MANGLE(w05)", %%mm2 \n\t"
03008 "pmullw %%mm2, %%mm4 \n\t"
03009 "pmullw %%mm2, %%mm5 \n\t"
03010 "movq "MANGLE(w20)", %%mm2 \n\t"
03011 "paddw %%mm2, %%mm4 \n\t"
03012 "paddw %%mm2, %%mm5 \n\t"
03013 "psrlw $6, %%mm4 \n\t"
03014 "psrlw $6, %%mm5 \n\t"
03015
// mm0/mm1 = L3 - L4 reloaded from tmp; mm2/mm3 = its sign masks.
03016 "movq 16(%4), %%mm0 \n\t"
03017 "movq 24(%4), %%mm1 \n\t"
03018
03019 "pxor %%mm2, %%mm2 \n\t"
03020 "pxor %%mm3, %%mm3 \n\t"
03021
// mm0/mm1 become |L3 - L4| >> 1.
03022 "pcmpgtw %%mm0, %%mm2 \n\t"
03023 "pcmpgtw %%mm1, %%mm3 \n\t"
03024 "pxor %%mm2, %%mm0 \n\t"
03025 "pxor %%mm3, %%mm1 \n\t"
03026 "psubw %%mm2, %%mm0 \n\t"
03027 "psubw %%mm3, %%mm1 \n\t"
03028 "psrlw $1, %%mm0 \n\t"
03029 "psrlw $1, %%mm1 \n\t"
03030
// Keep the correction only where sign(L3 - L4) differs from sign(middle).
03031 "pxor %%mm6, %%mm2 \n\t"
03032 "pxor %%mm7, %%mm3 \n\t"
03033 "pand %%mm2, %%mm4 \n\t"
03034 "pand %%mm3, %%mm5 \n\t"
03035
// Limit the correction to |L3 - L4| >> 1.
03036 #if TEMPLATE_PP_MMXEXT
03037 "pminsw %%mm0, %%mm4 \n\t"
03038 "pminsw %%mm1, %%mm5 \n\t"
03039 #else
03040 "movq %%mm4, %%mm2 \n\t"
03041 "psubusw %%mm0, %%mm2 \n\t"
03042 "psubw %%mm2, %%mm4 \n\t"
03043 "movq %%mm5, %%mm2 \n\t"
03044 "psubusw %%mm1, %%mm2 \n\t"
03045 "psubw %%mm2, %%mm5 \n\t"
03046 #endif
// Give the correction the sign of the middle term, pack to signed bytes, clear
// it in columns flagged by eq_mask (pandn), then add it to L3 and subtract it from L4.
03047 "pxor %%mm6, %%mm4 \n\t"
03048 "pxor %%mm7, %%mm5 \n\t"
03049 "psubw %%mm6, %%mm4 \n\t"
03050 "psubw %%mm7, %%mm5 \n\t"
03051 "packsswb %%mm5, %%mm4 \n\t"
03052 "movq %3, %%mm1 \n\t"
03053 "pandn %%mm4, %%mm1 \n\t"
03054 "movq (%0), %%mm0 \n\t"
03055 "paddb %%mm1, %%mm0 \n\t"
03056 "movq %%mm0, (%0) \n\t"
03057 "movq (%0, %1), %%mm0 \n\t"
03058 "psubb %%mm1, %%mm0 \n\t"
03059 "movq %%mm0, (%0, %1) \n\t"
03060
03061 : "+r" (temp_src)
03062 : "r" ((x86_reg)step), "m" (c->pQPb), "m"(eq_mask), "r"(tmp)
03063 : "%"REG_a
03064 );
03065 }
03066
03067
03068
03069
03070
03071
03072 }
03073 #endif //TEMPLATE_PP_MMX
03074
/* Forward declaration only; the definition of RENAME(postProcess) appears later in this file. */
03075 static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
03076 const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c);
03077
03082 #undef REAL_SCALED_CPY
03083 #undef SCALED_CPY
03084
/**
 * Copy one block of 8 lines x 8 bytes from src to dst.
 *
 * In MMX builds with levelFix non-zero, every byte is rescaled on the way using
 * the two 64-bit values at packedOffsetAndScale[0] (subtracted) and
 * packedOffsetAndScale[1] (multiplier). In non-MMX builds both branches are a
 * plain memcpy of BLOCK_SIZE bytes per line, so levelFix has no effect there.
 *
 * @param dst                  destination block
 * @param dstStride            byte distance between destination lines
 * @param src                  source block
 * @param srcStride            byte distance between source lines
 * @param levelFix             non-zero selects the rescaling path (MMX builds only)
 * @param packedOffsetAndScale pointer to two consecutive 64-bit values; only
 *                             dereferenced by the MMX levelFix path
 */
03085 static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t src[], int srcStride,
03086 int levelFix, int64_t *packedOffsetAndScale)
03087 {
03088 #if !TEMPLATE_PP_MMX
03089 int i;
03090 #endif
03091 if(levelFix){
03092 #if TEMPLATE_PP_MMX
// Operands: %0/%1 = packedOffsetAndScale, pinned to the "a" register;
// %2 = src, %3 = dst, %4 = srcStride, %5 = dstStride.
// mm2 = value at [0], mm3 = value at [1]; afterwards REG_a / REG_d are reused
// as src + srcStride and dst + dstStride, and mm4 is zeroed for unpacking.
03093 __asm__ volatile(
03094 "movq (%%"REG_a"), %%mm2 \n\t"
03095 "movq 8(%%"REG_a"), %%mm3 \n\t"
03096 "lea (%2,%4), %%"REG_a" \n\t"
03097 "lea (%3,%5), %%"REG_d" \n\t"
03098 "pxor %%mm4, %%mm4 \n\t"
03099 #if TEMPLATE_PP_MMXEXT
// MMXEXT variant, two lines per expansion: duplicate each byte into both halves
// of a word, take the unsigned high product with mm3, subtract mm2, pack with
// unsigned saturation and store.
03100 #define REAL_SCALED_CPY(src1, src2, dst1, dst2) \
03101 "movq " #src1 ", %%mm0 \n\t"\
03102 "movq " #src1 ", %%mm5 \n\t"\
03103 "movq " #src2 ", %%mm1 \n\t"\
03104 "movq " #src2 ", %%mm6 \n\t"\
03105 "punpcklbw %%mm0, %%mm0 \n\t"\
03106 "punpckhbw %%mm5, %%mm5 \n\t"\
03107 "punpcklbw %%mm1, %%mm1 \n\t"\
03108 "punpckhbw %%mm6, %%mm6 \n\t"\
03109 "pmulhuw %%mm3, %%mm0 \n\t"\
03110 "pmulhuw %%mm3, %%mm5 \n\t"\
03111 "pmulhuw %%mm3, %%mm1 \n\t"\
03112 "pmulhuw %%mm3, %%mm6 \n\t"\
03113 "psubw %%mm2, %%mm0 \n\t"\
03114 "psubw %%mm2, %%mm5 \n\t"\
03115 "psubw %%mm2, %%mm1 \n\t"\
03116 "psubw %%mm2, %%mm6 \n\t"\
03117 "packuswb %%mm5, %%mm0 \n\t"\
03118 "packuswb %%mm6, %%mm1 \n\t"\
03119 "movq %%mm0, " #dst1 " \n\t"\
03120 "movq %%mm1, " #dst2 " \n\t"\
03121
03122 #else //TEMPLATE_PP_MMXEXT
// Plain MMX variant, two lines per expansion: zero-extend bytes to words,
// subtract mm2, shift left by 6, take the signed high product with mm3, pack
// with unsigned saturation and store.
03123 #define REAL_SCALED_CPY(src1, src2, dst1, dst2) \
03124 "movq " #src1 ", %%mm0 \n\t"\
03125 "movq " #src1 ", %%mm5 \n\t"\
03126 "punpcklbw %%mm4, %%mm0 \n\t"\
03127 "punpckhbw %%mm4, %%mm5 \n\t"\
03128 "psubw %%mm2, %%mm0 \n\t"\
03129 "psubw %%mm2, %%mm5 \n\t"\
03130 "movq " #src2 ", %%mm1 \n\t"\
03131 "psllw $6, %%mm0 \n\t"\
03132 "psllw $6, %%mm5 \n\t"\
03133 "pmulhw %%mm3, %%mm0 \n\t"\
03134 "movq " #src2 ", %%mm6 \n\t"\
03135 "pmulhw %%mm3, %%mm5 \n\t"\
03136 "punpcklbw %%mm4, %%mm1 \n\t"\
03137 "punpckhbw %%mm4, %%mm6 \n\t"\
03138 "psubw %%mm2, %%mm1 \n\t"\
03139 "psubw %%mm2, %%mm6 \n\t"\
03140 "psllw $6, %%mm1 \n\t"\
03141 "psllw $6, %%mm6 \n\t"\
03142 "pmulhw %%mm3, %%mm1 \n\t"\
03143 "pmulhw %%mm3, %%mm6 \n\t"\
03144 "packuswb %%mm5, %%mm0 \n\t"\
03145 "packuswb %%mm6, %%mm1 \n\t"\
03146 "movq %%mm0, " #dst1 " \n\t"\
03147 "movq %%mm1, " #dst2 " \n\t"\
03148
03149 #endif //TEMPLATE_PP_MMXEXT
// Wrapper level so the arguments are macro-expanded before REAL_SCALED_CPY
// stringifies them (REGa / REGd are presumably macros from the included asm
// header -- TODO confirm).
03150 #define SCALED_CPY(src1, src2, dst1, dst2)\
03151 REAL_SCALED_CPY(src1, src2, dst1, dst2)
03152
// Lines 0+1, 2+3, 4+5; then REG_a / REG_d are advanced by four lines to cover 6+7.
03153 SCALED_CPY((%2) , (%2, %4) , (%3) , (%3, %5))
03154 SCALED_CPY((%2, %4, 2), (%%REGa, %4, 2), (%3, %5, 2), (%%REGd, %5, 2))
03155 SCALED_CPY((%2, %4, 4), (%%REGa, %4, 4), (%3, %5, 4), (%%REGd, %5, 4))
03156 "lea (%%"REG_a",%4,4), %%"REG_a" \n\t"
03157 "lea (%%"REG_d",%5,4), %%"REG_d" \n\t"
03158 SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2), (%%REGd, %5), (%%REGd, %5, 2))
03159
03160
03161 : "=&a" (packedOffsetAndScale)
03162 : "0" (packedOffsetAndScale),
03163 "r"(src),
03164 "r"(dst),
03165 "r" ((x86_reg)srcStride),
03166 "r" ((x86_reg)dstStride)
03167 : "%"REG_d
03168 );
03169 #else //TEMPLATE_PP_MMX
// No rescaling in the C fallback: identical to the non-levelFix copy below.
03170 for(i=0; i<8; i++)
03171 memcpy( &(dst[dstStride*i]),
03172 &(src[srcStride*i]), BLOCK_SIZE);
03173 #endif //TEMPLATE_PP_MMX
03174 }else{
03175 #if TEMPLATE_PP_MMX
// Straight copy. Operands: %0 = src, %1 = dst, %2 = srcStride, %3 = dstStride;
// REG_a = src + srcStride, REG_d = dst + dstStride.
03176 __asm__ volatile(
03177 "lea (%0,%2), %%"REG_a" \n\t"
03178 "lea (%1,%3), %%"REG_d" \n\t"
03179
// Copies two 8-byte lines per expansion through mm0 / mm1.
03180 #define REAL_SIMPLE_CPY(src1, src2, dst1, dst2) \
03181 "movq " #src1 ", %%mm0 \n\t"\
03182 "movq " #src2 ", %%mm1 \n\t"\
03183 "movq %%mm0, " #dst1 " \n\t"\
03184 "movq %%mm1, " #dst2 " \n\t"\
03185
03186 #define SIMPLE_CPY(src1, src2, dst1, dst2)\
03187 REAL_SIMPLE_CPY(src1, src2, dst1, dst2)
03188
// Same line pairing as the scaled copy: 0+1, 2+3, 4+5, then 6+7 after the lea pair.
03189 SIMPLE_CPY((%0) , (%0, %2) , (%1) , (%1, %3))
03190 SIMPLE_CPY((%0, %2, 2), (%%REGa, %2, 2), (%1, %3, 2), (%%REGd, %3, 2))
03191 SIMPLE_CPY((%0, %2, 4), (%%REGa, %2, 4), (%1, %3, 4), (%%REGd, %3, 4))
03192 "lea (%%"REG_a",%2,4), %%"REG_a" \n\t"
03193 "lea (%%"REG_d",%3,4), %%"REG_d" \n\t"
03194 SIMPLE_CPY((%%REGa, %2), (%%REGa, %2, 2), (%%REGd, %3), (%%REGd, %3, 2))
03195
03196 : : "r" (src),
03197 "r" (dst),
03198 "r" ((x86_reg)srcStride),
03199 "r" ((x86_reg)dstStride)
03200 : "%"REG_a, "%"REG_d
03201 );
03202 #else //TEMPLATE_PP_MMX
03203 for(i=0; i<8; i++)
03204 memcpy( &(dst[dstStride*i]),
03205 &(src[srcStride*i]), BLOCK_SIZE);
03206 #endif //TEMPLATE_PP_MMX
03207 }
03208 }
03209
/**
 * Replicate the 8 bytes at src into the three lines preceding it, i.e. to
 * src - stride, src - 2*stride and src - 3*stride.
 *
 * @param src    line to replicate; the 8 bytes at src itself are only read
 * @param stride byte distance between lines
 */
03213 static inline void RENAME(duplicate)(uint8_t src[], int stride)
03214 {
03215 #if TEMPLATE_PP_MMX
// %1 holds -stride, so after the add %0 points one line above src and the
// three stores land one, two and three lines above it.
03216 __asm__ volatile(
03217 "movq (%0), %%mm0 \n\t"
03218 "add %1, %0 \n\t"
03219 "movq %%mm0, (%0) \n\t"
03220 "movq %%mm0, (%0, %1) \n\t"
03221 "movq %%mm0, (%0, %1, 2) \n\t"
03222 : "+r" (src)
03223 : "r" ((x86_reg)-stride)
03224 );
03225 #else
// Portable fallback: step p one line up at a time and copy 8 bytes from src.
03226 int i;
03227 uint8_t *p=src;
03228 for(i=0; i<3; i++){
03229 p-= stride;
03230 memcpy(p, src, 8);
03231 }
03232 #endif
03233 }
03234
03238 static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
03239 const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c2)
03240 {
03241 DECLARE_ALIGNED(8, PPContext, c)= *c2;
03242 int x,y;
03243 #ifdef TEMPLATE_PP_TIME_MODE
03244 const int mode= TEMPLATE_PP_TIME_MODE;
03245 #else
03246 const int mode= isColor ? c.ppMode.chromMode : c.ppMode.lumMode;
03247 #endif
03248 int black=0, white=255;
03249 int QPCorrecture= 256*256;
03250
03251 int copyAhead;
03252 #if TEMPLATE_PP_MMX
03253 int i;
03254 #endif
03255
03256 const int qpHShift= isColor ? 4-c.hChromaSubSample : 4;
03257 const int qpVShift= isColor ? 4-c.vChromaSubSample : 4;
03258
03259
03260 uint64_t * const yHistogram= c.yHistogram;
03261 uint8_t * const tempSrc= srcStride > 0 ? c.tempSrc : c.tempSrc - 23*srcStride;
03262 uint8_t * const tempDst= dstStride > 0 ? c.tempDst : c.tempDst - 23*dstStride;
03263
03264
03265 #if TEMPLATE_PP_MMX
03266 for(i=0; i<57; i++){
03267 int offset= ((i*c.ppMode.baseDcDiff)>>8) + 1;
03268 int threshold= offset*2 + 1;
03269 c.mmxDcOffset[i]= 0x7F - offset;
03270 c.mmxDcThreshold[i]= 0x7F - threshold;
03271 c.mmxDcOffset[i]*= 0x0101010101010101LL;
03272 c.mmxDcThreshold[i]*= 0x0101010101010101LL;
03273 }
03274 #endif
03275
03276 if(mode & CUBIC_IPOL_DEINT_FILTER) copyAhead=16;
03277 else if( (mode & LINEAR_BLEND_DEINT_FILTER)
03278 || (mode & FFMPEG_DEINT_FILTER)
03279 || (mode & LOWPASS5_DEINT_FILTER)) copyAhead=14;
03280 else if( (mode & V_DEBLOCK)
03281 || (mode & LINEAR_IPOL_DEINT_FILTER)
03282 || (mode & MEDIAN_DEINT_FILTER)
03283 || (mode & V_A_DEBLOCK)) copyAhead=13;
03284 else if(mode & V_X1_FILTER) copyAhead=11;
03285
03286 else if(mode & DERING) copyAhead=9;
03287 else copyAhead=8;
03288
03289 copyAhead-= 8;
03290
03291 if(!isColor){
03292 uint64_t sum= 0;
03293 int i;
03294 uint64_t maxClipped;
03295 uint64_t clipped;
03296 double scale;
03297
03298 c.frameNum++;
03299
03300 if(c.frameNum == 1) yHistogram[0]= width*(uint64_t)height/64*15/256;
03301
03302 for(i=0; i<256; i++){
03303 sum+= yHistogram[i];
03304 }
03305
03306
03307 maxClipped= (uint64_t)(sum * c.ppMode.maxClippedThreshold);
03308
03309 clipped= sum;
03310 for(black=255; black>0; black--){
03311 if(clipped < maxClipped) break;
03312 clipped-= yHistogram[black];
03313 }
03314
03315 clipped= sum;
03316 for(white=0; white<256; white++){
03317 if(clipped < maxClipped) break;
03318 clipped-= yHistogram[white];
03319 }
03320
03321 scale= (double)(c.ppMode.maxAllowedY - c.ppMode.minAllowedY) / (double)(white-black);
03322
03323 #if TEMPLATE_PP_MMXEXT
03324 c.packedYScale= (uint16_t)(scale*256.0 + 0.5);
03325 c.packedYOffset= (((black*c.packedYScale)>>8) - c.ppMode.minAllowedY) & 0xFFFF;
03326 #else
03327 c.packedYScale= (uint16_t)(scale*1024.0 + 0.5);
03328 c.packedYOffset= (black - c.ppMode.minAllowedY) & 0xFFFF;
03329 #endif
03330
03331 c.packedYOffset|= c.packedYOffset<<32;
03332 c.packedYOffset|= c.packedYOffset<<16;
03333
03334 c.packedYScale|= c.packedYScale<<32;
03335 c.packedYScale|= c.packedYScale<<16;
03336
03337 if(mode & LEVEL_FIX) QPCorrecture= (int)(scale*256*256 + 0.5);
03338 else QPCorrecture= 256*256;
03339 }else{
03340 c.packedYScale= 0x0100010001000100LL;
03341 c.packedYOffset= 0;
03342 QPCorrecture= 256*256;
03343 }
03344
03345
03346 y=-BLOCK_SIZE;
03347 {
03348 const uint8_t *srcBlock= &(src[y*srcStride]);
03349 uint8_t *dstBlock= tempDst + dstStride;
03350
03351
03352
03353
03354 for(x=0; x<width; x+=BLOCK_SIZE){
03355
03356 #if TEMPLATE_PP_MMXEXT
03357
03358
03359
03360
03361
03362
03363
03364 __asm__(
03365 "mov %4, %%"REG_a" \n\t"
03366 "shr $2, %%"REG_a" \n\t"
03367 "and $6, %%"REG_a" \n\t"
03368 "add %5, %%"REG_a" \n\t"
03369 "mov %%"REG_a", %%"REG_d" \n\t"
03370 "imul %1, %%"REG_a" \n\t"
03371 "imul %3, %%"REG_d" \n\t"
03372 "prefetchnta 32(%%"REG_a", %0) \n\t"
03373 "prefetcht0 32(%%"REG_d", %2) \n\t"
03374 "add %1, %%"REG_a" \n\t"
03375 "add %3, %%"REG_d" \n\t"
03376 "prefetchnta 32(%%"REG_a", %0) \n\t"
03377 "prefetcht0 32(%%"REG_d", %2) \n\t"
03378 :: "r" (srcBlock), "r" ((x86_reg)srcStride), "r" (dstBlock), "r" ((x86_reg)dstStride),
03379 "g" ((x86_reg)x), "g" ((x86_reg)copyAhead)
03380 : "%"REG_a, "%"REG_d
03381 );
03382
03383 #elif TEMPLATE_PP_3DNOW
03384
03385
03386
03387
03388
03389
03390 #endif
03391
03392 RENAME(blockCopy)(dstBlock + dstStride*8, dstStride,
03393 srcBlock + srcStride*8, srcStride, mode & LEVEL_FIX, &c.packedYOffset);
03394
03395 RENAME(duplicate)(dstBlock + dstStride*8, dstStride);
03396
03397 if(mode & LINEAR_IPOL_DEINT_FILTER)
03398 RENAME(deInterlaceInterpolateLinear)(dstBlock, dstStride);
03399 else if(mode & LINEAR_BLEND_DEINT_FILTER)
03400 RENAME(deInterlaceBlendLinear)(dstBlock, dstStride, c.deintTemp + x);
03401 else if(mode & MEDIAN_DEINT_FILTER)
03402 RENAME(deInterlaceMedian)(dstBlock, dstStride);
03403 else if(mode & CUBIC_IPOL_DEINT_FILTER)
03404 RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride);
03405 else if(mode & FFMPEG_DEINT_FILTER)
03406 RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x);
03407 else if(mode & LOWPASS5_DEINT_FILTER)
03408 RENAME(deInterlaceL5)(dstBlock, dstStride, c.deintTemp + x, c.deintTemp + width + x);
03409
03410
03411
03412 dstBlock+=8;
03413 srcBlock+=8;
03414 }
03415 if(width==FFABS(dstStride))
03416 linecpy(dst, tempDst + 9*dstStride, copyAhead, dstStride);
03417 else{
03418 int i;
03419 for(i=0; i<copyAhead; i++){
03420 memcpy(dst + i*dstStride, tempDst + (9+i)*dstStride, width);
03421 }
03422 }
03423 }
03424
03425 for(y=0; y<height; y+=BLOCK_SIZE){
03426
03427 const uint8_t *srcBlock= &(src[y*srcStride]);
03428 uint8_t *dstBlock= &(dst[y*dstStride]);
03429 #if TEMPLATE_PP_MMX
03430 uint8_t *tempBlock1= c.tempBlocks;
03431 uint8_t *tempBlock2= c.tempBlocks + 8;
03432 #endif
03433 const int8_t *QPptr= &QPs[(y>>qpVShift)*QPStride];
03434 int8_t *nonBQPptr= &c.nonBQPTable[(y>>qpVShift)*FFABS(QPStride)];
03435 int QP=0;
03436
03437
03438 if(y+15 >= height){
03439 int i;
03440
03441
03442 linecpy(tempSrc + srcStride*copyAhead, srcBlock + srcStride*copyAhead,
03443 FFMAX(height-y-copyAhead, 0), srcStride);
03444
03445
03446 for(i=FFMAX(height-y, 8); i<copyAhead+8; i++)
03447 memcpy(tempSrc + srcStride*i, src + srcStride*(height-1), FFABS(srcStride));
03448
03449
03450 linecpy(tempDst, dstBlock - dstStride, FFMIN(height-y+1, copyAhead+1), dstStride);
03451
03452
03453 for(i=height-y+1; i<=copyAhead; i++)
03454 memcpy(tempDst + dstStride*i, dst + dstStride*(height-1), FFABS(dstStride));
03455
03456 dstBlock= tempDst + dstStride;
03457 srcBlock= tempSrc;
03458 }
03459
03460
03461
03462
03463 for(x=0; x<width; x+=BLOCK_SIZE){
03464 const int stride= dstStride;
03465 #if TEMPLATE_PP_MMX
03466 uint8_t *tmpXchg;
03467 #endif
03468 if(isColor){
03469 QP= QPptr[x>>qpHShift];
03470 c.nonBQP= nonBQPptr[x>>qpHShift];
03471 }else{
03472 QP= QPptr[x>>4];
03473 QP= (QP* QPCorrecture + 256*128)>>16;
03474 c.nonBQP= nonBQPptr[x>>4];
03475 c.nonBQP= (c.nonBQP* QPCorrecture + 256*128)>>16;
03476 yHistogram[ srcBlock[srcStride*12 + 4] ]++;
03477 }
03478 c.QP= QP;
03479 #if TEMPLATE_PP_MMX
03480 __asm__ volatile(
03481 "movd %1, %%mm7 \n\t"
03482 "packuswb %%mm7, %%mm7 \n\t"
03483 "packuswb %%mm7, %%mm7 \n\t"
03484 "packuswb %%mm7, %%mm7 \n\t"
03485 "movq %%mm7, %0 \n\t"
03486 : "=m" (c.pQPb)
03487 : "r" (QP)
03488 );
03489 #endif
03490
03491
03492 #if TEMPLATE_PP_MMXEXT
03493
03494
03495
03496
03497
03498
03499
03500 __asm__(
03501 "mov %4, %%"REG_a" \n\t"
03502 "shr $2, %%"REG_a" \n\t"
03503 "and $6, %%"REG_a" \n\t"
03504 "add %5, %%"REG_a" \n\t"
03505 "mov %%"REG_a", %%"REG_d" \n\t"
03506 "imul %1, %%"REG_a" \n\t"
03507 "imul %3, %%"REG_d" \n\t"
03508 "prefetchnta 32(%%"REG_a", %0) \n\t"
03509 "prefetcht0 32(%%"REG_d", %2) \n\t"
03510 "add %1, %%"REG_a" \n\t"
03511 "add %3, %%"REG_d" \n\t"
03512 "prefetchnta 32(%%"REG_a", %0) \n\t"
03513 "prefetcht0 32(%%"REG_d", %2) \n\t"
03514 :: "r" (srcBlock), "r" ((x86_reg)srcStride), "r" (dstBlock), "r" ((x86_reg)dstStride),
03515 "g" ((x86_reg)x), "g" ((x86_reg)copyAhead)
03516 : "%"REG_a, "%"REG_d
03517 );
03518
03519 #elif TEMPLATE_PP_3DNOW
03520
03521
03522
03523
03524
03525
03526 #endif
03527
03528 RENAME(blockCopy)(dstBlock + dstStride*copyAhead, dstStride,
03529 srcBlock + srcStride*copyAhead, srcStride, mode & LEVEL_FIX, &c.packedYOffset);
03530
03531 if(mode & LINEAR_IPOL_DEINT_FILTER)
03532 RENAME(deInterlaceInterpolateLinear)(dstBlock, dstStride);
03533 else if(mode & LINEAR_BLEND_DEINT_FILTER)
03534 RENAME(deInterlaceBlendLinear)(dstBlock, dstStride, c.deintTemp + x);
03535 else if(mode & MEDIAN_DEINT_FILTER)
03536 RENAME(deInterlaceMedian)(dstBlock, dstStride);
03537 else if(mode & CUBIC_IPOL_DEINT_FILTER)
03538 RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride);
03539 else if(mode & FFMPEG_DEINT_FILTER)
03540 RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x);
03541 else if(mode & LOWPASS5_DEINT_FILTER)
03542 RENAME(deInterlaceL5)(dstBlock, dstStride, c.deintTemp + x, c.deintTemp + width + x);
03543
03544
03545
03546
03547
03548 if(y + 8 < height){
03549 if(mode & V_X1_FILTER)
03550 RENAME(vertX1Filter)(dstBlock, stride, &c);
03551 else if(mode & V_DEBLOCK){
03552 const int t= RENAME(vertClassify)(dstBlock, stride, &c);
03553
03554 if(t==1)
03555 RENAME(doVertLowPass)(dstBlock, stride, &c);
03556 else if(t==2)
03557 RENAME(doVertDefFilter)(dstBlock, stride, &c);
03558 }else if(mode & V_A_DEBLOCK){
03559 RENAME(do_a_deblock)(dstBlock, stride, 1, &c);
03560 }
03561 }
03562
03563 #if TEMPLATE_PP_MMX
03564 RENAME(transpose1)(tempBlock1, tempBlock2, dstBlock, dstStride);
03565 #endif
03566
03567 if(x - 8 >= 0){
03568 #if TEMPLATE_PP_MMX
03569 if(mode & H_X1_FILTER)
03570 RENAME(vertX1Filter)(tempBlock1, 16, &c);
03571 else if(mode & H_DEBLOCK){
03572
03573 const int t= RENAME(vertClassify)(tempBlock1, 16, &c);
03574
03575 if(t==1)
03576 RENAME(doVertLowPass)(tempBlock1, 16, &c);
03577 else if(t==2)
03578 RENAME(doVertDefFilter)(tempBlock1, 16, &c);
03579 }else if(mode & H_A_DEBLOCK){
03580 RENAME(do_a_deblock)(tempBlock1, 16, 1, &c);
03581 }
03582
03583 RENAME(transpose2)(dstBlock-4, dstStride, tempBlock1 + 4*16);
03584
03585 #else
03586 if(mode & H_X1_FILTER)
03587 horizX1Filter(dstBlock-4, stride, QP);
03588 else if(mode & H_DEBLOCK){
03589 #if TEMPLATE_PP_ALTIVEC
03590 DECLARE_ALIGNED(16, unsigned char, tempBlock)[272];
03591 int t;
03592 transpose_16x8_char_toPackedAlign_altivec(tempBlock, dstBlock - (4 + 1), stride);
03593
03594 t = vertClassify_altivec(tempBlock-48, 16, &c);
03595 if(t==1) {
03596 doVertLowPass_altivec(tempBlock-48, 16, &c);
03597 transpose_8x16_char_fromPackedAlign_altivec(dstBlock - (4 + 1), tempBlock, stride);
03598 }
03599 else if(t==2) {
03600 doVertDefFilter_altivec(tempBlock-48, 16, &c);
03601 transpose_8x16_char_fromPackedAlign_altivec(dstBlock - (4 + 1), tempBlock, stride);
03602 }
03603 #else
03604 const int t= RENAME(horizClassify)(dstBlock-4, stride, &c);
03605
03606 if(t==1)
03607 RENAME(doHorizLowPass)(dstBlock-4, stride, &c);
03608 else if(t==2)
03609 RENAME(doHorizDefFilter)(dstBlock-4, stride, &c);
03610 #endif
03611 }else if(mode & H_A_DEBLOCK){
03612 RENAME(do_a_deblock)(dstBlock-8, 1, stride, &c);
03613 }
03614 #endif //TEMPLATE_PP_MMX
03615 if(mode & DERING){
03616
03617 if(y>0) RENAME(dering)(dstBlock - stride - 8, stride, &c);
03618 }
03619
03620 if(mode & TEMP_NOISE_FILTER)
03621 {
03622 RENAME(tempNoiseReducer)(dstBlock-8, stride,
03623 c.tempBlurred[isColor] + y*dstStride + x,
03624 c.tempBlurredPast[isColor] + (y>>3)*256 + (x>>3) + 256,
03625 c.ppMode.maxTmpNoise);
03626 }
03627 }
03628
03629 dstBlock+=8;
03630 srcBlock+=8;
03631
03632 #if TEMPLATE_PP_MMX
03633 tmpXchg= tempBlock1;
03634 tempBlock1= tempBlock2;
03635 tempBlock2 = tmpXchg;
03636 #endif
03637 }
03638
03639 if(mode & DERING){
03640 if(y > 0) RENAME(dering)(dstBlock - dstStride - 8, dstStride, &c);
03641 }
03642
03643 if((mode & TEMP_NOISE_FILTER)){
03644 RENAME(tempNoiseReducer)(dstBlock-8, dstStride,
03645 c.tempBlurred[isColor] + y*dstStride + x,
03646 c.tempBlurredPast[isColor] + (y>>3)*256 + (x>>3) + 256,
03647 c.ppMode.maxTmpNoise);
03648 }
03649
03650
03651 if(y+15 >= height){
03652 uint8_t *dstBlock= &(dst[y*dstStride]);
03653 if(width==FFABS(dstStride))
03654 linecpy(dstBlock, tempDst + dstStride, height-y, dstStride);
03655 else{
03656 int i;
03657 for(i=0; i<height-y; i++){
03658 memcpy(dstBlock + i*dstStride, tempDst + (i+1)*dstStride, width);
03659 }
03660 }
03661 }
03662
03663
03664
03665
03666
03667
03668
03669
03670
03671 }
03672 #if TEMPLATE_PP_3DNOW
03673 __asm__ volatile("femms");
03674 #elif TEMPLATE_PP_MMX
03675 __asm__ volatile("emms");
03676 #endif
03677
03678 #ifdef DEBUG_BRIGHTNESS
03679 if(!isColor){
03680 int max=1;
03681 int i;
03682 for(i=0; i<256; i++)
03683 if(yHistogram[i] > max) max=yHistogram[i];
03684
03685 for(i=1; i<256; i++){
03686 int x;
03687 int start=yHistogram[i-1]/(max/256+1);
03688 int end=yHistogram[i]/(max/256+1);
03689 int inc= end > start ? 1 : -1;
03690 for(x=start; x!=end+inc; x+=inc)
03691 dst[ i*dstStride + x]+=128;
03692 }
03693
03694 for(i=0; i<100; i+=2){
03695 dst[ (white)*dstStride + i]+=128;
03696 dst[ (black)*dstStride + i]+=128;
03697 }
03698 }
03699 #endif
03700
03701 *c2= c;
03702
03703 }
03704
/*
 * End-of-template cleanup.
 *
 * The top of this file defines RENAME(a) as a suffix-pasting macro for the
 * selected variant and forces every TEMPLATE_PP_* selector to a value
 * (0 when the includer did not define it, 1 where a variant implies another).
 * All seven macros are removed again here so that none of them stays defined
 * after the end of this file.
 *
 * NOTE(review): presumably this is what allows the template to be included
 * several times, each time with a different TEMPLATE_PP_* selection --
 * confirm against the including file.
 */
03705 #undef RENAME
03706 #undef TEMPLATE_PP_C
03707 #undef TEMPLATE_PP_ALTIVEC
03708 #undef TEMPLATE_PP_MMX
03709 #undef TEMPLATE_PP_MMXEXT
03710 #undef TEMPLATE_PP_3DNOW
03711 #undef TEMPLATE_PP_SSE2