35 # define RENAME(a) a ## _C 37 # define TEMPLATE_PP_C 0 40 #ifdef TEMPLATE_PP_ALTIVEC 41 # define RENAME(a) a ## _altivec 43 # define TEMPLATE_PP_ALTIVEC 0 46 #ifdef TEMPLATE_PP_MMX 47 # define RENAME(a) a ## _MMX 49 # define TEMPLATE_PP_MMX 0 52 #ifdef TEMPLATE_PP_MMXEXT 53 # undef TEMPLATE_PP_MMX 54 # define TEMPLATE_PP_MMX 1 55 # define RENAME(a) a ## _MMX2 57 # define TEMPLATE_PP_MMXEXT 0 60 #ifdef TEMPLATE_PP_3DNOW 61 # undef TEMPLATE_PP_MMX 62 # define TEMPLATE_PP_MMX 1 63 # define RENAME(a) a ## _3DNow 65 # define TEMPLATE_PP_3DNOW 0 68 #ifdef TEMPLATE_PP_SSE2 69 # undef TEMPLATE_PP_MMX 70 # define TEMPLATE_PP_MMX 1 71 # undef TEMPLATE_PP_MMXEXT 72 # define TEMPLATE_PP_MMXEXT 1 73 # define RENAME(a) a ## _SSE2 75 # define TEMPLATE_PP_SSE2 0 83 #if TEMPLATE_PP_MMXEXT 84 #define REAL_PAVGB(a,b) "pavgb " #a ", " #b " \n\t" 85 #elif TEMPLATE_PP_3DNOW 86 #define REAL_PAVGB(a,b) "pavgusb " #a ", " #b " \n\t" 88 #define PAVGB(a,b) REAL_PAVGB(a,b) 90 #if TEMPLATE_PP_MMXEXT 91 #define PMINUB(a,b,t) "pminub " #a ", " #b " \n\t" 93 #define PMINUB(b,a,t) \ 94 "movq " #a ", " #t " \n\t"\ 95 "psubusb " #b ", " #t " \n\t"\ 96 "psubb " #t ", " #a " \n\t" 99 #if TEMPLATE_PP_MMXEXT 100 #define PMAXUB(a,b) "pmaxub " #a ", " #b " \n\t" 101 #elif TEMPLATE_PP_MMX 102 #define PMAXUB(a,b) \ 103 "psubusb " #a ", " #b " \n\t"\ 104 "paddb " #a ", " #b " \n\t" 116 "movq %0, %%mm7 \n\t" 117 "movq %1, %%mm6 \n\t" 118 : :
"m" (
c->mmxDcOffset[
c->nonBQP]),
"m" (
c->mmxDcThreshold[
c->nonBQP])
122 "lea (%2, %3), %%"FF_REG_a
" \n\t" 126 "movq (%2), %%mm0 \n\t" 127 "movq (%%"FF_REG_a
"), %%mm1 \n\t" 128 "movq %%mm0, %%mm3 \n\t" 129 "movq %%mm0, %%mm4 \n\t" 131 PMINUB(%%mm1, %%mm3, %%mm5)
132 "psubb %%mm1, %%mm0 \n\t" 133 "paddb %%mm7, %%mm0 \n\t" 134 "pcmpgtb %%mm6, %%mm0 \n\t" 136 "movq (%%"FF_REG_a
",%3), %%mm2 \n\t" 138 PMINUB(%%mm2, %%mm3, %%mm5)
139 "psubb %%mm2, %%mm1 \n\t" 140 "paddb %%mm7, %%mm1 \n\t" 141 "pcmpgtb %%mm6, %%mm1 \n\t" 142 "paddb %%mm1, %%mm0 \n\t" 144 "movq (%%"FF_REG_a
", %3, 2), %%mm1 \n\t" 146 PMINUB(%%mm1, %%mm3, %%mm5)
147 "psubb %%mm1, %%mm2 \n\t" 148 "paddb %%mm7, %%mm2 \n\t" 149 "pcmpgtb %%mm6, %%mm2 \n\t" 150 "paddb %%mm2, %%mm0 \n\t" 152 "lea (%%"FF_REG_a
", %3, 4), %%"FF_REG_a
"\n\t" 154 "movq (%2, %3, 4), %%mm2 \n\t" 156 PMINUB(%%mm2, %%mm3, %%mm5)
157 "psubb %%mm2, %%mm1 \n\t" 158 "paddb %%mm7, %%mm1 \n\t" 159 "pcmpgtb %%mm6, %%mm1 \n\t" 160 "paddb %%mm1, %%mm0 \n\t" 162 "movq (%%"FF_REG_a
"), %%mm1 \n\t" 164 PMINUB(%%mm1, %%mm3, %%mm5)
165 "psubb %%mm1, %%mm2 \n\t" 166 "paddb %%mm7, %%mm2 \n\t" 167 "pcmpgtb %%mm6, %%mm2 \n\t" 168 "paddb %%mm2, %%mm0 \n\t" 170 "movq (%%"FF_REG_a
", %3), %%mm2 \n\t" 172 PMINUB(%%mm2, %%mm3, %%mm5)
173 "psubb %%mm2, %%mm1 \n\t" 174 "paddb %%mm7, %%mm1 \n\t" 175 "pcmpgtb %%mm6, %%mm1 \n\t" 176 "paddb %%mm1, %%mm0 \n\t" 178 "movq (%%"FF_REG_a
", %3, 2), %%mm1 \n\t" 180 PMINUB(%%mm1, %%mm3, %%mm5)
181 "psubb %%mm1, %%mm2 \n\t" 182 "paddb %%mm7, %%mm2 \n\t" 183 "pcmpgtb %%mm6, %%mm2 \n\t" 184 "paddb %%mm2, %%mm0 \n\t" 185 "psubusb %%mm3, %%mm4 \n\t" 188 #if TEMPLATE_PP_MMXEXT 189 "pxor %%mm7, %%mm7 \n\t" 190 "psadbw %%mm7, %%mm0 \n\t" 192 "movq %%mm0, %%mm1 \n\t" 193 "psrlw $8, %%mm0 \n\t" 194 "paddb %%mm1, %%mm0 \n\t" 195 "movq %%mm0, %%mm1 \n\t" 196 "psrlq $16, %%mm0 \n\t" 197 "paddb %%mm1, %%mm0 \n\t" 198 "movq %%mm0, %%mm1 \n\t" 199 "psrlq $32, %%mm0 \n\t" 200 "paddb %%mm1, %%mm0 \n\t" 202 "movq %4, %%mm7 \n\t" 203 "paddusb %%mm7, %%mm7 \n\t" 204 "psubusb %%mm7, %%mm4 \n\t" 205 "packssdw %%mm4, %%mm4 \n\t" 206 "movd %%mm0, %0 \n\t" 207 "movd %%mm4, %1 \n\t" 209 :
"=r" (numEq),
"=r" (dcOk)
214 numEq= (-numEq) &0xFF;
215 if(numEq >
c->ppMode.flatnessThreshold){
222 #endif //TEMPLATE_PP_MMX 228 #if !TEMPLATE_PP_ALTIVEC 231 #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 234 "movq %2, %%mm0 \n\t" 235 "pxor %%mm4, %%mm4 \n\t" 237 "movq (%0), %%mm6 \n\t" 238 "movq (%0, %1), %%mm5 \n\t" 239 "movq %%mm5, %%mm1 \n\t" 240 "movq %%mm6, %%mm2 \n\t" 241 "psubusb %%mm6, %%mm5 \n\t" 242 "psubusb %%mm1, %%mm2 \n\t" 243 "por %%mm5, %%mm2 \n\t" 244 "psubusb %%mm0, %%mm2 \n\t" 245 "pcmpeqb %%mm4, %%mm2 \n\t" 247 "pand %%mm2, %%mm6 \n\t" 248 "pandn %%mm1, %%mm2 \n\t" 249 "por %%mm2, %%mm6 \n\t" 251 "movq (%0, %1, 8), %%mm5 \n\t" 252 "lea (%0, %1, 4), %%"FF_REG_a
" \n\t" 253 "lea (%0, %1, 8), %%"FF_REG_c
" \n\t" 254 "sub %1, %%"FF_REG_c
" \n\t" 256 "movq (%0, %1, 8), %%mm7 \n\t" 257 "movq %%mm5, %%mm1 \n\t" 258 "movq %%mm7, %%mm2 \n\t" 259 "psubusb %%mm7, %%mm5 \n\t" 260 "psubusb %%mm1, %%mm2 \n\t" 261 "por %%mm5, %%mm2 \n\t" 262 "psubusb %%mm0, %%mm2 \n\t" 263 "pcmpeqb %%mm4, %%mm2 \n\t" 265 "pand %%mm2, %%mm7 \n\t" 266 "pandn %%mm1, %%mm2 \n\t" 267 "por %%mm2, %%mm7 \n\t" 276 "movq (%0, %1), %%mm0 \n\t" 277 "movq %%mm0, %%mm1 \n\t" 281 "movq (%0, %1, 4), %%mm2 \n\t" 282 "movq %%mm2, %%mm5 \n\t" 283 PAVGB((%%FF_REGa), %%mm2)
284 PAVGB((%0, %1, 2), %%mm2)
285 "movq %%mm2, %%mm3 \n\t" 286 "movq (%0), %%mm4 \n\t" 289 "movq %%mm3, (%0) \n\t" 291 "movq %%mm1, %%mm0 \n\t" 293 "movq %%mm4, %%mm3 \n\t" 294 PAVGB((%0,%1,2), %%mm3)
295 PAVGB((%%FF_REGa,%1,2), %%mm5)
296 PAVGB((%%FF_REGa), %%mm5)
299 "movq %%mm3, (%0,%1) \n\t" 302 "movq (%%"FF_REG_c
"), %%mm0 \n\t" 303 PAVGB((%%FF_REGa, %1, 2), %%mm0)
304 "movq %%mm0, %%mm3 \n\t" 308 "movq (%0, %1, 2), %%mm2 \n\t" 309 "movq %%mm0, (%0, %1, 2) \n\t" 311 "movq (%%"FF_REG_a
", %1, 4), %%mm0 \n\t" 312 PAVGB((%%FF_REGc), %%mm0)
318 "movq (%%"FF_REG_a
"), %%mm5 \n\t" 319 "movq %%mm6, (%%"FF_REG_a
") \n\t" 321 "movq (%%"FF_REG_a
", %1, 4), %%mm6 \n\t" 326 "movq (%0, %1, 4), %%mm4 \n\t" 329 "movq %%mm6, (%0, %1, 4) \n\t" 334 "movq (%%"FF_REG_a
", %1, 2), %%mm6 \n\t" 337 "movq %%mm1, (%%"FF_REG_a
", %1, 2) \n\t" 339 PAVGB((%%FF_REGc), %%mm2)
340 "movq (%%"FF_REG_a
", %1, 4), %%mm0 \n\t" 344 "movq %%mm6, (%%"FF_REG_c
") \n\t" 351 "movq %%mm5, (%%"FF_REG_a
", %1, 4) \n\t" 356 :
"%"FF_REG_a,
"%"FF_REG_c
358 #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 360 const int l2=
stride + l1;
361 const int l3=
stride + l2;
362 const int l4=
stride + l3;
363 const int l5=
stride + l4;
364 const int l6=
stride + l5;
365 const int l7=
stride + l6;
366 const int l8=
stride + l7;
367 const int l9=
stride + l8;
375 sums[0] = 4*first +
src[l1] +
src[l2] +
src[l3] + 4;
376 sums[1] = sums[0] - first + src[l4];
377 sums[2] = sums[1] - first + src[l5];
378 sums[3] = sums[2] - first + src[l6];
379 sums[4] = sums[3] - first + src[l7];
380 sums[5] = sums[4] - src[l1] + src[l8];
381 sums[6] = sums[5] - src[l2] + last;
382 sums[7] = sums[6] - src[l3] + last;
383 sums[8] = sums[7] - src[l4] + last;
384 sums[9] = sums[8] - src[l5] + last;
386 src[l1]= (sums[0] + sums[2] + 2*src[l1])>>4;
387 src[l2]= (sums[1] + sums[3] + 2*src[l2])>>4;
388 src[l3]= (sums[2] + sums[4] + 2*src[l3])>>4;
389 src[l4]= (sums[3] + sums[5] + 2*src[l4])>>4;
390 src[l5]= (sums[4] + sums[6] + 2*src[l5])>>4;
391 src[l6]= (sums[5] + sums[7] + 2*src[l6])>>4;
392 src[l7]= (sums[6] + sums[8] + 2*src[l7])>>4;
393 src[l8]= (sums[7] + sums[9] + 2*src[l8])>>4;
397 #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 399 #endif //TEMPLATE_PP_ALTIVEC 410 #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 414 "pxor %%mm7, %%mm7 \n\t" 415 "lea (%0, %1), %%"FF_REG_a
" \n\t" 416 "lea (%%"FF_REG_a
", %1, 4), %%"FF_REG_c
"\n\t" 419 "movq (%%"FF_REG_a
", %1, 2), %%mm0 \n\t" 420 "movq (%0, %1, 4), %%mm1 \n\t" 421 "movq %%mm1, %%mm2 \n\t" 422 "psubusb %%mm0, %%mm1 \n\t" 423 "psubusb %%mm2, %%mm0 \n\t" 424 "por %%mm1, %%mm0 \n\t" 425 "movq (%%"FF_REG_c
"), %%mm3 \n\t" 426 "movq (%%"FF_REG_c
", %1), %%mm4 \n\t" 427 "movq %%mm3, %%mm5 \n\t" 428 "psubusb %%mm4, %%mm3 \n\t" 429 "psubusb %%mm5, %%mm4 \n\t" 430 "por %%mm4, %%mm3 \n\t" 432 "movq %%mm2, %%mm1 \n\t" 433 "psubusb %%mm5, %%mm2 \n\t" 434 "movq %%mm2, %%mm4 \n\t" 435 "pcmpeqb %%mm7, %%mm2 \n\t" 436 "psubusb %%mm1, %%mm5 \n\t" 437 "por %%mm5, %%mm4 \n\t" 438 "psubusb %%mm0, %%mm4 \n\t" 439 "movq %%mm4, %%mm3 \n\t" 440 "movq %2, %%mm0 \n\t" 441 "paddusb %%mm0, %%mm0 \n\t" 442 "psubusb %%mm0, %%mm4 \n\t" 443 "pcmpeqb %%mm7, %%mm4 \n\t" 444 "psubusb "MANGLE(b01)
", %%mm3 \n\t" 445 "pand %%mm4, %%mm3 \n\t" 448 "movq %%mm3, %%mm1 \n\t" 452 "movq (%0, %1, 4), %%mm0 \n\t" 453 "pxor %%mm2, %%mm0 \n\t" 454 "psubusb %%mm3, %%mm0 \n\t" 455 "pxor %%mm2, %%mm0 \n\t" 456 "movq %%mm0, (%0, %1, 4) \n\t" 458 "movq (%%"FF_REG_c
"), %%mm0 \n\t" 459 "pxor %%mm2, %%mm0 \n\t" 460 "paddusb %%mm3, %%mm0 \n\t" 461 "pxor %%mm2, %%mm0 \n\t" 462 "movq %%mm0, (%%"FF_REG_c
") \n\t" 466 "movq (%%"FF_REG_a
", %1, 2), %%mm0 \n\t" 467 "pxor %%mm2, %%mm0 \n\t" 468 "psubusb %%mm1, %%mm0 \n\t" 469 "pxor %%mm2, %%mm0 \n\t" 470 "movq %%mm0, (%%"FF_REG_a
", %1, 2) \n\t" 472 "movq (%%"FF_REG_c
", %1), %%mm0 \n\t" 473 "pxor %%mm2, %%mm0 \n\t" 474 "paddusb %%mm1, %%mm0 \n\t" 475 "pxor %%mm2, %%mm0 \n\t" 476 "movq %%mm0, (%%"FF_REG_c
", %1) \n\t" 480 "movq (%%"FF_REG_a
", %1), %%mm0 \n\t" 481 "pxor %%mm2, %%mm0 \n\t" 482 "psubusb %%mm1, %%mm0 \n\t" 483 "pxor %%mm2, %%mm0 \n\t" 484 "movq %%mm0, (%%"FF_REG_a
", %1) \n\t" 486 "movq (%%"FF_REG_c
", %1, 2), %%mm0 \n\t" 487 "pxor %%mm2, %%mm0 \n\t" 488 "paddusb %%mm1, %%mm0 \n\t" 489 "pxor %%mm2, %%mm0 \n\t" 490 "movq %%mm0, (%%"FF_REG_c
", %1, 2) \n\t" 495 :
"%"FF_REG_a,
"%"FF_REG_c
497 #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 500 const int l2=
stride + l1;
501 const int l3=
stride + l2;
502 const int l4=
stride + l3;
503 const int l5=
stride + l4;
504 const int l6=
stride + l5;
505 const int l7=
stride + l6;
513 int b= src[l4] - src[l5];
514 int c= src[l5] - src[l6];
531 #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 534 #if !TEMPLATE_PP_ALTIVEC 537 #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 555 #if 0 //slightly more accurate and slightly slower 556 "pxor %%mm7, %%mm7 \n\t" 557 "lea (%0, %1), %%"FF_REG_a
" \n\t" 558 "lea (%%"FF_REG_a
", %1, 4), %%"FF_REG_c
"\n\t" 564 "movq (%0, %1, 2), %%mm0 \n\t" 565 "movq (%0), %%mm1 \n\t" 566 "movq %%mm0, %%mm2 \n\t" 571 "movq (%%"FF_REG_a
"), %%mm1 \n\t" 572 "movq (%%"FF_REG_a
", %1, 2), %%mm3 \n\t" 573 "movq %%mm1, %%mm4 \n\t" 578 "movq %%mm0, %%mm4 \n\t" 579 "psubusb %%mm1, %%mm0 \n\t" 580 "psubusb %%mm4, %%mm1 \n\t" 581 "por %%mm0, %%mm1 \n\t" 584 "movq (%0, %1, 4), %%mm0 \n\t" 585 "movq %%mm0, %%mm4 \n\t" 590 "movq (%%"FF_REG_c
"), %%mm2 \n\t" 591 "movq %%mm3, %%mm5 \n\t" 596 "movq %%mm0, %%mm6 \n\t" 597 "psubusb %%mm3, %%mm0 \n\t" 598 "psubusb %%mm6, %%mm3 \n\t" 599 "por %%mm0, %%mm3 \n\t" 600 "pcmpeqb %%mm7, %%mm0 \n\t" 603 "movq (%%"FF_REG_c
", %1), %%mm6 \n\t" 604 "movq %%mm6, %%mm5 \n\t" 609 "movq (%%"FF_REG_c
", %1, 2), %%mm5 \n\t" 610 "movq %%mm2, %%mm4 \n\t" 615 "movq %%mm6, %%mm4 \n\t" 616 "psubusb %%mm2, %%mm6 \n\t" 617 "psubusb %%mm4, %%mm2 \n\t" 618 "por %%mm6, %%mm2 \n\t" 622 PMINUB(%%mm2, %%mm1, %%mm4)
623 "movq %2, %%mm4 \n\t" 624 "paddusb "MANGLE(b01)
", %%mm4 \n\t" 625 "pcmpgtb %%mm3, %%mm4 \n\t" 626 "psubusb %%mm1, %%mm3 \n\t" 627 "pand %%mm4, %%mm3 \n\t" 629 "movq %%mm3, %%mm1 \n\t" 633 "paddusb %%mm1, %%mm3 \n\t" 636 "movq (%%"FF_REG_a
", %1, 2), %%mm6 \n\t" 637 "movq (%0, %1, 4), %%mm5 \n\t" 638 "movq (%0, %1, 4), %%mm4 \n\t" 639 "psubusb %%mm6, %%mm5 \n\t" 640 "psubusb %%mm4, %%mm6 \n\t" 641 "por %%mm6, %%mm5 \n\t" 642 "pcmpeqb %%mm7, %%mm6 \n\t" 643 "pxor %%mm6, %%mm0 \n\t" 644 "pand %%mm0, %%mm3 \n\t" 645 PMINUB(%%mm5, %%mm3, %%mm0)
647 "psubusb "MANGLE(b01)
", %%mm3 \n\t" 650 "movq (%%"FF_REG_a
", %1, 2), %%mm0 \n\t" 651 "movq (%0, %1, 4), %%mm2 \n\t" 652 "pxor %%mm6, %%mm0 \n\t" 653 "pxor %%mm6, %%mm2 \n\t" 654 "psubb %%mm3, %%mm0 \n\t" 655 "paddb %%mm3, %%mm2 \n\t" 656 "pxor %%mm6, %%mm0 \n\t" 657 "pxor %%mm6, %%mm2 \n\t" 658 "movq %%mm0, (%%"FF_REG_a
", %1, 2) \n\t" 659 "movq %%mm2, (%0, %1, 4) \n\t" 662 "lea (%0, %1), %%"FF_REG_a
" \n\t" 663 "pcmpeqb %%mm6, %%mm6 \n\t" 669 "movq (%%"FF_REG_a
", %1, 2), %%mm1 \n\t" 670 "movq (%0, %1, 4), %%mm0 \n\t" 671 "pxor %%mm6, %%mm1 \n\t" 675 "movq (%%"FF_REG_a
", %1, 4), %%mm2 \n\t" 676 "movq (%%"FF_REG_a
", %1), %%mm3 \n\t" 677 "pxor %%mm6, %%mm2 \n\t" 678 "movq %%mm2, %%mm5 \n\t" 679 "movq "MANGLE(b80)
", %%mm4 \n\t" 680 "lea (%%"FF_REG_a
", %1, 4), %%"FF_REG_c
"\n\t" 687 "movq (%%"FF_REG_a
"), %%mm2 \n\t" 688 "pxor %%mm6, %%mm2 \n\t" 691 "movq "MANGLE(b80)
", %%mm3 \n\t" 697 PAVGB((%%FF_REGc, %1), %%mm5)
698 "movq (%%"FF_REG_c
", %1, 2), %%mm1 \n\t" 699 "pxor %%mm6, %%mm1 \n\t" 700 PAVGB((%0, %1, 4), %%mm1)
701 "movq "MANGLE(b80)
", %%mm2 \n\t" 707 "movq "MANGLE(b00)
", %%mm1 \n\t" 708 "movq "MANGLE(b00)
", %%mm5 \n\t" 709 "psubb %%mm2, %%mm1 \n\t" 710 "psubb %%mm3, %%mm5 \n\t" 713 PMINUB(%%mm2, %%mm3, %%mm1)
717 "movq "MANGLE(b00)
", %%mm7 \n\t" 718 "movq %2, %%mm2 \n\t" 720 "psubb %%mm6, %%mm2 \n\t" 722 "movq %%mm4, %%mm1 \n\t" 723 "pcmpgtb %%mm7, %%mm1 \n\t" 724 "pxor %%mm1, %%mm4 \n\t" 725 "psubb %%mm1, %%mm4 \n\t" 726 "pcmpgtb %%mm4, %%mm2 \n\t" 727 "psubusb %%mm3, %%mm4 \n\t" 730 "movq %%mm4, %%mm3 \n\t" 731 "psubusb "MANGLE(b01)
", %%mm4 \n\t" 734 "paddb %%mm3, %%mm4 \n\t" 735 "pand %%mm2, %%mm4 \n\t" 737 "movq "MANGLE(b80)
", %%mm5 \n\t" 738 "psubb %%mm0, %%mm5 \n\t" 739 "paddsb %%mm6, %%mm5 \n\t" 740 "pcmpgtb %%mm5, %%mm7 \n\t" 741 "pxor %%mm7, %%mm5 \n\t" 743 PMINUB(%%mm5, %%mm4, %%mm3)
744 "pxor %%mm1, %%mm7 \n\t" 746 "pand %%mm7, %%mm4 \n\t" 747 "movq (%%"FF_REG_a
", %1, 2), %%mm0 \n\t" 748 "movq (%0, %1, 4), %%mm2 \n\t" 749 "pxor %%mm1, %%mm0 \n\t" 750 "pxor %%mm1, %%mm2 \n\t" 751 "paddb %%mm4, %%mm0 \n\t" 752 "psubb %%mm4, %%mm2 \n\t" 753 "pxor %%mm1, %%mm0 \n\t" 754 "pxor %%mm1, %%mm2 \n\t" 755 "movq %%mm0, (%%"FF_REG_a
", %1, 2) \n\t" 756 "movq %%mm2, (%0, %1, 4) \n\t" 761 :
"%"FF_REG_a,
"%"FF_REG_c
819 #elif TEMPLATE_PP_MMX 823 "pxor %%mm7, %%mm7 \n\t" 828 "movq (%0), %%mm0 \n\t" 829 "movq %%mm0, %%mm1 \n\t" 830 "punpcklbw %%mm7, %%mm0 \n\t" 831 "punpckhbw %%mm7, %%mm1 \n\t" 833 "movq (%0, %1), %%mm2 \n\t" 834 "lea (%0, %1, 2), %%"FF_REG_a
" \n\t" 835 "movq %%mm2, %%mm3 \n\t" 836 "punpcklbw %%mm7, %%mm2 \n\t" 837 "punpckhbw %%mm7, %%mm3 \n\t" 839 "movq (%%"FF_REG_a
"), %%mm4 \n\t" 840 "movq %%mm4, %%mm5 \n\t" 841 "punpcklbw %%mm7, %%mm4 \n\t" 842 "punpckhbw %%mm7, %%mm5 \n\t" 844 "paddw %%mm0, %%mm0 \n\t" 845 "paddw %%mm1, %%mm1 \n\t" 846 "psubw %%mm4, %%mm2 \n\t" 847 "psubw %%mm5, %%mm3 \n\t" 848 "psubw %%mm2, %%mm0 \n\t" 849 "psubw %%mm3, %%mm1 \n\t" 851 "psllw $2, %%mm2 \n\t" 852 "psllw $2, %%mm3 \n\t" 853 "psubw %%mm2, %%mm0 \n\t" 854 "psubw %%mm3, %%mm1 \n\t" 856 "movq (%%"FF_REG_a
", %1), %%mm2 \n\t" 857 "movq %%mm2, %%mm3 \n\t" 858 "punpcklbw %%mm7, %%mm2 \n\t" 859 "punpckhbw %%mm7, %%mm3 \n\t" 861 "psubw %%mm2, %%mm0 \n\t" 862 "psubw %%mm3, %%mm1 \n\t" 863 "psubw %%mm2, %%mm0 \n\t" 864 "psubw %%mm3, %%mm1 \n\t" 865 "movq %%mm0, (%3) \n\t" 866 "movq %%mm1, 8(%3) \n\t" 868 "movq (%%"FF_REG_a
", %1, 2), %%mm0 \n\t" 869 "movq %%mm0, %%mm1 \n\t" 870 "punpcklbw %%mm7, %%mm0 \n\t" 871 "punpckhbw %%mm7, %%mm1 \n\t" 873 "psubw %%mm0, %%mm2 \n\t" 874 "psubw %%mm1, %%mm3 \n\t" 875 "movq %%mm2, 16(%3) \n\t" 876 "movq %%mm3, 24(%3) \n\t" 877 "paddw %%mm4, %%mm4 \n\t" 878 "paddw %%mm5, %%mm5 \n\t" 879 "psubw %%mm2, %%mm4 \n\t" 880 "psubw %%mm3, %%mm5 \n\t" 882 "lea (%%"FF_REG_a
", %1), %0 \n\t" 883 "psllw $2, %%mm2 \n\t" 884 "psllw $2, %%mm3 \n\t" 885 "psubw %%mm2, %%mm4 \n\t" 886 "psubw %%mm3, %%mm5 \n\t" 888 "movq (%0, %1, 2), %%mm2 \n\t" 889 "movq %%mm2, %%mm3 \n\t" 890 "punpcklbw %%mm7, %%mm2 \n\t" 891 "punpckhbw %%mm7, %%mm3 \n\t" 892 "psubw %%mm2, %%mm4 \n\t" 893 "psubw %%mm3, %%mm5 \n\t" 894 "psubw %%mm2, %%mm4 \n\t" 895 "psubw %%mm3, %%mm5 \n\t" 897 "movq (%%"FF_REG_a
", %1, 4), %%mm6 \n\t" 898 "punpcklbw %%mm7, %%mm6 \n\t" 899 "psubw %%mm6, %%mm2 \n\t" 900 "movq (%%"FF_REG_a
", %1, 4), %%mm6 \n\t" 901 "punpckhbw %%mm7, %%mm6 \n\t" 902 "psubw %%mm6, %%mm3 \n\t" 904 "paddw %%mm0, %%mm0 \n\t" 905 "paddw %%mm1, %%mm1 \n\t" 906 "psubw %%mm2, %%mm0 \n\t" 907 "psubw %%mm3, %%mm1 \n\t" 909 "psllw $2, %%mm2 \n\t" 910 "psllw $2, %%mm3 \n\t" 911 "psubw %%mm2, %%mm0 \n\t" 912 "psubw %%mm3, %%mm1 \n\t" 914 "movq (%0, %1, 4), %%mm2 \n\t" 915 "movq %%mm2, %%mm3 \n\t" 916 "punpcklbw %%mm7, %%mm2 \n\t" 917 "punpckhbw %%mm7, %%mm3 \n\t" 919 "paddw %%mm2, %%mm2 \n\t" 920 "paddw %%mm3, %%mm3 \n\t" 921 "psubw %%mm2, %%mm0 \n\t" 922 "psubw %%mm3, %%mm1 \n\t" 924 "movq (%3), %%mm2 \n\t" 925 "movq 8(%3), %%mm3 \n\t" 927 #if TEMPLATE_PP_MMXEXT 928 "movq %%mm7, %%mm6 \n\t" 929 "psubw %%mm0, %%mm6 \n\t" 930 "pmaxsw %%mm6, %%mm0 \n\t" 931 "movq %%mm7, %%mm6 \n\t" 932 "psubw %%mm1, %%mm6 \n\t" 933 "pmaxsw %%mm6, %%mm1 \n\t" 934 "movq %%mm7, %%mm6 \n\t" 935 "psubw %%mm2, %%mm6 \n\t" 936 "pmaxsw %%mm6, %%mm2 \n\t" 937 "movq %%mm7, %%mm6 \n\t" 938 "psubw %%mm3, %%mm6 \n\t" 939 "pmaxsw %%mm6, %%mm3 \n\t" 941 "movq %%mm7, %%mm6 \n\t" 942 "pcmpgtw %%mm0, %%mm6 \n\t" 943 "pxor %%mm6, %%mm0 \n\t" 944 "psubw %%mm6, %%mm0 \n\t" 945 "movq %%mm7, %%mm6 \n\t" 946 "pcmpgtw %%mm1, %%mm6 \n\t" 947 "pxor %%mm6, %%mm1 \n\t" 948 "psubw %%mm6, %%mm1 \n\t" 949 "movq %%mm7, %%mm6 \n\t" 950 "pcmpgtw %%mm2, %%mm6 \n\t" 951 "pxor %%mm6, %%mm2 \n\t" 952 "psubw %%mm6, %%mm2 \n\t" 953 "movq %%mm7, %%mm6 \n\t" 954 "pcmpgtw %%mm3, %%mm6 \n\t" 955 "pxor %%mm6, %%mm3 \n\t" 956 "psubw %%mm6, %%mm3 \n\t" 959 #if TEMPLATE_PP_MMXEXT 960 "pminsw %%mm2, %%mm0 \n\t" 961 "pminsw %%mm3, %%mm1 \n\t" 963 "movq %%mm0, %%mm6 \n\t" 964 "psubusw %%mm2, %%mm6 \n\t" 965 "psubw %%mm6, %%mm0 \n\t" 966 "movq %%mm1, %%mm6 \n\t" 967 "psubusw %%mm3, %%mm6 \n\t" 968 "psubw %%mm6, %%mm1 \n\t" 971 "movd %2, %%mm2 \n\t" 972 "punpcklbw %%mm7, %%mm2 \n\t" 974 "movq %%mm7, %%mm6 \n\t" 975 "pcmpgtw %%mm4, %%mm6 \n\t" 976 "pxor %%mm6, %%mm4 \n\t" 977 "psubw %%mm6, %%mm4 \n\t" 978 "pcmpgtw %%mm5, %%mm7 \n\t" 979 "pxor %%mm7, %%mm5 \n\t" 980 "psubw %%mm7, %%mm5 \n\t" 982 "psllw $3, %%mm2 \n\t" 983 "movq %%mm2, %%mm3 \n\t" 984 "pcmpgtw %%mm4, %%mm2 \n\t" 985 "pcmpgtw %%mm5, %%mm3 \n\t" 986 "pand %%mm2, %%mm4 \n\t" 987 "pand %%mm3, %%mm5 \n\t" 990 "psubusw %%mm0, %%mm4 \n\t" 991 "psubusw %%mm1, %%mm5 \n\t" 994 "movq "MANGLE(w05)
", %%mm2 \n\t" 995 "pmullw %%mm2, %%mm4 \n\t" 996 "pmullw %%mm2, %%mm5 \n\t" 997 "movq "MANGLE(w20)
", %%mm2 \n\t" 998 "paddw %%mm2, %%mm4 \n\t" 999 "paddw %%mm2, %%mm5 \n\t" 1000 "psrlw $6, %%mm4 \n\t" 1001 "psrlw $6, %%mm5 \n\t" 1003 "movq 16(%3), %%mm0 \n\t" 1004 "movq 24(%3), %%mm1 \n\t" 1006 "pxor %%mm2, %%mm2 \n\t" 1007 "pxor %%mm3, %%mm3 \n\t" 1009 "pcmpgtw %%mm0, %%mm2 \n\t" 1010 "pcmpgtw %%mm1, %%mm3 \n\t" 1011 "pxor %%mm2, %%mm0 \n\t" 1012 "pxor %%mm3, %%mm1 \n\t" 1013 "psubw %%mm2, %%mm0 \n\t" 1014 "psubw %%mm3, %%mm1 \n\t" 1015 "psrlw $1, %%mm0 \n\t" 1016 "psrlw $1, %%mm1 \n\t" 1018 "pxor %%mm6, %%mm2 \n\t" 1019 "pxor %%mm7, %%mm3 \n\t" 1020 "pand %%mm2, %%mm4 \n\t" 1021 "pand %%mm3, %%mm5 \n\t" 1023 #if TEMPLATE_PP_MMXEXT 1024 "pminsw %%mm0, %%mm4 \n\t" 1025 "pminsw %%mm1, %%mm5 \n\t" 1027 "movq %%mm4, %%mm2 \n\t" 1028 "psubusw %%mm0, %%mm2 \n\t" 1029 "psubw %%mm2, %%mm4 \n\t" 1030 "movq %%mm5, %%mm2 \n\t" 1031 "psubusw %%mm1, %%mm2 \n\t" 1032 "psubw %%mm2, %%mm5 \n\t" 1034 "pxor %%mm6, %%mm4 \n\t" 1035 "pxor %%mm7, %%mm5 \n\t" 1036 "psubw %%mm6, %%mm4 \n\t" 1037 "psubw %%mm7, %%mm5 \n\t" 1038 "packsswb %%mm5, %%mm4 \n\t" 1039 "movq (%0), %%mm0 \n\t" 1040 "paddb %%mm4, %%mm0 \n\t" 1041 "movq %%mm0, (%0) \n\t" 1042 "movq (%0, %1), %%mm0 \n\t" 1043 "psubb %%mm4, %%mm0 \n\t" 1044 "movq %%mm0, (%0, %1) \n\t" 1051 #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 1053 const int l2=
stride + l1;
1054 const int l3=
stride + l2;
1055 const int l4=
stride + l3;
1056 const int l5=
stride + l4;
1057 const int l6=
stride + l5;
1058 const int l7=
stride + l6;
1059 const int l8=
stride + l7;
1064 const int middleEnergy= 5*(
src[l5] -
src[l4]) + 2*(
src[l3] -
src[l6]);
1065 if(
FFABS(middleEnergy) < 8*
c->QP){
1066 const int q=(
src[l4] -
src[l5])/2;
1067 const int leftEnergy= 5*(
src[l3] -
src[l2]) + 2*(
src[l1] -
src[l4]);
1068 const int rightEnergy= 5*(
src[l7] -
src[l6]) + 2*(
src[l5] -
src[l8]);
1074 d*=
FFSIGN(-middleEnergy);
1089 #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 1091 #endif //TEMPLATE_PP_ALTIVEC 1093 #if !TEMPLATE_PP_ALTIVEC 1096 #if HAVE_7REGS && (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) 1099 "pxor %%mm6, %%mm6 \n\t" 1100 "pcmpeqb %%mm7, %%mm7 \n\t" 1101 "movq %2, %%mm0 \n\t" 1102 "punpcklbw %%mm6, %%mm0 \n\t" 1103 "psrlw $1, %%mm0 \n\t" 1104 "psubw %%mm7, %%mm0 \n\t" 1105 "packuswb %%mm0, %%mm0 \n\t" 1106 "movq %%mm0, %3 \n\t" 1108 "lea (%0, %1), %%"FF_REG_a
" \n\t" 1109 "lea (%%"FF_REG_a
", %1, 4), %%"FF_REG_d
"\n\t" 1114 #undef REAL_FIND_MIN_MAX 1116 #if TEMPLATE_PP_MMXEXT 1117 #define REAL_FIND_MIN_MAX(addr)\ 1118 "movq " #addr ", %%mm0 \n\t"\ 1119 "pminub %%mm0, %%mm7 \n\t"\ 1120 "pmaxub %%mm0, %%mm6 \n\t" 1122 #define REAL_FIND_MIN_MAX(addr)\ 1123 "movq " #addr ", %%mm0 \n\t"\ 1124 "movq %%mm7, %%mm1 \n\t"\ 1125 "psubusb %%mm0, %%mm6 \n\t"\ 1126 "paddb %%mm0, %%mm6 \n\t"\ 1127 "psubusb %%mm0, %%mm1 \n\t"\ 1128 "psubb %%mm1, %%mm7 \n\t" 1130 #define FIND_MIN_MAX(addr) REAL_FIND_MIN_MAX(addr) 1132 FIND_MIN_MAX((%%FF_REGa))
1133 FIND_MIN_MAX((%%FF_REGa, %1))
1134 FIND_MIN_MAX((%%FF_REGa, %1, 2))
1135 FIND_MIN_MAX((%0, %1, 4))
1136 FIND_MIN_MAX((%%FF_REGd))
1137 FIND_MIN_MAX((%%FF_REGd, %1))
1138 FIND_MIN_MAX((%%FF_REGd, %1, 2))
1139 FIND_MIN_MAX((%0, %1, 8))
1141 "movq %%mm7, %%mm4 \n\t" 1142 "psrlq $8, %%mm7 \n\t" 1143 #if TEMPLATE_PP_MMXEXT 1144 "pminub %%mm4, %%mm7 \n\t" 1145 "pshufw $0xF9, %%mm7, %%mm4 \n\t" 1146 "pminub %%mm4, %%mm7 \n\t" 1147 "pshufw $0xFE, %%mm7, %%mm4 \n\t" 1148 "pminub %%mm4, %%mm7 \n\t" 1150 "movq %%mm7, %%mm1 \n\t" 1151 "psubusb %%mm4, %%mm1 \n\t" 1152 "psubb %%mm1, %%mm7 \n\t" 1153 "movq %%mm7, %%mm4 \n\t" 1154 "psrlq $16, %%mm7 \n\t" 1155 "movq %%mm7, %%mm1 \n\t" 1156 "psubusb %%mm4, %%mm1 \n\t" 1157 "psubb %%mm1, %%mm7 \n\t" 1158 "movq %%mm7, %%mm4 \n\t" 1159 "psrlq $32, %%mm7 \n\t" 1160 "movq %%mm7, %%mm1 \n\t" 1161 "psubusb %%mm4, %%mm1 \n\t" 1162 "psubb %%mm1, %%mm7 \n\t" 1166 "movq %%mm6, %%mm4 \n\t" 1167 "psrlq $8, %%mm6 \n\t" 1168 #if TEMPLATE_PP_MMXEXT 1169 "pmaxub %%mm4, %%mm6 \n\t" 1170 "pshufw $0xF9, %%mm6, %%mm4 \n\t" 1171 "pmaxub %%mm4, %%mm6 \n\t" 1172 "pshufw $0xFE, %%mm6, %%mm4 \n\t" 1173 "pmaxub %%mm4, %%mm6 \n\t" 1175 "psubusb %%mm4, %%mm6 \n\t" 1176 "paddb %%mm4, %%mm6 \n\t" 1177 "movq %%mm6, %%mm4 \n\t" 1178 "psrlq $16, %%mm6 \n\t" 1179 "psubusb %%mm4, %%mm6 \n\t" 1180 "paddb %%mm4, %%mm6 \n\t" 1181 "movq %%mm6, %%mm4 \n\t" 1182 "psrlq $32, %%mm6 \n\t" 1183 "psubusb %%mm4, %%mm6 \n\t" 1184 "paddb %%mm4, %%mm6 \n\t" 1186 "movq %%mm6, %%mm0 \n\t" 1187 "psubb %%mm7, %%mm6 \n\t" 1188 "push %%"FF_REG_a
" \n\t" 1189 "movd %%mm6, %%eax \n\t" 1190 "cmpb "MANGLE(deringThreshold)
", %%al \n\t" 1191 "pop %%"FF_REG_a
" \n\t" 1194 "punpcklbw %%mm7, %%mm7 \n\t" 1195 "punpcklbw %%mm7, %%mm7 \n\t" 1196 "punpcklbw %%mm7, %%mm7 \n\t" 1197 "movq %%mm7, (%4) \n\t" 1199 "movq (%0), %%mm0 \n\t" 1200 "movq %%mm0, %%mm1 \n\t" 1201 "movq %%mm0, %%mm2 \n\t" 1202 "psllq $8, %%mm1 \n\t" 1203 "psrlq $8, %%mm2 \n\t" 1204 "movd -4(%0), %%mm3 \n\t" 1205 "movd 8(%0), %%mm4 \n\t" 1206 "psrlq $24, %%mm3 \n\t" 1207 "psllq $56, %%mm4 \n\t" 1208 "por %%mm3, %%mm1 \n\t" 1209 "por %%mm4, %%mm2 \n\t" 1210 "movq %%mm1, %%mm3 \n\t" 1213 "psubusb %%mm7, %%mm0 \n\t" 1214 "psubusb %%mm7, %%mm2 \n\t" 1215 "psubusb %%mm7, %%mm3 \n\t" 1216 "pcmpeqb "MANGLE(b00)
", %%mm0 \n\t" 1217 "pcmpeqb "MANGLE(b00)
", %%mm2 \n\t" 1218 "pcmpeqb "MANGLE(b00)
", %%mm3 \n\t" 1219 "paddb %%mm2, %%mm0 \n\t" 1220 "paddb %%mm3, %%mm0 \n\t" 1222 "movq (%%"FF_REG_a
"), %%mm2 \n\t" 1223 "movq %%mm2, %%mm3 \n\t" 1224 "movq %%mm2, %%mm4 \n\t" 1225 "psllq $8, %%mm3 \n\t" 1226 "psrlq $8, %%mm4 \n\t" 1227 "movd -4(%%"FF_REG_a
"), %%mm5 \n\t" 1228 "movd 8(%%"FF_REG_a
"), %%mm6 \n\t" 1229 "psrlq $24, %%mm5 \n\t" 1230 "psllq $56, %%mm6 \n\t" 1231 "por %%mm5, %%mm3 \n\t" 1232 "por %%mm6, %%mm4 \n\t" 1233 "movq %%mm3, %%mm5 \n\t" 1236 "psubusb %%mm7, %%mm2 \n\t" 1237 "psubusb %%mm7, %%mm4 \n\t" 1238 "psubusb %%mm7, %%mm5 \n\t" 1239 "pcmpeqb "MANGLE(b00)
", %%mm2 \n\t" 1240 "pcmpeqb "MANGLE(b00)
", %%mm4 \n\t" 1241 "pcmpeqb "MANGLE(b00)
", %%mm5 \n\t" 1242 "paddb %%mm4, %%mm2 \n\t" 1243 "paddb %%mm5, %%mm2 \n\t" 1245 #define REAL_DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1) \ 1246 "movq " #src ", " #sx " \n\t" \ 1247 "movq " #sx ", " #lx " \n\t" \ 1248 "movq " #sx ", " #t0 " \n\t" \ 1249 "psllq $8, " #lx " \n\t"\ 1250 "psrlq $8, " #t0 " \n\t"\ 1251 "movd -4" #src ", " #t1 " \n\t"\ 1252 "psrlq $24, " #t1 " \n\t"\ 1253 "por " #t1 ", " #lx " \n\t" \ 1254 "movd 8" #src ", " #t1 " \n\t"\ 1255 "psllq $56, " #t1 " \n\t"\ 1256 "por " #t1 ", " #t0 " \n\t" \ 1257 "movq " #lx ", " #t1 " \n\t" \ 1261 "movq " #lx ", 8(%4) \n\t"\ 1262 "movq (%4), " #lx " \n\t"\ 1263 "psubusb " #lx ", " #t1 " \n\t"\ 1264 "psubusb " #lx ", " #t0 " \n\t"\ 1265 "psubusb " #lx ", " #sx " \n\t"\ 1266 "movq "MANGLE(b00)", " #lx " \n\t"\ 1267 "pcmpeqb " #lx ", " #t1 " \n\t" \ 1268 "pcmpeqb " #lx ", " #t0 " \n\t" \ 1269 "pcmpeqb " #lx ", " #sx " \n\t" \ 1270 "paddb " #t1 ", " #t0 " \n\t"\ 1271 "paddb " #t0 ", " #sx " \n\t"\ 1274 "movq " #dst ", " #t0 " \n\t" \ 1275 "movq " #t0 ", " #t1 " \n\t" \ 1276 "psubusb %3, " #t0 " \n\t"\ 1277 "paddusb %3, " #t1 " \n\t"\ 1279 PMINUB(t1, pplx, t0)\ 1280 "paddb " #sx ", " #ppsx " \n\t"\ 1281 "paddb " #psx ", " #ppsx " \n\t"\ 1282 "#paddb "MANGLE(b02)", " #ppsx " \n\t"\ 1283 "pand "MANGLE(b08)", " #ppsx " \n\t"\ 1284 "pcmpeqb " #lx ", " #ppsx " \n\t"\ 1285 "pand " #ppsx ", " #pplx " \n\t"\ 1286 "pandn " #dst ", " #ppsx " \n\t"\ 1287 "por " #pplx ", " #ppsx " \n\t"\ 1288 "movq " #ppsx ", " #dst " \n\t"\ 1289 "movq 8(%4), " #lx " \n\t" 1291 #define DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1) \ 1292 REAL_DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1) 1309 DERING_CORE((%%FF_REGa) ,(%%FF_REGa, %1) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
1310 DERING_CORE((%%FF_REGa, %1) ,(%%FF_REGa, %1, 2),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
1311 DERING_CORE((%%FF_REGa, %1, 2),(%0, %1, 4) ,%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7)
1312 DERING_CORE((%0, %1, 4) ,(%%FF_REGd) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
1313 DERING_CORE((%%FF_REGd) ,(%%FF_REGd, %1) ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
1314 DERING_CORE((%%FF_REGd, %1) ,(%%FF_REGd, %1, 2),%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7)
1315 DERING_CORE((%%FF_REGd, %1, 2),(%0, %1, 8) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
1316 DERING_CORE((%0, %1, 8) ,(%%FF_REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
1321 :
"%"FF_REG_a,
"%"FF_REG_d
1323 #else // HAVE_7REGS && (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) 1330 const int QP2=
c->QP/2 + 1;
1338 if(*p > max) max= *p;
1339 if(*p < min) min= *p;
1342 avg= (min + max + 1)>>1;
1344 if(max - min <deringThreshold)
return;
1346 for(y=0; y<10; y++){
1361 t &= (t<<1) & (t>>1);
1366 int t = s[y-1] & s[y] & s[y+1];
1380 +2*(*(p -1)) + 4*(*p ) + 2*(*(p +1))
1384 #ifdef DEBUG_DERING_THRESHOLD 1385 __asm__ volatile(
"emms\n\t":);
1387 static uint64_t numPixels=0;
1388 if(x!=1 && x!=8 && y!=1 && y!=8) numPixels++;
1393 static int numSkipped=0;
1394 static int errorSum=0;
1395 static int worstQP=0;
1396 static int worstRange=0;
1397 static int worstDiff=0;
1399 int absDiff=
FFABS(diff);
1402 if(x==1 || x==8 || y==1 || y==8)
continue;
1405 if(absDiff > worstDiff){
1408 worstRange= max-
min;
1412 if(1024LL*1024LL*1024LL % numSkipped == 0){
1414 "wRange:%d, wDiff:%d, relSkip:%1.3f\n",
1415 (
float)errorSum/numSkipped, numSkipped, worstQP, worstRange,
1416 worstDiff, (
float)numSkipped/numPixels);
1421 if (*p + QP2 < f) *p= *p + QP2;
1422 else if(*p - QP2 > f) *p= *p - QP2;
1427 #ifdef DEBUG_DERING_THRESHOLD 1435 *p =
FFMIN(*p + 20, 255);
1441 #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 1443 #endif //TEMPLATE_PP_ALTIVEC 1453 #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 1456 "lea (%0, %1), %%"FF_REG_a
" \n\t" 1457 "lea (%%"FF_REG_a
", %1, 4), %%"FF_REG_c
"\n\t" 1461 "movq (%0), %%mm0 \n\t" 1462 "movq (%%"FF_REG_a
", %1), %%mm1 \n\t" 1464 "movq %%mm0, (%%"FF_REG_a
") \n\t" 1465 "movq (%0, %1, 4), %%mm0 \n\t" 1467 "movq %%mm1, (%%"FF_REG_a
", %1, 2) \n\t" 1468 "movq (%%"FF_REG_c
", %1), %%mm1 \n\t" 1470 "movq %%mm0, (%%"FF_REG_c
") \n\t" 1471 "movq (%0, %1, 8), %%mm0 \n\t" 1473 "movq %%mm1, (%%"FF_REG_c
", %1, 2) \n\t" 1476 :
"%"FF_REG_a,
"%"FF_REG_c
1485 *(uint32_t*)&
src[
stride*1]= (a|b) - (((a^
b)&0xFEFEFEFEUL)>>1);
1487 *(uint32_t*)&
src[
stride*3]= (a|b) - (((a^
b)&0xFEFEFEFEUL)>>1);
1489 *(uint32_t*)&
src[
stride*5]= (a|b) - (((a^
b)&0xFEFEFEFEUL)>>1);
1491 *(uint32_t*)&
src[
stride*7]= (a|b) - (((a^
b)&0xFEFEFEFEUL)>>1);
1506 #if TEMPLATE_PP_SSE2 || TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 1509 "lea (%0, %1), %%"FF_REG_a
" \n\t" 1510 "lea (%%"FF_REG_a
", %1, 4), %%"FF_REG_d
"\n\t" 1511 "lea (%%"FF_REG_d
", %1, 4), %%"FF_REG_c
"\n\t" 1512 "add %1, %%"FF_REG_c
" \n\t" 1513 #if TEMPLATE_PP_SSE2 1514 "pxor %%xmm7, %%xmm7 \n\t" 1515 #define REAL_DEINT_CUBIC(a,b,c,d,e)\ 1516 "movq " #a ", %%xmm0 \n\t"\ 1517 "movq " #b ", %%xmm1 \n\t"\ 1518 "movq " #d ", %%xmm2 \n\t"\ 1519 "movq " #e ", %%xmm3 \n\t"\ 1520 "pavgb %%xmm2, %%xmm1 \n\t"\ 1521 "pavgb %%xmm3, %%xmm0 \n\t"\ 1522 "punpcklbw %%xmm7, %%xmm0 \n\t"\ 1523 "punpcklbw %%xmm7, %%xmm1 \n\t"\ 1524 "psubw %%xmm1, %%xmm0 \n\t"\ 1525 "psraw $3, %%xmm0 \n\t"\ 1526 "psubw %%xmm0, %%xmm1 \n\t"\ 1527 "packuswb %%xmm1, %%xmm1 \n\t"\ 1528 "movlps %%xmm1, " #c " \n\t" 1529 #else //TEMPLATE_PP_SSE2 1530 "pxor %%mm7, %%mm7 \n\t" 1534 #define REAL_DEINT_CUBIC(a,b,c,d,e)\ 1535 "movq " #a ", %%mm0 \n\t"\ 1536 "movq " #b ", %%mm1 \n\t"\ 1537 "movq " #d ", %%mm2 \n\t"\ 1538 "movq " #e ", %%mm3 \n\t"\ 1539 PAVGB(%%mm2, %%mm1) \ 1540 PAVGB(%%mm3, %%mm0) \ 1541 "movq %%mm0, %%mm2 \n\t"\ 1542 "punpcklbw %%mm7, %%mm0 \n\t"\ 1543 "punpckhbw %%mm7, %%mm2 \n\t"\ 1544 "movq %%mm1, %%mm3 \n\t"\ 1545 "punpcklbw %%mm7, %%mm1 \n\t"\ 1546 "punpckhbw %%mm7, %%mm3 \n\t"\ 1547 "psubw %%mm1, %%mm0 \n\t" \ 1548 "psubw %%mm3, %%mm2 \n\t" \ 1549 "psraw $3, %%mm0 \n\t" \ 1550 "psraw $3, %%mm2 \n\t" \ 1551 "psubw %%mm0, %%mm1 \n\t" \ 1552 "psubw %%mm2, %%mm3 \n\t" \ 1553 "packuswb %%mm3, %%mm1 \n\t"\ 1554 "movq %%mm1, " #c " \n\t" 1555 #endif //TEMPLATE_PP_SSE2 1556 #define DEINT_CUBIC(a,b,c,d,e) REAL_DEINT_CUBIC(a,b,c,d,e) 1558 DEINT_CUBIC((%0) , (%%FF_REGa, %1), (%%FF_REGa, %1, 2), (%0, %1, 4) , (%%FF_REGd, %1))
1559 DEINT_CUBIC((%%FF_REGa, %1), (%0, %1, 4) , (%%FF_REGd) , (%%FF_REGd, %1), (%0, %1, 8))
1560 DEINT_CUBIC((%0, %1, 4) , (%%FF_REGd, %1), (%%FF_REGd, %1, 2), (%0, %1, 8) , (%%FF_REGc))
1561 DEINT_CUBIC((%%FF_REGd, %1), (%0, %1, 8) , (%%FF_REGd, %1, 4), (%%FF_REGc) , (%%FF_REGc, %1, 2))
1566 XMM_CLOBBERS(
"%xmm0",
"%xmm1",
"%xmm2",
"%xmm3",
"%xmm7",)
1568 "%"FF_REG_a,
"%"FF_REG_d,
"%"FF_REG_c
1570 #undef REAL_DEINT_CUBIC 1571 #else //TEMPLATE_PP_SSE2 || TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 1581 #endif //TEMPLATE_PP_SSE2 || TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 1593 #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 1596 "lea (%0, %1), %%"FF_REG_a
" \n\t" 1597 "lea (%%"FF_REG_a
", %1, 4), %%"FF_REG_d
"\n\t" 1598 "pxor %%mm7, %%mm7 \n\t" 1599 "movq (%2), %%mm0 \n\t" 1603 #define REAL_DEINT_FF(a,b,c,d)\ 1604 "movq " #a ", %%mm1 \n\t"\ 1605 "movq " #b ", %%mm2 \n\t"\ 1606 "movq " #c ", %%mm3 \n\t"\ 1607 "movq " #d ", %%mm4 \n\t"\ 1608 PAVGB(%%mm3, %%mm1) \ 1609 PAVGB(%%mm4, %%mm0) \ 1610 "movq %%mm0, %%mm3 \n\t"\ 1611 "punpcklbw %%mm7, %%mm0 \n\t"\ 1612 "punpckhbw %%mm7, %%mm3 \n\t"\ 1613 "movq %%mm1, %%mm4 \n\t"\ 1614 "punpcklbw %%mm7, %%mm1 \n\t"\ 1615 "punpckhbw %%mm7, %%mm4 \n\t"\ 1616 "psllw $2, %%mm1 \n\t"\ 1617 "psllw $2, %%mm4 \n\t"\ 1618 "psubw %%mm0, %%mm1 \n\t"\ 1619 "psubw %%mm3, %%mm4 \n\t"\ 1620 "movq %%mm2, %%mm5 \n\t"\ 1621 "movq %%mm2, %%mm0 \n\t"\ 1622 "punpcklbw %%mm7, %%mm2 \n\t"\ 1623 "punpckhbw %%mm7, %%mm5 \n\t"\ 1624 "paddw %%mm2, %%mm1 \n\t"\ 1625 "paddw %%mm5, %%mm4 \n\t"\ 1626 "psraw $2, %%mm1 \n\t"\ 1627 "psraw $2, %%mm4 \n\t"\ 1628 "packuswb %%mm4, %%mm1 \n\t"\ 1629 "movq %%mm1, " #b " \n\t"\ 1631 #define DEINT_FF(a,b,c,d) REAL_DEINT_FF(a,b,c,d) 1633 DEINT_FF((%0) , (%%FF_REGa) , (%%FF_REGa, %1), (%%FF_REGa, %1, 2))
1634 DEINT_FF((%%FF_REGa, %1), (%%FF_REGa, %1, 2), (%0, %1, 4) , (%%FF_REGd) )
1635 DEINT_FF((%0, %1, 4) , (%%FF_REGd) , (%%FF_REGd, %1), (%%FF_REGd, %1, 2))
1636 DEINT_FF((%%FF_REGd, %1), (%%FF_REGd, %1, 2), (%0, %1, 8) , (%%FF_REGd, %1, 4))
1638 "movq %%mm0, (%2) \n\t" 1640 :
"%"FF_REG_a,
"%"FF_REG_d
1642 #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 1660 #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 1672 #if (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS 1675 "lea (%0, %1), %%"FF_REG_a
" \n\t" 1676 "lea (%%"FF_REG_a
", %1, 4), %%"FF_REG_d
"\n\t" 1677 "pxor %%mm7, %%mm7 \n\t" 1678 "movq (%2), %%mm0 \n\t" 1679 "movq (%3), %%mm1 \n\t" 1683 #define REAL_DEINT_L5(t1,t2,a,b,c)\ 1684 "movq " #a ", %%mm2 \n\t"\ 1685 "movq " #b ", %%mm3 \n\t"\ 1686 "movq " #c ", %%mm4 \n\t"\ 1689 "movq %%mm2, %%mm5 \n\t"\ 1690 "movq %%mm2, " #t1 " \n\t"\ 1691 "punpcklbw %%mm7, %%mm2 \n\t"\ 1692 "punpckhbw %%mm7, %%mm5 \n\t"\ 1693 "movq %%mm2, %%mm6 \n\t"\ 1694 "paddw %%mm2, %%mm2 \n\t"\ 1695 "paddw %%mm6, %%mm2 \n\t"\ 1696 "movq %%mm5, %%mm6 \n\t"\ 1697 "paddw %%mm5, %%mm5 \n\t"\ 1698 "paddw %%mm6, %%mm5 \n\t"\ 1699 "movq %%mm3, %%mm6 \n\t"\ 1700 "punpcklbw %%mm7, %%mm3 \n\t"\ 1701 "punpckhbw %%mm7, %%mm6 \n\t"\ 1702 "paddw %%mm3, %%mm3 \n\t"\ 1703 "paddw %%mm6, %%mm6 \n\t"\ 1704 "paddw %%mm3, %%mm2 \n\t"\ 1705 "paddw %%mm6, %%mm5 \n\t"\ 1706 "movq %%mm4, %%mm6 \n\t"\ 1707 "punpcklbw %%mm7, %%mm4 \n\t"\ 1708 "punpckhbw %%mm7, %%mm6 \n\t"\ 1709 "psubw %%mm4, %%mm2 \n\t"\ 1710 "psubw %%mm6, %%mm5 \n\t"\ 1711 "psraw $2, %%mm2 \n\t"\ 1712 "psraw $2, %%mm5 \n\t"\ 1713 "packuswb %%mm5, %%mm2 \n\t"\ 1714 "movq %%mm2, " #a " \n\t"\ 1716 #define DEINT_L5(t1,t2,a,b,c) REAL_DEINT_L5(t1,t2,a,b,c) 1718 DEINT_L5(%%mm0, %%mm1, (%0) , (%%FF_REGa) , (%%FF_REGa, %1) )
1719 DEINT_L5(%%mm1, %%mm0, (%%FF_REGa) , (%%FF_REGa, %1) , (%%FF_REGa, %1, 2))
1720 DEINT_L5(%%mm0, %%mm1, (%%FF_REGa, %1) , (%%FF_REGa, %1, 2), (%0, %1, 4) )
1721 DEINT_L5(%%mm1, %%mm0, (%%FF_REGa, %1, 2), (%0, %1, 4) , (%%FF_REGd) )
1722 DEINT_L5(%%mm0, %%mm1, (%0, %1, 4) , (%%FF_REGd) , (%%FF_REGd, %1) )
1723 DEINT_L5(%%mm1, %%mm0, (%%FF_REGd) , (%%FF_REGd, %1) , (%%FF_REGd, %1, 2))
1724 DEINT_L5(%%mm0, %%mm1, (%%FF_REGd, %1) , (%%FF_REGd, %1, 2), (%0, %1, 8) )
1725 DEINT_L5(%%mm1, %%mm0, (%%FF_REGd, %1, 2), (%0, %1, 8) , (%%FF_REGd, %1, 4))
1727 "movq %%mm0, (%2) \n\t" 1728 "movq %%mm1, (%3) \n\t" 1730 :
"%"FF_REG_a,
"%"FF_REG_d
1732 #else //(TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS 1761 #endif //(TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS 1773 #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 1776 "lea (%0, %1), %%"FF_REG_a
" \n\t" 1777 "lea (%%"FF_REG_a
", %1, 4), %%"FF_REG_d
"\n\t" 1781 "movq (%2), %%mm0 \n\t" 1782 "movq (%%"FF_REG_a
"), %%mm1 \n\t" 1784 "movq (%0), %%mm2 \n\t" 1786 "movq %%mm0, (%0) \n\t" 1787 "movq (%%"FF_REG_a
", %1), %%mm0 \n\t" 1790 "movq %%mm2, (%%"FF_REG_a
") \n\t" 1791 "movq (%%"FF_REG_a
", %1, 2), %%mm2 \n\t" 1794 "movq %%mm1, (%%"FF_REG_a
", %1) \n\t" 1795 "movq (%0, %1, 4), %%mm1 \n\t" 1798 "movq %%mm0, (%%"FF_REG_a
", %1, 2) \n\t" 1799 "movq (%%"FF_REG_d
"), %%mm0 \n\t" 1802 "movq %%mm2, (%0, %1, 4) \n\t" 1803 "movq (%%"FF_REG_d
", %1), %%mm2 \n\t" 1806 "movq %%mm1, (%%"FF_REG_d
") \n\t" 1807 "movq (%%"FF_REG_d
", %1, 2), %%mm1 \n\t" 1810 "movq %%mm0, (%%"FF_REG_d
", %1) \n\t" 1811 "movq (%0, %1, 8), %%mm0 \n\t" 1814 "movq %%mm2, (%%"FF_REG_d
", %1, 2) \n\t" 1815 "movq %%mm1, (%2) \n\t" 1818 :
"%"FF_REG_a,
"%"FF_REG_d
1820 #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 1828 a= (a&
c) + (((a^c)&0xFEFEFEFEUL)>>1);
1829 *(uint32_t*)&
src[
stride*0]= (a|b) - (((a^
b)&0xFEFEFEFEUL)>>1);
1832 b= (a&
b) + (((a^b)&0xFEFEFEFEUL)>>1);
1833 *(uint32_t*)&
src[
stride*1]= (c|b) - (((c^
b)&0xFEFEFEFEUL)>>1);
1836 c= (b&
c) + (((b^c)&0xFEFEFEFEUL)>>1);
1837 *(uint32_t*)&
src[
stride*2]= (c|a) - (((c^
a)&0xFEFEFEFEUL)>>1);
1840 a= (a&
c) + (((a^c)&0xFEFEFEFEUL)>>1);
1841 *(uint32_t*)&
src[
stride*3]= (a|b) - (((a^
b)&0xFEFEFEFEUL)>>1);
1844 b= (a&
b) + (((a^b)&0xFEFEFEFEUL)>>1);
1845 *(uint32_t*)&
src[
stride*4]= (c|b) - (((c^
b)&0xFEFEFEFEUL)>>1);
1848 c= (b&
c) + (((b^c)&0xFEFEFEFEUL)>>1);
1849 *(uint32_t*)&
src[
stride*5]= (c|a) - (((c^
a)&0xFEFEFEFEUL)>>1);
1852 a= (a&
c) + (((a^c)&0xFEFEFEFEUL)>>1);
1853 *(uint32_t*)&
src[
stride*6]= (a|b) - (((a^
b)&0xFEFEFEFEUL)>>1);
1856 b= (a&
b) + (((a^b)&0xFEFEFEFEUL)>>1);
1857 *(uint32_t*)&
src[
stride*7]= (c|b) - (((c^
b)&0xFEFEFEFEUL)>>1);
1863 #endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 1876 #if TEMPLATE_PP_MMXEXT 1878 "lea (%0, %1), %%"FF_REG_a
" \n\t" 1879 "lea (%%"FF_REG_a
", %1, 4), %%"FF_REG_d
"\n\t" 1883 "movq (%0), %%mm0 \n\t" 1884 "movq (%%"FF_REG_a
", %1), %%mm2 \n\t" 1885 "movq (%%"FF_REG_a
"), %%mm1 \n\t" 1886 "movq %%mm0, %%mm3 \n\t" 1887 "pmaxub %%mm1, %%mm0 \n\t" 1888 "pminub %%mm3, %%mm1 \n\t" 1889 "pmaxub %%mm2, %%mm1 \n\t" 1890 "pminub %%mm1, %%mm0 \n\t" 1891 "movq %%mm0, (%%"FF_REG_a
") \n\t" 1893 "movq (%0, %1, 4), %%mm0 \n\t" 1894 "movq (%%"FF_REG_a
", %1, 2), %%mm1 \n\t" 1895 "movq %%mm2, %%mm3 \n\t" 1896 "pmaxub %%mm1, %%mm2 \n\t" 1897 "pminub %%mm3, %%mm1 \n\t" 1898 "pmaxub %%mm0, %%mm1 \n\t" 1899 "pminub %%mm1, %%mm2 \n\t" 1900 "movq %%mm2, (%%"FF_REG_a
", %1, 2) \n\t" 1902 "movq (%%"FF_REG_d
"), %%mm2 \n\t" 1903 "movq (%%"FF_REG_d
", %1), %%mm1 \n\t" 1904 "movq %%mm2, %%mm3 \n\t" 1905 "pmaxub %%mm0, %%mm2 \n\t" 1906 "pminub %%mm3, %%mm0 \n\t" 1907 "pmaxub %%mm1, %%mm0 \n\t" 1908 "pminub %%mm0, %%mm2 \n\t" 1909 "movq %%mm2, (%%"FF_REG_d
") \n\t" 1911 "movq (%%"FF_REG_d
", %1, 2), %%mm2 \n\t" 1912 "movq (%0, %1, 8), %%mm0 \n\t" 1913 "movq %%mm2, %%mm3 \n\t" 1914 "pmaxub %%mm0, %%mm2 \n\t" 1915 "pminub %%mm3, %%mm0 \n\t" 1916 "pmaxub %%mm1, %%mm0 \n\t" 1917 "pminub %%mm0, %%mm2 \n\t" 1918 "movq %%mm2, (%%"FF_REG_d
", %1, 2) \n\t" 1922 :
"%"FF_REG_a,
"%"FF_REG_d
1925 #else // MMX without MMX2 1927 "lea (%0, %1), %%"FF_REG_a
" \n\t" 1928 "lea (%%"FF_REG_a
", %1, 4), %%"FF_REG_d
"\n\t" 1931 "pxor %%mm7, %%mm7 \n\t" 1933 #define REAL_MEDIAN(a,b,c)\ 1934 "movq " #a ", %%mm0 \n\t"\ 1935 "movq " #b ", %%mm2 \n\t"\ 1936 "movq " #c ", %%mm1 \n\t"\ 1937 "movq %%mm0, %%mm3 \n\t"\ 1938 "movq %%mm1, %%mm4 \n\t"\ 1939 "movq %%mm2, %%mm5 \n\t"\ 1940 "psubusb %%mm1, %%mm3 \n\t"\ 1941 "psubusb %%mm2, %%mm4 \n\t"\ 1942 "psubusb %%mm0, %%mm5 \n\t"\ 1943 "pcmpeqb %%mm7, %%mm3 \n\t"\ 1944 "pcmpeqb %%mm7, %%mm4 \n\t"\ 1945 "pcmpeqb %%mm7, %%mm5 \n\t"\ 1946 "movq %%mm3, %%mm6 \n\t"\ 1947 "pxor %%mm4, %%mm3 \n\t"\ 1948 "pxor %%mm5, %%mm4 \n\t"\ 1949 "pxor %%mm6, %%mm5 \n\t"\ 1950 "por %%mm3, %%mm1 \n\t"\ 1951 "por %%mm4, %%mm2 \n\t"\ 1952 "por %%mm5, %%mm0 \n\t"\ 1953 "pand %%mm2, %%mm0 \n\t"\ 1954 "pand %%mm1, %%mm0 \n\t"\ 1955 "movq %%mm0, " #b " \n\t" 1956 #define MEDIAN(a,b,c) REAL_MEDIAN(a,b,c) 1958 MEDIAN((%0) , (%%FF_REGa) , (%%FF_REGa, %1))
1959 MEDIAN((%%FF_REGa, %1), (%%FF_REGa, %1, 2), (%0, %1, 4))
1960 MEDIAN((%0, %1, 4) , (%%FF_REGd) , (%%FF_REGd, %1))
1961 MEDIAN((%%FF_REGd, %1), (%%FF_REGd, %1, 2), (%0, %1, 8))
1964 :
"%"FF_REG_a,
"%"FF_REG_d
1966 #endif //TEMPLATE_PP_MMXEXT 1967 #else //TEMPLATE_PP_MMX 1973 for (y=0; y<4; y++){
1974 int a,
b,
c, d, e,
f;
1981 colsrc[
stride ] = (a|(d^
f)) & (b|(d^e)) & (c|(e^
f));
1986 #endif //TEMPLATE_PP_MMX 1996 "lea (%0, %1), %%"FF_REG_a
" \n\t" 1999 "movq (%0), %%mm0 \n\t" 2000 "movq (%%"FF_REG_a
"), %%mm1 \n\t" 2001 "movq %%mm0, %%mm2 \n\t" 2002 "punpcklbw %%mm1, %%mm0 \n\t" 2003 "punpckhbw %%mm1, %%mm2 \n\t" 2005 "movq (%%"FF_REG_a
", %1), %%mm1 \n\t" 2006 "movq (%%"FF_REG_a
", %1, 2), %%mm3 \n\t" 2007 "movq %%mm1, %%mm4 \n\t" 2008 "punpcklbw %%mm3, %%mm1 \n\t" 2009 "punpckhbw %%mm3, %%mm4 \n\t" 2011 "movq %%mm0, %%mm3 \n\t" 2012 "punpcklwd %%mm1, %%mm0 \n\t" 2013 "punpckhwd %%mm1, %%mm3 \n\t" 2014 "movq %%mm2, %%mm1 \n\t" 2015 "punpcklwd %%mm4, %%mm2 \n\t" 2016 "punpckhwd %%mm4, %%mm1 \n\t" 2018 "movd %%mm0, 128(%2) \n\t" 2019 "psrlq $32, %%mm0 \n\t" 2020 "movd %%mm0, 144(%2) \n\t" 2021 "movd %%mm3, 160(%2) \n\t" 2022 "psrlq $32, %%mm3 \n\t" 2023 "movd %%mm3, 176(%2) \n\t" 2024 "movd %%mm3, 48(%3) \n\t" 2025 "movd %%mm2, 192(%2) \n\t" 2026 "movd %%mm2, 64(%3) \n\t" 2027 "psrlq $32, %%mm2 \n\t" 2028 "movd %%mm2, 80(%3) \n\t" 2029 "movd %%mm1, 96(%3) \n\t" 2030 "psrlq $32, %%mm1 \n\t" 2031 "movd %%mm1, 112(%3) \n\t" 2033 "lea (%%"FF_REG_a
", %1, 4), %%"FF_REG_a
"\n\t" 2035 "movq (%0, %1, 4), %%mm0 \n\t" 2036 "movq (%%"FF_REG_a
"), %%mm1 \n\t" 2037 "movq %%mm0, %%mm2 \n\t" 2038 "punpcklbw %%mm1, %%mm0 \n\t" 2039 "punpckhbw %%mm1, %%mm2 \n\t" 2041 "movq (%%"FF_REG_a
", %1), %%mm1 \n\t" 2042 "movq (%%"FF_REG_a
", %1, 2), %%mm3 \n\t" 2043 "movq %%mm1, %%mm4 \n\t" 2044 "punpcklbw %%mm3, %%mm1 \n\t" 2045 "punpckhbw %%mm3, %%mm4 \n\t" 2047 "movq %%mm0, %%mm3 \n\t" 2048 "punpcklwd %%mm1, %%mm0 \n\t" 2049 "punpckhwd %%mm1, %%mm3 \n\t" 2050 "movq %%mm2, %%mm1 \n\t" 2051 "punpcklwd %%mm4, %%mm2 \n\t" 2052 "punpckhwd %%mm4, %%mm1 \n\t" 2054 "movd %%mm0, 132(%2) \n\t" 2055 "psrlq $32, %%mm0 \n\t" 2056 "movd %%mm0, 148(%2) \n\t" 2057 "movd %%mm3, 164(%2) \n\t" 2058 "psrlq $32, %%mm3 \n\t" 2059 "movd %%mm3, 180(%2) \n\t" 2060 "movd %%mm3, 52(%3) \n\t" 2061 "movd %%mm2, 196(%2) \n\t" 2062 "movd %%mm2, 68(%3) \n\t" 2063 "psrlq $32, %%mm2 \n\t" 2064 "movd %%mm2, 84(%3) \n\t" 2065 "movd %%mm1, 100(%3) \n\t" 2066 "psrlq $32, %%mm1 \n\t" 2067 "movd %%mm1, 116(%3) \n\t" 2070 ::
"r" (
src),
"r" ((
x86_reg)srcStride),
"r" (dst1),
"r" (dst2)
2081 "lea (%0, %1), %%"FF_REG_a
" \n\t" 2082 "lea (%%"FF_REG_a
",%1,4), %%"FF_REG_d
" \n\t" 2085 "movq (%2), %%mm0 \n\t" 2086 "movq 16(%2), %%mm1 \n\t" 2087 "movq %%mm0, %%mm2 \n\t" 2088 "punpcklbw %%mm1, %%mm0 \n\t" 2089 "punpckhbw %%mm1, %%mm2 \n\t" 2091 "movq 32(%2), %%mm1 \n\t" 2092 "movq 48(%2), %%mm3 \n\t" 2093 "movq %%mm1, %%mm4 \n\t" 2094 "punpcklbw %%mm3, %%mm1 \n\t" 2095 "punpckhbw %%mm3, %%mm4 \n\t" 2097 "movq %%mm0, %%mm3 \n\t" 2098 "punpcklwd %%mm1, %%mm0 \n\t" 2099 "punpckhwd %%mm1, %%mm3 \n\t" 2100 "movq %%mm2, %%mm1 \n\t" 2101 "punpcklwd %%mm4, %%mm2 \n\t" 2102 "punpckhwd %%mm4, %%mm1 \n\t" 2104 "movd %%mm0, (%0) \n\t" 2105 "psrlq $32, %%mm0 \n\t" 2106 "movd %%mm0, (%%"FF_REG_a
") \n\t" 2107 "movd %%mm3, (%%"FF_REG_a
", %1) \n\t" 2108 "psrlq $32, %%mm3 \n\t" 2109 "movd %%mm3, (%%"FF_REG_a
", %1, 2) \n\t" 2110 "movd %%mm2, (%0, %1, 4) \n\t" 2111 "psrlq $32, %%mm2 \n\t" 2112 "movd %%mm2, (%%"FF_REG_d
") \n\t" 2113 "movd %%mm1, (%%"FF_REG_d
", %1) \n\t" 2114 "psrlq $32, %%mm1 \n\t" 2115 "movd %%mm1, (%%"FF_REG_d
", %1, 2) \n\t" 2118 "movq 64(%2), %%mm0 \n\t" 2119 "movq 80(%2), %%mm1 \n\t" 2120 "movq %%mm0, %%mm2 \n\t" 2121 "punpcklbw %%mm1, %%mm0 \n\t" 2122 "punpckhbw %%mm1, %%mm2 \n\t" 2124 "movq 96(%2), %%mm1 \n\t" 2125 "movq 112(%2), %%mm3 \n\t" 2126 "movq %%mm1, %%mm4 \n\t" 2127 "punpcklbw %%mm3, %%mm1 \n\t" 2128 "punpckhbw %%mm3, %%mm4 \n\t" 2130 "movq %%mm0, %%mm3 \n\t" 2131 "punpcklwd %%mm1, %%mm0 \n\t" 2132 "punpckhwd %%mm1, %%mm3 \n\t" 2133 "movq %%mm2, %%mm1 \n\t" 2134 "punpcklwd %%mm4, %%mm2 \n\t" 2135 "punpckhwd %%mm4, %%mm1 \n\t" 2137 "movd %%mm0, 4(%0) \n\t" 2138 "psrlq $32, %%mm0 \n\t" 2139 "movd %%mm0, 4(%%"FF_REG_a
") \n\t" 2140 "movd %%mm3, 4(%%"FF_REG_a
", %1) \n\t" 2141 "psrlq $32, %%mm3 \n\t" 2142 "movd %%mm3, 4(%%"FF_REG_a
", %1, 2) \n\t" 2143 "movd %%mm2, 4(%0, %1, 4) \n\t" 2144 "psrlq $32, %%mm2 \n\t" 2145 "movd %%mm2, 4(%%"FF_REG_d
") \n\t" 2146 "movd %%mm1, 4(%%"FF_REG_d
", %1) \n\t" 2147 "psrlq $32, %%mm1 \n\t" 2148 "movd %%mm1, 4(%%"FF_REG_d
", %1, 2) \n\t" 2150 ::
"r" (dst),
"r" ((
x86_reg)dstStride),
"r" (
src)
2151 :
"%"FF_REG_a,
"%"FF_REG_d
2154 #endif //TEMPLATE_PP_MMX 2157 #if !TEMPLATE_PP_ALTIVEC 2159 uint8_t *tempBlurred, uint32_t *tempBlurredPast,
const int *maxNoise)
2162 tempBlurredPast[127]= maxNoise[0];
2163 tempBlurredPast[128]= maxNoise[1];
2164 tempBlurredPast[129]= maxNoise[2];
2166 #define FAST_L2_DIFF 2168 #if (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS 2170 "lea (%2, %2, 2), %%"FF_REG_a
" \n\t" 2171 "lea (%2, %2, 4), %%"FF_REG_d
" \n\t" 2172 "lea (%%"FF_REG_d
", %2, 2), %%"FF_REG_c
"\n\t" 2176 #ifdef L1_DIFF //needs mmx2 2177 "movq (%0), %%mm0 \n\t" 2178 "psadbw (%1), %%mm0 \n\t" 2179 "movq (%0, %2), %%mm1 \n\t" 2180 "psadbw (%1, %2), %%mm1 \n\t" 2181 "movq (%0, %2, 2), %%mm2 \n\t" 2182 "psadbw (%1, %2, 2), %%mm2 \n\t" 2183 "movq (%0, %%"FF_REG_a
"), %%mm3 \n\t" 2184 "psadbw (%1, %%"FF_REG_a
"), %%mm3 \n\t" 2186 "movq (%0, %2, 4), %%mm4 \n\t" 2187 "paddw %%mm1, %%mm0 \n\t" 2188 "psadbw (%1, %2, 4), %%mm4 \n\t" 2189 "movq (%0, %%"FF_REG_d
"), %%mm5 \n\t" 2190 "paddw %%mm2, %%mm0 \n\t" 2191 "psadbw (%1, %%"FF_REG_d
"), %%mm5 \n\t" 2192 "movq (%0, %%"FF_REG_a
", 2), %%mm6 \n\t" 2193 "paddw %%mm3, %%mm0 \n\t" 2194 "psadbw (%1, %%"FF_REG_a
", 2), %%mm6 \n\t" 2195 "movq (%0, %%"FF_REG_c
"), %%mm7 \n\t" 2196 "paddw %%mm4, %%mm0 \n\t" 2197 "psadbw (%1, %%"FF_REG_c
"), %%mm7 \n\t" 2198 "paddw %%mm5, %%mm6 \n\t" 2199 "paddw %%mm7, %%mm6 \n\t" 2200 "paddw %%mm6, %%mm0 \n\t" 2202 #if defined (FAST_L2_DIFF) 2203 "pcmpeqb %%mm7, %%mm7 \n\t" 2204 "movq "MANGLE(b80)
", %%mm6 \n\t" 2205 "pxor %%mm0, %%mm0 \n\t" 2206 #define REAL_L2_DIFF_CORE(a, b)\ 2207 "movq " #a ", %%mm5 \n\t"\ 2208 "movq " #b ", %%mm2 \n\t"\ 2209 "pxor %%mm7, %%mm2 \n\t"\ 2210 PAVGB(%%mm2, %%mm5)\ 2211 "paddb %%mm6, %%mm5 \n\t"\ 2212 "movq %%mm5, %%mm2 \n\t"\ 2213 "psllw $8, %%mm5 \n\t"\ 2214 "pmaddwd %%mm5, %%mm5 \n\t"\ 2215 "pmaddwd %%mm2, %%mm2 \n\t"\ 2216 "paddd %%mm2, %%mm5 \n\t"\ 2217 "psrld $14, %%mm5 \n\t"\ 2218 "paddd %%mm5, %%mm0 \n\t" 2220 #else //defined (FAST_L2_DIFF) 2221 "pxor %%mm7, %%mm7 \n\t" 2222 "pxor %%mm0, %%mm0 \n\t" 2223 #define REAL_L2_DIFF_CORE(a, b)\ 2224 "movq " #a ", %%mm5 \n\t"\ 2225 "movq " #b ", %%mm2 \n\t"\ 2226 "movq %%mm5, %%mm1 \n\t"\ 2227 "movq %%mm2, %%mm3 \n\t"\ 2228 "punpcklbw %%mm7, %%mm5 \n\t"\ 2229 "punpckhbw %%mm7, %%mm1 \n\t"\ 2230 "punpcklbw %%mm7, %%mm2 \n\t"\ 2231 "punpckhbw %%mm7, %%mm3 \n\t"\ 2232 "psubw %%mm2, %%mm5 \n\t"\ 2233 "psubw %%mm3, %%mm1 \n\t"\ 2234 "pmaddwd %%mm5, %%mm5 \n\t"\ 2235 "pmaddwd %%mm1, %%mm1 \n\t"\ 2236 "paddd %%mm1, %%mm5 \n\t"\ 2237 "paddd %%mm5, %%mm0 \n\t" 2239 #endif //defined (FAST_L2_DIFF) 2241 #define L2_DIFF_CORE(a, b) REAL_L2_DIFF_CORE(a, b) 2243 L2_DIFF_CORE((%0) , (%1))
2244 L2_DIFF_CORE((%0, %2) , (%1, %2))
2245 L2_DIFF_CORE((%0, %2, 2) , (%1, %2, 2))
2246 L2_DIFF_CORE((%0, %%FF_REGa) , (%1, %%FF_REGa))
2247 L2_DIFF_CORE((%0, %2, 4) , (%1, %2, 4))
2248 L2_DIFF_CORE((%0, %%FF_REGd) , (%1, %%FF_REGd))
2249 L2_DIFF_CORE((%0, %%FF_REGa,2), (%1, %%FF_REGa,2))
2250 L2_DIFF_CORE((%0, %%FF_REGc) , (%1, %%FF_REGc))
2254 "movq %%mm0, %%mm4 \n\t" 2255 "psrlq $32, %%mm0 \n\t" 2256 "paddd %%mm0, %%mm4 \n\t" 2257 "movd %%mm4, %%ecx \n\t" 2258 "shll $2, %%ecx \n\t" 2259 "mov %3, %%"FF_REG_d
" \n\t" 2260 "addl -4(%%"FF_REG_d
"), %%ecx \n\t" 2261 "addl 4(%%"FF_REG_d
"), %%ecx \n\t" 2262 "addl -1024(%%"FF_REG_d
"), %%ecx \n\t" 2263 "addl $4, %%ecx \n\t" 2264 "addl 1024(%%"FF_REG_d
"), %%ecx \n\t" 2265 "shrl $3, %%ecx \n\t" 2266 "movl %%ecx, (%%"FF_REG_d
") \n\t" 2271 "cmpl 512(%%"FF_REG_d
"), %%ecx \n\t" 2273 "cmpl 516(%%"FF_REG_d
"), %%ecx \n\t" 2276 "lea (%%"FF_REG_a
", %2, 2), %%"FF_REG_d
"\n\t" 2277 "lea (%%"FF_REG_d
", %2, 2), %%"FF_REG_c
"\n\t" 2278 "movq (%0), %%mm0 \n\t" 2279 "movq (%0, %2), %%mm1 \n\t" 2280 "movq (%0, %2, 2), %%mm2 \n\t" 2281 "movq (%0, %%"FF_REG_a
"), %%mm3 \n\t" 2282 "movq (%0, %2, 4), %%mm4 \n\t" 2283 "movq (%0, %%"FF_REG_d
"), %%mm5 \n\t" 2284 "movq (%0, %%"FF_REG_a
", 2), %%mm6 \n\t" 2285 "movq (%0, %%"FF_REG_c
"), %%mm7 \n\t" 2286 "movq %%mm0, (%1) \n\t" 2287 "movq %%mm1, (%1, %2) \n\t" 2288 "movq %%mm2, (%1, %2, 2) \n\t" 2289 "movq %%mm3, (%1, %%"FF_REG_a
") \n\t" 2290 "movq %%mm4, (%1, %2, 4) \n\t" 2291 "movq %%mm5, (%1, %%"FF_REG_d
") \n\t" 2292 "movq %%mm6, (%1, %%"FF_REG_a
", 2) \n\t" 2293 "movq %%mm7, (%1, %%"FF_REG_c
") \n\t" 2297 "lea (%%"FF_REG_a
", %2, 2), %%"FF_REG_d
"\n\t" 2298 "lea (%%"FF_REG_d
", %2, 2), %%"FF_REG_c
"\n\t" 2299 "movq (%0), %%mm0 \n\t" 2301 "movq (%0, %2), %%mm1 \n\t" 2302 PAVGB((%1, %2), %%mm1)
2303 "movq (%0, %2, 2), %%mm2 \n\t" 2304 PAVGB((%1, %2, 2), %%mm2)
2305 "movq (%0, %%"FF_REG_a
"), %%mm3 \n\t" 2306 PAVGB((%1, %%FF_REGa), %%mm3)
2307 "movq (%0, %2, 4), %%mm4 \n\t" 2308 PAVGB((%1, %2, 4), %%mm4)
2309 "movq (%0, %%"FF_REG_d
"), %%mm5 \n\t" 2310 PAVGB((%1, %%FF_REGd), %%mm5)
2311 "movq (%0, %%"FF_REG_a
", 2), %%mm6 \n\t" 2312 PAVGB((%1, %%FF_REGa, 2), %%mm6)
2313 "movq (%0, %%"FF_REG_c
"), %%mm7 \n\t" 2314 PAVGB((%1, %%FF_REGc), %%mm7)
2315 "movq %%mm0, (%1) \n\t" 2316 "movq %%mm1, (%1, %2) \n\t" 2317 "movq %%mm2, (%1, %2, 2) \n\t" 2318 "movq %%mm3, (%1, %%"FF_REG_a
") \n\t" 2319 "movq %%mm4, (%1, %2, 4) \n\t" 2320 "movq %%mm5, (%1, %%"FF_REG_d
") \n\t" 2321 "movq %%mm6, (%1, %%"FF_REG_a
", 2) \n\t" 2322 "movq %%mm7, (%1, %%"FF_REG_c
") \n\t" 2323 "movq %%mm0, (%0) \n\t" 2324 "movq %%mm1, (%0, %2) \n\t" 2325 "movq %%mm2, (%0, %2, 2) \n\t" 2326 "movq %%mm3, (%0, %%"FF_REG_a
") \n\t" 2327 "movq %%mm4, (%0, %2, 4) \n\t" 2328 "movq %%mm5, (%0, %%"FF_REG_d
") \n\t" 2329 "movq %%mm6, (%0, %%"FF_REG_a
", 2) \n\t" 2330 "movq %%mm7, (%0, %%"FF_REG_c
") \n\t" 2334 "cmpl 508(%%"FF_REG_d
"), %%ecx \n\t" 2337 "lea (%%"FF_REG_a
", %2, 2), %%"FF_REG_d
"\n\t" 2338 "lea (%%"FF_REG_d
", %2, 2), %%"FF_REG_c
"\n\t" 2339 "movq (%0), %%mm0 \n\t" 2340 "movq (%0, %2), %%mm1 \n\t" 2341 "movq (%0, %2, 2), %%mm2 \n\t" 2342 "movq (%0, %%"FF_REG_a
"), %%mm3 \n\t" 2343 "movq (%1), %%mm4 \n\t" 2344 "movq (%1, %2), %%mm5 \n\t" 2345 "movq (%1, %2, 2), %%mm6 \n\t" 2346 "movq (%1, %%"FF_REG_a
"), %%mm7 \n\t" 2355 "movq %%mm0, (%1) \n\t" 2356 "movq %%mm1, (%1, %2) \n\t" 2357 "movq %%mm2, (%1, %2, 2) \n\t" 2358 "movq %%mm3, (%1, %%"FF_REG_a
") \n\t" 2359 "movq %%mm0, (%0) \n\t" 2360 "movq %%mm1, (%0, %2) \n\t" 2361 "movq %%mm2, (%0, %2, 2) \n\t" 2362 "movq %%mm3, (%0, %%"FF_REG_a
") \n\t" 2364 "movq (%0, %2, 4), %%mm0 \n\t" 2365 "movq (%0, %%"FF_REG_d
"), %%mm1 \n\t" 2366 "movq (%0, %%"FF_REG_a
", 2), %%mm2 \n\t" 2367 "movq (%0, %%"FF_REG_c
"), %%mm3 \n\t" 2368 "movq (%1, %2, 4), %%mm4 \n\t" 2369 "movq (%1, %%"FF_REG_d
"), %%mm5 \n\t" 2370 "movq (%1, %%"FF_REG_a
", 2), %%mm6 \n\t" 2371 "movq (%1, %%"FF_REG_c
"), %%mm7 \n\t" 2380 "movq %%mm0, (%1, %2, 4) \n\t" 2381 "movq %%mm1, (%1, %%"FF_REG_d
") \n\t" 2382 "movq %%mm2, (%1, %%"FF_REG_a
", 2) \n\t" 2383 "movq %%mm3, (%1, %%"FF_REG_c
") \n\t" 2384 "movq %%mm0, (%0, %2, 4) \n\t" 2385 "movq %%mm1, (%0, %%"FF_REG_d
") \n\t" 2386 "movq %%mm2, (%0, %%"FF_REG_a
", 2) \n\t" 2387 "movq %%mm3, (%0, %%"FF_REG_c
") \n\t" 2391 "lea (%%"FF_REG_a
", %2, 2), %%"FF_REG_d
"\n\t" 2392 "lea (%%"FF_REG_d
", %2, 2), %%"FF_REG_c
"\n\t" 2393 "movq (%0), %%mm0 \n\t" 2394 "movq (%0, %2), %%mm1 \n\t" 2395 "movq (%0, %2, 2), %%mm2 \n\t" 2396 "movq (%0, %%"FF_REG_a
"), %%mm3 \n\t" 2397 "movq (%1), %%mm4 \n\t" 2398 "movq (%1, %2), %%mm5 \n\t" 2399 "movq (%1, %2, 2), %%mm6 \n\t" 2400 "movq (%1, %%"FF_REG_a
"), %%mm7 \n\t" 2413 "movq %%mm0, (%1) \n\t" 2414 "movq %%mm1, (%1, %2) \n\t" 2415 "movq %%mm2, (%1, %2, 2) \n\t" 2416 "movq %%mm3, (%1, %%"FF_REG_a
") \n\t" 2417 "movq %%mm0, (%0) \n\t" 2418 "movq %%mm1, (%0, %2) \n\t" 2419 "movq %%mm2, (%0, %2, 2) \n\t" 2420 "movq %%mm3, (%0, %%"FF_REG_a
") \n\t" 2422 "movq (%0, %2, 4), %%mm0 \n\t" 2423 "movq (%0, %%"FF_REG_d
"), %%mm1 \n\t" 2424 "movq (%0, %%"FF_REG_a
", 2), %%mm2 \n\t" 2425 "movq (%0, %%"FF_REG_c
"), %%mm3 \n\t" 2426 "movq (%1, %2, 4), %%mm4 \n\t" 2427 "movq (%1, %%"FF_REG_d
"), %%mm5 \n\t" 2428 "movq (%1, %%"FF_REG_a
", 2), %%mm6 \n\t" 2429 "movq (%1, %%"FF_REG_c
"), %%mm7 \n\t" 2442 "movq %%mm0, (%1, %2, 4) \n\t" 2443 "movq %%mm1, (%1, %%"FF_REG_d
") \n\t" 2444 "movq %%mm2, (%1, %%"FF_REG_a
", 2) \n\t" 2445 "movq %%mm3, (%1, %%"FF_REG_c
") \n\t" 2446 "movq %%mm0, (%0, %2, 4) \n\t" 2447 "movq %%mm1, (%0, %%"FF_REG_d
") \n\t" 2448 "movq %%mm2, (%0, %%"FF_REG_a
", 2) \n\t" 2449 "movq %%mm3, (%0, %%"FF_REG_c
") \n\t" 2453 ::
"r" (
src),
"r" (tempBlurred),
"r"((
x86_reg)
stride),
"m" (tempBlurredPast)
2455 :
"%"FF_REG_a,
"%"FF_REG_d,
"%"FF_REG_c,
"memory" 2457 #else //(TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS 2480 +(*(tempBlurredPast-256))
2481 +(*(tempBlurredPast-1))+ (*(tempBlurredPast+1))
2482 +(*(tempBlurredPast+256))
2494 if(d > maxNoise[1]){
2495 if(d < maxNoise[2]){
2501 tempBlurred[ x + y*
stride ]=
2515 if(d < maxNoise[0]){
2521 tempBlurred[ x + y*
stride ]=
2523 (ref*7 + cur + 4)>>3;
2532 tempBlurred[ x + y*
stride ]=
2534 (ref*3 + cur + 2)>>2;
2540 #endif //(TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS 2542 #endif //TEMPLATE_PP_ALTIVEC 2549 int64_t dc_mask, eq_mask, both_masks;
2550 int64_t sums[10*8*2];
2554 "movq %0, %%mm7 \n\t" 2555 "movq %1, %%mm6 \n\t" 2556 : :
"m" (
c->mmxDcOffset[
c->nonBQP]),
"m" (
c->mmxDcThreshold[
c->nonBQP])
2560 "lea (%2, %3), %%"FF_REG_a
" \n\t" 2564 "movq (%2), %%mm0 \n\t" 2565 "movq (%%"FF_REG_a
"), %%mm1 \n\t" 2566 "movq %%mm1, %%mm3 \n\t" 2567 "movq %%mm1, %%mm4 \n\t" 2568 "psubb %%mm1, %%mm0 \n\t" 2569 "paddb %%mm7, %%mm0 \n\t" 2570 "pcmpgtb %%mm6, %%mm0 \n\t" 2572 "movq (%%"FF_REG_a
",%3), %%mm2 \n\t" 2573 PMAXUB(%%mm2, %%mm4)
2574 PMINUB(%%mm2, %%mm3, %%mm5)
2575 "psubb %%mm2, %%mm1 \n\t" 2576 "paddb %%mm7, %%mm1 \n\t" 2577 "pcmpgtb %%mm6, %%mm1 \n\t" 2578 "paddb %%mm1, %%mm0 \n\t" 2580 "movq (%%"FF_REG_a
", %3, 2), %%mm1 \n\t" 2581 PMAXUB(%%mm1, %%mm4)
2582 PMINUB(%%mm1, %%mm3, %%mm5)
2583 "psubb %%mm1, %%mm2 \n\t" 2584 "paddb %%mm7, %%mm2 \n\t" 2585 "pcmpgtb %%mm6, %%mm2 \n\t" 2586 "paddb %%mm2, %%mm0 \n\t" 2588 "lea (%%"FF_REG_a
", %3, 4), %%"FF_REG_a
"\n\t" 2590 "movq (%2, %3, 4), %%mm2 \n\t" 2591 PMAXUB(%%mm2, %%mm4)
2592 PMINUB(%%mm2, %%mm3, %%mm5)
2593 "psubb %%mm2, %%mm1 \n\t" 2594 "paddb %%mm7, %%mm1 \n\t" 2595 "pcmpgtb %%mm6, %%mm1 \n\t" 2596 "paddb %%mm1, %%mm0 \n\t" 2598 "movq (%%"FF_REG_a
"), %%mm1 \n\t" 2599 PMAXUB(%%mm1, %%mm4)
2600 PMINUB(%%mm1, %%mm3, %%mm5)
2601 "psubb %%mm1, %%mm2 \n\t" 2602 "paddb %%mm7, %%mm2 \n\t" 2603 "pcmpgtb %%mm6, %%mm2 \n\t" 2604 "paddb %%mm2, %%mm0 \n\t" 2606 "movq (%%"FF_REG_a
", %3), %%mm2 \n\t" 2607 PMAXUB(%%mm2, %%mm4)
2608 PMINUB(%%mm2, %%mm3, %%mm5)
2609 "psubb %%mm2, %%mm1 \n\t" 2610 "paddb %%mm7, %%mm1 \n\t" 2611 "pcmpgtb %%mm6, %%mm1 \n\t" 2612 "paddb %%mm1, %%mm0 \n\t" 2614 "movq (%%"FF_REG_a
", %3, 2), %%mm1 \n\t" 2615 PMAXUB(%%mm1, %%mm4)
2616 PMINUB(%%mm1, %%mm3, %%mm5)
2617 "psubb %%mm1, %%mm2 \n\t" 2618 "paddb %%mm7, %%mm2 \n\t" 2619 "pcmpgtb %%mm6, %%mm2 \n\t" 2620 "paddb %%mm2, %%mm0 \n\t" 2622 "movq (%2, %3, 8), %%mm2 \n\t" 2623 PMAXUB(%%mm2, %%mm4)
2624 PMINUB(%%mm2, %%mm3, %%mm5)
2625 "psubb %%mm2, %%mm1 \n\t" 2626 "paddb %%mm7, %%mm1 \n\t" 2627 "pcmpgtb %%mm6, %%mm1 \n\t" 2628 "paddb %%mm1, %%mm0 \n\t" 2630 "movq (%%"FF_REG_a
", %3, 4), %%mm1 \n\t" 2631 "psubb %%mm1, %%mm2 \n\t" 2632 "paddb %%mm7, %%mm2 \n\t" 2633 "pcmpgtb %%mm6, %%mm2 \n\t" 2634 "paddb %%mm2, %%mm0 \n\t" 2635 "psubusb %%mm3, %%mm4 \n\t" 2637 "pxor %%mm6, %%mm6 \n\t" 2638 "movq %4, %%mm7 \n\t" 2639 "paddusb %%mm7, %%mm7 \n\t" 2640 "psubusb %%mm4, %%mm7 \n\t" 2641 "pcmpeqb %%mm6, %%mm7 \n\t" 2642 "pcmpeqb %%mm6, %%mm7 \n\t" 2643 "movq %%mm7, %1 \n\t" 2645 "movq %5, %%mm7 \n\t" 2646 "punpcklbw %%mm7, %%mm7 \n\t" 2647 "punpcklbw %%mm7, %%mm7 \n\t" 2648 "punpcklbw %%mm7, %%mm7 \n\t" 2649 "psubb %%mm0, %%mm6 \n\t" 2650 "pcmpgtb %%mm7, %%mm6 \n\t" 2651 "movq %%mm6, %0 \n\t" 2653 :
"=m" (eq_mask),
"=m" (dc_mask)
2654 :
"r" (
src),
"r" ((
x86_reg)
step),
"m" (
c->pQPb),
"m"(
c->ppMode.flatnessThreshold)
2658 both_masks = dc_mask & eq_mask;
2662 int64_t *temp_sums= sums;
2665 "movq %2, %%mm0 \n\t" 2666 "pxor %%mm4, %%mm4 \n\t" 2668 "movq (%0), %%mm6 \n\t" 2669 "movq (%0, %1), %%mm5 \n\t" 2670 "movq %%mm5, %%mm1 \n\t" 2671 "movq %%mm6, %%mm2 \n\t" 2672 "psubusb %%mm6, %%mm5 \n\t" 2673 "psubusb %%mm1, %%mm2 \n\t" 2674 "por %%mm5, %%mm2 \n\t" 2675 "psubusb %%mm2, %%mm0 \n\t" 2676 "pcmpeqb %%mm4, %%mm0 \n\t" 2678 "pxor %%mm6, %%mm1 \n\t" 2679 "pand %%mm0, %%mm1 \n\t" 2680 "pxor %%mm1, %%mm6 \n\t" 2683 "movq (%0, %1, 8), %%mm5 \n\t" 2685 "movq (%0, %1, 8), %%mm7 \n\t" 2686 "movq %%mm5, %%mm1 \n\t" 2687 "movq %%mm7, %%mm2 \n\t" 2688 "psubusb %%mm7, %%mm5 \n\t" 2689 "psubusb %%mm1, %%mm2 \n\t" 2690 "por %%mm5, %%mm2 \n\t" 2691 "movq %2, %%mm0 \n\t" 2692 "psubusb %%mm2, %%mm0 \n\t" 2693 "pcmpeqb %%mm4, %%mm0 \n\t" 2695 "pxor %%mm7, %%mm1 \n\t" 2696 "pand %%mm0, %%mm1 \n\t" 2697 "pxor %%mm1, %%mm7 \n\t" 2699 "movq %%mm6, %%mm5 \n\t" 2700 "punpckhbw %%mm4, %%mm6 \n\t" 2701 "punpcklbw %%mm4, %%mm5 \n\t" 2704 "movq %%mm5, %%mm0 \n\t" 2705 "movq %%mm6, %%mm1 \n\t" 2706 "psllw $2, %%mm0 \n\t" 2707 "psllw $2, %%mm1 \n\t" 2708 "paddw "MANGLE(w04)
", %%mm0 \n\t" 2709 "paddw "MANGLE(w04)
", %%mm1 \n\t" 2712 "movq (%0), %%mm2 \n\t"\ 2713 "movq (%0), %%mm3 \n\t"\ 2715 "punpcklbw %%mm4, %%mm2 \n\t"\ 2716 "punpckhbw %%mm4, %%mm3 \n\t"\ 2717 "paddw %%mm2, %%mm0 \n\t"\ 2718 "paddw %%mm3, %%mm1 \n\t" 2721 "movq (%0), %%mm2 \n\t"\ 2722 "movq (%0), %%mm3 \n\t"\ 2724 "punpcklbw %%mm4, %%mm2 \n\t"\ 2725 "punpckhbw %%mm4, %%mm3 \n\t"\ 2726 "psubw %%mm2, %%mm0 \n\t"\ 2727 "psubw %%mm3, %%mm1 \n\t" 2733 "movq %%mm0, (%3) \n\t" 2734 "movq %%mm1, 8(%3) \n\t" 2737 "psubw %%mm5, %%mm0 \n\t" 2738 "psubw %%mm6, %%mm1 \n\t" 2739 "movq %%mm0, 16(%3) \n\t" 2740 "movq %%mm1, 24(%3) \n\t" 2743 "psubw %%mm5, %%mm0 \n\t" 2744 "psubw %%mm6, %%mm1 \n\t" 2745 "movq %%mm0, 32(%3) \n\t" 2746 "movq %%mm1, 40(%3) \n\t" 2749 "psubw %%mm5, %%mm0 \n\t" 2750 "psubw %%mm6, %%mm1 \n\t" 2751 "movq %%mm0, 48(%3) \n\t" 2752 "movq %%mm1, 56(%3) \n\t" 2755 "psubw %%mm5, %%mm0 \n\t" 2756 "psubw %%mm6, %%mm1 \n\t" 2757 "movq %%mm0, 64(%3) \n\t" 2758 "movq %%mm1, 72(%3) \n\t" 2760 "movq %%mm7, %%mm6 \n\t" 2761 "punpckhbw %%mm4, %%mm7 \n\t" 2762 "punpcklbw %%mm4, %%mm6 \n\t" 2768 "movq %%mm0, 80(%3) \n\t" 2769 "movq %%mm1, 88(%3) \n\t" 2772 "paddw %%mm6, %%mm0 \n\t" 2773 "paddw %%mm7, %%mm1 \n\t" 2774 "movq %%mm0, 96(%3) \n\t" 2775 "movq %%mm1, 104(%3) \n\t" 2778 "paddw %%mm6, %%mm0 \n\t" 2779 "paddw %%mm7, %%mm1 \n\t" 2780 "movq %%mm0, 112(%3) \n\t" 2781 "movq %%mm1, 120(%3) \n\t" 2784 "paddw %%mm6, %%mm0 \n\t" 2785 "paddw %%mm7, %%mm1 \n\t" 2786 "movq %%mm0, 128(%3) \n\t" 2787 "movq %%mm1, 136(%3) \n\t" 2790 "paddw %%mm6, %%mm0 \n\t" 2791 "paddw %%mm7, %%mm1 \n\t" 2792 "movq %%mm0, 144(%3) \n\t" 2793 "movq %%mm1, 152(%3) \n\t" 2805 "movq %4, %%mm6 \n\t" 2806 "pcmpeqb %%mm5, %%mm5 \n\t" 2807 "pxor %%mm6, %%mm5 \n\t" 2808 "pxor %%mm7, %%mm7 \n\t" 2811 "movq (%1), %%mm0 \n\t" 2812 "movq 8(%1), %%mm1 \n\t" 2813 "paddw 32(%1), %%mm0 \n\t" 2814 "paddw 40(%1), %%mm1 \n\t" 2815 "movq (%0, %3), %%mm2 \n\t" 2816 "movq %%mm2, %%mm3 \n\t" 2817 "movq %%mm2, %%mm4 \n\t" 2818 "punpcklbw %%mm7, %%mm2 \n\t" 2819 "punpckhbw %%mm7, %%mm3 \n\t" 2820 "paddw %%mm2, %%mm0 \n\t" 2821 "paddw %%mm3, %%mm1 \n\t" 2822 "paddw %%mm2, %%mm0 \n\t" 2823 "paddw %%mm3, %%mm1 \n\t" 2824 "psrlw $4, %%mm0 \n\t" 2825 "psrlw $4, %%mm1 \n\t" 2826 "packuswb %%mm1, %%mm0 \n\t" 2827 "pand %%mm6, %%mm0 \n\t" 2828 "pand %%mm5, %%mm4 \n\t" 2829 "por %%mm4, %%mm0 \n\t" 2830 "movq %%mm0, (%0, %3) \n\t" 2835 :
"+r"(
offset),
"+r"(temp_sums)
2841 if(eq_mask != -1LL){
2845 "pxor %%mm7, %%mm7 \n\t" 2849 "movq (%0), %%mm0 \n\t" 2850 "movq %%mm0, %%mm1 \n\t" 2851 "punpcklbw %%mm7, %%mm0 \n\t" 2852 "punpckhbw %%mm7, %%mm1 \n\t" 2854 "movq (%0, %1), %%mm2 \n\t" 2855 "lea (%0, %1, 2), %%"FF_REG_a
" \n\t" 2856 "movq %%mm2, %%mm3 \n\t" 2857 "punpcklbw %%mm7, %%mm2 \n\t" 2858 "punpckhbw %%mm7, %%mm3 \n\t" 2860 "movq (%%"FF_REG_a
"), %%mm4 \n\t" 2861 "movq %%mm4, %%mm5 \n\t" 2862 "punpcklbw %%mm7, %%mm4 \n\t" 2863 "punpckhbw %%mm7, %%mm5 \n\t" 2865 "paddw %%mm0, %%mm0 \n\t" 2866 "paddw %%mm1, %%mm1 \n\t" 2867 "psubw %%mm4, %%mm2 \n\t" 2868 "psubw %%mm5, %%mm3 \n\t" 2869 "psubw %%mm2, %%mm0 \n\t" 2870 "psubw %%mm3, %%mm1 \n\t" 2872 "psllw $2, %%mm2 \n\t" 2873 "psllw $2, %%mm3 \n\t" 2874 "psubw %%mm2, %%mm0 \n\t" 2875 "psubw %%mm3, %%mm1 \n\t" 2877 "movq (%%"FF_REG_a
", %1), %%mm2 \n\t" 2878 "movq %%mm2, %%mm3 \n\t" 2879 "punpcklbw %%mm7, %%mm2 \n\t" 2880 "punpckhbw %%mm7, %%mm3 \n\t" 2882 "psubw %%mm2, %%mm0 \n\t" 2883 "psubw %%mm3, %%mm1 \n\t" 2884 "psubw %%mm2, %%mm0 \n\t" 2885 "psubw %%mm3, %%mm1 \n\t" 2886 "movq %%mm0, (%4) \n\t" 2887 "movq %%mm1, 8(%4) \n\t" 2889 "movq (%%"FF_REG_a
", %1, 2), %%mm0 \n\t" 2890 "movq %%mm0, %%mm1 \n\t" 2891 "punpcklbw %%mm7, %%mm0 \n\t" 2892 "punpckhbw %%mm7, %%mm1 \n\t" 2894 "psubw %%mm0, %%mm2 \n\t" 2895 "psubw %%mm1, %%mm3 \n\t" 2896 "movq %%mm2, 16(%4) \n\t" 2897 "movq %%mm3, 24(%4) \n\t" 2898 "paddw %%mm4, %%mm4 \n\t" 2899 "paddw %%mm5, %%mm5 \n\t" 2900 "psubw %%mm2, %%mm4 \n\t" 2901 "psubw %%mm3, %%mm5 \n\t" 2903 "lea (%%"FF_REG_a
", %1), %0 \n\t" 2904 "psllw $2, %%mm2 \n\t" 2905 "psllw $2, %%mm3 \n\t" 2906 "psubw %%mm2, %%mm4 \n\t" 2907 "psubw %%mm3, %%mm5 \n\t" 2909 "movq (%0, %1, 2), %%mm2 \n\t" 2910 "movq %%mm2, %%mm3 \n\t" 2911 "punpcklbw %%mm7, %%mm2 \n\t" 2912 "punpckhbw %%mm7, %%mm3 \n\t" 2913 "psubw %%mm2, %%mm4 \n\t" 2914 "psubw %%mm3, %%mm5 \n\t" 2915 "psubw %%mm2, %%mm4 \n\t" 2916 "psubw %%mm3, %%mm5 \n\t" 2918 "movq (%%"FF_REG_a
", %1, 4), %%mm6 \n\t" 2919 "punpcklbw %%mm7, %%mm6 \n\t" 2920 "psubw %%mm6, %%mm2 \n\t" 2921 "movq (%%"FF_REG_a
", %1, 4), %%mm6 \n\t" 2922 "punpckhbw %%mm7, %%mm6 \n\t" 2923 "psubw %%mm6, %%mm3 \n\t" 2925 "paddw %%mm0, %%mm0 \n\t" 2926 "paddw %%mm1, %%mm1 \n\t" 2927 "psubw %%mm2, %%mm0 \n\t" 2928 "psubw %%mm3, %%mm1 \n\t" 2930 "psllw $2, %%mm2 \n\t" 2931 "psllw $2, %%mm3 \n\t" 2932 "psubw %%mm2, %%mm0 \n\t" 2933 "psubw %%mm3, %%mm1 \n\t" 2935 "movq (%0, %1, 4), %%mm2 \n\t" 2936 "movq %%mm2, %%mm3 \n\t" 2937 "punpcklbw %%mm7, %%mm2 \n\t" 2938 "punpckhbw %%mm7, %%mm3 \n\t" 2940 "paddw %%mm2, %%mm2 \n\t" 2941 "paddw %%mm3, %%mm3 \n\t" 2942 "psubw %%mm2, %%mm0 \n\t" 2943 "psubw %%mm3, %%mm1 \n\t" 2945 "movq (%4), %%mm2 \n\t" 2946 "movq 8(%4), %%mm3 \n\t" 2948 #if TEMPLATE_PP_MMXEXT 2949 "movq %%mm7, %%mm6 \n\t" 2950 "psubw %%mm0, %%mm6 \n\t" 2951 "pmaxsw %%mm6, %%mm0 \n\t" 2952 "movq %%mm7, %%mm6 \n\t" 2953 "psubw %%mm1, %%mm6 \n\t" 2954 "pmaxsw %%mm6, %%mm1 \n\t" 2955 "movq %%mm7, %%mm6 \n\t" 2956 "psubw %%mm2, %%mm6 \n\t" 2957 "pmaxsw %%mm6, %%mm2 \n\t" 2958 "movq %%mm7, %%mm6 \n\t" 2959 "psubw %%mm3, %%mm6 \n\t" 2960 "pmaxsw %%mm6, %%mm3 \n\t" 2962 "movq %%mm7, %%mm6 \n\t" 2963 "pcmpgtw %%mm0, %%mm6 \n\t" 2964 "pxor %%mm6, %%mm0 \n\t" 2965 "psubw %%mm6, %%mm0 \n\t" 2966 "movq %%mm7, %%mm6 \n\t" 2967 "pcmpgtw %%mm1, %%mm6 \n\t" 2968 "pxor %%mm6, %%mm1 \n\t" 2969 "psubw %%mm6, %%mm1 \n\t" 2970 "movq %%mm7, %%mm6 \n\t" 2971 "pcmpgtw %%mm2, %%mm6 \n\t" 2972 "pxor %%mm6, %%mm2 \n\t" 2973 "psubw %%mm6, %%mm2 \n\t" 2974 "movq %%mm7, %%mm6 \n\t" 2975 "pcmpgtw %%mm3, %%mm6 \n\t" 2976 "pxor %%mm6, %%mm3 \n\t" 2977 "psubw %%mm6, %%mm3 \n\t" 2980 #if TEMPLATE_PP_MMXEXT 2981 "pminsw %%mm2, %%mm0 \n\t" 2982 "pminsw %%mm3, %%mm1 \n\t" 2984 "movq %%mm0, %%mm6 \n\t" 2985 "psubusw %%mm2, %%mm6 \n\t" 2986 "psubw %%mm6, %%mm0 \n\t" 2987 "movq %%mm1, %%mm6 \n\t" 2988 "psubusw %%mm3, %%mm6 \n\t" 2989 "psubw %%mm6, %%mm1 \n\t" 2992 "movd %2, %%mm2 \n\t" 2993 "punpcklbw %%mm7, %%mm2 \n\t" 2995 "movq %%mm7, %%mm6 \n\t" 2996 "pcmpgtw %%mm4, %%mm6 \n\t" 2997 "pxor %%mm6, %%mm4 \n\t" 2998 "psubw %%mm6, %%mm4 \n\t" 2999 "pcmpgtw %%mm5, %%mm7 \n\t" 3000 "pxor %%mm7, %%mm5 \n\t" 3001 "psubw %%mm7, %%mm5 \n\t" 3003 "psllw $3, %%mm2 \n\t" 3004 "movq %%mm2, %%mm3 \n\t" 3005 "pcmpgtw %%mm4, %%mm2 \n\t" 3006 "pcmpgtw %%mm5, %%mm3 \n\t" 3007 "pand %%mm2, %%mm4 \n\t" 3008 "pand %%mm3, %%mm5 \n\t" 3011 "psubusw %%mm0, %%mm4 \n\t" 3012 "psubusw %%mm1, %%mm5 \n\t" 3015 "movq "MANGLE(w05)
", %%mm2 \n\t" 3016 "pmullw %%mm2, %%mm4 \n\t" 3017 "pmullw %%mm2, %%mm5 \n\t" 3018 "movq "MANGLE(w20)
", %%mm2 \n\t" 3019 "paddw %%mm2, %%mm4 \n\t" 3020 "paddw %%mm2, %%mm5 \n\t" 3021 "psrlw $6, %%mm4 \n\t" 3022 "psrlw $6, %%mm5 \n\t" 3024 "movq 16(%4), %%mm0 \n\t" 3025 "movq 24(%4), %%mm1 \n\t" 3027 "pxor %%mm2, %%mm2 \n\t" 3028 "pxor %%mm3, %%mm3 \n\t" 3030 "pcmpgtw %%mm0, %%mm2 \n\t" 3031 "pcmpgtw %%mm1, %%mm3 \n\t" 3032 "pxor %%mm2, %%mm0 \n\t" 3033 "pxor %%mm3, %%mm1 \n\t" 3034 "psubw %%mm2, %%mm0 \n\t" 3035 "psubw %%mm3, %%mm1 \n\t" 3036 "psrlw $1, %%mm0 \n\t" 3037 "psrlw $1, %%mm1 \n\t" 3039 "pxor %%mm6, %%mm2 \n\t" 3040 "pxor %%mm7, %%mm3 \n\t" 3041 "pand %%mm2, %%mm4 \n\t" 3042 "pand %%mm3, %%mm5 \n\t" 3044 #if TEMPLATE_PP_MMXEXT 3045 "pminsw %%mm0, %%mm4 \n\t" 3046 "pminsw %%mm1, %%mm5 \n\t" 3048 "movq %%mm4, %%mm2 \n\t" 3049 "psubusw %%mm0, %%mm2 \n\t" 3050 "psubw %%mm2, %%mm4 \n\t" 3051 "movq %%mm5, %%mm2 \n\t" 3052 "psubusw %%mm1, %%mm2 \n\t" 3053 "psubw %%mm2, %%mm5 \n\t" 3055 "pxor %%mm6, %%mm4 \n\t" 3056 "pxor %%mm7, %%mm5 \n\t" 3057 "psubw %%mm6, %%mm4 \n\t" 3058 "psubw %%mm7, %%mm5 \n\t" 3059 "packsswb %%mm5, %%mm4 \n\t" 3060 "movq %3, %%mm1 \n\t" 3061 "pandn %%mm4, %%mm1 \n\t" 3062 "movq (%0), %%mm0 \n\t" 3063 "paddb %%mm1, %%mm0 \n\t" 3064 "movq %%mm0, (%0) \n\t" 3065 "movq (%0, %1), %%mm0 \n\t" 3066 "psubb %%mm1, %%mm0 \n\t" 3067 "movq %%mm0, (%0, %1) \n\t" 3076 #endif //TEMPLATE_PP_MMX 3079 const int8_t QPs[],
int QPStride,
int isColor,
PPContext *
c);
3085 #undef REAL_SCALED_CPY 3089 int levelFix, int64_t *packedOffsetAndScale)
3091 #if !TEMPLATE_PP_MMX || !HAVE_6REGS 3095 #if TEMPLATE_PP_MMX && HAVE_6REGS 3097 "movq (%%"FF_REG_a
"), %%mm2 \n\t" 3098 "movq 8(%%"FF_REG_a
"), %%mm3 \n\t" 3099 "lea (%2,%4), %%"FF_REG_a
" \n\t" 3100 "lea (%3,%5), %%"FF_REG_d
" \n\t" 3101 "pxor %%mm4, %%mm4 \n\t" 3102 #if TEMPLATE_PP_MMXEXT 3103 #define REAL_SCALED_CPY(src1, src2, dst1, dst2) \ 3104 "movq " #src1 ", %%mm0 \n\t"\ 3105 "movq " #src1 ", %%mm5 \n\t"\ 3106 "movq " #src2 ", %%mm1 \n\t"\ 3107 "movq " #src2 ", %%mm6 \n\t"\ 3108 "punpcklbw %%mm0, %%mm0 \n\t"\ 3109 "punpckhbw %%mm5, %%mm5 \n\t"\ 3110 "punpcklbw %%mm1, %%mm1 \n\t"\ 3111 "punpckhbw %%mm6, %%mm6 \n\t"\ 3112 "pmulhuw %%mm3, %%mm0 \n\t"\ 3113 "pmulhuw %%mm3, %%mm5 \n\t"\ 3114 "pmulhuw %%mm3, %%mm1 \n\t"\ 3115 "pmulhuw %%mm3, %%mm6 \n\t"\ 3116 "psubw %%mm2, %%mm0 \n\t"\ 3117 "psubw %%mm2, %%mm5 \n\t"\ 3118 "psubw %%mm2, %%mm1 \n\t"\ 3119 "psubw %%mm2, %%mm6 \n\t"\ 3120 "packuswb %%mm5, %%mm0 \n\t"\ 3121 "packuswb %%mm6, %%mm1 \n\t"\ 3122 "movq %%mm0, " #dst1 " \n\t"\ 3123 "movq %%mm1, " #dst2 " \n\t"\ 3125 #else //TEMPLATE_PP_MMXEXT 3126 #define REAL_SCALED_CPY(src1, src2, dst1, dst2) \ 3127 "movq " #src1 ", %%mm0 \n\t"\ 3128 "movq " #src1 ", %%mm5 \n\t"\ 3129 "punpcklbw %%mm4, %%mm0 \n\t"\ 3130 "punpckhbw %%mm4, %%mm5 \n\t"\ 3131 "psubw %%mm2, %%mm0 \n\t"\ 3132 "psubw %%mm2, %%mm5 \n\t"\ 3133 "movq " #src2 ", %%mm1 \n\t"\ 3134 "psllw $6, %%mm0 \n\t"\ 3135 "psllw $6, %%mm5 \n\t"\ 3136 "pmulhw %%mm3, %%mm0 \n\t"\ 3137 "movq " #src2 ", %%mm6 \n\t"\ 3138 "pmulhw %%mm3, %%mm5 \n\t"\ 3139 "punpcklbw %%mm4, %%mm1 \n\t"\ 3140 "punpckhbw %%mm4, %%mm6 \n\t"\ 3141 "psubw %%mm2, %%mm1 \n\t"\ 3142 "psubw %%mm2, %%mm6 \n\t"\ 3143 "psllw $6, %%mm1 \n\t"\ 3144 "psllw $6, %%mm6 \n\t"\ 3145 "pmulhw %%mm3, %%mm1 \n\t"\ 3146 "pmulhw %%mm3, %%mm6 \n\t"\ 3147 "packuswb %%mm5, %%mm0 \n\t"\ 3148 "packuswb %%mm6, %%mm1 \n\t"\ 3149 "movq %%mm0, " #dst1 " \n\t"\ 3150 "movq %%mm1, " #dst2 " \n\t"\ 3152 #endif //TEMPLATE_PP_MMXEXT 3153 #define SCALED_CPY(src1, src2, dst1, dst2)\ 3154 REAL_SCALED_CPY(src1, src2, dst1, dst2) 3156 SCALED_CPY((%2) , (%2, %4) , (%3) , (%3, %5))
3157 SCALED_CPY((%2, %4, 2), (%%FF_REGa, %4, 2), (%3, %5, 2), (%%FF_REGd, %5, 2))
3158 SCALED_CPY((%2, %4, 4), (%%FF_REGa, %4, 4), (%3, %5, 4), (%%FF_REGd, %5, 4))
3159 "lea (%%"FF_REG_a
",%4,4), %%"FF_REG_a
" \n\t" 3160 "lea (%%"FF_REG_d
",%5,4), %%"FF_REG_d
" \n\t" 3161 SCALED_CPY((%%FF_REGa, %4), (%%FF_REGa, %4, 2), (%%FF_REGd, %5), (%%FF_REGd, %5, 2))
3164 :
"=&a" (packedOffsetAndScale)
3165 :
"0" (packedOffsetAndScale),
3172 #else //TEMPLATE_PP_MMX && HAVE_6REGS 3174 memcpy( &(dst[dstStride*i]),
3176 #endif //TEMPLATE_PP_MMX && HAVE_6REGS 3178 #if TEMPLATE_PP_MMX && HAVE_6REGS 3180 "lea (%0,%2), %%"FF_REG_a
" \n\t" 3181 "lea (%1,%3), %%"FF_REG_d
" \n\t" 3183 #define REAL_SIMPLE_CPY(src1, src2, dst1, dst2) \ 3184 "movq " #src1 ", %%mm0 \n\t"\ 3185 "movq " #src2 ", %%mm1 \n\t"\ 3186 "movq %%mm0, " #dst1 " \n\t"\ 3187 "movq %%mm1, " #dst2 " \n\t"\ 3189 #define SIMPLE_CPY(src1, src2, dst1, dst2)\ 3190 REAL_SIMPLE_CPY(src1, src2, dst1, dst2) 3192 SIMPLE_CPY((%0) , (%0, %2) , (%1) , (%1, %3))
3193 SIMPLE_CPY((%0, %2, 2), (%%FF_REGa, %2, 2), (%1, %3, 2), (%%FF_REGd, %3, 2))
3194 SIMPLE_CPY((%0, %2, 4), (%%FF_REGa, %2, 4), (%1, %3, 4), (%%FF_REGd, %3, 4))
3195 "lea (%%"FF_REG_a
",%2,4), %%"FF_REG_a
" \n\t" 3196 "lea (%%"FF_REG_d
",%3,4), %%"FF_REG_d
" \n\t" 3197 SIMPLE_CPY((%%FF_REGa, %2), (%%FF_REGa, %2, 2), (%%FF_REGd, %3), (%%FF_REGd, %3, 2))
3203 :
"%"FF_REG_a,
"%"FF_REG_d
3205 #else //TEMPLATE_PP_MMX && HAVE_6REGS 3207 memcpy( &(dst[dstStride*i]),
3209 #endif //TEMPLATE_PP_MMX && HAVE_6REGS 3220 "movq (%0), %%mm0 \n\t" 3221 "movq %%mm0, (%0, %1, 4) \n\t" 3223 "movq %%mm0, (%0) \n\t" 3224 "movq %%mm0, (%0, %1) \n\t" 3225 "movq %%mm0, (%0, %1, 2) \n\t" 3226 "movq %%mm0, (%0, %1, 4) \n\t" 3240 #if ARCH_X86 && TEMPLATE_PP_MMXEXT 3241 static inline void RENAME(prefetchnta)(
const void *p)
3243 __asm__ volatile(
"prefetchnta (%0)\n\t" 3248 static inline void RENAME(prefetcht0)(
const void *p)
3250 __asm__ volatile(
"prefetcht0 (%0)\n\t" 3255 static inline void RENAME(prefetcht1)(
const void *p)
3257 __asm__ volatile(
"prefetcht1 (%0)\n\t" 3262 static inline void RENAME(prefetcht2)(
const void *p)
3264 __asm__ volatile(
"prefetcht2 (%0)\n\t" 3268 #elif !ARCH_X86 && AV_GCC_VERSION_AT_LEAST(3,2) 3269 static inline void RENAME(prefetchnta)(
const void *p)
3271 __builtin_prefetch(p,0,0);
3273 static inline void RENAME(prefetcht0)(
const void *p)
3275 __builtin_prefetch(p,0,1);
3277 static inline void RENAME(prefetcht1)(
const void *p)
3279 __builtin_prefetch(p,0,2);
3281 static inline void RENAME(prefetcht2)(
const void *p)
3283 __builtin_prefetch(p,0,3);
3286 static inline void RENAME(prefetchnta)(
const void *p)
3290 static inline void RENAME(prefetcht0)(
const void *p)
3294 static inline void RENAME(prefetcht1)(
const void *p)
3298 static inline void RENAME(prefetcht2)(
const void *p)
3307 const int8_t QPs[],
int QPStride,
int isColor,
PPContext *
c2)
3311 #ifdef TEMPLATE_PP_TIME_MODE 3312 const int mode= TEMPLATE_PP_TIME_MODE;
3314 const int mode= isColor ?
c.ppMode.chromMode :
c.ppMode.lumMode;
3316 int black=0, white=255;
3317 int QPCorrecture= 256*256;
3324 const int qpHShift= isColor ? 4-
c.hChromaSubSample : 4;
3325 const int qpVShift= isColor ? 4-
c.vChromaSubSample : 4;
3328 uint64_t *
const yHistogram=
c.yHistogram;
3329 uint8_t *
const tempSrc= srcStride > 0 ?
c.tempSrc :
c.tempSrc - 23*srcStride;
3330 uint8_t *
const tempDst= (dstStride > 0 ?
c.tempDst :
c.tempDst - 23*dstStride) + 32;
3335 av_log(
c2,
AV_LOG_WARNING,
"Visualization is currently only supported with the accurate deblock filter without SIMD\n");
3340 for(i=0; i<57; i++){
3341 int offset= ((i*
c.ppMode.baseDcDiff)>>8) + 1;
3342 int threshold= offset*2 + 1;
3344 c.mmxDcThreshold[
i]= 0x7F - threshold;
3345 c.mmxDcOffset[
i]*= 0x0101010101010101LL;
3346 c.mmxDcThreshold[
i]*= 0x0101010101010101LL;
3360 else if(mode &
DERING) copyAhead=9;
3368 uint64_t maxClipped;
3374 if(
c.frameNum == 1) yHistogram[0]=
width*(uint64_t)
height/64*15/256;
3376 for(i=0; i<256; i++){
3377 sum+= yHistogram[
i];
3381 maxClipped=
av_rescale(sum,
c.ppMode.maxClippedThreshold.num,
c.ppMode.maxClippedThreshold.den);
3384 for(black=255; black>0; black--){
3385 if(clipped < maxClipped)
break;
3386 clipped-= yHistogram[black];
3390 for(white=0; white<256; white++){
3391 if(clipped < maxClipped)
break;
3392 clipped-= yHistogram[white];
3395 scale = (
AVRational){
c.ppMode.maxAllowedY -
c.ppMode.minAllowedY, white - black};
3397 #if TEMPLATE_PP_MMXEXT 3399 c.packedYOffset= (((black*
c.packedYScale)>>8) -
c.ppMode.minAllowedY) & 0xFFFF;
3402 c.packedYOffset= (black -
c.ppMode.minAllowedY) & 0xFFFF;
3405 c.packedYOffset|=
c.packedYOffset<<32;
3406 c.packedYOffset|=
c.packedYOffset<<16;
3408 c.packedYScale|=
c.packedYScale<<32;
3409 c.packedYScale|=
c.packedYScale<<16;
3412 else QPCorrecture= 256*256;
3414 c.packedYScale= 0x0100010001000100LL;
3416 QPCorrecture= 256*256;
3422 const uint8_t *srcBlock= &(
src[y*srcStride]);
3423 uint8_t *dstBlock= tempDst + dstStride;
3429 RENAME(prefetchnta)(srcBlock + (((x>>2)&6) + copyAhead)*srcStride + 32);
3430 RENAME(prefetchnta)(srcBlock + (((x>>2)&6) + copyAhead+1)*srcStride + 32);
3431 RENAME(prefetcht0)(dstBlock + (((x>>2)&6) + copyAhead)*dstStride + 32);
3432 RENAME(prefetcht0)(dstBlock + (((x>>2)&6) + copyAhead+1)*dstStride + 32);
3434 RENAME(blockCopy)(dstBlock + dstStride*8, dstStride,
3435 srcBlock + srcStride*8, srcStride, mode &
LEVEL_FIX, &
c.packedYOffset);
3437 RENAME(duplicate)(dstBlock + dstStride*8, dstStride);
3440 RENAME(deInterlaceInterpolateLinear)(dstBlock, dstStride);
3441 else if(mode & LINEAR_BLEND_DEINT_FILTER)
3442 RENAME(deInterlaceBlendLinear)(dstBlock, dstStride,
c.deintTemp + x);
3443 else if(mode & MEDIAN_DEINT_FILTER)
3444 RENAME(deInterlaceMedian)(dstBlock, dstStride);
3445 else if(mode & CUBIC_IPOL_DEINT_FILTER)
3446 RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride);
3448 RENAME(deInterlaceFF)(dstBlock, dstStride,
c.deintTemp + x);
3449 else if(mode & LOWPASS5_DEINT_FILTER)
3450 RENAME(deInterlaceL5)(dstBlock, dstStride,
c.deintTemp + x,
c.deintTemp + width + x);
3457 if(width==
FFABS(dstStride))
3458 linecpy(dst, tempDst + 9*dstStride, copyAhead, dstStride);
3461 for(i=0; i<copyAhead; i++){
3462 memcpy(dst + i*dstStride, tempDst + (9+i)*dstStride, width);
3469 const uint8_t *srcBlock= &(
src[y*srcStride]);
3470 uint8_t *dstBlock= &(dst[y*dstStride]);
3473 uint8_t *tempBlock2=
c.tempBlocks + 8;
3475 const int8_t *QPptr= &QPs[(y>>qpVShift)*QPStride];
3476 int8_t *nonBQPptr= &
c.nonBQPTable[(y>>qpVShift)*
FFABS(QPStride)];
3484 linecpy(tempSrc + srcStride*copyAhead, srcBlock + srcStride*copyAhead,
3485 FFMAX(height-y-copyAhead, 0), srcStride);
3488 for(i=
FFMAX(height-y, 8); i<copyAhead+8; i++)
3489 memcpy(tempSrc + srcStride*i,
src + srcStride*(height-1),
FFABS(srcStride));
3492 linecpy(tempDst, dstBlock - dstStride,
FFMIN(height-y+1, copyAhead+1), dstStride);
3495 for(i=height-y+1; i<=copyAhead; i++)
3496 memcpy(tempDst + dstStride*i, dst + dstStride*(height-1),
FFABS(dstStride));
3498 dstBlock= tempDst + dstStride;
3505 for(x=0; x<
width; ){
3507 int endx =
FFMIN(width, x+32);
3508 uint8_t *dstBlockStart = dstBlock;
3509 const uint8_t *srcBlockStart = srcBlock;
3511 for(qp_index=0; qp_index < (endx-startx)/
BLOCK_SIZE; qp_index++){
3512 QP = QPptr[(x+qp_index*
BLOCK_SIZE)>>qpHShift];
3513 nonBQP = nonBQPptr[(x+qp_index*
BLOCK_SIZE)>>qpHShift];
3515 QP= (QP* QPCorrecture + 256*128)>>16;
3516 nonBQP= (nonBQP* QPCorrecture + 256*128)>>16;
3517 yHistogram[(srcBlock+qp_index*8)[srcStride*12 + 4]]++;
3519 c.QP_block[qp_index] =
QP;
3520 c.nonBQP_block[qp_index] = nonBQP;
3523 "movd %1, %%mm7 \n\t" 3524 "packuswb %%mm7, %%mm7 \n\t" 3525 "packuswb %%mm7, %%mm7 \n\t" 3526 "packuswb %%mm7, %%mm7 \n\t" 3527 "movq %%mm7, %0 \n\t" 3528 :
"=m" (
c.pQPb_block[qp_index])
3534 RENAME(prefetchnta)(srcBlock + (((x>>2)&6) + copyAhead)*srcStride + 32);
3535 RENAME(prefetchnta)(srcBlock + (((x>>2)&6) + copyAhead+1)*srcStride + 32);
3536 RENAME(prefetcht0)(dstBlock + (((x>>2)&6) + copyAhead)*dstStride + 32);
3537 RENAME(prefetcht0)(dstBlock + (((x>>2)&6) + copyAhead+1)*dstStride + 32);
3539 RENAME(blockCopy)(dstBlock + dstStride*copyAhead, dstStride,
3540 srcBlock + srcStride*copyAhead, srcStride, mode &
LEVEL_FIX, &
c.packedYOffset);
3543 RENAME(deInterlaceInterpolateLinear)(dstBlock, dstStride);
3544 else if(mode & LINEAR_BLEND_DEINT_FILTER)
3545 RENAME(deInterlaceBlendLinear)(dstBlock, dstStride,
c.deintTemp + x);
3546 else if(mode & MEDIAN_DEINT_FILTER)
3547 RENAME(deInterlaceMedian)(dstBlock, dstStride);
3548 else if(mode & CUBIC_IPOL_DEINT_FILTER)
3549 RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride);
3551 RENAME(deInterlaceFF)(dstBlock, dstStride,
c.deintTemp + x);
3552 else if(mode & LOWPASS5_DEINT_FILTER)
3553 RENAME(deInterlaceL5)(dstBlock, dstStride,
c.deintTemp + x,
c.deintTemp + width + x);
3561 dstBlock = dstBlockStart;
3562 srcBlock = srcBlockStart;
3564 for(x = startx, qp_index = 0; x < endx; x+=
BLOCK_SIZE, qp_index++){
3565 const int stride= dstStride;
3568 c.QP =
c.QP_block[qp_index];
3569 c.nonBQP =
c.nonBQP_block[qp_index];
3570 c.pQPb =
c.pQPb_block[qp_index];
3571 c.pQPb2 =
c.pQPb2_block[qp_index];
3575 if(mode & V_X1_FILTER)
3577 else if(mode & V_DEBLOCK){
3593 dstBlock = dstBlockStart;
3594 srcBlock = srcBlockStart;
3596 for(x = startx, qp_index=0; x < endx; x+=
BLOCK_SIZE, qp_index++){
3597 const int stride= dstStride;
3599 c.QP =
c.QP_block[qp_index];
3600 c.nonBQP =
c.nonBQP_block[qp_index];
3601 c.pQPb =
c.pQPb_block[qp_index];
3602 c.pQPb2 =
c.pQPb2_block[qp_index];
3604 RENAME(transpose1)(tempBlock1, tempBlock2, dstBlock, dstStride);
3610 RENAME(vertX1Filter)(tempBlock1, 16, &
c);
3612 const int t=
RENAME(vertClassify)(tempBlock1, 16, &
c);
3614 RENAME(doVertLowPass)(tempBlock1, 16, &
c);
3616 RENAME(doVertDefFilter)(tempBlock1, 16, &
c);
3618 RENAME(do_a_deblock)(tempBlock1, 16, 1, &
c,
mode);
3621 RENAME(transpose2)(dstBlock-4, dstStride, tempBlock1 + 4*16);
3624 if(mode & H_X1_FILTER)
3626 else if(mode & H_DEBLOCK){
3627 #if TEMPLATE_PP_ALTIVEC 3649 }
else if(mode & H_A_DEBLOCK){
3652 #endif //TEMPLATE_PP_MMX 3661 c.tempBlurred[isColor] + y*dstStride + x,
3662 c.tempBlurredPast[isColor] + (y>>3)*256 + (x>>3) + 256,
3663 c.ppMode.maxTmpNoise);
3671 tmpXchg= tempBlock1;
3672 tempBlock1= tempBlock2;
3673 tempBlock2 = tmpXchg;
3679 if(y > 0)
RENAME(dering)(dstBlock - dstStride - 8, dstStride, &
c);
3683 RENAME(tempNoiseReducer)(dstBlock-8, dstStride,
3684 c.tempBlurred[isColor] + y*dstStride + x,
3685 c.tempBlurredPast[isColor] + (y>>3)*256 + (x>>3) + 256,
3686 c.ppMode.maxTmpNoise);
3691 uint8_t *dstBlock= &(dst[y*dstStride]);
3692 if(width==
FFABS(dstStride))
3693 linecpy(dstBlock, tempDst + dstStride, height-y, dstStride);
3696 for(i=0; i<height-y; i++){
3697 memcpy(dstBlock + i*dstStride, tempDst + (i+1)*dstStride, width);
3702 #if TEMPLATE_PP_3DNOW 3704 #elif TEMPLATE_PP_MMX 3708 #ifdef DEBUG_BRIGHTNESS 3712 for(i=0; i<256; i++)
3713 if(yHistogram[i] > max) max=yHistogram[
i];
3715 for(i=1; i<256; i++){
3717 int start=yHistogram[i-1]/(max/256+1);
3718 int end=yHistogram[
i]/(max/256+1);
3719 int inc= end > start ? 1 : -1;
3720 for(x=start; x!=end+inc; x+=inc)
3721 dst[ i*dstStride + x]+=128;
3724 for(i=0; i<100; i+=2){
3725 dst[ (white)*dstStride + i]+=128;
3726 dst[ (black)*dstStride + i]+=128;
3736 #undef TEMPLATE_PP_C 3737 #undef TEMPLATE_PP_ALTIVEC 3738 #undef TEMPLATE_PP_MMX 3739 #undef TEMPLATE_PP_MMXEXT 3740 #undef TEMPLATE_PP_3DNOW 3741 #undef TEMPLATE_PP_SSE2
static int vertClassify_altivec(uint8_t src[], int stride, PPContext *c)
#define AV_LOG_WARNING
Something somehow does not look correct.
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
static void error(const char *err)
static void transpose_16x8_char_toPackedAlign_altivec(unsigned char *dst, unsigned char *src, int stride)
#define LINEAR_BLEND_DEINT_FILTER
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
static av_cold int end(AVCodecContext *avctx)
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
static void transpose_8x16_char_fromPackedAlign_altivec(unsigned char *dst, unsigned char *src, int stride)
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But first
#define DECLARE_ALIGNED(n, t, v)
Declare a variable that is aligned in memory.
#define LOWPASS5_DEINT_FILTER
static void horizX1Filter(uint8_t *src, int stride, int QP)
Experimental Filter 1 (Horizontal) will not damage linear gradients Flat blocks should look like they...
static void linecpy(void *dest, const void *src, int lines, int stride)
#define MEDIAN_DEINT_FILTER
#define CUBIC_IPOL_DEINT_FILTER
int64_t av_rescale(int64_t a, int64_t b, int64_t c)
Rescale a 64-bit integer with rounding to nearest.
#define LINEAR_IPOL_DEINT_FILTER
#define XMM_CLOBBERS(...)
#define FFMPEG_DEINT_FILTER
#define FFABS(a)
Absolute value, Note, INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
static void doVertDefFilter_altivec(uint8_t src[], int stride, PPContext *c)
#define AV_LOG_INFO
Standard information.
Rational number (pair of numerator and denominator).
GLint GLenum GLboolean GLsizei stride
static void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, const int8_t QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
static int ref[MAX_W *MAX_W]
static void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c)
__asm__(".macro parse_r var r\n\t""\\var = -1\n\t"_IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3) _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7) _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11) _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15) _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19) _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23) _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27) _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31)".iflt \\var\n\t"".error \"Unable to parse register name \\r\"\n\t"".endif\n\t"".endm")
static av_always_inline int diff(const uint32_t a, const uint32_t b)
#define TEMP_NOISE_FILTER
#define LEVEL_FIX
Brightness & Contrast.
mode
Use these values in ebur128_init (or'ed).
#define NAMED_CONSTRAINTS_ADD(...)
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But a word about which is also called distortion Distortion can be quantified by almost any quality measurement one chooses the sum of squared differences is used but more complex methods that consider psychovisual effects can be used as well It makes no difference in this discussion First step