Go to the documentation of this file.
   39 #if COMPILE_TEMPLATE_AMD3DNOW 
   40 #define PREFETCH  "prefetch" 
   41 #define PAVGB     "pavgusb" 
   42 #elif COMPILE_TEMPLATE_MMXEXT 
   43 #define PREFETCH "prefetchnta" 
   46 #define PREFETCH  " # nop" 
   49 #if COMPILE_TEMPLATE_AMD3DNOW 
   56 #if COMPILE_TEMPLATE_MMXEXT 
   57 #define MOVNTQ "movntq" 
   58 #define SFENCE "sfence" 
   61 #define SFENCE " # nop" 
   64 #if !COMPILE_TEMPLATE_SSE2 
   66 #if !COMPILE_TEMPLATE_AMD3DNOW 
   71     const uint8_t *
s = 
src;
 
   73     const uint8_t *mm_end;
 
   77     __asm__ volatile(
"movq        %0, %%mm7"::
"m"(mask32a):
"memory");
 
   81             "movd        (%1), %%mm0    \n\t" 
   82             "punpckldq  3(%1), %%mm0    \n\t" 
   83             "movd       6(%1), %%mm1    \n\t" 
   84             "punpckldq  9(%1), %%mm1    \n\t" 
   85             "movd      12(%1), %%mm2    \n\t" 
   86             "punpckldq 15(%1), %%mm2    \n\t" 
   87             "movd      18(%1), %%mm3    \n\t" 
   88             "punpckldq 21(%1), %%mm3    \n\t" 
   89             "por        %%mm7, %%mm0    \n\t" 
   90             "por        %%mm7, %%mm1    \n\t" 
   91             "por        %%mm7, %%mm2    \n\t" 
   92             "por        %%mm7, %%mm3    \n\t" 
   95             MOVNTQ"     %%mm2, 16(%0)   \n\t" 
  /*
   * STORE_BGR24_MMX: inline-asm text fragment that merges the register pairs
   * (mm0,mm2), (mm1,mm3), (mm4,mm6) and (mm5,mm7) and then writes three
   * quadwords (24 bytes) to (%0), 8(%0) and 16(%0) using MOVNTQ.
   *
   * Steps, in the order they appear below:
   *   1. mm2/mm3/mm6/mm7 are shifted right by 8 bits and masked with
   *      mask24h; mm0/mm1/mm4/mm5 are masked with mask24l; each pair is
   *      then OR-ed together into mm0/mm1/mm4/mm5.
   *   2. The four results are squeezed into three registers:
   *        mm0 |= mm1 << 48
   *        mm1  = (mm1 >> 16) | (mm4 << 32)
   *        mm4  = (mm4 >> 32) | (mm5 << 16)
   *      mm2 and mm3 are overwritten as scratch during this step.
   *   3. mm0, mm1 and mm4 are stored at offsets 0, 8 and 16 from %0.
   *
   * NOTE(review): presumably the expansion site has already copied
   * mm0/mm1/mm4/mm5 into mm2/mm3/mm6/mm7 and mask24l/mask24h select the
   * bytes of a 32-bit pixel that survive in the packed 24-bit output --
   * confirm against the mask definitions and the callers.
   *
   * The last string has no trailing newline/tab, so the code expanding the
   * macro has to supply its own separator if more asm text follows.
   */
  112 #define STORE_BGR24_MMX \ 
  113             "psrlq         $8, %%mm2    \n\t" \ 
  114             "psrlq         $8, %%mm3    \n\t" \ 
  115             "psrlq         $8, %%mm6    \n\t" \ 
  116             "psrlq         $8, %%mm7    \n\t" \ 
  117             "pand "MANGLE(mask24l)", %%mm0\n\t" \ 
  118             "pand "MANGLE(mask24l)", %%mm1\n\t" \ 
  119             "pand "MANGLE(mask24l)", %%mm4\n\t" \ 
  120             "pand "MANGLE(mask24l)", %%mm5\n\t" \ 
  121             "pand "MANGLE(mask24h)", %%mm2\n\t" \ 
  122             "pand "MANGLE(mask24h)", %%mm3\n\t" \ 
  123             "pand "MANGLE(mask24h)", %%mm6\n\t" \ 
  124             "pand "MANGLE(mask24h)", %%mm7\n\t" \ 
  125             "por        %%mm2, %%mm0    \n\t" \ 
  126             "por        %%mm3, %%mm1    \n\t" \ 
  127             "por        %%mm6, %%mm4    \n\t" \ 
  128             "por        %%mm7, %%mm5    \n\t" \ 
  130             "movq       %%mm1, %%mm2    \n\t" \ 
  131             "movq       %%mm4, %%mm3    \n\t" \ 
  132             "psllq        $48, %%mm2    \n\t" \ 
  133             "psllq        $32, %%mm3    \n\t" \ 
  134             "por        %%mm2, %%mm0    \n\t" \ 
  135             "psrlq        $16, %%mm1    \n\t" \ 
  136             "psrlq        $32, %%mm4    \n\t" \ 
  137             "psllq        $16, %%mm5    \n\t" \ 
  138             "por        %%mm3, %%mm1    \n\t" \ 
  139             "por        %%mm5, %%mm4    \n\t" \ 
  141             MOVNTQ"     %%mm0,   (%0)    \n\t" \ 
  142             MOVNTQ"     %%mm1,  8(%0)    \n\t" \ 
  143             MOVNTQ"     %%mm4, 16(%0)" 
  149     const uint8_t *
s = 
src;
 
  151     const uint8_t *mm_end;
 
  158             "movq        (%1), %%mm0    \n\t" 
  159             "movq       8(%1), %%mm1    \n\t" 
  160             "movq      16(%1), %%mm4    \n\t" 
  161             "movq      24(%1), %%mm5    \n\t" 
  162             "movq       %%mm0, %%mm2    \n\t" 
  163             "movq       %%mm1, %%mm3    \n\t" 
  164             "movq       %%mm4, %%mm6    \n\t" 
  165             "movq       %%mm5, %%mm7    \n\t" 
  191     register const uint8_t* 
s=
src;
 
  192     register uint8_t* 
d=dst;
 
  193     register const uint8_t *end;
 
  194     const uint8_t *mm_end;
 
  197     __asm__ volatile(
"movq        %0, %%mm4"::
"m"(mask15s));
 
  202             "movq      (%1), %%mm0  \n\t" 
  203             "movq     8(%1), %%mm2  \n\t" 
  204             "movq     %%mm0, %%mm1  \n\t" 
  205             "movq     %%mm2, %%mm3  \n\t" 
  206             "pand     %%mm4, %%mm0  \n\t" 
  207             "pand     %%mm4, %%mm2  \n\t" 
  208             "paddw    %%mm1, %%mm0  \n\t" 
  209             "paddw    %%mm3, %%mm2  \n\t" 
  221         register unsigned x= *((
const uint32_t *)
s);
 
  222         *((uint32_t *)
d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0);
 
  227         register unsigned short x= *((
const uint16_t *)
s);
 
  228         *((uint16_t *)
d) = (x&0x7FFF) + (x&0x7FE0);
 
  234     register const uint8_t* 
s=
src;
 
  235     register uint8_t* 
d=dst;
 
  236     register const uint8_t *end;
 
  237     const uint8_t *mm_end;
 
  240     __asm__ volatile(
"movq        %0, %%mm7"::
"m"(mask15rg));
 
  241     __asm__ volatile(
"movq        %0, %%mm6"::
"m"(mask15b));
 
  246             "movq      (%1), %%mm0  \n\t" 
  247             "movq     8(%1), %%mm2  \n\t" 
  248             "movq     %%mm0, %%mm1  \n\t" 
  249             "movq     %%mm2, %%mm3  \n\t" 
  250             "psrlq       $1, %%mm0  \n\t" 
  251             "psrlq       $1, %%mm2  \n\t" 
  252             "pand     %%mm7, %%mm0  \n\t" 
  253             "pand     %%mm7, %%mm2  \n\t" 
  254             "pand     %%mm6, %%mm1  \n\t" 
  255             "pand     %%mm6, %%mm3  \n\t" 
  256             "por      %%mm1, %%mm0  \n\t" 
  257             "por      %%mm3, %%mm2  \n\t" 
  269         register uint32_t x= *((
const uint32_t*)
s);
 
  270         *((uint32_t *)
d) = ((x>>1)&0x7FE07FE0) | (x&0x001F001F);
 
  275         register uint16_t x= *((
const uint16_t*)
s);
 
  276         *((uint16_t *)
d) = ((x>>1)&0x7FE0) | (x&0x001F);
 
  282     const uint8_t *
s = 
src;
 
  284     const uint8_t *mm_end;
 
  285     uint16_t *
d = (uint16_t *)dst;
 
  289         "movq           %3, %%mm5   \n\t" 
  290         "movq           %4, %%mm6   \n\t" 
  291         "movq           %5, %%mm7   \n\t" 
  296         "movd         (%1), %%mm0   \n\t" 
  297         "movd        4(%1), %%mm3   \n\t" 
  298         "punpckldq   8(%1), %%mm0   \n\t" 
  299         "punpckldq  12(%1), %%mm3   \n\t" 
  300         "movq        %%mm0, %%mm1   \n\t" 
  301         "movq        %%mm3, %%mm4   \n\t" 
  302         "pand        %%mm6, %%mm0   \n\t" 
  303         "pand        %%mm6, %%mm3   \n\t" 
  304         "pmaddwd     %%mm7, %%mm0   \n\t" 
  305         "pmaddwd     %%mm7, %%mm3   \n\t" 
  306         "pand        %%mm5, %%mm1   \n\t" 
  307         "pand        %%mm5, %%mm4   \n\t" 
  308         "por         %%mm1, %%mm0   \n\t" 
  309         "por         %%mm4, %%mm3   \n\t" 
  310         "psrld          $5, %%mm0   \n\t" 
  311         "pslld         $11, %%mm3   \n\t" 
  312         "por         %%mm3, %%mm0   \n\t" 
  320         : 
"r" (mm_end), 
"m" (mask3216g), 
"m" (mask3216br), 
"m" (mul3216)
 
  325         register int rgb = *(
const uint32_t*)
s; 
s += 4;
 
  326         *
d++ = ((
rgb&0xFF)>>3) + ((
rgb&0xFC00)>>5) + ((
rgb&0xF80000)>>8);
 
  332     const uint8_t *
s = 
src;
 
  334     const uint8_t *mm_end;
 
  335     uint16_t *
d = (uint16_t *)dst;
 
  339         "movq          %0, %%mm7    \n\t" 
  340         "movq          %1, %%mm6    \n\t" 
  341         ::
"m"(red_16mask),
"m"(green_16mask));
 
  346             "movd        (%1), %%mm0    \n\t" 
  347             "movd       4(%1), %%mm3    \n\t" 
  348             "punpckldq  8(%1), %%mm0    \n\t" 
  349             "punpckldq 12(%1), %%mm3    \n\t" 
  350             "movq       %%mm0, %%mm1    \n\t" 
  351             "movq       %%mm0, %%mm2    \n\t" 
  352             "movq       %%mm3, %%mm4    \n\t" 
  353             "movq       %%mm3, %%mm5    \n\t" 
  354             "psllq         $8, %%mm0    \n\t" 
  355             "psllq         $8, %%mm3    \n\t" 
  356             "pand       %%mm7, %%mm0    \n\t" 
  357             "pand       %%mm7, %%mm3    \n\t" 
  358             "psrlq         $5, %%mm1    \n\t" 
  359             "psrlq         $5, %%mm4    \n\t" 
  360             "pand       %%mm6, %%mm1    \n\t" 
  361             "pand       %%mm6, %%mm4    \n\t" 
  362             "psrlq        $19, %%mm2    \n\t" 
  363             "psrlq        $19, %%mm5    \n\t" 
  364             "pand          %2, %%mm2    \n\t" 
  365             "pand          %2, %%mm5    \n\t" 
  366             "por        %%mm1, %%mm0    \n\t" 
  367             "por        %%mm4, %%mm3    \n\t" 
  368             "por        %%mm2, %%mm0    \n\t" 
  369             "por        %%mm5, %%mm3    \n\t" 
  370             "psllq        $16, %%mm3    \n\t" 
  371             "por        %%mm3, %%mm0    \n\t" 
  373             :: 
"r"(
d),
"r"(
s),
"m"(blue_16mask):
"memory");
 
  380         register int rgb = *(
const uint32_t*)
s; 
s += 4;
 
  381         *
d++ = ((
rgb&0xF8)<<8) + ((
rgb&0xFC00)>>5) + ((
rgb&0xF80000)>>19);
 
  387     const uint8_t *
s = 
src;
 
  389     const uint8_t *mm_end;
 
  390     uint16_t *
d = (uint16_t *)dst;
 
  394         "movq           %3, %%mm5   \n\t" 
  395         "movq           %4, %%mm6   \n\t" 
  396         "movq           %5, %%mm7   \n\t" 
  401         "movd         (%1), %%mm0   \n\t" 
  402         "movd        4(%1), %%mm3   \n\t" 
  403         "punpckldq   8(%1), %%mm0   \n\t" 
  404         "punpckldq  12(%1), %%mm3   \n\t" 
  405         "movq        %%mm0, %%mm1   \n\t" 
  406         "movq        %%mm3, %%mm4   \n\t" 
  407         "pand        %%mm6, %%mm0   \n\t" 
  408         "pand        %%mm6, %%mm3   \n\t" 
  409         "pmaddwd     %%mm7, %%mm0   \n\t" 
  410         "pmaddwd     %%mm7, %%mm3   \n\t" 
  411         "pand        %%mm5, %%mm1   \n\t" 
  412         "pand        %%mm5, %%mm4   \n\t" 
  413         "por         %%mm1, %%mm0   \n\t" 
  414         "por         %%mm4, %%mm3   \n\t" 
  415         "psrld          $6, %%mm0   \n\t" 
  416         "pslld         $10, %%mm3   \n\t" 
  417         "por         %%mm3, %%mm0   \n\t" 
  425         : 
"r" (mm_end), 
"m" (mask3215g), 
"m" (mask3216br), 
"m" (mul3215)
 
  430         register int rgb = *(
const uint32_t*)
s; 
s += 4;
 
  431         *
d++ = ((
rgb&0xFF)>>3) + ((
rgb&0xF800)>>6) + ((
rgb&0xF80000)>>9);
 
  437     const uint8_t *
s = 
src;
 
  439     const uint8_t *mm_end;
 
  440     uint16_t *
d = (uint16_t *)dst;
 
  444         "movq          %0, %%mm7    \n\t" 
  445         "movq          %1, %%mm6    \n\t" 
  446         ::
"m"(red_15mask),
"m"(green_15mask));
 
  451             "movd        (%1), %%mm0    \n\t" 
  452             "movd       4(%1), %%mm3    \n\t" 
  453             "punpckldq  8(%1), %%mm0    \n\t" 
  454             "punpckldq 12(%1), %%mm3    \n\t" 
  455             "movq       %%mm0, %%mm1    \n\t" 
  456             "movq       %%mm0, %%mm2    \n\t" 
  457             "movq       %%mm3, %%mm4    \n\t" 
  458             "movq       %%mm3, %%mm5    \n\t" 
  459             "psllq         $7, %%mm0    \n\t" 
  460             "psllq         $7, %%mm3    \n\t" 
  461             "pand       %%mm7, %%mm0    \n\t" 
  462             "pand       %%mm7, %%mm3    \n\t" 
  463             "psrlq         $6, %%mm1    \n\t" 
  464             "psrlq         $6, %%mm4    \n\t" 
  465             "pand       %%mm6, %%mm1    \n\t" 
  466             "pand       %%mm6, %%mm4    \n\t" 
  467             "psrlq        $19, %%mm2    \n\t" 
  468             "psrlq        $19, %%mm5    \n\t" 
  469             "pand          %2, %%mm2    \n\t" 
  470             "pand          %2, %%mm5    \n\t" 
  471             "por        %%mm1, %%mm0    \n\t" 
  472             "por        %%mm4, %%mm3    \n\t" 
  473             "por        %%mm2, %%mm0    \n\t" 
  474             "por        %%mm5, %%mm3    \n\t" 
  475             "psllq        $16, %%mm3    \n\t" 
  476             "por        %%mm3, %%mm0    \n\t" 
  478             ::
"r"(
d),
"r"(
s),
"m"(blue_15mask):
"memory");
 
  485         register int rgb = *(
const uint32_t*)
s; 
s += 4;
 
  486         *
d++ = ((
rgb&0xF8)<<7) + ((
rgb&0xF800)>>6) + ((
rgb&0xF80000)>>19);
 
  492     const uint8_t *
s = 
src;
 
  494     const uint8_t *mm_end;
 
  495     uint16_t *
d = (uint16_t *)dst;
 
  499         "movq         %0, %%mm7     \n\t" 
  500         "movq         %1, %%mm6     \n\t" 
  501         ::
"m"(red_16mask),
"m"(green_16mask));
 
  506             "movd        (%1), %%mm0    \n\t" 
  507             "movd       3(%1), %%mm3    \n\t" 
  508             "punpckldq  6(%1), %%mm0    \n\t" 
  509             "punpckldq  9(%1), %%mm3    \n\t" 
  510             "movq       %%mm0, %%mm1    \n\t" 
  511             "movq       %%mm0, %%mm2    \n\t" 
  512             "movq       %%mm3, %%mm4    \n\t" 
  513             "movq       %%mm3, %%mm5    \n\t" 
  514             "psrlq         $3, %%mm0    \n\t" 
  515             "psrlq         $3, %%mm3    \n\t" 
  516             "pand          %2, %%mm0    \n\t" 
  517             "pand          %2, %%mm3    \n\t" 
  518             "psrlq         $5, %%mm1    \n\t" 
  519             "psrlq         $5, %%mm4    \n\t" 
  520             "pand       %%mm6, %%mm1    \n\t" 
  521             "pand       %%mm6, %%mm4    \n\t" 
  522             "psrlq         $8, %%mm2    \n\t" 
  523             "psrlq         $8, %%mm5    \n\t" 
  524             "pand       %%mm7, %%mm2    \n\t" 
  525             "pand       %%mm7, %%mm5    \n\t" 
  526             "por        %%mm1, %%mm0    \n\t" 
  527             "por        %%mm4, %%mm3    \n\t" 
  528             "por        %%mm2, %%mm0    \n\t" 
  529             "por        %%mm5, %%mm3    \n\t" 
  530             "psllq        $16, %%mm3    \n\t" 
  531             "por        %%mm3, %%mm0    \n\t" 
  533             ::
"r"(
d),
"r"(
s),
"m"(blue_16mask):
"memory");
 
  543         *
d++ = (
b>>3) | ((
g&0xFC)<<3) | ((
r&0xF8)<<8);
 
  549     const uint8_t *
s = 
src;
 
  551     const uint8_t *mm_end;
 
  552     uint16_t *
d = (uint16_t *)dst;
 
  556         "movq         %0, %%mm7     \n\t" 
  557         "movq         %1, %%mm6     \n\t" 
  558         ::
"m"(red_16mask),
"m"(green_16mask));
 
  563             "movd        (%1), %%mm0    \n\t" 
  564             "movd       3(%1), %%mm3    \n\t" 
  565             "punpckldq  6(%1), %%mm0    \n\t" 
  566             "punpckldq  9(%1), %%mm3    \n\t" 
  567             "movq       %%mm0, %%mm1    \n\t" 
  568             "movq       %%mm0, %%mm2    \n\t" 
  569             "movq       %%mm3, %%mm4    \n\t" 
  570             "movq       %%mm3, %%mm5    \n\t" 
  571             "psllq         $8, %%mm0    \n\t" 
  572             "psllq         $8, %%mm3    \n\t" 
  573             "pand       %%mm7, %%mm0    \n\t" 
  574             "pand       %%mm7, %%mm3    \n\t" 
  575             "psrlq         $5, %%mm1    \n\t" 
  576             "psrlq         $5, %%mm4    \n\t" 
  577             "pand       %%mm6, %%mm1    \n\t" 
  578             "pand       %%mm6, %%mm4    \n\t" 
  579             "psrlq        $19, %%mm2    \n\t" 
  580             "psrlq        $19, %%mm5    \n\t" 
  581             "pand          %2, %%mm2    \n\t" 
  582             "pand          %2, %%mm5    \n\t" 
  583             "por        %%mm1, %%mm0    \n\t" 
  584             "por        %%mm4, %%mm3    \n\t" 
  585             "por        %%mm2, %%mm0    \n\t" 
  586             "por        %%mm5, %%mm3    \n\t" 
  587             "psllq        $16, %%mm3    \n\t" 
  588             "por        %%mm3, %%mm0    \n\t" 
  590             ::
"r"(
d),
"r"(
s),
"m"(blue_16mask):
"memory");
 
  600         *
d++ = (
b>>3) | ((
g&0xFC)<<3) | ((
r&0xF8)<<8);
 
  606     const uint8_t *
s = 
src;
 
  608     const uint8_t *mm_end;
 
  609     uint16_t *
d = (uint16_t *)dst;
 
  613         "movq          %0, %%mm7    \n\t" 
  614         "movq          %1, %%mm6    \n\t" 
  615         ::
"m"(red_15mask),
"m"(green_15mask));
 
  620             "movd        (%1), %%mm0    \n\t" 
  621             "movd       3(%1), %%mm3    \n\t" 
  622             "punpckldq  6(%1), %%mm0    \n\t" 
  623             "punpckldq  9(%1), %%mm3    \n\t" 
  624             "movq       %%mm0, %%mm1    \n\t" 
  625             "movq       %%mm0, %%mm2    \n\t" 
  626             "movq       %%mm3, %%mm4    \n\t" 
  627             "movq       %%mm3, %%mm5    \n\t" 
  628             "psrlq         $3, %%mm0    \n\t" 
  629             "psrlq         $3, %%mm3    \n\t" 
  630             "pand          %2, %%mm0    \n\t" 
  631             "pand          %2, %%mm3    \n\t" 
  632             "psrlq         $6, %%mm1    \n\t" 
  633             "psrlq         $6, %%mm4    \n\t" 
  634             "pand       %%mm6, %%mm1    \n\t" 
  635             "pand       %%mm6, %%mm4    \n\t" 
  636             "psrlq         $9, %%mm2    \n\t" 
  637             "psrlq         $9, %%mm5    \n\t" 
  638             "pand       %%mm7, %%mm2    \n\t" 
  639             "pand       %%mm7, %%mm5    \n\t" 
  640             "por        %%mm1, %%mm0    \n\t" 
  641             "por        %%mm4, %%mm3    \n\t" 
  642             "por        %%mm2, %%mm0    \n\t" 
  643             "por        %%mm5, %%mm3    \n\t" 
  644             "psllq        $16, %%mm3    \n\t" 
  645             "por        %%mm3, %%mm0    \n\t" 
  647             ::
"r"(
d),
"r"(
s),
"m"(blue_15mask):
"memory");
 
  657         *
d++ = (
b>>3) | ((
g&0xF8)<<2) | ((
r&0xF8)<<7);
 
  663     const uint8_t *
s = 
src;
 
  665     const uint8_t *mm_end;
 
  666     uint16_t *
d = (uint16_t *)dst;
 
  670         "movq         %0, %%mm7     \n\t" 
  671         "movq         %1, %%mm6     \n\t" 
  672         ::
"m"(red_15mask),
"m"(green_15mask));
 
  677             "movd       (%1), %%mm0     \n\t" 
  678             "movd      3(%1), %%mm3     \n\t" 
  679             "punpckldq 6(%1), %%mm0     \n\t" 
  680             "punpckldq 9(%1), %%mm3     \n\t" 
  681             "movq      %%mm0, %%mm1     \n\t" 
  682             "movq      %%mm0, %%mm2     \n\t" 
  683             "movq      %%mm3, %%mm4     \n\t" 
  684             "movq      %%mm3, %%mm5     \n\t" 
  685             "psllq        $7, %%mm0     \n\t" 
  686             "psllq        $7, %%mm3     \n\t" 
  687             "pand      %%mm7, %%mm0     \n\t" 
  688             "pand      %%mm7, %%mm3     \n\t" 
  689             "psrlq        $6, %%mm1     \n\t" 
  690             "psrlq        $6, %%mm4     \n\t" 
  691             "pand      %%mm6, %%mm1     \n\t" 
  692             "pand      %%mm6, %%mm4     \n\t" 
  693             "psrlq       $19, %%mm2     \n\t" 
  694             "psrlq       $19, %%mm5     \n\t" 
  695             "pand         %2, %%mm2     \n\t" 
  696             "pand         %2, %%mm5     \n\t" 
  697             "por       %%mm1, %%mm0     \n\t" 
  698             "por       %%mm4, %%mm3     \n\t" 
  699             "por       %%mm2, %%mm0     \n\t" 
  700             "por       %%mm5, %%mm3     \n\t" 
  701             "psllq       $16, %%mm3     \n\t" 
  702             "por       %%mm3, %%mm0     \n\t" 
  704             ::
"r"(
d),
"r"(
s),
"m"(blue_15mask):
"memory");
 
  714         *
d++ = (
b>>3) | ((
g&0xF8)<<2) | ((
r&0xF8)<<7);
 
  721     const uint16_t *mm_end;
 
  723     const uint16_t *
s = (
const uint16_t*)
src;
 
  724     end = 
s + src_size/2;
 
  730             "movq        (%1), %%mm0    \n\t" 
  731             "movq        (%1), %%mm1    \n\t" 
  732             "movq        (%1), %%mm2    \n\t" 
  733             "pand          %2, %%mm0    \n\t" 
  734             "pand          %3, %%mm1    \n\t" 
  735             "pand          %4, %%mm2    \n\t" 
  736             "psllq         $5, %%mm0    \n\t" 
  737             "pmulhw        "MANGLE(mul15_mid)
", %%mm0    \n\t" 
  738             "pmulhw        "MANGLE(mul15_mid)
", %%mm1    \n\t" 
  739             "pmulhw        "MANGLE(mul15_hi)
", %%mm2    \n\t" 
  740             "movq       %%mm0, %%mm3    \n\t" 
  741             "movq       %%mm1, %%mm4    \n\t" 
  742             "movq       %%mm2, %%mm5    \n\t" 
  743             "punpcklwd     %5, %%mm0    \n\t" 
  744             "punpcklwd     %5, %%mm1    \n\t" 
  745             "punpcklwd     %5, %%mm2    \n\t" 
  746             "punpckhwd     %5, %%mm3    \n\t" 
  747             "punpckhwd     %5, %%mm4    \n\t" 
  748             "punpckhwd     %5, %%mm5    \n\t" 
  749             "psllq         $8, %%mm1    \n\t" 
  750             "psllq        $16, %%mm2    \n\t" 
  751             "por        %%mm1, %%mm0    \n\t" 
  752             "por        %%mm2, %%mm0    \n\t" 
  753             "psllq         $8, %%mm4    \n\t" 
  754             "psllq        $16, %%mm5    \n\t" 
  755             "por        %%mm4, %%mm3    \n\t" 
  756             "por        %%mm5, %%mm3    \n\t" 
  758             "movq       %%mm0, %%mm6    \n\t" 
  759             "movq       %%mm3, %%mm7    \n\t" 
  761             "movq       8(%1), %%mm0    \n\t" 
  762             "movq       8(%1), %%mm1    \n\t" 
  763             "movq       8(%1), %%mm2    \n\t" 
  764             "pand          %2, %%mm0    \n\t" 
  765             "pand          %3, %%mm1    \n\t" 
  766             "pand          %4, %%mm2    \n\t" 
  767             "psllq         $5, %%mm0    \n\t" 
  768             "pmulhw        "MANGLE(mul15_mid)
", %%mm0    \n\t" 
  769             "pmulhw        "MANGLE(mul15_mid)
", %%mm1    \n\t" 
  770             "pmulhw        "MANGLE(mul15_hi)
", %%mm2    \n\t" 
  771             "movq       %%mm0, %%mm3    \n\t" 
  772             "movq       %%mm1, %%mm4    \n\t" 
  773             "movq       %%mm2, %%mm5    \n\t" 
  774             "punpcklwd     %5, %%mm0    \n\t" 
  775             "punpcklwd     %5, %%mm1    \n\t" 
  776             "punpcklwd     %5, %%mm2    \n\t" 
  777             "punpckhwd     %5, %%mm3    \n\t" 
  778             "punpckhwd     %5, %%mm4    \n\t" 
  779             "punpckhwd     %5, %%mm5    \n\t" 
  780             "psllq         $8, %%mm1    \n\t" 
  781             "psllq        $16, %%mm2    \n\t" 
  782             "por        %%mm1, %%mm0    \n\t" 
  783             "por        %%mm2, %%mm0    \n\t" 
  784             "psllq         $8, %%mm4    \n\t" 
  785             "psllq        $16, %%mm5    \n\t" 
  786             "por        %%mm4, %%mm3    \n\t" 
  787             "por        %%mm5, %%mm3    \n\t" 
  790             :
"r"(
s),
"m"(mask15b),
"m"(mask15g),
"m"(mask15r), 
"m"(mmx_null)
 
  795             "movq       %%mm0, %%mm4    \n\t" 
  796             "movq       %%mm3, %%mm5    \n\t" 
  797             "movq       %%mm6, %%mm0    \n\t" 
  798             "movq       %%mm7, %%mm1    \n\t" 
  800             "movq       %%mm4, %%mm6    \n\t" 
  801             "movq       %%mm5, %%mm7    \n\t" 
  802             "movq       %%mm0, %%mm2    \n\t" 
  803             "movq       %%mm1, %%mm3    \n\t" 
  816         register uint16_t bgr;
 
  818         *
d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
 
  819         *
d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7);
 
  820         *
d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
 
  827     const uint16_t *mm_end;
 
  828     uint8_t *
d = (uint8_t *)dst;
 
  829     const uint16_t *
s = (
const uint16_t *)
src;
 
  830     end = 
s + src_size/2;
 
  836             "movq        (%1), %%mm0    \n\t" 
  837             "movq        (%1), %%mm1    \n\t" 
  838             "movq        (%1), %%mm2    \n\t" 
  839             "pand          %2, %%mm0    \n\t" 
  840             "pand          %3, %%mm1    \n\t" 
  841             "pand          %4, %%mm2    \n\t" 
  842             "psllq         $5, %%mm0    \n\t" 
  843             "psrlq         $1, %%mm2    \n\t" 
  844             "pmulhw        "MANGLE(mul15_mid)
", %%mm0    \n\t" 
  845             "pmulhw        "MANGLE(mul16_mid)
", %%mm1    \n\t" 
  846             "pmulhw        "MANGLE(mul15_hi)
", %%mm2    \n\t" 
  847             "movq       %%mm0, %%mm3    \n\t" 
  848             "movq       %%mm1, %%mm4    \n\t" 
  849             "movq       %%mm2, %%mm5    \n\t" 
  850             "punpcklwd     %5, %%mm0    \n\t" 
  851             "punpcklwd     %5, %%mm1    \n\t" 
  852             "punpcklwd     %5, %%mm2    \n\t" 
  853             "punpckhwd     %5, %%mm3    \n\t" 
  854             "punpckhwd     %5, %%mm4    \n\t" 
  855             "punpckhwd     %5, %%mm5    \n\t" 
  856             "psllq         $8, %%mm1    \n\t" 
  857             "psllq        $16, %%mm2    \n\t" 
  858             "por        %%mm1, %%mm0    \n\t" 
  859             "por        %%mm2, %%mm0    \n\t" 
  860             "psllq         $8, %%mm4    \n\t" 
  861             "psllq        $16, %%mm5    \n\t" 
  862             "por        %%mm4, %%mm3    \n\t" 
  863             "por        %%mm5, %%mm3    \n\t" 
  865             "movq       %%mm0, %%mm6    \n\t" 
  866             "movq       %%mm3, %%mm7    \n\t" 
  868             "movq       8(%1), %%mm0    \n\t" 
  869             "movq       8(%1), %%mm1    \n\t" 
  870             "movq       8(%1), %%mm2    \n\t" 
  871             "pand          %2, %%mm0    \n\t" 
  872             "pand          %3, %%mm1    \n\t" 
  873             "pand          %4, %%mm2    \n\t" 
  874             "psllq         $5, %%mm0    \n\t" 
  875             "psrlq         $1, %%mm2    \n\t" 
  876             "pmulhw        "MANGLE(mul15_mid)
", %%mm0    \n\t" 
  877             "pmulhw        "MANGLE(mul16_mid)
", %%mm1    \n\t" 
  878             "pmulhw        "MANGLE(mul15_hi)
", %%mm2    \n\t" 
  879             "movq       %%mm0, %%mm3    \n\t" 
  880             "movq       %%mm1, %%mm4    \n\t" 
  881             "movq       %%mm2, %%mm5    \n\t" 
  882             "punpcklwd     %5, %%mm0    \n\t" 
  883             "punpcklwd     %5, %%mm1    \n\t" 
  884             "punpcklwd     %5, %%mm2    \n\t" 
  885             "punpckhwd     %5, %%mm3    \n\t" 
  886             "punpckhwd     %5, %%mm4    \n\t" 
  887             "punpckhwd     %5, %%mm5    \n\t" 
  888             "psllq         $8, %%mm1    \n\t" 
  889             "psllq        $16, %%mm2    \n\t" 
  890             "por        %%mm1, %%mm0    \n\t" 
  891             "por        %%mm2, %%mm0    \n\t" 
  892             "psllq         $8, %%mm4    \n\t" 
  893             "psllq        $16, %%mm5    \n\t" 
  894             "por        %%mm4, %%mm3    \n\t" 
  895             "por        %%mm5, %%mm3    \n\t" 
  897             :
"r"(
s),
"m"(mask16b),
"m"(mask16g),
"m"(mask16r),
"m"(mmx_null)
 
  902             "movq       %%mm0, %%mm4    \n\t" 
  903             "movq       %%mm3, %%mm5    \n\t" 
  904             "movq       %%mm6, %%mm0    \n\t" 
  905             "movq       %%mm7, %%mm1    \n\t" 
  907             "movq       %%mm4, %%mm6    \n\t" 
  908             "movq       %%mm5, %%mm7    \n\t" 
  909             "movq       %%mm0, %%mm2    \n\t" 
  910             "movq       %%mm1, %%mm3    \n\t" 
  923         register uint16_t bgr;
 
  925         *
d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
 
  926         *
d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9);
 
  927         *
d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
 
  939     "packuswb   %%mm7, %%mm0    \n\t"  \ 
  940     "packuswb   %%mm7, %%mm1    \n\t"  \ 
  941     "packuswb   %%mm7, %%mm2    \n\t"  \ 
  942     "punpcklbw  %%mm1, %%mm0    \n\t"  \ 
  943     "punpcklbw  %%mm6, %%mm2    \n\t"  \ 
  944     "movq       %%mm0, %%mm3    \n\t"                               \ 
  945     "punpcklwd  %%mm2, %%mm0    \n\t"  \ 
  946     "punpckhwd  %%mm2, %%mm3    \n\t"  \ 
  947     MOVNTQ"     %%mm0,  (%0)    \n\t"                               \ 
  948     MOVNTQ"     %%mm3, 8(%0)    \n\t"                               \ 
  953     const uint16_t *mm_end;
 
  955     const uint16_t *
s = (
const uint16_t *)
src;
 
  956     end = 
s + src_size/2;
 
  958     __asm__ volatile(
"pxor    %%mm7,%%mm7    \n\t":::
"memory");
 
  959     __asm__ volatile(
"pcmpeqd %%mm6,%%mm6    \n\t":::
"memory");
 
  964             "movq        (%1), %%mm0    \n\t" 
  965             "movq        (%1), %%mm1    \n\t" 
  966             "movq        (%1), %%mm2    \n\t" 
  967             "pand          %2, %%mm0    \n\t" 
  968             "pand          %3, %%mm1    \n\t" 
  969             "pand          %4, %%mm2    \n\t" 
  970             "psllq         $5, %%mm0    \n\t" 
  971             "pmulhw        %5, %%mm0    \n\t" 
  972             "pmulhw        %5, %%mm1    \n\t" 
  973             "pmulhw        "MANGLE(mul15_hi)
", %%mm2    \n\t" 
  975             ::
"r"(
d),
"r"(
s),
"m"(mask15b),
"m"(mask15g),
"m"(mask15r) ,
"m"(mul15_mid)
 
  984         register uint16_t bgr;
 
  986         *
d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
 
  987         *
d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7);
 
  988         *
d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
 
  996     const uint16_t *mm_end;
 
  998     const uint16_t *
s = (
const uint16_t*)
src;
 
  999     end = 
s + src_size/2;
 
 1001     __asm__ volatile(
"pxor    %%mm7,%%mm7    \n\t":::
"memory");
 
 1002     __asm__ volatile(
"pcmpeqd %%mm6,%%mm6    \n\t":::
"memory");
 
 1004     while (
s < mm_end) {
 
 1007             "movq        (%1), %%mm0    \n\t" 
 1008             "movq        (%1), %%mm1    \n\t" 
 1009             "movq        (%1), %%mm2    \n\t" 
 1010             "pand          %2, %%mm0    \n\t" 
 1011             "pand          %3, %%mm1    \n\t" 
 1012             "pand          %4, %%mm2    \n\t" 
 1013             "psllq         $5, %%mm0    \n\t" 
 1014             "psrlq         $1, %%mm2    \n\t" 
 1015             "pmulhw        %5, %%mm0    \n\t" 
 1016             "pmulhw        "MANGLE(mul16_mid)
", %%mm1    \n\t" 
 1017             "pmulhw        "MANGLE(mul15_hi)
", %%mm2    \n\t" 
 1019             ::
"r"(
d),
"r"(
s),
"m"(mask16b),
"m"(mask16g),
"m"(mask16r),
"m"(mul15_mid)
 
 1028         register uint16_t bgr;
 
 1030         *
d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
 
 1031         *
d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9);
 
 1032         *
d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
 
 1040     x86_reg mmx_size= 23 - src_size;
 
 1042         "test             %%"FF_REG_a
", %%"FF_REG_a
"    \n\t" 
 1044         "movq     "MANGLE(mask24r)
", %%mm5              \n\t" 
 1045         "movq     "MANGLE(mask24g)
", %%mm6              \n\t" 
 1046         "movq     "MANGLE(mask24b)
", %%mm7              \n\t" 
 1049         PREFETCH" 32(%1, %%"FF_REG_a
")                  \n\t" 
 1050         "movq    (%1, %%"FF_REG_a
"), %%mm0              \n\t"  
 1051         "movq    (%1, %%"FF_REG_a
"), %%mm1              \n\t"  
 1052         "movq   2(%1, %%"FF_REG_a
"), %%mm2              \n\t"  
 1053         "psllq                  $16, %%mm0              \n\t"  
 1054         "pand                 %%mm5, %%mm0              \n\t" 
 1055         "pand                 %%mm6, %%mm1              \n\t" 
 1056         "pand                 %%mm7, %%mm2              \n\t" 
 1057         "por                  %%mm0, %%mm1              \n\t" 
 1058         "por                  %%mm2, %%mm1              \n\t" 
 1059         "movq   6(%1, %%"FF_REG_a
"), %%mm0              \n\t"  
 1060         MOVNTQ"               %%mm1,(%2, %%"FF_REG_a
")  \n\t"  
 1061         "movq   8(%1, %%"FF_REG_a
"), %%mm1              \n\t"  
 1062         "movq  10(%1, %%"FF_REG_a
"), %%mm2              \n\t"  
 1063         "pand                 %%mm7, %%mm0              \n\t" 
 1064         "pand                 %%mm5, %%mm1              \n\t" 
 1065         "pand                 %%mm6, %%mm2              \n\t" 
 1066         "por                  %%mm0, %%mm1              \n\t" 
 1067         "por                  %%mm2, %%mm1              \n\t" 
 1068         "movq  14(%1, %%"FF_REG_a
"), %%mm0              \n\t"  
 1069         MOVNTQ"               %%mm1, 8(%2, %%"FF_REG_a
")\n\t"  
 1070         "movq  16(%1, %%"FF_REG_a
"), %%mm1              \n\t"  
 1071         "movq  18(%1, %%"FF_REG_a
"), %%mm2              \n\t"  
 1072         "pand                 %%mm6, %%mm0              \n\t" 
 1073         "pand                 %%mm7, %%mm1              \n\t" 
 1074         "pand                 %%mm5, %%mm2              \n\t" 
 1075         "por                  %%mm0, %%mm1              \n\t" 
 1076         "por                  %%mm2, %%mm1              \n\t" 
 1077         MOVNTQ"               %%mm1, 16(%2, %%"FF_REG_a
") \n\t" 
 1078         "add                    $24, %%"FF_REG_a
"       \n\t" 
 1082         : 
"r" (
src-mmx_size), 
"r"(dst-mmx_size)
 
 1089     if (mmx_size==23) 
return; 
 
 1093     src_size= 23-mmx_size;
 
 1096     for (
i=0; 
i<src_size; 
i+=3) {
 
 1099         dst[
i + 1] = 
src[
i + 1];
 
 1100         dst[
i + 2] = 
src[
i + 0];
 
 /*
  * Interleave planar Y, U and V into packed Y U Y V byte order in dst.
  *
  * Each pass of the asm body reads 16 luma bytes from ysrc and 8 bytes each
  * from usrc and vsrc, and writes 32 packed bytes to dst. The index register
  * counts chroma samples (it advances by 8) and is compared with chromWidth.
  *
  * After a row, usrc/vsrc advance by chromStride only when
  * (y & (vertLumPerChroma-1)) == vertLumPerChroma-1, so one chroma row is
  * reused for several luma rows. The mask test presumably assumes that
  * vertLumPerChroma is a power of two -- TODO confirm.
  *
  * NOTE(review): this listing does not show every line of the function; the
  * declarations of y, chromWidth and height, the asm loop label/branch and
  * how ysrc/dst move from row to row are not visible here.
  */
 1105 static inline void RENAME(yuvPlanartoyuy2)(
const uint8_t *ysrc, 
const uint8_t *usrc, 
const uint8_t *vsrc, uint8_t *dst,
 
 1107                                            int lumStride, 
int chromStride, 
int dstStride, 
int vertLumPerChroma)
 
 1111     for (y=0; y<
height; y++) {
 
              /* index register = 0 */
 1114             "xor                 %%"FF_REG_a
", %%"FF_REG_a
" \n\t" 
 1117             PREFETCH" 32(%1, %%"FF_REG_a
", 2)           \n\t" 
 1118             PREFETCH" 32(%2, %%"FF_REG_a
")              \n\t" 
 1119             PREFETCH" 32(%3, %%"FF_REG_a
")              \n\t" 
              /* byte-interleave 8 U with 8 V: mm0 = U0 V0 .. U3 V3, mm2 = U4 V4 .. U7 V7 */
 1120             "movq       (%2, %%"FF_REG_a
"), %%mm0       \n\t"  
 1121             "movq                    %%mm0, %%mm2       \n\t"  
 1122             "movq       (%3, %%"FF_REG_a
"), %%mm1       \n\t"  
 1123             "punpcklbw               %%mm1, %%mm0       \n\t"  
 1124             "punpckhbw               %%mm1, %%mm2       \n\t"  
              /* 16 luma bytes (at 2*index), each interleaved in front of a chroma byte: Y U Y V ... */
 1126             "movq     (%1, %%"FF_REG_a
",2), %%mm3       \n\t"  
 1127             "movq    8(%1, %%"FF_REG_a
",2), %%mm5       \n\t"  
 1128             "movq                    %%mm3, %%mm4       \n\t"  
 1129             "movq                    %%mm5, %%mm6       \n\t"  
 1130             "punpcklbw               %%mm0, %%mm3       \n\t"  
 1131             "punpckhbw               %%mm0, %%mm4       \n\t"  
 1132             "punpcklbw               %%mm2, %%mm5       \n\t"  
 1133             "punpckhbw               %%mm2, %%mm6       \n\t"  
              /* store the 32 packed bytes at dst + 4*index */
 1135             MOVNTQ"                  %%mm3,   (%0, %%"FF_REG_a
", 4)    \n\t" 
 1136             MOVNTQ"                  %%mm4,  8(%0, %%"FF_REG_a
", 4)    \n\t" 
 1137             MOVNTQ"                  %%mm5, 16(%0, %%"FF_REG_a
", 4)    \n\t" 
 1138             MOVNTQ"                  %%mm6, 24(%0, %%"FF_REG_a
", 4)    \n\t" 
              /* advance by 8 chroma samples and compare with chromWidth (%4) */
 1140             "add                        $8, %%"FF_REG_a
" \n\t" 
 1141             "cmp                        %4, %%"FF_REG_a
" \n\t" 
              /* inputs: %0=dst %1=ysrc %2=usrc %3=vsrc %4=chromWidth */
 1143             ::
"r"(dst), 
"r"(ysrc), 
"r"(usrc), 
"r"(vsrc), 
"g" (chromWidth)
 
          /* step to the next chroma row only after the last luma row that shares it */
 1146         if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
 
 1147             usrc += chromStride;
 
 1148             vsrc += chromStride;
 
 /*
  * Thin wrapper: forwards all arguments to yuvPlanartoyuy2 with
  * vertLumPerChroma = 2, so a chroma row is advanced after every second
  * luma row.
  * NOTE(review): width and height are passed on below but their parameter
  * line is not part of this listing.
  */
 1162 static inline void RENAME(
yv12toyuy2)(
const uint8_t *ysrc, 
const uint8_t *usrc, 
const uint8_t *vsrc, uint8_t *dst,
 
 1164                                       int lumStride, 
int chromStride, 
int dstStride)
 
 1167     RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, 
width, 
height, lumStride, chromStride, dstStride, 2);
 
 /*
  * Interleave planar Y, U and V into packed U Y V Y byte order in dst.
  *
  * Same structure as the Y-first variant, but here the interleaved chroma
  * bytes are the destination of the unpack, so each chroma byte precedes
  * its luma byte. Each pass reads 16 luma bytes and 8 U + 8 V bytes and
  * writes 32 packed bytes; the index advances by 8 and is compared with
  * chromWidth.
  *
  * usrc/vsrc advance by chromStride only when
  * (y & (vertLumPerChroma-1)) == vertLumPerChroma-1. The mask test
  * presumably assumes vertLumPerChroma is a power of two -- TODO confirm.
  *
  * NOTE(review): this listing does not show every line of the function; the
  * declarations of y, chromWidth and height, the asm loop label/branch and
  * how ysrc/dst move from row to row are not visible here.
  */
 1170 static inline void RENAME(yuvPlanartouyvy)(
const uint8_t *ysrc, 
const uint8_t *usrc, 
const uint8_t *vsrc, uint8_t *dst,
 
 1172                                            int lumStride, 
int chromStride, 
int dstStride, 
int vertLumPerChroma)
 
 1176     for (y=0; y<
height; y++) {
 
              /* index register = 0 */
 1179             "xor             %%"FF_REG_a
", %%"FF_REG_a
" \n\t" 
 1182             PREFETCH" 32(%1, %%"FF_REG_a
", 2)           \n\t" 
 1183             PREFETCH" 32(%2, %%"FF_REG_a
")              \n\t" 
 1184             PREFETCH" 32(%3, %%"FF_REG_a
")              \n\t" 
              /* byte-interleave 8 U with 8 V: mm0 = U0 V0 .. U3 V3, mm2 = U4 V4 .. U7 V7 */
 1185             "movq      (%2, %%"FF_REG_a
"), %%mm0        \n\t"  
 1186             "movq                   %%mm0, %%mm2        \n\t"  
 1187             "movq      (%3, %%"FF_REG_a
"), %%mm1        \n\t"  
 1188             "punpcklbw              %%mm1, %%mm0        \n\t"  
 1189             "punpckhbw              %%mm1, %%mm2        \n\t"  
              /* 16 luma bytes (at 2*index), each interleaved behind a chroma byte: U Y V Y ... */
 1191             "movq    (%1, %%"FF_REG_a
",2), %%mm3        \n\t"  
 1192             "movq   8(%1, %%"FF_REG_a
",2), %%mm5        \n\t"  
 1193             "movq                   %%mm0, %%mm4        \n\t"  
 1194             "movq                   %%mm2, %%mm6        \n\t"  
 1195             "punpcklbw              %%mm3, %%mm0        \n\t"  
 1196             "punpckhbw              %%mm3, %%mm4        \n\t"  
 1197             "punpcklbw              %%mm5, %%mm2        \n\t"  
 1198             "punpckhbw              %%mm5, %%mm6        \n\t"  
              /* store the 32 packed bytes at dst + 4*index */
 1200             MOVNTQ"                 %%mm0,   (%0, %%"FF_REG_a
", 4)     \n\t" 
 1201             MOVNTQ"                 %%mm4,  8(%0, %%"FF_REG_a
", 4)     \n\t" 
 1202             MOVNTQ"                 %%mm2, 16(%0, %%"FF_REG_a
", 4)     \n\t" 
 1203             MOVNTQ"                 %%mm6, 24(%0, %%"FF_REG_a
", 4)     \n\t" 
              /* advance by 8 chroma samples and compare with chromWidth (%4) */
 1205             "add                       $8, %%"FF_REG_a
" \n\t" 
 1206             "cmp                       %4, %%"FF_REG_a
" \n\t" 
              /* inputs: %0=dst %1=ysrc %2=usrc %3=vsrc %4=chromWidth */
 1208             ::
"r"(dst), 
"r"(ysrc), 
"r"(usrc), 
"r"(vsrc), 
"g" (chromWidth)
 
          /* step to the next chroma row only after the last luma row that shares it */
 1211         if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
 
 1212             usrc += chromStride;
 
 1213             vsrc += chromStride;
 
 /*
  * Thin wrapper: forwards all arguments to yuvPlanartouyvy with
  * vertLumPerChroma = 2, so a chroma row is advanced after every second
  * luma row.
  * NOTE(review): width and height are passed on below but their parameter
  * line is not part of this listing.
  */
 1227 static inline void RENAME(
yv12touyvy)(
const uint8_t *ysrc, 
const uint8_t *usrc, 
const uint8_t *vsrc, uint8_t *dst,
 
 1229                                       int lumStride, 
int chromStride, 
int dstStride)
 
 1232     RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, 
width, 
height, lumStride, chromStride, dstStride, 2);
 
 /*
  * Thin wrapper: forwards all arguments to yuvPlanartouyvy with
  * vertLumPerChroma = 1, so the chroma pointers advance after every luma
  * row (the callee's mask test is then true for every y).
  * NOTE(review): width and height are passed on below but their parameter
  * line is not part of this listing.
  */
 1238 static inline void RENAME(
yuv422ptouyvy)(
const uint8_t *ysrc, 
const uint8_t *usrc, 
const uint8_t *vsrc, uint8_t *dst,
 
 1240                                          int lumStride, 
int chromStride, 
int dstStride)
 
 1242     RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, 
width, 
height, lumStride, chromStride, dstStride, 1);
 
 /*
  * Thin wrapper: forwards all arguments to yuvPlanartoyuy2 with
  * vertLumPerChroma = 1, so the chroma pointers advance after every luma
  * row (the callee's mask test is then true for every y).
  * NOTE(review): width and height are passed on below but their parameter
  * line is not part of this listing.
  */
 1248 static inline void RENAME(
yuv422ptoyuy2)(
const uint8_t *ysrc, 
const uint8_t *usrc, 
const uint8_t *vsrc, uint8_t *dst,
 
 1250                                          int lumStride, 
int chromStride, 
int dstStride)
 
 1252     RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, 
width, 
height, lumStride, chromStride, dstStride, 1);
 
 /*
  * Split packed 4:2:2 data whose luma sits in the even bytes (Y U Y V order)
  * into separate Y, U and V planes, handling two source rows per iteration
  * (y += 2).
  *
  * First asm statement: writes 16 luma bytes to ydst and 8 bytes each to
  * udst and vdst per step.
  * Second asm statement: writes luma only; no store to udst/vdst is issued,
  * so the chroma bytes of that row are dropped. It uses mm7 as the byte mask
  * without rebuilding it, i.e. it relies on the value left by the first
  * statement.
  *
  * NOTE(review): the line carrying the function name and the first
  * parameters is not part of this listing, nor are the asm loop
  * labels/branches or the src/ydst row advances between the two statements.
  */
 1261                                       int lumStride, 
int chromStride, 
int srcStride)
 
 1265     for (y=0; y<
height; y+=2) {
 
 1267             "xor              %%"FF_REG_a
", %%"FF_REG_a
"\n\t" 
              /* mm7 = 0x00FF in every 16-bit word: mask for the low byte of each pair */
 1268             "pcmpeqw                 %%mm7, %%mm7       \n\t" 
 1269             "psrlw                      $8, %%mm7       \n\t"  
 1272             PREFETCH" 64(%0, %%"FF_REG_a
", 4)           \n\t" 
              /* first 16 source bytes: high bytes (chroma) -> mm0, low bytes (luma) -> mm2 */
 1273             "movq    (%0, %%"FF_REG_a
", 4), %%mm0       \n\t"  
 1274             "movq   8(%0, %%"FF_REG_a
", 4), %%mm1       \n\t"  
 1275             "movq                    %%mm0, %%mm2       \n\t"  
 1276             "movq                    %%mm1, %%mm3       \n\t"  
 1277             "psrlw                      $8, %%mm0       \n\t"  
 1278             "psrlw                      $8, %%mm1       \n\t"  
 1279             "pand                    %%mm7, %%mm2       \n\t"  
 1280             "pand                    %%mm7, %%mm3       \n\t"  
 1281             "packuswb                %%mm1, %%mm0       \n\t"  
 1282             "packuswb                %%mm3, %%mm2       \n\t"  
 1284             MOVNTQ"                  %%mm2, (%1, %%"FF_REG_a
", 2) \n\t" 
              /* next 16 source bytes: chroma -> mm1, luma -> mm3 */
 1286             "movq  16(%0, %%"FF_REG_a
", 4), %%mm1       \n\t"  
 1287             "movq  24(%0, %%"FF_REG_a
", 4), %%mm2       \n\t"  
 1288             "movq                    %%mm1, %%mm3       \n\t"  
 1289             "movq                    %%mm2, %%mm4       \n\t"  
 1290             "psrlw                      $8, %%mm1       \n\t"  
 1291             "psrlw                      $8, %%mm2       \n\t"  
 1292             "pand                    %%mm7, %%mm3       \n\t"  
 1293             "pand                    %%mm7, %%mm4       \n\t"  
 1294             "packuswb                %%mm2, %%mm1       \n\t"  
 1295             "packuswb                %%mm4, %%mm3       \n\t"  
 1297             MOVNTQ"                  %%mm3, 8(%1, %%"FF_REG_a
", 2) \n\t" 
              /* split the 16 interleaved chroma bytes: odd bytes -> mm0 (to %3), even bytes -> mm2 (to %2) */
 1299             "movq                    %%mm0, %%mm2       \n\t"  
 1300             "movq                    %%mm1, %%mm3       \n\t"  
 1301             "psrlw                      $8, %%mm0       \n\t"  
 1302             "psrlw                      $8, %%mm1       \n\t"  
 1303             "pand                    %%mm7, %%mm2       \n\t"  
 1304             "pand                    %%mm7, %%mm3       \n\t"  
 1305             "packuswb                %%mm1, %%mm0       \n\t"  
 1306             "packuswb                %%mm3, %%mm2       \n\t"  
 1308             MOVNTQ"                  %%mm0, (%3, %%"FF_REG_a
")     \n\t" 
 1309             MOVNTQ"                  %%mm2, (%2, %%"FF_REG_a
")     \n\t" 
 1311             "add                        $8, %%"FF_REG_a
" \n\t" 
 1312             "cmp                        %4, %%"FF_REG_a
" \n\t" 
              /* inputs: %0=src %1=ydst %2=udst %3=vdst %4=chromWidth */
 1314             ::
"r"(
src), 
"r"(ydst), 
"r"(udst), 
"r"(vdst), 
"g" (chromWidth)
 
 1315             : 
"memory", 
"%"FF_REG_a
 
              /* second statement: luma only, 32 source bytes -> 16 bytes at ydst per step */
 1322             "xor              %%"FF_REG_a
", %%"FF_REG_a
"\n\t" 
 1325             PREFETCH" 64(%0, %%"FF_REG_a
", 4)           \n\t" 
 1326             "movq    (%0, %%"FF_REG_a
", 4), %%mm0       \n\t"  
 1327             "movq   8(%0, %%"FF_REG_a
", 4), %%mm1       \n\t"  
 1328             "movq  16(%0, %%"FF_REG_a
", 4), %%mm2       \n\t"  
 1329             "movq  24(%0, %%"FF_REG_a
", 4), %%mm3       \n\t"  
 1330             "pand                    %%mm7, %%mm0       \n\t"  
 1331             "pand                    %%mm7, %%mm1       \n\t"  
 1332             "pand                    %%mm7, %%mm2       \n\t"  
 1333             "pand                    %%mm7, %%mm3       \n\t"  
 1334             "packuswb                %%mm1, %%mm0       \n\t"  
 1335             "packuswb                %%mm3, %%mm2       \n\t"  
 1337             MOVNTQ"                  %%mm0,  (%1, %%"FF_REG_a
", 2) \n\t" 
 1338             MOVNTQ"                  %%mm2, 8(%1, %%"FF_REG_a
", 2) \n\t" 
 1340             "add                        $8, %%"FF_REG_a
"\n\t" 
 1341             "cmp                        %4, %%"FF_REG_a
"\n\t" 
 1344             ::
"r"(
src), 
"r"(ydst), 
"r"(udst), 
"r"(vdst), 
"g" (chromWidth)
 
 1345             : 
"memory", 
"%"FF_REG_a
 
          /* one chroma row is produced per pair of source rows */
 1347         udst += chromStride;
 
 1348         vdst += chromStride;
 
 /*
  * Upscale one plane by 2 in both directions using 3:1 weighted averages of
  * neighbouring samples. Only compiled when COMPILE_TEMPLATE_MMXEXT or
  * COMPILE_TEMPLATE_AMD3DNOW is set (the asm needs PAVGB).
  *
  * Visible structure:
  *  - first output row: horizontal interpolation of the first source row;
  *  - for y = 1 .. srcHeight-1: two output rows are built from source rows
  *    src and src+srcStride, by asm for the part covered by
  *    mmxSize = srcWidth & ~15 and by the C loop for the remainder;
  *  - a final horizontal interpolation with the same formulas as the first.
  *
  * NOTE(review): this listing does not show every line of the function (for
  * example the declarations of x and y, the asm label/branch, asm operand %4,
  * the per-row src/dst advances and the matching #endif are not visible).
  */
 1358 #if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW 
 1359 static inline void RENAME(
planar2x)(
const uint8_t *
src, uint8_t *dst, 
int srcWidth, 
int srcHeight, 
int srcStride, 
int dstStride)
 
      /* horizontal-only pass: each output sample is 3/4 of the nearer source sample + 1/4 of the other */
 1366     for (x=0; x<srcWidth-1; x++) {
 
 1367         dst[2*x+1]= (3*
src[x] +   
src[x+1])>>2;
 
 1368         dst[2*x+2]= (  
src[x] + 3*
src[x+1])>>2;
 
 1370     dst[2*srcWidth-1]= 
src[srcWidth-1];
 
 1374     for (y=1; y<srcHeight; y++) {
 
          /* width rounded down to a multiple of 16 */
 1375         x86_reg mmxSize= srcWidth&~15;
 
              /* index register starts at operand %4 (not shown in this listing) */
 1379             "mov                       %4, %%"FF_REG_a
" \n\t" 
              /*
               * Build mm4/mm5 from the first 8 bytes of each row: bytes shifted up by
               * one position, with the bytes selected by mmx_ff OR-ed back in
               * (presumably mmx_ff keeps only the lowest byte -- TODO confirm).
               */
 1380             "movq        "MANGLE(mmx_ff)
", %%mm0    \n\t" 
 1381             "movq      (%0, %%"FF_REG_a
"), %%mm4    \n\t" 
 1382             "movq                   %%mm4, %%mm2    \n\t" 
 1383             "psllq                     $8, %%mm4    \n\t" 
 1384             "pand                   %%mm0, %%mm2    \n\t" 
 1385             "por                    %%mm2, %%mm4    \n\t" 
 1386             "movq      (%1, %%"FF_REG_a
"), %%mm5    \n\t" 
 1387             "movq                   %%mm5, %%mm3    \n\t" 
 1388             "psllq                     $8, %%mm5    \n\t" 
 1389             "pand                   %%mm0, %%mm3    \n\t" 
 1390             "por                    %%mm3, %%mm5    \n\t" 
              /* mm0/mm1 = rows at offset 0, mm2/mm3 = rows at offset +1 */
 1392             "movq      (%0, %%"FF_REG_a
"), %%mm0    \n\t" 
 1393             "movq      (%1, %%"FF_REG_a
"), %%mm1    \n\t" 
 1394             "movq     1(%0, %%"FF_REG_a
"), %%mm2    \n\t" 
 1395             "movq     1(%1, %%"FF_REG_a
"), %%mm3    \n\t" 
              /* averaging twice with the same operand weights that operand roughly 3:1 */
 1396             PAVGB"                  %%mm0, %%mm5    \n\t" 
 1397             PAVGB"                  %%mm0, %%mm3    \n\t" 
 1398             PAVGB"                  %%mm0, %%mm5    \n\t" 
 1399             PAVGB"                  %%mm0, %%mm3    \n\t" 
 1400             PAVGB"                  %%mm1, %%mm4    \n\t" 
 1401             PAVGB"                  %%mm1, %%mm2    \n\t" 
 1402             PAVGB"                  %%mm1, %%mm4    \n\t" 
 1403             PAVGB"                  %%mm1, %%mm2    \n\t" 
              /* interleave the two result vectors per output row and store 16 bytes to each row */
 1404             "movq                   %%mm5, %%mm7    \n\t" 
 1405             "movq                   %%mm4, %%mm6    \n\t" 
 1406             "punpcklbw              %%mm3, %%mm5    \n\t" 
 1407             "punpckhbw              %%mm3, %%mm7    \n\t" 
 1408             "punpcklbw              %%mm2, %%mm4    \n\t" 
 1409             "punpckhbw              %%mm2, %%mm6    \n\t" 
 1410             MOVNTQ"                 %%mm5,  (%2, %%"FF_REG_a
", 2)  \n\t" 
 1411             MOVNTQ"                 %%mm7, 8(%2, %%"FF_REG_a
", 2)  \n\t" 
 1412             MOVNTQ"                 %%mm4,  (%3, %%"FF_REG_a
", 2)  \n\t" 
 1413             MOVNTQ"                 %%mm6, 8(%3, %%"FF_REG_a
", 2)  \n\t" 
              /* advance by 8 samples and reload mm4/mm5 from offset -1 for the next step */
 1414             "add                       $8, %%"FF_REG_a
"            \n\t" 
 1415             "movq    -1(%0, %%"FF_REG_a
"), %%mm4    \n\t" 
 1416             "movq    -1(%1, %%"FF_REG_a
"), %%mm5    \n\t" 
              /* inputs: %0/%1 = the two source rows, %2/%3 = the two output rows, each offset by mmxSize (2*mmxSize for dst) */
 1418             :: 
"r" (
src + mmxSize  ), 
"r" (
src + srcStride + mmxSize  ),
 
 1419                "r" (dst + mmxSize*2), 
"r" (dst + dstStride + mmxSize*2),
 
              /* leftmost output column: vertical 3:1 blend only */
 1426             dst[0]         = (
src[0] * 3 + 
src[srcStride]) >> 2;
 
 1427             dst[dstStride] = (
src[0] + 3 * 
src[srcStride]) >> 2;
 
          /* scalar loop from mmxSize-1 on: 3:1 blends of diagonal neighbours from the two rows */
 1430         for (x=mmxSize-1; x<srcWidth-1; x++) {
 
 1431             dst[2*x          +1]= (3*
src[x+0] +   
src[x+srcStride+1])>>2;
 
 1432             dst[2*x+dstStride+2]= (  
src[x+0] + 3*
src[x+srcStride+1])>>2;
 
 1433             dst[2*x+dstStride+1]= (  
src[x+1] + 3*
src[x+srcStride  ])>>2;
 
 1434             dst[2*x          +2]= (3*
src[x+1] +   
src[x+srcStride  ])>>2;
 
          /* rightmost output column: vertical 3:1 blend only */
 1436         dst[srcWidth*2 -1            ]= (3*
src[srcWidth-1] +   
src[srcWidth-1 + srcStride])>>2;
 
 1437         dst[srcWidth*2 -1 + dstStride]= (  
src[srcWidth-1] + 3*
src[srcWidth-1 + srcStride])>>2;
 
      /* closing horizontal-only pass, same formulas as the opening one */
 1446     for (x=0; x<srcWidth-1; x++) {
 
 1447         dst[2*x+1]= (3*
src[x] +   
src[x+1])>>2;
 
 1448         dst[2*x+2]= (  
src[x] + 3*
src[x+1])>>2;
 
 1450     dst[2*srcWidth-1]= 
src[srcWidth-1];
 
 /*
  * Split packed 4:2:2 data whose luma sits in the odd bytes (U Y V Y order)
  * into separate Y, U and V planes, handling two source rows per iteration
  * (y += 2). Not compiled for the AMD3DNOW template.
  *
  * First asm statement: writes 16 luma bytes to ydst and 8 bytes each to
  * udst and vdst per step.
  * Second asm statement: writes luma only (a plain shift extracts the high
  * bytes); no store to udst/vdst is issued, so that row's chroma is dropped.
  *
  * NOTE(review): this listing does not show every line of the function (for
  * example the width/height parameters, local declarations, asm loop
  * labels/branches and the src/ydst row advances are not visible).
  */
 1458 #if !COMPILE_TEMPLATE_AMD3DNOW 
 1465 static inline void RENAME(uyvytoyv12)(
const uint8_t *
src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
 
 1467                                       int lumStride, 
int chromStride, 
int srcStride)
 
 1471     for (y=0; y<
height; y+=2) {
 
 1473             "xor          %%"FF_REG_a
", %%"FF_REG_a
" \n\t" 
              /* mm7 = 0x00FF in every 16-bit word: mask for the low byte of each pair */
 1474             "pcmpeqw             %%mm7, %%mm7   \n\t" 
 1475             "psrlw                  $8, %%mm7   \n\t"  
 1478             PREFETCH" 64(%0, %%"FF_REG_a
", 4)          \n\t" 
              /* first 16 source bytes: low bytes (chroma) -> mm0, high bytes (luma) -> mm2 */
 1479             "movq       (%0, %%"FF_REG_a
", 4), %%mm0   \n\t"  
 1480             "movq      8(%0, %%"FF_REG_a
", 4), %%mm1   \n\t"  
 1481             "movq                %%mm0, %%mm2   \n\t"  
 1482             "movq                %%mm1, %%mm3   \n\t"  
 1483             "pand                %%mm7, %%mm0   \n\t"  
 1484             "pand                %%mm7, %%mm1   \n\t"  
 1485             "psrlw                  $8, %%mm2   \n\t"  
 1486             "psrlw                  $8, %%mm3   \n\t"  
 1487             "packuswb            %%mm1, %%mm0   \n\t"  
 1488             "packuswb            %%mm3, %%mm2   \n\t"  
 1490             MOVNTQ"              %%mm2,  (%1, %%"FF_REG_a
", 2) \n\t" 
              /* next 16 source bytes: chroma -> mm1, luma -> mm3 */
 1492             "movq     16(%0, %%"FF_REG_a
", 4), %%mm1   \n\t"  
 1493             "movq     24(%0, %%"FF_REG_a
", 4), %%mm2   \n\t"  
 1494             "movq                %%mm1, %%mm3   \n\t"  
 1495             "movq                %%mm2, %%mm4   \n\t"  
 1496             "pand                %%mm7, %%mm1   \n\t"  
 1497             "pand                %%mm7, %%mm2   \n\t"  
 1498             "psrlw                  $8, %%mm3   \n\t"  
 1499             "psrlw                  $8, %%mm4   \n\t"  
 1500             "packuswb            %%mm2, %%mm1   \n\t"  
 1501             "packuswb            %%mm4, %%mm3   \n\t"  
 1503             MOVNTQ"              %%mm3, 8(%1, %%"FF_REG_a
", 2) \n\t" 
              /* split the 16 interleaved chroma bytes: odd bytes -> mm0 (to %3), even bytes -> mm2 (to %2) */
 1505             "movq                %%mm0, %%mm2   \n\t"  
 1506             "movq                %%mm1, %%mm3   \n\t"  
 1507             "psrlw                  $8, %%mm0   \n\t"  
 1508             "psrlw                  $8, %%mm1   \n\t"  
 1509             "pand                %%mm7, %%mm2   \n\t"  
 1510             "pand                %%mm7, %%mm3   \n\t"  
 1511             "packuswb            %%mm1, %%mm0   \n\t"  
 1512             "packuswb            %%mm3, %%mm2   \n\t"  
 1514             MOVNTQ"              %%mm0, (%3, %%"FF_REG_a
") \n\t" 
 1515             MOVNTQ"              %%mm2, (%2, %%"FF_REG_a
") \n\t" 
 1517             "add                    $8, %%"FF_REG_a
" \n\t" 
 1518             "cmp                    %4, %%"FF_REG_a
" \n\t" 
              /* inputs: %0=src %1=ydst %2=udst %3=vdst %4=chromWidth */
 1520             ::
"r"(
src), 
"r"(ydst), 
"r"(udst), 
"r"(vdst), 
"g" (chromWidth)
 
 1521             : 
"memory", 
"%"FF_REG_a
 
              /* second statement: luma only, 32 source bytes -> 16 bytes at ydst per step */
 1528             "xor          %%"FF_REG_a
", %%"FF_REG_a
"  \n\t" 
 1531             PREFETCH" 64(%0, %%"FF_REG_a
", 4)         \n\t" 
 1532             "movq       (%0, %%"FF_REG_a
", 4), %%mm0  \n\t"  
 1533             "movq      8(%0, %%"FF_REG_a
", 4), %%mm1  \n\t"  
 1534             "movq     16(%0, %%"FF_REG_a
", 4), %%mm2  \n\t"  
 1535             "movq     24(%0, %%"FF_REG_a
", 4), %%mm3  \n\t"  
 1536             "psrlw                  $8, %%mm0   \n\t"  
 1537             "psrlw                  $8, %%mm1   \n\t"  
 1538             "psrlw                  $8, %%mm2   \n\t"  
 1539             "psrlw                  $8, %%mm3   \n\t"  
 1540             "packuswb            %%mm1, %%mm0   \n\t"  
 1541             "packuswb            %%mm3, %%mm2   \n\t"  
 1543             MOVNTQ"              %%mm0,  (%1, %%"FF_REG_a
", 2) \n\t" 
 1544             MOVNTQ"              %%mm2, 8(%1, %%"FF_REG_a
", 2) \n\t" 
 1546             "add                    $8, %%"FF_REG_a
" \n\t" 
 1547             "cmp                    %4, %%"FF_REG_a
" \n\t" 
 1550             ::
"r"(
src), 
"r"(ydst), 
"r"(udst), 
"r"(vdst), 
"g" (chromWidth)
 
 1551             : 
"memory", 
"%"FF_REG_a
 
          /* one chroma row is produced per pair of source rows */
 1553         udst += chromStride;
 
 1554         vdst += chromStride;
 
 /*
  * Convert packed 3-byte pixels into planar Y, U and V, with one chroma
  * sample per 2x2 pixel group.
  *
  * Visible structure of the main loop (y advances by 2, bound height-2):
  *  - a luma asm statement, apparently inside the i = 0..1 loop (one pass per
  *    source row), producing 8 luma bytes from 8 pixels per step;
  *  - a chroma asm statement reading the rows at src and src+srcStride and
  *    writing 4 bytes to udst and 4 bytes to vdst per step;
  *  - udst/vdst advanced by chromStride.
  * Coefficients are read at the byte offsets BGR2Y_IDX / BGR2U_IDX /
  * BGR2V_IDX from a table pointer (operand %3 of the luma statement, rgb2yuv
  * as operand %5 of the chroma statement).
  *
  * NOTE(review): this listing does not show every line of the function. Not
  * visible: the width/height/rgb2yuv parameter lines, local declarations, asm
  * labels/branches, the operand list of the luma statement, the src/ydst row
  * advances, and the #else/#endif lines belonging to the two #if directives
  * inside the chroma statement.
  */
 1572 static inline void RENAME(rgb24toyv12)(
const uint8_t *
src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
 
 1574                                        int lumStride, 
int chromStride, 
int srcStride,
 
 1577 #define BGR2Y_IDX "16*4+16*32" 
 1578 #define BGR2U_IDX "16*4+16*33" 
 1579 #define BGR2V_IDX "16*4+16*34" 
          /* skip two luma rows and one chroma row; NOTE(review): the statements that precede/guard these advances are not in this listing */
 1586         ydst += 2*lumStride;
 
 1587         udst += chromStride;
 
 1588         vdst += chromStride;
 
 1592     for (y=0; y<
height-2; y+=2) {
 
 1594         for (
i=0; 
i<2; 
i++) {
 
                  /*
                   * Luma: index register = operand %2, byte offset register = 3 * index.
                   * mm6 = luma coefficients, mm5 = ff_w1111, mm7 = 0 (for byte->word unpack).
                   */
 1596                 "mov                        %2, %%"FF_REG_a
"\n\t" 
 1597                 "movq          "BGR2Y_IDX
"(%3), %%mm6       \n\t" 
 1598                 "movq       "MANGLE(ff_w1111)
", %%mm5       \n\t" 
 1599                 "pxor                    %%mm7, %%mm7       \n\t" 
 1600                 "lea (%%"FF_REG_a
", %%"FF_REG_a
", 2), %%"FF_REG_d
" \n\t" 
 1603                 PREFETCH" 64(%0, %%"FF_REG_d
")              \n\t" 
                  /* pixels 0..3 (3 bytes apart): widen to words, multiply-add with coefficients, scale, sum */
 1604                 "movd       (%0, %%"FF_REG_d
"), %%mm0       \n\t" 
 1605                 "movd      3(%0, %%"FF_REG_d
"), %%mm1       \n\t" 
 1606                 "punpcklbw               %%mm7, %%mm0       \n\t" 
 1607                 "punpcklbw               %%mm7, %%mm1       \n\t" 
 1608                 "movd      6(%0, %%"FF_REG_d
"), %%mm2       \n\t" 
 1609                 "movd      9(%0, %%"FF_REG_d
"), %%mm3       \n\t" 
 1610                 "punpcklbw               %%mm7, %%mm2       \n\t" 
 1611                 "punpcklbw               %%mm7, %%mm3       \n\t" 
 1612                 "pmaddwd                 %%mm6, %%mm0       \n\t" 
 1613                 "pmaddwd                 %%mm6, %%mm1       \n\t" 
 1614                 "pmaddwd                 %%mm6, %%mm2       \n\t" 
 1615                 "pmaddwd                 %%mm6, %%mm3       \n\t" 
 1616                 "psrad                      $8, %%mm0       \n\t" 
 1617                 "psrad                      $8, %%mm1       \n\t" 
 1618                 "psrad                      $8, %%mm2       \n\t" 
 1619                 "psrad                      $8, %%mm3       \n\t" 
 1620                 "packssdw                %%mm1, %%mm0       \n\t" 
 1621                 "packssdw                %%mm3, %%mm2       \n\t" 
 1622                 "pmaddwd                 %%mm5, %%mm0       \n\t" 
 1623                 "pmaddwd                 %%mm5, %%mm2       \n\t" 
 1624                 "packssdw                %%mm2, %%mm0       \n\t" 
 1625                 "psraw                      $7, %%mm0       \n\t" 
                  /* pixels 4..7: same sequence, result in mm4 */
 1627                 "movd     12(%0, %%"FF_REG_d
"), %%mm4       \n\t" 
 1628                 "movd     15(%0, %%"FF_REG_d
"), %%mm1       \n\t" 
 1629                 "punpcklbw               %%mm7, %%mm4       \n\t" 
 1630                 "punpcklbw               %%mm7, %%mm1       \n\t" 
 1631                 "movd     18(%0, %%"FF_REG_d
"), %%mm2       \n\t" 
 1632                 "movd     21(%0, %%"FF_REG_d
"), %%mm3       \n\t" 
 1633                 "punpcklbw               %%mm7, %%mm2       \n\t" 
 1634                 "punpcklbw               %%mm7, %%mm3       \n\t" 
 1635                 "pmaddwd                 %%mm6, %%mm4       \n\t" 
 1636                 "pmaddwd                 %%mm6, %%mm1       \n\t" 
 1637                 "pmaddwd                 %%mm6, %%mm2       \n\t" 
 1638                 "pmaddwd                 %%mm6, %%mm3       \n\t" 
 1639                 "psrad                      $8, %%mm4       \n\t" 
 1640                 "psrad                      $8, %%mm1       \n\t" 
 1641                 "psrad                      $8, %%mm2       \n\t" 
 1642                 "psrad                      $8, %%mm3       \n\t" 
 1643                 "packssdw                %%mm1, %%mm4       \n\t" 
 1644                 "packssdw                %%mm3, %%mm2       \n\t" 
 1645                 "pmaddwd                 %%mm5, %%mm4       \n\t" 
 1646                 "pmaddwd                 %%mm5, %%mm2       \n\t" 
 1647                 "add                       $24, %%"FF_REG_d
"\n\t" 
 1648                 "packssdw                %%mm2, %%mm4       \n\t" 
 1649                 "psraw                      $7, %%mm4       \n\t" 
                  /* pack the 8 results to bytes, add the luma offset with saturation, store 8 bytes */
 1651                 "packuswb                %%mm4, %%mm0       \n\t" 
 1652                 "paddusb "MANGLE(ff_bgr2YOffset)
", %%mm0    \n\t" 
 1654                 MOVNTQ"                  %%mm0, (%1, %%"FF_REG_a
") \n\t" 
 1655                 "add                        $8,      %%"FF_REG_a
"  \n\t" 
 1659                 : 
"%"FF_REG_a, 
"%"FF_REG_d
 
              /*
               * Chroma: index register = -chromWidth (operand %4), byte offset
               * register = 6 * index; all four base pointers are pre-advanced so the
               * negative index addresses the start of the row.
               * mm6 = U coefficients, mm5 = ff_w1111, mm7 = 0.
               */
 1666             "mov                        %4, %%"FF_REG_a
"\n\t" 
 1667             "movq       "MANGLE(ff_w1111)
", %%mm5       \n\t" 
 1668             "movq          "BGR2U_IDX
"(%5), %%mm6       \n\t" 
 1669             "pxor                    %%mm7, %%mm7       \n\t" 
 1670             "lea (%%"FF_REG_a
", %%"FF_REG_a
", 2), %%"FF_REG_d
" \n\t" 
 1671             "add              %%"FF_REG_d
", %%"FF_REG_d
"\n\t" 
 1674             PREFETCH" 64(%0, %%"FF_REG_d
")              \n\t" 
 1675             PREFETCH" 64(%1, %%"FF_REG_d
")              \n\t" 
              /* PAVGB variant: average the two rows, then average with a copy shifted by 24 bits (one pixel) */
 1676 #if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW 
 1677             "movq       (%0, %%"FF_REG_d
"), %%mm0       \n\t" 
 1678             "movq       (%1, %%"FF_REG_d
"), %%mm1       \n\t" 
 1679             "movq      6(%0, %%"FF_REG_d
"), %%mm2       \n\t" 
 1680             "movq      6(%1, %%"FF_REG_d
"), %%mm3       \n\t" 
 1681             PAVGB"                   %%mm1, %%mm0       \n\t" 
 1682             PAVGB"                   %%mm3, %%mm2       \n\t" 
 1683             "movq                    %%mm0, %%mm1       \n\t" 
 1684             "movq                    %%mm2, %%mm3       \n\t" 
 1685             "psrlq                     $24, %%mm0       \n\t" 
 1686             "psrlq                     $24, %%mm2       \n\t" 
 1687             PAVGB"                   %%mm1, %%mm0       \n\t" 
 1688             PAVGB"                   %%mm3, %%mm2       \n\t" 
 1689             "punpcklbw               %%mm7, %%mm0       \n\t" 
 1690             "punpcklbw               %%mm7, %%mm2       \n\t" 
              /* presumably the alternative branch (its #else line is not in this listing): sum the 4 pixels of each 2x2 group as words, then shift right by 2 */
 1692             "movd       (%0, %%"FF_REG_d
"), %%mm0       \n\t" 
 1693             "movd       (%1, %%"FF_REG_d
"), %%mm1       \n\t" 
 1694             "movd      3(%0, %%"FF_REG_d
"), %%mm2       \n\t" 
 1695             "movd      3(%1, %%"FF_REG_d
"), %%mm3       \n\t" 
 1696             "punpcklbw               %%mm7, %%mm0       \n\t" 
 1697             "punpcklbw               %%mm7, %%mm1       \n\t" 
 1698             "punpcklbw               %%mm7, %%mm2       \n\t" 
 1699             "punpcklbw               %%mm7, %%mm3       \n\t" 
 1700             "paddw                   %%mm1, %%mm0       \n\t" 
 1701             "paddw                   %%mm3, %%mm2       \n\t" 
 1702             "paddw                   %%mm2, %%mm0       \n\t" 
 1703             "movd      6(%0, %%"FF_REG_d
"), %%mm4       \n\t" 
 1704             "movd      6(%1, %%"FF_REG_d
"), %%mm1       \n\t" 
 1705             "movd      9(%0, %%"FF_REG_d
"), %%mm2       \n\t" 
 1706             "movd      9(%1, %%"FF_REG_d
"), %%mm3       \n\t" 
 1707             "punpcklbw               %%mm7, %%mm4       \n\t" 
 1708             "punpcklbw               %%mm7, %%mm1       \n\t" 
 1709             "punpcklbw               %%mm7, %%mm2       \n\t" 
 1710             "punpcklbw               %%mm7, %%mm3       \n\t" 
 1711             "paddw                   %%mm1, %%mm4       \n\t" 
 1712             "paddw                   %%mm3, %%mm2       \n\t" 
 1713             "paddw                   %%mm4, %%mm2       \n\t" 
 1714             "psrlw                      $2, %%mm0       \n\t" 
 1715             "psrlw                      $2, %%mm2       \n\t" 
              /* apply V coefficients (into mm1/mm3) and U coefficients (mm6, into mm0/mm2) to the two averaged pixels */
 1717             "movq          "BGR2V_IDX
"(%5), %%mm1       \n\t" 
 1718             "movq          "BGR2V_IDX
"(%5), %%mm3       \n\t" 
 1720             "pmaddwd                 %%mm0, %%mm1       \n\t" 
 1721             "pmaddwd                 %%mm2, %%mm3       \n\t" 
 1722             "pmaddwd                 %%mm6, %%mm0       \n\t" 
 1723             "pmaddwd                 %%mm6, %%mm2       \n\t" 
 1724             "psrad                      $8, %%mm0       \n\t" 
 1725             "psrad                      $8, %%mm1       \n\t" 
 1726             "psrad                      $8, %%mm2       \n\t" 
 1727             "psrad                      $8, %%mm3       \n\t" 
 1728             "packssdw                %%mm2, %%mm0       \n\t" 
 1729             "packssdw                %%mm3, %%mm1       \n\t" 
 1730             "pmaddwd                 %%mm5, %%mm0       \n\t" 
 1731             "pmaddwd                 %%mm5, %%mm1       \n\t" 
 1732             "packssdw                %%mm1, %%mm0       \n\t"  
 1733             "psraw                      $7, %%mm0       \n\t" 
              /* second half of the step (source bytes 12..23): same two variants, averaged pixels in mm4/mm2 */
 1735 #if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW 
 1736             "movq     12(%0, %%"FF_REG_d
"), %%mm4       \n\t" 
 1737             "movq     12(%1, %%"FF_REG_d
"), %%mm1       \n\t" 
 1738             "movq     18(%0, %%"FF_REG_d
"), %%mm2       \n\t" 
 1739             "movq     18(%1, %%"FF_REG_d
"), %%mm3       \n\t" 
 1740             PAVGB"                   %%mm1, %%mm4       \n\t" 
 1741             PAVGB"                   %%mm3, %%mm2       \n\t" 
 1742             "movq                    %%mm4, %%mm1       \n\t" 
 1743             "movq                    %%mm2, %%mm3       \n\t" 
 1744             "psrlq                     $24, %%mm4       \n\t" 
 1745             "psrlq                     $24, %%mm2       \n\t" 
 1746             PAVGB"                   %%mm1, %%mm4       \n\t" 
 1747             PAVGB"                   %%mm3, %%mm2       \n\t" 
 1748             "punpcklbw               %%mm7, %%mm4       \n\t" 
 1749             "punpcklbw               %%mm7, %%mm2       \n\t" 
 1751             "movd     12(%0, %%"FF_REG_d
"), %%mm4       \n\t" 
 1752             "movd     12(%1, %%"FF_REG_d
"), %%mm1       \n\t" 
 1753             "movd     15(%0, %%"FF_REG_d
"), %%mm2       \n\t" 
 1754             "movd     15(%1, %%"FF_REG_d
"), %%mm3       \n\t" 
 1755             "punpcklbw               %%mm7, %%mm4       \n\t" 
 1756             "punpcklbw               %%mm7, %%mm1       \n\t" 
 1757             "punpcklbw               %%mm7, %%mm2       \n\t" 
 1758             "punpcklbw               %%mm7, %%mm3       \n\t" 
 1759             "paddw                   %%mm1, %%mm4       \n\t" 
 1760             "paddw                   %%mm3, %%mm2       \n\t" 
 1761             "paddw                   %%mm2, %%mm4       \n\t" 
 1762             "movd     18(%0, %%"FF_REG_d
"), %%mm5       \n\t" 
 1763             "movd     18(%1, %%"FF_REG_d
"), %%mm1       \n\t" 
 1764             "movd     21(%0, %%"FF_REG_d
"), %%mm2       \n\t" 
 1765             "movd     21(%1, %%"FF_REG_d
"), %%mm3       \n\t" 
 1766             "punpcklbw               %%mm7, %%mm5       \n\t" 
 1767             "punpcklbw               %%mm7, %%mm1       \n\t" 
 1768             "punpcklbw               %%mm7, %%mm2       \n\t" 
 1769             "punpcklbw               %%mm7, %%mm3       \n\t" 
 1770             "paddw                   %%mm1, %%mm5       \n\t" 
 1771             "paddw                   %%mm3, %%mm2       \n\t" 
 1772             "paddw                   %%mm5, %%mm2       \n\t" 
              /* mm5 was used as scratch just above, so ff_w1111 is reloaded */
 1773             "movq       "MANGLE(ff_w1111)
", %%mm5       \n\t" 
 1774             "psrlw                      $2, %%mm4       \n\t" 
 1775             "psrlw                      $2, %%mm2       \n\t" 
 1777             "movq          "BGR2V_IDX
"(%5), %%mm1       \n\t" 
 1778             "movq          "BGR2V_IDX
"(%5), %%mm3       \n\t" 
 1780             "pmaddwd                 %%mm4, %%mm1       \n\t" 
 1781             "pmaddwd                 %%mm2, %%mm3       \n\t" 
 1782             "pmaddwd                 %%mm6, %%mm4       \n\t" 
 1783             "pmaddwd                 %%mm6, %%mm2       \n\t" 
 1784             "psrad                      $8, %%mm4       \n\t" 
 1785             "psrad                      $8, %%mm1       \n\t" 
 1786             "psrad                      $8, %%mm2       \n\t" 
 1787             "psrad                      $8, %%mm3       \n\t" 
 1788             "packssdw                %%mm2, %%mm4       \n\t" 
 1789             "packssdw                %%mm3, %%mm1       \n\t" 
 1790             "pmaddwd                 %%mm5, %%mm4       \n\t" 
 1791             "pmaddwd                 %%mm5, %%mm1       \n\t" 
 1792             "add                       $24, %%"FF_REG_d
"\n\t" 
 1793             "packssdw                %%mm1, %%mm4       \n\t"  
 1794             "psraw                      $7, %%mm4       \n\t" 
              /* gather both halves, pack to bytes, add the chroma offset; low dword goes to udst (%2), high dword to vdst (%3) */
 1796             "movq                    %%mm0, %%mm1           \n\t" 
 1797             "punpckldq               %%mm4, %%mm0           \n\t" 
 1798             "punpckhdq               %%mm4, %%mm1           \n\t" 
 1799             "packsswb                %%mm1, %%mm0           \n\t" 
 1800             "paddb "MANGLE(ff_bgr2UVOffset)
", %%mm0         \n\t" 
 1801             "movd                    %%mm0, (%2, %%"FF_REG_a
") \n\t" 
 1802             "punpckhdq               %%mm0, %%mm0              \n\t" 
 1803             "movd                    %%mm0, (%3, %%"FF_REG_a
") \n\t" 
 1804             "add                        $4, %%"FF_REG_a
"       \n\t" 
              /* inputs: %0 = end of the upper row's used bytes, %1 = same for the row below, %2/%3 = end of the U/V rows, %4 = -chromWidth, %5 = rgb2yuv */
 1806             : : 
"r" (
src+chromWidth*6), 
"r" (
src+srcStride+chromWidth*6), 
"r" (udst+chromWidth), 
"r" (vdst+chromWidth), 
"g" (-chromWidth), 
"r"(
rgb2yuv)
 
 1808             : 
"%"FF_REG_a, 
"%"FF_REG_d
 
          /* one chroma row is produced per pair of source rows */
 1811         udst += chromStride;
 
 1812         vdst += chromStride;
 
 1825 #if !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX 
 1828                                     int src2Stride, 
int dstStride)
 
 1836 #if COMPILE_TEMPLATE_SSE2 
 1837             if (!((((intptr_t)
src1) | ((intptr_t)src2) | ((intptr_t)dest))&15)) {
 
 1839             "xor              %%"FF_REG_a
", %%"FF_REG_a
"  \n\t" 
 1841             PREFETCH" 64(%1, %%"FF_REG_a
")          \n\t" 
 1842             PREFETCH" 64(%2, %%"FF_REG_a
")          \n\t" 
 1843             "movdqa  (%1, %%"FF_REG_a
"), %%xmm0     \n\t" 
 1844             "movdqa  (%1, %%"FF_REG_a
"), %%xmm1     \n\t" 
 1845             "movdqa  (%2, %%"FF_REG_a
"), %%xmm2     \n\t" 
 1846             "punpcklbw           %%xmm2, %%xmm0     \n\t" 
 1847             "punpckhbw           %%xmm2, %%xmm1     \n\t" 
 1848             "movntdq             %%xmm0,   (%0, %%"FF_REG_a
", 2) \n\t" 
 1849             "movntdq             %%xmm1, 16(%0, %%"FF_REG_a
", 2) \n\t" 
 1850             "add                    $16, %%"FF_REG_a
"            \n\t" 
 1851             "cmp                     %3, %%"FF_REG_a
"            \n\t" 
 1854             : 
"memory", 
XMM_CLOBBERS(
"xmm0", 
"xmm1", 
"xmm2",) 
"%"FF_REG_a
 
 1859             "xor %%"FF_REG_a
", %%"FF_REG_a
"         \n\t" 
 1861             PREFETCH" 64(%1, %%"FF_REG_a
")          \n\t" 
 1862             PREFETCH" 64(%2, %%"FF_REG_a
")          \n\t" 
 1863             "movq    (%1, %%"FF_REG_a
"), %%mm0      \n\t" 
 1864             "movq   8(%1, %%"FF_REG_a
"), %%mm2      \n\t" 
 1865             "movq                 %%mm0, %%mm1      \n\t" 
 1866             "movq                 %%mm2, %%mm3      \n\t" 
 1867             "movq    (%2, %%"FF_REG_a
"), %%mm4      \n\t" 
 1868             "movq   8(%2, %%"FF_REG_a
"), %%mm5      \n\t" 
 1869             "punpcklbw            %%mm4, %%mm0      \n\t" 
 1870             "punpckhbw            %%mm4, %%mm1      \n\t" 
 1871             "punpcklbw            %%mm5, %%mm2      \n\t" 
 1872             "punpckhbw            %%mm5, %%mm3      \n\t" 
 1873             MOVNTQ"               %%mm0,   (%0, %%"FF_REG_a
", 2) \n\t" 
 1874             MOVNTQ"               %%mm1,  8(%0, %%"FF_REG_a
", 2) \n\t" 
 1875             MOVNTQ"               %%mm2, 16(%0, %%"FF_REG_a
", 2) \n\t" 
 1876             MOVNTQ"               %%mm3, 24(%0, %%"FF_REG_a
", 2) \n\t" 
 1877             "add                    $16, %%"FF_REG_a
"            \n\t" 
 1878             "cmp                     %3, %%"FF_REG_a
"            \n\t" 
 1881             : 
"memory", 
"%"FF_REG_a
 
 1887             dest[2*
w+1] = src2[
w];
 
 1901 #if !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL 
 1902 #if !COMPILE_TEMPLATE_AMD3DNOW && (ARCH_X86_32 || COMPILE_TEMPLATE_SSE2) && COMPILE_TEMPLATE_MMXEXT == COMPILE_TEMPLATE_SSE2 && HAVE_X86ASM 
 1903 void RENAME(ff_nv12ToUV)(uint8_t *dstU, uint8_t *dstV,
 
 1904                          const uint8_t *unused,
 
 1905                          const uint8_t *
src1,
 
 1906                          const uint8_t *src2,
 
 1911                                       int dst1Stride, 
int dst2Stride)
 
 1922 #
if !COMPILE_TEMPLATE_SSE2
 
 1932 #if !COMPILE_TEMPLATE_SSE2 
 1933 #if !COMPILE_TEMPLATE_AMD3DNOW 
 1935                                        uint8_t *dst1, uint8_t *dst2,
 
 1937                                        int srcStride1, 
int srcStride2,
 
 1938                                        int dstStride1, 
int dstStride2)
 
 1946         ::
"m"(*(
src1+srcStride1)),
"m"(*(src2+srcStride2)):
"memory");
 
 1948         const uint8_t* 
s1=
src1+srcStride1*(y>>1);
 
 1949         uint8_t* 
d=dst1+dstStride1*y;
 
 1951         for (;x<
w-31;x+=32) {
 
 1954                 "movq         (%1,%2), %%mm0 \n\t" 
 1955                 "movq        8(%1,%2), %%mm2 \n\t" 
 1956                 "movq       16(%1,%2), %%mm4 \n\t" 
 1957                 "movq       24(%1,%2), %%mm6 \n\t" 
 1958                 "movq      %%mm0, %%mm1 \n\t" 
 1959                 "movq      %%mm2, %%mm3 \n\t" 
 1960                 "movq      %%mm4, %%mm5 \n\t" 
 1961                 "movq      %%mm6, %%mm7 \n\t" 
 1962                 "punpcklbw %%mm0, %%mm0 \n\t" 
 1963                 "punpckhbw %%mm1, %%mm1 \n\t" 
 1964                 "punpcklbw %%mm2, %%mm2 \n\t" 
 1965                 "punpckhbw %%mm3, %%mm3 \n\t" 
 1966                 "punpcklbw %%mm4, %%mm4 \n\t" 
 1967                 "punpckhbw %%mm5, %%mm5 \n\t" 
 1968                 "punpcklbw %%mm6, %%mm6 \n\t" 
 1969                 "punpckhbw %%mm7, %%mm7 \n\t" 
 1970                 MOVNTQ"    %%mm0,   (%0,%2,2)  \n\t" 
 1971                 MOVNTQ"    %%mm1,  8(%0,%2,2)  \n\t" 
 1972                 MOVNTQ"    %%mm2, 16(%0,%2,2)  \n\t" 
 1973                 MOVNTQ"    %%mm3, 24(%0,%2,2)  \n\t" 
 1974                 MOVNTQ"    %%mm4, 32(%0,%2,2)  \n\t" 
 1975                 MOVNTQ"    %%mm5, 40(%0,%2,2)  \n\t" 
 1976                 MOVNTQ"    %%mm6, 48(%0,%2,2)  \n\t" 
 1977                 MOVNTQ"    %%mm7, 56(%0,%2,2)" 
 1978                 :: 
"r"(
d), 
"r"(
s1), 
"r"(x)
 
 1981         for (;x<
w;x++) 
d[2*x]=
d[2*x+1]=
s1[x];
 
 1984         const uint8_t* 
s2=src2+srcStride2*(y>>1);
 
 1985         uint8_t* 
d=dst2+dstStride2*y;
 
 1987         for (;x<
w-31;x+=32) {
 
 1990                 "movq         (%1,%2), %%mm0 \n\t" 
 1991                 "movq        8(%1,%2), %%mm2 \n\t" 
 1992                 "movq       16(%1,%2), %%mm4 \n\t" 
 1993                 "movq       24(%1,%2), %%mm6 \n\t" 
 1994                 "movq      %%mm0, %%mm1 \n\t" 
 1995                 "movq      %%mm2, %%mm3 \n\t" 
 1996                 "movq      %%mm4, %%mm5 \n\t" 
 1997                 "movq      %%mm6, %%mm7 \n\t" 
 1998                 "punpcklbw %%mm0, %%mm0 \n\t" 
 1999                 "punpckhbw %%mm1, %%mm1 \n\t" 
 2000                 "punpcklbw %%mm2, %%mm2 \n\t" 
 2001                 "punpckhbw %%mm3, %%mm3 \n\t" 
 2002                 "punpcklbw %%mm4, %%mm4 \n\t" 
 2003                 "punpckhbw %%mm5, %%mm5 \n\t" 
 2004                 "punpcklbw %%mm6, %%mm6 \n\t" 
 2005                 "punpckhbw %%mm7, %%mm7 \n\t" 
 2006                 MOVNTQ"    %%mm0,   (%0,%2,2)  \n\t" 
 2007                 MOVNTQ"    %%mm1,  8(%0,%2,2)  \n\t" 
 2008                 MOVNTQ"    %%mm2, 16(%0,%2,2)  \n\t" 
 2009                 MOVNTQ"    %%mm3, 24(%0,%2,2)  \n\t" 
 2010                 MOVNTQ"    %%mm4, 32(%0,%2,2)  \n\t" 
 2011                 MOVNTQ"    %%mm5, 40(%0,%2,2)  \n\t" 
 2012                 MOVNTQ"    %%mm6, 48(%0,%2,2)  \n\t" 
 2013                 MOVNTQ"    %%mm7, 56(%0,%2,2)" 
 2014                 :: 
"r"(
d), 
"r"(
s2), 
"r"(x)
 
 2017         for (;x<
w;x++) 
d[2*x]=
d[2*x+1]=
s2[x];
 
 2029                                         int srcStride1, 
int srcStride2,
 
 2030                                         int srcStride3, 
int dstStride)
 
 2036         const uint8_t* yp=
src1+srcStride1*y;
 
 2037         const uint8_t* up=src2+srcStride2*(y>>2);
 
 2038         const uint8_t* vp=src3+srcStride3*(y>>2);
 
 2039         uint8_t* 
d=dst+dstStride*y;
 
 2046                 "movq      (%1, %0, 4), %%mm0   \n\t"  
 2047                 "movq         (%2, %0), %%mm1   \n\t"  
 2048                 "movq         (%3, %0), %%mm2   \n\t"  
 2049                 "movq            %%mm0, %%mm3   \n\t"  
 2050                 "movq            %%mm1, %%mm4   \n\t"  
 2051                 "movq            %%mm2, %%mm5   \n\t"  
 2052                 "punpcklbw       %%mm1, %%mm1   \n\t"  
 2053                 "punpcklbw       %%mm2, %%mm2   \n\t"  
 2054                 "punpckhbw       %%mm4, %%mm4   \n\t"  
 2055                 "punpckhbw       %%mm5, %%mm5   \n\t"  
 2057                 "movq            %%mm1, %%mm6   \n\t" 
 2058                 "punpcklbw       %%mm2, %%mm1   \n\t"  
 2059                 "punpcklbw       %%mm1, %%mm0   \n\t"  
 2060                 "punpckhbw       %%mm1, %%mm3   \n\t"  
 2061                 MOVNTQ"          %%mm0,  (%4, %0, 8)    \n\t" 
 2062                 MOVNTQ"          %%mm3, 8(%4, %0, 8)    \n\t" 
 2064                 "punpckhbw       %%mm2, %%mm6   \n\t"  
 2065                 "movq     8(%1, %0, 4), %%mm0   \n\t" 
 2066                 "movq            %%mm0, %%mm3   \n\t" 
 2067                 "punpcklbw       %%mm6, %%mm0   \n\t"  
 2068                 "punpckhbw       %%mm6, %%mm3   \n\t"  
 2069                 MOVNTQ"          %%mm0, 16(%4, %0, 8)   \n\t" 
 2070                 MOVNTQ"          %%mm3, 24(%4, %0, 8)   \n\t" 
 2072                 "movq            %%mm4, %%mm6   \n\t" 
 2073                 "movq    16(%1, %0, 4), %%mm0   \n\t" 
 2074                 "movq            %%mm0, %%mm3   \n\t" 
 2075                 "punpcklbw       %%mm5, %%mm4   \n\t" 
 2076                 "punpcklbw       %%mm4, %%mm0   \n\t"  
 2077                 "punpckhbw       %%mm4, %%mm3   \n\t"  
 2078                 MOVNTQ"          %%mm0, 32(%4, %0, 8)   \n\t" 
 2079                 MOVNTQ"          %%mm3, 40(%4, %0, 8)   \n\t" 
 2081                 "punpckhbw       %%mm5, %%mm6   \n\t" 
 2082                 "movq    24(%1, %0, 4), %%mm0   \n\t" 
 2083                 "movq            %%mm0, %%mm3   \n\t" 
 2084                 "punpcklbw       %%mm6, %%mm0   \n\t"  
 2085                 "punpckhbw       %%mm6, %%mm3   \n\t"  
 2086                 MOVNTQ"          %%mm0, 48(%4, %0, 8)   \n\t" 
 2087                 MOVNTQ"          %%mm3, 56(%4, %0, 8)   \n\t" 
 2090                 : 
"r"(yp), 
"r" (up), 
"r"(vp), 
"r"(
d)
 
 2094             const int x2 = x<<2;
 
 2097             d[8*x+2] = yp[x2+1];
 
 2099             d[8*x+4] = yp[x2+2];
 
 2101             d[8*x+6] = yp[x2+3];
 
 2122             "pcmpeqw       %%mm7, %%mm7        \n\t" 
 2123             "psrlw            $8, %%mm7        \n\t" 
 2125             "movq -30(%1, %0, 2), %%mm0        \n\t" 
 2126             "movq -22(%1, %0, 2), %%mm1        \n\t" 
 2127             "movq -14(%1, %0, 2), %%mm2        \n\t" 
 2128             "movq  -6(%1, %0, 2), %%mm3        \n\t" 
 2129             "pand          %%mm7, %%mm0        \n\t" 
 2130             "pand          %%mm7, %%mm1        \n\t" 
 2131             "pand          %%mm7, %%mm2        \n\t" 
 2132             "pand          %%mm7, %%mm3        \n\t" 
 2133             "packuswb      %%mm1, %%mm0        \n\t" 
 2134             "packuswb      %%mm3, %%mm2        \n\t" 
 2135             MOVNTQ"        %%mm0,-15(%2, %0)   \n\t" 
 2136             MOVNTQ"        %%mm2,- 7(%2, %0)   \n\t" 
 2140             : 
"r"(
src), 
"r"(dst)
 
 2145         dst[count]= 
src[2*count];
 
 2160             "pcmpeqw       %%mm7, %%mm7        \n\t" 
 2161             "psrlw            $8, %%mm7        \n\t" 
 2163             "movq -32(%1, %0, 2), %%mm0        \n\t" 
 2164             "movq -24(%1, %0, 2), %%mm1        \n\t" 
 2165             "movq -16(%1, %0, 2), %%mm2        \n\t" 
 2166             "movq  -8(%1, %0, 2), %%mm3        \n\t" 
 2167             "pand          %%mm7, %%mm0        \n\t" 
 2168             "pand          %%mm7, %%mm1        \n\t" 
 2169             "pand          %%mm7, %%mm2        \n\t" 
 2170             "pand          %%mm7, %%mm3        \n\t" 
 2171             "packuswb      %%mm1, %%mm0        \n\t" 
 2172             "packuswb      %%mm3, %%mm2        \n\t" 
 2173             MOVNTQ"        %%mm0,-16(%2, %0)   \n\t" 
 2174             MOVNTQ"        %%mm2,- 8(%2, %0)   \n\t" 
 2178             : 
"r"(
src), 
"r"(dst)
 
 2183         dst[count]= 
src[2*count];
 
 2188 #if !COMPILE_TEMPLATE_AMD3DNOW 
 2189 static void RENAME(extract_even2)(
const uint8_t *
src, uint8_t *dst0, uint8_t *dst1, 
x86_reg count)
 
 2198             "pcmpeqw       %%mm7, %%mm7        \n\t" 
 2199             "psrlw            $8, %%mm7        \n\t" 
 2201             "movq -28(%1, %0, 4), %%mm0        \n\t" 
 2202             "movq -20(%1, %0, 4), %%mm1        \n\t" 
 2203             "movq -12(%1, %0, 4), %%mm2        \n\t" 
 2204             "movq  -4(%1, %0, 4), %%mm3        \n\t" 
 2205             "pand          %%mm7, %%mm0        \n\t" 
 2206             "pand          %%mm7, %%mm1        \n\t" 
 2207             "pand          %%mm7, %%mm2        \n\t" 
 2208             "pand          %%mm7, %%mm3        \n\t" 
 2209             "packuswb      %%mm1, %%mm0        \n\t" 
 2210             "packuswb      %%mm3, %%mm2        \n\t" 
 2211             "movq          %%mm0, %%mm1        \n\t" 
 2212             "movq          %%mm2, %%mm3        \n\t" 
 2213             "psrlw            $8, %%mm0        \n\t" 
 2214             "psrlw            $8, %%mm2        \n\t" 
 2215             "pand          %%mm7, %%mm1        \n\t" 
 2216             "pand          %%mm7, %%mm3        \n\t" 
 2217             "packuswb      %%mm2, %%mm0        \n\t" 
 2218             "packuswb      %%mm3, %%mm1        \n\t" 
 2219             MOVNTQ"        %%mm0,- 7(%3, %0)   \n\t" 
 2220             MOVNTQ"        %%mm1,- 7(%2, %0)   \n\t" 
 2224             : 
"r"(
src), 
"r"(dst0), 
"r"(dst1)
 
 2229         dst0[count]= 
src[4*count+0];
 
 2230         dst1[count]= 
src[4*count+2];
 
 2236 static void RENAME(extract_even2avg)(
const uint8_t *
src0, 
const uint8_t *
src1, uint8_t *dst0, uint8_t *dst1, 
x86_reg count)
 
 2247             "pcmpeqw        %%mm7, %%mm7        \n\t" 
 2248             "psrlw             $8, %%mm7        \n\t" 
 2250             "movq  -28(%1, %0, 4), %%mm0        \n\t" 
 2251             "movq  -20(%1, %0, 4), %%mm1        \n\t" 
 2252             "movq  -12(%1, %0, 4), %%mm2        \n\t" 
 2253             "movq   -4(%1, %0, 4), %%mm3        \n\t" 
 2254             PAVGB" -28(%2, %0, 4), %%mm0        \n\t" 
 2255             PAVGB" -20(%2, %0, 4), %%mm1        \n\t" 
 2256             PAVGB" -12(%2, %0, 4), %%mm2        \n\t" 
 2257             PAVGB" - 4(%2, %0, 4), %%mm3        \n\t" 
 2258             "pand           %%mm7, %%mm0        \n\t" 
 2259             "pand           %%mm7, %%mm1        \n\t" 
 2260             "pand           %%mm7, %%mm2        \n\t" 
 2261             "pand           %%mm7, %%mm3        \n\t" 
 2262             "packuswb       %%mm1, %%mm0        \n\t" 
 2263             "packuswb       %%mm3, %%mm2        \n\t" 
 2264             "movq           %%mm0, %%mm1        \n\t" 
 2265             "movq           %%mm2, %%mm3        \n\t" 
 2266             "psrlw             $8, %%mm0        \n\t" 
 2267             "psrlw             $8, %%mm2        \n\t" 
 2268             "pand           %%mm7, %%mm1        \n\t" 
 2269             "pand           %%mm7, %%mm3        \n\t" 
 2270             "packuswb       %%mm2, %%mm0        \n\t" 
 2271             "packuswb       %%mm3, %%mm1        \n\t" 
 2272             MOVNTQ"         %%mm0,- 7(%4, %0)   \n\t" 
 2273             MOVNTQ"         %%mm1,- 7(%3, %0)   \n\t" 
 2277             : 
"r"(
src0), 
"r"(
src1), 
"r"(dst0), 
"r"(dst1)
 
 2283         dst0[count]= (
src0[4*count+0]+
src1[4*count+0])>>1;
 
 2284         dst1[count]= (
src0[4*count+2]+
src1[4*count+2])>>1;
 
 2289 #if !COMPILE_TEMPLATE_AMD3DNOW 
 2290 static void RENAME(extract_odd2)(
const uint8_t *
src, uint8_t *dst0, uint8_t *dst1, 
x86_reg count)
 
 2299             "pcmpeqw       %%mm7, %%mm7        \n\t" 
 2300             "psrlw            $8, %%mm7        \n\t" 
 2302             "movq -28(%1, %0, 4), %%mm0        \n\t" 
 2303             "movq -20(%1, %0, 4), %%mm1        \n\t" 
 2304             "movq -12(%1, %0, 4), %%mm2        \n\t" 
 2305             "movq  -4(%1, %0, 4), %%mm3        \n\t" 
 2306             "psrlw            $8, %%mm0        \n\t" 
 2307             "psrlw            $8, %%mm1        \n\t" 
 2308             "psrlw            $8, %%mm2        \n\t" 
 2309             "psrlw            $8, %%mm3        \n\t" 
 2310             "packuswb      %%mm1, %%mm0        \n\t" 
 2311             "packuswb      %%mm3, %%mm2        \n\t" 
 2312             "movq          %%mm0, %%mm1        \n\t" 
 2313             "movq          %%mm2, %%mm3        \n\t" 
 2314             "psrlw            $8, %%mm0        \n\t" 
 2315             "psrlw            $8, %%mm2        \n\t" 
 2316             "pand          %%mm7, %%mm1        \n\t" 
 2317             "pand          %%mm7, %%mm3        \n\t" 
 2318             "packuswb      %%mm2, %%mm0        \n\t" 
 2319             "packuswb      %%mm3, %%mm1        \n\t" 
 2320             MOVNTQ"        %%mm0,- 7(%3, %0)   \n\t" 
 2321             MOVNTQ"        %%mm1,- 7(%2, %0)   \n\t" 
 2325             : 
"r"(
src), 
"r"(dst0), 
"r"(dst1)
 
 2331         dst0[count]= 
src[4*count+0];
 
 2332         dst1[count]= 
src[4*count+2];
 
 2338 static void RENAME(extract_odd2avg)(
const uint8_t *
src0, 
const uint8_t *
src1, uint8_t *dst0, uint8_t *dst1, 
x86_reg count)
 
 2349             "pcmpeqw        %%mm7, %%mm7        \n\t" 
 2350             "psrlw             $8, %%mm7        \n\t" 
 2352             "movq  -28(%1, %0, 4), %%mm0        \n\t" 
 2353             "movq  -20(%1, %0, 4), %%mm1        \n\t" 
 2354             "movq  -12(%1, %0, 4), %%mm2        \n\t" 
 2355             "movq   -4(%1, %0, 4), %%mm3        \n\t" 
 2356             PAVGB" -28(%2, %0, 4), %%mm0        \n\t" 
 2357             PAVGB" -20(%2, %0, 4), %%mm1        \n\t" 
 2358             PAVGB" -12(%2, %0, 4), %%mm2        \n\t" 
 2359             PAVGB" - 4(%2, %0, 4), %%mm3        \n\t" 
 2360             "psrlw             $8, %%mm0        \n\t" 
 2361             "psrlw             $8, %%mm1        \n\t" 
 2362             "psrlw             $8, %%mm2        \n\t" 
 2363             "psrlw             $8, %%mm3        \n\t" 
 2364             "packuswb       %%mm1, %%mm0        \n\t" 
 2365             "packuswb       %%mm3, %%mm2        \n\t" 
 2366             "movq           %%mm0, %%mm1        \n\t" 
 2367             "movq           %%mm2, %%mm3        \n\t" 
 2368             "psrlw             $8, %%mm0        \n\t" 
 2369             "psrlw             $8, %%mm2        \n\t" 
 2370             "pand           %%mm7, %%mm1        \n\t" 
 2371             "pand           %%mm7, %%mm3        \n\t" 
 2372             "packuswb       %%mm2, %%mm0        \n\t" 
 2373             "packuswb       %%mm3, %%mm1        \n\t" 
 2374             MOVNTQ"         %%mm0,- 7(%4, %0)   \n\t" 
 2375             MOVNTQ"         %%mm1,- 7(%3, %0)   \n\t" 
 2379             : 
"r"(
src0), 
"r"(
src1), 
"r"(dst0), 
"r"(dst1)
 
 2387         dst0[count]= (
src0[4*count+0]+
src1[4*count+0])>>1;
 
 2388         dst1[count]= (
src0[4*count+2]+
src1[4*count+2])>>1;
 
 2395                                  int lumStride, 
int chromStride, 
int srcStride)
 
 2400     for (y=0; y<
height; y++) {
 
 2403             RENAME(extract_odd2avg)(
src-srcStride, 
src, udst, vdst, chromWidth);
 
 2418 #if !COMPILE_TEMPLATE_AMD3DNOW 
 2421                                  int lumStride, 
int chromStride, 
int srcStride)
 
 2426     for (y=0; y<
height; y++) {
 
 2428         RENAME(extract_odd2)(
src, udst, vdst, chromWidth);
 
 2445                                  int lumStride, 
int chromStride, 
int srcStride)
 
 2450     for (y=0; y<
height; y++) {
 
 2453             RENAME(extract_even2avg)(
src-srcStride, 
src, udst, vdst, chromWidth);
 
 2468 #if !COMPILE_TEMPLATE_AMD3DNOW 
 2471                                  int lumStride, 
int chromStride, 
int srcStride)
 
 2476     for (y=0; y<
height; y++) {
 
 2478         RENAME(extract_even2)(
src, udst, vdst, chromWidth);
 
 2496 #if !COMPILE_TEMPLATE_SSE2 
 2497 #if !COMPILE_TEMPLATE_AMD3DNOW 
 2526 #if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW 
 2537 #if !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX 
 2540 #if !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL 
 2541 #if !COMPILE_TEMPLATE_AMD3DNOW && (ARCH_X86_32 || COMPILE_TEMPLATE_SSE2) && COMPILE_TEMPLATE_MMXEXT == COMPILE_TEMPLATE_SSE2 && HAVE_X86ASM 
  
void(* rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
void(* yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, int width, int height, int lumStride, int chromStride, int dstStride)
Height should be a multiple of 2 and width should be a multiple of 16.
void(* yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, int width, int height, int lumStride, int chromStride, int srcStride)
Height should be a multiple of 2 and width should be a multiple of 16.
#define NAMED_CONSTRAINTS_ADD(...)
static const char rgb2yuv[]
void(* rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int src_size)
void(* yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, int width, int height, int lumStride, int chromStride, int srcStride)
void(* rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int src_size)
void(* rgb15to32)(const uint8_t *src, uint8_t *dst, int src_size)
void(* yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, int width, int height, int lumStride, int chromStride, int dstStride)
Height should be a multiple of 2 and width should be a multiple of 16.
void(* rgb32to16)(const uint8_t *src, uint8_t *dst, int src_size)
void(* ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, int width, int height, int lumStride, int chromStride, int srcStride, int32_t *rgb2yuv)
Height should be a multiple of 2 and width should be a multiple of 2.
void(* rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
#define AV_CEIL_RSHIFT(a, b)
void(* rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
void(* yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, int width, int height, int lumStride, int chromStride, int dstStride)
Width should be a multiple of 16.
void(* rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int src_size)
#define XMM_CLOBBERS(...)
void(* interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst, int width, int height, int src1Stride, int src2Stride, int dstStride)
void(* yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, uint8_t *dst, int width, int height, int srcStride1, int srcStride2, int srcStride3, int dstStride)
void(* rgb16to15)(const uint8_t *src, uint8_t *dst, int src_size)
void(* yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, int width, int height, int lumStride, int chromStride, int srcStride)
void(* rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int src_size)
void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, int width, int height, int lumStride, int chromStride, int srcStride, int32_t *rgb2yuv)
Height should be a multiple of 2 and width should be a multiple of 2.
void(* rgb24to16)(const uint8_t *src, uint8_t *dst, int src_size)
void(* uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, int width, int height, int lumStride, int chromStride, int srcStride)
#define i(width, name, range_min, range_max)
void(* rgb24to15)(const uint8_t *src, uint8_t *dst, int src_size)
void(* rgb32to15)(const uint8_t *src, uint8_t *dst, int src_size)
__asm__(".macro        parse_r var r\n\t" "\\var        = -1\n\t" _IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3) _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7) _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11) _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15) _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19) _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23) _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27) _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31) ".iflt        \\var\n\t" ".error        \"Unable to parse register name \\r\"\n\t" ".endif\n\t" ".endm")
void(* deinterleaveBytes)(const uint8_t *src, uint8_t *dst1, uint8_t *dst2, int width, int height, int srcStride, int dst1Stride, int dst2Stride)
void(* uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, int width, int height, int lumStride, int chromStride, int srcStride)
void(* rgb16to32)(const uint8_t *src, uint8_t *dst, int src_size)
void(* rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int src_size)
void(* rgb15to16)(const uint8_t *src, uint8_t *dst, int src_size)
void(* yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, int width, int height, int lumStride, int chromStride, int dstStride)
Width should be a multiple of 16.
void(* vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst1, uint8_t *dst2, int width, int height, int srcStride1, int srcStride2, int dstStride1, int dstStride2)
void(* rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
void(* planar2x)(const uint8_t *src, uint8_t *dst, int width, int height, int srcStride, int dstStride)