libavcodec/x86/vc1dsp_mmx.c File Reference

#include "libavutil/x86_cpu.h"
#include "libavcodec/dsputil.h"
#include "dsputil_mmx.h"

Go to the source code of this file.

Defines

#define NORMALIZE_MMX(SHIFT)
 Adds the rounder held in mm7 to mm3 and mm4 and arithmetically shifts the results right by SHIFT.
#define TRANSFER_DO_PACK
#define TRANSFER_DONT_PACK
#define DO_UNPACK(reg)   "punpcklbw %%mm0, " reg "\n\t"
#define DONT_UNPACK(reg)
#define LOAD_ROUNDER_MMX(ROUND)
 Computes the rounder 32-r or 8-r and unpacks it to mm7.
#define SHIFT2_LINE(OFF, R0, R1, R2, R3)
#define MSPEL_FILTER13_CORE(UNPACK, MOVQ, A1, A2, A3, A4)
#define MSPEL_FILTER13_VER_16B(NAME, A1, A2, A3, A4)
#define MSPEL_FILTER13_HOR_16B(NAME, A1, A2, A3, A4)
#define MSPEL_FILTER13_8B(NAME, A1, A2, A3, A4)
#define DECLARE_FUNCTION(a, b)
 Macro to ease bicubic filter interpolation functions declarations.

Functions

 DECLARE_ALIGNED_16 (const uint64_t, ff_pw_9)=0x0009000900090009ULL
static void vc1_put_ver_16b_shift2_mmx (int16_t *dst, const uint8_t *src, x86_reg stride, int rnd, int64_t shift)
 Sacrificing mm6 makes it possible to pipeline loads from src.
static void vc1_put_hor_16b_shift2_mmx (uint8_t *dst, x86_reg stride, const int16_t *src, int rnd)
 The data is already unpacked, so some operations can be performed directly from memory.
static void vc1_put_shift2_mmx (uint8_t *dst, const uint8_t *src, x86_reg stride, int rnd, x86_reg offset)
 Purely vertical or horizontal 1/2 shift interpolation.
 DECLARE_ASM_CONST (16, uint64_t, ff_pw_53)
 Filter coefficients made global to allow access by all 1 or 3 quarter shift interpolation functions.
void ff_put_vc1_mspel_mc00_mmx (uint8_t *dst, const uint8_t *src, int stride, int rnd)
void ff_vc1dsp_init_mmx (DSPContext *dsp, AVCodecContext *avctx)


Define Documentation

#define DECLARE_FUNCTION ( a,
 b ) 

Value:

static void put_vc1_mspel_mc ## a ## b ## _mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \
     vc1_mspel_mc(dst, src, stride, a, b, rnd);                         \
}
Macro to ease bicubic filter interpolation functions declarations.

Definition at line 446 of file vc1dsp_mmx.c.

#define DO_UNPACK ( reg   )     "punpcklbw %%mm0, " reg "\n\t"

See also:
MSPEL_FILTER13_CORE for use as UNPACK macro

Definition at line 47 of file vc1dsp_mmx.c.

#define DONT_UNPACK ( reg   ) 

Definition at line 48 of file vc1dsp_mmx.c.

#define LOAD_ROUNDER_MMX ( ROUND   ) 

Value:

"movd      "ROUND", %%mm7         \n\t"    \
     "punpcklwd %%mm7, %%mm7           \n\t"    \
     "punpckldq %%mm7, %%mm7           \n\t"
Computes the rounder 32-r or 8-r and unpacks it to mm7.

Definition at line 51 of file vc1dsp_mmx.c.

Referenced by vc1_put_hor_16b_shift2_mmx(), vc1_put_shift2_mmx(), and vc1_put_ver_16b_shift2_mmx().

#define MSPEL_FILTER13_8B ( NAME,
A1,
A2,
A3,
A4   ) 

Value:

static void                                                             \
vc1_put_## NAME ## _mmx(uint8_t *dst, const uint8_t *src,               \
                        x86_reg stride, int rnd, x86_reg offset)      \
{                                                                       \
    int h = 8;                                                          \
    src -= offset;                                                      \
    rnd = 32-rnd;                                                       \
    __asm__ volatile (                                                      \
        LOAD_ROUNDER_MMX("%6")                                          \
        "movq      "MANGLE(ff_pw_53)", %%mm5       \n\t"                \
        "movq      "MANGLE(ff_pw_18)", %%mm6       \n\t"                \
        ASMALIGN(3)                                                     \
        "1:                        \n\t"                                \
        MSPEL_FILTER13_CORE(DO_UNPACK, "movd   1", A1, A2, A3, A4)      \
        NORMALIZE_MMX("$6")                                             \
        TRANSFER_DO_PACK                                                \
        "add       %5, %1          \n\t"                                \
        "add       %5, %2          \n\t"                                \
        "decl      %0              \n\t"                                \
        "jnz 1b                    \n\t"                                \
        : "+r"(h), "+r" (src),  "+r" (dst)                              \
        : "r"(offset), "r"(3*offset), "g"(stride), "m"(rnd)             \
        : "memory"                                                      \
    );                                                                  \
}

Referenced by DECLARE_ASM_CONST().

#define MSPEL_FILTER13_CORE ( UNPACK,
MOVQ,
A1,
A2,
A3,
A4   ) 

Value:

MOVQ "*0+"A1", %%mm1       \n\t"                           \
     MOVQ "*4+"A1", %%mm2       \n\t"                           \
     UNPACK("%%mm1")                                            \
     UNPACK("%%mm2")                                            \
     "pmullw    "MANGLE(ff_pw_3)", %%mm1\n\t"                   \
     "pmullw    "MANGLE(ff_pw_3)", %%mm2\n\t"                   \
     MOVQ "*0+"A2", %%mm3       \n\t"                           \
     MOVQ "*4+"A2", %%mm4       \n\t"                           \
     UNPACK("%%mm3")                                            \
     UNPACK("%%mm4")                                            \
     "pmullw    %%mm6, %%mm3    \n\t" /* *18 */                 \
     "pmullw    %%mm6, %%mm4    \n\t" /* *18 */                 \
     "psubw     %%mm1, %%mm3    \n\t" /* 18,-3 */               \
     "psubw     %%mm2, %%mm4    \n\t" /* 18,-3 */               \
     MOVQ "*0+"A4", %%mm1       \n\t"                           \
     MOVQ "*4+"A4", %%mm2       \n\t"                           \
     UNPACK("%%mm1")                                            \
     UNPACK("%%mm2")                                            \
     "psllw     $2, %%mm1       \n\t" /* 4* */                  \
     "psllw     $2, %%mm2       \n\t" /* 4* */                  \
     "psubw     %%mm1, %%mm3    \n\t" /* -4,18,-3 */            \
     "psubw     %%mm2, %%mm4    \n\t" /* -4,18,-3 */            \
     MOVQ "*0+"A3", %%mm1       \n\t"                           \
     MOVQ "*4+"A3", %%mm2       \n\t"                           \
     UNPACK("%%mm1")                                            \
     UNPACK("%%mm2")                                            \
     "pmullw    %%mm5, %%mm1    \n\t" /* *53 */                 \
     "pmullw    %%mm5, %%mm2    \n\t" /* *53 */                 \
     "paddw     %%mm1, %%mm3    \n\t" /* 4,53,18,-3 */          \
     "paddw     %%mm2, %%mm4    \n\t"

#define MSPEL_FILTER13_HOR_16B ( NAME,
A1,
A2,
A3,
A4   ) 

Value:

static void                                                             \
vc1_put_hor_16b_ ## NAME ## _mmx(uint8_t *dst, x86_reg stride,         \
                                 const int16_t *src, int rnd)           \
{                                                                       \
    int h = 8;                                                          \
    src -= 1;                                                           \
    rnd -= (-4+58+13-3)*256; /* Add -256 bias */                        \
    __asm__ volatile(                                                       \
        LOAD_ROUNDER_MMX("%4")                                          \
        "movq      "MANGLE(ff_pw_18)", %%mm6   \n\t"                    \
        "movq      "MANGLE(ff_pw_53)", %%mm5   \n\t"                    \
        ASMALIGN(3)                                                     \
        "1:                        \n\t"                                \
        MSPEL_FILTER13_CORE(DONT_UNPACK, "movq 2", A1, A2, A3, A4)      \
        NORMALIZE_MMX("$7")                                             \
        /* Remove bias */                                               \
        "paddw     "MANGLE(ff_pw_128)", %%mm3  \n\t"                    \
        "paddw     "MANGLE(ff_pw_128)", %%mm4  \n\t"                    \
        TRANSFER_DO_PACK                                                \
        "add       $24, %1         \n\t"                                \
        "add       %3, %2          \n\t"                                \
        "decl      %0              \n\t"                                \
        "jnz 1b                    \n\t"                                \
        : "+r"(h), "+r" (src),  "+r" (dst)                              \
        : "r"(stride), "m"(rnd)                                         \
        : "memory"                                                      \
    );                                                                  \
}

#define MSPEL_FILTER13_VER_16B ( NAME,
A1,
A2,
A3,
A4   ) 

#define NORMALIZE_MMX ( SHIFT   ) 

Value:

"paddw     %%mm7, %%mm3           \n\t" /* +bias-r */      \
     "paddw     %%mm7, %%mm4           \n\t" /* +bias-r */      \
     "psraw     "SHIFT", %%mm3         \n\t"                    \
     "psraw     "SHIFT", %%mm4         \n\t"
Adds the rounder held in mm7 to mm3 and mm4 and arithmetically shifts the results right by SHIFT (packing is done separately by TRANSFER_DO_PACK).

Definition at line 32 of file vc1dsp_mmx.c.

Referenced by vc1_put_hor_16b_shift2_mmx(), and vc1_put_shift2_mmx().

#define SHIFT2_LINE ( OFF,
R0,
R1,
R2,
R3   ) 

Value:

"paddw     %%mm"#R2", %%mm"#R1"    \n\t"    \
    "movd      (%0,%3), %%mm"#R0"      \n\t"    \
    "pmullw    %%mm6, %%mm"#R1"        \n\t"    \
    "punpcklbw %%mm0, %%mm"#R0"        \n\t"    \
    "movd      (%0,%2), %%mm"#R3"      \n\t"    \
    "psubw     %%mm"#R0", %%mm"#R1"    \n\t"    \
    "punpcklbw %%mm0, %%mm"#R3"        \n\t"    \
    "paddw     %%mm7, %%mm"#R1"        \n\t"    \
    "psubw     %%mm"#R3", %%mm"#R1"    \n\t"    \
    "psraw     %4, %%mm"#R1"           \n\t"    \
    "movq      %%mm"#R1", "#OFF"(%1)   \n\t"    \
    "add       %2, %0                  \n\t"

Definition at line 56 of file vc1dsp_mmx.c.

Referenced by vc1_put_ver_16b_shift2_mmx().

#define TRANSFER_DO_PACK

Value:

"packuswb  %%mm4, %%mm3           \n\t"    \
     "movq      %%mm3, (%2)            \n\t"

Definition at line 38 of file vc1dsp_mmx.c.

Referenced by vc1_put_hor_16b_shift2_mmx().

#define TRANSFER_DONT_PACK

Value:

"movq      %%mm3, 0(%2)           \n\t"    \
     "movq      %%mm4, 8(%2)           \n\t"

Definition at line 42 of file vc1dsp_mmx.c.


Function Documentation

DECLARE_ALIGNED_16 ( const uint64_t,
ff_pw_9
)

DECLARE_ASM_CONST ( 16,
uint64_t,
ff_pw_53
)

Filter coefficients made global to allow access by all 1 or 3 quarter shift interpolation functions.

Definition at line 206 of file vc1dsp_mmx.c.

void ff_put_vc1_mspel_mc00_mmx ( uint8_t *  dst,
const uint8_t *  src,
int  stride,
int  rnd 
)

Definition at line 1795 of file dsputil_mmx.c.

Referenced by ff_vc1dsp_init_mmx().

void ff_vc1dsp_init_mmx ( DSPContext dsp,
AVCodecContext avctx 
)

Definition at line 470 of file vc1dsp_mmx.c.

Referenced by dsputil_init_mmx().

static void vc1_put_hor_16b_shift2_mmx ( uint8_t *  dst,
x86_reg  stride,
const int16_t *  src,
int  rnd 
) [static]

The data is already unpacked, so some operations can be performed directly from memory.

Definition at line 110 of file vc1dsp_mmx.c.

Referenced by DECLARE_ASM_CONST().

static void vc1_put_shift2_mmx ( uint8_t *  dst,
const uint8_t *  src,
x86_reg  stride,
int  rnd,
x86_reg  offset 
) [static]

Purely vertical or horizontal 1/2 shift interpolation.

Sacrifices mm6 for the *9 factor.

Definition at line 154 of file vc1dsp_mmx.c.

Referenced by DECLARE_ASM_CONST().

static void vc1_put_ver_16b_shift2_mmx ( int16_t *  dst,
const uint8_t *  src,
x86_reg  stride,
int  rnd,
int64_t  shift 
) [static]

Sacrificing mm6 makes it possible to pipeline loads from src.

Definition at line 73 of file vc1dsp_mmx.c.

Referenced by DECLARE_ASM_CONST().


Generated on Fri Oct 26 02:35:45 2012 for FFmpeg by  doxygen 1.5.8