FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
idctdsp_mmx.c
Go to the documentation of this file.
1 /*
2  * SIMD-optimized IDCT-related routines
3  * Copyright (c) 2000, 2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24 
25 #include "config.h"
26 #include "libavutil/cpu.h"
27 #include "libavutil/x86/asm.h"
28 #include "idctdsp.h"
29 #include "inline_asm.h"
30 
31 #if HAVE_INLINE_ASM
32 
/**
 * Store 64 16-bit coefficients as eight rows of eight 8-bit pixels.
 *
 * Each coefficient is narrowed with packuswb, i.e. signed 16-bit values are
 * saturated to the unsigned range 0..255 before being written.
 *
 * @param block     source coefficients; 64 consecutive int16_t values are read
 * @param pixels    destination of the first row; 8 bytes are written per row
 * @param line_size distance in bytes between the starts of consecutive rows
 *
 * The function uses %mm0-%mm7 without listing them as clobbers and does not
 * execute emms.
 * NOTE(review): presumably the caller is responsible for restoring the
 * FPU/MMX state - confirm against the callers.
 */
33 void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
34  int line_size)
35 {
36  const int16_t *p;
37  uint8_t *pix;
38 
39  /* set up the coefficient read cursor and the pixel write cursor */
40  p = block;
41  pix = pixels;
42  /* unrolled loop: first half, rows 0-3 (coefficients 0..31) */
43  __asm__ volatile (
    /* load four rows of coefficients: two 4-word registers per row */
44  "movq (%3), %%mm0 \n\t"
45  "movq 8(%3), %%mm1 \n\t"
46  "movq 16(%3), %%mm2 \n\t"
47  "movq 24(%3), %%mm3 \n\t"
48  "movq 32(%3), %%mm4 \n\t"
49  "movq 40(%3), %%mm5 \n\t"
50  "movq 48(%3), %%mm6 \n\t"
51  "movq 56(%3), %%mm7 \n\t"
    /* merge each register pair into 8 unsigned-saturated bytes (one row) */
52  "packuswb %%mm1, %%mm0 \n\t"
53  "packuswb %%mm3, %%mm2 \n\t"
54  "packuswb %%mm5, %%mm4 \n\t"
55  "packuswb %%mm7, %%mm6 \n\t"
    /* store the rows at pix + 0, 1, 2 and 3 times line_size;
     * %2 holds 3 * line_size because x86 addressing has no scale of 3 */
56  "movq %%mm0, (%0) \n\t"
57  "movq %%mm2, (%0, %1) \n\t"
58  "movq %%mm4, (%0, %1, 2) \n\t"
59  "movq %%mm6, (%0, %2) \n\t"
    /* all operands are inputs; the stores are covered by the "memory" clobber */
60  :: "r" (pix), "r" ((x86_reg) line_size), "r" ((x86_reg) line_size * 3),
61  "r" (p)
62  : "memory");
    /* advance to the second half: four rows down, 32 coefficients further */
63  pix += line_size * 4;
64  p += 32;
65 
66  // If this were an exact copy of the code above, the
67  // compiler would generate some very strange code,
68  // thus using "r".
    /* NOTE(review): as written here, this statement is textually identical to
     * the first one, so the remark above may be stale - confirm. */
    /* second half, rows 4-7 (coefficients 32..63) */
69  __asm__ volatile (
70  "movq (%3), %%mm0 \n\t"
71  "movq 8(%3), %%mm1 \n\t"
72  "movq 16(%3), %%mm2 \n\t"
73  "movq 24(%3), %%mm3 \n\t"
74  "movq 32(%3), %%mm4 \n\t"
75  "movq 40(%3), %%mm5 \n\t"
76  "movq 48(%3), %%mm6 \n\t"
77  "movq 56(%3), %%mm7 \n\t"
78  "packuswb %%mm1, %%mm0 \n\t"
79  "packuswb %%mm3, %%mm2 \n\t"
80  "packuswb %%mm5, %%mm4 \n\t"
81  "packuswb %%mm7, %%mm6 \n\t"
82  "movq %%mm0, (%0) \n\t"
83  "movq %%mm2, (%0, %1) \n\t"
84  "movq %%mm4, (%0, %1, 2) \n\t"
85  "movq %%mm6, (%0, %2) \n\t"
86  :: "r" (pix), "r" ((x86_reg) line_size), "r" ((x86_reg) line_size * 3),
87  "r" (p)
88  : "memory");
89 }
90 
/**
 * Add 64 16-bit coefficients to eight rows of eight 8-bit pixels in place.
 *
 * Each existing pixel byte is widened to a 16-bit word, the matching
 * coefficient is added with signed saturation (paddsw), and the sum is
 * narrowed back to a byte with unsigned saturation (packuswb).
 *
 * @param block     coefficients to add; 64 consecutive int16_t values are read
 * @param pixels    first row of the pixels to update; 8 bytes per row are
 *                  read and rewritten
 * @param line_size distance in bytes between the starts of consecutive rows
 *
 * The function uses %mm0-%mm7 without listing them as clobbers and does not
 * execute emms.
 * NOTE(review): presumably the caller is responsible for restoring the
 * FPU/MMX state - confirm against the callers.
 */
91 void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
92  int line_size)
93 {
94  const int16_t *p;
95  uint8_t *pix;
96  int i;
97 
98  /* set up the coefficient read cursor and the pixel read/write cursor */
99  p = block;
100  pix = pixels;
    /* MOVQ_ZERO is defined outside this file; presumably it clears %mm7, which
     * the unpack instructions below use as the high half when widening bytes
     * to words - TODO confirm. The loop relies on %mm7 keeping its value
     * across the separate asm statements. */
101  MOVQ_ZERO(mm7);
    /* four iterations, each handling two rows (16 coefficients) */
102  i = 4;
103  do {
104  __asm__ volatile (
    /* mm0:mm1 = coefficients for row 0, mm2:mm3 = coefficients for row 1 */
105  "movq (%2), %%mm0 \n\t"
106  "movq 8(%2), %%mm1 \n\t"
107  "movq 16(%2), %%mm2 \n\t"
108  "movq 24(%2), %%mm3 \n\t"
    /* mm4 = 8 pixels of row 0, mm6 = 8 pixels of row 1 */
109  "movq %0, %%mm4 \n\t"
110  "movq %1, %%mm6 \n\t"
    /* row 0: interleave the low/high 4 pixel bytes with mm7 and add */
111  "movq %%mm4, %%mm5 \n\t"
112  "punpcklbw %%mm7, %%mm4 \n\t"
113  "punpckhbw %%mm7, %%mm5 \n\t"
114  "paddsw %%mm4, %%mm0 \n\t"
115  "paddsw %%mm5, %%mm1 \n\t"
    /* row 1: same treatment, reusing mm5 as the scratch copy */
116  "movq %%mm6, %%mm5 \n\t"
117  "punpcklbw %%mm7, %%mm6 \n\t"
118  "punpckhbw %%mm7, %%mm5 \n\t"
119  "paddsw %%mm6, %%mm2 \n\t"
120  "paddsw %%mm5, %%mm3 \n\t"
    /* narrow the sums back to unsigned-saturated bytes and store both rows */
121  "packuswb %%mm1, %%mm0 \n\t"
122  "packuswb %%mm3, %%mm2 \n\t"
123  "movq %%mm0, %0 \n\t"
124  "movq %%mm2, %1 \n\t"
    /* the "+m" operands name single uint8_t objects while movq moves 8 bytes;
     * the "memory" clobber tells the compiler that other memory is accessed */
125  : "+m" (*pix), "+m" (*(pix + line_size))
126  : "r" (p)
127  : "memory");
    /* advance two rows and 16 coefficients for the next iteration */
128  pix += line_size * 2;
129  p += 16;
130  } while (--i);
131 }
132 
133 #endif /* HAVE_INLINE_ASM */