FFmpeg
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
yadif_template.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2006 Michael Niedermayer <michaelni@gmx.at>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License along
17  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
18  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19  */
20 
/*
 * Register-width abstraction for the filter below.
 *
 * With COMPILE_TEMPLATE_SSE2 the code works on %xmm registers and emits
 * STEP = 8 output bytes per iteration; otherwise it works on %mm registers
 * and emits STEP = 4 output bytes per iteration.
 *
 *   MM     register name prefix; the register number is appended by the user
 *   MOV    narrow load/store of STEP bytes
 *   MOVQ   full-register move (movdqa in the SSE2 build, so memory operands
 *          used with it must be 16-byte aligned there)
 *   MOVQU  full-register load from a possibly unaligned address
 *   PSRL1  shift the whole register right by one byte
 *   PSRL2  shift the whole register right by two bytes
 *   PSHUF(out, in)
 *          write to `out` a copy of `in` moved down by two bytes. In the
 *          non-SSE2 build this is a word shuffle, so only the two low words
 *          match a plain shift; those are the ones the caller unpacks.
 *          Note the operand order: the FIRST macro argument is the register
 *          that gets written.
 */
#ifdef COMPILE_TEMPLATE_SSE2
#define MM    "%%xmm"
#define MOV   "movq"
#define MOVQ  "movdqa"
#define MOVQU "movdqu"
#define STEP  8
#define PSRL1(reg) "psrldq $1, "reg" \n\t"
#define PSRL2(reg) "psrldq $2, "reg" \n\t"
#define PSHUF(out,in) "movdqa "in", "out" \n\t"\
                      "psrldq $2, "out" \n\t"
#else
#define MM    "%%mm"
#define MOV   "movd"
#define MOVQ  "movq"
#define MOVQU "movq"
#define STEP  4
#define PSRL1(reg) "psrlq $8, "reg" \n\t"
#define PSRL2(reg) "psrlq $16, "reg" \n\t"
#define PSHUF(out,in) "pshufw $9, "in", "out" \n\t"
#endif

/*
 * LOAD(mem, dst): load STEP bytes from `mem` into `dst` and interleave them
 * with register 7, widening each byte to a 16-bit word. Register 7 must hold
 * zero when this is used (FILTER clears it at the top of every iteration).
 * The definition is the same for both register widths.
 */
#define LOAD(mem,dst) \
    MOV" "mem", "dst" \n\t"\
    "punpcklbw "MM"7, "dst" \n\t"
47 
/*
 * PABS(tmp, dst): replace every signed 16-bit lane of `dst` with its
 * absolute value.
 *
 * The SSSE3 build uses pabsw directly and leaves `tmp` untouched. The
 * fallback computes max(dst, 0 - dst) and overwrites `tmp` with 0 - dst,
 * so callers must always pass a register they can afford to lose.
 */
#ifdef COMPILE_TEMPLATE_SSSE3
#define PABS(tmp,dst) \
    "pabsw "dst", "dst" \n\t"
#else
#define PABS(tmp,dst) \
    "pxor "tmp", "tmp" \n\t"\
    "psubw "dst", "tmp" \n\t"\
    "pmaxsw "tmp", "dst" \n\t"
#endif
57 
/*
 * CHECK(pj, mj): evaluate one diagonal interpolation direction.
 *
 * pj and mj are literal byte displacements; they are stringified and used
 * as the displacement of the loads from the [mrefs] and [prefs] rows of
 * `cur` respectively.
 *
 * On exit:
 *   register 5  candidate prediction, the truncating average of the two
 *               rows at the centre position, widened to 16-bit lanes
 *               (pavgb rounds up; subtracting (a ^ b) & pb_1 undoes that.
 *               pb_1 is defined outside this file - presumably a vector of
 *               byte 1s, TODO confirm)
 *   register 2  score: the sum of the absolute row differences at the
 *               left, centre and right positions, in 16-bit lanes
 *
 * Overwrites registers 2-5 and expects register 7 to be zero.
 */
#define CHECK(pj,mj) \
    MOVQU" "#pj"(%[cur],%[mrefs]), "MM"2 \n\t" /* cur[x-refs-1+j] */\
    MOVQU" "#mj"(%[cur],%[prefs]), "MM"3 \n\t" /* cur[x+refs-1-j] */\
    MOVQ" "MM"2, "MM"4 \n\t"\
    MOVQ" "MM"2, "MM"5 \n\t"\
    "pxor "MM"3, "MM"4 \n\t"\
    "pavgb "MM"3, "MM"5 \n\t"\
    "pand "MANGLE(pb_1)", "MM"4 \n\t"\
    "psubusb "MM"4, "MM"5 \n\t"\
    PSRL1(MM"5") \
    "punpcklbw "MM"7, "MM"5 \n\t" /* (cur[x-refs+j] + cur[x+refs-j])>>1 */\
    MOVQ" "MM"2, "MM"4 \n\t"\
    "psubusb "MM"3, "MM"2 \n\t"\
    "psubusb "MM"4, "MM"3 \n\t"\
    "pmaxub "MM"3, "MM"2 \n\t"\
    MOVQ" "MM"2, "MM"3 \n\t"\
    MOVQ" "MM"2, "MM"4 \n\t" /* ABS(cur[x-refs-1+j] - cur[x+refs-1-j]) */\
    PSRL1(MM"3") /* ABS(cur[x-refs +j] - cur[x+refs -j]) */\
    PSRL2(MM"4") /* ABS(cur[x-refs+1+j] - cur[x+refs+1-j]) */\
    "punpcklbw "MM"7, "MM"2 \n\t"\
    "punpcklbw "MM"7, "MM"3 \n\t"\
    "punpcklbw "MM"7, "MM"4 \n\t"\
    "paddw "MM"3, "MM"2 \n\t"\
    "paddw "MM"4, "MM"2 \n\t" /* score */
82 
/*
 * CHECK1: accept the candidate produced by the preceding CHECK wherever it
 * scores better than the current best.
 *
 * Inputs:  register 0 = spatial_score, register 1 = spatial_pred,
 *          register 2 = candidate score, register 5 = candidate prediction.
 * Per 16-bit lane, if score < spatial_score then spatial_score = score and
 * spatial_pred = candidate prediction.
 *
 * The comparison mask is also saved in register 6 for a following CHECK2.
 * Overwrites register 3, and masks register 5 in place.
 */
#define CHECK1 \
    MOVQ" "MM"0, "MM"3 \n\t"\
    "pcmpgtw "MM"2, "MM"3 \n\t" /* if(score < spatial_score) */\
    "pminsw "MM"2, "MM"0 \n\t" /* spatial_score= score; */\
    MOVQ" "MM"3, "MM"6 \n\t"\
    "pand "MM"3, "MM"5 \n\t"\
    "pandn "MM"1, "MM"3 \n\t"\
    "por "MM"5, "MM"3 \n\t"\
    MOVQ" "MM"3, "MM"1 \n\t" /* spatial_pred= (cur[x-refs+j] + cur[x+refs-j])>>1; */
92 
/*
 * CHECK2: like CHECK1, but only lets the candidate win in lanes where the
 * preceding CHECK1 succeeded.
 *
 * Pretend not to have checked dir=2 if dir=1 was bad. This hurts both
 * quality and speed, but matches the C version.
 *
 * Register 6 holds the CHECK1 mask (all ones where it succeeded, zero where
 * it failed). Adding pw_1 and shifting left by 14 turns that into 0 for
 * "succeeded" and 1<<14 for "failed", assuming pw_1 is a vector of word 1s
 * (it is defined outside this file - TODO confirm). That penalty is added
 * to the score with signed saturation before the usual compare-and-select.
 *
 * Overwrites registers 3 and 6, and masks register 5 in place.
 */
#define CHECK2 \
    "paddw "MANGLE(pw_1)", "MM"6 \n\t"\
    "psllw $14, "MM"6 \n\t"\
    "paddsw "MM"6, "MM"2 \n\t"\
    MOVQ" "MM"0, "MM"3 \n\t"\
    "pcmpgtw "MM"2, "MM"3 \n\t"\
    "pminsw "MM"2, "MM"0 \n\t"\
    "pand "MM"3, "MM"5 \n\t"\
    "pandn "MM"1, "MM"3 \n\t"\
    "por "MM"5, "MM"3 \n\t"\
    MOVQ" "MM"3, "MM"1 \n\t"
106 static void RENAME(yadif_filter_line)(uint8_t *dst, uint8_t *prev, uint8_t *cur,
107  uint8_t *next, int w, int prefs,
108  int mrefs, int parity, int mode)
109 {
110  uint8_t tmpU[5*16];
111  uint8_t *tmp= (uint8_t*)(((uint64_t)(tmpU+15)) & ~15);
112  int x;
113 
114 #define FILTER\
115  for(x=0; x<w; x+=STEP){\
116  __asm__ volatile(\
117  "pxor "MM"7, "MM"7 \n\t"\
118  LOAD("(%[cur],%[mrefs])", MM"0") /* c = cur[x-refs] */\
119  LOAD("(%[cur],%[prefs])", MM"1") /* e = cur[x+refs] */\
120  LOAD("(%["prev2"])", MM"2") /* prev2[x] */\
121  LOAD("(%["next2"])", MM"3") /* next2[x] */\
122  MOVQ" "MM"3, "MM"4 \n\t"\
123  "paddw "MM"2, "MM"3 \n\t"\
124  "psraw $1, "MM"3 \n\t" /* d = (prev2[x] + next2[x])>>1 */\
125  MOVQ" "MM"0, (%[tmp]) \n\t" /* c */\
126  MOVQ" "MM"3, 16(%[tmp]) \n\t" /* d */\
127  MOVQ" "MM"1, 32(%[tmp]) \n\t" /* e */\
128  "psubw "MM"4, "MM"2 \n\t"\
129  PABS( MM"4", MM"2") /* temporal_diff0 */\
130  LOAD("(%[prev],%[mrefs])", MM"3") /* prev[x-refs] */\
131  LOAD("(%[prev],%[prefs])", MM"4") /* prev[x+refs] */\
132  "psubw "MM"0, "MM"3 \n\t"\
133  "psubw "MM"1, "MM"4 \n\t"\
134  PABS( MM"5", MM"3")\
135  PABS( MM"5", MM"4")\
136  "paddw "MM"4, "MM"3 \n\t" /* temporal_diff1 */\
137  "psrlw $1, "MM"2 \n\t"\
138  "psrlw $1, "MM"3 \n\t"\
139  "pmaxsw "MM"3, "MM"2 \n\t"\
140  LOAD("(%[next],%[mrefs])", MM"3") /* next[x-refs] */\
141  LOAD("(%[next],%[prefs])", MM"4") /* next[x+refs] */\
142  "psubw "MM"0, "MM"3 \n\t"\
143  "psubw "MM"1, "MM"4 \n\t"\
144  PABS( MM"5", MM"3")\
145  PABS( MM"5", MM"4")\
146  "paddw "MM"4, "MM"3 \n\t" /* temporal_diff2 */\
147  "psrlw $1, "MM"3 \n\t"\
148  "pmaxsw "MM"3, "MM"2 \n\t"\
149  MOVQ" "MM"2, 48(%[tmp]) \n\t" /* diff */\
150 \
151  "paddw "MM"0, "MM"1 \n\t"\
152  "paddw "MM"0, "MM"0 \n\t"\
153  "psubw "MM"1, "MM"0 \n\t"\
154  "psrlw $1, "MM"1 \n\t" /* spatial_pred */\
155  PABS( MM"2", MM"0") /* ABS(c-e) */\
156 \
157  MOVQU" -1(%[cur],%[mrefs]), "MM"2 \n\t" /* cur[x-refs-1] */\
158  MOVQU" -1(%[cur],%[prefs]), "MM"3 \n\t" /* cur[x+refs-1] */\
159  MOVQ" "MM"2, "MM"4 \n\t"\
160  "psubusb "MM"3, "MM"2 \n\t"\
161  "psubusb "MM"4, "MM"3 \n\t"\
162  "pmaxub "MM"3, "MM"2 \n\t"\
163  PSHUF(MM"3", MM"2") \
164  "punpcklbw "MM"7, "MM"2 \n\t" /* ABS(cur[x-refs-1] - cur[x+refs-1]) */\
165  "punpcklbw "MM"7, "MM"3 \n\t" /* ABS(cur[x-refs+1] - cur[x+refs+1]) */\
166  "paddw "MM"2, "MM"0 \n\t"\
167  "paddw "MM"3, "MM"0 \n\t"\
168  "psubw "MANGLE(pw_1)", "MM"0 \n\t" /* spatial_score */\
169 \
170  CHECK(-2,0)\
171  CHECK1\
172  CHECK(-3,1)\
173  CHECK2\
174  CHECK(0,-2)\
175  CHECK1\
176  CHECK(1,-3)\
177  CHECK2\
178 \
179  /* if(p->mode<2) ... */\
180  MOVQ" 48(%[tmp]), "MM"6 \n\t" /* diff */\
181  "cmpl $2, %[mode] \n\t"\
182  "jge 1f \n\t"\
183  LOAD("(%["prev2"],%[mrefs],2)", MM"2") /* prev2[x-2*refs] */\
184  LOAD("(%["next2"],%[mrefs],2)", MM"4") /* next2[x-2*refs] */\
185  LOAD("(%["prev2"],%[prefs],2)", MM"3") /* prev2[x+2*refs] */\
186  LOAD("(%["next2"],%[prefs],2)", MM"5") /* next2[x+2*refs] */\
187  "paddw "MM"4, "MM"2 \n\t"\
188  "paddw "MM"5, "MM"3 \n\t"\
189  "psrlw $1, "MM"2 \n\t" /* b */\
190  "psrlw $1, "MM"3 \n\t" /* f */\
191  MOVQ" (%[tmp]), "MM"4 \n\t" /* c */\
192  MOVQ" 16(%[tmp]), "MM"5 \n\t" /* d */\
193  MOVQ" 32(%[tmp]), "MM"7 \n\t" /* e */\
194  "psubw "MM"4, "MM"2 \n\t" /* b-c */\
195  "psubw "MM"7, "MM"3 \n\t" /* f-e */\
196  MOVQ" "MM"5, "MM"0 \n\t"\
197  "psubw "MM"4, "MM"5 \n\t" /* d-c */\
198  "psubw "MM"7, "MM"0 \n\t" /* d-e */\
199  MOVQ" "MM"2, "MM"4 \n\t"\
200  "pminsw "MM"3, "MM"2 \n\t"\
201  "pmaxsw "MM"4, "MM"3 \n\t"\
202  "pmaxsw "MM"5, "MM"2 \n\t"\
203  "pminsw "MM"5, "MM"3 \n\t"\
204  "pmaxsw "MM"0, "MM"2 \n\t" /* max */\
205  "pminsw "MM"0, "MM"3 \n\t" /* min */\
206  "pxor "MM"4, "MM"4 \n\t"\
207  "pmaxsw "MM"3, "MM"6 \n\t"\
208  "psubw "MM"2, "MM"4 \n\t" /* -max */\
209  "pmaxsw "MM"4, "MM"6 \n\t" /* diff= MAX3(diff, min, -max); */\
210  "1: \n\t"\
211 \
212  MOVQ" 16(%[tmp]), "MM"2 \n\t" /* d */\
213  MOVQ" "MM"2, "MM"3 \n\t"\
214  "psubw "MM"6, "MM"2 \n\t" /* d-diff */\
215  "paddw "MM"6, "MM"3 \n\t" /* d+diff */\
216  "pmaxsw "MM"2, "MM"1 \n\t"\
217  "pminsw "MM"3, "MM"1 \n\t" /* d = clip(spatial_pred, d-diff, d+diff); */\
218  "packuswb "MM"1, "MM"1 \n\t"\
219 \
220  ::[prev] "r"(prev),\
221  [cur] "r"(cur),\
222  [next] "r"(next),\
223  [prefs]"r"((x86_reg)prefs),\
224  [mrefs]"r"((x86_reg)mrefs),\
225  [mode] "g"(mode),\
226  [tmp] "r"(tmp)\
227  );\
228  __asm__ volatile(MOV" "MM"1, %0" :"=m"(*dst));\
229  dst += STEP;\
230  prev+= STEP;\
231  cur += STEP;\
232  next+= STEP;\
233  }
234 
235  if (parity) {
236 #define prev2 "prev"
237 #define next2 "cur"
238  FILTER
239 #undef prev2
240 #undef next2
241  } else {
242 #define prev2 "cur"
243 #define next2 "next"
244  FILTER
245 #undef prev2
246 #undef next2
247  }
248 }
/*
 * Drop every helper macro defined by this template so the file can be
 * included again with a different COMPILE_TEMPLATE_* / RENAME configuration.
 */
#undef STEP
#undef MM
#undef MOV
#undef MOVQ
#undef MOVQU
#undef PSHUF
#undef PSRL1
#undef PSRL2
#undef LOAD
#undef PABS
#undef CHECK
#undef CHECK1
#undef CHECK2
#undef FILTER