/*
 * Fixed-point cosine constants. Going by the per-line formulas, Ci is
 * cos(i*M_PI/16)*sqrt(2) scaled by (1<<14), with i the digit in the name.
 * C0..C3 and C5..C7 use "+ 0.5"; C4 uses "- 0.5", which yields 16383
 * instead of the exact 16384.
 */
44 #define C0 23170 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
45 #define C1 22725 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
46 #define C2 21407 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
47 #define C3 19266 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
48 #define C4 16383 //cos(i*M_PI/16)*sqrt(2)*(1<<14) - 0.5
49 #define C5 12873 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
50 #define C6 8867 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
51 #define C7 4520 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
/*
 * NOTE(review): COL_SHIFT is not referenced by name in the lines reviewed;
 * the IDCT(...) invocations pass the literal 20 as their shift argument.
 * Confirm the two are meant to stay in sync. The trailing "// 6" is not
 * explained by anything visible here.
 */
54 #define COL_SHIFT 20 // 6
90 int16_t *
const temp= (int16_t*)align_tmp;
/*
 * DC_COND_IDCT(src0, src4, src1, src5, dst, rounder, shift)
 *
 * Inline-assembly text fragment built from adjacent string literals.
 * It turns four input quadwords into 32 bytes of output at dst.
 *
 *   src0, src4, src1, src5  memory operands, loaded with movq into
 *                           mm0, mm1, mm2 and mm3 respectively
 *   dst                     memory operand; results go to dst, 8+dst,
 *                           16+dst and 24+dst
 *   rounder                 stringified and used as an instruction prefix,
 *                           applied once to mm4 and once to mm0 (the
 *                           invocation in this file passes "paddd 8(%2)")
 *   shift                   immediate of the psrad applied to each result
 *
 * Steps, as visible here:
 *   1. Zero test: mm4 = (src0 & wm1010) | src4 | src1 | src5, packed with
 *      packssdw and copied to eax. "orl %%eax, %%eax" then sets ZF exactly
 *      when that OR is all zero, because signed saturation never maps a
 *      non-zero dword to zero.
 *   2. General path: pmaddwd against the table entries at 16(%2) through
 *      104(%2), add/subtract pairs, psrad by shift, packssdw, and four
 *      movq stores.
 *   3. Short path (the last eight instructions): mm0 shifted left by 16,
 *      plus d40000, arithmetic shift right by 13, packed, and the same
 *      quadword written to all four output slots.
 *
 * NOTE(review): the embedded listing numbers skip 106, 169-170 and 179, and
 * no jump or label appears between the three steps. Presumably the missing
 * lines hold the conditional branch to the short path and its labels;
 * confirm against the complete file before relying on the control flow.
 * wm1010 and d40000 are external symbols reached through MANGLE(); their
 * values are not visible here. Writes eax and mm0-mm7.
 */
93 #define DC_COND_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
94 "movq " #src0 ", %%mm0 \n\t" \
95 "movq " #src4 ", %%mm1 \n\t" \
96 "movq " #src1 ", %%mm2 \n\t" \
97 "movq " #src5 ", %%mm3 \n\t" \
98 "movq "MANGLE(wm1010)", %%mm4 \n\t"\
99 "pand %%mm0, %%mm4 \n\t"\
100 "por %%mm1, %%mm4 \n\t"\
101 "por %%mm2, %%mm4 \n\t"\
102 "por %%mm3, %%mm4 \n\t"\
103 "packssdw %%mm4,%%mm4 \n\t"\
104 "movd %%mm4, %%eax \n\t"\
105 "orl %%eax, %%eax \n\t"\
107 "movq 16(%2), %%mm4 \n\t" \
108 "pmaddwd %%mm0, %%mm4 \n\t" \
109 "movq 24(%2), %%mm5 \n\t" \
110 "pmaddwd %%mm5, %%mm0 \n\t" \
111 "movq 32(%2), %%mm5 \n\t" \
112 "pmaddwd %%mm1, %%mm5 \n\t" \
113 "movq 40(%2), %%mm6 \n\t" \
114 "pmaddwd %%mm6, %%mm1 \n\t" \
115 "movq 48(%2), %%mm7 \n\t" \
116 "pmaddwd %%mm2, %%mm7 \n\t" \
117 #rounder ", %%mm4 \n\t"\
118 "movq %%mm4, %%mm6 \n\t" \
119 "paddd %%mm5, %%mm4 \n\t" \
120 "psubd %%mm5, %%mm6 \n\t" \
121 "movq 56(%2), %%mm5 \n\t" \
122 "pmaddwd %%mm3, %%mm5 \n\t" \
123 #rounder ", %%mm0 \n\t"\
124 "paddd %%mm0, %%mm1 \n\t" \
125 "paddd %%mm0, %%mm0 \n\t" \
126 "psubd %%mm1, %%mm0 \n\t" \
127 "pmaddwd 64(%2), %%mm2 \n\t" \
128 "paddd %%mm5, %%mm7 \n\t" \
129 "movq 72(%2), %%mm5 \n\t" \
130 "pmaddwd %%mm3, %%mm5 \n\t" \
131 "paddd %%mm4, %%mm7 \n\t" \
132 "paddd %%mm4, %%mm4 \n\t" \
133 "psubd %%mm7, %%mm4 \n\t" \
134 "paddd %%mm2, %%mm5 \n\t" \
135 "psrad $" #shift ", %%mm7 \n\t"\
136 "psrad $" #shift ", %%mm4 \n\t"\
137 "movq %%mm1, %%mm2 \n\t" \
138 "paddd %%mm5, %%mm1 \n\t" \
139 "psubd %%mm5, %%mm2 \n\t" \
140 "psrad $" #shift ", %%mm1 \n\t"\
141 "psrad $" #shift ", %%mm2 \n\t"\
142 "packssdw %%mm1, %%mm7 \n\t" \
143 "packssdw %%mm4, %%mm2 \n\t" \
144 "movq %%mm7, " #dst " \n\t"\
145 "movq " #src1 ", %%mm1 \n\t" \
146 "movq 80(%2), %%mm4 \n\t" \
147 "movq %%mm2, 24+" #dst " \n\t"\
148 "pmaddwd %%mm1, %%mm4 \n\t" \
149 "movq 88(%2), %%mm7 \n\t" \
150 "pmaddwd 96(%2), %%mm1 \n\t" \
151 "pmaddwd %%mm3, %%mm7 \n\t" \
152 "movq %%mm0, %%mm2 \n\t" \
153 "pmaddwd 104(%2), %%mm3 \n\t" \
154 "paddd %%mm7, %%mm4 \n\t" \
155 "paddd %%mm4, %%mm2 \n\t" \
156 "psubd %%mm4, %%mm0 \n\t" \
157 "psrad $" #shift ", %%mm2 \n\t"\
158 "psrad $" #shift ", %%mm0 \n\t"\
159 "movq %%mm6, %%mm4 \n\t" \
160 "paddd %%mm1, %%mm3 \n\t" \
161 "paddd %%mm3, %%mm6 \n\t" \
162 "psubd %%mm3, %%mm4 \n\t" \
163 "psrad $" #shift ", %%mm6 \n\t"\
164 "packssdw %%mm6, %%mm2 \n\t" \
165 "movq %%mm2, 8+" #dst " \n\t"\
166 "psrad $" #shift ", %%mm4 \n\t"\
167 "packssdw %%mm0, %%mm4 \n\t" \
168 "movq %%mm4, 16+" #dst " \n\t"\
171 "pslld $16, %%mm0 \n\t"\
172 "paddd "MANGLE(d40000)", %%mm0 \n\t"\
173 "psrad $13, %%mm0 \n\t"\
174 "packssdw %%mm0, %%mm0 \n\t"\
175 "movq %%mm0, " #dst " \n\t"\
176 "movq %%mm0, 8+" #dst " \n\t"\
177 "movq %%mm0, 16+" #dst " \n\t"\
178 "movq %%mm0, 24+" #dst " \n\t"\
/*
 * Z_COND_IDCT(src0, src4, src1, src5, dst, rounder, shift, bt)
 *
 * Inline-assembly text fragment. Loads src0, src4, src1, src5 (movq) into
 * mm0..mm3, tests whether all four quadwords are zero, then runs the
 * transform and stores 32 bytes at dst, 8+dst, 16+dst and 24+dst.
 *
 *   rounder  stringified and used as an instruction prefix, applied once to
 *            mm4 and once to mm0 (invocations in this file pass
 *            "paddd (%2)")
 *   shift    immediate of the psrad applied to each result
 *   bt       not referenced in the lines visible here; invocations pass
 *            local labels such as 4f, 2f, 1f
 *
 * Zero test: mm4 = src0 | src4 | src1 | src5 (no mask on src0), packed with
 * packssdw and copied to eax. "orl %%eax, %%eax" sets ZF exactly when all
 * four inputs are zero.
 *
 * Transform: pmaddwd against the table entries at 16(%2) through 104(%2),
 * add/subtract pairs, psrad by shift, packssdw, and four movq stores.
 *
 * NOTE(review): the embedded listing numbers skip 193, right after the
 * flag-setting orl. Presumably that line is a conditional jump to bt,
 * taken when the inputs are all zero; confirm against the complete file.
 * Writes eax and mm0-mm7.
 */
181 #define Z_COND_IDCT(src0, src4, src1, src5, dst, rounder, shift, bt) \
182 "movq " #src0 ", %%mm0 \n\t" \
183 "movq " #src4 ", %%mm1 \n\t" \
184 "movq " #src1 ", %%mm2 \n\t" \
185 "movq " #src5 ", %%mm3 \n\t" \
186 "movq %%mm0, %%mm4 \n\t"\
187 "por %%mm1, %%mm4 \n\t"\
188 "por %%mm2, %%mm4 \n\t"\
189 "por %%mm3, %%mm4 \n\t"\
190 "packssdw %%mm4,%%mm4 \n\t"\
191 "movd %%mm4, %%eax \n\t"\
192 "orl %%eax, %%eax \n\t"\
194 "movq 16(%2), %%mm4 \n\t" \
195 "pmaddwd %%mm0, %%mm4 \n\t" \
196 "movq 24(%2), %%mm5 \n\t" \
197 "pmaddwd %%mm5, %%mm0 \n\t" \
198 "movq 32(%2), %%mm5 \n\t" \
199 "pmaddwd %%mm1, %%mm5 \n\t" \
200 "movq 40(%2), %%mm6 \n\t" \
201 "pmaddwd %%mm6, %%mm1 \n\t" \
202 "movq 48(%2), %%mm7 \n\t" \
203 "pmaddwd %%mm2, %%mm7 \n\t" \
204 #rounder ", %%mm4 \n\t"\
205 "movq %%mm4, %%mm6 \n\t" \
206 "paddd %%mm5, %%mm4 \n\t" \
207 "psubd %%mm5, %%mm6 \n\t" \
208 "movq 56(%2), %%mm5 \n\t" \
209 "pmaddwd %%mm3, %%mm5 \n\t" \
210 #rounder ", %%mm0 \n\t"\
211 "paddd %%mm0, %%mm1 \n\t" \
212 "paddd %%mm0, %%mm0 \n\t" \
213 "psubd %%mm1, %%mm0 \n\t" \
214 "pmaddwd 64(%2), %%mm2 \n\t" \
215 "paddd %%mm5, %%mm7 \n\t" \
216 "movq 72(%2), %%mm5 \n\t" \
217 "pmaddwd %%mm3, %%mm5 \n\t" \
218 "paddd %%mm4, %%mm7 \n\t" \
219 "paddd %%mm4, %%mm4 \n\t" \
220 "psubd %%mm7, %%mm4 \n\t" \
221 "paddd %%mm2, %%mm5 \n\t" \
222 "psrad $" #shift ", %%mm7 \n\t"\
223 "psrad $" #shift ", %%mm4 \n\t"\
224 "movq %%mm1, %%mm2 \n\t" \
225 "paddd %%mm5, %%mm1 \n\t" \
226 "psubd %%mm5, %%mm2 \n\t" \
227 "psrad $" #shift ", %%mm1 \n\t"\
228 "psrad $" #shift ", %%mm2 \n\t"\
229 "packssdw %%mm1, %%mm7 \n\t" \
230 "packssdw %%mm4, %%mm2 \n\t" \
231 "movq %%mm7, " #dst " \n\t"\
232 "movq " #src1 ", %%mm1 \n\t" \
233 "movq 80(%2), %%mm4 \n\t" \
234 "movq %%mm2, 24+" #dst " \n\t"\
235 "pmaddwd %%mm1, %%mm4 \n\t" \
236 "movq 88(%2), %%mm7 \n\t" \
237 "pmaddwd 96(%2), %%mm1 \n\t" \
238 "pmaddwd %%mm3, %%mm7 \n\t" \
239 "movq %%mm0, %%mm2 \n\t" \
240 "pmaddwd 104(%2), %%mm3 \n\t" \
241 "paddd %%mm7, %%mm4 \n\t" \
242 "paddd %%mm4, %%mm2 \n\t" \
243 "psubd %%mm4, %%mm0 \n\t" \
244 "psrad $" #shift ", %%mm2 \n\t"\
245 "psrad $" #shift ", %%mm0 \n\t"\
246 "movq %%mm6, %%mm4 \n\t" \
247 "paddd %%mm1, %%mm3 \n\t" \
248 "paddd %%mm3, %%mm6 \n\t" \
249 "psubd %%mm3, %%mm4 \n\t" \
250 "psrad $" #shift ", %%mm6 \n\t"\
251 "packssdw %%mm6, %%mm2 \n\t" \
252 "movq %%mm2, 8+" #dst " \n\t"\
253 "psrad $" #shift ", %%mm4 \n\t"\
254 "packssdw %%mm0, %%mm4 \n\t" \
255 "movq %%mm4, 16+" #dst " \n\t"\
/*
 * ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift)
 *
 * Inline-assembly text fragment: the unconditional transform, with no zero
 * test. Loads src0, src4, src1, src5 (movq) into mm0..mm3, multiplies them
 * (pmaddwd) by the table entries at 16(%2) through 104(%2), applies the
 * stringified rounder instruction to mm4 and to mm0, combines the products
 * with add/subtract pairs, shifts each result right arithmetically by
 * shift, packs with packssdw and stores 32 bytes at dst, 8+dst, 16+dst and
 * 24+dst. Writes mm0-mm7.
 *
 * NOTE(review): no invocation of ROW_IDCT appears in the lines reviewed.
 */
257 #define ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
258 "movq " #src0 ", %%mm0 \n\t" \
259 "movq " #src4 ", %%mm1 \n\t" \
260 "movq " #src1 ", %%mm2 \n\t" \
261 "movq " #src5 ", %%mm3 \n\t" \
262 "movq 16(%2), %%mm4 \n\t" \
263 "pmaddwd %%mm0, %%mm4 \n\t" \
264 "movq 24(%2), %%mm5 \n\t" \
265 "pmaddwd %%mm5, %%mm0 \n\t" \
266 "movq 32(%2), %%mm5 \n\t" \
267 "pmaddwd %%mm1, %%mm5 \n\t" \
268 "movq 40(%2), %%mm6 \n\t" \
269 "pmaddwd %%mm6, %%mm1 \n\t" \
270 "movq 48(%2), %%mm7 \n\t" \
271 "pmaddwd %%mm2, %%mm7 \n\t" \
272 #rounder ", %%mm4 \n\t"\
273 "movq %%mm4, %%mm6 \n\t" \
274 "paddd %%mm5, %%mm4 \n\t" \
275 "psubd %%mm5, %%mm6 \n\t" \
276 "movq 56(%2), %%mm5 \n\t" \
277 "pmaddwd %%mm3, %%mm5 \n\t" \
278 #rounder ", %%mm0 \n\t"\
279 "paddd %%mm0, %%mm1 \n\t" \
280 "paddd %%mm0, %%mm0 \n\t" \
281 "psubd %%mm1, %%mm0 \n\t" \
282 "pmaddwd 64(%2), %%mm2 \n\t" \
283 "paddd %%mm5, %%mm7 \n\t" \
284 "movq 72(%2), %%mm5 \n\t" \
285 "pmaddwd %%mm3, %%mm5 \n\t" \
286 "paddd %%mm4, %%mm7 \n\t" \
287 "paddd %%mm4, %%mm4 \n\t" \
288 "psubd %%mm7, %%mm4 \n\t" \
289 "paddd %%mm2, %%mm5 \n\t" \
290 "psrad $" #shift ", %%mm7 \n\t"\
291 "psrad $" #shift ", %%mm4 \n\t"\
292 "movq %%mm1, %%mm2 \n\t" \
293 "paddd %%mm5, %%mm1 \n\t" \
294 "psubd %%mm5, %%mm2 \n\t" \
295 "psrad $" #shift ", %%mm1 \n\t"\
296 "psrad $" #shift ", %%mm2 \n\t"\
297 "packssdw %%mm1, %%mm7 \n\t" \
298 "packssdw %%mm4, %%mm2 \n\t" \
299 "movq %%mm7, " #dst " \n\t"\
300 "movq " #src1 ", %%mm1 \n\t" \
301 "movq 80(%2), %%mm4 \n\t" \
302 "movq %%mm2, 24+" #dst " \n\t"\
303 "pmaddwd %%mm1, %%mm4 \n\t" \
304 "movq 88(%2), %%mm7 \n\t" \
305 "pmaddwd 96(%2), %%mm1 \n\t" \
306 "pmaddwd %%mm3, %%mm7 \n\t" \
307 "movq %%mm0, %%mm2 \n\t" \
308 "pmaddwd 104(%2), %%mm3 \n\t" \
309 "paddd %%mm7, %%mm4 \n\t" \
310 "paddd %%mm4, %%mm2 \n\t" \
311 "psubd %%mm4, %%mm0 \n\t" \
312 "psrad $" #shift ", %%mm2 \n\t"\
313 "psrad $" #shift ", %%mm0 \n\t"\
314 "movq %%mm6, %%mm4 \n\t" \
315 "paddd %%mm1, %%mm3 \n\t" \
316 "paddd %%mm3, %%mm6 \n\t" \
317 "psrad $" #shift ", %%mm6 \n\t"\
319 "packssdw %%mm6, %%mm2 \n\t" \
320 "movq %%mm2, 8+" #dst " \n\t"\
321 "psrad $" #shift ", %%mm4 \n\t"\
322 "packssdw %%mm0, %%mm4 \n\t" \
323 "movq %%mm4, 16+" #dst " \n\t"\
/*
 * First pass: four invocations, each reading 32 consecutive bytes of asm
 * operand %0 and writing 32 bytes at the same offset of operand %1, all
 * with shift 11. Only the first uses DC_COND_IDCT, with the rounder taken
 * from 8(%2). The other three use Z_COND_IDCT with the rounder at (%2) and
 * pass the local labels 4f, 2f and 1f as bt.
 * NOTE(review): the asm operand list is not visible here. %0, %1 and %2 are
 * presumably the coefficient block, a temporary buffer and the constant
 * table; confirm.
 */
326 DC_COND_IDCT( 0(%0), 8(%0), 16(%0), 24(%0), 0(%1),paddd 8(%2), 11)
327 Z_COND_IDCT( 32(%0), 40(%0), 48(%0), 56(%0), 32(%1),paddd (%2), 11, 4f)
328 Z_COND_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11, 2f)
329 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 1f)
/*
 * IDCT(src0, src4, src1, src5, dst, shift): variant that reads all four
 * source operands.
 *
 * Loads src0, src4, src1, src5 (movq) into mm0..mm3, multiplies them
 * (pmaddwd) by the table entries at 16(%2) through 104(%2), combines the
 * products with add/subtract pairs, shifts every result right
 * arithmetically by shift and packs it with packssdw. Each result is stored
 * with movd (4 bytes) at dst, 16+dst, 32+dst, 48+dst, 64+dst, 80+dst,
 * 96+dst and 112+dst. No rounding term is added inside this macro.
 * Writes mm0-mm7.
 *
 * NOTE(review): IDCT is defined several times in this file; the #undef
 * lines that would separate the definitions are not visible in this
 * listing.
 */
332 #define IDCT(src0, src4, src1, src5, dst, shift) \
333 "movq " #src0 ", %%mm0 \n\t" \
334 "movq " #src4 ", %%mm1 \n\t" \
335 "movq " #src1 ", %%mm2 \n\t" \
336 "movq " #src5 ", %%mm3 \n\t" \
337 "movq 16(%2), %%mm4 \n\t" \
338 "pmaddwd %%mm0, %%mm4 \n\t" \
339 "movq 24(%2), %%mm5 \n\t" \
340 "pmaddwd %%mm5, %%mm0 \n\t" \
341 "movq 32(%2), %%mm5 \n\t" \
342 "pmaddwd %%mm1, %%mm5 \n\t" \
343 "movq 40(%2), %%mm6 \n\t" \
344 "pmaddwd %%mm6, %%mm1 \n\t" \
345 "movq %%mm4, %%mm6 \n\t" \
346 "movq 48(%2), %%mm7 \n\t" \
347 "pmaddwd %%mm2, %%mm7 \n\t" \
348 "paddd %%mm5, %%mm4 \n\t" \
349 "psubd %%mm5, %%mm6 \n\t" \
350 "movq %%mm0, %%mm5 \n\t" \
351 "paddd %%mm1, %%mm0 \n\t" \
352 "psubd %%mm1, %%mm5 \n\t" \
353 "movq 56(%2), %%mm1 \n\t" \
354 "pmaddwd %%mm3, %%mm1 \n\t" \
355 "pmaddwd 64(%2), %%mm2 \n\t" \
356 "paddd %%mm1, %%mm7 \n\t" \
357 "movq 72(%2), %%mm1 \n\t" \
358 "pmaddwd %%mm3, %%mm1 \n\t" \
359 "paddd %%mm4, %%mm7 \n\t" \
360 "paddd %%mm4, %%mm4 \n\t" \
361 "psubd %%mm7, %%mm4 \n\t" \
362 "paddd %%mm2, %%mm1 \n\t" \
363 "psrad $" #shift ", %%mm7 \n\t"\
364 "psrad $" #shift ", %%mm4 \n\t"\
365 "movq %%mm0, %%mm2 \n\t" \
366 "paddd %%mm1, %%mm0 \n\t" \
367 "psubd %%mm1, %%mm2 \n\t" \
368 "psrad $" #shift ", %%mm0 \n\t"\
369 "psrad $" #shift ", %%mm2 \n\t"\
370 "packssdw %%mm7, %%mm7 \n\t" \
371 "movd %%mm7, " #dst " \n\t"\
372 "packssdw %%mm0, %%mm0 \n\t" \
373 "movd %%mm0, 16+" #dst " \n\t"\
374 "packssdw %%mm2, %%mm2 \n\t" \
375 "movd %%mm2, 96+" #dst " \n\t"\
376 "packssdw %%mm4, %%mm4 \n\t" \
377 "movd %%mm4, 112+" #dst " \n\t"\
378 "movq " #src1 ", %%mm0 \n\t" \
379 "movq 80(%2), %%mm4 \n\t" \
380 "pmaddwd %%mm0, %%mm4 \n\t" \
381 "movq 88(%2), %%mm7 \n\t" \
382 "pmaddwd 96(%2), %%mm0 \n\t" \
383 "pmaddwd %%mm3, %%mm7 \n\t" \
384 "movq %%mm5, %%mm2 \n\t" \
385 "pmaddwd 104(%2), %%mm3 \n\t" \
386 "paddd %%mm7, %%mm4 \n\t" \
387 "paddd %%mm4, %%mm2 \n\t" \
388 "psubd %%mm4, %%mm5 \n\t" \
389 "psrad $" #shift ", %%mm2 \n\t"\
390 "psrad $" #shift ", %%mm5 \n\t"\
391 "movq %%mm6, %%mm4 \n\t" \
392 "paddd %%mm0, %%mm3 \n\t" \
393 "paddd %%mm3, %%mm6 \n\t" \
394 "psubd %%mm3, %%mm4 \n\t" \
395 "psrad $" #shift ", %%mm6 \n\t"\
396 "psrad $" #shift ", %%mm4 \n\t"\
397 "packssdw %%mm2, %%mm2 \n\t" \
398 "packssdw %%mm6, %%mm6 \n\t" \
399 "movd %%mm2, 32+" #dst " \n\t"\
400 "packssdw %%mm4, %%mm4 \n\t" \
401 "packssdw %%mm5, %%mm5 \n\t" \
402 "movd %%mm6, 48+" #dst " \n\t"\
403 "movd %%mm4, 64+" #dst " \n\t"\
404 "movd %%mm5, 80+" #dst " \n\t"
/*
 * Second pass: four IDCT invocations reading operand %1 at offsets
 * n, 64+n, 32+n and 96+n (n = 0, 8, 16, 24) and writing operand %0 at byte
 * offsets 0, 4, 8 and 12, with shift 20.
 */
408 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
409 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
410 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
411 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
/*
 * First-pass steps for input bytes 64..95 and 96..127 of %0, passing the
 * local labels 6f and 5f as bt.
 * NOTE(review): the embedded listing numbers skip 412-415 just before these
 * lines; presumably a jump and a label sit there. Confirm.
 */
416 Z_COND_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11, 6f)
417 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 5f)
/*
 * IDCT(src0, src4, src1, src5, dst, shift): variant that reads src0, src4
 * and src5 only. The src1 parameter is accepted but never used.
 *
 * Loads the three sources (movq) into mm0, mm1 and mm3, multiplies them
 * (pmaddwd) by the table entries at 16, 24, 32, 40, 56, 72, 88 and 104
 * bytes from %2, combines the products with add/subtract pairs, shifts each
 * result right arithmetically by shift and packs it with packssdw. Each
 * result is stored with movd (4 bytes) at dst, 16+dst, 32+dst, 48+dst,
 * 64+dst, 80+dst, 96+dst and 112+dst. Writes mm0-mm7.
 *
 * NOTE(review): presumably selected when the data behind src1 is known to
 * be zero; the selecting branch is not visible in this listing.
 */
420 #define IDCT(src0, src4, src1, src5, dst, shift) \
421 "movq " #src0 ", %%mm0 \n\t" \
422 "movq " #src4 ", %%mm1 \n\t" \
423 "movq " #src5 ", %%mm3 \n\t" \
424 "movq 16(%2), %%mm4 \n\t" \
425 "pmaddwd %%mm0, %%mm4 \n\t" \
426 "movq 24(%2), %%mm5 \n\t" \
427 "pmaddwd %%mm5, %%mm0 \n\t" \
428 "movq 32(%2), %%mm5 \n\t" \
429 "pmaddwd %%mm1, %%mm5 \n\t" \
430 "movq 40(%2), %%mm6 \n\t" \
431 "pmaddwd %%mm6, %%mm1 \n\t" \
432 "movq %%mm4, %%mm6 \n\t" \
433 "paddd %%mm5, %%mm4 \n\t" \
434 "psubd %%mm5, %%mm6 \n\t" \
435 "movq %%mm0, %%mm5 \n\t" \
436 "paddd %%mm1, %%mm0 \n\t" \
437 "psubd %%mm1, %%mm5 \n\t" \
438 "movq 56(%2), %%mm1 \n\t" \
439 "pmaddwd %%mm3, %%mm1 \n\t" \
440 "movq 72(%2), %%mm7 \n\t" \
441 "pmaddwd %%mm3, %%mm7 \n\t" \
442 "paddd %%mm4, %%mm1 \n\t" \
443 "paddd %%mm4, %%mm4 \n\t" \
444 "psubd %%mm1, %%mm4 \n\t" \
445 "psrad $" #shift ", %%mm1 \n\t"\
446 "psrad $" #shift ", %%mm4 \n\t"\
447 "movq %%mm0, %%mm2 \n\t" \
448 "paddd %%mm7, %%mm0 \n\t" \
449 "psubd %%mm7, %%mm2 \n\t" \
450 "psrad $" #shift ", %%mm0 \n\t"\
451 "psrad $" #shift ", %%mm2 \n\t"\
452 "packssdw %%mm1, %%mm1 \n\t" \
453 "movd %%mm1, " #dst " \n\t"\
454 "packssdw %%mm0, %%mm0 \n\t" \
455 "movd %%mm0, 16+" #dst " \n\t"\
456 "packssdw %%mm2, %%mm2 \n\t" \
457 "movd %%mm2, 96+" #dst " \n\t"\
458 "packssdw %%mm4, %%mm4 \n\t" \
459 "movd %%mm4, 112+" #dst " \n\t"\
460 "movq 88(%2), %%mm1 \n\t" \
461 "pmaddwd %%mm3, %%mm1 \n\t" \
462 "movq %%mm5, %%mm2 \n\t" \
463 "pmaddwd 104(%2), %%mm3 \n\t" \
464 "paddd %%mm1, %%mm2 \n\t" \
465 "psubd %%mm1, %%mm5 \n\t" \
466 "psrad $" #shift ", %%mm2 \n\t"\
467 "psrad $" #shift ", %%mm5 \n\t"\
468 "movq %%mm6, %%mm1 \n\t" \
469 "paddd %%mm3, %%mm6 \n\t" \
470 "psubd %%mm3, %%mm1 \n\t" \
471 "psrad $" #shift ", %%mm6 \n\t"\
472 "psrad $" #shift ", %%mm1 \n\t"\
473 "packssdw %%mm2, %%mm2 \n\t" \
474 "packssdw %%mm6, %%mm6 \n\t" \
475 "movd %%mm2, 32+" #dst " \n\t"\
476 "packssdw %%mm1, %%mm1 \n\t" \
477 "packssdw %%mm5, %%mm5 \n\t" \
478 "movd %%mm6, 48+" #dst " \n\t"\
479 "movd %%mm1, 64+" #dst " \n\t"\
480 "movd %%mm5, 80+" #dst " \n\t"
/*
 * Second pass with the IDCT definition in effect here: four invocations
 * reading operand %1 at offsets n, 64+n, 32+n and 96+n (n = 0, 8, 16, 24)
 * and writing operand %0 at byte offsets 0, 4, 8 and 12, with shift 20.
 */
483 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
484 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
485 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
486 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
/*
 * First-pass step for input bytes 96..127 of %0, passing local label 7f as
 * bt.
 * NOTE(review): the embedded listing numbers skip 487-490 before this line;
 * presumably a jump and a label sit there. Confirm.
 */
491 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 7f)
/*
 * IDCT(src0, src4, src1, src5, dst, shift): variant that reads src0 and
 * src5 only. The src4 and src1 parameters are accepted but never used.
 *
 * Loads the two sources (movq) into mm0 and mm3, multiplies them (pmaddwd)
 * by the table entries at 16, 24, 56, 72, 88 and 104 bytes from %2,
 * combines the products with add/subtract pairs, shifts each result right
 * arithmetically by shift and packs it with packssdw. Each result is stored
 * with movd (4 bytes) at dst, 16+dst, 32+dst, 48+dst, 64+dst, 80+dst,
 * 96+dst and 112+dst. Writes mm0-mm7.
 *
 * NOTE(review): presumably selected when the data behind src4 and src1 is
 * known to be zero; the selecting branch is not visible in this listing.
 */
494 #define IDCT(src0, src4, src1, src5, dst, shift) \
495 "movq " #src0 ", %%mm0 \n\t" \
496 "movq " #src5 ", %%mm3 \n\t" \
497 "movq 16(%2), %%mm4 \n\t" \
498 "pmaddwd %%mm0, %%mm4 \n\t" \
499 "movq 24(%2), %%mm5 \n\t" \
500 "pmaddwd %%mm5, %%mm0 \n\t" \
501 "movq %%mm4, %%mm6 \n\t" \
502 "movq %%mm0, %%mm5 \n\t" \
503 "movq 56(%2), %%mm1 \n\t" \
504 "pmaddwd %%mm3, %%mm1 \n\t" \
505 "movq 72(%2), %%mm7 \n\t" \
506 "pmaddwd %%mm3, %%mm7 \n\t" \
507 "paddd %%mm4, %%mm1 \n\t" \
508 "paddd %%mm4, %%mm4 \n\t" \
509 "psubd %%mm1, %%mm4 \n\t" \
510 "psrad $" #shift ", %%mm1 \n\t"\
511 "psrad $" #shift ", %%mm4 \n\t"\
512 "movq %%mm0, %%mm2 \n\t" \
513 "paddd %%mm7, %%mm0 \n\t" \
514 "psubd %%mm7, %%mm2 \n\t" \
515 "psrad $" #shift ", %%mm0 \n\t"\
516 "psrad $" #shift ", %%mm2 \n\t"\
517 "packssdw %%mm1, %%mm1 \n\t" \
518 "movd %%mm1, " #dst " \n\t"\
519 "packssdw %%mm0, %%mm0 \n\t" \
520 "movd %%mm0, 16+" #dst " \n\t"\
521 "packssdw %%mm2, %%mm2 \n\t" \
522 "movd %%mm2, 96+" #dst " \n\t"\
523 "packssdw %%mm4, %%mm4 \n\t" \
524 "movd %%mm4, 112+" #dst " \n\t"\
525 "movq 88(%2), %%mm1 \n\t" \
526 "pmaddwd %%mm3, %%mm1 \n\t" \
527 "movq %%mm5, %%mm2 \n\t" \
528 "pmaddwd 104(%2), %%mm3 \n\t" \
529 "paddd %%mm1, %%mm2 \n\t" \
530 "psubd %%mm1, %%mm5 \n\t" \
531 "psrad $" #shift ", %%mm2 \n\t"\
532 "psrad $" #shift ", %%mm5 \n\t"\
533 "movq %%mm6, %%mm1 \n\t" \
534 "paddd %%mm3, %%mm6 \n\t" \
535 "psubd %%mm3, %%mm1 \n\t" \
536 "psrad $" #shift ", %%mm6 \n\t"\
537 "psrad $" #shift ", %%mm1 \n\t"\
538 "packssdw %%mm2, %%mm2 \n\t" \
539 "packssdw %%mm6, %%mm6 \n\t" \
540 "movd %%mm2, 32+" #dst " \n\t"\
541 "packssdw %%mm1, %%mm1 \n\t" \
542 "packssdw %%mm5, %%mm5 \n\t" \
543 "movd %%mm6, 48+" #dst " \n\t"\
544 "movd %%mm1, 64+" #dst " \n\t"\
545 "movd %%mm5, 80+" #dst " \n\t"
/*
 * Second pass with the IDCT definition in effect here: four invocations
 * reading operand %1 at offsets n, 64+n, 32+n and 96+n (n = 0, 8, 16, 24)
 * and writing operand %0 at byte offsets 0, 4, 8 and 12, with shift 20.
 */
549 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
550 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
551 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
552 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
/*
 * First-pass step for input bytes 96..127 of %0, passing local label 3f as
 * bt.
 * NOTE(review): the embedded listing numbers skip 553-556 before this line;
 * presumably a jump and a label sit there. Confirm.
 */
557 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 3f)
/*
 * IDCT(src0, src4, src1, src5, dst, shift): variant that reads src0, src1
 * and src5 only. The src4 parameter is accepted but never used.
 *
 * Loads the sources (movq) into mm0, mm2 and mm3 (src1 is loaded a second
 * time into mm0 midway), multiplies them (pmaddwd) by the table entries at
 * 16, 24, 48, 56, 64, 72, 80, 88, 96 and 104 bytes from %2, combines the
 * products with add/subtract pairs, shifts each result right arithmetically
 * by shift and packs it with packssdw. Each result is stored with movd
 * (4 bytes) at dst, 16+dst, 32+dst, 48+dst, 64+dst, 80+dst, 96+dst and
 * 112+dst. Writes mm0-mm7.
 *
 * NOTE(review): presumably selected when the data behind src4 is known to
 * be zero; the selecting branch is not visible in this listing.
 */
560 #define IDCT(src0, src4, src1, src5, dst, shift) \
561 "movq " #src0 ", %%mm0 \n\t" \
562 "movq " #src1 ", %%mm2 \n\t" \
563 "movq " #src5 ", %%mm3 \n\t" \
564 "movq 16(%2), %%mm4 \n\t" \
565 "pmaddwd %%mm0, %%mm4 \n\t" \
566 "movq 24(%2), %%mm5 \n\t" \
567 "pmaddwd %%mm5, %%mm0 \n\t" \
568 "movq %%mm4, %%mm6 \n\t" \
569 "movq 48(%2), %%mm7 \n\t" \
570 "pmaddwd %%mm2, %%mm7 \n\t" \
571 "movq %%mm0, %%mm5 \n\t" \
572 "movq 56(%2), %%mm1 \n\t" \
573 "pmaddwd %%mm3, %%mm1 \n\t" \
574 "pmaddwd 64(%2), %%mm2 \n\t" \
575 "paddd %%mm1, %%mm7 \n\t" \
576 "movq 72(%2), %%mm1 \n\t" \
577 "pmaddwd %%mm3, %%mm1 \n\t" \
578 "paddd %%mm4, %%mm7 \n\t" \
579 "paddd %%mm4, %%mm4 \n\t" \
580 "psubd %%mm7, %%mm4 \n\t" \
581 "paddd %%mm2, %%mm1 \n\t" \
582 "psrad $" #shift ", %%mm7 \n\t"\
583 "psrad $" #shift ", %%mm4 \n\t"\
584 "movq %%mm0, %%mm2 \n\t" \
585 "paddd %%mm1, %%mm0 \n\t" \
586 "psubd %%mm1, %%mm2 \n\t" \
587 "psrad $" #shift ", %%mm0 \n\t"\
588 "psrad $" #shift ", %%mm2 \n\t"\
589 "packssdw %%mm7, %%mm7 \n\t" \
590 "movd %%mm7, " #dst " \n\t"\
591 "packssdw %%mm0, %%mm0 \n\t" \
592 "movd %%mm0, 16+" #dst " \n\t"\
593 "packssdw %%mm2, %%mm2 \n\t" \
594 "movd %%mm2, 96+" #dst " \n\t"\
595 "packssdw %%mm4, %%mm4 \n\t" \
596 "movd %%mm4, 112+" #dst " \n\t"\
597 "movq " #src1 ", %%mm0 \n\t" \
598 "movq 80(%2), %%mm4 \n\t" \
599 "pmaddwd %%mm0, %%mm4 \n\t" \
600 "movq 88(%2), %%mm7 \n\t" \
601 "pmaddwd 96(%2), %%mm0 \n\t" \
602 "pmaddwd %%mm3, %%mm7 \n\t" \
603 "movq %%mm5, %%mm2 \n\t" \
604 "pmaddwd 104(%2), %%mm3 \n\t" \
605 "paddd %%mm7, %%mm4 \n\t" \
606 "paddd %%mm4, %%mm2 \n\t" \
607 "psubd %%mm4, %%mm5 \n\t" \
608 "psrad $" #shift ", %%mm2 \n\t"\
609 "psrad $" #shift ", %%mm5 \n\t"\
610 "movq %%mm6, %%mm4 \n\t" \
611 "paddd %%mm0, %%mm3 \n\t" \
612 "paddd %%mm3, %%mm6 \n\t" \
613 "psubd %%mm3, %%mm4 \n\t" \
614 "psrad $" #shift ", %%mm6 \n\t"\
615 "psrad $" #shift ", %%mm4 \n\t"\
616 "packssdw %%mm2, %%mm2 \n\t" \
617 "packssdw %%mm6, %%mm6 \n\t" \
618 "movd %%mm2, 32+" #dst " \n\t"\
619 "packssdw %%mm4, %%mm4 \n\t" \
620 "packssdw %%mm5, %%mm5 \n\t" \
621 "movd %%mm6, 48+" #dst " \n\t"\
622 "movd %%mm4, 64+" #dst " \n\t"\
623 "movd %%mm5, 80+" #dst " \n\t"
/*
 * Second pass with the IDCT definition in effect here: four invocations
 * reading operand %1 at offsets n, 64+n, 32+n and 96+n (n = 0, 8, 16, 24)
 * and writing operand %0 at byte offsets 0, 4, 8 and 12, with shift 20.
 */
626 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
627 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
628 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
629 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
/*
 * IDCT(src0, src4, src1, src5, dst, shift): variant that reads src0 and
 * src1 only. The src4 and src5 parameters are accepted but never used.
 *
 * Loads the two sources (movq) into mm0 and mm2, multiplies them (pmaddwd)
 * by the table entries at 16, 24, 48, 64, 80 and 96 bytes from %2, combines
 * the products with add/subtract pairs, shifts each result right
 * arithmetically by shift and packs it with packssdw. Each result is stored
 * with movd (4 bytes) at dst, 16+dst, 32+dst, 48+dst, 64+dst, 80+dst,
 * 96+dst and 112+dst. Writes mm0-mm7.
 *
 * NOTE(review): presumably selected when the data behind src4 and src5 is
 * known to be zero; the selecting branch is not visible in this listing.
 */
635 #define IDCT(src0, src4, src1, src5, dst, shift) \
636 "movq " #src0 ", %%mm0 \n\t" \
637 "movq " #src1 ", %%mm2 \n\t" \
638 "movq 16(%2), %%mm4 \n\t" \
639 "pmaddwd %%mm0, %%mm4 \n\t" \
640 "movq 24(%2), %%mm5 \n\t" \
641 "pmaddwd %%mm5, %%mm0 \n\t" \
642 "movq %%mm4, %%mm6 \n\t" \
643 "movq 48(%2), %%mm7 \n\t" \
644 "pmaddwd %%mm2, %%mm7 \n\t" \
645 "movq %%mm0, %%mm5 \n\t" \
646 "movq 64(%2), %%mm3 \n\t"\
647 "pmaddwd %%mm2, %%mm3 \n\t" \
648 "paddd %%mm4, %%mm7 \n\t" \
649 "paddd %%mm4, %%mm4 \n\t" \
650 "psubd %%mm7, %%mm4 \n\t" \
651 "psrad $" #shift ", %%mm7 \n\t"\
652 "psrad $" #shift ", %%mm4 \n\t"\
653 "movq %%mm0, %%mm1 \n\t" \
654 "paddd %%mm3, %%mm0 \n\t" \
655 "psubd %%mm3, %%mm1 \n\t" \
656 "psrad $" #shift ", %%mm0 \n\t"\
657 "psrad $" #shift ", %%mm1 \n\t"\
658 "packssdw %%mm7, %%mm7 \n\t" \
659 "movd %%mm7, " #dst " \n\t"\
660 "packssdw %%mm0, %%mm0 \n\t" \
661 "movd %%mm0, 16+" #dst " \n\t"\
662 "packssdw %%mm1, %%mm1 \n\t" \
663 "movd %%mm1, 96+" #dst " \n\t"\
664 "packssdw %%mm4, %%mm4 \n\t" \
665 "movd %%mm4, 112+" #dst " \n\t"\
666 "movq 80(%2), %%mm4 \n\t" \
667 "pmaddwd %%mm2, %%mm4 \n\t" \
668 "pmaddwd 96(%2), %%mm2 \n\t" \
669 "movq %%mm5, %%mm1 \n\t" \
670 "paddd %%mm4, %%mm1 \n\t" \
671 "psubd %%mm4, %%mm5 \n\t" \
672 "psrad $" #shift ", %%mm1 \n\t"\
673 "psrad $" #shift ", %%mm5 \n\t"\
674 "movq %%mm6, %%mm4 \n\t" \
675 "paddd %%mm2, %%mm6 \n\t" \
676 "psubd %%mm2, %%mm4 \n\t" \
677 "psrad $" #shift ", %%mm6 \n\t"\
678 "psrad $" #shift ", %%mm4 \n\t"\
679 "packssdw %%mm1, %%mm1 \n\t" \
680 "packssdw %%mm6, %%mm6 \n\t" \
681 "movd %%mm1, 32+" #dst " \n\t"\
682 "packssdw %%mm4, %%mm4 \n\t" \
683 "packssdw %%mm5, %%mm5 \n\t" \
684 "movd %%mm6, 48+" #dst " \n\t"\
685 "movd %%mm4, 64+" #dst " \n\t"\
686 "movd %%mm5, 80+" #dst " \n\t"
/*
 * Second pass with the IDCT definition in effect here: four invocations
 * reading operand %1 at offsets n, 64+n, 32+n and 96+n (n = 0, 8, 16, 24)
 * and writing operand %0 at byte offsets 0, 4, 8 and 12, with shift 20.
 */
690 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
691 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
692 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
693 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
/*
 * IDCT(src0, src4, src1, src5, dst, shift): variant that reads src0 and
 * src4 plus the quadwords 8 bytes after each (8+src0, 8+src4). The src1 and
 * src5 parameters are accepted but never used.
 *
 * Both quadword pairs are multiplied (pmaddwd) by the table entries at 16,
 * 24, 32 and 40 bytes from %2 and combined with add/subtract pairs. Results
 * are shifted right arithmetically by shift, packed pairwise with packssdw
 * and stored with movq (8 bytes per store). The outputs are mirrored:
 *   dst     and 112+dst receive the same quadword (mm4)
 *   16+dst  and  96+dst receive the same quadword (mm0)
 *   32+dst  and  80+dst receive the same quadword (mm5)
 *   48+dst  and  64+dst receive the same quadword (mm6)
 * Writes mm0-mm7.
 *
 * NOTE(review): presumably selected when the data behind src1 and src5 is
 * known to be zero; the selecting branch is not visible in this listing.
 */
699 #define IDCT(src0, src4, src1, src5, dst, shift) \
700 "movq " #src0 ", %%mm0 \n\t" \
701 "movq " #src4 ", %%mm1 \n\t" \
702 "movq 16(%2), %%mm4 \n\t" \
703 "pmaddwd %%mm0, %%mm4 \n\t" \
704 "movq 24(%2), %%mm5 \n\t" \
705 "pmaddwd %%mm5, %%mm0 \n\t" \
706 "movq 32(%2), %%mm5 \n\t" \
707 "pmaddwd %%mm1, %%mm5 \n\t" \
708 "movq 40(%2), %%mm6 \n\t" \
709 "pmaddwd %%mm6, %%mm1 \n\t" \
710 "movq %%mm4, %%mm6 \n\t" \
711 "paddd %%mm5, %%mm4 \n\t" \
712 "psubd %%mm5, %%mm6 \n\t" \
713 "movq %%mm0, %%mm5 \n\t" \
714 "paddd %%mm1, %%mm0 \n\t" \
715 "psubd %%mm1, %%mm5 \n\t" \
716 "movq 8+" #src0 ", %%mm2 \n\t" \
717 "movq 8+" #src4 ", %%mm3 \n\t" \
718 "movq 16(%2), %%mm1 \n\t" \
719 "pmaddwd %%mm2, %%mm1 \n\t" \
720 "movq 24(%2), %%mm7 \n\t" \
721 "pmaddwd %%mm7, %%mm2 \n\t" \
722 "movq 32(%2), %%mm7 \n\t" \
723 "pmaddwd %%mm3, %%mm7 \n\t" \
724 "pmaddwd 40(%2), %%mm3 \n\t" \
725 "paddd %%mm1, %%mm7 \n\t" \
726 "paddd %%mm1, %%mm1 \n\t" \
727 "psubd %%mm7, %%mm1 \n\t" \
728 "paddd %%mm2, %%mm3 \n\t" \
729 "paddd %%mm2, %%mm2 \n\t" \
730 "psubd %%mm3, %%mm2 \n\t" \
731 "psrad $" #shift ", %%mm4 \n\t"\
732 "psrad $" #shift ", %%mm7 \n\t"\
733 "psrad $" #shift ", %%mm3 \n\t"\
734 "packssdw %%mm7, %%mm4 \n\t" \
735 "movq %%mm4, " #dst " \n\t"\
736 "psrad $" #shift ", %%mm0 \n\t"\
737 "packssdw %%mm3, %%mm0 \n\t" \
738 "movq %%mm0, 16+" #dst " \n\t"\
739 "movq %%mm0, 96+" #dst " \n\t"\
740 "movq %%mm4, 112+" #dst " \n\t"\
741 "psrad $" #shift ", %%mm5 \n\t"\
742 "psrad $" #shift ", %%mm6 \n\t"\
743 "psrad $" #shift ", %%mm2 \n\t"\
744 "packssdw %%mm2, %%mm5 \n\t" \
745 "movq %%mm5, 32+" #dst " \n\t"\
746 "psrad $" #shift ", %%mm1 \n\t"\
747 "packssdw %%mm1, %%mm6 \n\t" \
748 "movq %%mm6, 48+" #dst " \n\t"\
749 "movq %%mm6, 64+" #dst " \n\t"\
750 "movq %%mm5, 80+" #dst " \n\t"
/*
 * Second pass with the IDCT definition in effect here: only two
 * invocations, writing operand %0 at byte offsets 0 and 8 with shift 20.
 * The macro itself also reads 8+src0 and 8+src4, so the pair touches
 * offsets 0, 8, 16, 24 and 64, 72, 80, 88 of operand %1.
 * NOTE(review): the embedded listing numbers skip 755 between the two
 * calls; what stood there is not visible.
 */
754 IDCT( 0(%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
756 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
/*
 * IDCT(src0, src4, src1, src5, dst, shift): variant that reads src0, src4
 * and src1 only. The src5 parameter is accepted but never used.
 *
 * Loads the three sources (movq) into mm0, mm1 and mm2, multiplies them
 * (pmaddwd) by the table entries at 16, 24, 32, 40, 48, 64, 80 and 96 bytes
 * from %2, combines the products with add/subtract pairs, shifts each
 * result right arithmetically by shift and packs it with packssdw. Each
 * result is stored with movd (4 bytes) at dst, 16+dst, 32+dst, 48+dst,
 * 64+dst, 80+dst, 96+dst and 112+dst. Writes mm0-mm7.
 *
 * NOTE(review): presumably selected when the data behind src5 is known to
 * be zero; the selecting branch is not visible in this listing.
 */
764 #define IDCT(src0, src4, src1, src5, dst, shift) \
765 "movq " #src0 ", %%mm0 \n\t" \
766 "movq " #src4 ", %%mm1 \n\t" \
767 "movq " #src1 ", %%mm2 \n\t" \
768 "movq 16(%2), %%mm4 \n\t" \
769 "pmaddwd %%mm0, %%mm4 \n\t" \
770 "movq 24(%2), %%mm5 \n\t" \
771 "pmaddwd %%mm5, %%mm0 \n\t" \
772 "movq 32(%2), %%mm5 \n\t" \
773 "pmaddwd %%mm1, %%mm5 \n\t" \
774 "movq 40(%2), %%mm6 \n\t" \
775 "pmaddwd %%mm6, %%mm1 \n\t" \
776 "movq %%mm4, %%mm6 \n\t" \
777 "movq 48(%2), %%mm7 \n\t" \
778 "pmaddwd %%mm2, %%mm7 \n\t" \
779 "paddd %%mm5, %%mm4 \n\t" \
780 "psubd %%mm5, %%mm6 \n\t" \
781 "movq %%mm0, %%mm5 \n\t" \
782 "paddd %%mm1, %%mm0 \n\t" \
783 "psubd %%mm1, %%mm5 \n\t" \
784 "movq 64(%2), %%mm1 \n\t"\
785 "pmaddwd %%mm2, %%mm1 \n\t" \
786 "paddd %%mm4, %%mm7 \n\t" \
787 "paddd %%mm4, %%mm4 \n\t" \
788 "psubd %%mm7, %%mm4 \n\t" \
789 "psrad $" #shift ", %%mm7 \n\t"\
790 "psrad $" #shift ", %%mm4 \n\t"\
791 "movq %%mm0, %%mm3 \n\t" \
792 "paddd %%mm1, %%mm0 \n\t" \
793 "psubd %%mm1, %%mm3 \n\t" \
794 "psrad $" #shift ", %%mm0 \n\t"\
795 "psrad $" #shift ", %%mm3 \n\t"\
796 "packssdw %%mm7, %%mm7 \n\t" \
797 "movd %%mm7, " #dst " \n\t"\
798 "packssdw %%mm0, %%mm0 \n\t" \
799 "movd %%mm0, 16+" #dst " \n\t"\
800 "packssdw %%mm3, %%mm3 \n\t" \
801 "movd %%mm3, 96+" #dst " \n\t"\
802 "packssdw %%mm4, %%mm4 \n\t" \
803 "movd %%mm4, 112+" #dst " \n\t"\
804 "movq 80(%2), %%mm4 \n\t" \
805 "pmaddwd %%mm2, %%mm4 \n\t" \
806 "pmaddwd 96(%2), %%mm2 \n\t" \
807 "movq %%mm5, %%mm3 \n\t" \
808 "paddd %%mm4, %%mm3 \n\t" \
809 "psubd %%mm4, %%mm5 \n\t" \
810 "psrad $" #shift ", %%mm3 \n\t"\
811 "psrad $" #shift ", %%mm5 \n\t"\
812 "movq %%mm6, %%mm4 \n\t" \
813 "paddd %%mm2, %%mm6 \n\t" \
814 "psubd %%mm2, %%mm4 \n\t" \
815 "psrad $" #shift ", %%mm6 \n\t"\
816 "packssdw %%mm3, %%mm3 \n\t" \
817 "movd %%mm3, 32+" #dst " \n\t"\
818 "psrad $" #shift ", %%mm4 \n\t"\
819 "packssdw %%mm6, %%mm6 \n\t" \
820 "movd %%mm6, 48+" #dst " \n\t"\
821 "packssdw %%mm4, %%mm4 \n\t" \
822 "packssdw %%mm5, %%mm5 \n\t" \
823 "movd %%mm4, 64+" #dst " \n\t"\
824 "movd %%mm5, 80+" #dst " \n\t"
/*
 * Second pass with the IDCT definition in effect here: four invocations
 * reading operand %1 at offsets n, 64+n, 32+n and 96+n (n = 0, 8, 16, 24)
 * and writing operand %0 at byte offsets 0, 4, 8 and 12, with shift 20.
 */
828 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
829 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
830 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
831 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
/*
 * IDCT(src0, src4, src1, src5, dst, shift): variant that reads only src0
 * and the quadword 8 bytes after it (8+src0). The src4, src1 and src5
 * parameters are accepted but never used.
 *
 * Each input quadword is multiplied (pmaddwd) by the table entries at
 * 16(%2) and 24(%2) and shifted right arithmetically by shift. There are no
 * add/subtract stages. The two packed results are replicated with movq
 * (8 bytes per store):
 *   mm4 (products with 16(%2)) -> dst, 48+dst, 64+dst, 112+dst
 *   mm0 (products with 24(%2)) -> 16+dst, 32+dst, 80+dst, 96+dst
 *
 * NOTE(review): the load of 32(%2) into mm7 is not consumed by any later
 * instruction in this macro. Presumably this variant is selected when
 * everything except src0 is known to be zero; the selecting branch is not
 * visible in this listing.
 * Writes mm0, mm1, mm2, mm4, mm5 and mm7.
 */
838 #define IDCT(src0, src4, src1, src5, dst, shift) \
839 "movq " #src0 ", %%mm0 \n\t" \
840 "movq 16(%2), %%mm4 \n\t" \
841 "pmaddwd %%mm0, %%mm4 \n\t" \
842 "movq 24(%2), %%mm5 \n\t" \
843 "pmaddwd %%mm5, %%mm0 \n\t" \
844 "psrad $" #shift ", %%mm4 \n\t"\
845 "psrad $" #shift ", %%mm0 \n\t"\
846 "movq 8+" #src0 ", %%mm2 \n\t" \
847 "movq 16(%2), %%mm1 \n\t" \
848 "pmaddwd %%mm2, %%mm1 \n\t" \
849 "movq 24(%2), %%mm7 \n\t" \
850 "pmaddwd %%mm7, %%mm2 \n\t" \
851 "movq 32(%2), %%mm7 \n\t" \
852 "psrad $" #shift ", %%mm1 \n\t"\
853 "packssdw %%mm1, %%mm4 \n\t" \
854 "movq %%mm4, " #dst " \n\t"\
855 "psrad $" #shift ", %%mm2 \n\t"\
856 "packssdw %%mm2, %%mm0 \n\t" \
857 "movq %%mm0, 16+" #dst " \n\t"\
858 "movq %%mm0, 96+" #dst " \n\t"\
859 "movq %%mm4, 112+" #dst " \n\t"\
860 "movq %%mm0, 32+" #dst " \n\t"\
861 "movq %%mm4, 48+" #dst " \n\t"\
862 "movq %%mm4, 64+" #dst " \n\t"\
863 "movq %%mm0, 80+" #dst " \n\t"
/*
 * Second pass with the IDCT definition in effect here: only two
 * invocations, writing operand %0 at byte offsets 0 and 8 with shift 20.
 * The macro itself also reads 8+src0, so the pair touches offsets 0, 8, 16
 * and 24 of operand %1.
 * NOTE(review): the embedded listing numbers skip 867 between the two
 * calls; what stood there is not visible.
 */
866 IDCT( 0(%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
868 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
#define DC_COND_IDCT(src0, src4, src1, src5, dst, rounder, rarg, shift)
static void idct(int16_t block[64])
Memory handling functions.
void ff_simple_idct_put_sse2(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, ptrdiff_t line_size)
void ff_add_pixels_clamped_sse2(const int16_t *block, uint8_t *pixels, ptrdiff_t line_size)
void ff_simple_idct_add_sse2(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
#define DECLARE_ALIGNED(n, t, v)
Declare a variable that is aligned in memory.
#define LOCAL_ALIGNED_8(t, v,...)
#define Z_COND_IDCT(src0, src4, src1, src5, dst, rounder, rarg, shift, bt)
#define DECLARE_ASM_CONST(n, t, v)
Declare a static constant aligned variable appropriate for use in inline assembly code...
void ff_simple_idct_add_mmx(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
void ff_put_pixels_clamped_sse2(const int16_t *block, uint8_t *pixels, ptrdiff_t line_size)
void ff_simple_idct_mmx(int16_t *block)
void ff_simple_idct_put_mmx(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
static const int16_t coeffs[]
void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, ptrdiff_t line_size)
#define NAMED_CONSTRAINTS_ADD(...)