FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
h264qpel_mmi.c
Go to the documentation of this file.
1 /*
2  * Loongson SIMD optimized h264qpel
3  *
4  * Copyright (c) 2015 Loongson Technology Corporation Limited
5  * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 #include "h264dsp_mips.h"
25 #include "hpeldsp_mips.h"
27 #include "libavutil/mips/asmdefs.h"
28 
/*
 * Copy a block that is 4 bytes wide and h rows tall from src to dst.
 *
 * dst/src:             first byte of the destination/source block
 * dstStride/srcStride: byte distance between consecutive rows
 * h:                   number of rows; the counter is only tested after it
 *                      has been decremented, so the loop body always runs at
 *                      least once and h <= 0 is not handled.
 */
29 static inline void copy_block4_mmi(uint8_t *dst, const uint8_t *src,
30  int dstStride, int srcStride, int h)
31 {
32  double ftmp[1];
33  uint64_t low32;
34 
35  __asm__ volatile (
36  "1: \n\t"
 /* Fetch the row into a GPR, then move its low word to an FP register.
  * NOTE(review): uld is a doubleword load, so this looks like it reads 8
  * bytes from src although only 4 are stored -- confirm the over-read is
  * acceptable for every caller. */
37  "uld %[low32], 0x00(%[src]) \n\t"
38  "mtc1 %[low32], %[ftmp0] \n\t"
 /* Left/right store pair covering dst[0..3]; presumably the Loongson
  * unaligned word store -- confirm against the ISA manual. */
39  "gsswlc1 %[ftmp0], 0x03(%[dst]) \n\t"
40  "gsswrc1 %[ftmp0], 0x00(%[dst]) \n\t"
 /* One row done: decrement the counter and step both pointers. */
41  "addi %[h], %[h], -0x01 \n\t"
42  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
43  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
44  "bnez %[h], 1b \n\t"
45  : [ftmp0]"=&f"(ftmp[0]),
46  [dst]"+&r"(dst), [src]"+&r"(src),
47  [h]"+&r"(h),
48  [low32]"=&r"(low32)
49  : [dstStride]"r"((mips_reg)dstStride),
50  [srcStride]"r"((mips_reg)srcStride)
51  : "memory"
52  );
53 }
54 
/*
 * Copy a block that is 8 bytes wide and h rows tall from src to dst.
 *
 * Each row travels through a single FP register. dstStride/srcStride are the
 * byte distances between rows. The counter is tested after the decrement, so
 * the body always runs at least once and h <= 0 is not handled.
 */
55 static inline void copy_block8_mmi(uint8_t *dst, const uint8_t *src,
56  int dstStride, int srcStride, int h)
57 {
58  double ftmp[1];
59 
60  __asm__ volatile (
61  "1: \n\t"
 /* Left/right load pair covering src[0..7], then the matching store pair
  * covering dst[0..7] (presumably the Loongson unaligned 64-bit forms). */
62  "gsldlc1 %[ftmp0], 0x07(%[src]) \n\t"
63  "gsldrc1 %[ftmp0], 0x00(%[src]) \n\t"
64  "gssdlc1 %[ftmp0], 0x07(%[dst]) \n\t"
65  "gssdrc1 %[ftmp0], 0x00(%[dst]) \n\t"
 /* Next row. */
66  "addi %[h], %[h], -0x01 \n\t"
67  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
68  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
69  "bnez %[h], 1b \n\t"
70  : [ftmp0]"=&f"(ftmp[0]),
71  [dst]"+&r"(dst), [src]"+&r"(src),
72  [h]"+&r"(h)
73  : [dstStride]"r"((mips_reg)dstStride),
74  [srcStride]"r"((mips_reg)srcStride)
75  : "memory"
76  );
77 }
78 
/*
 * Copy a block that is 16 bytes wide and h rows tall from src to dst.
 *
 * Bytes 0..7 of every row go through an FP register and bytes 8..15 through
 * a general-purpose register. dstStride/srcStride are the byte distances
 * between rows. The counter is tested after the decrement, so the body
 * always runs at least once and h <= 0 is not handled.
 */
79 static inline void copy_block16_mmi(uint8_t *dst, const uint8_t *src,
80  int dstStride, int srcStride, int h)
81 {
82  double ftmp[1];
83  uint64_t tmp[1];
84 
85  __asm__ volatile (
86  "1: \n\t"
 /* Both halves of the row are loaded before anything is stored. */
87  "gsldlc1 %[ftmp0], 0x07(%[src]) \n\t"
88  "gsldrc1 %[ftmp0], 0x00(%[src]) \n\t"
89  "ldl %[tmp0], 0x0f(%[src]) \n\t"
90  "ldr %[tmp0], 0x08(%[src]) \n\t"
 /* Write dst[0..7] from the FP register and dst[8..15] from the GPR. */
91  "gssdlc1 %[ftmp0], 0x07(%[dst]) \n\t"
92  "gssdrc1 %[ftmp0], 0x00(%[dst]) \n\t"
93  "sdl %[tmp0], 0x0f(%[dst]) \n\t"
94  "sdr %[tmp0], 0x08(%[dst]) \n\t"
 /* Next row. */
95  "addi %[h], %[h], -0x01 \n\t"
96  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
97  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
98  "bnez %[h], 1b \n\t"
99  : [ftmp0]"=&f"(ftmp[0]),
100  [tmp0]"=&r"(tmp[0]),
101  [dst]"+&r"(dst), [src]"+&r"(src),
102  [h]"+&r"(h)
103  : [dstStride]"r"((mips_reg)dstStride),
104  [srcStride]"r"((mips_reg)srcStride)
105  : "memory"
106  );
107 }
108 
/*
 * Store helpers for a filter sum b: both add 512 and shift right by 10, then
 * pass the value through CLIP (not defined in the visible part of the file).
 * op2_put assigns that result to a; op2_avg assigns the rounded-up average of
 * the previous a and that result. Both expand to an assignment and evaluate
 * a more than once (op2_avg), so arguments must be side-effect free.
 */
109 #define op2_avg(a, b) a = (((a)+CLIP(((b) + 512)>>10)+1)>>1)
110 #define op2_put(a, b) a = CLIP(((b) + 512)>>10)
112  int dstStride, int srcStride)
113 {
 /*
  * NOTE(review): the signature line carrying this function's name is missing
  * from the listing; judging by the body this is the 4x4 horizontal "put"
  * low-pass filter -- confirm against the real source.
  *
  * For each of 4 rows, six 4-pixel groups starting at src-2 .. src+3 are
  * widened to 16-bit lanes and combined as
  *   ((s[0]+s[1])*ff_pw_20 - (s[-1]+s[2])*ff_pw_5 + (s[-2]+s[3]) + ff_pw_16)
  * then shifted right, packed back to unsigned bytes with saturation and
  * written to dst[0..3]. ff_pw_20/ff_pw_5/ff_pw_16 are defined elsewhere;
  * presumably packed 16-bit constants 20, 5 and 16.
  */
114  double ftmp[10];
115  uint64_t tmp[1];
116  uint64_t low32;
117 
118  __asm__ volatile (
 /* ftmp0 = 0 (used as the zero half when widening), tmp0 = row counter. */
119  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
120  "dli %[tmp0], 0x04 \n\t"
121  "1: \n\t"
 /* NOTE(review): each uld is a doubleword load although only the low word
  * is moved to the FP register -- confirm the extra bytes are readable. */
122  "uld %[low32], -0x02(%[src]) \n\t"
123  "mtc1 %[low32], %[ftmp1] \n\t"
124  "uld %[low32], -0x01(%[src]) \n\t"
125  "mtc1 %[low32], %[ftmp2] \n\t"
126  "uld %[low32], 0x00(%[src]) \n\t"
127  "mtc1 %[low32], %[ftmp3] \n\t"
128  "uld %[low32], 0x01(%[src]) \n\t"
129  "mtc1 %[low32], %[ftmp4] \n\t"
130  "uld %[low32], 0x02(%[src]) \n\t"
131  "mtc1 %[low32], %[ftmp5] \n\t"
132  "uld %[low32], 0x03(%[src]) \n\t"
133  "mtc1 %[low32], %[ftmp6] \n\t"
 /* Interleave with zero: bytes become 16-bit lanes. */
134  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
135  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
136  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
137  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
138  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
139  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
 /* Pair the symmetric taps: centre, inner and outer. */
140  "paddsh %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
141  "paddsh %[ftmp8], %[ftmp2], %[ftmp5] \n\t"
142  "paddsh %[ftmp9], %[ftmp1], %[ftmp6] \n\t"
143  "pmullh %[ftmp7], %[ftmp7], %[ff_pw_20] \n\t"
144  "pmullh %[ftmp8], %[ftmp8], %[ff_pw_5] \n\t"
145  "psubsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
146  "paddsh %[ftmp9], %[ftmp7], %[ftmp9] \n\t"
147  "paddsh %[ftmp9], %[ftmp9], %[ff_pw_16] \n\t"
 /* NOTE(review): the shift-count operand is ff_pw_5 itself rather than a
  * scalar; confirm psrah interprets it as a shift by 5. */
148  "psrah %[ftmp9], %[ftmp9], %[ff_pw_5] \n\t"
149  "packushb %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
 /* Left/right store pair covering dst[0..3]. */
150  "gsswlc1 %[ftmp9], 0x03(%[dst]) \n\t"
151  "gsswrc1 %[ftmp9], 0x00(%[dst]) \n\t"
152  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
153  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
154  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
155  "bnez %[tmp0], 1b \n\t"
156  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
157  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
158  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
159  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
160  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
161  [tmp0]"=&r"(tmp[0]),
162  [dst]"+&r"(dst), [src]"+&r"(src),
163  [low32]"=&r"(low32)
164  : [dstStride]"r"((mips_reg)dstStride),
165  [srcStride]"r"((mips_reg)srcStride),
166  [ff_pw_20]"f"(ff_pw_20), [ff_pw_5]"f"(ff_pw_5),
167  [ff_pw_16]"f"(ff_pw_16)
168  : "memory"
169  );
170 }
171 
173  int dstStride, int srcStride)
174 {
 /*
  * NOTE(review): the signature line carrying this function's name is missing
  * from the listing; judging by the body this is the 8x8 horizontal "put"
  * low-pass filter -- confirm against the real source.
  *
  * For each of 8 rows, six 8-byte vectors starting at src-2 .. src+3 are
  * loaded, split into low and high halves widened to 16-bit lanes, and
  * combined as
  *   ((s[0]+s[1])*ff_pw_20 - (s[-1]+s[2])*ff_pw_5 + (s[-2]+s[3]) + ff_pw_16)
  * then shifted right, packed to unsigned bytes with saturation and written
  * to dst[0..7]. ff_pw_20/ff_pw_5/ff_pw_16 are defined elsewhere; presumably
  * packed 16-bit constants 20, 5 and 16.
  */
175  double ftmp[11];
176  uint64_t tmp[1];
177 
178  __asm__ volatile (
 /* ftmp0 = 0 (zero half for widening), tmp0 = row counter. */
179  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
180  "dli %[tmp0], 0x08 \n\t"
181  "1: \n\t"
 /* Left/right load pairs: ftmp1..ftmp6 = 8 bytes at src-2 .. src+3. */
182  "gsldlc1 %[ftmp1], 0x05(%[src]) \n\t"
183  "gsldrc1 %[ftmp1], -0x02(%[src]) \n\t"
184  "gsldlc1 %[ftmp2], 0x06(%[src]) \n\t"
185  "gsldrc1 %[ftmp2], -0x01(%[src]) \n\t"
186  "gsldlc1 %[ftmp3], 0x07(%[src]) \n\t"
187  "gsldrc1 %[ftmp3], 0x00(%[src]) \n\t"
188  "gsldlc1 %[ftmp4], 0x08(%[src]) \n\t"
189  "gsldrc1 %[ftmp4], 0x01(%[src]) \n\t"
190  "gsldlc1 %[ftmp5], 0x09(%[src]) \n\t"
191  "gsldrc1 %[ftmp5], 0x02(%[src]) \n\t"
192  "gsldlc1 %[ftmp6], 0x0a(%[src]) \n\t"
193  "gsldrc1 %[ftmp6], 0x03(%[src]) \n\t"
 /* Centre taps: (s[0] + s[1]) * ff_pw_20, low half in ftmp3, high in ftmp4. */
194  "punpcklbh %[ftmp7], %[ftmp3], %[ftmp0] \n\t"
195  "punpckhbh %[ftmp8], %[ftmp3], %[ftmp0] \n\t"
196  "punpcklbh %[ftmp9], %[ftmp4], %[ftmp0] \n\t"
197  "punpckhbh %[ftmp10], %[ftmp4], %[ftmp0] \n\t"
198  "paddsh %[ftmp3], %[ftmp7], %[ftmp9] \n\t"
199  "paddsh %[ftmp4], %[ftmp8], %[ftmp10] \n\t"
200  "pmullh %[ftmp3], %[ftmp3], %[ff_pw_20] \n\t"
201  "pmullh %[ftmp4], %[ftmp4], %[ff_pw_20] \n\t"
 /* Inner taps: (s[-1] + s[2]) * ff_pw_5, low half in ftmp2, high in ftmp5. */
202  "punpcklbh %[ftmp7], %[ftmp2], %[ftmp0] \n\t"
203  "punpckhbh %[ftmp8], %[ftmp2], %[ftmp0] \n\t"
204  "punpcklbh %[ftmp9], %[ftmp5], %[ftmp0] \n\t"
205  "punpckhbh %[ftmp10], %[ftmp5], %[ftmp0] \n\t"
206  "paddsh %[ftmp2], %[ftmp7], %[ftmp9] \n\t"
207  "paddsh %[ftmp5], %[ftmp8], %[ftmp10] \n\t"
208  "pmullh %[ftmp2], %[ftmp2], %[ff_pw_5] \n\t"
209  "pmullh %[ftmp5], %[ftmp5], %[ff_pw_5] \n\t"
 /* Outer taps: s[-2] + s[3], low half in ftmp1, high in ftmp6. */
210  "punpcklbh %[ftmp7], %[ftmp1], %[ftmp0] \n\t"
211  "punpckhbh %[ftmp8], %[ftmp1], %[ftmp0] \n\t"
212  "punpcklbh %[ftmp9], %[ftmp6], %[ftmp0] \n\t"
213  "punpckhbh %[ftmp10], %[ftmp6], %[ftmp0] \n\t"
214  "paddsh %[ftmp1], %[ftmp7], %[ftmp9] \n\t"
215  "paddsh %[ftmp6], %[ftmp8], %[ftmp10] \n\t"
 /* Combine the three terms, add the rounding constant, shift, pack. */
216  "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
217  "psubsh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
218  "paddsh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
219  "paddsh %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
220  "paddsh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
221  "paddsh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
 /* NOTE(review): the shift-count operand is ff_pw_5 itself rather than a
  * scalar; confirm psrah interprets it as a shift by 5. */
222  "psrah %[ftmp3], %[ftmp3], %[ff_pw_5] \n\t"
223  "psrah %[ftmp4], %[ftmp4], %[ff_pw_5] \n\t"
224  "packushb %[ftmp9], %[ftmp3], %[ftmp4] \n\t"
 /* Left/right store pair covering dst[0..7]. */
225  "gssdlc1 %[ftmp9], 0x07(%[dst]) \n\t"
226  "gssdrc1 %[ftmp9], 0x00(%[dst]) \n\t"
227  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
228  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
229  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
230  "bnez %[tmp0], 1b \n\t"
231  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
232  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
233  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
234  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
235  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
236  [ftmp10]"=&f"(ftmp[10]),
237  [tmp0]"=&r"(tmp[0]),
238  [dst]"+&r"(dst), [src]"+&r"(src)
239  : [dstStride]"r"((mips_reg)dstStride),
240  [srcStride]"r"((mips_reg)srcStride),
241  [ff_pw_20]"f"(ff_pw_20), [ff_pw_5]"f"(ff_pw_5),
242  [ff_pw_16]"f"(ff_pw_16)
243  : "memory"
244  );
245 }
246 
248  int dstStride, int srcStride)
249 {
 /*
  * NOTE(review): the signature line carrying this function's name is missing
  * from the listing; presumably the 16x16 horizontal "put" low-pass filter.
  *
  * The work is delegated to put_h264_qpel8_h_lowpass_mmi four times: once at
  * (dst, src) and once 8 bytes to the right, then both pointers are moved
  * down by 8 rows and the two calls are repeated.
  */
250  put_h264_qpel8_h_lowpass_mmi(dst, src, dstStride, srcStride);
251  put_h264_qpel8_h_lowpass_mmi(dst+8, src+8, dstStride, srcStride);
252  src += 8*srcStride;
253  dst += 8*dstStride;
254  put_h264_qpel8_h_lowpass_mmi(dst, src, dstStride, srcStride);
255  put_h264_qpel8_h_lowpass_mmi(dst+8, src+8, dstStride, srcStride);
256 }
257 
259  int dstStride, int srcStride)
260 {
 /*
  * NOTE(review): the signature line carrying this function's name is missing
  * from the listing; judging by the body this is the 4x4 horizontal "avg"
  * low-pass filter -- confirm against the real source.
  *
  * For each of 4 rows, six 4-pixel groups starting at src-2 .. src+3 are
  * widened to 16-bit lanes and combined as
  *   ((s[0]+s[1])*ff_pw_20 - (s[-1]+s[2])*ff_pw_5 + (s[-2]+s[3]) + ff_pw_16)
  * then shifted right and packed to unsigned bytes with saturation. The
  * result is byte-averaged (pavgb) with the word already at dst and written
  * back to dst[0..3]. ff_pw_20/ff_pw_5/ff_pw_16 are defined elsewhere;
  * presumably packed 16-bit constants 20, 5 and 16.
  */
261  double ftmp[11];
262  uint64_t tmp[1];
263  uint64_t low32;
264 
265  __asm__ volatile (
 /* ftmp0 = 0 (zero half for widening), tmp0 = row counter. */
266  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
267  "dli %[tmp0], 0x04 \n\t"
268  "1: \n\t"
 /* NOTE(review): each uld is a doubleword load although only the low word
  * is moved to the FP register -- confirm the extra bytes are readable. */
269  "uld %[low32], -0x02(%[src]) \n\t"
270  "mtc1 %[low32], %[ftmp1] \n\t"
271  "uld %[low32], -0x01(%[src]) \n\t"
272  "mtc1 %[low32], %[ftmp2] \n\t"
273  "uld %[low32], 0x00(%[src]) \n\t"
274  "mtc1 %[low32], %[ftmp3] \n\t"
275  "uld %[low32], 0x01(%[src]) \n\t"
276  "mtc1 %[low32], %[ftmp4] \n\t"
277  "uld %[low32], 0x02(%[src]) \n\t"
278  "mtc1 %[low32], %[ftmp5] \n\t"
279  "uld %[low32], 0x03(%[src]) \n\t"
280  "mtc1 %[low32], %[ftmp6] \n\t"
 /* Interleave with zero: bytes become 16-bit lanes. */
281  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
282  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
283  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
284  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
285  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
286  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
 /* Pair the symmetric taps: centre, inner and outer. */
287  "paddsh %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
288  "paddsh %[ftmp8], %[ftmp2], %[ftmp5] \n\t"
289  "paddsh %[ftmp9], %[ftmp1], %[ftmp6] \n\t"
290  "pmullh %[ftmp7], %[ftmp7], %[ff_pw_20] \n\t"
291  "pmullh %[ftmp8], %[ftmp8], %[ff_pw_5] \n\t"
292  "psubsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
293  "paddsh %[ftmp9], %[ftmp7], %[ftmp9] \n\t"
294  "paddsh %[ftmp9], %[ftmp9], %[ff_pw_16] \n\t"
 /* NOTE(review): the shift-count operand is ff_pw_5 itself rather than a
  * scalar; confirm psrah interprets it as a shift by 5. */
295  "psrah %[ftmp9], %[ftmp9], %[ff_pw_5] \n\t"
296  "packushb %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
 /* Average with the existing destination word, then store dst[0..3].
  * The destination is read with a plain lwc1 but written with the
  * left/right store pair. */
297  "lwc1 %[ftmp10], 0x00(%[dst]) \n\t"
298  "pavgb %[ftmp9], %[ftmp9], %[ftmp10] \n\t"
299  "gsswlc1 %[ftmp9], 0x03(%[dst]) \n\t"
300  "gsswrc1 %[ftmp9], 0x00(%[dst]) \n\t"
301  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
302  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
303  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
304  "bnez %[tmp0], 1b \n\t"
305  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
306  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
307  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
308  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
309  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
310  [ftmp10]"=&f"(ftmp[10]),
311  [tmp0]"=&r"(tmp[0]),
312  [dst]"+&r"(dst), [src]"+&r"(src),
313  [low32]"=&r"(low32)
314  : [dstStride]"r"((mips_reg)dstStride),
315  [srcStride]"r"((mips_reg)srcStride),
316  [ff_pw_20]"f"(ff_pw_20), [ff_pw_5]"f"(ff_pw_5),
317  [ff_pw_16]"f"(ff_pw_16)
318  : "memory"
319  );
320 }
321 
323  int dstStride, int srcStride)
324 {
 /*
  * NOTE(review): the signature line carrying this function's name is missing
  * from the listing; judging by the body this is the 8x8 horizontal "avg"
  * low-pass filter -- confirm against the real source.
  *
  * For each of 8 rows, six 8-byte vectors starting at src-2 .. src+3 are
  * loaded, split into low and high halves widened to 16-bit lanes, and
  * combined as
  *   ((s[0]+s[1])*ff_pw_20 - (s[-1]+s[2])*ff_pw_5 + (s[-2]+s[3]) + ff_pw_16)
  * then shifted right and packed to unsigned bytes with saturation. The
  * result is byte-averaged (pavgb) with the 8 bytes already at dst and
  * written back. ff_pw_20/ff_pw_5/ff_pw_16 are defined elsewhere; presumably
  * packed 16-bit constants 20, 5 and 16.
  */
325  double ftmp[11];
326  uint64_t tmp[1];
327 
328  __asm__ volatile (
 /* ftmp0 = 0 (zero half for widening), tmp0 = row counter. */
329  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
330  "dli %[tmp0], 0x08 \n\t"
331  "1: \n\t"
 /* Left/right load pairs: ftmp1..ftmp6 = 8 bytes at src-2 .. src+3. */
332  "gsldlc1 %[ftmp1], 0x05(%[src]) \n\t"
333  "gsldrc1 %[ftmp1], -0x02(%[src]) \n\t"
334  "gsldlc1 %[ftmp2], 0x06(%[src]) \n\t"
335  "gsldrc1 %[ftmp2], -0x01(%[src]) \n\t"
336  "gsldlc1 %[ftmp3], 0x07(%[src]) \n\t"
337  "gsldrc1 %[ftmp3], 0x00(%[src]) \n\t"
338  "gsldlc1 %[ftmp4], 0x08(%[src]) \n\t"
339  "gsldrc1 %[ftmp4], 0x01(%[src]) \n\t"
340  "gsldlc1 %[ftmp5], 0x09(%[src]) \n\t"
341  "gsldrc1 %[ftmp5], 0x02(%[src]) \n\t"
342  "gsldlc1 %[ftmp6], 0x0a(%[src]) \n\t"
343  "gsldrc1 %[ftmp6], 0x03(%[src]) \n\t"
 /* Centre taps: (s[0] + s[1]) * ff_pw_20, low half in ftmp3, high in ftmp4. */
344  "punpcklbh %[ftmp7], %[ftmp3], %[ftmp0] \n\t"
345  "punpckhbh %[ftmp8], %[ftmp3], %[ftmp0] \n\t"
346  "punpcklbh %[ftmp9], %[ftmp4], %[ftmp0] \n\t"
347  "punpckhbh %[ftmp10], %[ftmp4], %[ftmp0] \n\t"
348  "paddsh %[ftmp3], %[ftmp7], %[ftmp9] \n\t"
349  "paddsh %[ftmp4], %[ftmp8], %[ftmp10] \n\t"
350  "pmullh %[ftmp3], %[ftmp3], %[ff_pw_20] \n\t"
351  "pmullh %[ftmp4], %[ftmp4], %[ff_pw_20] \n\t"
 /* Inner taps: (s[-1] + s[2]) * ff_pw_5, low half in ftmp2, high in ftmp5. */
352  "punpcklbh %[ftmp7], %[ftmp2], %[ftmp0] \n\t"
353  "punpckhbh %[ftmp8], %[ftmp2], %[ftmp0] \n\t"
354  "punpcklbh %[ftmp9], %[ftmp5], %[ftmp0] \n\t"
355  "punpckhbh %[ftmp10], %[ftmp5], %[ftmp0] \n\t"
356  "paddsh %[ftmp2], %[ftmp7], %[ftmp9] \n\t"
357  "paddsh %[ftmp5], %[ftmp8], %[ftmp10] \n\t"
358  "pmullh %[ftmp2], %[ftmp2], %[ff_pw_5] \n\t"
359  "pmullh %[ftmp5], %[ftmp5], %[ff_pw_5] \n\t"
 /* Outer taps: s[-2] + s[3], low half in ftmp1, high in ftmp6. */
360  "punpcklbh %[ftmp7], %[ftmp1], %[ftmp0] \n\t"
361  "punpckhbh %[ftmp8], %[ftmp1], %[ftmp0] \n\t"
362  "punpcklbh %[ftmp9], %[ftmp6], %[ftmp0] \n\t"
363  "punpckhbh %[ftmp10], %[ftmp6], %[ftmp0] \n\t"
364  "paddsh %[ftmp1], %[ftmp7], %[ftmp9] \n\t"
365  "paddsh %[ftmp6], %[ftmp8], %[ftmp10] \n\t"
 /* Combine the three terms, add the rounding constant, shift, pack. */
366  "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
367  "psubsh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
368  "paddsh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
369  "paddsh %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
370  "paddsh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
371  "paddsh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
 /* NOTE(review): the shift-count operand is ff_pw_5 itself rather than a
  * scalar; confirm psrah interprets it as a shift by 5. */
372  "psrah %[ftmp3], %[ftmp3], %[ff_pw_5] \n\t"
373  "psrah %[ftmp4], %[ftmp4], %[ff_pw_5] \n\t"
374  "packushb %[ftmp9], %[ftmp3], %[ftmp4] \n\t"
 /* Average with the existing destination bytes and write them back. Plain
  * ldc1/sdc1 are used here rather than left/right pairs -- presumably dst
  * is expected to be 8-byte aligned; confirm against the callers. */
375  "ldc1 %[ftmp10], 0x00(%[dst]) \n\t"
376  "pavgb %[ftmp9], %[ftmp9], %[ftmp10] \n\t"
377  "sdc1 %[ftmp9], 0x00(%[dst]) \n\t"
378  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
379  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
380  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
381  "bnez %[tmp0], 1b \n\t"
382  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
383  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
384  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
385  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
386  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
387  [ftmp10]"=&f"(ftmp[10]),
388  [tmp0]"=&r"(tmp[0]),
389  [dst]"+&r"(dst), [src]"+&r"(src)
390  : [dstStride]"r"((mips_reg)dstStride),
391  [srcStride]"r"((mips_reg)srcStride),
392  [ff_pw_20]"f"(ff_pw_20), [ff_pw_5]"f"(ff_pw_5),
393  [ff_pw_16]"f"(ff_pw_16)
394  : "memory"
395  );
396 }
397 
399  int dstStride, int srcStride)
400 {
 /*
  * NOTE(review): the signature line carrying this function's name is missing
  * from the listing; presumably the 16x16 horizontal "avg" low-pass filter.
  *
  * The work is delegated to avg_h264_qpel8_h_lowpass_mmi four times: once at
  * (dst, src) and once 8 bytes to the right, then both pointers are moved
  * down by 8 rows and the two calls are repeated.
  */
401  avg_h264_qpel8_h_lowpass_mmi(dst, src, dstStride, srcStride);
402  avg_h264_qpel8_h_lowpass_mmi(dst+8, src+8, dstStride, srcStride);
403  src += 8*srcStride;
404  dst += 8*dstStride;
405  avg_h264_qpel8_h_lowpass_mmi(dst, src, dstStride, srcStride);
406  avg_h264_qpel8_h_lowpass_mmi(dst+8, src+8, dstStride, srcStride);
407 }
408 
410  int dstStride, int srcStride)
411 {
 /*
  * NOTE(review): the signature line carrying this function's name is missing
  * from the listing; judging by the body this is the 4x4 vertical "put"
  * low-pass filter -- confirm against the real source.
  *
  * src is first moved up by two rows. Nine source rows are then read in
  * total and four output rows are produced by a fully unrolled sequence (no
  * loop). With six consecutive rows r0..r5 each output row is
  *   ((((r2 + r3) << 2) - r1 - r4) * ff_pw_5 + r0 + r5 + ff_pw_16) >> 5
  * packed to unsigned bytes with saturation and stored with swc1. The
  * registers ftmp1..ftmp6 are rotated between output rows so every source
  * row is loaded only once. ff_pw_5/ff_pw_16 are defined elsewhere;
  * presumably packed 16-bit constants 5 and 16.
  *
  * ftmp[8] and ftmp[9] are bound as outputs but never referenced by the asm
  * text.
  */
412  double ftmp[12];
413  uint64_t tmp[1];
414  uint64_t low32;
415 
416  src -= 2 * srcStride;
417 
418  __asm__ volatile (
419  ".set push \n\t"
420  ".set noreorder \n\t"
 /* ftmp0 = 0 for widening; ftmp10 = shift count 2; ftmp11 = shift count 5.
  * Rows r0..r4 are loaded into ftmp1..ftmp5 along the way.
  * NOTE(review): each uld is a doubleword load although only the low word
  * is moved to the FP register -- confirm the extra bytes are readable. */
421  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
422  "dli %[tmp0], 0x02 \n\t"
423  "uld %[low32], 0x00(%[src]) \n\t"
424  "mtc1 %[low32], %[ftmp1] \n\t"
425  "mtc1 %[tmp0], %[ftmp10] \n\t"
426  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
427  "dli %[tmp0], 0x05 \n\t"
428  "uld %[low32], 0x00(%[src]) \n\t"
429  "mtc1 %[low32], %[ftmp2] \n\t"
430  "mtc1 %[tmp0], %[ftmp11] \n\t"
431  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
432  "uld %[low32], 0x00(%[src]) \n\t"
433  "mtc1 %[low32], %[ftmp3] \n\t"
434  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
435  "uld %[low32], 0x00(%[src]) \n\t"
436  "mtc1 %[low32], %[ftmp4] \n\t"
437  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
438  "uld %[low32], 0x00(%[src]) \n\t"
439  "mtc1 %[low32], %[ftmp5] \n\t"
440  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
 /* Widen the five loaded rows to 16-bit lanes. */
441  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
442  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
443  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
444  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
445  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
 /* Output row 0: window ftmp1..ftmp6, new row loaded into ftmp6. */
446  "uld %[low32], 0x00(%[src]) \n\t"
447  "mtc1 %[low32], %[ftmp6] \n\t"
448  "paddh %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
449  "psllh %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
450  "psubh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
451  "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
452  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
453  "pmullh %[ftmp7], %[ftmp7], %[ff_pw_5] \n\t"
454  "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
455  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
456  "paddh %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
457  "paddh %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
458  "psrah %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
459  "packushb %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
460  "swc1 %[ftmp7], 0x00(%[dst]) \n\t"
461  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
 /* Output row 1: window ftmp2..ftmp6,ftmp1; new row loaded into ftmp1. */
462  "uld %[low32], 0x00(%[src]) \n\t"
463  "mtc1 %[low32], %[ftmp1] \n\t"
464  "paddh %[ftmp7], %[ftmp4], %[ftmp5] \n\t"
465  "psllh %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
466  "psubh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
467  "psubh %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
468  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
469  "pmullh %[ftmp7], %[ftmp7], %[ff_pw_5] \n\t"
470  "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
471  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
472  "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
473  "paddh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
474  "psrah %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
475  "packushb %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
476  "swc1 %[ftmp7], 0x00(%[dst]) \n\t"
477  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
 /* Output row 2: window ftmp3..ftmp6,ftmp1,ftmp2; new row into ftmp2. */
478  "uld %[low32], 0x00(%[src]) \n\t"
479  "mtc1 %[low32], %[ftmp2] \n\t"
480  "paddh %[ftmp7], %[ftmp5], %[ftmp6] \n\t"
481  "psllh %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
482  "psubh %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
483  "psubh %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
484  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
485  "pmullh %[ftmp7], %[ftmp7], %[ff_pw_5] \n\t"
486  "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
487  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
488  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
489  "paddh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
490  "psrah %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
491  "packushb %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
492  "swc1 %[ftmp7], 0x00(%[dst]) \n\t"
493  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
 /* Output row 3: window ftmp4..ftmp6,ftmp1..ftmp3; new row into ftmp3. */
494  "uld %[low32], 0x00(%[src]) \n\t"
495  "mtc1 %[low32], %[ftmp3] \n\t"
496  "paddh %[ftmp7], %[ftmp6], %[ftmp1] \n\t"
497  "psllh %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
498  "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
499  "psubh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
500  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
501  "pmullh %[ftmp7], %[ftmp7], %[ff_pw_5] \n\t"
502  "paddh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
503  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
504  "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
505  "paddh %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
506  "psrah %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
507  "packushb %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
508  "swc1 %[ftmp7], 0x00(%[dst]) \n\t"
509  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
510  ".set pop \n\t"
511  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
512  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
513  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
514  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
515  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
516  [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
517  [tmp0]"=&r"(tmp[0]),
518  [dst]"+&r"(dst), [src]"+&r"(src),
519  [low32]"=&r"(low32)
520  : [dstStride]"r"((mips_reg)dstStride),
521  [srcStride]"r"((mips_reg)srcStride),
522  [ff_pw_5]"f"(ff_pw_5), [ff_pw_16]"f"(ff_pw_16)
523  : "memory"
524  );
525 }
526 
528  int dstStride, int srcStride)
529 {
530  int w = 2;
531  int h = 8;
532  double ftmp[10];
533  uint64_t tmp[1];
534  uint64_t low32;
535 
536  src -= 2 * srcStride;
537 
538  while (w--) {
539  __asm__ volatile (
540  ".set push \n\t"
541  ".set noreorder \n\t"
542  "dli %[tmp0], 0x02 \n\t"
543  "uld %[low32], 0x00(%[src]) \n\t"
544  "mtc1 %[low32], %[ftmp0] \n\t"
545  "mtc1 %[tmp0], %[ftmp8] \n\t"
546  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
547  "dli %[tmp0], 0x05 \n\t"
548  "uld %[low32], 0x00(%[src]) \n\t"
549  "mtc1 %[low32], %[ftmp1] \n\t"
550  "mtc1 %[tmp0], %[ftmp9] \n\t"
551  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
552  "uld %[low32], 0x00(%[src]) \n\t"
553  "mtc1 %[low32], %[ftmp2] \n\t"
554  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
555  "xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
556  "uld %[low32], 0x00(%[src]) \n\t"
557  "mtc1 %[low32], %[ftmp3] \n\t"
558  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
559  "uld %[low32], 0x00(%[src]) \n\t"
560  "mtc1 %[low32], %[ftmp4] \n\t"
561  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
562  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
563  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
564  "uld %[low32], 0x00(%[src]) \n\t"
565  "mtc1 %[low32], %[ftmp5] \n\t"
566  "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
567  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
568  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
569  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
570  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
571  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
572  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
573  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
574  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
575  "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
576  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
577  "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
578  "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
579  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
580  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
581  "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
582  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
583  "uld %[low32], 0x00(%[src]) \n\t"
584  "mtc1 %[low32], %[ftmp0] \n\t"
585  "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
586  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
587  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
588  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
589  "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
590  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
591  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
592  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
593  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
594  "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
595  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
596  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
597  "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
598  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
599  "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
600  "uld %[low32], 0x00(%[src]) \n\t"
601  "mtc1 %[low32], %[ftmp1] \n\t"
602  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
603  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
604  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
605  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
606  "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
607  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
608  "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
609  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
610  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
611  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
612  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
613  "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
614  "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
615  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
616  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
617  "uld %[low32], 0x00(%[src]) \n\t"
618  "mtc1 %[low32], %[ftmp2] \n\t"
619  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
620  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
621  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
622  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
623  "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
624  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
625  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
626  "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
627  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
628  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
629  "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
630  "paddh %[ftmp6], %[ftmp0], %[ftmp1] \n\t"
631  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
632  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
633  "uld %[low32], 0x00(%[src]) \n\t"
634  "mtc1 %[low32], %[ftmp3] \n\t"
635  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
636  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
637  "punpcklbh %[ftmp3] , %[ftmp3], %[ftmp7] \n\t"
638  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
639  "paddh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
640  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
641  "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
642  "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
643  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
644  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
645  "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
646  "paddh %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
647  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
648  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
649  "uld %[low32], 0x00(%[src]) \n\t"
650  "mtc1 %[low32], %[ftmp4] \n\t"
651  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
652  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
653  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
654  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
655  "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t"
656  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
657  "paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
658  "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
659  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
660  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
661  "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
662  "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
663  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
664  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
665  "uld %[low32], 0x00(%[src]) \n\t"
666  "mtc1 %[low32], %[ftmp5] \n\t"
667  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
668  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
669  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
670  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
671  "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
672  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
673  "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
674  "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
675  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
676  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
677  "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
678  "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
679  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
680  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
681  "uld %[low32], 0x00(%[src]) \n\t"
682  "mtc1 %[low32], %[ftmp0] \n\t"
683  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
684  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
685  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
686  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
687  "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
688  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
689  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
690  "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
691  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
692  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
693  "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
694  "bne %[h], 0x10, 2f \n\t"
695  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
696  "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
697  "uld %[low32], 0x00(%[src]) \n\t"
698  "mtc1 %[low32], %[ftmp1] \n\t"
699  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
700  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
701  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
702  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
703  "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
704  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
705  "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
706  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
707  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
708  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
709  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
710  "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
711  "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
712  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
713  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
714  "uld %[low32], 0x00(%[src]) \n\t"
715  "mtc1 %[low32], %[ftmp2] \n\t"
716  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
717  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
718  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
719  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
720  "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
721  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
722  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
723  "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
724  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
725  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
726  "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
727  "paddh %[ftmp6], %[ftmp0], %[ftmp1] \n\t"
728  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
729  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
730  "uld %[low32], 0x00(%[src]) \n\t"
731  "mtc1 %[low32], %[ftmp3] \n\t"
732  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
733  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
734  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
735  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
736  "paddh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
737  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
738  "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
739  "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
740  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
741  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
742  "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
743  "paddh %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
744  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
745  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
746  "uld %[low32], 0x00(%[src]) \n\t"
747  "mtc1 %[low32], %[ftmp4] \n\t"
748  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
749  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
750  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
751  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
752  "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t"
753  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
754  "paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
755  "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
756  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
757  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
758  "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
759  "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
760  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
761  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
762  "uld %[low32], 0x00(%[src]) \n\t"
763  "mtc1 %[low32], %[ftmp5] \n\t"
764  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
765  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
766  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
767  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
768  "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
769  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
770  "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
771  "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
772  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
773  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
774  "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
775  "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
776  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
777  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
778  "uld %[low32], 0x00(%[src]) \n\t"
779  "mtc1 %[low32], %[ftmp0] \n\t"
780  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
781  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
782  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
783  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
784  "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
785  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
786  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
787  "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
788  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
789  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
790  "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
791  "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
792  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
793  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
794  "uld %[low32], 0x00(%[src]) \n\t"
795  "mtc1 %[low32], %[ftmp1] \n\t"
796  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
797  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
798  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
799  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
800  "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
801  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
802  "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
803  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
804  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
805  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
806  "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
807  "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
808  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
809  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
810  "uld %[low32], 0x00(%[src]) \n\t"
811  "mtc1 %[low32], %[ftmp2] \n\t"
812  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
813  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
814  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
815  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
816  "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
817  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
818  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
819  "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
820  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
821  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
822  "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
823  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
824  "2: \n\t"
825  ".set pop \n\t"
826  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
827  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
828  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
829  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
830  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
831  [tmp0]"=&r"(tmp[0]),
832  [src]"+&r"(src), [dst]"+&r"(dst),
833  [h]"+&r"(h),
834  [low32]"=&r"(low32)
835  : [dstStride]"r"((mips_reg)dstStride),
836  [srcStride]"r"((mips_reg)srcStride),
837  [ff_pw_5]"f"(ff_pw_5), [ff_pw_16]"f"(ff_pw_16)
838  : "memory"
839  );
840 
841  src += 4 - (h + 5) * srcStride;
842  dst += 4 - h * dstStride;
843  }
844 }
845 
847  int dstStride, int srcStride)
848 {
    /*
     * Thin wrapper: runs the 8-wide vertical lowpass ("put" variant) four
     * times -- at column 0 and column 8 of the current rows, then again at
     * the same two columns after both pointers have been moved down by
     * eight rows. Presumably this tiles a 16x16 block -- TODO confirm the
     * helper really emits 8 rows per call.
     */
849  put_h264_qpel8_v_lowpass_mmi(dst, src, dstStride, srcStride);
850  put_h264_qpel8_v_lowpass_mmi(dst+8, src+8, dstStride, srcStride);
    /* Move to the lower half: eight rows down in both source and destination. */
851  src += 8*srcStride;
852  dst += 8*dstStride;
853  put_h264_qpel8_v_lowpass_mmi(dst, src, dstStride, srcStride);
854  put_h264_qpel8_v_lowpass_mmi(dst+8, src+8, dstStride, srcStride);
855 }
856 
858  int dstStride, int srcStride)
859 {
    /*
     * Vertical 6-tap lowpass over a 4-byte-wide column, four output rows,
     * each averaged (pavgb) with the bytes already present in dst.
     *
     * With a..f being six consecutive source rows (unpacked to 16-bit
     * lanes), every output row is computed as
     *     (((c + d) << 2) - b - e) * ff_pw_5 + a + f + ff_pw_16) >> 5
     * and then packed to unsigned bytes with saturation (packushb).
     * ff_pw_5 / ff_pw_16 are defined elsewhere; presumably every 16-bit
     * lane holds 5 resp. 16 -- TODO confirm.
     *
     * Nine source rows are read in total (two above src up to six below).
     * ftmp0..ftmp5 act as a rotating window over the source rows; after a
     * register's row has been consumed it is reused first for the dst load
     * and then for the next source row.
     *
     * NOTE(review): source rows are loaded with lwc1, which needs a 4-byte
     * aligned address, whereas the 8-wide variant of this routine loads src
     * through uld + mtc1. Confirm that src is guaranteed to be aligned here.
     */
860  double ftmp[10];
861  uint64_t tmp[1];
862 
    /* The filter window starts two rows above the first output row. */
863  src -= 2 * srcStride;
864 
865  __asm__ volatile (
866  ".set push \n\t"
867  ".set noreorder \n\t"
    /* ftmp9 = 2 (left shift, i.e. x4), ftmp8 = 5 (final right shift), ftmp7 = 0 (unpack filler) */
868  "dli %[tmp0], 0x02 \n\t"
869  "xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
870  "mtc1 %[tmp0], %[ftmp9] \n\t"
871  "dli %[tmp0], 0x05 \n\t"
    /* Preload the first five source rows into ftmp0..ftmp4. */
872  "lwc1 %[ftmp0], 0x00(%[src]) \n\t"
873  "mtc1 %[tmp0], %[ftmp8] \n\t"
874  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
875  "lwc1 %[ftmp1], 0x00(%[src]) \n\t"
876  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
877  "lwc1 %[ftmp2], 0x00(%[src]) \n\t"
878  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
879  "lwc1 %[ftmp3], 0x00(%[src]) \n\t"
880  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
881  "lwc1 %[ftmp4], 0x00(%[src]) \n\t"
882  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
    /* Widen bytes to 16-bit lanes by interleaving with zero. */
883  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
884  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
885  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
886  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
887  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
    /* Output row 0: window a..f = ftmp0, ftmp1, ftmp2, ftmp3, ftmp4, ftmp5 */
888  "lwc1 %[ftmp5], 0x00(%[src]) \n\t"
889  "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
890  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
891  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
892  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
893  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
894  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
895  "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
896  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
897  "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
898  "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
899  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
900  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
    /* Average the filtered row with the existing dst bytes, then store. */
901  "lwc1 %[ftmp0], 0x00(%[dst]) \n\t"
902  "pavgb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
903  "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
904  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
    /* Output row 1: window a..f = ftmp1, ftmp2, ftmp3, ftmp4, ftmp5, ftmp0 */
905  "lwc1 %[ftmp0], 0x00(%[src]) \n\t"
906  "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
907  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
908  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
909  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
910  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
911  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
912  "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
913  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
914  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
915  "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
916  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
917  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
918  "lwc1 %[ftmp1], 0x00(%[dst]) \n\t"
919  "pavgb %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
920  "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
921  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
    /* Output row 2: window a..f = ftmp2, ftmp3, ftmp4, ftmp5, ftmp0, ftmp1 */
922  "lwc1 %[ftmp1], 0x00(%[src]) \n\t"
923  "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
924  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
925  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
926  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
927  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
928  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
929  "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
930  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
931  "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
932  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
933  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
934  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
935  "lwc1 %[ftmp2], 0x00(%[dst]) \n\t"
936  "pavgb %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
937  "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
938  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
    /* Output row 3: window a..f = ftmp3, ftmp4, ftmp5, ftmp0, ftmp1, ftmp2 */
939  "lwc1 %[ftmp2], 0x00(%[src]) \n\t"
940  "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
941  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
942  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
943  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
944  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
945  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
946  "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
947  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
948  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
949  "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
950  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
951  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
952  "lwc1 %[ftmp3], 0x00(%[dst]) \n\t"
953  "pavgb %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
954  "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
955  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
956  ".set pop \n\t"
    /* src and dst are advanced inside the asm, hence read-write operands. */
957  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
958  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
959  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
960  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
961  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
962  [tmp0]"=&r"(tmp[0]),
963  [src]"+&r"(src), [dst]"+&r"(dst)
964  : [dstStride]"r"((mips_reg)dstStride),
965  [srcStride]"r"((mips_reg)srcStride),
966  [ff_pw_5]"f"(ff_pw_5), [ff_pw_16]"f"(ff_pw_16)
967  : "memory"
968  );
969 }
970 
972  int dstStride, int srcStride)
973 {
    /*
     * Vertical 6-tap lowpass, "avg" variant, processed as two passes (w = 2)
     * over 4-byte-wide columns. Every filtered row is averaged (pavgb) with
     * the bytes already in dst before being stored.
     *
     * With a..f being six consecutive source rows (unpacked to 16-bit
     * lanes), every output row is computed as
     *     (((c + d) << 2) - b - e) * ff_pw_5 + a + f + ff_pw_16) >> 5
     * and then packed to unsigned bytes with saturation (packushb).
     * ff_pw_5 / ff_pw_16 are defined elsewhere; presumably every 16-bit
     * lane holds 5 resp. 16 -- TODO confirm.
     *
     * ftmp0..ftmp5 form a rotating window over the source rows; a register
     * whose row has been consumed is reused for the dst load and then for
     * the next source row.
     *
     * h is fixed to 8 and is never written by the asm, so the
     * "bne %[h], 0x10, 2f" below is always taken here and the instructions
     * between it and label 2 (rows 8..15) are never executed from this
     * function.
     */
974  int w = 2;
975  int h = 8;
976  double ftmp[10];
977  uint64_t tmp[1];
978  uint64_t low32;
979 
    /* The filter window starts two rows above the first output row. */
980  src -= 2 * srcStride;
981 
    /* One iteration per 4-byte-wide column. */
982  while (w--) {
983  __asm__ volatile (
984  ".set push \n\t"
985  ".set noreorder \n\t"
    /* ftmp9 = 2 (left shift, i.e. x4), ftmp8 = 5 (final right shift), ftmp7 = 0 (unpack filler) */
986  "dli %[tmp0], 0x02 \n\t"
987  "xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
988  "mtc1 %[tmp0], %[ftmp9] \n\t"
989  "dli %[tmp0], 0x05 \n\t"
    /*
     * Source rows are fetched with an unaligned 64-bit load (uld); mtc1 then
     * moves only the low 32 bits into the FP register.
     * Preload the first five rows into ftmp0..ftmp4.
     */
990  "uld %[low32], 0x00(%[src]) \n\t"
991  "mtc1 %[low32], %[ftmp0] \n\t"
992  "mtc1 %[tmp0], %[ftmp8] \n\t"
993  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
994  "uld %[low32], 0x00(%[src]) \n\t"
995  "mtc1 %[low32], %[ftmp1] \n\t"
996  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
997  "uld %[low32], 0x00(%[src]) \n\t"
998  "mtc1 %[low32], %[ftmp2] \n\t"
999  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1000  "uld %[low32], 0x00(%[src]) \n\t"
1001  "mtc1 %[low32], %[ftmp3] \n\t"
1002  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1003  "uld %[low32], 0x00(%[src]) \n\t"
1004  "mtc1 %[low32], %[ftmp4] \n\t"
1005  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
    /* Widen bytes to 16-bit lanes by interleaving with zero. */
1006  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
1007  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1008  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1009  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1010  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
    /* Output row 0: window a..f = ftmp0, ftmp1, ftmp2, ftmp3, ftmp4, ftmp5 */
1011  "uld %[low32], 0x00(%[src]) \n\t"
1012  "mtc1 %[low32], %[ftmp5] \n\t"
1013  "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1014  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1015  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1016  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1017  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1018  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1019  "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
1020  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1021  "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
1022  "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1023  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1024  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
    /* Average the filtered row with the existing dst bytes, then store. */
1025  "lwc1 %[ftmp0], 0x00(%[dst]) \n\t"
1026  "pavgb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1027  "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
1028  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
    /* Output row 1: window a..f = ftmp1, ftmp2, ftmp3, ftmp4, ftmp5, ftmp0 */
1029  "uld %[low32], 0x00(%[src]) \n\t"
1030  "mtc1 %[low32], %[ftmp0] \n\t"
1031  "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
1032  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1033  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1034  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1035  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
1036  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1037  "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
1038  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1039  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1040  "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1041  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1042  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1043  "lwc1 %[ftmp1], 0x00(%[dst]) \n\t"
1044  "pavgb %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1045  "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
1046  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
    /* Output row 2: window a..f = ftmp2, ftmp3, ftmp4, ftmp5, ftmp0, ftmp1 */
1047  "uld %[low32], 0x00(%[src]) \n\t"
1048  "mtc1 %[low32], %[ftmp1] \n\t"
1049  "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
1050  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1051  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1052  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1053  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1054  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1055  "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
1056  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1057  "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
1058  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1059  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1060  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1061  "lwc1 %[ftmp2], 0x00(%[dst]) \n\t"
1062  "pavgb %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1063  "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
1064  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
    /* Output row 3: window a..f = ftmp3, ftmp4, ftmp5, ftmp0, ftmp1, ftmp2 */
1065  "uld %[low32], 0x00(%[src]) \n\t"
1066  "mtc1 %[low32], %[ftmp2] \n\t"
1067  "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
1068  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1069  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1070  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1071  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1072  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1073  "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
1074  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1075  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
1076  "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1077  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1078  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1079  "lwc1 %[ftmp3], 0x00(%[dst]) \n\t"
1080  "pavgb %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1081  "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
1082  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
    /* Output row 4: window a..f = ftmp4, ftmp5, ftmp0, ftmp1, ftmp2, ftmp3 */
1083  "uld %[low32], 0x00(%[src]) \n\t"
1084  "mtc1 %[low32], %[ftmp3] \n\t"
1085  "paddh %[ftmp6], %[ftmp0], %[ftmp1] \n\t"
1086  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1087  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1088  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1089  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1090  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1091  "paddh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
1092  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1093  "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
1094  "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1095  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1096  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1097  "lwc1 %[ftmp4], 0x00(%[dst]) \n\t"
1098  "pavgb %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1099  "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
1100  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
    /* Output row 5: window a..f = ftmp5, ftmp0, ftmp1, ftmp2, ftmp3, ftmp4 */
1101  "uld %[low32], 0x00(%[src]) \n\t"
1102  "mtc1 %[low32], %[ftmp4] \n\t"
1103  "paddh %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
1104  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1105  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1106  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1107  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
1108  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1109  "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t"
1110  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1111  "paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
1112  "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1113  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1114  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1115  "lwc1 %[ftmp5], 0x00(%[dst]) \n\t"
1116  "pavgb %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1117  "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
1118  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
    /* Output row 6: the register window has rotated back to the row-0 layout. */
1119  "uld %[low32], 0x00(%[src]) \n\t"
1120  "mtc1 %[low32], %[ftmp5] \n\t"
1121  "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1122  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1123  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1124  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1125  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1126  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1127  "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
1128  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1129  "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
1130  "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1131  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1132  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1133  "lwc1 %[ftmp0], 0x00(%[dst]) \n\t"
1134  "pavgb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1135  "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
1136  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
    /* Output row 7 */
1137  "uld %[low32], 0x00(%[src]) \n\t"
1138  "mtc1 %[low32], %[ftmp0] \n\t"
1139  "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
1140  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1141  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1142  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1143  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
1144  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1145  "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
1146  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1147  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1148  "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1149  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1150  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1151  "lwc1 %[ftmp1], 0x00(%[dst]) \n\t"
1152  "pavgb %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1153  "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
    /*
     * Stop after eight rows unless h == 16. Under .set noreorder the
     * PTR_ADDU that follows sits in the branch delay slot, so dst is still
     * advanced when the branch is taken; the dst rewind by h rows after the
     * asm depends on that. Rows 8..15 below repeat the same per-row pattern.
     */
1154  "bne %[h], 0x10, 2f \n\t"
1155  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
1156  "uld %[low32], 0x00(%[src]) \n\t"
1157  "mtc1 %[low32], %[ftmp1] \n\t"
1158  "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
1159  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1160  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1161  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1162  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1163  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1164  "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
1165  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1166  "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
1167  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1168  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1169  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1170  "lwc1 %[ftmp2], 0x00(%[dst]) \n\t"
1171  "pavgb %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1172  "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
1173  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
1174  "uld %[low32], 0x00(%[src]) \n\t"
1175  "mtc1 %[low32], %[ftmp2] \n\t"
1176  "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
1177  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1178  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1179  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1180  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1181  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1182  "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
1183  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1184  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
1185  "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1186  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1187  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1188  "lwc1 %[ftmp3], 0x00(%[dst]) \n\t"
1189  "pavgb %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1190  "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
1191  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
1192  "uld %[low32], 0x00(%[src]) \n\t"
1193  "mtc1 %[low32], %[ftmp3] \n\t"
1194  "paddh %[ftmp6], %[ftmp0], %[ftmp1] \n\t"
1195  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1196  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1197  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1198  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1199  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1200  "paddh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
1201  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1202  "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
1203  "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1204  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1205  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1206  "lwc1 %[ftmp4], 0x00(%[dst]) \n\t"
1207  "pavgb %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1208  "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
1209  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
1210  "uld %[low32], 0x00(%[src]) \n\t"
1211  "mtc1 %[low32], %[ftmp4] \n\t"
1212  "paddh %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
1213  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1214  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1215  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1216  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
1217  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1218  "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t"
1219  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1220  "paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
1221  "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1222  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1223  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1224  "lwc1 %[ftmp5], 0x00(%[dst]) \n\t"
1225  "pavgb %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1226  "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
1227  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
1228  "uld %[low32], 0x00(%[src]) \n\t"
1229  "mtc1 %[low32], %[ftmp5] \n\t"
1230  "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1231  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1232  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1233  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1234  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1235  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1236  "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
1237  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1238  "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
1239  "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1240  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1241  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1242  "lwc1 %[ftmp0], 0x00(%[dst]) \n\t"
1243  "pavgb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1244  "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
1245  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
1246  "uld %[low32], 0x00(%[src]) \n\t"
1247  "mtc1 %[low32], %[ftmp0] \n\t"
1248  "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
1249  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1250  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1251  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1252  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
1253  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1254  "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
1255  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1256  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1257  "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1258  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1259  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1260  "lwc1 %[ftmp1], 0x00(%[dst]) \n\t"
1261  "pavgb %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1262  "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
1263  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
1264  "uld %[low32], 0x00(%[src]) \n\t"
1265  "mtc1 %[low32], %[ftmp1] \n\t"
1266  "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
1267  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1268  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1269  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1270  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1271  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1272  "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
1273  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1274  "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
1275  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1276  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1277  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1278  "lwc1 %[ftmp2], 0x00(%[dst]) \n\t"
1279  "pavgb %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1280  "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
1281  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
1282  "uld %[low32], 0x00(%[src]) \n\t"
1283  "mtc1 %[low32], %[ftmp2] \n\t"
1284  "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
1285  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1286  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1287  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1288  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1289  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1290  "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
1291  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1292  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
1293  "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1294  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1295  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1296  "lwc1 %[ftmp3], 0x00(%[dst]) \n\t"
1297  "pavgb %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1298  "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
1299  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
1300  "2: \n\t"
1301  ".set pop \n\t"
    /* src and dst are advanced inside the asm, hence read-write operands. */
1302  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1303  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1304  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1305  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1306  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
1307  [tmp0]"=&r"(tmp[0]),
1308  [src]"+&r"(src), [dst]"+&r"(dst),
1309  [h]"+&r"(h),
1310  [low32]"=&r"(low32)
1311  : [dstStride]"r"((mips_reg)dstStride),
1312  [srcStride]"r"((mips_reg)srcStride),
1313  [ff_pw_5]"f"(ff_pw_5), [ff_pw_16]"f"(ff_pw_16)
1314  : "memory"
1315  );
1316 
    /*
     * The asm advanced src by h + 5 rows and dst by h rows. Undo that and
     * step four bytes to the right for the next column pass.
     */
1317  src += 4 - (h + 5) * srcStride;
1318  dst += 4 - h * dstStride;
1319  }
1320 }
1321 
1323  int dstStride, int srcStride)
1324 {
    /*
     * Thin wrapper: runs the 8-wide "avg" vertical lowpass four times -- at
     * column 0 and column 8 of the current rows, then again at the same two
     * columns after both pointers have been moved down by eight rows.
     * Presumably this tiles a 16x16 block -- TODO confirm against callers.
     */
1325  avg_h264_qpel8_v_lowpass_mmi(dst, src, dstStride, srcStride);
1326  avg_h264_qpel8_v_lowpass_mmi(dst+8, src+8, dstStride, srcStride);
    /* Move to the lower half: eight rows down in both source and destination. */
1327  src += 8*srcStride;
1328  dst += 8*dstStride;
1329  avg_h264_qpel8_v_lowpass_mmi(dst, src, dstStride, srcStride);
1330  avg_h264_qpel8_v_lowpass_mmi(dst+8, src+8, dstStride, srcStride);
1331 }
1332 
1334  int dstStride, int srcStride)
1335 {
    /*
     * Two-pass (horizontal, then vertical) 6-tap lowpass for a 4x4 output.
     *
     * Pass 1 (inline asm): for nine source rows, starting two rows above
     * src, compute four 16-bit horizontal filter sums per row and store
     * them to the on-stack buffer _tmp (9 rows x 4 int16_t = 36 entries).
     * Pass 2 (C loop): for each of the four columns, run the same 6-tap
     * weights vertically over the intermediate values and write four dst
     * rows through op2_put.
     *
     * INIT_CLIP and op2_put are macros defined elsewhere; presumably
     * op2_put rounds, scales and clips the 2-D sum to a byte -- TODO confirm.
     * ff_pw_20 / ff_pw_5 are defined elsewhere; presumably every 16-bit lane
     * holds 20 resp. 5 -- TODO confirm.
     */
1336  INIT_CLIP
1337  int i;
1338  int16_t _tmp[36];
1339  int16_t *tmp = _tmp;
1340  double ftmp[10];
1341  uint64_t tmp0;
1342  uint64_t low32;
1343 
    /* The vertical pass needs two rows above the block. */
1344  src -= 2*srcStride;
1345 
1346  __asm__ volatile (
    /* ftmp0 = 0 (unpack filler); tmp0 = row counter, nine rows in total. */
1347  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1348  "dli %[tmp0], 0x09 \n\t"
1349  "1: \n\t"
    /*
     * Six overlapping loads at byte offsets -2..+3. uld is an unaligned
     * 64-bit load; mtc1 moves only its low 32 bits into the FP register.
     */
1350  "uld %[low32], -0x02(%[src]) \n\t"
1351  "mtc1 %[low32], %[ftmp1] \n\t"
1352  "uld %[low32], -0x01(%[src]) \n\t"
1353  "mtc1 %[low32], %[ftmp2] \n\t"
1354  "uld %[low32], 0x00(%[src]) \n\t"
1355  "mtc1 %[low32], %[ftmp3] \n\t"
1356  "uld %[low32], 0x01(%[src]) \n\t"
1357  "mtc1 %[low32], %[ftmp4] \n\t"
1358  "uld %[low32], 0x02(%[src]) \n\t"
1359  "mtc1 %[low32], %[ftmp5] \n\t"
1360  "uld %[low32], 0x03(%[src]) \n\t"
1361  "mtc1 %[low32], %[ftmp6] \n\t"
    /* Widen bytes to 16-bit lanes by interleaving with zero. */
1362  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1363  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1364  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1365  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1366  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1367  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
    /*
     * Per lane x:
     *   (s[x] + s[x+1]) * ff_pw_20 - (s[x-1] + s[x+2]) * ff_pw_5
     *       + (s[x-2] + s[x+3])
     * using saturating adds/subtracts; no rounding or shift at this stage.
     */
1368  "paddsh %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
1369  "paddsh %[ftmp8], %[ftmp2], %[ftmp5] \n\t"
1370  "paddsh %[ftmp9], %[ftmp1], %[ftmp6] \n\t"
1371  "pmullh %[ftmp7], %[ftmp7], %[ff_pw_20] \n\t"
1372  "pmullh %[ftmp8], %[ftmp8], %[ff_pw_5] \n\t"
1373  "psubsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1374  "paddsh %[ftmp9], %[ftmp7], %[ftmp9] \n\t"
    /* Store four int16_t results, then step to the next src / tmp row. */
1375  "sdc1 %[ftmp9], 0x00(%[tmp]) \n\t"
1376  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
1377  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1378  PTR_ADDU "%[tmp], %[tmp], %[tmpStride] \n\t"
1379  "bnez %[tmp0], 1b \n\t"
1380  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1381  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1382  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1383  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1384  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
1385  [tmp0]"=&r"(tmp0),
1386  [tmp]"+&r"(tmp), [src]"+&r"(src),
1387  [low32]"=&r"(low32)
    /* tmpStride is in bytes: 8 bytes = one row of four int16_t. */
1388  : [tmpStride]"r"(8),
1389  [srcStride]"r"((mips_reg)srcStride),
1390  [ff_pw_20]"f"(ff_pw_20), [ff_pw_5]"f"(ff_pw_5)
1391  : "memory"
1392  );
1393 
    /*
     * The asm left tmp 36 elements past _tmp; stepping back 28 lands on
     * element 8 (intermediate row 2), so tmp[-8] is row 0 and tmp[24] is
     * row 8.
     */
1394  tmp -= 28;
1395 
    /* One iteration per output column; tmp[k*4] walks down that column. */
1396  for (i=0; i<4; i++) {
1397  const int16_t tmpB= tmp[-8];
1398  const int16_t tmpA= tmp[-4];
    /* Note: this tmp0 shadows the uint64_t tmp0 used as the asm row counter. */
1399  const int16_t tmp0= tmp[ 0];
1400  const int16_t tmp1= tmp[ 4];
1401  const int16_t tmp2= tmp[ 8];
1402  const int16_t tmp3= tmp[12];
1403  const int16_t tmp4= tmp[16];
1404  const int16_t tmp5= tmp[20];
1405  const int16_t tmp6= tmp[24];
    /* Vertical 6-tap (1, -5, 20, 20, -5, 1) over the intermediate rows. */
1406  op2_put(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));
1407  op2_put(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));
1408  op2_put(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));
1409  op2_put(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));
1410  dst++;
1411  tmp++;
1412  }
1413 }
1414 
1416  const uint8_t *src, ptrdiff_t tmpStride, ptrdiff_t srcStride, int size)
1417 {
1418  int w = (size + 8) >> 2;
1419  double ftmp[11];
1420  uint64_t tmp0;
1421  uint64_t low32;
1422 
1423  src -= 2 * srcStride + 2;
1424 
1425  while (w--) {
1426  __asm__ volatile (
1427  "dli %[tmp0], 0x02 \n\t"
1428  "uld %[low32], 0x00(%[src]) \n\t"
1429  "mtc1 %[low32], %[ftmp0] \n\t"
1430  "mtc1 %[tmp0], %[ftmp10] \n\t"
1431  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1432  "xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1433  "uld %[low32], 0x00(%[src]) \n\t"
1434  "mtc1 %[low32], %[ftmp1] \n\t"
1435  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1436  "uld %[low32], 0x00(%[src]) \n\t"
1437  "mtc1 %[low32], %[ftmp2] \n\t"
1438  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1439  "uld %[low32], 0x00(%[src]) \n\t"
1440  "mtc1 %[low32], %[ftmp3] \n\t"
1441  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1442  "uld %[low32], 0x00(%[src]) \n\t"
1443  "mtc1 %[low32], %[ftmp4] \n\t"
1444  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1445  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
1446  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1447  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1448  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1449  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
1450  "uld %[low32], 0x00(%[src]) \n\t"
1451  "mtc1 %[low32], %[ftmp5] \n\t"
1452  "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1453  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1454  "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
1455  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1456  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1457  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1458  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1459  "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
1460  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1461  "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1462  "sdc1 %[ftmp6], 0x00(%[tmp]) \n\t"
1463  "uld %[low32], 0x00(%[src]) \n\t"
1464  "mtc1 %[low32], %[ftmp0] \n\t"
1465  "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
1466  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1467  "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
1468  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1469  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1470  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
1471  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1472  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1473  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1474  "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1475  "sdc1 %[ftmp6], 0x30(%[tmp]) \n\t"
1476  "uld %[low32], 0x00(%[src]) \n\t"
1477  "mtc1 %[low32], %[ftmp1] \n\t"
1478  "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
1479  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1480  "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
1481  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1482  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1483  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1484  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1485  "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
1486  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1487  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1488  "sdc1 %[ftmp6], 0x60(%[tmp]) \n\t"
1489  "uld %[low32], 0x00(%[src]) \n\t"
1490  "mtc1 %[low32], %[ftmp2] \n\t"
1491  "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
1492  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1493  "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
1494  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1495  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1496  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1497  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1498  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
1499  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1500  "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1501  "sdc1 %[ftmp6], 0x90(%[tmp]) \n\t"
1502  "uld %[low32], 0x00(%[src]) \n\t"
1503  "mtc1 %[low32], %[ftmp3] \n\t"
1504  "paddh %[ftmp6], %[ftmp0], %[ftmp1] \n\t"
1505  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1506  "paddh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
1507  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1508  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1509  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1510  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1511  "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
1512  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1513  "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1514  "sdc1 %[ftmp6], 0xc0(%[tmp]) \n\t"
1515  "uld %[low32], 0x00(%[src]) \n\t"
1516  "mtc1 %[low32], %[ftmp4] \n\t"
1517  "paddh %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
1518  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1519  "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t"
1520  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1521  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1522  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
1523  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1524  "paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
1525  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1526  "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1527  "sdc1 %[ftmp6], 0xf0(%[tmp]) \n\t"
1528  "uld %[low32], 0x00(%[src]) \n\t"
1529  "mtc1 %[low32], %[ftmp5] \n\t"
1530  "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1531  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1532  "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
1533  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1534  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1535  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1536  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1537  "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
1538  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1539  "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1540  "sdc1 %[ftmp6], 0x120(%[tmp]) \n\t"
1541  "uld %[low32], 0x00(%[src]) \n\t"
1542  "mtc1 %[low32], %[ftmp0] \n\t"
1543  "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
1544  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1545  "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
1546  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1547  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1548  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
1549  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1550  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1551  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1552  "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1553  "sdc1 %[ftmp6], 0x150(%[tmp]) \n\t"
1554  "bne %[size], 0x10, 2f \n\t"
1555 
1556  "uld %[low32], 0x00(%[src]) \n\t"
1557  "mtc1 %[low32], %[ftmp1] \n\t"
1558  "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
1559  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1560  "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
1561  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1562  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1563  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1564  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1565  "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
1566  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1567  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1568  "sdc1 %[ftmp6], 0x180(%[tmp]) \n\t"
1569  "uld %[low32], 0x00(%[src]) \n\t"
1570  "mtc1 %[low32], %[ftmp2] \n\t"
1571  "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
1572  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1573  "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
1574  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1575  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1576  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1577  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1578  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
1579  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1580  "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1581  "sdc1 %[ftmp6], 0x1b0(%[tmp]) \n\t"
1582  "uld %[low32], 0x00(%[src]) \n\t"
1583  "mtc1 %[low32], %[ftmp3] \n\t"
1584  "paddh %[ftmp6], %[ftmp0], %[ftmp1] \n\t"
1585  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1586  "paddh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
1587  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1588  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1589  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1590  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1591  "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
1592  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1593  "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1594  "sdc1 %[ftmp6], 0x1e0(%[tmp]) \n\t"
1595  "uld %[low32], 0x00(%[src]) \n\t"
1596  "mtc1 %[low32], %[ftmp4] \n\t"
1597  "paddh %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
1598  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1599  "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t"
1600  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1601  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1602  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
1603  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1604  "paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
1605  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1606  "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1607  "sdc1 %[ftmp6], 0x210(%[tmp]) \n\t"
1608  "uld %[low32], 0x00(%[src]) \n\t"
1609  "mtc1 %[low32], %[ftmp5] \n\t"
1610  "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1611  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1612  "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
1613  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1614  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1615  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1616  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1617  "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
1618  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1619  "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1620  "sdc1 %[ftmp6], 0x240(%[tmp]) \n\t"
1621  "uld %[low32], 0x00(%[src]) \n\t"
1622  "mtc1 %[low32], %[ftmp0] \n\t"
1623  "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
1624  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1625  "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
1626  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1627  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1628  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
1629  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1630  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1631  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1632  "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1633  "sdc1 %[ftmp6], 0x270(%[tmp]) \n\t"
1634  "uld %[low32], 0x00(%[src]) \n\t"
1635  "mtc1 %[low32], %[ftmp1] \n\t"
1636  "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
1637  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1638  "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
1639  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1640  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1641  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1642  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1643  "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
1644  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1645  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1646  "sdc1 %[ftmp6], 0x2a0(%[tmp]) \n\t"
1647  "uld %[low32], 0x00(%[src]) \n\t"
1648  "mtc1 %[low32], %[ftmp2] \n\t"
1649  "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
1650  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1651  "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
1652  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1653  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1654  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1655  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1656  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
1657  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1658  "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1659  "sdc1 %[ftmp6], 0x2d0(%[tmp]) \n\t"
1660  "2: \n\t"
1661  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1662  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1663  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1664  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1665  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
1666  [ftmp10]"=&f"(ftmp[10]),
1667  [tmp0]"=&r"(tmp0),
1668  [src]"+&r"(src),
1669  [low32]"=&r"(low32)
1670  : [tmp]"r"(tmp), [size]"r"(size),
1671  [srcStride]"r"((mips_reg)srcStride),
1672  [ff_pw_5]"f"(ff_pw_5), [ff_pw_16]"f"(ff_pw_16)
1673  : "memory"
1674  );
1675 
1676  tmp += 4;
1677  src += 4 - (size + 5) * srcStride;
1678  }
1679 }
1680 
1682  int16_t *tmp, ptrdiff_t dstStride, ptrdiff_t tmpStride, int size)
1683 {
      /*
       * Reduces the 16-bit plane in tmp to 8-bit pixels in dst, 8 pixels per
       * row. For every output pixel six neighbouring int16 values of the
       * same tmp row are combined. tmp rows are 0x30 bytes (24 int16) apart;
       * tmpStride is not read here.
       *
       * size >> 4 is 0 for size 8 and 1 for size 16, so the do/while below
       * handles one 8-pixel column strip for size 8 and two for size 16.
       */
1684  int w = size >> 4;
1685  double ftmp[10];
1686  uint64_t tmp0;
1687
1688  do {
1689  int h = size;
1690
1691  __asm__ volatile (
      /* ftmp8 = shift count 2, ftmp9 = shift count 6 */
1692  "dli %[tmp0], 0x02 \n\t"
1693  "mtc1 %[tmp0], %[ftmp8] \n\t"
1694  "dli %[tmp0], 0x06 \n\t"
1695  "mtc1 %[tmp0], %[ftmp9] \n\t"
1696  "1: \n\t"
      /* t[0..3], t[4..7], t[8..11] and the same shifted by one element */
1697  "ldc1 %[ftmp0], 0x00(%[tmp]) \n\t"
1698  "ldc1 %[ftmp3], 0x08(%[tmp]) \n\t"
1699  "ldc1 %[ftmp6], 0x10(%[tmp]) \n\t"
1700  "gsldlc1 %[ftmp1], 0x09(%[tmp]) \n\t"
1701  "gsldrc1 %[ftmp1], 0x02(%[tmp]) \n\t"
1702  "gsldlc1 %[ftmp4], 0x11(%[tmp]) \n\t"
1703  "gsldrc1 %[ftmp4], 0x0a(%[tmp]) \n\t"
1704  "gsldlc1 %[ftmp5], 0x19(%[tmp]) \n\t"
1705  "gsldrc1 %[ftmp5], 0x12(%[tmp]) \n\t"
      /* a = t[i]+t[i+5] (ftmp0 / ftmp3), b = t[i+1]+t[i+4] (ftmp1 / ftmp4) */
1706  "paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
1707  "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1708  "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
1709  "paddh %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
1710  "gsldlc1 %[ftmp2], 0x0b(%[tmp]) \n\t"
1711  "gsldrc1 %[ftmp2], 0x04(%[tmp]) \n\t"
1712  "gsldlc1 %[ftmp6], 0x0d(%[tmp]) \n\t"
1713  "gsldrc1 %[ftmp6], 0x06(%[tmp]) \n\t"
1714  "gsldlc1 %[ftmp5], 0x13(%[tmp]) \n\t"
1715  "gsldrc1 %[ftmp5], 0x0c(%[tmp]) \n\t"
1716  "gsldlc1 %[ftmp7], 0x15(%[tmp]) \n\t"
1717  "gsldrc1 %[ftmp7], 0x0e(%[tmp]) \n\t"
      /* c = t[i+2]+t[i+3] (ftmp2 / ftmp5) */
1718  "paddh %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
1719  "paddh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
      /*
       * ((((a - b) >> 2) - b + c) >> 2) + c, then >> 6: a staged form of
       * (a - 5*b + 20*c) >> 10 evaluated in 16-bit lanes.
       */
1720  "psubh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
1721  "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
1722  "psrah %[ftmp0], %[ftmp0], %[ftmp8] \n\t"
1723  "psrah %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
1724  "psubh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
1725  "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
1726  "paddsh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
1727  "paddsh %[ftmp3] , %[ftmp3], %[ftmp5] \n\t"
1728  "psrah %[ftmp0], %[ftmp0], %[ftmp8] \n\t"
1729  "psrah %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
1730  "paddh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
1731  "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
1732  "psrah %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
1733  "psrah %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
      /* pack both halves to 8 bytes and store them with the left/right pair */
1734  "packushb %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
1735  "addi %[h], %[h], -0x01 \n\t"
1736  "gssdlc1 %[ftmp0], 0x07(%[dst]) \n\t"
1737  "gssdrc1 %[ftmp0], 0x00(%[dst]) \n\t"
      /* next tmp row (0x30 bytes) and next dst row */
1738  PTR_ADDIU "%[tmp], %[tmp], 0x30 \n\t"
1739  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
1740  "bnez %[h], 1b \n\t"
1741  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1742  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1743  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1744  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1745  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
1746  [tmp0]"=&r"(tmp0),
1747  [tmp]"+&r"(tmp), [dst]"+&r"(dst),
1748  [h]"+&r"(h)
1749  : [dstStride]"r"((mips_reg)dstStride)
1750  : "memory"
1751  );
1752
      /*
       * The asm advanced tmp by 24 int16 and dst by dstStride for each of
       * the size rows; step back to the top and 8 columns to the right.
       */
1753  tmp += 8 - size * 24;
1754  dst += 8 - size * dstStride;
1755  } while (w--);
1756 }
1757 
/*
 * Two-stage (hv) low-pass for an 8- or 16-pixel block, "put" flavour.
 *
 * Stage one filters src into the 16-bit scratch plane tmp, stage two turns
 * tmp into 8-bit pixels stored at dst. Strides and the block size are
 * forwarded to both stages unchanged.
 */
static void put_h264_qpel8or16_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp,
                                              const uint8_t *src,
                                              ptrdiff_t dstStride,
                                              ptrdiff_t tmpStride,
                                              ptrdiff_t srcStride, int size)
{
    /* src -> tmp */
    put_h264_qpel8or16_hv1_lowpass_mmi(tmp, src, tmpStride, srcStride, size);

    /* tmp -> dst */
    put_h264_qpel8or16_hv2_lowpass_mmi(dst, tmp, dstStride, tmpStride, size);
}
1765 
/*
 * 8-pixel entry point of the "put" hv low-pass: forwards every argument to
 * the shared 8/16 implementation with the block size fixed to 8.
 */
static void put_h264_qpel8_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp,
                                          const uint8_t *src,
                                          ptrdiff_t dstStride,
                                          ptrdiff_t tmpStride,
                                          ptrdiff_t srcStride)
{
    enum { BLOCK_SIZE = 8 };

    put_h264_qpel8or16_hv_lowpass_mmi(dst, tmp, src, dstStride, tmpStride,
                                      srcStride, BLOCK_SIZE);
}
1773 
/*
 * 16-pixel entry point of the "put" hv low-pass: forwards every argument to
 * the shared 8/16 implementation with the block size fixed to 16.
 */
static void put_h264_qpel16_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp,
                                           const uint8_t *src,
                                           ptrdiff_t dstStride,
                                           ptrdiff_t tmpStride,
                                           ptrdiff_t srcStride)
{
    enum { BLOCK_SIZE = 16 };

    put_h264_qpel8or16_hv_lowpass_mmi(dst, tmp, src, dstStride, tmpStride,
                                      srcStride, BLOCK_SIZE);
}
1781 
1783  const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)
1784 {
      /*
       * For 8 rows of 8 pixels: filters src along the row with taps at
       * src[-2..+3], packs the result to bytes, averages it with the
       * matching 8 bytes of src2 and stores that to dst.
       *
       * Both src and dst advance by dstStride per row; src2 advances by
       * src2Stride. dst is written with sdc1 (no left/right pair), unlike
       * the loads from src and src2.
       */
1785  int h = 8;
1786  double ftmp[9];
1787  uint64_t tmp[1];
1788  uint64_t low32;
1789
1790  __asm__ volatile (
      /* ftmp7 = shift count 2, ftmp8 = shift count 5, ftmp0 = 0 for unpacking */
1791  "dli %[tmp0], 0x02 \n\t"
1792  "mtc1 %[tmp0], %[ftmp7] \n\t"
1793  "dli %[tmp0], 0x05 \n\t"
1794  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1795  "mtc1 %[tmp0], %[ftmp8] \n\t"
1796  "1: \n\t"
      /* (src[x] + src[x+1]) << 2, low half in ftmp1, high half in ftmp2 */
1797  "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t"
1798  "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t"
1799  "gsldlc1 %[ftmp3], 0x08(%[src]) \n\t"
1800  "gsldrc1 %[ftmp3], 0x01(%[src]) \n\t"
1801  "punpckhbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t"
1802  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1803  "punpckhbh %[ftmp4], %[ftmp3], %[ftmp0] \n\t"
1804  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1805  "paddh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1806  "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1807  "psllh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1808  "psllh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
      /* subtract (src[x-1] + src[x+2]), then multiply by ff_pw_5 */
1809  "gsldlc1 %[ftmp3], 0x06(%[src]) \n\t"
1810  "gsldrc1 %[ftmp3], -0x01(%[src]) \n\t"
1811  "gsldlc1 %[ftmp5], 0x09(%[src]) \n\t"
1812  "gsldrc1 %[ftmp5], 0x02(%[src]) \n\t"
1813  "punpckhbh %[ftmp4], %[ftmp3], %[ftmp0] \n\t"
1814  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1815  "punpckhbh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
1816  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1817  "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1818  "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
1819  "psubh %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
1820  "psubh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1821  "pmullh %[ftmp2], %[ftmp2], %[ff_pw_5] \n\t"
1822  "pmullh %[ftmp1], %[ftmp1], %[ff_pw_5] \n\t"
      /*
       * Outer taps: src[-2..1] + src[3..6] for the low four pixels and
       * src[2..5] + src[7..10] for the high four (ftmp4 / ftmp5 still hold
       * the unpacked src[3..6] / src[2..5] from above).
       * NOTE(review): uld looks like a 64-bit unaligned load while only the
       * low 32 bits are moved by mtc1 and unpacked -- confirm whether a
       * 32-bit load would avoid touching the extra bytes.
       */
1823  "uld %[low32], -0x02(%[src]) \n\t"
1824  "mtc1 %[low32], %[ftmp3] \n\t"
1825  "uld %[low32], 0x07(%[src]) \n\t"
1826  "mtc1 %[low32], %[ftmp6] \n\t"
1827  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1828  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1829  "paddh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
1830  "paddh %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
      /* add ff_pw_16, add the scaled centre taps, arithmetic shift right by 5 */
1831  "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
1832  "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t"
1833  "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1834  "paddh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
1835  "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
1836  "psrah %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
      /* pack to 8 bytes and average with the 8 bytes at src2 */
1837  "gsldlc1 %[ftmp5], 0x07(%[src2]) \n\t"
1838  "gsldrc1 %[ftmp5], 0x00(%[src2]) \n\t"
1839  "packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
1840  PTR_ADDU "%[src], %[src], %[dstStride] \n\t"
1841  "pavgb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
      /*
       * NOTE(review): row counter decremented with PTR_ADDU and an
       * immediate; the other loops in this file use "addi" for an int
       * counter -- confirm this form is intended.
       */
1842  PTR_ADDU "%[h], %[h], -0x01 \n\t"
1843  "sdc1 %[ftmp1], 0x00(%[dst]) \n\t"
1844  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
1845  PTR_ADDU "%[src2], %[src2], %[src2Stride] \n\t"
1846  "bgtz %[h], 1b \n\t"
1847  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1848  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1849  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1850  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1851  [ftmp8]"=&f"(ftmp[8]),
1852  [tmp0]"=&r"(tmp[0]),
1853  [src]"+&r"(src), [dst]"+&r"(dst),
1854  [src2]"+&r"(src2), [h]"+&r"(h),
1855  [low32]"=&r"(low32)
1856  : [src2Stride]"r"((mips_reg)src2Stride),
1857  [dstStride]"r"((mips_reg)dstStride),
1858  [ff_pw_5]"f"(ff_pw_5), [ff_pw_16]"f"(ff_pw_16)
1859  : "memory"
1860  );
1861 }
1862 
1863 static void put_pixels8_l2_shift5_mmi(uint8_t *dst, int16_t *src16,
1864  const uint8_t *src8, ptrdiff_t dstStride, ptrdiff_t src8Stride, int h)
1865 {
1866  double ftmp[7];
1867  uint64_t tmp0;
1868 
1869  do {
1870  __asm__ volatile (
1871  "dli %[tmp0], 0x05 \n\t"
1872  "gsldlc1 %[ftmp0], 0x07(%[src16]) \n\t"
1873  "gsldrc1 %[ftmp0], 0x00(%[src16]) \n\t"
1874  "mtc1 %[tmp0], %[ftmp6] \n\t"
1875  "gsldlc1 %[ftmp1], 0x0f(%[src16]) \n\t"
1876  "gsldrc1 %[ftmp1], 0x08(%[src16]) \n\t"
1877  "gsldlc1 %[ftmp2], 0x37(%[src16]) \n\t"
1878  "gsldrc1 %[ftmp2], 0x30(%[src16]) \n\t"
1879  "gsldlc1 %[ftmp3], 0x3f(%[src16]) \n\t"
1880  "gsldrc1 %[ftmp3], 0x38(%[src16]) \n\t"
1881  "psrah %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
1882  "psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1883  "psrah %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
1884  "psrah %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
1885  "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
1886  "packushb %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
1887  "ldc1 %[ftmp5], 0x00(%[src8]) \n\t"
1888  "gsldxc1 %[ftmp4], 0x00(%[src8], %[src8Stride]) \n\t"
1889  "pavgb %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
1890  "pavgb %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1891  "sdc1 %[ftmp0], 0x00(%[dst]) \n\t"
1892  "gssdxc1 %[ftmp2], 0x00(%[dst], %[dstStride]) \n\t"
1893  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1894  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1895  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1896  [ftmp6]"=&f"(ftmp[6]),
1897  [tmp0]"=&r"(tmp0)
1898  : [src8]"r"(src8), [src16]"r"(src16),
1899  [dst]"r"(dst),
1900  [src8Stride]"r"((mips_reg)src8Stride),
1901  [dstStride]"r"((mips_reg)dstStride)
1902  : "memory"
1903  );
1904 
1905  src8 += 2 * src8Stride;
1906  src16 += 48;
1907  dst += 2 * dstStride;
1908  } while (h -= 2);
1909 }
1910 
1912  const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)
1913 {
      /*
       * Covers a 16x16 area with four calls to the 8-wide routine (which
       * itself processes 8 rows): left and right halves of the top eight
       * rows first, then of the bottom eight rows.
       */
1914  put_h264_qpel8_h_lowpass_l2_mmi(dst, src, src2, dstStride, src2Stride);
1915  put_h264_qpel8_h_lowpass_l2_mmi(dst + 8, src + 8, src2 + 8, dstStride,
1916  src2Stride);
1917
      /* src is stepped with dstStride here, matching the 8-wide routine */
1918  src += 8 * dstStride;
1919  dst += 8 * dstStride;
1920  src2 += 8 * src2Stride;
1921
1922  put_h264_qpel8_h_lowpass_l2_mmi(dst, src, src2, dstStride, src2Stride);
1923  put_h264_qpel8_h_lowpass_l2_mmi(dst + 8, src + 8, src2 + 8, dstStride,
1924  src2Stride);
1925 }
1926 
/*
 * 16-pixel-wide variant of put_pixels8_l2_shift5_mmi: runs the 8-wide
 * routine on the left half, then on the right half (all three pointers
 * offset by 8 elements), with the same strides and row count.
 */
static void put_pixels16_l2_shift5_mmi(uint8_t *dst, int16_t *src16,
        const uint8_t *src8, ptrdiff_t dstStride, ptrdiff_t src8Stride, int h)
{
    enum { HALF_WIDTH = 8 };
    int col;

    for (col = 0; col < 2 * HALF_WIDTH; col += HALF_WIDTH)
        put_pixels8_l2_shift5_mmi(dst + col, src16 + col, src8 + col,
                                  dstStride, src8Stride, h);
}
1934 
1936  int dstStride, int srcStride)
1937 {
      /*
       * 4x4 two-stage low-pass whose result is merged into dst via op2_avg.
       *
       * Stage 1 (asm): nine source rows, starting two rows above src, are
       * filtered along the row into the 9x4 int16 scratch array _tmp.
       * Stage 2 (C): each of the four columns of _tmp is filtered
       * vertically and handed to op2_avg. INIT_CLIP and op2_avg are macros
       * defined outside this block.
       */
1938  INIT_CLIP
1939  int i;
1940  int16_t _tmp[36];
1941  int16_t *tmp = _tmp;
1942  double ftmp[10];
1943  uint64_t tmp0;
1944  uint64_t low32;
1945
1946  src -= 2*srcStride;
1947
1948  __asm__ volatile (
      /* ftmp0 = 0 for unpacking, tmp0 = row counter (9) */
1949  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1950  "dli %[tmp0], 0x09 \n\t"
1951  "1: \n\t"
      /* six overlapping loads at src-2 .. src+3; the low 4 bytes of each are used */
1952  "uld %[low32], -0x02(%[src]) \n\t"
1953  "mtc1 %[low32], %[ftmp1] \n\t"
1954  "uld %[low32], -0x01(%[src]) \n\t"
1955  "mtc1 %[low32], %[ftmp2] \n\t"
1956  "uld %[low32], 0x00(%[src]) \n\t"
1957  "mtc1 %[low32], %[ftmp3] \n\t"
1958  "uld %[low32], 0x01(%[src]) \n\t"
1959  "mtc1 %[low32], %[ftmp4] \n\t"
1960  "uld %[low32], 0x02(%[src]) \n\t"
1961  "mtc1 %[low32], %[ftmp5] \n\t"
1962  "uld %[low32], 0x03(%[src]) \n\t"
1963  "mtc1 %[low32], %[ftmp6] \n\t"
1964  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1965  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1966  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1967  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1968  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1969  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
      /* (s0+s1)*ff_pw_20 - (s-1+s2)*ff_pw_5 + (s-2+s3), saturating adds */
1970  "paddsh %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
1971  "paddsh %[ftmp8], %[ftmp2], %[ftmp5] \n\t"
1972  "paddsh %[ftmp9], %[ftmp1], %[ftmp6] \n\t"
1973  "pmullh %[ftmp7], %[ftmp7], %[ff_pw_20] \n\t"
1974  "pmullh %[ftmp8], %[ftmp8], %[ff_pw_5] \n\t"
1975  "psubsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1976  "paddsh %[ftmp9], %[ftmp7], %[ftmp9] \n\t"
      /* store four int16 and step to the next row (tmpStride operand is 8 bytes) */
1977  "sdc1 %[ftmp9], 0x00(%[tmp]) \n\t"
1978  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
1979  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1980  PTR_ADDU "%[tmp], %[tmp], %[tmpStride] \n\t"
1981  "bnez %[tmp0], 1b \n\t"
1982  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1983  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1984  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1985  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1986  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
1987  [tmp0]"=&r"(tmp0),
1988  [tmp]"+&r"(tmp), [src]"+&r"(src),
1989  [low32]"=&r"(low32)
1990  : [tmpStride]"r"(8),
1991  [srcStride]"r"((mips_reg)srcStride),
1992  [ff_pw_20]"f"(ff_pw_20), [ff_pw_5]"f"(ff_pw_5)
1993  : "memory"
1994  );
1995
      /*
       * The asm left tmp at _tmp + 36 (9 rows * 4 int16); stepping back 28
       * puts it on row 2, so tmp[-8] .. tmp[24] below address rows 0 .. 8.
       */
1996  tmp -= 28;
1997
1998  for (i=0; i<4; i++) {
      /* NOTE(review): this tmp0 shadows the uint64_t tmp0 declared above. */
1999  const int16_t tmpB= tmp[-8];
2000  const int16_t tmpA= tmp[-4];
2001  const int16_t tmp0= tmp[ 0];
2002  const int16_t tmp1= tmp[ 4];
2003  const int16_t tmp2= tmp[ 8];
2004  const int16_t tmp3= tmp[12];
2005  const int16_t tmp4= tmp[16];
2006  const int16_t tmp5= tmp[20];
2007  const int16_t tmp6= tmp[24];
      /* vertical 6-tap (1, -5, 20, 20, -5, 1) for four consecutive rows */
2008  op2_avg(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));
2009  op2_avg(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));
2010  op2_avg(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));
2011  op2_avg(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));
2012  dst++;
2013  tmp++;
2014  }
2015 }
2016 
2018  int16_t *tmp, ptrdiff_t dstStride, ptrdiff_t tmpStride, int size)
2019 {
      /*
       * Reduces the 16-bit plane in tmp to 8-bit pixels, 8 per row, and
       * averages them with the bytes already stored at dst. Six
       * neighbouring int16 values of a tmp row feed each pixel. tmp rows are
       * 0x30 bytes (24 int16) apart; tmpStride is not read here.
       *
       * size >> 4 is 0 for size 8 and 1 for size 16, so the do/while below
       * handles one 8-pixel column strip for size 8 and two for size 16.
       */
2020  int w = size >> 4;
2021  double ftmp[11];
2022  uint64_t tmp0;
2023
2024  do {
2025  int h = size;
2026  __asm__ volatile (
      /* ftmp9 = shift count 2, ftmp10 = shift count 6 */
2027  "dli %[tmp0], 0x02 \n\t"
2028  "mtc1 %[tmp0], %[ftmp9] \n\t"
2029  "dli %[tmp0], 0x06 \n\t"
2030  "mtc1 %[tmp0], %[ftmp10] \n\t"
2031  "1: \n\t"
2032  "ldc1 %[ftmp0], 0x00(%[tmp]) \n\t"
2033  "ldc1 %[ftmp3], 0x08(%[tmp]) \n\t"
2034  "gsldlc1 %[ftmp1], 0x09(%[tmp]) \n\t"
2035  "gsldrc1 %[ftmp1], 0x02(%[tmp]) \n\t"
2036  "gsldlc1 %[ftmp4], 0x11(%[tmp]) \n\t"
2037  "gsldrc1 %[ftmp4], 0x0a(%[tmp]) \n\t"
2038  "ldc1 %[ftmp7], 0x10(%[tmp]) \n\t"
2039  "gsldlc1 %[ftmp8], 0x19(%[tmp]) \n\t"
2040  "gsldrc1 %[ftmp8], 0x12(%[tmp]) \n\t"
      /* a = t[i]+t[i+5] (ftmp0 / ftmp3), b = t[i+1]+t[i+4] (ftmp1 / ftmp4) */
2041  "paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2042  "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
2043  "paddh %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
2044  "paddh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
2045  "gsldlc1 %[ftmp2], 0x0b(%[tmp]) \n\t"
2046  "gsldrc1 %[ftmp2], 0x04(%[tmp]) \n\t"
2047  "gsldlc1 %[ftmp5], 0x13(%[tmp]) \n\t"
2048  "gsldrc1 %[ftmp5], 0x0c(%[tmp]) \n\t"
2049  "gsldlc1 %[ftmp7], 0x0d(%[tmp]) \n\t"
2050  "gsldrc1 %[ftmp7], 0x06(%[tmp]) \n\t"
2051  "gsldlc1 %[ftmp8], 0x15(%[tmp]) \n\t"
2052  "gsldrc1 %[ftmp8], 0x0e(%[tmp]) \n\t"
      /* c = t[i+2]+t[i+3] (ftmp2 / ftmp5) */
2053  "paddh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
2054  "paddh %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
      /*
       * ((((a - b) >> 2) - b + c) >> 2) + c, then >> 6: a staged form of
       * (a - 5*b + 20*c) >> 10 evaluated in 16-bit lanes.
       */
2055  "psubh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2056  "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
2057  "psrah %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
2058  "psrah %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
2059  "psubh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2060  "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
2061  "paddsh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2062  "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
2063  "psrah %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
2064  "psrah %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
2065  "paddh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2066  "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
2067  "psrah %[ftmp0], %[ftmp0], %[ftmp10] \n\t"
2068  "psrah %[ftmp3], %[ftmp3], %[ftmp10] \n\t"
      /* pack, average with the current dst bytes, write back (ldc1 / sdc1) */
2069  "packushb %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
2070  "ldc1 %[ftmp6], 0x00(%[dst]) \n\t"
2071  "pavgb %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
2072  "sdc1 %[ftmp0], 0x00(%[dst]) \n\t"
2073  "addi %[h], %[h], -0x01 \n\t"
      /*
       * NOTE(review): the put counterpart of this loop advances tmp with
       * PTR_ADDIU, this one with PTR_ADDI -- confirm the difference is
       * intended.
       */
2074  PTR_ADDI "%[tmp], %[tmp], 0x30 \n\t"
2075  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
2076  "bnez %[h], 1b \n\t"
2077  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2078  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2079  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2080  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2081  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
2082  [ftmp10]"=&f"(ftmp[10]),
2083  [tmp0]"=&r"(tmp0),
2084  [tmp]"+&r"(tmp), [dst]"+&r"(dst),
2085  [h]"+&r"(h)
2086  : [dstStride]"r"((mips_reg)dstStride)
2087  : "memory"
2088  );
2089
      /*
       * The asm advanced tmp by 24 int16 and dst by dstStride for each of
       * the size rows; step back to the top and 8 columns to the right.
       */
2090  tmp += 8 - size * 24;
2091  dst += 8 - size * dstStride;
2092  } while (w--);
2093 }
2094 
/*
 * Two-stage (hv) low-pass for an 8- or 16-pixel block, "avg" flavour.
 *
 * Stage one is shared with the put path and filters src into the 16-bit
 * scratch plane tmp. Stage two is the avg variant, which takes tmp and dst.
 * Strides and the block size are forwarded to both stages unchanged.
 */
static void avg_h264_qpel8or16_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp,
                                              const uint8_t *src,
                                              ptrdiff_t dstStride,
                                              ptrdiff_t tmpStride,
                                              ptrdiff_t srcStride, int size)
{
    /* src -> tmp (same first stage as the put path) */
    put_h264_qpel8or16_hv1_lowpass_mmi(tmp, src, tmpStride, srcStride, size);

    /* tmp -> dst, avg variant */
    avg_h264_qpel8or16_hv2_lowpass_mmi(dst, tmp, dstStride, tmpStride, size);
}
2102 
/*
 * 8-pixel entry point of the "avg" hv low-pass: forwards every argument to
 * the shared 8/16 implementation with the block size fixed to 8.
 */
static void avg_h264_qpel8_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp,
                                          const uint8_t *src,
                                          ptrdiff_t dstStride,
                                          ptrdiff_t tmpStride,
                                          ptrdiff_t srcStride)
{
    enum { BLOCK_SIZE = 8 };

    avg_h264_qpel8or16_hv_lowpass_mmi(dst, tmp, src, dstStride, tmpStride,
                                      srcStride, BLOCK_SIZE);
}
2110 
/*
 * 16-pixel entry point of the "avg" hv low-pass: forwards every argument to
 * the shared 8/16 implementation with the block size fixed to 16.
 */
static void avg_h264_qpel16_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp,
                                           const uint8_t *src,
                                           ptrdiff_t dstStride,
                                           ptrdiff_t tmpStride,
                                           ptrdiff_t srcStride)
{
    enum { BLOCK_SIZE = 16 };

    avg_h264_qpel8or16_hv_lowpass_mmi(dst, tmp, src, dstStride, tmpStride,
                                      srcStride, BLOCK_SIZE);
}
2118 
2120  const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)
2121 {
      /*
       * For 8 rows of 8 pixels: filters src along the row with taps at
       * src[-2..+3], packs to bytes, averages first with the 8 bytes of
       * src2 and then with the 8 bytes already at dst, and stores to dst.
       *
       * Both src and dst advance by dstStride per row; src2 advances by
       * src2Stride. dst is accessed with ldc1 / sdc1.
       */
2122  double ftmp[10];
2123  uint64_t tmp[2];
2124  uint64_t low32;
2125
2126  __asm__ volatile (
      /* tmp0 = row counter (8), ftmp7 = shift 2, ftmp8 = shift 5, ftmp0 = 0 */
2127  "dli %[tmp1], 0x02 \n\t"
2128  "ori %[tmp0], $0, 0x8 \n\t"
2129  "mtc1 %[tmp1], %[ftmp7] \n\t"
2130  "dli %[tmp1], 0x05 \n\t"
2131  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2132  "mtc1 %[tmp1], %[ftmp8] \n\t"
2133  "1: \n\t"
      /* (src[x] + src[x+1]) << 2, low half in ftmp1, high half in ftmp3 */
2134  "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t"
2135  "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t"
2136  "gsldlc1 %[ftmp2], 0x08(%[src]) \n\t"
2137  "gsldrc1 %[ftmp2], 0x01(%[src]) \n\t"
2138  "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t"
2139  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
2140  "punpckhbh %[ftmp4], %[ftmp2], %[ftmp0] \n\t"
2141  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
2142  "paddh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
2143  "paddh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
2144  "psllh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
2145  "psllh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
      /* subtract (src[x-1] + src[x+2]), then multiply by ff_pw_5 */
2146  "gsldlc1 %[ftmp2], 0x06(%[src]) \n\t"
2147  "gsldrc1 %[ftmp2], -0x01(%[src]) \n\t"
2148  "gsldlc1 %[ftmp5], 0x09(%[src]) \n\t"
2149  "gsldrc1 %[ftmp5], 0x02(%[src]) \n\t"
2150  "punpckhbh %[ftmp4], %[ftmp2], %[ftmp0] \n\t"
2151  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
2152  "punpckhbh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
2153  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
2154  "paddh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
2155  "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
2156  "psubh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
2157  "psubh %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
2158  "pmullh %[ftmp1], %[ftmp1], %[ff_pw_5] \n\t"
2159  "pmullh %[ftmp3], %[ftmp3], %[ff_pw_5] \n\t"
      /*
       * Outer taps: src[-2..1] + src[3..6] (low four pixels) and
       * src[2..5] + src[7..10] (high four); ftmp4 / ftmp5 still hold the
       * unpacked src[3..6] / src[2..5] from above.
       * NOTE(review): uld looks like a 64-bit unaligned load while only the
       * low 32 bits are moved by mtc1 and unpacked -- confirm whether a
       * 32-bit load would avoid touching the extra bytes.
       */
2160  "uld %[low32], -0x02(%[src]) \n\t"
2161  "mtc1 %[low32], %[ftmp2] \n\t"
2162  "uld %[low32], 0x07(%[src]) \n\t"
2163  "mtc1 %[low32], %[ftmp6] \n\t"
2164  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
2165  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
2166  "paddh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
2167  "paddh %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
      /* add ff_pw_16, add the scaled centre taps, arithmetic shift right by 5 */
2168  "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
2169  "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t"
2170  "paddh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
2171  "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
2172  "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
2173  "psrah %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
      /* pack; average with src2, then with the bytes currently in dst */
2174  "gsldlc1 %[ftmp5], 0x07(%[src2]) \n\t"
2175  "gsldrc1 %[ftmp5], 0x00(%[src2]) \n\t"
2176  "packushb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
2177  "ldc1 %[ftmp9], 0x00(%[dst]) \n\t"
2178  "pavgb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
2179  "pavgb %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
2180  PTR_ADDU "%[src], %[src], %[dstStride] \n\t"
2181  "sdc1 %[ftmp1], 0x00(%[dst]) \n\t"
2182  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
2183  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
2184  PTR_ADDU "%[src2], %[src2], %[src2Stride] \n\t"
2185  "bgtz %[tmp0], 1b \n\t"
2186  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2187  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2188  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2189  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2190  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
2191  [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
2192  [dst]"+&r"(dst), [src]"+&r"(src),
2193  [src2]"+&r"(src2),
2194  [low32]"=&r"(low32)
2195  : [dstStride]"r"((mips_reg)dstStride),
2196  [src2Stride]"r"((mips_reg)src2Stride),
2197  [ff_pw_5]"f"(ff_pw_5), [ff_pw_16]"f"(ff_pw_16)
2198  : "memory"
2199  );
2200 }
2201 
2203  const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)
2204 {
      /*
       * Covers a 16x16 area with four calls to the 8-wide avg routine
       * (which itself processes 8 rows): left and right halves of the top
       * eight rows first, then of the bottom eight rows.
       */
2205  avg_h264_qpel8_h_lowpass_l2_mmi(dst, src, src2, dstStride, src2Stride);
2206  avg_h264_qpel8_h_lowpass_l2_mmi(dst + 8, src + 8, src2 + 8, dstStride,
2207  src2Stride);
2208
      /* src is stepped with dstStride here, matching the 8-wide routine */
2209  src += 8 * dstStride;
2210  dst += 8 * dstStride;
2211  src2 += 8 * src2Stride;
2212
2213  avg_h264_qpel8_h_lowpass_l2_mmi(dst, src, src2, dstStride, src2Stride);
2214  avg_h264_qpel8_h_lowpass_l2_mmi(dst + 8, src + 8, src2 + 8, dstStride,
2215  src2Stride);
2216 }
2217 
2218 static void avg_pixels8_l2_shift5_mmi(uint8_t *dst, int16_t *src16,
2219  const uint8_t *src8, ptrdiff_t dstStride, ptrdiff_t src8Stride, int b)
2220 {
2221  double ftmp[8];
2222  uint64_t tmp0;
2223 
2224  do {
2225  __asm__ volatile (
2226  "dli %[tmp0], 0x05 \n\t"
2227  "gsldlc1 %[ftmp0], 0x07(%[src16]) \n\t"
2228  "gsldrc1 %[ftmp0], 0x00(%[src16]) \n\t"
2229  "mtc1 %[tmp0], %[ftmp6] \n\t"
2230  "gsldlc1 %[ftmp1], 0x0f(%[src16]) \n\t"
2231  "gsldrc1 %[ftmp1], 0x08(%[src16]) \n\t"
2232  "gsldlc1 %[ftmp2], 0x37(%[src16]) \n\t"
2233  "gsldrc1 %[ftmp2], 0x30(%[src16]) \n\t"
2234  "gsldlc1 %[ftmp3], 0x3f(%[src16]) \n\t"
2235  "gsldrc1 %[ftmp3], 0x38(%[src16]) \n\t"
2236  "psrah %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
2237  "psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
2238  "psrah %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
2239  "psrah %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
2240  "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2241  "ldc1 %[ftmp4], 0x00(%[src8]) \n\t"
2242  "gsldxc1 %[ftmp5], 0x00(%[src8], %[src8Stride]) \n\t"
2243  "packushb %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2244  "pavgb %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2245  "pavgb %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
2246  "ldc1 %[ftmp7], 0x00(%[dst]) \n\t"
2247  "pavgb %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
2248  "sdc1 %[ftmp0], 0x00(%[dst]) \n\t"
2249  "gsldxc1 %[ftmp7], 0x00(%[dst], %[dstStride]) \n\t"
2250  "pavgb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
2251  "gssdxc1 %[ftmp2], 0x00(%[dst], %[dstStride]) \n\t"
2252  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2253  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2254  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2255  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2256  [tmp0]"=&r"(tmp0)
2257  : [src8]"r"(src8), [src16]"r"(src16),
2258  [dst]"r"(dst),
2259  [src8Stride]"r"((mips_reg)src8Stride),
2260  [dstStride]"r"((mips_reg)dstStride)
2261  : "memory"
2262  );
2263 
2264  src8 += 2 * src8Stride;
2265  src16 += 48;
2266  dst += 2 * dstStride;
2267  } while (b -= 2);
2268 }
2269 
/*
 * 16-pixel-wide variant of avg_pixels8_l2_shift5_mmi: runs the 8-wide
 * routine on the left half, then on the right half (all three pointers
 * offset by 8 elements), with the same strides and row count.
 */
static void avg_pixels16_l2_shift5_mmi(uint8_t *dst, int16_t *src16,
        const uint8_t *src8, ptrdiff_t dstStride, ptrdiff_t src8Stride, int b)
{
    enum { HALF_WIDTH = 8 };
    int col;

    for (col = 0; col < 2 * HALF_WIDTH; col += HALF_WIDTH)
        avg_pixels8_l2_shift5_mmi(dst + col, src16 + col, src8 + col,
                                  dstStride, src8Stride, b);
}
2277 
2278 //DEF_H264_MC_MMI(put_, 4)
2280  ptrdiff_t stride)
2281 {
      /* No filtering: src is passed straight to ff_put_pixels4_8_mmi with
       * stride and a final argument of 4 (presumably the row count). */
2282  ff_put_pixels4_8_mmi(dst, src, stride, 4);
2283 }
2284 
2286  ptrdiff_t stride)
2287 {
      /* put_h264_qpel4_h_lowpass_mmi filters src into the 16-byte scratch
       * block `half` (pitch 4); ff_put_pixels4_l2_8_mmi then combines src
       * and `half` into dst (presumably a per-pixel average -- confirm). */
2288  uint8_t half[16];
2289  put_h264_qpel4_h_lowpass_mmi(half, src, 4, stride);
2290  ff_put_pixels4_l2_8_mmi(dst, src, half, stride, stride, 4, 4);
2291 }
2292 
2294  ptrdiff_t stride)
2295 {
      /* put_h264_qpel4_h_lowpass_mmi writes directly to dst; stride is used
       * for both destination and source. */
2296  put_h264_qpel4_h_lowpass_mmi(dst, src, stride, stride);
2297 }
2298 
2300  ptrdiff_t stride)
2301 {
      /* Same sequence as the src/half combination above it in the file, but
       * the unfiltered input handed to ff_put_pixels4_l2_8_mmi is src + 1
       * (one pixel to the right). */
2302  uint8_t half[16];
2303  put_h264_qpel4_h_lowpass_mmi(half, src, 4, stride);
2304  ff_put_pixels4_l2_8_mmi(dst, src+1, half, stride, stride, 4, 4);
2305 }
2306 
2308  ptrdiff_t stride)
2309 {
      /* copy_block4_mmi copies 9 rows of 4 bytes, starting two rows above
       * src, into `full` (pitch 4); full_mid = full + 8 is therefore the
       * copy of the row at src. put_h264_qpel4_v_lowpass_mmi filters that
       * into `half`, and ff_put_pixels4_l2_8_mmi combines full_mid with
       * `half` into dst. */
2310  uint8_t full[36];
2311  uint8_t * const full_mid= full + 8;
2312  uint8_t half[16];
2313  copy_block4_mmi(full, src - stride*2, 4, stride, 9);
2314  put_h264_qpel4_v_lowpass_mmi(half, full_mid, 4, 4);
2315  ff_put_pixels4_l2_8_mmi(dst, full_mid, half, stride, 4, 4, 4);
2316 }
2317 
2319  ptrdiff_t stride)
2320 {
      /* Copies 9 rows of 4 bytes (from two rows above src) into `full`
       * (pitch 4), then put_h264_qpel4_v_lowpass_mmi filters from full_mid
       * (the copy of the row at src) straight into dst. */
2321  uint8_t full[36];
2322  uint8_t * const full_mid= full + 8;
2323  copy_block4_mmi(full, src - stride*2, 4, stride, 9);
2324  put_h264_qpel4_v_lowpass_mmi(dst, full_mid, stride, 4);
2325 }
2326 
2328  ptrdiff_t stride)
2329 {
      /* Vertical filter of the 9-row copy into `half`; the unfiltered input
       * combined with it is full_mid + 4, i.e. the copied row one below
       * the row at src (pitch of `full` is 4). */
2330  uint8_t full[36];
2331  uint8_t * const full_mid= full + 8;
2332  uint8_t half[16];
2333  copy_block4_mmi(full, src - stride*2, 4, stride, 9);
2334  put_h264_qpel4_v_lowpass_mmi(half, full_mid, 4, 4);
2335  ff_put_pixels4_l2_8_mmi(dst, full_mid+4, half, stride, 4, 4, 4);
2336 }
2337 
2339  ptrdiff_t stride)
2340 {
      /* halfH: put_h264_qpel4_h_lowpass_mmi applied at src.
       * halfV: put_h264_qpel4_v_lowpass_mmi applied to a 9-row copy of the
       *        column block starting at src - 2*stride.
       * Both 4x4 scratch blocks (pitch 4) are then combined into dst by
       * ff_put_pixels4_l2_8_mmi. */
2341  uint8_t full[36];
2342  uint8_t * const full_mid= full + 8;
2343  uint8_t halfH[16];
2344  uint8_t halfV[16];
2345  put_h264_qpel4_h_lowpass_mmi(halfH, src, 4, stride);
2346  copy_block4_mmi(full, src - stride*2, 4, stride, 9);
2347  put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
2348  ff_put_pixels4_l2_8_mmi(dst, halfH, halfV, stride, 4, 4, 4);
2349 }
2350 
2352  ptrdiff_t stride)
2353 {
      /* halfH: h_lowpass applied at src.
       * halfV: v_lowpass applied to a 9-row copy taken one pixel to the
       *        right (src - 2*stride + 1).
       * The two scratch blocks are combined into dst. */
2354  uint8_t full[36];
2355  uint8_t * const full_mid= full + 8;
2356  uint8_t halfH[16];
2357  uint8_t halfV[16];
2358  put_h264_qpel4_h_lowpass_mmi(halfH, src, 4, stride);
2359  copy_block4_mmi(full, src - stride*2 + 1, 4, stride, 9);
2360  put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
2361  ff_put_pixels4_l2_8_mmi(dst, halfH, halfV, stride, 4, 4, 4);
2362 }
2363 
2365  ptrdiff_t stride)
2366 {
      /* halfH: h_lowpass applied one row down (src + stride).
       * halfV: v_lowpass applied to a 9-row copy starting at src - 2*stride.
       * The two scratch blocks are combined into dst. */
2367  uint8_t full[36];
2368  uint8_t * const full_mid= full + 8;
2369  uint8_t halfH[16];
2370  uint8_t halfV[16];
2371  put_h264_qpel4_h_lowpass_mmi(halfH, src + stride, 4, stride);
2372  copy_block4_mmi(full, src - stride*2, 4, stride, 9);
2373  put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
2374  ff_put_pixels4_l2_8_mmi(dst, halfH, halfV, stride, 4, 4, 4);
2375 }
2376 
2378  ptrdiff_t stride)
2379 {
      /* halfH: h_lowpass applied one row down (src + stride).
       * halfV: v_lowpass applied to a 9-row copy taken one pixel to the
       *        right (src - 2*stride + 1).
       * The two scratch blocks are combined into dst. */
2380  uint8_t full[36];
2381  uint8_t * const full_mid= full + 8;
2382  uint8_t halfH[16];
2383  uint8_t halfV[16];
2384  put_h264_qpel4_h_lowpass_mmi(halfH, src + stride, 4, stride);
2385  copy_block4_mmi(full, src - stride*2 + 1, 4, stride, 9);
2386  put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
2387  ff_put_pixels4_l2_8_mmi(dst, halfH, halfV, stride, 4, 4, 4);
2388 }
2389 
2391  ptrdiff_t stride)
2392 {
      /* Presumably ff_put_h264_qpel4_mc22_mmi; its name line is missing from
       * this listing. The hv_lowpass helper filters src straight into dst;
       * both pitches are stride. */
2393  put_h264_qpel4_hv_lowpass_mmi(dst, src, stride, stride);
2394 }
2395 
2397  ptrdiff_t stride)
2398 {
      /* Presumably ff_put_h264_qpel4_mc21_mmi; its name line is missing from
       * this listing. halfH: h_lowpass of the rows at src. halfHV: hv_lowpass
       * of src. Both are 4-pitch scratch blocks handed to the l2 helper. */
2399  uint8_t halfH[16];
2400  uint8_t halfHV[16];
2401  put_h264_qpel4_h_lowpass_mmi(halfH, src, 4, stride);
2402  put_h264_qpel4_hv_lowpass_mmi(halfHV, src, 4, stride);
2403  ff_put_pixels4_l2_8_mmi(dst, halfH, halfHV, stride, 4, 4, 4);
2404 }
2405 
2407  ptrdiff_t stride)
2408 {
      /* Presumably ff_put_h264_qpel4_mc23_mmi; its name line is missing from
       * this listing. halfH: h_lowpass of the rows starting at src + stride.
       * halfHV: hv_lowpass of src. Both go to the l2 helper with pitch 4. */
2409  uint8_t halfH[16];
2410  uint8_t halfHV[16];
2411  put_h264_qpel4_h_lowpass_mmi(halfH, src + stride, 4, stride);
2412  put_h264_qpel4_hv_lowpass_mmi(halfHV, src, 4, stride);
2413  ff_put_pixels4_l2_8_mmi(dst, halfH, halfHV, stride, 4, 4, 4);
2414 }
2415 
2417  ptrdiff_t stride)
2418 {
      /* Presumably ff_put_h264_qpel4_mc12_mmi; its name line is missing from
       * this listing. halfV: v_lowpass of a 9-row copy taken from two rows
       * above src. halfHV: hv_lowpass of src. Both go to the l2 helper with
       * pitch 4. */
2419  uint8_t full[36];
2420  uint8_t * const full_mid= full + 8;
2421  uint8_t halfV[16];
2422  uint8_t halfHV[16];
2423  copy_block4_mmi(full, src - stride*2, 4, stride, 9);
2424  put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
2425  put_h264_qpel4_hv_lowpass_mmi(halfHV, src, 4, stride);
2426  ff_put_pixels4_l2_8_mmi(dst, halfV, halfHV, stride, 4, 4, 4);
2427 }
2428 
2430  ptrdiff_t stride)
2431 {
      /* Presumably ff_put_h264_qpel4_mc32_mmi; its name line is missing from
       * this listing. halfV: v_lowpass of a 9-row copy taken from two rows
       * above src, shifted one byte right. halfHV: hv_lowpass of src. */
2432  uint8_t full[36];
2433  uint8_t * const full_mid= full + 8;
2434  uint8_t halfV[16];
2435  uint8_t halfHV[16];
2436  copy_block4_mmi(full, src - stride*2 + 1, 4, stride, 9);
2437  put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
2438  put_h264_qpel4_hv_lowpass_mmi(halfHV, src, 4, stride);
2439  ff_put_pixels4_l2_8_mmi(dst, halfV, halfHV, stride, 4, 4, 4);
2440 }
2441 
2442 //DEF_H264_MC_MMI(avg_, 4)
2444  ptrdiff_t stride)
2445 {
      /* Presumably ff_avg_h264_qpel4_mc00_mmi; its name line is missing from
       * this listing. No filtering: src goes straight to ff_avg_pixels4_8_mmi
       * with stride and a final argument of 4 (presumably the row count). */
2446  ff_avg_pixels4_8_mmi(dst, src, stride, 4);
2447 }
2448 
2450  ptrdiff_t stride)
2451 {
      /* Presumably ff_avg_h264_qpel4_mc10_mmi; its name line is missing from
       * this listing. half is the h_lowpass output for src, stored with a
       * 4-byte pitch; the avg l2 helper takes it together with the unfiltered
       * src (source pitches stride and 4, height 4). */
2452  uint8_t half[16];
2453  put_h264_qpel4_h_lowpass_mmi(half, src, 4, stride);
2454  ff_avg_pixels4_l2_8_mmi(dst, src, half, stride, stride, 4, 4);
2455 }
2456 
2458  ptrdiff_t stride)
2459 {
      /* Presumably ff_avg_h264_qpel4_mc20_mmi; its name line is missing from
       * this listing. The avg h_lowpass helper works from src directly onto
       * dst; both pitches are stride. */
2460  avg_h264_qpel4_h_lowpass_mmi(dst, src, stride, stride);
2461 }
2462 
2464  ptrdiff_t stride)
2465 {
      /* Presumably ff_avg_h264_qpel4_mc30_mmi; its name line is missing from
       * this listing. half is the h_lowpass output for src (pitch 4); the avg
       * l2 helper takes it together with the unfiltered pixels at src + 1. */
2466  uint8_t half[16];
2467  put_h264_qpel4_h_lowpass_mmi(half, src, 4, stride);
2468  ff_avg_pixels4_l2_8_mmi(dst, src+1, half, stride, stride, 4, 4);
2469 }
2470 
2472  ptrdiff_t stride)
2473 {
      /* Presumably ff_avg_h264_qpel4_mc01_mmi; its name line is missing from
       * this listing. full gets 9 rows of 4 bytes starting two rows above src;
       * full_mid skips those two rows (2 * 4 bytes). half is the v_lowpass
       * output; the avg l2 helper pairs it with the unfiltered copy at
       * full_mid. */
2474  uint8_t full[36];
2475  uint8_t * const full_mid= full + 8;
2476  uint8_t half[16];
2477  copy_block4_mmi(full, src - stride*2, 4, stride, 9);
2478  put_h264_qpel4_v_lowpass_mmi(half, full_mid, 4, 4);
2479  ff_avg_pixels4_l2_8_mmi(dst, full_mid, half, stride, 4, 4, 4);
2480 }
2481 
2483  ptrdiff_t stride)
2484 {
      /* Presumably ff_avg_h264_qpel4_mc02_mmi; its name line is missing from
       * this listing. full gets 9 rows of 4 bytes starting two rows above src;
       * the avg v_lowpass helper reads it at full_mid (pitch 4) and works on
       * dst (pitch stride). */
2485  uint8_t full[36];
2486  uint8_t * const full_mid= full + 8;
2487  copy_block4_mmi(full, src - stride*2, 4, stride, 9);
2488  avg_h264_qpel4_v_lowpass_mmi(dst, full_mid, stride, 4);
2489 }
2490 
2492  ptrdiff_t stride)
2493 {
      /* Presumably ff_avg_h264_qpel4_mc03_mmi; its name line is missing from
       * this listing. full gets 9 rows of 4 bytes starting two rows above src;
       * full_mid skips those two rows (2 * 4 bytes). */
2494  uint8_t full[36];
2495  uint8_t * const full_mid= full + 8;
2496  uint8_t half[16];
2497  copy_block4_mmi(full, src - stride*2, 4, stride, 9);
2498  put_h264_qpel4_v_lowpass_mmi(half, full_mid, 4, 4);
      /* Unfiltered operand: the copy one row below full_mid (pitch 4, so +4). */
2499  ff_avg_pixels4_l2_8_mmi(dst, full_mid+4, half, stride, 4, 4, 4);
2500 }
2501 
2503  ptrdiff_t stride)
2504 {
      /* Presumably ff_avg_h264_qpel4_mc11_mmi; its name line is missing from
       * this listing. halfH: h_lowpass of the rows at src. halfV: v_lowpass of
       * a 9-row copy taken from two rows above src. Both are 4-pitch scratch
       * blocks handed to the avg l2 helper. */
2505  uint8_t full[36];
2506  uint8_t * const full_mid= full + 8;
2507  uint8_t halfH[16];
2508  uint8_t halfV[16];
2509  put_h264_qpel4_h_lowpass_mmi(halfH, src, 4, stride);
2510  copy_block4_mmi(full, src - stride*2, 4, stride, 9);
2511  put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
2512  ff_avg_pixels4_l2_8_mmi(dst, halfH, halfV, stride, 4, 4, 4);
2513 }
2514 
2516  ptrdiff_t stride)
2517 {
      /* Presumably ff_avg_h264_qpel4_mc31_mmi; its name line is missing from
       * this listing. halfH: h_lowpass of the rows at src. halfV: v_lowpass of
       * a 9-row copy taken from two rows above src, shifted one byte right. */
2518  uint8_t full[36];
2519  uint8_t * const full_mid= full + 8;
2520  uint8_t halfH[16];
2521  uint8_t halfV[16];
2522  put_h264_qpel4_h_lowpass_mmi(halfH, src, 4, stride);
2523  copy_block4_mmi(full, src - stride*2 + 1, 4, stride, 9);
2524  put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
2525  ff_avg_pixels4_l2_8_mmi(dst, halfH, halfV, stride, 4, 4, 4);
2526 }
2527 
2529  ptrdiff_t stride)
2530 {
      /* Presumably ff_avg_h264_qpel4_mc13_mmi; its name line is missing from
       * this listing. halfH: h_lowpass of the rows starting at src + stride.
       * halfV: v_lowpass of a 9-row copy taken from two rows above src. */
2531  uint8_t full[36];
2532  uint8_t * const full_mid= full + 8;
2533  uint8_t halfH[16];
2534  uint8_t halfV[16];
2535  put_h264_qpel4_h_lowpass_mmi(halfH, src + stride, 4, stride);
2536  copy_block4_mmi(full, src - stride*2, 4, stride, 9);
2537  put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
2538  ff_avg_pixels4_l2_8_mmi(dst, halfH, halfV, stride, 4, 4, 4);
2539 }
2540 
2542  ptrdiff_t stride)
2543 {
      /* Presumably ff_avg_h264_qpel4_mc33_mmi; its name line is missing from
       * this listing. halfH: h_lowpass of the rows starting at src + stride.
       * halfV: v_lowpass of a 9-row copy taken from two rows above src,
       * shifted one byte right. */
2544  uint8_t full[36];
2545  uint8_t * const full_mid= full + 8;
2546  uint8_t halfH[16];
2547  uint8_t halfV[16];
2548  put_h264_qpel4_h_lowpass_mmi(halfH, src + stride, 4, stride);
2549  copy_block4_mmi(full, src - stride*2 + 1, 4, stride, 9);
2550  put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
2551  ff_avg_pixels4_l2_8_mmi(dst, halfH, halfV, stride, 4, 4, 4);
2552 }
2553 
2555  ptrdiff_t stride)
2556 {
      /* Presumably ff_avg_h264_qpel4_mc22_mmi; its name line is missing from
       * this listing. The avg hv_lowpass helper works from src directly onto
       * dst; both pitches are stride. */
2557  avg_h264_qpel4_hv_lowpass_mmi(dst, src, stride, stride);
2558 }
2559 
2561  ptrdiff_t stride)
2562 {
      /* Presumably ff_avg_h264_qpel4_mc21_mmi; its name line is missing from
       * this listing. halfH: h_lowpass of the rows at src. halfHV: hv_lowpass
       * of src. Both are 4-pitch scratch blocks handed to the avg l2 helper. */
2563  uint8_t halfH[16];
2564  uint8_t halfHV[16];
2565  put_h264_qpel4_h_lowpass_mmi(halfH, src, 4, stride);
2566  put_h264_qpel4_hv_lowpass_mmi(halfHV, src, 4, stride);
2567  ff_avg_pixels4_l2_8_mmi(dst, halfH, halfHV, stride, 4, 4, 4);
2568 }
2569 
2571  ptrdiff_t stride)
2572 {
      /* Presumably ff_avg_h264_qpel4_mc23_mmi; its name line is missing from
       * this listing. halfH: h_lowpass of the rows starting at src + stride.
       * halfHV: hv_lowpass of src. Both go to the avg l2 helper with pitch 4. */
2573  uint8_t halfH[16];
2574  uint8_t halfHV[16];
2575  put_h264_qpel4_h_lowpass_mmi(halfH, src + stride, 4, stride);
2576  put_h264_qpel4_hv_lowpass_mmi(halfHV, src, 4, stride);
2577  ff_avg_pixels4_l2_8_mmi(dst, halfH, halfHV, stride, 4, 4, 4);
2578 }
2579 
2581  ptrdiff_t stride)
2582 {
      /* Presumably ff_avg_h264_qpel4_mc12_mmi; its name line is missing from
       * this listing. halfV: v_lowpass of a 9-row copy taken from two rows
       * above src. halfHV: hv_lowpass of src. Both go to the avg l2 helper
       * with pitch 4. */
2583  uint8_t full[36];
2584  uint8_t * const full_mid= full + 8;
2585  uint8_t halfV[16];
2586  uint8_t halfHV[16];
2587  copy_block4_mmi(full, src - stride*2, 4, stride, 9);
2588  put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
2589  put_h264_qpel4_hv_lowpass_mmi(halfHV, src, 4, stride);
2590  ff_avg_pixels4_l2_8_mmi(dst, halfV, halfHV, stride, 4, 4, 4);
2591 }
2592 
2594  ptrdiff_t stride)
2595 {
      /* Presumably ff_avg_h264_qpel4_mc32_mmi; its name line is missing from
       * this listing. halfV: v_lowpass of a 9-row copy taken from two rows
       * above src, shifted one byte right. halfHV: hv_lowpass of src. */
2596  uint8_t full[36];
2597  uint8_t * const full_mid= full + 8;
2598  uint8_t halfV[16];
2599  uint8_t halfHV[16];
2600  copy_block4_mmi(full, src - stride*2 + 1, 4, stride, 9);
2601  put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
2602  put_h264_qpel4_hv_lowpass_mmi(halfHV, src, 4, stride);
2603  ff_avg_pixels4_l2_8_mmi(dst, halfV, halfHV, stride, 4, 4, 4);
2604 }
2605 
2606 //DEF_H264_MC_MMI(put_, 8)
2608  ptrdiff_t stride)
2609 {
      /* Presumably ff_put_h264_qpel8_mc00_mmi; its name line is missing from
       * this listing. No filtering: src goes straight to ff_put_pixels8_8_mmi
       * with stride as the line size and a height of 8. */
2610  ff_put_pixels8_8_mmi(dst, src, stride, 8);
2611 }
2612 
2614  ptrdiff_t stride)
2615 {
      /* Presumably ff_put_h264_qpel8_mc10_mmi; its name line is missing from
       * this listing. half is the h_lowpass output for src, stored with an
       * 8-byte pitch; the l2 helper takes it together with the unfiltered src
       * (source pitches stride and 8, height 8). */
2616  uint8_t half[64];
2617  put_h264_qpel8_h_lowpass_mmi(half, src, 8, stride);
2618  ff_put_pixels8_l2_8_mmi(dst, src, half, stride, stride, 8, 8);
2619 }
2620 
2622  ptrdiff_t stride)
2623 {
      /* Presumably ff_put_h264_qpel8_mc20_mmi; its name line is missing from
       * this listing. The h_lowpass helper writes straight into dst; both
       * pitches are stride. */
2624  put_h264_qpel8_h_lowpass_mmi(dst, src, stride, stride);
2625 }
2626 
2628  ptrdiff_t stride)
2629 {
      /* Presumably ff_put_h264_qpel8_mc30_mmi; its name line is missing from
       * this listing. half is the h_lowpass output for src (pitch 8); the l2
       * helper takes it together with the unfiltered pixels at src + 1. */
2630  uint8_t half[64];
2631  put_h264_qpel8_h_lowpass_mmi(half, src, 8, stride);
2632  ff_put_pixels8_l2_8_mmi(dst, src+1, half, stride, stride, 8, 8);
2633 }
2634 
2636  ptrdiff_t stride)
2637 {
      /* Presumably ff_put_h264_qpel8_mc01_mmi; its name line is missing from
       * this listing. full is sized for 13 rows of 8 bytes, copied starting
       * two rows above src; full_mid skips two rows (2 * 8 bytes). half is the
       * v_lowpass output; the l2 helper pairs it with the unfiltered copy at
       * full_mid. */
2638  uint8_t full[104];
2639  uint8_t * const full_mid= full + 16;
2640  uint8_t half[64];
2641  copy_block8_mmi(full, src - stride*2, 8, stride, 13);
2642  put_h264_qpel8_v_lowpass_mmi(half, full_mid, 8, 8);
2643  ff_put_pixels8_l2_8_mmi(dst, full_mid, half, stride, 8, 8, 8);
2644 }
2645 
2647  ptrdiff_t stride)
2648 {
      /* Presumably ff_put_h264_qpel8_mc02_mmi; its name line is missing from
       * this listing. full is sized for 13 rows of 8 bytes, copied starting
       * two rows above src; the v_lowpass helper reads it at full_mid
       * (pitch 8) and writes dst (pitch stride). */
2649  uint8_t full[104];
2650  uint8_t * const full_mid= full + 16;
2651  copy_block8_mmi(full, src - stride*2, 8, stride, 13);
2652  put_h264_qpel8_v_lowpass_mmi(dst, full_mid, stride, 8);
2653 }
2654 
2656  ptrdiff_t stride)
2657 {
      /* Presumably ff_put_h264_qpel8_mc03_mmi; its name line is missing from
       * this listing. full is sized for 13 rows of 8 bytes, copied starting
       * two rows above src; full_mid skips two rows (2 * 8 bytes). */
2658  uint8_t full[104];
2659  uint8_t * const full_mid= full + 16;
2660  uint8_t half[64];
2661  copy_block8_mmi(full, src - stride*2, 8, stride, 13);
2662  put_h264_qpel8_v_lowpass_mmi(half, full_mid, 8, 8);
      /* Unfiltered operand: the copy one row below full_mid (pitch 8, so +8). */
2663  ff_put_pixels8_l2_8_mmi(dst, full_mid+8, half, stride, 8, 8, 8);
2664 }
2665 
2667  ptrdiff_t stride)
2668 {
      /* Presumably ff_put_h264_qpel8_mc11_mmi; its name line is missing from
       * this listing. halfH: h_lowpass of the rows at src. halfV: v_lowpass of
       * a 13-row copy taken from two rows above src. Both are 8-pitch scratch
       * blocks handed to the l2 helper. */
2669  uint8_t full[104];
2670  uint8_t * const full_mid= full + 16;
2671  uint8_t halfH[64];
2672  uint8_t halfV[64];
2673  put_h264_qpel8_h_lowpass_mmi(halfH, src, 8, stride);
2674  copy_block8_mmi(full, src - stride*2, 8, stride, 13);
2675  put_h264_qpel8_v_lowpass_mmi(halfV, full_mid, 8, 8);
2676  ff_put_pixels8_l2_8_mmi(dst, halfH, halfV, stride, 8, 8, 8);
2677 }
2678 
2680  ptrdiff_t stride)
2681 {
      /* Presumably ff_put_h264_qpel8_mc31_mmi; its name line is missing from
       * this listing. halfH: h_lowpass of the rows at src. halfV: v_lowpass of
       * a 13-row copy taken from two rows above src, shifted one byte right. */
2682  uint8_t full[104];
2683  uint8_t * const full_mid= full + 16;
2684  uint8_t halfH[64];
2685  uint8_t halfV[64];
2686  put_h264_qpel8_h_lowpass_mmi(halfH, src, 8, stride);
2687  copy_block8_mmi(full, src - stride*2 + 1, 8, stride, 13);
2688  put_h264_qpel8_v_lowpass_mmi(halfV, full_mid, 8, 8);
2689  ff_put_pixels8_l2_8_mmi(dst, halfH, halfV, stride, 8, 8, 8);
2690 }
2691 
2693  ptrdiff_t stride)
2694 {
      /* Presumably ff_put_h264_qpel8_mc13_mmi; its name line is missing from
       * this listing. halfH: h_lowpass of the rows starting at src + stride.
       * halfV: v_lowpass of a 13-row copy taken from two rows above src. */
2695  uint8_t full[104];
2696  uint8_t * const full_mid= full + 16;
2697  uint8_t halfH[64];
2698  uint8_t halfV[64];
2699  put_h264_qpel8_h_lowpass_mmi(halfH, src + stride, 8, stride);
2700  copy_block8_mmi(full, src - stride*2, 8, stride, 13);
2701  put_h264_qpel8_v_lowpass_mmi(halfV, full_mid, 8, 8);
2702  ff_put_pixels8_l2_8_mmi(dst, halfH, halfV, stride, 8, 8, 8);
2703 }
2704 
2706  ptrdiff_t stride)
2707 {
      /* Presumably ff_put_h264_qpel8_mc33_mmi; its name line is missing from
       * this listing. halfH: h_lowpass of the rows starting at src + stride.
       * halfV: v_lowpass of a 13-row copy taken from two rows above src,
       * shifted one byte right. */
2708  uint8_t full[104];
2709  uint8_t * const full_mid= full + 16;
2710  uint8_t halfH[64];
2711  uint8_t halfV[64];
2712  put_h264_qpel8_h_lowpass_mmi(halfH, src + stride, 8, stride);
2713  copy_block8_mmi(full, src - stride*2 + 1, 8, stride, 13);
2714  put_h264_qpel8_v_lowpass_mmi(halfV, full_mid, 8, 8);
2715  ff_put_pixels8_l2_8_mmi(dst, halfH, halfV, stride, 8, 8, 8);
2716 }
2717 
2719  ptrdiff_t stride)
2720 {
      /* Presumably ff_put_h264_qpel8_mc22_mmi; its name line is missing from
       * this listing. temp is the 16-bit scratch area passed as the
       * hv_lowpass helper's tmp (tmpStride 8); output goes to dst with pitch
       * stride.
       * NOTE(review): temp is uint16_t, but the helper's prototype takes
       * int16_t *tmp -- pointer-sign mismatch worth confirming. */
2721  uint16_t __attribute__ ((aligned(8))) temp[192];
2722 
2723  put_h264_qpel8_hv_lowpass_mmi(dst, temp, src, stride, 8, stride);
2724 }
2725 
2727  ptrdiff_t stride)
2728 {
      /* Presumably ff_put_h264_qpel8_mc21_mmi; its name line is missing from
       * this listing. temp is split in two: halfHV is its first 64 bytes
       * (8x8, pitch 8) and halfV is an int16_t view of the remaining 384
       * bytes, used as the hv_lowpass helper's tmp. */
2729  uint8_t __attribute__ ((aligned(8))) temp[448];
2730  uint8_t *const halfHV = temp;
2731  int16_t *const halfV = (int16_t *) (temp + 64);
2732 
2733  put_h264_qpel8_hv_lowpass_mmi(halfHV, halfV, src, 8, 8, stride);
      /* h_lowpass_l2 takes src plus halfHV as its src2 (src2Stride 8). */
2734  put_h264_qpel8_h_lowpass_l2_mmi(dst, src, halfHV, stride, 8);
2735 }
2736 
2738  ptrdiff_t stride)
2739 {
      /* Presumably ff_put_h264_qpel8_mc23_mmi; its name line is missing from
       * this listing. temp is split in two: halfHV is its first 64 bytes
       * (8x8, pitch 8) and halfV is an int16_t view of the remaining 384
       * bytes, used as the hv_lowpass helper's tmp. */
2740  uint8_t __attribute__ ((aligned(8))) temp[448];
2741  uint8_t *const halfHV = temp;
2742  int16_t *const halfV = (int16_t *) (temp + 64);
2743 
2744  put_h264_qpel8_hv_lowpass_mmi(halfHV, halfV, src, 8, 8, stride);
      /* h_lowpass_l2 takes the rows from src + stride plus halfHV as src2. */
2745  put_h264_qpel8_h_lowpass_l2_mmi(dst, src + stride, halfHV, stride, 8);
2746 }
2747 
2749  ptrdiff_t stride)
2750 {
      /* Presumably ff_put_h264_qpel8_mc12_mmi; its name line is missing from
       * this listing. temp is split in two: halfHV is its first 64 bytes
       * (8x8, pitch 8) and halfV is an int16_t view of the remaining 384
       * bytes, filled by the hv_lowpass helper as its tmp. */
2751  uint8_t __attribute__ ((aligned(8))) temp[448];
2752  uint8_t *const halfHV = temp;
2753  int16_t *const halfV = (int16_t *) (temp + 64);
2754 
2755  put_h264_qpel8_hv_lowpass_mmi(halfHV, halfV, src, 8, 8, stride);
      /* The shift5 l2 helper reads the 16-bit data from two elements in. */
2756  put_pixels8_l2_shift5_mmi(dst, halfV + 2, halfHV, stride, 8, 8);
2757 }
2758 
2760  ptrdiff_t stride)
2761 {
      /* Presumably ff_put_h264_qpel8_mc32_mmi; its name line is missing from
       * this listing. temp is split in two: halfHV is its first 64 bytes
       * (8x8, pitch 8) and halfV is an int16_t view of the remaining 384
       * bytes, filled by the hv_lowpass helper as its tmp. */
2762  uint8_t __attribute__ ((aligned(8))) temp[448];
2763  uint8_t *const halfHV = temp;
2764  int16_t *const halfV = (int16_t *) (temp + 64);
2765 
2766  put_h264_qpel8_hv_lowpass_mmi(halfHV, halfV, src, 8, 8, stride);
      /* The shift5 l2 helper reads the 16-bit data from three elements in. */
2767  put_pixels8_l2_shift5_mmi(dst, halfV + 3, halfHV, stride, 8, 8);
2768 }
2769 
2770 //DEF_H264_MC_MMI(avg_, 8)
2772  ptrdiff_t stride)
2773 {
      /* Presumably ff_avg_h264_qpel8_mc00_mmi; its name line is missing from
       * this listing. No filtering: src goes straight to ff_avg_pixels8_8_mmi
       * with stride as the line size and a height of 8. */
2774  ff_avg_pixels8_8_mmi(dst, src, stride, 8);
2775 }
2776 
2778  ptrdiff_t stride)
2779 {
      /* Presumably ff_avg_h264_qpel8_mc10_mmi; its name line is missing from
       * this listing. half is the h_lowpass output for src, stored with an
       * 8-byte pitch; the avg l2 helper takes it together with the unfiltered
       * src (source pitches stride and 8, height 8). */
2780  uint8_t half[64];
2781  put_h264_qpel8_h_lowpass_mmi(half, src, 8, stride);
2782  ff_avg_pixels8_l2_8_mmi(dst, src, half, stride, stride, 8, 8);
2783 }
2784 
2786  ptrdiff_t stride)
2787 {
      /* Presumably ff_avg_h264_qpel8_mc20_mmi; its name line is missing from
       * this listing. The avg h_lowpass helper works from src directly onto
       * dst; both pitches are stride. */
2788  avg_h264_qpel8_h_lowpass_mmi(dst, src, stride, stride);
2789 }
2790 
2792  ptrdiff_t stride)
2793 {
      /* Presumably ff_avg_h264_qpel8_mc30_mmi; its name line is missing from
       * this listing. half is the h_lowpass output for src (pitch 8); the avg
       * l2 helper takes it together with the unfiltered pixels at src + 1. */
2794  uint8_t half[64];
2795  put_h264_qpel8_h_lowpass_mmi(half, src, 8, stride);
2796  ff_avg_pixels8_l2_8_mmi(dst, src+1, half, stride, stride, 8, 8);
2797 }
2798 
2800  ptrdiff_t stride)
2801 {
      /* Presumably ff_avg_h264_qpel8_mc01_mmi; its name line is missing from
       * this listing. full is sized for 13 rows of 8 bytes, copied starting
       * two rows above src; full_mid skips two rows (2 * 8 bytes). half is the
       * v_lowpass output; the avg l2 helper pairs it with the unfiltered copy
       * at full_mid. */
2802  uint8_t full[104];
2803  uint8_t * const full_mid= full + 16;
2804  uint8_t half[64];
2805  copy_block8_mmi(full, src - stride*2, 8, stride, 13);
2806  put_h264_qpel8_v_lowpass_mmi(half, full_mid, 8, 8);
2807  ff_avg_pixels8_l2_8_mmi(dst, full_mid, half, stride, 8, 8, 8);
2808 }
2809 
2811  ptrdiff_t stride)
2812 {
      /* Presumably ff_avg_h264_qpel8_mc02_mmi; its name line is missing from
       * this listing. full is sized for 13 rows of 8 bytes, copied starting
       * two rows above src; the avg v_lowpass helper reads it at full_mid
       * (srcStride 8) and works on dst (dstStride stride). */
2813  uint8_t full[104];
2814  uint8_t * const full_mid= full + 16;
2815  copy_block8_mmi(full, src - stride*2, 8, stride, 13);
2816  avg_h264_qpel8_v_lowpass_mmi(dst, full_mid, stride, 8);
2817 }
2818 
2820  ptrdiff_t stride)
2821 {
      /* Presumably ff_avg_h264_qpel8_mc03_mmi; its name line is missing from
       * this listing. full is sized for 13 rows of 8 bytes, copied starting
       * two rows above src; full_mid skips two rows (2 * 8 bytes). */
2822  uint8_t full[104];
2823  uint8_t * const full_mid= full + 16;
2824  uint8_t half[64];
2825  copy_block8_mmi(full, src - stride*2, 8, stride, 13);
2826  put_h264_qpel8_v_lowpass_mmi(half, full_mid, 8, 8);
      /* Unfiltered operand: the copy one row below full_mid (pitch 8, so +8). */
2827  ff_avg_pixels8_l2_8_mmi(dst, full_mid+8, half, stride, 8, 8, 8);
2828 }
2829 
2831  ptrdiff_t stride)
2832 {
      /* Presumably ff_avg_h264_qpel8_mc11_mmi; its name line is missing from
       * this listing. halfH: h_lowpass of the rows at src. halfV: v_lowpass of
       * a 13-row copy taken from two rows above src. Both are 8-pitch scratch
       * blocks handed to the avg l2 helper. */
2833  uint8_t full[104];
2834  uint8_t * const full_mid= full + 16;
2835  uint8_t halfH[64];
2836  uint8_t halfV[64];
2837  put_h264_qpel8_h_lowpass_mmi(halfH, src, 8, stride);
2838  copy_block8_mmi(full, src - stride*2, 8, stride, 13);
2839  put_h264_qpel8_v_lowpass_mmi(halfV, full_mid, 8, 8);
2840  ff_avg_pixels8_l2_8_mmi(dst, halfH, halfV, stride, 8, 8, 8);
2841 }
2842 
2844  ptrdiff_t stride)
2845 {
      /* Presumably ff_avg_h264_qpel8_mc31_mmi; its name line is missing from
       * this listing. halfH: h_lowpass of the rows at src. halfV: v_lowpass of
       * a 13-row copy taken from two rows above src, shifted one byte right. */
2846  uint8_t full[104];
2847  uint8_t * const full_mid= full + 16;
2848  uint8_t halfH[64];
2849  uint8_t halfV[64];
2850  put_h264_qpel8_h_lowpass_mmi(halfH, src, 8, stride);
2851  copy_block8_mmi(full, src - stride*2 + 1, 8, stride, 13);
2852  put_h264_qpel8_v_lowpass_mmi(halfV, full_mid, 8, 8);
2853  ff_avg_pixels8_l2_8_mmi(dst, halfH, halfV, stride, 8, 8, 8);
2854 }
2855 
2857  ptrdiff_t stride)
2858 {
      /* Presumably ff_avg_h264_qpel8_mc13_mmi; its name line is missing from
       * this listing. halfH: h_lowpass of the rows starting at src + stride.
       * halfV: v_lowpass of a 13-row copy taken from two rows above src. */
2859  uint8_t full[104];
2860  uint8_t * const full_mid= full + 16;
2861  uint8_t halfH[64];
2862  uint8_t halfV[64];
2863  put_h264_qpel8_h_lowpass_mmi(halfH, src + stride, 8, stride);
2864  copy_block8_mmi(full, src - stride*2, 8, stride, 13);
2865  put_h264_qpel8_v_lowpass_mmi(halfV, full_mid, 8, 8);
2866  ff_avg_pixels8_l2_8_mmi(dst, halfH, halfV, stride, 8, 8, 8);
2867 }
2868 
2870  ptrdiff_t stride)
2871 {
      /* Presumably ff_avg_h264_qpel8_mc33_mmi; its name line is missing from
       * this listing. halfH: h_lowpass of the rows starting at src + stride.
       * halfV: v_lowpass of a 13-row copy taken from two rows above src,
       * shifted one byte right. */
2872  uint8_t full[104];
2873  uint8_t * const full_mid= full + 16;
2874  uint8_t halfH[64];
2875  uint8_t halfV[64];
2876  put_h264_qpel8_h_lowpass_mmi(halfH, src + stride, 8, stride);
2877  copy_block8_mmi(full, src - stride*2 + 1, 8, stride, 13);
2878  put_h264_qpel8_v_lowpass_mmi(halfV, full_mid, 8, 8);
2879  ff_avg_pixels8_l2_8_mmi(dst, halfH, halfV, stride, 8, 8, 8);
2880 }
2881 
2883  ptrdiff_t stride)
2884 {
      /* Presumably ff_avg_h264_qpel8_mc22_mmi; its name line is missing from
       * this listing. temp is the 16-bit scratch area passed as the second
       * argument of the avg hv_lowpass helper (followed by src, stride, 8,
       * stride).
       * NOTE(review): temp is uint16_t; the helper presumably takes
       * int16_t *tmp like its 16-pixel sibling -- confirm the pointer sign. */
2885  uint16_t __attribute__ ((aligned(8))) temp[192];
2886 
2887  avg_h264_qpel8_hv_lowpass_mmi(dst, temp, src, stride, 8, stride);
2888 }
2889 
2891  ptrdiff_t stride)
2892 {
      /* Presumably ff_avg_h264_qpel8_mc21_mmi; its name line is missing from
       * this listing. temp is split in two: halfHV is its first 64 bytes
       * (8x8, pitch 8) and halfV is an int16_t view of the remaining 384
       * bytes, used as the hv_lowpass helper's tmp. */
2893  uint8_t __attribute__ ((aligned(8))) temp[448];
2894  uint8_t *const halfHV = temp;
2895  int16_t *const halfV = (int16_t *) (temp + 64);
2896 
2897  put_h264_qpel8_hv_lowpass_mmi(halfHV, halfV, src, 8, 8, stride);
      /* avg h_lowpass_l2 takes src plus halfHV as its src2 (src2Stride 8). */
2898  avg_h264_qpel8_h_lowpass_l2_mmi(dst, src, halfHV, stride, 8);
2899 }
2900 
2902  ptrdiff_t stride)
2903 {
      /* Presumably ff_avg_h264_qpel8_mc23_mmi; its name line is missing from
       * this listing. temp is split in two: halfHV is its first 64 bytes
       * (8x8, pitch 8) and halfV is an int16_t view of the remaining 384
       * bytes, used as the hv_lowpass helper's tmp. */
2904  uint8_t __attribute__ ((aligned(8))) temp[448];
2905  uint8_t *const halfHV = temp;
2906  int16_t *const halfV = (int16_t *) (temp + 64);
2907 
2908  put_h264_qpel8_hv_lowpass_mmi(halfHV, halfV, src, 8, 8, stride);
      /* avg h_lowpass_l2 takes the rows from src + stride plus halfHV. */
2909  avg_h264_qpel8_h_lowpass_l2_mmi(dst, src + stride, halfHV, stride, 8);
2910 }
2911 
2913  ptrdiff_t stride)
2914 {
      /* Presumably ff_avg_h264_qpel8_mc12_mmi; its name line is missing from
       * this listing. temp is split in two: halfHV is its first 64 bytes
       * (8x8, pitch 8) and halfV is an int16_t view of the remaining 384
       * bytes, filled by the hv_lowpass helper as its tmp. */
2915  uint8_t __attribute__ ((aligned(8))) temp[448];
2916  uint8_t *const halfHV = temp;
2917  int16_t *const halfV = (int16_t *) (temp + 64);
2918 
2919  put_h264_qpel8_hv_lowpass_mmi(halfHV, halfV, src, 8, 8, stride);
      /* The avg shift5 l2 helper reads the 16-bit data from two elements in. */
2920  avg_pixels8_l2_shift5_mmi(dst, halfV + 2, halfHV, stride, 8, 8);
2921 }
2922 
2924  ptrdiff_t stride)
2925 {
      /* Presumably ff_avg_h264_qpel8_mc32_mmi; its name line is missing from
       * this listing. temp is split in two: halfHV is its first 64 bytes
       * (8x8, pitch 8) and halfV is an int16_t view of the remaining 384
       * bytes, filled by the hv_lowpass helper as its tmp. */
2926  uint8_t __attribute__ ((aligned(8))) temp[448];
2927  uint8_t *const halfHV = temp;
2928  int16_t *const halfV = (int16_t *) (temp + 64);
2929 
2930  put_h264_qpel8_hv_lowpass_mmi(halfHV, halfV, src, 8, 8, stride);
      /* The avg shift5 l2 helper reads the 16-bit data from three elements in. */
2931  avg_pixels8_l2_shift5_mmi(dst, halfV + 3, halfHV, stride, 8, 8);
2932 }
2933 
2934 //DEF_H264_MC_MMI(put_, 16)
2936  ptrdiff_t stride)
2937 {
      /* Presumably ff_put_h264_qpel16_mc00_mmi; its name line is missing from
       * this listing. No filtering: src goes straight to ff_put_pixels16_8_mmi
       * with stride and a final argument of 16 (presumably the row count). */
2938  ff_put_pixels16_8_mmi(dst, src, stride, 16);
2939 }
2940 
2942  ptrdiff_t stride)
2943 {
      /* Presumably ff_put_h264_qpel16_mc10_mmi; its name line is missing from
       * this listing. half is the h_lowpass output for src, stored with a
       * 16-byte pitch; the l2 helper takes it together with the unfiltered src
       * (source pitches stride and 16, height 16). */
2944  uint8_t half[256];
2945  put_h264_qpel16_h_lowpass_mmi(half, src, 16, stride);
2946  ff_put_pixels16_l2_8_mmi(dst, src, half, stride, stride, 16, 16);
2947 }
2948 
2950  ptrdiff_t stride)
2951 {
      /* Presumably ff_put_h264_qpel16_mc20_mmi; its name line is missing from
       * this listing. The h_lowpass helper writes straight into dst; both
       * pitches are stride. */
2952  put_h264_qpel16_h_lowpass_mmi(dst, src, stride, stride);
2953 }
2954 
2956  ptrdiff_t stride)
2957 {
      /* Presumably ff_put_h264_qpel16_mc30_mmi; its name line is missing from
       * this listing. half is the h_lowpass output for src (pitch 16); the l2
       * helper takes it together with the unfiltered pixels at src + 1. */
2958  uint8_t half[256];
2959  put_h264_qpel16_h_lowpass_mmi(half, src, 16, stride);
2960  ff_put_pixels16_l2_8_mmi(dst, src+1, half, stride, stride, 16, 16);
2961 }
2962 
2964  ptrdiff_t stride)
2965 {
      /* Presumably ff_put_h264_qpel16_mc01_mmi; its name line is missing from
       * this listing. full is sized for 21 rows of 16 bytes, copied starting
       * two rows above src; full_mid skips two rows (2 * 16 bytes). half is
       * the v_lowpass output; the l2 helper pairs it with the unfiltered copy
       * at full_mid. */
2966  uint8_t full[336];
2967  uint8_t * const full_mid= full + 32;
2968  uint8_t half[256];
2969  copy_block16_mmi(full, src - stride*2, 16, stride, 21);
2970  put_h264_qpel16_v_lowpass_mmi(half, full_mid, 16, 16);
2971  ff_put_pixels16_l2_8_mmi(dst, full_mid, half, stride, 16, 16, 16);
2972 }
2973 
2975  ptrdiff_t stride)
2976 {
      /* Presumably ff_put_h264_qpel16_mc02_mmi; its name line is missing from
       * this listing. full is sized for 21 rows of 16 bytes, copied starting
       * two rows above src; the v_lowpass helper reads it at full_mid
       * (srcStride 16) and writes dst (dstStride stride). */
2977  uint8_t full[336];
2978  uint8_t * const full_mid= full + 32;
2979  copy_block16_mmi(full, src - stride*2, 16, stride, 21);
2980  put_h264_qpel16_v_lowpass_mmi(dst, full_mid, stride, 16);
2981 }
2982 
2984  ptrdiff_t stride)
2985 {
      /* Presumably ff_put_h264_qpel16_mc03_mmi; its name line is missing from
       * this listing. full is sized for 21 rows of 16 bytes, copied starting
       * two rows above src; full_mid skips two rows (2 * 16 bytes). */
2986  uint8_t full[336];
2987  uint8_t * const full_mid= full + 32;
2988  uint8_t half[256];
2989  copy_block16_mmi(full, src - stride*2, 16, stride, 21);
2990  put_h264_qpel16_v_lowpass_mmi(half, full_mid, 16, 16);
      /* Unfiltered operand: the copy one row below full_mid (pitch 16, so +16). */
2991  ff_put_pixels16_l2_8_mmi(dst, full_mid+16, half, stride, 16, 16, 16);
2992 }
2993 
2995  ptrdiff_t stride)
2996 {
      /* Presumably ff_put_h264_qpel16_mc11_mmi; its name line is missing from
       * this listing. halfH: h_lowpass of the rows at src. halfV: v_lowpass of
       * a 21-row copy taken from two rows above src. Both are 16-pitch scratch
       * blocks handed to the l2 helper. */
2997  uint8_t full[336];
2998  uint8_t * const full_mid= full + 32;
2999  uint8_t halfH[256];
3000  uint8_t halfV[256];
3001  put_h264_qpel16_h_lowpass_mmi(halfH, src, 16, stride);
3002  copy_block16_mmi(full, src - stride*2, 16, stride, 21);
3003  put_h264_qpel16_v_lowpass_mmi(halfV, full_mid, 16, 16);
3004  ff_put_pixels16_l2_8_mmi(dst, halfH, halfV, stride, 16, 16, 16);
3005 }
3006 
3008  ptrdiff_t stride)
3009 {
      /* Presumably ff_put_h264_qpel16_mc31_mmi; its name line is missing from
       * this listing. halfH: h_lowpass of the rows at src. halfV: v_lowpass of
       * a 21-row copy taken from two rows above src, shifted one byte right. */
3010  uint8_t full[336];
3011  uint8_t * const full_mid= full + 32;
3012  uint8_t halfH[256];
3013  uint8_t halfV[256];
3014  put_h264_qpel16_h_lowpass_mmi(halfH, src, 16, stride);
3015  copy_block16_mmi(full, src - stride*2 + 1, 16, stride, 21);
3016  put_h264_qpel16_v_lowpass_mmi(halfV, full_mid, 16, 16);
3017  ff_put_pixels16_l2_8_mmi(dst, halfH, halfV, stride, 16, 16, 16);
3018 }
3019 
3021  ptrdiff_t stride)
3022 {
      /* Presumably ff_put_h264_qpel16_mc13_mmi; its name line is missing from
       * this listing. halfH: h_lowpass of the rows starting at src + stride.
       * halfV: v_lowpass of a 21-row copy taken from two rows above src. */
3023  uint8_t full[336];
3024  uint8_t * const full_mid= full + 32;
3025  uint8_t halfH[256];
3026  uint8_t halfV[256];
3027  put_h264_qpel16_h_lowpass_mmi(halfH, src + stride, 16, stride);
3028  copy_block16_mmi(full, src - stride*2, 16, stride, 21);
3029  put_h264_qpel16_v_lowpass_mmi(halfV, full_mid, 16, 16);
3030  ff_put_pixels16_l2_8_mmi(dst, halfH, halfV, stride, 16, 16, 16);
3031 }
3032 
3034  ptrdiff_t stride)
3035 {
      /* Presumably ff_put_h264_qpel16_mc33_mmi; its name line is missing from
       * this listing. halfH: h_lowpass of the rows starting at src + stride.
       * halfV: v_lowpass of a 21-row copy taken from two rows above src,
       * shifted one byte right. */
3036  uint8_t full[336];
3037  uint8_t * const full_mid= full + 32;
3038  uint8_t halfH[256];
3039  uint8_t halfV[256];
3040  put_h264_qpel16_h_lowpass_mmi(halfH, src + stride, 16, stride);
3041  copy_block16_mmi(full, src - stride*2 + 1, 16, stride, 21);
3042  put_h264_qpel16_v_lowpass_mmi(halfV, full_mid, 16, 16);
3043  ff_put_pixels16_l2_8_mmi(dst, halfH, halfV, stride, 16, 16, 16);
3044 }
3045 
3047  ptrdiff_t stride)
3048 {
      /* Presumably ff_put_h264_qpel16_mc22_mmi; its name line is missing from
       * this listing. temp is the 16-bit scratch area passed as the
       * hv_lowpass helper's tmp (tmpStride 16); output goes to dst with pitch
       * stride.
       * NOTE(review): temp is uint16_t, but the helper's prototype takes
       * int16_t *tmp -- pointer-sign mismatch worth confirming. */
3049  uint16_t __attribute__ ((aligned(8))) temp[384];
3050 
3051  put_h264_qpel16_hv_lowpass_mmi(dst, temp, src, stride, 16, stride);
3052 }
3053 
3055  ptrdiff_t stride)
3056 {
      /* Presumably ff_put_h264_qpel16_mc21_mmi; its name line is missing from
       * this listing. temp is split in two: halfHV is its first 256 bytes
       * (16x16, pitch 16) and halfV is an int16_t view of the remaining 768
       * bytes, used as the hv_lowpass helper's tmp. */
3057  uint8_t __attribute__ ((aligned(8))) temp[1024];
3058  uint8_t *const halfHV = temp;
3059  int16_t *const halfV = (int16_t *) (temp + 256);
3060 
3061  put_h264_qpel16_hv_lowpass_mmi(halfHV, halfV, src, 16, 16, stride);
      /* h_lowpass_l2 takes src plus halfHV (pitch 16) as its second source. */
3062  put_h264_qpel16_h_lowpass_l2_mmi(dst, src, halfHV, stride, 16);
3063 }
3064 
3066  ptrdiff_t stride)
3067 {
      /* Presumably ff_put_h264_qpel16_mc23_mmi; its name line is missing from
       * this listing. temp is split in two: halfHV is its first 256 bytes
       * (16x16, pitch 16) and halfV is an int16_t view of the remaining 768
       * bytes, used as the hv_lowpass helper's tmp. */
3068  uint8_t __attribute__ ((aligned(8))) temp[1024];
3069  uint8_t *const halfHV = temp;
3070  int16_t *const halfV = (int16_t *) (temp + 256);
3071 
3072  put_h264_qpel16_hv_lowpass_mmi(halfHV, halfV, src, 16, 16, stride);
      /* h_lowpass_l2 takes the rows from src + stride plus halfHV. */
3073  put_h264_qpel16_h_lowpass_l2_mmi(dst, src + stride, halfHV, stride, 16);
3074 }
3075 
3077  ptrdiff_t stride)
3078 {
      /* Presumably ff_put_h264_qpel16_mc12_mmi; its name line is missing from
       * this listing. temp is split in two: halfHV is its first 256 bytes
       * (16x16, pitch 16) and halfV is an int16_t view of the remaining 768
       * bytes, filled by the hv_lowpass helper as its tmp. */
3079  uint8_t __attribute__ ((aligned(8))) temp[1024];
3080  uint8_t *const halfHV = temp;
3081  int16_t *const halfV = (int16_t *) (temp + 256);
3082 
3083  put_h264_qpel16_hv_lowpass_mmi(halfHV, halfV, src, 16, 16, stride);
      /* The shift5 l2 helper reads the 16-bit data from two elements in. */
3084  put_pixels16_l2_shift5_mmi(dst, halfV + 2, halfHV, stride, 16, 16);
3085 }
3086 
3088  ptrdiff_t stride)
3089 {
      /* Presumably ff_put_h264_qpel16_mc32_mmi; its name line is missing from
       * this listing. temp is split in two: halfHV is its first 256 bytes
       * (16x16, pitch 16) and halfV is an int16_t view of the remaining 768
       * bytes, filled by the hv_lowpass helper as its tmp. */
3090  uint8_t __attribute__ ((aligned(8))) temp[1024];
3091  uint8_t *const halfHV = temp;
3092  int16_t *const halfV = (int16_t *) (temp + 256);
3093 
3094  put_h264_qpel16_hv_lowpass_mmi(halfHV, halfV, src, 16, 16, stride);
      /* The shift5 l2 helper reads the 16-bit data from three elements in. */
3095  put_pixels16_l2_shift5_mmi(dst, halfV + 3, halfHV, stride, 16, 16);
3096 }
3097 
3098 //DEF_H264_MC_MMI(avg_, 16)
3100  ptrdiff_t stride)
3101 {
      /* Presumably ff_avg_h264_qpel16_mc00_mmi; its name line is missing from
       * this listing. No filtering: src goes straight to ff_avg_pixels16_8_mmi
       * with stride and a final argument of 16 (presumably the row count). */
3102  ff_avg_pixels16_8_mmi(dst, src, stride, 16);
3103 }
3104 
3106  ptrdiff_t stride)
3107 {
      /* Presumably ff_avg_h264_qpel16_mc10_mmi; its name line is missing from
       * this listing. half is the h_lowpass output for src, stored with a
       * 16-byte pitch; the avg l2 helper takes it together with the unfiltered
       * src (source pitches stride and 16, height 16). */
3108  uint8_t half[256];
3109  put_h264_qpel16_h_lowpass_mmi(half, src, 16, stride);
3110  ff_avg_pixels16_l2_8_mmi(dst, src, half, stride, stride, 16, 16);
3111 }
3112 
3114  ptrdiff_t stride)
3115 {
      /* Presumably ff_avg_h264_qpel16_mc20_mmi; its name line is missing from
       * this listing. The avg h_lowpass helper works from src directly onto
       * dst; both pitches are stride. */
3116  avg_h264_qpel16_h_lowpass_mmi(dst, src, stride, stride);
3117 }
3118 
3120  ptrdiff_t stride)
3121 {
      /* Presumably ff_avg_h264_qpel16_mc30_mmi; its name line is missing from
       * this listing. half is the h_lowpass output for src (pitch 16); the avg
       * l2 helper takes it together with the unfiltered pixels at src + 1. */
3122  uint8_t half[256];
3123  put_h264_qpel16_h_lowpass_mmi(half, src, 16, stride);
3124  ff_avg_pixels16_l2_8_mmi(dst, src+1, half, stride, stride, 16, 16);
3125 }
3126 
3128  ptrdiff_t stride)
3129 {
      /* Presumably ff_avg_h264_qpel16_mc01_mmi; its name line is missing from
       * this listing. full is sized for 21 rows of 16 bytes, copied starting
       * two rows above src; full_mid skips two rows (2 * 16 bytes). half is
       * the v_lowpass output; the avg l2 helper pairs it with the unfiltered
       * copy at full_mid. */
3130  uint8_t full[336];
3131  uint8_t * const full_mid= full + 32;
3132  uint8_t half[256];
3133  copy_block16_mmi(full, src - stride*2, 16, stride, 21);
3134  put_h264_qpel16_v_lowpass_mmi(half, full_mid, 16, 16);
3135  ff_avg_pixels16_l2_8_mmi(dst, full_mid, half, stride, 16, 16, 16);
3136 }
3137 
3139  ptrdiff_t stride)
3140 {
      /* Presumably ff_avg_h264_qpel16_mc02_mmi; its name line is missing from
       * this listing. full is sized for 21 rows of 16 bytes, copied starting
       * two rows above src; the avg v_lowpass helper reads it at full_mid
       * (pitch 16) and works on dst (pitch stride). */
3141  uint8_t full[336];
3142  uint8_t * const full_mid= full + 32;
3143  copy_block16_mmi(full, src - stride*2, 16, stride, 21);
3144  avg_h264_qpel16_v_lowpass_mmi(dst, full_mid, stride, 16);
3145 }
3146 
3148  ptrdiff_t stride)
3149 {
      /* Presumably ff_avg_h264_qpel16_mc03_mmi; its name line is missing from
       * this listing. full is sized for 21 rows of 16 bytes, copied starting
       * two rows above src; full_mid skips two rows (2 * 16 bytes). */
3150  uint8_t full[336];
3151  uint8_t * const full_mid= full + 32;
3152  uint8_t half[256];
3153  copy_block16_mmi(full, src - stride*2, 16, stride, 21);
3154  put_h264_qpel16_v_lowpass_mmi(half, full_mid, 16, 16);
      /* Unfiltered operand: the copy one row below full_mid (pitch 16, so +16). */
3155  ff_avg_pixels16_l2_8_mmi(dst, full_mid+16, half, stride, 16, 16, 16);
3156 }
3157 
3159  ptrdiff_t stride)
3160 {
      /* Presumably ff_avg_h264_qpel16_mc11_mmi; its name line is missing from
       * this listing. halfH: h_lowpass of the rows at src. halfV: v_lowpass of
       * a 21-row copy taken from two rows above src. Both are 16-pitch scratch
       * blocks handed to the avg l2 helper. */
3161  uint8_t full[336];
3162  uint8_t * const full_mid= full + 32;
3163  uint8_t halfH[256];
3164  uint8_t halfV[256];
3165  put_h264_qpel16_h_lowpass_mmi(halfH, src, 16, stride);
3166  copy_block16_mmi(full, src - stride*2, 16, stride, 21);
3167  put_h264_qpel16_v_lowpass_mmi(halfV, full_mid, 16, 16);
3168  ff_avg_pixels16_l2_8_mmi(dst, halfH, halfV, stride, 16, 16, 16);
3169 }
3170 
3172  ptrdiff_t stride)
3173 {
      /* Presumably ff_avg_h264_qpel16_mc31_mmi; its name line is missing from
       * this listing. halfH: h_lowpass of the rows at src. halfV: v_lowpass of
       * a 21-row copy taken from two rows above src, shifted one byte right. */
3174  uint8_t full[336];
3175  uint8_t * const full_mid= full + 32;
3176  uint8_t halfH[256];
3177  uint8_t halfV[256];
3178  put_h264_qpel16_h_lowpass_mmi(halfH, src, 16, stride);
3179  copy_block16_mmi(full, src - stride*2 + 1, 16, stride, 21);
3180  put_h264_qpel16_v_lowpass_mmi(halfV, full_mid, 16, 16);
3181  ff_avg_pixels16_l2_8_mmi(dst, halfH, halfV, stride, 16, 16, 16);
3182 }
3183 
3185  ptrdiff_t stride)
3186 {
      /* Presumably ff_avg_h264_qpel16_mc13_mmi; its name line is missing from
       * this listing. halfH: h_lowpass of the rows starting at src + stride.
       * halfV: v_lowpass of a 21-row copy taken from two rows above src. */
3187  uint8_t full[336];
3188  uint8_t * const full_mid= full + 32;
3189  uint8_t halfH[256];
3190  uint8_t halfV[256];
3191  put_h264_qpel16_h_lowpass_mmi(halfH, src + stride, 16, stride);
3192  copy_block16_mmi(full, src - stride*2, 16, stride, 21);
3193  put_h264_qpel16_v_lowpass_mmi(halfV, full_mid, 16, 16);
3194  ff_avg_pixels16_l2_8_mmi(dst, halfH, halfV, stride, 16, 16, 16);
3195 }
3196 
3198  ptrdiff_t stride)
3199 {
      /* Presumably ff_avg_h264_qpel16_mc33_mmi; its name line is missing from
       * this listing. halfH: h_lowpass of the rows starting at src + stride.
       * halfV: v_lowpass of a 21-row copy taken from two rows above src,
       * shifted one byte right. */
3200  uint8_t full[336];
3201  uint8_t * const full_mid= full + 32;
3202  uint8_t halfH[256];
3203  uint8_t halfV[256];
3204  put_h264_qpel16_h_lowpass_mmi(halfH, src + stride, 16, stride);
3205  copy_block16_mmi(full, src - stride*2 + 1, 16, stride, 21);
3206  put_h264_qpel16_v_lowpass_mmi(halfV, full_mid, 16, 16);
3207  ff_avg_pixels16_l2_8_mmi(dst, halfH, halfV, stride, 16, 16, 16);
3208 }
3209 
3211  ptrdiff_t stride)
3212 {
      /* Presumably ff_avg_h264_qpel16_mc22_mmi; its name line is missing from
       * this listing. temp is the 16-bit scratch area passed as the avg
       * hv_lowpass helper's tmp (tmpStride 16); dst is addressed with pitch
       * stride.
       * NOTE(review): temp is uint16_t, but the helper's prototype takes
       * int16_t *tmp -- pointer-sign mismatch worth confirming. */
3213  uint16_t __attribute__ ((aligned(8))) temp[384];
3214 
3215  avg_h264_qpel16_hv_lowpass_mmi(dst, temp, src, stride, 16, stride);
3216 }
3217 
3219  ptrdiff_t stride)
3220 {
      /* Presumably ff_avg_h264_qpel16_mc21_mmi; its name line is missing from
       * this listing. temp is split in two: halfHV is its first 256 bytes
       * (16x16, pitch 16) and halfV is an int16_t view of the remaining 768
       * bytes, used as the hv_lowpass helper's tmp. */
3221  uint8_t __attribute__ ((aligned(8))) temp[1024];
3222  uint8_t *const halfHV = temp;
3223  int16_t *const halfV = (int16_t *) (temp + 256);
3224 
3225  put_h264_qpel16_hv_lowpass_mmi(halfHV, halfV, src, 16, 16, stride);
      /* avg h_lowpass_l2 takes src plus halfHV as its src2 (src2Stride 16). */
3226  avg_h264_qpel16_h_lowpass_l2_mmi(dst, src, halfHV, stride, 16);
3227 }
3228 
3230  ptrdiff_t stride)
3231 {
      /* Presumably ff_avg_h264_qpel16_mc23_mmi; its name line is missing from
       * this listing. temp is split in two: halfHV is its first 256 bytes
       * (16x16, pitch 16) and halfV is an int16_t view of the remaining 768
       * bytes, used as the hv_lowpass helper's tmp. */
3232  uint8_t __attribute__ ((aligned(8))) temp[1024];
3233  uint8_t *const halfHV = temp;
3234  int16_t *const halfV = (int16_t *) (temp + 256);
3235 
3236  put_h264_qpel16_hv_lowpass_mmi(halfHV, halfV, src, 16, 16, stride);
      /* avg h_lowpass_l2 takes the rows from src + stride plus halfHV. */
3237  avg_h264_qpel16_h_lowpass_l2_mmi(dst, src + stride, halfHV, stride, 16);
3238 }
3239 
3241  ptrdiff_t stride)
3242 {
      /* Presumably ff_avg_h264_qpel16_mc12_mmi; its name line is missing from
       * this listing. temp is split in two: halfHV is its first 256 bytes
       * (16x16, pitch 16) and halfV is an int16_t view of the remaining 768
       * bytes, filled by the hv_lowpass helper as its tmp. */
3243  uint8_t __attribute__ ((aligned(8))) temp[1024];
3244  uint8_t *const halfHV = temp;
3245  int16_t *const halfV = (int16_t *) (temp + 256);
3246 
3247  put_h264_qpel16_hv_lowpass_mmi(halfHV, halfV, src, 16, 16, stride);
      /* The avg shift5 l2 helper reads the 16-bit data from two elements in. */
3248  avg_pixels16_l2_shift5_mmi(dst, halfV + 2, halfHV, stride, 16, 16);
3249 }
3250 
3252  ptrdiff_t stride)
3253 {
      /* Presumably ff_avg_h264_qpel16_mc32_mmi; its name line is missing from
       * this listing. temp is split in two: halfHV is its first 256 bytes
       * (16x16, pitch 16) and halfV is an int16_t view of the remaining 768
       * bytes, filled by the hv_lowpass helper as its tmp. */
3254  uint8_t __attribute__ ((aligned(8))) temp[1024];
3255  uint8_t *const halfHV = temp;
3256  int16_t *const halfV = (int16_t *) (temp + 256);
3257 
3258  put_h264_qpel16_hv_lowpass_mmi(halfHV, halfV, src, 16, 16, stride);
      /* The avg shift5 l2 helper reads the 16-bit data from three elements in. */
3259  avg_pixels16_l2_shift5_mmi(dst, halfV + 3, halfHV, stride, 16, 16);
3260 }
3261 
3262 #undef op2_avg
3263 #undef op2_put
void ff_put_h264_qpel16_mc02_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel4_mc31_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel8_mc30_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
#define mips_reg
Definition: asmdefs.h:44
void ff_avg_h264_qpel16_mc11_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel8_mc00_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel16_mc23_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
MIPS assembly defines from sys/asm.h but rewritten for use with C inline assembly (rather than from within .s files).
void ff_put_pixels8_l2_8_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
Definition: hpeldsp_mmi.c:426
void ff_put_h264_qpel4_mc20_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel16_mc03_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel4_mc23_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel8_mc00_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void put_h264_qpel4_v_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
Definition: h264qpel_mmi.c:409
else temp
Definition: vf_mcdeint.c:259
void ff_put_h264_qpel16_mc12_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel8_mc22_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void avg_h264_qpel16_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride, ptrdiff_t srcStride)
void ff_avg_h264_qpel8_mc31_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel8_mc11_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
const char * b
Definition: vf_curves.c:109
void ff_avg_h264_qpel8_mc10_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
const uint64_t ff_pw_5
Definition: constants.c:30
static void put_h264_qpel8or16_hv1_lowpass_mmi(int16_t *tmp, const uint8_t *src, ptrdiff_t tmpStride, ptrdiff_t srcStride, int size)
static void put_h264_qpel8_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)
void ff_put_h264_qpel4_mc33_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_pixels16_l2_8_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
Definition: hpeldsp_mmi.c:490
void ff_put_h264_qpel4_mc32_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void put_h264_qpel8_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride, ptrdiff_t srcStride)
static void avg_h264_qpel8_h_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
Definition: h264qpel_mmi.c:322
void ff_put_h264_qpel4_mc12_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void avg_h264_qpel8or16_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride, ptrdiff_t srcStride, int size)
void ff_avg_h264_qpel16_mc21_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel4_mc11_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel8_mc02_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel16_mc31_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel4_mc13_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel8_mc33_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
uint8_t
void ff_avg_h264_qpel4_mc10_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel8_mc33_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel16_mc31_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
#define PTR_ADDI
Definition: asmdefs.h:49
static void avg_h264_qpel16_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)
void ff_avg_h264_qpel16_mc32_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel4_mc02_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel8_mc32_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void avg_h264_qpel16_h_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
Definition: h264qpel_mmi.c:398
static void put_h264_qpel4_h_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
Definition: h264qpel_mmi.c:111
ptrdiff_t size
Definition: opengl_enc.c:101
void ff_avg_h264_qpel4_mc00_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel16_mc01_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel8_mc31_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel4_mc32_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel4_mc23_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_pixels8_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int32_t h)
void ff_avg_h264_qpel16_mc10_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void put_h264_qpel8or16_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride, ptrdiff_t srcStride, int size)
static void avg_h264_qpel8_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)
void ff_avg_pixels4_l2_8_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
Definition: hpeldsp_mmi.c:580
void ff_avg_h264_qpel4_mc12_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel4_mc01_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel4_mc11_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel16_mc12_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void put_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
void ff_avg_h264_qpel16_mc23_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void put_h264_qpel16_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride, ptrdiff_t srcStride)
void ff_avg_h264_qpel4_mc30_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
const uint64_t ff_pw_20
Definition: constants.c:36
void ff_avg_h264_qpel16_mc03_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel4_mc31_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void put_h264_qpel8or16_hv2_lowpass_mmi(uint8_t *dst, int16_t *tmp, ptrdiff_t dstStride, ptrdiff_t tmpStride, int size)
void ff_avg_h264_qpel4_mc01_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel8_mc01_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_pixels8_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int32_t h)
void ff_put_h264_qpel16_mc00_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel4_mc30_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void avg_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
Definition: h264qpel_mmi.c:971
void ff_avg_h264_qpel8_mc12_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_pixels16_l2_8_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
Definition: hpeldsp_mmi.c:740
void ff_put_h264_qpel8_mc32_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel8_mc13_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void avg_h264_qpel4_h_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
Definition: h264qpel_mmi.c:258
static void put_h264_qpel16_h_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
Definition: h264qpel_mmi.c:247
void ff_put_h264_qpel8_mc12_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void avg_h264_qpel8or16_hv2_lowpass_mmi(uint8_t *dst, int16_t *tmp, ptrdiff_t dstStride, ptrdiff_t tmpStride, int size)
static void copy_block16_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
Definition: h264qpel_mmi.c:79
static void put_h264_qpel16_v_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
Definition: h264qpel_mmi.c:846
void ff_put_h264_qpel4_mc03_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
#define src
Definition: vp9dsp.c:530
void ff_avg_pixels8_l2_8_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
Definition: hpeldsp_mmi.c:661
void ff_avg_h264_qpel16_mc20_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel8_mc11_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel16_mc33_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void avg_pixels8_l2_shift5_mmi(uint8_t *dst, int16_t *src16, const uint8_t *src8, ptrdiff_t dstStride, ptrdiff_t src8Stride, int b)
void ff_avg_h264_qpel16_mc13_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel4_mc10_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel8_mc01_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void avg_h264_qpel16_v_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
void ff_avg_h264_qpel8_mc03_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void avg_pixels16_l2_shift5_mmi(uint8_t *dst, int16_t *src16, const uint8_t *src8, ptrdiff_t dstStride, ptrdiff_t src8Stride, int b)
void ff_avg_h264_qpel4_mc22_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void put_pixels8_l2_shift5_mmi(uint8_t *dst, int16_t *src16, const uint8_t *src8, ptrdiff_t dstStride, ptrdiff_t src8Stride, int h)
void ff_put_h264_qpel8_mc22_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel16_mc22_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void put_h264_qpel8_h_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
Definition: h264qpel_mmi.c:172
void ff_put_h264_qpel16_mc11_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
#define PTR_ADDIU
Definition: asmdefs.h:48
static void put_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
Definition: h264qpel_mmi.c:527
static void avg_h264_qpel8_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride, ptrdiff_t srcStride)
void ff_avg_h264_qpel16_mc02_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_pixels16_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int32_t h)
void ff_avg_h264_qpel4_mc20_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel4_mc00_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel8_mc02_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel8_mc10_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel8_mc21_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_pixels16_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int32_t h)
void ff_avg_h264_qpel16_mc00_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel16_mc33_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void put_pixels16_l2_shift5_mmi(uint8_t *dst, int16_t *src16, const uint8_t *src8, ptrdiff_t dstStride, ptrdiff_t src8Stride, int h)
static void avg_h264_qpel4_v_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
Definition: h264qpel_mmi.c:857
#define INIT_CLIP
void ff_avg_h264_qpel4_mc03_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel8_mc30_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
#define op2_put(a, b)
Definition: h264qpel_mmi.c:110
static void copy_block8_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
Definition: h264qpel_mmi.c:55
GLint GLenum GLboolean GLsizei stride
Definition: opengl_enc.c:105
void ff_avg_h264_qpel8_mc20_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void avg_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
static void copy_block4_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
Definition: h264qpel_mmi.c:29
void ff_put_h264_qpel16_mc30_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void put_h264_qpel16_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)
void ff_avg_h264_qpel16_mc01_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
const uint64_t ff_pw_16
Definition: constants.c:34
void ff_avg_pixels4_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int32_t h)
void ff_avg_h264_qpel4_mc21_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel4_mc33_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel16_mc21_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel4_mc22_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel8_mc21_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_pixels4_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int32_t h)
void ff_put_h264_qpel16_mc13_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel16_mc30_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static uint8_t tmp[8]
Definition: des.c:38
void ff_put_pixels4_l2_8_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
Definition: hpeldsp_mmi.c:360
void ff_put_h264_qpel4_mc21_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel8_mc03_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel8_mc20_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel8_mc23_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel4_mc13_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel8_mc23_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel16_mc32_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
#define PTR_ADDU
Definition: asmdefs.h:47
void ff_put_h264_qpel16_mc10_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
#define op2_avg(a, b)
Definition: h264qpel_mmi.c:109
void ff_put_h264_qpel8_mc13_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel16_mc22_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel4_mc02_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel16_mc20_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)