FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
h264pred_mmi.c
Go to the documentation of this file.
1 /*
2  * Loongson SIMD optimized h264pred
3  *
4  * Copyright (c) 2015 Loongson Technology Corporation Limited
5  * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
6  * Zhang Shuangshuang <zhangshuangshuang@ict.ac.cn>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24 
25 #include "h264pred_mips.h"
27 #include "libavutil/mips/asmdefs.h"
28 #include "constants.h"
29 
/*
 * NOTE(review): the signature line (listing line 30) is missing from this
 * extract. The body only references `src` and `stride`; presumably this is
 * ff_pred16x16_vertical_8_mmi(uint8_t *src, ptrdiff_t stride) -- confirm
 * against h264pred_mips.h.
 *
 * Reads the 16 bytes located one stride above src and writes them to 16
 * consecutive rows starting at src (src is advanced by stride per row).
 */
31 {
32  double ftmp[2];
33  uint64_t tmp[1];
34 
35  __asm__ volatile (
/* tmp0 = loop counter: 8 iterations, two rows written per iteration */
36  "dli %[tmp0], 0x08 \n\t"
/* ftmp0 = bytes 0..7, ftmp1 = bytes 8..15 of the row at srcA (src - stride),
 * each fetched with a gsldlc1/gsldrc1 left/right load pair */
37  "gsldlc1 %[ftmp0], 0x07(%[srcA]) \n\t"
38  "gsldrc1 %[ftmp0], 0x00(%[srcA]) \n\t"
39  "gsldlc1 %[ftmp1], 0x0f(%[srcA]) \n\t"
40  "gsldrc1 %[ftmp1], 0x08(%[srcA]) \n\t"
41  "1: \n\t"
/* first row of the pair */
42  "gssdlc1 %[ftmp0], 0x07(%[src]) \n\t"
43  "gssdrc1 %[ftmp0], 0x00(%[src]) \n\t"
44  "gssdlc1 %[ftmp1], 0x0f(%[src]) \n\t"
45  "gssdrc1 %[ftmp1], 0x08(%[src]) \n\t"
46  PTR_ADDU "%[src], %[src], %[stride] \n\t"
/* second row of the pair */
47  "gssdlc1 %[ftmp0], 0x07(%[src]) \n\t"
48  "gssdrc1 %[ftmp0], 0x00(%[src]) \n\t"
49  "gssdlc1 %[ftmp1], 0x0f(%[src]) \n\t"
50  "gssdrc1 %[ftmp1], 0x08(%[src]) \n\t"
51  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
52  PTR_ADDU "%[src], %[src], %[stride] \n\t"
53  "bnez %[tmp0], 1b \n\t"
54  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
55  [tmp0]"=&r"(tmp[0]),
56  [src]"+&r"(src)
/* srcA is evaluated in C before the asm modifies src */
57  : [stride]"r"((mips_reg)stride), [srcA]"r"((mips_reg)(src-stride))
58  : "memory"
59  );
60 }
61 
/*
 * NOTE(review): the signature line (listing line 62) is missing from this
 * extract. The body only references `src` and `stride`; presumably this is
 * ff_pred16x16_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride) -- confirm
 * against h264pred_mips.h.
 *
 * For each of 16 rows: reads the byte immediately left of the row
 * (src - 1 + row*stride), multiplies it by ff_pb_1 and stores the product
 * across the 16 bytes of the row.
 * ff_pb_1 is presumably the byte-replication constant 0x0101010101010101
 * from constants.h -- confirm.
 *
 * NOTE(review): swl/swr are 32-bit word stores; each swl(+7)/swr(+0) pair
 * is used here to cover 8 bytes. That looks like it relies on the row
 * address being word aligned -- confirm, or check whether sdl/sdr was
 * intended.
 */
63 {
64  uint64_t tmp[3];
65  mips_reg addr[2];
66 
67  __asm__ volatile (
/* addr0 walks the left-neighbour column, addr1 walks the output rows */
68  PTR_ADDI "%[addr0], %[src], -0x01 \n\t"
69  PTR_ADDU "%[addr1], %[src], $0 \n\t"
/* tmp2 = loop counter: 8 iterations, two rows per iteration */
70  "dli %[tmp2], 0x08 \n\t"
71  "1: \n\t"
/* first row of the pair */
72  "lbu %[tmp0], 0x00(%[addr0]) \n\t"
73  "dmul %[tmp1], %[tmp0], %[ff_pb_1] \n\t"
74  "swl %[tmp1], 0x07(%[addr1]) \n\t"
75  "swr %[tmp1], 0x00(%[addr1]) \n\t"
76  "swl %[tmp1], 0x0f(%[addr1]) \n\t"
77  "swr %[tmp1], 0x08(%[addr1]) \n\t"
78  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
79  PTR_ADDU "%[addr1], %[addr1], %[stride] \n\t"
/* second row of the pair */
80  "lbu %[tmp0], 0x00(%[addr0]) \n\t"
81  "dmul %[tmp1], %[tmp0], %[ff_pb_1] \n\t"
82  "swl %[tmp1], 0x07(%[addr1]) \n\t"
83  "swr %[tmp1], 0x00(%[addr1]) \n\t"
84  "swl %[tmp1], 0x0f(%[addr1]) \n\t"
85  "swr %[tmp1], 0x08(%[addr1]) \n\t"
86  "daddi %[tmp2], %[tmp2], -0x01 \n\t"
87  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
88  PTR_ADDU "%[addr1], %[addr1], %[stride] \n\t"
89  "bnez %[tmp2], 1b \n\t"
90  : [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
91  [tmp2]"=&r"(tmp[2]),
92  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1])
93  : [src]"r"((mips_reg)src), [stride]"r"((mips_reg)stride),
94  [ff_pb_1]"r"(ff_pb_1)
95  : "memory"
96  );
97 }
98 
/*
 * NOTE(review): the signature line (listing line 99) is missing from this
 * extract. The body only references `src` and `stride`; presumably this is
 * ff_pred16x16_dc_8_mmi(uint8_t *src, ptrdiff_t stride) -- confirm against
 * h264pred_mips.h.
 *
 * Sums the 16 bytes in the column left of the block and the 16 bytes in the
 * row above it, computes (sum + 16) >> 5, multiplies that by ff_pb_1 and
 * stores the result across 16 rows of 16 bytes starting at src.
 * ff_pb_1 is presumably the byte-replication constant from constants.h --
 * confirm.
 *
 * addr[1] is bound as an output operand but never referenced in the
 * template.
 */
100 {
101  uint64_t tmp[4];
102  mips_reg addr[2];
103 
104  __asm__ volatile (
/* pass 1: tmp3 = sum of src[-1 + k*stride], k = 0..15 (8 iterations x 2) */
105  PTR_ADDI "%[addr0], %[src], -0x01 \n\t"
106  "dli %[tmp0], 0x08 \n\t"
107  "xor %[tmp3], %[tmp3], %[tmp3] \n\t"
108  "1: \n\t"
109  "lbu %[tmp1], 0x00(%[addr0]) \n\t"
110  "daddu %[tmp3], %[tmp3], %[tmp1] \n\t"
111  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
112  "lbu %[tmp1], 0x00(%[addr0]) \n\t"
113  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
114  "daddu %[tmp3], %[tmp3], %[tmp1] \n\t"
115  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
116  "bnez %[tmp0], 1b \n\t"
117 
/* pass 2: tmp3 += src[k - stride], k = 0..15 (8 iterations x 2) */
118  "dli %[tmp0], 0x08 \n\t"
119  PTR_SUBU "%[addr0], %[src], %[stride] \n\t"
120  "2: \n\t"
121  "lbu %[tmp1], 0x00(%[addr0]) \n\t"
122  "daddu %[tmp3], %[tmp3], %[tmp1] \n\t"
123  PTR_ADDIU "%[addr0], %[addr0], 0x01 \n\t"
124  "lbu %[tmp1], 0x00(%[addr0]) \n\t"
125  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
126  "daddu %[tmp3], %[tmp3], %[tmp1] \n\t"
127  PTR_ADDIU "%[addr0], %[addr0], 0x01 \n\t"
128  "bnez %[tmp0], 2b \n\t"
129 
/* tmp3 = (sum + 16) >> 5, then tmp2 = tmp3 * ff_pb_1 */
130  "daddiu %[tmp3], %[tmp3], 0x10 \n\t"
131  "dsra %[tmp3], 0x05 \n\t"
132  "dmul %[tmp2], %[tmp3], %[ff_pb_1] \n\t"
/* pass 3: store tmp2 over 16 rows (8 iterations x 2 rows, 16 bytes each) */
133  PTR_ADDU "%[addr0], %[src], $0 \n\t"
134  "dli %[tmp0], 0x08 \n\t"
135  "3: \n\t"
136  "swl %[tmp2], 0x07(%[addr0]) \n\t"
137  "swr %[tmp2], 0x00(%[addr0]) \n\t"
138  "swl %[tmp2], 0x0f(%[addr0]) \n\t"
139  "swr %[tmp2], 0x08(%[addr0]) \n\t"
140  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
141  "swl %[tmp2], 0x07(%[addr0]) \n\t"
142  "swr %[tmp2], 0x00(%[addr0]) \n\t"
143  "swl %[tmp2], 0x0f(%[addr0]) \n\t"
144  "swr %[tmp2], 0x08(%[addr0]) \n\t"
145  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
146  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
147  "bnez %[tmp0], 3b \n\t"
148  : [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
149  [tmp2]"=&r"(tmp[2]), [tmp3]"=&r"(tmp[3]),
150  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1])
151  : [src]"r"((mips_reg)src), [stride]"r"((mips_reg)stride),
152  [ff_pb_1]"r"(ff_pb_1)
153  : "memory"
154  );
155 }
156 
/*
 * 8x8 "top DC" prediction: fills the 8x8 block at src with a single byte
 * value derived from the row above the block.
 *
 * src          - top-left byte of the 8x8 block to write
 * has_topleft  - when zero, lane 0 of the left-shifted neighbour row is
 *                patched from the centred row (pinsrh_0 below)
 * has_topright - when zero, lane 3 of the right-shifted neighbour row's high
 *                half is patched from the centred row (pinsrh_3 below)
 * stride       - byte distance between consecutive rows
 *
 * The first asm statement computes, per lane, (a + 2*b + c + 2) >> 2 over
 * the three 8-byte loads at src-stride-1, src-stride and src-stride+1, sums
 * the eight results, and forms dc = ((sum + 4) >> 3) * ff_pb_1.
 * The second asm statement writes dc to 8 rows of 8 bytes.
 *
 * ff_pb_1 / ff_pw_2 are presumably the replicated-byte-1 and
 * replicated-halfword-2 constants from constants.h -- confirm.
 * tmp[2] is declared but never bound to an operand.
 */
157 void ff_pred8x8l_top_dc_8_mmi(uint8_t *src, int has_topleft,
158  int has_topright, ptrdiff_t stride)
159 {
160  uint32_t dc;
161  double ftmp[11];
162  mips_reg tmp[3];
163 
164  __asm__ volatile (
/* ftmp0 = 0 (used as the zero source when widening bytes to halfwords) */
165  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
/* ftmp10 / ftmp9 / ftmp8 = 8 bytes at src-stride-1 / src-stride / src-stride+1 */
166  "gsldlc1 %[ftmp10], 0x07(%[srcA]) \n\t"
167  "gsldrc1 %[ftmp10], 0x00(%[srcA]) \n\t"
168  "gsldlc1 %[ftmp9], 0x07(%[src0]) \n\t"
169  "gsldrc1 %[ftmp9], 0x00(%[src0]) \n\t"
170  "gsldlc1 %[ftmp8], 0x07(%[src1]) \n\t"
171  "gsldrc1 %[ftmp8], 0x00(%[src1]) \n\t"
172 
/* widen each 8-byte row into low/high groups of four halfwords */
173  "punpcklbh %[ftmp7], %[ftmp10], %[ftmp0] \n\t"
174  "punpckhbh %[ftmp6], %[ftmp10], %[ftmp0] \n\t"
175  "punpcklbh %[ftmp5], %[ftmp9], %[ftmp0] \n\t"
176  "punpckhbh %[ftmp4], %[ftmp9], %[ftmp0] \n\t"
177  "punpcklbh %[ftmp3], %[ftmp8], %[ftmp0] \n\t"
178  "punpckhbh %[ftmp2], %[ftmp8], %[ftmp0] \n\t"
/* the pinsrh_0 patch is skipped when has_topleft is non-zero */
179  "bnez %[has_topleft], 1f \n\t"
180  "pinsrh_0 %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
181 
182  "1: \n\t"
/* the pinsrh_3 patch is skipped when has_topright is non-zero */
183  "bnez %[has_topright], 2f \n\t"
184  "pinsrh_3 %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
185 
186  "2: \n\t"
/* ftmp1 = shift count 2 */
187  "dli %[tmp0], 0x02 \n\t"
188  "mtc1 %[tmp0], %[ftmp1] \n\t"
/* per lane: (left + centre*ff_pw_2 + right + ff_pw_2) >> 2 */
189  "pmullh %[ftmp5], %[ftmp5], %[ff_pw_2] \n\t"
190  "pmullh %[ftmp4], %[ftmp4], %[ff_pw_2] \n\t"
191  "paddh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
192  "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
193  "paddh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
194  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
195  "paddh %[ftmp7], %[ftmp7], %[ff_pw_2] \n\t"
196  "paddh %[ftmp6], %[ftmp6], %[ff_pw_2] \n\t"
197  "psrah %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
198  "psrah %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
/* pack back to 8 bytes, sum them (biadd), then dc = ((sum + 4) >> 3) * ff_pb_1 */
199  "packushb %[ftmp9], %[ftmp7], %[ftmp6] \n\t"
200  "biadd %[ftmp10], %[ftmp9] \n\t"
201  "mfc1 %[tmp1], %[ftmp10] \n\t"
202  "addiu %[tmp1], %[tmp1], 0x04 \n\t"
203  "srl %[tmp1], %[tmp1], 0x03 \n\t"
204  "mul %[dc], %[tmp1], %[ff_pb_1] \n\t"
205  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
206  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
207  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
208  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
209  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
210  [ftmp10]"=&f"(ftmp[10]),
211  [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
212  [dc]"=r"(dc)
213  : [srcA]"r"((mips_reg)(src-stride-1)),
214  [src0]"r"((mips_reg)(src-stride)),
215  [src1]"r"((mips_reg)(src-stride+1)),
216  [has_topleft]"r"(has_topleft), [has_topright]"r"(has_topright),
217  [ff_pb_1]"r"(ff_pb_1), [ff_pw_2]"f"(ff_pw_2)
218  : "memory"
219  );
220 
221  __asm__ volatile (
/* tmp0 = loop counter: 2 iterations, four rows per iteration */
222  "dli %[tmp0], 0x02 \n\t"
/* ftmp0 = the 32-bit dc word duplicated into both halves (8 bytes) */
223  "punpcklwd %[ftmp0], %[dc], %[dc] \n\t"
224  "1: \n\t"
/* rows at src and src+stride, then advance src by two strides */
225  "gssdlc1 %[ftmp0], 0x07(%[src]) \n\t"
226  "gssdrc1 %[ftmp0], 0x00(%[src]) \n\t"
227  "gssdxc1 %[ftmp0], 0x00(%[src], %[stride]) \n\t"
228  PTR_ADDU "%[src], %[src], %[stride] \n\t"
229  PTR_ADDU "%[src], %[src], %[stride] \n\t"
230  "gssdlc1 %[ftmp0], 0x07(%[src]) \n\t"
231  "gssdrc1 %[ftmp0], 0x00(%[src]) \n\t"
232  "gssdxc1 %[ftmp0], 0x00(%[src], %[stride]) \n\t"
233  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
234  PTR_ADDU "%[src], %[src], %[stride] \n\t"
235  PTR_ADDU "%[src], %[src], %[stride] \n\t"
236  "bnez %[tmp0], 1b \n\t"
237  : [ftmp0]"=&f"(ftmp[0]), [tmp0]"=&r"(tmp[0]),
238  [src]"+&r"(src)
239  : [dc]"f"(dc), [stride]"r"((mips_reg)stride)
240  : "memory"
241  );
242 }
243 
/*
 * 8x8 DC prediction using both the left column and the top row: fills the
 * 8x8 block at src with one byte value.
 *
 * src          - top-left byte of the 8x8 block to write
 * has_topleft  - selects src[-1 - stride] (non-zero) or src[-1] (zero) as
 *                the first tap of l0; in the asm, a zero value triggers the
 *                pinsrh_0 patch of the left-shifted top row
 * has_topright - when zero, the asm patches a lane from the centred row
 * stride       - byte distance between consecutive rows
 *
 * l0..l7 are the left-column bytes filtered in C as (a + 2*b + c + 2) >> 2.
 * The first asm statement applies the same 3-tap form to the row above and
 * returns the sum of the eight filtered bytes in dc2. The final value is
 * ((dc1 + dc2 + 8) >> 4) replicated into four bytes, which the second asm
 * statement writes to 8 rows of 8 bytes.
 */
244 void ff_pred8x8l_dc_8_mmi(uint8_t *src, int has_topleft, int has_topright,
245  ptrdiff_t stride)
246 {
247  uint32_t dc, dc1, dc2;
248  double ftmp[14];
249  mips_reg tmp[1];
250 
/* filtered left-neighbour column; l7 repeats the last sample as its third tap */
251  const int l0 = ((has_topleft ? src[-1+-1*stride] : src[-1+0*stride]) + 2*src[-1+0*stride] + src[-1+1*stride] + 2) >> 2;
252  const int l1 = (src[-1+0*stride] + 2*src[-1+1*stride] + src[-1+2*stride] + 2) >> 2;
253  const int l2 = (src[-1+1*stride] + 2*src[-1+2*stride] + src[-1+3*stride] + 2) >> 2;
254  const int l3 = (src[-1+2*stride] + 2*src[-1+3*stride] + src[-1+4*stride] + 2) >> 2;
255  const int l4 = (src[-1+3*stride] + 2*src[-1+4*stride] + src[-1+5*stride] + 2) >> 2;
256  const int l5 = (src[-1+4*stride] + 2*src[-1+5*stride] + src[-1+6*stride] + 2) >> 2;
257  const int l6 = (src[-1+5*stride] + 2*src[-1+6*stride] + src[-1+7*stride] + 2) >> 2;
258  const int l7 = (src[-1+6*stride] + 2*src[-1+7*stride] + src[-1+7*stride] + 2) >> 2;
259 
260  __asm__ volatile (
/* ftmp4 / ftmp5 / ftmp6 = 8 bytes at src-stride-1 / src-stride / src-stride+1 */
261  "gsldlc1 %[ftmp4], 0x07(%[srcA]) \n\t"
262  "gsldrc1 %[ftmp4], 0x00(%[srcA]) \n\t"
263  "gsldlc1 %[ftmp5], 0x07(%[src0]) \n\t"
264  "gsldrc1 %[ftmp5], 0x00(%[src0]) \n\t"
265  "gsldlc1 %[ftmp6], 0x07(%[src1]) \n\t"
266  "gsldrc1 %[ftmp6], 0x00(%[src1]) \n\t"
267  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
268  "dli %[tmp0], 0x03 \n\t"
/* widen each row into low/high groups of four halfwords; ftmp1 = 3 */
269  "punpcklbh %[ftmp7], %[ftmp4], %[ftmp0] \n\t"
270  "punpckhbh %[ftmp8], %[ftmp4], %[ftmp0] \n\t"
271  "mtc1 %[tmp0], %[ftmp1] \n\t"
272  "punpcklbh %[ftmp9], %[ftmp5], %[ftmp0] \n\t"
273  "punpckhbh %[ftmp10], %[ftmp5], %[ftmp0] \n\t"
274  "punpcklbh %[ftmp11], %[ftmp6], %[ftmp0] \n\t"
275  "punpckhbh %[ftmp12], %[ftmp6], %[ftmp0] \n\t"
/* NOTE(review): lane shuffles/inserts on the high halves with selector 3;
 * exact lane semantics of pshufh/pinsrh_3 are not visible in this file --
 * confirm against the Loongson MMI documentation. */
276  "pshufh %[ftmp3], %[ftmp8], %[ftmp1] \n\t"
277  "pshufh %[ftmp13], %[ftmp12], %[ftmp1] \n\t"
278  "pinsrh_3 %[ftmp8], %[ftmp8], %[ftmp13] \n\t"
279  "pinsrh_3 %[ftmp12], %[ftmp12], %[ftmp3] \n\t"
/* the pinsrh_0 patch is skipped when has_topleft is non-zero */
280  "bnez %[has_topleft], 1f \n\t"
281  "pinsrh_0 %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
282 
283  "1: \n\t"
/* the pshufh/pinsrh_3 patch is skipped when has_topright is non-zero */
284  "bnez %[has_topright], 2f \n\t"
285  "pshufh %[ftmp13], %[ftmp10], %[ftmp1] \n\t"
286  "pinsrh_3 %[ftmp8], %[ftmp8], %[ftmp13] \n\t"
287 
288  "2: \n\t"
/* ftmp1 = 2 (shift count); ftmp2 = ftmp1 shuffled with selector 0,
 * used below both as the multiplier and as the rounding term */
289  "dli %[tmp0], 0x02 \n\t"
290  "mtc1 %[tmp0], %[ftmp1] \n\t"
291  "pshufh %[ftmp2], %[ftmp1], %[ftmp0] \n\t"
/* per lane: (left + centre*ftmp2 + right + ftmp2) >> 2 */
292  "pmullh %[ftmp9], %[ftmp9], %[ftmp2] \n\t"
293  "pmullh %[ftmp10], %[ftmp10], %[ftmp2] \n\t"
294  "paddh %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
295  "paddh %[ftmp8], %[ftmp8], %[ftmp10] \n\t"
296  "paddh %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
297  "paddh %[ftmp8], %[ftmp8], %[ftmp12] \n\t"
298  "paddh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
299  "paddh %[ftmp8], %[ftmp8], %[ftmp2] \n\t"
300  "psrah %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
301  "psrah %[ftmp8], %[ftmp8], %[ftmp1] \n\t"
/* pack to 8 bytes and sum them into dc2 */
302  "packushb %[ftmp5], %[ftmp7], %[ftmp8] \n\t"
303  "biadd %[ftmp4], %[ftmp5] \n\t"
304  "mfc1 %[dc2], %[ftmp4] \n\t"
305  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
306  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
307  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
308  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
309  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
310  [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
311  [ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]),
312  [tmp0]"=&r"(tmp[0]), [dc2]"=r"(dc2)
313  : [srcA]"r"((mips_reg)(src-stride-1)),
314  [src0]"r"((mips_reg)(src-stride)),
315  [src1]"r"((mips_reg)(src-stride+1)),
316  [has_topleft]"r"(has_topleft), [has_topright]"r"(has_topright)
317  : "memory"
318  );
319 
/* average of the 16 filtered neighbours, rounded, replicated into 4 bytes */
320  dc1 = l0+l1+l2+l3+l4+l5+l6+l7;
321  dc = ((dc1+dc2+8)>>4)*0x01010101U;
322 
323  __asm__ volatile (
/* tmp0 = loop counter: 2 iterations, four rows per iteration */
324  "dli %[tmp0], 0x02 \n\t"
/* ftmp0 = the 32-bit dc word duplicated into both halves (8 bytes) */
325  "punpcklwd %[ftmp0], %[dc], %[dc] \n\t"
326  "1: \n\t"
327  "gssdlc1 %[ftmp0], 0x07(%[src]) \n\t"
328  "gssdrc1 %[ftmp0], 0x00(%[src]) \n\t"
329  "gssdxc1 %[ftmp0], 0x00(%[src], %[stride]) \n\t"
330  PTR_ADDU "%[src], %[src], %[stride] \n\t"
331  PTR_ADDU "%[src], %[src], %[stride] \n\t"
332  "gssdlc1 %[ftmp0], 0x07(%[src]) \n\t"
333  "gssdrc1 %[ftmp0], 0x00(%[src]) \n\t"
334  "gssdxc1 %[ftmp0], 0x00(%[src], %[stride]) \n\t"
335  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
336  PTR_ADDU "%[src], %[src], %[stride] \n\t"
337  PTR_ADDU "%[src], %[src], %[stride] \n\t"
338  "bnez %[tmp0], 1b \n\t"
339  : [ftmp0]"=&f"(ftmp[0]), [tmp0]"=&r"(tmp[0]),
340  [src]"+&r"(src)
341  : [dc]"f"(dc), [stride]"r"((mips_reg)stride)
342  : "memory"
343  );
344 }
345 
/*
 * 8x8 vertical prediction: filters the row above the block per lane as
 * (a + 2*b + c + 2) >> 2 and writes the resulting 8 bytes to 8 rows.
 *
 * src          - top-left byte of the 8x8 block to write
 * has_topleft  - when zero, lane 0 of the left-shifted neighbour row is
 *                patched from the centred row (pinsrh_0 below)
 * has_topright - when zero, lane 3 of the right-shifted neighbour row's high
 *                half is patched from the centred row (pinsrh_3 below)
 * stride       - byte distance between consecutive rows
 *
 * Operand fixes relative to the previous revision:
 *  - In the first asm statement `src` is only read (store address for
 *    sdc1), so it is now bound as an input. It used to be a write-only
 *    "=r" output, which let the compiler hand the template an
 *    uninitialised register and then overwrite `src` with it.
 *  - The second asm statement used to store %[ftmp0], a write-only output
 *    it never set. It now receives the packed row produced by the first
 *    statement (ftmp[4]) as the input operand %[row].
 */
346 void ff_pred8x8l_vertical_8_mmi(uint8_t *src, int has_topleft,
347  int has_topright, ptrdiff_t stride)
348 {
349  double ftmp[12];
350  mips_reg tmp[1];
351 
352  __asm__ volatile (
/* ftmp0 = 0 (zero source when widening bytes to halfwords) */
353  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
/* ftmp3 / ftmp4 / ftmp5 = 8 bytes at src-stride-1 / src-stride / src-stride+1 */
354  "gsldlc1 %[ftmp3], 0x07(%[srcA]) \n\t"
355  "gsldrc1 %[ftmp3], 0x00(%[srcA]) \n\t"
356  "gsldlc1 %[ftmp4], 0x07(%[src0]) \n\t"
357  "gsldrc1 %[ftmp4], 0x00(%[src0]) \n\t"
358  "gsldlc1 %[ftmp5], 0x07(%[src1]) \n\t"
359  "gsldrc1 %[ftmp5], 0x00(%[src1]) \n\t"
360  "punpcklbh %[ftmp6], %[ftmp3], %[ftmp0] \n\t"
361  "punpckhbh %[ftmp7], %[ftmp3], %[ftmp0] \n\t"
362  "punpcklbh %[ftmp8], %[ftmp4], %[ftmp0] \n\t"
363  "punpckhbh %[ftmp9], %[ftmp4], %[ftmp0] \n\t"
364  "punpcklbh %[ftmp10], %[ftmp5], %[ftmp0] \n\t"
365  "punpckhbh %[ftmp11], %[ftmp5], %[ftmp0] \n\t"
/* the pinsrh_0 patch is skipped when has_topleft is non-zero */
366  "bnez %[has_topleft], 1f \n\t"
367  "pinsrh_0 %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
368 
369  "1: \n\t"
/* the pinsrh_3 patch is skipped when has_topright is non-zero */
370  "bnez %[has_topright], 2f \n\t"
371  "pinsrh_3 %[ftmp11], %[ftmp11], %[ftmp9] \n\t"
372 
373  "2: \n\t"
/* ftmp1 = 2 (shift count); ftmp2 = multiplier / rounding term derived from it */
374  "dli %[tmp0], 0x02 \n\t"
375  "mtc1 %[tmp0], %[ftmp1] \n\t"
376  "pshufh %[ftmp2], %[ftmp1], %[ftmp0] \n\t"
/* per lane: (left + centre*ftmp2 + right + ftmp2) >> 2 */
377  "pmullh %[ftmp8], %[ftmp8], %[ftmp2] \n\t"
378  "pmullh %[ftmp9], %[ftmp9], %[ftmp2] \n\t"
379  "paddh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
380  "paddh %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
381  "paddh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
382  "paddh %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
383  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
384  "paddh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
385  "psrah %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
386  "psrah %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
/* ftmp4 = filtered row packed back to 8 bytes; written to the first row */
387  "packushb %[ftmp4], %[ftmp6], %[ftmp7] \n\t"
388  "sdc1 %[ftmp4], 0x00(%[src]) \n\t"
389  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
390  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
391  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
392  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
393  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
394  [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
395  [tmp0]"=&r"(tmp[0])
/* src is read-only here: it must be an input, not an output */
396  : [src]"r"((mips_reg)src),
397  [srcA]"r"((mips_reg)(src-stride-1)),
398  [src0]"r"((mips_reg)(src-stride)),
399  [src1]"r"((mips_reg)(src-stride+1)),
400  [has_topleft]"r"(has_topleft), [has_topright]"r"(has_topright)
401  : "memory"
402  );
403 
404  __asm__ volatile (
/* tmp0 = loop counter: 2 iterations, four rows per iteration (8 rows;
 * the first row is rewritten with the value it already holds) */
405  "dli %[tmp0], 0x02 \n\t"
406  "1: \n\t"
407  "gssdlc1 %[row], 0x07(%[src]) \n\t"
408  "gssdrc1 %[row], 0x00(%[src]) \n\t"
409  PTR_ADDU "%[src], %[src], %[stride] \n\t"
410  "gssdlc1 %[row], 0x07(%[src]) \n\t"
411  "gssdrc1 %[row], 0x00(%[src]) \n\t"
412  PTR_ADDU "%[src], %[src], %[stride] \n\t"
413  "gssdlc1 %[row], 0x07(%[src]) \n\t"
414  "gssdrc1 %[row], 0x00(%[src]) \n\t"
415  PTR_ADDU "%[src], %[src], %[stride] \n\t"
416  "gssdlc1 %[row], 0x07(%[src]) \n\t"
417  "gssdrc1 %[row], 0x00(%[src]) \n\t"
418  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
419  PTR_ADDU "%[src], %[src], %[stride] \n\t"
420  "bnez %[tmp0], 1b \n\t"
421  : [tmp0]"=&r"(tmp[0]),
422  [src]"+&r"(src)
/* row = packed filtered row computed by the first asm statement */
423  : [row]"f"(ftmp[4]), [stride]"r"((mips_reg)stride)
424  : "memory"
425  );
426 }
427 
/*
 * 4x4 DC prediction: fills the 4x4 block at src with the rounded average of
 * the four bytes above it and the four bytes to its left.
 *
 * src      - top-left byte of the 4x4 block to write
 * topright - not referenced by this implementation
 * stride   - byte distance between consecutive rows
 *
 * dc is computed in C as (sum of 8 neighbours + 4) >> 3. The asm multiplies
 * it by ff_pb_1 (presumably the byte-replication constant from constants.h
 * -- confirm) and stores the 32-bit result at src + k*stride for k = 0..3.
 */
428 void ff_pred4x4_dc_8_mmi(uint8_t *src, const uint8_t *topright,
429  ptrdiff_t stride)
430 {
431  const int dc = (src[-stride] + src[1-stride] + src[2-stride]
432  + src[3-stride] + src[-1+0*stride] + src[-1+1*stride]
433  + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
434  uint64_t tmp[2];
435  mips_reg addr[1];
436 
437  __asm__ volatile (
/* tmp1 = dc * ff_pb_1 */
438  PTR_ADDU "%[tmp0], %[dc], $0 \n\t"
439  "dmul %[tmp1], %[tmp0], %[ff_pb_1] \n\t"
/* addr0 = running row offset (0, stride, 2*stride, 3*stride) */
440  "xor %[addr0], %[addr0], %[addr0] \n\t"
441  "gsswx %[tmp1], 0x00(%[src], %[addr0]) \n\t"
442  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
443  "gsswx %[tmp1], 0x00(%[src], %[addr0]) \n\t"
444  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
445  "gsswx %[tmp1], 0x00(%[src], %[addr0]) \n\t"
446  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
447  "gsswx %[tmp1], 0x00(%[src], %[addr0]) \n\t"
448  : [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
449  [addr0]"=&r"(addr[0])
450  : [src]"r"((mips_reg)src), [stride]"r"((mips_reg)stride),
451  [dc]"r"(dc), [ff_pb_1]"r"(ff_pb_1)
452  : "memory"
453  );
454 }
455 
/*
 * NOTE(review): the signature line (listing line 456) is missing from this
 * extract. The body only references `src` and `stride`; presumably this is
 * ff_pred8x8_vertical_8_mmi(uint8_t *src, ptrdiff_t stride) -- confirm
 * against h264pred_mips.h.
 *
 * Reads the 8 bytes located one stride above src and writes them to 8
 * consecutive rows starting at src.
 */
457 {
458  uint64_t tmp[2];
459  mips_reg addr[2];
460 
461  __asm__ volatile (
/* addr0 = row above the block, addr1 = output row cursor */
462  PTR_SUBU "%[addr0], %[src], %[stride] \n\t"
463  PTR_ADDU "%[addr1], %[src], $0 \n\t"
/* tmp0 = 8 bytes of the row above (ldl/ldr left/right load pair) */
464  "ldl %[tmp0], 0x07(%[addr0]) \n\t"
465  "ldr %[tmp0], 0x00(%[addr0]) \n\t"
/* tmp1 = loop counter: 4 iterations, two rows per iteration */
466  "dli %[tmp1], 0x04 \n\t"
467  "1: \n\t"
468  "sdl %[tmp0], 0x07(%[addr1]) \n\t"
469  "sdr %[tmp0], 0x00(%[addr1]) \n\t"
/* two-operand form: addr1 += stride */
470  PTR_ADDU "%[addr1], %[stride] \n\t"
471  "sdl %[tmp0], 0x07(%[addr1]) \n\t"
472  "sdr %[tmp0], 0x00(%[addr1]) \n\t"
473  "daddi %[tmp1], -0x01 \n\t"
474  PTR_ADDU "%[addr1], %[stride] \n\t"
475  "bnez %[tmp1], 1b \n\t"
476  : [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
477  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1])
478  : [src]"r"((mips_reg)src), [stride]"r"((mips_reg)stride)
479  : "memory"
480  );
481 }
482 
/*
 * NOTE(review): the signature line (listing line 483) is missing from this
 * extract. The body only references `src` and `stride`; presumably this is
 * ff_pred8x8_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride) -- confirm
 * against h264pred_mips.h.
 *
 * For each of 8 rows: reads the byte immediately left of the row,
 * multiplies it by ff_pb_1 (presumably the byte-replication constant from
 * constants.h -- confirm) and stores the product over the row with one
 * swl(+7)/swr(+0) pair.
 *
 * NOTE(review): swl/swr are 32-bit word stores; covering 8 bytes with one
 * pair looks like it relies on the row address being word aligned --
 * confirm.
 */
484 {
485  uint64_t tmp[3];
486  mips_reg addr[2];
487 
488  __asm__ volatile (
/* addr0 walks the left-neighbour column, addr1 walks the output rows */
489  PTR_ADDI "%[addr0], %[src], -0x01 \n\t"
490  PTR_ADDU "%[addr1], %[src], $0 \n\t"
/* tmp0 = loop counter: 4 iterations, two rows per iteration */
491  "dli %[tmp0], 0x04 \n\t"
492  "1: \n\t"
493  "lbu %[tmp1], 0x00(%[addr0]) \n\t"
494  "dmul %[tmp2], %[tmp1], %[ff_pb_1] \n\t"
495  "swl %[tmp2], 0x07(%[addr1]) \n\t"
496  "swr %[tmp2], 0x00(%[addr1]) \n\t"
497  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
498  PTR_ADDU "%[addr1], %[addr1], %[stride] \n\t"
499  "lbu %[tmp1], 0x00(%[addr0]) \n\t"
500  "dmul %[tmp2], %[tmp1], %[ff_pb_1] \n\t"
501  "swl %[tmp2], 0x07(%[addr1]) \n\t"
502  "swr %[tmp2], 0x00(%[addr1]) \n\t"
503  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
504  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
505  PTR_ADDU "%[addr1], %[addr1], %[stride] \n\t"
506  "bnez %[tmp0], 1b \n\t"
507  : [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
508  [tmp2]"=&r"(tmp[2]),
509  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1])
510  : [src]"r"((mips_reg)src), [stride]"r"((mips_reg)stride),
511  [ff_pb_1]"r"(ff_pb_1)
512  : "memory"
513  );
514 }
515 
/*
 * NOTE(review): the signature line (listing line 516) is missing from this
 * extract. The body only references `src` and `stride`; presumably this is
 * ff_pred8x8_top_dc_8_mmi(uint8_t *src, ptrdiff_t stride) -- confirm
 * against h264pred_mips.h.
 *
 * Loads the 8 bytes one stride above src, sums bytes 0..3 and bytes 4..7
 * separately, computes (sum + 2) >> 2 for each half, and writes 8 rows
 * whose low four bytes hold the first result and high four bytes the second.
 */
517 {
518  double ftmp[4];
519  uint64_t tmp[1];
520  mips_reg addr[1];
521 
522  __asm__ volatile (
/* tmp0 = 2: used both as the rounding term and as the shift count */
523  "dli %[tmp0], 0x02 \n\t"
524  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
/* ftmp1 = 8 bytes of the row above */
525  PTR_SUBU "%[addr0], %[src], %[stride] \n\t"
526  "gsldlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
527  "gsldrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
/* widen low/high four bytes to halfwords, then biadd sums each group */
528  "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t"
529  "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t"
530  "biadd %[ftmp2], %[ftmp2] \n\t"
531  "biadd %[ftmp3], %[ftmp3] \n\t"
532  "mtc1 %[tmp0], %[ftmp1] \n\t"
/* shuffle with an all-zero selector (ftmp0): presumably replicates
 * lane 0 into all four lanes -- confirm pshufh semantics */
533  "pshufh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
534  "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
535  "pshufh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
/* add the rounding term, then shift right by 2 */
536  "paddush %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
537  "paddush %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
538  "mtc1 %[tmp0], %[ftmp1] \n\t"
539  "psrlh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
540  "psrlh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
/* ftmp1 = packed output row; the eight stores below are fully unrolled */
541  "packushb %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
542  "gssdlc1 %[ftmp1], 0x07(%[src]) \n\t"
543  "gssdrc1 %[ftmp1], 0x00(%[src]) \n\t"
544  PTR_ADDU "%[src], %[src], %[stride] \n\t"
545  "gssdlc1 %[ftmp1], 0x07(%[src]) \n\t"
546  "gssdrc1 %[ftmp1], 0x00(%[src]) \n\t"
547  PTR_ADDU "%[src], %[src], %[stride] \n\t"
548  "gssdlc1 %[ftmp1], 0x07(%[src]) \n\t"
549  "gssdrc1 %[ftmp1], 0x00(%[src]) \n\t"
550  PTR_ADDU "%[src], %[src], %[stride] \n\t"
551  "gssdlc1 %[ftmp1], 0x07(%[src]) \n\t"
552  "gssdrc1 %[ftmp1], 0x00(%[src]) \n\t"
553  PTR_ADDU "%[src], %[src], %[stride] \n\t"
554  "gssdlc1 %[ftmp1], 0x07(%[src]) \n\t"
555  "gssdrc1 %[ftmp1], 0x00(%[src]) \n\t"
556  PTR_ADDU "%[src], %[src], %[stride] \n\t"
557  "gssdlc1 %[ftmp1], 0x07(%[src]) \n\t"
558  "gssdrc1 %[ftmp1], 0x00(%[src]) \n\t"
559  PTR_ADDU "%[src], %[src], %[stride] \n\t"
560  "gssdlc1 %[ftmp1], 0x07(%[src]) \n\t"
561  "gssdrc1 %[ftmp1], 0x00(%[src]) \n\t"
562  PTR_ADDU "%[src], %[src], %[stride] \n\t"
563  "gssdlc1 %[ftmp1], 0x07(%[src]) \n\t"
564  "gssdrc1 %[ftmp1], 0x00(%[src]) \n\t"
565  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
566  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
567  [tmp0]"=&r"(tmp[0]),
568  [addr0]"=&r"(addr[0]),
569  [src]"+&r"(src)
570  : [stride]"r"((mips_reg)stride)
571  : "memory"
572  );
573 }
574 
/*
 * NOTE(review): the signature line (listing line 575) is missing from this
 * extract. The body only references `src` and `stride`; presumably this is
 * ff_pred8x8_dc_8_mmi(uint8_t *src, ptrdiff_t stride) -- confirm against
 * h264pred_mips.h.
 *
 * Computes four DC values from the neighbours of an 8x8 block and writes
 * one per 4x4 quadrant:
 *   T0 = sum of top bytes 0..3,  T1 = sum of top bytes 4..7,
 *   L0 = sum of left bytes of rows 0..3, L1 = sum of left bytes of rows 4..7
 *   top-left     = (T0 + L0 + 4) >> 3
 *   top-right    = (T1 + 2) >> 2
 *   bottom-left  = (L1 + 2) >> 2
 *   bottom-right = (T1 + L1 + 4) >> 3
 *
 * The addr[] operands double as general-purpose scratch registers.
 * Rows are written with sdc1 (aligned 64-bit store).
 */
576 {
577  double ftmp[5];
578  mips_reg addr[7];
579 
580  __asm__ volatile (
/* addr0 = src - stride (top bytes 0..3), addr1 = addr0 + 4 (top bytes 4..7) */
581  "negu %[addr0], %[stride] \n\t"
582  PTR_ADDU "%[addr0], %[addr0], %[src] \n\t"
583  PTR_ADDIU "%[addr1], %[addr0], 0x04 \n\t"
/* addr3 accumulates T0, addr4 accumulates T1; addr2 is the loaded byte */
584  "lbu %[addr2], 0x00(%[addr0]) \n\t"
585  PTR_ADDU "%[addr3], $0, %[addr2] \n\t"
586  PTR_ADDIU "%[addr0], 0x01 \n\t"
587  "lbu %[addr2], 0x00(%[addr1]) \n\t"
588  PTR_ADDU "%[addr4], $0, %[addr2] \n\t"
589  PTR_ADDIU "%[addr1], 0x01 \n\t"
590  "lbu %[addr2], 0x00(%[addr0]) \n\t"
591  PTR_ADDU "%[addr3], %[addr3], %[addr2] \n\t"
592  PTR_ADDIU "%[addr0], 0x01 \n\t"
593  "lbu %[addr2], 0x00(%[addr1]) \n\t"
594  PTR_ADDU "%[addr4], %[addr4], %[addr2] \n\t"
595  PTR_ADDIU "%[addr1], 0x01 \n\t"
596  "lbu %[addr2], 0x00(%[addr0]) \n\t"
597  PTR_ADDU "%[addr3], %[addr3], %[addr2] \n\t"
598  PTR_ADDIU "%[addr0], 0x01 \n\t"
599  "lbu %[addr2], 0x00(%[addr1]) \n\t"
600  PTR_ADDU "%[addr4], %[addr4], %[addr2] \n\t"
601  PTR_ADDIU "%[addr1], 0x01 \n\t"
602  "lbu %[addr2], 0x00(%[addr0]) \n\t"
603  PTR_ADDU "%[addr3], %[addr3], %[addr2] \n\t"
604  PTR_ADDIU "%[addr0], 0x01 \n\t"
605  "lbu %[addr2], 0x00(%[addr1]) \n\t"
606  PTR_ADDU "%[addr4], %[addr4], %[addr2] \n\t"
607  PTR_ADDIU "%[addr1], 0x01 \n\t"
/* addr2 = src - 1 (left column cursor); addr5 accumulates L0 (rows 0..3) */
608  "dli %[addr2], -0x01 \n\t"
609  PTR_ADDU "%[addr2], %[addr2], %[src] \n\t"
610  "lbu %[addr1], 0x00(%[addr2]) \n\t"
611  PTR_ADDU "%[addr5], $0, %[addr1] \n\t"
612  PTR_ADDU "%[addr2], %[addr2], %[stride] \n\t"
613  "lbu %[addr1], 0x00(%[addr2]) \n\t"
614  PTR_ADDU "%[addr5], %[addr5], %[addr1] \n\t"
615  PTR_ADDU "%[addr2], %[addr2], %[stride] \n\t"
616  "lbu %[addr1], 0x00(%[addr2]) \n\t"
617  PTR_ADDU "%[addr5], %[addr5], %[addr1] \n\t"
618  PTR_ADDU "%[addr2], %[addr2], %[stride] \n\t"
619  "lbu %[addr1], 0x00(%[addr2]) \n\t"
620  PTR_ADDU "%[addr5], %[addr5], %[addr1] \n\t"
/* addr6 accumulates L1 (rows 4..7) */
621  PTR_ADDU "%[addr2], %[addr2], %[stride] \n\t"
622  "lbu %[addr1], 0x00(%[addr2]) \n\t"
623  PTR_ADDU "%[addr6], $0, %[addr1] \n\t"
624  PTR_ADDU "%[addr2], %[addr2], %[stride] \n\t"
625  "lbu %[addr1], 0x00(%[addr2]) \n\t"
626  PTR_ADDU "%[addr6], %[addr6], %[addr1] \n\t"
627  PTR_ADDU "%[addr2], %[addr2], %[stride] \n\t"
628  "lbu %[addr1], 0x00(%[addr2]) \n\t"
629  PTR_ADDU "%[addr6], %[addr6], %[addr1] \n\t"
630  PTR_ADDU "%[addr2], %[addr2], %[stride] \n\t"
631  "lbu %[addr1], 0x00(%[addr2]) \n\t"
632  PTR_ADDU "%[addr6], %[addr6], %[addr1] \n\t"
/* addr3 = (T0+L0+4)>>3, addr4 = (T1+2)>>2, addr1 = (L1+2)>>2,
 * addr2 = ((T1+2)+(L1+2))>>3 */
633  PTR_ADDU "%[addr3], %[addr3], %[addr5] \n\t"
634  PTR_ADDIU "%[addr3], %[addr3], 0x04 \n\t"
635  PTR_ADDIU "%[addr4], %[addr4], 0x02 \n\t"
636  PTR_ADDIU "%[addr1], %[addr6], 0x02 \n\t"
637  PTR_ADDU "%[addr2], %[addr4], %[addr1] \n\t"
638  PTR_SRL "%[addr3], 0x03 \n\t"
639  PTR_SRL "%[addr4], 0x02 \n\t"
640  PTR_SRL "%[addr1], 0x02 \n\t"
641  PTR_SRL "%[addr2], 0x03 \n\t"
/* move each DC to an FP register and shuffle with an all-zero selector
 * (presumably replicating lane 0 into all four lanes -- confirm) */
642  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
643  "dmtc1 %[addr3], %[ftmp1] \n\t"
644  "pshufh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
645  "dmtc1 %[addr4], %[ftmp2] \n\t"
646  "pshufh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
647  "dmtc1 %[addr1], %[ftmp3] \n\t"
648  "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
649  "dmtc1 %[addr2], %[ftmp4] \n\t"
650  "pshufh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
/* ftmp1 = row pattern for rows 0..3, ftmp2 = row pattern for rows 4..7 */
651  "packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
652  "packushb %[ftmp2], %[ftmp3], %[ftmp4] \n\t"
653  PTR_ADDU "%[addr0], $0, %[src] \n\t"
654  "sdc1 %[ftmp1], 0x00(%[addr0]) \n\t"
655  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
656  "sdc1 %[ftmp1], 0x00(%[addr0]) \n\t"
657  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
658  "sdc1 %[ftmp1], 0x00(%[addr0]) \n\t"
659  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
660  "sdc1 %[ftmp1], 0x00(%[addr0]) \n\t"
661  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
662  "sdc1 %[ftmp2], 0x00(%[addr0]) \n\t"
663  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
664  "sdc1 %[ftmp2], 0x00(%[addr0]) \n\t"
665  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
666  "sdc1 %[ftmp2], 0x00(%[addr0]) \n\t"
667  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
668  "sdc1 %[ftmp2], 0x00(%[addr0]) \n\t"
669  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
670  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
671  [ftmp4]"=&f"(ftmp[4]),
672  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
673  [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
674  [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
675  [addr6]"=&r"(addr[6])
676  : [src]"r"((mips_reg)src), [stride]"r"((mips_reg)stride)
677  : "memory"
678  );
679 }
680 
/*
 * NOTE(review): the signature line (listing line 681) is missing from this
 * extract. The body only references `src` and `stride`; presumably this is
 * ff_pred8x16_vertical_8_mmi(uint8_t *src, ptrdiff_t stride) -- confirm
 * against h264pred_mips.h.
 *
 * Reads the 8 bytes located one stride above src and writes them to 16
 * consecutive rows starting at src.
 */
682 {
683  double ftmp[1];
684  uint64_t tmp[1];
685 
686  __asm__ volatile (
/* ftmp0 = 8 bytes of the row at srcA (src - stride) */
687  "gsldlc1 %[ftmp0], 0x07(%[srcA]) \n\t"
688  "gsldrc1 %[ftmp0], 0x00(%[srcA]) \n\t"
/* tmp0 = loop counter: 4 iterations, four rows per iteration */
689  "dli %[tmp0], 0x04 \n\t"
690  "1: \n\t"
691  "gssdlc1 %[ftmp0], 0x07(%[src]) \n\t"
692  "gssdrc1 %[ftmp0], 0x00(%[src]) \n\t"
693  PTR_ADDU "%[src], %[src], %[stride] \n\t"
694  "gssdlc1 %[ftmp0], 0x07(%[src]) \n\t"
695  "gssdrc1 %[ftmp0], 0x00(%[src]) \n\t"
696  PTR_ADDU "%[src], %[src], %[stride] \n\t"
697  "gssdlc1 %[ftmp0], 0x07(%[src]) \n\t"
698  "gssdrc1 %[ftmp0], 0x00(%[src]) \n\t"
699  PTR_ADDU "%[src], %[src], %[stride] \n\t"
700  "gssdlc1 %[ftmp0], 0x07(%[src]) \n\t"
701  "gssdrc1 %[ftmp0], 0x00(%[src]) \n\t"
702  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
703  PTR_ADDU "%[src], %[src], %[stride] \n\t"
704  "bnez %[tmp0], 1b \n\t"
705  : [ftmp0]"=&f"(ftmp[0]),
706  [tmp0]"=&r"(tmp[0]),
707  [src]"+&r"(src)
/* srcA is evaluated in C before the asm modifies src */
708  : [stride]"r"((mips_reg)stride), [srcA]"r"((mips_reg)(src-stride))
709  : "memory"
710  );
711 }
712 
/*
 * NOTE(review): the signature line (listing line 713) is missing from this
 * extract. The body only references `src` and `stride`; presumably this is
 * ff_pred8x16_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride) -- confirm
 * against h264pred_mips.h.
 *
 * For each of 16 rows: reads the byte immediately left of the row,
 * multiplies it by ff_pb_1 (presumably the byte-replication constant from
 * constants.h -- confirm) and stores the product over the row with one
 * swl(+7)/swr(+0) pair.
 *
 * NOTE(review): swl/swr are 32-bit word stores; covering 8 bytes with one
 * pair looks like it relies on the row address being word aligned --
 * confirm.
 */
714 {
715  uint64_t tmp[3];
716  mips_reg addr[2];
717 
718  __asm__ volatile (
/* addr0 walks the left-neighbour column, addr1 walks the output rows */
719  PTR_ADDI "%[addr0], %[src], -0x01 \n\t"
720  PTR_ADDU "%[addr1], %[src], $0 \n\t"
/* tmp0 = loop counter: 8 iterations, two rows per iteration */
721  "dli %[tmp0], 0x08 \n\t"
722  "1: \n\t"
723  "lbu %[tmp1], 0x00(%[addr0]) \n\t"
724  "dmul %[tmp2], %[tmp1], %[ff_pb_1] \n\t"
725  "swl %[tmp2], 0x07(%[addr1]) \n\t"
726  "swr %[tmp2], 0x00(%[addr1]) \n\t"
727  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
728  PTR_ADDU "%[addr1], %[addr1], %[stride] \n\t"
729  "lbu %[tmp1], 0x00(%[addr0]) \n\t"
730  "dmul %[tmp2], %[tmp1], %[ff_pb_1] \n\t"
731  "swl %[tmp2], 0x07(%[addr1]) \n\t"
732  "swr %[tmp2], 0x00(%[addr1]) \n\t"
733  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
734  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
735  PTR_ADDU "%[addr1], %[addr1], %[stride] \n\t"
736  "bnez %[tmp0], 1b \n\t"
737  : [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
738  [tmp2]"=&r"(tmp[2]),
739  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1])
740  : [src]"r"((mips_reg)src), [stride]"r"((mips_reg)stride),
741  [ff_pb_1]"r"(ff_pb_1)
742  : "memory"
743  );
744 }
745 
746 static inline void pred16x16_plane_compat_mmi(uint8_t *src, int stride,
747  const int svq3, const int rv40)
748 {
749  double ftmp[11];
750  uint64_t tmp[7];
751  mips_reg addr[1];
752 
753  __asm__ volatile(
754  PTR_SUBU "%[addr0], %[src], %[stride] \n\t"
755  "dli %[tmp2], 0x20 \n\t"
756  "dmtc1 %[tmp2], %[ftmp4] \n\t"
757  "gsldlc1 %[ftmp0], 0x06(%[addr0]) \n\t"
758  "gsldlc1 %[ftmp2], 0x0f(%[addr0]) \n\t"
759  "gsldrc1 %[ftmp0], -0x01(%[addr0]) \n\t"
760  "gsldrc1 %[ftmp2], 0x08(%[addr0]) \n\t"
761  "dsrl %[ftmp1], %[ftmp0], %[ftmp4] \n\t"
762  "dsrl %[ftmp3], %[ftmp2], %[ftmp4] \n\t"
763  "xor %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
764  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
765  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
766  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
767  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
768  "pmullh %[ftmp0], %[ftmp0], %[ff_pw_m8tom5] \n\t"
769  "pmullh %[ftmp1], %[ftmp1], %[ff_pw_m4tom1] \n\t"
770  "pmullh %[ftmp2], %[ftmp2], %[ff_pw_1to4] \n\t"
771  "pmullh %[ftmp3], %[ftmp3], %[ff_pw_5to8] \n\t"
772  "paddsh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
773  "paddsh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
774  "paddsh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
775  "dli %[tmp2], 0x0e \n\t"
776  "dmtc1 %[tmp2], %[ftmp4] \n\t"
777  "pshufh %[ftmp1], %[ftmp0], %[ftmp4] \n\t"
778  "paddsh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
779  "dli %[tmp2], 0x01 \n\t"
780  "dmtc1 %[tmp2], %[ftmp4] \n\t"
781  "pshufh %[ftmp1], %[ftmp0], %[ftmp4] \n\t"
782  "paddsh %[ftmp5], %[ftmp0], %[ftmp1] \n\t"
783 
784  PTR_ADDIU "%[addr0], %[src], -0x01 \n\t"
785  PTR_SUBU "%[addr0], %[addr0], %[stride] \n\t"
786  "lbu %[tmp2], 0x00(%[addr0]) \n\t"
787  "lbu %[tmp6], 0x10(%[addr0]) \n\t"
788  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
789  "lbu %[tmp3], 0x00(%[addr0]) \n\t"
790  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
791  "lbu %[tmp4], 0x00(%[addr0]) \n\t"
792  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
793  "lbu %[tmp5], 0x00(%[addr0]) \n\t"
794  "dsll %[tmp3], %[tmp3], 0x10 \n\t"
795  "dsll %[tmp4], %[tmp4], 0x20 \n\t"
796  "dsll %[tmp5], %[tmp5], 0x30 \n\t"
797  "or %[tmp4], %[tmp4], %[tmp5] \n\t"
798  "or %[tmp2], %[tmp2], %[tmp3] \n\t"
799  "or %[tmp2], %[tmp2], %[tmp4] \n\t"
800  "dmtc1 %[tmp2], %[ftmp0] \n\t"
801 
802  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
803  "lbu %[tmp2], 0x00(%[addr0]) \n\t"
804  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
805  "lbu %[tmp3], 0x00(%[addr0]) \n\t"
806  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
807  "lbu %[tmp4], 0x00(%[addr0]) \n\t"
808  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
809  "lbu %[tmp5], 0x00(%[addr0]) \n\t"
810  "dsll %[tmp3], %[tmp3], 0x10 \n\t"
811  "dsll %[tmp4], %[tmp4], 0x20 \n\t"
812  "dsll %[tmp5], %[tmp5], 0x30 \n\t"
813  "or %[tmp4], %[tmp4], %[tmp5] \n\t"
814  "or %[tmp2], %[tmp2], %[tmp3] \n\t"
815  "or %[tmp2], %[tmp2], %[tmp4] \n\t"
816  "dmtc1 %[tmp2], %[ftmp1] \n\t"
817 
818  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
819  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
820  "lbu %[tmp2], 0x00(%[addr0]) \n\t"
821  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
822  "lbu %[tmp3], 0x00(%[addr0]) \n\t"
823  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
824  "lbu %[tmp4], 0x00(%[addr0]) \n\t"
825  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
826  "lbu %[tmp5], 0x00(%[addr0]) \n\t"
827  "dsll %[tmp3], %[tmp3], 0x10 \n\t"
828  "dsll %[tmp4], %[tmp4], 0x20 \n\t"
829  "dsll %[tmp5], %[tmp5], 0x30 \n\t"
830  "or %[tmp4], %[tmp4], %[tmp5] \n\t"
831  "or %[tmp2], %[tmp2], %[tmp3] \n\t"
832  "or %[tmp2], %[tmp2], %[tmp4] \n\t"
833  "dmtc1 %[tmp2], %[ftmp2] \n\t"
834 
835  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
836  "lbu %[tmp2], 0x00(%[addr0]) \n\t"
837  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
838  "lbu %[tmp3], 0x00(%[addr0]) \n\t"
839  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
840  "lbu %[tmp4], 0x00(%[addr0]) \n\t"
841  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
842  "lbu %[tmp5], 0x00(%[addr0]) \n\t"
843  "daddu %[tmp6], %[tmp6], %[tmp5] \n\t"
844  "daddiu %[tmp6], %[tmp6], 0x01 \n\t"
845  "dsll %[tmp6], %[tmp6], 0x04 \n\t"
846 
847  "dsll %[tmp3], %[tmp3], 0x10 \n\t"
848  "dsll %[tmp4], %[tmp4], 0x20 \n\t"
849  "dsll %[tmp5], %[tmp5], 0x30 \n\t"
850  "or %[tmp4], %[tmp4], %[tmp5] \n\t"
851  "or %[tmp2], %[tmp2], %[tmp3] \n\t"
852  "or %[tmp2], %[tmp2], %[tmp4] \n\t"
853  "dmtc1 %[tmp2], %[ftmp3] \n\t"
854 
855  "pmullh %[ftmp0], %[ftmp0], %[ff_pw_m8tom5] \n\t"
856  "pmullh %[ftmp1], %[ftmp1], %[ff_pw_m4tom1] \n\t"
857  "pmullh %[ftmp2], %[ftmp2], %[ff_pw_1to4] \n\t"
858  "pmullh %[ftmp3], %[ftmp3], %[ff_pw_5to8] \n\t"
859  "paddsh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
860  "paddsh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
861  "paddsh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
862  "dli %[tmp2], 0x0e \n\t"
863  "dmtc1 %[tmp2], %[ftmp4] \n\t"
864  "pshufh %[ftmp1], %[ftmp0], %[ftmp4] \n\t"
865  "paddsh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
866 
867  "dli %[tmp2], 0x01 \n\t"
868  "dmtc1 %[tmp2], %[ftmp4] \n\t"
869  "pshufh %[ftmp1], %[ftmp0], %[ftmp4] \n\t"
870  "paddsh %[ftmp6], %[ftmp0], %[ftmp1] \n\t"
871 
872  "dmfc1 %[tmp0], %[ftmp5] \n\t"
873  "dsll %[tmp0], %[tmp0], 0x30 \n\t"
874  "dsra %[tmp0], %[tmp0], 0x30 \n\t"
875  "dmfc1 %[tmp1], %[ftmp6] \n\t"
876  "dsll %[tmp1], %[tmp1], 0x30 \n\t"
877  "dsra %[tmp1], %[tmp1], 0x30 \n\t"
878 
879  "beqz %[svq3], 1f \n\t"
880  "dli %[tmp2], 0x04 \n\t"
881  "ddiv %[tmp0], %[tmp0], %[tmp2] \n\t"
882  "ddiv %[tmp1], %[tmp1], %[tmp2] \n\t"
883  "dli %[tmp2], 0x05 \n\t"
884  "dmul %[tmp0], %[tmp0], %[tmp2] \n\t"
885  "dmul %[tmp1], %[tmp1], %[tmp2] \n\t"
886  "dli %[tmp2], 0x10 \n\t"
887  "ddiv %[tmp0], %[tmp0], %[tmp2] \n\t"
888  "ddiv %[tmp1], %[tmp1], %[tmp2] \n\t"
889  "daddu %[tmp2], %[tmp0], $0 \n\t"
890  "daddu %[tmp0], %[tmp1], $0 \n\t"
891  "daddu %[tmp1], %[tmp2], $0 \n\t"
892  "b 2f \n\t"
893 
894  "1: \n\t"
895  "beqz %[rv40], 1f \n\t"
896  "dsra %[tmp2], %[tmp0], 0x02 \n\t"
897  "daddu %[tmp0], %[tmp0], %[tmp2] \n\t"
898  "dsra %[tmp2], %[tmp1], 0x02 \n\t"
899  "daddu %[tmp1], %[tmp1], %[tmp2] \n\t"
900  "dsra %[tmp0], %[tmp0], 0x04 \n\t"
901  "dsra %[tmp1], %[tmp1], 0x04 \n\t"
902  "b 2f \n\t"
903 
904  "1: \n\t"
905  "dli %[tmp2], 0x05 \n\t"
906  "dmul %[tmp0], %[tmp0], %[tmp2] \n\t"
907  "dmul %[tmp1], %[tmp1], %[tmp2] \n\t"
908  "daddiu %[tmp0], %[tmp0], 0x20 \n\t"
909  "daddiu %[tmp1], %[tmp1], 0x20 \n\t"
910  "dsra %[tmp0], %[tmp0], 0x06 \n\t"
911  "dsra %[tmp1], %[tmp1], 0x06 \n\t"
912 
913  "2: \n\t"
914  "daddu %[tmp3], %[tmp0], %[tmp1] \n\t"
915  "dli %[tmp2], 0x07 \n\t"
916  "dmul %[tmp3], %[tmp3], %[tmp2] \n\t"
917  "dsubu %[tmp6], %[tmp6], %[tmp3] \n\t"
918 
919  "xor %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
920  "dmtc1 %[tmp0], %[ftmp0] \n\t"
921  "pshufh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
922  "dmtc1 %[tmp1], %[ftmp5] \n\t"
923  "pshufh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
924  "dmtc1 %[tmp6], %[ftmp6] \n\t"
925  "pshufh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
926  "dli %[tmp2], 0x05 \n\t"
927  "dmtc1 %[tmp2], %[ftmp7] \n\t"
928  "pmullh %[ftmp1], %[ff_pw_0to3], %[ftmp0] \n\t"
929  "dmtc1 %[ff_pw_4to7], %[ftmp2] \n\t"
930  "pmullh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
931  "dmtc1 %[ff_pw_8tob], %[ftmp3] \n\t"
932  "pmullh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
933  "dmtc1 %[ff_pw_ctof], %[ftmp4] \n\t"
934  "pmullh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
935 
936  "dli %[tmp0], 0x10 \n\t"
937  PTR_ADDU "%[addr0], %[src], $0 \n\t"
938  "1: \n\t"
939  "paddsh %[ftmp8], %[ftmp1], %[ftmp6] \n\t"
940  "psrah %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
941  "paddsh %[ftmp9], %[ftmp2], %[ftmp6] \n\t"
942  "psrah %[ftmp9], %[ftmp9], %[ftmp7] \n\t"
943  "packushb %[ftmp0], %[ftmp8], %[ftmp9] \n\t"
944  "gssdlc1 %[ftmp0], 0x07(%[addr0]) \n\t"
945  "gssdrc1 %[ftmp0], 0x00(%[addr0]) \n\t"
946 
947  "paddsh %[ftmp8], %[ftmp3], %[ftmp6] \n\t"
948  "psrah %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
949  "paddsh %[ftmp9], %[ftmp4], %[ftmp6] \n\t"
950  "psrah %[ftmp9], %[ftmp9], %[ftmp7] \n\t"
951  "packushb %[ftmp0], %[ftmp8], %[ftmp9] \n\t"
952  "gssdlc1 %[ftmp0], 0x0f(%[addr0]) \n\t"
953  "gssdrc1 %[ftmp0], 0x08(%[addr0]) \n\t"
954 
955  "paddsh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
956  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
957  "daddiu %[tmp0], %[tmp0], -0x01 \n\t"
958  "bnez %[tmp0], 1b \n\t"
959  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
960  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
961  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
962  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
963  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
964  [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
965  [tmp2]"=&r"(tmp[2]), [tmp3]"=&r"(tmp[3]),
966  [tmp4]"=&r"(tmp[4]), [tmp5]"=&r"(tmp[5]),
967  [tmp6]"=&r"(tmp[6]),
968  [addr0]"=&r"(addr[0])
969  : [src]"r"(src), [stride]"r"((mips_reg)stride),
970  [svq3]"r"(svq3), [rv40]"r"(rv40),
975  : "memory"
976  );
977 }
978 
/**
 * 16x16 plane intra prediction, H.264 flavour.
 *
 * Thin wrapper around pred16x16_plane_compat_mmi() with both the svq3 and
 * rv40 flags cleared. With both flags zero the helper takes its default
 * gradient-scaling path, (5 * x + 32) >> 6, for both gradients.
 *
 * @param src    pointer handed unchanged to the helper, which writes the
 *               16 predicted rows starting at this address
 * @param stride offset added to the address to step from one row to the
 *               next (the helper takes it as int)
 */
void ff_pred16x16_plane_h264_8_mmi(uint8_t *src, ptrdiff_t stride)
{
    pred16x16_plane_compat_mmi(src, stride, 0, 0);
}
983 
/**
 * 16x16 plane intra prediction, SVQ3 flavour.
 *
 * Thin wrapper around pred16x16_plane_compat_mmi() with svq3 = 1 and
 * rv40 = 0. On this path the helper scales each gradient as
 * ((x / 4) * 5) / 16 and then swaps the two gradients before building
 * the prediction.
 *
 * @param src    pointer handed unchanged to the helper, which writes the
 *               16 predicted rows starting at this address
 * @param stride offset added to the address to step from one row to the
 *               next (the helper takes it as int)
 */
void ff_pred16x16_plane_svq3_8_mmi(uint8_t *src, ptrdiff_t stride)
{
    pred16x16_plane_compat_mmi(src, stride, 1, 0);
}
988 
/**
 * 16x16 plane intra prediction, RV40 flavour.
 *
 * Thin wrapper around pred16x16_plane_compat_mmi() with svq3 = 0 and
 * rv40 = 1. On this path the helper scales each gradient as
 * (x + (x >> 2)) >> 4.
 *
 * @param src    pointer handed unchanged to the helper, which writes the
 *               16 predicted rows starting at this address
 * @param stride offset added to the address to step from one row to the
 *               next (the helper takes it as int)
 */
void ff_pred16x16_plane_rv40_8_mmi(uint8_t *src, ptrdiff_t stride)
{
    pred16x16_plane_compat_mmi(src, stride, 0, 1);
}
#define mips_reg
Definition: asmdefs.h:44
MIPS assembly defines from sys/asm.h but rewritten for use with C inline assembly (rather than from within .s files).
void ff_pred8x16_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:713
const uint64_t ff_pw_ctof
Definition: constants.c:50
void ff_pred8x8_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:483
const uint64_t ff_pw_1to4
Definition: constants.c:45
const uint64_t ff_pw_2
Definition: constants.c:27
void ff_pred8x16_vertical_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:681
void ff_pred8x8l_top_dc_8_mmi(uint8_t *src, int has_topleft, int has_topright, ptrdiff_t stride)
Definition: h264pred_mmi.c:157
static void pred16x16_plane_compat_mmi(uint8_t *src, int stride, const int svq3, const int rv40)
Definition: h264pred_mmi.c:746
uint8_t
void ff_pred16x16_dc_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:99
#define PTR_ADDI
Definition: asmdefs.h:49
const uint64_t ff_pw_0to3
Definition: constants.c:47
void ff_pred4x4_dc_8_mmi(uint8_t *src, const uint8_t *topright, ptrdiff_t stride)
Definition: h264pred_mmi.c:428
#define U(x)
Definition: vp56_arith.h:37
const uint64_t ff_pw_4to7
Definition: constants.c:48
void ff_pred8x8l_dc_8_mmi(uint8_t *src, int has_topleft, int has_topright, ptrdiff_t stride)
Definition: h264pred_mmi.c:244
#define PTR_SUBU
Definition: asmdefs.h:50
void ff_pred16x16_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:62
void ff_pred16x16_plane_h264_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:979
void ff_pred8x8l_vertical_8_mmi(uint8_t *src, int has_topleft, int has_topright, ptrdiff_t stride)
Definition: h264pred_mmi.c:346
#define src
Definition: vp9dsp.c:530
const uint64_t ff_pw_m8tom5
Definition: constants.c:43
#define src1
Definition: h264pred.c:139
void ff_pred16x16_vertical_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:30
#define PTR_ADDIU
Definition: asmdefs.h:48
void ff_pred16x16_plane_rv40_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:989
#define src0
Definition: h264pred.c:138
const uint64_t ff_pw_5to8
Definition: constants.c:46
const uint64_t ff_pw_8tob
Definition: constants.c:49
const uint64_t ff_pb_1
Definition: constants.c:52
GLint GLenum GLboolean GLsizei stride
Definition: opengl_enc.c:105
void ff_pred8x8_dc_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:575
const uint64_t ff_pw_m4tom1
Definition: constants.c:44
void ff_pred8x8_top_dc_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:516
static uint8_t tmp[8]
Definition: des.c:38
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(constuint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(constuint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(constint16_t *) pi >>8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t,*(constint16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t,*(constint16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(constint32_t *) pi >>24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t,*(constint32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t,*(constint32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(constfloat *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(constfloat *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(constfloat *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(constdouble *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(constdouble *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(constdouble *) pi *(1U<< 31))))#defineSET_CONV_FUNC_GROUP(ofmt, ifmt) staticvoidset_generic_function(AudioConvert *ac){}voidff_audio_convert_free(AudioConvert **ac){if(!*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);}AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enumAVSampleFormatout_fmt, enumAVSampleFormatin_fmt, intchannels, intsample_rate, 
intapply_map){AudioConvert *ac;intin_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) returnNULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method!=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt)>2){ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc){av_free(ac);returnNULL;}returnac;}in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar){ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar?ac->channels:1;}elseif(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;elseac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);returnac;}intff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in){intuse_generic=1;intlen=in->nb_samples;intp;if(ac->dc){av_log(ac->avr, AV_LOG_TRACE,"%dsamples-audio_convert:%sto%s(dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));returnff_convert_dither(ac-> dc
#define PTR_ADDU
Definition: asmdefs.h:47
#define stride
#define PTR_SRL
Definition: asmdefs.h:54
void ff_pred8x8_vertical_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:456
void ff_pred16x16_plane_svq3_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:984