FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
mpegvideo_mmi.c
Go to the documentation of this file.
1 /*
2  * Loongson SIMD optimized mpegvideo
3  *
4  * Copyright (c) 2015 Loongson Technology Corporation Limited
5  * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
6  * Zhang Shuangshuang <zhangshuangshuang@ict.ac.cn>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24 
25 #include "mpegvideo_mips.h"
26 #include "libavutil/mips/asmdefs.h"
27 
29  int n, int qscale)
30 {
31  int64_t level, qmul, qadd, nCoeffs;
32  double ftmp[6];
33  mips_reg addr[1];
34 
35  qmul = qscale << 1;
36  av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
37 
38  if (!s->h263_aic) {
39  if (n<4)
40  level = block[0] * s->y_dc_scale;
41  else
42  level = block[0] * s->c_dc_scale;
43  qadd = (qscale-1) | 1;
44  } else {
45  qadd = 0;
46  level = block[0];
47  }
48 
49  if(s->ac_pred)
50  nCoeffs = 63;
51  else
52  nCoeffs = s->inter_scantable.raster_end[s->block_last_index[n]];
53 
54  __asm__ volatile (
55  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
56  "packsswh %[qmul], %[qmul], %[qmul] \n\t"
57  "packsswh %[qmul], %[qmul], %[qmul] \n\t"
58  "packsswh %[qadd], %[qadd], %[qadd] \n\t"
59  "packsswh %[qadd], %[qadd], %[qadd] \n\t"
60  "psubh %[ftmp0], %[ftmp0], %[qadd] \n\t"
61  "xor %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
62  ".p2align 4 \n\t"
63  "1: \n\t"
64  PTR_ADDU "%[addr0], %[block], %[nCoeffs] \n\t"
65  "gsldlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
66  "gsldrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
67  "gsldlc1 %[ftmp2], 0x0f(%[addr0]) \n\t"
68  "gsldrc1 %[ftmp2], 0x08(%[addr0]) \n\t"
69  "mov.d %[ftmp3], %[ftmp1] \n\t"
70  "mov.d %[ftmp4], %[ftmp2] \n\t"
71  "pmullh %[ftmp1], %[ftmp1], %[qmul] \n\t"
72  "pmullh %[ftmp2], %[ftmp2], %[qmul] \n\t"
73  "pcmpgth %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
74  "pcmpgth %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
75  "xor %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
76  "xor %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
77  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
78  "paddh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
79  "xor %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
80  "xor %[ftmp4], %[ftmp4], %[ftmp2] \n\t"
81  "pcmpeqh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
82  "pcmpeqh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
83  "pandn %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
84  "pandn %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
85  PTR_ADDIU "%[nCoeffs], %[nCoeffs], 0x10 \n\t"
86  "gssdlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
87  "gssdrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
88  "gssdlc1 %[ftmp2], 0x0f(%[addr0]) \n\t"
89  "gssdrc1 %[ftmp2], 0x08(%[addr0]) \n\t"
90  "blez %[nCoeffs], 1b \n\t"
91  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
92  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
93  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
94  [addr0]"=&r"(addr[0])
95  : [block]"r"((mips_reg)(block+nCoeffs)),
96  [nCoeffs]"r"((mips_reg)(2*(-nCoeffs))),
97  [qmul]"f"(qmul), [qadd]"f"(qadd)
98  : "memory"
99  );
100 
101  block[0] = level;
102 }
103 
105  int n, int qscale)
106 {
107  int64_t qmul, qadd, nCoeffs;
108  double ftmp[6];
109  mips_reg addr[1];
110 
111  qmul = qscale << 1;
112  qadd = (qscale - 1) | 1;
113  av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
114  nCoeffs = s->inter_scantable.raster_end[s->block_last_index[n]];
115 
116  __asm__ volatile (
117  "packsswh %[qmul], %[qmul], %[qmul] \n\t"
118  "packsswh %[qmul], %[qmul], %[qmul] \n\t"
119  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
120  "packsswh %[qadd], %[qadd], %[qadd] \n\t"
121  "packsswh %[qadd], %[qadd], %[qadd] \n\t"
122  "psubh %[ftmp0], %[ftmp0], %[qadd] \n\t"
123  "xor %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
124  ".p2align 4 \n\t"
125  "1: \n\t"
126  PTR_ADDU "%[addr0], %[block], %[nCoeffs] \n\t"
127  "gsldlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
128  "gsldrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
129  "gsldlc1 %[ftmp2], 0x0f(%[addr0]) \n\t"
130  "gsldrc1 %[ftmp2], 0x08(%[addr0]) \n\t"
131  "mov.d %[ftmp3], %[ftmp1] \n\t"
132  "mov.d %[ftmp4], %[ftmp2] \n\t"
133  "pmullh %[ftmp1], %[ftmp1], %[qmul] \n\t"
134  "pmullh %[ftmp2], %[ftmp2], %[qmul] \n\t"
135  "pcmpgth %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
136  "pcmpgth %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
137  "xor %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
138  "xor %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
139  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
140  "paddh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
141  "xor %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
142  "xor %[ftmp4], %[ftmp4], %[ftmp2] \n\t"
143  "pcmpeqh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
144  "pcmpeqh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
145  "pandn %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
146  "pandn %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
147  PTR_ADDIU "%[nCoeffs], %[nCoeffs], 0x10 \n\t"
148  "gssdlc1 %[ftmp1], 0x07(%[addr0]) \n\t"
149  "gssdrc1 %[ftmp1], 0x00(%[addr0]) \n\t"
150  "gssdlc1 %[ftmp2], 0x0f(%[addr0]) \n\t"
151  "gssdrc1 %[ftmp2], 0x08(%[addr0]) \n\t"
152  "blez %[nCoeffs], 1b \n\t"
153  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
154  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
155  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
156  [addr0]"=&r"(addr[0])
157  : [block]"r"((mips_reg)(block+nCoeffs)),
158  [nCoeffs]"r"((mips_reg)(2*(-nCoeffs))),
159  [qmul]"f"(qmul), [qadd]"f"(qadd)
160  : "memory"
161  );
162 }
163 
165  int n, int qscale)
166 {
 /*
  * MPEG-1 intra dequantizer written with Loongson MMI inline assembly.
  * NOTE(review): the line carrying the function name (listing line 164) is
  * missing from this page; the index at the end of the page lists
  * ff_dct_unquantize_mpeg1_intra_mmi, which is presumably this function.
  *
  * Starting at block[0] and handling eight coefficients per iteration while
  * the running byte offset is negative, every coefficient c is replaced,
  * per 16-bit lane, by
  *     sign(c) * ((((|c| * quant_matrix[i] * qscale) >> 3) - 1) | 1)
  * and lanes that were zero stay zero. Afterwards block[0] is overwritten
  * with the DC value block0 that is computed in C before the asm runs.
  */
167  int64_t nCoeffs;
168  const uint16_t *quant_matrix;
169  int block0;
170  double ftmp[10];
171  uint64_t tmp[1];
172  mips_reg addr[1];
173 
174  av_assert2(s->block_last_index[n]>=0);
 /* one past the raster position of the last coded coefficient */
175  nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]] + 1;
176 
 /* DC: n < 4 uses y_dc_scale, any other n uses c_dc_scale */
177  if (n<4)
178  block0 = block[0] * s->y_dc_scale;
179  else
180  block0 = block[0] * s->c_dc_scale;
181 
182  /* XXX: only mpeg1 */
183  quant_matrix = s->intra_matrix;
184 
185  __asm__ volatile (
 /* ftmp0 = all-ones shifted right by 15, i.e. 1 in every 16-bit lane */
186  "dli %[tmp0], 0x0f \n\t"
187  "pcmpeqh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
188  "dmtc1 %[tmp0], %[ftmp4] \n\t"
189  "dmtc1 %[qscale], %[ftmp1] \n\t"
190  "psrlh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
 /* the two packs presumably replicate qscale into all four lanes of ftmp1 */
191  "packsswh %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
192  "packsswh %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
 /* addr0 = running (negative) byte offset; the input operand is only copied */
193  "or %[addr0], %[nCoeffs], $0 \n\t"
194  ".p2align 4 \n\t"
195  "1: \n\t"
 /* load eight coefficients and keep an untouched copy in ftmp4/ftmp5 */
196  "gsldxc1 %[ftmp2], 0x00(%[addr0], %[block]) \n\t"
197  "gsldxc1 %[ftmp3], 0x08(%[addr0], %[block]) \n\t"
198  "mov.d %[ftmp4], %[ftmp2] \n\t"
199  "mov.d %[ftmp5], %[ftmp3] \n\t"
 /* ftmp6/ftmp7 = quant_matrix entries * qscale */
200  "gsldxc1 %[ftmp6], 0x00(%[addr0], %[quant]) \n\t"
201  "gsldxc1 %[ftmp7], 0x08(%[addr0], %[quant]) \n\t"
202  "pmullh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
203  "pmullh %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
 /* ftmp8/ftmp9 = mask of negative lanes (0 > c); then c = |c| */
204  "xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
205  "xor %[ftmp9], %[ftmp9], %[ftmp9] \n\t"
206  "pcmpgth %[ftmp8], %[ftmp8], %[ftmp2] \n\t"
207  "pcmpgth %[ftmp9], %[ftmp9], %[ftmp3] \n\t"
208  "xor %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
209  "xor %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
210  "psubh %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
211  "psubh %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
 /* |c| * quant * qscale */
212  "pmullh %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
213  "pmullh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
 /* ftmp6/ftmp7 = mask of lanes whose original coefficient was 0;
  * ftmp4 is then reused as the shift count 3 */
214  "xor %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
215  "xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
216  "pcmpeqh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
217  "dli %[tmp0], 0x03 \n\t"
218  "pcmpeqh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
219  "dmtc1 %[tmp0], %[ftmp4] \n\t"
220  "psrah %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
221  "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
 /* (x - 1) | 1 */
222  "psubh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
223  "psubh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
224  "or %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
225  "or %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
 /* restore the original sign */
226  "xor %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
227  "xor %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
228  "psubh %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
229  "psubh %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
 /* clear lanes that were zero on input, then store and advance 16 bytes */
230  "pandn %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
231  "pandn %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
232  "gssdxc1 %[ftmp6], 0x00(%[addr0], %[block]) \n\t"
233  "gssdxc1 %[ftmp7], 0x08(%[addr0], %[block]) \n\t"
234  PTR_ADDIU "%[addr0], %[addr0], 0x10 \n\t"
235  "bltz %[addr0], 1b \n\t"
236  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
237  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
238  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
239  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
240  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
241  [tmp0]"=&r"(tmp[0]),
242  [addr0]"=&r"(addr[0])
 /* block and quant are passed as base + nCoeffs; together with the
  * negative byte offset -2*nCoeffs the first access lands on element 0 */
243  : [block]"r"((mips_reg)(block+nCoeffs)),
244  [quant]"r"((mips_reg)(quant_matrix+nCoeffs)),
245  [nCoeffs]"r"((mips_reg)(2*(-nCoeffs))),
246  [qscale]"r"(qscale)
247  : "memory"
248  );
249 
 /* the loop also rewrote block[0]; replace it with the scaled DC value */
250  block[0] = block0;
251 }
252 
254  int n, int qscale)
255 {
 /*
  * MPEG-1 inter dequantizer written with Loongson MMI inline assembly.
  * NOTE(review): the line carrying the function name (listing line 253) is
  * missing from this page; the index at the end of the page lists
  * ff_dct_unquantize_mpeg1_inter_mmi, which is presumably this function.
  *
  * Starting at block[0] and handling eight coefficients per iteration while
  * the running byte offset is negative, every coefficient c is replaced,
  * per 16-bit lane, by
  *     sign(c) * (((((2 * |c| + 1) * quant_matrix[i] * qscale) >> 4) - 1) | 1)
  * and lanes that were zero stay zero. There is no separate DC handling.
  */
256  int64_t nCoeffs;
257  const uint16_t *quant_matrix;
258  double ftmp[10];
259  uint64_t tmp[1];
260  mips_reg addr[1];
261 
262  av_assert2(s->block_last_index[n] >= 0);
 /* one past the raster position of the last coded coefficient; note that
  * intra_scantable (not inter_scantable) is the table read here */
263  nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]] + 1;
264  quant_matrix = s->inter_matrix;
265 
266  __asm__ volatile (
 /* ftmp0 = all-ones shifted right by 15, i.e. 1 in every 16-bit lane */
267  "dli %[tmp0], 0x0f \n\t"
268  "pcmpeqh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
269  "dmtc1 %[tmp0], %[ftmp4] \n\t"
270  "dmtc1 %[qscale], %[ftmp1] \n\t"
271  "psrlh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
 /* the two packs presumably replicate qscale into all four lanes of ftmp1 */
272  "packsswh %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
273  "packsswh %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
 /* addr0 = running (negative) byte offset; the input operand is only copied */
274  "or %[addr0], %[nCoeffs], $0 \n\t"
275  ".p2align 4 \n\t"
276  "1: \n\t"
 /* load eight coefficients and keep an untouched copy in ftmp4/ftmp5 */
277  "gsldxc1 %[ftmp2], 0x00(%[addr0], %[block]) \n\t"
278  "gsldxc1 %[ftmp3], 0x08(%[addr0], %[block]) \n\t"
279  "mov.d %[ftmp4], %[ftmp2] \n\t"
280  "mov.d %[ftmp5], %[ftmp3] \n\t"
 /* ftmp6/ftmp7 = quant_matrix entries * qscale */
281  "gsldxc1 %[ftmp6], 0x00(%[addr0], %[quant]) \n\t"
282  "gsldxc1 %[ftmp7], 0x08(%[addr0], %[quant]) \n\t"
283  "pmullh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
284  "pmullh %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
 /* ftmp8/ftmp9 = mask of negative lanes (0 > c); then c = |c| */
285  "xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
286  "xor %[ftmp9], %[ftmp9], %[ftmp9] \n\t"
287  "pcmpgth %[ftmp8], %[ftmp8], %[ftmp2] \n\t"
288  "pcmpgth %[ftmp9], %[ftmp9], %[ftmp3] \n\t"
289  "xor %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
290  "xor %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
291  "psubh %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
292  "psubh %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
 /* 2 * |c| + 1 */
293  "paddh %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
294  "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
295  "paddh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
296  "paddh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
 /* (2 * |c| + 1) * quant * qscale */
297  "pmullh %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
298  "pmullh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
 /* ftmp6/ftmp7 = mask of lanes whose original coefficient was 0;
  * ftmp4 is then reused as the shift count 4 */
299  "xor %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
300  "xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
301  "pcmpeqh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
302  "dli %[tmp0], 0x04 \n\t"
303  "pcmpeqh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
304  "dmtc1 %[tmp0], %[ftmp4] \n\t"
305  "psrah %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
306  "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
 /* (x - 1) | 1 */
307  "psubh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
308  "psubh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
309  "or %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
310  "or %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
 /* restore the original sign */
311  "xor %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
312  "xor %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
313  "psubh %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
314  "psubh %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
 /* clear lanes that were zero on input, then store and advance 16 bytes */
315  "pandn %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
316  "pandn %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
317  "gssdxc1 %[ftmp6], 0x00(%[addr0], %[block]) \n\t"
318  "gssdxc1 %[ftmp7], 0x08(%[addr0], %[block]) \n\t"
319  PTR_ADDIU "%[addr0], %[addr0], 0x10 \n\t"
320  "bltz %[addr0], 1b \n\t"
321  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
322  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
323  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
324  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
325  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
326  [tmp0]"=&r"(tmp[0]),
327  [addr0]"=&r"(addr[0])
 /* block and quant are passed as base + nCoeffs; together with the
  * negative byte offset -2*nCoeffs the first access lands on element 0 */
328  : [block]"r"((mips_reg)(block+nCoeffs)),
329  [quant]"r"((mips_reg)(quant_matrix+nCoeffs)),
330  [nCoeffs]"r"((mips_reg)(2*(-nCoeffs))),
331  [qscale]"r"(qscale)
332  : "memory"
333  );
334 }
335 
337  int n, int qscale)
338 {
339  uint64_t nCoeffs;
340  const uint16_t *quant_matrix;
341  int block0;
342  double ftmp[10];
343  uint64_t tmp[1];
344  mips_reg addr[1];
345 
346  assert(s->block_last_index[n]>=0);
347 
348  if (s->alternate_scan)
349  nCoeffs = 63;
350  else
351  nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]];
352 
353  if (n < 4)
354  block0 = block[0] * s->y_dc_scale;
355  else
356  block0 = block[0] * s->c_dc_scale;
357 
358  quant_matrix = s->intra_matrix;
359 
360  __asm__ volatile (
361  "dli %[tmp0], 0x0f \n\t"
362  "pcmpeqh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
363  "mtc1 %[tmp0], %[ftmp3] \n\t"
364  "mtc1 %[qscale], %[ftmp9] \n\t"
365  "psrlh %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
366  "packsswh %[ftmp9], %[ftmp9], %[ftmp9] \n\t"
367  "packsswh %[ftmp9], %[ftmp9], %[ftmp9] \n\t"
368  "or %[addr0], %[nCoeffs], $0 \n\t"
369  ".p2align 4 \n\t"
370  "1: \n\t"
371  "gsldxc1 %[ftmp1], 0x00(%[addr0], %[block]) \n\t"
372  "gsldxc1 %[ftmp2], 0x08(%[addr0], %[block]) \n\t"
373  "mov.d %[ftmp3], %[ftmp1] \n\t"
374  "mov.d %[ftmp4], %[ftmp2] \n\t"
375  "gsldxc1 %[ftmp5], 0x00(%[addr0], %[quant]) \n\t"
376  "gsldxc1 %[ftmp6], 0x00(%[addr0], %[quant]) \n\t"
377  "pmullh %[ftmp5], %[ftmp5], %[ftmp9] \n\t"
378  "pmullh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
379  "xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
380  "xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
381  "pcmpgth %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
382  "pcmpgth %[ftmp8], %[ftmp8], %[ftmp2] \n\t"
383  "xor %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
384  "xor %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
385  "psubh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
386  "psubh %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
387  "pmullh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
388  "pmullh %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
389  "xor %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
390  "xor %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
391  "pcmpeqh %[ftmp5], %[ftmp5], %[ftmp3] \n\t"
392  "dli %[tmp0], 0x03 \n\t"
393  "pcmpeqh %[ftmp6] , %[ftmp6], %[ftmp4] \n\t"
394  "mtc1 %[tmp0], %[ftmp3] \n\t"
395  "psrah %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
396  "psrah %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
397  "xor %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
398  "xor %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
399  "psubh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
400  "psubh %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
401  "pandn %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
402  "pandn %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
403  PTR_ADDIU "%[addr0], %[addr0], 0x10 \n\t"
404  "gssdxc1 %[ftmp5], 0x00(%[addr0], %[block]) \n\t"
405  "gssdxc1 %[ftmp6], 0x08(%[addr0], %[block]) \n\t"
406  "blez %[addr0], 1b \n\t"
407  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
408  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
409  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
410  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
411  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
412  [tmp0]"=&r"(tmp[0]),
413  [addr0]"=&r"(addr[0])
414  : [block]"r"((mips_reg)(block+nCoeffs)),
415  [quant]"r"((mips_reg)(quant_matrix+nCoeffs)),
416  [nCoeffs]"r"((mips_reg)(2*(-nCoeffs))),
417  [qscale]"r"(qscale)
418  : "memory"
419  );
420 
421  block[0]= block0;
422 }
423 
425 {
 /*
  * DCT denoise step written with Loongson MMI inline assembly.
  * NOTE(review): the signature line (listing line 424) is missing from this
  * page; the index at the end of the page lists
  * void ff_denoise_dct_mmi(MpegEncContext *s, int16_t *block), which is
  * presumably this function.
  *
  * For each of the 64 coefficients c in block (eight per iteration):
  *   - |c| is added to the matching 32-bit entry of s->dct_error_sum[intra];
  *   - c is replaced by sign(c) * (|c| - offset[i]), where the subtraction
  *     is psubush (presumably an unsigned saturating subtract, so the
  *     magnitude would not go below zero -- confirm against the ISA manual).
  * s->dct_count[intra] is incremented once per call.
  */
426  const int intra = s->mb_intra;
427  int *sum = s->dct_error_sum[intra];
428  uint16_t *offset = s->dct_offset[intra];
429  double ftmp[8];
430  mips_reg addr[1];
431 
432  s->dct_count[intra]++;
433 
434  __asm__ volatile(
 /* ftmp0 = 0, used as the zero half when widening to 32 bits */
435  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
436  "1: \n\t"
 /* load eight coefficients; ftmp2/ftmp4 = mask of negative lanes (0 > c) */
437  "ldc1 %[ftmp1], 0x00(%[block]) \n\t"
438  "xor %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
439  "ldc1 %[ftmp3], 0x08(%[block]) \n\t"
440  "xor %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
441  "pcmpgth %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
442  "pcmpgth %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
 /* c = |c| */
443  "xor %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
444  "xor %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
445  "psubh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
446  "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
 /* keep |c| in ftmp5/ftmp7 for the error sum, then subtract the offsets */
447  "ldc1 %[ftmp6], 0x00(%[offset]) \n\t"
448  "mov.d %[ftmp5], %[ftmp1] \n\t"
449  "psubush %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
450  "ldc1 %[ftmp6], 0x08(%[offset]) \n\t"
451  "mov.d %[ftmp7], %[ftmp3] \n\t"
452  "psubush %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
 /* restore the original sign and write the coefficients back */
453  "xor %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
454  "xor %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
455  "psubh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
456  "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
457  "sdc1 %[ftmp1], 0x00(%[block]) \n\t"
458  "sdc1 %[ftmp3], 0x08(%[block]) \n\t"
 /* widen the eight saved |c| values to 32 bits by interleaving with zero */
459  "mov.d %[ftmp1], %[ftmp5] \n\t"
460  "mov.d %[ftmp3], %[ftmp7] \n\t"
461  "punpcklhw %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
462  "punpckhhw %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
463  "punpcklhw %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
464  "punpckhhw %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
 /* sum[0..7] += |c| (eight 32-bit words, 32 bytes) */
465  "ldc1 %[ftmp2], 0x00(%[sum]) \n\t"
466  "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t"
467  "ldc1 %[ftmp2], 0x08(%[sum]) \n\t"
468  "paddw %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
469  "ldc1 %[ftmp2], 0x10(%[sum]) \n\t"
470  "paddw %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
471  "ldc1 %[ftmp2], 0x18(%[sum]) \n\t"
472  "paddw %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
473  "sdc1 %[ftmp5], 0x00(%[sum]) \n\t"
474  "sdc1 %[ftmp1], 0x08(%[sum]) \n\t"
475  "sdc1 %[ftmp7], 0x10(%[sum]) \n\t"
476  "sdc1 %[ftmp3], 0x18(%[sum]) \n\t"
 /* advance all three pointers by eight elements; loop while block < block1 */
477  PTR_ADDIU "%[block], %[block], 0x10 \n\t"
478  PTR_ADDIU "%[sum], %[sum], 0x20 \n\t"
479  PTR_SUBU "%[addr0], %[block1], %[block] \n\t"
480  PTR_ADDIU "%[offset], %[offset], 0x10 \n\t"
481  "bgtz %[addr0], 1b \n\t"
482  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
483  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
484  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
485  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
486  [addr0]"=&r"(addr[0]),
 /* the three pointers are advanced inside the asm, hence read-write */
487  [block]"+&r"(block), [sum]"+&r"(sum),
488  [offset]"+&r"(offset)
 /* block1 = end of the 64-coefficient block */
489  : [block1]"r"(block+64)
490  : "memory"
491  );
492 }
#define mips_reg
Definition: asmdefs.h:44
const char * s
Definition: avisynth_c.h:631
MIPS assembly defines from sys/asm.h but rewritten for use with C inline assembly (rather than from within .s files).
uint8_t raster_end[64]
Definition: idctdsp.h:32
void ff_dct_unquantize_mpeg1_inter_mmi(MpegEncContext *s, int16_t *block, int n, int qscale)
int h263_aic
Advanced INTRA Coding (AIC)
Definition: mpegvideo.h:84
static int16_t block[64]
Definition: dct.c:113
#define av_assert2(cond)
assert() equivalent that is intended for use in speed-critical code.
Definition: avassert.h:63
uint16_t(* dct_offset)[64]
Definition: mpegvideo.h:334
void ff_dct_unquantize_h263_intra_mmi(MpegEncContext *s, int16_t *block, int n, int qscale)
Definition: mpegvideo_mmi.c:28
void ff_dct_unquantize_mpeg1_intra_mmi(MpegEncContext *s, int16_t *block, int n, int qscale)
static const uint8_t offset[127][2]
Definition: vf_spp.c:92
#define PTR_SUBU
Definition: asmdefs.h:50
int alternate_scan
Definition: mpegvideo.h:467
int block_last_index[12]
last non zero coefficient in block
Definition: mpegvideo.h:83
int n
Definition: avisynth_c.h:547
void ff_dct_unquantize_mpeg2_intra_mmi(MpegEncContext *s, int16_t *block, int n, int qscale)
uint16_t inter_matrix[64]
Definition: mpegvideo.h:302
ScanTable intra_scantable
Definition: mpegvideo.h:88
#define PTR_ADDIU
Definition: asmdefs.h:48
void ff_dct_unquantize_h263_inter_mmi(MpegEncContext *s, int16_t *block, int n, int qscale)
const uint8_t * quant
uint8_t level
Definition: svq3.c:193
MpegEncContext.
Definition: mpegvideo.h:78
int(* dct_error_sum)[64]
Definition: mpegvideo.h:332
void ff_denoise_dct_mmi(MpegEncContext *s, int16_t *block)
static uint8_t tmp[8]
Definition: des.c:38
static int16_t block1[64]
Definition: dct.c:114
uint16_t intra_matrix[64]
matrix transmitted in the bitstream
Definition: mpegvideo.h:300
#define PTR_ADDU
Definition: asmdefs.h:47
ScanTable inter_scantable
if inter == intra then intra should be used to reduce the cache usage
Definition: mpegvideo.h:87
int dct_count[2]
Definition: mpegvideo.h:333