FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
mpegvideo_mmi.c
Go to the documentation of this file.
1 /*
2  * Loongson SIMD optimized mpegvideo
3  *
4  * Copyright (c) 2015 Loongson Technology Corporation Limited
5  * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
6  * Zhang Shuangshuang <zhangshuangshuang@ict.ac.cn>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24 
25 #include "mpegvideo_mips.h"
26 
28  int n, int qscale)
29 {
30  int64_t level, qmul, qadd, nCoeffs;
31 
32  qmul = qscale << 1;
33  av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
34 
35  if (!s->h263_aic) {
36  if (n<4)
37  level = block[0] * s->y_dc_scale;
38  else
39  level = block[0] * s->c_dc_scale;
40  qadd = (qscale-1) | 1;
41  } else {
42  qadd = 0;
43  level = block[0];
44  }
45 
46  if(s->ac_pred)
47  nCoeffs = 63;
48  else
49  nCoeffs = s->inter_scantable.raster_end[s->block_last_index[n]];
50 
51  __asm__ volatile (
52  "xor $f12, $f12, $f12 \r\n"
53  "lwc1 $f12, %1 \n\r"
54  "xor $f10, $f10, $f10 \r\n"
55  "lwc1 $f10, %2 \r\n"
56  "xor $f14, $f14, $f14 \r\n"
57  "packsswh $f12, $f12, $f12 \r\n"
58  "packsswh $f12, $f12, $f12 \r\n"
59  "packsswh $f10, $f10, $f10 \r\n"
60  "packsswh $f10, $f10, $f10 \r\n"
61  "psubh $f14, $f14, $f10 \r\n"
62  "xor $f8, $f8, $f8 \r\n"
63  ".p2align 4 \r\n"
64  "1: \r\n"
65  "daddu $8, %0, %3 \r\n"
66  "gsldlc1 $f0, 7($8) \r\n"
67  "gsldrc1 $f0, 0($8) \r\n"
68  "gsldlc1 $f2, 15($8) \r\n"
69  "gsldrc1 $f2, 8($8) \r\n"
70  "mov.d $f4, $f0 \r\n"
71  "mov.d $f6, $f2 \r\n"
72  "pmullh $f0, $f0, $f12 \r\n"
73  "pmullh $f2, $f2, $f12 \r\n"
74  "pcmpgth $f4, $f4, $f8 \r\n"
75  "pcmpgth $f6, $f6, $f8 \r\n"
76  "xor $f0, $f0, $f4 \r\n"
77  "xor $f2, $f2, $f6 \r\n"
78  "paddh $f0, $f0, $f14 \r\n"
79  "paddh $f2, $f2, $f14 \r\n"
80  "xor $f4, $f4, $f0 \r\n"
81  "xor $f6, $f6, $f2 \r\n"
82  "pcmpeqh $f0, $f0, $f14 \r\n"
83  "pcmpeqh $f2, $f2, $f14 \r\n"
84  "pandn $f0, $f0, $f4 \r\n"
85  "pandn $f2, $f2, $f6 \r\n"
86  "gssdlc1 $f0, 7($8) \r\n"
87  "gssdrc1 $f0, 0($8) \r\n"
88  "gssdlc1 $f2, 15($8) \r\n"
89  "gssdrc1 $f2, 8($8) \r\n"
90  "addi %3, %3, 16 \r\n"
91  "blez %3, 1b \r\n"
92  ::"r"(block+nCoeffs),"m"(qmul),"m"(qadd),"r"(2*(-nCoeffs))
93  :"$8","memory"
94  );
95 
96  block[0] = level;
97 }
98 
100  int n, int qscale)
101 {
102  int64_t qmul, qadd, nCoeffs;
103 
104  qmul = qscale << 1;
105  qadd = (qscale - 1) | 1;
106  av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
107  nCoeffs = s->inter_scantable.raster_end[s->block_last_index[n]];
108 
109  __asm__ volatile (
110  "xor $f12, $f12, $f12 \r\n"
111  "lwc1 $f12, %1 \r\n"
112  "xor $f10, $f10, $f10 \r\n"
113  "lwc1 $f10, %2 \r\n"
114  "packsswh $f12, $f12, $f12 \r\n"
115  "packsswh $f12, $f12, $f12 \r\n"
116  "xor $f14, $f14, $f14 \r\n"
117  "packsswh $f10, $f10, $f10 \r\n"
118  "packsswh $f10, $f10, $f10 \r\n"
119  "psubh $f14, $f14, $f10 \r\n"
120  "xor $f8, $f8, $f8 \r\n"
121  ".p2align 4 \r\n"
122  "1: \r\n"
123  "daddu $8, %0, %3 \r\n"
124  "gsldlc1 $f0, 7($8) \r\n"
125  "gsldrc1 $f0, 0($8) \r\n"
126  "gsldlc1 $f2, 15($8) \r\n"
127  "gsldrc1 $f2, 8($8) \r\n"
128  "mov.d $f4, $f0 \r\n"
129  "mov.d $f6, $f2 \r\n"
130  "pmullh $f0, $f0, $f12 \r\n"
131  "pmullh $f2, $f2, $f12 \r\n"
132  "pcmpgth $f4, $f4, $f8 \r\n"
133  "pcmpgth $f6, $f6, $f8 \r\n"
134  "xor $f0, $f0, $f4 \r\n"
135  "xor $f2, $f2, $f6 \r\n"
136  "paddh $f0, $f0, $f14 \r\n"
137  "paddh $f2, $f2, $f14 \r\n"
138  "xor $f4, $f4, $f0 \r\n"
139  "xor $f6, $f6, $f2 \r\n"
140  "pcmpeqh $f0, $f0, $f14 \r\n"
141  "pcmpeqh $f2, $f2, $f14 \r\n"
142  "pandn $f0, $f0, $f4 \r\n"
143  "pandn $f2, $f2, $f6 \r\n"
144  "gssdlc1 $f0, 7($8) \r\n"
145  "gssdrc1 $f0, 0($8) \r\n"
146  "gssdlc1 $f2, 15($8) \r\n"
147  "gssdrc1 $f2, 8($8) \r\n"
148  "addi %3, %3, 16 \r\n"
149  "blez %3, 1b \r\n"
150  ::"r"(block+nCoeffs),"m"(qmul),"m"(qadd),"r"(2*(-nCoeffs))
151  : "$8","memory"
152  );
153 }
154 
156  int n, int qscale)
157 {
158  int64_t nCoeffs;
159  const uint16_t *quant_matrix;
160  int block0;
161 
162  av_assert2(s->block_last_index[n]>=0);
163  nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]] + 1;
164 
165  if (n<4)
166  block0 = block[0] * s->y_dc_scale;
167  else
168  block0 = block[0] * s->c_dc_scale;
169 
170  /* XXX: only mpeg1 */
171  quant_matrix = s->intra_matrix;
172 
173  __asm__ volatile (
174  "pcmpeqh $f14, $f14, $f14 \r\n"
175  "dli $10, 15 \r\n"
176  "dmtc1 $10, $f16 \r\n"
177  "xor $f12, $f12, $f12 \r\n"
178  "lwc1 $f12, %2 \r\n"
179  "psrlh $f14, $f14, $f16 \r\n"
180  "packsswh $f12, $f12, $f12 \r\n"
181  "packsswh $f12, $f12, $f12 \r\n"
182  "or $8, %3, $0 \r\n"
183  ".p2align 4 \r\n"
184  "1: \r\n"
185  "gsldxc1 $f0, 0($8, %0) \r\n"
186  "gsldxc1 $f2, 8($8, %0) \r\n"
187  "mov.d $f16, $f0 \r\n"
188  "mov.d $f18, $f2 \r\n"
189  "gsldxc1 $f8, 0($8, %1) \r\n"
190  "gsldxc1 $f10, 8($8, %1) \r\n"
191  "pmullh $f8, $f8, $f12 \r\n"
192  "pmullh $f10, $f10, $f12 \r\n"
193  "xor $f4, $f4, $f4 \r\n"
194  "xor $f6, $f6, $f6 \r\n"
195  "pcmpgth $f4, $f4, $f0 \r\n"
196  "pcmpgth $f6, $f6, $f2 \r\n"
197  "xor $f0, $f0, $f4 \r\n"
198  "xor $f2, $f2, $f6 \r\n"
199  "psubh $f0, $f0, $f4 \r\n"
200  "psubh $f2, $f2, $f6 \r\n"
201  "pmullh $f0, $f0, $f8 \r\n"
202  "pmullh $f2, $f2, $f10 \r\n"
203  "xor $f8, $f8, $f8 \r\n"
204  "xor $f10, $f10, $f10 \r\n"
205  "pcmpeqh $f8, $f8, $f16 \r\n"
206  "pcmpeqh $f10, $f10, $f18 \r\n"
207  "dli $10, 3 \r\n"
208  "dmtc1 $10, $f16 \r\n"
209  "psrah $f0, $f0, $f16 \r\n"
210  "psrah $f2, $f2, $f16 \r\n"
211  "psubh $f0, $f0, $f14 \r\n"
212  "psubh $f2, $f2, $f14 \r\n"
213  "or $f0, $f0, $f14 \r\n"
214  "or $f2, $f2, $f14 \r\n"
215  "xor $f0, $f0, $f4 \r\n"
216  "xor $f2, $f2, $f6 \r\n"
217  "psubh $f0, $f0, $f4 \r\n"
218  "psubh $f2, $f2, $f6 \r\n"
219  "pandn $f8, $f8, $f0 \r\n"
220  "pandn $f10, $f10, $f2 \r\n"
221  "gssdxc1 $f8, 0($8, %0) \r\n"
222  "gssdxc1 $f10, 8($8, %0) \r\n"
223  "addi $8, $8, 16 \r\n"
224  "bltz $8, 1b \r\n"
225  ::"r"(block+nCoeffs),"r"(quant_matrix+nCoeffs),"m"(qscale),
226  "g"(-2*nCoeffs)
227  : "$8","$10","memory"
228  );
229 
230  block[0] = block0;
231 }
232 
234  int n, int qscale)
235 {
236  int64_t nCoeffs;
237  const uint16_t *quant_matrix;
238 
239  av_assert2(s->block_last_index[n] >= 0);
240  nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]] + 1;
241  quant_matrix = s->inter_matrix;
242 
243  __asm__ volatile (
244  "pcmpeqh $f14, $f14, $f14 \r\n"
245  "dli $10, 15 \r\n"
246  "dmtc1 $10, $f16 \r\n"
247  "xor $f12, $f12, $f12 \r\n"
248  "lwc1 $f12, %2 \r\n"
249  "psrlh $f14, $f14, $f16 \r\n"
250  "packsswh $f12, $f12, $f12 \r\n"
251  "packsswh $f12, $f12, $f12 \r\n"
252  "or $8, %3, $0 \r\n"
253  ".p2align 4 \r\n"
254  "1: \r\n"
255  "gsldxc1 $f0, 0($8, %0) \r\n"
256  "gsldxc1 $f2, 8($8, %0) \r\n"
257  "mov.d $f16, $f0 \r\n"
258  "mov.d $f18, $f2 \r\n"
259  "gsldxc1 $f8, 0($8, %1) \r\n"
260  "gsldxc1 $f10, 8($8, %1) \r\n"
261  "pmullh $f8, $f8, $f12 \r\n"
262  "pmullh $f10, $f10, $f12 \r\n"
263  "xor $f4, $f4, $f4 \r\n"
264  "xor $f6, $f6, $f6 \r\n"
265  "pcmpgth $f4, $f4, $f0 \r\n"
266  "pcmpgth $f6, $f6, $f2 \r\n"
267  "xor $f0, $f0, $f4 \r\n"
268  "xor $f2, $f2, $f6 \r\n"
269  "psubh $f0, $f0, $f4 \r\n"
270  "psubh $f2, $f2, $f6 \r\n"
271  "paddh $f0, $f0, $f0 \r\n"
272  "paddh $f2, $f2, $f2 \r\n"
273  "paddh $f0, $f0, $f14 \r\n"
274  "paddh $f2, $f2, $f14 \r\n"
275  "pmullh $f0, $f0, $f8 \r\n"
276  "pmullh $f2, $f2, $f10 \r\n"
277  "xor $f8, $f8, $f8 \r\n"
278  "xor $f10, $f10, $f10 \r\n"
279  "pcmpeqh $f8, $f8, $f16 \r\n"
280  "pcmpeqh $f10, $f10, $f18 \r\n"
281  "dli $10, 4 \r\n"
282  "dmtc1 $10, $f16 \r\n"
283  "psrah $f0, $f0, $f16 \r\n"
284  "psrah $f2, $f2, $f16 \r\n"
285  "psubh $f0, $f0, $f14 \r\n"
286  "psubh $f2, $f2, $f14 \r\n"
287  "or $f0, $f0, $f14 \r\n"
288  "or $f2, $f2, $f14 \r\n"
289  "xor $f0, $f0, $f4 \r\n"
290  "xor $f2, $f2, $f6 \r\n"
291  "psubh $f0, $f0, $f4 \r\n"
292  "psubh $f2, $f2, $f6 \r\n"
293  "pandn $f8, $f8, $f0 \r\n"
294  "pandn $f10, $f10, $f2 \r\n"
295  "gssdxc1 $f8, 0($8, %0) \r\n"
296  "gssdxc1 $f10, 8($8, %0) \r\n"
297  "addi $8, $8, 16 \r\n"
298  "bltz $8, 1b \r\n"
299  ::"r"(block+nCoeffs),"r"(quant_matrix+nCoeffs),"m"(qscale),
300  "g"(-2*nCoeffs)
301  :"$8","$10","memory"
302  );
303 }
304 
306 {
307  const int intra = s->mb_intra;
308  int *sum = s->dct_error_sum[intra];
309  uint16_t *offset = s->dct_offset[intra];
310 
311  s->dct_count[intra]++;
312 
313  __asm__ volatile(
314  "xor $f14, $f14, $f14 \r\n"
315  "1: \r\n"
316  "ldc1 $f4, 0(%[block]) \r\n"
317  "xor $f0, $f0, $f0 \r\n"
318  "ldc1 $f6, 8(%[block]) \r\n"
319  "xor $f2, $f2, $f2 \r\n"
320  "pcmpgth $f0, $f0, $f4 \r\n"
321  "pcmpgth $f2, $f2, $f6 \r\n"
322  "xor $f4, $f4, $f0 \r\n"
323  "xor $f6, $f6, $f2 \r\n"
324  "psubh $f4, $f4, $f0 \r\n"
325  "psubh $f6, $f6, $f2 \r\n"
326  "ldc1 $f12, 0(%[offset]) \r\n"
327  "mov.d $f8, $f4 \r\n"
328  "psubush $f4, $f4, $f12 \r\n"
329  "ldc1 $f12, 8(%[offset]) \r\n"
330  "mov.d $f10, $f6 \r\n"
331  "psubush $f6, $f6, $f12 \r\n"
332  "xor $f4, $f4, $f0 \r\n"
333  "xor $f6, $f6, $f2 \r\n"
334  "psubh $f4, $f4, $f0 \r\n"
335  "psubh $f6, $f6, $f2 \r\n"
336  "sdc1 $f4, 0(%[block]) \r\n"
337  "sdc1 $f6, 8(%[block]) \r\n"
338  "mov.d $f4, $f8 \r\n"
339  "mov.d $f6, $f10 \r\n"
340  "punpcklhw $f8, $f8, $f14 \r\n"
341  "punpckhhw $f4, $f4, $f14 \r\n"
342  "punpcklhw $f10, $f10, $f14 \r\n"
343  "punpckhhw $f6, $f6, $f14 \r\n"
344  "ldc1 $f0, 0(%[sum]) \r\n"
345  "paddw $f8, $f8, $f0 \r\n"
346  "ldc1 $f0, 8(%[sum]) \r\n"
347  "paddw $f4, $f4, $f0 \r\n"
348  "ldc1 $f0, 16(%[sum]) \r\n"
349  "paddw $f10, $f10, $f0 \r\n"
350  "ldc1 $f0, 24(%[sum]) \r\n"
351  "paddw $f6, $f6, $f0 \r\n"
352  "sdc1 $f8, 0(%[sum]) \r\n"
353  "sdc1 $f4, 8(%[sum]) \r\n"
354  "sdc1 $f10, 16(%[sum]) \r\n"
355  "sdc1 $f6, 24(%[sum]) \r\n"
356  "daddiu %[block], %[block], 16 \r\n"
357  "daddiu %[sum], %[sum], 32 \r\n"
358  "daddiu %[offset], %[offset], 16 \r\n"
359  "dsubu $8, %[block1], %[block] \r\n"
360  "bgtz $8, 1b \r\n"
361  : [block]"+r"(block),[sum]"+r"(sum),[offset]"+r"(offset)
362  : [block1]"r"(block+64)
363  : "$8","$f0","$f2","$f4","$f6","$f8","$f10","$f12","$f14"
364  );
365 }
366 
368  int n, int qscale)
369 {
370  uint64_t nCoeffs;
371  const uint16_t *quant_matrix;
372  int block0;
373 
374  assert(s->block_last_index[n]>=0);
375 
376  if (s->alternate_scan)
377  nCoeffs = 63;
378  else
379  nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]];
380 
381  if (n < 4)
382  block0 = block[0] * s->y_dc_scale;
383  else
384  block0 = block[0] * s->c_dc_scale;
385 
386  quant_matrix = s->intra_matrix;
387 
388  __asm__ volatile (
389  "pcmpeqh $f14, $f14, $f14 \r\n"
390  "dli $10, 15 \r\n"
391  "dmtc1 $10, $f16 \r\n"
392  "xor $f12, $f12, $f12 \r\n"
393  "lwc1 $f12, %[qscale] \r\n"
394  "psrlh $f14, $f14, $f16 \r\n"
395  "packsswh $f12, $f12, $f12 \r\n"
396  "packsswh $f12, $f12, $f12 \r\n"
397  "or $8, %[ncoeffs], $0 \r\n"
398  ".p2align 4 \r\n"
399  "1: \r\n"
400  "gsldxc1 $f0, 0($8, %[block]) \r\n"
401  "gsldxc1 $f2, 8($8, %[block]) \r\n"
402  "mov.d $f16, $f0 \r\n"
403  "mov.d $f18, $f2 \r\n"
404  "gsldxc1 $f8, 0($8, %[quant]) \r\n"
405  "gsldxc1 $f10, 0($8, %[quant]) \r\n"
406  "pmullh $f8, $f8, $f12 \r\n"
407  "pmullh $f10, $f10, $f12 \r\n"
408  "xor $f4, $f4, $f4 \r\n"
409  "xor $f6, $f6, $f6 \r\n"
410  "pcmpgth $f4, $f4, $f0 \r\n"
411  "pcmpgth $f6, $f6, $f2 \r\n"
412  "xor $f0, $f0, $f4 \r\n"
413  "xor $f2, $f2, $f6 \r\n"
414  "psubh $f0, $f0, $f4 \r\n"
415  "psubh $f2, $f2, $f6 \r\n"
416  "pmullh $f0, $f0, $f8 \r\n"
417  "pmullh $f2, $f2, $f10 \r\n"
418  "xor $f8, $f8, $f8 \r\n"
419  "xor $f10, $f10, $f10 \r\n"
420  "pcmpeqh $f8, $f8, $f16 \r\n"
421  "pcmpeqh $f10 ,$f10, $f18 \r\n"
422  "dli $10, 3 \r\n"
423  "dmtc1 $10, $f16 \r\n"
424  "psrah $f0, $f0, $f16 \r\n"
425  "psrah $f2, $f2, $f16 \r\n"
426  "xor $f0, $f0, $f4 \r\n"
427  "xor $f2, $f2, $f6 \r\n"
428  "psubh $f0, $f0, $f4 \r\n"
429  "psubh $f2, $f2, $f6 \r\n"
430  "pandn $f8, $f8, $f0 \r\n"
431  "pandn $f10, $f10, $f2 \r\n"
432  "gssdxc1 $f8, 0($8, %[block]) \r\n"
433  "gssdxc1 $f10, 8($8, %[block]) \r\n"
434  "daddiu $8, $8, 16 \r\n"
435  "blez $8, 1b \r\n"
436  ::[block]"r"(block+nCoeffs),[quant]"r"(quant_matrix+nCoeffs),
437  [qscale]"m"(qscale),[ncoeffs]"g"(-2*nCoeffs)
438  : "$8","$10","$f0","$f2","$f4","$f6","$f8","$f10","$f12","$f14","$f16",
439  "$f18"
440  );
441 
442  block[0]= block0;
443 }
const char * s
Definition: avisynth_c.h:631
uint8_t raster_end[64]
Definition: idctdsp.h:32
void ff_dct_unquantize_mpeg1_inter_mmi(MpegEncContext *s, int16_t *block, int n, int qscale)
int h263_aic
Advanced INTRA Coding (AIC)
Definition: mpegvideo.h:84
#define av_assert2(cond)
assert() equivalent that is meant for use in speed-critical code.
Definition: avassert.h:63
uint16_t(* dct_offset)[64]
Definition: mpegvideo.h:334
void ff_dct_unquantize_h263_intra_mmi(MpegEncContext *s, int16_t *block, int n, int qscale)
Definition: mpegvideo_mmi.c:27
void ff_dct_unquantize_mpeg1_intra_mmi(MpegEncContext *s, int16_t *block, int n, int qscale)
static const uint8_t offset[127][2]
Definition: vf_spp.c:92
int alternate_scan
Definition: mpegvideo.h:467
int block_last_index[12]
last non zero coefficient in block
Definition: mpegvideo.h:83
int n
Definition: avisynth_c.h:547
void ff_dct_unquantize_mpeg2_intra_mmi(MpegEncContext *s, int16_t *block, int n, int qscale)
uint16_t inter_matrix[64]
Definition: mpegvideo.h:302
ScanTable intra_scantable
Definition: mpegvideo.h:88
void ff_dct_unquantize_h263_inter_mmi(MpegEncContext *s, int16_t *block, int n, int qscale)
Definition: mpegvideo_mmi.c:99
const uint8_t * quant
static int16_t block1[64]
Definition: dct-test.c:113
uint8_t level
Definition: svq3.c:150
MpegEncContext.
Definition: mpegvideo.h:78
int(* dct_error_sum)[64]
Definition: mpegvideo.h:332
void ff_denoise_dct_mmi(MpegEncContext *s, int16_t *block)
uint16_t intra_matrix[64]
matrix transmitted in the bitstream
Definition: mpegvideo.h:300
ScanTable inter_scantable
if inter == intra then intra should be used to reduce the cache usage
Definition: mpegvideo.h:87
int dct_count[2]
Definition: mpegvideo.h:333
static int16_t block[64]
Definition: dct-test.c:112