FFmpeg
mmiutils.h
/*
 * Loongson SIMD utils
 *
 * Copyright (c) 2016 Loongson Technology Corporation Limited
 * Copyright (c) 2016 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#ifndef AVUTIL_MIPS_MMIUTILS_H
#define AVUTIL_MIPS_MMIUTILS_H

#include "config.h"

#include "libavutil/mem_internal.h"
#include "libavutil/mips/asmdefs.h"

/*
 * These were used to define temporary registers for the MMI macros;
 * now that $at is used instead they are theoretically unnecessary,
 * but they are left here so existing callers keep working unchanged.
 */
#define DECLARE_VAR_LOW32
#define RESTRICT_ASM_LOW32
#define DECLARE_VAR_ALL64
#define RESTRICT_ASM_ALL64
#define DECLARE_VAR_ADDRT
#define RESTRICT_ASM_ADDRT

#if HAVE_LOONGSON2

#define MMI_LWX(reg, addr, stride, bias) \
    ".set noat \n\t" \
    PTR_ADDU "$at, "#addr", "#stride" \n\t" \
    "lw "#reg", "#bias"($at) \n\t" \
    ".set at \n\t"

#define MMI_SWX(reg, addr, stride, bias) \
    ".set noat \n\t" \
    PTR_ADDU "$at, "#addr", "#stride" \n\t" \
    "sw "#reg", "#bias"($at) \n\t" \
    ".set at \n\t"

#define MMI_LDX(reg, addr, stride, bias) \
    ".set noat \n\t" \
    PTR_ADDU "$at, "#addr", "#stride" \n\t" \
    "ld "#reg", "#bias"($at) \n\t" \
    ".set at \n\t"

#define MMI_SDX(reg, addr, stride, bias) \
    ".set noat \n\t" \
    PTR_ADDU "$at, "#addr", "#stride" \n\t" \
    "sd "#reg", "#bias"($at) \n\t" \
    ".set at \n\t"

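/*
 * Note: on Loongson-2 the indexed forms above emulate reg+reg addressing by
 * computing addr+stride into $at and then accessing bias($at); the Loongson-3
 * branch below maps the same names onto the single gs*x instructions, so
 * callers can use them unchanged on either core. A minimal usage sketch,
 * with hypothetical src/dst/stride operands (illustrative only, not part of
 * FFmpeg):
 *
 *     uint64_t tmp;
 *     __asm__ volatile (
 *         MMI_LDX(%[tmp], %[src], %[stride], 0x00)   // tmp = *(uint64_t *)(src + stride)
 *         MMI_SDX(%[tmp], %[dst], %[stride], 0x00)   // *(uint64_t *)(dst + stride) = tmp
 *         : [tmp]"=&r"(tmp)
 *         : [src]"r"(src), [dst]"r"(dst), [stride]"r"(stride)
 *         : "memory"
 *     );
 */
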
#define MMI_LWC1(fp, addr, bias) \
    "lwc1 "#fp", "#bias"("#addr") \n\t"

#define MMI_ULWC1(fp, addr, bias) \
    ".set noat \n\t" \
    "ulw $at, "#bias"("#addr") \n\t" \
    "mtc1 $at, "#fp" \n\t" \
    ".set at \n\t"

#define MMI_LWXC1(fp, addr, stride, bias) \
    ".set noat \n\t" \
    PTR_ADDU "$at, "#addr", "#stride" \n\t" \
    MMI_LWC1(fp, $at, bias) \
    ".set at \n\t"

#define MMI_SWC1(fp, addr, bias) \
    "swc1 "#fp", "#bias"("#addr") \n\t"

#define MMI_USWC1(fp, addr, bias) \
    ".set noat \n\t" \
    "mfc1 $at, "#fp" \n\t" \
    "usw $at, "#bias"("#addr") \n\t" \
    ".set at \n\t"

#define MMI_SWXC1(fp, addr, stride, bias) \
    ".set noat \n\t" \
    PTR_ADDU "$at, "#addr", "#stride" \n\t" \
    MMI_SWC1(fp, $at, bias) \
    ".set at \n\t"

#define MMI_LDC1(fp, addr, bias) \
    "ldc1 "#fp", "#bias"("#addr") \n\t"

#define MMI_ULDC1(fp, addr, bias) \
    ".set noat \n\t" \
    "uld $at, "#bias"("#addr") \n\t" \
    "dmtc1 $at, "#fp" \n\t" \
    ".set at \n\t"

#define MMI_LDXC1(fp, addr, stride, bias) \
    ".set noat \n\t" \
    PTR_ADDU "$at, "#addr", "#stride" \n\t" \
    MMI_LDC1(fp, $at, bias) \
    ".set at \n\t"

#define MMI_SDC1(fp, addr, bias) \
    "sdc1 "#fp", "#bias"("#addr") \n\t"

#define MMI_USDC1(fp, addr, bias) \
    ".set noat \n\t" \
    "dmfc1 $at, "#fp" \n\t" \
    "usd $at, "#bias"("#addr") \n\t" \
    ".set at \n\t"

#define MMI_SDXC1(fp, addr, stride, bias) \
    ".set noat \n\t" \
    PTR_ADDU "$at, "#addr", "#stride" \n\t" \
    MMI_SDC1(fp, $at, bias) \
    ".set at \n\t"

#define MMI_LQ(reg1, reg2, addr, bias) \
    "ld "#reg1", "#bias"("#addr") \n\t" \
    "ld "#reg2", 8+"#bias"("#addr") \n\t"

#define MMI_SQ(reg1, reg2, addr, bias) \
    "sd "#reg1", "#bias"("#addr") \n\t" \
    "sd "#reg2", 8+"#bias"("#addr") \n\t"

#define MMI_LQC1(fp1, fp2, addr, bias) \
    "ldc1 "#fp1", "#bias"("#addr") \n\t" \
    "ldc1 "#fp2", 8+"#bias"("#addr") \n\t"

#define MMI_SQC1(fp1, fp2, addr, bias) \
    "sdc1 "#fp1", "#bias"("#addr") \n\t" \
    "sdc1 "#fp2", 8+"#bias"("#addr") \n\t"

#elif HAVE_LOONGSON3 /* !HAVE_LOONGSON2 */

#define MMI_LWX(reg, addr, stride, bias) \
    "gslwx "#reg", "#bias"("#addr", "#stride") \n\t"

#define MMI_SWX(reg, addr, stride, bias) \
    "gsswx "#reg", "#bias"("#addr", "#stride") \n\t"

#define MMI_LDX(reg, addr, stride, bias) \
    "gsldx "#reg", "#bias"("#addr", "#stride") \n\t"

#define MMI_SDX(reg, addr, stride, bias) \
    "gssdx "#reg", "#bias"("#addr", "#stride") \n\t"

#define MMI_LWC1(fp, addr, bias) \
    "lwc1 "#fp", "#bias"("#addr") \n\t"

#if _MIPS_SIM == _ABIO32 /* workaround for 3A2000 gslwlc1 bug */

#define MMI_ULWC1(fp, addr, bias) \
    ".set noat \n\t" \
    "ulw $at, "#bias"("#addr") \n\t" \
    "mtc1 $at, "#fp" \n\t" \
    ".set at \n\t"

#else /* _MIPS_SIM != _ABIO32 */

#define MMI_ULWC1(fp, addr, bias) \
    "gslwlc1 "#fp", 3+"#bias"("#addr") \n\t" \
    "gslwrc1 "#fp", "#bias"("#addr") \n\t"

#endif /* _MIPS_SIM != _ABIO32 */

#define MMI_LWXC1(fp, addr, stride, bias) \
    "gslwxc1 "#fp", "#bias"("#addr", "#stride") \n\t"

#define MMI_SWC1(fp, addr, bias) \
    "swc1 "#fp", "#bias"("#addr") \n\t"

#define MMI_USWC1(fp, addr, bias) \
    "gsswlc1 "#fp", 3+"#bias"("#addr") \n\t" \
    "gsswrc1 "#fp", "#bias"("#addr") \n\t"

#define MMI_SWXC1(fp, addr, stride, bias) \
    "gsswxc1 "#fp", "#bias"("#addr", "#stride") \n\t"

#define MMI_LDC1(fp, addr, bias) \
    "ldc1 "#fp", "#bias"("#addr") \n\t"

#define MMI_ULDC1(fp, addr, bias) \
    "gsldlc1 "#fp", 7+"#bias"("#addr") \n\t" \
    "gsldrc1 "#fp", "#bias"("#addr") \n\t"

#define MMI_LDXC1(fp, addr, stride, bias) \
    "gsldxc1 "#fp", "#bias"("#addr", "#stride") \n\t"

#define MMI_SDC1(fp, addr, bias) \
    "sdc1 "#fp", "#bias"("#addr") \n\t"

#define MMI_USDC1(fp, addr, bias) \
    "gssdlc1 "#fp", 7+"#bias"("#addr") \n\t" \
    "gssdrc1 "#fp", "#bias"("#addr") \n\t"

#define MMI_SDXC1(fp, addr, stride, bias) \
    "gssdxc1 "#fp", "#bias"("#addr", "#stride") \n\t"

#define MMI_LQ(reg1, reg2, addr, bias) \
    "gslq "#reg1", "#reg2", "#bias"("#addr") \n\t"

#define MMI_SQ(reg1, reg2, addr, bias) \
    "gssq "#reg1", "#reg2", "#bias"("#addr") \n\t"

#define MMI_LQC1(fp1, fp2, addr, bias) \
    "gslqc1 "#fp1", "#fp2", "#bias"("#addr") \n\t"

#define MMI_SQC1(fp1, fp2, addr, bias) \
    "gssqc1 "#fp1", "#fp2", "#bias"("#addr") \n\t"

#endif /* HAVE_LOONGSON2 */

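/*
 * All of the MMI_* macros above expand to string literals, so they are meant
 * to be pasted straight into an inline-asm template. A minimal sketch of an
 * unaligned 8-byte copy, assuming plain byte pointers src and dst
 * (illustrative only, not part of FFmpeg):
 *
 *     double ftmp;
 *     __asm__ volatile (
 *         MMI_ULDC1(%[ftmp], %[src], 0x00)   // unaligned 64-bit load
 *         MMI_USDC1(%[ftmp], %[dst], 0x00)   // unaligned 64-bit store
 *         : [ftmp]"=&f"(ftmp)
 *         : [src]"r"(src), [dst]"r"(dst)
 *         : "memory"
 *     );
 *
 * On Loongson-3 this uses gsldlc1/gsldrc1 and gssdlc1/gssdrc1; on Loongson-2
 * it falls back to uld/usd through $at plus dmtc1/dmfc1.
 */
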
/**
 * Back up the callee-saved registers.
 * We do not use the compiler's clobber list because it is not smart enough
 * to take advantage of the quad-word load/store instructions.
 */
#define BACKUP_REG \
    LOCAL_ALIGNED_16(double, temp_backup_reg, [8]); \
    if (_MIPS_SIM == _ABI64) \
        __asm__ volatile ( \
            MMI_SQC1($f25, $f24, %[temp], 0x00) \
            MMI_SQC1($f27, $f26, %[temp], 0x10) \
            MMI_SQC1($f29, $f28, %[temp], 0x20) \
            MMI_SQC1($f31, $f30, %[temp], 0x30) \
            : \
            : [temp]"r"(temp_backup_reg) \
            : "memory" \
        ); \
    else \
        __asm__ volatile ( \
            MMI_SQC1($f22, $f20, %[temp], 0x00) \
            MMI_SQC1($f26, $f24, %[temp], 0x10) \
            MMI_SQC1($f30, $f28, %[temp], 0x20) \
            : \
            : [temp]"r"(temp_backup_reg) \
            : "memory" \
        );

/**
 * Restore the registers saved by BACKUP_REG.
 */
#define RECOVER_REG \
    if (_MIPS_SIM == _ABI64) \
        __asm__ volatile ( \
            MMI_LQC1($f25, $f24, %[temp], 0x00) \
            MMI_LQC1($f27, $f26, %[temp], 0x10) \
            MMI_LQC1($f29, $f28, %[temp], 0x20) \
            MMI_LQC1($f31, $f30, %[temp], 0x30) \
            : \
            : [temp]"r"(temp_backup_reg) \
            : "memory" \
        ); \
    else \
        __asm__ volatile ( \
            MMI_LQC1($f22, $f20, %[temp], 0x00) \
            MMI_LQC1($f26, $f24, %[temp], 0x10) \
            MMI_LQC1($f30, $f28, %[temp], 0x20) \
            : \
            : [temp]"r"(temp_backup_reg) \
            : "memory" \
        );

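/*
 * BACKUP_REG declares the stack buffer itself (temp_backup_reg), so the pair
 * is used at the top and the bottom of the same function body. A minimal
 * sketch, with a hypothetical DSP routine name (illustrative only, not part
 * of FFmpeg):
 *
 *     void ff_example_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
 *     {
 *         BACKUP_REG;
 *         // ... inline-asm kernel that clobbers the callee-saved FP registers ...
 *         RECOVER_REG;
 *     }
 */
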
/**
 * brief: Transpose 2x2 packed-word data.
 * fr_i0, fr_i1: src
 * fr_o0, fr_o1: dst
 */
#define TRANSPOSE_2W(fr_i0, fr_i1, fr_o0, fr_o1) \
    "punpcklwd "#fr_o0", "#fr_i0", "#fr_i1" \n\t" \
    "punpckhwd "#fr_o1", "#fr_i0", "#fr_i1" \n\t"

/**
 * brief: Transpose 4x4 packed-halfword data.
 * fr_i0, fr_i1, fr_i2, fr_i3: src & dst
 * fr_t0, fr_t1, fr_t2, fr_t3: temporary registers
 */
#define TRANSPOSE_4H(fr_i0, fr_i1, fr_i2, fr_i3, \
                     fr_t0, fr_t1, fr_t2, fr_t3) \
    "punpcklhw "#fr_t0", "#fr_i0", "#fr_i1" \n\t" \
    "punpckhhw "#fr_t1", "#fr_i0", "#fr_i1" \n\t" \
    "punpcklhw "#fr_t2", "#fr_i2", "#fr_i3" \n\t" \
    "punpckhhw "#fr_t3", "#fr_i2", "#fr_i3" \n\t" \
    "punpcklwd "#fr_i0", "#fr_t0", "#fr_t2" \n\t" \
    "punpckhwd "#fr_i1", "#fr_t0", "#fr_t2" \n\t" \
    "punpcklwd "#fr_i2", "#fr_t1", "#fr_t3" \n\t" \
    "punpckhwd "#fr_i3", "#fr_t1", "#fr_t3" \n\t"
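
/*
 * What TRANSPOSE_4H computes, assuming halfword lane 0 is the least
 * significant 16 bits of each 64-bit register (illustrative sketch):
 *
 *     before                          after
 *     fr_i0 = { a3, a2, a1, a0 }      fr_i0 = { d0, c0, b0, a0 }
 *     fr_i1 = { b3, b2, b1, b0 }      fr_i1 = { d1, c1, b1, a1 }
 *     fr_i2 = { c3, c2, c1, c0 }      fr_i2 = { d2, c2, b2, a2 }
 *     fr_i3 = { d3, d2, d1, d0 }      fr_i3 = { d3, c3, b3, a3 }
 *
 * i.e. the rows of a 4x4 halfword matrix become its columns; TRANSPOSE_2W
 * above is the same idea at word granularity.
 */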

/**
 * brief: Transpose 8x8 packed-byte data.
 * fr_i0~i7: src & dst
 * fr_t0~t3: temporary registers
 */
#define TRANSPOSE_8B(fr_i0, fr_i1, fr_i2, fr_i3, fr_i4, fr_i5, \
                     fr_i6, fr_i7, fr_t0, fr_t1, fr_t2, fr_t3) \
    "punpcklbh "#fr_t0", "#fr_i0", "#fr_i1" \n\t" \
    "punpckhbh "#fr_t1", "#fr_i0", "#fr_i1" \n\t" \
    "punpcklbh "#fr_t2", "#fr_i2", "#fr_i3" \n\t" \
    "punpckhbh "#fr_t3", "#fr_i2", "#fr_i3" \n\t" \
    "punpcklbh "#fr_i0", "#fr_i4", "#fr_i5" \n\t" \
    "punpckhbh "#fr_i1", "#fr_i4", "#fr_i5" \n\t" \
    "punpcklbh "#fr_i2", "#fr_i6", "#fr_i7" \n\t" \
    "punpckhbh "#fr_i3", "#fr_i6", "#fr_i7" \n\t" \
    "punpcklhw "#fr_i4", "#fr_t0", "#fr_t2" \n\t" \
    "punpckhhw "#fr_i5", "#fr_t0", "#fr_t2" \n\t" \
    "punpcklhw "#fr_i6", "#fr_t1", "#fr_t3" \n\t" \
    "punpckhhw "#fr_i7", "#fr_t1", "#fr_t3" \n\t" \
    "punpcklhw "#fr_t0", "#fr_i0", "#fr_i2" \n\t" \
    "punpckhhw "#fr_t1", "#fr_i0", "#fr_i2" \n\t" \
    "punpcklhw "#fr_t2", "#fr_i1", "#fr_i3" \n\t" \
    "punpckhhw "#fr_t3", "#fr_i1", "#fr_i3" \n\t" \
    "punpcklwd "#fr_i0", "#fr_i4", "#fr_t0" \n\t" \
    "punpckhwd "#fr_i1", "#fr_i4", "#fr_t0" \n\t" \
    "punpcklwd "#fr_i2", "#fr_i5", "#fr_t1" \n\t" \
    "punpckhwd "#fr_i3", "#fr_i5", "#fr_t1" \n\t" \
    "punpcklwd "#fr_i4", "#fr_i6", "#fr_t2" \n\t" \
    "punpckhwd "#fr_i5", "#fr_i6", "#fr_t2" \n\t" \
    "punpcklwd "#fr_i6", "#fr_i7", "#fr_t3" \n\t" \
    "punpckhwd "#fr_i7", "#fr_i7", "#fr_t3" \n\t"
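
/*
 * TRANSPOSE_8B is the byte-granularity transpose: three interleave passes
 * (punpck*bh on bytes, punpck*hw on halfwords, punpck*wd on words) turn the
 * eight input rows into the eight columns, cycling the data through
 * fr_t0..fr_t3 and the fr_i registers themselves as scratch.
 */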

/**
 * brief: Parallel arithmetic shift right (SRA) of 8 packed bytes.
 * fr_i0: src
 * fr_i1: shift amount (the per-byte shift amount plus 8)
 * fr_t0, fr_t1: temporary registers
 * fr_d0: dst
 */
#define PSRAB_MMI(fr_i0, fr_i1, fr_t0, fr_t1, fr_d0) \
    "punpcklbh "#fr_t0", "#fr_t0", "#fr_i0" \n\t" \
    "punpckhbh "#fr_t1", "#fr_t1", "#fr_i0" \n\t" \
    "psrah "#fr_t0", "#fr_t0", "#fr_i1" \n\t" \
    "psrah "#fr_t1", "#fr_t1", "#fr_i1" \n\t" \
    "packsshb "#fr_d0", "#fr_t0", "#fr_t1" \n\t"
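
/*
 * How the "+8" works: punpck{l,h}bh place each source byte in the high byte of
 * a 16-bit lane, while the low byte comes from whatever fr_t0/fr_t1 already
 * held (so the temporaries need no initialisation). psrah by n+8 shifts those
 * junk bits out while arithmetically shifting the data byte by n, and packsshb
 * packs the results back to bytes. E.g. for the byte 0xF0 (-16) and n = 2 the
 * 16-bit lane is 0xF0xx; an arithmetic shift by 10 yields -4 (0xFC), which is
 * exactly -16 >> 2.
 */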

/**
 * brief: Parallel logical shift right (SRL) of 8 packed bytes.
 * fr_i0: src
 * fr_i1: shift amount (the per-byte shift amount plus 8)
 * fr_t0, fr_t1: temporary registers
 * fr_d0: dst
 */
#define PSRLB_MMI(fr_i0, fr_i1, fr_t0, fr_t1, fr_d0) \
    "punpcklbh "#fr_t0", "#fr_t0", "#fr_i0" \n\t" \
    "punpckhbh "#fr_t1", "#fr_t1", "#fr_i0" \n\t" \
    "psrlh "#fr_t0", "#fr_t0", "#fr_i1" \n\t" \
    "psrlh "#fr_t1", "#fr_t1", "#fr_i1" \n\t" \
    "packsshb "#fr_d0", "#fr_t0", "#fr_t1" \n\t"

#define PSRAH_4_MMI(fp1, fp2, fp3, fp4, shift) \
    "psrah "#fp1", "#fp1", "#shift" \n\t" \
    "psrah "#fp2", "#fp2", "#shift" \n\t" \
    "psrah "#fp3", "#fp3", "#shift" \n\t" \
    "psrah "#fp4", "#fp4", "#shift" \n\t"

#define PSRAH_8_MMI(fp1, fp2, fp3, fp4, fp5, fp6, fp7, fp8, shift) \
    PSRAH_4_MMI(fp1, fp2, fp3, fp4, shift) \
    PSRAH_4_MMI(fp5, fp6, fp7, fp8, shift)

/**
 * brief: (((value) + (1 << ((n) - 1))) >> (n))
 * fr_i0: src & dst
 * fr_i1: the rounding shift amount n
 * fr_t0, fr_t1: temporary FPRs
 * gr_t0: temporary GPR
 */
#define ROUND_POWER_OF_TWO_MMI(fr_i0, fr_i1, fr_t0, fr_t1, gr_t0) \
    "li "#gr_t0", 0x01 \n\t" \
    "dmtc1 "#gr_t0", "#fr_t0" \n\t" \
    "punpcklwd "#fr_t0", "#fr_t0", "#fr_t0" \n\t" \
    "psubw "#fr_t1", "#fr_i1", "#fr_t0" \n\t" \
    "psllw "#fr_t1", "#fr_t0", "#fr_t1" \n\t" \
    "paddw "#fr_i0", "#fr_i0", "#fr_t1" \n\t" \
    "psraw "#fr_i0", "#fr_i0", "#fr_i1" \n\t"
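
/*
 * Scalar sketch of what ROUND_POWER_OF_TWO_MMI does to each 32-bit lane of
 * fr_i0, with n taken from fr_i1 (illustrative only, not part of FFmpeg):
 *
 *     static inline int32_t round_power_of_two(int32_t value, int n)
 *     {
 *         // add 2^(n-1), then arithmetic shift right by n: round to nearest
 *         return (value + (1 << (n - 1))) >> n;
 *     }
 *
 * In the macro, fr_t0 is loaded with packed {1, 1}, fr_t1 becomes 1 << (n - 1),
 * paddw adds the rounding bias and psraw performs the final shift by n.
 */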

#endif /* AVUTIL_MIPS_MMIUTILS_H */