FFmpeg
aaccoder_mips.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2012
3  * MIPS Technologies, Inc., California.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
14  * contributors may be used to endorse or promote products derived from
15  * this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * Author: Stanislav Ocovaj (socovaj@mips.com)
30  * Szabolcs Pal (sabolc@mips.com)
31  *
32  * AAC coefficients encoder optimized for MIPS floating-point architecture
33  *
34  * This file is part of FFmpeg.
35  *
36  * FFmpeg is free software; you can redistribute it and/or
37  * modify it under the terms of the GNU Lesser General Public
38  * License as published by the Free Software Foundation; either
39  * version 2.1 of the License, or (at your option) any later version.
40  *
41  * FFmpeg is distributed in the hope that it will be useful,
42  * but WITHOUT ANY WARRANTY; without even the implied warranty of
43  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
44  * Lesser General Public License for more details.
45  *
46  * You should have received a copy of the GNU Lesser General Public
47  * License along with FFmpeg; if not, write to the Free Software
48  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
49  */
50 
51 /**
52  * @file
53  * Reference: libavcodec/aaccoder.c
54  */
55 
56 #include "libavutil/libm.h"
57 
58 #include <float.h>
59 #include "libavutil/mathematics.h"
60 #include "libavcodec/avcodec.h"
61 #include "libavcodec/put_bits.h"
62 #include "libavcodec/aac.h"
63 #include "libavcodec/aacenc.h"
64 #include "libavcodec/aacencdsp.h"
65 #include "libavcodec/aactab.h"
66 #include "libavcodec/aacenctab.h"
68 
69 #if HAVE_INLINE_ASM
70 #if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
/**
 * One entry of a band coding path.
 *
 * NOTE(review): the users of this structure are outside the visible part of
 * the file; the field descriptions below follow the field names only.
 */
typedef struct BandCodingPath {
    int   prev_idx; ///< index of the previous entry (presumably a back-link; confirm against users)
    float cost;     ///< cost associated with this entry
    int   run;      ///< run value associated with this entry
} BandCodingPath;
76 
/*
 * Number of non-zero digits of the index when it is written as four base-3
 * digits (index = ((d1 * 3 + d2) * 3 + d3) * 3 + d4, each digit in 0..2).
 * Each row below covers one (d1, d2) pair; the row content depends only on
 * how many of d1 and d2 are non-zero.
 */
#define UQUAD_ROW_NZ0 0, 1, 1, 1, 2, 2, 1, 2, 2 /* d1 == 0 and d2 == 0   */
#define UQUAD_ROW_NZ1 1, 2, 2, 2, 3, 3, 2, 3, 3 /* exactly one is non-zero */
#define UQUAD_ROW_NZ2 2, 3, 3, 3, 4, 4, 3, 4, 4 /* both are non-zero       */
static const uint8_t uquad_sign_bits[81] = {
    UQUAD_ROW_NZ0, /* d1 = 0, d2 = 0 */
    UQUAD_ROW_NZ1, /* d1 = 0, d2 = 1 */
    UQUAD_ROW_NZ1, /* d1 = 0, d2 = 2 */
    UQUAD_ROW_NZ1, /* d1 = 1, d2 = 0 */
    UQUAD_ROW_NZ2, /* d1 = 1, d2 = 1 */
    UQUAD_ROW_NZ2, /* d1 = 1, d2 = 2 */
    UQUAD_ROW_NZ1, /* d1 = 2, d2 = 0 */
    UQUAD_ROW_NZ2, /* d1 = 2, d2 = 1 */
    UQUAD_ROW_NZ2  /* d1 = 2, d2 = 2 */
};
#undef UQUAD_ROW_NZ0
#undef UQUAD_ROW_NZ1
#undef UQUAD_ROW_NZ2
88 
/*
 * Number of non-zero components of a pair (a, b), a and b in 0..7,
 * stored at index 8 * a + b. One row per value of a.
 */
#define UPAIR7_ROW_ZERO    0, 1, 1, 1, 1, 1, 1, 1 /* a == 0 */
#define UPAIR7_ROW_NONZERO 1, 2, 2, 2, 2, 2, 2, 2 /* a != 0 */
static const uint8_t upair7_sign_bits[64] = {
    UPAIR7_ROW_ZERO,    /* a = 0 */
    UPAIR7_ROW_NONZERO, /* a = 1 */
    UPAIR7_ROW_NONZERO, /* a = 2 */
    UPAIR7_ROW_NONZERO, /* a = 3 */
    UPAIR7_ROW_NONZERO, /* a = 4 */
    UPAIR7_ROW_NONZERO, /* a = 5 */
    UPAIR7_ROW_NONZERO, /* a = 6 */
    UPAIR7_ROW_NONZERO  /* a = 7 */
};
#undef UPAIR7_ROW_ZERO
#undef UPAIR7_ROW_NONZERO
99 
/*
 * Number of non-zero components of a pair (a, b), a and b in 0..12,
 * stored at index 13 * a + b. One row per value of a.
 */
#define UPAIR12_ROW_ZERO    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 /* a == 0 */
#define UPAIR12_ROW_NONZERO 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 /* a != 0 */
static const uint8_t upair12_sign_bits[169] = {
    UPAIR12_ROW_ZERO,    /* a =  0 */
    UPAIR12_ROW_NONZERO, /* a =  1 */
    UPAIR12_ROW_NONZERO, /* a =  2 */
    UPAIR12_ROW_NONZERO, /* a =  3 */
    UPAIR12_ROW_NONZERO, /* a =  4 */
    UPAIR12_ROW_NONZERO, /* a =  5 */
    UPAIR12_ROW_NONZERO, /* a =  6 */
    UPAIR12_ROW_NONZERO, /* a =  7 */
    UPAIR12_ROW_NONZERO, /* a =  8 */
    UPAIR12_ROW_NONZERO, /* a =  9 */
    UPAIR12_ROW_NONZERO, /* a = 10 */
    UPAIR12_ROW_NONZERO, /* a = 11 */
    UPAIR12_ROW_NONZERO  /* a = 12 */
};
#undef UPAIR12_ROW_ZERO
#undef UPAIR12_ROW_NONZERO
115 
/*
 * Number of non-zero components of a pair (a, b), a and b in 0..16,
 * stored at index 17 * a + b. One row per value of a.
 */
#define ESC_ROW_ZERO    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 /* a == 0 */
#define ESC_ROW_NONZERO 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 /* a != 0 */
static const uint8_t esc_sign_bits[289] = {
    ESC_ROW_ZERO,    /* a =  0 */
    ESC_ROW_NONZERO, /* a =  1 */
    ESC_ROW_NONZERO, /* a =  2 */
    ESC_ROW_NONZERO, /* a =  3 */
    ESC_ROW_NONZERO, /* a =  4 */
    ESC_ROW_NONZERO, /* a =  5 */
    ESC_ROW_NONZERO, /* a =  6 */
    ESC_ROW_NONZERO, /* a =  7 */
    ESC_ROW_NONZERO, /* a =  8 */
    ESC_ROW_NONZERO, /* a =  9 */
    ESC_ROW_NONZERO, /* a = 10 */
    ESC_ROW_NONZERO, /* a = 11 */
    ESC_ROW_NONZERO, /* a = 12 */
    ESC_ROW_NONZERO, /* a = 13 */
    ESC_ROW_NONZERO, /* a = 14 */
    ESC_ROW_NONZERO, /* a = 15 */
    ESC_ROW_NONZERO  /* a = 16 */
};
#undef ESC_ROW_ZERO
#undef ESC_ROW_NONZERO
135 
136 /**
137  * Functions developed from template function and optimized for quantizing and encoding band
138  */
139 static void quantize_and_encode_band_cost_SQUAD_mips(struct AACEncContext *s,
140  PutBitContext *pb, const float *in, float *out,
141  const float *scaled, int size, int scale_idx,
142  int cb, const float lambda, const float uplim,
143  int *bits, float *energy, const float ROUNDING)
144 {
145  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
146  const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
147  int i;
148  int qc1, qc2, qc3, qc4;
149  float qenergy = 0.0f;
150 
151  uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
152  uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
153  float *p_vec = (float *)ff_aac_codebook_vectors[cb-1];
154 
155  abs_pow34_v(s->scoefs, in, size);
156  scaled = s->scoefs;
157  for (i = 0; i < size; i += 4) {
158  int curidx;
159  int *in_int = (int *)&in[i];
160  int t0, t1, t2, t3, t4, t5, t6, t7;
161  const float *vec;
162 
163  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
164  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
165  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
166  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
167 
168  __asm__ volatile (
169  ".set push \n\t"
170  ".set noreorder \n\t"
171 
172  "slt %[qc1], $zero, %[qc1] \n\t"
173  "slt %[qc2], $zero, %[qc2] \n\t"
174  "slt %[qc3], $zero, %[qc3] \n\t"
175  "slt %[qc4], $zero, %[qc4] \n\t"
176  "lw %[t0], 0(%[in_int]) \n\t"
177  "lw %[t1], 4(%[in_int]) \n\t"
178  "lw %[t2], 8(%[in_int]) \n\t"
179  "lw %[t3], 12(%[in_int]) \n\t"
180  "srl %[t0], %[t0], 31 \n\t"
181  "srl %[t1], %[t1], 31 \n\t"
182  "srl %[t2], %[t2], 31 \n\t"
183  "srl %[t3], %[t3], 31 \n\t"
184  "subu %[t4], $zero, %[qc1] \n\t"
185  "subu %[t5], $zero, %[qc2] \n\t"
186  "subu %[t6], $zero, %[qc3] \n\t"
187  "subu %[t7], $zero, %[qc4] \n\t"
188  "movn %[qc1], %[t4], %[t0] \n\t"
189  "movn %[qc2], %[t5], %[t1] \n\t"
190  "movn %[qc3], %[t6], %[t2] \n\t"
191  "movn %[qc4], %[t7], %[t3] \n\t"
192 
193  ".set pop \n\t"
194 
195  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
196  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
197  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
198  [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
199  : [in_int]"r"(in_int)
200  : "memory"
201  );
202 
203  curidx = qc1;
204  curidx *= 3;
205  curidx += qc2;
206  curidx *= 3;
207  curidx += qc3;
208  curidx *= 3;
209  curidx += qc4;
210  curidx += 40;
211 
212  put_bits(pb, p_bits[curidx], p_codes[curidx]);
213 
214  if (out || energy) {
215  float e1,e2,e3,e4;
216  vec = &p_vec[curidx*4];
217  e1 = vec[0] * IQ;
218  e2 = vec[1] * IQ;
219  e3 = vec[2] * IQ;
220  e4 = vec[3] * IQ;
221  if (out) {
222  out[i+0] = e1;
223  out[i+1] = e2;
224  out[i+2] = e3;
225  out[i+3] = e4;
226  }
227  if (energy)
228  qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
229  }
230  }
231  if (energy)
232  *energy = qenergy;
233 }
234 
235 static void quantize_and_encode_band_cost_UQUAD_mips(struct AACEncContext *s,
236  PutBitContext *pb, const float *in, float *out,
237  const float *scaled, int size, int scale_idx,
238  int cb, const float lambda, const float uplim,
239  int *bits, float *energy, const float ROUNDING)
240 {
241  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
242  const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
243  int i;
244  int qc1, qc2, qc3, qc4;
245  float qenergy = 0.0f;
246 
247  uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
248  uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
249  float *p_vec = (float *)ff_aac_codebook_vectors[cb-1];
250 
251  abs_pow34_v(s->scoefs, in, size);
252  scaled = s->scoefs;
253  for (i = 0; i < size; i += 4) {
254  int curidx, sign, count;
255  int *in_int = (int *)&in[i];
256  uint8_t v_bits;
257  unsigned int v_codes;
258  int t0, t1, t2, t3, t4;
259  const float *vec;
260 
261  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
262  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
263  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
264  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
265 
266  __asm__ volatile (
267  ".set push \n\t"
268  ".set noreorder \n\t"
269 
270  "ori %[t4], $zero, 2 \n\t"
271  "ori %[sign], $zero, 0 \n\t"
272  "slt %[t0], %[t4], %[qc1] \n\t"
273  "slt %[t1], %[t4], %[qc2] \n\t"
274  "slt %[t2], %[t4], %[qc3] \n\t"
275  "slt %[t3], %[t4], %[qc4] \n\t"
276  "movn %[qc1], %[t4], %[t0] \n\t"
277  "movn %[qc2], %[t4], %[t1] \n\t"
278  "movn %[qc3], %[t4], %[t2] \n\t"
279  "movn %[qc4], %[t4], %[t3] \n\t"
280  "lw %[t0], 0(%[in_int]) \n\t"
281  "lw %[t1], 4(%[in_int]) \n\t"
282  "lw %[t2], 8(%[in_int]) \n\t"
283  "lw %[t3], 12(%[in_int]) \n\t"
284  "slt %[t0], %[t0], $zero \n\t"
285  "movn %[sign], %[t0], %[qc1] \n\t"
286  "slt %[t1], %[t1], $zero \n\t"
287  "slt %[t2], %[t2], $zero \n\t"
288  "slt %[t3], %[t3], $zero \n\t"
289  "sll %[t0], %[sign], 1 \n\t"
290  "or %[t0], %[t0], %[t1] \n\t"
291  "movn %[sign], %[t0], %[qc2] \n\t"
292  "slt %[t4], $zero, %[qc1] \n\t"
293  "slt %[t1], $zero, %[qc2] \n\t"
294  "slt %[count], $zero, %[qc3] \n\t"
295  "sll %[t0], %[sign], 1 \n\t"
296  "or %[t0], %[t0], %[t2] \n\t"
297  "movn %[sign], %[t0], %[qc3] \n\t"
298  "slt %[t2], $zero, %[qc4] \n\t"
299  "addu %[count], %[count], %[t4] \n\t"
300  "addu %[count], %[count], %[t1] \n\t"
301  "sll %[t0], %[sign], 1 \n\t"
302  "or %[t0], %[t0], %[t3] \n\t"
303  "movn %[sign], %[t0], %[qc4] \n\t"
304  "addu %[count], %[count], %[t2] \n\t"
305 
306  ".set pop \n\t"
307 
308  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
309  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
310  [sign]"=&r"(sign), [count]"=&r"(count),
311  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
312  [t4]"=&r"(t4)
313  : [in_int]"r"(in_int)
314  : "memory"
315  );
316 
317  curidx = qc1;
318  curidx *= 3;
319  curidx += qc2;
320  curidx *= 3;
321  curidx += qc3;
322  curidx *= 3;
323  curidx += qc4;
324 
325  v_codes = (p_codes[curidx] << count) | (sign & ((1 << count) - 1));
326  v_bits = p_bits[curidx] + count;
327  put_bits(pb, v_bits, v_codes);
328 
329  if (out || energy) {
330  float e1,e2,e3,e4;
331  vec = &p_vec[curidx*4];
332  e1 = copysignf(vec[0] * IQ, in[i+0]);
333  e2 = copysignf(vec[1] * IQ, in[i+1]);
334  e3 = copysignf(vec[2] * IQ, in[i+2]);
335  e4 = copysignf(vec[3] * IQ, in[i+3]);
336  if (out) {
337  out[i+0] = e1;
338  out[i+1] = e2;
339  out[i+2] = e3;
340  out[i+3] = e4;
341  }
342  if (energy)
343  qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
344  }
345  }
346  if (energy)
347  *energy = qenergy;
348 }
349 
350 static void quantize_and_encode_band_cost_SPAIR_mips(struct AACEncContext *s,
351  PutBitContext *pb, const float *in, float *out,
352  const float *scaled, int size, int scale_idx,
353  int cb, const float lambda, const float uplim,
354  int *bits, float *energy, const float ROUNDING)
355 {
356  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
357  const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
358  int i;
359  int qc1, qc2, qc3, qc4;
360  float qenergy = 0.0f;
361 
362  uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
363  uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
364  float *p_vec = (float *)ff_aac_codebook_vectors[cb-1];
365 
366  abs_pow34_v(s->scoefs, in, size);
367  scaled = s->scoefs;
368  for (i = 0; i < size; i += 4) {
369  int curidx, curidx2;
370  int *in_int = (int *)&in[i];
371  uint8_t v_bits;
372  unsigned int v_codes;
373  int t0, t1, t2, t3, t4, t5, t6, t7;
374  const float *vec1, *vec2;
375 
376  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
377  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
378  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
379  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
380 
381  __asm__ volatile (
382  ".set push \n\t"
383  ".set noreorder \n\t"
384 
385  "ori %[t4], $zero, 4 \n\t"
386  "slt %[t0], %[t4], %[qc1] \n\t"
387  "slt %[t1], %[t4], %[qc2] \n\t"
388  "slt %[t2], %[t4], %[qc3] \n\t"
389  "slt %[t3], %[t4], %[qc4] \n\t"
390  "movn %[qc1], %[t4], %[t0] \n\t"
391  "movn %[qc2], %[t4], %[t1] \n\t"
392  "movn %[qc3], %[t4], %[t2] \n\t"
393  "movn %[qc4], %[t4], %[t3] \n\t"
394  "lw %[t0], 0(%[in_int]) \n\t"
395  "lw %[t1], 4(%[in_int]) \n\t"
396  "lw %[t2], 8(%[in_int]) \n\t"
397  "lw %[t3], 12(%[in_int]) \n\t"
398  "srl %[t0], %[t0], 31 \n\t"
399  "srl %[t1], %[t1], 31 \n\t"
400  "srl %[t2], %[t2], 31 \n\t"
401  "srl %[t3], %[t3], 31 \n\t"
402  "subu %[t4], $zero, %[qc1] \n\t"
403  "subu %[t5], $zero, %[qc2] \n\t"
404  "subu %[t6], $zero, %[qc3] \n\t"
405  "subu %[t7], $zero, %[qc4] \n\t"
406  "movn %[qc1], %[t4], %[t0] \n\t"
407  "movn %[qc2], %[t5], %[t1] \n\t"
408  "movn %[qc3], %[t6], %[t2] \n\t"
409  "movn %[qc4], %[t7], %[t3] \n\t"
410 
411  ".set pop \n\t"
412 
413  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
414  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
415  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
416  [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
417  : [in_int]"r"(in_int)
418  : "memory"
419  );
420 
421  curidx = 9 * qc1;
422  curidx += qc2 + 40;
423 
424  curidx2 = 9 * qc3;
425  curidx2 += qc4 + 40;
426 
427  v_codes = (p_codes[curidx] << p_bits[curidx2]) | (p_codes[curidx2]);
428  v_bits = p_bits[curidx] + p_bits[curidx2];
429  put_bits(pb, v_bits, v_codes);
430 
431  if (out || energy) {
432  float e1,e2,e3,e4;
433  vec1 = &p_vec[curidx*2 ];
434  vec2 = &p_vec[curidx2*2];
435  e1 = vec1[0] * IQ;
436  e2 = vec1[1] * IQ;
437  e3 = vec2[0] * IQ;
438  e4 = vec2[1] * IQ;
439  if (out) {
440  out[i+0] = e1;
441  out[i+1] = e2;
442  out[i+2] = e3;
443  out[i+3] = e4;
444  }
445  if (energy)
446  qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
447  }
448  }
449  if (energy)
450  *energy = qenergy;
451 }
452 
453 static void quantize_and_encode_band_cost_UPAIR7_mips(struct AACEncContext *s,
454  PutBitContext *pb, const float *in, float *out,
455  const float *scaled, int size, int scale_idx,
456  int cb, const float lambda, const float uplim,
457  int *bits, float *energy, const float ROUNDING)
458 {
459  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
460  const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
461  int i;
462  int qc1, qc2, qc3, qc4;
463  float qenergy = 0.0f;
464 
465  uint8_t *p_bits = (uint8_t*) ff_aac_spectral_bits[cb-1];
466  uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
467  float *p_vec = (float *)ff_aac_codebook_vectors[cb-1];
468 
469  abs_pow34_v(s->scoefs, in, size);
470  scaled = s->scoefs;
471  for (i = 0; i < size; i += 4) {
472  int curidx1, curidx2, sign1, count1, sign2, count2;
473  int *in_int = (int *)&in[i];
474  uint8_t v_bits;
475  unsigned int v_codes;
476  int t0, t1, t2, t3, t4;
477  const float *vec1, *vec2;
478 
479  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
480  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
481  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
482  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
483 
484  __asm__ volatile (
485  ".set push \n\t"
486  ".set noreorder \n\t"
487 
488  "ori %[t4], $zero, 7 \n\t"
489  "ori %[sign1], $zero, 0 \n\t"
490  "ori %[sign2], $zero, 0 \n\t"
491  "slt %[t0], %[t4], %[qc1] \n\t"
492  "slt %[t1], %[t4], %[qc2] \n\t"
493  "slt %[t2], %[t4], %[qc3] \n\t"
494  "slt %[t3], %[t4], %[qc4] \n\t"
495  "movn %[qc1], %[t4], %[t0] \n\t"
496  "movn %[qc2], %[t4], %[t1] \n\t"
497  "movn %[qc3], %[t4], %[t2] \n\t"
498  "movn %[qc4], %[t4], %[t3] \n\t"
499  "lw %[t0], 0(%[in_int]) \n\t"
500  "lw %[t1], 4(%[in_int]) \n\t"
501  "lw %[t2], 8(%[in_int]) \n\t"
502  "lw %[t3], 12(%[in_int]) \n\t"
503  "slt %[t0], %[t0], $zero \n\t"
504  "movn %[sign1], %[t0], %[qc1] \n\t"
505  "slt %[t2], %[t2], $zero \n\t"
506  "movn %[sign2], %[t2], %[qc3] \n\t"
507  "slt %[t1], %[t1], $zero \n\t"
508  "sll %[t0], %[sign1], 1 \n\t"
509  "or %[t0], %[t0], %[t1] \n\t"
510  "movn %[sign1], %[t0], %[qc2] \n\t"
511  "slt %[t3], %[t3], $zero \n\t"
512  "sll %[t0], %[sign2], 1 \n\t"
513  "or %[t0], %[t0], %[t3] \n\t"
514  "movn %[sign2], %[t0], %[qc4] \n\t"
515  "slt %[count1], $zero, %[qc1] \n\t"
516  "slt %[t1], $zero, %[qc2] \n\t"
517  "slt %[count2], $zero, %[qc3] \n\t"
518  "slt %[t2], $zero, %[qc4] \n\t"
519  "addu %[count1], %[count1], %[t1] \n\t"
520  "addu %[count2], %[count2], %[t2] \n\t"
521 
522  ".set pop \n\t"
523 
524  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
525  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
526  [sign1]"=&r"(sign1), [count1]"=&r"(count1),
527  [sign2]"=&r"(sign2), [count2]"=&r"(count2),
528  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
529  [t4]"=&r"(t4)
530  : [in_int]"r"(in_int)
531  : "t0", "t1", "t2", "t3", "t4",
532  "memory"
533  );
534 
535  curidx1 = 8 * qc1;
536  curidx1 += qc2;
537 
538  v_codes = (p_codes[curidx1] << count1) | sign1;
539  v_bits = p_bits[curidx1] + count1;
540  put_bits(pb, v_bits, v_codes);
541 
542  curidx2 = 8 * qc3;
543  curidx2 += qc4;
544 
545  v_codes = (p_codes[curidx2] << count2) | sign2;
546  v_bits = p_bits[curidx2] + count2;
547  put_bits(pb, v_bits, v_codes);
548 
549  if (out || energy) {
550  float e1,e2,e3,e4;
551  vec1 = &p_vec[curidx1*2];
552  vec2 = &p_vec[curidx2*2];
553  e1 = copysignf(vec1[0] * IQ, in[i+0]);
554  e2 = copysignf(vec1[1] * IQ, in[i+1]);
555  e3 = copysignf(vec2[0] * IQ, in[i+2]);
556  e4 = copysignf(vec2[1] * IQ, in[i+3]);
557  if (out) {
558  out[i+0] = e1;
559  out[i+1] = e2;
560  out[i+2] = e3;
561  out[i+3] = e4;
562  }
563  if (energy)
564  qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
565  }
566  }
567  if (energy)
568  *energy = qenergy;
569 }
570 
571 static void quantize_and_encode_band_cost_UPAIR12_mips(struct AACEncContext *s,
572  PutBitContext *pb, const float *in, float *out,
573  const float *scaled, int size, int scale_idx,
574  int cb, const float lambda, const float uplim,
575  int *bits, float *energy, const float ROUNDING)
576 {
577  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
578  const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
579  int i;
580  int qc1, qc2, qc3, qc4;
581  float qenergy = 0.0f;
582 
583  uint8_t *p_bits = (uint8_t*) ff_aac_spectral_bits[cb-1];
584  uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
585  float *p_vec = (float *)ff_aac_codebook_vectors[cb-1];
586 
587  abs_pow34_v(s->scoefs, in, size);
588  scaled = s->scoefs;
589  for (i = 0; i < size; i += 4) {
590  int curidx1, curidx2, sign1, count1, sign2, count2;
591  int *in_int = (int *)&in[i];
592  uint8_t v_bits;
593  unsigned int v_codes;
594  int t0, t1, t2, t3, t4;
595  const float *vec1, *vec2;
596 
597  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
598  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
599  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
600  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
601 
602  __asm__ volatile (
603  ".set push \n\t"
604  ".set noreorder \n\t"
605 
606  "ori %[t4], $zero, 12 \n\t"
607  "ori %[sign1], $zero, 0 \n\t"
608  "ori %[sign2], $zero, 0 \n\t"
609  "slt %[t0], %[t4], %[qc1] \n\t"
610  "slt %[t1], %[t4], %[qc2] \n\t"
611  "slt %[t2], %[t4], %[qc3] \n\t"
612  "slt %[t3], %[t4], %[qc4] \n\t"
613  "movn %[qc1], %[t4], %[t0] \n\t"
614  "movn %[qc2], %[t4], %[t1] \n\t"
615  "movn %[qc3], %[t4], %[t2] \n\t"
616  "movn %[qc4], %[t4], %[t3] \n\t"
617  "lw %[t0], 0(%[in_int]) \n\t"
618  "lw %[t1], 4(%[in_int]) \n\t"
619  "lw %[t2], 8(%[in_int]) \n\t"
620  "lw %[t3], 12(%[in_int]) \n\t"
621  "slt %[t0], %[t0], $zero \n\t"
622  "movn %[sign1], %[t0], %[qc1] \n\t"
623  "slt %[t2], %[t2], $zero \n\t"
624  "movn %[sign2], %[t2], %[qc3] \n\t"
625  "slt %[t1], %[t1], $zero \n\t"
626  "sll %[t0], %[sign1], 1 \n\t"
627  "or %[t0], %[t0], %[t1] \n\t"
628  "movn %[sign1], %[t0], %[qc2] \n\t"
629  "slt %[t3], %[t3], $zero \n\t"
630  "sll %[t0], %[sign2], 1 \n\t"
631  "or %[t0], %[t0], %[t3] \n\t"
632  "movn %[sign2], %[t0], %[qc4] \n\t"
633  "slt %[count1], $zero, %[qc1] \n\t"
634  "slt %[t1], $zero, %[qc2] \n\t"
635  "slt %[count2], $zero, %[qc3] \n\t"
636  "slt %[t2], $zero, %[qc4] \n\t"
637  "addu %[count1], %[count1], %[t1] \n\t"
638  "addu %[count2], %[count2], %[t2] \n\t"
639 
640  ".set pop \n\t"
641 
642  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
643  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
644  [sign1]"=&r"(sign1), [count1]"=&r"(count1),
645  [sign2]"=&r"(sign2), [count2]"=&r"(count2),
646  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
647  [t4]"=&r"(t4)
648  : [in_int]"r"(in_int)
649  : "memory"
650  );
651 
652  curidx1 = 13 * qc1;
653  curidx1 += qc2;
654 
655  v_codes = (p_codes[curidx1] << count1) | sign1;
656  v_bits = p_bits[curidx1] + count1;
657  put_bits(pb, v_bits, v_codes);
658 
659  curidx2 = 13 * qc3;
660  curidx2 += qc4;
661 
662  v_codes = (p_codes[curidx2] << count2) | sign2;
663  v_bits = p_bits[curidx2] + count2;
664  put_bits(pb, v_bits, v_codes);
665 
666  if (out || energy) {
667  float e1,e2,e3,e4;
668  vec1 = &p_vec[curidx1*2];
669  vec2 = &p_vec[curidx2*2];
670  e1 = copysignf(vec1[0] * IQ, in[i+0]);
671  e2 = copysignf(vec1[1] * IQ, in[i+1]);
672  e3 = copysignf(vec2[0] * IQ, in[i+2]);
673  e4 = copysignf(vec2[1] * IQ, in[i+3]);
674  if (out) {
675  out[i+0] = e1;
676  out[i+1] = e2;
677  out[i+2] = e3;
678  out[i+3] = e4;
679  }
680  if (energy)
681  qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
682  }
683  }
684  if (energy)
685  *energy = qenergy;
686 }
687 
688 static void quantize_and_encode_band_cost_ESC_mips(struct AACEncContext *s,
689  PutBitContext *pb, const float *in, float *out,
690  const float *scaled, int size, int scale_idx,
691  int cb, const float lambda, const float uplim,
692  int *bits, float *energy, const float ROUNDING)
693 {
694  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
695  const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
696  int i;
697  int qc1, qc2, qc3, qc4;
698  float qenergy = 0.0f;
699 
700  uint8_t *p_bits = (uint8_t* )ff_aac_spectral_bits[cb-1];
701  uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
702  float *p_vectors = (float* )ff_aac_codebook_vectors[cb-1];
703 
704  abs_pow34_v(s->scoefs, in, size);
705  scaled = s->scoefs;
706 
707  if (cb < 11) {
708  for (i = 0; i < size; i += 4) {
709  int curidx, curidx2, sign1, count1, sign2, count2;
710  int *in_int = (int *)&in[i];
711  uint8_t v_bits;
712  unsigned int v_codes;
713  int t0, t1, t2, t3, t4;
714  const float *vec1, *vec2;
715 
716  qc1 = scaled[i ] * Q34 + ROUNDING;
717  qc2 = scaled[i+1] * Q34 + ROUNDING;
718  qc3 = scaled[i+2] * Q34 + ROUNDING;
719  qc4 = scaled[i+3] * Q34 + ROUNDING;
720 
721  __asm__ volatile (
722  ".set push \n\t"
723  ".set noreorder \n\t"
724 
725  "ori %[t4], $zero, 16 \n\t"
726  "ori %[sign1], $zero, 0 \n\t"
727  "ori %[sign2], $zero, 0 \n\t"
728  "slt %[t0], %[t4], %[qc1] \n\t"
729  "slt %[t1], %[t4], %[qc2] \n\t"
730  "slt %[t2], %[t4], %[qc3] \n\t"
731  "slt %[t3], %[t4], %[qc4] \n\t"
732  "movn %[qc1], %[t4], %[t0] \n\t"
733  "movn %[qc2], %[t4], %[t1] \n\t"
734  "movn %[qc3], %[t4], %[t2] \n\t"
735  "movn %[qc4], %[t4], %[t3] \n\t"
736  "lw %[t0], 0(%[in_int]) \n\t"
737  "lw %[t1], 4(%[in_int]) \n\t"
738  "lw %[t2], 8(%[in_int]) \n\t"
739  "lw %[t3], 12(%[in_int]) \n\t"
740  "slt %[t0], %[t0], $zero \n\t"
741  "movn %[sign1], %[t0], %[qc1] \n\t"
742  "slt %[t2], %[t2], $zero \n\t"
743  "movn %[sign2], %[t2], %[qc3] \n\t"
744  "slt %[t1], %[t1], $zero \n\t"
745  "sll %[t0], %[sign1], 1 \n\t"
746  "or %[t0], %[t0], %[t1] \n\t"
747  "movn %[sign1], %[t0], %[qc2] \n\t"
748  "slt %[t3], %[t3], $zero \n\t"
749  "sll %[t0], %[sign2], 1 \n\t"
750  "or %[t0], %[t0], %[t3] \n\t"
751  "movn %[sign2], %[t0], %[qc4] \n\t"
752  "slt %[count1], $zero, %[qc1] \n\t"
753  "slt %[t1], $zero, %[qc2] \n\t"
754  "slt %[count2], $zero, %[qc3] \n\t"
755  "slt %[t2], $zero, %[qc4] \n\t"
756  "addu %[count1], %[count1], %[t1] \n\t"
757  "addu %[count2], %[count2], %[t2] \n\t"
758 
759  ".set pop \n\t"
760 
761  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
762  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
763  [sign1]"=&r"(sign1), [count1]"=&r"(count1),
764  [sign2]"=&r"(sign2), [count2]"=&r"(count2),
765  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
766  [t4]"=&r"(t4)
767  : [in_int]"r"(in_int)
768  : "memory"
769  );
770 
771  curidx = 17 * qc1;
772  curidx += qc2;
773  curidx2 = 17 * qc3;
774  curidx2 += qc4;
775 
776  v_codes = (p_codes[curidx] << count1) | sign1;
777  v_bits = p_bits[curidx] + count1;
778  put_bits(pb, v_bits, v_codes);
779 
780  v_codes = (p_codes[curidx2] << count2) | sign2;
781  v_bits = p_bits[curidx2] + count2;
782  put_bits(pb, v_bits, v_codes);
783 
784  if (out || energy) {
785  float e1,e2,e3,e4;
786  vec1 = &p_vectors[curidx*2 ];
787  vec2 = &p_vectors[curidx2*2];
788  e1 = copysignf(vec1[0] * IQ, in[i+0]);
789  e2 = copysignf(vec1[1] * IQ, in[i+1]);
790  e3 = copysignf(vec2[0] * IQ, in[i+2]);
791  e4 = copysignf(vec2[1] * IQ, in[i+3]);
792  if (out) {
793  out[i+0] = e1;
794  out[i+1] = e2;
795  out[i+2] = e3;
796  out[i+3] = e4;
797  }
798  if (energy)
799  qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
800  }
801  }
802  } else {
803  for (i = 0; i < size; i += 4) {
804  int curidx, curidx2, sign1, count1, sign2, count2;
805  int *in_int = (int *)&in[i];
806  uint8_t v_bits;
807  unsigned int v_codes;
808  int c1, c2, c3, c4;
809  int t0, t1, t2, t3, t4;
810 
811  qc1 = scaled[i ] * Q34 + ROUNDING;
812  qc2 = scaled[i+1] * Q34 + ROUNDING;
813  qc3 = scaled[i+2] * Q34 + ROUNDING;
814  qc4 = scaled[i+3] * Q34 + ROUNDING;
815 
816  __asm__ volatile (
817  ".set push \n\t"
818  ".set noreorder \n\t"
819 
820  "ori %[t4], $zero, 16 \n\t"
821  "ori %[sign1], $zero, 0 \n\t"
822  "ori %[sign2], $zero, 0 \n\t"
823  "shll_s.w %[c1], %[qc1], 18 \n\t"
824  "shll_s.w %[c2], %[qc2], 18 \n\t"
825  "shll_s.w %[c3], %[qc3], 18 \n\t"
826  "shll_s.w %[c4], %[qc4], 18 \n\t"
827  "srl %[c1], %[c1], 18 \n\t"
828  "srl %[c2], %[c2], 18 \n\t"
829  "srl %[c3], %[c3], 18 \n\t"
830  "srl %[c4], %[c4], 18 \n\t"
831  "slt %[t0], %[t4], %[qc1] \n\t"
832  "slt %[t1], %[t4], %[qc2] \n\t"
833  "slt %[t2], %[t4], %[qc3] \n\t"
834  "slt %[t3], %[t4], %[qc4] \n\t"
835  "movn %[qc1], %[t4], %[t0] \n\t"
836  "movn %[qc2], %[t4], %[t1] \n\t"
837  "movn %[qc3], %[t4], %[t2] \n\t"
838  "movn %[qc4], %[t4], %[t3] \n\t"
839  "lw %[t0], 0(%[in_int]) \n\t"
840  "lw %[t1], 4(%[in_int]) \n\t"
841  "lw %[t2], 8(%[in_int]) \n\t"
842  "lw %[t3], 12(%[in_int]) \n\t"
843  "slt %[t0], %[t0], $zero \n\t"
844  "movn %[sign1], %[t0], %[qc1] \n\t"
845  "slt %[t2], %[t2], $zero \n\t"
846  "movn %[sign2], %[t2], %[qc3] \n\t"
847  "slt %[t1], %[t1], $zero \n\t"
848  "sll %[t0], %[sign1], 1 \n\t"
849  "or %[t0], %[t0], %[t1] \n\t"
850  "movn %[sign1], %[t0], %[qc2] \n\t"
851  "slt %[t3], %[t3], $zero \n\t"
852  "sll %[t0], %[sign2], 1 \n\t"
853  "or %[t0], %[t0], %[t3] \n\t"
854  "movn %[sign2], %[t0], %[qc4] \n\t"
855  "slt %[count1], $zero, %[qc1] \n\t"
856  "slt %[t1], $zero, %[qc2] \n\t"
857  "slt %[count2], $zero, %[qc3] \n\t"
858  "slt %[t2], $zero, %[qc4] \n\t"
859  "addu %[count1], %[count1], %[t1] \n\t"
860  "addu %[count2], %[count2], %[t2] \n\t"
861 
862  ".set pop \n\t"
863 
864  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
865  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
866  [sign1]"=&r"(sign1), [count1]"=&r"(count1),
867  [sign2]"=&r"(sign2), [count2]"=&r"(count2),
868  [c1]"=&r"(c1), [c2]"=&r"(c2),
869  [c3]"=&r"(c3), [c4]"=&r"(c4),
870  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
871  [t4]"=&r"(t4)
872  : [in_int]"r"(in_int)
873  : "memory"
874  );
875 
876  curidx = 17 * qc1;
877  curidx += qc2;
878 
879  curidx2 = 17 * qc3;
880  curidx2 += qc4;
881 
882  v_codes = (p_codes[curidx] << count1) | sign1;
883  v_bits = p_bits[curidx] + count1;
884  put_bits(pb, v_bits, v_codes);
885 
886  if (p_vectors[curidx*2 ] == 64.0f) {
887  int len = av_log2(c1);
888  v_codes = (((1 << (len - 3)) - 2) << len) | (c1 & ((1 << len) - 1));
889  put_bits(pb, len * 2 - 3, v_codes);
890  }
891  if (p_vectors[curidx*2+1] == 64.0f) {
892  int len = av_log2(c2);
893  v_codes = (((1 << (len - 3)) - 2) << len) | (c2 & ((1 << len) - 1));
894  put_bits(pb, len*2-3, v_codes);
895  }
896 
897  v_codes = (p_codes[curidx2] << count2) | sign2;
898  v_bits = p_bits[curidx2] + count2;
899  put_bits(pb, v_bits, v_codes);
900 
901  if (p_vectors[curidx2*2 ] == 64.0f) {
902  int len = av_log2(c3);
903  v_codes = (((1 << (len - 3)) - 2) << len) | (c3 & ((1 << len) - 1));
904  put_bits(pb, len* 2 - 3, v_codes);
905  }
906  if (p_vectors[curidx2*2+1] == 64.0f) {
907  int len = av_log2(c4);
908  v_codes = (((1 << (len - 3)) - 2) << len) | (c4 & ((1 << len) - 1));
909  put_bits(pb, len * 2 - 3, v_codes);
910  }
911 
912  if (out || energy) {
913  float e1, e2, e3, e4;
914  e1 = copysignf(c1 * cbrtf(c1) * IQ, in[i+0]);
915  e2 = copysignf(c2 * cbrtf(c2) * IQ, in[i+1]);
916  e3 = copysignf(c3 * cbrtf(c3) * IQ, in[i+2]);
917  e4 = copysignf(c4 * cbrtf(c4) * IQ, in[i+3]);
918  if (out) {
919  out[i+0] = e1;
920  out[i+1] = e2;
921  out[i+2] = e3;
922  out[i+3] = e4;
923  }
924  if (energy)
925  qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
926  }
927  }
928  }
929  if (energy)
930  *energy = qenergy;
931 }
932 
933 static void quantize_and_encode_band_cost_NONE_mips(struct AACEncContext *s,
934  PutBitContext *pb, const float *in, float *out,
935  const float *scaled, int size, int scale_idx,
936  int cb, const float lambda, const float uplim,
937  int *bits, float *energy, const float ROUNDING) {
938  av_assert0(0);
939 }
940 
941 static void quantize_and_encode_band_cost_ZERO_mips(struct AACEncContext *s,
942  PutBitContext *pb, const float *in, float *out,
943  const float *scaled, int size, int scale_idx,
944  int cb, const float lambda, const float uplim,
945  int *bits, float *energy, const float ROUNDING) {
946  int i;
947  if (bits)
948  *bits = 0;
949  if (out) {
950  for (i = 0; i < size; i += 4) {
951  out[i ] = 0.0f;
952  out[i+1] = 0.0f;
953  out[i+2] = 0.0f;
954  out[i+3] = 0.0f;
955  }
956  }
957  if (energy)
958  *energy = 0.0f;
959 }
960 
961 static void (*const quantize_and_encode_band_cost_arr[])(struct AACEncContext *s,
962  PutBitContext *pb, const float *in, float *out,
963  const float *scaled, int size, int scale_idx,
964  int cb, const float lambda, const float uplim,
965  int *bits, float *energy, const float ROUNDING) = {
966  quantize_and_encode_band_cost_ZERO_mips,
967  quantize_and_encode_band_cost_SQUAD_mips,
968  quantize_and_encode_band_cost_SQUAD_mips,
969  quantize_and_encode_band_cost_UQUAD_mips,
970  quantize_and_encode_band_cost_UQUAD_mips,
971  quantize_and_encode_band_cost_SPAIR_mips,
972  quantize_and_encode_band_cost_SPAIR_mips,
973  quantize_and_encode_band_cost_UPAIR7_mips,
974  quantize_and_encode_band_cost_UPAIR7_mips,
975  quantize_and_encode_band_cost_UPAIR12_mips,
976  quantize_and_encode_band_cost_UPAIR12_mips,
977  quantize_and_encode_band_cost_ESC_mips,
978  quantize_and_encode_band_cost_NONE_mips, /* cb 12 doesn't exist */
979  quantize_and_encode_band_cost_ZERO_mips,
980  quantize_and_encode_band_cost_ZERO_mips,
981  quantize_and_encode_band_cost_ZERO_mips,
982 };
983 
/*
 * Call the quantize/encode routine registered for codebook cb in
 * quantize_and_encode_band_cost_arr. Note that cb appears twice in the
 * expansion (table index and argument), so it is evaluated twice.
 */
#define quantize_and_encode_band_cost(s, pb, in, out, scaled, size, scale_idx, \
                                      cb, lambda, uplim, bits, energy, ROUNDING) \
    quantize_and_encode_band_cost_arr[cb](s, pb, in, out, scaled, size, scale_idx, \
                                          cb, lambda, uplim, bits, energy, ROUNDING)
990 
991 static void quantize_and_encode_band_mips(struct AACEncContext *s, PutBitContext *pb,
992  const float *in, float *out, int size, int scale_idx,
993  int cb, const float lambda, int rtz)
994 {
995  quantize_and_encode_band_cost(s, pb, in, out, NULL, size, scale_idx, cb, lambda,
997 }
998 
999 /**
1000  * Functions developed from template function and optimized for getting the number of bits
1001  */
1002 static float get_band_numbits_ZERO_mips(struct AACEncContext *s,
1003  PutBitContext *pb, const float *in,
1004  const float *scaled, int size, int scale_idx,
1005  int cb, const float lambda, const float uplim,
1006  int *bits)
1007 {
1008  return 0;
1009 }
1010 
1011 static float get_band_numbits_NONE_mips(struct AACEncContext *s,
1012  PutBitContext *pb, const float *in,
1013  const float *scaled, int size, int scale_idx,
1014  int cb, const float lambda, const float uplim,
1015  int *bits)
1016 {
1017  av_assert0(0);
1018  return 0;
1019 }
1020 
/**
 * Count the codebook bits for a band whose coefficients are coded four at a
 * time as signed values in {-1, 0, 1}. Nothing is written to a bitstream.
 *
 * Reads in (sign bits only), scaled, size, scale_idx and cb. The parameters
 * s, pb, lambda, uplim and bits are not used by this function.
 *
 * @return sum of the ff_aac_spectral_bits[cb-1][] entries selected for each
 *         group of four coefficients (integer total returned as float)
 */
1021  static float get_band_numbits_SQUAD_mips(struct AACEncContext *s,
1022  PutBitContext *pb, const float *in,
1023  const float *scaled, int size, int scale_idx,
1024  int cb, const float lambda, const float uplim,
1025  int *bits)
1026 {
1027  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1028  int i;
1029  int qc1, qc2, qc3, qc4;
1030  int curbits = 0;
1031 
1032  uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1033 
/* Four coefficients per iteration; presumably size is a multiple of 4 — confirm with callers. */
1034  for (i = 0; i < size; i += 4) {
1035  int curidx;
1036  int *in_int = (int *)&in[i];
1037  int t0, t1, t2, t3, t4, t5, t6, t7;
1038 
/* Quantize: scale, add the rounding offset, truncate to int. */
1039  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1040  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1041  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1042  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1043 
/*
 * Reduce each qcN to (qcN > 0), i.e. 0 or 1, then negate it when the top
 * (sign) bit of the raw in[] word is set. Result: qcN in {-1, 0, 1}.
 */
1044  __asm__ volatile (
1045  ".set push \n\t"
1046  ".set noreorder \n\t"
1047 
1048  "slt %[qc1], $zero, %[qc1] \n\t"
1049  "slt %[qc2], $zero, %[qc2] \n\t"
1050  "slt %[qc3], $zero, %[qc3] \n\t"
1051  "slt %[qc4], $zero, %[qc4] \n\t"
1052  "lw %[t0], 0(%[in_int]) \n\t"
1053  "lw %[t1], 4(%[in_int]) \n\t"
1054  "lw %[t2], 8(%[in_int]) \n\t"
1055  "lw %[t3], 12(%[in_int]) \n\t"
1056  "srl %[t0], %[t0], 31 \n\t"
1057  "srl %[t1], %[t1], 31 \n\t"
1058  "srl %[t2], %[t2], 31 \n\t"
1059  "srl %[t3], %[t3], 31 \n\t"
1060  "subu %[t4], $zero, %[qc1] \n\t"
1061  "subu %[t5], $zero, %[qc2] \n\t"
1062  "subu %[t6], $zero, %[qc3] \n\t"
1063  "subu %[t7], $zero, %[qc4] \n\t"
1064  "movn %[qc1], %[t4], %[t0] \n\t"
1065  "movn %[qc2], %[t5], %[t1] \n\t"
1066  "movn %[qc3], %[t6], %[t2] \n\t"
1067  "movn %[qc4], %[t7], %[t3] \n\t"
1068 
1069  ".set pop \n\t"
1070 
1071  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1072  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1073  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1074  [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
1075  : [in_int]"r"(in_int)
1076  : "memory"
1077  );
1078 
/*
 * Base-3 index over the four values: 27*qc1 + 9*qc2 + 3*qc3 + qc4.
 * Adding 40 (= 27 + 9 + 3 + 1) moves the range [-40, 40] to [0, 80].
 */
1079  curidx = qc1;
1080  curidx *= 3;
1081  curidx += qc2;
1082  curidx *= 3;
1083  curidx += qc3;
1084  curidx *= 3;
1085  curidx += qc4;
1086  curidx += 40;
1087 
1088  curbits += p_bits[curidx];
1089  }
1090  return curbits;
1091 }
1092 
/**
 * Count the bits for a band whose coefficients are coded four at a time as
 * magnitudes clamped to at most 2, plus the per-entry sign-bit count.
 * Nothing is written to a bitstream.
 *
 * Reads scaled, size, scale_idx and cb. The parameters s, pb, in, lambda,
 * uplim and bits are not used by this function.
 *
 * @return sum of ff_aac_spectral_bits[cb-1][idx] + uquad_sign_bits[idx] over
 *         all groups of four coefficients (integer total returned as float)
 */
1093  static float get_band_numbits_UQUAD_mips(struct AACEncContext *s,
1094  PutBitContext *pb, const float *in,
1095  const float *scaled, int size, int scale_idx,
1096  int cb, const float lambda, const float uplim,
1097  int *bits)
1098 {
1099  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1100  int i;
1101  int curbits = 0;
1102  int qc1, qc2, qc3, qc4;
1103 
1104  uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1105 
/* Four coefficients per iteration; presumably size is a multiple of 4 — confirm with callers. */
1106  for (i = 0; i < size; i += 4) {
1107  int curidx;
1108  int t0, t1, t2, t3, t4;
1109 
/* Quantize: scale, add the rounding offset, truncate to int. */
1110  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1111  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1112  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1113  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1114 
/* Clamp each qcN to at most 2: tN = (2 < qcN), then qcN = 2 where tN is set. */
1115  __asm__ volatile (
1116  ".set push \n\t"
1117  ".set noreorder \n\t"
1118 
1119  "ori %[t4], $zero, 2 \n\t"
1120  "slt %[t0], %[t4], %[qc1] \n\t"
1121  "slt %[t1], %[t4], %[qc2] \n\t"
1122  "slt %[t2], %[t4], %[qc3] \n\t"
1123  "slt %[t3], %[t4], %[qc4] \n\t"
1124  "movn %[qc1], %[t4], %[t0] \n\t"
1125  "movn %[qc2], %[t4], %[t1] \n\t"
1126  "movn %[qc3], %[t4], %[t2] \n\t"
1127  "movn %[qc4], %[t4], %[t3] \n\t"
1128 
1129  ".set pop \n\t"
1130 
1131  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1132  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1133  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1134  [t4]"=&r"(t4)
1135  );
1136 
/* Base-3 index over the four values: 27*qc1 + 9*qc2 + 3*qc3 + qc4. */
1137  curidx = qc1;
1138  curidx *= 3;
1139  curidx += qc2;
1140  curidx *= 3;
1141  curidx += qc3;
1142  curidx *= 3;
1143  curidx += qc4;
1144 
1145  curbits += p_bits[curidx];
1146  curbits += uquad_sign_bits[curidx];
1147  }
1148  return curbits;
1149 }
1150 
/**
 * Count the codebook bits for a band whose coefficients are coded in pairs
 * as signed values with magnitude clamped to at most 4. Two pairs are
 * handled per loop iteration. Nothing is written to a bitstream.
 *
 * Reads in (sign bits only), scaled, size, scale_idx and cb. The parameters
 * s, pb, lambda, uplim and bits are not used by this function.
 *
 * @return sum of the ff_aac_spectral_bits[cb-1][] entries selected for each
 *         pair (integer total returned as float)
 */
1151  static float get_band_numbits_SPAIR_mips(struct AACEncContext *s,
1152  PutBitContext *pb, const float *in,
1153  const float *scaled, int size, int scale_idx,
1154  int cb, const float lambda, const float uplim,
1155  int *bits)
1156 {
1157  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1158  int i;
1159  int qc1, qc2, qc3, qc4;
1160  int curbits = 0;
1161 
1162  uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
1163 
/* Four coefficients (two pairs) per iteration; presumably size is a multiple of 4 — confirm with callers. */
1164  for (i = 0; i < size; i += 4) {
1165  int curidx, curidx2;
1166  int *in_int = (int *)&in[i];
1167  int t0, t1, t2, t3, t4, t5, t6, t7;
1168 
/* Quantize: scale, add the rounding offset, truncate to int. */
1169  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1170  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1171  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1172  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1173 
/*
 * First half: clamp each qcN to at most 4. Second half: negate qcN when
 * the top (sign) bit of the raw in[] word is set. t0..t4 are reused
 * between the two halves.
 */
1174  __asm__ volatile (
1175  ".set push \n\t"
1176  ".set noreorder \n\t"
1177 
1178  "ori %[t4], $zero, 4 \n\t"
1179  "slt %[t0], %[t4], %[qc1] \n\t"
1180  "slt %[t1], %[t4], %[qc2] \n\t"
1181  "slt %[t2], %[t4], %[qc3] \n\t"
1182  "slt %[t3], %[t4], %[qc4] \n\t"
1183  "movn %[qc1], %[t4], %[t0] \n\t"
1184  "movn %[qc2], %[t4], %[t1] \n\t"
1185  "movn %[qc3], %[t4], %[t2] \n\t"
1186  "movn %[qc4], %[t4], %[t3] \n\t"
1187  "lw %[t0], 0(%[in_int]) \n\t"
1188  "lw %[t1], 4(%[in_int]) \n\t"
1189  "lw %[t2], 8(%[in_int]) \n\t"
1190  "lw %[t3], 12(%[in_int]) \n\t"
1191  "srl %[t0], %[t0], 31 \n\t"
1192  "srl %[t1], %[t1], 31 \n\t"
1193  "srl %[t2], %[t2], 31 \n\t"
1194  "srl %[t3], %[t3], 31 \n\t"
1195  "subu %[t4], $zero, %[qc1] \n\t"
1196  "subu %[t5], $zero, %[qc2] \n\t"
1197  "subu %[t6], $zero, %[qc3] \n\t"
1198  "subu %[t7], $zero, %[qc4] \n\t"
1199  "movn %[qc1], %[t4], %[t0] \n\t"
1200  "movn %[qc2], %[t5], %[t1] \n\t"
1201  "movn %[qc3], %[t6], %[t2] \n\t"
1202  "movn %[qc4], %[t7], %[t3] \n\t"
1203 
1204  ".set pop \n\t"
1205 
1206  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1207  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1208  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1209  [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
1210  : [in_int]"r"(in_int)
1211  : "memory"
1212  );
1213 
/* Base-9 pair index; +40 (= 9*4 + 4) moves the range [-40, 40] to [0, 80]. */
1214  curidx = 9 * qc1;
1215  curidx += qc2 + 40;
1216 
1217  curidx2 = 9 * qc3;
1218  curidx2 += qc4 + 40;
1219 
1220  curbits += p_bits[curidx] + p_bits[curidx2];
1221  }
1222  return curbits;
1223 }
1224 
/**
 * Count the bits for a band whose coefficients are coded in pairs as
 * magnitudes clamped to at most 7, plus the per-entry sign-bit count.
 * Two pairs are handled per loop iteration. Nothing is written to a
 * bitstream.
 *
 * Reads scaled, size, scale_idx and cb. The parameters s, pb, in, lambda,
 * uplim and bits are not used by this function.
 *
 * @return sum of ff_aac_spectral_bits[cb-1][idx] + upair7_sign_bits[idx]
 *         over all pairs (integer total returned as float)
 */
1225  static float get_band_numbits_UPAIR7_mips(struct AACEncContext *s,
1226  PutBitContext *pb, const float *in,
1227  const float *scaled, int size, int scale_idx,
1228  int cb, const float lambda, const float uplim,
1229  int *bits)
1230 {
1231  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1232  int i;
1233  int qc1, qc2, qc3, qc4;
1234  int curbits = 0;
1235 
1236  uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1237 
/* Four coefficients (two pairs) per iteration; presumably size is a multiple of 4 — confirm with callers. */
1238  for (i = 0; i < size; i += 4) {
1239  int curidx, curidx2;
1240  int t0, t1, t2, t3, t4;
1241 
/* Quantize: scale, add the rounding offset, truncate to int. */
1242  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1243  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1244  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1245  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1246 
/* Clamp each qcN to at most 7: tN = (7 < qcN), then qcN = 7 where tN is set. */
1247  __asm__ volatile (
1248  ".set push \n\t"
1249  ".set noreorder \n\t"
1250 
1251  "ori %[t4], $zero, 7 \n\t"
1252  "slt %[t0], %[t4], %[qc1] \n\t"
1253  "slt %[t1], %[t4], %[qc2] \n\t"
1254  "slt %[t2], %[t4], %[qc3] \n\t"
1255  "slt %[t3], %[t4], %[qc4] \n\t"
1256  "movn %[qc1], %[t4], %[t0] \n\t"
1257  "movn %[qc2], %[t4], %[t1] \n\t"
1258  "movn %[qc3], %[t4], %[t2] \n\t"
1259  "movn %[qc4], %[t4], %[t3] \n\t"
1260 
1261  ".set pop \n\t"
1262 
1263  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1264  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1265  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1266  [t4]"=&r"(t4)
1267  );
1268 
/* Base-8 pair index: 8 * first + second. */
1269  curidx = 8 * qc1;
1270  curidx += qc2;
1271 
1272  curidx2 = 8 * qc3;
1273  curidx2 += qc4;
1274 
1275  curbits += p_bits[curidx] +
1276  upair7_sign_bits[curidx] +
1277  p_bits[curidx2] +
1278  upair7_sign_bits[curidx2];
1279  }
1280  return curbits;
1281 }
1282 
/**
 * Count the bits for a band whose coefficients are coded in pairs as
 * magnitudes clamped to at most 12, plus the per-entry sign-bit count.
 * Two pairs are handled per loop iteration. Nothing is written to a
 * bitstream.
 *
 * Reads scaled, size, scale_idx and cb. The parameters s, pb, in, lambda,
 * uplim and bits are not used by this function.
 *
 * @return sum of ff_aac_spectral_bits[cb-1][idx] + upair12_sign_bits[idx]
 *         over all pairs (integer total returned as float)
 */
1283  static float get_band_numbits_UPAIR12_mips(struct AACEncContext *s,
1284  PutBitContext *pb, const float *in,
1285  const float *scaled, int size, int scale_idx,
1286  int cb, const float lambda, const float uplim,
1287  int *bits)
1288 {
1289  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1290  int i;
1291  int qc1, qc2, qc3, qc4;
1292  int curbits = 0;
1293 
1294  uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1295 
/* Four coefficients (two pairs) per iteration; presumably size is a multiple of 4 — confirm with callers. */
1296  for (i = 0; i < size; i += 4) {
1297  int curidx, curidx2;
1298  int t0, t1, t2, t3, t4;
1299 
/* Quantize: scale, add the rounding offset, truncate to int. */
1300  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1301  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1302  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1303  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1304 
/* Clamp each qcN to at most 12: tN = (12 < qcN), then qcN = 12 where tN is set. */
1305  __asm__ volatile (
1306  ".set push \n\t"
1307  ".set noreorder \n\t"
1308 
1309  "ori %[t4], $zero, 12 \n\t"
1310  "slt %[t0], %[t4], %[qc1] \n\t"
1311  "slt %[t1], %[t4], %[qc2] \n\t"
1312  "slt %[t2], %[t4], %[qc3] \n\t"
1313  "slt %[t3], %[t4], %[qc4] \n\t"
1314  "movn %[qc1], %[t4], %[t0] \n\t"
1315  "movn %[qc2], %[t4], %[t1] \n\t"
1316  "movn %[qc3], %[t4], %[t2] \n\t"
1317  "movn %[qc4], %[t4], %[t3] \n\t"
1318 
1319  ".set pop \n\t"
1320 
1321  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1322  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1323  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1324  [t4]"=&r"(t4)
1325  );
1326 
/* Base-13 pair index: 13 * first + second. */
1327  curidx = 13 * qc1;
1328  curidx += qc2;
1329 
1330  curidx2 = 13 * qc3;
1331  curidx2 += qc4;
1332 
1333  curbits += p_bits[curidx] +
1334  p_bits[curidx2] +
1335  upair12_sign_bits[curidx] +
1336  upair12_sign_bits[curidx2];
1337  }
1338  return curbits;
1339 }
1340 
/**
 * Count the bits for a band coded in pairs where values above 15 are
 * replaced by the marker 16 and cost additional escape bits. Two pairs are
 * handled per loop iteration. Nothing is written to a bitstream.
 *
 * Per coefficient the total is the table bits for its pair, the pair's
 * esc_sign_bits[] entry, and, only when the quantized value exceeded 15,
 * an extra 2 * floor(log2(value)) - 3 bits.
 *
 * Reads scaled, size, scale_idx and cb. The parameters s, pb, in, lambda,
 * uplim and bits are not used by this function.
 *
 * @return the accumulated bit count (integer total returned as float)
 */
1341  static float get_band_numbits_ESC_mips(struct AACEncContext *s,
1342  PutBitContext *pb, const float *in,
1343  const float *scaled, int size, int scale_idx,
1344  int cb, const float lambda, const float uplim,
1345  int *bits)
1346 {
1347  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1348  int i;
1349  int qc1, qc2, qc3, qc4;
1350  int curbits = 0;
1351 
1352  uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
1353 
/* Four coefficients (two pairs) per iteration; presumably size is a multiple of 4 — confirm with callers. */
1354  for (i = 0; i < size; i += 4) {
1355  int curidx, curidx2;
1356  int cond0, cond1, cond2, cond3;
1357  int c1, c2, c3, c4;
1358  int t4, t5;
1359 
/* Quantize: scale, add the rounding offset, truncate to int. */
1360  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1361  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1362  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1363  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1364 
/*
 * Steps, per coefficient N:
 *   cN    = (qcN shifted left by 18 with shll_s.w) >> 18
 *   condN = (15 < qcN); qcN = 16 where condN is set
 *   cN    = 2 * (31 - clz(cN)) - 3, then masked with -condN so that it is
 *           0 unless the value exceeded 15
 * NOTE(review): shll_s.w is a MIPS DSP ASE instruction; presumably the
 * saturating shift caps cN at 13 bits (8191) — confirm against the DSP
 * ASE manual and check that this file is only built with DSP support.
 */
1365  __asm__ volatile (
1366  ".set push \n\t"
1367  ".set noreorder \n\t"
1368 
1369  "ori %[t4], $zero, 15 \n\t"
1370  "ori %[t5], $zero, 16 \n\t"
1371  "shll_s.w %[c1], %[qc1], 18 \n\t"
1372  "shll_s.w %[c2], %[qc2], 18 \n\t"
1373  "shll_s.w %[c3], %[qc3], 18 \n\t"
1374  "shll_s.w %[c4], %[qc4], 18 \n\t"
1375  "srl %[c1], %[c1], 18 \n\t"
1376  "srl %[c2], %[c2], 18 \n\t"
1377  "srl %[c3], %[c3], 18 \n\t"
1378  "srl %[c4], %[c4], 18 \n\t"
1379  "slt %[cond0], %[t4], %[qc1] \n\t"
1380  "slt %[cond1], %[t4], %[qc2] \n\t"
1381  "slt %[cond2], %[t4], %[qc3] \n\t"
1382  "slt %[cond3], %[t4], %[qc4] \n\t"
1383  "movn %[qc1], %[t5], %[cond0] \n\t"
1384  "movn %[qc2], %[t5], %[cond1] \n\t"
1385  "movn %[qc3], %[t5], %[cond2] \n\t"
1386  "movn %[qc4], %[t5], %[cond3] \n\t"
1387  "ori %[t5], $zero, 31 \n\t"
1388  "clz %[c1], %[c1] \n\t"
1389  "clz %[c2], %[c2] \n\t"
1390  "clz %[c3], %[c3] \n\t"
1391  "clz %[c4], %[c4] \n\t"
1392  "subu %[c1], %[t5], %[c1] \n\t"
1393  "subu %[c2], %[t5], %[c2] \n\t"
1394  "subu %[c3], %[t5], %[c3] \n\t"
1395  "subu %[c4], %[t5], %[c4] \n\t"
1396  "sll %[c1], %[c1], 1 \n\t"
1397  "sll %[c2], %[c2], 1 \n\t"
1398  "sll %[c3], %[c3], 1 \n\t"
1399  "sll %[c4], %[c4], 1 \n\t"
1400  "addiu %[c1], %[c1], -3 \n\t"
1401  "addiu %[c2], %[c2], -3 \n\t"
1402  "addiu %[c3], %[c3], -3 \n\t"
1403  "addiu %[c4], %[c4], -3 \n\t"
1404  "subu %[cond0], $zero, %[cond0] \n\t"
1405  "subu %[cond1], $zero, %[cond1] \n\t"
1406  "subu %[cond2], $zero, %[cond2] \n\t"
1407  "subu %[cond3], $zero, %[cond3] \n\t"
1408  "and %[c1], %[c1], %[cond0] \n\t"
1409  "and %[c2], %[c2], %[cond1] \n\t"
1410  "and %[c3], %[c3], %[cond2] \n\t"
1411  "and %[c4], %[c4], %[cond3] \n\t"
1412 
1413  ".set pop \n\t"
1414 
1415  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1416  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1417  [cond0]"=&r"(cond0), [cond1]"=&r"(cond1),
1418  [cond2]"=&r"(cond2), [cond3]"=&r"(cond3),
1419  [c1]"=&r"(c1), [c2]"=&r"(c2),
1420  [c3]"=&r"(c3), [c4]"=&r"(c4),
1421  [t4]"=&r"(t4), [t5]"=&r"(t5)
1422  );
1423 
/* Base-17 pair index: 17 * first + second (values 0..16). */
1424  curidx = 17 * qc1;
1425  curidx += qc2;
1426 
1427  curidx2 = 17 * qc3;
1428  curidx2 += qc4;
1429 
1430  curbits += p_bits[curidx];
1431  curbits += esc_sign_bits[curidx];
1432  curbits += p_bits[curidx2];
1433  curbits += esc_sign_bits[curidx2];
1434 
/* Escape-sequence bits; each cN is 0 when its value did not exceed 15. */
1435  curbits += c1;
1436  curbits += c2;
1437  curbits += c3;
1438  curbits += c4;
1439  }
1440  return curbits;
1441 }
1442 
1443 static float (*const get_band_numbits_arr[])(struct AACEncContext *s,
1444  PutBitContext *pb, const float *in,
1445  const float *scaled, int size, int scale_idx,
1446  int cb, const float lambda, const float uplim,
1447  int *bits) = {
1448  get_band_numbits_ZERO_mips,
1449  get_band_numbits_SQUAD_mips,
1450  get_band_numbits_SQUAD_mips,
1451  get_band_numbits_UQUAD_mips,
1452  get_band_numbits_UQUAD_mips,
1453  get_band_numbits_SPAIR_mips,
1454  get_band_numbits_SPAIR_mips,
1455  get_band_numbits_UPAIR7_mips,
1456  get_band_numbits_UPAIR7_mips,
1457  get_band_numbits_UPAIR12_mips,
1458  get_band_numbits_UPAIR12_mips,
1459  get_band_numbits_ESC_mips,
1460  get_band_numbits_NONE_mips, /* cb 12 doesn't exist */
1461  get_band_numbits_ZERO_mips,
1462  get_band_numbits_ZERO_mips,
1463  get_band_numbits_ZERO_mips,
1464 };
1465 
/*
 * Call the bit-counting routine registered for codebook cb in
 * get_band_numbits_arr. Note that cb appears twice in the expansion
 * (table index and argument), so it is evaluated twice.
 */
#define get_band_numbits(s, pb, in, scaled, size, scale_idx, \
                         cb, lambda, uplim, bits)             \
    get_band_numbits_arr[cb](s, pb, in, scaled, size, scale_idx, \
                             cb, lambda, uplim, bits)
1472 
/**
 * Return the bit count for a band under codebook cb.
 *
 * Thin wrapper around get_band_numbits() that passes NULL as the bitstream
 * writer. The energy parameter is accepted but not used here.
 *
 * @return value returned by the codebook-specific bit-counting routine
 */
static float quantize_band_cost_bits(struct AACEncContext *s, const float *in,
                                     const float *scaled, int size, int scale_idx,
                                     int cb, const float lambda, const float uplim,
                                     int *bits, float *energy)
{
    const float band_bits = get_band_numbits(s, NULL, in, scaled, size,
                                             scale_idx, cb, lambda, uplim, bits);

    return band_bits;
}
1480 
1481 /**
1482  * Functions developed from template function and optimized for getting the band cost
1483  */
1484 #if HAVE_MIPSFPU
1485 static float get_band_cost_ZERO_mips(struct AACEncContext *s,
1486  PutBitContext *pb, const float *in,
1487  const float *scaled, int size, int scale_idx,
1488  int cb, const float lambda, const float uplim,
1489  int *bits, float *energy)
1490 {
1491  int i;
1492  float cost = 0;
1493 
1494  for (i = 0; i < size; i += 4) {
1495  cost += in[i ] * in[i ];
1496  cost += in[i+1] * in[i+1];
1497  cost += in[i+2] * in[i+2];
1498  cost += in[i+3] * in[i+3];
1499  }
1500  if (bits)
1501  *bits = 0;
1502  if (energy)
1503  *energy = 0.0f;
1504  return cost * lambda;
1505 }
1506 
1507 static float get_band_cost_NONE_mips(struct AACEncContext *s,
1508  PutBitContext *pb, const float *in,
1509  const float *scaled, int size, int scale_idx,
1510  int cb, const float lambda, const float uplim,
1511  int *bits, float *energy)
1512 {
1513  av_assert0(0);
1514  return 0;
1515 }
1516 
/**
 * Compute the cost of a band whose coefficients are coded four at a time
 * as signed values in {-1, 0, 1}. Nothing is written to a bitstream.
 *
 * For each group of four the function looks up the bit count and the
 * codebook vector, accumulates the squared vector entries, and accumulates
 * the squared error between in[] and the vector scaled by IQ.
 *
 * The parameters s, pb and uplim are not used by this function.
 *
 * @param bits   if non-NULL, receives the total bit count
 * @param energy if non-NULL, receives the summed squared vector entries
 *               multiplied by IQ*IQ
 * @return squared error * lambda + total bit count
 */
1517  static float get_band_cost_SQUAD_mips(struct AACEncContext *s,
1518  PutBitContext *pb, const float *in,
1519  const float *scaled, int size, int scale_idx,
1520  int cb, const float lambda, const float uplim,
1521  int *bits, float *energy)
1522 {
1523  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1524  const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
1525  int i;
1526  float cost = 0;
1527  float qenergy = 0.0f;
1528  int qc1, qc2, qc3, qc4;
1529  int curbits = 0;
1530 
1531  uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1532  float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
1533 
/* Four coefficients per iteration; presumably size is a multiple of 4 — confirm with callers. */
1534  for (i = 0; i < size; i += 4) {
1535  const float *vec;
1536  int curidx;
1537  int *in_int = (int *)&in[i];
1538  float *in_pos = (float *)&in[i];
1539  float di0, di1, di2, di3;
1540  int t0, t1, t2, t3, t4, t5, t6, t7;
1541 
/* Quantize: scale, add the rounding offset, truncate to int. */
1542  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1543  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1544  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1545  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1546 
/*
 * Reduce each qcN to (qcN > 0), i.e. 0 or 1, then negate it when the top
 * (sign) bit of the raw in[] word is set. Result: qcN in {-1, 0, 1}.
 */
1547  __asm__ volatile (
1548  ".set push \n\t"
1549  ".set noreorder \n\t"
1550 
1551  "slt %[qc1], $zero, %[qc1] \n\t"
1552  "slt %[qc2], $zero, %[qc2] \n\t"
1553  "slt %[qc3], $zero, %[qc3] \n\t"
1554  "slt %[qc4], $zero, %[qc4] \n\t"
1555  "lw %[t0], 0(%[in_int]) \n\t"
1556  "lw %[t1], 4(%[in_int]) \n\t"
1557  "lw %[t2], 8(%[in_int]) \n\t"
1558  "lw %[t3], 12(%[in_int]) \n\t"
1559  "srl %[t0], %[t0], 31 \n\t"
1560  "srl %[t1], %[t1], 31 \n\t"
1561  "srl %[t2], %[t2], 31 \n\t"
1562  "srl %[t3], %[t3], 31 \n\t"
1563  "subu %[t4], $zero, %[qc1] \n\t"
1564  "subu %[t5], $zero, %[qc2] \n\t"
1565  "subu %[t6], $zero, %[qc3] \n\t"
1566  "subu %[t7], $zero, %[qc4] \n\t"
1567  "movn %[qc1], %[t4], %[t0] \n\t"
1568  "movn %[qc2], %[t5], %[t1] \n\t"
1569  "movn %[qc3], %[t6], %[t2] \n\t"
1570  "movn %[qc4], %[t7], %[t3] \n\t"
1571 
1572  ".set pop \n\t"
1573 
1574  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1575  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1576  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1577  [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
1578  : [in_int]"r"(in_int)
1579  : "memory"
1580  );
1581 
/*
 * Base-3 index over the four values: 27*qc1 + 9*qc2 + 3*qc3 + qc4.
 * Adding 40 (= 27 + 9 + 3 + 1) moves the range [-40, 40] to [0, 80].
 */
1582  curidx = qc1;
1583  curidx *= 3;
1584  curidx += qc2;
1585  curidx *= 3;
1586  curidx += qc3;
1587  curidx *= 3;
1588  curidx += qc4;
1589  curidx += 40;
1590 
1591  curbits += p_bits[curidx];
/* Four floats per codebook entry. */
1592  vec = &p_codes[curidx*4];
1593 
1594  qenergy += vec[0]*vec[0] + vec[1]*vec[1]
1595  + vec[2]*vec[2] + vec[3]*vec[3];
1596 
/*
 * diN = in[i+N] - vec[N] * IQ: nmsub.s yields -(vec * IQ - in).
 * $f0..$f7 are used as scratch and listed as clobbers.
 */
1597  __asm__ volatile (
1598  ".set push \n\t"
1599  ".set noreorder \n\t"
1600 
1601  "lwc1 $f0, 0(%[in_pos]) \n\t"
1602  "lwc1 $f1, 0(%[vec]) \n\t"
1603  "lwc1 $f2, 4(%[in_pos]) \n\t"
1604  "lwc1 $f3, 4(%[vec]) \n\t"
1605  "lwc1 $f4, 8(%[in_pos]) \n\t"
1606  "lwc1 $f5, 8(%[vec]) \n\t"
1607  "lwc1 $f6, 12(%[in_pos]) \n\t"
1608  "lwc1 $f7, 12(%[vec]) \n\t"
1609  "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t"
1610  "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t"
1611  "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t"
1612  "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t"
1613 
1614  ".set pop \n\t"
1615 
1616  : [di0]"=&f"(di0), [di1]"=&f"(di1),
1617  [di2]"=&f"(di2), [di3]"=&f"(di3)
1618  : [in_pos]"r"(in_pos), [vec]"r"(vec),
1619  [IQ]"f"(IQ)
1620  : "$f0", "$f1", "$f2", "$f3",
1621  "$f4", "$f5", "$f6", "$f7",
1622  "memory"
1623  );
1624 
1625  cost += di0 * di0 + di1 * di1
1626  + di2 * di2 + di3 * di3;
1627  }
1628 
1629  if (bits)
1630  *bits = curbits;
1631  if (energy)
1632  *energy = qenergy * (IQ*IQ);
1633  return cost * lambda + curbits;
1634 }
1635 
/**
 * Compute the cost of a band whose coefficients are coded four at a time
 * as magnitudes clamped to at most 2. Nothing is written to a bitstream.
 *
 * For each group of four the function adds the table bits and the
 * uquad_sign_bits[] entry, accumulates the squared codebook-vector entries,
 * and accumulates the squared error between |in[]| and the vector scaled
 * by IQ.
 *
 * The parameters s, pb and uplim are not used by this function.
 *
 * @param bits   if non-NULL, receives the total bit count
 * @param energy if non-NULL, receives the summed squared vector entries
 *               multiplied by IQ*IQ
 * @return squared error * lambda + total bit count
 */
1636  static float get_band_cost_UQUAD_mips(struct AACEncContext *s,
1637  PutBitContext *pb, const float *in,
1638  const float *scaled, int size, int scale_idx,
1639  int cb, const float lambda, const float uplim,
1640  int *bits, float *energy)
1641 {
1642  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1643  const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
1644  int i;
1645  float cost = 0;
1646  float qenergy = 0.0f;
1647  int curbits = 0;
1648  int qc1, qc2, qc3, qc4;
1649 
1650  uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
1651  float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
1652 
/* Four coefficients per iteration; presumably size is a multiple of 4 — confirm with callers. */
1653  for (i = 0; i < size; i += 4) {
1654  const float *vec;
1655  int curidx;
1656  float *in_pos = (float *)&in[i];
1657  float di0, di1, di2, di3;
1658  int t0, t1, t2, t3, t4;
1659 
/* Quantize: scale, add the rounding offset, truncate to int. */
1660  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1661  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1662  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1663  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1664 
/* Clamp each qcN to at most 2: tN = (2 < qcN), then qcN = 2 where tN is set. */
1665  __asm__ volatile (
1666  ".set push \n\t"
1667  ".set noreorder \n\t"
1668 
1669  "ori %[t4], $zero, 2 \n\t"
1670  "slt %[t0], %[t4], %[qc1] \n\t"
1671  "slt %[t1], %[t4], %[qc2] \n\t"
1672  "slt %[t2], %[t4], %[qc3] \n\t"
1673  "slt %[t3], %[t4], %[qc4] \n\t"
1674  "movn %[qc1], %[t4], %[t0] \n\t"
1675  "movn %[qc2], %[t4], %[t1] \n\t"
1676  "movn %[qc3], %[t4], %[t2] \n\t"
1677  "movn %[qc4], %[t4], %[t3] \n\t"
1678 
1679  ".set pop \n\t"
1680 
1681  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1682  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1683  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1684  [t4]"=&r"(t4)
1685  );
1686 
/* Base-3 index over the four values: 27*qc1 + 9*qc2 + 3*qc3 + qc4. */
1687  curidx = qc1;
1688  curidx *= 3;
1689  curidx += qc2;
1690  curidx *= 3;
1691  curidx += qc3;
1692  curidx *= 3;
1693  curidx += qc4;
1694 
1695  curbits += p_bits[curidx];
1696  curbits += uquad_sign_bits[curidx];
/* Four floats per codebook entry. */
1697  vec = &p_codes[curidx*4];
1698 
1699  qenergy += vec[0]*vec[0] + vec[1]*vec[1]
1700  + vec[2]*vec[2] + vec[3]*vec[3];
1701 
/*
 * diN = |in[i+N]| - vec[N] * IQ: the inputs are loaded into diN, made
 * absolute with abs.s, then nmsub.s yields -(vec * IQ - |in|).
 * $f0..$f3 are used as scratch and listed as clobbers.
 */
1702  __asm__ volatile (
1703  ".set push \n\t"
1704  ".set noreorder \n\t"
1705 
1706  "lwc1 %[di0], 0(%[in_pos]) \n\t"
1707  "lwc1 %[di1], 4(%[in_pos]) \n\t"
1708  "lwc1 %[di2], 8(%[in_pos]) \n\t"
1709  "lwc1 %[di3], 12(%[in_pos]) \n\t"
1710  "abs.s %[di0], %[di0] \n\t"
1711  "abs.s %[di1], %[di1] \n\t"
1712  "abs.s %[di2], %[di2] \n\t"
1713  "abs.s %[di3], %[di3] \n\t"
1714  "lwc1 $f0, 0(%[vec]) \n\t"
1715  "lwc1 $f1, 4(%[vec]) \n\t"
1716  "lwc1 $f2, 8(%[vec]) \n\t"
1717  "lwc1 $f3, 12(%[vec]) \n\t"
1718  "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
1719  "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
1720  "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
1721  "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
1722 
1723  ".set pop \n\t"
1724 
1725  : [di0]"=&f"(di0), [di1]"=&f"(di1),
1726  [di2]"=&f"(di2), [di3]"=&f"(di3)
1727  : [in_pos]"r"(in_pos), [vec]"r"(vec),
1728  [IQ]"f"(IQ)
1729  : "$f0", "$f1", "$f2", "$f3",
1730  "memory"
1731  );
1732 
1733  cost += di0 * di0 + di1 * di1
1734  + di2 * di2 + di3 * di3;
1735  }
1736 
1737  if (bits)
1738  *bits = curbits;
1739  if (energy)
1740  *energy = qenergy * (IQ*IQ);
1741  return cost * lambda + curbits;
1742 }
1743 
/**
 * Compute the cost of a band whose coefficients are coded in pairs as
 * signed values with magnitude clamped to at most 4. Two pairs are handled
 * per loop iteration. Nothing is written to a bitstream.
 *
 * For each pair the function adds the table bits, accumulates the squared
 * codebook-vector entries, and accumulates the squared error between in[]
 * and the vector scaled by IQ.
 *
 * The parameters s, pb and uplim are not used by this function.
 *
 * @param bits   if non-NULL, receives the total bit count
 * @param energy if non-NULL, receives the summed squared vector entries
 *               multiplied by IQ*IQ
 * @return squared error * lambda + total bit count
 */
1744  static float get_band_cost_SPAIR_mips(struct AACEncContext *s,
1745  PutBitContext *pb, const float *in,
1746  const float *scaled, int size, int scale_idx,
1747  int cb, const float lambda, const float uplim,
1748  int *bits, float *energy)
1749 {
1750  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1751  const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
1752  int i;
1753  float cost = 0;
1754  float qenergy = 0.0f;
1755  int qc1, qc2, qc3, qc4;
1756  int curbits = 0;
1757 
1758  uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1759  float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
1760 
/* Four coefficients (two pairs) per iteration; presumably size is a multiple of 4 — confirm with callers. */
1761  for (i = 0; i < size; i += 4) {
1762  const float *vec, *vec2;
1763  int curidx, curidx2;
1764  int *in_int = (int *)&in[i];
1765  float *in_pos = (float *)&in[i];
1766  float di0, di1, di2, di3;
1767  int t0, t1, t2, t3, t4, t5, t6, t7;
1768 
/* Quantize: scale, add the rounding offset, truncate to int. */
1769  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1770  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1771  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1772  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1773 
/*
 * First half: clamp each qcN to at most 4. Second half: negate qcN when
 * the top (sign) bit of the raw in[] word is set. t0..t4 are reused
 * between the two halves.
 */
1774  __asm__ volatile (
1775  ".set push \n\t"
1776  ".set noreorder \n\t"
1777 
1778  "ori %[t4], $zero, 4 \n\t"
1779  "slt %[t0], %[t4], %[qc1] \n\t"
1780  "slt %[t1], %[t4], %[qc2] \n\t"
1781  "slt %[t2], %[t4], %[qc3] \n\t"
1782  "slt %[t3], %[t4], %[qc4] \n\t"
1783  "movn %[qc1], %[t4], %[t0] \n\t"
1784  "movn %[qc2], %[t4], %[t1] \n\t"
1785  "movn %[qc3], %[t4], %[t2] \n\t"
1786  "movn %[qc4], %[t4], %[t3] \n\t"
1787  "lw %[t0], 0(%[in_int]) \n\t"
1788  "lw %[t1], 4(%[in_int]) \n\t"
1789  "lw %[t2], 8(%[in_int]) \n\t"
1790  "lw %[t3], 12(%[in_int]) \n\t"
1791  "srl %[t0], %[t0], 31 \n\t"
1792  "srl %[t1], %[t1], 31 \n\t"
1793  "srl %[t2], %[t2], 31 \n\t"
1794  "srl %[t3], %[t3], 31 \n\t"
1795  "subu %[t4], $zero, %[qc1] \n\t"
1796  "subu %[t5], $zero, %[qc2] \n\t"
1797  "subu %[t6], $zero, %[qc3] \n\t"
1798  "subu %[t7], $zero, %[qc4] \n\t"
1799  "movn %[qc1], %[t4], %[t0] \n\t"
1800  "movn %[qc2], %[t5], %[t1] \n\t"
1801  "movn %[qc3], %[t6], %[t2] \n\t"
1802  "movn %[qc4], %[t7], %[t3] \n\t"
1803 
1804  ".set pop \n\t"
1805 
1806  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1807  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1808  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1809  [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
1810  : [in_int]"r"(in_int)
1811  : "memory"
1812  );
1813 
/* Base-9 pair index; +40 (= 9*4 + 4) moves the range [-40, 40] to [0, 80]. */
1814  curidx = 9 * qc1;
1815  curidx += qc2 + 40;
1816 
1817  curidx2 = 9 * qc3;
1818  curidx2 += qc4 + 40;
1819 
1820  curbits += p_bits[curidx];
1821  curbits += p_bits[curidx2];
1822 
/* Two floats per codebook entry. */
1823  vec = &p_codes[curidx*2];
1824  vec2 = &p_codes[curidx2*2];
1825 
1826  qenergy += vec[0]*vec[0] + vec[1]*vec[1]
1827  + vec2[0]*vec2[0] + vec2[1]*vec2[1];
1828 
/*
 * di0/di1 = in - vec * IQ for the first pair, di2/di3 = in - vec2 * IQ for
 * the second: nmsub.s yields -(vec * IQ - in).
 * $f0..$f7 are used as scratch and listed as clobbers.
 */
1829  __asm__ volatile (
1830  ".set push \n\t"
1831  ".set noreorder \n\t"
1832 
1833  "lwc1 $f0, 0(%[in_pos]) \n\t"
1834  "lwc1 $f1, 0(%[vec]) \n\t"
1835  "lwc1 $f2, 4(%[in_pos]) \n\t"
1836  "lwc1 $f3, 4(%[vec]) \n\t"
1837  "lwc1 $f4, 8(%[in_pos]) \n\t"
1838  "lwc1 $f5, 0(%[vec2]) \n\t"
1839  "lwc1 $f6, 12(%[in_pos]) \n\t"
1840  "lwc1 $f7, 4(%[vec2]) \n\t"
1841  "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t"
1842  "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t"
1843  "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t"
1844  "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t"
1845 
1846  ".set pop \n\t"
1847 
1848  : [di0]"=&f"(di0), [di1]"=&f"(di1),
1849  [di2]"=&f"(di2), [di3]"=&f"(di3)
1850  : [in_pos]"r"(in_pos), [vec]"r"(vec),
1851  [vec2]"r"(vec2), [IQ]"f"(IQ)
1852  : "$f0", "$f1", "$f2", "$f3",
1853  "$f4", "$f5", "$f6", "$f7",
1854  "memory"
1855  );
1856 
1857  cost += di0 * di0 + di1 * di1
1858  + di2 * di2 + di3 * di3;
1859  }
1860 
1861  if (bits)
1862  *bits = curbits;
1863  if (energy)
1864  *energy = qenergy * (IQ*IQ);
1865  return cost * lambda + curbits;
1866 }
1867 
/**
 * Compute the cost of a band whose coefficients are coded in pairs as
 * magnitudes clamped to at most 7. Two pairs are handled per loop
 * iteration. Nothing is written to a bitstream.
 *
 * For each pair the function adds the table bits and the
 * upair7_sign_bits[] entry, accumulates the squared codebook-vector
 * entries, and accumulates the squared error between |in[]| and the vector
 * scaled by IQ.
 *
 * The parameters s, pb and uplim are not used by this function.
 *
 * @param bits   if non-NULL, receives the total bit count
 * @param energy if non-NULL, receives the summed squared vector entries
 *               multiplied by IQ*IQ
 * @return squared error * lambda + total bit count
 */
1868  static float get_band_cost_UPAIR7_mips(struct AACEncContext *s,
1869  PutBitContext *pb, const float *in,
1870  const float *scaled, int size, int scale_idx,
1871  int cb, const float lambda, const float uplim,
1872  int *bits, float *energy)
1873 {
1874  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1875  const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
1876  int i;
1877  float cost = 0;
1878  float qenergy = 0.0f;
1879  int qc1, qc2, qc3, qc4;
1880  int curbits = 0;
1881 
1882  uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1883  float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
1884 
/* Four coefficients (two pairs) per iteration; presumably size is a multiple of 4 — confirm with callers. */
1885  for (i = 0; i < size; i += 4) {
1886  const float *vec, *vec2;
1887  int curidx, curidx2, sign1, count1, sign2, count2;
1888  int *in_int = (int *)&in[i];
1889  float *in_pos = (float *)&in[i];
1890  float di0, di1, di2, di3;
1891  int t0, t1, t2, t3, t4;
1892 
/* Quantize: scale, add the rounding offset, truncate to int. */
1893  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1894  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1895  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1896  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1897 
/*
 * Clamp each qcN to at most 7. The block also builds, per pair, a packed
 * sign word (signN: one bit per non-zero value, taken from the sign of the
 * raw in[] word) and the number of values greater than zero (countN).
 * NOTE(review): sign1, sign2, count1 and count2 are not read again in
 * this function; only the clamped qcN values are used below.
 */
1898  __asm__ volatile (
1899  ".set push \n\t"
1900  ".set noreorder \n\t"
1901 
1902  "ori %[t4], $zero, 7 \n\t"
1903  "ori %[sign1], $zero, 0 \n\t"
1904  "ori %[sign2], $zero, 0 \n\t"
1905  "slt %[t0], %[t4], %[qc1] \n\t"
1906  "slt %[t1], %[t4], %[qc2] \n\t"
1907  "slt %[t2], %[t4], %[qc3] \n\t"
1908  "slt %[t3], %[t4], %[qc4] \n\t"
1909  "movn %[qc1], %[t4], %[t0] \n\t"
1910  "movn %[qc2], %[t4], %[t1] \n\t"
1911  "movn %[qc3], %[t4], %[t2] \n\t"
1912  "movn %[qc4], %[t4], %[t3] \n\t"
1913  "lw %[t0], 0(%[in_int]) \n\t"
1914  "lw %[t1], 4(%[in_int]) \n\t"
1915  "lw %[t2], 8(%[in_int]) \n\t"
1916  "lw %[t3], 12(%[in_int]) \n\t"
1917  "slt %[t0], %[t0], $zero \n\t"
1918  "movn %[sign1], %[t0], %[qc1] \n\t"
1919  "slt %[t2], %[t2], $zero \n\t"
1920  "movn %[sign2], %[t2], %[qc3] \n\t"
1921  "slt %[t1], %[t1], $zero \n\t"
1922  "sll %[t0], %[sign1], 1 \n\t"
1923  "or %[t0], %[t0], %[t1] \n\t"
1924  "movn %[sign1], %[t0], %[qc2] \n\t"
1925  "slt %[t3], %[t3], $zero \n\t"
1926  "sll %[t0], %[sign2], 1 \n\t"
1927  "or %[t0], %[t0], %[t3] \n\t"
1928  "movn %[sign2], %[t0], %[qc4] \n\t"
1929  "slt %[count1], $zero, %[qc1] \n\t"
1930  "slt %[t1], $zero, %[qc2] \n\t"
1931  "slt %[count2], $zero, %[qc3] \n\t"
1932  "slt %[t2], $zero, %[qc4] \n\t"
1933  "addu %[count1], %[count1], %[t1] \n\t"
1934  "addu %[count2], %[count2], %[t2] \n\t"
1935 
1936  ".set pop \n\t"
1937 
1938  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1939  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1940  [sign1]"=&r"(sign1), [count1]"=&r"(count1),
1941  [sign2]"=&r"(sign2), [count2]"=&r"(count2),
1942  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1943  [t4]"=&r"(t4)
1944  : [in_int]"r"(in_int)
1945  : "memory"
1946  );
1947 
/* Base-8 pair index: 8 * first + second. */
1948  curidx = 8 * qc1;
1949  curidx += qc2;
1950 
1951  curidx2 = 8 * qc3;
1952  curidx2 += qc4;
1953 
1954  curbits += p_bits[curidx];
1955  curbits += upair7_sign_bits[curidx];
/* Two floats per codebook entry. */
1956  vec = &p_codes[curidx*2];
1957 
1958  curbits += p_bits[curidx2];
1959  curbits += upair7_sign_bits[curidx2];
1960  vec2 = &p_codes[curidx2*2];
1961 
1962  qenergy += vec[0]*vec[0] + vec[1]*vec[1]
1963  + vec2[0]*vec2[0] + vec2[1]*vec2[1];
1964 
/*
 * di0/di1 = |in| - vec * IQ for the first pair, di2/di3 = |in| - vec2 * IQ
 * for the second: inputs are loaded into diN, made absolute with abs.s,
 * then nmsub.s yields -(vec * IQ - |in|).
 * $f0..$f3 are used as scratch and listed as clobbers.
 */
1965  __asm__ volatile (
1966  ".set push \n\t"
1967  ".set noreorder \n\t"
1968 
1969  "lwc1 %[di0], 0(%[in_pos]) \n\t"
1970  "lwc1 %[di1], 4(%[in_pos]) \n\t"
1971  "lwc1 %[di2], 8(%[in_pos]) \n\t"
1972  "lwc1 %[di3], 12(%[in_pos]) \n\t"
1973  "abs.s %[di0], %[di0] \n\t"
1974  "abs.s %[di1], %[di1] \n\t"
1975  "abs.s %[di2], %[di2] \n\t"
1976  "abs.s %[di3], %[di3] \n\t"
1977  "lwc1 $f0, 0(%[vec]) \n\t"
1978  "lwc1 $f1, 4(%[vec]) \n\t"
1979  "lwc1 $f2, 0(%[vec2]) \n\t"
1980  "lwc1 $f3, 4(%[vec2]) \n\t"
1981  "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
1982  "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
1983  "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
1984  "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
1985 
1986  ".set pop \n\t"
1987 
1988  : [di0]"=&f"(di0), [di1]"=&f"(di1),
1989  [di2]"=&f"(di2), [di3]"=&f"(di3)
1990  : [in_pos]"r"(in_pos), [vec]"r"(vec),
1991  [vec2]"r"(vec2), [IQ]"f"(IQ)
1992  : "$f0", "$f1", "$f2", "$f3",
1993  "memory"
1994  );
1995 
1996  cost += di0 * di0 + di1 * di1
1997  + di2 * di2 + di3 * di3;
1998  }
1999 
2000  if (bits)
2001  *bits = curbits;
2002  if (energy)
2003  *energy = qenergy * (IQ*IQ);
2004  return cost * lambda + curbits;
2005 }
2006 
2007 static float get_band_cost_UPAIR12_mips(struct AACEncContext *s,
2008  PutBitContext *pb, const float *in,
2009  const float *scaled, int size, int scale_idx,
2010  int cb, const float lambda, const float uplim,
2011  int *bits, float *energy)
2012 {
2013  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
2014  const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
2015  int i;
2016  float cost = 0;
2017  float qenergy = 0.0f;
2018  int qc1, qc2, qc3, qc4;
2019  int curbits = 0;
2020 
2021  uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
2022  float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
2023 
2024  for (i = 0; i < size; i += 4) {
2025  const float *vec, *vec2;
2026  int curidx, curidx2;
2027  int sign1, count1, sign2, count2;
2028  int *in_int = (int *)&in[i];
2029  float *in_pos = (float *)&in[i];
2030  float di0, di1, di2, di3;
2031  int t0, t1, t2, t3, t4;
2032 
2033  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
2034  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
2035  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
2036  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
2037 
2038  __asm__ volatile (
2039  ".set push \n\t"
2040  ".set noreorder \n\t"
2041 
2042  "ori %[t4], $zero, 12 \n\t"
2043  "ori %[sign1], $zero, 0 \n\t"
2044  "ori %[sign2], $zero, 0 \n\t"
2045  "slt %[t0], %[t4], %[qc1] \n\t"
2046  "slt %[t1], %[t4], %[qc2] \n\t"
2047  "slt %[t2], %[t4], %[qc3] \n\t"
2048  "slt %[t3], %[t4], %[qc4] \n\t"
2049  "movn %[qc1], %[t4], %[t0] \n\t"
2050  "movn %[qc2], %[t4], %[t1] \n\t"
2051  "movn %[qc3], %[t4], %[t2] \n\t"
2052  "movn %[qc4], %[t4], %[t3] \n\t"
2053  "lw %[t0], 0(%[in_int]) \n\t"
2054  "lw %[t1], 4(%[in_int]) \n\t"
2055  "lw %[t2], 8(%[in_int]) \n\t"
2056  "lw %[t3], 12(%[in_int]) \n\t"
2057  "slt %[t0], %[t0], $zero \n\t"
2058  "movn %[sign1], %[t0], %[qc1] \n\t"
2059  "slt %[t2], %[t2], $zero \n\t"
2060  "movn %[sign2], %[t2], %[qc3] \n\t"
2061  "slt %[t1], %[t1], $zero \n\t"
2062  "sll %[t0], %[sign1], 1 \n\t"
2063  "or %[t0], %[t0], %[t1] \n\t"
2064  "movn %[sign1], %[t0], %[qc2] \n\t"
2065  "slt %[t3], %[t3], $zero \n\t"
2066  "sll %[t0], %[sign2], 1 \n\t"
2067  "or %[t0], %[t0], %[t3] \n\t"
2068  "movn %[sign2], %[t0], %[qc4] \n\t"
2069  "slt %[count1], $zero, %[qc1] \n\t"
2070  "slt %[t1], $zero, %[qc2] \n\t"
2071  "slt %[count2], $zero, %[qc3] \n\t"
2072  "slt %[t2], $zero, %[qc4] \n\t"
2073  "addu %[count1], %[count1], %[t1] \n\t"
2074  "addu %[count2], %[count2], %[t2] \n\t"
2075 
2076  ".set pop \n\t"
2077 
2078  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
2079  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
2080  [sign1]"=&r"(sign1), [count1]"=&r"(count1),
2081  [sign2]"=&r"(sign2), [count2]"=&r"(count2),
2082  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
2083  [t4]"=&r"(t4)
2084  : [in_int]"r"(in_int)
2085  : "memory"
2086  );
2087 
2088  curidx = 13 * qc1;
2089  curidx += qc2;
2090 
2091  curidx2 = 13 * qc3;
2092  curidx2 += qc4;
2093 
2094  curbits += p_bits[curidx];
2095  curbits += p_bits[curidx2];
2096  curbits += upair12_sign_bits[curidx];
2097  curbits += upair12_sign_bits[curidx2];
2098  vec = &p_codes[curidx*2];
2099  vec2 = &p_codes[curidx2*2];
2100 
2101  qenergy += vec[0]*vec[0] + vec[1]*vec[1]
2102  + vec2[0]*vec2[0] + vec2[1]*vec2[1];
2103 
2104  __asm__ volatile (
2105  ".set push \n\t"
2106  ".set noreorder \n\t"
2107 
2108  "lwc1 %[di0], 0(%[in_pos]) \n\t"
2109  "lwc1 %[di1], 4(%[in_pos]) \n\t"
2110  "lwc1 %[di2], 8(%[in_pos]) \n\t"
2111  "lwc1 %[di3], 12(%[in_pos]) \n\t"
2112  "abs.s %[di0], %[di0] \n\t"
2113  "abs.s %[di1], %[di1] \n\t"
2114  "abs.s %[di2], %[di2] \n\t"
2115  "abs.s %[di3], %[di3] \n\t"
2116  "lwc1 $f0, 0(%[vec]) \n\t"
2117  "lwc1 $f1, 4(%[vec]) \n\t"
2118  "lwc1 $f2, 0(%[vec2]) \n\t"
2119  "lwc1 $f3, 4(%[vec2]) \n\t"
2120  "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
2121  "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
2122  "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
2123  "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
2124 
2125  ".set pop \n\t"
2126 
2127  : [di0]"=&f"(di0), [di1]"=&f"(di1),
2128  [di2]"=&f"(di2), [di3]"=&f"(di3)
2129  : [in_pos]"r"(in_pos), [vec]"r"(vec),
2130  [vec2]"r"(vec2), [IQ]"f"(IQ)
2131  : "$f0", "$f1", "$f2", "$f3",
2132  "memory"
2133  );
2134 
2135  cost += di0 * di0 + di1 * di1
2136  + di2 * di2 + di3 * di3;
2137  }
2138 
2139  if (bits)
2140  *bits = curbits;
2141  if (energy)
2142  *energy = qenergy * (IQ*IQ);
2143  return cost * lambda + curbits;
2144 }
2145 
2146 static float get_band_cost_ESC_mips(struct AACEncContext *s,
2147  PutBitContext *pb, const float *in,
2148  const float *scaled, int size, int scale_idx,
2149  int cb, const float lambda, const float uplim,
2150  int *bits, float *energy)
2151 {
2152  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
2153  const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
2154  const float CLIPPED_ESCAPE = 165140.0f * IQ;
2155  int i;
2156  float cost = 0;
2157  float qenergy = 0.0f;
2158  int qc1, qc2, qc3, qc4;
2159  int curbits = 0;
2160 
2161  uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
2162  float *p_codes = (float* )ff_aac_codebook_vectors[cb-1];
2163 
2164  for (i = 0; i < size; i += 4) {
2165  const float *vec, *vec2;
2166  int curidx, curidx2;
2167  float t1, t2, t3, t4, V;
2168  float di1, di2, di3, di4;
2169  int cond0, cond1, cond2, cond3;
2170  int c1, c2, c3, c4;
2171  int t6, t7;
2172 
2173  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
2174  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
2175  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
2176  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
2177 
2178  __asm__ volatile (
2179  ".set push \n\t"
2180  ".set noreorder \n\t"
2181 
2182  "ori %[t6], $zero, 15 \n\t"
2183  "ori %[t7], $zero, 16 \n\t"
2184  "shll_s.w %[c1], %[qc1], 18 \n\t"
2185  "shll_s.w %[c2], %[qc2], 18 \n\t"
2186  "shll_s.w %[c3], %[qc3], 18 \n\t"
2187  "shll_s.w %[c4], %[qc4], 18 \n\t"
2188  "srl %[c1], %[c1], 18 \n\t"
2189  "srl %[c2], %[c2], 18 \n\t"
2190  "srl %[c3], %[c3], 18 \n\t"
2191  "srl %[c4], %[c4], 18 \n\t"
2192  "slt %[cond0], %[t6], %[qc1] \n\t"
2193  "slt %[cond1], %[t6], %[qc2] \n\t"
2194  "slt %[cond2], %[t6], %[qc3] \n\t"
2195  "slt %[cond3], %[t6], %[qc4] \n\t"
2196  "movn %[qc1], %[t7], %[cond0] \n\t"
2197  "movn %[qc2], %[t7], %[cond1] \n\t"
2198  "movn %[qc3], %[t7], %[cond2] \n\t"
2199  "movn %[qc4], %[t7], %[cond3] \n\t"
2200 
2201  ".set pop \n\t"
2202 
2203  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
2204  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
2205  [cond0]"=&r"(cond0), [cond1]"=&r"(cond1),
2206  [cond2]"=&r"(cond2), [cond3]"=&r"(cond3),
2207  [c1]"=&r"(c1), [c2]"=&r"(c2),
2208  [c3]"=&r"(c3), [c4]"=&r"(c4),
2209  [t6]"=&r"(t6), [t7]"=&r"(t7)
2210  );
2211 
2212  curidx = 17 * qc1;
2213  curidx += qc2;
2214 
2215  curidx2 = 17 * qc3;
2216  curidx2 += qc4;
2217 
2218  curbits += p_bits[curidx];
2219  curbits += esc_sign_bits[curidx];
2220  vec = &p_codes[curidx*2];
2221 
2222  curbits += p_bits[curidx2];
2223  curbits += esc_sign_bits[curidx2];
2224  vec2 = &p_codes[curidx2*2];
2225 
2226  curbits += (av_log2(c1) * 2 - 3) & (-cond0);
2227  curbits += (av_log2(c2) * 2 - 3) & (-cond1);
2228  curbits += (av_log2(c3) * 2 - 3) & (-cond2);
2229  curbits += (av_log2(c4) * 2 - 3) & (-cond3);
2230 
2231  t1 = fabsf(in[i ]);
2232  t2 = fabsf(in[i+1]);
2233  t3 = fabsf(in[i+2]);
2234  t4 = fabsf(in[i+3]);
2235 
2236  if (cond0) {
2237  if (t1 >= CLIPPED_ESCAPE) {
2238  di1 = t1 - CLIPPED_ESCAPE;
2239  qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
2240  } else {
2241  di1 = t1 - (V = c1 * cbrtf(c1) * IQ);
2242  qenergy += V*V;
2243  }
2244  } else {
2245  di1 = t1 - (V = vec[0] * IQ);
2246  qenergy += V*V;
2247  }
2248 
2249  if (cond1) {
2250  if (t2 >= CLIPPED_ESCAPE) {
2251  di2 = t2 - CLIPPED_ESCAPE;
2252  qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
2253  } else {
2254  di2 = t2 - (V = c2 * cbrtf(c2) * IQ);
2255  qenergy += V*V;
2256  }
2257  } else {
2258  di2 = t2 - (V = vec[1] * IQ);
2259  qenergy += V*V;
2260  }
2261 
2262  if (cond2) {
2263  if (t3 >= CLIPPED_ESCAPE) {
2264  di3 = t3 - CLIPPED_ESCAPE;
2265  qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
2266  } else {
2267  di3 = t3 - (V = c3 * cbrtf(c3) * IQ);
2268  qenergy += V*V;
2269  }
2270  } else {
2271  di3 = t3 - (V = vec2[0] * IQ);
2272  qenergy += V*V;
2273  }
2274 
2275  if (cond3) {
2276  if (t4 >= CLIPPED_ESCAPE) {
2277  di4 = t4 - CLIPPED_ESCAPE;
2278  qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
2279  } else {
2280  di4 = t4 - (V = c4 * cbrtf(c4) * IQ);
2281  qenergy += V*V;
2282  }
2283  } else {
2284  di4 = t4 - (V = vec2[1]*IQ);
2285  qenergy += V*V;
2286  }
2287 
2288  cost += di1 * di1 + di2 * di2
2289  + di3 * di3 + di4 * di4;
2290  }
2291 
2292  if (bits)
2293  *bits = curbits;
2294  return cost * lambda + curbits;
2295 }
2296 
2297 static float (*const get_band_cost_arr[])(struct AACEncContext *s,
2298  PutBitContext *pb, const float *in,
2299  const float *scaled, int size, int scale_idx,
2300  int cb, const float lambda, const float uplim,
2301  int *bits, float *energy) = {
2302  get_band_cost_ZERO_mips,
2303  get_band_cost_SQUAD_mips,
2304  get_band_cost_SQUAD_mips,
2305  get_band_cost_UQUAD_mips,
2306  get_band_cost_UQUAD_mips,
2307  get_band_cost_SPAIR_mips,
2308  get_band_cost_SPAIR_mips,
2309  get_band_cost_UPAIR7_mips,
2310  get_band_cost_UPAIR7_mips,
2311  get_band_cost_UPAIR12_mips,
2312  get_band_cost_UPAIR12_mips,
2313  get_band_cost_ESC_mips,
2314  get_band_cost_NONE_mips, /* cb 12 doesn't exist */
2315  get_band_cost_ZERO_mips,
2316  get_band_cost_ZERO_mips,
2317  get_band_cost_ZERO_mips,
2318 };
2319 
/* Forward a cost query to the estimator stored in get_band_cost_arr[cb]. */
#define get_band_cost(s, pb, in, scaled, size, scale_idx, cb,                \
                      lambda, uplim, bits, energy)                           \
    get_band_cost_arr[(cb)]((s), (pb), (in), (scaled), (size), (scale_idx),  \
                            (cb), (lambda), (uplim), (bits), (energy))
2326 
/**
 * Cost-only entry point: dispatches on cb through get_band_cost() and
 * passes NULL as the bit writer.
 *
 * @return the value returned by the selected per-codebook estimator
 */
static float quantize_band_cost(struct AACEncContext *s, const float *in,
                                const float *scaled, int size, int scale_idx,
                                int cb, const float lambda, const float uplim,
                                int *bits, float *energy)
{
    const float rd_cost = get_band_cost(s, NULL, in, scaled, size, scale_idx,
                                        cb, lambda, uplim, bits, energy);

    return rd_cost;
}
2334 
2336 
2338 
2339 static void search_for_ms_mips(AACEncContext *s, ChannelElement *cpe)
2340 {
2341  int start = 0, i, w, w2, g, sid_sf_boost, prev_mid, prev_side;
2342  uint8_t nextband0[128], nextband1[128];
2343  float M[128], S[128];
2344  float *L34 = s->scoefs, *R34 = s->scoefs + 128, *M34 = s->scoefs + 128*2, *S34 = s->scoefs + 128*3;
2345  const float lambda = s->lambda;
2346  const float mslambda = FFMIN(1.0f, lambda / 120.f);
2347  SingleChannelElement *sce0 = &cpe->ch[0];
2348  SingleChannelElement *sce1 = &cpe->ch[1];
2349  if (!cpe->common_window)
2350  return;
2351 
2352  /** Scout out next nonzero bands */
2353  ff_init_nextband_map(sce0, nextband0);
2354  ff_init_nextband_map(sce1, nextband1);
2355 
2356  prev_mid = sce0->sf_idx[0];
2357  prev_side = sce1->sf_idx[0];
2358  for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
2359  start = 0;
2360  for (g = 0; g < sce0->ics.num_swb; g++) {
2361  float bmax = bval2bmax(g * 17.0f / sce0->ics.num_swb) / 0.0045f;
2362  if (!cpe->is_mask[w*16+g])
2363  cpe->ms_mask[w*16+g] = 0;
2364  if (!sce0->zeroes[w*16+g] && !sce1->zeroes[w*16+g] && !cpe->is_mask[w*16+g]) {
2365  float Mmax = 0.0f, Smax = 0.0f;
2366 
2367  /* Must compute mid/side SF and book for the whole window group */
2368  for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
2369  for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
2370  M[i] = (sce0->coeffs[start+(w+w2)*128+i]
2371  + sce1->coeffs[start+(w+w2)*128+i]) * 0.5;
2372  S[i] = M[i]
2373  - sce1->coeffs[start+(w+w2)*128+i];
2374  }
2375  abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]);
2376  abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]);
2377  for (i = 0; i < sce0->ics.swb_sizes[g]; i++ ) {
2378  Mmax = FFMAX(Mmax, M34[i]);
2379  Smax = FFMAX(Smax, S34[i]);
2380  }
2381  }
2382 
2383  for (sid_sf_boost = 0; sid_sf_boost < 4; sid_sf_boost++) {
2384  float dist1 = 0.0f, dist2 = 0.0f;
2385  int B0 = 0, B1 = 0;
2386  int minidx;
2387  int mididx, sididx;
2388  int midcb, sidcb;
2389 
2390  minidx = FFMIN(sce0->sf_idx[w*16+g], sce1->sf_idx[w*16+g]);
2391  mididx = av_clip(minidx, 0, SCALE_MAX_POS - SCALE_DIV_512);
2392  sididx = av_clip(minidx - sid_sf_boost * 3, 0, SCALE_MAX_POS - SCALE_DIV_512);
2393  if (sce0->band_type[w*16+g] != NOISE_BT && sce1->band_type[w*16+g] != NOISE_BT
2394  && ( !ff_sfdelta_can_replace(sce0, nextband0, prev_mid, mididx, w*16+g)
2395  || !ff_sfdelta_can_replace(sce1, nextband1, prev_side, sididx, w*16+g))) {
2396  /* scalefactor range violation, bad stuff, will decrease quality unacceptably */
2397  continue;
2398  }
2399 
2400  midcb = find_min_book(Mmax, mididx);
2401  sidcb = find_min_book(Smax, sididx);
2402 
2403  /* No CB can be zero */
2404  midcb = FFMAX(1,midcb);
2405  sidcb = FFMAX(1,sidcb);
2406 
2407  for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
2408  FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
2409  FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
2410  float minthr = FFMIN(band0->threshold, band1->threshold);
2411  int b1,b2,b3,b4;
2412  for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
2413  M[i] = (sce0->coeffs[start+(w+w2)*128+i]
2414  + sce1->coeffs[start+(w+w2)*128+i]) * 0.5;
2415  S[i] = M[i]
2416  - sce1->coeffs[start+(w+w2)*128+i];
2417  }
2418 
2419  abs_pow34_v(L34, sce0->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
2420  abs_pow34_v(R34, sce1->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
2421  abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]);
2422  abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]);
2423  dist1 += quantize_band_cost(s, &sce0->coeffs[start + (w+w2)*128],
2424  L34,
2425  sce0->ics.swb_sizes[g],
2426  sce0->sf_idx[w*16+g],
2427  sce0->band_type[w*16+g],
2428  lambda / band0->threshold, INFINITY, &b1, NULL);
2429  dist1 += quantize_band_cost(s, &sce1->coeffs[start + (w+w2)*128],
2430  R34,
2431  sce1->ics.swb_sizes[g],
2432  sce1->sf_idx[w*16+g],
2433  sce1->band_type[w*16+g],
2434  lambda / band1->threshold, INFINITY, &b2, NULL);
2435  dist2 += quantize_band_cost(s, M,
2436  M34,
2437  sce0->ics.swb_sizes[g],
2438  mididx,
2439  midcb,
2440  lambda / minthr, INFINITY, &b3, NULL);
2441  dist2 += quantize_band_cost(s, S,
2442  S34,
2443  sce1->ics.swb_sizes[g],
2444  sididx,
2445  sidcb,
2446  mslambda / (minthr * bmax), INFINITY, &b4, NULL);
2447  B0 += b1+b2;
2448  B1 += b3+b4;
2449  dist1 -= b1+b2;
2450  dist2 -= b3+b4;
2451  }
2452  cpe->ms_mask[w*16+g] = dist2 <= dist1 && B1 < B0;
2453  if (cpe->ms_mask[w*16+g]) {
2454  if (sce0->band_type[w*16+g] != NOISE_BT && sce1->band_type[w*16+g] != NOISE_BT) {
2455  sce0->sf_idx[w*16+g] = mididx;
2456  sce1->sf_idx[w*16+g] = sididx;
2457  sce0->band_type[w*16+g] = midcb;
2458  sce1->band_type[w*16+g] = sidcb;
2459  } else if ((sce0->band_type[w*16+g] != NOISE_BT) ^ (sce1->band_type[w*16+g] != NOISE_BT)) {
2460  /* ms_mask unneeded, and it confuses some decoders */
2461  cpe->ms_mask[w*16+g] = 0;
2462  }
2463  break;
2464  } else if (B1 > B0) {
2465  /* More boost won't fix this */
2466  break;
2467  }
2468  }
2469  }
2470  if (!sce0->zeroes[w*16+g] && sce0->band_type[w*16+g] < RESERVED_BT)
2471  prev_mid = sce0->sf_idx[w*16+g];
2472  if (!sce1->zeroes[w*16+g] && !cpe->is_mask[w*16+g] && sce1->band_type[w*16+g] < RESERVED_BT)
2473  prev_side = sce1->sf_idx[w*16+g];
2474  start += sce0->ics.swb_sizes[g];
2475  }
2476  }
2477 }
2478 #endif /*HAVE_MIPSFPU */
2479 
2481 
2482 #endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
2483 #endif /* HAVE_INLINE_ASM */
2484 
2486 #if HAVE_INLINE_ASM
2487 #if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
2488  AACCoefficientsEncoder *e = c->coder;
2489  int option = c->options.coder;
2490 
2491  if (option == 2) {
2492  e->quantize_and_encode_band = quantize_and_encode_band_mips;
2494 #if HAVE_MIPSFPU
2496 #endif /* HAVE_MIPSFPU */
2497  }
2498 #if HAVE_MIPSFPU
2499  e->search_for_ms = search_for_ms_mips;
2500 #endif /* HAVE_MIPSFPU */
2501 #endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
2502 #endif /* HAVE_INLINE_ASM */
2503 }
M
#define M(a, b)
Definition: vp3dsp.c:48
ff_aac_coder_init_mips
void ff_aac_coder_init_mips(AACEncContext *c)
Definition: aaccoder_mips.c:2485
AACCoefficientsEncoder::encode_window_bands_info
void(* encode_window_bands_info)(struct AACEncContext *s, SingleChannelElement *sce, int win, int group_len, const float lambda)
Definition: aacenc.h:152
AACCoefficientsEncoder::search_for_quantizers
void(* search_for_quantizers)(AVCodecContext *avctx, struct AACEncContext *s, SingleChannelElement *sce, const float lambda)
Definition: aacenc.h:150
av_clip
#define av_clip
Definition: common.h:98
INFINITY
#define INFINITY
Definition: mathematics.h:118
libm.h
out
FILE * out
Definition: movenc.c:54
cb
static double cb(void *priv, double x, double y)
Definition: vf_geq.c:241
aacenctab.h
SingleChannelElement::zeroes
uint8_t zeroes[128]
band is not coded
Definition: aacenc.h:120
put_bits
static void put_bits(Jpeg2000EncoderContext *s, int val, int n)
put n times val bit
Definition: j2kenc.c:222
w
uint8_t w
Definition: llviddspenc.c:38
bval2bmax
static av_always_inline float bval2bmax(float b)
approximates exp10f(-3.0f*(0.5f + 0.5f * cosf(FFMIN(b,15.5f) / 15.5f)))
Definition: aacenc_utils.h:164
AACCoefficientsEncoder::search_for_ms
void(* search_for_ms)(struct AACEncContext *s, ChannelElement *cpe)
Definition: aacenc.h:170
t0
#define t0
Definition: regdef.h:28
ROUND_TO_ZERO
#define ROUND_TO_ZERO
Definition: aacenc_utils.h:37
float.h
t1
#define t1
Definition: regdef.h:29
mathematics.h
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
c1
static const uint64_t c1
Definition: murmur3.c:52
ChannelElement::ch
SingleChannelElement ch[2]
Definition: aacdec.h:153
SCALE_MAX_POS
#define SCALE_MAX_POS
scalefactor index maximum value
Definition: aac.h:106
S
#define S(s, c, i)
Definition: flacdsp_template.c:46
IndividualChannelStream::num_swb
int num_swb
number of scalefactor window bands
Definition: aacdec.h:92
b1
static double b1(void *priv, double x, double y)
Definition: vf_xfade.c:2035
abs_pow34_v
static void abs_pow34_v(float *out, const float *in, const int size)
Definition: aacencdsp.h:38
SCALE_DIV_512
#define SCALE_DIV_512
scalefactor difference that corresponds to scale difference in 512 times
Definition: aac.h:104
ff_sfdelta_can_replace
static int ff_sfdelta_can_replace(const SingleChannelElement *sce, const uint8_t *nextband, int prev_sf, int new_sf, int band)
Definition: aacenc_utils.h:222
POW_SF2_ZERO
#define POW_SF2_ZERO
ff_aac_pow2sf_tab index corresponding to pow(2, 0);
Definition: aac.h:110
quantize_band_cost_bits
static int quantize_band_cost_bits(struct AACEncContext *s, const float *in, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, int *bits, float *energy)
Definition: aacenc_quantization.h:52
quantize_and_encode_band_cost_arr
static const quantize_and_encode_band_func quantize_and_encode_band_cost_arr[]
Definition: aaccoder.c:229
fabsf
static __device__ float fabsf(float a)
Definition: cuda_runtime.h:181
SingleChannelElement::ics
IndividualChannelStream ics
Definition: aacdec.h:132
float
float
Definition: af_crystalizer.c:121
NOISE_BT
@ NOISE_BT
Spectral data are scaled white noise not coded in the bitstream.
Definition: aac.h:74
b3
static double b3(void *priv, double x, double y)
Definition: vf_xfade.c:2037
s
#define s(width, name)
Definition: cbs_vp9.c:198
SingleChannelElement::coeffs
INTFLOAT coeffs[1024]
coefficients for IMDCT, maybe processed
Definition: aacdec.h:137
IndividualChannelStream::swb_sizes
const uint8_t * swb_sizes
table of scalefactor band sizes for a particular window
Definition: aacenc.h:84
g
const char * g
Definition: vf_curves.c:127
bits
uint8_t bits
Definition: vp3data.h:128
t7
#define t7
Definition: regdef.h:35
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:40
PutBitContext
Definition: put_bits.h:50
option
option
Definition: libkvazaar.c:320
ChannelElement::is_mask
uint8_t is_mask[128]
Set if intensity stereo is used.
Definition: aacenc.h:142
NULL
#define NULL
Definition: coverity.c:32
B1
@ B1
Definition: vvc_mvs.c:525
t5
#define t5
Definition: regdef.h:33
codebook_trellis_rate
static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce, int win, int group_len, const float lambda)
Definition: aaccoder_trellis.h:59
t6
#define t6
Definition: regdef.h:34
V
#define V
Definition: avdct.c:30
ChannelElement::ms_mask
uint8_t ms_mask[128]
Set if mid/side stereo is used for each scalefactor window band.
Definition: aacdec.h:151
FFPsyBand
single band psychoacoustic information
Definition: psymodel.h:50
aac.h
aactab.h
B0
@ B0
Definition: vvc_mvs.c:524
ff_init_nextband_map
static void ff_init_nextband_map(const SingleChannelElement *sce, uint8_t *nextband)
Definition: aacenc_utils.h:175
aaccoder_twoloop.h
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
SingleChannelElement::sf_idx
int sf_idx[128]
scalefactor indices
Definition: aacenc.h:119
ff_aac_pow34sf_tab
float ff_aac_pow34sf_tab[428]
f
f
Definition: af_crystalizer.c:121
size
int size
Definition: twinvq_data.h:10344
aacenc_quantization_misc.h
AACCoefficientsEncoder::quantize_and_encode_band
void(* quantize_and_encode_band)(struct AACEncContext *s, PutBitContext *pb, const float *in, float *out, int size, int scale_idx, int cb, const float lambda, int rtz)
Definition: aacenc.h:154
ff_aac_spectral_codes
const uint16_t *const ff_aac_spectral_codes[11]
Definition: aactab.c:521
ROUND_STANDARD
#define ROUND_STANDARD
Definition: aacenc_utils.h:36
b2
static double b2(void *priv, double x, double y)
Definition: vf_xfade.c:2036
ChannelElement::common_window
int common_window
Set if channels share a common 'IndividualChannelStream' in bitstream.
Definition: aacenc.h:138
search_for_quantizers_twoloop
static void search_for_quantizers_twoloop(AVCodecContext *avctx, AACEncContext *s, SingleChannelElement *sce, const float lambda)
two-loop quantizers search taken from ISO 13818-7 Appendix C
Definition: aaccoder_twoloop.h:67
SingleChannelElement::band_type
enum BandType band_type[128]
band types
Definition: aacdec.h:134
SingleChannelElement
Single Channel Element - used for both SCE and LFE elements.
Definition: aacdec.h:131
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:255
IndividualChannelStream::num_windows
int num_windows
Definition: aacdec.h:93
SCALE_ONE_POS
#define SCALE_ONE_POS
scalefactor index that corresponds to scale=1.0
Definition: aac.h:105
find_min_book
static int find_min_book(float maxval, int sf)
Definition: aacenc_utils.h:68
FFPsyBand::threshold
float threshold
Definition: psymodel.h:53
t4
#define t4
Definition: regdef.h:32
t3
#define t3
Definition: regdef.h:31
aacencdsp.h
ChannelElement
channel element - generic struct for SCE/CPE/CCE/LFE
Definition: aacdec.h:148
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
cbrtf
static av_always_inline float cbrtf(float x)
Definition: libm.h:61
ff_aac_codebook_vectors
const float *const ff_aac_codebook_vectors[]
Definition: aactab.c:1022
len
int len
Definition: vorbis_enc_data.h:426
AACCoefficientsEncoder
Definition: aacenc.h:149
avcodec.h
BandCodingPath
structure used in optimal codebook search
Definition: aaccoder.c:291
__asm__
__asm__(".macro parse_r var r\n\t" "\\var = -1\n\t" _IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3) _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7) _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11) _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15) _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19) _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23) _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27) _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31) ".iflt \\var\n\t" ".error \"Unable to parse register name \\r\"\n\t" ".endif\n\t" ".endm")
ff_aac_spectral_bits
const uint8_t *const ff_aac_spectral_bits[11]
Definition: aactab.c:526
RESERVED_BT
@ RESERVED_BT
Band types following are encoded differently from others.
Definition: aac.h:73
AACEncContext
AAC encoder context.
Definition: aacenc.h:198
c2
static const uint64_t c2
Definition: murmur3.c:53
t2
#define t2
Definition: regdef.h:30
quantize_band_cost
static float quantize_band_cost(struct AACEncContext *s, const float *in, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, int *bits, float *energy)
Definition: aacenc_quantization.h:43
BandCodingPath::prev_idx
int prev_idx
pointer to the previous path point
Definition: aaccoder.c:292
AACEncContext::pb
PutBitContext pb
Definition: aacenc.h:201
aacenc_utils.h
ff_aac_pow2sf_tab
float ff_aac_pow2sf_tab[428]
AACEncContext::lambda
float lambda
Definition: aacenc.h:224
put_bits.h
IndividualChannelStream::group_len
uint8_t group_len[8]
Definition: aacdec.h:89
av_log2
int av_log2(unsigned v)
Definition: intmath.c:26
BandCodingPath::run
int run
Definition: aaccoder.c:294
AACEncContext::cpe
ChannelElement * cpe
channel elements
Definition: aacenc.h:218
aaccoder_trellis.h
aacenc.h
BandCodingPath::cost
float cost
path cost
Definition: aaccoder.c:293