FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
aaccoder_mips.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2012
3  * MIPS Technologies, Inc., California.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
14  * contributors may be used to endorse or promote products derived from
15  * this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * Author: Stanislav Ocovaj (socovaj@mips.com)
30  * Szabolcs Pal (sabolc@mips.com)
31  *
32  * AAC coefficients encoder optimized for MIPS floating-point architecture
33  *
34  * This file is part of FFmpeg.
35  *
36  * FFmpeg is free software; you can redistribute it and/or
37  * modify it under the terms of the GNU Lesser General Public
38  * License as published by the Free Software Foundation; either
39  * version 2.1 of the License, or (at your option) any later version.
40  *
41  * FFmpeg is distributed in the hope that it will be useful,
42  * but WITHOUT ANY WARRANTY; without even the implied warranty of
43  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
44  * Lesser General Public License for more details.
45  *
46  * You should have received a copy of the GNU Lesser General Public
47  * License along with FFmpeg; if not, write to the Free Software
48  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
49  */
50 
51 /**
52  * @file
53  * Reference: libavcodec/aaccoder.c
54  */
55 
56 #include "libavutil/libm.h"
57 
58 #include <float.h>
59 #include "libavutil/mathematics.h"
60 #include "libavcodec/avcodec.h"
61 #include "libavcodec/put_bits.h"
62 #include "libavcodec/aac.h"
63 #include "libavcodec/aacenc.h"
64 #include "libavcodec/aactab.h"
65 
66 #if HAVE_INLINE_ASM
/**
 * Per-band record for the codebook path search.
 *
 * NOTE(review): nothing in the visible part of this file uses the type, so
 * the field descriptions are inferred from the names -- confirm against the
 * code that fills them in.
 */
typedef struct BandCodingPath {
    int   prev_idx; /* presumably the index of the preceding path entry */
    float cost;     /* presumably the accumulated cost of this path     */
    int   run;      /* presumably the length of the current run         */
} BandCodingPath;
72 
/*
 * Bit cost indexed by run length, for the 64-entry ("long") case.
 * The cost grows by 5 bits each time the index reaches another multiple of 31.
 */
static const uint8_t run_value_bits_long[64] = {
     5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
     5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5, 10,
    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 15
};

/*
 * Bit cost indexed by run length, for the 16-entry ("short") case.
 * The cost grows by 3 bits each time the index reaches another multiple of 7.
 */
static const uint8_t run_value_bits_short[16] = {
    3, 3, 3, 3, 3, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 9
};

/*
 * Selector over the two tables above: [0] is the long table, [1] the short one.
 * The initializer must list both tables; an empty one leaves the array without
 * usable pointers.
 */
static const uint8_t * const run_value_bits[2] = {
    run_value_bits_long, run_value_bits_short
};
87 
/*
 * Number of non-zero components in a 4-tuple whose components are each in
 * 0..2, indexed by 27*a + 9*b + 3*c + d.  Each row below covers one (a, b)
 * pair; the nine columns enumerate (c, d).
 */
static const uint8_t uquad_sign_bits[81] = {
    /* a=0 b=0 */ 0, 1, 1, 1, 2, 2, 1, 2, 2,
    /* a=0 b=1 */ 1, 2, 2, 2, 3, 3, 2, 3, 3,
    /* a=0 b=2 */ 1, 2, 2, 2, 3, 3, 2, 3, 3,
    /* a=1 b=0 */ 1, 2, 2, 2, 3, 3, 2, 3, 3,
    /* a=1 b=1 */ 2, 3, 3, 3, 4, 4, 3, 4, 4,
    /* a=1 b=2 */ 2, 3, 3, 3, 4, 4, 3, 4, 4,
    /* a=2 b=0 */ 1, 2, 2, 2, 3, 3, 2, 3, 3,
    /* a=2 b=1 */ 2, 3, 3, 3, 4, 4, 3, 4, 4,
    /* a=2 b=2 */ 2, 3, 3, 3, 4, 4, 3, 4, 4
};
99 
/*
 * Number of non-zero components in a pair whose components are each in 0..7,
 * indexed by 8*a + b.  One row per value of a.
 */
static const uint8_t upair7_sign_bits[64] = {
    /* a=0 */ 0, 1, 1, 1, 1, 1, 1, 1,
    /* a=1 */ 1, 2, 2, 2, 2, 2, 2, 2,
    /* a=2 */ 1, 2, 2, 2, 2, 2, 2, 2,
    /* a=3 */ 1, 2, 2, 2, 2, 2, 2, 2,
    /* a=4 */ 1, 2, 2, 2, 2, 2, 2, 2,
    /* a=5 */ 1, 2, 2, 2, 2, 2, 2, 2,
    /* a=6 */ 1, 2, 2, 2, 2, 2, 2, 2,
    /* a=7 */ 1, 2, 2, 2, 2, 2, 2, 2,
};
110 
/*
 * Number of non-zero components in a pair whose components are each in 0..12,
 * indexed by 13*a + b.  One row per value of a.
 */
static const uint8_t upair12_sign_bits[169] = {
    /* a= 0 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* a= 1 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a= 2 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a= 3 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a= 4 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a= 5 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a= 6 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a= 7 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a= 8 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a= 9 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a=10 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a=11 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a=12 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
};
126 
/*
 * Number of non-zero components in a pair whose components are each in 0..16,
 * indexed by 17*a + b.  One row per value of a.
 */
static const uint8_t esc_sign_bits[289] = {
    /* a= 0 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* a= 1 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a= 2 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a= 3 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a= 4 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a= 5 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a= 6 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a= 7 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a= 8 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a= 9 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a=10 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a=11 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a=12 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a=13 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a=14 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a=15 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a=16 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
};
146 
/*
 * Offsets added to a scaled coefficient before it is truncated to int.
 * ROUND_STANDARD is the default; ROUND_TO_ZERO is the smaller offset chosen
 * when a caller asks for round-to-zero behaviour.
 */
#define ROUND_STANDARD 0.4054f
#define ROUND_TO_ZERO  0.1054f
149 
150 static void abs_pow34_v(float *out, const float *in, const int size) {
151 #ifndef USE_REALLY_FULL_SEARCH
152  int i;
153  float a, b, c, d;
154  float ax, bx, cx, dx;
155 
156  for (i = 0; i < size; i += 4) {
157  a = fabsf(in[i ]);
158  b = fabsf(in[i+1]);
159  c = fabsf(in[i+2]);
160  d = fabsf(in[i+3]);
161 
162  ax = sqrtf(a);
163  bx = sqrtf(b);
164  cx = sqrtf(c);
165  dx = sqrtf(d);
166 
167  a = a * ax;
168  b = b * bx;
169  c = c * cx;
170  d = d * dx;
171 
172  out[i ] = sqrtf(a);
173  out[i+1] = sqrtf(b);
174  out[i+2] = sqrtf(c);
175  out[i+3] = sqrtf(d);
176  }
177 #endif /* USE_REALLY_FULL_SEARCH */
178 }
179 
/**
 * Largest value found in a group of windows, never less than 0.
 *
 * Window w2 starts at scaled[w2 * 128]; the first swb_size entries of each
 * window are inspected.
 *
 * @param group_len  number of windows to scan
 * @param swb_size   number of entries scanned per window
 * @param scaled     values to scan (windows are 128 floats apart)
 * @return the maximum, or 0.0f when nothing exceeds it
 */
static float find_max_val(int group_len, int swb_size, const float *scaled) {
    float peak = 0.0f;
    int win, k;

    for (win = 0; win < group_len; win++) {
        const int base = win * 128;

        for (k = 0; k < swb_size; k++) {
            const float v = scaled[base + k];

            /* Same selection as FFMAX(peak, v). */
            peak = (peak > v) ? peak : v;
        }
    }
    return peak;
}
190 
191 static int find_min_book(float maxval, int sf) {
193  float Q34 = sqrtf(Q * sqrtf(Q));
194  int qmaxval, cb;
195  qmaxval = maxval * Q34 + 0.4054f;
196  if (qmaxval == 0) cb = 0;
197  else if (qmaxval == 1) cb = 1;
198  else if (qmaxval == 2) cb = 3;
199  else if (qmaxval <= 4) cb = 5;
200  else if (qmaxval <= 7) cb = 7;
201  else if (qmaxval <= 12) cb = 9;
202  else cb = 11;
203  return cb;
204 }
205 
206 /**
207  * Functions developed from template function and optimized for quantizing and encoding band
208  */
209 static void quantize_and_encode_band_cost_SQUAD_mips(struct AACEncContext *s,
210  PutBitContext *pb, const float *in, float *out,
211  const float *scaled, int size, int scale_idx,
212  int cb, const float lambda, const float uplim,
213  int *bits, const float ROUNDING)
214 {
215  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
216  int i;
217  int qc1, qc2, qc3, qc4;
218 
219  uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
220  uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
221 
222  abs_pow34_v(s->scoefs, in, size);
223  scaled = s->scoefs;
224  for (i = 0; i < size; i += 4) {
225  int curidx;
226  int *in_int = (int *)&in[i];
227  int t0, t1, t2, t3, t4, t5, t6, t7;
228 
229  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
230  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
231  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
232  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
233 
234  __asm__ volatile (
235  ".set push \n\t"
236  ".set noreorder \n\t"
237 
238  "slt %[qc1], $zero, %[qc1] \n\t"
239  "slt %[qc2], $zero, %[qc2] \n\t"
240  "slt %[qc3], $zero, %[qc3] \n\t"
241  "slt %[qc4], $zero, %[qc4] \n\t"
242  "lw %[t0], 0(%[in_int]) \n\t"
243  "lw %[t1], 4(%[in_int]) \n\t"
244  "lw %[t2], 8(%[in_int]) \n\t"
245  "lw %[t3], 12(%[in_int]) \n\t"
246  "srl %[t0], %[t0], 31 \n\t"
247  "srl %[t1], %[t1], 31 \n\t"
248  "srl %[t2], %[t2], 31 \n\t"
249  "srl %[t3], %[t3], 31 \n\t"
250  "subu %[t4], $zero, %[qc1] \n\t"
251  "subu %[t5], $zero, %[qc2] \n\t"
252  "subu %[t6], $zero, %[qc3] \n\t"
253  "subu %[t7], $zero, %[qc4] \n\t"
254  "movn %[qc1], %[t4], %[t0] \n\t"
255  "movn %[qc2], %[t5], %[t1] \n\t"
256  "movn %[qc3], %[t6], %[t2] \n\t"
257  "movn %[qc4], %[t7], %[t3] \n\t"
258 
259  ".set pop \n\t"
260 
261  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
262  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
263  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
264  [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
265  : [in_int]"r"(in_int)
266  : "memory"
267  );
268 
269  curidx = qc1;
270  curidx *= 3;
271  curidx += qc2;
272  curidx *= 3;
273  curidx += qc3;
274  curidx *= 3;
275  curidx += qc4;
276  curidx += 40;
277 
278  put_bits(pb, p_bits[curidx], p_codes[curidx]);
279  }
280 }
281 
282 static void quantize_and_encode_band_cost_UQUAD_mips(struct AACEncContext *s,
283  PutBitContext *pb, const float *in, float *out,
284  const float *scaled, int size, int scale_idx,
285  int cb, const float lambda, const float uplim,
286  int *bits, const float ROUNDING)
287 {
288  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
289  int i;
290  int qc1, qc2, qc3, qc4;
291 
292  uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
293  uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
294 
295  abs_pow34_v(s->scoefs, in, size);
296  scaled = s->scoefs;
297  for (i = 0; i < size; i += 4) {
298  int curidx, sign, count;
299  int *in_int = (int *)&in[i];
300  uint8_t v_bits;
301  unsigned int v_codes;
302  int t0, t1, t2, t3, t4;
303 
304  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
305  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
306  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
307  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
308 
309  __asm__ volatile (
310  ".set push \n\t"
311  ".set noreorder \n\t"
312 
313  "ori %[t4], $zero, 2 \n\t"
314  "ori %[sign], $zero, 0 \n\t"
315  "slt %[t0], %[t4], %[qc1] \n\t"
316  "slt %[t1], %[t4], %[qc2] \n\t"
317  "slt %[t2], %[t4], %[qc3] \n\t"
318  "slt %[t3], %[t4], %[qc4] \n\t"
319  "movn %[qc1], %[t4], %[t0] \n\t"
320  "movn %[qc2], %[t4], %[t1] \n\t"
321  "movn %[qc3], %[t4], %[t2] \n\t"
322  "movn %[qc4], %[t4], %[t3] \n\t"
323  "lw %[t0], 0(%[in_int]) \n\t"
324  "lw %[t1], 4(%[in_int]) \n\t"
325  "lw %[t2], 8(%[in_int]) \n\t"
326  "lw %[t3], 12(%[in_int]) \n\t"
327  "slt %[t0], %[t0], $zero \n\t"
328  "movn %[sign], %[t0], %[qc1] \n\t"
329  "slt %[t1], %[t1], $zero \n\t"
330  "slt %[t2], %[t2], $zero \n\t"
331  "slt %[t3], %[t3], $zero \n\t"
332  "sll %[t0], %[sign], 1 \n\t"
333  "or %[t0], %[t0], %[t1] \n\t"
334  "movn %[sign], %[t0], %[qc2] \n\t"
335  "slt %[t4], $zero, %[qc1] \n\t"
336  "slt %[t1], $zero, %[qc2] \n\t"
337  "slt %[count], $zero, %[qc3] \n\t"
338  "sll %[t0], %[sign], 1 \n\t"
339  "or %[t0], %[t0], %[t2] \n\t"
340  "movn %[sign], %[t0], %[qc3] \n\t"
341  "slt %[t2], $zero, %[qc4] \n\t"
342  "addu %[count], %[count], %[t4] \n\t"
343  "addu %[count], %[count], %[t1] \n\t"
344  "sll %[t0], %[sign], 1 \n\t"
345  "or %[t0], %[t0], %[t3] \n\t"
346  "movn %[sign], %[t0], %[qc4] \n\t"
347  "addu %[count], %[count], %[t2] \n\t"
348 
349  ".set pop \n\t"
350 
351  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
352  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
353  [sign]"=&r"(sign), [count]"=&r"(count),
354  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
355  [t4]"=&r"(t4)
356  : [in_int]"r"(in_int)
357  : "memory"
358  );
359 
360  curidx = qc1;
361  curidx *= 3;
362  curidx += qc2;
363  curidx *= 3;
364  curidx += qc3;
365  curidx *= 3;
366  curidx += qc4;
367 
368  v_codes = (p_codes[curidx] << count) | (sign & ((1 << count) - 1));
369  v_bits = p_bits[curidx] + count;
370  put_bits(pb, v_bits, v_codes);
371  }
372 }
373 
374 static void quantize_and_encode_band_cost_SPAIR_mips(struct AACEncContext *s,
375  PutBitContext *pb, const float *in, float *out,
376  const float *scaled, int size, int scale_idx,
377  int cb, const float lambda, const float uplim,
378  int *bits, const float ROUNDING)
379 {
380  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
381  int i;
382  int qc1, qc2, qc3, qc4;
383 
384  uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
385  uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
386 
387  abs_pow34_v(s->scoefs, in, size);
388  scaled = s->scoefs;
389  for (i = 0; i < size; i += 4) {
390  int curidx, curidx2;
391  int *in_int = (int *)&in[i];
392  uint8_t v_bits;
393  unsigned int v_codes;
394  int t0, t1, t2, t3, t4, t5, t6, t7;
395 
396  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
397  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
398  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
399  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
400 
401  __asm__ volatile (
402  ".set push \n\t"
403  ".set noreorder \n\t"
404 
405  "ori %[t4], $zero, 4 \n\t"
406  "slt %[t0], %[t4], %[qc1] \n\t"
407  "slt %[t1], %[t4], %[qc2] \n\t"
408  "slt %[t2], %[t4], %[qc3] \n\t"
409  "slt %[t3], %[t4], %[qc4] \n\t"
410  "movn %[qc1], %[t4], %[t0] \n\t"
411  "movn %[qc2], %[t4], %[t1] \n\t"
412  "movn %[qc3], %[t4], %[t2] \n\t"
413  "movn %[qc4], %[t4], %[t3] \n\t"
414  "lw %[t0], 0(%[in_int]) \n\t"
415  "lw %[t1], 4(%[in_int]) \n\t"
416  "lw %[t2], 8(%[in_int]) \n\t"
417  "lw %[t3], 12(%[in_int]) \n\t"
418  "srl %[t0], %[t0], 31 \n\t"
419  "srl %[t1], %[t1], 31 \n\t"
420  "srl %[t2], %[t2], 31 \n\t"
421  "srl %[t3], %[t3], 31 \n\t"
422  "subu %[t4], $zero, %[qc1] \n\t"
423  "subu %[t5], $zero, %[qc2] \n\t"
424  "subu %[t6], $zero, %[qc3] \n\t"
425  "subu %[t7], $zero, %[qc4] \n\t"
426  "movn %[qc1], %[t4], %[t0] \n\t"
427  "movn %[qc2], %[t5], %[t1] \n\t"
428  "movn %[qc3], %[t6], %[t2] \n\t"
429  "movn %[qc4], %[t7], %[t3] \n\t"
430 
431  ".set pop \n\t"
432 
433  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
434  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
435  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
436  [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
437  : [in_int]"r"(in_int)
438  : "memory"
439  );
440 
441  curidx = 9 * qc1;
442  curidx += qc2 + 40;
443 
444  curidx2 = 9 * qc3;
445  curidx2 += qc4 + 40;
446 
447  v_codes = (p_codes[curidx] << p_bits[curidx2]) | (p_codes[curidx2]);
448  v_bits = p_bits[curidx] + p_bits[curidx2];
449  put_bits(pb, v_bits, v_codes);
450  }
451 }
452 
453 static void quantize_and_encode_band_cost_UPAIR7_mips(struct AACEncContext *s,
454  PutBitContext *pb, const float *in, float *out,
455  const float *scaled, int size, int scale_idx,
456  int cb, const float lambda, const float uplim,
457  int *bits, const float ROUNDING)
458 {
459  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
460  int i;
461  int qc1, qc2, qc3, qc4;
462 
463  uint8_t *p_bits = (uint8_t*) ff_aac_spectral_bits[cb-1];
464  uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
465 
466  abs_pow34_v(s->scoefs, in, size);
467  scaled = s->scoefs;
468  for (i = 0; i < size; i += 4) {
469  int curidx, sign1, count1, sign2, count2;
470  int *in_int = (int *)&in[i];
471  uint8_t v_bits;
472  unsigned int v_codes;
473  int t0, t1, t2, t3, t4;
474 
475  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
476  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
477  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
478  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
479 
480  __asm__ volatile (
481  ".set push \n\t"
482  ".set noreorder \n\t"
483 
484  "ori %[t4], $zero, 7 \n\t"
485  "ori %[sign1], $zero, 0 \n\t"
486  "ori %[sign2], $zero, 0 \n\t"
487  "slt %[t0], %[t4], %[qc1] \n\t"
488  "slt %[t1], %[t4], %[qc2] \n\t"
489  "slt %[t2], %[t4], %[qc3] \n\t"
490  "slt %[t3], %[t4], %[qc4] \n\t"
491  "movn %[qc1], %[t4], %[t0] \n\t"
492  "movn %[qc2], %[t4], %[t1] \n\t"
493  "movn %[qc3], %[t4], %[t2] \n\t"
494  "movn %[qc4], %[t4], %[t3] \n\t"
495  "lw %[t0], 0(%[in_int]) \n\t"
496  "lw %[t1], 4(%[in_int]) \n\t"
497  "lw %[t2], 8(%[in_int]) \n\t"
498  "lw %[t3], 12(%[in_int]) \n\t"
499  "slt %[t0], %[t0], $zero \n\t"
500  "movn %[sign1], %[t0], %[qc1] \n\t"
501  "slt %[t2], %[t2], $zero \n\t"
502  "movn %[sign2], %[t2], %[qc3] \n\t"
503  "slt %[t1], %[t1], $zero \n\t"
504  "sll %[t0], %[sign1], 1 \n\t"
505  "or %[t0], %[t0], %[t1] \n\t"
506  "movn %[sign1], %[t0], %[qc2] \n\t"
507  "slt %[t3], %[t3], $zero \n\t"
508  "sll %[t0], %[sign2], 1 \n\t"
509  "or %[t0], %[t0], %[t3] \n\t"
510  "movn %[sign2], %[t0], %[qc4] \n\t"
511  "slt %[count1], $zero, %[qc1] \n\t"
512  "slt %[t1], $zero, %[qc2] \n\t"
513  "slt %[count2], $zero, %[qc3] \n\t"
514  "slt %[t2], $zero, %[qc4] \n\t"
515  "addu %[count1], %[count1], %[t1] \n\t"
516  "addu %[count2], %[count2], %[t2] \n\t"
517 
518  ".set pop \n\t"
519 
520  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
521  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
522  [sign1]"=&r"(sign1), [count1]"=&r"(count1),
523  [sign2]"=&r"(sign2), [count2]"=&r"(count2),
524  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
525  [t4]"=&r"(t4)
526  : [in_int]"r"(in_int)
527  : "t0", "t1", "t2", "t3", "t4",
528  "memory"
529  );
530 
531  curidx = 8 * qc1;
532  curidx += qc2;
533 
534  v_codes = (p_codes[curidx] << count1) | sign1;
535  v_bits = p_bits[curidx] + count1;
536  put_bits(pb, v_bits, v_codes);
537 
538  curidx = 8 * qc3;
539  curidx += qc4;
540 
541  v_codes = (p_codes[curidx] << count2) | sign2;
542  v_bits = p_bits[curidx] + count2;
543  put_bits(pb, v_bits, v_codes);
544  }
545 }
546 
547 static void quantize_and_encode_band_cost_UPAIR12_mips(struct AACEncContext *s,
548  PutBitContext *pb, const float *in, float *out,
549  const float *scaled, int size, int scale_idx,
550  int cb, const float lambda, const float uplim,
551  int *bits, const float ROUNDING)
552 {
553  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
554  int i;
555  int qc1, qc2, qc3, qc4;
556 
557  uint8_t *p_bits = (uint8_t*) ff_aac_spectral_bits[cb-1];
558  uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
559 
560  abs_pow34_v(s->scoefs, in, size);
561  scaled = s->scoefs;
562  for (i = 0; i < size; i += 4) {
563  int curidx, sign1, count1, sign2, count2;
564  int *in_int = (int *)&in[i];
565  uint8_t v_bits;
566  unsigned int v_codes;
567  int t0, t1, t2, t3, t4;
568 
569  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
570  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
571  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
572  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
573 
574  __asm__ volatile (
575  ".set push \n\t"
576  ".set noreorder \n\t"
577 
578  "ori %[t4], $zero, 12 \n\t"
579  "ori %[sign1], $zero, 0 \n\t"
580  "ori %[sign2], $zero, 0 \n\t"
581  "slt %[t0], %[t4], %[qc1] \n\t"
582  "slt %[t1], %[t4], %[qc2] \n\t"
583  "slt %[t2], %[t4], %[qc3] \n\t"
584  "slt %[t3], %[t4], %[qc4] \n\t"
585  "movn %[qc1], %[t4], %[t0] \n\t"
586  "movn %[qc2], %[t4], %[t1] \n\t"
587  "movn %[qc3], %[t4], %[t2] \n\t"
588  "movn %[qc4], %[t4], %[t3] \n\t"
589  "lw %[t0], 0(%[in_int]) \n\t"
590  "lw %[t1], 4(%[in_int]) \n\t"
591  "lw %[t2], 8(%[in_int]) \n\t"
592  "lw %[t3], 12(%[in_int]) \n\t"
593  "slt %[t0], %[t0], $zero \n\t"
594  "movn %[sign1], %[t0], %[qc1] \n\t"
595  "slt %[t2], %[t2], $zero \n\t"
596  "movn %[sign2], %[t2], %[qc3] \n\t"
597  "slt %[t1], %[t1], $zero \n\t"
598  "sll %[t0], %[sign1], 1 \n\t"
599  "or %[t0], %[t0], %[t1] \n\t"
600  "movn %[sign1], %[t0], %[qc2] \n\t"
601  "slt %[t3], %[t3], $zero \n\t"
602  "sll %[t0], %[sign2], 1 \n\t"
603  "or %[t0], %[t0], %[t3] \n\t"
604  "movn %[sign2], %[t0], %[qc4] \n\t"
605  "slt %[count1], $zero, %[qc1] \n\t"
606  "slt %[t1], $zero, %[qc2] \n\t"
607  "slt %[count2], $zero, %[qc3] \n\t"
608  "slt %[t2], $zero, %[qc4] \n\t"
609  "addu %[count1], %[count1], %[t1] \n\t"
610  "addu %[count2], %[count2], %[t2] \n\t"
611 
612  ".set pop \n\t"
613 
614  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
615  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
616  [sign1]"=&r"(sign1), [count1]"=&r"(count1),
617  [sign2]"=&r"(sign2), [count2]"=&r"(count2),
618  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
619  [t4]"=&r"(t4)
620  : [in_int]"r"(in_int)
621  : "memory"
622  );
623 
624  curidx = 13 * qc1;
625  curidx += qc2;
626 
627  v_codes = (p_codes[curidx] << count1) | sign1;
628  v_bits = p_bits[curidx] + count1;
629  put_bits(pb, v_bits, v_codes);
630 
631  curidx = 13 * qc3;
632  curidx += qc4;
633 
634  v_codes = (p_codes[curidx] << count2) | sign2;
635  v_bits = p_bits[curidx] + count2;
636  put_bits(pb, v_bits, v_codes);
637  }
638 }
639 
640 static void quantize_and_encode_band_cost_ESC_mips(struct AACEncContext *s,
641  PutBitContext *pb, const float *in, float *out,
642  const float *scaled, int size, int scale_idx,
643  int cb, const float lambda, const float uplim,
644  int *bits, const float ROUNDING)
645 {
646  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
647  int i;
648  int qc1, qc2, qc3, qc4;
649 
650  uint8_t *p_bits = (uint8_t* )ff_aac_spectral_bits[cb-1];
651  uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
652  float *p_vectors = (float* )ff_aac_codebook_vectors[cb-1];
653 
654  abs_pow34_v(s->scoefs, in, size);
655  scaled = s->scoefs;
656 
657  if (cb < 11) {
658  for (i = 0; i < size; i += 4) {
659  int curidx, curidx2, sign1, count1, sign2, count2;
660  int *in_int = (int *)&in[i];
661  uint8_t v_bits;
662  unsigned int v_codes;
663  int t0, t1, t2, t3, t4;
664 
665  qc1 = scaled[i ] * Q34 + ROUNDING;
666  qc2 = scaled[i+1] * Q34 + ROUNDING;
667  qc3 = scaled[i+2] * Q34 + ROUNDING;
668  qc4 = scaled[i+3] * Q34 + ROUNDING;
669 
670  __asm__ volatile (
671  ".set push \n\t"
672  ".set noreorder \n\t"
673 
674  "ori %[t4], $zero, 16 \n\t"
675  "ori %[sign1], $zero, 0 \n\t"
676  "ori %[sign2], $zero, 0 \n\t"
677  "slt %[t0], %[t4], %[qc1] \n\t"
678  "slt %[t1], %[t4], %[qc2] \n\t"
679  "slt %[t2], %[t4], %[qc3] \n\t"
680  "slt %[t3], %[t4], %[qc4] \n\t"
681  "movn %[qc1], %[t4], %[t0] \n\t"
682  "movn %[qc2], %[t4], %[t1] \n\t"
683  "movn %[qc3], %[t4], %[t2] \n\t"
684  "movn %[qc4], %[t4], %[t3] \n\t"
685  "lw %[t0], 0(%[in_int]) \n\t"
686  "lw %[t1], 4(%[in_int]) \n\t"
687  "lw %[t2], 8(%[in_int]) \n\t"
688  "lw %[t3], 12(%[in_int]) \n\t"
689  "slt %[t0], %[t0], $zero \n\t"
690  "movn %[sign1], %[t0], %[qc1] \n\t"
691  "slt %[t2], %[t2], $zero \n\t"
692  "movn %[sign2], %[t2], %[qc3] \n\t"
693  "slt %[t1], %[t1], $zero \n\t"
694  "sll %[t0], %[sign1], 1 \n\t"
695  "or %[t0], %[t0], %[t1] \n\t"
696  "movn %[sign1], %[t0], %[qc2] \n\t"
697  "slt %[t3], %[t3], $zero \n\t"
698  "sll %[t0], %[sign2], 1 \n\t"
699  "or %[t0], %[t0], %[t3] \n\t"
700  "movn %[sign2], %[t0], %[qc4] \n\t"
701  "slt %[count1], $zero, %[qc1] \n\t"
702  "slt %[t1], $zero, %[qc2] \n\t"
703  "slt %[count2], $zero, %[qc3] \n\t"
704  "slt %[t2], $zero, %[qc4] \n\t"
705  "addu %[count1], %[count1], %[t1] \n\t"
706  "addu %[count2], %[count2], %[t2] \n\t"
707 
708  ".set pop \n\t"
709 
710  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
711  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
712  [sign1]"=&r"(sign1), [count1]"=&r"(count1),
713  [sign2]"=&r"(sign2), [count2]"=&r"(count2),
714  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
715  [t4]"=&r"(t4)
716  : [in_int]"r"(in_int)
717  : "memory"
718  );
719 
720  curidx = 17 * qc1;
721  curidx += qc2;
722  curidx2 = 17 * qc3;
723  curidx2 += qc4;
724 
725  v_codes = (p_codes[curidx] << count1) | sign1;
726  v_bits = p_bits[curidx] + count1;
727  put_bits(pb, v_bits, v_codes);
728 
729  v_codes = (p_codes[curidx2] << count2) | sign2;
730  v_bits = p_bits[curidx2] + count2;
731  put_bits(pb, v_bits, v_codes);
732  }
733  } else {
734  for (i = 0; i < size; i += 4) {
735  int curidx, curidx2, sign1, count1, sign2, count2;
736  int *in_int = (int *)&in[i];
737  uint8_t v_bits;
738  unsigned int v_codes;
739  int c1, c2, c3, c4;
740  int t0, t1, t2, t3, t4;
741 
742  qc1 = scaled[i ] * Q34 + ROUNDING;
743  qc2 = scaled[i+1] * Q34 + ROUNDING;
744  qc3 = scaled[i+2] * Q34 + ROUNDING;
745  qc4 = scaled[i+3] * Q34 + ROUNDING;
746 
747  __asm__ volatile (
748  ".set push \n\t"
749  ".set noreorder \n\t"
750 
751  "ori %[t4], $zero, 16 \n\t"
752  "ori %[sign1], $zero, 0 \n\t"
753  "ori %[sign2], $zero, 0 \n\t"
754  "shll_s.w %[c1], %[qc1], 18 \n\t"
755  "shll_s.w %[c2], %[qc2], 18 \n\t"
756  "shll_s.w %[c3], %[qc3], 18 \n\t"
757  "shll_s.w %[c4], %[qc4], 18 \n\t"
758  "srl %[c1], %[c1], 18 \n\t"
759  "srl %[c2], %[c2], 18 \n\t"
760  "srl %[c3], %[c3], 18 \n\t"
761  "srl %[c4], %[c4], 18 \n\t"
762  "slt %[t0], %[t4], %[qc1] \n\t"
763  "slt %[t1], %[t4], %[qc2] \n\t"
764  "slt %[t2], %[t4], %[qc3] \n\t"
765  "slt %[t3], %[t4], %[qc4] \n\t"
766  "movn %[qc1], %[t4], %[t0] \n\t"
767  "movn %[qc2], %[t4], %[t1] \n\t"
768  "movn %[qc3], %[t4], %[t2] \n\t"
769  "movn %[qc4], %[t4], %[t3] \n\t"
770  "lw %[t0], 0(%[in_int]) \n\t"
771  "lw %[t1], 4(%[in_int]) \n\t"
772  "lw %[t2], 8(%[in_int]) \n\t"
773  "lw %[t3], 12(%[in_int]) \n\t"
774  "slt %[t0], %[t0], $zero \n\t"
775  "movn %[sign1], %[t0], %[qc1] \n\t"
776  "slt %[t2], %[t2], $zero \n\t"
777  "movn %[sign2], %[t2], %[qc3] \n\t"
778  "slt %[t1], %[t1], $zero \n\t"
779  "sll %[t0], %[sign1], 1 \n\t"
780  "or %[t0], %[t0], %[t1] \n\t"
781  "movn %[sign1], %[t0], %[qc2] \n\t"
782  "slt %[t3], %[t3], $zero \n\t"
783  "sll %[t0], %[sign2], 1 \n\t"
784  "or %[t0], %[t0], %[t3] \n\t"
785  "movn %[sign2], %[t0], %[qc4] \n\t"
786  "slt %[count1], $zero, %[qc1] \n\t"
787  "slt %[t1], $zero, %[qc2] \n\t"
788  "slt %[count2], $zero, %[qc3] \n\t"
789  "slt %[t2], $zero, %[qc4] \n\t"
790  "addu %[count1], %[count1], %[t1] \n\t"
791  "addu %[count2], %[count2], %[t2] \n\t"
792 
793  ".set pop \n\t"
794 
795  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
796  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
797  [sign1]"=&r"(sign1), [count1]"=&r"(count1),
798  [sign2]"=&r"(sign2), [count2]"=&r"(count2),
799  [c1]"=&r"(c1), [c2]"=&r"(c2),
800  [c3]"=&r"(c3), [c4]"=&r"(c4),
801  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
802  [t4]"=&r"(t4)
803  : [in_int]"r"(in_int)
804  : "memory"
805  );
806 
807  curidx = 17 * qc1;
808  curidx += qc2;
809 
810  curidx2 = 17 * qc3;
811  curidx2 += qc4;
812 
813  v_codes = (p_codes[curidx] << count1) | sign1;
814  v_bits = p_bits[curidx] + count1;
815  put_bits(pb, v_bits, v_codes);
816 
817  if (p_vectors[curidx*2 ] == 64.0f) {
818  int len = av_log2(c1);
819  v_codes = (((1 << (len - 3)) - 2) << len) | (c1 & ((1 << len) - 1));
820  put_bits(pb, len * 2 - 3, v_codes);
821  }
822  if (p_vectors[curidx*2+1] == 64.0f) {
823  int len = av_log2(c2);
824  v_codes = (((1 << (len - 3)) - 2) << len) | (c2 & ((1 << len) - 1));
825  put_bits(pb, len*2-3, v_codes);
826  }
827 
828  v_codes = (p_codes[curidx2] << count2) | sign2;
829  v_bits = p_bits[curidx2] + count2;
830  put_bits(pb, v_bits, v_codes);
831 
832  if (p_vectors[curidx2*2 ] == 64.0f) {
833  int len = av_log2(c3);
834  v_codes = (((1 << (len - 3)) - 2) << len) | (c3 & ((1 << len) - 1));
835  put_bits(pb, len* 2 - 3, v_codes);
836  }
837  if (p_vectors[curidx2*2+1] == 64.0f) {
838  int len = av_log2(c4);
839  v_codes = (((1 << (len - 3)) - 2) << len) | (c4 & ((1 << len) - 1));
840  put_bits(pb, len * 2 - 3, v_codes);
841  }
842  }
843  }
844 }
845 
846 static void quantize_and_encode_band_cost_NONE_mips(struct AACEncContext *s,
847  PutBitContext *pb, const float *in, float *out,
848  const float *scaled, int size, int scale_idx,
849  int cb, const float lambda, const float uplim,
850  int *bits, const float ROUNDING) {
851  av_assert0(0);
852 }
853 
854 static void quantize_and_encode_band_cost_ZERO_mips(struct AACEncContext *s,
855  PutBitContext *pb, const float *in, float *out,
856  const float *scaled, int size, int scale_idx,
857  int cb, const float lambda, const float uplim,
858  int *bits, const float ROUNDING) {
859  int i;
860  if (bits)
861  *bits = 0;
862  if (out) {
863  for (i = 0; i < size; i += 4) {
864  out[i ] = 0.0f;
865  out[i+1] = 0.0f;
866  out[i+2] = 0.0f;
867  out[i+3] = 0.0f;
868  }
869  }
870 }
871 
872 static void (*const quantize_and_encode_band_cost_arr[])(struct AACEncContext *s,
873  PutBitContext *pb, const float *in, float *out,
874  const float *scaled, int size, int scale_idx,
875  int cb, const float lambda, const float uplim,
876  int *bits, const float ROUNDING) = {
877  quantize_and_encode_band_cost_ZERO_mips,
878  quantize_and_encode_band_cost_SQUAD_mips,
879  quantize_and_encode_band_cost_SQUAD_mips,
880  quantize_and_encode_band_cost_UQUAD_mips,
881  quantize_and_encode_band_cost_UQUAD_mips,
882  quantize_and_encode_band_cost_SPAIR_mips,
883  quantize_and_encode_band_cost_SPAIR_mips,
884  quantize_and_encode_band_cost_UPAIR7_mips,
885  quantize_and_encode_band_cost_UPAIR7_mips,
886  quantize_and_encode_band_cost_UPAIR12_mips,
887  quantize_and_encode_band_cost_UPAIR12_mips,
888  quantize_and_encode_band_cost_ESC_mips,
889  quantize_and_encode_band_cost_NONE_mips, /* cb 12 doesn't exist */
890  quantize_and_encode_band_cost_ZERO_mips,
891  quantize_and_encode_band_cost_ZERO_mips,
892  quantize_and_encode_band_cost_ZERO_mips,
893 };
894 
/*
 * Dispatch to the handler registered for codebook cb in
 * quantize_and_encode_band_cost_arr, forwarding every argument unchanged.
 * cb is expanded twice (table index and argument), so pass an expression
 * without side effects.
 */
#define quantize_and_encode_band_cost(                                   \
            s, pb, in, out, scaled, size, scale_idx, cb,                 \
            lambda, uplim, bits, ROUNDING)                               \
    quantize_and_encode_band_cost_arr[cb](                               \
            s, pb, in, out, scaled, size, scale_idx, cb,                 \
            lambda, uplim, bits, ROUNDING)
901 
902 static void quantize_and_encode_band_mips(struct AACEncContext *s, PutBitContext *pb,
903  const float *in, float *out, int size, int scale_idx,
904  int cb, const float lambda, int rtz)
905 {
906  quantize_and_encode_band_cost(s, pb, in, out, NULL, size, scale_idx, cb, lambda,
907  INFINITY, NULL, (rtz) ? ROUND_TO_ZERO : ROUND_STANDARD);
908 }
909 
910 /**
911  * Functions developed from template function and optimized for getting the number of bits
912  */
913 static float get_band_numbits_ZERO_mips(struct AACEncContext *s,
914  PutBitContext *pb, const float *in,
915  const float *scaled, int size, int scale_idx,
916  int cb, const float lambda, const float uplim,
917  int *bits)
918 {
919  return 0;
920 }
921 
922 static float get_band_numbits_NONE_mips(struct AACEncContext *s,
923  PutBitContext *pb, const float *in,
924  const float *scaled, int size, int scale_idx,
925  int cb, const float lambda, const float uplim,
926  int *bits)
927 {
928  av_assert0(0);
929  return 0;
930 }
931 
/**
 * Sum table-driven bit counts for one band, four coefficients per step,
 * using signed quantized values limited to -1/0/+1.
 *
 * For each group of four, scaled[] * Q34 + ROUND_STANDARD is truncated to int,
 * reduced to 0 or 1 by the asm and negated when the matching in[] word has its
 * top bit set. The four values form a base-3 index (offset by 40) into
 * ff_aac_spectral_bits[cb-1].
 *
 * s, pb, lambda, uplim and bits are not referenced in this function.
 * The loop always touches elements i..i+3, so it assumes size is a multiple
 * of 4 (not checked here).
 *
 * @return the accumulated table entries (int converted to float)
 */
932 static float get_band_numbits_SQUAD_mips(struct AACEncContext *s,
933  PutBitContext *pb, const float *in,
934  const float *scaled, int size, int scale_idx,
935  int cb, const float lambda, const float uplim,
936  int *bits)
937 {
938  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
939  int i;
940  int qc1, qc2, qc3, qc4;
941  int curbits = 0;
942 
943  uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
944 
945  for (i = 0; i < size; i += 4) {
946  int curidx;
/* Integer view of in[i..i+3]; the asm loads these words to read the sign bits. */
947  int *in_int = (int *)&in[i];
948  int t0, t1, t2, t3, t4, t5, t6, t7;
949 
/* Float-to-int assignment truncates the rounded product. */
950  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
951  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
952  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
953  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
954 
955  __asm__ volatile (
956  ".set push \n\t"
957  ".set noreorder \n\t"
958 
/* qcN = (0 < qcN): any positive value becomes 1, everything else 0. */
959  "slt %[qc1], $zero, %[qc1] \n\t"
960  "slt %[qc2], $zero, %[qc2] \n\t"
961  "slt %[qc3], $zero, %[qc3] \n\t"
962  "slt %[qc4], $zero, %[qc4] \n\t"
/* tN = top bit of the raw word of in[i+N]. */
963  "lw %[t0], 0(%[in_int]) \n\t"
964  "lw %[t1], 4(%[in_int]) \n\t"
965  "lw %[t2], 8(%[in_int]) \n\t"
966  "lw %[t3], 12(%[in_int]) \n\t"
967  "srl %[t0], %[t0], 31 \n\t"
968  "srl %[t1], %[t1], 31 \n\t"
969  "srl %[t2], %[t2], 31 \n\t"
970  "srl %[t3], %[t3], 31 \n\t"
/* Replace qcN by -qcN where that top bit is set. */
971  "subu %[t4], $zero, %[qc1] \n\t"
972  "subu %[t5], $zero, %[qc2] \n\t"
973  "subu %[t6], $zero, %[qc3] \n\t"
974  "subu %[t7], $zero, %[qc4] \n\t"
975  "movn %[qc1], %[t4], %[t0] \n\t"
976  "movn %[qc2], %[t5], %[t1] \n\t"
977  "movn %[qc3], %[t6], %[t2] \n\t"
978  "movn %[qc4], %[t7], %[t3] \n\t"
979 
980  ".set pop \n\t"
981 
982  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
983  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
984  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
985  [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
986  : [in_int]"r"(in_int)
987  : "memory"
988  );
989 
/* Base-3 index from the four values in -1..1; +40 shifts it to 0..80. */
990  curidx = qc1;
991  curidx *= 3;
992  curidx += qc2;
993  curidx *= 3;
994  curidx += qc3;
995  curidx *= 3;
996  curidx += qc4;
997  curidx += 40;
998 
999  curbits += p_bits[curidx];
1000  }
1001  return curbits;
1002 }
1003 
/**
 * Sum table-driven bit counts for one band, four coefficients per step,
 * using unsigned quantized values capped at 2.
 *
 * For each group of four, scaled[] * Q34 + ROUND_STANDARD is truncated to int
 * and capped at 2 by the asm. The four values form a base-3 index used for
 * both ff_aac_spectral_bits[cb-1] and uquad_sign_bits.
 *
 * s, pb, in, lambda, uplim and bits are not referenced in this function.
 * The loop always touches elements i..i+3, so it assumes size is a multiple
 * of 4 (not checked here).
 *
 * @return the accumulated table entries (int converted to float)
 */
1004 static float get_band_numbits_UQUAD_mips(struct AACEncContext *s,
1005  PutBitContext *pb, const float *in,
1006  const float *scaled, int size, int scale_idx,
1007  int cb, const float lambda, const float uplim,
1008  int *bits)
1009 {
1010  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1011  int i;
1012  int curbits = 0;
1013  int qc1, qc2, qc3, qc4;
1014 
1015  uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1016 
1017  for (i = 0; i < size; i += 4) {
1018  int curidx;
1019  int t0, t1, t2, t3, t4;
1020 
/* Float-to-int assignment truncates the rounded product. */
1021  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1022  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1023  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1024  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1025 
1026  __asm__ volatile (
1027  ".set push \n\t"
1028  ".set noreorder \n\t"
1029 
/* t4 = 2; tN = (2 < qcN); qcN = 2 where tN is set, i.e. cap at 2. */
1030  "ori %[t4], $zero, 2 \n\t"
1031  "slt %[t0], %[t4], %[qc1] \n\t"
1032  "slt %[t1], %[t4], %[qc2] \n\t"
1033  "slt %[t2], %[t4], %[qc3] \n\t"
1034  "slt %[t3], %[t4], %[qc4] \n\t"
1035  "movn %[qc1], %[t4], %[t0] \n\t"
1036  "movn %[qc2], %[t4], %[t1] \n\t"
1037  "movn %[qc3], %[t4], %[t2] \n\t"
1038  "movn %[qc4], %[t4], %[t3] \n\t"
1039 
1040  ".set pop \n\t"
1041 
1042  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1043  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1044  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1045  [t4]"=&r"(t4)
1046  );
1047 
/* Base-3 index built from the four capped values. */
1048  curidx = qc1;
1049  curidx *= 3;
1050  curidx += qc2;
1051  curidx *= 3;
1052  curidx += qc3;
1053  curidx *= 3;
1054  curidx += qc4;
1055 
1056  curbits += p_bits[curidx];
1057  curbits += uquad_sign_bits[curidx];
1058  }
1059  return curbits;
1060 }
1061 
/**
 * Sum table-driven bit counts for one band using signed pairs with
 * magnitudes capped at 4; two pairs are handled per loop step.
 *
 * For each group of four, scaled[] * Q34 + ROUND_STANDARD is truncated to int,
 * capped at 4 and negated when the matching in[] word has its top bit set.
 * Each pair (a, b) yields the index 9 * a + b + 40 into
 * ff_aac_spectral_bits[cb-1].
 *
 * s, pb, lambda, uplim and bits are not referenced in this function.
 * The loop always touches elements i..i+3, so it assumes size is a multiple
 * of 4 (not checked here).
 *
 * @return the accumulated table entries (int converted to float)
 */
1062 static float get_band_numbits_SPAIR_mips(struct AACEncContext *s,
1063  PutBitContext *pb, const float *in,
1064  const float *scaled, int size, int scale_idx,
1065  int cb, const float lambda, const float uplim,
1066  int *bits)
1067 {
1068  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1069  int i;
1070  int qc1, qc2, qc3, qc4;
1071  int curbits = 0;
1072 
1073  uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
1074 
1075  for (i = 0; i < size; i += 4) {
1076  int curidx, curidx2;
/* Integer view of in[i..i+3]; the asm loads these words to read the sign bits. */
1077  int *in_int = (int *)&in[i];
1078  int t0, t1, t2, t3, t4, t5, t6, t7;
1079 
/* Float-to-int assignment truncates the rounded product. */
1080  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1081  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1082  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1083  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1084 
1085  __asm__ volatile (
1086  ".set push \n\t"
1087  ".set noreorder \n\t"
1088 
/* t4 = 4; qcN = 4 where (4 < qcN), i.e. cap at 4. */
1089  "ori %[t4], $zero, 4 \n\t"
1090  "slt %[t0], %[t4], %[qc1] \n\t"
1091  "slt %[t1], %[t4], %[qc2] \n\t"
1092  "slt %[t2], %[t4], %[qc3] \n\t"
1093  "slt %[t3], %[t4], %[qc4] \n\t"
1094  "movn %[qc1], %[t4], %[t0] \n\t"
1095  "movn %[qc2], %[t4], %[t1] \n\t"
1096  "movn %[qc3], %[t4], %[t2] \n\t"
1097  "movn %[qc4], %[t4], %[t3] \n\t"
/* tN = top bit of the raw word of in[i+N] (t0..t3 are reused). */
1098  "lw %[t0], 0(%[in_int]) \n\t"
1099  "lw %[t1], 4(%[in_int]) \n\t"
1100  "lw %[t2], 8(%[in_int]) \n\t"
1101  "lw %[t3], 12(%[in_int]) \n\t"
1102  "srl %[t0], %[t0], 31 \n\t"
1103  "srl %[t1], %[t1], 31 \n\t"
1104  "srl %[t2], %[t2], 31 \n\t"
1105  "srl %[t3], %[t3], 31 \n\t"
/* t4 no longer holds the cap from here on; t4..t7 = -qcN, selected where the top bit is set. */
1106  "subu %[t4], $zero, %[qc1] \n\t"
1107  "subu %[t5], $zero, %[qc2] \n\t"
1108  "subu %[t6], $zero, %[qc3] \n\t"
1109  "subu %[t7], $zero, %[qc4] \n\t"
1110  "movn %[qc1], %[t4], %[t0] \n\t"
1111  "movn %[qc2], %[t5], %[t1] \n\t"
1112  "movn %[qc3], %[t6], %[t2] \n\t"
1113  "movn %[qc4], %[t7], %[t3] \n\t"
1114 
1115  ".set pop \n\t"
1116 
1117  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1118  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1119  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1120  [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
1121  : [in_int]"r"(in_int)
1122  : "memory"
1123  );
1124 
/* Pair index: values in -4..4 give 9 * a + b in -40..40; +40 makes it non-negative. */
1125  curidx = 9 * qc1;
1126  curidx += qc2 + 40;
1127 
1128  curidx2 = 9 * qc3;
1129  curidx2 += qc4 + 40;
1130 
1131  curbits += p_bits[curidx] + p_bits[curidx2];
1132  }
1133  return curbits;
1134 }
1135 
/**
 * Sum table-driven bit counts for one band using unsigned pairs with values
 * capped at 7; two pairs are handled per loop step.
 *
 * For each group of four, scaled[] * Q34 + ROUND_STANDARD is truncated to int
 * and capped at 7. Each pair (a, b) yields the index 8 * a + b, which is used
 * for both ff_aac_spectral_bits[cb-1] and upair7_sign_bits.
 *
 * s, pb, in, lambda, uplim and bits are not referenced in this function.
 * The loop always touches elements i..i+3, so it assumes size is a multiple
 * of 4 (not checked here).
 *
 * @return the accumulated table entries (int converted to float)
 */
1136 static float get_band_numbits_UPAIR7_mips(struct AACEncContext *s,
1137  PutBitContext *pb, const float *in,
1138  const float *scaled, int size, int scale_idx,
1139  int cb, const float lambda, const float uplim,
1140  int *bits)
1141 {
1142  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1143  int i;
1144  int qc1, qc2, qc3, qc4;
1145  int curbits = 0;
1146 
1147  uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1148 
1149  for (i = 0; i < size; i += 4) {
1150  int curidx, curidx2;
1151  int t0, t1, t2, t3, t4;
1152 
/* Float-to-int assignment truncates the rounded product. */
1153  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1154  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1155  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1156  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1157 
1158  __asm__ volatile (
1159  ".set push \n\t"
1160  ".set noreorder \n\t"
1161 
/* t4 = 7; qcN = 7 where (7 < qcN), i.e. cap at 7. */
1162  "ori %[t4], $zero, 7 \n\t"
1163  "slt %[t0], %[t4], %[qc1] \n\t"
1164  "slt %[t1], %[t4], %[qc2] \n\t"
1165  "slt %[t2], %[t4], %[qc3] \n\t"
1166  "slt %[t3], %[t4], %[qc4] \n\t"
1167  "movn %[qc1], %[t4], %[t0] \n\t"
1168  "movn %[qc2], %[t4], %[t1] \n\t"
1169  "movn %[qc3], %[t4], %[t2] \n\t"
1170  "movn %[qc4], %[t4], %[t3] \n\t"
1171 
1172  ".set pop \n\t"
1173 
1174  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1175  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1176  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1177  [t4]"=&r"(t4)
1178  );
1179 
/* Pair index with stride 8 (values 0..7). */
1180  curidx = 8 * qc1;
1181  curidx += qc2;
1182 
1183  curidx2 = 8 * qc3;
1184  curidx2 += qc4;
1185 
1186  curbits += p_bits[curidx] +
1187  upair7_sign_bits[curidx] +
1188  p_bits[curidx2] +
1189  upair7_sign_bits[curidx2];
1190  }
1191  return curbits;
1192 }
1193 
/**
 * Sum table-driven bit counts for one band using unsigned pairs with values
 * capped at 12; two pairs are handled per loop step.
 *
 * For each group of four, scaled[] * Q34 + ROUND_STANDARD is truncated to int
 * and capped at 12. Each pair (a, b) yields the index 13 * a + b, which is
 * used for both ff_aac_spectral_bits[cb-1] and upair12_sign_bits.
 *
 * s, pb, in, lambda, uplim and bits are not referenced in this function.
 * The loop always touches elements i..i+3, so it assumes size is a multiple
 * of 4 (not checked here).
 *
 * @return the accumulated table entries (int converted to float)
 */
1194 static float get_band_numbits_UPAIR12_mips(struct AACEncContext *s,
1195  PutBitContext *pb, const float *in,
1196  const float *scaled, int size, int scale_idx,
1197  int cb, const float lambda, const float uplim,
1198  int *bits)
1199 {
1200  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1201  int i;
1202  int qc1, qc2, qc3, qc4;
1203  int curbits = 0;
1204 
1205  uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1206 
1207  for (i = 0; i < size; i += 4) {
1208  int curidx, curidx2;
1209  int t0, t1, t2, t3, t4;
1210 
/* Float-to-int assignment truncates the rounded product. */
1211  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1212  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1213  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1214  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1215 
1216  __asm__ volatile (
1217  ".set push \n\t"
1218  ".set noreorder \n\t"
1219 
/* t4 = 12; qcN = 12 where (12 < qcN), i.e. cap at 12. */
1220  "ori %[t4], $zero, 12 \n\t"
1221  "slt %[t0], %[t4], %[qc1] \n\t"
1222  "slt %[t1], %[t4], %[qc2] \n\t"
1223  "slt %[t2], %[t4], %[qc3] \n\t"
1224  "slt %[t3], %[t4], %[qc4] \n\t"
1225  "movn %[qc1], %[t4], %[t0] \n\t"
1226  "movn %[qc2], %[t4], %[t1] \n\t"
1227  "movn %[qc3], %[t4], %[t2] \n\t"
1228  "movn %[qc4], %[t4], %[t3] \n\t"
1229 
1230  ".set pop \n\t"
1231 
1232  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1233  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1234  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1235  [t4]"=&r"(t4)
1236  );
1237 
/* Pair index with stride 13 (values 0..12). */
1238  curidx = 13 * qc1;
1239  curidx += qc2;
1240 
1241  curidx2 = 13 * qc3;
1242  curidx2 += qc4;
1243 
1244  curbits += p_bits[curidx] +
1245  p_bits[curidx2] +
1246  upair12_sign_bits[curidx] +
1247  upair12_sign_bits[curidx2];
1248  }
1249  return curbits;
1250 }
1251 
/**
 * Sum table-driven bit counts for one band using unsigned pairs in which any
 * value above 15 is replaced by 16 and charged an extra, size-dependent cost.
 *
 * For each group of four, scaled[] * Q34 + ROUND_STANDARD is truncated to int.
 * The asm derives an extra cost cN = 2 * (31 - clz(v)) - 3 for values above 15
 * (0 otherwise) and replaces such values by 16. Each pair (a, b) yields the
 * index 17 * a + b, used for both ff_aac_spectral_bits[cb-1] and
 * esc_sign_bits; table entries and extra costs are summed.
 *
 * s, pb, in, lambda, uplim and bits are not referenced in this function.
 * The loop always touches elements i..i+3, so it assumes size is a multiple
 * of 4 (not checked here).
 *
 * @return the accumulated bit count (int converted to float)
 */
1252 static float get_band_numbits_ESC_mips(struct AACEncContext *s,
1253  PutBitContext *pb, const float *in,
1254  const float *scaled, int size, int scale_idx,
1255  int cb, const float lambda, const float uplim,
1256  int *bits)
1257 {
1258  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1259  int i;
1260  int qc1, qc2, qc3, qc4;
1261  int curbits = 0;
1262 
1263  uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
1264 
1265  for (i = 0; i < size; i += 4) {
1266  int curidx, curidx2;
1267  int cond0, cond1, cond2, cond3;
1268  int c1, c2, c3, c4;
1269  int t4, t5;
1270 
/* Float-to-int assignment truncates the rounded product. */
1271  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1272  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1273  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1274  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1275 
1276  __asm__ volatile (
1277  ".set push \n\t"
1278  ".set noreorder \n\t"
1279 
/* t4 = 15 (threshold), t5 = 16 (replacement value). */
1280  "ori %[t4], $zero, 15 \n\t"
1281  "ori %[t5], $zero, 16 \n\t"
/*
 * cN = qcN shifted left by 18 with shll_s.w, then logically right by 18.
 * NOTE(review): shll_s.w is presumably the DSP ASE saturating shift, which
 * would cap cN at 8191 - confirm against the DSP ASE manual.
 */
1282  "shll_s.w %[c1], %[qc1], 18 \n\t"
1283  "shll_s.w %[c2], %[qc2], 18 \n\t"
1284  "shll_s.w %[c3], %[qc3], 18 \n\t"
1285  "shll_s.w %[c4], %[qc4], 18 \n\t"
1286  "srl %[c1], %[c1], 18 \n\t"
1287  "srl %[c2], %[c2], 18 \n\t"
1288  "srl %[c3], %[c3], 18 \n\t"
1289  "srl %[c4], %[c4], 18 \n\t"
/* condN = (15 < qcN); qcN = 16 where condN is set. */
1290  "slt %[cond0], %[t4], %[qc1] \n\t"
1291  "slt %[cond1], %[t4], %[qc2] \n\t"
1292  "slt %[cond2], %[t4], %[qc3] \n\t"
1293  "slt %[cond3], %[t4], %[qc4] \n\t"
1294  "movn %[qc1], %[t5], %[cond0] \n\t"
1295  "movn %[qc2], %[t5], %[cond1] \n\t"
1296  "movn %[qc3], %[t5], %[cond2] \n\t"
1297  "movn %[qc4], %[t5], %[cond3] \n\t"
/* t5 is reused as the constant 31; cN = 31 - clz(cN). */
1298  "ori %[t5], $zero, 31 \n\t"
1299  "clz %[c1], %[c1] \n\t"
1300  "clz %[c2], %[c2] \n\t"
1301  "clz %[c3], %[c3] \n\t"
1302  "clz %[c4], %[c4] \n\t"
1303  "subu %[c1], %[t5], %[c1] \n\t"
1304  "subu %[c2], %[t5], %[c2] \n\t"
1305  "subu %[c3], %[t5], %[c3] \n\t"
1306  "subu %[c4], %[t5], %[c4] \n\t"
/* cN = 2 * cN - 3. */
1307  "sll %[c1], %[c1], 1 \n\t"
1308  "sll %[c2], %[c2], 1 \n\t"
1309  "sll %[c3], %[c3], 1 \n\t"
1310  "sll %[c4], %[c4], 1 \n\t"
1311  "addiu %[c1], %[c1], -3 \n\t"
1312  "addiu %[c2], %[c2], -3 \n\t"
1313  "addiu %[c3], %[c3], -3 \n\t"
1314  "addiu %[c4], %[c4], -3 \n\t"
/* condN becomes 0 or all ones; the AND keeps cN only where qcN exceeded 15. */
1315  "subu %[cond0], $zero, %[cond0] \n\t"
1316  "subu %[cond1], $zero, %[cond1] \n\t"
1317  "subu %[cond2], $zero, %[cond2] \n\t"
1318  "subu %[cond3], $zero, %[cond3] \n\t"
1319  "and %[c1], %[c1], %[cond0] \n\t"
1320  "and %[c2], %[c2], %[cond1] \n\t"
1321  "and %[c3], %[c3], %[cond2] \n\t"
1322  "and %[c4], %[c4], %[cond3] \n\t"
1323 
1324  ".set pop \n\t"
1325 
1326  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1327  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1328  [cond0]"=&r"(cond0), [cond1]"=&r"(cond1),
1329  [cond2]"=&r"(cond2), [cond3]"=&r"(cond3),
1330  [c1]"=&r"(c1), [c2]"=&r"(c2),
1331  [c3]"=&r"(c3), [c4]"=&r"(c4),
1332  [t4]"=&r"(t4), [t5]"=&r"(t5)
1333  );
1334 
/* Pair index with stride 17 (values 0..16). */
1335  curidx = 17 * qc1;
1336  curidx += qc2;
1337 
1338  curidx2 = 17 * qc3;
1339  curidx2 += qc4;
1340 
1341  curbits += p_bits[curidx];
1342  curbits += esc_sign_bits[curidx];
1343  curbits += p_bits[curidx2];
1344  curbits += esc_sign_bits[curidx2];
1345 
/* Extra cost for every value that exceeded 15 (zero otherwise). */
1346  curbits += c1;
1347  curbits += c2;
1348  curbits += c3;
1349  curbits += c4;
1350  }
1351  return curbits;
1352 }
1353 
1354 static float (*const get_band_numbits_arr[])(struct AACEncContext *s,
1355  PutBitContext *pb, const float *in,
1356  const float *scaled, int size, int scale_idx,
1357  int cb, const float lambda, const float uplim,
1358  int *bits) = {
1359  get_band_numbits_ZERO_mips,
1360  get_band_numbits_SQUAD_mips,
1361  get_band_numbits_SQUAD_mips,
1362  get_band_numbits_UQUAD_mips,
1363  get_band_numbits_UQUAD_mips,
1364  get_band_numbits_SPAIR_mips,
1365  get_band_numbits_SPAIR_mips,
1366  get_band_numbits_UPAIR7_mips,
1367  get_band_numbits_UPAIR7_mips,
1368  get_band_numbits_UPAIR12_mips,
1369  get_band_numbits_UPAIR12_mips,
1370  get_band_numbits_ESC_mips,
1371  get_band_numbits_NONE_mips, /* cb 12 doesn't exist */
1372  get_band_numbits_ZERO_mips,
1373  get_band_numbits_ZERO_mips,
1374  get_band_numbits_ZERO_mips,
1375 };
1376 
/*
 * Dispatch helper: picks the entry of get_band_numbits_arr selected by cb and
 * calls it with the complete argument list.
 * cb is expanded twice (once as table index, once as call argument).
 */
#define get_band_numbits(s, pb, in, scaled, size, scale_idx, cb,            \
                         lambda, uplim, bits)                               \
    get_band_numbits_arr[cb](s, pb, in, scaled, size, scale_idx, cb,        \
                             lambda, uplim, bits)
1383 
1384 static float quantize_band_cost_bits(struct AACEncContext *s, const float *in,
1385  const float *scaled, int size, int scale_idx,
1386  int cb, const float lambda, const float uplim,
1387  int *bits)
1388 {
1389  return get_band_numbits(s, NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits);
1390 }
1391 
1392 /**
1393  * Functions developed from template function and optimized for getting the band cost
1394  */
1395 #if HAVE_MIPSFPU
1396 static float get_band_cost_ZERO_mips(struct AACEncContext *s,
1397  PutBitContext *pb, const float *in,
1398  const float *scaled, int size, int scale_idx,
1399  int cb, const float lambda, const float uplim,
1400  int *bits)
1401 {
1402  int i;
1403  float cost = 0;
1404 
1405  for (i = 0; i < size; i += 4) {
1406  cost += in[i ] * in[i ];
1407  cost += in[i+1] * in[i+1];
1408  cost += in[i+2] * in[i+2];
1409  cost += in[i+3] * in[i+3];
1410  }
1411  if (bits)
1412  *bits = 0;
1413  return cost * lambda;
1414 }
1415 
1416 static float get_band_cost_NONE_mips(struct AACEncContext *s,
1417  PutBitContext *pb, const float *in,
1418  const float *scaled, int size, int scale_idx,
1419  int cb, const float lambda, const float uplim,
1420  int *bits)
1421 {
1422  av_assert0(0);
1423  return 0;
1424 }
1425 
/**
 * Cost of one band, four coefficients per step, using signed quantized
 * values limited to -1/0/+1.
 *
 * The index is derived exactly as in the bit-count variant: quantize with Q34
 * and ROUND_STANDARD, reduce to 0/1, negate where the in[] word has its top
 * bit set, then form a base-3 index offset by 40. That index selects an entry
 * of ff_aac_spectral_bits[cb-1] and a group of four floats in
 * ff_aac_codebook_vectors[cb-1]. The error term is the sum of
 * (in[k] - vec[k] * IQ)^2.
 *
 * s, pb and uplim are not referenced. The loop always touches elements
 * i..i+3, so it assumes size is a multiple of 4 (not checked here).
 *
 * @param bits if non-NULL, receives the accumulated table entries
 * @return error sum * lambda + accumulated table entries
 */
1426 static float get_band_cost_SQUAD_mips(struct AACEncContext *s,
1427  PutBitContext *pb, const float *in,
1428  const float *scaled, int size, int scale_idx,
1429  int cb, const float lambda, const float uplim,
1430  int *bits)
1431 {
1432  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1433  const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
1434  int i;
1435  float cost = 0;
1436  int qc1, qc2, qc3, qc4;
1437  int curbits = 0;
1438 
1439  uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1440  float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
1441 
1442  for (i = 0; i < size; i += 4) {
1443  const float *vec;
1444  int curidx;
/* in_int: integer view for the sign-bit loads; in_pos: float view for the FPU loads. */
1445  int *in_int = (int *)&in[i];
1446  float *in_pos = (float *)&in[i];
1447  float di0, di1, di2, di3;
1448  int t0, t1, t2, t3, t4, t5, t6, t7;
1449 
/* Float-to-int assignment truncates the rounded product. */
1450  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1451  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1452  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1453  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1454 
1455  __asm__ volatile (
1456  ".set push \n\t"
1457  ".set noreorder \n\t"
1458 
/* qcN = (0 < qcN): any positive value becomes 1, everything else 0. */
1459  "slt %[qc1], $zero, %[qc1] \n\t"
1460  "slt %[qc2], $zero, %[qc2] \n\t"
1461  "slt %[qc3], $zero, %[qc3] \n\t"
1462  "slt %[qc4], $zero, %[qc4] \n\t"
/* tN = top bit of the raw word of in[i+N]. */
1463  "lw %[t0], 0(%[in_int]) \n\t"
1464  "lw %[t1], 4(%[in_int]) \n\t"
1465  "lw %[t2], 8(%[in_int]) \n\t"
1466  "lw %[t3], 12(%[in_int]) \n\t"
1467  "srl %[t0], %[t0], 31 \n\t"
1468  "srl %[t1], %[t1], 31 \n\t"
1469  "srl %[t2], %[t2], 31 \n\t"
1470  "srl %[t3], %[t3], 31 \n\t"
/* Replace qcN by -qcN where that top bit is set. */
1471  "subu %[t4], $zero, %[qc1] \n\t"
1472  "subu %[t5], $zero, %[qc2] \n\t"
1473  "subu %[t6], $zero, %[qc3] \n\t"
1474  "subu %[t7], $zero, %[qc4] \n\t"
1475  "movn %[qc1], %[t4], %[t0] \n\t"
1476  "movn %[qc2], %[t5], %[t1] \n\t"
1477  "movn %[qc3], %[t6], %[t2] \n\t"
1478  "movn %[qc4], %[t7], %[t3] \n\t"
1479 
1480  ".set pop \n\t"
1481 
1482  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1483  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1484  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1485  [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
1486  : [in_int]"r"(in_int)
1487  : "memory"
1488  );
1489 
/* Base-3 index from the four values in -1..1; +40 shifts it to 0..80. */
1490  curidx = qc1;
1491  curidx *= 3;
1492  curidx += qc2;
1493  curidx *= 3;
1494  curidx += qc3;
1495  curidx *= 3;
1496  curidx += qc4;
1497  curidx += 40;
1498 
1499  curbits += p_bits[curidx];
/* Four consecutive floats per index in the vector table. */
1500  vec = &p_codes[curidx*4];
1501 
1502  __asm__ volatile (
1503  ".set push \n\t"
1504  ".set noreorder \n\t"
1505 
/* Load in[i..i+3] into even and vec[0..3] into odd scratch FPU registers. */
1506  "lwc1 $f0, 0(%[in_pos]) \n\t"
1507  "lwc1 $f1, 0(%[vec]) \n\t"
1508  "lwc1 $f2, 4(%[in_pos]) \n\t"
1509  "lwc1 $f3, 4(%[vec]) \n\t"
1510  "lwc1 $f4, 8(%[in_pos]) \n\t"
1511  "lwc1 $f5, 8(%[vec]) \n\t"
1512  "lwc1 $f6, 12(%[in_pos]) \n\t"
1513  "lwc1 $f7, 12(%[vec]) \n\t"
/* nmsub.s yields fr - fs * ft, i.e. diN = in[i+N] - vec[N] * IQ. */
1514  "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t"
1515  "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t"
1516  "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t"
1517  "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t"
1518 
1519  ".set pop \n\t"
1520 
1521  : [di0]"=&f"(di0), [di1]"=&f"(di1),
1522  [di2]"=&f"(di2), [di3]"=&f"(di3)
1523  : [in_pos]"r"(in_pos), [vec]"r"(vec),
1524  [IQ]"f"(IQ)
1525  : "$f0", "$f1", "$f2", "$f3",
1526  "$f4", "$f5", "$f6", "$f7",
1527  "memory"
1528  );
1529 
/* Squared error of this group. */
1530  cost += di0 * di0 + di1 * di1
1531  + di2 * di2 + di3 * di3;
1532  }
1533 
1534  if (bits)
1535  *bits = curbits;
1536  return cost * lambda + curbits;
1537 }
1538 
/**
 * Cost of one band, four coefficients per step, using unsigned quantized
 * values capped at 2.
 *
 * Quantized values (scaled[] * Q34 + ROUND_STANDARD, truncated, capped at 2)
 * form a base-3 index. That index selects entries of
 * ff_aac_spectral_bits[cb-1] and uquad_sign_bits, and a group of four floats
 * in ff_aac_codebook_vectors[cb-1]. The error term is the sum of
 * (|in[k]| - vec[k] * IQ)^2.
 *
 * s, pb and uplim are not referenced. The loop always touches elements
 * i..i+3, so it assumes size is a multiple of 4 (not checked here).
 *
 * @param bits if non-NULL, receives the accumulated table entries
 * @return error sum * lambda + accumulated table entries
 */
1539 static float get_band_cost_UQUAD_mips(struct AACEncContext *s,
1540  PutBitContext *pb, const float *in,
1541  const float *scaled, int size, int scale_idx,
1542  int cb, const float lambda, const float uplim,
1543  int *bits)
1544 {
1545  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1546  const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
1547  int i;
1548  float cost = 0;
1549  int curbits = 0;
1550  int qc1, qc2, qc3, qc4;
1551 
1552  uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
1553  float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
1554 
1555  for (i = 0; i < size; i += 4) {
1556  const float *vec;
1557  int curidx;
1558  float *in_pos = (float *)&in[i];
1559  float di0, di1, di2, di3;
1560  int t0, t1, t2, t3, t4;
1561 
/* Float-to-int assignment truncates the rounded product. */
1562  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1563  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1564  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1565  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1566 
1567  __asm__ volatile (
1568  ".set push \n\t"
1569  ".set noreorder \n\t"
1570 
/* t4 = 2; qcN = 2 where (2 < qcN), i.e. cap at 2. */
1571  "ori %[t4], $zero, 2 \n\t"
1572  "slt %[t0], %[t4], %[qc1] \n\t"
1573  "slt %[t1], %[t4], %[qc2] \n\t"
1574  "slt %[t2], %[t4], %[qc3] \n\t"
1575  "slt %[t3], %[t4], %[qc4] \n\t"
1576  "movn %[qc1], %[t4], %[t0] \n\t"
1577  "movn %[qc2], %[t4], %[t1] \n\t"
1578  "movn %[qc3], %[t4], %[t2] \n\t"
1579  "movn %[qc4], %[t4], %[t3] \n\t"
1580 
1581  ".set pop \n\t"
1582 
1583  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1584  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1585  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1586  [t4]"=&r"(t4)
1587  );
1588 
/* Base-3 index built from the four capped values. */
1589  curidx = qc1;
1590  curidx *= 3;
1591  curidx += qc2;
1592  curidx *= 3;
1593  curidx += qc3;
1594  curidx *= 3;
1595  curidx += qc4;
1596 
1597  curbits += p_bits[curidx];
1598  curbits += uquad_sign_bits[curidx];
/* Four consecutive floats per index in the vector table. */
1599  vec = &p_codes[curidx*4];
1600 
1601  __asm__ volatile (
1602  ".set push \n\t"
1603  ".set noreorder \n\t"
1604 
/* diN = |in[i+N]|. */
1605  "lwc1 %[di0], 0(%[in_pos]) \n\t"
1606  "lwc1 %[di1], 4(%[in_pos]) \n\t"
1607  "lwc1 %[di2], 8(%[in_pos]) \n\t"
1608  "lwc1 %[di3], 12(%[in_pos]) \n\t"
1609  "abs.s %[di0], %[di0] \n\t"
1610  "abs.s %[di1], %[di1] \n\t"
1611  "abs.s %[di2], %[di2] \n\t"
1612  "abs.s %[di3], %[di3] \n\t"
1613  "lwc1 $f0, 0(%[vec]) \n\t"
1614  "lwc1 $f1, 4(%[vec]) \n\t"
1615  "lwc1 $f2, 8(%[vec]) \n\t"
1616  "lwc1 $f3, 12(%[vec]) \n\t"
/* nmsub.s yields fr - fs * ft, i.e. diN = |in[i+N]| - vec[N] * IQ. */
1617  "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
1618  "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
1619  "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
1620  "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
1621 
1622  ".set pop \n\t"
1623 
1624  : [di0]"=&f"(di0), [di1]"=&f"(di1),
1625  [di2]"=&f"(di2), [di3]"=&f"(di3)
1626  : [in_pos]"r"(in_pos), [vec]"r"(vec),
1627  [IQ]"f"(IQ)
1628  : "$f0", "$f1", "$f2", "$f3",
1629  "memory"
1630  );
1631 
/* Squared error of this group. */
1632  cost += di0 * di0 + di1 * di1
1633  + di2 * di2 + di3 * di3;
1634  }
1635 
1636  if (bits)
1637  *bits = curbits;
1638  return cost * lambda + curbits;
1639 }
1640 
/**
 * Cost of one band using signed pairs with magnitudes capped at 4; two pairs
 * are handled per loop step.
 *
 * Quantized values (scaled[] * Q34 + ROUND_STANDARD, truncated, capped at 4,
 * negated where the in[] word has its top bit set) give one index per pair:
 * 9 * a + b + 40. Each index selects an entry of ff_aac_spectral_bits[cb-1]
 * and two floats in ff_aac_codebook_vectors[cb-1]. The error term is the sum
 * of (in[k] - vec[k] * IQ)^2.
 *
 * s, pb and uplim are not referenced. The loop always touches elements
 * i..i+3, so it assumes size is a multiple of 4 (not checked here).
 *
 * @param bits if non-NULL, receives the accumulated table entries
 * @return error sum * lambda + accumulated table entries
 */
1641 static float get_band_cost_SPAIR_mips(struct AACEncContext *s,
1642  PutBitContext *pb, const float *in,
1643  const float *scaled, int size, int scale_idx,
1644  int cb, const float lambda, const float uplim,
1645  int *bits)
1646 {
1647  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1648  const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
1649  int i;
1650  float cost = 0;
1651  int qc1, qc2, qc3, qc4;
1652  int curbits = 0;
1653 
1654  uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1655  float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
1656 
1657  for (i = 0; i < size; i += 4) {
1658  const float *vec, *vec2;
1659  int curidx, curidx2;
/* in_int: integer view for the sign-bit loads; in_pos: float view for the FPU loads. */
1660  int *in_int = (int *)&in[i];
1661  float *in_pos = (float *)&in[i];
1662  float di0, di1, di2, di3;
1663  int t0, t1, t2, t3, t4, t5, t6, t7;
1664 
/* Float-to-int assignment truncates the rounded product. */
1665  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1666  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1667  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1668  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1669 
1670  __asm__ volatile (
1671  ".set push \n\t"
1672  ".set noreorder \n\t"
1673 
/* t4 = 4; qcN = 4 where (4 < qcN), i.e. cap at 4. */
1674  "ori %[t4], $zero, 4 \n\t"
1675  "slt %[t0], %[t4], %[qc1] \n\t"
1676  "slt %[t1], %[t4], %[qc2] \n\t"
1677  "slt %[t2], %[t4], %[qc3] \n\t"
1678  "slt %[t3], %[t4], %[qc4] \n\t"
1679  "movn %[qc1], %[t4], %[t0] \n\t"
1680  "movn %[qc2], %[t4], %[t1] \n\t"
1681  "movn %[qc3], %[t4], %[t2] \n\t"
1682  "movn %[qc4], %[t4], %[t3] \n\t"
/* tN = top bit of the raw word of in[i+N] (t0..t3 are reused). */
1683  "lw %[t0], 0(%[in_int]) \n\t"
1684  "lw %[t1], 4(%[in_int]) \n\t"
1685  "lw %[t2], 8(%[in_int]) \n\t"
1686  "lw %[t3], 12(%[in_int]) \n\t"
1687  "srl %[t0], %[t0], 31 \n\t"
1688  "srl %[t1], %[t1], 31 \n\t"
1689  "srl %[t2], %[t2], 31 \n\t"
1690  "srl %[t3], %[t3], 31 \n\t"
/* t4 no longer holds the cap from here on; t4..t7 = -qcN, selected where the top bit is set. */
1691  "subu %[t4], $zero, %[qc1] \n\t"
1692  "subu %[t5], $zero, %[qc2] \n\t"
1693  "subu %[t6], $zero, %[qc3] \n\t"
1694  "subu %[t7], $zero, %[qc4] \n\t"
1695  "movn %[qc1], %[t4], %[t0] \n\t"
1696  "movn %[qc2], %[t5], %[t1] \n\t"
1697  "movn %[qc3], %[t6], %[t2] \n\t"
1698  "movn %[qc4], %[t7], %[t3] \n\t"
1699 
1700  ".set pop \n\t"
1701 
1702  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1703  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1704  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1705  [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
1706  : [in_int]"r"(in_int)
1707  : "memory"
1708  );
1709 
/* Pair index: values in -4..4 give 9 * a + b in -40..40; +40 makes it non-negative. */
1710  curidx = 9 * qc1;
1711  curidx += qc2 + 40;
1712 
1713  curidx2 = 9 * qc3;
1714  curidx2 += qc4 + 40;
1715 
1716  curbits += p_bits[curidx];
1717  curbits += p_bits[curidx2];
1718 
/* Two consecutive floats per index in the vector table. */
1719  vec = &p_codes[curidx*2];
1720  vec2 = &p_codes[curidx2*2];
1721 
1722  __asm__ volatile (
1723  ".set push \n\t"
1724  ".set noreorder \n\t"
1725 
/* First pair is matched against vec[0..1], second pair against vec2[0..1]. */
1726  "lwc1 $f0, 0(%[in_pos]) \n\t"
1727  "lwc1 $f1, 0(%[vec]) \n\t"
1728  "lwc1 $f2, 4(%[in_pos]) \n\t"
1729  "lwc1 $f3, 4(%[vec]) \n\t"
1730  "lwc1 $f4, 8(%[in_pos]) \n\t"
1731  "lwc1 $f5, 0(%[vec2]) \n\t"
1732  "lwc1 $f6, 12(%[in_pos]) \n\t"
1733  "lwc1 $f7, 4(%[vec2]) \n\t"
/* nmsub.s yields fr - fs * ft, i.e. diN = in[i+N] - codebook value * IQ. */
1734  "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t"
1735  "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t"
1736  "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t"
1737  "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t"
1738 
1739  ".set pop \n\t"
1740 
1741  : [di0]"=&f"(di0), [di1]"=&f"(di1),
1742  [di2]"=&f"(di2), [di3]"=&f"(di3)
1743  : [in_pos]"r"(in_pos), [vec]"r"(vec),
1744  [vec2]"r"(vec2), [IQ]"f"(IQ)
1745  : "$f0", "$f1", "$f2", "$f3",
1746  "$f4", "$f5", "$f6", "$f7",
1747  "memory"
1748  );
1749 
/* Squared error of this group. */
1750  cost += di0 * di0 + di1 * di1
1751  + di2 * di2 + di3 * di3;
1752  }
1753 
1754  if (bits)
1755  *bits = curbits;
1756  return cost * lambda + curbits;
1757 }
1758 
/**
 * Cost of one band using unsigned pairs with values capped at 7; two pairs
 * are handled per loop step.
 *
 * Quantized values (scaled[] * Q34 + ROUND_STANDARD, truncated, capped at 7)
 * give one index per pair: 8 * a + b. Each index selects entries of
 * ff_aac_spectral_bits[cb-1] and upair7_sign_bits, and two floats in
 * ff_aac_codebook_vectors[cb-1]. The error term is the sum of
 * (|in[k]| - vec[k] * IQ)^2.
 *
 * s, pb and uplim are not referenced. The loop always touches elements
 * i..i+3, so it assumes size is a multiple of 4 (not checked here).
 *
 * NOTE(review): the first asm block also fills sign1/sign2/count1/count2,
 * but none of them is read afterwards in this function.
 *
 * @param bits if non-NULL, receives the accumulated table entries
 * @return error sum * lambda + accumulated table entries
 */
1759 static float get_band_cost_UPAIR7_mips(struct AACEncContext *s,
1760  PutBitContext *pb, const float *in,
1761  const float *scaled, int size, int scale_idx,
1762  int cb, const float lambda, const float uplim,
1763  int *bits)
1764 {
1765  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1766  const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
1767  int i;
1768  float cost = 0;
1769  int qc1, qc2, qc3, qc4;
1770  int curbits = 0;
1771 
1772  uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1773  float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
1774 
1775  for (i = 0; i < size; i += 4) {
1776  const float *vec, *vec2;
1777  int curidx, curidx2, sign1, count1, sign2, count2;
/* in_int: integer view for the sign loads; in_pos: float view for the FPU loads. */
1778  int *in_int = (int *)&in[i];
1779  float *in_pos = (float *)&in[i];
1780  float di0, di1, di2, di3;
1781  int t0, t1, t2, t3, t4;
1782 
/* Float-to-int assignment truncates the rounded product. */
1783  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1784  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1785  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1786  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1787 
1788  __asm__ volatile (
1789  ".set push \n\t"
1790  ".set noreorder \n\t"
1791 
/* t4 = 7; sign1 = sign2 = 0; qcN = 7 where (7 < qcN), i.e. cap at 7. */
1792  "ori %[t4], $zero, 7 \n\t"
1793  "ori %[sign1], $zero, 0 \n\t"
1794  "ori %[sign2], $zero, 0 \n\t"
1795  "slt %[t0], %[t4], %[qc1] \n\t"
1796  "slt %[t1], %[t4], %[qc2] \n\t"
1797  "slt %[t2], %[t4], %[qc3] \n\t"
1798  "slt %[t3], %[t4], %[qc4] \n\t"
1799  "movn %[qc1], %[t4], %[t0] \n\t"
1800  "movn %[qc2], %[t4], %[t1] \n\t"
1801  "movn %[qc3], %[t4], %[t2] \n\t"
1802  "movn %[qc4], %[t4], %[t3] \n\t"
/* Raw words of in[i..i+3]; (word < 0) is 1 when the top bit is set. */
1803  "lw %[t0], 0(%[in_int]) \n\t"
1804  "lw %[t1], 4(%[in_int]) \n\t"
1805  "lw %[t2], 8(%[in_int]) \n\t"
1806  "lw %[t3], 12(%[in_int]) \n\t"
/* signN collects one bit per non-zero quantized value of the pair, shifting left before each append. */
1807  "slt %[t0], %[t0], $zero \n\t"
1808  "movn %[sign1], %[t0], %[qc1] \n\t"
1809  "slt %[t2], %[t2], $zero \n\t"
1810  "movn %[sign2], %[t2], %[qc3] \n\t"
1811  "slt %[t1], %[t1], $zero \n\t"
1812  "sll %[t0], %[sign1], 1 \n\t"
1813  "or %[t0], %[t0], %[t1] \n\t"
1814  "movn %[sign1], %[t0], %[qc2] \n\t"
1815  "slt %[t3], %[t3], $zero \n\t"
1816  "sll %[t0], %[sign2], 1 \n\t"
1817  "or %[t0], %[t0], %[t3] \n\t"
1818  "movn %[sign2], %[t0], %[qc4] \n\t"
/* countN = number of positive quantized values in the pair. */
1819  "slt %[count1], $zero, %[qc1] \n\t"
1820  "slt %[t1], $zero, %[qc2] \n\t"
1821  "slt %[count2], $zero, %[qc3] \n\t"
1822  "slt %[t2], $zero, %[qc4] \n\t"
1823  "addu %[count1], %[count1], %[t1] \n\t"
1824  "addu %[count2], %[count2], %[t2] \n\t"
1825 
1826  ".set pop \n\t"
1827 
1828  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1829  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1830  [sign1]"=&r"(sign1), [count1]"=&r"(count1),
1831  [sign2]"=&r"(sign2), [count2]"=&r"(count2),
1832  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1833  [t4]"=&r"(t4)
1834  : [in_int]"r"(in_int)
1835  : "memory"
1836  );
1837 
/* Pair index with stride 8 (values 0..7). */
1838  curidx = 8 * qc1;
1839  curidx += qc2;
1840 
1841  curidx2 = 8 * qc3;
1842  curidx2 += qc4;
1843 
1844  curbits += p_bits[curidx];
1845  curbits += upair7_sign_bits[curidx];
/* Two consecutive floats per index in the vector table. */
1846  vec = &p_codes[curidx*2];
1847 
1848  curbits += p_bits[curidx2];
1849  curbits += upair7_sign_bits[curidx2];
1850  vec2 = &p_codes[curidx2*2];
1851 
1852  __asm__ volatile (
1853  ".set push \n\t"
1854  ".set noreorder \n\t"
1855 
/* diN = |in[i+N]|. */
1856  "lwc1 %[di0], 0(%[in_pos]) \n\t"
1857  "lwc1 %[di1], 4(%[in_pos]) \n\t"
1858  "lwc1 %[di2], 8(%[in_pos]) \n\t"
1859  "lwc1 %[di3], 12(%[in_pos]) \n\t"
1860  "abs.s %[di0], %[di0] \n\t"
1861  "abs.s %[di1], %[di1] \n\t"
1862  "abs.s %[di2], %[di2] \n\t"
1863  "abs.s %[di3], %[di3] \n\t"
1864  "lwc1 $f0, 0(%[vec]) \n\t"
1865  "lwc1 $f1, 4(%[vec]) \n\t"
1866  "lwc1 $f2, 0(%[vec2]) \n\t"
1867  "lwc1 $f3, 4(%[vec2]) \n\t"
/* nmsub.s yields fr - fs * ft, i.e. diN = |in[i+N]| - codebook value * IQ. */
1868  "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
1869  "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
1870  "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
1871  "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
1872 
1873  ".set pop \n\t"
1874 
1875  : [di0]"=&f"(di0), [di1]"=&f"(di1),
1876  [di2]"=&f"(di2), [di3]"=&f"(di3)
1877  : [in_pos]"r"(in_pos), [vec]"r"(vec),
1878  [vec2]"r"(vec2), [IQ]"f"(IQ)
1879  : "$f0", "$f1", "$f2", "$f3",
1880  "memory"
1881  );
1882 
/* Squared error of this group. */
1883  cost += di0 * di0 + di1 * di1
1884  + di2 * di2 + di3 * di3;
1885  }
1886 
1887  if (bits)
1888  *bits = curbits;
1889  return cost * lambda + curbits;
1890 }
1891 
/**
 * Cost of one band using unsigned pairs with values capped at 12; two pairs
 * are handled per loop step.
 *
 * Quantized values (scaled[] * Q34 + ROUND_STANDARD, truncated, capped at 12)
 * give one index per pair: 13 * a + b. Each index selects entries of
 * ff_aac_spectral_bits[cb-1] and upair12_sign_bits, and two floats in
 * ff_aac_codebook_vectors[cb-1]. The error term is the sum of
 * (|in[k]| - vec[k] * IQ)^2.
 *
 * s, pb and uplim are not referenced. The loop always touches elements
 * i..i+3, so it assumes size is a multiple of 4 (not checked here).
 *
 * NOTE(review): the first asm block also fills sign1/sign2/count1/count2,
 * but none of them is read afterwards in this function.
 *
 * @param bits if non-NULL, receives the accumulated table entries
 * @return error sum * lambda + accumulated table entries
 */
1892 static float get_band_cost_UPAIR12_mips(struct AACEncContext *s,
1893  PutBitContext *pb, const float *in,
1894  const float *scaled, int size, int scale_idx,
1895  int cb, const float lambda, const float uplim,
1896  int *bits)
1897 {
1898  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1899  const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
1900  int i;
1901  float cost = 0;
1902  int qc1, qc2, qc3, qc4;
1903  int curbits = 0;
1904 
1905  uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1906  float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
1907 
1908  for (i = 0; i < size; i += 4) {
1909  const float *vec, *vec2;
1910  int curidx, curidx2;
1911  int sign1, count1, sign2, count2;
/* in_int: integer view for the sign loads; in_pos: float view for the FPU loads. */
1912  int *in_int = (int *)&in[i];
1913  float *in_pos = (float *)&in[i];
1914  float di0, di1, di2, di3;
1915  int t0, t1, t2, t3, t4;
1916 
/* Float-to-int assignment truncates the rounded product. */
1917  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1918  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1919  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1920  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1921 
1922  __asm__ volatile (
1923  ".set push \n\t"
1924  ".set noreorder \n\t"
1925 
/* t4 = 12; sign1 = sign2 = 0; qcN = 12 where (12 < qcN), i.e. cap at 12. */
1926  "ori %[t4], $zero, 12 \n\t"
1927  "ori %[sign1], $zero, 0 \n\t"
1928  "ori %[sign2], $zero, 0 \n\t"
1929  "slt %[t0], %[t4], %[qc1] \n\t"
1930  "slt %[t1], %[t4], %[qc2] \n\t"
1931  "slt %[t2], %[t4], %[qc3] \n\t"
1932  "slt %[t3], %[t4], %[qc4] \n\t"
1933  "movn %[qc1], %[t4], %[t0] \n\t"
1934  "movn %[qc2], %[t4], %[t1] \n\t"
1935  "movn %[qc3], %[t4], %[t2] \n\t"
1936  "movn %[qc4], %[t4], %[t3] \n\t"
/* Raw words of in[i..i+3]; (word < 0) is 1 when the top bit is set. */
1937  "lw %[t0], 0(%[in_int]) \n\t"
1938  "lw %[t1], 4(%[in_int]) \n\t"
1939  "lw %[t2], 8(%[in_int]) \n\t"
1940  "lw %[t3], 12(%[in_int]) \n\t"
/* signN collects one bit per non-zero quantized value of the pair, shifting left before each append. */
1941  "slt %[t0], %[t0], $zero \n\t"
1942  "movn %[sign1], %[t0], %[qc1] \n\t"
1943  "slt %[t2], %[t2], $zero \n\t"
1944  "movn %[sign2], %[t2], %[qc3] \n\t"
1945  "slt %[t1], %[t1], $zero \n\t"
1946  "sll %[t0], %[sign1], 1 \n\t"
1947  "or %[t0], %[t0], %[t1] \n\t"
1948  "movn %[sign1], %[t0], %[qc2] \n\t"
1949  "slt %[t3], %[t3], $zero \n\t"
1950  "sll %[t0], %[sign2], 1 \n\t"
1951  "or %[t0], %[t0], %[t3] \n\t"
1952  "movn %[sign2], %[t0], %[qc4] \n\t"
/* countN = number of positive quantized values in the pair. */
1953  "slt %[count1], $zero, %[qc1] \n\t"
1954  "slt %[t1], $zero, %[qc2] \n\t"
1955  "slt %[count2], $zero, %[qc3] \n\t"
1956  "slt %[t2], $zero, %[qc4] \n\t"
1957  "addu %[count1], %[count1], %[t1] \n\t"
1958  "addu %[count2], %[count2], %[t2] \n\t"
1959 
1960  ".set pop \n\t"
1961 
1962  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1963  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1964  [sign1]"=&r"(sign1), [count1]"=&r"(count1),
1965  [sign2]"=&r"(sign2), [count2]"=&r"(count2),
1966  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1967  [t4]"=&r"(t4)
1968  : [in_int]"r"(in_int)
1969  : "memory"
1970  );
1971 
/* Pair index with stride 13 (values 0..12). */
1972  curidx = 13 * qc1;
1973  curidx += qc2;
1974 
1975  curidx2 = 13 * qc3;
1976  curidx2 += qc4;
1977 
1978  curbits += p_bits[curidx];
1979  curbits += p_bits[curidx2];
1980  curbits += upair12_sign_bits[curidx];
1981  curbits += upair12_sign_bits[curidx2];
/* Two consecutive floats per index in the vector table. */
1982  vec = &p_codes[curidx*2];
1983  vec2 = &p_codes[curidx2*2];
1984 
1985  __asm__ volatile (
1986  ".set push \n\t"
1987  ".set noreorder \n\t"
1988 
/* diN = |in[i+N]|. */
1989  "lwc1 %[di0], 0(%[in_pos]) \n\t"
1990  "lwc1 %[di1], 4(%[in_pos]) \n\t"
1991  "lwc1 %[di2], 8(%[in_pos]) \n\t"
1992  "lwc1 %[di3], 12(%[in_pos]) \n\t"
1993  "abs.s %[di0], %[di0] \n\t"
1994  "abs.s %[di1], %[di1] \n\t"
1995  "abs.s %[di2], %[di2] \n\t"
1996  "abs.s %[di3], %[di3] \n\t"
1997  "lwc1 $f0, 0(%[vec]) \n\t"
1998  "lwc1 $f1, 4(%[vec]) \n\t"
1999  "lwc1 $f2, 0(%[vec2]) \n\t"
2000  "lwc1 $f3, 4(%[vec2]) \n\t"
/* nmsub.s yields fr - fs * ft, i.e. diN = |in[i+N]| - codebook value * IQ. */
2001  "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
2002  "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
2003  "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
2004  "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
2005 
2006  ".set pop \n\t"
2007 
2008  : [di0]"=&f"(di0), [di1]"=&f"(di1),
2009  [di2]"=&f"(di2), [di3]"=&f"(di3)
2010  : [in_pos]"r"(in_pos), [vec]"r"(vec),
2011  [vec2]"r"(vec2), [IQ]"f"(IQ)
2012  : "$f0", "$f1", "$f2", "$f3",
2013  "memory"
2014  );
2015 
/* Squared error of this group. */
2016  cost += di0 * di0 + di1 * di1
2017  + di2 * di2 + di3 * di3;
2018  }
2019 
2020  if (bits)
2021  *bits = curbits;
2022  return cost * lambda + curbits;
2023 }
2024 
2025 static float get_band_cost_ESC_mips(struct AACEncContext *s,
2026  PutBitContext *pb, const float *in,
2027  const float *scaled, int size, int scale_idx,
2028  int cb, const float lambda, const float uplim,
2029  int *bits)
2030 {
2031  const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
2032  const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
2033  const float CLIPPED_ESCAPE = 165140.0f * IQ;
2034  int i;
2035  float cost = 0;
2036  int qc1, qc2, qc3, qc4;
2037  int curbits = 0;
2038 
2039  uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
2040  float *p_codes = (float* )ff_aac_codebook_vectors[cb-1];
2041 
2042  for (i = 0; i < size; i += 4) {
2043  const float *vec, *vec2;
2044  int curidx, curidx2;
2045  float t1, t2, t3, t4;
2046  float di1, di2, di3, di4;
2047  int cond0, cond1, cond2, cond3;
2048  int c1, c2, c3, c4;
2049  int t6, t7;
2050 
2051  qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
2052  qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
2053  qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
2054  qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
2055 
2056  __asm__ volatile (
2057  ".set push \n\t"
2058  ".set noreorder \n\t"
2059 
2060  "ori %[t6], $zero, 15 \n\t"
2061  "ori %[t7], $zero, 16 \n\t"
2062  "shll_s.w %[c1], %[qc1], 18 \n\t"
2063  "shll_s.w %[c2], %[qc2], 18 \n\t"
2064  "shll_s.w %[c3], %[qc3], 18 \n\t"
2065  "shll_s.w %[c4], %[qc4], 18 \n\t"
2066  "srl %[c1], %[c1], 18 \n\t"
2067  "srl %[c2], %[c2], 18 \n\t"
2068  "srl %[c3], %[c3], 18 \n\t"
2069  "srl %[c4], %[c4], 18 \n\t"
2070  "slt %[cond0], %[t6], %[qc1] \n\t"
2071  "slt %[cond1], %[t6], %[qc2] \n\t"
2072  "slt %[cond2], %[t6], %[qc3] \n\t"
2073  "slt %[cond3], %[t6], %[qc4] \n\t"
2074  "movn %[qc1], %[t7], %[cond0] \n\t"
2075  "movn %[qc2], %[t7], %[cond1] \n\t"
2076  "movn %[qc3], %[t7], %[cond2] \n\t"
2077  "movn %[qc4], %[t7], %[cond3] \n\t"
2078 
2079  ".set pop \n\t"
2080 
2081  : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
2082  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
2083  [cond0]"=&r"(cond0), [cond1]"=&r"(cond1),
2084  [cond2]"=&r"(cond2), [cond3]"=&r"(cond3),
2085  [c1]"=&r"(c1), [c2]"=&r"(c2),
2086  [c3]"=&r"(c3), [c4]"=&r"(c4),
2087  [t6]"=&r"(t6), [t7]"=&r"(t7)
2088  );
2089 
2090  curidx = 17 * qc1;
2091  curidx += qc2;
2092 
2093  curidx2 = 17 * qc3;
2094  curidx2 += qc4;
2095 
2096  curbits += p_bits[curidx];
2097  curbits += esc_sign_bits[curidx];
2098  vec = &p_codes[curidx*2];
2099 
2100  curbits += p_bits[curidx2];
2101  curbits += esc_sign_bits[curidx2];
2102  vec2 = &p_codes[curidx2*2];
2103 
2104  curbits += (av_log2(c1) * 2 - 3) & (-cond0);
2105  curbits += (av_log2(c2) * 2 - 3) & (-cond1);
2106  curbits += (av_log2(c3) * 2 - 3) & (-cond2);
2107  curbits += (av_log2(c4) * 2 - 3) & (-cond3);
2108 
2109  t1 = fabsf(in[i ]);
2110  t2 = fabsf(in[i+1]);
2111  t3 = fabsf(in[i+2]);
2112  t4 = fabsf(in[i+3]);
2113 
2114  if (cond0) {
2115  if (t1 >= CLIPPED_ESCAPE) {
2116  di1 = t1 - CLIPPED_ESCAPE;
2117  } else {
2118  di1 = t1 - c1 * cbrtf(c1) * IQ;
2119  }
2120  } else
2121  di1 = t1 - vec[0] * IQ;
2122 
2123  if (cond1) {
2124  if (t2 >= CLIPPED_ESCAPE) {
2125  di2 = t2 - CLIPPED_ESCAPE;
2126  } else {
2127  di2 = t2 - c2 * cbrtf(c2) * IQ;
2128  }
2129  } else
2130  di2 = t2 - vec[1] * IQ;
2131 
2132  if (cond2) {
2133  if (t3 >= CLIPPED_ESCAPE) {
2134  di3 = t3 - CLIPPED_ESCAPE;
2135  } else {
2136  di3 = t3 - c3 * cbrtf(c3) * IQ;
2137  }
2138  } else
2139  di3 = t3 - vec2[0] * IQ;
2140 
2141  if (cond3) {
2142  if (t4 >= CLIPPED_ESCAPE) {
2143  di4 = t4 - CLIPPED_ESCAPE;
2144  } else {
2145  di4 = t4 - c4 * cbrtf(c4) * IQ;
2146  }
2147  } else
2148  di4 = t4 - vec2[1]*IQ;
2149 
2150  cost += di1 * di1 + di2 * di2
2151  + di3 * di3 + di4 * di4;
2152  }
2153 
2154  if (bits)
2155  *bits = curbits;
2156  return cost * lambda + curbits;
2157 }
2158 
2159 static float (*const get_band_cost_arr[])(struct AACEncContext *s,
2160  PutBitContext *pb, const float *in,
2161  const float *scaled, int size, int scale_idx,
2162  int cb, const float lambda, const float uplim,
2163  int *bits) = {
2164  get_band_cost_ZERO_mips,
2165  get_band_cost_SQUAD_mips,
2166  get_band_cost_SQUAD_mips,
2167  get_band_cost_UQUAD_mips,
2168  get_band_cost_UQUAD_mips,
2169  get_band_cost_SPAIR_mips,
2170  get_band_cost_SPAIR_mips,
2171  get_band_cost_UPAIR7_mips,
2172  get_band_cost_UPAIR7_mips,
2173  get_band_cost_UPAIR12_mips,
2174  get_band_cost_UPAIR12_mips,
2175  get_band_cost_ESC_mips,
2176  get_band_cost_NONE_mips, /* cb 12 doesn't exist */
2177  get_band_cost_ZERO_mips,
2178  get_band_cost_ZERO_mips,
2179  get_band_cost_ZERO_mips,
2180 };
2181 
/* Forward a cost query to the estimator registered for codebook cb.
 * Note that cb is expanded twice: once as table index, once as argument. */
#define get_band_cost(s, pb, in, scaled, size, scale_idx, cb, lambda, uplim, bits) \
    get_band_cost_arr[cb](s, pb, in, scaled, size, scale_idx, cb, lambda, uplim, bits)
2188 
/**
 * Cost of coding one band with codebook cb, without producing any output.
 *
 * Thin wrapper around get_band_cost() that passes NULL as the bitstream
 * writer; all other arguments are forwarded unchanged.
 *
 * @return the value returned by the estimator selected through cb
 */
static float quantize_band_cost(struct AACEncContext *s, const float *in,
                                const float *scaled, int size, int scale_idx,
                                int cb, const float lambda, const float uplim,
                                int *bits)
{
    float band_cost;

    band_cost = get_band_cost(s, NULL, in, scaled, size, scale_idx, cb,
                              lambda, uplim, bits);
    return band_cost;
}
2196 
2197 static void search_for_quantizers_twoloop_mips(AVCodecContext *avctx,
2198  AACEncContext *s,
2199  SingleChannelElement *sce,
2200  const float lambda)
2201 {
2202  int start = 0, i, w, w2, g;
2203  int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate / avctx->channels;
2204  float dists[128] = { 0 }, uplims[128];
2205  float maxvals[128];
2206  int fflag, minscaler;
2207  int its = 0;
2208  int allz = 0;
2209  float minthr = INFINITY;
2210 
2211  destbits = FFMIN(destbits, 5800);
2212  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
2213  for (g = 0; g < sce->ics.num_swb; g++) {
2214  int nz = 0;
2215  float uplim = 0.0f;
2216  for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
2217  FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
2218  uplim += band->threshold;
2219  if (band->energy <= band->threshold || band->threshold == 0.0f) {
2220  sce->zeroes[(w+w2)*16+g] = 1;
2221  continue;
2222  }
2223  nz = 1;
2224  }
2225  uplims[w*16+g] = uplim *512;
2226  sce->zeroes[w*16+g] = !nz;
2227  if (nz)
2228  minthr = FFMIN(minthr, uplim);
2229  allz |= nz;
2230  }
2231  }
2232  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
2233  for (g = 0; g < sce->ics.num_swb; g++) {
2234  if (sce->zeroes[w*16+g]) {
2235  sce->sf_idx[w*16+g] = SCALE_ONE_POS;
2236  continue;
2237  }
2238  sce->sf_idx[w*16+g] = SCALE_ONE_POS + FFMIN(log2f(uplims[w*16+g]/minthr)*4,59);
2239  }
2240  }
2241 
2242  if (!allz)
2243  return;
2244  abs_pow34_v(s->scoefs, sce->coeffs, 1024);
2245 
2246  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
2247  start = w*128;
2248  for (g = 0; g < sce->ics.num_swb; g++) {
2249  const float *scaled = s->scoefs + start;
2250  maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled);
2251  start += sce->ics.swb_sizes[g];
2252  }
2253  }
2254 
2255  do {
2256  int tbits, qstep;
2257  minscaler = sce->sf_idx[0];
2258  qstep = its ? 1 : 32;
2259  do {
2260  int prev = -1;
2261  tbits = 0;
2262  fflag = 0;
2263 
2264  if (qstep > 1) {
2265  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
2266  start = w*128;
2267  for (g = 0; g < sce->ics.num_swb; g++) {
2268  const float *coefs = sce->coeffs + start;
2269  const float *scaled = s->scoefs + start;
2270  int bits = 0;
2271  int cb;
2272 
2273  if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
2274  start += sce->ics.swb_sizes[g];
2275  continue;
2276  }
2277  minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
2278  cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
2279  for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
2280  int b;
2281  bits += quantize_band_cost_bits(s, coefs + w2*128,
2282  scaled + w2*128,
2283  sce->ics.swb_sizes[g],
2284  sce->sf_idx[w*16+g],
2285  cb,
2286  1.0f,
2287  INFINITY,
2288  &b);
2289  }
2290  if (prev != -1) {
2291  bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO];
2292  }
2293  tbits += bits;
2294  start += sce->ics.swb_sizes[g];
2295  prev = sce->sf_idx[w*16+g];
2296  }
2297  }
2298  }
2299  else {
2300  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
2301  start = w*128;
2302  for (g = 0; g < sce->ics.num_swb; g++) {
2303  const float *coefs = sce->coeffs + start;
2304  const float *scaled = s->scoefs + start;
2305  int bits = 0;
2306  int cb;
2307  float dist = 0.0f;
2308 
2309  if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
2310  start += sce->ics.swb_sizes[g];
2311  continue;
2312  }
2313  minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
2314  cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
2315  for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
2316  int b;
2317  dist += quantize_band_cost(s, coefs + w2*128,
2318  scaled + w2*128,
2319  sce->ics.swb_sizes[g],
2320  sce->sf_idx[w*16+g],
2321  cb,
2322  1.0f,
2323  INFINITY,
2324  &b);
2325  bits += b;
2326  }
2327  dists[w*16+g] = dist - bits;
2328  if (prev != -1) {
2329  bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO];
2330  }
2331  tbits += bits;
2332  start += sce->ics.swb_sizes[g];
2333  prev = sce->sf_idx[w*16+g];
2334  }
2335  }
2336  }
2337  if (tbits > destbits) {
2338  for (i = 0; i < 128; i++)
2339  if (sce->sf_idx[i] < 218 - qstep)
2340  sce->sf_idx[i] += qstep;
2341  } else {
2342  for (i = 0; i < 128; i++)
2343  if (sce->sf_idx[i] > 60 - qstep)
2344  sce->sf_idx[i] -= qstep;
2345  }
2346  qstep >>= 1;
2347  if (!qstep && tbits > destbits*1.02 && sce->sf_idx[0] < 217)
2348  qstep = 1;
2349  } while (qstep);
2350 
2351  fflag = 0;
2352  minscaler = av_clip(minscaler, 60, 255 - SCALE_MAX_DIFF);
2353  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
2354  for (g = 0; g < sce->ics.num_swb; g++) {
2355  int prevsc = sce->sf_idx[w*16+g];
2356  if (dists[w*16+g] > uplims[w*16+g] && sce->sf_idx[w*16+g] > 60) {
2357  if (find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1))
2358  sce->sf_idx[w*16+g]--;
2359  else
2360  sce->sf_idx[w*16+g]-=2;
2361  }
2362  sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF);
2363  sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], 219);
2364  if (sce->sf_idx[w*16+g] != prevsc)
2365  fflag = 1;
2366  sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
2367  }
2368  }
2369  its++;
2370  } while (fflag && its < 10);
2371 }
2372 
2373 static void search_for_ms_mips(AACEncContext *s, ChannelElement *cpe)
2374 {
2375  int start = 0, i, w, w2, g;
2376  float M[128], S[128];
2377  float *L34 = s->scoefs, *R34 = s->scoefs + 128, *M34 = s->scoefs + 128*2, *S34 = s->scoefs + 128*3;
2378  SingleChannelElement *sce0 = &cpe->ch[0];
2379  SingleChannelElement *sce1 = &cpe->ch[1];
2380  if (!cpe->common_window)
2381  return;
2382  for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
2383  for (g = 0; g < sce0->ics.num_swb; g++) {
2384  if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g]) {
2385  float dist1 = 0.0f, dist2 = 0.0f;
2386  for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
2387  FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
2388  FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
2389  float minthr = FFMIN(band0->threshold, band1->threshold);
2390  float maxthr = FFMAX(band0->threshold, band1->threshold);
2391  for (i = 0; i < sce0->ics.swb_sizes[g]; i+=4) {
2392  M[i ] = (sce0->coeffs[start+w2*128+i ]
2393  + sce1->coeffs[start+w2*128+i ]) * 0.5;
2394  M[i+1] = (sce0->coeffs[start+w2*128+i+1]
2395  + sce1->coeffs[start+w2*128+i+1]) * 0.5;
2396  M[i+2] = (sce0->coeffs[start+w2*128+i+2]
2397  + sce1->coeffs[start+w2*128+i+2]) * 0.5;
2398  M[i+3] = (sce0->coeffs[start+w2*128+i+3]
2399  + sce1->coeffs[start+w2*128+i+3]) * 0.5;
2400 
2401  S[i ] = M[i ]
2402  - sce1->coeffs[start+w2*128+i ];
2403  S[i+1] = M[i+1]
2404  - sce1->coeffs[start+w2*128+i+1];
2405  S[i+2] = M[i+2]
2406  - sce1->coeffs[start+w2*128+i+2];
2407  S[i+3] = M[i+3]
2408  - sce1->coeffs[start+w2*128+i+3];
2409  }
2410  abs_pow34_v(L34, sce0->coeffs+start+w2*128, sce0->ics.swb_sizes[g]);
2411  abs_pow34_v(R34, sce1->coeffs+start+w2*128, sce0->ics.swb_sizes[g]);
2412  abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]);
2413  abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]);
2414  dist1 += quantize_band_cost(s, sce0->coeffs + start + w2*128,
2415  L34,
2416  sce0->ics.swb_sizes[g],
2417  sce0->sf_idx[(w+w2)*16+g],
2418  sce0->band_type[(w+w2)*16+g],
2419  s->lambda / band0->threshold, INFINITY, NULL);
2420  dist1 += quantize_band_cost(s, sce1->coeffs + start + w2*128,
2421  R34,
2422  sce1->ics.swb_sizes[g],
2423  sce1->sf_idx[(w+w2)*16+g],
2424  sce1->band_type[(w+w2)*16+g],
2425  s->lambda / band1->threshold, INFINITY, NULL);
2426  dist2 += quantize_band_cost(s, M,
2427  M34,
2428  sce0->ics.swb_sizes[g],
2429  sce0->sf_idx[(w+w2)*16+g],
2430  sce0->band_type[(w+w2)*16+g],
2431  s->lambda / maxthr, INFINITY, NULL);
2432  dist2 += quantize_band_cost(s, S,
2433  S34,
2434  sce1->ics.swb_sizes[g],
2435  sce1->sf_idx[(w+w2)*16+g],
2436  sce1->band_type[(w+w2)*16+g],
2437  s->lambda / minthr, INFINITY, NULL);
2438  }
2439  cpe->ms_mask[w*16+g] = dist2 < dist1;
2440  }
2441  start += sce0->ics.swb_sizes[g];
2442  }
2443  }
2444 }
2445 #endif /*HAVE_MIPSFPU */
2446 
2447 static void codebook_trellis_rate_mips(AACEncContext *s, SingleChannelElement *sce,
2448  int win, int group_len, const float lambda)
2449 {
2450  BandCodingPath path[120][12];
2451  int w, swb, cb, start, size;
2452  int i, j;
2453  const int max_sfb = sce->ics.max_sfb;
2454  const int run_bits = sce->ics.num_windows == 1 ? 5 : 3;
2455  const int run_esc = (1 << run_bits) - 1;
2456  int idx, ppos, count;
2457  int stackrun[120], stackcb[120], stack_len;
2458  float next_minbits = INFINITY;
2459  int next_mincb = 0;
2460 
2461  abs_pow34_v(s->scoefs, sce->coeffs, 1024);
2462  start = win*128;
2463  for (cb = 0; cb < 12; cb++) {
2464  path[0][cb].cost = run_bits+4;
2465  path[0][cb].prev_idx = -1;
2466  path[0][cb].run = 0;
2467  }
2468  for (swb = 0; swb < max_sfb; swb++) {
2469  size = sce->ics.swb_sizes[swb];
2470  if (sce->zeroes[win*16 + swb]) {
2471  float cost_stay_here = path[swb][0].cost;
2472  float cost_get_here = next_minbits + run_bits + 4;
2473  if ( run_value_bits[sce->ics.num_windows == 8][path[swb][0].run]
2474  != run_value_bits[sce->ics.num_windows == 8][path[swb][0].run+1])
2475  cost_stay_here += run_bits;
2476  if (cost_get_here < cost_stay_here) {
2477  path[swb+1][0].prev_idx = next_mincb;
2478  path[swb+1][0].cost = cost_get_here;
2479  path[swb+1][0].run = 1;
2480  } else {
2481  path[swb+1][0].prev_idx = 0;
2482  path[swb+1][0].cost = cost_stay_here;
2483  path[swb+1][0].run = path[swb][0].run + 1;
2484  }
2485  next_minbits = path[swb+1][0].cost;
2486  next_mincb = 0;
2487  for (cb = 1; cb < 12; cb++) {
2488  path[swb+1][cb].cost = 61450;
2489  path[swb+1][cb].prev_idx = -1;
2490  path[swb+1][cb].run = 0;
2491  }
2492  } else {
2493  float minbits = next_minbits;
2494  int mincb = next_mincb;
2495  int startcb = sce->band_type[win*16+swb];
2496  next_minbits = INFINITY;
2497  next_mincb = 0;
2498  for (cb = 0; cb < startcb; cb++) {
2499  path[swb+1][cb].cost = 61450;
2500  path[swb+1][cb].prev_idx = -1;
2501  path[swb+1][cb].run = 0;
2502  }
2503  for (cb = startcb; cb < 12; cb++) {
2504  float cost_stay_here, cost_get_here;
2505  float bits = 0.0f;
2506  for (w = 0; w < group_len; w++) {
2507  bits += quantize_band_cost_bits(s, sce->coeffs + start + w*128,
2508  s->scoefs + start + w*128, size,
2509  sce->sf_idx[(win+w)*16+swb], cb,
2510  0, INFINITY, NULL);
2511  }
2512  cost_stay_here = path[swb][cb].cost + bits;
2513  cost_get_here = minbits + bits + run_bits + 4;
2514  if ( run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run]
2515  != run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run+1])
2516  cost_stay_here += run_bits;
2517  if (cost_get_here < cost_stay_here) {
2518  path[swb+1][cb].prev_idx = mincb;
2519  path[swb+1][cb].cost = cost_get_here;
2520  path[swb+1][cb].run = 1;
2521  } else {
2522  path[swb+1][cb].prev_idx = cb;
2523  path[swb+1][cb].cost = cost_stay_here;
2524  path[swb+1][cb].run = path[swb][cb].run + 1;
2525  }
2526  if (path[swb+1][cb].cost < next_minbits) {
2527  next_minbits = path[swb+1][cb].cost;
2528  next_mincb = cb;
2529  }
2530  }
2531  }
2532  start += sce->ics.swb_sizes[swb];
2533  }
2534 
2535  stack_len = 0;
2536  idx = 0;
2537  for (cb = 1; cb < 12; cb++)
2538  if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
2539  idx = cb;
2540  ppos = max_sfb;
2541  while (ppos > 0) {
2542  av_assert1(idx >= 0);
2543  cb = idx;
2544  stackrun[stack_len] = path[ppos][cb].run;
2545  stackcb [stack_len] = cb;
2546  idx = path[ppos-path[ppos][cb].run+1][cb].prev_idx;
2547  ppos -= path[ppos][cb].run;
2548  stack_len++;
2549  }
2550 
2551  start = 0;
2552  for (i = stack_len - 1; i >= 0; i--) {
2553  put_bits(&s->pb, 4, stackcb[i]);
2554  count = stackrun[i];
2555  memset(sce->zeroes + win*16 + start, !stackcb[i], count);
2556  for (j = 0; j < count; j++) {
2557  sce->band_type[win*16 + start] = stackcb[i];
2558  start++;
2559  }
2560  while (count >= run_esc) {
2561  put_bits(&s->pb, run_bits, run_esc);
2562  count -= run_esc;
2563  }
2564  put_bits(&s->pb, run_bits, count);
2565  }
2566 }
2567 #endif /* HAVE_INLINE_ASM */
2568 
2570 #if HAVE_INLINE_ASM
2571  AACCoefficientsEncoder *e = c->coder;
2572  int option = c->options.aac_coder;
2573 
2574  if (option == 2) {
2575 // Disabled due to failure with fate-aac-pns-encode
2576 // e->quantize_and_encode_band = quantize_and_encode_band_mips;
2577 // e->encode_window_bands_info = codebook_trellis_rate_mips;
2578 #if HAVE_MIPSFPU
2579  e->search_for_quantizers = search_for_quantizers_twoloop_mips;
2580  e->search_for_ms = search_for_ms_mips;
2581 #endif /* HAVE_MIPSFPU */
2582  }
2583 #endif /* HAVE_INLINE_ASM */
2584 }
static const uint8_t *const run_value_bits[2]
Definition: aacenctab.h:101
#define NULL
Definition: coverity.c:32
const char * s
Definition: avisynth_c.h:631
static void abs_pow34_v(float *out, const float *in, const int size)
Definition: aacenc_utils.h:39
static void put_bits(Jpeg2000EncoderContext *s, int val, int n)
put n times val bit
Definition: j2kenc.c:167
#define SCALE_DIFF_ZERO
codebook index corresponding to zero scalefactor indices difference
Definition: aac.h:152
const char * g
Definition: vf_curves.c:108
#define quantize_and_encode_band_cost(s, pb, in, quant, scaled, size, scale_idx, cb, lambda, uplim, bits, rtz)
FFPsyBand psy_bands[PSY_MAX_BANDS]
channel bands information
Definition: psymodel.h:48
#define SCALE_MAX_DIFF
maximum scalefactor difference allowed by standard
Definition: aac.h:151
const char * b
Definition: vf_curves.c:109
AACCoefficientsEncoder * coder
Definition: aacenc.h:98
int common_window
Set if channels share a common 'IndividualChannelStream' in bitstream.
Definition: aac.h:273
int prev_idx
pointer to the previous path point
Definition: aaccoder.c:67
uint8_t ms_mask[128]
Set if mid/side stereo is used for each scalefactor window band.
Definition: aac.h:276
float lambda
Definition: aacenc.h:101
#define ROUND_TO_ZERO
Definition: aacenc_utils.h:36
#define t7
Definition: regdef.h:35
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:37
static double cb(void *priv, double x, double y)
Definition: vf_geq.c:97
AACEncOptions options
encoding options
Definition: aacenc.h:82
#define M(a, b)
Definition: vp3dsp.c:44
AAC encoder context.
Definition: aacenc.h:80
uint8_t bits
Definition: crc.c:295
uint8_t
SingleChannelElement ch[2]
Definition: aac.h:279
float ff_aac_pow34sf_tab[428]
Definition: aac_tablegen.h:33
const uint8_t ff_aac_scalefactor_bits[121]
Definition: aactab.c:80
static const uint8_t run_bits[7][16]
Definition: h264_cavlc.c:229
#define t0
Definition: regdef.h:28
void ff_aac_coder_init_mips(AACEncContext *c)
static const uint64_t c1
Definition: murmur3.c:49
single band psychoacoustic information
Definition: psymodel.h:37
ptrdiff_t size
Definition: opengl_enc.c:101
float coeffs[1024]
coefficients for IMDCT, maybe processed
Definition: aac.h:258
static const uint8_t run_value_bits_short[16]
bits needed to code codebook run value for short windows
Definition: aacenctab.h:84
#define S(s, c, i)
#define t1
Definition: regdef.h:29
uint8_t max_sfb
number of scalefactor bands per group
Definition: aac.h:172
#define t3
Definition: regdef.h:31
float energy
Definition: psymodel.h:39
GLsizei count
Definition: opengl_enc.c:109
int num_swb
number of scalefactor window bands
Definition: aac.h:180
#define FFMAX(a, b)
Definition: common.h:79
float cost
path cost
Definition: aaccoder.c:68
Libavcodec external API header.
void(* search_for_quantizers)(AVCodecContext *avctx, struct AACEncContext *s, SingleChannelElement *sce, const float lambda)
Definition: aacenc.h:56
const float *const ff_aac_codebook_vectors[]
Definition: aactab.c:906
#define POW_SF2_ZERO
ff_aac_pow2sf_tab index corresponding to pow(2, 0);
#define SCALE_DIV_512
scalefactor difference that corresponds to scale difference in 512 times
Definition: aac.h:148
int bit_rate
the average bitrate
Definition: avcodec.h:1567
#define av_assert1(cond)
assert() equivalent, that does not lie in speed critical code.
Definition: avassert.h:53
int cur_channel
Definition: aacenc.h:99
const uint8_t *const ff_aac_spectral_bits[11]
Definition: aactab.c:410
#define FFMIN(a, b)
Definition: common.h:81
typedef void(APIENTRY *FF_PFNGLACTIVETEXTUREPROC)(GLenum texture)
AAC definitions and structures.
PutBitContext pb
Definition: aacenc.h:83
#define ROUND_STANDARD
Definition: aacenc_utils.h:35
#define av_log2
Definition: intmath.h:100
#define INFINITY
Definition: math.h:27
void(* search_for_ms)(struct AACEncContext *s, ChannelElement *cpe)
Definition: aacenc.h:70
static int find_min_book(float maxval, int sf)
Definition: aacenc_utils.h:86
int sample_rate
samples per second
Definition: avcodec.h:2262
main external API structure.
Definition: avcodec.h:1502
IndividualChannelStream ics
Definition: aac.h:246
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(constuint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(constuint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(constint16_t *) pi >>8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t,*(constint16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t,*(constint16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(constint32_t *) pi >>24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t,*(constint32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t,*(constint32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(constfloat *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(constfloat *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(constfloat *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(constdouble *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(constdouble *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(constdouble *) pi *(1U<< 31))))#defineSET_CONV_FUNC_GROUP(ofmt, ifmt) staticvoidset_generic_function(AudioConvert *ac){}voidff_audio_convert_free(AudioConvert **ac){if(!*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);}AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enumAVSampleFormatout_fmt, enumAVSampleFormatin_fmt, intchannels, intsample_rate, 
intapply_map){AudioConvert *ac;intin_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) returnNULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method!=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt)>2){ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc){av_free(ac);returnNULL;}returnac;}in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar){ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar?ac->channels:1;}elseif(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;elseac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);returnac;}intff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in){intuse_generic=1;intlen=in->nb_samples;intp;if(ac->dc){av_log(ac->avr, AV_LOG_TRACE,"%dsamples-audio_convert:%sto%s(dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));returnff_convert_dither(ac-> in
static av_always_inline float cbrtf(float x)
Definition: libm.h:59
structure used in optimal codebook search
Definition: aaccoder.c:66
uint8_t group_len[8]
Definition: aac.h:176
Replacements for frequently missing libm functions.
option
Definition: libkvazaar.c:224
const uint8_t * swb_sizes
table of scalefactor band sizes for a particular window
Definition: aac.h:179
#define t5
Definition: regdef.h:33
FFPsyContext psy
Definition: aacenc.h:96
static float(*const quantize_and_encode_band_cost_arr[])(struct AACEncContext *s, PutBitContext *pb, const float *in, float *quant, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, int *bits)
uint8_t zeroes[128]
band is not coded (used by encoder)
Definition: aac.h:254
int sf_idx[128]
scalefactor indices (used by encoder)
Definition: aac.h:253
int aac_coder
Definition: aacenc.h:46
#define SCALE_ONE_POS
scalefactor index that corresponds to scale=1.0
Definition: aac.h:149
#define t6
Definition: regdef.h:34
Single Channel Element - used for both SCE and LFE elements.
Definition: aac.h:245
#define log2f(x)
Definition: libm.h:127
static double c[64]
ChannelElement * cpe
channel elements
Definition: aacenc.h:95
float ff_aac_pow2sf_tab[428]
Definition: aac_tablegen.h:32
static const uint64_t c2
Definition: murmur3.c:50
channel element - generic struct for SCE/CPE/CCE/LFE
Definition: aac.h:270
const uint16_t *const ff_aac_spectral_codes[11]
Definition: aactab.c:405
#define t4
Definition: regdef.h:32
int len
int channels
number of audio channels
Definition: avcodec.h:2263
static const uint8_t run_value_bits_long[64]
bits needed to code codebook run value for long windows
Definition: aacenctab.h:76
FFPsyChannel * ch
single channel information
Definition: psymodel.h:80
enum BandType band_type[128]
band types
Definition: aac.h:249
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(constuint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(constuint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(constint16_t *) pi >>8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t,*(constint16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t,*(constint16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(constint32_t *) pi >>24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t,*(constint32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t,*(constint32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(constfloat *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(constfloat *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(constfloat *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(constdouble *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(constdouble *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(constdouble *) pi *(1U<< 31))))#defineSET_CONV_FUNC_GROUP(ofmt, ifmt) staticvoidset_generic_function(AudioConvert *ac){}voidff_audio_convert_free(AudioConvert **ac){if(!*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);}AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enumAVSampleFormatout_fmt, enumAVSampleFormatin_fmt, intchannels, intsample_rate, 
intapply_map){AudioConvert *ac;intin_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) returnNULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method!=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt)>2){ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc){av_free(ac);returnNULL;}returnac;}in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar){ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar?ac->channels:1;}elseif(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;elseac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);returnac;}intff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in){intuse_generic=1;intlen=in->nb_samples;intp;if(ac->dc){av_log(ac->avr, AV_LOG_TRACE,"%dsamples-audio_convert:%sto%s(dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));returnff_convert_dither(ac-> out
static float find_max_val(int group_len, int swb_size, const float *scaled)
Definition: aacenc_utils.h:74
void INT64 start
Definition: avisynth_c.h:553
float threshold
Definition: psymodel.h:40
AAC data declarations.
float scoefs[1024]
scaled coefficients
Definition: aacenc.h:104
static float quantize_band_cost(struct AACEncContext *s, const float *in, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, int *bits, int rtz)
#define t2
Definition: regdef.h:30
bitstream writer API