FFmpeg
itx_1d.c
Go to the documentation of this file.
1 /*
2  * VVC 1D transform
3  *
4  * Copyright (C) 2023 Nuo Mi
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
23 /* The copyright in this software is being made available under the BSD
24  * License, included below. This software may be subject to other third party
25  * and contributor rights, including patent rights, and no such rights are
26  * granted under this license.
27  *
28  * Copyright (c) 2010-2021, ITU/ISO/IEC
29  * All rights reserved.
30  *
31  * Redistribution and use in source and binary forms, with or without
32  * modification, are permitted provided that the following conditions are met:
33  *
34  * * Redistributions of source code must retain the above copyright notice,
35  * this list of conditions and the following disclaimer.
36  * * Redistributions in binary form must reproduce the above copyright notice,
37  * this list of conditions and the following disclaimer in the documentation
38  * and/or other materials provided with the distribution.
39  * * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
40  * be used to endorse or promote products derived from this software without
41  * specific prior written permission.
42  *
43  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
44  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
45  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
46  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
47  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
48  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
49  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
50  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
51  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
52  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
53  * THE POSSIBILITY OF SUCH DAMAGE.
54  */
55 
/* Optimized with a partial butterfly; see Hung C-Y, Landman P (1997),
 * "Compact inverse discrete cosine transform circuit for MPEG video decoding".
 */
59 
60 #include "data.h"
61 #include "itx_1d.h"
62 #include "libavutil/avutil.h"
63 
/*
 * Zero-skipping guards used by the inverse transforms in this file.
 *
 * GN(m) evaluates to m when `nz` is greater than N and to 0 otherwise; since
 * the conditional operator only evaluates the selected operand, the guarded
 * expression m is not computed when it is gated off.
 *
 * NOTE: these macros expand to a reference to an identifier named `nz` that is
 * not one of their arguments, so they can only be used where a variable of
 * that name is in scope.
 */
#define G2(m) ((nz > 2) ? (m) : 0)
#define G4(m) ((nz > 4) ? (m) : 0)
#define G8(m) ((nz > 8) ? (m) : 0)
#define G16(m) ((nz > 16) ? (m) : 0)
68 
69 /*
70 transmatrix[2][2] = {
71  { a, a },
72  { a, -a },
73 }
74  */
/**
 * In-place 2-point inverse DCT-II.
 *
 * Reads coeffs[0] and coeffs[stride] and overwrites them with
 * 64 * (x0 + x1) and 64 * (x0 - x1) respectively. No rounding or shifting is
 * applied here; the results carry the integer matrix scale.
 *
 * @param coeffs first element of the vector to transform
 * @param stride distance, in elements, between consecutive vector entries
 * @param nz     not used by this size; present so the signature matches the
 *               larger transforms in this file
 */
void ff_vvc_inv_dct2_2(int *coeffs, const ptrdiff_t stride, const size_t nz)
{
    const int a = 64;
    const int x0 = coeffs[0 * stride], x1 = coeffs[1 * stride];

    coeffs[0 * stride] = a * (x0 + x1);
    coeffs[1 * stride] = a * (x0 - x1);
}
83 
84 /*
85 transmatrix[4][4] = {
86  { a, a, a, a},
87  { b, c, -c, -b},
88  { a, -a, -a, a},
89  { c, -b, b, -c},
90 }
91  */
/**
 * In-place 4-point inverse DCT-II, computed as a partial butterfly.
 *
 * The even-indexed inputs (x0, x2) are combined into E[], the odd-indexed
 * inputs (x1, x3) into O[]. The first half of the output is E + O and the
 * second half is E - O in mirrored order. No rounding or shifting is applied
 * here; the results carry the integer matrix scale.
 *
 * @param coeffs first element of the vector to transform
 * @param stride distance, in elements, between consecutive vector entries
 * @param nz     terms in x2 and x3 are only evaluated when nz > 2, otherwise
 *               they contribute 0; presumably the number of leading
 *               coefficients that may be non-zero -- confirm against callers
 */
void ff_vvc_inv_dct2_4(int *coeffs, const ptrdiff_t stride, const size_t nz)
{
    const int a = 64, b = 83, c = 36;
    /* All four inputs are loaded up front because the output overwrites them. */
    const int x0 = coeffs[0 * stride], x1 = coeffs[1 * stride];
    const int x2 = coeffs[2 * stride], x3 = coeffs[3 * stride];
    /* Even part: rows 0 and 2 of the transform matrix. */
    const int E[2] = {
        a * (x0 + G2(+x2)),
        a * (x0 + G2(-x2)),
    };
    /* Odd part: rows 1 and 3 of the transform matrix. */
    const int O[2] = {
        b * x1 + G2(+c * x3),
        c * x1 + G2(-b * x3),
    };

    coeffs[0 * stride] = E[0] + O[0];
    coeffs[1 * stride] = E[1] + O[1];
    coeffs[2 * stride] = E[1] - O[1];
    coeffs[3 * stride] = E[0] - O[0];
}
111 
112 /*
113 transmatrix[8][8] = {
114  { a, a, a, a, a, a, a, a},
115  { d, e, f, g, -g, -f, -e, -d},
116  { b, c, -c, -b, -b, -c, c, b},
117  { e, -g, -d, -f, f, d, g, -e},
118  { a, -a, -a, a, a, -a, -a, a},
119  { f, -d, g, e, -e, -g, d, -f},
120  { c, -b, b, -c, -c, b, -b, c},
121  { g, -f, e, -d, d, -e, f, -g},
122 }
123  */
/**
 * In-place 8-point inverse DCT-II, computed as a partial butterfly.
 *
 * The even-indexed inputs are reduced recursively (EE from x0/x4, EO from
 * x2/x6, combined into E[]); the odd-indexed inputs form O[]. The first half
 * of the output is E + O and the second half is E - O in mirrored order.
 * No rounding or shifting is applied here.
 *
 * @param coeffs first element of the vector to transform
 * @param stride distance, in elements, between consecutive vector entries
 * @param nz     zero-skipping threshold: terms in x2..x3 are only evaluated
 *               when nz > 2 and terms in x4..x7 only when nz > 4; x0 and x1
 *               are always used. Presumably the number of leading
 *               coefficients that may be non-zero -- confirm against callers
 */
void ff_vvc_inv_dct2_8(int *coeffs, const ptrdiff_t stride, const size_t nz)
{
    const int a = 64, b = 83, c = 36, d = 89, e = 75, f = 50, g = 18;
    /* All eight inputs are loaded up front because the output overwrites them. */
    const int x0 = coeffs[0 * stride], x1 = coeffs[1 * stride];
    const int x2 = coeffs[2 * stride], x3 = coeffs[3 * stride];
    const int x4 = coeffs[4 * stride], x5 = coeffs[5 * stride];
    const int x6 = coeffs[6 * stride], x7 = coeffs[7 * stride];
    /* Even-even part: inputs 0 and 4. */
    const int EE[2] = {
        a * (x0 + G4(+x4)),
        a * (x0 + G4(-x4)),
    };
    /* Even-odd part: inputs 2 and 6. */
    const int EO[2] = {
        G2(b * x2) + G4(+c * x6),
        G2(c * x2) + G4(-b * x6),
    };
    const int E[4] = {
        EE[0] + EO[0], EE[1] + EO[1],
        EE[1] - EO[1], EE[0] - EO[0],
    };
    /* Odd part: inputs 1, 3, 5 and 7. */
    const int O[4] = {
        d * x1 + G2(+e * x3) + G4(+f * x5 + g * x7),
        e * x1 + G2(-g * x3) + G4(-d * x5 - f * x7),
        f * x1 + G2(-d * x3) + G4(+g * x5 + e * x7),
        g * x1 + G2(-f * x3) + G4(+e * x5 - d * x7),
    };

    coeffs[0 * stride] = E[0] + O[0];
    coeffs[1 * stride] = E[1] + O[1];
    coeffs[2 * stride] = E[2] + O[2];
    coeffs[3 * stride] = E[3] + O[3];
    coeffs[4 * stride] = E[3] - O[3];
    coeffs[5 * stride] = E[2] - O[2];
    coeffs[6 * stride] = E[1] - O[1];
    coeffs[7 * stride] = E[0] - O[0];
}
159 
160 /*
161 transmatrix[16][16] = {
162  { a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a},
163  { h, i, j, k, l, m, n, o, -o, -n, -m, -l, -k, -j, -i, -h},
164  { d, e, f, g, -g, -f, -e, -d, -d, -e, -f, -g, g, f, e, d},
165  { i, l, o, -m, -j, -h, -k, -n, n, k, h, j, m, -o, -l, -i},
166  { b, c, -c, -b, -b, -c, c, b, b, c, -c, -b, -b, -c, c, b},
167  { j, o, -k, -i, -n, l, h, m, -m, -h, -l, n, i, k, -o, -j},
168  { e, -g, -d, -f, f, d, g, -e, -e, g, d, f, -f, -d, -g, e},
169  { k, -m, -i, o, h, n, -j, -l, l, j, -n, -h, -o, i, m, -k},
170  { a, -a, -a, a, a, -a, -a, a, a, -a, -a, a, a, -a, -a, a},
171  { l, -j, -n, h, -o, -i, m, k, -k, -m, i, o, -h, n, j, -l},
172  { f, -d, g, e, -e, -g, d, -f, -f, d, -g, -e, e, g, -d, f},
173  { m, -h, l, n, -i, k, o, -j, j, -o, -k, i, -n, -l, h, -m},
174  { c, -b, b, -c, -c, b, -b, c, c, -b, b, -c, -c, b, -b, c},
175  { n, -k, h, -j, m, o, -l, i, -i, l, -o, -m, j, -h, k, -n},
176  { g, -f, e, -d, d, -e, f, -g, -g, f, -e, d, -d, e, -f, g},
177  { o, -n, m, -l, k, -j, i, -h, h, -i, j, -k, l, -m, n, -o},
178 }
179  */
/**
 * In-place 16-point inverse DCT-II, computed as a partial butterfly.
 *
 * The even-indexed inputs are reduced recursively (EEE/EEO -> EE, then
 * EE/EO -> E); the odd-indexed inputs form O[]. The first half of the output
 * is E + O and the second half is E - O in mirrored order. No rounding or
 * shifting is applied here.
 *
 * @param coeffs first element of the vector to transform
 * @param stride distance, in elements, between consecutive vector entries
 * @param nz     zero-skipping threshold: terms in x2..x3 are only evaluated
 *               when nz > 2, x4..x7 when nz > 4 and x8..x15 when nz > 8;
 *               x0 and x1 are always used. Presumably the number of leading
 *               coefficients that may be non-zero -- confirm against callers
 */
void ff_vvc_inv_dct2_16(int *coeffs, const ptrdiff_t stride, const size_t nz)
{
    const int a = 64, b = 83, c = 36, d = 89, e = 75, f = 50, g = 18, h = 90;
    const int i = 87, j = 80, k = 70, l = 57, m = 43, n = 25, o = 9;
    /* All sixteen inputs are loaded up front because the output overwrites them. */
    const int x0 = coeffs[0 * stride], x1 = coeffs[1 * stride];
    const int x2 = coeffs[2 * stride], x3 = coeffs[3 * stride];
    const int x4 = coeffs[4 * stride], x5 = coeffs[5 * stride];
    const int x6 = coeffs[6 * stride], x7 = coeffs[7 * stride];
    const int x8 = coeffs[8 * stride], x9 = coeffs[9 * stride];
    const int x10 = coeffs[10 * stride], x11 = coeffs[11 * stride];
    const int x12 = coeffs[12 * stride], x13 = coeffs[13 * stride];
    const int x14 = coeffs[14 * stride], x15 = coeffs[15 * stride];
    /* Inputs 0 and 8. */
    const int EEE[2] = {
        a * (x0 + G8(+x8)),
        a * (x0 + G8(-x8)),
    };
    /* Inputs 4 and 12. */
    const int EEO[2] = {
        G4(b * x4) + G8(+c * x12),
        G4(c * x4) + G8(-b * x12),
    };
    const int EE[4] = {
        EEE[0] + EEO[0], EEE[1] + EEO[1],
        EEE[1] - EEO[1], EEE[0] - EEO[0],
    };
    /* Inputs 2, 6, 10 and 14. */
    const int EO[4] = {
        G2(d * x2) + G4(+e * x6) + G8(+f * x10 + g * x14),
        G2(e * x2) + G4(-g * x6) + G8(-d * x10 - f * x14),
        G2(f * x2) + G4(-d * x6) + G8(+g * x10 + e * x14),
        G2(g * x2) + G4(-f * x6) + G8(+e * x10 - d * x14),
    };
    const int E[8] = {
        EE[0] + EO[0], EE[1] + EO[1], EE[2] + EO[2], EE[3] + EO[3],
        EE[3] - EO[3], EE[2] - EO[2], EE[1] - EO[1], EE[0] - EO[0],
    };
    /* Odd part: inputs 1, 3, ..., 15. */
    const int O[8] = {
        h * x1 + G2(+i * x3) + G4(+j * x5 + k * x7) + G8(+l * x9 + m * x11 + n * x13 + o * x15),
        i * x1 + G2(+l * x3) + G4(+o * x5 - m * x7) + G8(-j * x9 - h * x11 - k * x13 - n * x15),
        j * x1 + G2(+o * x3) + G4(-k * x5 - i * x7) + G8(-n * x9 + l * x11 + h * x13 + m * x15),
        k * x1 + G2(-m * x3) + G4(-i * x5 + o * x7) + G8(+h * x9 + n * x11 - j * x13 - l * x15),
        l * x1 + G2(-j * x3) + G4(-n * x5 + h * x7) + G8(-o * x9 - i * x11 + m * x13 + k * x15),
        m * x1 + G2(-h * x3) + G4(+l * x5 + n * x7) + G8(-i * x9 + k * x11 + o * x13 - j * x15),
        n * x1 + G2(-k * x3) + G4(+h * x5 - j * x7) + G8(+m * x9 + o * x11 - l * x13 + i * x15),
        o * x1 + G2(-n * x3) + G4(+m * x5 - l * x7) + G8(+k * x9 - j * x11 + i * x13 - h * x15),
    };

    coeffs[0 * stride] = E[0] + O[0];
    coeffs[1 * stride] = E[1] + O[1];
    coeffs[2 * stride] = E[2] + O[2];
    coeffs[3 * stride] = E[3] + O[3];
    coeffs[4 * stride] = E[4] + O[4];
    coeffs[5 * stride] = E[5] + O[5];
    coeffs[6 * stride] = E[6] + O[6];
    coeffs[7 * stride] = E[7] + O[7];
    coeffs[8 * stride] = E[7] - O[7];
    coeffs[9 * stride] = E[6] - O[6];
    coeffs[10 * stride] = E[5] - O[5];
    coeffs[11 * stride] = E[4] - O[4];
    coeffs[12 * stride] = E[3] - O[3];
    coeffs[13 * stride] = E[2] - O[2];
    coeffs[14 * stride] = E[1] - O[1];
    coeffs[15 * stride] = E[0] - O[0];
}
242 
243 /*
244 transMatrix[32][32] = {
245  { a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a},
246  { p, q, r, s, t, u, v, w, x, y, z, A, B, C, D, E, -E, -D, -C, -B, -A, -z, -y, -x, -w, -v, -u, -t, -s, -r, -q, -p},
247  { h, i, j, k, l, m, n, o, -o, -n, -m, -l, -k, -j, -i, -h, -h, -i, -j, -k, -l, -m, -n, -o, o, n, m, l, k, j, i, h},
248  { q, t, w, z, C, -E, -B, -y, -v, -s, -p, -r, -u, -x, -A, -D, D, A, x, u, r, p, s, v, y, B, E, -C, -z, -w, -t, -q},
249  { d, e, f, g, -g, -f, -e, -d, -d, -e, -f, -g, g, f, e, d, d, e, f, g, -g, -f, -e, -d, -d, -e, -f, -g, g, f, e, d},
250  { r, w, B, -D, -y, -t, -p, -u, -z, -E, A, v, q, s, x, C, -C, -x, -s, -q, -v, -A, E, z, u, p, t, y, D, -B, -w, -r},
251  { i, l, o, -m, -j, -h, -k, -n, n, k, h, j, m, -o, -l, -i, -i, -l, -o, m, j, h, k, n, -n, -k, -h, -j, -m, o, l, i},
252  { s, z, -D, -w, -p, -v, -C, A, t, r, y, -E, -x, -q, -u, -B, B, u, q, x, E, -y, -r, -t, -A, C, v, p, w, D, -z, -s},
253  { b, c, -c, -b, -b, -c, c, b, b, c, -c, -b, -b, -c, c, b, b, c, -c, -b, -b, -c, c, b, b, c, -c, -b, -b, -c, c, b},
254  { t, C, -y, -p, -x, D, u, s, B, -z, -q, -w, E, v, r, A, -A, -r, -v, -E, w, q, z, -B, -s, -u, -D, x, p, y, -C, -t},
255  { j, o, -k, -i, -n, l, h, m, -m, -h, -l, n, i, k, -o, -j, -j, -o, k, i, n, -l, -h, -m, m, h, l, -n, -i, -k, o, j},
256  { u, -E, -t, -v, D, s, w, -C, -r, -x, B, q, y, -A, -p, -z, z, p, A, -y, -q, -B, x, r, C, -w, -s, -D, v, t, E, -u},
257  { e, -g, -d, -f, f, d, g, -e, -e, g, d, f, -f, -d, -g, e, e, -g, -d, -f, f, d, g, -e, -e, g, d, f, -f, -d, -g, e},
258  { v, -B, -p, -C, u, w, -A, -q, -D, t, x, -z, -r, -E, s, y, -y, -s, E, r, z, -x, -t, D, q, A, -w, -u, C, p, B, -v},
259  { k, -m, -i, o, h, n, -j, -l, l, j, -n, -h, -o, i, m, -k, -k, m, i, -o, -h, -n, j, l, -l, -j, n, h, o, -i, -m, k},
260  { w, -y, -u, A, s, -C, -q, E, p, D, -r, -B, t, z, -v, -x, x, v, -z, -t, B, r, -D, -p, -E, q, C, -s, -A, u, y, -w},
261  { a, -a, -a, a, a, -a, -a, a, a, -a, -a, a, a, -a, -a, a, a, -a, -a, a, a, -a, -a, a, a, -a, -a, a, a, -a, -a, a},
262  { x, -v, -z, t, B, -r, -D, p, -E, -q, C, s, -A, -u, y, w, -w, -y, u, A, -s, -C, q, E, -p, D, r, -B, -t, z, v, -x},
263  { l, -j, -n, h, -o, -i, m, k, -k, -m, i, o, -h, n, j, -l, -l, j, n, -h, o, i, -m, -k, k, m, -i, -o, h, -n, -j, l},
264  { y, -s, -E, r, -z, -x, t, D, -q, A, w, -u, -C, p, -B, -v, v, B, -p, C, u, -w, -A, q, -D, -t, x, z, -r, E, s, -y},
265  { f, -d, g, e, -e, -g, d, -f, -f, d, -g, -e, e, g, -d, f, f, -d, g, e, -e, -g, d, -f, -f, d, -g, -e, e, g, -d, f},
266  { z, -p, A, y, -q, B, x, -r, C, w, -s, D, v, -t, E, u, -u, -E, t, -v, -D, s, -w, -C, r, -x, -B, q, -y, -A, p, -z},
267  { m, -h, l, n, -i, k, o, -j, j, -o, -k, i, -n, -l, h, -m, -m, h, -l, -n, i, -k, -o, j, -j, o, k, -i, n, l, -h, m},
268  { A, -r, v, -E, -w, q, -z, -B, s, -u, D, x, -p, y, C, -t, t, -C, -y, p, -x, -D, u, -s, B, z, -q, w, E, -v, r, -A},
269  { c, -b, b, -c, -c, b, -b, c, c, -b, b, -c, -c, b, -b, c, c, -b, b, -c, -c, b, -b, c, c, -b, b, -c, -c, b, -b, c},
270  { B, -u, q, -x, E, y, -r, t, -A, -C, v, -p, w, -D, -z, s, -s, z, D, -w, p, -v, C, A, -t, r, -y, -E, x, -q, u, -B},
271  { n, -k, h, -j, m, o, -l, i, -i, l, -o, -m, j, -h, k, -n, -n, k, -h, j, -m, -o, l, -i, i, -l, o, m, -j, h, -k, n},
272  { C, -x, s, -q, v, -A, -E, z, -u, p, -t, y, -D, -B, w, -r, r, -w, B, D, -y, t, -p, u, -z, E, A, -v, q, -s, x, -C},
273  { g, -f, e, -d, d, -e, f, -g, -g, f, -e, d, -d, e, -f, g, g, -f, e, -d, d, -e, f, -g, -g, f, -e, d, -d, e, -f, g},
274  { D, -A, x, -u, r, -p, s, -v, y, -B, E, C, -z, w, -t, q, -q, t, -w, z, -C, -E, B, -y, v, -s, p, -r, u, -x, A, -D},
275  { o, -n, m, -l, k, -j, i, -h, h, -i, j, -k, l, -m, n, -o, -o, n, -m, l, -k, j, -i, h, -h, i, -j, k, -l, m, -n, o},
276  { E, -D, C, -B, A, -z, y, -x, w, -v, u, -t, s, -r, q, -p, p, -q, r, -s, t, -u, v, -w, x, -y, z, -A, B, -C, D, -E},
277 }
278  */
/**
 * In-place 32-point inverse DCT-II, computed as a partial butterfly.
 *
 * The even-indexed inputs are reduced recursively (EEEE/EEEO -> EEE,
 * EEE/EEO -> EE, EE/EO -> E); the odd-indexed inputs form O[]. The first half
 * of the output is E + O and the second half is E - O in mirrored order.
 * No rounding or shifting is applied here.
 *
 * @param coeffs first element of the vector to transform
 * @param stride distance, in elements, between consecutive vector entries
 * @param nz     zero-skipping threshold: terms in x2..x3 are only evaluated
 *               when nz > 2, x4..x7 when nz > 4, x8..x15 when nz > 8 and
 *               x16..x31 when nz > 16; x0 and x1 are always used. Presumably
 *               the number of leading coefficients that may be non-zero --
 *               confirm against callers
 */
void ff_vvc_inv_dct2_32(int *coeffs, const ptrdiff_t stride, const size_t nz)
{
    /* Matrix coefficient "E" is spelled E_ because E names the even-part array. */
    const int a = 64, b = 83, c = 36, d = 89, e = 75, f = 50, g = 18, h = 90;
    const int i = 87, j = 80, k = 70, l = 57, m = 43, n = 25, o = 9, p = 90;
    const int q = 90, r = 88, s = 85, t = 82, u = 78, v = 73, w = 67, x = 61;
    const int y = 54, z = 46, A = 38, B = 31, C = 22, D = 13, E_ = 4;
    /* All 32 inputs are loaded up front because the output overwrites them. */
    const int x0 = coeffs[0 * stride], x1 = coeffs[1 * stride];
    const int x2 = coeffs[2 * stride], x3 = coeffs[3 * stride];
    const int x4 = coeffs[4 * stride], x5 = coeffs[5 * stride];
    const int x6 = coeffs[6 * stride], x7 = coeffs[7 * stride];
    const int x8 = coeffs[8 * stride], x9 = coeffs[9 * stride];
    const int x10 = coeffs[10 * stride], x11 = coeffs[11 * stride];
    const int x12 = coeffs[12 * stride], x13 = coeffs[13 * stride];
    const int x14 = coeffs[14 * stride], x15 = coeffs[15 * stride];
    const int x16 = coeffs[16 * stride], x17 = coeffs[17 * stride];
    const int x18 = coeffs[18 * stride], x19 = coeffs[19 * stride];
    const int x20 = coeffs[20 * stride], x21 = coeffs[21 * stride];
    const int x22 = coeffs[22 * stride], x23 = coeffs[23 * stride];
    const int x24 = coeffs[24 * stride], x25 = coeffs[25 * stride];
    const int x26 = coeffs[26 * stride], x27 = coeffs[27 * stride];
    const int x28 = coeffs[28 * stride], x29 = coeffs[29 * stride];
    const int x30 = coeffs[30 * stride], x31 = coeffs[31 * stride];
    /* Inputs 0 and 16. */
    const int EEEE[2] = {
        a * (x0 + G16(+x16)),
        a * (x0 + G16(-x16)),
    };
    /* Inputs 8 and 24. */
    const int EEEO[2] = {
        G8(b * x8) + G16(+c * x24),
        G8(c * x8) + G16(-b * x24),
    };
    const int EEE[4] = {
        EEEE[0] + EEEO[0], EEEE[1] + EEEO[1],
        EEEE[1] - EEEO[1], EEEE[0] - EEEO[0],
    };
    /* Inputs 4, 12, 20 and 28. */
    const int EEO[4] = {
        G4(d * x4) + G8(+e * x12) + G16(+f * x20 + g * x28),
        G4(e * x4) + G8(-g * x12) + G16(-d * x20 - f * x28),
        G4(f * x4) + G8(-d * x12) + G16(+g * x20 + e * x28),
        G4(g * x4) + G8(-f * x12) + G16(+e * x20 - d * x28),
    };
    const int EE[8] = {
        EEE[0] + EEO[0], EEE[1] + EEO[1], EEE[2] + EEO[2], EEE[3] + EEO[3],
        EEE[3] - EEO[3], EEE[2] - EEO[2], EEE[1] - EEO[1], EEE[0] - EEO[0],
    };
    /* Inputs 2, 6, ..., 30. */
    const int EO[8] = {
        G2(h * x2) + G4(+i * x6) + G8(+j * x10 + k * x14) + G16(+l * x18 + m * x22 + n * x26 + o * x30),
        G2(i * x2) + G4(+l * x6) + G8(+o * x10 - m * x14) + G16(-j * x18 - h * x22 - k * x26 - n * x30),
        G2(j * x2) + G4(+o * x6) + G8(-k * x10 - i * x14) + G16(-n * x18 + l * x22 + h * x26 + m * x30),
        G2(k * x2) + G4(-m * x6) + G8(-i * x10 + o * x14) + G16(+h * x18 + n * x22 - j * x26 - l * x30),
        G2(l * x2) + G4(-j * x6) + G8(-n * x10 + h * x14) + G16(-o * x18 - i * x22 + m * x26 + k * x30),
        G2(m * x2) + G4(-h * x6) + G8(+l * x10 + n * x14) + G16(-i * x18 + k * x22 + o * x26 - j * x30),
        G2(n * x2) + G4(-k * x6) + G8(+h * x10 - j * x14) + G16(+m * x18 + o * x22 - l * x26 + i * x30),
        G2(o * x2) + G4(-n * x6) + G8(+m * x10 - l * x14) + G16(+k * x18 - j * x22 + i * x26 - h * x30),
    };
    const int E[16] = {
        EE[0] + EO[0], EE[1] + EO[1], EE[2] + EO[2], EE[3] + EO[3], EE[4] + EO[4], EE[5] + EO[5], EE[6] + EO[6], EE[7] + EO[7],
        EE[7] - EO[7], EE[6] - EO[6], EE[5] - EO[5], EE[4] - EO[4], EE[3] - EO[3], EE[2] - EO[2], EE[1] - EO[1], EE[0] - EO[0],
    };
    /* Odd part: inputs 1, 3, ..., 31. */
    const int O[16] = {
        p * x1 + G2(+q * x3) + G4(+r * x5 + s * x7) + G8(+t * x9 + u * x11 + v * x13 + w * x15) + G16(+x * x17 + y * x19 + z * x21 + A * x23 + B * x25 + C * x27 + D * x29 + E_ * x31),
        q * x1 + G2(+t * x3) + G4(+w * x5 + z * x7) + G8(+C * x9 - E_ * x11 - B * x13 - y * x15) + G16(-v * x17 - s * x19 - p * x21 - r * x23 - u * x25 - x * x27 - A * x29 - D * x31),
        r * x1 + G2(+w * x3) + G4(+B * x5 - D * x7) + G8(-y * x9 - t * x11 - p * x13 - u * x15) + G16(-z * x17 - E_ * x19 + A * x21 + v * x23 + q * x25 + s * x27 + x * x29 + C * x31),
        s * x1 + G2(+z * x3) + G4(-D * x5 - w * x7) + G8(-p * x9 - v * x11 - C * x13 + A * x15) + G16(+t * x17 + r * x19 + y * x21 - E_ * x23 - x * x25 - q * x27 - u * x29 - B * x31),
        t * x1 + G2(+C * x3) + G4(-y * x5 - p * x7) + G8(-x * x9 + D * x11 + u * x13 + s * x15) + G16(+B * x17 - z * x19 - q * x21 - w * x23 + E_ * x25 + v * x27 + r * x29 + A * x31),
        u * x1 + G2(-E_ * x3) + G4(-t * x5 - v * x7) + G8(+D * x9 + s * x11 + w * x13 - C * x15) + G16(-r * x17 - x * x19 + B * x21 + q * x23 + y * x25 - A * x27 - p * x29 - z * x31),
        v * x1 + G2(-B * x3) + G4(-p * x5 - C * x7) + G8(+u * x9 + w * x11 - A * x13 - q * x15) + G16(-D * x17 + t * x19 + x * x21 - z * x23 - r * x25 - E_ * x27 + s * x29 + y * x31),
        w * x1 + G2(-y * x3) + G4(-u * x5 + A * x7) + G8(+s * x9 - C * x11 - q * x13 + E_ * x15) + G16(+p * x17 + D * x19 - r * x21 - B * x23 + t * x25 + z * x27 - v * x29 - x * x31),
        x * x1 + G2(-v * x3) + G4(-z * x5 + t * x7) + G8(+B * x9 - r * x11 - D * x13 + p * x15) + G16(-E_ * x17 - q * x19 + C * x21 + s * x23 - A * x25 - u * x27 + y * x29 + w * x31),
        y * x1 + G2(-s * x3) + G4(-E_ * x5 + r * x7) + G8(-z * x9 - x * x11 + t * x13 + D * x15) + G16(-q * x17 + A * x19 + w * x21 - u * x23 - C * x25 + p * x27 - B * x29 - v * x31),
        z * x1 + G2(-p * x3) + G4(+A * x5 + y * x7) + G8(-q * x9 + B * x11 + x * x13 - r * x15) + G16(+C * x17 + w * x19 - s * x21 + D * x23 + v * x25 - t * x27 + E_ * x29 + u * x31),
        A * x1 + G2(-r * x3) + G4(+v * x5 - E_ * x7) + G8(-w * x9 + q * x11 - z * x13 - B * x15) + G16(+s * x17 - u * x19 + D * x21 + x * x23 - p * x25 + y * x27 + C * x29 - t * x31),
        B * x1 + G2(-u * x3) + G4(+q * x5 - x * x7) + G8(+E_ * x9 + y * x11 - r * x13 + t * x15) + G16(-A * x17 - C * x19 + v * x21 - p * x23 + w * x25 - D * x27 - z * x29 + s * x31),
        C * x1 + G2(-x * x3) + G4(+s * x5 - q * x7) + G8(+v * x9 - A * x11 - E_ * x13 + z * x15) + G16(-u * x17 + p * x19 - t * x21 + y * x23 - D * x25 - B * x27 + w * x29 - r * x31),
        D * x1 + G2(-A * x3) + G4(+x * x5 - u * x7) + G8(+r * x9 - p * x11 + s * x13 - v * x15) + G16(+y * x17 - B * x19 + E_ * x21 + C * x23 - z * x25 + w * x27 - t * x29 + q * x31),
        E_ * x1 + G2(-D * x3) + G4(+C * x5 - B * x7) + G8(+A * x9 - z * x11 + y * x13 - x * x15) + G16(+w * x17 - v * x19 + u * x21 - t * x23 + s * x25 - r * x27 + q * x29 - p * x31),
    };

    coeffs[0 * stride] = E[0] + O[0];
    coeffs[1 * stride] = E[1] + O[1];
    coeffs[2 * stride] = E[2] + O[2];
    coeffs[3 * stride] = E[3] + O[3];
    coeffs[4 * stride] = E[4] + O[4];
    coeffs[5 * stride] = E[5] + O[5];
    coeffs[6 * stride] = E[6] + O[6];
    coeffs[7 * stride] = E[7] + O[7];
    coeffs[8 * stride] = E[8] + O[8];
    coeffs[9 * stride] = E[9] + O[9];
    coeffs[10 * stride] = E[10] + O[10];
    coeffs[11 * stride] = E[11] + O[11];
    coeffs[12 * stride] = E[12] + O[12];
    coeffs[13 * stride] = E[13] + O[13];
    coeffs[14 * stride] = E[14] + O[14];
    coeffs[15 * stride] = E[15] + O[15];
    coeffs[16 * stride] = E[15] - O[15];
    coeffs[17 * stride] = E[14] - O[14];
    coeffs[18 * stride] = E[13] - O[13];
    coeffs[19 * stride] = E[12] - O[12];
    coeffs[20 * stride] = E[11] - O[11];
    coeffs[21 * stride] = E[10] - O[10];
    coeffs[22 * stride] = E[9] - O[9];
    coeffs[23 * stride] = E[8] - O[8];
    coeffs[24 * stride] = E[7] - O[7];
    coeffs[25 * stride] = E[6] - O[6];
    coeffs[26 * stride] = E[5] - O[5];
    coeffs[27 * stride] = E[4] - O[4];
    coeffs[28 * stride] = E[3] - O[3];
    coeffs[29 * stride] = E[2] - O[2];
    coeffs[30 * stride] = E[1] - O[1];
    coeffs[31 * stride] = E[0] - O[0];
}
389 
390 /*
391 transMatrix[64][64] = {
392  { aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa, aa },
393  { bf, bg, bh, bi, bj, bk, bl, bm, bn, bo, bp, bq, br, bs, bt, bu, bv, bw, bx, by, bz, ca, cb, cc, cd, ce, cf, cg, ch, ci, cj, ck, -ck, -cj, -ci, -ch, -cg, -cf, -ce, -cd, -cc, -cb, -ca, -bz, -by, -bx, -bw, -bv, -bu, -bt, -bs, -br, -bq, -bp, -bo, -bn, -bm, -bl, -bk, -bj, -bi, -bh, -bg, -bf },
394  { ap, aq, ar, as, at, au, av, aw, ax, ay, az, ba, bb, bc, bd, be, -be, -bd, -bc, -bb, -ba, -az, -ay, -ax, -aw, -av, -au, -at, -as, -ar, -aq, -ap, -ap, -aq, -ar, -as, -at, -au, -av, -aw, -ax, -ay, -az, -ba, -bb, -bc, -bd, -be, be, bd, bc, bb, ba, az, ay, ax, aw, av, au, at, as, ar, aq, ap },
395  { bg, bj, bm, bp, bs, bv, by, cb, ce, ch, ck, -ci, -cf, -cc, -bz, -bw, -bt, -bq, -bn, -bk, -bh, -bf, -bi, -bl, -bo, -br, -bu, -bx, -ca, -cd, -cg, -cj, cj, cg, cd, ca, bx, bu, br, bo, bl, bi, bf, bh, bk, bn, bq, bt, bw, bz, cc, cf, ci, -ck, -ch, -ce, -cb, -by, -bv, -bs, -bp, -bm, -bj, -bg },
396  { ah, ai, aj, ak, al, am, an, ao, -ao, -an, -am, -al, -ak, -aj, -ai, -ah, -ah, -ai, -aj, -ak, -al, -am, -an, -ao, ao, an, am, al, ak, aj, ai, ah, ah, ai, aj, ak, al, am, an, ao, -ao, -an, -am, -al, -ak, -aj, -ai, -ah, -ah, -ai, -aj, -ak, -al, -am, -an, -ao, ao, an, am, al, ak, aj, ai, ah },
397  { bh, bm, br, bw, cb, cg, -ck, -cf, -ca, -bv, -bq, -bl, -bg, -bi, -bn, -bs, -bx, -cc, -ch, cj, ce, bz, bu, bp, bk, bf, bj, bo, bt, by, cd, ci, -ci, -cd, -by, -bt, -bo, -bj, -bf, -bk, -bp, -bu, -bz, -ce, -cj, ch, cc, bx, bs, bn, bi, bg, bl, bq, bv, ca, cf, ck, -cg, -cb, -bw, -br, -bm, -bh },
398  { aq, at, aw, az, bc, -be, -bb, -ay, -av, -as, -ap, -ar, -au, -ax, -ba, -bd, bd, ba, ax, au, ar, ap, as, av, ay, bb, be, -bc, -az, -aw, -at, -aq, -aq, -at, -aw, -az, -bc, be, bb, ay, av, as, ap, ar, au, ax, ba, bd, -bd, -ba, -ax, -au, -ar, -ap, -as, -av, -ay, -bb, -be, bc, az, aw, at, aq },
399  { bi, bp, bw, cd, ck, -ce, -bx, -bq, -bj, -bh, -bo, -bv, -cc, -cj, cf, by, br, bk, bg, bn, bu, cb, ci, -cg, -bz, -bs, -bl, -bf, -bm, -bt, -ca, -ch, ch, ca, bt, bm, bf, bl, bs, bz, cg, -ci, -cb, -bu, -bn, -bg, -bk, -br, -by, -cf, cj, cc, bv, bo, bh, bj, bq, bx, ce, -ck, -cd, -bw, -bp, -bi },
400  { ad, ae, af, ag, -ag, -af, -ae, -ad, -ad, -ae, -af, -ag, ag, af, ae, ad, ad, ae, af, ag, -ag, -af, -ae, -ad, -ad, -ae, -af, -ag, ag, af, ae, ad, ad, ae, af, ag, -ag, -af, -ae, -ad, -ad, -ae, -af, -ag, ag, af, ae, ad, ad, ae, af, ag, -ag, -af, -ae, -ad, -ad, -ae, -af, -ag, ag, af, ae, ad },
401  { bj, bs, cb, ck, -cc, -bt, -bk, -bi, -br, -ca, -cj, cd, bu, bl, bh, bq, bz, ci, -ce, -bv, -bm, -bg, -bp, -by, -ch, cf, bw, bn, bf, bo, bx, cg, -cg, -bx, -bo, -bf, -bn, -bw, -cf, ch, by, bp, bg, bm, bv, ce, -ci, -bz, -bq, -bh, -bl, -bu, -cd, cj, ca, br, bi, bk, bt, cc, -ck, -cb, -bs, -bj },
402  { ar, aw, bb, -bd, -ay, -at, -ap, -au, -az, -be, ba, av, aq, as, ax, bc, -bc, -ax, -as, -aq, -av, -ba, be, az, au, ap, at, ay, bd, -bb, -aw, -ar, -ar, -aw, -bb, bd, ay, at, ap, au, az, be, -ba, -av, -aq, -as, -ax, -bc, bc, ax, as, aq, av, ba, -be, -az, -au, -ap, -at, -ay, -bd, bb, aw, ar },
403  { bk, bv, cg, -ce, -bt, -bi, -bm, -bx, -ci, cc, br, bg, bo, bz, ck, -ca, -bp, -bf, -bq, -cb, cj, by, bn, bh, bs, cd, -ch, -bw, -bl, -bj, -bu, -cf, cf, bu, bj, bl, bw, ch, -cd, -bs, -bh, -bn, -by, -cj, cb, bq, bf, bp, ca, -ck, -bz, -bo, -bg, -br, -cc, ci, bx, bm, bi, bt, ce, -cg, -bv, -bk },
404  { ai, al, ao, -am, -aj, -ah, -ak, -an, an, ak, ah, aj, am, -ao, -al, -ai, -ai, -al, -ao, am, aj, ah, ak, an, -an, -ak, -ah, -aj, -am, ao, al, ai, ai, al, ao, -am, -aj, -ah, -ak, -an, an, ak, ah, aj, am, -ao, -al, -ai, -ai, -al, -ao, am, aj, ah, ak, an, -an, -ak, -ah, -aj, -am, ao, al, ai },
405  { bl, by, -ck, -bx, -bk, -bm, -bz, cj, bw, bj, bn, ca, -ci, -bv, -bi, -bo, -cb, ch, bu, bh, bp, cc, -cg, -bt, -bg, -bq, -cd, cf, bs, bf, br, ce, -ce, -br, -bf, -bs, -cf, cd, bq, bg, bt, cg, -cc, -bp, -bh, -bu, -ch, cb, bo, bi, bv, ci, -ca, -bn, -bj, -bw, -cj, bz, bm, bk, bx, ck, -by, -bl },
406  { as, az, -bd, -aw, -ap, -av, -bc, ba, at, ar, ay, -be, -ax, -aq, -au, -bb, bb, au, aq, ax, be, -ay, -ar, -at, -ba, bc, av, ap, aw, bd, -az, -as, -as, -az, bd, aw, ap, av, bc, -ba, -at, -ar, -ay, be, ax, aq, au, bb, -bb, -au, -aq, -ax, -be, ay, ar, at, ba, -bc, -av, -ap, -aw, -bd, az, as },
407  { bm, cb, -cf, -bq, -bi, -bx, cj, bu, bf, bt, ci, -by, -bj, -bp, -ce, cc, bn, bl, ca, -cg, -br, -bh, -bw, ck, bv, bg, bs, ch, -bz, -bk, -bo, -cd, cd, bo, bk, bz, -ch, -bs, -bg, -bv, -ck, bw, bh, br, cg, -ca, -bl, -bn, -cc, ce, bp, bj, by, -ci, -bt, -bf, -bu, -cj, bx, bi, bq, cf, -cb, -bm },
408  { ab, ac, -ac, -ab, -ab, -ac, ac, ab, ab, ac, -ac, -ab, -ab, -ac, ac, ab, ab, ac, -ac, -ab, -ab, -ac, ac, ab, ab, ac, -ac, -ab, -ab, -ac, ac, ab, ab, ac, -ac, -ab, -ab, -ac, ac, ab, ab, ac, -ac, -ab, -ab, -ac, ac, ab, ab, ac, -ac, -ab, -ab, -ac, ac, ab, ab, ac, -ac, -ab, -ab, -ac, ac, ab },
409  { bn, ce, -ca, -bj, -br, -ci, bw, bf, bv, -cj, -bs, -bi, -bz, cf, bo, bm, cd, -cb, -bk, -bq, -ch, bx, bg, bu, -ck, -bt, -bh, -by, cg, bp, bl, cc, -cc, -bl, -bp, -cg, by, bh, bt, ck, -bu, -bg, -bx, ch, bq, bk, cb, -cd, -bm, -bo, -cf, bz, bi, bs, cj, -bv, -bf, -bw, ci, br, bj, ca, -ce, -bn },
410  { at, bc, -ay, -ap, -ax, bd, au, as, bb, -az, -aq, -aw, be, av, ar, ba, -ba, -ar, -av, -be, aw, aq, az, -bb, -as, -au, -bd, ax, ap, ay, -bc, -at, -at, -bc, ay, ap, ax, -bd, -au, -as, -bb, az, aq, aw, -be, -av, -ar, -ba, ba, ar, av, be, -aw, -aq, -az, bb, as, au, bd, -ax, -ap, -ay, bc, at },
411  { bo, ch, -bv, -bh, -ca, cc, bj, bt, -cj, -bq, -bm, -cf, bx, bf, by, -ce, -bl, -br, -ck, bs, bk, cd, -bz, -bg, -bw, cg, bn, bp, ci, -bu, -bi, -cb, cb, bi, bu, -ci, -bp, -bn, -cg, bw, bg, bz, -cd, -bk, -bs, ck, br, bl, ce, -by, -bf, -bx, cf, bm, bq, cj, -bt, -bj, -cc, ca, bh, bv, -ch, -bo },
412  { aj, ao, -ak, -ai, -an, al, ah, am, -am, -ah, -al, an, ai, ak, -ao, -aj, -aj, -ao, ak, ai, an, -al, -ah, -am, am, ah, al, -an, -ai, -ak, ao, aj, aj, ao, -ak, -ai, -an, al, ah, am, -am, -ah, -al, an, ai, ak, -ao, -aj, -aj, -ao, ak, ai, an, -al, -ah, -am, am, ah, al, -an, -ai, -ak, ao, aj },
413  { bp, ck, -bq, -bo, -cj, br, bn, ci, -bs, -bm, -ch, bt, bl, cg, -bu, -bk, -cf, bv, bj, ce, -bw, -bi, -cd, bx, bh, cc, -by, -bg, -cb, bz, bf, ca, -ca, -bf, -bz, cb, bg, by, -cc, -bh, -bx, cd, bi, bw, -ce, -bj, -bv, cf, bk, bu, -cg, -bl, -bt, ch, bm, bs, -ci, -bn, -br, cj, bo, bq, -ck, -bp },
414  { au, -be, -at, -av, bd, as, aw, -bc, -ar, -ax, bb, aq, ay, -ba, -ap, -az, az, ap, ba, -ay, -aq, -bb, ax, ar, bc, -aw, -as, -bd, av, at, be, -au, -au, be, at, av, -bd, -as, -aw, bc, ar, ax, -bb, -aq, -ay, ba, ap, az, -az, -ap, -ba, ay, aq, bb, -ax, -ar, -bc, aw, as, bd, -av, -at, -be, au },
415  { bq, -ci, -bl, -bv, cd, bg, ca, -by, -bi, -cf, bt, bn, ck, -bo, -bs, cg, bj, bx, -cb, -bf, -cc, bw, bk, ch, -br, -bp, cj, bm, bu, -ce, -bh, -bz, bz, bh, ce, -bu, -bm, -cj, bp, br, -ch, -bk, -bw, cc, bf, cb, -bx, -bj, -cg, bs, bo, -ck, -bn, -bt, cf, bi, by, -ca, -bg, -cd, bv, bl, ci, -bq },
416  { ae, -ag, -ad, -af, af, ad, ag, -ae, -ae, ag, ad, af, -af, -ad, -ag, ae, ae, -ag, -ad, -af, af, ad, ag, -ae, -ae, ag, ad, af, -af, -ad, -ag, ae, ae, -ag, -ad, -af, af, ad, ag, -ae, -ae, ag, ad, af, -af, -ad, -ag, ae, ae, -ag, -ad, -af, af, ad, ag, -ae, -ae, ag, ad, af, -af, -ad, -ag, ae },
417  { br, -cf, -bg, -cc, bu, bo, -ci, -bj, -bz, bx, bl, ck, -bm, -bw, ca, bi, ch, -bp, -bt, cd, bf, ce, -bs, -bq, cg, bh, cb, -bv, -bn, cj, bk, by, -by, -bk, -cj, bn, bv, -cb, -bh, -cg, bq, bs, -ce, -bf, -cd, bt, bp, -ch, -bi, -ca, bw, bm, -ck, -bl, -bx, bz, bj, ci, -bo, -bu, cc, bg, cf, -br },
418  { av, -bb, -ap, -bc, au, aw, -ba, -aq, -bd, at, ax, -az, -ar, -be, as, ay, -ay, -as, be, ar, az, -ax, -at, bd, aq, ba, -aw, -au, bc, ap, bb, -av, -av, bb, ap, bc, -au, -aw, ba, aq, bd, -at, -ax, az, ar, be, -as, -ay, ay, as, -be, -ar, -az, ax, at, -bd, -aq, -ba, aw, au, -bc, -ap, -bb, av },
419  { bs, -cc, -bi, -cj, bl, bz, -bv, -bp, cf, bf, cg, -bo, -bw, by, bm, -ci, -bh, -cd, br, bt, -cb, -bj, -ck, bk, ca, -bu, -bq, ce, bg, ch, -bn, -bx, bx, bn, -ch, -bg, -ce, bq, bu, -ca, -bk, ck, bj, cb, -bt, -br, cd, bh, ci, -bm, -by, bw, bo, -cg, -bf, -cf, bp, bv, -bz, -bl, cj, bi, cc, -bs },
420  { ak, -am, -ai, ao, ah, an, -aj, -al, al, aj, -an, -ah, -ao, ai, am, -ak, -ak, am, ai, -ao, -ah, -an, aj, al, -al, -aj, an, ah, ao, -ai, -am, ak, ak, -am, -ai, ao, ah, an, -aj, -al, al, aj, -an, -ah, -ao, ai, am, -ak, -ak, am, ai, -ao, -ah, -an, aj, al, -al, -aj, an, ah, ao, -ai, -am, ak },
421  { bt, -bz, -bn, cf, bh, ck, -bi, -ce, bo, by, -bu, -bs, ca, bm, -cg, -bg, -cj, bj, cd, -bp, -bx, bv, br, -cb, -bl, ch, bf, ci, -bk, -cc, bq, bw, -bw, -bq, cc, bk, -ci, -bf, -ch, bl, cb, -br, -bv, bx, bp, -cd, -bj, cj, bg, cg, -bm, -ca, bs, bu, -by, -bo, ce, bi, -ck, -bh, -cf, bn, bz, -bt },
422  { aw, -ay, -au, ba, as, -bc, -aq, be, ap, bd, -ar, -bb, at, az, -av, -ax, ax, av, -az, -at, bb, ar, -bd, -ap, -be, aq, bc, -as, -ba, au, ay, -aw, -aw, ay, au, -ba, -as, bc, aq, -be, -ap, -bd, ar, bb, -at, -az, av, ax, -ax, -av, az, at, -bb, -ar, bd, ap, be, -aq, -bc, as, ba, -au, -ay, aw },
423  { bu, -bw, -bs, by, bq, -ca, -bo, cc, bm, -ce, -bk, cg, bi, -ci, -bg, ck, bf, cj, -bh, -ch, bj, cf, -bl, -cd, bn, cb, -bp, -bz, br, bx, -bt, -bv, bv, bt, -bx, -br, bz, bp, -cb, -bn, cd, bl, -cf, -bj, ch, bh, -cj, -bf, -ck, bg, ci, -bi, -cg, bk, ce, -bm, -cc, bo, ca, -bq, -by, bs, bw, -bu },
424  { aa, -aa, -aa, aa, aa, -aa, -aa, aa, aa, -aa, -aa, aa, aa, -aa, -aa, aa, aa, -aa, -aa, aa, aa, -aa, -aa, aa, aa, -aa, -aa, aa, aa, -aa, -aa, aa, aa, -aa, -aa, aa, aa, -aa, -aa, aa, aa, -aa, -aa, aa, aa, -aa, -aa, aa, aa, -aa, -aa, aa, aa, -aa, -aa, aa, aa, -aa, -aa, aa, aa, -aa, -aa, aa },
425  { bv, -bt, -bx, br, bz, -bp, -cb, bn, cd, -bl, -cf, bj, ch, -bh, -cj, bf, -ck, -bg, ci, bi, -cg, -bk, ce, bm, -cc, -bo, ca, bq, -by, -bs, bw, bu, -bu, -bw, bs, by, -bq, -ca, bo, cc, -bm, -ce, bk, cg, -bi, -ci, bg, ck, -bf, cj, bh, -ch, -bj, cf, bl, -cd, -bn, cb, bp, -bz, -br, bx, bt, -bv },
426  { ax, -av, -az, at, bb, -ar, -bd, ap, -be, -aq, bc, as, -ba, -au, ay, aw, -aw, -ay, au, ba, -as, -bc, aq, be, -ap, bd, ar, -bb, -at, az, av, -ax, -ax, av, az, -at, -bb, ar, bd, -ap, be, aq, -bc, -as, ba, au, -ay, -aw, aw, ay, -au, -ba, as, bc, -aq, -be, ap, -bd, -ar, bb, at, -az, -av, ax },
427  { bw, -bq, -cc, bk, ci, -bf, ch, bl, -cb, -br, bv, bx, -bp, -cd, bj, cj, -bg, cg, bm, -ca, -bs, bu, by, -bo, -ce, bi, ck, -bh, cf, bn, -bz, -bt, bt, bz, -bn, -cf, bh, -ck, -bi, ce, bo, -by, -bu, bs, ca, -bm, -cg, bg, -cj, -bj, cd, bp, -bx, -bv, br, cb, -bl, -ch, bf, -ci, -bk, cc, bq, -bw },
428  { al, -aj, -an, ah, -ao, -ai, am, ak, -ak, -am, ai, ao, -ah, an, aj, -al, -al, aj, an, -ah, ao, ai, -am, -ak, ak, am, -ai, -ao, ah, -an, -aj, al, al, -aj, -an, ah, -ao, -ai, am, ak, -ak, -am, ai, ao, -ah, an, aj, -al, -al, aj, an, -ah, ao, ai, -am, -ak, ak, am, -ai, -ao, ah, -an, -aj, al },
429  { bx, -bn, -ch, bg, -ce, -bq, bu, ca, -bk, -ck, bj, -cb, -bt, br, cd, -bh, ci, bm, -by, -bw, bo, cg, -bf, cf, bp, -bv, -bz, bl, cj, -bi, cc, bs, -bs, -cc, bi, -cj, -bl, bz, bv, -bp, -cf, bf, -cg, -bo, bw, by, -bm, -ci, bh, -cd, -br, bt, cb, -bj, ck, bk, -ca, -bu, bq, ce, -bg, ch, bn, -bx },
430  { ay, -as, -be, ar, -az, -ax, at, bd, -aq, ba, aw, -au, -bc, ap, -bb, -av, av, bb, -ap, bc, au, -aw, -ba, aq, -bd, -at, ax, az, -ar, be, as, -ay, -ay, as, be, -ar, az, ax, -at, -bd, aq, -ba, -aw, au, bc, -ap, bb, av, -av, -bb, ap, -bc, -au, aw, ba, -aq, bd, at, -ax, -az, ar, -be, -as, ay },
431  { by, -bk, cj, bn, -bv, -cb, bh, -cg, -bq, bs, ce, -bf, cd, bt, -bp, -ch, bi, -ca, -bw, bm, ck, -bl, bx, bz, -bj, ci, bo, -bu, -cc, bg, -cf, -br, br, cf, -bg, cc, bu, -bo, -ci, bj, -bz, -bx, bl, -ck, -bm, bw, ca, -bi, ch, bp, -bt, -cd, bf, -ce, -bs, bq, cg, -bh, cb, bv, -bn, -cj, bk, -by },
432  { af, -ad, ag, ae, -ae, -ag, ad, -af, -af, ad, -ag, -ae, ae, ag, -ad, af, af, -ad, ag, ae, -ae, -ag, ad, -af, -af, ad, -ag, -ae, ae, ag, -ad, af, af, -ad, ag, ae, -ae, -ag, ad, -af, -af, ad, -ag, -ae, ae, ag, -ad, af, af, -ad, ag, ae, -ae, -ag, ad, -af, -af, ad, -ag, -ae, ae, ag, -ad, af },
433  { bz, -bh, ce, bu, -bm, cj, bp, -br, -ch, bk, -bw, -cc, bf, -cb, -bx, bj, -cg, -bs, bo, ck, -bn, bt, cf, -bi, by, ca, -bg, cd, bv, -bl, ci, bq, -bq, -ci, bl, -bv, -cd, bg, -ca, -by, bi, -cf, -bt, bn, -ck, -bo, bs, cg, -bj, bx, cb, -bf, cc, bw, -bk, ch, br, -bp, -cj, bm, -bu, -ce, bh, -bz },
434  { az, -ap, ba, ay, -aq, bb, ax, -ar, bc, aw, -as, bd, av, -at, be, au, -au, -be, at, -av, -bd, as, -aw, -bc, ar, -ax, -bb, aq, -ay, -ba, ap, -az, -az, ap, -ba, -ay, aq, -bb, -ax, ar, -bc, -aw, as, -bd, -av, at, -be, -au, au, be, -at, av, bd, -as, aw, bc, -ar, ax, bb, -aq, ay, ba, -ap, az },
435  { ca, -bf, bz, cb, -bg, by, cc, -bh, bx, cd, -bi, bw, ce, -bj, bv, cf, -bk, bu, cg, -bl, bt, ch, -bm, bs, ci, -bn, br, cj, -bo, bq, ck, -bp, bp, -ck, -bq, bo, -cj, -br, bn, -ci, -bs, bm, -ch, -bt, bl, -cg, -bu, bk, -cf, -bv, bj, -ce, -bw, bi, -cd, -bx, bh, -cc, -by, bg, -cb, -bz, bf, -ca },
436  { am, -ah, al, an, -ai, ak, ao, -aj, aj, -ao, -ak, ai, -an, -al, ah, -am, -am, ah, -al, -an, ai, -ak, -ao, aj, -aj, ao, ak, -ai, an, al, -ah, am, am, -ah, al, an, -ai, ak, ao, -aj, aj, -ao, -ak, ai, -an, -al, ah, -am, -am, ah, -al, -an, ai, -ak, -ao, aj, -aj, ao, ak, -ai, an, al, -ah, am },
437  { cb, -bi, bu, ci, -bp, bn, -cg, -bw, bg, -bz, -cd, bk, -bs, -ck, br, -bl, ce, by, -bf, bx, cf, -bm, bq, -cj, -bt, bj, -cc, -ca, bh, -bv, -ch, bo, -bo, ch, bv, -bh, ca, cc, -bj, bt, cj, -bq, bm, -cf, -bx, bf, -by, -ce, bl, -br, ck, bs, -bk, cd, bz, -bg, bw, cg, -bn, bp, -ci, -bu, bi, -cb },
438  { ba, -ar, av, -be, -aw, aq, -az, -bb, as, -au, bd, ax, -ap, ay, bc, -at, at, -bc, -ay, ap, -ax, -bd, au, -as, bb, az, -aq, aw, be, -av, ar, -ba, -ba, ar, -av, be, aw, -aq, az, bb, -as, au, -bd, -ax, ap, -ay, -bc, at, -at, bc, ay, -ap, ax, bd, -au, as, -bb, -az, aq, -aw, -be, av, -ar, ba },
439  { cc, -bl, bp, -cg, -by, bh, -bt, ck, bu, -bg, bx, ch, -bq, bk, -cb, -cd, bm, -bo, cf, bz, -bi, bs, -cj, -bv, bf, -bw, -ci, br, -bj, ca, ce, -bn, bn, -ce, -ca, bj, -br, ci, bw, -bf, bv, cj, -bs, bi, -bz, -cf, bo, -bm, cd, cb, -bk, bq, -ch, -bx, bg, -bu, -ck, bt, -bh, by, cg, -bp, bl, -cc },
440  { ac, -ab, ab, -ac, -ac, ab, -ab, ac, ac, -ab, ab, -ac, -ac, ab, -ab, ac, ac, -ab, ab, -ac, -ac, ab, -ab, ac, ac, -ab, ab, -ac, -ac, ab, -ab, ac, ac, -ab, ab, -ac, -ac, ab, -ab, ac, ac, -ab, ab, -ac, -ac, ab, -ab, ac, ac, -ab, ab, -ac, -ac, ab, -ab, ac, ac, -ab, ab, -ac, -ac, ab, -ab, ac },
441  { cd, -bo, bk, -bz, -ch, bs, -bg, bv, -ck, -bw, bh, -br, cg, ca, -bl, bn, -cc, -ce, bp, -bj, by, ci, -bt, bf, -bu, cj, bx, -bi, bq, -cf, -cb, bm, -bm, cb, cf, -bq, bi, -bx, -cj, bu, -bf, bt, -ci, -by, bj, -bp, ce, cc, -bn, bl, -ca, -cg, br, -bh, bw, ck, -bv, bg, -bs, ch, bz, -bk, bo, -cd },
442  { bb, -au, aq, -ax, be, ay, -ar, at, -ba, -bc, av, -ap, aw, -bd, -az, as, -as, az, bd, -aw, ap, -av, bc, ba, -at, ar, -ay, -be, ax, -aq, au, -bb, -bb, au, -aq, ax, -be, -ay, ar, -at, ba, bc, -av, ap, -aw, bd, az, -as, as, -az, -bd, aw, -ap, av, -bc, -ba, at, -ar, ay, be, -ax, aq, -au, bb },
443  { ce, -br, bf, -bs, cf, cd, -bq, bg, -bt, cg, cc, -bp, bh, -bu, ch, cb, -bo, bi, -bv, ci, ca, -bn, bj, -bw, cj, bz, -bm, bk, -bx, ck, by, -bl, bl, -by, -ck, bx, -bk, bm, -bz, -cj, bw, -bj, bn, -ca, -ci, bv, -bi, bo, -cb, -ch, bu, -bh, bp, -cc, -cg, bt, -bg, bq, -cd, -cf, bs, -bf, br, -ce },
444  { an, -ak, ah, -aj, am, ao, -al, ai, -ai, al, -ao, -am, aj, -ah, ak, -an, -an, ak, -ah, aj, -am, -ao, al, -ai, ai, -al, ao, am, -aj, ah, -ak, an, an, -ak, ah, -aj, am, ao, -al, ai, -ai, al, -ao, -am, aj, -ah, ak, -an, -an, ak, -ah, aj, -am, -ao, al, -ai, ai, -al, ao, am, -aj, ah, -ak, an },
445  { cf, -bu, bj, -bl, bw, -ch, -cd, bs, -bh, bn, -by, cj, cb, -bq, bf, -bp, ca, ck, -bz, bo, -bg, br, -cc, -ci, bx, -bm, bi, -bt, ce, cg, -bv, bk, -bk, bv, -cg, -ce, bt, -bi, bm, -bx, ci, cc, -br, bg, -bo, bz, -ck, -ca, bp, -bf, bq, -cb, -cj, by, -bn, bh, -bs, cd, ch, -bw, bl, -bj, bu, -cf },
446  { bc, -ax, as, -aq, av, -ba, -be, az, -au, ap, -at, ay, -bd, -bb, aw, -ar, ar, -aw, bb, bd, -ay, at, -ap, au, -az, be, ba, -av, aq, -as, ax, -bc, -bc, ax, -as, aq, -av, ba, be, -az, au, -ap, at, -ay, bd, bb, -aw, ar, -ar, aw, -bb, -bd, ay, -at, ap, -au, az, -be, -ba, av, -aq, as, -ax, bc },
447  { cg, -bx, bo, -bf, bn, -bw, cf, ch, -by, bp, -bg, bm, -bv, ce, ci, -bz, bq, -bh, bl, -bu, cd, cj, -ca, br, -bi, bk, -bt, cc, ck, -cb, bs, -bj, bj, -bs, cb, -ck, -cc, bt, -bk, bi, -br, ca, -cj, -cd, bu, -bl, bh, -bq, bz, -ci, -ce, bv, -bm, bg, -bp, by, -ch, -cf, bw, -bn, bf, -bo, bx, -cg },
448  { ag, -af, ae, -ad, ad, -ae, af, -ag, -ag, af, -ae, ad, -ad, ae, -af, ag, ag, -af, ae, -ad, ad, -ae, af, -ag, -ag, af, -ae, ad, -ad, ae, -af, ag, ag, -af, ae, -ad, ad, -ae, af, -ag, -ag, af, -ae, ad, -ad, ae, -af, ag, ag, -af, ae, -ad, ad, -ae, af, -ag, -ag, af, -ae, ad, -ad, ae, -af, ag },
449  { ch, -ca, bt, -bm, bf, -bl, bs, -bz, cg, ci, -cb, bu, -bn, bg, -bk, br, -by, cf, cj, -cc, bv, -bo, bh, -bj, bq, -bx, ce, ck, -cd, bw, -bp, bi, -bi, bp, -bw, cd, -ck, -ce, bx, -bq, bj, -bh, bo, -bv, cc, -cj, -cf, by, -br, bk, -bg, bn, -bu, cb, -ci, -cg, bz, -bs, bl, -bf, bm, -bt, ca, -ch },
450  { bd, -ba, ax, -au, ar, -ap, as, -av, ay, -bb, be, bc, -az, aw, -at, aq, -aq, at, -aw, az, -bc, -be, bb, -ay, av, -as, ap, -ar, au, -ax, ba, -bd, -bd, ba, -ax, au, -ar, ap, -as, av, -ay, bb, -be, -bc, az, -aw, at, -aq, aq, -at, aw, -az, bc, be, -bb, ay, -av, as, -ap, ar, -au, ax, -ba, bd },
451  { ci, -cd, by, -bt, bo, -bj, bf, -bk, bp, -bu, bz, -ce, cj, ch, -cc, bx, -bs, bn, -bi, bg, -bl, bq, -bv, ca, -cf, ck, cg, -cb, bw, -br, bm, -bh, bh, -bm, br, -bw, cb, -cg, -ck, cf, -ca, bv, -bq, bl, -bg, bi, -bn, bs, -bx, cc, -ch, -cj, ce, -bz, bu, -bp, bk, -bf, bj, -bo, bt, -by, cd, -ci },
452  { ao, -an, am, -al, ak, -aj, ai, -ah, ah, -ai, aj, -ak, al, -am, an, -ao, -ao, an, -am, al, -ak, aj, -ai, ah, -ah, ai, -aj, ak, -al, am, -an, ao, ao, -an, am, -al, ak, -aj, ai, -ah, ah, -ai, aj, -ak, al, -am, an, -ao, -ao, an, -am, al, -ak, aj, -ai, ah, -ah, ai, -aj, ak, -al, am, -an, ao },
453  { cj, -cg, cd, -ca, bx, -bu, br, -bo, bl, -bi, bf, -bh, bk, -bn, bq, -bt, bw, -bz, cc, -cf, ci, ck, -ch, ce, -cb, by, -bv, bs, -bp, bm, -bj, bg, -bg, bj, -bm, bp, -bs, bv, -by, cb, -ce, ch, -ck, -ci, cf, -cc, bz, -bw, bt, -bq, bn, -bk, bh, -bf, bi, -bl, bo, -br, bu, -bx, ca, -cd, cg, -cj },
454  { be, -bd, bc, -bb, ba, -az, ay, -ax, aw, -av, au, -at, as, -ar, aq, -ap, ap, -aq, ar, -as, at, -au, av, -aw, ax, -ay, az, -ba, bb, -bc, bd, -be, -be, bd, -bc, bb, -ba, az, -ay, ax, -aw, av, -au, at, -as, ar, -aq, ap, -ap, aq, -ar, as, -at, au, -av, aw, -ax, ay, -az, ba, -bb, bc, -bd, be },
455  { ck, -cj, ci, -ch, cg, -cf, ce, -cd, cc, -cb, ca, -bz, by, -bx, bw, -bv, bu, -bt, bs, -br, bq, -bp, bo, -bn, bm, -bl, bk, -bj, bi, -bh, bg, -bf, bf, -bg, bh, -bi, bj, -bk, bl, -bm, bn, -bo, bp, -bq, br, -bs, bt, -bu, bv, -bw, bx, -by, bz, -ca, cb, -cc, cd, -ce, cf, -cg, ch, -ci, cj, -ck },
456 }
457  */
458 
/*
 * 64-point inverse DCT2, computed in place.
 *
 * Reads the 32 inputs coeffs[0 * stride] .. coeffs[31 * stride] and writes
 * all 64 outputs coeffs[0 * stride] .. coeffs[63 * stride].
 *
 * The result is built by repeated even/odd splitting: E[] collects the
 * contribution of the even-indexed inputs (itself split again into EE/EO,
 * EEE/EEO, ...), O[] collects the contribution of the odd-indexed inputs,
 * and each output is a sum or difference of one E[] and one O[] entry.
 *
 * NOTE(review): nz is not named anywhere in this body; presumably the
 * G2/G4/G8/G16 macros (defined earlier in this file) use it to drop terms
 * whose inputs are known to be zero -- confirm against their definitions.
 */
459 void ff_vvc_inv_dct2_64(int *coeffs, const ptrdiff_t stride, const size_t nz)
460 {
     // aa..ck: the matrix entries, using the same names as the table in the comment above
461  const int aa = 64, ab = 83, ac = 36, ad = 89, ae = 75, af = 50, ag = 18, ah = 90;
462  const int ai = 87, aj = 80, ak = 70, al = 57, am = 43, an = 25, ao = 9, ap = 90;
463  const int aq = 90, ar = 88, as = 85, at = 82, au = 78, av = 73, aw = 67, ax = 61;
464  const int ay = 54, az = 46, ba = 38, bb = 31, bc = 22, bd = 13, be = 4, bf = 91;
465  const int bg = 90, bh = 90, bi = 90, bj = 88, bk = 87, bl = 86, bm = 84, bn = 83;
466  const int bo = 81, bp = 79, bq = 77, br = 73, bs = 71, bt = 69, bu = 65, bv = 62;
467  const int bw = 59, bx = 56, by = 52, bz = 48, ca = 44, cb = 41, cc = 37, cd = 33;
468  const int ce = 28, cf = 24, cg = 20, ch = 15, ci = 11, cj = 7, ck = 2;
     // copy the inputs into locals first: the outputs written at the end reuse the same storage
469  const int x0 = coeffs[0 * stride], x1 = coeffs[1 * stride];
470  const int x2 = coeffs[2 * stride], x3 = coeffs[3 * stride];
471  const int x4 = coeffs[4 * stride], x5 = coeffs[5 * stride];
472  const int x6 = coeffs[6 * stride], x7 = coeffs[7 * stride];
473  const int x8 = coeffs[8 * stride], x9 = coeffs[9 * stride];
474  const int x10 = coeffs[10 * stride], x11 = coeffs[11 * stride];
475  const int x12 = coeffs[12 * stride], x13 = coeffs[13 * stride];
476  const int x14 = coeffs[14 * stride], x15 = coeffs[15 * stride];
477  const int x16 = coeffs[16 * stride], x17 = coeffs[17 * stride];
478  const int x18 = coeffs[18 * stride], x19 = coeffs[19 * stride];
479  const int x20 = coeffs[20 * stride], x21 = coeffs[21 * stride];
480  const int x22 = coeffs[22 * stride], x23 = coeffs[23 * stride];
481  const int x24 = coeffs[24 * stride], x25 = coeffs[25 * stride];
482  const int x26 = coeffs[26 * stride], x27 = coeffs[27 * stride];
483  const int x28 = coeffs[28 * stride], x29 = coeffs[29 * stride];
484  const int x30 = coeffs[30 * stride], x31 = coeffs[31 * stride];
485  //according to vvc specification, x32 to x63 are zeros
486  const int EEEEE[2] = {
487  aa * x0,
488  aa * x0,
489  };
490  const int EEEEO[2] = {
491  G16(ab * x16),
492  G16(ac * x16),
493  };
494  const int EEEE[4] = {
495  EEEEE[0] + EEEEO[0], EEEEE[1] + EEEEO[1],
496  EEEEE[1] - EEEEO[1], EEEEE[0] - EEEEO[0],
497  };
498  const int EEEO[4] = {
499  G8(ad * x8) + G16(+ae * x24),
500  G8(ae * x8) + G16(-ag * x24),
501  G8(af * x8) + G16(-ad * x24),
502  G8(ag * x8) + G16(-af * x24),
503  };
504  const int EEE[8] = {
505  EEEE[0] + EEEO[0], EEEE[1] + EEEO[1], EEEE[2] + EEEO[2], EEEE[3] + EEEO[3],
506  EEEE[3] - EEEO[3], EEEE[2] - EEEO[2], EEEE[1] - EEEO[1], EEEE[0] - EEEO[0],
507  };
508  const int EEO[8] = {
509  G4(ah * x4) + G8(+ai * x12) + G16(+aj * x20 + ak * x28),
510  G4(ai * x4) + G8(+al * x12) + G16(+ao * x20 - am * x28),
511  G4(aj * x4) + G8(+ao * x12) + G16(-ak * x20 - ai * x28),
512  G4(ak * x4) + G8(-am * x12) + G16(-ai * x20 + ao * x28),
513  G4(al * x4) + G8(-aj * x12) + G16(-an * x20 + ah * x28),
514  G4(am * x4) + G8(-ah * x12) + G16(+al * x20 + an * x28),
515  G4(an * x4) + G8(-ak * x12) + G16(+ah * x20 - aj * x28),
516  G4(ao * x4) + G8(-an * x12) + G16(+am * x20 - al * x28),
517  };
518  const int EE[16] = {
519  EEE[0] + EEO[0], EEE[1] + EEO[1], EEE[2] + EEO[2], EEE[3] + EEO[3], EEE[4] + EEO[4], EEE[5] + EEO[5], EEE[6] + EEO[6], EEE[7] + EEO[7],
520  EEE[7] - EEO[7], EEE[6] - EEO[6], EEE[5] - EEO[5], EEE[4] - EEO[4], EEE[3] - EEO[3], EEE[2] - EEO[2], EEE[1] - EEO[1], EEE[0] - EEO[0],
521  };
522  const int EO[16] = {
523  G2(ap * x2) + G4(+aq * x6) + G8(+ar * x10 + as * x14) + G16(+at * x18 + au * x22 + av * x26 + aw * x30),
524  G2(aq * x2) + G4(+at * x6) + G8(+aw * x10 + az * x14) + G16(+bc * x18 - be * x22 - bb * x26 - ay * x30),
525  G2(ar * x2) + G4(+aw * x6) + G8(+bb * x10 - bd * x14) + G16(-ay * x18 - at * x22 - ap * x26 - au * x30),
526  G2(as * x2) + G4(+az * x6) + G8(-bd * x10 - aw * x14) + G16(-ap * x18 - av * x22 - bc * x26 + ba * x30),
527  G2(at * x2) + G4(+bc * x6) + G8(-ay * x10 - ap * x14) + G16(-ax * x18 + bd * x22 + au * x26 + as * x30),
528  G2(au * x2) + G4(-be * x6) + G8(-at * x10 - av * x14) + G16(+bd * x18 + as * x22 + aw * x26 - bc * x30),
529  G2(av * x2) + G4(-bb * x6) + G8(-ap * x10 - bc * x14) + G16(+au * x18 + aw * x22 - ba * x26 - aq * x30),
530  G2(aw * x2) + G4(-ay * x6) + G8(-au * x10 + ba * x14) + G16(+as * x18 - bc * x22 - aq * x26 + be * x30),
531  G2(ax * x2) + G4(-av * x6) + G8(-az * x10 + at * x14) + G16(+bb * x18 - ar * x22 - bd * x26 + ap * x30),
532  G2(ay * x2) + G4(-as * x6) + G8(-be * x10 + ar * x14) + G16(-az * x18 - ax * x22 + at * x26 + bd * x30),
533  G2(az * x2) + G4(-ap * x6) + G8(+ba * x10 + ay * x14) + G16(-aq * x18 + bb * x22 + ax * x26 - ar * x30),
534  G2(ba * x2) + G4(-ar * x6) + G8(+av * x10 - be * x14) + G16(-aw * x18 + aq * x22 - az * x26 - bb * x30),
535  G2(bb * x2) + G4(-au * x6) + G8(+aq * x10 - ax * x14) + G16(+be * x18 + ay * x22 - ar * x26 + at * x30),
536  G2(bc * x2) + G4(-ax * x6) + G8(+as * x10 - aq * x14) + G16(+av * x18 - ba * x22 - be * x26 + az * x30),
537  G2(bd * x2) + G4(-ba * x6) + G8(+ax * x10 - au * x14) + G16(+ar * x18 - ap * x22 + as * x26 - av * x30),
538  G2(be * x2) + G4(-bd * x6) + G8(+bc * x10 - bb * x14) + G16(+ba * x18 - az * x22 + ay * x26 - ax * x30),
539  };
     // E[]: everything contributed by the even-indexed inputs (x0, x2, ..., x30)
540  const int E[32] = {
541  EE[0] + EO[0], EE[1] + EO[1], EE[2] + EO[2], EE[3] + EO[3], EE[4] + EO[4], EE[5] + EO[5], EE[6] + EO[6], EE[7] + EO[7], EE[8] + EO[8], EE[9] + EO[9], EE[10] + EO[10], EE[11] + EO[11], EE[12] + EO[12], EE[13] + EO[13], EE[14] + EO[14], EE[15] + EO[15],
542  EE[15] - EO[15], EE[14] - EO[14], EE[13] - EO[13], EE[12] - EO[12], EE[11] - EO[11], EE[10] - EO[10], EE[9] - EO[9], EE[8] - EO[8], EE[7] - EO[7], EE[6] - EO[6], EE[5] - EO[5], EE[4] - EO[4], EE[3] - EO[3], EE[2] - EO[2], EE[1] - EO[1], EE[0] - EO[0],
543  };
     // O[]: everything contributed by the odd-indexed inputs (x1, x3, ..., x31)
544  const int O[32] = {
545  bf * x1 + G2(+bg * x3) + G4(+bh * x5 + bi * x7) + G8(+bj * x9 + bk * x11 + bl * x13 + bm * x15) + G16(+bn * x17 + bo * x19 + bp * x21 + bq * x23 + br * x25 + bs * x27 + bt * x29 + bu * x31),
546  bg * x1 + G2(+bj * x3) + G4(+bm * x5 + bp * x7) + G8(+bs * x9 + bv * x11 + by * x13 + cb * x15) + G16(+ce * x17 + ch * x19 + ck * x21 - ci * x23 + -cf * x25 - cc * x27 - bz * x29 - bw * x31),
547  bh * x1 + G2(+bm * x3) + G4(+br * x5 + bw * x7) + G8(+cb * x9 + cg * x11 - ck * x13 - cf * x15) + G16(-ca * x17 - bv * x19 - bq * x21 - bl * x23 + -bg * x25 - bi * x27 - bn * x29 - bs * x31),
548  bi * x1 + G2(+bp * x3) + G4(+bw * x5 + cd * x7) + G8(+ck * x9 - ce * x11 - bx * x13 - bq * x15) + G16(-bj * x17 - bh * x19 - bo * x21 - bv * x23 + -cc * x25 - cj * x27 + cf * x29 + by * x31),
549  bj * x1 + G2(+bs * x3) + G4(+cb * x5 + ck * x7) + G8(-cc * x9 - bt * x11 - bk * x13 - bi * x15) + G16(-br * x17 - ca * x19 - cj * x21 + cd * x23 + bu * x25 + bl * x27 + bh * x29 + bq * x31),
550  bk * x1 + G2(+bv * x3) + G4(+cg * x5 - ce * x7) + G8(-bt * x9 - bi * x11 - bm * x13 - bx * x15) + G16(-ci * x17 + cc * x19 + br * x21 + bg * x23 + bo * x25 + bz * x27 + ck * x29 - ca * x31),
551  bl * x1 + G2(+by * x3) + G4(-ck * x5 - bx * x7) + G8(-bk * x9 - bm * x11 - bz * x13 + cj * x15) + G16(+bw * x17 + bj * x19 + bn * x21 + ca * x23 + -ci * x25 - bv * x27 - bi * x29 - bo * x31),
552  bm * x1 + G2(+cb * x3) + G4(-cf * x5 - bq * x7) + G8(-bi * x9 - bx * x11 + cj * x13 + bu * x15) + G16(+bf * x17 + bt * x19 + ci * x21 - by * x23 + -bj * x25 - bp * x27 - ce * x29 + cc * x31),
553  bn * x1 + G2(+ce * x3) + G4(-ca * x5 - bj * x7) + G8(-br * x9 - ci * x11 + bw * x13 + bf * x15) + G16(+bv * x17 - cj * x19 - bs * x21 - bi * x23 + -bz * x25 + cf * x27 + bo * x29 + bm * x31),
554  bo * x1 + G2(+ch * x3) + G4(-bv * x5 - bh * x7) + G8(-ca * x9 + cc * x11 + bj * x13 + bt * x15) + G16(-cj * x17 - bq * x19 - bm * x21 - cf * x23 + bx * x25 + bf * x27 + by * x29 - ce * x31),
555  bp * x1 + G2(+ck * x3) + G4(-bq * x5 - bo * x7) + G8(-cj * x9 + br * x11 + bn * x13 + ci * x15) + G16(-bs * x17 - bm * x19 - ch * x21 + bt * x23 + bl * x25 + cg * x27 - bu * x29 - bk * x31),
556  bq * x1 + G2(-ci * x3) + G4(-bl * x5 - bv * x7) + G8(+cd * x9 + bg * x11 + ca * x13 - by * x15) + G16(-bi * x17 - cf * x19 + bt * x21 + bn * x23 + ck * x25 - bo * x27 - bs * x29 + cg * x31),
557  br * x1 + G2(-cf * x3) + G4(-bg * x5 - cc * x7) + G8(+bu * x9 + bo * x11 - ci * x13 - bj * x15) + G16(-bz * x17 + bx * x19 + bl * x21 + ck * x23 + -bm * x25 - bw * x27 + ca * x29 + bi * x31),
558  bs * x1 + G2(-cc * x3) + G4(-bi * x5 - cj * x7) + G8(+bl * x9 + bz * x11 - bv * x13 - bp * x15) + G16(+cf * x17 + bf * x19 + cg * x21 - bo * x23 + -bw * x25 + by * x27 + bm * x29 - ci * x31),
559  bt * x1 + G2(-bz * x3) + G4(-bn * x5 + cf * x7) + G8(+bh * x9 + ck * x11 - bi * x13 - ce * x15) + G16(+bo * x17 + by * x19 - bu * x21 - bs * x23 + ca * x25 + bm * x27 - cg * x29 - bg * x31),
560  bu * x1 + G2(-bw * x3) + G4(-bs * x5 + by * x7) + G8(+bq * x9 - ca * x11 - bo * x13 + cc * x15) + G16(+bm * x17 - ce * x19 - bk * x21 + cg * x23 + bi * x25 - ci * x27 - bg * x29 + ck * x31),
561  bv * x1 + G2(-bt * x3) + G4(-bx * x5 + br * x7) + G8(+bz * x9 - bp * x11 - cb * x13 + bn * x15) + G16(+cd * x17 - bl * x19 - cf * x21 + bj * x23 + ch * x25 - bh * x27 - cj * x29 + bf * x31),
562  bw * x1 + G2(-bq * x3) + G4(-cc * x5 + bk * x7) + G8(+ci * x9 - bf * x11 + ch * x13 + bl * x15) + G16(-cb * x17 - br * x19 + bv * x21 + bx * x23 + -bp * x25 - cd * x27 + bj * x29 + cj * x31),
563  bx * x1 + G2(-bn * x3) + G4(-ch * x5 + bg * x7) + G8(-ce * x9 - bq * x11 + bu * x13 + ca * x15) + G16(-bk * x17 - ck * x19 + bj * x21 - cb * x23 + -bt * x25 + br * x27 + cd * x29 - bh * x31),
564  by * x1 + G2(-bk * x3) + G4(+cj * x5 + bn * x7) + G8(-bv * x9 - cb * x11 + bh * x13 - cg * x15) + G16(-bq * x17 + bs * x19 + ce * x21 - bf * x23 + cd * x25 + bt * x27 - bp * x29 - ch * x31),
565  bz * x1 + G2(-bh * x3) + G4(+ce * x5 + bu * x7) + G8(-bm * x9 + cj * x11 + bp * x13 - br * x15) + G16(-ch * x17 + bk * x19 - bw * x21 - cc * x23 + bf * x25 - cb * x27 - bx * x29 + bj * x31),
566  ca * x1 + G2(-bf * x3) + G4(+bz * x5 + cb * x7) + G8(-bg * x9 + by * x11 + cc * x13 - bh * x15) + G16(+bx * x17 + cd * x19 - bi * x21 + bw * x23 + ce * x25 - bj * x27 + bv * x29 + cf * x31),
567  cb * x1 + G2(-bi * x3) + G4(+bu * x5 + ci * x7) + G8(-bp * x9 + bn * x11 - cg * x13 - bw * x15) + G16(+bg * x17 - bz * x19 - cd * x21 + bk * x23 + -bs * x25 - ck * x27 + br * x29 - bl * x31),
568  cc * x1 + G2(-bl * x3) + G4(+bp * x5 - cg * x7) + G8(-by * x9 + bh * x11 - bt * x13 + ck * x15) + G16(+bu * x17 - bg * x19 + bx * x21 + ch * x23 + -bq * x25 + bk * x27 - cb * x29 - cd * x31),
569  cd * x1 + G2(-bo * x3) + G4(+bk * x5 - bz * x7) + G8(-ch * x9 + bs * x11 - bg * x13 + bv * x15) + G16(-ck * x17 - bw * x19 + bh * x21 - br * x23 + cg * x25 + ca * x27 - bl * x29 + bn * x31),
570  ce * x1 + G2(-br * x3) + G4(+bf * x5 - bs * x7) + G8(+cf * x9 + cd * x11 - bq * x13 + bg * x15) + G16(-bt * x17 + cg * x19 + cc * x21 - bp * x23 + bh * x25 - bu * x27 + ch * x29 + cb * x31),
571  cf * x1 + G2(-bu * x3) + G4(+bj * x5 - bl * x7) + G8(+bw * x9 - ch * x11 - cd * x13 + bs * x15) + G16(-bh * x17 + bn * x19 - by * x21 + cj * x23 + cb * x25 - bq * x27 + bf * x29 - bp * x31),
572  cg * x1 + G2(-bx * x3) + G4(+bo * x5 - bf * x7) + G8(+bn * x9 - bw * x11 + cf * x13 + ch * x15) + G16(-by * x17 + bp * x19 - bg * x21 + bm * x23 + -bv * x25 + ce * x27 + ci * x29 - bz * x31),
573  ch * x1 + G2(-ca * x3) + G4(+bt * x5 - bm * x7) + G8(+bf * x9 - bl * x11 + bs * x13 - bz * x15) + G16(+cg * x17 + ci * x19 - cb * x21 + bu * x23 + -bn * x25 + bg * x27 - bk * x29 + br * x31),
574  ci * x1 + G2(-cd * x3) + G4(+by * x5 - bt * x7) + G8(+bo * x9 - bj * x11 + bf * x13 - bk * x15) + G16(+bp * x17 - bu * x19 + bz * x21 - ce * x23 + cj * x25 + ch * x27 - cc * x29 + bx * x31),
575  cj * x1 + G2(-cg * x3) + G4(+cd * x5 - ca * x7) + G8(+bx * x9 - bu * x11 + br * x13 - bo * x15) + G16(+bl * x17 - bi * x19 + bf * x21 - bh * x23 + bk * x25 - bn * x27 + bq * x29 - bt * x31),
576  ck * x1 + G2(-cj * x3) + G4(+ci * x5 - ch * x7) + G8(+cg * x9 - cf * x11 + ce * x13 - cd * x15) + G16(+cc * x17 - cb * x19 + ca * x21 - bz * x23 + by * x25 - bx * x27 + bw * x29 - bv * x31),
577  };
     // outputs 0..31 are E[k] + O[k]
578  coeffs[0 * stride] = E[0 ] + O[0 ];
579  coeffs[1 * stride] = E[1 ] + O[1 ];
580  coeffs[2 * stride] = E[2 ] + O[2 ];
581  coeffs[3 * stride] = E[3 ] + O[3 ];
582  coeffs[4 * stride] = E[4 ] + O[4 ];
583  coeffs[5 * stride] = E[5 ] + O[5 ];
584  coeffs[6 * stride] = E[6 ] + O[6 ];
585  coeffs[7 * stride] = E[7 ] + O[7 ];
586  coeffs[8 * stride] = E[8 ] + O[8 ];
587  coeffs[9 * stride] = E[9 ] + O[9 ];
588  coeffs[10 * stride] = E[10] + O[10];
589  coeffs[11 * stride] = E[11] + O[11];
590  coeffs[12 * stride] = E[12] + O[12];
591  coeffs[13 * stride] = E[13] + O[13];
592  coeffs[14 * stride] = E[14] + O[14];
593  coeffs[15 * stride] = E[15] + O[15];
594  coeffs[16 * stride] = E[16] + O[16];
595  coeffs[17 * stride] = E[17] + O[17];
596  coeffs[18 * stride] = E[18] + O[18];
597  coeffs[19 * stride] = E[19] + O[19];
598  coeffs[20 * stride] = E[20] + O[20];
599  coeffs[21 * stride] = E[21] + O[21];
600  coeffs[22 * stride] = E[22] + O[22];
601  coeffs[23 * stride] = E[23] + O[23];
602  coeffs[24 * stride] = E[24] + O[24];
603  coeffs[25 * stride] = E[25] + O[25];
604  coeffs[26 * stride] = E[26] + O[26];
605  coeffs[27 * stride] = E[27] + O[27];
606  coeffs[28 * stride] = E[28] + O[28];
607  coeffs[29 * stride] = E[29] + O[29];
608  coeffs[30 * stride] = E[30] + O[30];
609  coeffs[31 * stride] = E[31] + O[31];
     // outputs 32..63 mirror the first half: output n is E[63 - n] - O[63 - n]
610  coeffs[32 * stride] = E[31] - O[31];
611  coeffs[33 * stride] = E[30] - O[30];
612  coeffs[34 * stride] = E[29] - O[29];
613  coeffs[35 * stride] = E[28] - O[28];
614  coeffs[36 * stride] = E[27] - O[27];
615  coeffs[37 * stride] = E[26] - O[26];
616  coeffs[38 * stride] = E[25] - O[25];
617  coeffs[39 * stride] = E[24] - O[24];
618  coeffs[40 * stride] = E[23] - O[23];
619  coeffs[41 * stride] = E[22] - O[22];
620  coeffs[42 * stride] = E[21] - O[21];
621  coeffs[43 * stride] = E[20] - O[20];
622  coeffs[44 * stride] = E[19] - O[19];
623  coeffs[45 * stride] = E[18] - O[18];
624  coeffs[46 * stride] = E[17] - O[17];
625  coeffs[47 * stride] = E[16] - O[16];
626  coeffs[48 * stride] = E[15] - O[15];
627  coeffs[49 * stride] = E[14] - O[14];
628  coeffs[50 * stride] = E[13] - O[13];
629  coeffs[51 * stride] = E[12] - O[12];
630  coeffs[52 * stride] = E[11] - O[11];
631  coeffs[53 * stride] = E[10] - O[10];
632  coeffs[54 * stride] = E[9] - O[9];
633  coeffs[55 * stride] = E[8] - O[8];
634  coeffs[56 * stride] = E[7] - O[7];
635  coeffs[57 * stride] = E[6] - O[6];
636  coeffs[58 * stride] = E[5] - O[5];
637  coeffs[59 * stride] = E[4] - O[4];
638  coeffs[60 * stride] = E[3] - O[3];
639  coeffs[61 * stride] = E[2] - O[2];
640  coeffs[62 * stride] = E[1] - O[1];
641  coeffs[63 * stride] = E[0] - O[0];
642 }
643 
/**
 * In-place 1D transform by plain matrix multiplication.
 *
 * For every output index i in [0, size):
 *     out[i] = sum over j in [0, nz) of in[j] * matrix[j * size + i]
 * where in[j] is read from coeffs[j * stride] and out[i] is written back to
 * coeffs[i * stride].
 *
 * @param coeffs input coefficients, overwritten with the size outputs
 * @param stride distance, in elements, between consecutive coefficients
 * @param matrix row-major size x size table of coefficients
 * @param size   number of outputs, and row length of matrix
 * @param nz     number of leading input coefficients to use; anything above
 *               16 is clamped to 16 (see the comment in the body)
 */
static void matrix_mul(int *coeffs, const ptrdiff_t stride, const int8_t *matrix, const int size, const size_t nz)
{
    // for dst7 and dct8, coefficients at index 16 and above are zeroed out,
    // so at most 16 inputs can ever contribute
    enum { MAX_NZ = 16 };
    // clamp so that tmp[] cannot be overrun by an oversized nz; this also
    // gives the loops below an int bound instead of comparing int with size_t
    const int n = nz < MAX_NZ ? (int)nz : MAX_NZ;
    int tmp[MAX_NZ];

    // snapshot the inputs first: the outputs overwrite the same storage
    for (int j = 0; j < n; j++)
        tmp[j] = coeffs[j * stride];

    for (int i = 0; i < size; i++) {
        const int8_t *col = matrix + i;   // column i of the row-major table
        int o = 0;

        for (int j = 0; j < n; j++)
            o += tmp[j] * col[j * size];
        coeffs[i * stride] = o;
    }
}
662 
/*
 * Shared body of the ff_vvc_inv_dct8_<S>() entry points.
 * Every argument is handed to matrix_mul() unchanged, so the transform that
 * is actually applied is determined solely by the table passed as matrix.
 */
static void inv_dct8(int *coeffs,
                     const ptrdiff_t stride,
                     const int8_t *matrix,
                     const int size,
                     const size_t nz)
{
    matrix_mul(coeffs, stride, matrix, size, nz);
}
667 
/*
 * Defines the public function ff_vvc_inv_dct8_<S>(coeffs, stride, nz), which
 * calls inv_dct8() with the table ff_vvc_dct8_<S>x<S> and size S.
 */
668 #define DEFINE_INV_DCT8_1D(S) \
669 void ff_vvc_inv_dct8_ ## S(int *coeffs, const ptrdiff_t stride, const size_t nz) \
670 { \
671  inv_dct8(coeffs, stride, &ff_vvc_dct8_##S##x##S[0][0], S, nz); \
672 }
673 
678 
/*
 * Shared body of the ff_vvc_inv_dst7_<S>() entry points.
 * Every argument is handed to matrix_mul() unchanged, so the transform that
 * is actually applied is determined solely by the table passed as matrix.
 */
static void inv_dst7(int *coeffs,
                     const ptrdiff_t stride,
                     const int8_t *matrix,
                     const int size,
                     const size_t nz)
{
    matrix_mul(coeffs, stride, matrix, size, nz);
}
683 
/*
 * Defines the public function ff_vvc_inv_dst7_<S>(coeffs, stride, nz), which
 * calls inv_dst7() with the table ff_vvc_dst7_<S>x<S> and size S.
 */
684 #define DEFINE_INV_DST7_1D(S) \
685 void ff_vvc_inv_dst7_ ## S(int *coeffs, const ptrdiff_t stride, const size_t nz) \
686 { \
687  inv_dst7(coeffs, stride, &ff_vvc_dst7_##S##x##S[0][0], S, nz); \
688 }
689 
694 
695 void ff_vvc_inv_lfnst_1d(int *v, const int *u, int no_zero_size, int n_tr_s,
696  int pred_mode_intra, int lfnst_idx, int log2_transform_range)
697 {
698  int lfnst_tr_set_idx = pred_mode_intra < 0 ? 1 : ff_vvc_lfnst_tr_set_index[pred_mode_intra];
699  const int8_t *tr_mat = n_tr_s > 16 ? ff_vvc_lfnst_8x8[lfnst_tr_set_idx][lfnst_idx-1][0] : ff_vvc_lfnst_4x4[lfnst_tr_set_idx][lfnst_idx - 1][0];
700 
701  for (int j = 0; j < n_tr_s; j++, tr_mat++) {
702  int t = 0;
703 
704  for (int i = 0; i < no_zero_size; i++)
705  t += u[i] * tr_mat[i * n_tr_s];
706  v[j] = av_clip_intp2((t + 64) >> 7 , log2_transform_range);
707  }
708 }
A
#define A(x)
Definition: vpx_arith.h:28
be
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it be(in the first position) for now. Options ------- Then comes the options array. This is what will define the user accessible options. For example
r
const char * r
Definition: vf_curves.c:127
DEFINE_INV_DCT8_1D
#define DEFINE_INV_DCT8_1D(S)
Definition: itx_1d.c:668
cb
static double cb(void *priv, double x, double y)
Definition: vf_geq.c:247
G2
#define G2(m)
Definition: itx_1d.c:64
u
#define u(width, name, range_min, range_max)
Definition: cbs_h2645.c:251
matrix
Definition: vc1dsp.c:43
ff_vvc_inv_dct2_32
void ff_vvc_inv_dct2_32(int *coeffs, const ptrdiff_t stride, const size_t nz)
Definition: itx_1d.c:279
data.h
tmp
static uint8_t tmp[11]
Definition: aes_ctr.c:28
w
uint8_t w
Definition: llviddspenc.c:38
inv_dct8
static void inv_dct8(int *coeffs, const ptrdiff_t stride, const int8_t *matrix, const int size, const size_t nz)
Definition: itx_1d.c:663
b
#define b
Definition: input.c:41
ff_vvc_lfnst_4x4
const int8_t ff_vvc_lfnst_4x4[4][2][16][16]
Definition: data.c:1476
D
D(D(float, sse)
Definition: rematrix_init.c:30
G4
#define G4(m)
Definition: itx_1d.c:65
ff_vvc_inv_dct2_64
void ff_vvc_inv_dct2_64(int *coeffs, const ptrdiff_t stride, const size_t nz)
Definition: itx_1d.c:459
matrix_mul
static void matrix_mul(int *coeffs, const ptrdiff_t stride, const int8_t *matrix, const int size, const size_t nz)
Definition: itx_1d.c:644
ff_vvc_inv_dct2_2
void ff_vvc_inv_dct2_2(int *coeffs, const ptrdiff_t stride, const size_t nz)
Definition: itx_1d.c:75
C
s EdgeDetect Foobar g libavfilter vf_edgedetect c libavfilter vf_foobar c edit libavfilter and add an entry for foobar following the pattern of the other filters edit libavfilter allfilters and add an entry for foobar following the pattern of the other filters configure make j< whatever > ffmpeg ffmpeg i you should get a foobar png with Lena edge detected That s your new playground is ready Some little details about what s going which in turn will define variables for the build system and the C
Definition: writing_filters.txt:58
bf
#define bf(fn, bd, opt)
Definition: vvcdsp_init.c:28
s
#define s(width, name)
Definition: cbs_vp9.c:198
g
const char * g
Definition: vf_curves.c:128
B
#define B
Definition: huffyuv.h:42
E
#define E
Definition: avdct.c:33
ff_vvc_inv_dct2_4
void ff_vvc_inv_dct2_4(int *coeffs, const ptrdiff_t stride, const size_t nz)
Definition: itx_1d.c:92
av_clip_intp2
#define av_clip_intp2
Definition: common.h:121
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
f
f
Definition: af_crystalizer.c:122
ff_vvc_lfnst_tr_set_index
const uint8_t ff_vvc_lfnst_tr_set_index[95]
Definition: data.c:1631
size
int size
Definition: twinvq_data.h:10344
G16
#define G16(m)
Definition: itx_1d.c:67
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
itx_1d.h
G8
#define G8(m)
Definition: itx_1d.c:66
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
inv_dst7
static void inv_dst7(int *coeffs, const ptrdiff_t stride, const int8_t *matrix, const int size, const size_t nz)
Definition: itx_1d.c:679
stride
#define stride
Definition: h264pred_template.c:537
DEFINE_INV_DST7_1D
#define DEFINE_INV_DST7_1D(S)
Definition: itx_1d.c:684
ff_vvc_inv_lfnst_1d
void ff_vvc_inv_lfnst_1d(int *v, const int *u, int no_zero_size, int n_tr_s, int pred_mode_intra, int lfnst_idx, int log2_transform_range)
Definition: itx_1d.c:695
ff_vvc_inv_dct2_16
void ff_vvc_inv_dct2_16(int *coeffs, const ptrdiff_t stride, const size_t nz)
Definition: itx_1d.c:180
avutil.h
ff_vvc_lfnst_8x8
const int8_t ff_vvc_lfnst_8x8[4][2][16][48]
Definition: data.c:1321
h
h
Definition: vp9dsp_template.c:2070
ff_vvc_inv_dct2_8
void ff_vvc_inv_dct2_8(int *coeffs, const ptrdiff_t stride, const size_t nz)
Definition: itx_1d.c:124