FFmpeg
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
dct-test.c
Go to the documentation of this file.
1 /*
2  * (c) 2001 Fabrice Bellard
3  * 2007 Marc Hoffman <marc.hoffman@analog.com>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * DCT test (c) 2001 Fabrice Bellard
25  * Started from sample code by Juan J. Sierralta P.
26  */
27 
28 #include "config.h"
29 #include <stdlib.h>
30 #include <stdio.h>
31 #include <string.h>
32 #if HAVE_UNISTD_H
33 #include <unistd.h>
34 #endif
35 #include <math.h>
36 
37 #include "libavutil/cpu.h"
38 #include "libavutil/common.h"
39 #include "libavutil/lfg.h"
40 #include "libavutil/time.h"
41 
42 #include "simple_idct.h"
43 #include "aandcttab.h"
44 #include "faandct.h"
45 #include "faanidct.h"
46 #include "x86/idct_xvid.h"
47 #include "dctref.h"
48 
49 #undef printf
50 
51 void ff_mmx_idct(DCTELEM *data);
53 
54 // BFIN
57 
58 // ALTIVEC
60 
61 // ARM
67 
69 
70 struct algo {
71  const char *name;
76  int nonspec;
77 };
78 
79 static int cpu_flags;
80 
81 static const struct algo fdct_tab[] = {
82  { "REF-DBL", ff_ref_fdct, NO_PERM },
83  { "FAAN", ff_faandct, NO_PERM },
84  { "IJG-AAN-INT", ff_fdct_ifast, SCALE_PERM },
85  { "IJG-LLM-INT", ff_jpeg_fdct_islow_8, NO_PERM },
86 
87 #if HAVE_MMX_INLINE
88  { "MMX", ff_fdct_mmx, NO_PERM, AV_CPU_FLAG_MMX },
91 #endif
92 
93 #if HAVE_ALTIVEC
94  { "altivecfdct", ff_fdct_altivec, NO_PERM, AV_CPU_FLAG_ALTIVEC },
95 #endif
96 
97 #if ARCH_BFIN
98  { "BFINfdct", ff_bfin_fdct, NO_PERM },
99 #endif
100 
101  { 0 }
102 };
103 
104 #if ARCH_X86_64 && HAVE_MMX && HAVE_YASM
105 void ff_prores_idct_put_10_sse2(uint16_t *dst, int linesize,
106  DCTELEM *block, int16_t *qmat);
107 
108 static void ff_prores_idct_put_10_sse2_wrap(DCTELEM *dst){
109  DECLARE_ALIGNED(16, static int16_t, qmat)[64];
110  DECLARE_ALIGNED(16, static int16_t, tmp)[64];
111  int i;
112 
113  for(i=0; i<64; i++){
114  qmat[i]=4;
115  tmp[i]= dst[i];
116  }
117  ff_prores_idct_put_10_sse2(dst, 16, tmp, qmat);
118 }
119 #endif
120 
121 static const struct algo idct_tab[] = {
122  { "FAANI", ff_faanidct, NO_PERM },
123  { "REF-DBL", ff_ref_idct, NO_PERM },
124  { "INT", ff_j_rev_dct, MMX_PERM },
125  { "SIMPLE-C", ff_simple_idct_8, NO_PERM },
126 
127 #if HAVE_MMX_INLINE
128 #if CONFIG_GPL
129  { "LIBMPEG2-MMX", ff_mmx_idct, MMX_PERM, AV_CPU_FLAG_MMX, 1 },
130  { "LIBMPEG2-MMX2", ff_mmxext_idct, MMX_PERM, AV_CPU_FLAG_MMX2, 1 },
131 #endif
133  { "XVID-MMX", ff_idct_xvid_mmx, NO_PERM, AV_CPU_FLAG_MMX, 1 },
134  { "XVID-MMXEXT", ff_idct_xvid_mmxext, NO_PERM, AV_CPU_FLAG_MMXEXT, 1 },
135  { "XVID-SSE2", ff_idct_xvid_sse2, SSE2_PERM, AV_CPU_FLAG_SSE2, 1 },
136 #if ARCH_X86_64 && HAVE_YASM
137  { "PR-SSE2", ff_prores_idct_put_10_sse2_wrap, TRANSPOSE_PERM, AV_CPU_FLAG_SSE2, 1 },
138 #endif
139 #endif
140 
141 #if ARCH_BFIN
142  { "BFINidct", ff_bfin_idct, NO_PERM },
143 #endif
144 
145 #if ARCH_ARM
146  { "SIMPLE-ARM", ff_simple_idct_arm, NO_PERM },
147  { "INT-ARM", ff_j_rev_dct_arm, MMX_PERM },
148 #endif
149 #if HAVE_ARMV5TE
150  { "SIMPLE-ARMV5TE", ff_simple_idct_armv5te,NO_PERM, AV_CPU_FLAG_ARMV5TE },
151 #endif
152 #if HAVE_ARMV6
153  { "SIMPLE-ARMV6", ff_simple_idct_armv6, MMX_PERM, AV_CPU_FLAG_ARMV6 },
154 #endif
155 #if HAVE_NEON
157 #endif
158 
159 #if ARCH_ALPHA
160  { "SIMPLE-ALPHA", ff_simple_idct_axp, NO_PERM },
161 #endif
162 
163  { 0 }
164 };
165 
166 #define AANSCALE_BITS 12
167 
168 #define NB_ITS 20000
169 #define NB_ITS_SPEED 50000
170 
171 static short idct_mmx_perm[64];
172 
/*
 * Coefficient reordering applied by permute() for MMX_SIMPLE_PERM:
 * source coefficient i is stored at destination index
 * idct_simple_mmx_perm[i]. The table is a permutation of 0..63 and is
 * only ever read, hence const.
 */
static const short idct_simple_mmx_perm[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
183 
184 static const uint8_t idct_sse2_row_perm[8] = { 0, 4, 1, 5, 2, 6, 3, 7 };
185 
186 static void idct_mmx_init(void)
187 {
188  int i;
189 
190  /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
191  for (i = 0; i < 64; i++) {
192  idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
193  }
194 }
195 
196 DECLARE_ALIGNED(16, static DCTELEM, block)[64];
198 
199 static void init_block(DCTELEM block[64], int test, int is_idct, AVLFG *prng, int vals)
200 {
201  int i, j;
202 
203  memset(block, 0, 64 * sizeof(*block));
204 
205  switch (test) {
206  case 0:
207  for (i = 0; i < 64; i++)
208  block[i] = (av_lfg_get(prng) % (2*vals)) -vals;
209  if (is_idct) {
210  ff_ref_fdct(block);
211  for (i = 0; i < 64; i++)
212  block[i] >>= 3;
213  }
214  break;
215  case 1:
216  j = av_lfg_get(prng) % 10 + 1;
217  for (i = 0; i < j; i++) {
218  int idx = av_lfg_get(prng) % 64;
219  block[idx] = av_lfg_get(prng) % (2*vals) -vals;
220  }
221  break;
222  case 2:
223  block[ 0] = av_lfg_get(prng) % (16*vals) - (8*vals);
224  block[63] = (block[0] & 1) ^ 1;
225  break;
226  }
227 }
228 
229 static void permute(DCTELEM dst[64], const DCTELEM src[64], int perm)
230 {
231  int i;
232 
233  if (perm == MMX_PERM) {
234  for (i = 0; i < 64; i++)
235  dst[idct_mmx_perm[i]] = src[i];
236  } else if (perm == MMX_SIMPLE_PERM) {
237  for (i = 0; i < 64; i++)
238  dst[idct_simple_mmx_perm[i]] = src[i];
239  } else if (perm == SSE2_PERM) {
240  for (i = 0; i < 64; i++)
241  dst[(i & 0x38) | idct_sse2_row_perm[i & 7]] = src[i];
242  } else if (perm == PARTTRANS_PERM) {
243  for (i = 0; i < 64; i++)
244  dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[i];
245  } else if (perm == TRANSPOSE_PERM) {
246  for (i = 0; i < 64; i++)
247  dst[(i>>3) | ((i<<3)&0x38)] = src[i];
248  } else {
249  for (i = 0; i < 64; i++)
250  dst[i] = src[i];
251  }
252 }
253 
254 static int dct_error(const struct algo *dct, int test, int is_idct, int speed, const int bits)
255 {
256  void (*ref)(DCTELEM *block) = is_idct ? ff_ref_idct : ff_ref_fdct;
257  int it, i, scale;
258  int err_inf, v;
259  int64_t err2, ti, ti1, it1, err_sum = 0;
260  int64_t sysErr[64], sysErrMax = 0;
261  int maxout = 0;
262  int blockSumErrMax = 0, blockSumErr;
263  AVLFG prng;
264  const int vals=1<<bits;
265  double omse, ome;
266  int spec_err;
267 
268  av_lfg_init(&prng, 1);
269 
270  err_inf = 0;
271  err2 = 0;
272  for (i = 0; i < 64; i++)
273  sysErr[i] = 0;
274  for (it = 0; it < NB_ITS; it++) {
275  init_block(block1, test, is_idct, &prng, vals);
276  permute(block, block1, dct->format);
277 
278  dct->func(block);
279  emms_c();
280 
281  if (dct->format == SCALE_PERM) {
282  for (i = 0; i < 64; i++) {
283  scale = 8 * (1 << (AANSCALE_BITS + 11)) / ff_aanscales[i];
284  block[i] = (block[i] * scale) >> AANSCALE_BITS;
285  }
286  }
287 
288  ref(block1);
289 
290  blockSumErr = 0;
291  for (i = 0; i < 64; i++) {
292  int err = block[i] - block1[i];
293  err_sum += err;
294  v = abs(err);
295  if (v > err_inf)
296  err_inf = v;
297  err2 += v * v;
298  sysErr[i] += block[i] - block1[i];
299  blockSumErr += v;
300  if (abs(block[i]) > maxout)
301  maxout = abs(block[i]);
302  }
303  if (blockSumErrMax < blockSumErr)
304  blockSumErrMax = blockSumErr;
305  }
306  for (i = 0; i < 64; i++)
307  sysErrMax = FFMAX(sysErrMax, FFABS(sysErr[i]));
308 
309  for (i = 0; i < 64; i++) {
310  if (i % 8 == 0)
311  printf("\n");
312  printf("%7d ", (int) sysErr[i]);
313  }
314  printf("\n");
315 
316  omse = (double) err2 / NB_ITS / 64;
317  ome = (double) err_sum / NB_ITS / 64;
318 
319  spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015);
320 
321  printf("%s %s: max_err=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
322  is_idct ? "IDCT" : "DCT", dct->name, err_inf,
323  omse, ome, (double) sysErrMax / NB_ITS,
324  maxout, blockSumErrMax);
325 
326  if (spec_err && !dct->nonspec)
327  return 1;
328 
329  if (!speed)
330  return 0;
331 
332  /* speed test */
333 
334  init_block(block, test, is_idct, &prng, vals);
335  permute(block1, block, dct->format);
336 
337  ti = av_gettime();
338  it1 = 0;
339  do {
340  for (it = 0; it < NB_ITS_SPEED; it++) {
341  memcpy(block, block1, sizeof(block));
342  dct->func(block);
343  }
344  emms_c();
345  it1 += NB_ITS_SPEED;
346  ti1 = av_gettime() - ti;
347  } while (ti1 < 1000000);
348 
349  printf("%s %s: %0.1f kdct/s\n", is_idct ? "IDCT" : "DCT", dct->name,
350  (double) it1 * 1000.0 / (double) ti1);
351 
352  return 0;
353 }
354 
357 
358 static void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
359 {
360  static int init;
361  static double c8[8][8];
362  static double c4[4][4];
363  double block1[64], block2[64], block3[64];
364  double s, sum, v;
365  int i, j, k;
366 
367  if (!init) {
368  init = 1;
369 
370  for (i = 0; i < 8; i++) {
371  sum = 0;
372  for (j = 0; j < 8; j++) {
373  s = (i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0);
374  c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
375  sum += c8[i][j] * c8[i][j];
376  }
377  }
378 
379  for (i = 0; i < 4; i++) {
380  sum = 0;
381  for (j = 0; j < 4; j++) {
382  s = (i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0);
383  c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
384  sum += c4[i][j] * c4[i][j];
385  }
386  }
387  }
388 
389  /* butterfly */
390  s = 0.5 * sqrt(2.0);
391  for (i = 0; i < 4; i++) {
392  for (j = 0; j < 8; j++) {
393  block1[8 * (2 * i) + j] =
394  (block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) * s;
395  block1[8 * (2 * i + 1) + j] =
396  (block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) * s;
397  }
398  }
399 
400  /* idct8 on lines */
401  for (i = 0; i < 8; i++) {
402  for (j = 0; j < 8; j++) {
403  sum = 0;
404  for (k = 0; k < 8; k++)
405  sum += c8[k][j] * block1[8 * i + k];
406  block2[8 * i + j] = sum;
407  }
408  }
409 
410  /* idct4 */
411  for (i = 0; i < 8; i++) {
412  for (j = 0; j < 4; j++) {
413  /* top */
414  sum = 0;
415  for (k = 0; k < 4; k++)
416  sum += c4[k][j] * block2[8 * (2 * k) + i];
417  block3[8 * (2 * j) + i] = sum;
418 
419  /* bottom */
420  sum = 0;
421  for (k = 0; k < 4; k++)
422  sum += c4[k][j] * block2[8 * (2 * k + 1) + i];
423  block3[8 * (2 * j + 1) + i] = sum;
424  }
425  }
426 
427  /* clamp and store the result */
428  for (i = 0; i < 8; i++) {
429  for (j = 0; j < 8; j++) {
430  v = block3[8 * i + j];
431  if (v < 0) v = 0;
432  else if (v > 255) v = 255;
433  dest[i * linesize + j] = (int) rint(v);
434  }
435  }
436 }
437 
438 static void idct248_error(const char *name,
439  void (*idct248_put)(uint8_t *dest, int line_size,
440  int16_t *block),
441  int speed)
442 {
443  int it, i, it1, ti, ti1, err_max, v;
444  AVLFG prng;
445 
446  av_lfg_init(&prng, 1);
447 
448  /* just one test to see if code is correct (precision is less
449  important here) */
450  err_max = 0;
451  for (it = 0; it < NB_ITS; it++) {
452  /* XXX: use forward transform to generate values */
453  for (i = 0; i < 64; i++)
454  block1[i] = av_lfg_get(&prng) % 256 - 128;
455  block1[0] += 1024;
456 
457  for (i = 0; i < 64; i++)
458  block[i] = block1[i];
459  idct248_ref(img_dest1, 8, block);
460 
461  for (i = 0; i < 64; i++)
462  block[i] = block1[i];
463  idct248_put(img_dest, 8, block);
464 
465  for (i = 0; i < 64; i++) {
466  v = abs((int) img_dest[i] - (int) img_dest1[i]);
467  if (v == 255)
468  printf("%d %d\n", img_dest[i], img_dest1[i]);
469  if (v > err_max)
470  err_max = v;
471  }
472 #if 0
473  printf("ref=\n");
474  for(i=0;i<8;i++) {
475  int j;
476  for(j=0;j<8;j++) {
477  printf(" %3d", img_dest1[i*8+j]);
478  }
479  printf("\n");
480  }
481 
482  printf("out=\n");
483  for(i=0;i<8;i++) {
484  int j;
485  for(j=0;j<8;j++) {
486  printf(" %3d", img_dest[i*8+j]);
487  }
488  printf("\n");
489  }
490 #endif
491  }
492  printf("%s %s: err_inf=%d\n", 1 ? "IDCT248" : "DCT248", name, err_max);
493 
494  if (!speed)
495  return;
496 
497  ti = av_gettime();
498  it1 = 0;
499  do {
500  for (it = 0; it < NB_ITS_SPEED; it++) {
501  for (i = 0; i < 64; i++)
502  block[i] = block1[i];
503  idct248_put(img_dest, 8, block);
504  }
505  emms_c();
506  it1 += NB_ITS_SPEED;
507  ti1 = av_gettime() - ti;
508  } while (ti1 < 1000000);
509 
510  printf("%s %s: %0.1f kdct/s\n", 1 ? "IDCT248" : "DCT248", name,
511  (double) it1 * 1000.0 / (double) ti1);
512 }
513 
514 static void help(void)
515 {
516  printf("dct-test [-i] [<test-number>] [<bits>]\n"
517  "test-number 0 -> test with random matrixes\n"
518  " 1 -> test with random sparse matrixes\n"
519  " 2 -> do 3. test from mpeg4 std\n"
520  "bits Number of time domain bits to use, 8 is default\n"
521  "-i test IDCT implementations\n"
522  "-4 test IDCT248 implementations\n"
523  "-t speed test\n");
524 }
525 
526 #if !HAVE_GETOPT
527 #include "compat/getopt.c"
528 #endif
529 
530 int main(int argc, char **argv)
531 {
532  int test_idct = 0, test_248_dct = 0;
533  int c, i;
534  int test = 1;
535  int speed = 0;
536  int err = 0;
537  int bits=8;
538 
540 
541  ff_ref_dct_init();
542  idct_mmx_init();
543 
544  for (;;) {
545  c = getopt(argc, argv, "ih4t");
546  if (c == -1)
547  break;
548  switch (c) {
549  case 'i':
550  test_idct = 1;
551  break;
552  case '4':
553  test_248_dct = 1;
554  break;
555  case 't':
556  speed = 1;
557  break;
558  default:
559  case 'h':
560  help();
561  return 0;
562  }
563  }
564 
565  if (optind < argc)
566  test = atoi(argv[optind]);
567  if(optind+1 < argc) bits= atoi(argv[optind+1]);
568 
569  printf("ffmpeg DCT/IDCT test\n");
570 
571  if (test_248_dct) {
572  idct248_error("SIMPLE-C", ff_simple_idct248_put, speed);
573  } else {
574  const struct algo *algos = test_idct ? idct_tab : fdct_tab;
575  for (i = 0; algos[i].name; i++)
576  if (!(~cpu_flags & algos[i].mm_support)) {
577  err |= dct_error(&algos[i], test, test_idct, speed, bits);
578  }
579  }
580 
581  return err;
582 }