104 #if ARCH_X86_64 && HAVE_MMX && HAVE_YASM
108 static void ff_prores_idct_put_10_sse2_wrap(
DCTELEM *
dst){
136 #if ARCH_X86_64 && HAVE_YASM
166 #define AANSCALE_BITS 12
169 #define NB_ITS_SPEED 50000
174 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
175 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
176 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
177 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
178 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
179 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
180 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
181 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
191 for (i = 0; i < 64; i++) {
192 idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
203 memset(block, 0, 64 *
sizeof(*block));
207 for (i = 0; i < 64; i++)
208 block[i] = (
av_lfg_get(prng) % (2*vals)) -vals;
211 for (i = 0; i < 64; i++)
217 for (i = 0; i < j; i++) {
219 block[idx] =
av_lfg_get(prng) % (2*vals) -vals;
223 block[ 0] =
av_lfg_get(prng) % (16*vals) - (8*vals);
224 block[63] = (block[0] & 1) ^ 1;
234 for (i = 0; i < 64; i++)
235 dst[idct_mmx_perm[i]] = src[i];
237 for (i = 0; i < 64; i++)
238 dst[idct_simple_mmx_perm[i]] = src[i];
240 for (i = 0; i < 64; i++)
241 dst[(i & 0x38) | idct_sse2_row_perm[i & 7]] = src[i];
243 for (i = 0; i < 64; i++)
244 dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[i];
246 for (i = 0; i < 64; i++)
247 dst[(i>>3) | ((i<<3)&0x38)] = src[i];
249 for (i = 0; i < 64; i++)
259 int64_t err2, ti, ti1, it1, err_sum = 0;
260 int64_t sysErr[64], sysErrMax = 0;
262 int blockSumErrMax = 0, blockSumErr;
264 const int vals=1<<
bits;
272 for (i = 0; i < 64; i++)
274 for (it = 0; it <
NB_ITS; it++) {
282 for (i = 0; i < 64; i++) {
291 for (i = 0; i < 64; i++) {
298 sysErr[i] +=
block[i] - block1[i];
300 if (abs(
block[i]) > maxout)
301 maxout = abs(
block[i]);
303 if (blockSumErrMax < blockSumErr)
304 blockSumErrMax = blockSumErr;
306 for (i = 0; i < 64; i++)
307 sysErrMax =
FFMAX(sysErrMax,
FFABS(sysErr[i]));
309 for (i = 0; i < 64; i++) {
312 printf(
"%7d ", (
int) sysErr[i]);
316 omse = (double) err2 / NB_ITS / 64;
317 ome = (double) err_sum / NB_ITS / 64;
319 spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015);
321 printf(
"%s %s: max_err=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
322 is_idct ?
"IDCT" :
"DCT", dct->
name, err_inf,
323 omse, ome, (
double) sysErrMax / NB_ITS,
324 maxout, blockSumErrMax);
347 }
while (ti1 < 1000000);
349 printf(
"%s %s: %0.1f kdct/s\n", is_idct ?
"IDCT" :
"DCT", dct->
name,
350 (
double) it1 * 1000.0 / (
double) ti1);
361 static double c8[8][8];
362 static double c4[4][4];
363 double block1[64], block2[64], block3[64];
370 for (i = 0; i < 8; i++) {
372 for (j = 0; j < 8; j++) {
373 s = (i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0);
374 c8[i][j] = s * cos(
M_PI * i * (j + 0.5) / 8.0);
375 sum += c8[i][j] * c8[i][j];
379 for (i = 0; i < 4; i++) {
381 for (j = 0; j < 4; j++) {
382 s = (i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0);
383 c4[i][j] = s * cos(
M_PI * i * (j + 0.5) / 4.0);
384 sum += c4[i][j] * c4[i][j];
391 for (i = 0; i < 4; i++) {
392 for (j = 0; j < 8; j++) {
393 block1[8 * (2 * i) + j] =
394 (block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) * s;
395 block1[8 * (2 * i + 1) + j] =
396 (block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) * s;
401 for (i = 0; i < 8; i++) {
402 for (j = 0; j < 8; j++) {
404 for (k = 0; k < 8; k++)
405 sum += c8[k][j] * block1[8 * i + k];
406 block2[8 * i + j] = sum;
411 for (i = 0; i < 8; i++) {
412 for (j = 0; j < 4; j++) {
415 for (k = 0; k < 4; k++)
416 sum += c4[k][j] * block2[8 * (2 * k) + i];
417 block3[8 * (2 * j) + i] = sum;
421 for (k = 0; k < 4; k++)
422 sum += c4[k][j] * block2[8 * (2 * k + 1) + i];
423 block3[8 * (2 * j + 1) + i] = sum;
428 for (i = 0; i < 8; i++) {
429 for (j = 0; j < 8; j++) {
430 v = block3[8 * i + j];
432 else if (v > 255) v = 255;
433 dest[i * linesize + j] = (int)
rint(v);
439 void (*idct248_put)(
uint8_t *dest,
int line_size,
443 int it, i, it1, ti, ti1, err_max, v;
451 for (it = 0; it <
NB_ITS; it++) {
453 for (i = 0; i < 64; i++)
457 for (i = 0; i < 64; i++)
461 for (i = 0; i < 64; i++)
465 for (i = 0; i < 64; i++) {
492 printf(
"%s %s: err_inf=%d\n", 1 ?
"IDCT248" :
"DCT248", name, err_max);
501 for (i = 0; i < 64; i++)
508 }
while (ti1 < 1000000);
510 printf(
"%s %s: %0.1f kdct/s\n", 1 ?
"IDCT248" :
"DCT248", name,
511 (
double) it1 * 1000.0 / (
double) ti1);
516 printf(
"dct-test [-i] [<test-number>] [<bits>]\n"
517 "test-number 0 -> test with random matrixes\n"
518 " 1 -> test with random sparse matrixes\n"
519 " 2 -> do 3. test from mpeg4 std\n"
520 "bits Number of time domain bits to use, 8 is default\n"
521 "-i test IDCT implementations\n"
522 "-4 test IDCT248 implementations\n"
530 int main(
int argc,
char **argv)
532 int test_idct = 0, test_248_dct = 0;
545 c =
getopt(argc, argv,
"ih4t");
566 test = atoi(argv[
optind]);
567 if(optind+1 < argc) bits= atoi(argv[optind+1]);
569 printf(
"ffmpeg DCT/IDCT test\n");
574 const struct algo *algos = test_idct ? idct_tab :
fdct_tab;
575 for (i = 0; algos[i].
name; i++)
577 err |=
dct_error(&algos[i], test, test_idct, speed, bits);