93 #if CONFIG_MPEG4_DECODER 111 #define AANSCALE_BITS 12 114 #define NB_ITS_SPEED 50000 123 memset(block, 0, 64 *
sizeof(*block));
127 for (i = 0; i < 64; i++)
128 block[i] = (
av_lfg_get(prng) % (2*vals)) -vals;
131 for (i = 0; i < 64; i++)
137 for (i = 0; i < j; i++) {
139 block[idx] =
av_lfg_get(prng) % (2*vals) -vals;
143 block[ 0] =
av_lfg_get(prng) % (16*vals) - (8*vals);
144 block[63] = (block[0] & 1) ^ 1;
161 for (i = 0; i < 64; i++)
162 dst[(i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2)] = src[
i];
165 for (i = 0; i < 64; i++)
166 dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[
i];
169 for (i = 0; i < 64; i++)
170 dst[(i>>3) | ((i<<3)&0x38)] = src[
i];
173 for (i = 0; i < 64; i++)
184 int64_t err2, ti, ti1, it1, err_sum = 0;
185 int64_t sysErr[64], sysErrMax = 0;
186 int64_t err2_matrix[64], err2_max = 0;
188 int blockSumErrMax = 0, blockSumErr;
190 const int vals=1<<
bits;
198 for (i = 0; i < 64; i++)
199 err2_matrix[i] = sysErr[i] = 0;
200 for (it = 0; it <
NB_ITS; it++) {
207 if (!strcmp(dct->
name,
"IJG-AAN-INT")) {
208 for (i = 0; i < 64; i++) {
215 if (!strcmp(dct->
name,
"PR-SSE2"))
216 for (i = 0; i < 64; i++)
220 for (i = 0; i < 64; i++) {
226 err2_matrix[
i] += v * v;
233 if (blockSumErrMax < blockSumErr)
234 blockSumErrMax = blockSumErr;
236 for (i = 0; i < 64; i++) {
237 sysErrMax =
FFMAX(sysErrMax,
FFABS(sysErr[i]));
238 err2_max =
FFMAX(err2_max ,
FFABS(err2_matrix[i]));
241 for (i = 0; i < 64; i++) {
244 printf(
"%7d ", (
int) sysErr[i]);
248 omse = (double) err2 / NB_ITS / 64;
249 ome = (double) err_sum / NB_ITS / 64;
251 spec_err = is_idct && (err_inf > 1 || omse > 0.02 ||
fabs(ome) > 0.0015);
253 spec_err = is_idct && ((double) err2_max / NB_ITS > 0.06 || (
double) sysErrMax / NB_ITS > 0.015);
255 printf(
"%s %s: max_err=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
256 is_idct ?
"IDCT" :
"DCT", dct->
name, err_inf,
257 omse, ome, (
double) sysErrMax / NB_ITS,
258 maxout, blockSumErrMax);
260 if (spec_err && !dct->
nonspec) {
283 }
while (ti1 < 1000000);
285 printf(
"%s %s: %0.1f kdct/s\n", is_idct ?
"IDCT" :
"DCT", dct->
name,
286 (
double) it1 * 1000.0 / (
double) ti1);
297 static double c8[8][8];
298 static double c4[4][4];
299 double block1[64], block2[64], block3[64];
306 for (i = 0; i < 8; i++) {
308 for (j = 0; j < 8; j++) {
309 s = (i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0);
310 c8[
i][j] = s * cos(
M_PI * i * (j + 0.5) / 8.0);
311 sum += c8[
i][j] * c8[
i][j];
315 for (i = 0; i < 4; i++) {
317 for (j = 0; j < 4; j++) {
318 s = (i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0);
319 c4[
i][j] = s * cos(
M_PI * i * (j + 0.5) / 4.0);
320 sum += c4[
i][j] * c4[
i][j];
327 for (i = 0; i < 4; i++) {
328 for (j = 0; j < 8; j++) {
329 block1[8 * (2 *
i) + j] =
330 (block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) *
s;
331 block1[8 * (2 * i + 1) + j] =
332 (block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) *
s;
337 for (i = 0; i < 8; i++) {
338 for (j = 0; j < 8; j++) {
340 for (k = 0; k < 8; k++)
341 sum += c8[k][j] * block1[8 * i + k];
342 block2[8 * i + j] = sum;
347 for (i = 0; i < 8; i++) {
348 for (j = 0; j < 4; j++) {
351 for (k = 0; k < 4; k++)
352 sum += c4[k][j] * block2[8 * (2 * k) +
i];
353 block3[8 * (2 * j) + i] = sum;
357 for (k = 0; k < 4; k++)
358 sum += c4[k][j] * block2[8 * (2 * k + 1) +
i];
359 block3[8 * (2 * j + 1) + i] = sum;
364 for (i = 0; i < 8; i++) {
365 for (j = 0; j < 8; j++) {
366 v = block3[8 * i + j];
368 else if (v > 255) v = 255;
369 dest[i * linesize + j] = (
int)
rint(v);
375 void (*idct248_put)(
uint8_t *dest,
380 int it,
i, it1, ti, ti1, err_max, v;
388 for (it = 0; it <
NB_ITS; it++) {
390 for (i = 0; i < 64; i++)
394 for (i = 0; i < 64; i++)
398 for (i = 0; i < 64; i++)
402 for (i = 0; i < 64; i++) {
429 printf(
"%s %s: err_inf=%d\n", 1 ?
"IDCT248" :
"DCT248", name, err_max);
438 for (i = 0; i < 64; i++)
445 }
while (ti1 < 1000000);
447 printf(
"%s %s: %0.1f kdct/s\n", 1 ?
"IDCT248" :
"DCT248", name,
448 (
double) it1 * 1000.0 / (
double) ti1);
453 printf(
"dct-test [-i] [<test-number>] [<bits>]\n" 454 "test-number 0 -> test with random matrixes\n" 455 " 1 -> test with random sparse matrixes\n" 456 " 2 -> do 3. test from MPEG-4 std\n" 457 "bits Number of time domain bits to use, 8 is default\n" 458 "-i test IDCT implementations\n" 459 "-4 test IDCT248 implementations\n" 467 int main(
int argc,
char **argv)
469 int test_idct = 0, test_248_dct = 0;
479 c =
getopt(argc, argv,
"ih4t");
500 test = atoi(argv[
optind]);
501 if(optind+1 < argc) bits= atoi(argv[optind+1]);
503 printf(
"ffmpeg DCT/IDCT test\n");
511 err |=
dct_error(&idct_tab[i], test, test_idct, speed, bits);
513 for (i = 0; idct_tab_arch[
i].
name; i++)
514 if (!(~cpu_flags & idct_tab_arch[i].
cpu_flag))
515 err |=
dct_error(&idct_tab_arch[i], test, test_idct, speed, bits);
520 err |=
dct_error(&fdct_tab[i], test, test_idct, speed, bits);
522 for (i = 0; fdct_tab_arch[
i].
name; i++)
523 if (!(~cpu_flags & fdct_tab_arch[i].
cpu_flag))
524 err |=
dct_error(&fdct_tab_arch[i], test, test_idct, speed, bits);
530 printf(
"Error: %d.\n", err);
static const struct algo idct_tab_arch[]
Context structure for the Lagged Fibonacci PRNG.
void ff_fdct_ifast(int16_t *data)
static av_cold int init(AVCodecContext *avctx)
#define LOCAL_ALIGNED(a, t, v,...)
void ff_simple_idct_int16_8bit(int16_t *block)
static void ff_prores_idct_wrap(int16_t *dst)
static atomic_int cpu_flags
#define FF_ARRAY_ELEMS(a)
av_cold void ff_ref_dct_init(void)
Initialize the double precision discrete cosine transform functions fdct & idct.
void ff_faanidct(int16_t block[64])
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit-picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can lead, and in some cases has led, to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c...
static int permute_x86(int16_t dst[64], const int16_t src[64], enum idct_permutation_type perm_type)
void ff_simple_idct_int16_12bit(int16_t *block)
#define DECLARE_ALIGNED(n, t, v)
Declare a variable that is aligned in memory.
const uint16_t ff_aanscales[64]
s EdgeDetect Foobar g libavfilter vf_edgedetect c libavfilter vf_foobar c edit libavfilter and add an entry for foobar following the pattern of the other filters edit libavfilter allfilters and add an entry for foobar following the pattern of the other filters configure make j< whatever > ffmpeg ffmpeg i you should get a foobar png with Lena edge detected That s it
static void idct248_error(const char *name, void(*idct248_put)(uint8_t *dest, ptrdiff_t line_size, int16_t *block), int speed)
static __device__ float fabs(float a)
void ff_prores_idct_10(int16_t *block, const int16_t *qmat)
Special version of ff_simple_idct_int16_10bit() which does dequantization and scales by a factor of 2...
static uint8_t img_dest1[64]
void(* func)(int16_t *block)
common internal API header
int main(int argc, char **argv)
static void permute(int16_t dst[64], const int16_t src[64], enum idct_permutation_type perm_type)
typedef void(APIENTRY *FF_PFNGLACTIVETEXTUREPROC)(GLenum texture)
static void init_block(int16_t block[64], int test, int is_idct, AVLFG *prng, int vals)
static const struct algo idct_tab[]
#define FFABS(a)
Absolute value, Note, INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
static int dct_error(const struct algo *dct, int test, int is_idct, int speed, const int bits)
void ff_jpeg_fdct_islow_8(int16_t *data)
void ff_j_rev_dct(int16_t *data)
void ff_faandct(int16_t *data)
static void test(const char *pattern, const char *host)
static int getopt(int argc, char *argv[], char *opts)
static unsigned int av_lfg_get(AVLFG *c)
Get the next random unsigned 32-bit number using an ALFG.
static const struct algo fdct_tab_arch[]
void ff_ref_fdct(short *block)
Transform 8x8 block of data with a double precision forward DCT This is a reference implementation...
av_cold void av_lfg_init(AVLFG *c, unsigned int seed)
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
void ff_xvid_idct(int16_t *const in)
void ff_simple_idct_int16_10bit(int16_t *block)
int64_t av_gettime_relative(void)
Get the current time in microseconds since some unspecified starting point.
static uint8_t img_dest[64]
AAN (Arai, Agui and Nakajima) (I)DCT tables.
static void idct248_ref(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
common internal and external API header
static int ref[MAX_W *MAX_W]
void ff_simple_idct248_put(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
enum idct_permutation_type perm_type
void ff_ref_idct(short *block)
Transform 8x8 block of data with a double precision inverse DCT This is a reference implementation...
static void dct(AudioRNNContext *s, float *out, const float *in)
printf("static const uint8_t my_array[100] = {\n")
static int16_t block1[64]
static const struct algo fdct_tab[]