FFmpeg
diracdsp_init.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2010 David Conrad
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/x86/cpu.h"
22 #include "libavcodec/diracdsp.h"
23 #include "fpel.h"
24 
/*
 * Prototypes for the SIMD routines this file refers to. No bodies are visible
 * in this file; the _mmx / _sse2 / _sse4 suffix names the instruction set the
 * routine is selected for by the CPU-flag checks further down.
 */

/* Parameter names mirror the add_rect_clamped member of DiracDSPContext. */
void ff_add_rect_clamped_mmx(uint8_t *dst, const uint16_t *src, int stride,
                             const int16_t *idwt, int idwt_stride,
                             int width, int height);
void ff_add_rect_clamped_sse2(uint8_t *dst, const uint16_t *src, int stride,
                              const int16_t *idwt, int idwt_stride,
                              int width, int height);

/* add_dirac_obmc variants; the digits in the name are presumably the block
 * width handled -- confirm against the assembly. */
void ff_add_dirac_obmc8_mmx(uint16_t *dst, const uint8_t *src, int stride,
                            const uint8_t *obmc_weight, int yblen);
void ff_add_dirac_obmc16_mmx(uint16_t *dst, const uint8_t *src, int stride,
                             const uint8_t *obmc_weight, int yblen);
void ff_add_dirac_obmc32_mmx(uint16_t *dst, const uint8_t *src, int stride,
                             const uint8_t *obmc_weight, int yblen);

void ff_add_dirac_obmc16_sse2(uint16_t *dst, const uint8_t *src, int stride,
                              const uint8_t *obmc_weight, int yblen);
void ff_add_dirac_obmc32_sse2(uint16_t *dst, const uint8_t *src, int stride,
                              const uint8_t *obmc_weight, int yblen);

/* put_rect_clamped / put_signed_rect_clamped variants. Note that only the
 * 10_sse4 variant takes its source as const uint8_t *; the others take
 * const int16_t *. */
void ff_put_rect_clamped_mmx(uint8_t *dst, int dst_stride,
                             const int16_t *src, int src_stride,
                             int width, int height);
void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride,
                              const int16_t *src, int src_stride,
                              int width, int height);
void ff_put_signed_rect_clamped_mmx(uint8_t *dst, int dst_stride,
                                    const int16_t *src, int src_stride,
                                    int width, int height);
void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride,
                                     const int16_t *src, int src_stride,
                                     int width, int height);
void ff_put_signed_rect_clamped_10_sse4(uint8_t *dst, int dst_stride,
                                        const uint8_t *src, int src_stride,
                                        int width, int height);

void ff_dequant_subband_32_sse4(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
                                const int qf, const int qs,
                                int tot_v, int tot_h);
42 
43 #if HAVE_X86ASM
44 
/*
 * HPEL_FILTER(MMSIZE, EXT)
 *
 * Declares the external row kernels ff_dirac_hpel_filter_v_EXT and
 * ff_dirac_hpel_filter_h_EXT, and defines the static driver
 * dirac_hpel_filter_EXT that runs them over `height` rows.
 *
 * For every row:
 *   - the v kernel fills dstv from src; both pointers are moved MMSIZE bytes
 *     to the left and the width is widened by MMSIZE + 5;
 *   - the h kernel fills dsth from src;
 *   - the h kernel fills dstc from the dstv row written just before.
 * All four pointers then advance by `stride`.
 */
#define HPEL_FILTER(MMSIZE, EXT)                                               \
    void ff_dirac_hpel_filter_v_ ## EXT(uint8_t *, const uint8_t *, int, int); \
    void ff_dirac_hpel_filter_h_ ## EXT(uint8_t *, const uint8_t *, int);      \
                                                                               \
    static void dirac_hpel_filter_ ## EXT(uint8_t *dsth, uint8_t *dstv,        \
                                          uint8_t *dstc, const uint8_t *src,   \
                                          int stride, int width, int height)   \
    {                                                                          \
        for (; height != 0; height--) {                                        \
            ff_dirac_hpel_filter_v_ ## EXT(dstv - MMSIZE, src - MMSIZE,        \
                                           stride, width + MMSIZE + 5);        \
            ff_dirac_hpel_filter_h_ ## EXT(dsth, src, width);                  \
            ff_dirac_hpel_filter_h_ ## EXT(dstc, dstv, width);                 \
                                                                               \
            src  += stride;                                                    \
            dstc += stride;                                                    \
            dstv += stride;                                                    \
            dsth += stride;                                                    \
        }                                                                      \
    }
64 
/*
 * PIXFUNC(PFX, IDX, EXT)
 *
 * Stores the PFX_dirac_pixels16_EXT and PFX_dirac_pixels32_EXT wrappers into
 * slot IDX of rows 1 and 2 of c->PFX_dirac_pixels_tab. A pointer named `c`
 * must be in scope at the point of use.
 *
 * Row 0 (the pixels8 variant) is deliberately not assigned; the original
 * source carried that assignment commented out under an "MMXDISABLED" tag.
 *
 * The two stores are wrapped in do { } while (0) so the macro behaves as a
 * single statement, e.g. as the unbraced body of an if/else.
 */
#define PIXFUNC(PFX, IDX, EXT)                                                \
    do {                                                                      \
        c->PFX ## _dirac_pixels_tab[1][IDX] = PFX ## _dirac_pixels16_ ## EXT; \
        c->PFX ## _dirac_pixels_tab[2][IDX] = PFX ## _dirac_pixels32_ ## EXT; \
    } while (0)
69 
/*
 * DIRAC_PIXOP(OPNAME, EXT)
 *
 * Defines two static wrappers, OPNAME_dirac_pixels16_EXT and
 * OPNAME_dirac_pixels32_EXT, with the dirac_pixels_tab entry signature.
 *
 * Each wrapper dispatches on the row count h:
 *   - h not a multiple of 4: forward all arguments to the C version
 *     ff_OPNAME_dirac_pixels{16,32}_c;
 *   - otherwise: call ff_OPNAME_pixels16_EXT on src[0] only -- once for the
 *     16-wide wrapper, twice (second call offset by 16 bytes in both dst and
 *     src[0]) for the 32-wide wrapper.
 */
#define DIRAC_PIXOP(OPNAME, EXT)                                              \
static void OPNAME ## _dirac_pixels16_ ## EXT(uint8_t *dst,                   \
                                              const uint8_t *src[5],          \
                                              int stride, int h)              \
{                                                                             \
    if ((h & 3) == 0) {                                                       \
        ff_ ## OPNAME ## _pixels16_ ## EXT(dst, src[0], stride, h);           \
        return;                                                               \
    }                                                                         \
    ff_ ## OPNAME ## _dirac_pixels16_c(dst, src, stride, h);                  \
}                                                                             \
static void OPNAME ## _dirac_pixels32_ ## EXT(uint8_t *dst,                   \
                                              const uint8_t *src[5],          \
                                              int stride, int h)              \
{                                                                             \
    if ((h & 3) != 0) {                                                       \
        ff_ ## OPNAME ## _dirac_pixels32_c(dst, src, stride, h);              \
        return;                                                               \
    }                                                                         \
    ff_ ## OPNAME ## _pixels16_ ## EXT(dst,      src[0],      stride, h);     \
    ff_ ## OPNAME ## _pixels16_ ## EXT(dst + 16, src[0] + 16, stride, h);     \
}
89 
90 DIRAC_PIXOP(put, mmx)
91 DIRAC_PIXOP(avg, mmx)
92 DIRAC_PIXOP(avg, mmxext)
93 
94 DIRAC_PIXOP(put, sse2)
95 DIRAC_PIXOP(avg, sse2)
96 
97 #if !ARCH_X86_64
98 HPEL_FILTER(8, mmx)
99 #endif
100 HPEL_FILTER(16, sse2)
101 
102 #endif // HAVE_X86ASM
103 
105 {
106 #if HAVE_X86ASM
107  int mm_flags = av_get_cpu_flags();
108 
109  if (EXTERNAL_MMX(mm_flags)) {
111 #if !ARCH_X86_64
114  c->dirac_hpel_filter = dirac_hpel_filter_mmx;
117 #endif
118  PIXFUNC(put, 0, mmx);
119  PIXFUNC(avg, 0, mmx);
120  }
121 
122  if (EXTERNAL_MMXEXT(mm_flags)) {
123  PIXFUNC(avg, 0, mmxext);
124  }
125 
126  if (EXTERNAL_SSE2(mm_flags)) {
127  c->dirac_hpel_filter = dirac_hpel_filter_sse2;
130 
133 
134  c->put_dirac_pixels_tab[1][0] = put_dirac_pixels16_sse2;
135  c->avg_dirac_pixels_tab[1][0] = avg_dirac_pixels16_sse2;
136  c->put_dirac_pixels_tab[2][0] = put_dirac_pixels32_sse2;
137  c->avg_dirac_pixels_tab[2][0] = avg_dirac_pixels32_sse2;
138  }
139 
140  if (EXTERNAL_SSE4(mm_flags)) {
143  }
144 #endif
145 }
#define EXTERNAL_MMX(flags)
Definition: cpu.h:56
void ff_add_dirac_obmc16_sse2(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen)
void ff_add_rect_clamped_sse2(uint8_t *, const uint16_t *, int, const int16_t *, int, int, int)
void ff_add_dirac_obmc32_sse2(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen)
void ff_add_dirac_obmc16_mmx(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen)
void ff_dequant_subband_32_sse4(uint8_t *src, uint8_t *dst, ptrdiff_t stride, const int qf, const int qs, int tot_v, int tot_h)
void(* add_rect_clamped)(uint8_t *dst, const uint16_t *src, int stride, const int16_t *idwt, int idwt_stride, int width, int height)
Definition: diracdsp.h:47
void ff_put_signed_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height)
#define EXTERNAL_SSE4(flags)
Definition: cpu.h:68
uint8_t
void ff_put_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height)
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, or accessing outside allocated space. Undefined behavior must not occur in a C program; it is not safe even if the output of the undefined operations is unused. This may seem like nit-picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can lead, and in some cases has led, to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c...
Definition: undefined.txt:32
#define height
#define EXTERNAL_SSE2(flags)
Definition: cpu.h:59
void(* avg_dirac_pixels_tab[3][4])(uint8_t *dst, const uint8_t *src[5], int stride, int h)
Definition: diracdsp.h:43
#define src
Definition: vp8dsp.c:255
void ff_add_rect_clamped_mmx(uint8_t *, const uint16_t *, int, const int16_t *, int, int, int)
void ff_put_signed_rect_clamped_10_sse4(uint8_t *dst, int dst_stride, const uint8_t *src, int src_stride, int width, int height)
void ff_add_dirac_obmc8_mmx(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen)
void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height)
#define width
void(* add_dirac_obmc[3])(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen)
Definition: diracdsp.h:48
void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height)
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:95
#define PIXFUNC(PFX, WIDTH)
Definition: diracdsp.c:213
#define EXTERNAL_MMXEXT(flags)
Definition: cpu.h:57
#define avg(a, b, c, d)
GLint GLenum GLboolean GLsizei stride
Definition: opengl_enc.c:104
void ff_diracdsp_init_x86(DiracDSPContext *c)
void(* put_dirac_pixels_tab[3][4])(uint8_t *dst, const uint8_t *src[5], int stride, int h)
dirac_pixels_tab[width][subpel] width is 2 for 32, 1 for 16, 0 for 8 subpel is 0 for fpel and hpel (o...
Definition: diracdsp.h:42
void ff_add_dirac_obmc32_mmx(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen)
void(* dequant_subband[4])(uint8_t *src, uint8_t *dst, ptrdiff_t stride, const int qf, const int qs, int tot_v, int tot_h)
Definition: diracdsp.h:51
void(* dirac_hpel_filter)(uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, const uint8_t *src, int stride, int width, int height)
Definition: diracdsp.h:31
void(* put_signed_rect_clamped[3])(uint8_t *dst, int dst_stride, const uint8_t *src, int src_stride, int width, int height)
Definition: diracdsp.h:45