FFmpeg
vf_noise.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2002 Michael Niedermayer <michaelni@gmx.at>
3  * Copyright (c) 2013 Paul B Mahol
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #include "libavutil/attributes.h"
23 #include "libavutil/x86/cpu.h"
24 #include "libavutil/x86/asm.h"
25 #include "libavfilter/vf_noise.h"
26 
27 #if HAVE_INLINE_ASM
28 #if HAVE_6REGS
29 static void line_noise_avg_sse2(uint8_t *dst, const uint8_t *src,
30  int len, const int8_t * const *shift)
31 {
32  x86_reg xmm_len = len & (~15);
33 
34  __asm__ volatile(
35  "mov %5, %%"FF_REG_a" \n\t"
36  "pxor %%xmm4, %%xmm4 \n\t"
37  ".p2align 4 \n\t"
38  "1: \n\t"
39  "movdqu (%1, %%"FF_REG_a"), %%xmm1 \n\t"
40  "movdqu (%2, %%"FF_REG_a"), %%xmm2 \n\t"
41  "movdqu (%3, %%"FF_REG_a"), %%xmm3 \n\t"
42  "movdqa (%0, %%"FF_REG_a"), %%xmm0 \n\t"
43  "paddb %%xmm2, %%xmm1 \n\t"
44  "paddb %%xmm3, %%xmm1 \n\t"
45  "movdqa %%xmm4, %%xmm5 \n\t"
46  "pcmpgtb %%xmm0, %%xmm5 \n\t"
47  "movdqa %%xmm0, %%xmm6 \n\t"
48  "movdqa %%xmm0, %%xmm2 \n\t"
49  "punpcklbw %%xmm5, %%xmm0 \n\t"
50  "punpckhbw %%xmm5, %%xmm2 \n\t"
51  "movdqa %%xmm4, %%xmm5 \n\t"
52  "pcmpgtb %%xmm1, %%xmm5 \n\t"
53  "movdqa %%xmm1, %%xmm3 \n\t"
54  "punpcklbw %%xmm5, %%xmm1 \n\t"
55  "punpckhbw %%xmm5, %%xmm3 \n\t"
56  "pmullw %%xmm0, %%xmm1 \n\t"
57  "pmullw %%xmm2, %%xmm3 \n\t"
58  "psraw $7, %%xmm1 \n\t"
59  "psraw $7, %%xmm3 \n\t"
60  "packsswb %%xmm3, %%xmm1 \n\t"
61  "paddb %%xmm6, %%xmm1 \n\t"
62  "movdqa %%xmm1, (%4, %%"FF_REG_a") \n\t"
63  "add $16, %%"FF_REG_a" \n\t"
64  " js 1b \n\t"
65  :: "r" (src+xmm_len), "r" (shift[0]+xmm_len), "r" (shift[1]+xmm_len), "r" (shift[2]+xmm_len),
66  "r" (dst+xmm_len), "g" (-xmm_len)
67  : XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3",
68  "%xmm4", "%xmm5", "%xmm6",) "%"FF_REG_a
69  );
70 
71  if (xmm_len != len){
72  const int8_t *shift2[3] = { shift[0]+xmm_len, shift[1]+xmm_len, shift[2]+xmm_len };
73  ff_line_noise_avg_c(dst + xmm_len, src + xmm_len, len - xmm_len, shift2);
74  }
75 }
76 #endif /* HAVE_6REGS */
77 
78 static void line_noise_sse2(uint8_t *dst, const uint8_t *src,
79  const int8_t *noise, int len, int shift)
80 {
81  x86_reg xmm_len = len & (~15);
82  noise += shift;
83 
84  __asm__ volatile(
85  "mov %3, %%"FF_REG_a" \n\t"
86  "pcmpeqb %%xmm2, %%xmm2 \n\t"
87  "psllw $15, %%xmm2 \n\t"
88  "packsswb %%xmm2, %%xmm2 \n\t"
89  ".p2align 4 \n\t"
90  "1: \n\t"
91  "movdqa (%0, %%"FF_REG_a"), %%xmm0 \n\t"
92  "movdqu (%1, %%"FF_REG_a"), %%xmm1 \n\t"
93  "pxor %%xmm2, %%xmm0 \n\t"
94  "paddsb %%xmm1, %%xmm0 \n\t"
95  "pxor %%xmm2, %%xmm0 \n\t"
96  "movntdq %%xmm0, (%2, %%"FF_REG_a") \n\t"
97  "add $16, %%"FF_REG_a" \n\t"
98  " js 1b \n\t"
99  :: "r" (src+xmm_len), "r" (noise+xmm_len), "r" (dst+xmm_len), "g" (-xmm_len)
100  : XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2",) "%"FF_REG_a
101  );
102  if (xmm_len != len)
103  ff_line_noise_c(dst+xmm_len, src + xmm_len, noise + xmm_len, len - xmm_len, 0);
104 }
105 #endif /* HAVE_INLINE_ASM */
106 
108 {
109 #if HAVE_INLINE_ASM
110  int cpu_flags = av_get_cpu_flags();
111 
112  if (INLINE_SSE2(cpu_flags)) {
113 #if HAVE_6REGS
114  n->line_noise_avg = line_noise_avg_sse2;
115 #endif
116  n->line_noise = line_noise_sse2;
117  }
118 #endif
119 }
cpu.h
x86_reg
int x86_reg
Definition: asm.h:72
noise
static void noise(uint8_t *dst, const uint8_t *src, int dst_linesize, int src_linesize, int width, int start, int end, NoiseContext *n, int comp)
Definition: vf_noise.c:192
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:109
vf_noise.h
cpu_flags
static atomic_int cpu_flags
Definition: cpu.c:56
NoiseContext
Definition: noise.c:62
av_cold
#define av_cold
Definition: attributes.h:106
INLINE_SSE2
#define INLINE_SSE2(flags)
Definition: cpu.h:90
asm.h
NoiseContext::line_noise
void(* line_noise)(uint8_t *dst, const uint8_t *src, const int8_t *noise, int len, int shift)
Definition: vf_noise.h:59
ff_noise_init_x86
av_cold void ff_noise_init_x86(NoiseContext *n)
Definition: vf_noise.c:107
shift
static int shift(int a, int b)
Definition: bonk.c:261
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:87
attributes.h
ff_line_noise_avg_c
void ff_line_noise_avg_c(uint8_t *dst, const uint8_t *src, int len, const int8_t *const *shift)
Definition: vf_noise.c:180
shift2
static const uint8_t shift2[6]
Definition: dxa.c:49
len
int len
Definition: vorbis_enc_data.h:426
__asm__
__asm__(".macro parse_r var r\n\t" "\\var = -1\n\t" _IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3) _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7) _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11) _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15) _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19) _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23) _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27) _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31) ".iflt \\var\n\t" ".error \"Unable to parse register name \\r\"\n\t" ".endif\n\t" ".endm")
XMM_CLOBBERS
#define XMM_CLOBBERS(...)
Definition: asm.h:98
ff_line_noise_c
void ff_line_noise_c(uint8_t *dst, const uint8_t *src, const int8_t *noise, int len, int shift)
Definition: vf_noise.c:167
NoiseContext::line_noise_avg
void(* line_noise_avg)(uint8_t *dst, const uint8_t *src, int len, const int8_t *const *shift)
Definition: vf_noise.h:60
src
#define src
Definition: vp8dsp.c:248