FFmpeg
alf_template.c
Go to the documentation of this file.
1 /*
2  * VVC filters DSP
3  *
4  * Copyright (C) 2024 Zhao Zhili
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
24 
25 void FUNC2(ff_alf_filter_luma_kernel, BIT_DEPTH, _neon)(pixel *dst,
26  const pixel **p,
27  const int16_t *filter,
28  const int16_t *clip,
29  int is_near_vb);
30 
31 void FUNC2(ff_alf_filter_chroma_kernel, BIT_DEPTH, _neon)(pixel *dst,
32  const pixel **p,
33  const int16_t *filter,
34  const int16_t *clip,
35  int is_near_vb);
36 
37 static void FUNC2(alf_filter_luma, BIT_DEPTH, _neon)(uint8_t *_dst,
38  ptrdiff_t dst_stride,
39  const uint8_t *_src,
40  ptrdiff_t src_stride,
41  const int width, const int height,
42  const int16_t *filter,
43  const int16_t *clip,
44  const int vb_pos)
45 {
46  const pixel *src = (pixel *)_src;
47 
48  dst_stride /= sizeof(pixel);
49  src_stride /= sizeof(pixel);
50 
51  for (int y = 0; y < height; y += ALF_BLOCK_SIZE) {
52  int far = (y + 3 < vb_pos - 3) || (y > vb_pos + 2);
53 
54  for (int x = 0; x < width; x += 2 * ALF_BLOCK_SIZE) {
55  const pixel *s0 = src + y * src_stride + x;
56  const pixel *s1 = s0 + src_stride;
57  const pixel *s2 = s0 - src_stride;
58  const pixel *s3 = s1 + src_stride;
59  const pixel *s4 = s2 - src_stride;
60  const pixel *s5 = s3 + src_stride;
61  const pixel *s6 = s4 - src_stride;
62 
63  for (int i = 0; i < ALF_BLOCK_SIZE; i++) {
64  pixel *dst = (pixel *) _dst + (y + i) * dst_stride + x;
65 
66  const pixel *p0 = s0 + i * src_stride;
67  const pixel *p1 = s1 + i * src_stride;
68  const pixel *p2 = s2 + i * src_stride;
69  const pixel *p3 = s3 + i * src_stride;
70  const pixel *p4 = s4 + i * src_stride;
71  const pixel *p5 = s5 + i * src_stride;
72  const pixel *p6 = s6 + i * src_stride;
73  int is_near_vb = 0;
74 
75  if (!far) {
76  is_near_vb = (y + i == vb_pos - 1) || (y + i == vb_pos);
77  if (is_near_vb) {
78  p1 = p0;
79  p2 = p0;
80  }
81  if (y + i >= vb_pos - 2 && y + i <= vb_pos + 1) {
82  p3 = p1;
83  p4 = p2;
84  }
85  if (y + i >= vb_pos - 3 && y + i <= vb_pos + 2) {
86  p5 = p3;
87  p6 = p4;
88  }
89  }
90  FUNC2(ff_alf_filter_luma_kernel, BIT_DEPTH, _neon)(dst,
91  (const pixel *[]) { p0, p1, p2, p3, p4, p5, p6},
92  filter,
93  clip,
94  is_near_vb);
95  }
97  clip += 2 * ALF_NUM_COEFF_LUMA;
98  }
99  }
100 }
101 
102 static void FUNC2(alf_filter_chroma, BIT_DEPTH, _neon)(uint8_t *_dst,
103  ptrdiff_t dst_stride,
104  const uint8_t *_src,
105  ptrdiff_t src_stride,
106  const int width,
107  const int height,
108  const int16_t *filter,
109  const int16_t *clip,
110  const int vb_pos)
111 {
112  const pixel *src = (pixel *)_src;
113 
114  dst_stride /= sizeof(pixel);
115  src_stride /= sizeof(pixel);
116 
117  for (int y = 0; y < height; y += ALF_BLOCK_SIZE) {
118  int far = (y + 3 < vb_pos - 2) || (y > vb_pos + 1);
119 
120  for (int x = 0; x < width; x += ALF_BLOCK_SIZE) {
121  const pixel *s0 = src + y * src_stride + x;
122  const pixel *s1 = s0 + src_stride;
123  const pixel *s2 = s0 - src_stride;
124  const pixel *s3 = s1 + src_stride;
125  const pixel *s4 = s2 - src_stride;
126 
127  for (int i = 0; i < ALF_BLOCK_SIZE; i++) {
128  pixel *dst = (pixel *)_dst + (y + i) * dst_stride + x;
129 
130  const pixel *p0 = s0 + i * src_stride;
131  const pixel *p1 = s1 + i * src_stride;
132  const pixel *p2 = s2 + i * src_stride;
133  const pixel *p3 = s3 + i * src_stride;
134  const pixel *p4 = s4 + i * src_stride;
135  int is_near_vb = 0;
136 
137  if (!far) {
138  is_near_vb = (y + i == vb_pos - 1) || (y + i == vb_pos);
139  if (is_near_vb) {
140  p1 = p0;
141  p2 = p0;
142  }
143 
144  if (y + i >= vb_pos - 2 && y + i <= vb_pos + 1) {
145  p3 = p1;
146  p4 = p2;
147  }
148  }
149 
150  FUNC2(ff_alf_filter_chroma_kernel, BIT_DEPTH, _neon)(dst,
151  (const pixel *[]){p0, p1, p2, p3, p4},
152  filter, clip,
153  is_near_vb);
154  }
155  }
156  }
157 }
158 
/*
 * Indices into the per-direction gradient sum array read by alf_get_idx().
 * Kept as macros: identical macro redefinition is legal, which matters if
 * this template is included more than once (e.g. once per bit depth).
 */
#define ALF_DIR_VERT    0   /* paired with ALF_DIR_HORZ in alf_get_idx()  */
#define ALF_DIR_HORZ    1
#define ALF_DIR_DIGA0   2   /* paired with ALF_DIR_DIGA1 in alf_get_idx() */
#define ALF_DIR_DIGA1   3
163 
164 static void FUNC(alf_get_idx)(int *class_idx, int *transpose_idx, const int *sum, const int ac)
165 {
166  static const int arg_var[] = {0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4 };
167 
168  int hv0, hv1, dir_hv, d0, d1, dir_d, hvd1, hvd0, sum_hv, dir1;
169 
170  dir_hv = sum[ALF_DIR_VERT] <= sum[ALF_DIR_HORZ];
171  hv1 = FFMAX(sum[ALF_DIR_VERT], sum[ALF_DIR_HORZ]);
172  hv0 = FFMIN(sum[ALF_DIR_VERT], sum[ALF_DIR_HORZ]);
173 
174  dir_d = sum[ALF_DIR_DIGA0] <= sum[ALF_DIR_DIGA1];
175  d1 = FFMAX(sum[ALF_DIR_DIGA0], sum[ALF_DIR_DIGA1]);
176  d0 = FFMIN(sum[ALF_DIR_DIGA0], sum[ALF_DIR_DIGA1]);
177 
178  //promote to avoid overflow
179  dir1 = (uint64_t)d1 * hv0 <= (uint64_t)hv1 * d0;
180  hvd1 = dir1 ? hv1 : d1;
181  hvd0 = dir1 ? hv0 : d0;
182 
183  sum_hv = sum[ALF_DIR_HORZ] + sum[ALF_DIR_VERT];
184  *class_idx = arg_var[av_clip_uintp2(sum_hv * ac >> (BIT_DEPTH - 1), 4)];
185  if (hvd1 * 2 > 9 * hvd0)
186  *class_idx += ((dir1 << 1) + 2) * 5;
187  else if (hvd1 > 2 * hvd0)
188  *class_idx += ((dir1 << 1) + 1) * 5;
189 
190  *transpose_idx = dir_d * 2 + dir_hv;
191 }
192 
193 static void FUNC(alf_classify)(int *class_idx, int *transpose_idx,
194  const uint8_t *_src, const ptrdiff_t _src_stride, const int width, const int height,
195  const int vb_pos, int16_t *gradient_tmp)
196 {
197  int16_t *grad;
198 
199  const int w = width + ALF_GRADIENT_BORDER * 2;
201  const int gstride = (w / ALF_GRADIENT_STEP) * ALF_NUM_DIR;
202  const int gshift = gstride - size * ALF_NUM_DIR;
203 
204  for (int y = 0; y < height ; y += ALF_BLOCK_SIZE ) {
205  int start = 0;
207  int ac = 2;
208  if (y + ALF_BLOCK_SIZE == vb_pos) {
210  ac = 3;
211  } else if (y == vb_pos) {
213  ac = 3;
214  }
215  for (int x = 0; x < width; x += (2*ALF_BLOCK_SIZE)) {
216  const int xg = x / ALF_GRADIENT_STEP;
217  const int yg = y / ALF_GRADIENT_STEP;
218  int sum0[ALF_NUM_DIR];
219  int sum1[ALF_NUM_DIR];
220  grad = gradient_tmp + (yg + start) * gstride + xg * ALF_NUM_DIR;
221  ff_alf_classify_sum_neon(sum0, sum1, grad, gshift, end-start);
222  FUNC(alf_get_idx)(class_idx, transpose_idx, sum0, ac);
223  class_idx++;
224  transpose_idx++;
225  FUNC(alf_get_idx)(class_idx, transpose_idx, sum1, ac);
226  class_idx++;
227  transpose_idx++;
228  }
229  }
230 
231 }
232 
233 void FUNC2(ff_alf_classify_grad, BIT_DEPTH, _neon)(int *class_idx, int *transpose_idx,
234  const uint8_t *_src, const ptrdiff_t _src_stride, const int width, const int height,
235  const int vb_pos, int16_t *gradient_tmp);
236 
237 static void FUNC2(alf_classify, BIT_DEPTH, _neon)(int *class_idx, int *transpose_idx,
238  const uint8_t *_src, const ptrdiff_t _src_stride, const int width, const int height,
239  const int vb_pos, int *gradient_tmp)
240 {
241  FUNC2(ff_alf_classify_grad, BIT_DEPTH, _neon)(class_idx, transpose_idx, _src, _src_stride, width, height, vb_pos, (int16_t*)gradient_tmp);
242  FUNC(alf_classify)(class_idx, transpose_idx, _src, _src_stride, width, height, vb_pos, (int16_t*)gradient_tmp);
243 }
_dst
uint8_t * _dst
Definition: dsp.h:56
ALF_DIR_VERT
#define ALF_DIR_VERT
Definition: alf_template.c:159
av_clip_uintp2
#define av_clip_uintp2
Definition: common.h:124
w
uint8_t w
Definition: llviddspenc.c:38
filter
void(* filter)(uint8_t *src, int stride, int qscale)
Definition: h263dsp.c:29
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
_src
uint8_t ptrdiff_t const uint8_t * _src
Definition: dsp.h:56
ALF_GRADIENT_BORDER
#define ALF_GRADIENT_BORDER
Definition: ctu.h:86
clip
clip
Definition: af_crystalizer.c:122
ALF_DIR_DIGA1
#define ALF_DIR_DIGA1
Definition: alf_template.c:162
ALF_DIR_HORZ
#define ALF_DIR_HORZ
Definition: alf_template.c:160
pixel
uint8_t pixel
Definition: tiny_ssim.c:41
grad
static double grad(int hash, double x, double y, double z)
Definition: perlin.c:42
alf_filter_chroma
static void alf_filter_chroma(VVCLocalContext *lc, uint8_t *dst, const uint8_t *src, const ptrdiff_t dst_stride, const ptrdiff_t src_stride, const int c_idx, const int width, const int height, const int vb_pos, const ALFParams *alf)
Definition: filter.c:1047
bit_depth_template.c
ALF_NUM_DIR
#define ALF_NUM_DIR
Definition: ctu.h:88
height
#define height
Definition: dsp.h:89
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:87
size
int size
Definition: twinvq_data.h:10344
alf_get_idx
static void FUNC() alf_get_idx(int *class_idx, int *transpose_idx, const int *sum, const int ac)
Definition: alf_template.c:164
ALF_BLOCK_SIZE
#define ALF_BLOCK_SIZE
Definition: ctu.h:77
ff_alf_classify_sum_neon
void ff_alf_classify_sum_neon(int *sum0, int *sum1, int16_t *grad, uint32_t gshift, uint32_t steps)
FUNC2
void FUNC2(ff_alf_filter_luma_kernel, void FUNC2(BIT_DEPTH, _neon)
Definition: alf_template.c:25
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
ALF_NUM_COEFF_LUMA
#define ALF_NUM_COEFF_LUMA
Definition: ps.h:167
ALF_DIR_DIGA0
#define ALF_DIR_DIGA0
Definition: alf_template.c:161
FUNC
#define FUNC(a)
Definition: bit_depth_template.c:101
BIT_DEPTH
#define BIT_DEPTH
Definition: dsp_init.c:44
Windows::Graphics::DirectX::Direct3D11::p
IDirect3DDxgiInterfaceAccess _COM_Outptr_ void ** p
Definition: vsrc_gfxcapture_winrt.hpp:53
ALF_GRADIENT_STEP
#define ALF_GRADIENT_STEP
Definition: ctu.h:85
width
#define width
Definition: dsp.h:89
alf_classify
static void FUNC() alf_classify(int *class_idx, int *transpose_idx, const uint8_t *_src, const ptrdiff_t _src_stride, const int width, const int height, const int vb_pos, int16_t *gradient_tmp)
Definition: alf_template.c:193
src
#define src
Definition: vp8dsp.c:248
alf_filter_luma
static void alf_filter_luma(VVCLocalContext *lc, uint8_t *dst, const uint8_t *src, const ptrdiff_t dst_stride, const ptrdiff_t src_stride, const int x0, const int y0, const int width, const int height, const int _vb_pos, const ALFParams *alf)
Definition: filter.c:1023