/*
 * avg(a,b,c,d): reduce up to four samples to one value.
 * Three alternative definitions follow: pass-through of a, the rounded mean
 * of a and b, and the rounded mean of all four arguments.
 * NOTE(review): the preprocessor conditionals that select exactly one of
 * these are not visible in this extract - presumably keyed on SS_W / SS_H;
 * confirm against the full file.
 */
28 #define avg(a,b,c,d) (a) 
   31 #define avg(a,b,c,d) (((a) + (b) + 1) >> 1) 
   34 #define avg(a,b,c,d) (((a) + (b) + (c) + (d) + 2) >> 2) 
   /*
    * Name-mangling helpers for the functions in this template.
    * fn3 pastes its arguments into the identifier  a _ c p b _c.
    * fn2 only forwards to fn3; the extra level lets the arguments be
    * macro-expanded before ## is applied.
    * fn(a) supplies BIT_DEPTH and ss as the b and c parts.
    * NOTE(review): ss and BIT_DEPTH are presumably defined by whatever
    * includes this template - not visible here.
    */
   40 #define fn3(a,b,c) a##_##c##p##b##_c 
   41 #define fn2(a,b,c) fn3(a,b,c) 
   42 #define fn(a) fn2(a, BIT_DEPTH, ss) 
   /*
    * Pixel storage type and the matching clamp helper.
    * Two variants of av_clip_pixel are visible: one forwarding to
    * av_clip_uint8, and one (paired with pixel = uint16_t) forwarding to
    * av_clip_uintp2 with BIT_DEPTH.
    * NOTE(review): the conditionals choosing between them, and the pixel
    * definition that goes with the av_clip_uint8 variant, are not visible
    * in this extract.
    */
   48 #define av_clip_pixel(x) av_clip_uint8(x) 
   50 #define pixel uint16_t 
   51 #define av_clip_pixel(x) av_clip_uintp2(x, BIT_DEPTH) 
   55                         uint8_t *_yuv[3], 
const ptrdiff_t yuv_stride[3],
 
   56                         int w, 
int h, 
const int16_t yuv2rgb_coeffs[3][3][8],
 
   57                         const int16_t yuv_offset[8])
 
   60     const pixel *yuv0 = yuv[0], *yuv1 = yuv[1], *yuv2 = yuv[2];
 
   61     int16_t *rgb0 = 
rgb[0], *rgb1 = 
rgb[1], *rgb2 = 
rgb[2];
 
   63     int cy = yuv2rgb_coeffs[0][0][0];
 
   64     int crv = yuv2rgb_coeffs[0][2][0];
 
   65     int cgu = yuv2rgb_coeffs[1][1][0];
 
   66     int cgv = yuv2rgb_coeffs[1][2][0];
 
   67     int cbu = yuv2rgb_coeffs[2][1][0];
 
   69     const int uv_offset = 128 << (
BIT_DEPTH - 8);
 
   73     av_assert2(yuv2rgb_coeffs[1][0][0] == cy && yuv2rgb_coeffs[2][0][0] == cy);
 
   77     for (y = 0; y < 
h; y++) {
 
   78         for (x = 0; x < 
w; x++) {
 
   79             int y00 = yuv0[x << 
SS_W] - yuv_offset[0];
 
   81             int y01 = yuv0[2 * x + 1] - yuv_offset[0];
 
   83             int y10 = yuv0[yuv_stride[0] / 
sizeof(
pixel) + 2 * x] - yuv_offset[0];
 
   84             int y11 = yuv0[yuv_stride[0] / 
sizeof(
pixel) + 2 * x + 1] - yuv_offset[0];
 
   87             int u = yuv1[x] - uv_offset, v = yuv2[x] - uv_offset;
 
   94             rgb0[2 * x + rgb_stride + 1] = 
av_clip_int16((y11 * cy + crv * v + 
rnd) >> sh);
 
   99                                                           cgv * v + 
rnd) >> sh);
 
  102                                                           cgv * v + 
rnd) >> sh);
 
  105                                                           cgv * v + 
rnd) >> sh);
 
  106             rgb1[2 * x + rgb_stride + 1] = 
av_clip_int16((y11 * cy + cgu * 
u +
 
  107                                                           cgv * v + 
rnd) >> sh);
 
  121         yuv0 += (yuv_stride[0] * (1 << 
SS_H)) / 
sizeof(
pixel);
 
  122         yuv1 += yuv_stride[1] / 
sizeof(
pixel);
 
  123         yuv2 += yuv_stride[2] / 
sizeof(
pixel);
 
  124         rgb0 += rgb_stride * (1 << 
SS_H);
 
  125         rgb1 += rgb_stride * (1 << 
SS_H);
 
  126         rgb2 += rgb_stride * (1 << 
SS_H);
 
  /*
   * RGB -> YUV kernel for one template instantiation (fn() pastes the
   * subsampling tag and BIT_DEPTH onto the name).
   *
   * _yuv, yuv_stride : three plane pointers and their strides; the strides
   *                    are divided by sizeof(pixel) before being used as
   *                    element offsets.
   * rgb, s           : three int16_t planes, only read here, and the element
   *                    distance between consecutive rgb rows.
   * w, h             : loop bounds; each x step spans (1 << SS_W) rgb columns
   *                    and each y step spans (1 << SS_H) rgb rows.
   * rgb2yuv_coeffs   : 3x3 matrix of 8-entry vectors; only entry [0] of each
   *                    vector is read in this function.
   * yuv_offset       : its use is on lines not visible in this extract.
   *
   * NOTE(review): the embedded line numbers skip, so the declarations of
   * sh, x, y and yuv, the stores into the yuv planes, the braces and any
   * preprocessor guards are not visible here.
   */
  130 static void fn(
rgb2yuv)(uint8_t *_yuv[3], 
const ptrdiff_t yuv_stride[3],
 
  131                         int16_t *
rgb[3], ptrdiff_t 
s,
 
  132                         int w, 
int h, 
const int16_t rgb2yuv_coeffs[3][3][8],
 
  133                         const int16_t yuv_offset[8])
 
  136     pixel *yuv0 = yuv[0], *yuv1 = yuv[1], *yuv2 = yuv[2];
 
  137     const int16_t *rgb0 = 
rgb[0], *rgb1 = 
rgb[1], *rgb2 = 
rgb[2];
 
  /* Half of one output step: added before ">> sh" so the shift rounds. */
  140     const int rnd = 1 << (sh - 1);
 
  /* Matrix row 0 -> cry/cgy/cby, row 1 -> cru/cgu/cburv, row 2 -> cgv/cbv. */
  141     int cry = rgb2yuv_coeffs[0][0][0];
 
  142     int cgy = rgb2yuv_coeffs[0][1][0];
 
  143     int cby = rgb2yuv_coeffs[0][2][0];
 
  144     int cru = rgb2yuv_coeffs[1][0][0];
 
  145     int cgu = rgb2yuv_coeffs[1][1][0];
 
  /* cburv is used both as the blue weight of the first chroma sum and as
   * the red weight of the second; the assert below checks they match. */
  146     int cburv = rgb2yuv_coeffs[1][2][0];
 
  147     int cgv = rgb2yuv_coeffs[2][1][0];
 
  148     int cbv = rgb2yuv_coeffs[2][2][0];
 
  /* Plane-0 stride expressed in pixel elements. */
  149     ptrdiff_t 
s0 = yuv_stride[0] / 
sizeof(
pixel);
 
  /* 128 scaled up to the current BIT_DEPTH. */
  150     const int uv_offset = 128 << (
BIT_DEPTH - 8);
 
  152     av_assert2(rgb2yuv_coeffs[1][2][0] == rgb2yuv_coeffs[2][0][0]);
 
  155     for (y = 0; y < 
h; y++) {
 
  156         for (x = 0; x < 
w; x++) {
 
  /* First sample of the block handled by this iteration. */
  157             int r00 = rgb0[x << 
SS_W], g00 = rgb1[x << 
SS_W], b00 = rgb2[x << 
SS_W];
 
  /* Horizontal neighbour at x * 2 + 1.
   * NOTE(review): presumably compiled only when SS_W is set - guard not
   * visible here. */
  159             int r01 = rgb0[x * 2 + 1], g01 = rgb1[x * 2 + 1], b01 = rgb2[x * 2 + 1];
 
  /* The two samples from the following rgb row (offset s). */
  161             int r10 = rgb0[x * 2 + 0 + 
s], g10 = rgb1[x * 2 + 0 + 
s], b10 = rgb2[x * 2 + 0 + 
s];
 
  162             int r11 = rgb0[x * 2 + 1 + 
s], g11 = rgb1[x * 2 + 1 + 
s], b11 = rgb2[x * 2 + 1 + 
s];
 
  /* One cry/cgy/cby weighted sum per sample read above, rounded with rnd
   * and scaled by ">> sh". The assignment targets are on lines not shown. */
  167                                                  ((r00 * cry + g00 * cgy +
 
  168                                                    b00 * cby + 
rnd) >> sh));
 
  171                                                  ((r01 * cry + g01 * cgy +
 
  172                                                    b01 * cby + 
rnd) >> sh));
 
  175                                                  ((r10 * cry + g10 * cgy +
 
  176                                                    b10 * cby + 
rnd) >> sh));
 
  178                                                  ((r11 * cry + g11 * cgy +
 
  179                                                    b11 * cby + 
rnd) >> sh));
 
  /* Two sums over avg() of the block's samples: the first weighted by
   * cru/cgu/cburv, the second by cburv/cgv/cbv. */
  184                                          ((
avg(r00, r01, r10, r11) * cru +
 
  185                                            avg(g00, g01, g10, g11) * cgu +
 
  186                                            avg(b00, b01, b10, b11) * cburv + 
rnd) >> sh));
 
  188                                          ((
avg(r00, r01, r10, r11) * cburv +
 
  189                                            avg(g00, g01, g10, g11) * cgv +
 
  190                                            avg(b00, b01, b10, b11) * cbv + 
rnd) >> sh));
 
  /* Row advance: yuv1/yuv2 move by one row of their own stride, the rgb
   * pointers by (1 << SS_H) rows. The yuv0 advance is not visible here. */
  194         yuv1 += yuv_stride[1] / 
sizeof(
pixel);
 
  195         yuv2 += yuv_stride[2] / 
sizeof(
pixel);
 
  196         rgb0 += 
s * (1 << 
SS_H);
 
  197         rgb1 += 
s * (1 << 
SS_H);
 
  198         rgb2 += 
s * (1 << 
SS_H);
 
  /*
   * RGB -> YUV kernel that replaces the fixed rounding term with per-sample
   * values taken from rnd_scratch and spreads an error term (diff) to
   * neighbouring scratch slots with weights 7, 3, 5 and 1 over 16.
   *
   * _yuv, yuv_stride : three plane pointers and their strides; the strides
   *                    are divided by sizeof(pixel) before use as element
   *                    offsets.
   * rgb, s           : three int16_t planes, only read here, and the element
   *                    distance between consecutive rgb rows.
   * w, h             : loop bounds; each x step spans (1 << SS_W) rgb columns
   *                    and each y step spans (1 << SS_H) rgb rows.
   * rgb2yuv_coeffs   : 3x3 matrix of 8-entry vectors; only entry [0] of each
   *                    vector is read in this function.
   * yuv_offset       : entry [0] is added to the shifted plane-0 value before
   *                    clipping.
   * rnd_scratch      : [plane][row] -> int array of rounding/error values,
   *                    read and written in place.
   *
   * NOTE(review): the embedded line numbers skip, so the declarations of
   * sh, x, y, diff, the y00/u/v variables and yuv, the computation of diff,
   * most stores, the braces and any preprocessor guards are not visible.
   * NOTE(review): at x == 0 the "- 1" indices below address element -1, and
   * the "+ 1" / "+ 2" indices reach past the last column; the scratch rows
   * presumably carry padding on both sides - confirm with the caller.
   */
  208 static void fn(
rgb2yuv_fsb)(uint8_t *_yuv[3], 
const ptrdiff_t yuv_stride[3],
 
  209                             int16_t *
rgb[3], ptrdiff_t 
s,
 
  210                             int w, 
int h, 
const int16_t rgb2yuv_coeffs[3][3][8],
 
  211                             const int16_t yuv_offset[8],
 
  212                             int *rnd_scratch[3][2])
 
  215     pixel *yuv0 = yuv[0], *yuv1 = yuv[1], *yuv2 = yuv[2];
 
  216     const int16_t *rgb0 = 
rgb[0], *rgb1 = 
rgb[1], *rgb2 = 
rgb[2];
 
  /* Neutral rounding value: half of one output step of the ">> sh" shift. */
  219     const int rnd = 1 << (sh - 1);
 
  /* Matrix row 0 -> cry/cgy/cby, row 1 -> cru/cgu/cburv, row 2 -> cgv/cbv. */
  220     int cry = rgb2yuv_coeffs[0][0][0];
 
  221     int cgy = rgb2yuv_coeffs[0][1][0];
 
  222     int cby = rgb2yuv_coeffs[0][2][0];
 
  223     int cru = rgb2yuv_coeffs[1][0][0];
 
  224     int cgu = rgb2yuv_coeffs[1][1][0];
 
  /* Shared by the u sum (blue weight) and the v sum (red weight); the
   * assert further down checks the two matrix entries are equal. */
  225     int cburv = rgb2yuv_coeffs[1][2][0];
 
  226     int cgv = rgb2yuv_coeffs[2][1][0];
 
  227     int cbv = rgb2yuv_coeffs[2][2][0];
 
  /* Plane-0 stride expressed in pixel elements. */
  228     ptrdiff_t 
s0 = yuv_stride[0] / 
sizeof(
pixel);
 
  /* 128 scaled up to the current BIT_DEPTH. */
  229     const int uv_offset = 128 << (
BIT_DEPTH - 8);
 
  /* Low sh bits, i.e. the part discarded by ">> sh".
   * NOTE(review): its use is on lines not visible here. */
  230     unsigned mask = (1 << sh) - 1;
 
  /* Seed both scratch rows of plane 0 with the neutral rounding value. */
  232     for (x = 0; x < 
w; x++) {
 
  233         rnd_scratch[0][0][x] =
 
  234         rnd_scratch[0][1][x] = 
rnd;
 
  236     av_assert2(rgb2yuv_coeffs[1][2][0] == rgb2yuv_coeffs[2][0][0]);
 
  /* Seed both scratch rows of planes 1 and 2 the same way. */
  239     for (x = 0; x < 
w; x++) {
 
  240         rnd_scratch[1][0][x] =
 
  241         rnd_scratch[1][1][x] =
 
  242         rnd_scratch[2][0][x] =
 
  243         rnd_scratch[2][1][x] = 
rnd;
 
  245     for (y = 0; y < 
h; y++) {
 
  246         for (x = 0; x < 
w; x++) {
 
  /* First sample of the block handled by this iteration. */
  247             int r00 = rgb0[x << 
SS_W], g00 = rgb1[x << 
SS_W], b00 = rgb2[x << 
SS_W];
 
  /* Horizontal neighbour at x * 2 + 1.
   * NOTE(review): presumably compiled only when SS_W is set - guard not
   * visible here. */
  250             int r01 = rgb0[x * 2 + 1], g01 = rgb1[x * 2 + 1], b01 = rgb2[x * 2 + 1];
 
  /* The two samples from the following rgb row (offset s). */
  253             int r10 = rgb0[x * 2 + 0 + 
s], g10 = rgb1[x * 2 + 0 + 
s], b10 = rgb2[x * 2 + 0 + 
s];
 
  254             int r11 = rgb0[x * 2 + 1 + 
s], g11 = rgb1[x * 2 + 1 + 
s], b11 = rgb2[x * 2 + 1 + 
s];
 
  /* Sample 00. The scratch row index "y & !SS_H" is y & 1 when SS_H is 0
   * and always 0 when SS_H is non-zero. */
  260             y00 = r00 * cry + g00 * cgy + b00 * cby + rnd_scratch[0][y & !
SS_H][x << 
SS_W];
 
  /* Spread diff: 7/16 to the next slot of the same row, 3/16, 5/16 and
   * 1/16 to the previous, same and next slots of the other row
   * ("+ 8 >> 4" rounds the division by 16). Then reset the consumed slot. */
  263             rnd_scratch[0][ (y & !
SS_H)][(x << 
SS_W) + 1] += (
diff * 7 + 8) >> 4;
 
  264             rnd_scratch[0][!(y & !
SS_H)][(x << 
SS_W) - 1] += (
diff * 3 + 8) >> 4;
 
  265             rnd_scratch[0][!(y & !
SS_H)][(x << 
SS_W) + 0] += (
diff * 5 + 8) >> 4;
 
  266             rnd_scratch[0][!(y & !
SS_H)][(x << 
SS_W) + 1] += (
diff * 1 + 8) >> 4;
 
  267             rnd_scratch[0][ (y & !
SS_H)][(x << 
SS_W) + 0]  = 
rnd;
 
  /* Sample 01: same scheme, one column to the right. */
  269             y01 = r01 * cry + g01 * cgy + b01 * cby + rnd_scratch[0][y & !
SS_H][x * 2 + 1];
 
  /* No extra rounding term: it was already added from rnd_scratch. */
  271             yuv0[x * 2 + 1]      = 
av_clip_pixel(yuv_offset[0] + (y01 >> sh));
 
  272             rnd_scratch[0][ (y & !
SS_H)][x * 2 + 2] += (
diff * 7 + 8) >> 4;
 
  273             rnd_scratch[0][!(y & !
SS_H)][x * 2 + 0] += (
diff * 3 + 8) >> 4;
 
  274             rnd_scratch[0][!(y & !
SS_H)][x * 2 + 1] += (
diff * 5 + 8) >> 4;
 
  275             rnd_scratch[0][!(y & !
SS_H)][x * 2 + 2] += (
diff * 1 + 8) >> 4;
 
  276             rnd_scratch[0][ (y & !
SS_H)][x * 2 + 1]  = 
rnd;
 
  /* Sample 10 (second rgb row): reads scratch row 1 and pushes the
   * 3/5/1 shares into scratch row 0. */
  278             y10 = r10 * cry + g10 * cgy + b10 * cby + rnd_scratch[0][1][x * 2 + 0];
 
  281             rnd_scratch[0][1][x * 2 + 1] += (
diff * 7 + 8) >> 4;
 
  282             rnd_scratch[0][0][x * 2 - 1] += (
diff * 3 + 8) >> 4;
 
  283             rnd_scratch[0][0][x * 2 + 0] += (
diff * 5 + 8) >> 4;
 
  284             rnd_scratch[0][0][x * 2 + 1] += (
diff * 1 + 8) >> 4;
 
  285             rnd_scratch[0][1][x * 2 + 0]  = 
rnd;
 
  /* Sample 11: as sample 10, one column to the right. */
  287             y11 = r11 * cry + g11 * cgy + b11 * cby + rnd_scratch[0][1][x * 2 + 1];
 
  290             rnd_scratch[0][1][x * 2 + 2] += (
diff * 7 + 8) >> 4;
 
  291             rnd_scratch[0][0][x * 2 + 0] += (
diff * 3 + 8) >> 4;
 
  292             rnd_scratch[0][0][x * 2 + 1] += (
diff * 5 + 8) >> 4;
 
  293             rnd_scratch[0][0][x * 2 + 2] += (
diff * 1 + 8) >> 4;
 
  294             rnd_scratch[0][1][x * 2 + 1]  = 
rnd;
 
  /* u: weighted sum over avg() of the block's samples; scratch plane 1,
   * rows alternating with y & 1, one slot per x. */
  298             u = 
avg(r00, r01, r10, r11) * cru +
 
  299                 avg(g00, g01, g10, g11) * cgu +
 
  300                 avg(b00, b01, b10, b11) * cburv + rnd_scratch[1][y & 1][x];
 
  303             rnd_scratch[1][ (y & 1)][x + 1] += (
diff * 7 + 8) >> 4;
 
  304             rnd_scratch[1][!(y & 1)][x - 1] += (
diff * 3 + 8) >> 4;
 
  305             rnd_scratch[1][!(y & 1)][x + 0] += (
diff * 5 + 8) >> 4;
 
  306             rnd_scratch[1][!(y & 1)][x + 1] += (
diff * 1 + 8) >> 4;
 
  307             rnd_scratch[1][ (y & 1)][x + 0]  = 
rnd;
 
  /* v: same as u but with cburv/cgv/cbv and scratch plane 2. */
  309             v = 
avg(r00, r01, r10, r11) * cburv +
 
  310                 avg(g00, g01, g10, g11) * cgv +
 
  311                 avg(b00, b01, b10, b11) * cbv + rnd_scratch[2][y & 1][x];
 
  314             rnd_scratch[2][ (y & 1)][x + 1] += (
diff * 7 + 8) >> 4;
 
  315             rnd_scratch[2][!(y & 1)][x - 1] += (
diff * 3 + 8) >> 4;
 
  316             rnd_scratch[2][!(y & 1)][x + 0] += (
diff * 5 + 8) >> 4;
 
  317             rnd_scratch[2][!(y & 1)][x + 1] += (
diff * 1 + 8) >> 4;
 
  318             rnd_scratch[2][ (y & 1)][x + 0]  = 
rnd;
 
  /* Row advance: yuv1/yuv2 move by one row of their own stride, the rgb
   * pointers by (1 << SS_H) rows. The yuv0 advance is not visible here. */
  322         yuv1 += yuv_stride[1] / 
sizeof(
pixel);
 
  323         yuv2 += yuv_stride[2] / 
sizeof(
pixel);
 
  324         rgb0 += 
s * (1 << 
SS_H);
 
  325         rgb1 += 
s * (1 << 
SS_H);
 
  326         rgb2 += 
s * (1 << 
SS_H);
 
  /*
   * Output depth follows BIT_DEPTH; IN_BIT_DEPTH is then defined as 8, 10
   * and 12 in turn.
   * NOTE(review): the directives between these definitions (presumably an
   * #undef and a nested #include per input depth) are not visible in this
   * extract - confirm against the full file.
   */
  332 #define OUT_BIT_DEPTH BIT_DEPTH 
  333 #define IN_BIT_DEPTH 8 
  337 #define IN_BIT_DEPTH 10 
  341 #define IN_BIT_DEPTH 12