doxygen/3.3/deshake__opencl__kernel_8h_source.html

 /*

  * Copyright (C) 2013 Wei Gao <weigao@multicorewareinc.com>

  * Copyright (C) 2013 Lenny Wang

  *

  *

  * This file is part of FFmpeg.

  *

  * FFmpeg is free software; you can redistribute it and/or

  * modify it under the terms of the GNU Lesser General Public

  * License as published by the Free Software Foundation; either

  * version 2.1 of the License, or (at your option) any later version.

  *

  * FFmpeg is distributed in the hope that it will be useful,

  * but WITHOUT ANY WARRANTY; without even the implied warranty of

  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

  * Lesser General Public License for more details.

  *

  * You should have received a copy of the GNU Lesser General Public

  * License along with FFmpeg; if not, write to the Free Software

  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

  */


 #ifndef AVFILTER_DESHAKE_OPENCL_KERNEL_H

 #define AVFILTER_DESHAKE_OPENCL_KERNEL_H


 #include "libavutil/opencl.h"


 const char *ff_kernel_deshake_opencl = AV_OPENCL_KERNEL(

 /*
  * Read one sample from a plane, falling back to a caller-supplied value
  * when the coordinate is outside the valid area.
  *
  * src    - base pointer of the plane
  * x, y   - coordinate of the requested sample
  * w, h   - extent of the valid area; valid means 0 <= x < w and 0 <= y < h
  * stride - element distance between two vertically adjacent samples
  * def    - value returned for any coordinate outside the valid area
  *
  * Returns src[x + y * stride] for a valid coordinate, def otherwise.
  */
 inline unsigned char pixel(global const unsigned char *src, int x, int y,
                            int w, int h, int stride, unsigned char def)
 {
     if (x >= 0 && y >= 0 && x < w && y < h) {
         return src[x + y * stride];
     }
     return def;
 }


 /*
  * Nearest-sample lookup: 0.5 is added to each coordinate, the sum is
  * converted to int, and the sample at that position is fetched through
  * pixel(), so positions outside width x height yield def.
  */
 unsigned char interpolate_nearest(float x, float y, global const unsigned char *src,
                                   int width, int height, int stride, unsigned char def)
 {
     int nearest_x = (int)(x + 0.5f);
     int nearest_y = (int)(y + 0.5f);

     return pixel(src, nearest_x, nearest_y, width, height, stride, def);
 }


 /*
  * Bilinear lookup: blends the four samples surrounding (x, y), each
  * weighted by the product of the distances to the opposite corner.
  *
  * x, y          - source position to sample
  * src           - base pointer of the plane
  * width, height - valid extent, forwarded to pixel()
  * stride        - element distance between vertically adjacent samples
  * def           - value used for corners outside the plane, and returned
  *                 directly when the integer base position is below -1 or
  *                 above width / height
  *
  * NOTE(review): the (int) conversion truncates toward zero, so for
  * -1 < x < 0 the base column is 0 and (x - base) is negative; the same
  * holds for y.
  */
 unsigned char interpolate_bilinear(float x, float y, global const unsigned char *src,
                                    int width, int height, int stride, unsigned char def)
 {
     int x_lo = (int)x;
     int y_lo = (int)y;
     int x_hi = x_lo + 1;
     int y_hi = y_lo + 1;
     int p_hi_hi, p_hi_lo, p_lo_hi, p_lo_lo;
     float frac_x, frac_y, rem_x, rem_y;

     if (x_lo < -1 || x_lo > width || y_lo < -1 || y_lo > height) {
         return def;
     }

     // Corner samples; the suffix is <x side>_<y side>.
     p_hi_hi = pixel(src, x_hi, y_hi, width, height, stride, def);
     p_hi_lo = pixel(src, x_hi, y_lo, width, height, stride, def);
     p_lo_hi = pixel(src, x_lo, y_hi, width, height, stride, def);
     p_lo_lo = pixel(src, x_lo, y_lo, width, height, stride, def);

     // Distances from the position to the low and high grid lines.
     frac_x = x - x_lo;
     rem_x  = x_hi - x;
     frac_y = y - y_lo;
     rem_y  = y_hi - y;

     return (p_hi_hi*frac_x*frac_y + p_hi_lo*(frac_x*rem_y) +
             p_lo_hi*rem_x*frac_y + p_lo_lo*(rem_x*rem_y));
 }


 /*
  * Lookup using square-root based weights: each of the four samples
  * around (x, y) gets the weight 1 - sqrt(dx * dy), where dx and dy are
  * the distances listed next to each weight below, and the weighted sum
  * is divided by the sum of the four weights.
  *
  * x, y          - source position to sample
  * src           - base pointer of the plane
  * width, height - valid extent, forwarded to pixel()
  * stride        - element distance between vertically adjacent samples
  * def           - value used for corners outside the plane, and returned
  *                 directly when the integer base position is below -1 or
  *                 above width / height
  *
  * NOTE(review): the (int) conversion truncates toward zero, so for
  * -1 < x < 0 (or y) one distance is negative and a negative product can
  * reach sqrt().
  */
 unsigned char interpolate_biquadratic(float x, float y, global const unsigned char *src,
                                       int width, int height, int stride, unsigned char def)
 {
     int x_lo = (int)x;
     int y_lo = (int)y;
     int x_hi = x_lo + 1;
     int y_hi = y_lo + 1;
     unsigned char p_hi_hi, p_hi_lo, p_lo_hi, p_lo_lo;
     float frac_x, frac_y, rem_x, rem_y;
     float w_hi_hi, w_hi_lo, w_lo_hi, w_lo_lo;

     if (x_lo < -1 || x_lo > width || y_lo < -1 || y_lo > height) {
         return def;
     }

     // Corner samples; the suffix is <x side>_<y side>.
     p_hi_hi = pixel(src, x_hi, y_hi, width, height, stride, def);
     p_hi_lo = pixel(src, x_hi, y_lo, width, height, stride, def);
     p_lo_hi = pixel(src, x_lo, y_hi, width, height, stride, def);
     p_lo_lo = pixel(src, x_lo, y_lo, width, height, stride, def);

     // Distances from the position to the low and high grid lines.
     frac_x = x - x_lo;
     rem_x  = x_hi - x;
     frac_y = y - y_lo;
     rem_y  = y_hi - y;

     w_hi_hi = 1 - sqrt(rem_x * rem_y);
     w_hi_lo = 1 - sqrt(rem_x * frac_y);
     w_lo_hi = 1 - sqrt(frac_x * rem_y);
     w_lo_lo = 1 - sqrt(frac_x * frac_y);

     return (p_hi_hi * w_hi_hi + p_hi_lo * w_hi_lo + p_lo_hi * w_lo_hi + p_lo_lo * w_lo_lo) /
            (w_hi_hi + w_hi_lo + w_lo_hi + w_lo_lo);
 }


 /*
  * Clamp a to the interval [amin, amax].
  *
  * The lower bound is tested first, so amin is returned whenever
  * a < amin, even if amin is greater than amax. A value for which both
  * comparisons are false is returned unchanged.
  */
 inline const float clipf(float a, float amin, float amax)
 {
     return (a < amin) ? amin
          : (a > amax) ? amax
          :              a;
 }


 /*
  * Reflect the index v back into the range [0, m].
  *
  * v - index to fold; may be negative or greater than m
  * m - largest valid index
  *
  * For m > 0 the loop ends with 0 <= v <= m. For m == 0 the only valid
  * index is 0, which is returned directly: without this guard the loop
  * body would merely flip the sign of v (adding 2 * m changes nothing)
  * and would never terminate for any v != 0.
  */
 inline int mirror(int v, int m)
 {
     if (m == 0) {
         return 0;
     }

     // The unsigned comparison catches both v < 0 and v > m in one test.
     while ((unsigned)v > (unsigned)m) {
         v = -v;
         if (v < 0) {
             v += 2 * m;
         }
     }
     return v;
 }


 /*
  * Luma transform kernel; each work-item produces one destination sample.
  *
  * The destination position (x, y) comes from global ids 0 and 1. The
  * source position is computed from it as
  *     x_s =  x * matrix.x + y * matrix.y + matrix.z
  *     y_s = -x * matrix.y + y * matrix.x + matrix.w
  *
  * src, dst      - source and destination planes
  * matrix        - transform coefficients, used as shown above
  * interpolate   - 0 nearest, 1 bilinear, 2 biquadratic; any other value
  *                 leaves the destination sample unwritten
  * fill          - how samples outside the source are replaced:
  *                 0 blank (0), 1 original sample at (x, y),
  *                 2 clamp the source position, 3 mirror the source position
  * src_stride_lu - row stride of src
  * dst_stride_lu - row stride of dst
  * height, width - plane size; work-items with x >= width or y >= height
  *                 do nothing
  */
 kernel void avfilter_transform_luma(global unsigned char *src,
                                     global unsigned char *dst,
                                     float4 matrix,
                                     int interpolate,
                                     int fill,
                                     int src_stride_lu,
                                     int dst_stride_lu,
                                     int height,
                                     int width)
 {
     int col = get_global_id(0);
     int row = get_global_id(1);
     int out_idx = row * dst_stride_lu + col;
     unsigned char fallback = 0;
     float src_x = col * matrix.x + row * matrix.y + matrix.z;
     float src_y = col * (-matrix.y) + row * matrix.x + matrix.w;

     if (col >= width || row >= height) {
         return;
     }

     // fill == 0 (FILL_BLANK) and unknown values keep the fallback at 0.
     if (fill == 1) {
         // FILL_ORIGINAL
         fallback = src[row*src_stride_lu + col];
     } else if (fill == 2) {
         // FILL_CLAMP: the clamped position is also the one interpolated below
         src_y = clipf(src_y, 0, height - 1);
         src_x = clipf(src_x, 0, width - 1);
         fallback = src[(int)src_y * src_stride_lu + (int)src_x];
     } else if (fill == 3) {
         // FILL_MIRROR: the mirrored position is also the one interpolated below
         src_y = mirror(src_y, height - 1);
         src_x = mirror(src_x, width - 1);
         fallback = src[(int)src_y * src_stride_lu + (int)src_x];
     }

     if (interpolate == 0) {
         // INTERPOLATE_NEAREST
         dst[out_idx] = interpolate_nearest(src_x, src_y, src, width, height, src_stride_lu, fallback);
     } else if (interpolate == 1) {
         // INTERPOLATE_BILINEAR
         dst[out_idx] = interpolate_bilinear(src_x, src_y, src, width, height, src_stride_lu, fallback);
     } else if (interpolate == 2) {
         // INTERPOLATE_BIQUADRATIC
         dst[out_idx] = interpolate_biquadratic(src_x, src_y, src, width, height, src_stride_lu, fallback);
     }
 }


 /*
  * Chroma transform kernel; each work-item produces one destination
  * sample of one of the two chroma planes.
  *
  * src and dst are passed as frame base pointers. The first chroma plane
  * starts height * stride_lu elements after the base, and the second one
  * ch * stride_ch elements after the first. The second global dimension
  * is split in two halves: ids below get_global_size(1) / 2 process the
  * first chroma plane, the remaining ids process the second, with y
  * rebased to start at 0.
  *
  * The source position is computed from the destination position as
  *     x_s =  x * matrix.x + y * matrix.y + matrix.z
  *     y_s = -x * matrix.y + y * matrix.x + matrix.w
  *
  * src, dst       - source and destination frame base pointers
  * matrix         - transform coefficients, used as shown above
  * interpolate    - 0 nearest, 1 bilinear, 2 biquadratic; any other value
  *                  leaves the destination sample unwritten
  * fill           - how samples outside the source are replaced:
  *                  0 blank (0), 1 original sample at (x, y),
  *                  2 clamp the source position, 3 mirror the source
  *                  position; any other value behaves like 0
  * src_stride_lu,
  * dst_stride_lu  - luma row strides, used only to skip the luma plane
  * src_stride_ch,
  * dst_stride_ch  - chroma row strides
  * height         - luma plane height, used only to skip the luma plane
  * width          - not used by this kernel
  * ch, cw         - chroma plane height and width; work-items with
  *                  x >= cw or rebased y >= ch do nothing
  */
 kernel void avfilter_transform_chroma(global unsigned char *src,
                                       global unsigned char *dst,
                                       float4 matrix,
                                       int interpolate,
                                       int fill,
                                       int src_stride_lu,
                                       int dst_stride_lu,
                                       int src_stride_ch,
                                       int dst_stride_ch,
                                       int height,
                                       int width,
                                       int ch,
                                       int cw)
 {
     int x = get_global_id(0);
     int y = get_global_id(1);
     // Half of the second global dimension: boundary between the two planes.
     int pad_ch = get_global_size(1)>>1;
     global unsigned char *dst_u = dst + height * dst_stride_lu;
     global unsigned char *src_u = src + height * src_stride_lu;
     global unsigned char *dst_v = dst_u + ch * dst_stride_ch;
     global unsigned char *src_v = src_u + ch * src_stride_ch;
     src = y < pad_ch ? src_u : src_v;
     dst = y < pad_ch ? dst_u : dst_v;
     // Rebase y so that rows of the second plane also start at 0.
     y = select(y - pad_ch, y, y < pad_ch);
     float x_s = x * matrix.x + y * matrix.y + matrix.z;
     float y_s = x * (-matrix.y) + y * matrix.x + matrix.w;
     int idx_dst = y * dst_stride_ch + x;
     // Start from 0 so an unrecognised fill mode never hands an
     // indeterminate value to the interpolators.
     unsigned char def = 0;

     if (x < cw && y < ch) {
         switch (fill) {
             case 0: //FILL_BLANK
                 def = 0;
                 break;
             case 1: //FILL_ORIGINAL
                 def = src[y*src_stride_ch + x];
                 break;
             case 2: //FILL_CLAMP
                 y_s = clipf(y_s, 0, ch - 1);
                 x_s = clipf(x_s, 0, cw - 1);
                 def = src[(int)y_s * src_stride_ch + (int)x_s];
                 break;
             case 3: //FILL_MIRROR
                 y_s = mirror(y_s, ch - 1);
                 x_s = mirror(x_s, cw - 1);
                 def = src[(int)y_s * src_stride_ch + (int)x_s];
                 break;
             default: // unknown mode: keep def at 0
                 break;
         }
         switch (interpolate) {
             case 0: //INTERPOLATE_NEAREST
                 dst[idx_dst] = interpolate_nearest(x_s, y_s, src, cw, ch, src_stride_ch, def);
                 break;
             case 1: //INTERPOLATE_BILINEAR
                 dst[idx_dst] = interpolate_bilinear(x_s, y_s, src, cw, ch, src_stride_ch, def);
                 break;
             case 2: //INTERPOLATE_BIQUADRATIC
                 dst[idx_dst] = interpolate_biquadratic(x_s, y_s, src, cw, ch, src_stride_ch, def);
                 break;
             default:
                 return;
         }
     }
 }

 );


 #endif /* AVFILTER_DESHAKE_OPENCL_KERNEL_H */

h
h
Definition: vp9dsp_template.c:2038

AV_OPENCL_KERNEL
#define AV_OPENCL_KERNEL(...)
Definition: opencl.h:46

src
#define src
Definition: vp8dsp.c:254

float4::w
float w
Definition: deshake_opencl.h:30

float4::x
float x
Definition: deshake_opencl.h:27

interpolate
static void interpolate(float *out, float v1, float v2, int size)
Definition: twinvq.c:84

height
#define height

float4
Definition: deshake_opencl.h:26

width
#define width

float4::y
float y
Definition: deshake_opencl.h:28

pixel
uint8_t pixel
Definition: tiny_ssim.c:42

float4::z
float z
Definition: deshake_opencl.h:29

int
int
Definition: ffmpeg_filter.c:190

stride
GLint GLenum GLboolean GLsizei stride
Definition: opengl_enc.c:105

if
if(ret< 0)
Definition: vf_mcdeint.c:282

ch
uint8_t pi<< 24) CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_U8,(uint64_t)((*(constuint8_t *) pi-0x80U))<< 56) CONV_FUNC(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8,(*(constuint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8,(*(constuint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16,(*(constint16_t *) pi >>8)+0x80) CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S16,(uint64_t)(*(constint16_t *) pi)<< 48) CONV_FUNC(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16,*(constint16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16,*(constint16_t *) pi *(1.0/(1<< 15))) CONV_FUNC(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32,(*(constint32_t *) pi >>24)+0x80) CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S32,(uint64_t)(*(constint32_t *) pi)<< 32) CONV_FUNC(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32,*(constint32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32,*(constint32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S64,(*(constint64_t *) pi >>56)+0x80) CONV_FUNC(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S64,*(constint64_t *) pi *(1.0f/(INT64_C(1)<< 63))) CONV_FUNC(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S64,*(constint64_t *) pi *(1.0/(INT64_C(1)<< 63))) CONV_FUNC(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, av_clip_uint8(lrintf(*(constfloat *) pi *(1<< 7))+0x80)) CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, av_clip_int16(lrintf(*(constfloat *) pi *(1<< 15)))) CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, av_clipl_int32(llrintf(*(constfloat *) pi *(1U<< 31)))) CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_FLT, llrintf(*(constfloat *) pi *(INT64_C(1)<< 63))) CONV_FUNC(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, av_clip_uint8(lrint(*(constdouble *) pi *(1<< 7))+0x80)) CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, av_clip_int16(lrint(*(constdouble *) pi 
*(1<< 15)))) CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, av_clipl_int32(llrint(*(constdouble *) pi *(1U<< 31)))) CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_DBL, llrint(*(constdouble *) pi *(INT64_C(1)<< 63)))#defineFMT_PAIR_FUNC(out, in) staticconv_func_type *constfmt_pair_to_conv_functions[AV_SAMPLE_FMT_NB *AV_SAMPLE_FMT_NB]={FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_DBL), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_DBL), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_DBL), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_DBL), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_DBL), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_DBL), FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_S64), 
FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S64), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_S64), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S64), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_S64), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_S64),};staticvoidcpy1(uint8_t **dst, constuint8_t **src, intlen){memcpy(*dst,*src, len);}staticvoidcpy2(uint8_t **dst, constuint8_t **src, intlen){memcpy(*dst,*src, 2 *len);}staticvoidcpy4(uint8_t **dst, constuint8_t **src, intlen){memcpy(*dst,*src, 4 *len);}staticvoidcpy8(uint8_t **dst, constuint8_t **src, intlen){memcpy(*dst,*src, 8 *len);}AudioConvert *swri_audio_convert_alloc(enumAVSampleFormatout_fmt, enumAVSampleFormatin_fmt, intchannels, constint *ch_map, intflags){AudioConvert *ctx;conv_func_type *f=fmt_pair_to_conv_functions[av_get_packed_sample_fmt(out_fmt)+AV_SAMPLE_FMT_NB *av_get_packed_sample_fmt(in_fmt)];if(!f) returnNULL;ctx=av_mallocz(sizeof(*ctx));if(!ctx) returnNULL;if(channels==1){in_fmt=av_get_planar_sample_fmt(in_fmt);out_fmt=av_get_planar_sample_fmt(out_fmt);}ctx->channels=channels;ctx->conv_f=f;ctx->ch_map=ch_map;if(in_fmt==AV_SAMPLE_FMT_U8||in_fmt==AV_SAMPLE_FMT_U8P) memset(ctx->silence, 0x80, sizeof(ctx->silence));if(out_fmt==in_fmt &&!ch_map){switch(av_get_bytes_per_sample(in_fmt)){case1:ctx->simd_f=cpy1;break;case2:ctx->simd_f=cpy2;break;case4:ctx->simd_f=cpy4;break;case8:ctx->simd_f=cpy8;break;}}if(HAVE_YASM &&1) swri_audio_convert_init_x86(ctx, out_fmt, in_fmt, channels);if(ARCH_ARM) swri_audio_convert_init_arm(ctx, out_fmt, in_fmt, channels);if(ARCH_AARCH64) swri_audio_convert_init_aarch64(ctx, out_fmt, in_fmt, channels);returnctx;}voidswri_audio_convert_free(AudioConvert **ctx){av_freep(ctx);}intswri_audio_convert(AudioConvert *ctx, AudioData *out, AudioData *in, 
intlen){intch;intoff=0;constintos=(out->planar?1:out->ch_count)*out->bps;unsignedmisaligned=0;av_assert0(ctx->channels==out->ch_count);if(ctx->in_simd_align_mask){intplanes=in->planar?in->ch_count:1;unsignedm=0;for(ch=0;ch< planes;ch++) m|=(intptr_t) in->ch[ch];misaligned|=m &ctx->in_simd_align_mask;}if(ctx->out_simd_align_mask){intplanes=out->planar?out->ch_count:1;unsignedm=0;for(ch=0;ch< planes;ch++) m|=(intptr_t) out->ch[ch];misaligned|=m &ctx->out_simd_align_mask;}if(ctx->simd_f &&!ctx->ch_map &&!misaligned){off=len &~15;av_assert1(off >=0);av_assert1(off<=len);av_assert2(ctx->channels==SWR_CH_MAX||!in->ch[ctx->channels]);if(off >0){if(out->planar==in->planar){intplanes=out->planar?out->ch_count:1;for(ch=0;ch< planes;ch++){ctx->simd_f(out-> ch ch
Definition: audioconvert.c:56

opencl.h
OpenCL wrapper.

ff_kernel_deshake_opencl
const char * ff_kernel_deshake_opencl
Definition: deshake_opencl_kernel.h:28

a
a
Definition: h264pred_template.c:468