00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #ifndef AVCODEC_X86_CABAC_H
00022 #define AVCODEC_X86_CABAC_H
00023
00024 #include "libavcodec/cabac.h"
00025 #include "libavutil/attributes.h"
00026 #include "libavutil/x86_cpu.h"
00027 #include "libavutil/internal.h"
00028 #include "config.h"
00029
00030 #ifdef BROKEN_RELOCATIONS
/* Extra input operand spliced onto the end of the input list of the main asm
 * statement in get_cabac_inline_x86() when BROKEN_RELOCATIONS is defined: it
 * hands the local `tables` pointer to the asm in a register ("r" constraint).
 * The leading comma is part of the macro so the use site needs none. */
00031 #define TABLES_ARG , "r"(tables)
00032
/*
 * BRANCHLESS_GET_CABAC_UPDATE(ret, retq, low, range, tmp)
 * (BROKEN_RELOCATIONS variant).
 *
 * Expands to a run of AT&T-syntax instruction strings ("op source, dest")
 * that BRANCHLESS_GET_CABAC pastes into its own instruction sequence.  Every
 * argument is a string naming an asm operand; ret and retq are expected to be
 * the 32-bit and 64-bit names of the same register.
 *
 * State on entry, as prepared by BRANCHLESS_GET_CABAC right before the
 * expansion:
 *   range  = byte just loaded from the lps_off table
 *   %ecx   = previous range minus that byte
 *   tmp    = the same value as %ecx, shifted left by 17
 *
 * Both arms pick one of two outcomes without a jump and update low, range
 * and ret/retq accordingly; %ecx/%rcx and tmp are overwritten.
 */
00033 #if HAVE_FAST_CMOV
/*
 * cmov/sbb arm:
 *   - compare tmp against low;
 *   - if tmp is above low (unsigned) range takes the value held in %ecx,
 *     otherwise range keeps the table byte;
 *   - sbb turns the carry of that compare into a mask in %rcx: all ones when
 *     tmp is below low, zero otherwise;
 *   - tmp is ANDed with the mask, retq is XORed with it (every bit of retq is
 *     flipped when the mask is set), and the masked tmp is subtracted from low.
 */
00034 #define BRANCHLESS_GET_CABAC_UPDATE(ret, retq, low, range, tmp) \
00035 "cmp "low" , "tmp" \n\t"\
00036 "cmova %%ecx , "range" \n\t"\
00037 "sbb %%rcx , %%rcx \n\t"\
00038 "and %%ecx , "tmp" \n\t"\
00039 "xor %%rcx , "retq" \n\t"\
00040 "sub "tmp" , "low" \n\t"
00041 #else
/*
 * Arm used when HAVE_FAST_CMOV is false; it uses neither cmov nor sbb:
 *   - tmp = (tmp - low) >> 31 (arithmetic shift), i.e. a sign mask of the
 *     difference (all ones if negative, zero otherwise; assumes 32-bit
 *     operands, which is what get_cabac_inline_x86() passes);
 *   - range = %ecx + ((range - %ecx) & mask), so range keeps its incoming
 *     value when the mask is set and becomes %ecx when it is clear;
 *   - %ecx = (%ecx << 17) & mask, which is then subtracted from low;
 *   - ret is XORed with the mask and sign-extended into retq (movslq).
 */
00042 #define BRANCHLESS_GET_CABAC_UPDATE(ret, retq, low, range, tmp) \
00043 \
00044 "sub "low" , "tmp" \n\t"\
00045 "sar $31 , "tmp" \n\t"\
00046 "sub %%ecx , "range" \n\t"\
00047 "and "tmp" , "range" \n\t"\
00048 "add %%ecx , "range" \n\t"\
00049 "shl $17 , %%ecx \n\t"\
00050 "and "tmp" , %%ecx \n\t"\
00051 "sub %%ecx , "low" \n\t"\
00052 "xor "tmp" , "ret" \n\t"\
00053 "movslq "ret" , "retq" \n\t"
00054 #endif
00055
/*
 * BRANCHLESS_GET_CABAC(...) (BROKEN_RELOCATIONS variant).
 *
 * Expands to the full asm template used by get_cabac_inline_x86().  All
 * arguments are strings naming asm operands:
 *   ret, retq        32-/64-bit names of the result register
 *   statep           memory operand for the state byte
 *   low, lowword     32-/16-bit names of the register holding low
 *   range, rangeq    32-/64-bit names of the register holding range
 *   tmp, tmpbyte     32-/8-bit names of a scratch register
 *   byte             memory operand holding the bytestream pointer
 *   end              NOTE(review): accepted but never referenced in this
 *                    macro body, so nothing here compares the bytestream
 *                    pointer against an end pointer
 *   norm_off, lps_off, mlps_off
 *                    constant displacements applied to the table base
 *   tables           register holding the table base address
 *
 * Sequence (AT&T syntax, "op source, dest"):
 *   1. ret = state byte; tmp = range; range &= 0xC0.
 *   2. range = byte at tables[lps_off + ret + 2*range]; tmp -= range.
 *   3. %ecx = tmp; tmp <<= 17; then BRANCHLESS_GET_CABAC_UPDATE selects the
 *      new range / low / ret without branching.
 *   4. %ecx = byte at tables[norm_off + range]; range and low are both
 *      shifted left by that count.
 *   5. The byte at tables[mlps_off + 128 + ret] is loaded into tmp and its low
 *      byte is stored back through statep.
 *   6. If the low 16 bits of low are non-zero, jump to local label 2 (done).
 *      Otherwise: load the bytestream pointer into REG_c, advance the stored
 *      pointer by 2, read 16 bits through REG_c, byte-swap and shift right by
 *      15, subtract 0xFFFF, shift left by (7 - tables[norm_off +
 *      ((low - 1) ^ low) >> 15]) and add the result to low.
 *
 * Uses %ecx / REG_c as scratch throughout and defines local label "2".
 */
00056 #define BRANCHLESS_GET_CABAC(ret, retq, statep, low, lowword, range, rangeq, tmp, tmpbyte, byte, end, norm_off, lps_off, mlps_off, tables) \
00057 "movzbl "statep" , "ret" \n\t"\
00058 "mov "range" , "tmp" \n\t"\
00059 "and $0xC0 , "range" \n\t"\
00060 "lea ("ret", "range", 2), %%ecx \n\t"\
00061 "movzbl "lps_off"("tables", %%rcx), "range" \n\t"\
00062 "sub "range" , "tmp" \n\t"\
00063 "mov "tmp" , %%ecx \n\t"\
00064 "shl $17 , "tmp" \n\t"\
00065 BRANCHLESS_GET_CABAC_UPDATE(ret, retq, low, range, tmp) \
00066 "movzbl "norm_off"("tables", "rangeq"), %%ecx \n\t"\
00067 "shl %%cl , "range" \n\t"\
00068 "movzbl "mlps_off"+128("tables", "retq"), "tmp" \n\t"\
00069 "shl %%cl , "low" \n\t"\
00070 "mov "tmpbyte" , "statep" \n\t"\
00071 "test "lowword" , "lowword" \n\t"\
00072 "jnz 2f \n\t"\
00073 "mov "byte" , %%"REG_c" \n\t"\
00074 "add"OPSIZE" $2 , "byte" \n\t"\
00075 "movzwl (%%"REG_c") , "tmp" \n\t"\
00076 "lea -1("low") , %%ecx \n\t"\
00077 "xor "low" , %%ecx \n\t"\
00078 "shr $15 , %%ecx \n\t"\
00079 "bswap "tmp" \n\t"\
00080 "shr $15 , "tmp" \n\t"\
00081 "movzbl "norm_off"("tables", %%rcx), %%ecx \n\t"\
00082 "sub $0xFFFF , "tmp" \n\t"\
00083 "neg %%ecx \n\t"\
00084 "add $7 , %%ecx \n\t"\
00085 "shl %%cl , "tmp" \n\t"\
00086 "add "tmp" , "low" \n\t"\
00087 "2: \n\t"
00088
00089 #else
/* Without BROKEN_RELOCATIONS the asm template addresses
 * MANGLE(ff_h264_cabac_tables) directly, so no table-pointer operand is
 * appended to the asm statement: TABLES_ARG expands to nothing. */
00090 #define TABLES_ARG
/* Defined empty as well.  NOTE(review): no use of RIP_ARG is visible in this
 * header, and the BROKEN_RELOCATIONS branch does not define it. */
00091 #define RIP_ARG
00092
/*
 * BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp)
 * (variant used when BROKEN_RELOCATIONS is not defined).
 *
 * Expands to AT&T-syntax instruction strings ("op source, dest") pasted into
 * BRANCHLESS_GET_CABAC.  Unlike the BROKEN_RELOCATIONS variant it takes no
 * 64-bit retq name, and it performs the "copy tmp to %ecx, shift tmp left by
 * 17" preparation itself as its first two instructions.
 *
 * State on entry, as prepared by BRANCHLESS_GET_CABAC:
 *   range  = byte just loaded from the lps_off table
 *   tmp    = previous range minus that byte
 *
 * Both arms update low, range and ret without a jump; %ecx and tmp are
 * overwritten.
 */
00093 #if HAVE_FAST_CMOV
/*
 * cmov/sbb arm:
 *   - %ecx = tmp; tmp <<= 17; compare tmp against low;
 *   - if tmp is above low (unsigned) range takes the value saved in %ecx,
 *     otherwise range keeps the table byte;
 *   - sbb turns the carry of that compare into a mask in %ecx: all ones when
 *     tmp is below low, zero otherwise;
 *   - tmp is ANDed with the mask, ret is XORed with it, and the masked tmp is
 *     subtracted from low.
 */
00094 #define BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp)\
00095 "mov "tmp" , %%ecx \n\t"\
00096 "shl $17 , "tmp" \n\t"\
00097 "cmp "low" , "tmp" \n\t"\
00098 "cmova %%ecx , "range" \n\t"\
00099 "sbb %%ecx , %%ecx \n\t"\
00100 "and %%ecx , "tmp" \n\t"\
00101 "xor %%ecx , "ret" \n\t"\
00102 "sub "tmp" , "low" \n\t"
00103 #else
/*
 * Arm used when HAVE_FAST_CMOV is false; it uses neither cmov nor sbb:
 *   - %ecx = tmp; tmp <<= 17;
 *   - tmp = (tmp - low) >> 31 (arithmetic shift), i.e. a sign mask of the
 *     difference (assumes 32-bit operands, which is what
 *     get_cabac_inline_x86() passes);
 *   - range = %ecx + ((range - %ecx) & mask), so range keeps its incoming
 *     value when the mask is set and becomes %ecx when it is clear;
 *   - %ecx = (%ecx << 17) & mask, which is then subtracted from low;
 *   - ret is XORed with the mask.
 */
00104 #define BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp)\
00105 "mov "tmp" , %%ecx \n\t"\
00106 "shl $17 , "tmp" \n\t"\
00107 "sub "low" , "tmp" \n\t"\
00108 "sar $31 , "tmp" \n\t" \
00109 "sub %%ecx , "range" \n\t" \
00110 "and "tmp" , "range" \n\t" \
00111 "add %%ecx , "range" \n\t" \
00112 "shl $17 , %%ecx \n\t"\
00113 "and "tmp" , %%ecx \n\t"\
00114 "sub %%ecx , "low" \n\t"\
00115 "xor "tmp" , "ret" \n\t"
00116 #endif
00117
/*
 * BRANCHLESS_GET_CABAC(...) (variant used when BROKEN_RELOCATIONS is not
 * defined).
 *
 * Same parameter list as the BROKEN_RELOCATIONS variant so that the single
 * use in get_cabac_inline_x86() works for both, but table accesses use the
 * symbol MANGLE(ff_h264_cabac_tables) plus a constant offset directly in the
 * addressing expression instead of a base register.  As a result the retq,
 * rangeq and tables arguments are not referenced here.
 * NOTE(review): the end argument is not referenced either, so nothing in this
 * macro compares the bytestream pointer against an end pointer.
 *
 * Sequence (AT&T syntax, "op source, dest"):
 *   1. ret = state byte; tmp = range; range &= 0xC0.
 *   2. range = byte at ff_h264_cabac_tables[lps_off + ret + 2*range];
 *      tmp -= range.
 *   3. BRANCHLESS_GET_CABAC_UPDATE selects the new range / low / ret without
 *      branching (this variant of it also saves tmp in %ecx and shifts tmp).
 *   4. %ecx = byte at ff_h264_cabac_tables[norm_off + range]; range and low
 *      are both shifted left by that count.
 *   5. The byte at ff_h264_cabac_tables[mlps_off + 128 + ret] is loaded into
 *      tmp and its low byte is stored back through statep.
 *   6. If the low 16 bits of low are non-zero, jump to local label 2 (done).
 *      Otherwise: load the bytestream pointer into REG_c, advance the stored
 *      pointer by 2, read 16 bits through REG_c, byte-swap and shift right by
 *      15, subtract 0xFFFF, shift left by (7 - ff_h264_cabac_tables[norm_off +
 *      ((low - 1) ^ low) >> 15]) and add the result to low.
 *
 * Uses %ecx / REG_c as scratch throughout and defines local label "2".
 */
00118 #define BRANCHLESS_GET_CABAC(ret, retq, statep, low, lowword, range, rangeq, tmp, tmpbyte, byte, end, norm_off, lps_off, mlps_off, tables) \
00119 "movzbl "statep" , "ret" \n\t"\
00120 "mov "range" , "tmp" \n\t"\
00121 "and $0xC0 , "range" \n\t"\
00122 "movzbl "MANGLE(ff_h264_cabac_tables)"+"lps_off"("ret", "range", 2), "range" \n\t"\
00123 "sub "range" , "tmp" \n\t"\
00124 BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp) \
00125 "movzbl "MANGLE(ff_h264_cabac_tables)"+"norm_off"("range"), %%ecx \n\t"\
00126 "shl %%cl , "range" \n\t"\
00127 "movzbl "MANGLE(ff_h264_cabac_tables)"+"mlps_off"+128("ret"), "tmp" \n\t"\
00128 "shl %%cl , "low" \n\t"\
00129 "mov "tmpbyte" , "statep" \n\t"\
00130 "test "lowword" , "lowword" \n\t"\
00131 " jnz 2f \n\t"\
00132 "mov "byte" , %%"REG_c" \n\t"\
00133 "add"OPSIZE" $2 , "byte" \n\t"\
00134 "movzwl (%%"REG_c") , "tmp" \n\t"\
00135 "lea -1("low") , %%ecx \n\t"\
00136 "xor "low" , %%ecx \n\t"\
00137 "shr $15 , %%ecx \n\t"\
00138 "bswap "tmp" \n\t"\
00139 "shr $15 , "tmp" \n\t"\
00140 "movzbl "MANGLE(ff_h264_cabac_tables)"+"norm_off"(%%ecx), %%ecx \n\t"\
00141 "sub $0xFFFF , "tmp" \n\t"\
00142 "neg %%ecx \n\t"\
00143 "add $7 , %%ecx \n\t"\
00144 "shl %%cl , "tmp" \n\t"\
00145 "add "tmp" , "low" \n\t"\
00146 "2: \n\t"
00147
00148 #endif
00149
00150
/*
 * The asm version below is only compiled when HAVE_7REGS is set and the
 * compiler is neither clang older than 2.10 targeting i386 nor an
 * LLVM-based, non-clang compiler reporting GCC 4.2.0/4.2.1 targeting i386.
 * NOTE(review): presumably those compilers cannot build this asm statement;
 * the reason is not stated in this header.
 */
00151 #if HAVE_7REGS && !(defined(__i386) && defined(__clang__) && (__clang_major__<2 || (__clang_major__==2 && __clang_minor__<10)))\
00152 && !(defined(__i386) && !defined(__clang__) && defined(__llvm__) && __GNUC__==4 && __GNUC_MINOR__==2 && __GNUC_PATCHLEVEL__<=1)
/* Route the generic name get_cabac_inline to the x86 asm implementation. */
00153 #define get_cabac_inline get_cabac_inline_x86
/*
 * x86 inline-asm implementation of get_cabac_inline: runs one
 * BRANCHLESS_GET_CABAC sequence on the given context and state byte.
 *
 * c     - decoder context.  c->low and c->range are passed as read/write
 *         register operands; the bytestream pointer is read and advanced in
 *         memory at offsetof(CABACContext, bytestream) from c.
 * state - pointer to the state byte; the asm reads it and stores a new value
 *         through it.
 *
 * Returns the least significant bit of the value the asm leaves in `bit`.
 */
00154 static av_always_inline int get_cabac_inline_x86(CABACContext *c,
00155 uint8_t *const state)
00156 {
00157 int bit, tmp;
00158 #ifdef BROKEN_RELOCATIONS
00159 void *tables;
00160
/* Load the address of ff_h264_cabac_tables into a register once, so the main
 * asm can use base-register addressing (passed in via TABLES_ARG). */
00161 __asm__ volatile(
00162 "lea "MANGLE(ff_h264_cabac_tables)", %0 \n\t"
00163 : "=&r"(tables)
00164 );
00165 #endif
00166
/*
 * Operand map for the template below:
 *   %0  bit (early-clobber output)     %1  c->low   (in/out)
 *   %2  c->range (in/out)              %3  tmp, "q" = byte-addressable reg
 *   %4  state                          %5  c
 *   %6  offsetof bytestream            %7  offsetof bytestream_end
 *   %8  H264_NORM_SHIFT_OFFSET         %9  H264_LPS_RANGE_OFFSET
 *   %10 H264_MLPS_STATE_OFFSET         %11 tables (only with TABLES_ARG)
 * REG_c is declared clobbered because the template uses it as scratch, and
 * "memory" because *state and c->bytestream are written behind the
 * compiler's back.
 * NOTE(review): %7 is forwarded as the macro's `end` argument, which neither
 * BRANCHLESS_GET_CABAC variant references.
 */
00167 __asm__ volatile(
00168 BRANCHLESS_GET_CABAC("%0", "%q0", "(%4)", "%1", "%w1",
00169 "%2", "%q2", "%3", "%b3",
00170 "%a6(%5)", "%a7(%5)", "%a8", "%a9", "%a10", "%11")
00171 : "=&r"(bit), "+&r"(c->low), "+&r"(c->range), "=&q"(tmp)
00172 : "r"(state), "r"(c),
00173 "i"(offsetof(CABACContext, bytestream)),
00174 "i"(offsetof(CABACContext, bytestream_end)),
00175 "i"(H264_NORM_SHIFT_OFFSET),
00176 "i"(H264_LPS_RANGE_OFFSET),
00177 "i"(H264_MLPS_STATE_OFFSET) TABLES_ARG
00178 : "%"REG_c, "memory"
00179 );
00180 return bit & 1;
00181 }
00182 #endif
00183
/* Route the generic name get_cabac_bypass_sign to the x86 asm implementation. */
00184 #define get_cabac_bypass_sign get_cabac_bypass_sign_x86
/*
 * x86 inline-asm implementation of get_cabac_bypass_sign.
 *
 * Doubles c->low, subtracts (c->range << 17) and, from the sign of that
 * result, either keeps val or negates it, without branching on the decision.
 * c->low is written back, and when its low 16 bits end up zero two more
 * bytes are pulled from c->bytestream (which is advanced by 2).
 *
 * c   - decoder context; low, range and bytestream are accessed in memory
 *       through constant offsets from this pointer.
 * val - value whose sign is conditionally flipped.
 *
 * Returns val or -val.
 *
 * Operand map: %0 val (pinned to %ecx by "+c"), %1 tmp, %2 c,
 * %3 offsetof low, %4 offsetof bytestream, %5 offsetof bytestream_end,
 * %6 offsetof range.
 * NOTE(review): %5 is never referenced by the template, so the refill path
 * does not compare the bytestream pointer against bytestream_end.
 */
00185 static av_always_inline int get_cabac_bypass_sign_x86(CABACContext *c, int val)
00186 {
00187 x86_reg tmp;
00188 __asm__ volatile(
/* tmp = c->range << 17; eax = 2 * c->low - tmp */
00189 "movl %a6(%2), %k1 \n\t"
00190 "movl %a3(%2), %%eax \n\t"
00191 "shl $17, %k1 \n\t"
00192 "add %%eax, %%eax \n\t"
00193 "sub %k1, %%eax \n\t"
/* edx = sign mask of eax (all ones if negative, else zero) */
00194 "cltd \n\t"
/* if the subtraction went negative, add the shifted range back */
00195 "and %%edx, %k1 \n\t"
00196 "add %k1, %%eax \n\t"
/* val = (val ^ mask) - mask: negates val when the mask is all ones */
00197 "xor %%edx, %%ecx \n\t"
00198 "sub %%edx, %%ecx \n\t"
/* skip the refill unless the low 16 bits of eax are all zero */
00199 "test %%ax, %%ax \n\t"
00200 "jnz 1f \n\t"
/* refill: read 16 bits at c->bytestream, byte-swap, shift right by 15,
 * add to (eax - 0xFFFF), and store the pointer advanced by 2 */
00201 "mov %a4(%2), %1 \n\t"
00202 "subl $0xFFFF, %%eax \n\t"
00203 "movzwl (%1), %%edx \n\t"
00204 "bswap %%edx \n\t"
00205 "shrl $15, %%edx \n\t"
00206 "add $2, %1 \n\t"
00207 "addl %%edx, %%eax \n\t"
00208 "mov %1, %a4(%2) \n\t"
00209 "1: \n\t"
/* c->low = eax */
00210 "movl %%eax, %a3(%2) \n\t"
00211
00212 : "+c"(val), "=&r"(tmp)
00213 : "r"(c),
00214 "i"(offsetof(CABACContext, low)),
00215 "i"(offsetof(CABACContext, bytestream)),
00216 "i"(offsetof(CABACContext, bytestream_end)),
00217 "i"(offsetof(CABACContext, range))
00218 : "%eax", "%edx", "memory"
00219 );
00220 return val;
00221 }
00222
00223 #endif