/* XMIN(a,b): evaluates to the smaller of a and b (b when they compare equal).
 * Each argument appears twice in the expansion, so an argument with side
 * effects (e.g. i++) would be evaluated more than once. */
55 #define XMIN(a,b) ((a) < (b) ? (a) : (b))
59 { 0, 48, 12, 60, 3, 51, 15, 63, },
60 { 32, 16, 44, 28, 35, 19, 47, 31, },
61 { 8, 56, 4, 52, 11, 59, 7, 55, },
62 { 40, 24, 36, 20, 43, 27, 39, 23, },
63 { 2, 50, 14, 62, 1, 49, 13, 61, },
64 { 34, 18, 46, 30, 33, 17, 45, 29, },
65 { 10, 58, 6, 54, 9, 57, 5, 53, },
66 { 42, 26, 38, 22, 41, 25, 37, 21, },
72 {0,0}, {2,2}, {6,4}, {4,6},
73 {0,0}, {5,1}, {2,2}, {7,3}, {4,4}, {1,5}, {6,6}, {3,7},
75 {0,0}, {4,0}, {1,1}, {5,1}, {3,2}, {7,2}, {2,3}, {6,3},
76 {0,4}, {4,4}, {1,5}, {5,5}, {3,6}, {7,6}, {2,7}, {6,7},
78 {0,0}, {0,2}, {0,4}, {0,6}, {1,1}, {1,3}, {1,5}, {1,7},
79 {2,0}, {2,2}, {2,4}, {2,6}, {3,1}, {3,3}, {3,5}, {3,7},
80 {4,0}, {4,2}, {4,4}, {4,6}, {5,1}, {5,3}, {5,5}, {5,7},
81 {6,0}, {6,2}, {6,4}, {6,6}, {7,1}, {7,3}, {7,5}, {7,7},
83 {0,0}, {4,4}, {0,4}, {4,0}, {2,2}, {6,6}, {2,6}, {6,2},
84 {0,2}, {4,6}, {0,6}, {4,2}, {2,0}, {6,4}, {2,4}, {6,0},
85 {1,1}, {5,5}, {1,5}, {5,1}, {3,3}, {7,7}, {3,7}, {7,3},
86 {1,3}, {5,7}, {1,7}, {5,3}, {3,1}, {7,5}, {3,5}, {7,1},
87 {0,1}, {4,5}, {0,5}, {4,1}, {2,3}, {6,7}, {2,7}, {6,3},
88 {0,3}, {4,7}, {0,7}, {4,3}, {2,1}, {6,5}, {2,5}, {6,1},
89 {1,0}, {5,4}, {1,4}, {5,0}, {3,2}, {7,6}, {3,6}, {7,2},
90 {1,2}, {5,6}, {1,6}, {5,2}, {3,0}, {7,4}, {3,4}, {7,0},
111 unsigned int threshold1, threshold2;
113 threshold1= qp*((1<<4) - bias) - 1;
114 threshold2= (threshold1<<1);
116 memset(dst, 0, 64*
sizeof(
DCTELEM));
117 dst[0]= (src[0] + 4)>>3;
121 if(((
unsigned)(level+threshold1))>threshold2){
122 const int j= permutation[i];
123 dst[j]= (level + 4)>>3;
131 unsigned int threshold1, threshold2;
133 threshold1= qp*((1<<4) - bias) - 1;
134 threshold2= (threshold1<<1);
136 memset(dst, 0, 64*
sizeof(
DCTELEM));
137 dst[0]= (src[0] + 4)>>3;
141 if(((
unsigned)(level+threshold1))>threshold2){
142 const int j= permutation[i];
144 dst[j]= (level - threshold1 + 4)>>3;
146 dst[j]= (level + threshold1 + 4)>>3;
154 unsigned int threshold1;
156 threshold1= qp*((1<<4) - bias) - 1;
/*
 * REQUANT_CORE(dst0..dst3, src0..src3)
 *
 * Expands to a run of adjacent string literals containing MMX assembly text.
 * The eight macro arguments are stringized with '#' and pasted in as memory
 * operands: src0..src3 are read, dst0..dst3 are written.
 *
 * Per invocation, in order:
 *   1. movq    - load one quadword (four 16-bit words) from each of
 *                src0..src3 into mm0..mm3.
 *   2. psubw   - subtract mm4 from every word (wrapping).
 *   3. paddusw - add mm5 with unsigned saturation.
 *   4. paddw   - add mm6 (wrapping).
 *   5. psubusw - subtract mm6 with unsigned saturation.
 *   6. psraw   - arithmetic shift right by 3 of every word.
 *   7. punpcklwd/punpckhwd - interleave the words of mm0..mm3 among each
 *                other, using mm7 as scratch; results end up in mm0, mm7,
 *                mm3 and mm1.
 *   8. movq    - store mm0, mm7, mm3, mm1 to dst0, dst1, dst2, dst3.
 *
 * Register contract visible from the text: mm4, mm5 and mm6 are only read,
 * never loaded, so the surrounding assembly must have filled them before
 * this text executes; mm0..mm3 and mm7 are overwritten.
 *
 * NOTE(review): steps 2-5 presumably implement the threshold test (words
 * inside the dead zone collapse to zero) - confirm against the scalar C
 * version of this routine.
 * NOTE(review): the interleave in step 7 is not a plain 4x4 transpose;
 * presumably it matches the coefficient permutation expected by the
 * consumer - confirm before reordering any instruction.
 */
159 #define REQUANT_CORE(dst0, dst1, dst2, dst3, src0, src1, src2, src3) \
160 "movq " #src0 ", %%mm0 \n\t"\
161 "movq " #src1 ", %%mm1 \n\t"\
162 "movq " #src2 ", %%mm2 \n\t"\
163 "movq " #src3 ", %%mm3 \n\t"\
164 "psubw %%mm4, %%mm0 \n\t"\
165 "psubw %%mm4, %%mm1 \n\t"\
166 "psubw %%mm4, %%mm2 \n\t"\
167 "psubw %%mm4, %%mm3 \n\t"\
168 "paddusw %%mm5, %%mm0 \n\t"\
169 "paddusw %%mm5, %%mm1 \n\t"\
170 "paddusw %%mm5, %%mm2 \n\t"\
171 "paddusw %%mm5, %%mm3 \n\t"\
172 "paddw %%mm6, %%mm0 \n\t"\
173 "paddw %%mm6, %%mm1 \n\t"\
174 "paddw %%mm6, %%mm2 \n\t"\
175 "paddw %%mm6, %%mm3 \n\t"\
176 "psubusw %%mm6, %%mm0 \n\t"\
177 "psubusw %%mm6, %%mm1 \n\t"\
178 "psubusw %%mm6, %%mm2 \n\t"\
179 "psubusw %%mm6, %%mm3 \n\t"\
180 "psraw $3, %%mm0 \n\t"\
181 "psraw $3, %%mm1 \n\t"\
182 "psraw $3, %%mm2 \n\t"\
183 "psraw $3, %%mm3 \n\t"\
185 "movq %%mm0, %%mm7 \n\t"\
186 "punpcklwd %%mm2, %%mm0 \n\t" \
187 "punpckhwd %%mm2, %%mm7 \n\t" \
188 "movq %%mm1, %%mm2 \n\t"\
189 "punpcklwd %%mm3, %%mm1 \n\t" \
190 "punpckhwd %%mm3, %%mm2 \n\t" \
191 "movq %%mm0, %%mm3 \n\t"\
192 "punpcklwd %%mm1, %%mm0 \n\t" \
193 "punpckhwd %%mm7, %%mm3 \n\t" \
194 "punpcklwd %%mm2, %%mm7 \n\t" \
195 "punpckhwd %%mm2, %%mm1 \n\t" \
197 "movq %%mm0, " #dst0 " \n\t"\
198 "movq %%mm7, " #dst1 " \n\t"\
199 "movq %%mm3, " #dst2 " \n\t"\
200 "movq %%mm1, " #dst3 " \n\t"
202 "movd %2, %%mm4 \n\t"
203 "movd %3, %%mm5 \n\t"
204 "movd %4, %%mm6 \n\t"
205 "packssdw %%mm4, %%mm4 \n\t"
206 "packssdw %%mm5, %%mm5 \n\t"
207 "packssdw %%mm6, %%mm6 \n\t"
208 "packssdw %%mm4, %%mm4 \n\t"
209 "packssdw %%mm5, %%mm5 \n\t"
210 "packssdw %%mm6, %%mm6 \n\t"
211 REQUANT_CORE( (%1), 8(%1), 16(%1), 24(%1), (%0), 8(%0), 64(%0), 72(%0))
212 REQUANT_CORE(32(%1), 40(%1), 48(%1), 56(%1),16(%0),24(%0), 48(%0), 56(%0))
213 REQUANT_CORE(64(%1), 72(%1), 80(%1), 88(%1),32(%0),40(%0), 96(%0),104(%0))
214 REQUANT_CORE(96(%1),104(%1),112(%1),120(%1),80(%0),88(%0),112(%0),120(%0))
215 : :
"r" (src),
"r" (dst),
"g" (threshold1+1),
"g" (threshold1+5),
"g" (threshold1-4)
217 dst[0]= (src[0] + 4)>>3;
222 unsigned int threshold1;
224 threshold1= qp*((1<<4) - bias) - 1;
/*
 * REQUANT_CORE(dst0..dst3, src0..src3)
 *
 * Expands to a run of adjacent string literals containing MMX assembly text.
 * The eight macro arguments are stringized with '#' and pasted in as memory
 * operands: src0..src3 are read, dst0..dst3 are written.
 *
 * Per invocation, in order:
 *   1. Load src0/src1 into mm0/mm1. Zero mm6/mm7 (pxor with itself) and
 *      compare them against the data with pcmpgtw, which leaves an
 *      all-ones mask in every word position where the data word is
 *      negative (0 > value).
 *   2. pxor data with its mask (bitwise-inverts the negative words),
 *      psubusw mm4 (subtract with unsigned saturation, so results clamp
 *      at zero), then pxor with the same mask again to undo the inversion.
 *   3. Repeat steps 1-2 for src2/src3 in mm2/mm3, reusing mm6/mm7 as masks.
 *   4. paddsw  - add mm5 to all four registers with signed saturation.
 *   5. psraw   - arithmetic shift right by 3 of every word.
 *   6. punpcklwd/punpckhwd - interleave the words of mm0..mm3 among each
 *      other, using mm7 as scratch; results end up in mm0, mm7, mm3, mm1.
 *   7. movq    - store mm0, mm7, mm3, mm1 to dst0, dst1, dst2, dst3.
 *
 * Register contract visible from the text: mm4 and mm5 are only read,
 * never loaded, so the surrounding assembly must have filled them before
 * this text executes; mm0..mm3, mm6 and mm7 are overwritten.
 *
 * NOTE(review): steps 1-3 presumably implement a soft threshold (shrink the
 * magnitude by the value in mm4, clamping at zero, keeping the sign) -
 * confirm against the scalar C version of this routine.
 * NOTE(review): the interleave in step 6 is not a plain 4x4 transpose;
 * presumably it matches the coefficient permutation expected by the
 * consumer - confirm before reordering any instruction.
 */
228 #define REQUANT_CORE(dst0, dst1, dst2, dst3, src0, src1, src2, src3) \
229 "movq " #src0 ", %%mm0 \n\t"\
230 "movq " #src1 ", %%mm1 \n\t"\
231 "pxor %%mm6, %%mm6 \n\t"\
232 "pxor %%mm7, %%mm7 \n\t"\
233 "pcmpgtw %%mm0, %%mm6 \n\t"\
234 "pcmpgtw %%mm1, %%mm7 \n\t"\
235 "pxor %%mm6, %%mm0 \n\t"\
236 "pxor %%mm7, %%mm1 \n\t"\
237 "psubusw %%mm4, %%mm0 \n\t"\
238 "psubusw %%mm4, %%mm1 \n\t"\
239 "pxor %%mm6, %%mm0 \n\t"\
240 "pxor %%mm7, %%mm1 \n\t"\
241 "movq " #src2 ", %%mm2 \n\t"\
242 "movq " #src3 ", %%mm3 \n\t"\
243 "pxor %%mm6, %%mm6 \n\t"\
244 "pxor %%mm7, %%mm7 \n\t"\
245 "pcmpgtw %%mm2, %%mm6 \n\t"\
246 "pcmpgtw %%mm3, %%mm7 \n\t"\
247 "pxor %%mm6, %%mm2 \n\t"\
248 "pxor %%mm7, %%mm3 \n\t"\
249 "psubusw %%mm4, %%mm2 \n\t"\
250 "psubusw %%mm4, %%mm3 \n\t"\
251 "pxor %%mm6, %%mm2 \n\t"\
252 "pxor %%mm7, %%mm3 \n\t"\
254 "paddsw %%mm5, %%mm0 \n\t"\
255 "paddsw %%mm5, %%mm1 \n\t"\
256 "paddsw %%mm5, %%mm2 \n\t"\
257 "paddsw %%mm5, %%mm3 \n\t"\
258 "psraw $3, %%mm0 \n\t"\
259 "psraw $3, %%mm1 \n\t"\
260 "psraw $3, %%mm2 \n\t"\
261 "psraw $3, %%mm3 \n\t"\
263 "movq %%mm0, %%mm7 \n\t"\
264 "punpcklwd %%mm2, %%mm0 \n\t" \
265 "punpckhwd %%mm2, %%mm7 \n\t" \
266 "movq %%mm1, %%mm2 \n\t"\
267 "punpcklwd %%mm3, %%mm1 \n\t" \
268 "punpckhwd %%mm3, %%mm2 \n\t" \
269 "movq %%mm0, %%mm3 \n\t"\
270 "punpcklwd %%mm1, %%mm0 \n\t" \
271 "punpckhwd %%mm7, %%mm3 \n\t" \
272 "punpcklwd %%mm2, %%mm7 \n\t" \
273 "punpckhwd %%mm2, %%mm1 \n\t" \
275 "movq %%mm0, " #dst0 " \n\t"\
276 "movq %%mm7, " #dst1 " \n\t"\
277 "movq %%mm3, " #dst2 " \n\t"\
278 "movq %%mm1, " #dst3 " \n\t"
280 "movd %2, %%mm4 \n\t"
281 "movd %3, %%mm5 \n\t"
282 "packssdw %%mm4, %%mm4 \n\t"
283 "packssdw %%mm5, %%mm5 \n\t"
284 "packssdw %%mm4, %%mm4 \n\t"
285 "packssdw %%mm5, %%mm5 \n\t"
286 REQUANT_CORE( (%1), 8(%1), 16(%1), 24(%1), (%0), 8(%0), 64(%0), 72(%0))
287 REQUANT_CORE(32(%1), 40(%1), 48(%1), 56(%1),16(%0),24(%0), 48(%0), 56(%0))
288 REQUANT_CORE(64(%1), 72(%1), 80(%1), 88(%1),32(%0),40(%0), 96(%0),104(%0))
289 REQUANT_CORE(96(%1),104(%1),112(%1),120(%1),80(%0),88(%0),112(%0),120(%0))
290 : :
"r" (src),
"r" (dst),
"g" (threshold1),
"rm" (4)
293 dst[0]= (src[0] + 4)>>3;
301 *(uint32_t*)&dst[0 + y*stride]+= *(uint32_t*)&block[0 + y*8];
302 *(uint32_t*)&dst[2 + y*stride]+= *(uint32_t*)&block[2 + y*8];
303 *(uint32_t*)&dst[4 + y*stride]+= *(uint32_t*)&block[4 + y*8];
304 *(uint32_t*)&dst[6 + y*stride]+= *(uint32_t*)&block[6 + y*8];
312 temp= ((src[x + y*src_stride + pos]<<log2_scale) + d[pos])>>6;\
313 if(temp & 0x100) temp= ~(temp>>31);\
314 dst[x + y*dst_stride + pos]= temp;
318 for(x=0; x<
width; x+=8){
340 "movq (%3), %%mm3 \n\t"
341 "movq (%3), %%mm4 \n\t"
342 "movd %4, %%mm2 \n\t"
343 "pxor %%mm0, %%mm0 \n\t"
344 "punpcklbw %%mm0, %%mm3 \n\t"
345 "punpckhbw %%mm0, %%mm4 \n\t"
346 "psraw %%mm2, %%mm3 \n\t"
347 "psraw %%mm2, %%mm4 \n\t"
348 "movd %5, %%mm2 \n\t"
350 "movq (%0), %%mm0 \n\t"
351 "movq 8(%0), %%mm1 \n\t"
352 "paddw %%mm3, %%mm0 \n\t"
353 "paddw %%mm4, %%mm1 \n\t"
354 "psraw %%mm2, %%mm0 \n\t"
355 "psraw %%mm2, %%mm1 \n\t"
356 "packuswb %%mm1, %%mm0 \n\t"
357 "movq %%mm0, (%1) \n\t"
362 :
"+r" (src1),
"+r"(dst1)
363 :
"r"(dst +
width),
"r"(
dither[y]),
"g"(log2_scale),
"g"(6-log2_scale)
381 uint64_t __attribute__((aligned(16))) block_align[32];
385 if (!src || !dst)
return;
390 p->
src[index - x - 1]= p->
src[index + x ];
391 p->
src[index + width + x ]= p->
src[index + width - x - 1];
400 for(y=0; y<height+8; y+=8){
401 memset(p->
temp + (8+y)*stride, 0, 8*stride*
sizeof(int16_t));
402 for(x=0; x<width+8; x+=8){
403 const int qps= 3 + is_luma;
409 qp= qp_store[ (
XMIN(x, width-1)>>qps) + (
XMIN(y, height-1)>>qps) * qp_stride];
412 for(i=0; i<count; i++){
413 const int x1= x + offset[i+count-1][0];
414 const int y1= y + offset[i+count-1][1];
428 for(x=0; x<
width; x++){
429 if((((x>>6) ^ (y>>6)) & 1) == 0)
430 dst[x + y*dst_stride]= p->
src[8 + 8*stride + x + y*
stride];
431 if((x&63) == 0 || (y&63)==0)
432 dst[x + y*dst_stride] += 128;
439 static int config(
struct vf_instance *vf,
440 int width,
int height,
int d_width,
int d_height,
441 unsigned int flags,
unsigned int outfmt){
442 int h= (height+16+15)&(~15);
444 vf->priv->temp_stride= (width+16+15)&(~15);
445 vf->priv->temp= malloc(vf->priv->temp_stride*h*
sizeof(int16_t));
446 vf->priv->src = malloc(vf->priv->temp_stride*h*
sizeof(
uint8_t));
456 mpi->
planes[0]=vf->dmpi->planes[0];
457 mpi->
stride[0]=vf->dmpi->stride[0];
458 mpi->
width=vf->dmpi->width;
460 mpi->
planes[1]=vf->dmpi->planes[1];
461 mpi->
planes[2]=vf->dmpi->planes[2];
462 mpi->
stride[1]=vf->dmpi->stride[1];
463 mpi->
stride[2]=vf->dmpi->stride[2];
485 int h = (mpi->
h + 15) >> 4;
487 w = (mpi->
w + 15) >> 4;
490 if(!vf->priv->non_b_qp)
491 vf->priv->non_b_qp= malloc(w*h);
495 char *qp_tab= vf->priv->non_b_qp;
496 if((vf->priv->mode&4) || !qp_tab)
499 if(qp_tab || vf->priv->qp){
520 static void uninit(
struct vf_instance *vf){
521 if(!vf->priv)
return;
523 free(vf->priv->temp);
524 vf->priv->temp=
NULL;
527 free(vf->priv->avctx);
528 vf->priv->avctx=
NULL;
529 free(vf->priv->non_b_qp);
530 vf->priv->non_b_qp=
NULL;
555 static int control(
struct vf_instance *vf,
int request,
void*
data){
560 vf->priv->log2_count= *((
unsigned int*)data);
581 vf->
priv->
avctx= avcodec_alloc_context();
586 if (args) sscanf(args,
"%d:%d:%d", &log2c, &vf->
priv->
qp, &vf->
priv->
mode);
588 if( log2c >=0 && log2c <=6 )
614 "simple postprocess",
616 "Michael Niedermayer",