00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00030 #include "avcodec.h"
00031 #include "dsputil.h"
00032 #include "mpegvideo.h"
00033
00034 #include "svq1.h"
00035 #include "svq1enc_cb.h"
00036
00037 #undef NDEBUG
00038 #include <assert.h>
00039
00040
00041 typedef struct SVQ1Context {
00042 MpegEncContext m;
00043 AVCodecContext *avctx;
00044 DSPContext dsp;
00045 AVFrame picture;
00046 AVFrame current_picture;
00047 AVFrame last_picture;
00048 PutBitContext pb;
00049 GetBitContext gb;
00050
00051 PutBitContext reorder_pb[6];
00052
00053 int frame_width;
00054 int frame_height;
00055
00056
00057 int y_block_width;
00058 int y_block_height;
00059
00060
00061 int c_block_width;
00062 int c_block_height;
00063
00064 uint16_t *mb_type;
00065 uint32_t *dummy;
00066 int16_t (*motion_val8[3])[2];
00067 int16_t (*motion_val16[3])[2];
00068
00069 int64_t rd_total;
00070
00071 uint8_t *scratchbuf;
00072 } SVQ1Context;
00073
00074 static void svq1_write_header(SVQ1Context *s, int frame_type)
00075 {
00076 int i;
00077
00078
00079 put_bits(&s->pb, 22, 0x20);
00080
00081
00082 put_bits(&s->pb, 8, 0x00);
00083
00084
00085 put_bits(&s->pb, 2, frame_type - 1);
00086
00087 if (frame_type == FF_I_TYPE) {
00088
00089
00090
00091
00092
00093
00094 put_bits(&s->pb, 5, 2);
00095
00096 for (i = 0; i < 7; i++)
00097 {
00098 if ((ff_svq1_frame_size_table[i].width == s->frame_width) &&
00099 (ff_svq1_frame_size_table[i].height == s->frame_height))
00100 {
00101 put_bits(&s->pb, 3, i);
00102 break;
00103 }
00104 }
00105
00106 if (i == 7)
00107 {
00108 put_bits(&s->pb, 3, 7);
00109 put_bits(&s->pb, 12, s->frame_width);
00110 put_bits(&s->pb, 12, s->frame_height);
00111 }
00112 }
00113
00114
00115 put_bits(&s->pb, 2, 0);
00116 }
00117
00118
/* Seed value for threshold[5] in svq1_encode_plane(). */
#define QUALITY_THRESHOLD    100
/* Factor applied to derive each lower level's threshold from the level above. */
#define THRESHOLD_MULTIPLIER 0.6

/* encode_block() uses "vector" as a local variable name; drop any macro of
 * that name (presumably one defined by the AltiVec headers -- confirm). */
#if HAVE_ALTIVEC
#undef vector
#endif
00125
00126 static int encode_block(SVQ1Context *s, uint8_t *src, uint8_t *ref, uint8_t *decoded, int stride, int level, int threshold, int lambda, int intra){
00127 int count, y, x, i, j, split, best_mean, best_score, best_count;
00128 int best_vector[6];
00129 int block_sum[7]= {0, 0, 0, 0, 0, 0};
00130 int w= 2<<((level+2)>>1);
00131 int h= 2<<((level+1)>>1);
00132 int size=w*h;
00133 int16_t block[7][256];
00134 const int8_t *codebook_sum, *codebook;
00135 const uint16_t (*mean_vlc)[2];
00136 const uint8_t (*multistage_vlc)[2];
00137
00138 best_score=0;
00139
00140 if(intra){
00141 codebook_sum= svq1_intra_codebook_sum[level];
00142 codebook= ff_svq1_intra_codebooks[level];
00143 mean_vlc= ff_svq1_intra_mean_vlc;
00144 multistage_vlc= ff_svq1_intra_multistage_vlc[level];
00145 for(y=0; y<h; y++){
00146 for(x=0; x<w; x++){
00147 int v= src[x + y*stride];
00148 block[0][x + w*y]= v;
00149 best_score += v*v;
00150 block_sum[0] += v;
00151 }
00152 }
00153 }else{
00154 codebook_sum= svq1_inter_codebook_sum[level];
00155 codebook= ff_svq1_inter_codebooks[level];
00156 mean_vlc= ff_svq1_inter_mean_vlc + 256;
00157 multistage_vlc= ff_svq1_inter_multistage_vlc[level];
00158 for(y=0; y<h; y++){
00159 for(x=0; x<w; x++){
00160 int v= src[x + y*stride] - ref[x + y*stride];
00161 block[0][x + w*y]= v;
00162 best_score += v*v;
00163 block_sum[0] += v;
00164 }
00165 }
00166 }
00167
00168 best_count=0;
00169 best_score -= ((block_sum[0]*block_sum[0])>>(level+3));
00170 best_mean= (block_sum[0] + (size>>1)) >> (level+3);
00171
00172 if(level<4){
00173 for(count=1; count<7; count++){
00174 int best_vector_score= INT_MAX;
00175 int best_vector_sum=-999, best_vector_mean=-999;
00176 const int stage= count-1;
00177 const int8_t *vector;
00178
00179 for(i=0; i<16; i++){
00180 int sum= codebook_sum[stage*16 + i];
00181 int sqr, diff, score;
00182
00183 vector = codebook + stage*size*16 + i*size;
00184 sqr = s->dsp.ssd_int8_vs_int16(vector, block[stage], size);
00185 diff= block_sum[stage] - sum;
00186 score= sqr - ((diff*(int64_t)diff)>>(level+3));
00187 if(score < best_vector_score){
00188 int mean= (diff + (size>>1)) >> (level+3);
00189 assert(mean >-300 && mean<300);
00190 mean= av_clip(mean, intra?0:-256, 255);
00191 best_vector_score= score;
00192 best_vector[stage]= i;
00193 best_vector_sum= sum;
00194 best_vector_mean= mean;
00195 }
00196 }
00197 assert(best_vector_mean != -999);
00198 vector= codebook + stage*size*16 + best_vector[stage]*size;
00199 for(j=0; j<size; j++){
00200 block[stage+1][j] = block[stage][j] - vector[j];
00201 }
00202 block_sum[stage+1]= block_sum[stage] - best_vector_sum;
00203 best_vector_score +=
00204 lambda*(+ 1 + 4*count
00205 + multistage_vlc[1+count][1]
00206 + mean_vlc[best_vector_mean][1]);
00207
00208 if(best_vector_score < best_score){
00209 best_score= best_vector_score;
00210 best_count= count;
00211 best_mean= best_vector_mean;
00212 }
00213 }
00214 }
00215
00216 split=0;
00217 if(best_score > threshold && level){
00218 int score=0;
00219 int offset= (level&1) ? stride*h/2 : w/2;
00220 PutBitContext backup[6];
00221
00222 for(i=level-1; i>=0; i--){
00223 backup[i]= s->reorder_pb[i];
00224 }
00225 score += encode_block(s, src , ref , decoded , stride, level-1, threshold>>1, lambda, intra);
00226 score += encode_block(s, src + offset, ref + offset, decoded + offset, stride, level-1, threshold>>1, lambda, intra);
00227 score += lambda;
00228
00229 if(score < best_score){
00230 best_score= score;
00231 split=1;
00232 }else{
00233 for(i=level-1; i>=0; i--){
00234 s->reorder_pb[i]= backup[i];
00235 }
00236 }
00237 }
00238 if (level > 0)
00239 put_bits(&s->reorder_pb[level], 1, split);
00240
00241 if(!split){
00242 assert((best_mean >= 0 && best_mean<256) || !intra);
00243 assert(best_mean >= -256 && best_mean<256);
00244 assert(best_count >=0 && best_count<7);
00245 assert(level<4 || best_count==0);
00246
00247
00248 put_bits(&s->reorder_pb[level],
00249 multistage_vlc[1 + best_count][1],
00250 multistage_vlc[1 + best_count][0]);
00251 put_bits(&s->reorder_pb[level], mean_vlc[best_mean][1],
00252 mean_vlc[best_mean][0]);
00253
00254 for (i = 0; i < best_count; i++){
00255 assert(best_vector[i]>=0 && best_vector[i]<16);
00256 put_bits(&s->reorder_pb[level], 4, best_vector[i]);
00257 }
00258
00259 for(y=0; y<h; y++){
00260 for(x=0; x<w; x++){
00261 decoded[x + y*stride]= src[x + y*stride] - block[best_count][x + w*y] + best_mean;
00262 }
00263 }
00264 }
00265
00266 return best_score;
00267 }
00268
00269
00270 static int svq1_encode_plane(SVQ1Context *s, int plane, unsigned char *src_plane, unsigned char *ref_plane, unsigned char *decoded_plane,
00271 int width, int height, int src_stride, int stride)
00272 {
00273 int x, y;
00274 int i;
00275 int block_width, block_height;
00276 int level;
00277 int threshold[6];
00278 const int lambda= (s->picture.quality*s->picture.quality) >> (2*FF_LAMBDA_SHIFT);
00279
00280
00281 threshold[5] = QUALITY_THRESHOLD;
00282 for (level = 4; level >= 0; level--)
00283 threshold[level] = threshold[level + 1] * THRESHOLD_MULTIPLIER;
00284
00285 block_width = (width + 15) / 16;
00286 block_height = (height + 15) / 16;
00287
00288 if(s->picture.pict_type == FF_P_TYPE){
00289 s->m.avctx= s->avctx;
00290 s->m.current_picture_ptr= &s->m.current_picture;
00291 s->m.last_picture_ptr = &s->m.last_picture;
00292 s->m.last_picture.data[0]= ref_plane;
00293 s->m.linesize=
00294 s->m.last_picture.linesize[0]=
00295 s->m.new_picture.linesize[0]=
00296 s->m.current_picture.linesize[0]= stride;
00297 s->m.width= width;
00298 s->m.height= height;
00299 s->m.mb_width= block_width;
00300 s->m.mb_height= block_height;
00301 s->m.mb_stride= s->m.mb_width+1;
00302 s->m.b8_stride= 2*s->m.mb_width+1;
00303 s->m.f_code=1;
00304 s->m.pict_type= s->picture.pict_type;
00305 s->m.me_method= s->avctx->me_method;
00306 s->m.me.scene_change_score=0;
00307 s->m.flags= s->avctx->flags;
00308
00309
00310
00311 s->m.lambda= s->picture.quality;
00312 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
00313 s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
00314
00315 if(!s->motion_val8[plane]){
00316 s->motion_val8 [plane]= av_mallocz((s->m.b8_stride*block_height*2 + 2)*2*sizeof(int16_t));
00317 s->motion_val16[plane]= av_mallocz((s->m.mb_stride*(block_height + 2) + 1)*2*sizeof(int16_t));
00318 }
00319
00320 s->m.mb_type= s->mb_type;
00321
00322
00323 s->m.current_picture.mb_mean= (uint8_t *)s->dummy;
00324 s->m.current_picture.mb_var= (uint16_t*)s->dummy;
00325 s->m.current_picture.mc_mb_var= (uint16_t*)s->dummy;
00326 s->m.current_picture.mb_type= s->dummy;
00327
00328 s->m.current_picture.motion_val[0]= s->motion_val8[plane] + 2;
00329 s->m.p_mv_table= s->motion_val16[plane] + s->m.mb_stride + 1;
00330 s->m.dsp= s->dsp;
00331 ff_init_me(&s->m);
00332
00333 s->m.me.dia_size= s->avctx->dia_size;
00334 s->m.first_slice_line=1;
00335 for (y = 0; y < block_height; y++) {
00336 uint8_t src[stride*16];
00337
00338 s->m.new_picture.data[0]= src - y*16*stride;
00339 s->m.mb_y= y;
00340
00341 for(i=0; i<16 && i + 16*y<height; i++){
00342 memcpy(&src[i*stride], &src_plane[(i+16*y)*src_stride], width);
00343 for(x=width; x<16*block_width; x++)
00344 src[i*stride+x]= src[i*stride+x-1];
00345 }
00346 for(; i<16 && i + 16*y<16*block_height; i++)
00347 memcpy(&src[i*stride], &src[(i-1)*stride], 16*block_width);
00348
00349 for (x = 0; x < block_width; x++) {
00350 s->m.mb_x= x;
00351 ff_init_block_index(&s->m);
00352 ff_update_block_index(&s->m);
00353
00354 ff_estimate_p_frame_motion(&s->m, x, y);
00355 }
00356 s->m.first_slice_line=0;
00357 }
00358
00359 ff_fix_long_p_mvs(&s->m);
00360 ff_fix_long_mvs(&s->m, NULL, 0, s->m.p_mv_table, s->m.f_code, CANDIDATE_MB_TYPE_INTER, 0);
00361 }
00362
00363 s->m.first_slice_line=1;
00364 for (y = 0; y < block_height; y++) {
00365 uint8_t src[stride*16];
00366
00367 for(i=0; i<16 && i + 16*y<height; i++){
00368 memcpy(&src[i*stride], &src_plane[(i+16*y)*src_stride], width);
00369 for(x=width; x<16*block_width; x++)
00370 src[i*stride+x]= src[i*stride+x-1];
00371 }
00372 for(; i<16 && i + 16*y<16*block_height; i++)
00373 memcpy(&src[i*stride], &src[(i-1)*stride], 16*block_width);
00374
00375 s->m.mb_y= y;
00376 for (x = 0; x < block_width; x++) {
00377 uint8_t reorder_buffer[3][6][7*32];
00378 int count[3][6];
00379 int offset = y * 16 * stride + x * 16;
00380 uint8_t *decoded= decoded_plane + offset;
00381 uint8_t *ref= ref_plane + offset;
00382 int score[4]={0,0,0,0}, best;
00383 uint8_t *temp = s->scratchbuf;
00384
00385 if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < 3000){
00386 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
00387 return -1;
00388 }
00389
00390 s->m.mb_x= x;
00391 ff_init_block_index(&s->m);
00392 ff_update_block_index(&s->m);
00393
00394 if(s->picture.pict_type == FF_I_TYPE || (s->m.mb_type[x + y*s->m.mb_stride]&CANDIDATE_MB_TYPE_INTRA)){
00395 for(i=0; i<6; i++){
00396 init_put_bits(&s->reorder_pb[i], reorder_buffer[0][i], 7*32);
00397 }
00398 if(s->picture.pict_type == FF_P_TYPE){
00399 const uint8_t *vlc= ff_svq1_block_type_vlc[SVQ1_BLOCK_INTRA];
00400 put_bits(&s->reorder_pb[5], vlc[1], vlc[0]);
00401 score[0]= vlc[1]*lambda;
00402 }
00403 score[0]+= encode_block(s, src+16*x, NULL, temp, stride, 5, 64, lambda, 1);
00404 for(i=0; i<6; i++){
00405 count[0][i]= put_bits_count(&s->reorder_pb[i]);
00406 flush_put_bits(&s->reorder_pb[i]);
00407 }
00408 }else
00409 score[0]= INT_MAX;
00410
00411 best=0;
00412
00413 if(s->picture.pict_type == FF_P_TYPE){
00414 const uint8_t *vlc= ff_svq1_block_type_vlc[SVQ1_BLOCK_INTER];
00415 int mx, my, pred_x, pred_y, dxy;
00416 int16_t *motion_ptr;
00417
00418 motion_ptr= h263_pred_motion(&s->m, 0, 0, &pred_x, &pred_y);
00419 if(s->m.mb_type[x + y*s->m.mb_stride]&CANDIDATE_MB_TYPE_INTER){
00420 for(i=0; i<6; i++)
00421 init_put_bits(&s->reorder_pb[i], reorder_buffer[1][i], 7*32);
00422
00423 put_bits(&s->reorder_pb[5], vlc[1], vlc[0]);
00424
00425 s->m.pb= s->reorder_pb[5];
00426 mx= motion_ptr[0];
00427 my= motion_ptr[1];
00428 assert(mx>=-32 && mx<=31);
00429 assert(my>=-32 && my<=31);
00430 assert(pred_x>=-32 && pred_x<=31);
00431 assert(pred_y>=-32 && pred_y<=31);
00432 ff_h263_encode_motion(&s->m, mx - pred_x, 1);
00433 ff_h263_encode_motion(&s->m, my - pred_y, 1);
00434 s->reorder_pb[5]= s->m.pb;
00435 score[1] += lambda*put_bits_count(&s->reorder_pb[5]);
00436
00437 dxy= (mx&1) + 2*(my&1);
00438
00439 s->dsp.put_pixels_tab[0][dxy](temp+16, ref + (mx>>1) + stride*(my>>1), stride, 16);
00440
00441 score[1]+= encode_block(s, src+16*x, temp+16, decoded, stride, 5, 64, lambda, 0);
00442 best= score[1] <= score[0];
00443
00444 vlc= ff_svq1_block_type_vlc[SVQ1_BLOCK_SKIP];
00445 score[2]= s->dsp.sse[0](NULL, src+16*x, ref, stride, 16);
00446 score[2]+= vlc[1]*lambda;
00447 if(score[2] < score[best] && mx==0 && my==0){
00448 best=2;
00449 s->dsp.put_pixels_tab[0][0](decoded, ref, stride, 16);
00450 for(i=0; i<6; i++){
00451 count[2][i]=0;
00452 }
00453 put_bits(&s->pb, vlc[1], vlc[0]);
00454 }
00455 }
00456
00457 if(best==1){
00458 for(i=0; i<6; i++){
00459 count[1][i]= put_bits_count(&s->reorder_pb[i]);
00460 flush_put_bits(&s->reorder_pb[i]);
00461 }
00462 }else{
00463 motion_ptr[0 ] = motion_ptr[1 ]=
00464 motion_ptr[2 ] = motion_ptr[3 ]=
00465 motion_ptr[0+2*s->m.b8_stride] = motion_ptr[1+2*s->m.b8_stride]=
00466 motion_ptr[2+2*s->m.b8_stride] = motion_ptr[3+2*s->m.b8_stride]=0;
00467 }
00468 }
00469
00470 s->rd_total += score[best];
00471
00472 for(i=5; i>=0; i--){
00473 ff_copy_bits(&s->pb, reorder_buffer[best][i], count[best][i]);
00474 }
00475 if(best==0){
00476 s->dsp.put_pixels_tab[0][0](decoded, temp, stride, 16);
00477 }
00478 }
00479 s->m.first_slice_line=0;
00480 }
00481 return 0;
00482 }
00483
00484 static av_cold int svq1_encode_init(AVCodecContext *avctx)
00485 {
00486 SVQ1Context * const s = avctx->priv_data;
00487
00488 dsputil_init(&s->dsp, avctx);
00489 avctx->coded_frame= (AVFrame*)&s->picture;
00490
00491 s->frame_width = avctx->width;
00492 s->frame_height = avctx->height;
00493
00494 s->y_block_width = (s->frame_width + 15) / 16;
00495 s->y_block_height = (s->frame_height + 15) / 16;
00496
00497 s->c_block_width = (s->frame_width / 4 + 15) / 16;
00498 s->c_block_height = (s->frame_height / 4 + 15) / 16;
00499
00500 s->avctx= avctx;
00501 s->m.avctx= avctx;
00502 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
00503 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
00504 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
00505 s->mb_type = av_mallocz((s->y_block_width+1)*s->y_block_height*sizeof(int16_t));
00506 s->dummy = av_mallocz((s->y_block_width+1)*s->y_block_height*sizeof(int32_t));
00507 h263_encode_init(&s->m);
00508
00509 return 0;
00510 }
00511
00512 static int svq1_encode_frame(AVCodecContext *avctx, unsigned char *buf,
00513 int buf_size, void *data)
00514 {
00515 SVQ1Context * const s = avctx->priv_data;
00516 AVFrame *pict = data;
00517 AVFrame * const p= (AVFrame*)&s->picture;
00518 AVFrame temp;
00519 int i;
00520
00521 if(avctx->pix_fmt != PIX_FMT_YUV410P){
00522 av_log(avctx, AV_LOG_ERROR, "unsupported pixel format\n");
00523 return -1;
00524 }
00525
00526 if(!s->current_picture.data[0]){
00527 avctx->get_buffer(avctx, &s->current_picture);
00528 avctx->get_buffer(avctx, &s->last_picture);
00529 s->scratchbuf = av_malloc(s->current_picture.linesize[0] * 16);
00530 }
00531
00532 temp= s->current_picture;
00533 s->current_picture= s->last_picture;
00534 s->last_picture= temp;
00535
00536 init_put_bits(&s->pb, buf, buf_size);
00537
00538 *p = *pict;
00539 p->pict_type = avctx->gop_size && avctx->frame_number % avctx->gop_size ? FF_P_TYPE : FF_I_TYPE;
00540 p->key_frame = p->pict_type == FF_I_TYPE;
00541
00542 svq1_write_header(s, p->pict_type);
00543 for(i=0; i<3; i++){
00544 if(svq1_encode_plane(s, i,
00545 s->picture.data[i], s->last_picture.data[i], s->current_picture.data[i],
00546 s->frame_width / (i?4:1), s->frame_height / (i?4:1),
00547 s->picture.linesize[i], s->current_picture.linesize[i]) < 0)
00548 return -1;
00549 }
00550
00551
00552 while(put_bits_count(&s->pb) & 31)
00553 put_bits(&s->pb, 1, 0);
00554
00555 flush_put_bits(&s->pb);
00556
00557 return put_bits_count(&s->pb) / 8;
00558 }
00559
00560 static av_cold int svq1_encode_end(AVCodecContext *avctx)
00561 {
00562 SVQ1Context * const s = avctx->priv_data;
00563 int i;
00564
00565 av_log(avctx, AV_LOG_DEBUG, "RD: %f\n", s->rd_total/(double)(avctx->width*avctx->height*avctx->frame_number));
00566
00567 av_freep(&s->m.me.scratchpad);
00568 av_freep(&s->m.me.map);
00569 av_freep(&s->m.me.score_map);
00570 av_freep(&s->mb_type);
00571 av_freep(&s->dummy);
00572 av_freep(&s->scratchbuf);
00573
00574 for(i=0; i<3; i++){
00575 av_freep(&s->motion_val8[i]);
00576 av_freep(&s->motion_val16[i]);
00577 }
00578
00579 return 0;
00580 }
00581
00582
00583 AVCodec svq1_encoder = {
00584 "svq1",
00585 CODEC_TYPE_VIDEO,
00586 CODEC_ID_SVQ1,
00587 sizeof(SVQ1Context),
00588 svq1_encode_init,
00589 svq1_encode_frame,
00590 svq1_encode_end,
00591 .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV410P, PIX_FMT_NONE},
00592 .long_name= NULL_IF_CONFIG_SMALL("Sorenson Vector Quantizer 1"),
00593 };