00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00030 #include "avcodec.h"
00031 #include "dsputil.h"
00032 #include "mpegvideo.h"
00033 #include "h263.h"
00034 #include "internal.h"
00035 #include "libavutil/avassert.h"
00036 
00037 #include "svq1.h"
00038 #include "svq1enc_cb.h"
00039 
00040 
00041 
00042 typedef struct SVQ1Context {
00043     MpegEncContext m; 
00044     AVCodecContext *avctx;
00045     DSPContext dsp;
00046     AVFrame picture;
00047     AVFrame current_picture;
00048     AVFrame last_picture;
00049     PutBitContext pb;
00050     GetBitContext gb;
00051 
00052     PutBitContext reorder_pb[6]; 
00053 
00054     int frame_width;
00055     int frame_height;
00056 
00057     
00058     int y_block_width;
00059     int y_block_height;
00060 
00061     
00062     int c_block_width;
00063     int c_block_height;
00064 
00065     uint16_t *mb_type;
00066     uint32_t *dummy;
00067     int16_t (*motion_val8[3])[2];
00068     int16_t (*motion_val16[3])[2];
00069 
00070     int64_t rd_total;
00071 
00072     uint8_t *scratchbuf;
00073 } SVQ1Context;
00074 
00075 static void svq1_write_header(SVQ1Context *s, int frame_type)
00076 {
00077     int i;
00078 
00079     
00080     put_bits(&s->pb, 22, 0x20);
00081 
00082     
00083     put_bits(&s->pb, 8, 0x00);
00084 
00085     
00086     put_bits(&s->pb, 2, frame_type - 1);
00087 
00088     if (frame_type == AV_PICTURE_TYPE_I) {
00089 
00090         
00091 
00092         
00093 
00094         
00095         put_bits(&s->pb, 5, 2); 
00096 
00097         i= ff_match_2uint16((void*)ff_svq1_frame_size_table, FF_ARRAY_ELEMS(ff_svq1_frame_size_table), s->frame_width, s->frame_height);
00098         put_bits(&s->pb, 3, i);
00099 
00100         if (i == 7)
00101         {
00102                 put_bits(&s->pb, 12, s->frame_width);
00103                 put_bits(&s->pb, 12, s->frame_height);
00104         }
00105     }
00106 
00107     
00108     put_bits(&s->pb, 2, 0);
00109 }
00110 
00111 
/* Seed value of the per-level threshold table (assigned to the top level, 5). */
#define QUALITY_THRESHOLD    100
/* Factor applied once per level when deriving the lower-level thresholds;
 * the product is truncated to int on assignment. */
#define THRESHOLD_MULTIPLIER 0.6
00114 
00115 static int encode_block(SVQ1Context *s, uint8_t *src, uint8_t *ref, uint8_t *decoded, int stride, int level, int threshold, int lambda, int intra){
00116     int count, y, x, i, j, split, best_mean, best_score, best_count;
00117     int best_vector[6];
00118     int block_sum[7]= {0, 0, 0, 0, 0, 0};
00119     int w= 2<<((level+2)>>1);
00120     int h= 2<<((level+1)>>1);
00121     int size=w*h;
00122     int16_t block[7][256];
00123     const int8_t *codebook_sum, *codebook;
00124     const uint16_t (*mean_vlc)[2];
00125     const uint8_t (*multistage_vlc)[2];
00126 
00127     best_score=0;
00128     
00129     if(intra){
00130         codebook_sum= svq1_intra_codebook_sum[level];
00131         codebook= ff_svq1_intra_codebooks[level];
00132         mean_vlc= ff_svq1_intra_mean_vlc;
00133         multistage_vlc= ff_svq1_intra_multistage_vlc[level];
00134         for(y=0; y<h; y++){
00135             for(x=0; x<w; x++){
00136                 int v= src[x + y*stride];
00137                 block[0][x + w*y]= v;
00138                 best_score += v*v;
00139                 block_sum[0] += v;
00140             }
00141         }
00142     }else{
00143         codebook_sum= svq1_inter_codebook_sum[level];
00144         codebook= ff_svq1_inter_codebooks[level];
00145         mean_vlc= ff_svq1_inter_mean_vlc + 256;
00146         multistage_vlc= ff_svq1_inter_multistage_vlc[level];
00147         for(y=0; y<h; y++){
00148             for(x=0; x<w; x++){
00149                 int v= src[x + y*stride] - ref[x + y*stride];
00150                 block[0][x + w*y]= v;
00151                 best_score += v*v;
00152                 block_sum[0] += v;
00153             }
00154         }
00155     }
00156 
00157     best_count=0;
00158     best_score -= (int)(((unsigned)block_sum[0]*block_sum[0])>>(level+3));
00159     best_mean= (block_sum[0] + (size>>1)) >> (level+3);
00160 
00161     if(level<4){
00162         for(count=1; count<7; count++){
00163             int best_vector_score= INT_MAX;
00164             int best_vector_sum=-999, best_vector_mean=-999;
00165             const int stage= count-1;
00166             const int8_t *vector;
00167 
00168             for(i=0; i<16; i++){
00169                 int sum= codebook_sum[stage*16 + i];
00170                 int sqr, diff, score;
00171 
00172                 vector = codebook + stage*size*16 + i*size;
00173                 sqr = s->dsp.ssd_int8_vs_int16(vector, block[stage], size);
00174                 diff= block_sum[stage] - sum;
00175                 score= sqr - ((diff*(int64_t)diff)>>(level+3)); 
00176                 if(score < best_vector_score){
00177                     int mean= (diff + (size>>1)) >> (level+3);
00178                     av_assert2(mean >-300 && mean<300);
00179                     mean= av_clip(mean, intra?0:-256, 255);
00180                     best_vector_score= score;
00181                     best_vector[stage]= i;
00182                     best_vector_sum= sum;
00183                     best_vector_mean= mean;
00184                 }
00185             }
00186             av_assert0(best_vector_mean != -999);
00187             vector= codebook + stage*size*16 + best_vector[stage]*size;
00188             for(j=0; j<size; j++){
00189                 block[stage+1][j] = block[stage][j] - vector[j];
00190             }
00191             block_sum[stage+1]= block_sum[stage] - best_vector_sum;
00192             best_vector_score +=
00193                 lambda*(+ 1 + 4*count
00194                         + multistage_vlc[1+count][1]
00195                         + mean_vlc[best_vector_mean][1]);
00196 
00197             if(best_vector_score < best_score){
00198                 best_score= best_vector_score;
00199                 best_count= count;
00200                 best_mean= best_vector_mean;
00201             }
00202         }
00203     }
00204 
00205     split=0;
00206     if(best_score > threshold && level){
00207         int score=0;
00208         int offset= (level&1) ? stride*h/2 : w/2;
00209         PutBitContext backup[6];
00210 
00211         for(i=level-1; i>=0; i--){
00212             backup[i]= s->reorder_pb[i];
00213         }
00214         score += encode_block(s, src         , ref         , decoded         , stride, level-1, threshold>>1, lambda, intra);
00215         score += encode_block(s, src + offset, ref + offset, decoded + offset, stride, level-1, threshold>>1, lambda, intra);
00216         score += lambda;
00217 
00218         if(score < best_score){
00219             best_score= score;
00220             split=1;
00221         }else{
00222             for(i=level-1; i>=0; i--){
00223                 s->reorder_pb[i]= backup[i];
00224             }
00225         }
00226     }
00227     if (level > 0)
00228         put_bits(&s->reorder_pb[level], 1, split);
00229 
00230     if(!split){
00231         av_assert1((best_mean >= 0 && best_mean<256) || !intra);
00232         av_assert1(best_mean >= -256 && best_mean<256);
00233         av_assert1(best_count >=0 && best_count<7);
00234         av_assert1(level<4 || best_count==0);
00235 
00236         
00237         put_bits(&s->reorder_pb[level],
00238             multistage_vlc[1 + best_count][1],
00239             multistage_vlc[1 + best_count][0]);
00240         put_bits(&s->reorder_pb[level], mean_vlc[best_mean][1],
00241             mean_vlc[best_mean][0]);
00242 
00243         for (i = 0; i < best_count; i++){
00244             av_assert2(best_vector[i]>=0 && best_vector[i]<16);
00245             put_bits(&s->reorder_pb[level], 4, best_vector[i]);
00246         }
00247 
00248         for(y=0; y<h; y++){
00249             for(x=0; x<w; x++){
00250                 decoded[x + y*stride]= src[x + y*stride] - block[best_count][x + w*y] + best_mean;
00251             }
00252         }
00253     }
00254 
00255     return best_score;
00256 }
00257 
00258 
00259 static int svq1_encode_plane(SVQ1Context *s, int plane, unsigned char *src_plane, unsigned char *ref_plane, unsigned char *decoded_plane,
00260     int width, int height, int src_stride, int stride)
00261 {
00262     int x, y;
00263     int i;
00264     int block_width, block_height;
00265     int level;
00266     int threshold[6];
00267     uint8_t *src = s->scratchbuf + stride * 16;
00268     const int lambda= (s->picture.quality*s->picture.quality) >> (2*FF_LAMBDA_SHIFT);
00269 
00270     
00271     threshold[5] = QUALITY_THRESHOLD;
00272     for (level = 4; level >= 0; level--)
00273         threshold[level] = threshold[level + 1] * THRESHOLD_MULTIPLIER;
00274 
00275     block_width = (width + 15) / 16;
00276     block_height = (height + 15) / 16;
00277 
00278     if(s->picture.pict_type == AV_PICTURE_TYPE_P){
00279         s->m.avctx= s->avctx;
00280         s->m.current_picture_ptr= &s->m.current_picture;
00281         s->m.last_picture_ptr   = &s->m.last_picture;
00282         s->m.last_picture.f.data[0] = ref_plane;
00283         s->m.linesize=
00284         s->m.last_picture.f.linesize[0] =
00285         s->m.new_picture.f.linesize[0] =
00286         s->m.current_picture.f.linesize[0] = stride;
00287         s->m.width= width;
00288         s->m.height= height;
00289         s->m.mb_width= block_width;
00290         s->m.mb_height= block_height;
00291         s->m.mb_stride= s->m.mb_width+1;
00292         s->m.b8_stride= 2*s->m.mb_width+1;
00293         s->m.f_code=1;
00294         s->m.pict_type= s->picture.pict_type;
00295         s->m.me_method= s->avctx->me_method;
00296         s->m.me.scene_change_score=0;
00297         s->m.flags= s->avctx->flags;
00298 
00299 
00300 
00301         s->m.lambda= s->picture.quality;
00302         s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
00303         s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
00304 
00305         if(!s->motion_val8[plane]){
00306             s->motion_val8 [plane]= av_mallocz((s->m.b8_stride*block_height*2 + 2)*2*sizeof(int16_t));
00307             s->motion_val16[plane]= av_mallocz((s->m.mb_stride*(block_height + 2) + 1)*2*sizeof(int16_t));
00308         }
00309 
00310         s->m.mb_type= s->mb_type;
00311 
00312         
00313         s->m.current_picture.mb_mean=   (uint8_t *)s->dummy;
00314         s->m.current_picture.mb_var=    (uint16_t*)s->dummy;
00315         s->m.current_picture.mc_mb_var= (uint16_t*)s->dummy;
00316         s->m.current_picture.f.mb_type = s->dummy;
00317 
00318         s->m.current_picture.f.motion_val[0] = s->motion_val8[plane] + 2;
00319         s->m.p_mv_table= s->motion_val16[plane] + s->m.mb_stride + 1;
00320         s->m.dsp= s->dsp; 
00321         ff_init_me(&s->m);
00322 
00323         s->m.me.dia_size= s->avctx->dia_size;
00324         s->m.first_slice_line=1;
00325         for (y = 0; y < block_height; y++) {
00326             s->m.new_picture.f.data[0] = src - y*16*stride; 
00327             s->m.mb_y= y;
00328 
00329             for(i=0; i<16 && i + 16*y<height; i++){
00330                 memcpy(&src[i*stride], &src_plane[(i+16*y)*src_stride], width);
00331                 for(x=width; x<16*block_width; x++)
00332                     src[i*stride+x]= src[i*stride+x-1];
00333             }
00334             for(; i<16 && i + 16*y<16*block_height; i++)
00335                 memcpy(&src[i*stride], &src[(i-1)*stride], 16*block_width);
00336 
00337             for (x = 0; x < block_width; x++) {
00338                 s->m.mb_x= x;
00339                 ff_init_block_index(&s->m);
00340                 ff_update_block_index(&s->m);
00341 
00342                 ff_estimate_p_frame_motion(&s->m, x, y);
00343             }
00344             s->m.first_slice_line=0;
00345         }
00346 
00347         ff_fix_long_p_mvs(&s->m);
00348         ff_fix_long_mvs(&s->m, NULL, 0, s->m.p_mv_table, s->m.f_code, CANDIDATE_MB_TYPE_INTER, 0);
00349     }
00350 
00351     s->m.first_slice_line=1;
00352     for (y = 0; y < block_height; y++) {
00353         for(i=0; i<16 && i + 16*y<height; i++){
00354             memcpy(&src[i*stride], &src_plane[(i+16*y)*src_stride], width);
00355             for(x=width; x<16*block_width; x++)
00356                 src[i*stride+x]= src[i*stride+x-1];
00357         }
00358         for(; i<16 && i + 16*y<16*block_height; i++)
00359             memcpy(&src[i*stride], &src[(i-1)*stride], 16*block_width);
00360 
00361         s->m.mb_y= y;
00362         for (x = 0; x < block_width; x++) {
00363             uint8_t reorder_buffer[3][6][7*32];
00364             int count[3][6];
00365             int offset = y * 16 * stride + x * 16;
00366             uint8_t *decoded= decoded_plane + offset;
00367             uint8_t *ref= ref_plane + offset;
00368             int score[4]={0,0,0,0}, best;
00369             uint8_t *temp = s->scratchbuf;
00370 
00371             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < 3000){ 
00372                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
00373                 return -1;
00374             }
00375 
00376             s->m.mb_x= x;
00377             ff_init_block_index(&s->m);
00378             ff_update_block_index(&s->m);
00379 
00380             if(s->picture.pict_type == AV_PICTURE_TYPE_I || (s->m.mb_type[x + y*s->m.mb_stride]&CANDIDATE_MB_TYPE_INTRA)){
00381                 for(i=0; i<6; i++){
00382                     init_put_bits(&s->reorder_pb[i], reorder_buffer[0][i], 7*32);
00383                 }
00384                 if(s->picture.pict_type == AV_PICTURE_TYPE_P){
00385                     const uint8_t *vlc= ff_svq1_block_type_vlc[SVQ1_BLOCK_INTRA];
00386                     put_bits(&s->reorder_pb[5], vlc[1], vlc[0]);
00387                     score[0]= vlc[1]*lambda;
00388                 }
00389                 score[0]+= encode_block(s, src+16*x, NULL, temp, stride, 5, 64, lambda, 1);
00390                 for(i=0; i<6; i++){
00391                     count[0][i]= put_bits_count(&s->reorder_pb[i]);
00392                     flush_put_bits(&s->reorder_pb[i]);
00393                 }
00394             }else
00395                 score[0]= INT_MAX;
00396 
00397             best=0;
00398 
00399             if(s->picture.pict_type == AV_PICTURE_TYPE_P){
00400                 const uint8_t *vlc= ff_svq1_block_type_vlc[SVQ1_BLOCK_INTER];
00401                 int mx, my, pred_x, pred_y, dxy;
00402                 int16_t *motion_ptr;
00403 
00404                 motion_ptr= ff_h263_pred_motion(&s->m, 0, 0, &pred_x, &pred_y);
00405                 if(s->m.mb_type[x + y*s->m.mb_stride]&CANDIDATE_MB_TYPE_INTER){
00406                     for(i=0; i<6; i++)
00407                         init_put_bits(&s->reorder_pb[i], reorder_buffer[1][i], 7*32);
00408 
00409                     put_bits(&s->reorder_pb[5], vlc[1], vlc[0]);
00410 
00411                     s->m.pb= s->reorder_pb[5];
00412                     mx= motion_ptr[0];
00413                     my= motion_ptr[1];
00414                     av_assert1(mx>=-32 && mx<=31);
00415                     av_assert1(my>=-32 && my<=31);
00416                     av_assert1(pred_x>=-32 && pred_x<=31);
00417                     av_assert1(pred_y>=-32 && pred_y<=31);
00418                     ff_h263_encode_motion(&s->m, mx - pred_x, 1);
00419                     ff_h263_encode_motion(&s->m, my - pred_y, 1);
00420                     s->reorder_pb[5]= s->m.pb;
00421                     score[1] += lambda*put_bits_count(&s->reorder_pb[5]);
00422 
00423                     dxy= (mx&1) + 2*(my&1);
00424 
00425                     s->dsp.put_pixels_tab[0][dxy](temp+16, ref + (mx>>1) + stride*(my>>1), stride, 16);
00426 
00427                     score[1]+= encode_block(s, src+16*x, temp+16, decoded, stride, 5, 64, lambda, 0);
00428                     best= score[1] <= score[0];
00429 
00430                     vlc= ff_svq1_block_type_vlc[SVQ1_BLOCK_SKIP];
00431                     score[2]= s->dsp.sse[0](NULL, src+16*x, ref, stride, 16);
00432                     score[2]+= vlc[1]*lambda;
00433                     if(score[2] < score[best] && mx==0 && my==0){
00434                         best=2;
00435                         s->dsp.put_pixels_tab[0][0](decoded, ref, stride, 16);
00436                         for(i=0; i<6; i++){
00437                             count[2][i]=0;
00438                         }
00439                         put_bits(&s->pb, vlc[1], vlc[0]);
00440                     }
00441                 }
00442 
00443                 if(best==1){
00444                     for(i=0; i<6; i++){
00445                         count[1][i]= put_bits_count(&s->reorder_pb[i]);
00446                         flush_put_bits(&s->reorder_pb[i]);
00447                     }
00448                 }else{
00449                     motion_ptr[0                 ] = motion_ptr[1                 ]=
00450                     motion_ptr[2                 ] = motion_ptr[3                 ]=
00451                     motion_ptr[0+2*s->m.b8_stride] = motion_ptr[1+2*s->m.b8_stride]=
00452                     motion_ptr[2+2*s->m.b8_stride] = motion_ptr[3+2*s->m.b8_stride]=0;
00453                 }
00454             }
00455 
00456             s->rd_total += score[best];
00457 
00458             for(i=5; i>=0; i--){
00459                 avpriv_copy_bits(&s->pb, reorder_buffer[best][i], count[best][i]);
00460             }
00461             if(best==0){
00462                 s->dsp.put_pixels_tab[0][0](decoded, temp, stride, 16);
00463             }
00464         }
00465         s->m.first_slice_line=0;
00466     }
00467     return 0;
00468 }
00469 
00470 static av_cold int svq1_encode_init(AVCodecContext *avctx)
00471 {
00472     SVQ1Context * const s = avctx->priv_data;
00473 
00474     ff_dsputil_init(&s->dsp, avctx);
00475     avctx->coded_frame = &s->picture;
00476 
00477     s->frame_width = avctx->width;
00478     s->frame_height = avctx->height;
00479 
00480     s->y_block_width = (s->frame_width + 15) / 16;
00481     s->y_block_height = (s->frame_height + 15) / 16;
00482 
00483     s->c_block_width = (s->frame_width / 4 + 15) / 16;
00484     s->c_block_height = (s->frame_height / 4 + 15) / 16;
00485 
00486     s->avctx= avctx;
00487     s->m.avctx= avctx;
00488     s->m.picture_structure = PICT_FRAME;
00489     s->m.me.temp      =
00490     s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
00491     s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
00492     s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
00493     s->mb_type        = av_mallocz((s->y_block_width+1)*s->y_block_height*sizeof(int16_t));
00494     s->dummy          = av_mallocz((s->y_block_width+1)*s->y_block_height*sizeof(int32_t));
00495     ff_h263_encode_init(&s->m); 
00496 
00497     return 0;
00498 }
00499 
00500 static int svq1_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
00501                              const AVFrame *pict, int *got_packet)
00502 {
00503     SVQ1Context * const s = avctx->priv_data;
00504     AVFrame * const p = &s->picture;
00505     AVFrame temp;
00506     int i, ret;
00507 
00508     if ((ret = ff_alloc_packet2(avctx, pkt, s->y_block_width*s->y_block_height*MAX_MB_BYTES*3 + FF_MIN_BUFFER_SIZE) < 0))
00509         return ret;
00510 
00511     if(avctx->pix_fmt != PIX_FMT_YUV410P){
00512         av_log(avctx, AV_LOG_ERROR, "unsupported pixel format\n");
00513         return -1;
00514     }
00515 
00516     if(!s->current_picture.data[0]){
00517         avctx->get_buffer(avctx, &s->current_picture);
00518         avctx->get_buffer(avctx, &s->last_picture);
00519         s->scratchbuf = av_malloc(s->current_picture.linesize[0] * 16 * 2);
00520     }
00521 
00522     temp= s->current_picture;
00523     s->current_picture= s->last_picture;
00524     s->last_picture= temp;
00525 
00526     init_put_bits(&s->pb, pkt->data, pkt->size);
00527 
00528     *p = *pict;
00529     p->pict_type = avctx->gop_size && avctx->frame_number % avctx->gop_size ? AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_I;
00530     p->key_frame = p->pict_type == AV_PICTURE_TYPE_I;
00531 
00532     svq1_write_header(s, p->pict_type);
00533     for(i=0; i<3; i++){
00534         if(svq1_encode_plane(s, i,
00535             s->picture.data[i], s->last_picture.data[i], s->current_picture.data[i],
00536             s->frame_width / (i?4:1), s->frame_height / (i?4:1),
00537             s->picture.linesize[i], s->current_picture.linesize[i]) < 0)
00538                 return -1;
00539     }
00540 
00541 
00542     while(put_bits_count(&s->pb) & 31)
00543         put_bits(&s->pb, 1, 0);
00544 
00545     flush_put_bits(&s->pb);
00546 
00547     pkt->size = put_bits_count(&s->pb) / 8;
00548     if (p->pict_type == AV_PICTURE_TYPE_I)
00549         pkt->flags |= AV_PKT_FLAG_KEY;
00550     *got_packet = 1;
00551 
00552     return 0;
00553 }
00554 
00555 static av_cold int svq1_encode_end(AVCodecContext *avctx)
00556 {
00557     SVQ1Context * const s = avctx->priv_data;
00558     int i;
00559 
00560     av_log(avctx, AV_LOG_DEBUG, "RD: %f\n", s->rd_total/(double)(avctx->width*avctx->height*avctx->frame_number));
00561 
00562     av_freep(&s->m.me.scratchpad);
00563     av_freep(&s->m.me.map);
00564     av_freep(&s->m.me.score_map);
00565     av_freep(&s->mb_type);
00566     av_freep(&s->dummy);
00567     av_freep(&s->scratchbuf);
00568 
00569     for(i=0; i<3; i++){
00570         av_freep(&s->motion_val8[i]);
00571         av_freep(&s->motion_val16[i]);
00572     }
00573     if(s->current_picture.data[0])
00574         avctx->release_buffer(avctx, &s->current_picture);
00575     if(s->last_picture.data[0])
00576         avctx->release_buffer(avctx, &s->last_picture);
00577 
00578     return 0;
00579 }
00580 
00581 
00582 AVCodec ff_svq1_encoder = {
00583     .name           = "svq1",
00584     .type           = AVMEDIA_TYPE_VIDEO,
00585     .id             = AV_CODEC_ID_SVQ1,
00586     .priv_data_size = sizeof(SVQ1Context),
00587     .init           = svq1_encode_init,
00588     .encode2        = svq1_encode_frame,
00589     .close          = svq1_encode_end,
00590     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV410P, PIX_FMT_NONE },
00591     .long_name      = NULL_IF_CONFIG_SMALL("Sorenson Vector Quantizer 1 / Sorenson Video 1 / SVQ1"),
00592 };