Mercurial > cgi-bin > hgwebdir.cgi > PR > Applications > VSs > VSs__H264__App
diff libavcodec/h264_mc.c @ 2:897f711a7157
rearrange to work with autoconf
| author | Nina Engelhardt <nengel@mailbox.tu-berlin.de> |
|---|---|
| date | Tue, 25 Sep 2012 15:55:33 +0200 |
| parents | |
| children | |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/libavcodec/h264_mc.c Tue Sep 25 15:55:33 2012 +0200 1.3 @@ -0,0 +1,272 @@ 1.4 +#include "h264_types.h" 1.5 +#include "h264_data.h" 1.6 + 1.7 +static inline void mc_dir_part(MBRecContext *d, MBRecState *mrs, H264Mb *m, DecodedPicture *pic, int n, int square, 1.8 + int chroma_height, int delta, int list,uint8_t *dest_y, 1.9 + uint8_t *dest_cb, uint8_t *dest_cr, int src_x_offset, int src_y_offset, 1.10 + qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){ 1.11 + const int mx= mrs->mv_cache[list][ scan8[n] ][0] + src_x_offset*8; 1.12 + const int my= mrs->mv_cache[list][ scan8[n] ][1] + src_y_offset*8; 1.13 + const int luma_xy= (mx&3) + ((my&3)<<2); 1.14 + const int pic_width = 16*d->mb_width; 1.15 + const int pic_height = 16*d->mb_height; 1.16 + 1.17 + uint8_t *src_y, *src_cb, *src_cr; 1.18 + int ymx= mx>>2; 1.19 + int ymy= my>>2; 1.20 + int cmy= my>>3; 1.21 + int cmx= mx>>3; 1.22 + 1.23 + //truncate the motion vectors references 1.24 + if(ymy>= pic_height+2){ 1.25 + ymy=pic_height+1; 1.26 + }else if(ymy <=-19){ 1.27 + ymy=-18; 1.28 + } 1.29 + if(ymx>= pic_width+2){ 1.30 + ymx= pic_width+1; 1.31 + }else if(ymx<=-19){ 1.32 + ymx=-19; 1.33 + } 1.34 + 1.35 + src_y = pic->data[0] + ymx + ymy*d->linesize; 1.36 + qpix_op[luma_xy](dest_y, src_y, d->linesize); //FIXME try variable height perhaps? 
1.37 + if(!square){ 1.38 + qpix_op[luma_xy](dest_y + delta, src_y + delta, d->linesize); 1.39 + } 1.40 + 1.41 + if(cmy >= pic_height>>1){ 1.42 + cmy = (pic_height>>1) -1; 1.43 + }else if(cmy<=-9){ 1.44 + cmy=-8; 1.45 + } 1.46 + if(cmx >= pic_width>>1){ 1.47 + cmx = (pic_width>>1) -1; 1.48 + }else if(cmx<=-9){ 1.49 + cmx=-8; 1.50 + } 1.51 + 1.52 + src_cb= pic->data[1] + cmx + cmy*d->uvlinesize; 1.53 + src_cr= pic->data[2] + cmx + cmy*d->uvlinesize; 1.54 + 1.55 + chroma_op(dest_cb, src_cb, d->uvlinesize, chroma_height, mx&7, my&7); 1.56 + chroma_op(dest_cr, src_cr, d->uvlinesize, chroma_height, mx&7, my&7); 1.57 +} 1.58 + 1.59 +static inline void mc_part_std(MBRecContext *d, MBRecState *mrs, H264Slice *s, H264Mb *m, int n, int square, int chroma_height, int delta, 1.60 + uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, 1.61 + int x_offset, int y_offset, 1.62 + qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, 1.63 + qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg, 1.64 + int list0, int list1){ 1.65 + qpel_mc_func *qpix_op= qpix_put; 1.66 + h264_chroma_mc_func chroma_op= chroma_put; 1.67 + 1.68 + dest_y += 2*x_offset + 2*y_offset*d-> linesize; 1.69 + dest_cb += x_offset + y_offset*d->uvlinesize; 1.70 + dest_cr += x_offset + y_offset*d->uvlinesize; 1.71 + x_offset += 8*m->mb_x; 1.72 + y_offset += 8*m->mb_y; 1.73 + 1.74 + if(list0){ 1.75 + DecodedPicture *ref= s->dp_ref_list[0][ mrs->ref_cache[0][ scan8[n] ] ]; 1.76 + mc_dir_part(d, mrs, m, ref, n, square, chroma_height, delta, 0, 1.77 + dest_y, dest_cb, dest_cr, x_offset, y_offset, qpix_op, chroma_op); 1.78 + 1.79 + qpix_op= qpix_avg; 1.80 + chroma_op= chroma_avg; 1.81 + } 1.82 + 1.83 + if(list1){ 1.84 + DecodedPicture *ref= s->dp_ref_list[1][ mrs->ref_cache[1][ scan8[n] ] ]; 1.85 + mc_dir_part(d, mrs, m, ref, n, square, chroma_height, delta, 1, 1.86 + dest_y, dest_cb, dest_cr, x_offset, y_offset, qpix_op, chroma_op); 1.87 + } 1.88 +} 1.89 + 1.90 +static inline void mc_part_weighted(MBRecContext *d, 
MBRecState *mrs, H264Slice *s, H264Mb *m, int n, int square, int chroma_height, int delta, 1.91 + uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, 1.92 + int x_offset, int y_offset, 1.93 + qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, 1.94 + h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op, 1.95 + h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg, 1.96 + int list0, int list1){ 1.97 + dest_y += 2*x_offset + 2*y_offset*d-> linesize; 1.98 + dest_cb += x_offset + y_offset*d->uvlinesize; 1.99 + dest_cr += x_offset + y_offset*d->uvlinesize; 1.100 + x_offset += 8*m->mb_x; 1.101 + y_offset += 8*m->mb_y; 1.102 + 1.103 + if(list0 && list1){ 1.104 + /* don't optimize for luma-only case, since B-frames usually 1.105 + * use implicit weights => chroma too. */ 1.106 + uint8_t *tmp_y = d->scratchpad_y + 2*x_offset +16 ; 1.107 + uint8_t *tmp_cb = d->scratchpad_cb + x_offset + 8; 1.108 + uint8_t *tmp_cr = d->scratchpad_cr + x_offset + 8; 1.109 + 1.110 +/* 1.111 + uint8_t *tmp_cb = d->scratchpad; 1.112 + uint8_t *tmp_cr = d->scratchpad + 8; 1.113 + uint8_t *tmp_y = d->scratchpad + 8*d->uvlinesize;*/ 1.114 + int refn0 = mrs->ref_cache[0][ scan8[n] ]; 1.115 + int refn1 = mrs->ref_cache[1][ scan8[n] ]; 1.116 + 1.117 + mc_dir_part(d, mrs, m, s->dp_ref_list[0][refn0], n, square, chroma_height, delta, 0, 1.118 + dest_y, dest_cb, dest_cr, x_offset, y_offset, qpix_put, chroma_put); 1.119 + mc_dir_part(d, mrs, m, s->dp_ref_list[1][refn1], n, square, chroma_height, delta, 1, 1.120 + tmp_y, tmp_cb, tmp_cr, x_offset, y_offset, qpix_put, chroma_put); 1.121 + 1.122 + if(s->use_weight == 2){ 1.123 + int weight0 = s->implicit_weight[refn0][refn1][m->mb_y&1]; 1.124 + int weight1 = 64 - weight0; 1.125 + luma_weight_avg( dest_y, tmp_y, d-> linesize, 5, weight0, weight1, 0); 1.126 + chroma_weight_avg(dest_cb, tmp_cb, d->uvlinesize, 5, weight0, weight1, 0); 1.127 + chroma_weight_avg(dest_cr, tmp_cr, d->uvlinesize, 5, weight0, weight1, 0); 1.128 + 
}else{ 1.129 + luma_weight_avg(dest_y, tmp_y, d->linesize, s->luma_log2_weight_denom, 1.130 + s->luma_weight[refn0][0][0] , s->luma_weight[refn1][1][0], 1.131 + s->luma_weight[refn0][0][1] + s->luma_weight[refn1][1][1]); 1.132 + chroma_weight_avg(dest_cb, tmp_cb, d->uvlinesize, s->chroma_log2_weight_denom, 1.133 + s->chroma_weight[refn0][0][0][0] , s->chroma_weight[refn1][1][0][0], 1.134 + s->chroma_weight[refn0][0][0][1] + s->chroma_weight[refn1][1][0][1]); 1.135 + chroma_weight_avg(dest_cr, tmp_cr, d->uvlinesize, s->chroma_log2_weight_denom, 1.136 + s->chroma_weight[refn0][0][1][0] , s->chroma_weight[refn1][1][1][0], 1.137 + s->chroma_weight[refn0][0][1][1] + s->chroma_weight[refn1][1][1][1]); 1.138 + } 1.139 + }else{ 1.140 + int list = list1 ? 1 : 0; 1.141 + int refn = mrs->ref_cache[list][ scan8[n] ]; 1.142 + DecodedPicture *ref= s->dp_ref_list[list][refn]; 1.143 + mc_dir_part(d, mrs, m, ref, n, square, chroma_height, delta, list, 1.144 + dest_y, dest_cb, dest_cr, x_offset, y_offset, qpix_put, chroma_put); 1.145 + 1.146 + luma_weight_op(dest_y, d->linesize, s->luma_log2_weight_denom, 1.147 + s->luma_weight[refn][list][0], s->luma_weight[refn][list][1]); 1.148 + if(s->use_weight_chroma){ 1.149 + chroma_weight_op(dest_cb, d->uvlinesize, s->chroma_log2_weight_denom, 1.150 + s->chroma_weight[refn][list][0][0], s->chroma_weight[refn][list][0][1]); 1.151 + chroma_weight_op(dest_cr, d->uvlinesize, s->chroma_log2_weight_denom, 1.152 + s->chroma_weight[refn][list][1][0], s->chroma_weight[refn][list][1][1]); 1.153 + } 1.154 + } 1.155 +} 1.156 + 1.157 +static inline void mc_part(MBRecContext *d, MBRecState *mrs, H264Slice *s, H264Mb *m, int n, int square, int chroma_height, int delta, 1.158 + uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, 1.159 + int x_offset, int y_offset, 1.160 + qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, 1.161 + qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg, 1.162 + h264_weight_func *weight_op, h264_biweight_func 
*weight_avg, 1.163 + int list0, int list1){ 1.164 + if((s->use_weight==2 && list0 && list1 1.165 + && (s->implicit_weight[ mrs->ref_cache[0][scan8[n]] ][ mrs->ref_cache[1][scan8[n]] ][m->mb_y&1] != 32)) 1.166 + || s->use_weight==1) 1.167 + mc_part_weighted(d, mrs, s, m, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr, 1.168 + x_offset, y_offset, qpix_put, chroma_put, 1.169 + weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1); 1.170 + else 1.171 + mc_part_std(d, mrs, s, m, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr, 1.172 + x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1); 1.173 +} 1.174 + 1.175 +static inline void prefetch_motion(MBRecContext *d, MBRecState *mrs, H264Slice *s, H264Mb *m, int list){ 1.176 + /* fetch pixels for estimated mv 4 macroblocks ahead 1.177 + * optimized for 64byte cache lines */ 1.178 + const int refn = mrs->ref_cache[list][scan8[0]]; 1.179 + 1.180 + if(refn >= 0){ 1.181 + const int mx= (mrs->mv_cache[list][scan8[0]][0]>>2) + 16*m->mb_x + 8; 1.182 + const int my= (mrs->mv_cache[list][scan8[0]][1]>>2) + 16*m->mb_y; 1.183 + uint8_t **src= s->dp_ref_list[list][refn]->data; 1.184 + int off= mx + (my + (m->mb_x&3)*4)*d->linesize + 64; 1.185 + 1.186 + d->dsp.prefetch(src[0]+off, d->linesize, 4); 1.187 + off= (mx>>1) + ((my>>1) + (m->mb_x&7))*d->uvlinesize + 64; 1.188 + d->dsp.prefetch(src[1]+off, src[2]-src[1], 2); 1.189 + } 1.190 +} 1.191 + 1.192 +void hl_motion(MBRecContext *d, MBRecState *mrs, H264Slice *s, H264Mb *m, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, 1.193 + qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put), 1.194 + qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg), 1.195 + h264_weight_func *weight_op, h264_biweight_func *weight_avg){ 1.196 + const int mb_type= m->mb_type; 1.197 + assert(IS_INTER(mb_type)); 1.198 + 1.199 + if (mb_type & MB_TYPE_L0) 1.200 + prefetch_motion(d, mrs, s, m, 0); 1.201 + if (mb_type & MB_TYPE_L1) 
1.202 + prefetch_motion(d, mrs, s, m, 1); 1.203 + 1.204 + if(IS_16X16(mb_type)){ 1.205 + mc_part(d, mrs, s, m, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0, 1.206 + qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0], 1.207 + weight_op, weight_avg, 1.208 + IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); 1.209 + }else if(IS_16X8(mb_type)){ 1.210 + mc_part(d, mrs, s, m, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0, 1.211 + qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0], 1.212 + &weight_op[1], &weight_avg[1], 1.213 + IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); 1.214 + mc_part(d, mrs, s, m, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4, 1.215 + qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0], 1.216 + &weight_op[1], &weight_avg[1], 1.217 + IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1)); 1.218 + }else if(IS_8X16(mb_type)){ 1.219 + mc_part(d, mrs, s, m, 0, 0, 8, 8*d->linesize, dest_y, dest_cb, dest_cr, 0, 0, 1.220 + qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], 1.221 + &weight_op[2], &weight_avg[2], 1.222 + IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); 1.223 + mc_part(d, mrs, s, m, 4, 0, 8, 8*d->linesize, dest_y, dest_cb, dest_cr, 4, 0, 1.224 + qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], 1.225 + &weight_op[2], &weight_avg[2], 1.226 + IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1)); 1.227 + }else{ 1.228 + int i; 1.229 + 1.230 + assert(IS_8X8(mb_type)); 1.231 + 1.232 + for(i=0; i<4; i++){ 1.233 + const int sub_mb_type= m->sub_mb_type[i]; 1.234 + const int n= 4*i; 1.235 + int x_offset= (i&1)<<2; 1.236 + int y_offset= (i&2)<<1; 1.237 + 1.238 + if(IS_SUB_8X8(sub_mb_type)){ 1.239 + mc_part(d, mrs, s, m, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset, 1.240 + qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], 1.241 + &weight_op[3], &weight_avg[3], 1.242 + IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); 1.243 + }else if(IS_SUB_8X4(sub_mb_type)){ 1.244 + mc_part(d, mrs, s, m, n, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, 
y_offset, 1.245 + qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1], 1.246 + &weight_op[4], &weight_avg[4], 1.247 + IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); 1.248 + mc_part(d, mrs, s, m, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2, 1.249 + qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1], 1.250 + &weight_op[4], &weight_avg[4], 1.251 + IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); 1.252 + }else if(IS_SUB_4X8(sub_mb_type)){ 1.253 + mc_part(d, mrs, s, m, n, 0, 4, 4*d->linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset, 1.254 + qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], 1.255 + &weight_op[5], &weight_avg[5], 1.256 + IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); 1.257 + mc_part(d, mrs, s, m, n+1, 0, 4, 4*d->linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset, 1.258 + qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], 1.259 + &weight_op[5], &weight_avg[5], 1.260 + IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); 1.261 + }else{ 1.262 + int j; 1.263 + assert(IS_SUB_4X4(sub_mb_type)); 1.264 + for(j=0; j<4; j++){ 1.265 + int sub_x_offset= x_offset + 2*(j&1); 1.266 + int sub_y_offset= y_offset + (j&2); 1.267 + mc_part(d, mrs, s, m, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset, 1.268 + qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], 1.269 + &weight_op[6], &weight_avg[6], 1.270 + IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); 1.271 + } 1.272 + } 1.273 + } 1.274 + } 1.275 +}
