diff libavcodec/h264_mc.c @ 2:897f711a7157

rearrange to work with autoconf
author Nina Engelhardt <nengel@mailbox.tu-berlin.de>
date Tue, 25 Sep 2012 15:55:33 +0200
parents
children
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/libavcodec/h264_mc.c	Tue Sep 25 15:55:33 2012 +0200
     1.3 @@ -0,0 +1,272 @@
     1.4 +#include "h264_types.h"
     1.5 +#include "h264_data.h"
     1.6 +
     1.7 +static inline void mc_dir_part(MBRecContext *d, MBRecState *mrs, H264Mb *m, DecodedPicture *pic, int n, int square,
     1.8 +							   int chroma_height, int delta, int list,uint8_t *dest_y,
     1.9 +							   uint8_t *dest_cb, uint8_t *dest_cr, int src_x_offset, int src_y_offset,
    1.10 +							   qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
    1.11 +	const int mx= mrs->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
    1.12 +	const int my= mrs->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
    1.13 +	const int luma_xy= (mx&3) + ((my&3)<<2);
    1.14 +	const int pic_width  = 16*d->mb_width;
    1.15 +	const int pic_height = 16*d->mb_height;
    1.16 +
    1.17 +	uint8_t *src_y, *src_cb, *src_cr;
    1.18 +	int ymx= mx>>2;
    1.19 +	int ymy= my>>2;
    1.20 +	int cmy= my>>3;
    1.21 +	int cmx= mx>>3;
    1.22 +
    1.23 +	//truncate the motion vectors references
    1.24 +	if(ymy>= pic_height+2){
    1.25 +		ymy=pic_height+1;
    1.26 +	}else if(ymy <=-19){
    1.27 +		ymy=-18;
    1.28 +	}
    1.29 +	if(ymx>= pic_width+2){
    1.30 +		ymx= pic_width+1;
    1.31 +	}else if(ymx<=-19){
    1.32 +		ymx=-19;
    1.33 +	}
    1.34 +
    1.35 +	src_y = pic->data[0] + ymx + ymy*d->linesize;
    1.36 +	qpix_op[luma_xy](dest_y, src_y, d->linesize); //FIXME try variable height perhaps?
    1.37 +	if(!square){
    1.38 +		qpix_op[luma_xy](dest_y + delta, src_y + delta, d->linesize);
    1.39 +	}
    1.40 +
    1.41 +	if(cmy >= pic_height>>1){
    1.42 +		cmy = (pic_height>>1) -1;
    1.43 +	}else if(cmy<=-9){
    1.44 +		cmy=-8;
    1.45 +	}
    1.46 +	if(cmx >= pic_width>>1){
    1.47 +		cmx = (pic_width>>1) -1;
    1.48 +	}else if(cmx<=-9){
    1.49 +		cmx=-8;
    1.50 +	}
    1.51 +
    1.52 +	src_cb= pic->data[1] + cmx + cmy*d->uvlinesize;
    1.53 +	src_cr= pic->data[2] + cmx + cmy*d->uvlinesize;
    1.54 +
    1.55 +	chroma_op(dest_cb, src_cb, d->uvlinesize, chroma_height, mx&7, my&7);
    1.56 +	chroma_op(dest_cr, src_cr, d->uvlinesize, chroma_height, mx&7, my&7);
    1.57 +}
    1.58 +
    1.59 +static inline void mc_part_std(MBRecContext *d, MBRecState *mrs, H264Slice *s, H264Mb *m, int n, int square, int chroma_height, int delta,
    1.60 +								uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
    1.61 +								int x_offset, int y_offset,
    1.62 +								qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
    1.63 +								qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
    1.64 +								int list0, int list1){
    1.65 +	qpel_mc_func *qpix_op=  qpix_put;
    1.66 +	h264_chroma_mc_func chroma_op= chroma_put;
    1.67 +
    1.68 +	dest_y  += 2*x_offset + 2*y_offset*d->  linesize;
    1.69 +	dest_cb +=   x_offset +   y_offset*d->uvlinesize;
    1.70 +	dest_cr +=   x_offset +   y_offset*d->uvlinesize;
    1.71 +	x_offset += 8*m->mb_x;
    1.72 +	y_offset += 8*m->mb_y;
    1.73 +
    1.74 +	if(list0){
    1.75 +		DecodedPicture *ref= s->dp_ref_list[0][ mrs->ref_cache[0][ scan8[n] ] ];
    1.76 +		mc_dir_part(d, mrs, m, ref, n, square, chroma_height, delta, 0,
    1.77 +					dest_y, dest_cb, dest_cr, x_offset, y_offset, qpix_op, chroma_op);
    1.78 +
    1.79 +		qpix_op=  qpix_avg;
    1.80 +		chroma_op= chroma_avg;
    1.81 +	}
    1.82 +
    1.83 +	if(list1){
    1.84 +		DecodedPicture *ref= s->dp_ref_list[1][ mrs->ref_cache[1][ scan8[n] ] ];
    1.85 +		mc_dir_part(d, mrs, m, ref, n, square, chroma_height, delta, 1,
    1.86 +					dest_y, dest_cb, dest_cr, x_offset, y_offset, qpix_op, chroma_op);
    1.87 +	}
    1.88 +}
    1.89 +
    1.90 +static inline void mc_part_weighted(MBRecContext *d, MBRecState *mrs, H264Slice *s, H264Mb *m, int n, int square, int chroma_height, int delta,
    1.91 +									uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
    1.92 +									int x_offset, int y_offset,
    1.93 +									qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
    1.94 +									h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
    1.95 +									h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
    1.96 +									int list0, int list1){
    1.97 +	dest_y  += 2*x_offset + 2*y_offset*d->  linesize;
    1.98 +	dest_cb +=   x_offset +   y_offset*d->uvlinesize;
    1.99 +	dest_cr +=   x_offset +   y_offset*d->uvlinesize;
   1.100 +	x_offset += 8*m->mb_x;
   1.101 +	y_offset += 8*m->mb_y;
   1.102 +
   1.103 +	if(list0 && list1){
   1.104 +		/* don't optimize for luma-only case, since B-frames usually
   1.105 +		* use implicit weights => chroma too. */
   1.106 +		uint8_t *tmp_y  = d->scratchpad_y  + 2*x_offset +16 ;
   1.107 +		uint8_t *tmp_cb = d->scratchpad_cb + x_offset + 8;
   1.108 +		uint8_t *tmp_cr = d->scratchpad_cr + x_offset + 8;
   1.109 +
   1.110 +/*
   1.111 +		uint8_t *tmp_cb = d->scratchpad;
   1.112 +		uint8_t *tmp_cr = d->scratchpad + 8;
   1.113 +		uint8_t *tmp_y  = d->scratchpad + 8*d->uvlinesize;*/
   1.114 +		int refn0 = mrs->ref_cache[0][ scan8[n] ];
   1.115 +		int refn1 = mrs->ref_cache[1][ scan8[n] ];
   1.116 +
   1.117 +		mc_dir_part(d, mrs, m, s->dp_ref_list[0][refn0], n, square, chroma_height, delta, 0,
   1.118 +					dest_y, dest_cb, dest_cr, x_offset, y_offset, qpix_put, chroma_put);
   1.119 +		mc_dir_part(d, mrs, m, s->dp_ref_list[1][refn1], n, square, chroma_height, delta, 1,
   1.120 +					tmp_y, tmp_cb, tmp_cr, x_offset, y_offset, qpix_put, chroma_put);
   1.121 +
   1.122 +		if(s->use_weight == 2){
   1.123 +			int weight0 = s->implicit_weight[refn0][refn1][m->mb_y&1];
   1.124 +			int weight1 = 64 - weight0;
   1.125 +			luma_weight_avg(  dest_y,  tmp_y,  d->  linesize, 5, weight0, weight1, 0);
   1.126 +			chroma_weight_avg(dest_cb, tmp_cb, d->uvlinesize, 5, weight0, weight1, 0);
   1.127 +			chroma_weight_avg(dest_cr, tmp_cr, d->uvlinesize, 5, weight0, weight1, 0);
   1.128 +		}else{
   1.129 +			luma_weight_avg(dest_y, tmp_y, d->linesize, s->luma_log2_weight_denom,
   1.130 +							s->luma_weight[refn0][0][0] , s->luma_weight[refn1][1][0],
   1.131 +							s->luma_weight[refn0][0][1] + s->luma_weight[refn1][1][1]);
   1.132 +			chroma_weight_avg(dest_cb, tmp_cb, d->uvlinesize, s->chroma_log2_weight_denom,
   1.133 +							s->chroma_weight[refn0][0][0][0] , s->chroma_weight[refn1][1][0][0],
   1.134 +							s->chroma_weight[refn0][0][0][1] + s->chroma_weight[refn1][1][0][1]);
   1.135 +			chroma_weight_avg(dest_cr, tmp_cr, d->uvlinesize, s->chroma_log2_weight_denom,
   1.136 +							s->chroma_weight[refn0][0][1][0] , s->chroma_weight[refn1][1][1][0],
   1.137 +							s->chroma_weight[refn0][0][1][1] + s->chroma_weight[refn1][1][1][1]);
   1.138 +		}
   1.139 +	}else{
   1.140 +		int list = list1 ? 1 : 0;
   1.141 +		int refn = mrs->ref_cache[list][ scan8[n] ];
   1.142 +		DecodedPicture *ref= s->dp_ref_list[list][refn];
   1.143 +		mc_dir_part(d, mrs, m, ref, n, square, chroma_height, delta, list,
   1.144 +					dest_y, dest_cb, dest_cr, x_offset, y_offset, qpix_put, chroma_put);
   1.145 +
   1.146 +		luma_weight_op(dest_y, d->linesize, s->luma_log2_weight_denom,
   1.147 +						s->luma_weight[refn][list][0], s->luma_weight[refn][list][1]);
   1.148 +		if(s->use_weight_chroma){
   1.149 +			chroma_weight_op(dest_cb, d->uvlinesize, s->chroma_log2_weight_denom,
   1.150 +							s->chroma_weight[refn][list][0][0], s->chroma_weight[refn][list][0][1]);
   1.151 +			chroma_weight_op(dest_cr, d->uvlinesize, s->chroma_log2_weight_denom,
   1.152 +							s->chroma_weight[refn][list][1][0], s->chroma_weight[refn][list][1][1]);
   1.153 +		}
   1.154 +	}
   1.155 +}
   1.156 +
   1.157 +static inline void mc_part(MBRecContext *d, MBRecState *mrs, H264Slice *s, H264Mb *m, int n, int square, int chroma_height, int delta,
   1.158 +							uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
   1.159 +							int x_offset, int y_offset,
   1.160 +							qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
   1.161 +							qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
   1.162 +							h264_weight_func *weight_op, h264_biweight_func *weight_avg,
   1.163 +							int list0, int list1){
   1.164 +	if((s->use_weight==2 && list0 && list1
   1.165 +		&& (s->implicit_weight[ mrs->ref_cache[0][scan8[n]] ][ mrs->ref_cache[1][scan8[n]] ][m->mb_y&1] != 32))
   1.166 +		|| s->use_weight==1)
   1.167 +		mc_part_weighted(d, mrs, s, m, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
   1.168 +						x_offset, y_offset, qpix_put, chroma_put,
   1.169 +						weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
   1.170 +	else
   1.171 +		mc_part_std(d, mrs, s, m, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
   1.172 +					x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
   1.173 +}
   1.174 +
   1.175 +static inline void prefetch_motion(MBRecContext *d, MBRecState *mrs, H264Slice *s, H264Mb *m, int list){
   1.176 +	/* fetch pixels for estimated mv 4 macroblocks ahead
   1.177 +	* optimized for 64byte cache lines */
   1.178 +	const int refn = mrs->ref_cache[list][scan8[0]];
   1.179 +
   1.180 +	if(refn >= 0){
   1.181 +		const int mx= (mrs->mv_cache[list][scan8[0]][0]>>2) + 16*m->mb_x + 8;
   1.182 +		const int my= (mrs->mv_cache[list][scan8[0]][1]>>2) + 16*m->mb_y;
   1.183 +		uint8_t **src= s->dp_ref_list[list][refn]->data;
   1.184 +		int off= mx + (my + (m->mb_x&3)*4)*d->linesize + 64;
   1.185 +
   1.186 +		d->dsp.prefetch(src[0]+off, d->linesize, 4);
   1.187 +		off= (mx>>1) + ((my>>1) + (m->mb_x&7))*d->uvlinesize + 64;
   1.188 +		d->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
   1.189 +	}
   1.190 +}
   1.191 +
   1.192 +void hl_motion(MBRecContext *d, MBRecState *mrs, H264Slice *s, H264Mb *m, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
   1.193 +					qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
   1.194 +					qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
   1.195 +					h264_weight_func *weight_op, h264_biweight_func *weight_avg){
   1.196 +	const int mb_type= m->mb_type;
   1.197 +	assert(IS_INTER(mb_type));
   1.198 +
   1.199 +	if (mb_type & MB_TYPE_L0)
   1.200 +		prefetch_motion(d, mrs, s, m, 0);
   1.201 +	if (mb_type & MB_TYPE_L1)
   1.202 +		prefetch_motion(d, mrs, s, m, 1);
   1.203 +
   1.204 +	if(IS_16X16(mb_type)){
   1.205 +		mc_part(d, mrs, s, m, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
   1.206 +				qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
   1.207 +				weight_op, weight_avg,
   1.208 +				IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
   1.209 +	}else if(IS_16X8(mb_type)){
   1.210 +		mc_part(d, mrs, s, m, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
   1.211 +				qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
   1.212 +				&weight_op[1], &weight_avg[1],
   1.213 +				IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
   1.214 +		mc_part(d, mrs, s, m, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
   1.215 +				qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
   1.216 +				&weight_op[1], &weight_avg[1],
   1.217 +				IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
   1.218 +	}else if(IS_8X16(mb_type)){
   1.219 +		mc_part(d, mrs, s, m, 0, 0, 8, 8*d->linesize, dest_y, dest_cb, dest_cr, 0, 0,
   1.220 +				qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
   1.221 +				&weight_op[2], &weight_avg[2],
   1.222 +				IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
   1.223 +		mc_part(d, mrs, s, m, 4, 0, 8, 8*d->linesize, dest_y, dest_cb, dest_cr, 4, 0,
   1.224 +				qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
   1.225 +				&weight_op[2], &weight_avg[2],
   1.226 +				IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
   1.227 +	}else{
   1.228 +		int i;
   1.229 +
   1.230 +		assert(IS_8X8(mb_type));
   1.231 +
   1.232 +		for(i=0; i<4; i++){
   1.233 +			const int sub_mb_type= m->sub_mb_type[i];
   1.234 +			const int n= 4*i;
   1.235 +			int x_offset= (i&1)<<2;
   1.236 +			int y_offset= (i&2)<<1;
   1.237 +
   1.238 +			if(IS_SUB_8X8(sub_mb_type)){
   1.239 +				mc_part(d, mrs, s, m, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
   1.240 +						qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
   1.241 +						&weight_op[3], &weight_avg[3],
   1.242 +						IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
   1.243 +			}else if(IS_SUB_8X4(sub_mb_type)){
   1.244 +				mc_part(d, mrs, s, m, n, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
   1.245 +						qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
   1.246 +						&weight_op[4], &weight_avg[4],
   1.247 +						IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
   1.248 +				mc_part(d, mrs, s, m, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
   1.249 +						qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
   1.250 +						&weight_op[4], &weight_avg[4],
   1.251 +						IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
   1.252 +			}else if(IS_SUB_4X8(sub_mb_type)){
   1.253 +				mc_part(d, mrs, s, m, n, 0, 4, 4*d->linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
   1.254 +						qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
   1.255 +						&weight_op[5], &weight_avg[5],
   1.256 +						IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
   1.257 +				mc_part(d, mrs, s, m, n+1, 0, 4, 4*d->linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
   1.258 +						qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
   1.259 +						&weight_op[5], &weight_avg[5],
   1.260 +						IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
   1.261 +			}else{
   1.262 +				int j;
   1.263 +				assert(IS_SUB_4X4(sub_mb_type));
   1.264 +				for(j=0; j<4; j++){
   1.265 +					int sub_x_offset= x_offset + 2*(j&1);
   1.266 +					int sub_y_offset= y_offset +   (j&2);
   1.267 +					mc_part(d, mrs, s, m, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
   1.268 +							qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
   1.269 +							&weight_op[6], &weight_avg[6],
   1.270 +							IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
   1.271 +				}
   1.272 +			}
   1.273 +		}
   1.274 +	}
   1.275 +}