Mercurial > cgi-bin > hgwebdir.cgi > PR > Applications > VSs > VSs__H264__App
diff libavcodec/h264_pred_mode.c @ 2:897f711a7157
rearrange to work with autoconf
| author | Nina Engelhardt <nengel@mailbox.tu-berlin.de> |
|---|---|
| date | Tue, 25 Sep 2012 15:55:33 +0200 |
| parents | |
| children |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/libavcodec/h264_pred_mode.c Tue Sep 25 15:55:33 2012 +0200 1.3 @@ -0,0 +1,1013 @@ 1.4 +/* 1.5 + * H.26L/H.264/AVC/JVT/14496-10/... direct mb/block decoding 1.6 + * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> 1.7 + * 1.8 + * This file is part of FFmpeg. 1.9 + * 1.10 + * FFmpeg is free software; you can redistribute it and/or 1.11 + * modify it under the terms of the GNU Lesser General Public 1.12 + * License as published by the Free Software Foundation; either 1.13 + * version 2.1 of the License, or (at your option) any later version. 1.14 + * 1.15 + * FFmpeg is distributed in the hope that it will be useful, 1.16 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 1.17 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 1.18 + * Lesser General Public License for more details. 1.19 + * 1.20 + * You should have received a copy of the GNU Lesser General Public 1.21 + * License along with FFmpeg; if not, write to the Free Software 1.22 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 1.23 + */ 1.24 + 1.25 +/** 1.26 + * @file 1.27 + * H.264 / AVC / MPEG4 part10 direct mb/block decoding. 1.28 + * @author Michael Niedermayer <michaelni@gmx.at> 1.29 + */ 1.30 + 1.31 +#include "dsputil.h" 1.32 +#include "avcodec.h" 1.33 +#include "h264_data.h" 1.34 +#include "h264.h" 1.35 +#include "rectangle.h" 1.36 + 1.37 +//#undef NDEBUG 1.38 +#include <assert.h> 1.39 + 1.40 +static const uint8_t left_block_options[4][16]={ 1.41 + {0,1,2,3,7,10,8,11,7+0*8, 7+1*8, 7+2*8, 7+3*8, 2+0*8, 2+3*8, 2+1*8, 2+2*8}, 1.42 + {2,2,3,3,8,11,8,11,7+2*8, 7+2*8, 7+3*8, 7+3*8, 2+1*8, 2+2*8, 2+1*8, 2+2*8}, 1.43 + {0,0,1,1,7,10,7,10,7+0*8, 7+0*8, 7+1*8, 7+1*8, 2+0*8, 2+3*8, 2+0*8, 2+3*8}, 1.44 + {0,2,0,2,7,10,7,10,7+0*8, 7+2*8, 7+0*8, 7+2*8, 2+0*8, 2+3*8, 2+0*8, 2+3*8} 1.45 +}; 1.46 + 1.47 + 1.48 +// static void check_cache_copy(MBRecContext *mrc, H264Slice *s, H264Mb *m){ 1.49 +// for (int list=0; list<2; list++){ 1.50 +// for (int i=0; i<40; i++){ 1.51 +// assert (m->ref_cache[list][i] == m->ref_cache_copy[list][i]); 1.52 +// assert (mrs->mv_cache[list][i][0] == mrs->mv_cache_copy[list][i][0]); 1.53 +// assert (mrs->mv_cache[list][i][1] == mrs->mv_cache_copy[list][i][1]); 1.54 +// } 1.55 +// } 1.56 +// } 1.57 + 1.58 +// static void check_cache_copy2(MBRecContext *mrc, H264Slice *s, H264Mb *m){ 1.59 +// for (int list=0; list<2; list++){ 1.60 +// for (int i=0; i<40; i++){ 1.61 +// assert (m->ref_cache[list][i] == m->ref_cache_copy2[list][i]); 1.62 +// assert (mrs->mv_cache[list][i][0] == mrs->mv_cache_copy2[list][i][0]); 1.63 +// assert (mrs->mv_cache[list][i][1] == mrs->mv_cache_copy2[list][i][1]); 1.64 +// } 1.65 +// } 1.66 +// } 1.67 + 1.68 +static void fill_decode_caches_rec(MBRecContext *mrc, MBRecState *mrs, H264Slice *s, H264Mb *m, int mb_type){ 1.69 + int topleft_type, top_type, topright_type, left_type; 1.70 + const uint8_t * left_block= left_block_options[0]; 1.71 + const int mb_x = m->mb_x; 1.72 + int i; 1.73 + 1.74 + mrs->top_type = mrs->mb_type_top[mb_x ]; 1.75 + mrs->left_type = mrs->mb_type [mb_x-1]; 1.76 + 1.77 + topleft_type = mrs->mb_type_top[mb_x-1]; 1.78 + top_type = mrs->mb_type_top[mb_x ]; 1.79 + topright_type= mrs->mb_type_top[mb_x+1]; 1.80 + left_type = mrs->mb_type [mb_x-1]; 1.81 + 1.82 + int type_mask= s->pps.constrained_intra_pred ? 1 : -1; 1.83 + 1.84 + if(!IS_SKIP(mb_type)){ 1.85 +// memset(mrc->non_zero_count_cache, 0, sizeof(mrc->non_zero_count_cache)); 1.86 + AV_COPY32(&mrs->non_zero_count_cache[4+8*1], &m->non_zero_count[ 0]); 1.87 + AV_COPY32(&mrs->non_zero_count_cache[4+8*2], &m->non_zero_count[ 4]); 1.88 + AV_COPY32(&mrs->non_zero_count_cache[4+8*3], &m->non_zero_count[ 8]); 1.89 + AV_COPY32(&mrs->non_zero_count_cache[4+8*4], &m->non_zero_count[12]); 1.90 + 1.91 + for (int i=0; i<2; i++) { 1.92 + mrs->non_zero_count_cache[8*1 + 8*i + 1] = m->non_zero_count[16 + i*2 ]; 1.93 + mrs->non_zero_count_cache[8*1 + 8*i + 2] = m->non_zero_count[16 + i*2 +1]; 1.94 + mrs->non_zero_count_cache[8*4 + 8*i + 1] = m->non_zero_count[20 + i*2 ]; 1.95 + mrs->non_zero_count_cache[8*4 + 8*i + 2] = m->non_zero_count[20 + i*2 +1]; 1.96 + } 1.97 + 1.98 + if(IS_INTRA(mb_type)){ 1.99 +// memset(mrc->intra4x4_pred_mode_cache, 0, sizeof(mrc->intra4x4_pred_mode_cache)); 1.100 + 1.101 + mrs->topleft_samples_available= 1.102 + mrs->top_samples_available= 1.103 + mrs->left_samples_available= 0xFFFF; 1.104 + mrs->topright_samples_available= 0xEEEA; 1.105 + 1.106 + if(!(top_type & type_mask)){ 1.107 + mrs->topleft_samples_available= 0xB3FF; 1.108 + mrs->top_samples_available= 0x33FF; 1.109 + mrs->topright_samples_available= 0x26EA; 1.110 + } 1.111 + 1.112 + if(!(left_type & type_mask)){ 1.113 + mrs->topleft_samples_available&= 0xDF5F; 1.114 + mrs->left_samples_available&= 0x5F5F; 1.115 + } 1.116 + 1.117 + if(!(topleft_type & type_mask)) 1.118 + mrs->topleft_samples_available&= 0x7FFF; 1.119 + 1.120 + if(!(topright_type & type_mask)) 1.121 + mrs->topright_samples_available&= 0xFBFF; 1.122 + 1.123 + if(IS_INTRA4x4(mb_type)){ 1.124 + if(IS_INTRA4x4(top_type)){ 1.125 + AV_COPY32(mrs->intra4x4_pred_mode_cache+4+8*0, &mrs->intra4x4_pred_mode_top[4*mb_x]); 1.126 + }else{ 1.127 + mrs->intra4x4_pred_mode_cache[4+8*0]= 1.128 + mrs->intra4x4_pred_mode_cache[5+8*0]= 1.129 + mrs->intra4x4_pred_mode_cache[6+8*0]= 1.130 + mrs->intra4x4_pred_mode_cache[7+8*0]= 2 - 3*!(top_type & type_mask); 1.131 + } 1.132 + 1.133 + if(IS_INTRA4x4(left_type)){ 1.134 +#if OMPSS 1.135 + mrs->intra4x4_pred_mode_cache[3+8*1]= m->intra4x4_pred_mode_left[0]; 1.136 + mrs->intra4x4_pred_mode_cache[3+8*2]= m->intra4x4_pred_mode_left[1]; 1.137 + mrs->intra4x4_pred_mode_cache[3+8*3]= m->intra4x4_pred_mode_left[2]; 1.138 + mrs->intra4x4_pred_mode_cache[3+8*4]= m->intra4x4_pred_mode_left[3]; 1.139 +#else 1.140 + mrs->intra4x4_pred_mode_cache[3+8*1]= mrs->intra4x4_pred_mode_left[0]; 1.141 + mrs->intra4x4_pred_mode_cache[3+8*2]= mrs->intra4x4_pred_mode_left[1]; 1.142 + mrs->intra4x4_pred_mode_cache[3+8*3]= mrs->intra4x4_pred_mode_left[2]; 1.143 + mrs->intra4x4_pred_mode_cache[3+8*4]= mrs->intra4x4_pred_mode_left[3]; 1.144 +#endif 1.145 + }else{ 1.146 + mrs->intra4x4_pred_mode_cache[3+8*1]= 1.147 + mrs->intra4x4_pred_mode_cache[3+8*2]= 1.148 + mrs->intra4x4_pred_mode_cache[3+8*3]= 1.149 + mrs->intra4x4_pred_mode_cache[3+8*4]= 2 - 3*!(left_type & type_mask); 1.150 + } 1.151 + } 1.152 + } 1.153 + } 1.154 + 1.155 + if(IS_INTER(mb_type) ||(IS_DIRECT(mb_type) && s->direct_spatial_mv_pred)){ 1.156 + int list; 1.157 + 1.158 +// memset(mrs->mv_cache, 0, sizeof(mrs->mv_cache)); 1.159 +// memset(mrs->ref_cache, 0, sizeof(mrs->ref_cache)); 1.160 + 1.161 + mrs->ref_cache[0][scan8[5 ]+1] = mrs->ref_cache[0][scan8[7 ]+1] = mrs->ref_cache[0][scan8[13]+1] = 1.162 + mrs->ref_cache[1][scan8[5 ]+1] = mrs->ref_cache[1][scan8[7 ]+1] = mrs->ref_cache[1][scan8[13]+1] = PART_NOT_AVAILABLE; 1.163 + 1.164 + for(list=0; list<s->list_count; list++){ 1.165 + if(!USES_LIST(mb_type, list)){ 1.166 + continue; 1.167 + } 1.168 + assert(!(IS_DIRECT(mb_type) && !s->direct_spatial_mv_pred)); 1.169 + 1.170 + if(USES_LIST(top_type, list)){ 1.171 + const int b_xy= 4*mb_x + 3*mrc->b_stride; 1.172 + AV_COPY128(mrs->mv_cache[list][scan8[0] + 0 - 1*8], mrs->motion_val_top[list][b_xy + 0]); 1.173 + mrs->ref_cache[list][scan8[0] + 0 - 1*8]= 1.174 + mrs->ref_cache[list][scan8[0] + 1 - 1*8]= mrs->ref_index_top[list][4*mb_x + 2]; 1.175 + mrs->ref_cache[list][scan8[0] + 2 - 1*8]= 1.176 + mrs->ref_cache[list][scan8[0] + 3 - 1*8]= mrs->ref_index_top[list][4*mb_x + 3]; 1.177 + }else{ 1.178 + AV_ZERO128(mrs->mv_cache[list][scan8[0] + 0 - 1*8]); 1.179 + AV_WN32A(&mrs->ref_cache[list][scan8[0] + 0 - 1*8], ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101); 1.180 + } 1.181 + 1.182 + if(mb_type & (MB_TYPE_16x8|MB_TYPE_8x8)){ 1.183 + for(i=0; i<2; i++){ 1.184 + int cache_idx = scan8[0] - 1 + i*2*8; 1.185 + if(USES_LIST(left_type, list)){ 1.186 + const int b_xy= 4*(mb_x-1) + 3; 1.187 + const int b8_x= 4*(mb_x-1) + 1; 1.188 + AV_COPY32(mrs->mv_cache[list][cache_idx ], mrs->motion_val[list][b_xy + mrc->b_stride*left_block[0+i*2]]); 1.189 + AV_COPY32(mrs->mv_cache[list][cache_idx+8], mrs->motion_val[list][b_xy + mrc->b_stride*left_block[1+i*2]]); 1.190 + mrs->ref_cache[list][cache_idx ]= mrs->ref_index[list][b8_x + (left_block[0+i*2]&~1)]; 1.191 + mrs->ref_cache[list][cache_idx+8]= mrs->ref_index[list][b8_x + (left_block[1+i*2]&~1)]; 1.192 + }else{ 1.193 + AV_ZERO32(mrs->mv_cache [list][cache_idx ]); 1.194 + AV_ZERO32(mrs->mv_cache [list][cache_idx+8]); 1.195 + mrs->ref_cache[list][cache_idx ]= 1.196 + mrs->ref_cache[list][cache_idx+8]= (left_type ? LIST_NOT_USED : PART_NOT_AVAILABLE); 1.197 + } 1.198 + } 1.199 + }else{ 1.200 + if(USES_LIST(left_type, list)){ 1.201 + const int b_x = 4*(mb_x-1) + 3; 1.202 + const int b8_x= 4*(mb_x-1) + 1; 1.203 + AV_COPY32(mrs->mv_cache[list][scan8[0] - 1], mrs->motion_val[list][b_x + mrc->b_stride*left_block[0]]); 1.204 + mrs->ref_cache[list][scan8[0] - 1]= mrs->ref_index[list][b8_x + (left_block[0]&~1)]; 1.205 + }else{ 1.206 + AV_ZERO32(mrs->mv_cache [list][scan8[0] - 1]); 1.207 + mrs->ref_cache[list][scan8[0] - 1]= left_type ? LIST_NOT_USED : PART_NOT_AVAILABLE; 1.208 + } 1.209 + } 1.210 + 1.211 + if(USES_LIST(topright_type, list)){ 1.212 + const int b_xy= 4*(mb_x+1) + 3*mrc->b_stride; 1.213 + AV_COPY32(mrs->mv_cache[list][scan8[0] + 4 - 1*8], mrs->motion_val_top[list][b_xy]); 1.214 + mrs->ref_cache[list][scan8[0] + 4 - 1*8]= mrs->ref_index_top[list][4*(mb_x+1) + 2]; 1.215 + }else{ 1.216 + AV_ZERO32(mrs->mv_cache [list][scan8[0] + 4 - 1*8]); 1.217 + mrs->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE; 1.218 + } 1.219 + if(mrs->ref_cache[list][scan8[0] + 4 - 1*8] < 0){ 1.220 + int topleft_partition= -1; 1.221 + if(USES_LIST(topleft_type, list)){ 1.222 + const int b_xy = 4*(mb_x-1) + 3 + mrc->b_stride + (topleft_partition & 2*mrc->b_stride); 1.223 + const int b8_x= 4*(mb_x-1) + 1 + (topleft_partition & 2); 1.224 + AV_COPY32(mrs->mv_cache[list][scan8[0] - 1 - 1*8], mrs->motion_val_top[list][b_xy]); 1.225 + mrs->ref_cache[list][scan8[0] - 1 - 1*8]= mrs->ref_index_top[list][b8_x]; 1.226 + }else{ 1.227 + AV_ZERO32(mrs->mv_cache[list][scan8[0] - 1 - 1*8]); 1.228 + mrs->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE; 1.229 + } 1.230 + } 1.231 + 1.232 + if((mb_type&(MB_TYPE_SKIP|MB_TYPE_DIRECT2))) 1.233 + continue; 1.234 + 1.235 + if(!(mb_type&(MB_TYPE_SKIP|MB_TYPE_DIRECT2))) { 1.236 + mrs->ref_cache[list][scan8[4 ]] = 1.237 + mrs->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE; 1.238 + AV_ZERO32(mrs->mv_cache [list][scan8[4 ]]); 1.239 + AV_ZERO32(mrs->mv_cache [list][scan8[12]]); 1.240 + } 1.241 + } 1.242 + } 1.243 +} 1.244 + 1.245 +static inline void write_back_motion_rec(MBRecContext *mrc, MBRecState *mrs, H264Slice *s, H264Mb *m, int mb_type){ 1.246 + const int b_stride = mrc->b_stride; 1.247 + const int b_x = 4*m->mb_x; //try mb2b(8)_xy 1.248 + const int b8_x= 4*m->mb_x; 1.249 + int list; 1.250 + 1.251 + if(!USES_LIST(mb_type, 0)) 1.252 + fill_rectangle(&mrs->ref_index[0][b8_x], 2, 2, 2, (uint8_t)LIST_NOT_USED, 1); 1.253 + 1.254 + for(list=0; list<s->list_count; list++){ 1.255 + int y; 1.256 + int16_t (*mv_dst)[2]; 1.257 + int16_t (*mv_src)[2]; 1.258 + 1.259 + if(!USES_LIST(mb_type, list)) 1.260 + continue; 1.261 + 1.262 + mv_dst = &mrs->motion_val[list][b_x]; 1.263 + mv_src = &mrs->mv_cache[list][scan8[0]]; 1.264 + for(y=0; y<4; y++){ 1.265 + AV_COPY128(mv_dst + y*b_stride, mv_src + 8*y); 1.266 + } 1.267 + 1.268 + { 1.269 + int8_t *ref_index = &mrs->ref_index[list][b8_x]; 1.270 + ref_index[0+0*2]= mrs->ref_cache[list][scan8[0]]; 1.271 + ref_index[1+0*2]= mrs->ref_cache[list][scan8[4]]; 1.272 + ref_index[0+1*2]= mrs->ref_cache[list][scan8[8]]; 1.273 + ref_index[1+1*2]= mrs->ref_cache[list][scan8[12]]; 1.274 + } 1.275 + } 1.276 +} 1.277 + 1.278 + 1.279 +/** 1.280 +* checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks. 1.281 +*/ 1.282 +static int check_intra4x4_pred_mode(MBRecContext *mrc, MBRecState *mrs, H264Slice *s, H264Mb *m){ 1.283 + static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0}; 1.284 + static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED}; 1.285 + int i; 1.286 + 1.287 + if(!(mrs->top_samples_available&0x8000)){ 1.288 + for(i=0; i<4; i++){ 1.289 + int status= top[ mrs->intra4x4_pred_mode_cache[scan8[0] + i] ]; 1.290 + if(status<0){ 1.291 + av_log(AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, m->mb_x, m->mb_y); 1.292 + return -1; 1.293 + } else if(status){ 1.294 + mrs->intra4x4_pred_mode_cache[scan8[0] + i]= status; 1.295 + } 1.296 + } 1.297 + } 1.298 + 1.299 + if((mrs->left_samples_available&0x8888)!=0x8888){ 1.300 + static const int mask[4]={0x8000,0x2000,0x80,0x20}; 1.301 + for(i=0; i<4; i++){ 1.302 + if(!(mrs->left_samples_available&mask[i])){ 1.303 + int status= left[ mrs->intra4x4_pred_mode_cache[scan8[0] + 8*i] ]; 1.304 + if(status<0){ 1.305 + av_log(AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, m->mb_x, m->mb_y); 1.306 + return -1; 1.307 + } else if(status){ 1.308 + mrs->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status; 1.309 + } 1.310 + } 1.311 + } 1.312 + } 1.313 + return 0; 1.314 +} 1.315 + 1.316 +/** 1.317 +* checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks. 1.318 +*/ 1.319 +static int check_intra_pred_mode(MBRecContext *mrc, MBRecState *mrs, H264Slice *s, H264Mb *m, int mode){ 1.320 + static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1}; 1.321 + static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8}; 1.322 + 1.323 + if(mode > 6) { 1.324 + av_log(AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", m->mb_x, m->mb_y); 1.325 + return -1; 1.326 + } 1.327 + 1.328 + if(!(mrs->top_samples_available&0x8000)){ 1.329 + mode= top[ mode ]; 1.330 + if(mode<0){ 1.331 + av_log(AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", m->mb_x, m->mb_y); 1.332 + return -1; 1.333 + } 1.334 + } 1.335 + 1.336 + if((mrs->left_samples_available&0x8080) != 0x8080){ 1.337 + mode= left[ mode ]; 1.338 + if(mrs->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred 1.339 + mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(mrs->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8); 1.340 + } 1.341 + if(mode<0){ 1.342 + av_log(AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", m->mb_x, m->mb_y); 1.343 + return -1; 1.344 + } 1.345 + } 1.346 + return mode; 1.347 +} 1.348 + 1.349 +/** 1.350 + * gets the predicted intra4x4 prediction mode. 1.351 + */ 1.352 +static inline int pred_intra_mode(MBRecContext *mrc, MBRecState *mrs, int n){ 1.353 + const int index8= scan8[n]; 1.354 + const int left= mrs->intra4x4_pred_mode_cache[index8 - 1]; 1.355 + const int top = mrs->intra4x4_pred_mode_cache[index8 - 8]; 1.356 + const int min= FFMIN(left, top); 1.357 + 1.358 + if(min<0) return DC_PRED; 1.359 + else return min; 1.360 +} 1.361 + 1.362 +static void write_back_intra_pred_mode_rec(MBRecContext *mrc, MBRecState *mrs, H264Mb *m, int mb_x){ 1.363 + int8_t *mode= &mrs->intra4x4_pred_mode[4*mb_x]; 1.364 + 1.365 + AV_COPY32(mode, mrs->intra4x4_pred_mode_cache + 4 + 8*4); 1.366 +#if OMPSS 1.367 + if (m->mb_x < mrc->mb_width-1){ 1.368 + H264Mb *mr= m+1; 1.369 + mode = mr->intra4x4_pred_mode_left; 1.370 + mode[0]= mrs->intra4x4_pred_mode_cache[7+8*1]; 1.371 + mode[1]= mrs->intra4x4_pred_mode_cache[7+8*2]; 1.372 + mode[2]= mrs->intra4x4_pred_mode_cache[7+8*3]; 1.373 + mode[3]= mrs->intra4x4_pred_mode_cache[7+8*4]; 1.374 + } 1.375 +#else 1.376 + mode = mrs->intra4x4_pred_mode_left; 1.377 + mode[0]= mrs->intra4x4_pred_mode_cache[7+8*1]; 1.378 + mode[1]= mrs->intra4x4_pred_mode_cache[7+8*2]; 1.379 + mode[2]= mrs->intra4x4_pred_mode_cache[7+8*3]; 1.380 + mode[3]= mrs->intra4x4_pred_mode_cache[7+8*4]; 1.381 +#endif 1.382 +} 1.383 + 1.384 +static void pred_spatial_direct_motion_rec(MBRecContext *mrc, MBRecState *mrs, H264Slice *s, H264Mb *m, int *mb_type){ 1.385 + int b4_stride = mrc->b_stride; 1.386 + const int mb_x = m->mb_x; 1.387 + int mb_type_col[2]; 1.388 + const int16_t (*l1mv0)[2], (*l1mv1)[2]; 1.389 + const int8_t *l1ref0, *l1ref1; 1.390 + const int is_b8x8 = IS_8X8(*mb_type); 1.391 + unsigned int sub_mb_type= MB_TYPE_L0L1; 1.392 + int i8, i4; 1.393 + int ref[2]; 1.394 + int mv[2]; 1.395 + int list; 1.396 + 1.397 + //assert(h->ref_list[1][0].reference&3); 1.398 + 1.399 +#define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM) 1.400 + 1.401 + /* ref = min(neighbors) */ 1.402 + for(list=0; list<2; list++){ 1.403 + int left_ref = mrs->ref_cache[list][scan8[0] - 1]; 1.404 + int top_ref = mrs->ref_cache[list][scan8[0] - 8]; 1.405 + int refc = mrs->ref_cache[list][scan8[0] - 8 + 4]; 1.406 + const int16_t *C= mrs->mv_cache[list][ scan8[0] - 8 + 4]; 1.407 + if(refc == PART_NOT_AVAILABLE){ 1.408 + refc = mrs->ref_cache[list][scan8[0] - 8 - 1]; 1.409 + C = mrs->mv_cache[list][scan8[0] - 8 - 1]; 1.410 + } 1.411 + ref[list] = FFMIN3((unsigned)left_ref, (unsigned)top_ref, (unsigned)refc); 1.412 + if(ref[list] >= 0){ 1.413 + //this is just pred_motion() but with the cases removed that cannot happen for direct blocks 1.414 + const int16_t * const A= mrs->mv_cache[list][ scan8[0] - 1 ]; 1.415 + const int16_t * const B= mrs->mv_cache[list][ scan8[0] - 8 ]; 1.416 + 1.417 + int match_count= (left_ref==ref[list]) + (top_ref==ref[list]) + (refc==ref[list]); 1.418 + if(match_count > 1){ //most common 1.419 + mv[list]= pack16to32(mid_pred(A[0], B[0], C[0]), 1.420 + mid_pred(A[1], B[1], C[1]) ); 1.421 + }else { 1.422 + assert(match_count==1); 1.423 + if(left_ref==ref[list]){ 1.424 + mv[list]= AV_RN32A(A); 1.425 + }else if(top_ref==ref[list]){ 1.426 + mv[list]= AV_RN32A(B); 1.427 + }else{ 1.428 + mv[list]= AV_RN32A(C); 1.429 + } 1.430 + } 1.431 + }else{ 1.432 + int mask= ~(MB_TYPE_L0 << (2*list)); 1.433 + mv[list] = 0; 1.434 + ref[list] = -1; 1.435 + if(!is_b8x8) 1.436 + *mb_type &= mask; 1.437 + sub_mb_type &= mask; 1.438 + } 1.439 + } 1.440 + 1.441 + if(ref[0] < 0 && ref[1] < 0){ 1.442 + ref[0] = ref[1] = 0; 1.443 + if(!is_b8x8) 1.444 + *mb_type |= MB_TYPE_L0L1; 1.445 + sub_mb_type |= MB_TYPE_L0L1; 1.446 + } 1.447 + 1.448 + if(!(is_b8x8|mv[0]|mv[1])){ 1.449 + fill_rectangle(&mrs->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1); 1.450 + fill_rectangle(&mrs->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1); 1.451 + fill_rectangle(&mrs->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4); 1.452 + fill_rectangle(&mrs->mv_cache[1][scan8[0]], 4, 4, 8, 0, 4); 1.453 + *mb_type= (*mb_type & ~(MB_TYPE_8x8|MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_P1L0|MB_TYPE_P1L1))|MB_TYPE_16x16|MB_TYPE_DIRECT2; 1.454 + return; 1.455 + } 1.456 + 1.457 + mb_type_col[0] = 1.458 + mb_type_col[1] = mrs->list1_mb_type[mb_x]; 1.459 + 1.460 + sub_mb_type |= MB_TYPE_16x16|MB_TYPE_DIRECT2; /* B_SUB_8x8 */ 1.461 + if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){ 1.462 + *mb_type |= MB_TYPE_16x16|MB_TYPE_DIRECT2; /* B_16x16 */ 1.463 + }else if(!is_b8x8 && (mb_type_col[0] & (MB_TYPE_16x8|MB_TYPE_8x16))){ 1.464 + *mb_type |= MB_TYPE_DIRECT2 | (mb_type_col[0] & (MB_TYPE_16x8|MB_TYPE_8x16)); 1.465 + }else{ 1.466 + if(!s->direct_8x8_inference_flag){ 1.467 + /* FIXME save sub mb types from previous frames (or derive from MVs) 1.468 + * so we know exactly what block size to use */ 1.469 + sub_mb_type += (MB_TYPE_8x8-MB_TYPE_16x16); /* B_SUB_4x4 */ 1.470 + } 1.471 + *mb_type |= MB_TYPE_8x8; 1.472 + } 1.473 + 1.474 + l1mv0 = (void *) &mrs->list1_motion_val[0][4*mb_x]; 1.475 + l1mv1 = (void *) &mrs->list1_motion_val[1][4*mb_x]; 1.476 + l1ref0 = &mrs->list1_ref_index [0][4*mb_x]; 1.477 + l1ref1 = &mrs->list1_ref_index [1][4*mb_x]; 1.478 +// if(!b8_stride){ 1.479 +// if(m->mb_y&1){ 1.480 +// l1ref0 += 2; 1.481 +// l1ref1 += 2; 1.482 +// l1mv0 += 2*b4_stride; 1.483 +// l1mv1 += 2*b4_stride; 1.484 +// } 1.485 +// } 1.486 + 1.487 + if(IS_16X16(*mb_type)){ 1.488 + int a,b; 1.489 + 1.490 + fill_rectangle(&mrs->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1); 1.491 + fill_rectangle(&mrs->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1); 1.492 + if(!IS_INTRA(mb_type_col[0]) && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1) 1.493 + || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1 1.494 + ))){ 1.495 + a=b=0; 1.496 + if(ref[0] > 0) 1.497 + a= mv[0]; 1.498 + if(ref[1] > 0) 1.499 + b= mv[1]; 1.500 + }else{ 1.501 + a= mv[0]; 1.502 + b= mv[1]; 1.503 + } 1.504 + fill_rectangle(&mrs->mv_cache[0][scan8[0]], 4, 4, 8, a, 4); 1.505 + fill_rectangle(&mrs->mv_cache[1][scan8[0]], 4, 4, 8, b, 4); 1.506 + }else{ 1.507 + int n=0; 1.508 + for(i8=0; i8<4; i8++){ 1.509 + const int x8 = i8&1; 1.510 + const int y8 = i8>>1; 1.511 + 1.512 + if(is_b8x8 && !IS_DIRECT(m->sub_mb_type[i8])) 1.513 + continue; 1.514 + m->sub_mb_type[i8] = sub_mb_type; 1.515 + 1.516 + fill_rectangle(&mrs->mv_cache[0][scan8[i8*4]], 2, 2, 8, mv[0], 4); 1.517 + fill_rectangle(&mrs->mv_cache[1][scan8[i8*4]], 2, 2, 8, mv[1], 4); 1.518 + fill_rectangle(&mrs->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1); 1.519 + fill_rectangle(&mrs->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1); 1.520 + 1.521 + /* col_zero_flag */ 1.522 + if(!IS_INTRA(mb_type_col[0]) && (l1ref0[i8] == 0 || (l1ref0[i8] < 0 && l1ref1[i8] == 0 )) 1.523 + ){ 1.524 + const int16_t (*l1mv)[2]= l1ref0[i8] == 0 ? l1mv0 : l1mv1; 1.525 + if(IS_SUB_8X8(sub_mb_type)){ 1.526 + const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride]; 1.527 + if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){ 1.528 + if(ref[0] == 0) 1.529 + fill_rectangle(&mrs->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4); 1.530 + if(ref[1] == 0) 1.531 + fill_rectangle(&mrs->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4); 1.532 + n+=4; 1.533 + } 1.534 + }else{ 1.535 + int k=0; 1.536 + for(i4=0; i4<4; i4++){ 1.537 + const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride]; 1.538 + if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){ 1.539 + if(ref[0] == 0) 1.540 + AV_ZERO32(mrs->mv_cache[0][scan8[i8*4+i4]]); 1.541 + if(ref[1] == 0) 1.542 + AV_ZERO32(mrs->mv_cache[1][scan8[i8*4+i4]]); 1.543 + k++; 1.544 + } 1.545 + } 1.546 + if(!(k&3)) 1.547 + m->sub_mb_type[i8]+= MB_TYPE_16x16 - MB_TYPE_8x8; 1.548 + n+=k; 1.549 + } 1.550 + } 1.551 + } 1.552 + if(!is_b8x8 && !(n&15)){ 1.553 + *mb_type= (*mb_type & ~(MB_TYPE_8x8|MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_P1L0|MB_TYPE_P1L1))|MB_TYPE_16x16|MB_TYPE_DIRECT2; 1.554 + } 1.555 + } 1.556 +} 1.557 + 1.558 +static void pred_temp_direct_motion_rec(MBRecContext *mrc, MBRecState *mrs, H264Slice *s, H264Mb *m, int *mb_type){ 1.559 + const int mb_x = m->mb_x; 1.560 + int b4_stride = mrc->b_stride; 1.561 + int mb_type_col[2]; 1.562 + const int16_t (*l1mv0)[2], (*l1mv1)[2]; 1.563 + const int8_t *l1ref0, *l1ref1; 1.564 + const int is_b8x8 = IS_8X8(*mb_type); 1.565 + unsigned int sub_mb_type; 1.566 + int i8, i4; 1.567 + const int *map_col_to_list0[2] = {s->map_col_to_list0[0], s->map_col_to_list0[1]}; 1.568 + const int *dist_scale_factor = s->dist_scale_factor; 1.569 + 1.570 + mb_type_col[0] = 1.571 + mb_type_col[1] = mrs->list1_mb_type[mb_x]; 1.572 + 1.573 + sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */ 1.574 + if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){ 1.575 + *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */ 1.576 + }else if(!is_b8x8 && (mb_type_col[0] & (MB_TYPE_16x8|MB_TYPE_8x16))){ 1.577 + *mb_type |= MB_TYPE_L0L1|MB_TYPE_DIRECT2 | (mb_type_col[0] & (MB_TYPE_16x8|MB_TYPE_8x16)); 1.578 + }else{ 1.579 + if(!s->direct_8x8_inference_flag){ 1.580 + /* FIXME save sub mb types from previous frames (or derive from MVs) 1.581 + * so we know exactly what block size to use */ 1.582 + sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */ 1.583 + } 1.584 + *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1; 1.585 + } 1.586 + 1.587 + l1mv0 = (void *) &mrs->list1_motion_val[0][4*mb_x]; 1.588 + l1mv1 = (void *) &mrs->list1_motion_val[1][4*mb_x]; 1.589 + l1ref0 = &mrs->list1_ref_index [0][4*mb_x]; 1.590 + l1ref1 = &mrs->list1_ref_index [1][4*mb_x]; 1.591 + 1.592 + /* one-to-one mv scaling */ 1.593 + if(IS_16X16(*mb_type)){ 1.594 + int ref, mv0, mv1; 1.595 + 1.596 + fill_rectangle(&mrs->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1); 1.597 + if(IS_INTRA(mb_type_col[0])){ 1.598 + ref=mv0=mv1=0; 1.599 + }else{ 1.600 + const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]] 1.601 + : map_col_to_list0[1][l1ref1[0]]; 1.602 + const int scale = dist_scale_factor[ref0]; 1.603 + const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0]; 1.604 + int mv_l0[2]; 1.605 + mv_l0[0] = (scale * mv_col[0] + 128) >> 8; 1.606 + mv_l0[1] = (scale * mv_col[1] + 128) >> 8; 1.607 + ref= ref0; 1.608 + mv0= pack16to32(mv_l0[0],mv_l0[1]); 1.609 + mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]); 1.610 + } 1.611 + fill_rectangle(&mrs->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1); 1.612 + fill_rectangle(&mrs->mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4); 1.613 + fill_rectangle(&mrs->mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4); 1.614 + }else{ 1.615 + for(i8=0; i8<4; i8++){ 1.616 + const int x8 = i8&1; 1.617 + const int y8 = i8>>1; 1.618 + int ref0, scale; 1.619 + const int16_t (*l1mv)[2]= l1mv0; 1.620 + 1.621 + if(is_b8x8 && !IS_DIRECT(m->sub_mb_type[i8])) 1.622 + continue; 1.623 + m->sub_mb_type[i8] = sub_mb_type; 1.624 + fill_rectangle(&mrs->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1); 1.625 + if(IS_INTRA(mb_type_col[0])){ 1.626 + fill_rectangle(&mrs->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1); 1.627 + fill_rectangle(&mrs->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4); 1.628 + fill_rectangle(&mrs->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4); 1.629 + continue; 1.630 + } 1.631 + 1.632 + ref0 = l1ref0[i8]; 1.633 + if(ref0 >= 0) 1.634 + ref0 = map_col_to_list0[0][ref0 ]; 1.635 + else{ 1.636 + ref0 = map_col_to_list0[1][l1ref1[i8]]; 1.637 + l1mv= l1mv1; 1.638 + } 1.639 + scale = dist_scale_factor[ref0]; 1.640 + 1.641 + fill_rectangle(&mrs->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1); 1.642 + if(IS_SUB_8X8(sub_mb_type)){ 1.643 + const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride]; 1.644 + int mx = (scale * mv_col[0] + 128) >> 8; 1.645 + int my = (scale * mv_col[1] + 128) >> 8; 1.646 + fill_rectangle(&mrs->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4); 1.647 + fill_rectangle(&mrs->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4); 1.648 + }else 1.649 + for(i4=0; i4<4; i4++){ 1.650 + const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride]; 1.651 + int16_t *mv_l0 = mrs->mv_cache[0][scan8[i8*4+i4]]; 1.652 + mv_l0[0] = (scale * mv_col[0] + 128) >> 8; 1.653 + mv_l0[1] = (scale * mv_col[1] + 128) >> 8; 1.654 + AV_WN32A(mrs->mv_cache[1][scan8[i8*4+i4]], 1.655 + pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1])); 1.656 + } 1.657 + } 1.658 + } 1.659 +} 1.660 + 1.661 +void ff_h264_pred_direct_motion_rec(MBRecContext *mrc, MBRecState *mrs, H264Slice *s, H264Mb *m, int *mb_type){ 1.662 + if(s->direct_spatial_mv_pred){ 1.663 + pred_spatial_direct_motion_rec(mrc, mrs, s, m, mb_type); 1.664 + }else{ 1.665 + pred_temp_direct_motion_rec(mrc, mrs, s, m, mb_type); 1.666 + } 1.667 +} 1.668 + 1.669 +static inline int fetch_diagonal_mv(MBRecContext *mrc, MBRecState *mrs, H264Slice *s, const int16_t **C, int i, int list, int part_width){ 1.670 + const int topright_ref= mrs->ref_cache[list][ i - 8 + part_width ]; 1.671 + 1.672 + if(topright_ref != PART_NOT_AVAILABLE){ 1.673 + *C= mrs->mv_cache[list][ i - 8 + part_width ]; 1.674 + return topright_ref; 1.675 + }else{ 1.676 + *C= mrs->mv_cache[list][ i - 8 - 1 ]; 1.677 + return mrs->ref_cache[list][ i - 8 - 1 ]; 1.678 + } 1.679 +} 1.680 + 1.681 +/** 1.682 + * gets the predicted MV. 1.683 + * @param n the block index 1.684 + * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4) 1.685 + * @param mx the x component of the predicted motion vector 1.686 + * @param my the y component of the predicted motion vector 1.687 + */ 1.688 +static inline void pred_motion(MBRecContext *mrc, MBRecState *mrs, H264Slice *s, int n, int part_width, int list, int ref, int * const mx, int * const my){ 1.689 + const int index8= scan8[n]; 1.690 + const int top_ref= mrs->ref_cache[list][ index8 - 8 ]; 1.691 + const int left_ref= mrs->ref_cache[list][ index8 - 1 ]; 1.692 + const int16_t * const A= mrs->mv_cache[list][ index8 - 1 ]; 1.693 + const int16_t * const B= mrs->mv_cache[list][ index8 - 8 ]; 1.694 + const int16_t * C; 1.695 + int diagonal_ref, match_count; 1.696 + 1.697 + assert(part_width==1 || part_width==2 || part_width==4); 1.698 + 1.699 +/* mv_cache 1.700 + B . . A T T T T 1.701 + U . . L . . , . 1.702 + U . . L . . . . 1.703 + U . . L . . , . 1.704 + . . . L . . . . 1.705 +*/ 1.706 + 1.707 + diagonal_ref= fetch_diagonal_mv(mrc, mrs, s, &C, index8, list, part_width); 1.708 + match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref); 1.709 + 1.710 + if(match_count > 1){ //most common 1.711 + *mx= mid_pred(A[0], B[0], C[0]); 1.712 + *my= mid_pred(A[1], B[1], C[1]); 1.713 + }else if(match_count==1){ 1.714 + if(left_ref==ref){ 1.715 + *mx= A[0]; 1.716 + *my= A[1]; 1.717 + }else if(top_ref==ref){ 1.718 + *mx= B[0]; 1.719 + *my= B[1]; 1.720 + }else{ 1.721 + *mx= C[0]; 1.722 + *my= C[1]; 1.723 + } 1.724 + }else{ 1.725 + if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){ 1.726 + *mx= A[0]; 1.727 + *my= A[1]; 1.728 + }else{ 1.729 + *mx= mid_pred(A[0], B[0], C[0]); 1.730 + *my= mid_pred(A[1], B[1], C[1]); 1.731 + } 1.732 + } 1.733 + 1.734 +} 1.735 + 1.736 +/** 1.737 + * gets the directionally predicted 16x8 MV. 1.738 + * @param n the block index 1.739 + * @param mx the x component of the predicted motion vector 1.740 + * @param my the y component of the predicted motion vector 1.741 + */ 1.742 +static inline void pred_16x8_motion(MBRecContext *mrc, MBRecState *mrs, H264Slice *s, int n, int list, int ref, int * const mx, int * const my){ 1.743 + if(n==0){ 1.744 + const int top_ref= mrs->ref_cache[list][ scan8[0] - 8 ]; 1.745 + const int16_t * const B= mrs->mv_cache[list][ scan8[0] - 8 ]; 1.746 + 1.747 + if(top_ref == ref){ 1.748 + *mx= B[0]; 1.749 + *my= B[1]; 1.750 + return; 1.751 + } 1.752 + }else{ 1.753 + const int left_ref= mrs->ref_cache[list][ scan8[8] - 1 ]; 1.754 + const int16_t * const A= mrs->mv_cache[list][ scan8[8] - 1 ]; 1.755 + 1.756 + if(left_ref == ref){ 1.757 + *mx= A[0]; 1.758 + *my= A[1]; 1.759 + return; 1.760 + } 1.761 + } 1.762 + 1.763 + //RARE 1.764 + pred_motion(mrc, mrs, s, n, 4, list, ref, mx, my); 1.765 +} 1.766 + 1.767 +/** 1.768 + * gets the directionally predicted 8x16 MV. 1.769 + * @param n the block index 1.770 + * @param mx the x component of the predicted motion vector 1.771 + * @param my the y component of the predicted motion vector 1.772 + */ 1.773 +static inline void pred_8x16_motion(MBRecContext *mrc, MBRecState *mrs, H264Slice *s, int n, int list, int ref, int * const mx, int * const my){ 1.774 + if(n==0){ 1.775 + const int left_ref= mrs->ref_cache[list][ scan8[0] - 1 ]; 1.776 + const int16_t * const A= mrs->mv_cache[list][ scan8[0] - 1 ]; 1.777 + 1.778 + if(left_ref == ref){ 1.779 + *mx= A[0]; 1.780 + *my= A[1]; 1.781 + return; 1.782 + } 1.783 + }else{ 1.784 + const int16_t * C; 1.785 + int diagonal_ref; 1.786 + 1.787 + diagonal_ref= fetch_diagonal_mv(mrc, mrs, s, &C, scan8[4], list, 2); 1.788 + if(diagonal_ref == ref){ 1.789 + *mx= C[0]; 1.790 + *my= C[1]; 1.791 + return; 1.792 + } 1.793 + } 1.794 + 1.795 + //RARE 1.796 + pred_motion(mrc, mrs, s, n, 2, list, ref, mx, my); 1.797 +} 1.798 + 1.799 +static inline void pred_pskip_motion(MBRecContext *mrc, MBRecState *mrs, H264Slice *s, H264Mb * m, int * const mx, int * const my){ 1.800 + const int top_ref = mrs->ref_cache[0][ scan8[0] - 8 ]; 1.801 + const int left_ref= mrs->ref_cache[0][ scan8[0] - 1 ]; 1.802 + 1.803 + if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE 1.804 + || !( top_ref | AV_RN32A(mrs->mv_cache[0][ scan8[0] - 8 ])) 1.805 + || !(left_ref | AV_RN32A(mrs->mv_cache[0][ scan8[0] - 1 ]))){ 1.806 + 1.807 + *mx = *my = 0; 1.808 + return; 1.809 + } 1.810 + 1.811 + pred_motion(mrc, mrs, s, 0, 4, 0, 0, mx, my); 1.812 + 1.813 + return; 1.814 +} 1.815 + 1.816 +#define ADD_MVD(list) \ 1.817 +{ \ 1.818 + mx += m->mvd[list][mp][0]; \ 1.819 + my += m->mvd[list][mp][1]; \ 1.820 + mp++; \ 1.821 +} 1.822 + 1.823 +int pred_motion_mb_rec (MBRecContext *mrc, MBRecState *mrs, H264Slice *s, H264Mb *m){ 1.824 + int mp=0; 1.825 + int mb_type = m->mb_type; 1.826 + const int mb_x = m->mb_x; 1.827 + 1.828 +// mrc->m =m; 1.829 + 1.830 + fill_decode_caches_rec(mrc, mrs, s, m, mb_type); 1.831 + if (IS_SKIP(mb_type)){ 1.832 + mb_type=0; 1.833 + 1.834 + if( s->slice_type_nos == FF_B_TYPE ) 1.835 + { 1.836 + mb_type|= MB_TYPE_L0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP; 1.837 + ff_h264_pred_direct_motion_rec(mrc, mrs, s, m, &mb_type); 1.838 + } 1.839 + else 1.840 + { 1.841 + int mx, my; 1.842 + 1.843 + mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP; //FIXME check required 1.844 + pred_pskip_motion(mrc, mrs, s, m, &mx, &my); 1.845 + fill_rectangle(&mrs->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1); 1.846 + fill_rectangle(mrs->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4); 1.847 + } 1.848 + 1.849 + write_back_motion_rec(mrc, mrs, s, m, mb_type); 1.850 + m->mb_type = mrs->mb_type[mb_x]= mb_type; 1.851 + return 0; 1.852 + } 1.853 + 1.854 + 1.855 + if (IS_INTRA_PCM(mb_type)){ 1.856 + mrs->mb_type[mb_x] = mb_type; 1.857 + return 0; 1.858 + } 1.859 + else if (IS_INTRA(mb_type)){ 1.860 + int i, pred_mode; 1.861 + 1.862 + if( IS_INTRA4x4( mb_type ) ) { 1.863 + if ( IS_8x8DCT(mb_type) ) { 1.864 + for( i = 0; i < 16; i+=4 ) { 1.865 + int pred = pred_intra_mode(mrc, mrs, i ); 1.866 + int mode = m->intra4x4_pred_mode[i]; 1.867 + 1.868 + mode = mode < 0 ? pred : mode + ( mode >= pred ); 1.869 + fill_rectangle( &mrs->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 ); 1.870 + } 1.871 + } else { 1.872 + for( i = 0; i < 16; i++ ) { 1.873 + int pred = pred_intra_mode(mrc, mrs, i ); 1.874 + int mode = m->intra4x4_pred_mode[i]; 1.875 + mode = mode < 0 ? pred : mode + ( mode >= pred ); 1.876 + mrs->intra4x4_pred_mode_cache[ scan8[i] ] = mode; 1.877 + } 1.878 + } 1.879 + write_back_intra_pred_mode_rec(mrc, mrs, m, mb_x); 1.880 + if( check_intra4x4_pred_mode(mrc, mrs, s, m) < 0 ) return -1; 1.881 + } else { 1.882 + m->intra16x16_pred_mode= check_intra_pred_mode(mrc, mrs, s, m, m->intra16x16_pred_mode ); 1.883 + if( m->intra16x16_pred_mode < 0 ) return -1; 1.884 + } 1.885 + 1.886 + pred_mode = m->chroma_pred_mode; 1.887 + pred_mode= check_intra_pred_mode( mrc, mrs, s, m, pred_mode ); 1.888 + if( pred_mode < 0 ) return -1; 1.889 + m->chroma_pred_mode= pred_mode; 1.890 + 1.891 + } 1.892 + else if (IS_8X8(mb_type)){ 1.893 + int i, j, list; 1.894 + 1.895 + if( s->slice_type_nos == FF_B_TYPE ) { 1.896 + if( IS_DIRECT(m->sub_mb_type[0] | m->sub_mb_type[1] | 1.897 + m->sub_mb_type[2] | m->sub_mb_type[3]) ) { 1.898 + ff_h264_pred_direct_motion_rec(mrc, mrs, s, m, &mb_type); 1.899 + mrs->ref_cache[0][scan8[4]] = 1.900 + mrs->ref_cache[1][scan8[4]] = 1.901 + mrs->ref_cache[0][scan8[12]] = 1.902 + mrs->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE; 1.903 + } 1.904 + } 1.905 + 1.906 + for(list=0; list<s->list_count; list++){ 1.907 + for(i=0; i<4; i++){ 1.908 + if(IS_DIRECT(m->sub_mb_type[i])){ 1.909 + mrs->ref_cache[list][ scan8[4*i] ]=mrs->ref_cache[list][ scan8[4*i]+1 ]; 1.910 + continue; 1.911 + } else { 1.912 + mrs->ref_cache[list][ scan8[4*i] ]=mrs->ref_cache[list][ scan8[4*i]+1 ]= 1.913 + mrs->ref_cache[list][ scan8[4*i]+8 ]=mrs->ref_cache[list][ scan8[4*i]+9 ]= m->ref_index[list][i]; 1.914 + 1.915 + if(IS_DIR(m->sub_mb_type[i], 0, list) ){ 1.916 + const int sub_mb_type= m->sub_mb_type[i]; 1.917 + const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1; 1.918 + 1.919 + int sub_partition_count = IS_SUB_8X8(sub_mb_type) ? 1 : (IS_SUB_4X4(sub_mb_type)? 4 :2); 1.920 + for(j=0; j<sub_partition_count; j++){ 1.921 + int mx, my; 1.922 + const int index= 4*i + block_width*j; 1.923 + int16_t (* mv_cache)[2]= &mrs->mv_cache[list][ scan8[index]]; 1.924 + pred_motion(mrc, mrs, s, index, block_width, list, mrs->ref_cache[list][ scan8[index] ], &mx, &my); 1.925 + 1.926 + ADD_MVD(list) 1.927 + 1.928 + if(IS_SUB_8X8(sub_mb_type)){ 1.929 + mv_cache[ 1 ][0]= 1.930 + mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx; 1.931 + mv_cache[ 1 ][1]= 1.932 + mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my; 1.933 + }else if(IS_SUB_8X4(sub_mb_type)){ 1.934 + mv_cache[ 1 ][0]= mx; 1.935 + mv_cache[ 1 ][1]= my; 1.936 + }else if(IS_SUB_4X8(sub_mb_type)){ 1.937 + mv_cache[ 8 ][0]= mx; 1.938 + mv_cache[ 8 ][1]= my; 1.939 + } 1.940 + mv_cache[ 0 ][0]= mx; 1.941 + mv_cache[ 0 ][1]= my; 1.942 + } 1.943 + }else{ 1.944 + fill_rectangle(mrs->mv_cache [list][ scan8[4*i] ], 2, 2, 8, 0, 4); 1.945 + } 1.946 + } 1.947 + } 1.948 + } 1.949 + } else if( IS_DIRECT(mb_type) ) { 1.950 + mb_type &= ~MB_TYPE_16x16; //FIXME not nice 1.951 + ff_h264_pred_direct_motion_rec(mrc, mrs, s, m, &mb_type); 1.952 + } 1.953 + else { 1.954 + int list, i; 1.955 + if(IS_16X16(mb_type)){ 1.956 + for(list=0; list<s->list_count; list++){ 1.957 + if(IS_DIR(mb_type, 0, list)){ 1.958 + int ref; 1.959 + int mx,my; 1.960 + 1.961 + ref = m->ref_index[list][0]; 1.962 + fill_rectangle(&mrs->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1); 1.963 + pred_motion(mrc, mrs, s, 0, 4, list, mrs->ref_cache[list][ scan8[0] ], &mx, &my); 1.964 + ADD_MVD(list) 1.965 + fill_rectangle(mrs->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4); 1.966 + } 1.967 + } 1.968 + } 1.969 + else if(IS_16X8(mb_type)){ 1.970 + for(list=0; list<s->list_count; list++){ 1.971 + for(i=0; i<2; i++){ 1.972 + if(IS_DIR(mb_type, i, list)){ 1.973 + int ref; 1.974 + int mx,my; 1.975 + ref = m->ref_index[list][i]; 1.976 + fill_rectangle(&mrs->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1); 1.977 + 1.978 + pred_16x8_motion(mrc, mrs, s, 8*i, list, mrs->ref_cache[list][scan8[0] + 16*i], &mx, &my); 1.979 + ADD_MVD(list) 1.980 + 1.981 + fill_rectangle(mrs->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4); 1.982 + }else{ 1.983 + fill_rectangle(&mrs->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1); 1.984 + fill_rectangle(mrs->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4); 1.985 + } 1.986 + } 1.987 + } 1.988 + 1.989 + }else{ 1.990 + assert(IS_8X16(mb_type)); 1.991 + 1.992 + for(list=0; list<s->list_count; list++){ 1.993 + for(i=0; i<2; i++){ 1.994 + if(IS_DIR(mb_type, i, list)){ //FIXME optimize 1.995 + int ref; 1.996 + int mx,my; 1.997 + ref = m->ref_index[list][i]; 1.998 + fill_rectangle(&mrs->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1); 1.999 + pred_8x16_motion(mrc, mrs, s, i*4, list, mrs->ref_cache[list][ scan8[0] + 2*i ], &mx, &my); 1.1000 + ADD_MVD(list) 1.1001 + fill_rectangle(mrs->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4); 1.1002 + }else{ 1.1003 + fill_rectangle(&mrs->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1); 1.1004 + fill_rectangle(mrs->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4); 1.1005 + } 1.1006 + } 1.1007 + } 1.1008 + } 1.1009 + } 1.1010 + 1.1011 + if (IS_INTER(mb_type)||(IS_DIRECT(mb_type))) 1.1012 + write_back_motion_rec(mrc, mrs, s, m, mb_type); 1.1013 + m->mb_type = mrs->mb_type[mb_x]= mb_type; 1.1014 + 1.1015 + return 0; 1.1016 +}
