diff libavcodec/h264_pred_mode.c @ 2:897f711a7157

rearrange to work with autoconf
author Nina Engelhardt <nengel@mailbox.tu-berlin.de>
date Tue, 25 Sep 2012 15:55:33 +0200
parents
children
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/libavcodec/h264_pred_mode.c	Tue Sep 25 15:55:33 2012 +0200
     1.3 @@ -0,0 +1,1013 @@
     1.4 +/*
     1.5 + * H.26L/H.264/AVC/JVT/14496-10/... direct mb/block decoding
     1.6 + * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
     1.7 + *
     1.8 + * This file is part of FFmpeg.
     1.9 + *
    1.10 + * FFmpeg is free software; you can redistribute it and/or
    1.11 + * modify it under the terms of the GNU Lesser General Public
    1.12 + * License as published by the Free Software Foundation; either
    1.13 + * version 2.1 of the License, or (at your option) any later version.
    1.14 + *
    1.15 + * FFmpeg is distributed in the hope that it will be useful,
    1.16 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
    1.17 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    1.18 + * Lesser General Public License for more details.
    1.19 + *
    1.20 + * You should have received a copy of the GNU Lesser General Public
    1.21 + * License along with FFmpeg; if not, write to the Free Software
    1.22 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
    1.23 + */
    1.24 +
    1.25 +/**
    1.26 + * @file
    1.27 + * H.264 / AVC / MPEG4 part10 direct mb/block decoding.
    1.28 + * @author Michael Niedermayer <michaelni@gmx.at>
    1.29 + */
    1.30 +
    1.31 +#include "dsputil.h"
    1.32 +#include "avcodec.h"
    1.33 +#include "h264_data.h"
    1.34 +#include "h264.h"
    1.35 +#include "rectangle.h"
    1.36 +
    1.37 +//#undef NDEBUG
    1.38 +#include <assert.h>
    1.39 +
    1.40 +static const uint8_t left_block_options[4][16]={
    1.41 +    {0,1,2,3,7,10,8,11,7+0*8, 7+1*8, 7+2*8, 7+3*8, 2+0*8, 2+3*8, 2+1*8, 2+2*8},
    1.42 +    {2,2,3,3,8,11,8,11,7+2*8, 7+2*8, 7+3*8, 7+3*8, 2+1*8, 2+2*8, 2+1*8, 2+2*8},
    1.43 +    {0,0,1,1,7,10,7,10,7+0*8, 7+0*8, 7+1*8, 7+1*8, 2+0*8, 2+3*8, 2+0*8, 2+3*8},
    1.44 +    {0,2,0,2,7,10,7,10,7+0*8, 7+2*8, 7+0*8, 7+2*8, 2+0*8, 2+3*8, 2+0*8, 2+3*8}
    1.45 +};
    1.46 +
    1.47 +
    1.48 +// static void check_cache_copy(MBRecContext *mrc, H264Slice *s, H264Mb *m){
    1.49 +//     for (int list=0; list<2; list++){
    1.50 +//         for (int i=0; i<40; i++){
    1.51 +//             assert (m->ref_cache[list][i] == m->ref_cache_copy[list][i]);
    1.52 +//             assert (mrs->mv_cache[list][i][0] == mrs->mv_cache_copy[list][i][0]);
    1.53 +//             assert (mrs->mv_cache[list][i][1] == mrs->mv_cache_copy[list][i][1]);
    1.54 +//         }
    1.55 +//     }
    1.56 +// }
    1.57 +
    1.58 +// static void check_cache_copy2(MBRecContext *mrc, H264Slice *s, H264Mb *m){
    1.59 +//     for (int list=0; list<2; list++){
    1.60 +//         for (int i=0; i<40; i++){
    1.61 +//             assert (m->ref_cache[list][i] == m->ref_cache_copy2[list][i]);
    1.62 +//             assert (mrs->mv_cache[list][i][0] == mrs->mv_cache_copy2[list][i][0]);
    1.63 +//             assert (mrs->mv_cache[list][i][1] == mrs->mv_cache_copy2[list][i][1]);
    1.64 +//         }
    1.65 +//     }
    1.66 +// }
    1.67 +
    1.68 +static void fill_decode_caches_rec(MBRecContext *mrc, MBRecState *mrs, H264Slice *s, H264Mb *m, int mb_type){
    1.69 +    int topleft_type, top_type, topright_type, left_type;
    1.70 +    const uint8_t * left_block= left_block_options[0];
    1.71 +    const int mb_x = m->mb_x;
    1.72 +    int i;
    1.73 +
    1.74 +    mrs->top_type  = mrs->mb_type_top[mb_x  ];
    1.75 +    mrs->left_type = mrs->mb_type    [mb_x-1];
    1.76 +
    1.77 +    topleft_type = mrs->mb_type_top[mb_x-1];
    1.78 +    top_type     = mrs->mb_type_top[mb_x  ];
    1.79 +    topright_type= mrs->mb_type_top[mb_x+1];
    1.80 +    left_type    = mrs->mb_type    [mb_x-1];
    1.81 +
    1.82 +    int type_mask= s->pps.constrained_intra_pred ? 1 : -1;
    1.83 +
    1.84 +    if(!IS_SKIP(mb_type)){
    1.85 +//         memset(mrc->non_zero_count_cache, 0, sizeof(mrc->non_zero_count_cache));
    1.86 +        AV_COPY32(&mrs->non_zero_count_cache[4+8*1], &m->non_zero_count[ 0]);
    1.87 +        AV_COPY32(&mrs->non_zero_count_cache[4+8*2], &m->non_zero_count[ 4]);
    1.88 +        AV_COPY32(&mrs->non_zero_count_cache[4+8*3], &m->non_zero_count[ 8]);
    1.89 +        AV_COPY32(&mrs->non_zero_count_cache[4+8*4], &m->non_zero_count[12]);
    1.90 +
    1.91 +        for (int i=0; i<2; i++) {
    1.92 +            mrs->non_zero_count_cache[8*1 + 8*i + 1] = m->non_zero_count[16 + i*2   ];
    1.93 +            mrs->non_zero_count_cache[8*1 + 8*i + 2] = m->non_zero_count[16 + i*2 +1];
    1.94 +            mrs->non_zero_count_cache[8*4 + 8*i + 1] = m->non_zero_count[20 + i*2   ];
    1.95 +            mrs->non_zero_count_cache[8*4 + 8*i + 2] = m->non_zero_count[20 + i*2 +1];
    1.96 +        }
    1.97 +
    1.98 +        if(IS_INTRA(mb_type)){
    1.99 +//             memset(mrc->intra4x4_pred_mode_cache, 0, sizeof(mrc->intra4x4_pred_mode_cache));
   1.100 +
   1.101 +            mrs->topleft_samples_available=
   1.102 +            mrs->top_samples_available=
   1.103 +            mrs->left_samples_available= 0xFFFF;
   1.104 +            mrs->topright_samples_available= 0xEEEA;
   1.105 +
   1.106 +            if(!(top_type & type_mask)){
   1.107 +                mrs->topleft_samples_available= 0xB3FF;
   1.108 +                mrs->top_samples_available= 0x33FF;
   1.109 +                mrs->topright_samples_available= 0x26EA;
   1.110 +            }
   1.111 +
   1.112 +            if(!(left_type & type_mask)){
   1.113 +                mrs->topleft_samples_available&= 0xDF5F;
   1.114 +                mrs->left_samples_available&= 0x5F5F;
   1.115 +            }
   1.116 +
   1.117 +            if(!(topleft_type & type_mask))
   1.118 +                mrs->topleft_samples_available&= 0x7FFF;
   1.119 +
   1.120 +            if(!(topright_type & type_mask))
   1.121 +                mrs->topright_samples_available&= 0xFBFF;
   1.122 +
   1.123 +            if(IS_INTRA4x4(mb_type)){
   1.124 +                if(IS_INTRA4x4(top_type)){
   1.125 +                    AV_COPY32(mrs->intra4x4_pred_mode_cache+4+8*0, &mrs->intra4x4_pred_mode_top[4*mb_x]);
   1.126 +                }else{
   1.127 +                    mrs->intra4x4_pred_mode_cache[4+8*0]=
   1.128 +                    mrs->intra4x4_pred_mode_cache[5+8*0]=
   1.129 +                    mrs->intra4x4_pred_mode_cache[6+8*0]=
   1.130 +                    mrs->intra4x4_pred_mode_cache[7+8*0]= 2 - 3*!(top_type & type_mask);
   1.131 +                }
   1.132 +
   1.133 +                if(IS_INTRA4x4(left_type)){
   1.134 +#if OMPSS
   1.135 +                    mrs->intra4x4_pred_mode_cache[3+8*1]= m->intra4x4_pred_mode_left[0];
   1.136 +                    mrs->intra4x4_pred_mode_cache[3+8*2]= m->intra4x4_pred_mode_left[1];
   1.137 +                    mrs->intra4x4_pred_mode_cache[3+8*3]= m->intra4x4_pred_mode_left[2];
   1.138 +                    mrs->intra4x4_pred_mode_cache[3+8*4]= m->intra4x4_pred_mode_left[3];
   1.139 +#else
   1.140 +                    mrs->intra4x4_pred_mode_cache[3+8*1]= mrs->intra4x4_pred_mode_left[0];
   1.141 +                    mrs->intra4x4_pred_mode_cache[3+8*2]= mrs->intra4x4_pred_mode_left[1];
   1.142 +                    mrs->intra4x4_pred_mode_cache[3+8*3]= mrs->intra4x4_pred_mode_left[2];
   1.143 +                    mrs->intra4x4_pred_mode_cache[3+8*4]= mrs->intra4x4_pred_mode_left[3];
   1.144 +#endif
   1.145 +                }else{
   1.146 +                    mrs->intra4x4_pred_mode_cache[3+8*1]=
   1.147 +                    mrs->intra4x4_pred_mode_cache[3+8*2]=
   1.148 +                    mrs->intra4x4_pred_mode_cache[3+8*3]=
   1.149 +                    mrs->intra4x4_pred_mode_cache[3+8*4]= 2 - 3*!(left_type & type_mask);
   1.150 +                }
   1.151 +            }
   1.152 +        }
   1.153 +    }
   1.154 +
   1.155 +    if(IS_INTER(mb_type) ||(IS_DIRECT(mb_type) && s->direct_spatial_mv_pred)){
   1.156 +        int list;
   1.157 +
   1.158 +//         memset(mrs->mv_cache, 0, sizeof(mrs->mv_cache));
   1.159 +//         memset(mrs->ref_cache, 0, sizeof(mrs->ref_cache));
   1.160 +
   1.161 +        mrs->ref_cache[0][scan8[5 ]+1] = mrs->ref_cache[0][scan8[7 ]+1] = mrs->ref_cache[0][scan8[13]+1] =
   1.162 +        mrs->ref_cache[1][scan8[5 ]+1] = mrs->ref_cache[1][scan8[7 ]+1] = mrs->ref_cache[1][scan8[13]+1] = PART_NOT_AVAILABLE;
   1.163 +
   1.164 +        for(list=0; list<s->list_count; list++){
   1.165 +            if(!USES_LIST(mb_type, list)){
   1.166 +                continue;
   1.167 +            }
   1.168 +            assert(!(IS_DIRECT(mb_type) && !s->direct_spatial_mv_pred));
   1.169 +
   1.170 +            if(USES_LIST(top_type, list)){
   1.171 +                const int b_xy= 4*mb_x + 3*mrc->b_stride;
   1.172 +                AV_COPY128(mrs->mv_cache[list][scan8[0] + 0 - 1*8], mrs->motion_val_top[list][b_xy + 0]);
   1.173 +                    mrs->ref_cache[list][scan8[0] + 0 - 1*8]=
   1.174 +                    mrs->ref_cache[list][scan8[0] + 1 - 1*8]= mrs->ref_index_top[list][4*mb_x + 2];
   1.175 +                    mrs->ref_cache[list][scan8[0] + 2 - 1*8]=
   1.176 +                    mrs->ref_cache[list][scan8[0] + 3 - 1*8]= mrs->ref_index_top[list][4*mb_x + 3];
   1.177 +            }else{
   1.178 +                AV_ZERO128(mrs->mv_cache[list][scan8[0] + 0 - 1*8]);
   1.179 +                AV_WN32A(&mrs->ref_cache[list][scan8[0] + 0 - 1*8], ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101);
   1.180 +            }
   1.181 +
   1.182 +            if(mb_type & (MB_TYPE_16x8|MB_TYPE_8x8)){
   1.183 +                for(i=0; i<2; i++){
   1.184 +                    int cache_idx = scan8[0] - 1 + i*2*8;
   1.185 +                    if(USES_LIST(left_type, list)){
   1.186 +                        const int b_xy= 4*(mb_x-1) + 3;
   1.187 +                        const int b8_x= 4*(mb_x-1) + 1;
   1.188 +                        AV_COPY32(mrs->mv_cache[list][cache_idx  ], mrs->motion_val[list][b_xy + mrc->b_stride*left_block[0+i*2]]);
   1.189 +                        AV_COPY32(mrs->mv_cache[list][cache_idx+8], mrs->motion_val[list][b_xy + mrc->b_stride*left_block[1+i*2]]);
   1.190 +                        mrs->ref_cache[list][cache_idx  ]= mrs->ref_index[list][b8_x + (left_block[0+i*2]&~1)];
   1.191 +                        mrs->ref_cache[list][cache_idx+8]= mrs->ref_index[list][b8_x + (left_block[1+i*2]&~1)];
   1.192 +                    }else{
   1.193 +                        AV_ZERO32(mrs->mv_cache [list][cache_idx  ]);
   1.194 +                        AV_ZERO32(mrs->mv_cache [list][cache_idx+8]);
   1.195 +                        mrs->ref_cache[list][cache_idx  ]=
   1.196 +                        mrs->ref_cache[list][cache_idx+8]= (left_type ? LIST_NOT_USED : PART_NOT_AVAILABLE);
   1.197 +                    }
   1.198 +                }
   1.199 +            }else{
   1.200 +                if(USES_LIST(left_type, list)){
   1.201 +                    const int b_x = 4*(mb_x-1) + 3;
   1.202 +                    const int b8_x= 4*(mb_x-1) + 1;
   1.203 +                    AV_COPY32(mrs->mv_cache[list][scan8[0] - 1], mrs->motion_val[list][b_x + mrc->b_stride*left_block[0]]);
   1.204 +                    mrs->ref_cache[list][scan8[0] - 1]= mrs->ref_index[list][b8_x + (left_block[0]&~1)];
   1.205 +                }else{
   1.206 +                    AV_ZERO32(mrs->mv_cache [list][scan8[0] - 1]);
   1.207 +                    mrs->ref_cache[list][scan8[0] - 1]= left_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
   1.208 +                }
   1.209 +            }
   1.210 +
   1.211 +            if(USES_LIST(topright_type, list)){
   1.212 +                const int b_xy= 4*(mb_x+1) + 3*mrc->b_stride;
   1.213 +                AV_COPY32(mrs->mv_cache[list][scan8[0] + 4 - 1*8], mrs->motion_val_top[list][b_xy]);
   1.214 +                mrs->ref_cache[list][scan8[0] + 4 - 1*8]= mrs->ref_index_top[list][4*(mb_x+1) + 2];
   1.215 +            }else{
   1.216 +                AV_ZERO32(mrs->mv_cache [list][scan8[0] + 4 - 1*8]);
   1.217 +                mrs->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
   1.218 +            }
   1.219 +            if(mrs->ref_cache[list][scan8[0] + 4 - 1*8] < 0){
   1.220 +                int topleft_partition= -1;
   1.221 +                if(USES_LIST(topleft_type, list)){
   1.222 +                    const int b_xy = 4*(mb_x-1) + 3 + mrc->b_stride + (topleft_partition & 2*mrc->b_stride);
   1.223 +                    const int b8_x= 4*(mb_x-1) + 1 + (topleft_partition & 2);
   1.224 +                    AV_COPY32(mrs->mv_cache[list][scan8[0] - 1 - 1*8], mrs->motion_val_top[list][b_xy]);
   1.225 +                    mrs->ref_cache[list][scan8[0] - 1 - 1*8]= mrs->ref_index_top[list][b8_x];
   1.226 +                }else{
   1.227 +                    AV_ZERO32(mrs->mv_cache[list][scan8[0] - 1 - 1*8]);
   1.228 +                    mrs->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
   1.229 +                }
   1.230 +            }
   1.231 +
   1.232 +            if((mb_type&(MB_TYPE_SKIP|MB_TYPE_DIRECT2)))
   1.233 +                continue;
   1.234 +
   1.235 +            if(!(mb_type&(MB_TYPE_SKIP|MB_TYPE_DIRECT2))) {
   1.236 +                mrs->ref_cache[list][scan8[4 ]] =
   1.237 +                mrs->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
   1.238 +                AV_ZERO32(mrs->mv_cache [list][scan8[4 ]]);
   1.239 +                AV_ZERO32(mrs->mv_cache [list][scan8[12]]);
   1.240 +            }
   1.241 +        }
   1.242 +    }
   1.243 +}
   1.244 +
   1.245 +static inline void write_back_motion_rec(MBRecContext *mrc, MBRecState *mrs, H264Slice *s, H264Mb *m, int mb_type){
   1.246 +    const int b_stride = mrc->b_stride;
   1.247 +    const int b_x = 4*m->mb_x; //try mb2b(8)_xy
   1.248 +    const int b8_x= 4*m->mb_x;
   1.249 +    int list;
   1.250 +
   1.251 +    if(!USES_LIST(mb_type, 0))
   1.252 +        fill_rectangle(&mrs->ref_index[0][b8_x], 2, 2, 2, (uint8_t)LIST_NOT_USED, 1);
   1.253 +
   1.254 +    for(list=0; list<s->list_count; list++){
   1.255 +        int y;
   1.256 +        int16_t (*mv_dst)[2];
   1.257 +        int16_t (*mv_src)[2];
   1.258 +
   1.259 +        if(!USES_LIST(mb_type, list))
   1.260 +            continue;
   1.261 +
   1.262 +        mv_dst   = &mrs->motion_val[list][b_x];
   1.263 +        mv_src   = &mrs->mv_cache[list][scan8[0]];
   1.264 +        for(y=0; y<4; y++){
   1.265 +            AV_COPY128(mv_dst + y*b_stride, mv_src + 8*y);
   1.266 +        }
   1.267 +
   1.268 +        {
   1.269 +            int8_t *ref_index = &mrs->ref_index[list][b8_x];
   1.270 +            ref_index[0+0*2]= mrs->ref_cache[list][scan8[0]];
   1.271 +            ref_index[1+0*2]= mrs->ref_cache[list][scan8[4]];
   1.272 +            ref_index[0+1*2]= mrs->ref_cache[list][scan8[8]];
   1.273 +            ref_index[1+1*2]= mrs->ref_cache[list][scan8[12]];
   1.274 +        }
   1.275 +    }
   1.276 +}
   1.277 +
   1.278 +
   1.279 +/**
   1.280 +* checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
   1.281 +*/
   1.282 +static int check_intra4x4_pred_mode(MBRecContext *mrc, MBRecState *mrs, H264Slice *s, H264Mb *m){
   1.283 +    static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
   1.284 +    static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
   1.285 +    int i;
   1.286 +
   1.287 +    if(!(mrs->top_samples_available&0x8000)){
   1.288 +        for(i=0; i<4; i++){
   1.289 +            int status= top[ mrs->intra4x4_pred_mode_cache[scan8[0] + i] ];
   1.290 +            if(status<0){
   1.291 +                av_log(AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, m->mb_x, m->mb_y);
   1.292 +                return -1;
   1.293 +            } else if(status){
   1.294 +                mrs->intra4x4_pred_mode_cache[scan8[0] + i]= status;
   1.295 +            }
   1.296 +        }
   1.297 +    }
   1.298 +
   1.299 +    if((mrs->left_samples_available&0x8888)!=0x8888){
   1.300 +        static const int mask[4]={0x8000,0x2000,0x80,0x20};
   1.301 +        for(i=0; i<4; i++){
   1.302 +            if(!(mrs->left_samples_available&mask[i])){
   1.303 +                int status= left[ mrs->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
   1.304 +                if(status<0){
   1.305 +                    av_log(AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, m->mb_x, m->mb_y);
   1.306 +                    return -1;
   1.307 +                } else if(status){
   1.308 +                    mrs->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
   1.309 +                }
   1.310 +            }
   1.311 +        }
   1.312 +    }
   1.313 +    return 0;
   1.314 +}
   1.315 +
   1.316 +/**
   1.317 +* checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
   1.318 +*/
   1.319 +static int check_intra_pred_mode(MBRecContext *mrc, MBRecState *mrs, H264Slice *s, H264Mb *m, int mode){
   1.320 +    static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
   1.321 +    static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
   1.322 +
   1.323 +    if(mode > 6) {
   1.324 +        av_log(AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", m->mb_x, m->mb_y);
   1.325 +        return -1;
   1.326 +    }
   1.327 +
   1.328 +    if(!(mrs->top_samples_available&0x8000)){
   1.329 +        mode= top[ mode ];
   1.330 +        if(mode<0){
   1.331 +            av_log(AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", m->mb_x, m->mb_y);
   1.332 +            return -1;
   1.333 +        }
   1.334 +    }
   1.335 +
   1.336 +    if((mrs->left_samples_available&0x8080) != 0x8080){
   1.337 +        mode= left[ mode ];
   1.338 +        if(mrs->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
   1.339 +            mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(mrs->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
   1.340 +        }
   1.341 +        if(mode<0){
   1.342 +            av_log(AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", m->mb_x, m->mb_y);
   1.343 +            return -1;
   1.344 +        }
   1.345 +    }
   1.346 +    return mode;
   1.347 +}
   1.348 +
   1.349 +/**
   1.350 + * gets the predicted intra4x4 prediction mode.
   1.351 + */
   1.352 +static inline int pred_intra_mode(MBRecContext *mrc, MBRecState *mrs, int n){
   1.353 +    const int index8= scan8[n];
   1.354 +    const int left= mrs->intra4x4_pred_mode_cache[index8 - 1];
   1.355 +    const int top = mrs->intra4x4_pred_mode_cache[index8 - 8];
   1.356 +    const int min= FFMIN(left, top);
   1.357 +
   1.358 +    if(min<0) return DC_PRED;
   1.359 +    else      return min;
   1.360 +}
   1.361 +
   1.362 +static void write_back_intra_pred_mode_rec(MBRecContext *mrc, MBRecState *mrs, H264Mb *m, int mb_x){
   1.363 +    int8_t *mode= &mrs->intra4x4_pred_mode[4*mb_x];
   1.364 +
   1.365 +    AV_COPY32(mode, mrs->intra4x4_pred_mode_cache + 4 + 8*4);
   1.366 +#if OMPSS
   1.367 +    if (m->mb_x < mrc->mb_width-1){
   1.368 +        H264Mb *mr= m+1;
   1.369 +        mode = mr->intra4x4_pred_mode_left;
   1.370 +        mode[0]= mrs->intra4x4_pred_mode_cache[7+8*1];
   1.371 +        mode[1]= mrs->intra4x4_pred_mode_cache[7+8*2];
   1.372 +        mode[2]= mrs->intra4x4_pred_mode_cache[7+8*3];
   1.373 +        mode[3]= mrs->intra4x4_pred_mode_cache[7+8*4];
   1.374 +    }
   1.375 +#else
   1.376 +    mode = mrs->intra4x4_pred_mode_left;
   1.377 +    mode[0]= mrs->intra4x4_pred_mode_cache[7+8*1];
   1.378 +    mode[1]= mrs->intra4x4_pred_mode_cache[7+8*2];
   1.379 +    mode[2]= mrs->intra4x4_pred_mode_cache[7+8*3];
   1.380 +    mode[3]= mrs->intra4x4_pred_mode_cache[7+8*4];
   1.381 +#endif
   1.382 +}
   1.383 +
/**
 * Derives reference indices and motion vectors for a direct macroblock (or
 * its direct 8x8 sub-blocks) from the neighbours already held in the caches,
 * and writes the result into mrs->ref_cache / mrs->mv_cache.
 *
 * *mb_type is updated in place with the partitioning and list usage that was
 * chosen; m->sub_mb_type[] is updated for the 8x8 blocks that are handled.
 *
 * @param mrc     reconstruction context; b_stride is the row pitch of the
 *                list1 motion-vector arrays
 * @param mrs     state holding the caches and the list1_* arrays
 * @param s       slice; direct_8x8_inference_flag is read
 * @param m       current macroblock (mb_x, sub_mb_type)
 * @param mb_type in/out macroblock type flags
 */
static void pred_spatial_direct_motion_rec(MBRecContext *mrc, MBRecState *mrs, H264Slice *s, H264Mb *m, int *mb_type){
    int b4_stride = mrc->b_stride;
    const int mb_x = m->mb_x;
    int mb_type_col[2];
    const int16_t (*l1mv0)[2], (*l1mv1)[2];
    const int8_t *l1ref0, *l1ref1;
    const int is_b8x8 = IS_8X8(*mb_type);
    unsigned int sub_mb_type= MB_TYPE_L0L1;
    int i8, i4;
    int ref[2];
    int mv[2];      /* one packed 32-bit motion vector per list */
    int list;

    //assert(h->ref_list[1][0].reference&3);

/* Not #undef'd: the macro stays defined for the rest of the file. */
#define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)

    /* ref = min(neighbors) */
    for(list=0; list<2; list++){
        int left_ref = mrs->ref_cache[list][scan8[0] - 1];
        int top_ref  = mrs->ref_cache[list][scan8[0] - 8];
        int refc = mrs->ref_cache[list][scan8[0] - 8 + 4];
        const int16_t *C= mrs->mv_cache[list][ scan8[0] - 8 + 4];
        /* Fall back to the top-left neighbour when top-right is unavailable. */
        if(refc == PART_NOT_AVAILABLE){
            refc = mrs->ref_cache[list][scan8[0] - 8 - 1];
            C    = mrs->mv_cache[list][scan8[0] - 8 - 1];
        }
        /* Comparing as unsigned makes negative (unusable) references the
         * largest values, so the minimum is the smallest non-negative index;
         * it only ends up negative again when all three are negative. */
        ref[list] = FFMIN3((unsigned)left_ref, (unsigned)top_ref, (unsigned)refc);
        if(ref[list] >= 0){
            //this is just pred_motion() but with the cases removed that cannot happen for direct blocks
            const int16_t * const A= mrs->mv_cache[list][ scan8[0] - 1 ];
            const int16_t * const B= mrs->mv_cache[list][ scan8[0] - 8 ];

            int match_count= (left_ref==ref[list]) + (top_ref==ref[list]) + (refc==ref[list]);
            if(match_count > 1){ //most common
                mv[list]= pack16to32(mid_pred(A[0], B[0], C[0]),
                                     mid_pred(A[1], B[1], C[1]) );
            }else {
                /* Exactly one neighbour uses the chosen reference: take its vector. */
                assert(match_count==1);
                if(left_ref==ref[list]){
                    mv[list]= AV_RN32A(A);
                }else if(top_ref==ref[list]){
                    mv[list]= AV_RN32A(B);
                }else{
                    mv[list]= AV_RN32A(C);
                }
            }
        }else{
            /* No usable reference in this list: drop the list from the type flags. */
            int mask= ~(MB_TYPE_L0 << (2*list));
            mv[list] = 0;
            ref[list] = -1;
            if(!is_b8x8)
                *mb_type &= mask;
            sub_mb_type &= mask;
        }
    }

    /* Neither list had a usable reference: use reference 0 in both lists. */
    if(ref[0] < 0 && ref[1] < 0){
        ref[0] = ref[1] = 0;
        if(!is_b8x8)
            *mb_type |= MB_TYPE_L0L1;
        sub_mb_type |= MB_TYPE_L0L1;
    }

    /* Fast path: whole-macroblock direct with both vectors zero. The list1
     * data cannot change the result, so fill the caches and return. */
    if(!(is_b8x8|mv[0]|mv[1])){
        fill_rectangle(&mrs->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
        fill_rectangle(&mrs->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
        fill_rectangle(&mrs->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
        fill_rectangle(&mrs->mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
        *mb_type= (*mb_type & ~(MB_TYPE_8x8|MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_P1L0|MB_TYPE_P1L1))|MB_TYPE_16x16|MB_TYPE_DIRECT2;
        return;
    }

    /* Type of the macroblock at the same column in the list1 arrays
     * (presumably the co-located macroblock, going by the col_* naming). */
    mb_type_col[0] =
    mb_type_col[1] = mrs->list1_mb_type[mb_x];

    /* Choose the partitioning from the list1 macroblock's partitioning. */
    sub_mb_type |= MB_TYPE_16x16|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
    if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
        *mb_type   |= MB_TYPE_16x16|MB_TYPE_DIRECT2; /* B_16x16 */
    }else if(!is_b8x8 && (mb_type_col[0] & (MB_TYPE_16x8|MB_TYPE_8x16))){
        *mb_type   |= MB_TYPE_DIRECT2 | (mb_type_col[0] & (MB_TYPE_16x8|MB_TYPE_8x16));
    }else{
        if(!s->direct_8x8_inference_flag){
            /* FIXME save sub mb types from previous frames (or derive from MVs)
            * so we know exactly what block size to use */
            sub_mb_type += (MB_TYPE_8x8-MB_TYPE_16x16); /* B_SUB_4x4 */
        }
        *mb_type   |= MB_TYPE_8x8;
    }

    /* list1 motion vectors and reference indices for this macroblock column. */
    l1mv0  = (void *) &mrs->list1_motion_val[0][4*mb_x];
    l1mv1  = (void *) &mrs->list1_motion_val[1][4*mb_x];
    l1ref0 = &mrs->list1_ref_index [0][4*mb_x];
    l1ref1 = &mrs->list1_ref_index [1][4*mb_x];
//     if(!b8_stride){
//         if(m->mb_y&1){
//             l1ref0 += 2;
//             l1ref1 += 2;
//             l1mv0  +=  2*b4_stride;
//             l1mv1  +=  2*b4_stride;
//         }
//     }

    if(IS_16X16(*mb_type)){
        int a,b;

        fill_rectangle(&mrs->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
        fill_rectangle(&mrs->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
        /* The list1 block is inter, uses reference 0 and has a vector within
         * +-1 in both components: only lists whose chosen reference is
         * greater than 0 keep their predicted vector, the others get zero. */
        if(!IS_INTRA(mb_type_col[0]) && (   (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
            || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
            ))){
            a=b=0;
            if(ref[0] > 0)
                a= mv[0];
            if(ref[1] > 0)
                b= mv[1];
        }else{
            a= mv[0];
            b= mv[1];
        }
        fill_rectangle(&mrs->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
        fill_rectangle(&mrs->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
    }else{
        int n=0;    /* number of 4x4 blocks whose list1 vector passed the +-1 test */
        for(i8=0; i8<4; i8++){
            const int x8 = i8&1;
            const int y8 = i8>>1;

            /* In an explicit 8x8 macroblock only the direct sub-blocks are ours. */
            if(is_b8x8 && !IS_DIRECT(m->sub_mb_type[i8]))
                continue;
            m->sub_mb_type[i8] = sub_mb_type;

            fill_rectangle(&mrs->mv_cache[0][scan8[i8*4]], 2, 2, 8, mv[0], 4);
            fill_rectangle(&mrs->mv_cache[1][scan8[i8*4]], 2, 2, 8, mv[1], 4);
            fill_rectangle(&mrs->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
            fill_rectangle(&mrs->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);

            /* col_zero_flag */
            if(!IS_INTRA(mb_type_col[0]) && (l1ref0[i8] == 0 || (l1ref0[i8] < 0 && l1ref1[i8] == 0 ))
                ){
                const int16_t (*l1mv)[2]= l1ref0[i8] == 0 ? l1mv0 : l1mv1;
                if(IS_SUB_8X8(sub_mb_type)){
                    /* One test per 8x8 block, using the list1 vector at the
                     * macroblock corner belonging to this quadrant. */
                    const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
                    if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
                        if(ref[0] == 0)
                            fill_rectangle(&mrs->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
                        if(ref[1] == 0)
                            fill_rectangle(&mrs->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
                        n+=4;
                    }
                }else{
                    /* One test per 4x4 block inside this 8x8 block. */
                    int k=0;
                    for(i4=0; i4<4; i4++){
                        const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
                        if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
                            if(ref[0] == 0)
                                AV_ZERO32(mrs->mv_cache[0][scan8[i8*4+i4]]);
                            if(ref[1] == 0)
                                AV_ZERO32(mrs->mv_cache[1][scan8[i8*4+i4]]);
                            k++;
                        }
                    }
                    /* All four or none passed: the 8x8 block is uniform. */
                    if(!(k&3))
                        m->sub_mb_type[i8]+= MB_TYPE_16x16 - MB_TYPE_8x8;
                    n+=k;
                }
            }
        }
        /* n is 0 or 16: every block got the same treatment, so the whole
         * macroblock can be handled as a single 16x16 direct block. */
        if(!is_b8x8 && !(n&15)){
            *mb_type= (*mb_type & ~(MB_TYPE_8x8|MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_P1L0|MB_TYPE_P1L1))|MB_TYPE_16x16|MB_TYPE_DIRECT2;
        }
    }
}
   1.557 +
   1.558 +static void pred_temp_direct_motion_rec(MBRecContext *mrc, MBRecState *mrs, H264Slice *s, H264Mb *m, int *mb_type){
   1.559 +    const int mb_x = m->mb_x;
   1.560 +    int b4_stride = mrc->b_stride;
   1.561 +    int mb_type_col[2];
   1.562 +    const int16_t (*l1mv0)[2], (*l1mv1)[2];
   1.563 +    const int8_t *l1ref0, *l1ref1;
   1.564 +    const int is_b8x8 = IS_8X8(*mb_type);
   1.565 +    unsigned int sub_mb_type;
   1.566 +    int i8, i4;
   1.567 +    const int *map_col_to_list0[2] = {s->map_col_to_list0[0], s->map_col_to_list0[1]};
   1.568 +    const int *dist_scale_factor = s->dist_scale_factor;
   1.569 +
   1.570 +    mb_type_col[0] =
   1.571 +    mb_type_col[1] = mrs->list1_mb_type[mb_x];
   1.572 +
   1.573 +    sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
   1.574 +    if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
   1.575 +        *mb_type   |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
   1.576 +    }else if(!is_b8x8 && (mb_type_col[0] & (MB_TYPE_16x8|MB_TYPE_8x16))){
   1.577 +        *mb_type   |= MB_TYPE_L0L1|MB_TYPE_DIRECT2 | (mb_type_col[0] & (MB_TYPE_16x8|MB_TYPE_8x16));
   1.578 +    }else{
   1.579 +        if(!s->direct_8x8_inference_flag){
   1.580 +            /* FIXME save sub mb types from previous frames (or derive from MVs)
   1.581 +            * so we know exactly what block size to use */
   1.582 +            sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
   1.583 +        }
   1.584 +        *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
   1.585 +    }
   1.586 +
   1.587 +    l1mv0  = (void *) &mrs->list1_motion_val[0][4*mb_x];
   1.588 +    l1mv1  = (void *) &mrs->list1_motion_val[1][4*mb_x];
   1.589 +    l1ref0 = &mrs->list1_ref_index [0][4*mb_x];
   1.590 +    l1ref1 = &mrs->list1_ref_index [1][4*mb_x];
   1.591 +
   1.592 +    /* one-to-one mv scaling */
   1.593 +    if(IS_16X16(*mb_type)){
   1.594 +        int ref, mv0, mv1;
   1.595 +
   1.596 +        fill_rectangle(&mrs->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
   1.597 +        if(IS_INTRA(mb_type_col[0])){
   1.598 +            ref=mv0=mv1=0;
   1.599 +        }else{
   1.600 +            const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
   1.601 +            : map_col_to_list0[1][l1ref1[0]];
   1.602 +            const int scale = dist_scale_factor[ref0];
   1.603 +            const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
   1.604 +            int mv_l0[2];
   1.605 +            mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
   1.606 +            mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
   1.607 +            ref= ref0;
   1.608 +            mv0= pack16to32(mv_l0[0],mv_l0[1]);
   1.609 +            mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
   1.610 +        }
   1.611 +        fill_rectangle(&mrs->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
   1.612 +        fill_rectangle(&mrs->mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
   1.613 +        fill_rectangle(&mrs->mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
   1.614 +    }else{
   1.615 +        for(i8=0; i8<4; i8++){
   1.616 +            const int x8 = i8&1;
   1.617 +            const int y8 = i8>>1;
   1.618 +            int ref0, scale;
   1.619 +            const int16_t (*l1mv)[2]= l1mv0;
   1.620 +
   1.621 +            if(is_b8x8 && !IS_DIRECT(m->sub_mb_type[i8]))
   1.622 +                continue;
   1.623 +            m->sub_mb_type[i8] = sub_mb_type;
   1.624 +            fill_rectangle(&mrs->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
   1.625 +            if(IS_INTRA(mb_type_col[0])){
   1.626 +                fill_rectangle(&mrs->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
   1.627 +                fill_rectangle(&mrs->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
   1.628 +                fill_rectangle(&mrs->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
   1.629 +                continue;
   1.630 +            }
   1.631 +
   1.632 +            ref0 = l1ref0[i8];
   1.633 +            if(ref0 >= 0)
   1.634 +                ref0 = map_col_to_list0[0][ref0 ];
   1.635 +            else{
   1.636 +                ref0 = map_col_to_list0[1][l1ref1[i8]];
   1.637 +                l1mv= l1mv1;
   1.638 +            }
   1.639 +            scale = dist_scale_factor[ref0];
   1.640 +
   1.641 +            fill_rectangle(&mrs->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
   1.642 +            if(IS_SUB_8X8(sub_mb_type)){
   1.643 +                const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
   1.644 +                int mx = (scale * mv_col[0] + 128) >> 8;
   1.645 +                int my = (scale * mv_col[1] + 128) >> 8;
   1.646 +                fill_rectangle(&mrs->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
   1.647 +                fill_rectangle(&mrs->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
   1.648 +            }else
   1.649 +            for(i4=0; i4<4; i4++){
   1.650 +                const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
   1.651 +                int16_t *mv_l0 = mrs->mv_cache[0][scan8[i8*4+i4]];
   1.652 +                mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
   1.653 +                mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
   1.654 +                AV_WN32A(mrs->mv_cache[1][scan8[i8*4+i4]],
   1.655 +                    pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]));
   1.656 +            }
   1.657 +        }
   1.658 +    }
   1.659 +}
   1.660 +
   1.661 +void ff_h264_pred_direct_motion_rec(MBRecContext *mrc, MBRecState *mrs, H264Slice *s, H264Mb *m, int *mb_type){
   1.662 +    if(s->direct_spatial_mv_pred){
   1.663 +        pred_spatial_direct_motion_rec(mrc, mrs, s, m, mb_type);
   1.664 +    }else{
   1.665 +        pred_temp_direct_motion_rec(mrc, mrs, s, m, mb_type);
   1.666 +    }
   1.667 +}
   1.668 +
   1.669 +static inline int fetch_diagonal_mv(MBRecContext *mrc, MBRecState *mrs, H264Slice *s, const int16_t **C, int i, int list, int part_width){
   1.670 +    const int topright_ref= mrs->ref_cache[list][ i - 8 + part_width ];
   1.671 +
   1.672 +    if(topright_ref != PART_NOT_AVAILABLE){
   1.673 +        *C= mrs->mv_cache[list][ i - 8 + part_width ];
   1.674 +        return topright_ref;
   1.675 +    }else{
   1.676 +        *C= mrs->mv_cache[list][ i - 8 - 1 ];
   1.677 +        return mrs->ref_cache[list][ i - 8 - 1 ];
   1.678 +    }
   1.679 +}
   1.680 +
   1.681 +/**
   1.682 + * gets the predicted MV.
   1.683 + * @param n the block index
   1.684 + * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
   1.685 + * @param mx the x component of the predicted motion vector
   1.686 + * @param my the y component of the predicted motion vector
   1.687 + */
   1.688 +static inline void pred_motion(MBRecContext *mrc, MBRecState *mrs, H264Slice *s, int n, int part_width, int list, int ref, int * const mx, int * const my){
   1.689 +    const int index8= scan8[n];
   1.690 +    const int top_ref=      mrs->ref_cache[list][ index8 - 8 ];
   1.691 +    const int left_ref=     mrs->ref_cache[list][ index8 - 1 ];
   1.692 +    const int16_t * const A= mrs->mv_cache[list][ index8 - 1 ];
   1.693 +    const int16_t * const B= mrs->mv_cache[list][ index8 - 8 ];
   1.694 +    const int16_t * C;
   1.695 +    int diagonal_ref, match_count;
   1.696 +
   1.697 +    assert(part_width==1 || part_width==2 || part_width==4);
   1.698 +
   1.699 +/* mv_cache
   1.700 +  B . . A T T T T
   1.701 +  U . . L . . , .
   1.702 +  U . . L . . . .
   1.703 +  U . . L . . , .
   1.704 +  . . . L . . . .
   1.705 +*/
   1.706 +
   1.707 +    diagonal_ref= fetch_diagonal_mv(mrc, mrs, s, &C, index8, list, part_width);
   1.708 +    match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
   1.709 +
   1.710 +    if(match_count > 1){ //most common
   1.711 +        *mx= mid_pred(A[0], B[0], C[0]);
   1.712 +        *my= mid_pred(A[1], B[1], C[1]);
   1.713 +    }else if(match_count==1){
   1.714 +        if(left_ref==ref){
   1.715 +            *mx= A[0];
   1.716 +            *my= A[1];
   1.717 +        }else if(top_ref==ref){
   1.718 +            *mx= B[0];
   1.719 +            *my= B[1];
   1.720 +        }else{
   1.721 +            *mx= C[0];
   1.722 +            *my= C[1];
   1.723 +        }
   1.724 +    }else{
   1.725 +        if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
   1.726 +            *mx= A[0];
   1.727 +            *my= A[1];
   1.728 +        }else{
   1.729 +            *mx= mid_pred(A[0], B[0], C[0]);
   1.730 +            *my= mid_pred(A[1], B[1], C[1]);
   1.731 +        }
   1.732 +    }
   1.733 +
   1.734 +}
   1.735 +
   1.736 +/**
   1.737 + * gets the directionally predicted 16x8 MV.
   1.738 + * @param n the block index
   1.739 + * @param mx the x component of the predicted motion vector
   1.740 + * @param my the y component of the predicted motion vector
   1.741 + */
   1.742 +static inline void pred_16x8_motion(MBRecContext *mrc, MBRecState *mrs, H264Slice *s, int n, int list, int ref, int * const mx, int * const my){
   1.743 +    if(n==0){
   1.744 +        const int top_ref=      mrs->ref_cache[list][ scan8[0] - 8 ];
   1.745 +        const int16_t * const B= mrs->mv_cache[list][ scan8[0] - 8 ];
   1.746 +
   1.747 +        if(top_ref == ref){
   1.748 +            *mx= B[0];
   1.749 +            *my= B[1];
   1.750 +            return;
   1.751 +        }
   1.752 +    }else{
   1.753 +        const int left_ref=     mrs->ref_cache[list][ scan8[8] - 1 ];
   1.754 +        const int16_t * const A= mrs->mv_cache[list][ scan8[8] - 1 ];
   1.755 +
   1.756 +        if(left_ref == ref){
   1.757 +            *mx= A[0];
   1.758 +            *my= A[1];
   1.759 +            return;
   1.760 +        }
   1.761 +    }
   1.762 +
   1.763 +    //RARE
   1.764 +    pred_motion(mrc, mrs, s, n, 4, list, ref, mx, my);
   1.765 +}
   1.766 +
   1.767 +/**
   1.768 + * gets the directionally predicted 8x16 MV.
   1.769 + * @param n the block index
   1.770 + * @param mx the x component of the predicted motion vector
   1.771 + * @param my the y component of the predicted motion vector
   1.772 + */
   1.773 +static inline void pred_8x16_motion(MBRecContext *mrc, MBRecState *mrs, H264Slice *s, int n, int list, int ref, int * const mx, int * const my){
   1.774 +    if(n==0){
   1.775 +        const int left_ref=      mrs->ref_cache[list][ scan8[0] - 1 ];
   1.776 +        const int16_t * const A=  mrs->mv_cache[list][ scan8[0] - 1 ];
   1.777 +
   1.778 +        if(left_ref == ref){
   1.779 +            *mx= A[0];
   1.780 +            *my= A[1];
   1.781 +            return;
   1.782 +        }
   1.783 +    }else{
   1.784 +        const int16_t * C;
   1.785 +        int diagonal_ref;
   1.786 +
   1.787 +        diagonal_ref= fetch_diagonal_mv(mrc, mrs, s, &C, scan8[4], list, 2);
   1.788 +        if(diagonal_ref == ref){
   1.789 +            *mx= C[0];
   1.790 +            *my= C[1];
   1.791 +            return;
   1.792 +        }
   1.793 +    }
   1.794 +
   1.795 +    //RARE
   1.796 +    pred_motion(mrc, mrs, s, n, 2, list, ref, mx, my);
   1.797 +}
   1.798 +
   1.799 +static inline void pred_pskip_motion(MBRecContext *mrc, MBRecState *mrs, H264Slice *s, H264Mb * m, int * const mx, int * const my){
   1.800 +    const int top_ref = mrs->ref_cache[0][ scan8[0] - 8 ];
   1.801 +    const int left_ref= mrs->ref_cache[0][ scan8[0] - 1 ];
   1.802 +
   1.803 +    if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
   1.804 +       || !( top_ref | AV_RN32A(mrs->mv_cache[0][ scan8[0] - 8 ]))
   1.805 +       || !(left_ref | AV_RN32A(mrs->mv_cache[0][ scan8[0] - 1 ]))){
   1.806 +
   1.807 +        *mx = *my = 0;
   1.808 +        return;
   1.809 +    }
   1.810 +
   1.811 +    pred_motion(mrc, mrs, s, 0, 4, 0, 0, mx, my);
   1.812 +
   1.813 +    return;
   1.814 +}
   1.815 +
   1.816 +#define ADD_MVD(list) \
   1.817 +{ \
   1.818 +    mx += m->mvd[list][mp][0]; \
   1.819 +    my += m->mvd[list][mp][1]; \
   1.820 +    mp++; \
   1.821 +}
   1.822 +
   1.823 +int pred_motion_mb_rec (MBRecContext *mrc, MBRecState *mrs, H264Slice *s, H264Mb *m){
   1.824 +    int mp=0;
   1.825 +    int mb_type = m->mb_type;
   1.826 +    const int mb_x = m->mb_x;
   1.827 +
   1.828 +//     mrc->m =m;
   1.829 +
   1.830 +    fill_decode_caches_rec(mrc, mrs, s, m, mb_type);
   1.831 +    if (IS_SKIP(mb_type)){
   1.832 +        mb_type=0;
   1.833 +
   1.834 +        if( s->slice_type_nos == FF_B_TYPE )
   1.835 +        {
   1.836 +            mb_type|= MB_TYPE_L0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
   1.837 +            ff_h264_pred_direct_motion_rec(mrc, mrs, s, m, &mb_type);
   1.838 +        }
   1.839 +        else
   1.840 +        {
   1.841 +            int mx, my;
   1.842 +
   1.843 +            mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP; //FIXME check required
   1.844 +            pred_pskip_motion(mrc, mrs, s, m, &mx, &my);
   1.845 +            fill_rectangle(&mrs->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
   1.846 +            fill_rectangle(mrs->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
   1.847 +        }
   1.848 +
   1.849 +        write_back_motion_rec(mrc, mrs, s, m, mb_type);
   1.850 +        m->mb_type = mrs->mb_type[mb_x]= mb_type;
   1.851 +        return 0;
   1.852 +    }
   1.853 +
   1.854 +
   1.855 +    if (IS_INTRA_PCM(mb_type)){
   1.856 +        mrs->mb_type[mb_x] =  mb_type;
   1.857 +        return 0;
   1.858 +    }
   1.859 +    else if (IS_INTRA(mb_type)){
   1.860 +        int i, pred_mode;
   1.861 +
   1.862 +        if( IS_INTRA4x4( mb_type ) ) {
   1.863 +            if ( IS_8x8DCT(mb_type) ) {
   1.864 +                for( i = 0; i < 16; i+=4 ) {
   1.865 +                    int pred = pred_intra_mode(mrc, mrs, i );
   1.866 +                    int mode = m->intra4x4_pred_mode[i];
   1.867 +
   1.868 +                    mode = mode < 0 ?  pred : mode + ( mode >= pred );
   1.869 +                    fill_rectangle( &mrs->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
   1.870 +                }
   1.871 +            } else {
   1.872 +                for( i = 0; i < 16; i++ ) {
   1.873 +                    int pred = pred_intra_mode(mrc, mrs, i );
   1.874 +                    int mode = m->intra4x4_pred_mode[i];
   1.875 +                    mode = mode < 0 ?  pred : mode + ( mode >= pred );
   1.876 +                    mrs->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
   1.877 +                }
   1.878 +            }
   1.879 +            write_back_intra_pred_mode_rec(mrc, mrs, m, mb_x);
   1.880 +            if( check_intra4x4_pred_mode(mrc, mrs, s, m) < 0 ) return -1;
   1.881 +        } else {
   1.882 +            m->intra16x16_pred_mode= check_intra_pred_mode(mrc, mrs, s, m, m->intra16x16_pred_mode );
   1.883 +            if( m->intra16x16_pred_mode < 0 ) return -1;
   1.884 +        }
   1.885 +
   1.886 +        pred_mode = m->chroma_pred_mode;
   1.887 +        pred_mode= check_intra_pred_mode( mrc, mrs, s, m, pred_mode );
   1.888 +        if( pred_mode < 0 ) return -1;
   1.889 +        m->chroma_pred_mode= pred_mode;
   1.890 +
   1.891 +    }
   1.892 +    else if (IS_8X8(mb_type)){
   1.893 +        int i, j, list;
   1.894 +
   1.895 +        if( s->slice_type_nos == FF_B_TYPE ) {
   1.896 +            if( IS_DIRECT(m->sub_mb_type[0] | m->sub_mb_type[1] |
   1.897 +                            m->sub_mb_type[2] | m->sub_mb_type[3]) ) {
   1.898 +                ff_h264_pred_direct_motion_rec(mrc, mrs, s, m, &mb_type);
   1.899 +                mrs->ref_cache[0][scan8[4]] =
   1.900 +                mrs->ref_cache[1][scan8[4]] =
   1.901 +                mrs->ref_cache[0][scan8[12]] =
   1.902 +                mrs->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
   1.903 +            }
   1.904 +        }
   1.905 +
   1.906 +        for(list=0; list<s->list_count; list++){
   1.907 +            for(i=0; i<4; i++){
   1.908 +                if(IS_DIRECT(m->sub_mb_type[i])){
   1.909 +                    mrs->ref_cache[list][ scan8[4*i]   ]=mrs->ref_cache[list][ scan8[4*i]+1 ];
   1.910 +                    continue;
   1.911 +                } else {
   1.912 +                    mrs->ref_cache[list][ scan8[4*i]   ]=mrs->ref_cache[list][ scan8[4*i]+1 ]=
   1.913 +                    mrs->ref_cache[list][ scan8[4*i]+8 ]=mrs->ref_cache[list][ scan8[4*i]+9 ]= m->ref_index[list][i];
   1.914 +
   1.915 +                    if(IS_DIR(m->sub_mb_type[i], 0, list) ){
   1.916 +                        const int sub_mb_type= m->sub_mb_type[i];
   1.917 +                        const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
   1.918 +
   1.919 +                        int sub_partition_count = IS_SUB_8X8(sub_mb_type) ? 1 : (IS_SUB_4X4(sub_mb_type)? 4 :2);
   1.920 +                        for(j=0; j<sub_partition_count; j++){
   1.921 +                            int mx, my;
   1.922 +                            const int index= 4*i + block_width*j;
   1.923 +                            int16_t (* mv_cache)[2]= &mrs->mv_cache[list][ scan8[index]];
   1.924 +                            pred_motion(mrc, mrs, s, index, block_width, list, mrs->ref_cache[list][ scan8[index] ], &mx, &my);
   1.925 +
   1.926 +                            ADD_MVD(list)
   1.927 +
   1.928 +                            if(IS_SUB_8X8(sub_mb_type)){
   1.929 +                                mv_cache[ 1 ][0]=
   1.930 +                                mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
   1.931 +                                mv_cache[ 1 ][1]=
   1.932 +                                mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
   1.933 +                            }else if(IS_SUB_8X4(sub_mb_type)){
   1.934 +                                mv_cache[ 1 ][0]= mx;
   1.935 +                                mv_cache[ 1 ][1]= my;
   1.936 +                            }else if(IS_SUB_4X8(sub_mb_type)){
   1.937 +                                mv_cache[ 8 ][0]= mx;
   1.938 +                                mv_cache[ 8 ][1]= my;
   1.939 +                            }
   1.940 +                            mv_cache[ 0 ][0]= mx;
   1.941 +                            mv_cache[ 0 ][1]= my;
   1.942 +                        }
   1.943 +                    }else{
   1.944 +                        fill_rectangle(mrs->mv_cache [list][ scan8[4*i] ], 2, 2, 8, 0, 4);
   1.945 +                    }
   1.946 +                }
   1.947 +            }
   1.948 +        }
   1.949 +    } else if( IS_DIRECT(mb_type) ) {
   1.950 +        mb_type &= ~MB_TYPE_16x16;  //FIXME not nice
   1.951 +        ff_h264_pred_direct_motion_rec(mrc, mrs, s, m, &mb_type);
   1.952 +    }
   1.953 +    else {
   1.954 +        int list, i;
   1.955 +        if(IS_16X16(mb_type)){
   1.956 +            for(list=0; list<s->list_count; list++){
   1.957 +                if(IS_DIR(mb_type, 0, list)){
   1.958 +                    int ref;
   1.959 +                    int mx,my;
   1.960 +
   1.961 +                    ref = m->ref_index[list][0];
   1.962 +                    fill_rectangle(&mrs->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
   1.963 +                    pred_motion(mrc, mrs, s, 0, 4, list, mrs->ref_cache[list][ scan8[0] ], &mx, &my);
   1.964 +                    ADD_MVD(list)
   1.965 +                    fill_rectangle(mrs->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
   1.966 +                }
   1.967 +            }
   1.968 +        }
   1.969 +        else if(IS_16X8(mb_type)){
   1.970 +            for(list=0; list<s->list_count; list++){
   1.971 +                for(i=0; i<2; i++){
   1.972 +                    if(IS_DIR(mb_type, i, list)){
   1.973 +                        int ref;
   1.974 +                        int mx,my;
   1.975 +                        ref = m->ref_index[list][i];
   1.976 +                        fill_rectangle(&mrs->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
   1.977 +
   1.978 +                        pred_16x8_motion(mrc, mrs, s, 8*i, list, mrs->ref_cache[list][scan8[0] + 16*i], &mx, &my);
   1.979 +                        ADD_MVD(list)
   1.980 +
   1.981 +                        fill_rectangle(mrs->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
   1.982 +                    }else{
   1.983 +                        fill_rectangle(&mrs->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
   1.984 +                        fill_rectangle(mrs->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
   1.985 +                    }
   1.986 +                }
   1.987 +            }
   1.988 +
   1.989 +        }else{
   1.990 +            assert(IS_8X16(mb_type));
   1.991 +
   1.992 +            for(list=0; list<s->list_count; list++){
   1.993 +                for(i=0; i<2; i++){
   1.994 +                    if(IS_DIR(mb_type, i, list)){ //FIXME optimize
   1.995 +                        int ref;
   1.996 +                        int mx,my;
   1.997 +                        ref = m->ref_index[list][i];
   1.998 +                        fill_rectangle(&mrs->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
   1.999 +                        pred_8x16_motion(mrc, mrs, s, i*4, list, mrs->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
  1.1000 +                        ADD_MVD(list)
  1.1001 +                        fill_rectangle(mrs->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
  1.1002 +                    }else{
  1.1003 +                        fill_rectangle(&mrs->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
  1.1004 +                        fill_rectangle(mrs->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
  1.1005 +                    }
  1.1006 +                }
  1.1007 +            }
  1.1008 +        }
  1.1009 +    }
  1.1010 +
  1.1011 +    if (IS_INTER(mb_type)||(IS_DIRECT(mb_type)))
  1.1012 +        write_back_motion_rec(mrc, mrs, s, m, mb_type);
  1.1013 +    m->mb_type = mrs->mb_type[mb_x]= mb_type;
  1.1014 +
  1.1015 +    return 0;
  1.1016 +}