Mercurial > cgi-bin > hgwebdir.cgi > PR > Applications > VSs > VSs__H264__App
diff libavcodec/cell/spe_ed.c @ 2:897f711a7157
rearrange to work with autoconf
| author | Nina Engelhardt <nengel@mailbox.tu-berlin.de> |
|---|---|
| date | Tue, 25 Sep 2012 15:55:33 +0200 |
| parents | |
| children |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/libavcodec/cell/spe_ed.c Tue Sep 25 15:55:33 2012 +0200 1.3 @@ -0,0 +1,508 @@ 1.4 +#define CELL_SPE 1.5 + 1.6 +#include <string.h> 1.7 +#include <stdio.h> 1.8 +#include <spu_intrinsics.h> 1.9 +#include <spu_mfcio.h> 1.10 +#include "libavcodec/avcodec.h" 1.11 +#include "h264_cabac_spu.h" 1.12 +#include "cabac_spu.h" 1.13 +#include "h264_types_spu.h" 1.14 +#include "h264_tables.h" 1.15 +#include "h264_dma.h" 1.16 +#include "h264_tables.h" 1.17 + 1.18 +#define MB_WIDTH 240 1.19 +#define MB_STRIDE (MB_WIDTH+16) 1.20 + 1.21 +H264Cabac_spu hcabac; 1.22 +CABACContext cabac; 1.23 +DECLARE_ALIGNED_16(EDSlice_spu, slice[2]); 1.24 +DECLARE_ALIGNED_16(H264Mb, mb[2]); 1.25 +DECLARE_ALIGNED_16(H264spe, spe); 1.26 + 1.27 +DECLARE_ALIGNED_16(uint8_t, non_zero_count_table[2][MB_STRIDE][32]); 1.28 +DECLARE_ALIGNED_16(uint8_t, mvd_table[2][2][8*MB_STRIDE][2]); 1.29 +DECLARE_ALIGNED_16(uint8_t, direct_table[2][4*MB_STRIDE]); 1.30 +DECLARE_ALIGNED_16(uint8_t, chroma_pred_mode_table[2][MB_STRIDE]); 1.31 +DECLARE_ALIGNED_16(uint8_t, intra4x4_pred_mode_table[2][8*MB_STRIDE]); 1.32 +DECLARE_ALIGNED_16(uint16_t,cbp_table[2][MB_STRIDE]); 1.33 +DECLARE_ALIGNED_16(uint8_t, qscale_table[2][MB_STRIDE]); 1.34 + 1.35 +DECLARE_ALIGNED_16(uint32_t, mb_type_table[2][MB_STRIDE]); 1.36 +DECLARE_ALIGNED_16(int8_t, ref_index_table[2][2][4*MB_STRIDE]); 1.37 +DECLARE_ALIGNED_16(int16_t, motion_val_table[2][2][4*4*MB_WIDTH][2]); 1.38 + 1.39 +DECLARE_ALIGNED(128, uint8_t, bytestream_ls[4096]); 1.40 +DECLARE_ALIGNED_16(uint32_t, list1_mb_type_table[2][MB_STRIDE]); 1.41 +DECLARE_ALIGNED_16(int8_t, list1_ref_index_table[2][2][4*MB_STRIDE]); 1.42 + 1.43 +DECLARE_ALIGNED_16(spe_pos, dma_temp); //dma temp for sending 1.44 +//mb position of neighbouring spes 1.45 +DECLARE_ALIGNED_16(volatile spe_pos, src_spe); //written by SPE_ID -1 1.46 +static int total_lines; 1.47 + 1.48 +static inline int dep_resolved(H264spe *p){ 1.49 + int spe_id = p->spe_id; 1.50 + volatile int lines_proc = src_spe.count; 1.51 + if (spe_id==0) 1.52 + return (total_lines < lines_proc-1 +p->mb_height)? 1:0; 1.53 + else 1.54 + return (total_lines < lines_proc-1)? 1:0; 1.55 +} 1.56 + 1.57 +static void update_tgt_spe_dep(H264spe *p, int end){ 1.58 + // if (end ){ 1.59 + total_lines++; 1.60 + spe_pos* dma_spe = &dma_temp; 1.61 + spe_pos* tgt_spe = p->tgt_spe + (unsigned) &src_spe; //located in target spe local store 1.62 + dma_spe->count = end? total_lines+1: total_lines; 1.63 + spu_dma_barrier_put(dma_spe, (unsigned) tgt_spe, sizeof(dma_temp), ED_put); 1.64 + // } 1.65 + 1.66 +} 1.67 + 1.68 +static int init_cabac(H264spe *p, H264Cabac_spu *hc){ 1.69 + hc->mb_height = p->mb_height; 1.70 + hc->mb_width = p->mb_width; 1.71 + hc->b_stride = 4*p->mb_width; 1.72 + hc->mb_stride = p->mb_stride; 1.73 + 1.74 + for(int i=0; i<16; i++){ 1.75 + #define T(x) (x>>2) | ((x<<2) & 0xF) 1.76 + hc->zigzag_scan[i] = T(zigzag_scan[i]); 1.77 + #undef T 1.78 + } 1.79 + for(int i=0; i<64; i++){ 1.80 + #define T(x) (x>>3) | ((x&7)<<3) 1.81 + hc->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]); 1.82 + #undef T 1.83 + } 1.84 +} 1.85 + 1.86 +static void reset_cabac_buffers(){ 1.87 + memset(intra4x4_pred_mode_table, 0, sizeof(intra4x4_pred_mode_table)); 1.88 + memset(mvd_table, 0, sizeof(mvd_table)); 1.89 + memset(direct_table, 0, sizeof(direct_table)); 1.90 + memset(chroma_pred_mode_table, 0, sizeof(chroma_pred_mode_table)); 1.91 + memset(cbp_table, 0, sizeof(cbp_table)); 1.92 + memset(qscale_table, 0, sizeof(qscale_table)); 1.93 + memset(mb_type_table, 0, sizeof(mb_type_table)); 1.94 + memset(ref_index_table, 0, sizeof(ref_index_table)); 1.95 + memset(motion_val_table, 0, sizeof(motion_val_table)); 1.96 +} 1.97 + 1.98 +static void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int bufsize){ 1.99 + int align = (unsigned) buf & 0xF; 1.100 + int dma_size; 1.101 + 1.102 + c->bytestream_ea_start= 1.103 + c->bytestream_ea= buf; 1.104 + c->bytestream_ea_end= buf + bufsize; 1.105 + c->bufsize = bufsize; 1.106 + 1.107 + if (bufsize + align >= sizeof(bytestream_ls)){ 1.108 + dma_size = sizeof(bytestream_ls); 1.109 + c->bufsize = c->bufsize +align - sizeof(bytestream_ls); 1.110 + }else{ 1.111 + int align_end = (bufsize+align) &0xF; 1.112 + if (align_end) 1.113 + dma_size = bufsize+align + 16-align_end; 1.114 + else 1.115 + dma_size = bufsize+align; 1.116 + c->bufsize = 0; 1.117 + } 1.118 +// printf("%d\n", dma_size); 1.119 + c->bytestream_end = &bytestream_ls[dma_size]; 1.120 + c->bytestream_start= c->bytestream = &bytestream_ls[align]; 1.121 + spu_dma_get(bytestream_ls, (unsigned) buf - align, dma_size, ED_get ); 1.122 + c->bytestream_ea_start= 1.123 + c->bytestream_ea= buf + dma_size -align; 1.124 + 1.125 + wait_dma_id(ED_get); 1.126 + 1.127 + if (align %2){ 1.128 + c->low = (*c->bytestream++)<<18; 1.129 + c->low+= (*c->bytestream++)<<10; 1.130 + c->low+= ((*c->bytestream++)<<2) + 2; 1.131 + }else { 1.132 + c->low = (*c->bytestream++)<<18; 1.133 + c->low+= (*c->bytestream++)<<10; 1.134 + c->low+= (2<<8); 1.135 + } 1.136 + 1.137 + c->range= 0x1FE; 1.138 + bytecount=0; 1.139 +} 1.140 + 1.141 +static void init_dequant8_coeff_table(EDSlice_spu *s, H264Cabac_spu *hc){ 1.142 + int i,q,x; 1.143 + const int transpose = HAVE_ALTIVEC; 1.144 + hc->dequant8_coeff[0] = hc->dequant8_buffer[0]; 1.145 + hc->dequant8_coeff[1] = hc->dequant8_buffer[1]; 1.146 + 1.147 + for(i=0; i<2; i++){ 1.148 + if(i && !memcmp(s->pps.scaling_matrix8[0], s->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){ 1.149 + hc->dequant8_coeff[1] = hc->dequant8_buffer[0]; 1.150 + break; 1.151 + } 1.152 + 1.153 + for(q=0; q<52; q++){ 1.154 + int shift = div6[q]; 1.155 + int idx = rem6[q]; 1.156 + for(x=0; x<64; x++) 1.157 + hc->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] = 1.158 + ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] * 1.159 + s->pps.scaling_matrix8[i][x]) << shift; 1.160 + } 1.161 + } 1.162 +} 1.163 + 1.164 +static void init_dequant4_coeff_table(EDSlice_spu *s, H264Cabac_spu *hc){ 1.165 + int i,j,q,x; 1.166 + const int transpose = HAVE_MMX | HAVE_ALTIVEC | HAVE_NEON; 1.167 + for(i=0; i<6; i++ ){ 1.168 + hc->dequant4_coeff[i] = hc->dequant4_buffer[i]; 1.169 + for(j=0; j<i; j++){ 1.170 + if(!memcmp(s->pps.scaling_matrix4[j], s->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){ 1.171 + hc->dequant4_coeff[i] = hc->dequant4_buffer[j]; 1.172 + break; 1.173 + } 1.174 + } 1.175 + if(j<i) 1.176 + continue; 1.177 + 1.178 + for(q=0; q<52; q++){ 1.179 + int shift = div6[q] + 2; 1.180 + int idx = rem6[q]; 1.181 + for(x=0; x<16; x++) 1.182 + hc->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] = 1.183 + ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] * 1.184 + s->pps.scaling_matrix4[i][x]) << shift; 1.185 + } 1.186 + } 1.187 +} 1.188 + 1.189 +static void init_dequant_tables(EDSlice_spu *s, H264Cabac_spu *hc){ 1.190 + int i,x; 1.191 + 1.192 + init_dequant4_coeff_table(s, hc); 1.193 + if(s->pps.transform_8x8_mode) 1.194 + init_dequant8_coeff_table(s, hc); 1.195 + if(s->transform_bypass){ 1.196 + for(i=0; i<6; i++) 1.197 + for(x=0; x<16; x++) 1.198 + hc->dequant4_coeff[i][0][x] = 1<<6; 1.199 + if(s->pps.transform_8x8_mode) 1.200 + for(i=0; i<2; i++) 1.201 + for(x=0; x<64; x++) 1.202 + hc->dequant8_coeff[i][0][x] = 1<<6; 1.203 + } 1.204 +} 1.205 + 1.206 +static void init_entropy_buf(H264Cabac_spu *hc, EDSlice_spu *s){ 1.207 + hc->non_zero_count_top = non_zero_count_table[0]; 1.208 + hc->non_zero_count = non_zero_count_table[1]; 1.209 + hc->mvd_top[0] = mvd_table[0][0]; 1.210 + hc->mvd[0] = mvd_table[0][1]; 1.211 + hc->mvd_top[1] = mvd_table[1][0]; 1.212 + hc->mvd[1] = mvd_table[1][1]; 1.213 + hc->direct_top = direct_table[0]; 1.214 + hc->direct = direct_table[1]; 1.215 + hc->chroma_pred_mode_top = chroma_pred_mode_table[0]; 1.216 + hc->chroma_pred_mode = chroma_pred_mode_table[1]; 1.217 + hc->intra4x4_pred_mode_top = intra4x4_pred_mode_table[0]; 1.218 + hc->intra4x4_pred_mode = intra4x4_pred_mode_table[1]; 1.219 + hc->cbp_top = cbp_table[0]; 1.220 + hc->cbp = cbp_table[1]; 1.221 + hc->qscale_top = qscale_table[0] +1; 1.222 + hc->qscale = qscale_table[1] +1; 1.223 + 1.224 + hc->mb_type_top = mb_type_table[0]+1; 1.225 + hc->mb_type = mb_type_table[1]+1; 1.226 + hc->ref_index_top[0] = ref_index_table[0][0]; 1.227 + hc->ref_index_top[1] = ref_index_table[1][0]; 1.228 + hc->ref_index[0] = ref_index_table[0][1]; 1.229 + hc->ref_index[1] = ref_index_table[1][1]; 1.230 + hc->motion_val_top[0] = motion_val_table[0][0]; 1.231 + hc->motion_val_top[1] = motion_val_table[1][0]; 1.232 + hc->motion_val[0] = motion_val_table[0][1]; 1.233 + hc->motion_val[1] = motion_val_table[1][1]; 1.234 + 1.235 + int mb_stride = hc->mb_stride; 1.236 + 1.237 + if (s->slice_type_nos == FF_B_TYPE){ 1.238 + while(!dep_resolved(&spe)); 1.239 + spu_dma_get(list1_mb_type_table[0], (unsigned) (s->list1.mb_type -1), mb_stride*sizeof(uint32_t), ED_get); 1.240 + spu_dma_get(list1_ref_index_table[0][0], (unsigned) s->list1.ref_index[0], mb_stride*4*sizeof(int8_t), ED_get); 1.241 + spu_dma_get(list1_ref_index_table[0][1], (unsigned) s->list1.ref_index[1], mb_stride*4*sizeof(int8_t), ED_get); 1.242 + wait_dma_id(ED_get); 1.243 + spu_dma_get(list1_mb_type_table[1], (unsigned) (s->list1.mb_type -1 + mb_stride), mb_stride*sizeof(uint32_t), ED_get); 1.244 + spu_dma_get(list1_ref_index_table[1][0], (unsigned) (s->list1.ref_index[0] + 4*mb_stride), mb_stride*4*sizeof(int8_t), ED_get); 1.245 + spu_dma_get(list1_ref_index_table[1][1], (unsigned) (s->list1.ref_index[1] + 4*mb_stride), mb_stride*4*sizeof(int8_t), ED_get); 1.246 + hc->list1_mb_type = list1_mb_type_table[0]+1; 1.247 + hc->list1_ref_index[0] = list1_ref_index_table[0][0]; 1.248 + hc->list1_ref_index[1] = list1_ref_index_table[0][1]; 1.249 + } 1.250 + 1.251 +} 1.252 + 1.253 +static void update_entropy_buf(H264Cabac_spu *hc, EDSlice_spu *s, int line){ 1.254 + int mb_stride = hc->mb_stride; 1.255 + int mb_width = hc->mb_width; 1.256 + int top = (line+1)%2; 1.257 + int cur = line%2; 1.258 + int bottom = (line+1)%2; //same as top, but to identify prebuffering of next line. 1.259 + 1.260 + hc->non_zero_count_top = non_zero_count_table[top]; 1.261 + hc->non_zero_count = non_zero_count_table[cur]; 1.262 + hc->mvd_top[0] = mvd_table[0][top]; 1.263 + hc->mvd[0] = mvd_table[0][cur]; 1.264 + hc->mvd_top[1] = mvd_table[1][top]; 1.265 + hc->mvd[1] = mvd_table[1][cur]; 1.266 + hc->direct_top = direct_table[top]; 1.267 + hc->direct = direct_table[cur]; 1.268 + hc->chroma_pred_mode_top = chroma_pred_mode_table[top]; 1.269 + hc->chroma_pred_mode = chroma_pred_mode_table[cur]; 1.270 + hc->intra4x4_pred_mode_top = intra4x4_pred_mode_table[top]; 1.271 + hc->intra4x4_pred_mode = intra4x4_pred_mode_table[cur]; 1.272 + hc->cbp_top = cbp_table[top]; 1.273 + hc->cbp = cbp_table[cur]; 1.274 + hc->qscale_top = qscale_table[top] +1; 1.275 + hc->qscale = qscale_table[cur] +1; 1.276 + 1.277 + hc->mb_type_top = mb_type_table[top]+1; 1.278 + hc->mb_type = mb_type_table[cur]+1; 1.279 + hc->ref_index_top[0] = ref_index_table[0][top]; 1.280 + hc->ref_index_top[1] = ref_index_table[1][top]; 1.281 + hc->ref_index[0] = ref_index_table[0][cur]; 1.282 + hc->ref_index[1] = ref_index_table[1][cur]; 1.283 + hc->motion_val_top[0] = motion_val_table[0][top]; 1.284 + hc->motion_val_top[1] = motion_val_table[1][top]; 1.285 + hc->motion_val[0] = motion_val_table[0][cur]; 1.286 + hc->motion_val[1] = motion_val_table[1][cur]; 1.287 + 1.288 + wait_dma_id(ED_put); 1.289 + 1.290 + spu_dma_put(mb_type_table[top], (unsigned) (s->pic.mb_type -1 + line*mb_stride), mb_stride*sizeof(uint32_t), ED_put); 1.291 + spu_dma_put(ref_index_table[0][top], (unsigned) (s->pic.ref_index[0] + line*4*mb_stride), 4*mb_stride*sizeof(int8_t), ED_put); 1.292 + spu_dma_put(ref_index_table[1][top], (unsigned) (s->pic.ref_index[1] + line*4*mb_stride), 4*mb_stride*sizeof(int8_t), ED_put); 1.293 + spu_dma_put(motion_val_table[0][top], (unsigned) (s->pic.motion_val[0]+ line*16*mb_width), 16*mb_width*2*sizeof(int16_t), ED_put); 1.294 + spu_dma_put(motion_val_table[1][top], (unsigned) (s->pic.motion_val[1]+ line*16*mb_width), 16*mb_width*2*sizeof(int16_t), ED_put); 1.295 + 1.296 + if (s->slice_type_nos == FF_B_TYPE){ 1.297 + update_tgt_spe_dep(&spe, 0); 1.298 + wait_dma_id(ED_get); 1.299 + 1.300 + if (line + 2 < hc->mb_height){ 1.301 + while(!dep_resolved(&spe)); 1.302 + spu_dma_get(list1_mb_type_table[cur], (unsigned) (s->list1.mb_type -1 + (line+2)*mb_stride), mb_stride*sizeof(uint32_t), ED_get); 1.303 + spu_dma_get(list1_ref_index_table[cur][0], (unsigned) (s->list1.ref_index[0] + (line+2)*4*mb_stride), mb_stride*4*sizeof(int8_t), ED_get); 1.304 + spu_dma_get(list1_ref_index_table[cur][1], (unsigned) (s->list1.ref_index[1] + (line+2)*4*mb_stride), mb_stride*4*sizeof(int8_t), ED_get); 1.305 + } 1.306 + hc->list1_mb_type = list1_mb_type_table[bottom]+1; 1.307 + hc->list1_ref_index[0] = list1_ref_index_table[bottom][0]; 1.308 + hc->list1_ref_index[1] = list1_ref_index_table[bottom][1]; 1.309 + } 1.310 + 1.311 +} 1.312 + 1.313 +// void printmbdiff(EDSlice_spu *s, H264Cabac_spu *hc, H264Mb *mp, H264Mb *ms){ 1.314 +// 1.315 +// printf("mb_x %d, %d\n", mp->mb_x, ms->mb_x); 1.316 +// printf("mb_y %d, %d\n", mp->mb_y, ms->mb_y); 1.317 +// printf("mb_xy %d, %d\n", mp->mb_xy, ms->mb_xy); 1.318 +// printf("top_mb_xy %d, %d\n", mp->top_mb_xy, ms->top_mb_xy); 1.319 +// printf("left_mb_xy %d, %d\n", mp->left_mb_xy, ms->left_mb_xy); 1.320 +// printf("chroma_pred_mode %d, %d\n", mp->chroma_pred_mode, ms->chroma_pred_mode); 1.321 +// printf("intra16x16_pred_mode %d, %d\n", mp->intra16x16_pred_mode, ms->intra16x16_pred_mode); 1.322 +// printf("topleft_samples %d, %d\n", mp->topleft_samples_available, ms->topleft_samples_available); 1.323 +// printf("topright_samples %d, %d\n", mp->topright_samples_available, ms->topright_samples_available); 1.324 +// printf("top_samples %d, %d\n", mp->top_samples_available, ms->top_samples_available); 1.325 +// printf("left_samples %d, %d\n", mp->left_samples_available, ms->left_samples_available); 1.326 +// 1.327 +// if (memcmp(mp->intra4x4_pred_mode_cache, ms->intra4x4_pred_mode_cache, 40)){ 1.328 +// for (int i=0; i<5; i++){ 1.329 +// for (int j=0; j<8; j++){ 1.330 +// printf("%d, %d\t", mp->intra4x4_pred_mode_cache[i*8+j],ms->intra4x4_pred_mode_cache[i*8+j]); 1.331 +// } 1.332 +// printf("\n"); 1.333 +// } 1.334 +// } 1.335 +// 1.336 +// if (memcmp(mp->non_zero_count_cache, ms->non_zero_count_cache, 48)){ 1.337 +// for (int i=0; i<6; i++){ 1.338 +// for (int j=0; j<8; j++){ 1.339 +// printf("%u, %u\t", mp->non_zero_count_cache[i*8+j],ms->non_zero_count_cache[i*8+j]); 1.340 +// } 1.341 +// printf("\n"); 1.342 +// } 1.343 +// } 1.344 +// 1.345 +// if (memcmp(mp->sub_mb_type, ms->sub_mb_type, 8)){ 1.346 +// for (int i=0; i<4; i++){ 1.347 +// printf("%u, %u\t", mp->sub_mb_type[i], mp->sub_mb_type[i]); 1.348 +// printf("\n"); 1.349 +// } 1.350 +// } 1.351 +// 1.352 +// if (memcmp(mp->mv_cache, ms->mv_cache, 320)){ 1.353 +// for (int k=0; k<2; k++){ 1.354 +// for (int i=0; i<5; i++){ 1.355 +// for (int j=0; j<8; j++){ 1.356 +// printf("%d, %d, %d, %d\t", mp->mv_cache[k][i*8+j][0], mp->mv_cache[k][i*8+j][1], ms->mv_cache[k][i*8+j][0], ms->mv_cache[k][i*8+j][1]); 1.357 +// } 1.358 +// printf("\n"); 1.359 +// } 1.360 +// } 1.361 +// } 1.362 +// 1.363 +// if (memcmp(mp->ref_cache, ms->ref_cache, 80)){ 1.364 +// for (int k=0; k<2; k++){ 1.365 +// for (int i=0; i<5; i++){ 1.366 +// for (int j=0; j<8; j++){ 1.367 +// printf("%d, %d\t", mp->ref_cache[k][i*8+j], ms->ref_cache[k][i*8+j]); 1.368 +// } 1.369 +// printf("\n"); 1.370 +// } 1.371 +// } 1.372 +// } 1.373 +// 1.374 +// printf("cbp %d, %d\n", mp->cbp, ms->cbp); 1.375 +// for (int i=0; i<hc->mb_stride; i++){ 1.376 +// printf("%d, ", hc->cbp[i]); fflush(0); 1.377 +// } 1.378 +// printf("\n"); 1.379 +// 1.380 +// printf("mb_type %x, %x\n", mp->mb_type, ms->mb_type); 1.381 +// printf("mb_type IS_INTRA %d, IS_INTRA16x16 %d, IS_DIRECT %d\n", IS_INTRA(ms->mb_type), IS_INTRA16x16(ms->mb_type), IS_DIRECT(ms->mb_type) ); 1.382 +// printf("left_type %d, %d\n", mp->left_type, ms->left_type); 1.383 +// printf("top_type %d, %d\n", mp->top_type, ms->top_type); 1.384 +// printf("qscale_mb_xy %d, %d\n", mp->qscale_mb_xy, ms->qscale_mb_xy); 1.385 +// printf("qscale_left_mb_xy %d, %d\n", mp->qscale_left_mb_xy, ms->qscale_left_mb_xy); 1.386 +// printf("qscale_top_mb_xy %d, %d\n", mp->qscale_top_mb_xy, ms->qscale_top_mb_xy); 1.387 +// // for (int i=0; i<hc->mb_stride; i++){ 1.388 +// // printf("%d, ", qscale_table[0][i]); fflush(0); 1.389 +// // } 1.390 +// 1.391 +// if (memcmp(mp->mb, ms->mb, 768)){ 1.392 +// for (int i=0; i<16; i++){ 1.393 +// for (int j=0; j<16; j++){ 1.394 +// printf("%d, %d\t", mp->mb[j + i*16], ms->ref_cache[j + i*16]); 1.395 +// } 1.396 +// printf("\n"); 1.397 +// } 1.398 +// for (int i=0; i<8; i++){ 1.399 +// for (int j=0; j<8; j++){ 1.400 +// printf("%d, %d\t", mp->mb[256 + j + i*8], ms->ref_cache[j + i*8]); 1.401 +// } 1.402 +// printf("\n"); 1.403 +// } 1.404 +// for (int i=0; i<8; i++){ 1.405 +// for (int j=0; j<8; j++){ 1.406 +// printf("%d, %d\t", mp->mb[320+ j + i*8], ms->ref_cache[j + i*8]); 1.407 +// } 1.408 +// printf("\n"); 1.409 +// } 1.410 +// } 1.411 +// 1.412 +// if (memcmp(mp->bS, ms->bS, 32)){ 1.413 +// for (int k=0; k<2; k++){ 1.414 +// for (int i=0; i<4; i++){ 1.415 +// for (int j=0; j<4; j++){ 1.416 +// printf("%d, %d\t", mp->bS[k][i][j], mp->mv_cache[k][i][j]); 1.417 +// } 1.418 +// printf("\n"); 1.419 +// } 1.420 +// } 1.421 +// } 1.422 +// if (memcmp(mp->edges, ms->edges, 4)){ 1.423 +// printf("edges %d, %d, %d, %d\n", mp->edges[0], ms->edges[0], mp->edges[1], ms->edges[1]); 1.424 +// printf("deblock %d, %d\n", mp->deblock_mb, ms->deblock_mb); 1.425 +// } 1.426 +// 1.427 +// printf("dequant4_coeff_y %d, %d\n", mp->dequant4_coeff_y, ms->dequant4_coeff_y); 1.428 +// printf("dequant4_coeff_cb %d, %d\n", mp->dequant4_coeff_cb, ms->dequant4_coeff_cb); 1.429 +// printf("dequant4_coeff_cr %d, %d\n", mp->dequant4_coeff_cr, ms->dequant4_coeff_cr); 1.430 +// } 1.431 +// DECLARE_ALIGNED_16(H264Mb, tmp); 1.432 + 1.433 + 1.434 +int main(unsigned long long id, unsigned long long argp){ 1.435 + EDSlice_spu *s; 1.436 + H264Cabac_spu *hc = &hcabac; 1.437 + CABACContext *c = &cabac; 1.438 + H264spe *p = &spe; 1.439 + 1.440 + spu_write_out_mbox((unsigned) slice); 1.441 + spu_dma_get(p, (unsigned) argp, sizeof(H264spe), ED_spe); //ID_slice is used out of convienience 1.442 + wait_dma_id(ED_spe); 1.443 + 1.444 + ff_init_cabac_states(); 1.445 + init_cabac(p, hc); 1.446 + hc->blocking=0; 1.447 + for(;;){ 1.448 + spu_read_in_mbox(); 1.449 + s = &slice[0]; 1.450 + reset_cabac_buffers(); 1.451 + init_entropy_buf(hc, s); 1.452 + 1.453 + if (hc->blocking) wait_dma_id(ED_get); 1.454 + //printf("framesize %d\n", s->byte_bufsize);fflush(0); 1.455 + init_dequant_tables(s, hc); 1.456 + ff_init_cabac_decoder( c, s->bytestream_start, s->byte_bufsize ); 1.457 + ff_h264_init_cabac_states(s, c); 1.458 + 1.459 + int mb_slot=0; 1.460 + for(int j=0; j<hc->mb_height; j++){ 1.461 + for(int i=0; i<hc->mb_width; i++){ 1.462 + int eos,ret; 1.463 + H264Mb *m = &mb[mb_slot]; 1.464 + m->mb_x=i; 1.465 + m->mb_y=j; 1.466 + s->m = m; 1.467 + 1.468 + ret = ff_h264_decode_mb_cabac(hc, s, c); 1.469 + 1.470 +// spu_dma_get(&tmp, (unsigned) &s->mbs[j*hc->mb_width + i], sizeof(H264Mb), ED_get); 1.471 +// wait_dma_id(ED_get); 1.472 +// if (memcmp(&tmp, m, sizeof(H264Mb))){ 1.473 +// printf("coded pic num %d\n", s->coded_pic_num); 1.474 +// printmbdiff(s, hc,&tmp, m); 1.475 +// return 0; 1.476 +// } 1.477 + //printf("qscale %d\n", m->qscale_mb_xy); 1.478 + if (!hc->blocking){ 1.479 + if (mb_slot){ 1.480 + spu_dma_put(m, (unsigned) &s->mbs[j*hc->mb_width + i], sizeof(H264Mb), ED_putmb1); 1.481 + wait_dma_id(ED_putmb0); 1.482 + }else { 1.483 + spu_dma_put(m, (unsigned) &s->mbs[j*hc->mb_width + i], sizeof(H264Mb), ED_putmb0); 1.484 + wait_dma_id(ED_putmb1); 1.485 + } 1.486 + mb_slot++; mb_slot%=2; 1.487 + }else { 1.488 + spu_dma_put(m, (unsigned) &s->mbs[j*hc->mb_width + i], sizeof(H264Mb), ED_putmb0); 1.489 + wait_dma_id(ED_putmb0); 1.490 + } 1.491 + 1.492 + 1.493 + eos = get_cabac_terminate( c); 1.494 + 1.495 + if( ret < 0) { 1.496 + fprintf(stderr, "error at %d bytecount\n", bytecount); 1.497 + return -1; 1.498 + } 1.499 + } 1.500 + update_entropy_buf(hc, s, j); 1.501 + if (hc->blocking){ wait_dma_id(ED_get); wait_dma_id(ED_put);} 1.502 + } 1.503 + wait_dma_id(ED_put); 1.504 + spu_write_out_mbox(1); 1.505 + 1.506 + } 1.507 + 1.508 + return 0; 1.509 + 1.510 + 1.511 +}
