annotate libavcodec/cell/spe_ed.c @ 2:897f711a7157

rearrange to work with autoconf
author Nina Engelhardt <nengel@mailbox.tu-berlin.de>
date Tue, 25 Sep 2012 15:55:33 +0200
parents
children
rev   line source
nengel@2 1 #define CELL_SPE
nengel@2 2
nengel@2 3 #include <string.h>
nengel@2 4 #include <stdio.h>
nengel@2 5 #include <spu_intrinsics.h>
nengel@2 6 #include <spu_mfcio.h>
nengel@2 7 #include "libavcodec/avcodec.h"
nengel@2 8 #include "h264_cabac_spu.h"
nengel@2 9 #include "cabac_spu.h"
nengel@2 10 #include "h264_types_spu.h"
nengel@2 11 #include "h264_tables.h"
nengel@2 12 #include "h264_dma.h"
nengel@2 13 #include "h264_tables.h"
nengel@2 14
nengel@2 15 #define MB_WIDTH 240
nengel@2 16 #define MB_STRIDE (MB_WIDTH+16)
nengel@2 17
nengel@2 18 H264Cabac_spu hcabac;
nengel@2 19 CABACContext cabac;
nengel@2 20 DECLARE_ALIGNED_16(EDSlice_spu, slice[2]);
nengel@2 21 DECLARE_ALIGNED_16(H264Mb, mb[2]);
nengel@2 22 DECLARE_ALIGNED_16(H264spe, spe);
nengel@2 23
nengel@2 24 DECLARE_ALIGNED_16(uint8_t, non_zero_count_table[2][MB_STRIDE][32]);
nengel@2 25 DECLARE_ALIGNED_16(uint8_t, mvd_table[2][2][8*MB_STRIDE][2]);
nengel@2 26 DECLARE_ALIGNED_16(uint8_t, direct_table[2][4*MB_STRIDE]);
nengel@2 27 DECLARE_ALIGNED_16(uint8_t, chroma_pred_mode_table[2][MB_STRIDE]);
nengel@2 28 DECLARE_ALIGNED_16(uint8_t, intra4x4_pred_mode_table[2][8*MB_STRIDE]);
nengel@2 29 DECLARE_ALIGNED_16(uint16_t,cbp_table[2][MB_STRIDE]);
nengel@2 30 DECLARE_ALIGNED_16(uint8_t, qscale_table[2][MB_STRIDE]);
nengel@2 31
nengel@2 32 DECLARE_ALIGNED_16(uint32_t, mb_type_table[2][MB_STRIDE]);
nengel@2 33 DECLARE_ALIGNED_16(int8_t, ref_index_table[2][2][4*MB_STRIDE]);
nengel@2 34 DECLARE_ALIGNED_16(int16_t, motion_val_table[2][2][4*4*MB_WIDTH][2]);
nengel@2 35
nengel@2 36 DECLARE_ALIGNED(128, uint8_t, bytestream_ls[4096]);
nengel@2 37 DECLARE_ALIGNED_16(uint32_t, list1_mb_type_table[2][MB_STRIDE]);
nengel@2 38 DECLARE_ALIGNED_16(int8_t, list1_ref_index_table[2][2][4*MB_STRIDE]);
nengel@2 39
nengel@2 40 DECLARE_ALIGNED_16(spe_pos, dma_temp); //dma temp for sending
nengel@2 41 //mb position of neighbouring spes
nengel@2 42 DECLARE_ALIGNED_16(volatile spe_pos, src_spe); //written by SPE_ID -1
nengel@2 43 static int total_lines;
nengel@2 44
nengel@2 45 static inline int dep_resolved(H264spe *p){
nengel@2 46 int spe_id = p->spe_id;
nengel@2 47 volatile int lines_proc = src_spe.count;
nengel@2 48 if (spe_id==0)
nengel@2 49 return (total_lines < lines_proc-1 +p->mb_height)? 1:0;
nengel@2 50 else
nengel@2 51 return (total_lines < lines_proc-1)? 1:0;
nengel@2 52 }
nengel@2 53
nengel@2 54 static void update_tgt_spe_dep(H264spe *p, int end){
nengel@2 55 // if (end ){
nengel@2 56 total_lines++;
nengel@2 57 spe_pos* dma_spe = &dma_temp;
nengel@2 58 spe_pos* tgt_spe = p->tgt_spe + (unsigned) &src_spe; //located in target spe local store
nengel@2 59 dma_spe->count = end? total_lines+1: total_lines;
nengel@2 60 spu_dma_barrier_put(dma_spe, (unsigned) tgt_spe, sizeof(dma_temp), ED_put);
nengel@2 61 // }
nengel@2 62
nengel@2 63 }
nengel@2 64
nengel@2 65 static int init_cabac(H264spe *p, H264Cabac_spu *hc){
nengel@2 66 hc->mb_height = p->mb_height;
nengel@2 67 hc->mb_width = p->mb_width;
nengel@2 68 hc->b_stride = 4*p->mb_width;
nengel@2 69 hc->mb_stride = p->mb_stride;
nengel@2 70
nengel@2 71 for(int i=0; i<16; i++){
nengel@2 72 #define T(x) (x>>2) | ((x<<2) & 0xF)
nengel@2 73 hc->zigzag_scan[i] = T(zigzag_scan[i]);
nengel@2 74 #undef T
nengel@2 75 }
nengel@2 76 for(int i=0; i<64; i++){
nengel@2 77 #define T(x) (x>>3) | ((x&7)<<3)
nengel@2 78 hc->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]);
nengel@2 79 #undef T
nengel@2 80 }
nengel@2 81 }
nengel@2 82
nengel@2 83 static void reset_cabac_buffers(){
nengel@2 84 memset(intra4x4_pred_mode_table, 0, sizeof(intra4x4_pred_mode_table));
nengel@2 85 memset(mvd_table, 0, sizeof(mvd_table));
nengel@2 86 memset(direct_table, 0, sizeof(direct_table));
nengel@2 87 memset(chroma_pred_mode_table, 0, sizeof(chroma_pred_mode_table));
nengel@2 88 memset(cbp_table, 0, sizeof(cbp_table));
nengel@2 89 memset(qscale_table, 0, sizeof(qscale_table));
nengel@2 90 memset(mb_type_table, 0, sizeof(mb_type_table));
nengel@2 91 memset(ref_index_table, 0, sizeof(ref_index_table));
nengel@2 92 memset(motion_val_table, 0, sizeof(motion_val_table));
nengel@2 93 }
nengel@2 94
nengel@2 95 static void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int bufsize){
nengel@2 96 int align = (unsigned) buf & 0xF;
nengel@2 97 int dma_size;
nengel@2 98
nengel@2 99 c->bytestream_ea_start=
nengel@2 100 c->bytestream_ea= buf;
nengel@2 101 c->bytestream_ea_end= buf + bufsize;
nengel@2 102 c->bufsize = bufsize;
nengel@2 103
nengel@2 104 if (bufsize + align >= sizeof(bytestream_ls)){
nengel@2 105 dma_size = sizeof(bytestream_ls);
nengel@2 106 c->bufsize = c->bufsize +align - sizeof(bytestream_ls);
nengel@2 107 }else{
nengel@2 108 int align_end = (bufsize+align) &0xF;
nengel@2 109 if (align_end)
nengel@2 110 dma_size = bufsize+align + 16-align_end;
nengel@2 111 else
nengel@2 112 dma_size = bufsize+align;
nengel@2 113 c->bufsize = 0;
nengel@2 114 }
nengel@2 115 // printf("%d\n", dma_size);
nengel@2 116 c->bytestream_end = &bytestream_ls[dma_size];
nengel@2 117 c->bytestream_start= c->bytestream = &bytestream_ls[align];
nengel@2 118 spu_dma_get(bytestream_ls, (unsigned) buf - align, dma_size, ED_get );
nengel@2 119 c->bytestream_ea_start=
nengel@2 120 c->bytestream_ea= buf + dma_size -align;
nengel@2 121
nengel@2 122 wait_dma_id(ED_get);
nengel@2 123
nengel@2 124 if (align %2){
nengel@2 125 c->low = (*c->bytestream++)<<18;
nengel@2 126 c->low+= (*c->bytestream++)<<10;
nengel@2 127 c->low+= ((*c->bytestream++)<<2) + 2;
nengel@2 128 }else {
nengel@2 129 c->low = (*c->bytestream++)<<18;
nengel@2 130 c->low+= (*c->bytestream++)<<10;
nengel@2 131 c->low+= (2<<8);
nengel@2 132 }
nengel@2 133
nengel@2 134 c->range= 0x1FE;
nengel@2 135 bytecount=0;
nengel@2 136 }
nengel@2 137
nengel@2 138 static void init_dequant8_coeff_table(EDSlice_spu *s, H264Cabac_spu *hc){
nengel@2 139 int i,q,x;
nengel@2 140 const int transpose = HAVE_ALTIVEC;
nengel@2 141 hc->dequant8_coeff[0] = hc->dequant8_buffer[0];
nengel@2 142 hc->dequant8_coeff[1] = hc->dequant8_buffer[1];
nengel@2 143
nengel@2 144 for(i=0; i<2; i++){
nengel@2 145 if(i && !memcmp(s->pps.scaling_matrix8[0], s->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
nengel@2 146 hc->dequant8_coeff[1] = hc->dequant8_buffer[0];
nengel@2 147 break;
nengel@2 148 }
nengel@2 149
nengel@2 150 for(q=0; q<52; q++){
nengel@2 151 int shift = div6[q];
nengel@2 152 int idx = rem6[q];
nengel@2 153 for(x=0; x<64; x++)
nengel@2 154 hc->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
nengel@2 155 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
nengel@2 156 s->pps.scaling_matrix8[i][x]) << shift;
nengel@2 157 }
nengel@2 158 }
nengel@2 159 }
nengel@2 160
nengel@2 161 static void init_dequant4_coeff_table(EDSlice_spu *s, H264Cabac_spu *hc){
nengel@2 162 int i,j,q,x;
nengel@2 163 const int transpose = HAVE_MMX | HAVE_ALTIVEC | HAVE_NEON;
nengel@2 164 for(i=0; i<6; i++ ){
nengel@2 165 hc->dequant4_coeff[i] = hc->dequant4_buffer[i];
nengel@2 166 for(j=0; j<i; j++){
nengel@2 167 if(!memcmp(s->pps.scaling_matrix4[j], s->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
nengel@2 168 hc->dequant4_coeff[i] = hc->dequant4_buffer[j];
nengel@2 169 break;
nengel@2 170 }
nengel@2 171 }
nengel@2 172 if(j<i)
nengel@2 173 continue;
nengel@2 174
nengel@2 175 for(q=0; q<52; q++){
nengel@2 176 int shift = div6[q] + 2;
nengel@2 177 int idx = rem6[q];
nengel@2 178 for(x=0; x<16; x++)
nengel@2 179 hc->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
nengel@2 180 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
nengel@2 181 s->pps.scaling_matrix4[i][x]) << shift;
nengel@2 182 }
nengel@2 183 }
nengel@2 184 }
nengel@2 185
nengel@2 186 static void init_dequant_tables(EDSlice_spu *s, H264Cabac_spu *hc){
nengel@2 187 int i,x;
nengel@2 188
nengel@2 189 init_dequant4_coeff_table(s, hc);
nengel@2 190 if(s->pps.transform_8x8_mode)
nengel@2 191 init_dequant8_coeff_table(s, hc);
nengel@2 192 if(s->transform_bypass){
nengel@2 193 for(i=0; i<6; i++)
nengel@2 194 for(x=0; x<16; x++)
nengel@2 195 hc->dequant4_coeff[i][0][x] = 1<<6;
nengel@2 196 if(s->pps.transform_8x8_mode)
nengel@2 197 for(i=0; i<2; i++)
nengel@2 198 for(x=0; x<64; x++)
nengel@2 199 hc->dequant8_coeff[i][0][x] = 1<<6;
nengel@2 200 }
nengel@2 201 }
nengel@2 202
nengel@2 203 static void init_entropy_buf(H264Cabac_spu *hc, EDSlice_spu *s){
nengel@2 204 hc->non_zero_count_top = non_zero_count_table[0];
nengel@2 205 hc->non_zero_count = non_zero_count_table[1];
nengel@2 206 hc->mvd_top[0] = mvd_table[0][0];
nengel@2 207 hc->mvd[0] = mvd_table[0][1];
nengel@2 208 hc->mvd_top[1] = mvd_table[1][0];
nengel@2 209 hc->mvd[1] = mvd_table[1][1];
nengel@2 210 hc->direct_top = direct_table[0];
nengel@2 211 hc->direct = direct_table[1];
nengel@2 212 hc->chroma_pred_mode_top = chroma_pred_mode_table[0];
nengel@2 213 hc->chroma_pred_mode = chroma_pred_mode_table[1];
nengel@2 214 hc->intra4x4_pred_mode_top = intra4x4_pred_mode_table[0];
nengel@2 215 hc->intra4x4_pred_mode = intra4x4_pred_mode_table[1];
nengel@2 216 hc->cbp_top = cbp_table[0];
nengel@2 217 hc->cbp = cbp_table[1];
nengel@2 218 hc->qscale_top = qscale_table[0] +1;
nengel@2 219 hc->qscale = qscale_table[1] +1;
nengel@2 220
nengel@2 221 hc->mb_type_top = mb_type_table[0]+1;
nengel@2 222 hc->mb_type = mb_type_table[1]+1;
nengel@2 223 hc->ref_index_top[0] = ref_index_table[0][0];
nengel@2 224 hc->ref_index_top[1] = ref_index_table[1][0];
nengel@2 225 hc->ref_index[0] = ref_index_table[0][1];
nengel@2 226 hc->ref_index[1] = ref_index_table[1][1];
nengel@2 227 hc->motion_val_top[0] = motion_val_table[0][0];
nengel@2 228 hc->motion_val_top[1] = motion_val_table[1][0];
nengel@2 229 hc->motion_val[0] = motion_val_table[0][1];
nengel@2 230 hc->motion_val[1] = motion_val_table[1][1];
nengel@2 231
nengel@2 232 int mb_stride = hc->mb_stride;
nengel@2 233
nengel@2 234 if (s->slice_type_nos == FF_B_TYPE){
nengel@2 235 while(!dep_resolved(&spe));
nengel@2 236 spu_dma_get(list1_mb_type_table[0], (unsigned) (s->list1.mb_type -1), mb_stride*sizeof(uint32_t), ED_get);
nengel@2 237 spu_dma_get(list1_ref_index_table[0][0], (unsigned) s->list1.ref_index[0], mb_stride*4*sizeof(int8_t), ED_get);
nengel@2 238 spu_dma_get(list1_ref_index_table[0][1], (unsigned) s->list1.ref_index[1], mb_stride*4*sizeof(int8_t), ED_get);
nengel@2 239 wait_dma_id(ED_get);
nengel@2 240 spu_dma_get(list1_mb_type_table[1], (unsigned) (s->list1.mb_type -1 + mb_stride), mb_stride*sizeof(uint32_t), ED_get);
nengel@2 241 spu_dma_get(list1_ref_index_table[1][0], (unsigned) (s->list1.ref_index[0] + 4*mb_stride), mb_stride*4*sizeof(int8_t), ED_get);
nengel@2 242 spu_dma_get(list1_ref_index_table[1][1], (unsigned) (s->list1.ref_index[1] + 4*mb_stride), mb_stride*4*sizeof(int8_t), ED_get);
nengel@2 243 hc->list1_mb_type = list1_mb_type_table[0]+1;
nengel@2 244 hc->list1_ref_index[0] = list1_ref_index_table[0][0];
nengel@2 245 hc->list1_ref_index[1] = list1_ref_index_table[0][1];
nengel@2 246 }
nengel@2 247
nengel@2 248 }
nengel@2 249
nengel@2 250 static void update_entropy_buf(H264Cabac_spu *hc, EDSlice_spu *s, int line){
nengel@2 251 int mb_stride = hc->mb_stride;
nengel@2 252 int mb_width = hc->mb_width;
nengel@2 253 int top = (line+1)%2;
nengel@2 254 int cur = line%2;
nengel@2 255 int bottom = (line+1)%2; //same as top, but to identify prebuffering of next line.
nengel@2 256
nengel@2 257 hc->non_zero_count_top = non_zero_count_table[top];
nengel@2 258 hc->non_zero_count = non_zero_count_table[cur];
nengel@2 259 hc->mvd_top[0] = mvd_table[0][top];
nengel@2 260 hc->mvd[0] = mvd_table[0][cur];
nengel@2 261 hc->mvd_top[1] = mvd_table[1][top];
nengel@2 262 hc->mvd[1] = mvd_table[1][cur];
nengel@2 263 hc->direct_top = direct_table[top];
nengel@2 264 hc->direct = direct_table[cur];
nengel@2 265 hc->chroma_pred_mode_top = chroma_pred_mode_table[top];
nengel@2 266 hc->chroma_pred_mode = chroma_pred_mode_table[cur];
nengel@2 267 hc->intra4x4_pred_mode_top = intra4x4_pred_mode_table[top];
nengel@2 268 hc->intra4x4_pred_mode = intra4x4_pred_mode_table[cur];
nengel@2 269 hc->cbp_top = cbp_table[top];
nengel@2 270 hc->cbp = cbp_table[cur];
nengel@2 271 hc->qscale_top = qscale_table[top] +1;
nengel@2 272 hc->qscale = qscale_table[cur] +1;
nengel@2 273
nengel@2 274 hc->mb_type_top = mb_type_table[top]+1;
nengel@2 275 hc->mb_type = mb_type_table[cur]+1;
nengel@2 276 hc->ref_index_top[0] = ref_index_table[0][top];
nengel@2 277 hc->ref_index_top[1] = ref_index_table[1][top];
nengel@2 278 hc->ref_index[0] = ref_index_table[0][cur];
nengel@2 279 hc->ref_index[1] = ref_index_table[1][cur];
nengel@2 280 hc->motion_val_top[0] = motion_val_table[0][top];
nengel@2 281 hc->motion_val_top[1] = motion_val_table[1][top];
nengel@2 282 hc->motion_val[0] = motion_val_table[0][cur];
nengel@2 283 hc->motion_val[1] = motion_val_table[1][cur];
nengel@2 284
nengel@2 285 wait_dma_id(ED_put);
nengel@2 286
nengel@2 287 spu_dma_put(mb_type_table[top], (unsigned) (s->pic.mb_type -1 + line*mb_stride), mb_stride*sizeof(uint32_t), ED_put);
nengel@2 288 spu_dma_put(ref_index_table[0][top], (unsigned) (s->pic.ref_index[0] + line*4*mb_stride), 4*mb_stride*sizeof(int8_t), ED_put);
nengel@2 289 spu_dma_put(ref_index_table[1][top], (unsigned) (s->pic.ref_index[1] + line*4*mb_stride), 4*mb_stride*sizeof(int8_t), ED_put);
nengel@2 290 spu_dma_put(motion_val_table[0][top], (unsigned) (s->pic.motion_val[0]+ line*16*mb_width), 16*mb_width*2*sizeof(int16_t), ED_put);
nengel@2 291 spu_dma_put(motion_val_table[1][top], (unsigned) (s->pic.motion_val[1]+ line*16*mb_width), 16*mb_width*2*sizeof(int16_t), ED_put);
nengel@2 292
nengel@2 293 if (s->slice_type_nos == FF_B_TYPE){
nengel@2 294 update_tgt_spe_dep(&spe, 0);
nengel@2 295 wait_dma_id(ED_get);
nengel@2 296
nengel@2 297 if (line + 2 < hc->mb_height){
nengel@2 298 while(!dep_resolved(&spe));
nengel@2 299 spu_dma_get(list1_mb_type_table[cur], (unsigned) (s->list1.mb_type -1 + (line+2)*mb_stride), mb_stride*sizeof(uint32_t), ED_get);
nengel@2 300 spu_dma_get(list1_ref_index_table[cur][0], (unsigned) (s->list1.ref_index[0] + (line+2)*4*mb_stride), mb_stride*4*sizeof(int8_t), ED_get);
nengel@2 301 spu_dma_get(list1_ref_index_table[cur][1], (unsigned) (s->list1.ref_index[1] + (line+2)*4*mb_stride), mb_stride*4*sizeof(int8_t), ED_get);
nengel@2 302 }
nengel@2 303 hc->list1_mb_type = list1_mb_type_table[bottom]+1;
nengel@2 304 hc->list1_ref_index[0] = list1_ref_index_table[bottom][0];
nengel@2 305 hc->list1_ref_index[1] = list1_ref_index_table[bottom][1];
nengel@2 306 }
nengel@2 307
nengel@2 308 }
nengel@2 309
nengel@2 310 // void printmbdiff(EDSlice_spu *s, H264Cabac_spu *hc, H264Mb *mp, H264Mb *ms){
nengel@2 311 //
nengel@2 312 // printf("mb_x %d, %d\n", mp->mb_x, ms->mb_x);
nengel@2 313 // printf("mb_y %d, %d\n", mp->mb_y, ms->mb_y);
nengel@2 314 // printf("mb_xy %d, %d\n", mp->mb_xy, ms->mb_xy);
nengel@2 315 // printf("top_mb_xy %d, %d\n", mp->top_mb_xy, ms->top_mb_xy);
nengel@2 316 // printf("left_mb_xy %d, %d\n", mp->left_mb_xy, ms->left_mb_xy);
nengel@2 317 // printf("chroma_pred_mode %d, %d\n", mp->chroma_pred_mode, ms->chroma_pred_mode);
nengel@2 318 // printf("intra16x16_pred_mode %d, %d\n", mp->intra16x16_pred_mode, ms->intra16x16_pred_mode);
nengel@2 319 // printf("topleft_samples %d, %d\n", mp->topleft_samples_available, ms->topleft_samples_available);
nengel@2 320 // printf("topright_samples %d, %d\n", mp->topright_samples_available, ms->topright_samples_available);
nengel@2 321 // printf("top_samples %d, %d\n", mp->top_samples_available, ms->top_samples_available);
nengel@2 322 // printf("left_samples %d, %d\n", mp->left_samples_available, ms->left_samples_available);
nengel@2 323 //
nengel@2 324 // if (memcmp(mp->intra4x4_pred_mode_cache, ms->intra4x4_pred_mode_cache, 40)){
nengel@2 325 // for (int i=0; i<5; i++){
nengel@2 326 // for (int j=0; j<8; j++){
nengel@2 327 // printf("%d, %d\t", mp->intra4x4_pred_mode_cache[i*8+j],ms->intra4x4_pred_mode_cache[i*8+j]);
nengel@2 328 // }
nengel@2 329 // printf("\n");
nengel@2 330 // }
nengel@2 331 // }
nengel@2 332 //
nengel@2 333 // if (memcmp(mp->non_zero_count_cache, ms->non_zero_count_cache, 48)){
nengel@2 334 // for (int i=0; i<6; i++){
nengel@2 335 // for (int j=0; j<8; j++){
nengel@2 336 // printf("%u, %u\t", mp->non_zero_count_cache[i*8+j],ms->non_zero_count_cache[i*8+j]);
nengel@2 337 // }
nengel@2 338 // printf("\n");
nengel@2 339 // }
nengel@2 340 // }
nengel@2 341 //
nengel@2 342 // if (memcmp(mp->sub_mb_type, ms->sub_mb_type, 8)){
nengel@2 343 // for (int i=0; i<4; i++){
nengel@2 344 // printf("%u, %u\t", mp->sub_mb_type[i], mp->sub_mb_type[i]);
nengel@2 345 // printf("\n");
nengel@2 346 // }
nengel@2 347 // }
nengel@2 348 //
nengel@2 349 // if (memcmp(mp->mv_cache, ms->mv_cache, 320)){
nengel@2 350 // for (int k=0; k<2; k++){
nengel@2 351 // for (int i=0; i<5; i++){
nengel@2 352 // for (int j=0; j<8; j++){
nengel@2 353 // printf("%d, %d, %d, %d\t", mp->mv_cache[k][i*8+j][0], mp->mv_cache[k][i*8+j][1], ms->mv_cache[k][i*8+j][0], ms->mv_cache[k][i*8+j][1]);
nengel@2 354 // }
nengel@2 355 // printf("\n");
nengel@2 356 // }
nengel@2 357 // }
nengel@2 358 // }
nengel@2 359 //
nengel@2 360 // if (memcmp(mp->ref_cache, ms->ref_cache, 80)){
nengel@2 361 // for (int k=0; k<2; k++){
nengel@2 362 // for (int i=0; i<5; i++){
nengel@2 363 // for (int j=0; j<8; j++){
nengel@2 364 // printf("%d, %d\t", mp->ref_cache[k][i*8+j], ms->ref_cache[k][i*8+j]);
nengel@2 365 // }
nengel@2 366 // printf("\n");
nengel@2 367 // }
nengel@2 368 // }
nengel@2 369 // }
nengel@2 370 //
nengel@2 371 // printf("cbp %d, %d\n", mp->cbp, ms->cbp);
nengel@2 372 // for (int i=0; i<hc->mb_stride; i++){
nengel@2 373 // printf("%d, ", hc->cbp[i]); fflush(0);
nengel@2 374 // }
nengel@2 375 // printf("\n");
nengel@2 376 //
nengel@2 377 // printf("mb_type %x, %x\n", mp->mb_type, ms->mb_type);
nengel@2 378 // printf("mb_type IS_INTRA %d, IS_INTRA16x16 %d, IS_DIRECT %d\n", IS_INTRA(ms->mb_type), IS_INTRA16x16(ms->mb_type), IS_DIRECT(ms->mb_type) );
nengel@2 379 // printf("left_type %d, %d\n", mp->left_type, ms->left_type);
nengel@2 380 // printf("top_type %d, %d\n", mp->top_type, ms->top_type);
nengel@2 381 // printf("qscale_mb_xy %d, %d\n", mp->qscale_mb_xy, ms->qscale_mb_xy);
nengel@2 382 // printf("qscale_left_mb_xy %d, %d\n", mp->qscale_left_mb_xy, ms->qscale_left_mb_xy);
nengel@2 383 // printf("qscale_top_mb_xy %d, %d\n", mp->qscale_top_mb_xy, ms->qscale_top_mb_xy);
nengel@2 384 // // for (int i=0; i<hc->mb_stride; i++){
nengel@2 385 // // printf("%d, ", qscale_table[0][i]); fflush(0);
nengel@2 386 // // }
nengel@2 387 //
nengel@2 388 // if (memcmp(mp->mb, ms->mb, 768)){
nengel@2 389 // for (int i=0; i<16; i++){
nengel@2 390 // for (int j=0; j<16; j++){
nengel@2 391 // printf("%d, %d\t", mp->mb[j + i*16], ms->ref_cache[j + i*16]);
nengel@2 392 // }
nengel@2 393 // printf("\n");
nengel@2 394 // }
nengel@2 395 // for (int i=0; i<8; i++){
nengel@2 396 // for (int j=0; j<8; j++){
nengel@2 397 // printf("%d, %d\t", mp->mb[256 + j + i*8], ms->ref_cache[j + i*8]);
nengel@2 398 // }
nengel@2 399 // printf("\n");
nengel@2 400 // }
nengel@2 401 // for (int i=0; i<8; i++){
nengel@2 402 // for (int j=0; j<8; j++){
nengel@2 403 // printf("%d, %d\t", mp->mb[320+ j + i*8], ms->ref_cache[j + i*8]);
nengel@2 404 // }
nengel@2 405 // printf("\n");
nengel@2 406 // }
nengel@2 407 // }
nengel@2 408 //
nengel@2 409 // if (memcmp(mp->bS, ms->bS, 32)){
nengel@2 410 // for (int k=0; k<2; k++){
nengel@2 411 // for (int i=0; i<4; i++){
nengel@2 412 // for (int j=0; j<4; j++){
nengel@2 413 // printf("%d, %d\t", mp->bS[k][i][j], mp->mv_cache[k][i][j]);
nengel@2 414 // }
nengel@2 415 // printf("\n");
nengel@2 416 // }
nengel@2 417 // }
nengel@2 418 // }
nengel@2 419 // if (memcmp(mp->edges, ms->edges, 4)){
nengel@2 420 // printf("edges %d, %d, %d, %d\n", mp->edges[0], ms->edges[0], mp->edges[1], ms->edges[1]);
nengel@2 421 // printf("deblock %d, %d\n", mp->deblock_mb, ms->deblock_mb);
nengel@2 422 // }
nengel@2 423 //
nengel@2 424 // printf("dequant4_coeff_y %d, %d\n", mp->dequant4_coeff_y, ms->dequant4_coeff_y);
nengel@2 425 // printf("dequant4_coeff_cb %d, %d\n", mp->dequant4_coeff_cb, ms->dequant4_coeff_cb);
nengel@2 426 // printf("dequant4_coeff_cr %d, %d\n", mp->dequant4_coeff_cr, ms->dequant4_coeff_cr);
nengel@2 427 // }
nengel@2 428 // DECLARE_ALIGNED_16(H264Mb, tmp);
nengel@2 429
nengel@2 430
nengel@2 431 int main(unsigned long long id, unsigned long long argp){
nengel@2 432 EDSlice_spu *s;
nengel@2 433 H264Cabac_spu *hc = &hcabac;
nengel@2 434 CABACContext *c = &cabac;
nengel@2 435 H264spe *p = &spe;
nengel@2 436
nengel@2 437 spu_write_out_mbox((unsigned) slice);
nengel@2 438 spu_dma_get(p, (unsigned) argp, sizeof(H264spe), ED_spe); //ID_slice is used out of convienience
nengel@2 439 wait_dma_id(ED_spe);
nengel@2 440
nengel@2 441 ff_init_cabac_states();
nengel@2 442 init_cabac(p, hc);
nengel@2 443 hc->blocking=0;
nengel@2 444 for(;;){
nengel@2 445 spu_read_in_mbox();
nengel@2 446 s = &slice[0];
nengel@2 447 reset_cabac_buffers();
nengel@2 448 init_entropy_buf(hc, s);
nengel@2 449
nengel@2 450 if (hc->blocking) wait_dma_id(ED_get);
nengel@2 451 //printf("framesize %d\n", s->byte_bufsize);fflush(0);
nengel@2 452 init_dequant_tables(s, hc);
nengel@2 453 ff_init_cabac_decoder( c, s->bytestream_start, s->byte_bufsize );
nengel@2 454 ff_h264_init_cabac_states(s, c);
nengel@2 455
nengel@2 456 int mb_slot=0;
nengel@2 457 for(int j=0; j<hc->mb_height; j++){
nengel@2 458 for(int i=0; i<hc->mb_width; i++){
nengel@2 459 int eos,ret;
nengel@2 460 H264Mb *m = &mb[mb_slot];
nengel@2 461 m->mb_x=i;
nengel@2 462 m->mb_y=j;
nengel@2 463 s->m = m;
nengel@2 464
nengel@2 465 ret = ff_h264_decode_mb_cabac(hc, s, c);
nengel@2 466
nengel@2 467 // spu_dma_get(&tmp, (unsigned) &s->mbs[j*hc->mb_width + i], sizeof(H264Mb), ED_get);
nengel@2 468 // wait_dma_id(ED_get);
nengel@2 469 // if (memcmp(&tmp, m, sizeof(H264Mb))){
nengel@2 470 // printf("coded pic num %d\n", s->coded_pic_num);
nengel@2 471 // printmbdiff(s, hc,&tmp, m);
nengel@2 472 // return 0;
nengel@2 473 // }
nengel@2 474 //printf("qscale %d\n", m->qscale_mb_xy);
nengel@2 475 if (!hc->blocking){
nengel@2 476 if (mb_slot){
nengel@2 477 spu_dma_put(m, (unsigned) &s->mbs[j*hc->mb_width + i], sizeof(H264Mb), ED_putmb1);
nengel@2 478 wait_dma_id(ED_putmb0);
nengel@2 479 }else {
nengel@2 480 spu_dma_put(m, (unsigned) &s->mbs[j*hc->mb_width + i], sizeof(H264Mb), ED_putmb0);
nengel@2 481 wait_dma_id(ED_putmb1);
nengel@2 482 }
nengel@2 483 mb_slot++; mb_slot%=2;
nengel@2 484 }else {
nengel@2 485 spu_dma_put(m, (unsigned) &s->mbs[j*hc->mb_width + i], sizeof(H264Mb), ED_putmb0);
nengel@2 486 wait_dma_id(ED_putmb0);
nengel@2 487 }
nengel@2 488
nengel@2 489
nengel@2 490 eos = get_cabac_terminate( c);
nengel@2 491
nengel@2 492 if( ret < 0) {
nengel@2 493 fprintf(stderr, "error at %d bytecount\n", bytecount);
nengel@2 494 return -1;
nengel@2 495 }
nengel@2 496 }
nengel@2 497 update_entropy_buf(hc, s, j);
nengel@2 498 if (hc->blocking){ wait_dma_id(ED_get); wait_dma_id(ED_put);}
nengel@2 499 }
nengel@2 500 wait_dma_id(ED_put);
nengel@2 501 spu_write_out_mbox(1);
nengel@2 502
nengel@2 503 }
nengel@2 504
nengel@2 505 return 0;
nengel@2 506
nengel@2 507
nengel@2 508 }