view libavcodec/cell/spe_ed.c @ 2:897f711a7157

rearrange to work with autoconf
author Nina Engelhardt <nengel@mailbox.tu-berlin.de>
date Tue, 25 Sep 2012 15:55:33 +0200
parents
children
line source
1 #define CELL_SPE
3 #include <string.h>
4 #include <stdio.h>
5 #include <spu_intrinsics.h>
6 #include <spu_mfcio.h>
7 #include "libavcodec/avcodec.h"
8 #include "h264_cabac_spu.h"
9 #include "cabac_spu.h"
10 #include "h264_types_spu.h"
11 #include "h264_tables.h"
12 #include "h264_dma.h"
13 #include "h264_tables.h"
// Row-buffer dimensions, in macroblocks. Most tables below are sized with
// MB_STRIDE; motion_val_table is sized with MB_WIDTH.
// NOTE(review): 240 is presumably the widest supported picture in macroblocks — confirm.
15 #define MB_WIDTH 240
16 #define MB_STRIDE (MB_WIDTH+16)
// CABAC decoder state; main() works on these through pointers.
18 H264Cabac_spu hcabac;
19 CABACContext cabac;
// Slice parameter storage. main() publishes the address of slice[] through the
// outbound mailbox and decodes from slice[0].
20 DECLARE_ALIGNED_16(EDSlice_spu, slice[2]);
// Two macroblock slots; main() alternates between them so one can be DMA'd out
// while the next macroblock is decoded.
21 DECLARE_ALIGNED_16(H264Mb, mb[2]);
// Per-SPE parameters, fetched from argp at the start of main().
22 DECLARE_ALIGNED_16(H264spe, spe);
// Neighbour-context tables. Each holds two macroblock rows that
// init_entropy_buf()/update_entropy_buf() hand out alternately as the "top"
// and the "current" row. For mvd_table, ref_index_table and motion_val_table
// the index order is [list][row].
24 DECLARE_ALIGNED_16(uint8_t, non_zero_count_table[2][MB_STRIDE][32]);
25 DECLARE_ALIGNED_16(uint8_t, mvd_table[2][2][8*MB_STRIDE][2]);
26 DECLARE_ALIGNED_16(uint8_t, direct_table[2][4*MB_STRIDE]);
27 DECLARE_ALIGNED_16(uint8_t, chroma_pred_mode_table[2][MB_STRIDE]);
28 DECLARE_ALIGNED_16(uint8_t, intra4x4_pred_mode_table[2][8*MB_STRIDE]);
29 DECLARE_ALIGNED_16(uint16_t,cbp_table[2][MB_STRIDE]);
30 DECLARE_ALIGNED_16(uint8_t, qscale_table[2][MB_STRIDE]);
32 DECLARE_ALIGNED_16(uint32_t, mb_type_table[2][MB_STRIDE]);
33 DECLARE_ALIGNED_16(int8_t, ref_index_table[2][2][4*MB_STRIDE]);
34 DECLARE_ALIGNED_16(int16_t, motion_val_table[2][2][4*4*MB_WIDTH][2]);
// Local-store window onto the bitstream; filled by DMA in ff_init_cabac_decoder().
36 DECLARE_ALIGNED(128, uint8_t, bytestream_ls[4096]);
// Two-row windows onto the list1 picture's mb_type / ref_index data, fetched by
// DMA for B slices. Note the index order of list1_ref_index_table is [row][list],
// unlike ref_index_table above.
37 DECLARE_ALIGNED_16(uint32_t, list1_mb_type_table[2][MB_STRIDE]);
38 DECLARE_ALIGNED_16(int8_t, list1_ref_index_table[2][2][4*MB_STRIDE]);
40 DECLARE_ALIGNED_16(spe_pos, dma_temp); //dma temp for sending
41 //mb position of neighbouring spes
42 DECLARE_ALIGNED_16(volatile spe_pos, src_spe); //written by SPE_ID -1
// Line counter: incremented by update_tgt_spe_dep() and compared against
// src_spe.count in dep_resolved().
43 static int total_lines;
45 static inline int dep_resolved(H264spe *p){
46 int spe_id = p->spe_id;
47 volatile int lines_proc = src_spe.count;
48 if (spe_id==0)
49 return (total_lines < lines_proc-1 +p->mb_height)? 1:0;
50 else
51 return (total_lines < lines_proc-1)? 1:0;
52 }
54 static void update_tgt_spe_dep(H264spe *p, int end){
55 // if (end ){
56 total_lines++;
57 spe_pos* dma_spe = &dma_temp;
58 spe_pos* tgt_spe = p->tgt_spe + (unsigned) &src_spe; //located in target spe local store
59 dma_spe->count = end? total_lines+1: total_lines;
60 spu_dma_barrier_put(dma_spe, (unsigned) tgt_spe, sizeof(dma_temp), ED_put);
61 // }
63 }
65 static int init_cabac(H264spe *p, H264Cabac_spu *hc){
66 hc->mb_height = p->mb_height;
67 hc->mb_width = p->mb_width;
68 hc->b_stride = 4*p->mb_width;
69 hc->mb_stride = p->mb_stride;
71 for(int i=0; i<16; i++){
72 #define T(x) (x>>2) | ((x<<2) & 0xF)
73 hc->zigzag_scan[i] = T(zigzag_scan[i]);
74 #undef T
75 }
76 for(int i=0; i<64; i++){
77 #define T(x) (x>>3) | ((x&7)<<3)
78 hc->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]);
79 #undef T
80 }
81 }
83 static void reset_cabac_buffers(){
84 memset(intra4x4_pred_mode_table, 0, sizeof(intra4x4_pred_mode_table));
85 memset(mvd_table, 0, sizeof(mvd_table));
86 memset(direct_table, 0, sizeof(direct_table));
87 memset(chroma_pred_mode_table, 0, sizeof(chroma_pred_mode_table));
88 memset(cbp_table, 0, sizeof(cbp_table));
89 memset(qscale_table, 0, sizeof(qscale_table));
90 memset(mb_type_table, 0, sizeof(mb_type_table));
91 memset(ref_index_table, 0, sizeof(ref_index_table));
92 memset(motion_val_table, 0, sizeof(motion_val_table));
93 }
95 static void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int bufsize){
96 int align = (unsigned) buf & 0xF;
97 int dma_size;
99 c->bytestream_ea_start=
100 c->bytestream_ea= buf;
101 c->bytestream_ea_end= buf + bufsize;
102 c->bufsize = bufsize;
104 if (bufsize + align >= sizeof(bytestream_ls)){
105 dma_size = sizeof(bytestream_ls);
106 c->bufsize = c->bufsize +align - sizeof(bytestream_ls);
107 }else{
108 int align_end = (bufsize+align) &0xF;
109 if (align_end)
110 dma_size = bufsize+align + 16-align_end;
111 else
112 dma_size = bufsize+align;
113 c->bufsize = 0;
114 }
115 // printf("%d\n", dma_size);
116 c->bytestream_end = &bytestream_ls[dma_size];
117 c->bytestream_start= c->bytestream = &bytestream_ls[align];
118 spu_dma_get(bytestream_ls, (unsigned) buf - align, dma_size, ED_get );
119 c->bytestream_ea_start=
120 c->bytestream_ea= buf + dma_size -align;
122 wait_dma_id(ED_get);
124 if (align %2){
125 c->low = (*c->bytestream++)<<18;
126 c->low+= (*c->bytestream++)<<10;
127 c->low+= ((*c->bytestream++)<<2) + 2;
128 }else {
129 c->low = (*c->bytestream++)<<18;
130 c->low+= (*c->bytestream++)<<10;
131 c->low+= (2<<8);
132 }
134 c->range= 0x1FE;
135 bytecount=0;
136 }
138 static void init_dequant8_coeff_table(EDSlice_spu *s, H264Cabac_spu *hc){
139 int i,q,x;
140 const int transpose = HAVE_ALTIVEC;
141 hc->dequant8_coeff[0] = hc->dequant8_buffer[0];
142 hc->dequant8_coeff[1] = hc->dequant8_buffer[1];
144 for(i=0; i<2; i++){
145 if(i && !memcmp(s->pps.scaling_matrix8[0], s->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
146 hc->dequant8_coeff[1] = hc->dequant8_buffer[0];
147 break;
148 }
150 for(q=0; q<52; q++){
151 int shift = div6[q];
152 int idx = rem6[q];
153 for(x=0; x<64; x++)
154 hc->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
155 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
156 s->pps.scaling_matrix8[i][x]) << shift;
157 }
158 }
159 }
161 static void init_dequant4_coeff_table(EDSlice_spu *s, H264Cabac_spu *hc){
162 int i,j,q,x;
163 const int transpose = HAVE_MMX | HAVE_ALTIVEC | HAVE_NEON;
164 for(i=0; i<6; i++ ){
165 hc->dequant4_coeff[i] = hc->dequant4_buffer[i];
166 for(j=0; j<i; j++){
167 if(!memcmp(s->pps.scaling_matrix4[j], s->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
168 hc->dequant4_coeff[i] = hc->dequant4_buffer[j];
169 break;
170 }
171 }
172 if(j<i)
173 continue;
175 for(q=0; q<52; q++){
176 int shift = div6[q] + 2;
177 int idx = rem6[q];
178 for(x=0; x<16; x++)
179 hc->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
180 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
181 s->pps.scaling_matrix4[i][x]) << shift;
182 }
183 }
184 }
186 static void init_dequant_tables(EDSlice_spu *s, H264Cabac_spu *hc){
187 int i,x;
189 init_dequant4_coeff_table(s, hc);
190 if(s->pps.transform_8x8_mode)
191 init_dequant8_coeff_table(s, hc);
192 if(s->transform_bypass){
193 for(i=0; i<6; i++)
194 for(x=0; x<16; x++)
195 hc->dequant4_coeff[i][0][x] = 1<<6;
196 if(s->pps.transform_8x8_mode)
197 for(i=0; i<2; i++)
198 for(x=0; x<64; x++)
199 hc->dequant8_coeff[i][0][x] = 1<<6;
200 }
201 }
203 static void init_entropy_buf(H264Cabac_spu *hc, EDSlice_spu *s){
204 hc->non_zero_count_top = non_zero_count_table[0];
205 hc->non_zero_count = non_zero_count_table[1];
206 hc->mvd_top[0] = mvd_table[0][0];
207 hc->mvd[0] = mvd_table[0][1];
208 hc->mvd_top[1] = mvd_table[1][0];
209 hc->mvd[1] = mvd_table[1][1];
210 hc->direct_top = direct_table[0];
211 hc->direct = direct_table[1];
212 hc->chroma_pred_mode_top = chroma_pred_mode_table[0];
213 hc->chroma_pred_mode = chroma_pred_mode_table[1];
214 hc->intra4x4_pred_mode_top = intra4x4_pred_mode_table[0];
215 hc->intra4x4_pred_mode = intra4x4_pred_mode_table[1];
216 hc->cbp_top = cbp_table[0];
217 hc->cbp = cbp_table[1];
218 hc->qscale_top = qscale_table[0] +1;
219 hc->qscale = qscale_table[1] +1;
221 hc->mb_type_top = mb_type_table[0]+1;
222 hc->mb_type = mb_type_table[1]+1;
223 hc->ref_index_top[0] = ref_index_table[0][0];
224 hc->ref_index_top[1] = ref_index_table[1][0];
225 hc->ref_index[0] = ref_index_table[0][1];
226 hc->ref_index[1] = ref_index_table[1][1];
227 hc->motion_val_top[0] = motion_val_table[0][0];
228 hc->motion_val_top[1] = motion_val_table[1][0];
229 hc->motion_val[0] = motion_val_table[0][1];
230 hc->motion_val[1] = motion_val_table[1][1];
232 int mb_stride = hc->mb_stride;
234 if (s->slice_type_nos == FF_B_TYPE){
235 while(!dep_resolved(&spe));
236 spu_dma_get(list1_mb_type_table[0], (unsigned) (s->list1.mb_type -1), mb_stride*sizeof(uint32_t), ED_get);
237 spu_dma_get(list1_ref_index_table[0][0], (unsigned) s->list1.ref_index[0], mb_stride*4*sizeof(int8_t), ED_get);
238 spu_dma_get(list1_ref_index_table[0][1], (unsigned) s->list1.ref_index[1], mb_stride*4*sizeof(int8_t), ED_get);
239 wait_dma_id(ED_get);
240 spu_dma_get(list1_mb_type_table[1], (unsigned) (s->list1.mb_type -1 + mb_stride), mb_stride*sizeof(uint32_t), ED_get);
241 spu_dma_get(list1_ref_index_table[1][0], (unsigned) (s->list1.ref_index[0] + 4*mb_stride), mb_stride*4*sizeof(int8_t), ED_get);
242 spu_dma_get(list1_ref_index_table[1][1], (unsigned) (s->list1.ref_index[1] + 4*mb_stride), mb_stride*4*sizeof(int8_t), ED_get);
243 hc->list1_mb_type = list1_mb_type_table[0]+1;
244 hc->list1_ref_index[0] = list1_ref_index_table[0][0];
245 hc->list1_ref_index[1] = list1_ref_index_table[0][1];
246 }
248 }
250 static void update_entropy_buf(H264Cabac_spu *hc, EDSlice_spu *s, int line){
251 int mb_stride = hc->mb_stride;
252 int mb_width = hc->mb_width;
253 int top = (line+1)%2;
254 int cur = line%2;
255 int bottom = (line+1)%2; //same as top, but to identify prebuffering of next line.
257 hc->non_zero_count_top = non_zero_count_table[top];
258 hc->non_zero_count = non_zero_count_table[cur];
259 hc->mvd_top[0] = mvd_table[0][top];
260 hc->mvd[0] = mvd_table[0][cur];
261 hc->mvd_top[1] = mvd_table[1][top];
262 hc->mvd[1] = mvd_table[1][cur];
263 hc->direct_top = direct_table[top];
264 hc->direct = direct_table[cur];
265 hc->chroma_pred_mode_top = chroma_pred_mode_table[top];
266 hc->chroma_pred_mode = chroma_pred_mode_table[cur];
267 hc->intra4x4_pred_mode_top = intra4x4_pred_mode_table[top];
268 hc->intra4x4_pred_mode = intra4x4_pred_mode_table[cur];
269 hc->cbp_top = cbp_table[top];
270 hc->cbp = cbp_table[cur];
271 hc->qscale_top = qscale_table[top] +1;
272 hc->qscale = qscale_table[cur] +1;
274 hc->mb_type_top = mb_type_table[top]+1;
275 hc->mb_type = mb_type_table[cur]+1;
276 hc->ref_index_top[0] = ref_index_table[0][top];
277 hc->ref_index_top[1] = ref_index_table[1][top];
278 hc->ref_index[0] = ref_index_table[0][cur];
279 hc->ref_index[1] = ref_index_table[1][cur];
280 hc->motion_val_top[0] = motion_val_table[0][top];
281 hc->motion_val_top[1] = motion_val_table[1][top];
282 hc->motion_val[0] = motion_val_table[0][cur];
283 hc->motion_val[1] = motion_val_table[1][cur];
285 wait_dma_id(ED_put);
287 spu_dma_put(mb_type_table[top], (unsigned) (s->pic.mb_type -1 + line*mb_stride), mb_stride*sizeof(uint32_t), ED_put);
288 spu_dma_put(ref_index_table[0][top], (unsigned) (s->pic.ref_index[0] + line*4*mb_stride), 4*mb_stride*sizeof(int8_t), ED_put);
289 spu_dma_put(ref_index_table[1][top], (unsigned) (s->pic.ref_index[1] + line*4*mb_stride), 4*mb_stride*sizeof(int8_t), ED_put);
290 spu_dma_put(motion_val_table[0][top], (unsigned) (s->pic.motion_val[0]+ line*16*mb_width), 16*mb_width*2*sizeof(int16_t), ED_put);
291 spu_dma_put(motion_val_table[1][top], (unsigned) (s->pic.motion_val[1]+ line*16*mb_width), 16*mb_width*2*sizeof(int16_t), ED_put);
293 if (s->slice_type_nos == FF_B_TYPE){
294 update_tgt_spe_dep(&spe, 0);
295 wait_dma_id(ED_get);
297 if (line + 2 < hc->mb_height){
298 while(!dep_resolved(&spe));
299 spu_dma_get(list1_mb_type_table[cur], (unsigned) (s->list1.mb_type -1 + (line+2)*mb_stride), mb_stride*sizeof(uint32_t), ED_get);
300 spu_dma_get(list1_ref_index_table[cur][0], (unsigned) (s->list1.ref_index[0] + (line+2)*4*mb_stride), mb_stride*4*sizeof(int8_t), ED_get);
301 spu_dma_get(list1_ref_index_table[cur][1], (unsigned) (s->list1.ref_index[1] + (line+2)*4*mb_stride), mb_stride*4*sizeof(int8_t), ED_get);
302 }
303 hc->list1_mb_type = list1_mb_type_table[bottom]+1;
304 hc->list1_ref_index[0] = list1_ref_index_table[bottom][0];
305 hc->list1_ref_index[1] = list1_ref_index_table[bottom][1];
306 }
308 }
310 // void printmbdiff(EDSlice_spu *s, H264Cabac_spu *hc, H264Mb *mp, H264Mb *ms){
311 //
312 // printf("mb_x %d, %d\n", mp->mb_x, ms->mb_x);
313 // printf("mb_y %d, %d\n", mp->mb_y, ms->mb_y);
314 // printf("mb_xy %d, %d\n", mp->mb_xy, ms->mb_xy);
315 // printf("top_mb_xy %d, %d\n", mp->top_mb_xy, ms->top_mb_xy);
316 // printf("left_mb_xy %d, %d\n", mp->left_mb_xy, ms->left_mb_xy);
317 // printf("chroma_pred_mode %d, %d\n", mp->chroma_pred_mode, ms->chroma_pred_mode);
318 // printf("intra16x16_pred_mode %d, %d\n", mp->intra16x16_pred_mode, ms->intra16x16_pred_mode);
319 // printf("topleft_samples %d, %d\n", mp->topleft_samples_available, ms->topleft_samples_available);
320 // printf("topright_samples %d, %d\n", mp->topright_samples_available, ms->topright_samples_available);
321 // printf("top_samples %d, %d\n", mp->top_samples_available, ms->top_samples_available);
322 // printf("left_samples %d, %d\n", mp->left_samples_available, ms->left_samples_available);
323 //
324 // if (memcmp(mp->intra4x4_pred_mode_cache, ms->intra4x4_pred_mode_cache, 40)){
325 // for (int i=0; i<5; i++){
326 // for (int j=0; j<8; j++){
327 // printf("%d, %d\t", mp->intra4x4_pred_mode_cache[i*8+j],ms->intra4x4_pred_mode_cache[i*8+j]);
328 // }
329 // printf("\n");
330 // }
331 // }
332 //
333 // if (memcmp(mp->non_zero_count_cache, ms->non_zero_count_cache, 48)){
334 // for (int i=0; i<6; i++){
335 // for (int j=0; j<8; j++){
336 // printf("%u, %u\t", mp->non_zero_count_cache[i*8+j],ms->non_zero_count_cache[i*8+j]);
337 // }
338 // printf("\n");
339 // }
340 // }
341 //
342 // if (memcmp(mp->sub_mb_type, ms->sub_mb_type, 8)){
343 // for (int i=0; i<4; i++){
344 // printf("%u, %u\t", mp->sub_mb_type[i], mp->sub_mb_type[i]);
345 // printf("\n");
346 // }
347 // }
348 //
349 // if (memcmp(mp->mv_cache, ms->mv_cache, 320)){
350 // for (int k=0; k<2; k++){
351 // for (int i=0; i<5; i++){
352 // for (int j=0; j<8; j++){
353 // printf("%d, %d, %d, %d\t", mp->mv_cache[k][i*8+j][0], mp->mv_cache[k][i*8+j][1], ms->mv_cache[k][i*8+j][0], ms->mv_cache[k][i*8+j][1]);
354 // }
355 // printf("\n");
356 // }
357 // }
358 // }
359 //
360 // if (memcmp(mp->ref_cache, ms->ref_cache, 80)){
361 // for (int k=0; k<2; k++){
362 // for (int i=0; i<5; i++){
363 // for (int j=0; j<8; j++){
364 // printf("%d, %d\t", mp->ref_cache[k][i*8+j], ms->ref_cache[k][i*8+j]);
365 // }
366 // printf("\n");
367 // }
368 // }
369 // }
370 //
371 // printf("cbp %d, %d\n", mp->cbp, ms->cbp);
372 // for (int i=0; i<hc->mb_stride; i++){
373 // printf("%d, ", hc->cbp[i]); fflush(0);
374 // }
375 // printf("\n");
376 //
377 // printf("mb_type %x, %x\n", mp->mb_type, ms->mb_type);
378 // printf("mb_type IS_INTRA %d, IS_INTRA16x16 %d, IS_DIRECT %d\n", IS_INTRA(ms->mb_type), IS_INTRA16x16(ms->mb_type), IS_DIRECT(ms->mb_type) );
379 // printf("left_type %d, %d\n", mp->left_type, ms->left_type);
380 // printf("top_type %d, %d\n", mp->top_type, ms->top_type);
381 // printf("qscale_mb_xy %d, %d\n", mp->qscale_mb_xy, ms->qscale_mb_xy);
382 // printf("qscale_left_mb_xy %d, %d\n", mp->qscale_left_mb_xy, ms->qscale_left_mb_xy);
383 // printf("qscale_top_mb_xy %d, %d\n", mp->qscale_top_mb_xy, ms->qscale_top_mb_xy);
384 // // for (int i=0; i<hc->mb_stride; i++){
385 // // printf("%d, ", qscale_table[0][i]); fflush(0);
386 // // }
387 //
388 // if (memcmp(mp->mb, ms->mb, 768)){
389 // for (int i=0; i<16; i++){
390 // for (int j=0; j<16; j++){
391 // printf("%d, %d\t", mp->mb[j + i*16], ms->ref_cache[j + i*16]);
392 // }
393 // printf("\n");
394 // }
395 // for (int i=0; i<8; i++){
396 // for (int j=0; j<8; j++){
397 // printf("%d, %d\t", mp->mb[256 + j + i*8], ms->ref_cache[j + i*8]);
398 // }
399 // printf("\n");
400 // }
401 // for (int i=0; i<8; i++){
402 // for (int j=0; j<8; j++){
403 // printf("%d, %d\t", mp->mb[320+ j + i*8], ms->ref_cache[j + i*8]);
404 // }
405 // printf("\n");
406 // }
407 // }
408 //
409 // if (memcmp(mp->bS, ms->bS, 32)){
410 // for (int k=0; k<2; k++){
411 // for (int i=0; i<4; i++){
412 // for (int j=0; j<4; j++){
413 // printf("%d, %d\t", mp->bS[k][i][j], mp->mv_cache[k][i][j]);
414 // }
415 // printf("\n");
416 // }
417 // }
418 // }
419 // if (memcmp(mp->edges, ms->edges, 4)){
420 // printf("edges %d, %d, %d, %d\n", mp->edges[0], ms->edges[0], mp->edges[1], ms->edges[1]);
421 // printf("deblock %d, %d\n", mp->deblock_mb, ms->deblock_mb);
422 // }
423 //
424 // printf("dequant4_coeff_y %d, %d\n", mp->dequant4_coeff_y, ms->dequant4_coeff_y);
425 // printf("dequant4_coeff_cb %d, %d\n", mp->dequant4_coeff_cb, ms->dequant4_coeff_cb);
426 // printf("dequant4_coeff_cr %d, %d\n", mp->dequant4_coeff_cr, ms->dequant4_coeff_cr);
427 // }
428 // DECLARE_ALIGNED_16(H264Mb, tmp);
431 int main(unsigned long long id, unsigned long long argp){
432 EDSlice_spu *s;
433 H264Cabac_spu *hc = &hcabac;
434 CABACContext *c = &cabac;
435 H264spe *p = &spe;
437 spu_write_out_mbox((unsigned) slice);
438 spu_dma_get(p, (unsigned) argp, sizeof(H264spe), ED_spe); //ID_slice is used out of convienience
439 wait_dma_id(ED_spe);
441 ff_init_cabac_states();
442 init_cabac(p, hc);
443 hc->blocking=0;
444 for(;;){
445 spu_read_in_mbox();
446 s = &slice[0];
447 reset_cabac_buffers();
448 init_entropy_buf(hc, s);
450 if (hc->blocking) wait_dma_id(ED_get);
451 //printf("framesize %d\n", s->byte_bufsize);fflush(0);
452 init_dequant_tables(s, hc);
453 ff_init_cabac_decoder( c, s->bytestream_start, s->byte_bufsize );
454 ff_h264_init_cabac_states(s, c);
456 int mb_slot=0;
457 for(int j=0; j<hc->mb_height; j++){
458 for(int i=0; i<hc->mb_width; i++){
459 int eos,ret;
460 H264Mb *m = &mb[mb_slot];
461 m->mb_x=i;
462 m->mb_y=j;
463 s->m = m;
465 ret = ff_h264_decode_mb_cabac(hc, s, c);
467 // spu_dma_get(&tmp, (unsigned) &s->mbs[j*hc->mb_width + i], sizeof(H264Mb), ED_get);
468 // wait_dma_id(ED_get);
469 // if (memcmp(&tmp, m, sizeof(H264Mb))){
470 // printf("coded pic num %d\n", s->coded_pic_num);
471 // printmbdiff(s, hc,&tmp, m);
472 // return 0;
473 // }
474 //printf("qscale %d\n", m->qscale_mb_xy);
475 if (!hc->blocking){
476 if (mb_slot){
477 spu_dma_put(m, (unsigned) &s->mbs[j*hc->mb_width + i], sizeof(H264Mb), ED_putmb1);
478 wait_dma_id(ED_putmb0);
479 }else {
480 spu_dma_put(m, (unsigned) &s->mbs[j*hc->mb_width + i], sizeof(H264Mb), ED_putmb0);
481 wait_dma_id(ED_putmb1);
482 }
483 mb_slot++; mb_slot%=2;
484 }else {
485 spu_dma_put(m, (unsigned) &s->mbs[j*hc->mb_width + i], sizeof(H264Mb), ED_putmb0);
486 wait_dma_id(ED_putmb0);
487 }
490 eos = get_cabac_terminate( c);
492 if( ret < 0) {
493 fprintf(stderr, "error at %d bytecount\n", bytecount);
494 return -1;
495 }
496 }
497 update_entropy_buf(hc, s, j);
498 if (hc->blocking){ wait_dma_id(ED_get); wait_dma_id(ED_put);}
499 }
500 wait_dma_id(ED_put);
501 spu_write_out_mbox(1);
503 }
505 return 0;
508 }