| rev |
line source |
|
nengel@2
|
1 #define CELL_SPE
|
|
nengel@2
|
2
|
|
nengel@2
|
3 #include <string.h>
|
|
nengel@2
|
4 #include <stdio.h>
|
|
nengel@2
|
5 #include <spu_intrinsics.h>
|
|
nengel@2
|
6 #include <spu_mfcio.h>
|
|
nengel@2
|
7 #include "libavcodec/avcodec.h"
|
|
nengel@2
|
8 #include "h264_cabac_spu.h"
|
|
nengel@2
|
9 #include "cabac_spu.h"
|
|
nengel@2
|
10 #include "h264_types_spu.h"
|
|
nengel@2
|
11 #include "h264_tables.h"
|
|
nengel@2
|
12 #include "h264_dma.h"
|
|
nengel@2
|
13 #include "h264_tables.h"
|
|
nengel@2
|
14
|
|
nengel@2
|
15 #define MB_WIDTH 240
|
|
nengel@2
|
16 #define MB_STRIDE (MB_WIDTH+16)
|
|
nengel@2
|
17
|
|
nengel@2
|
18 H264Cabac_spu hcabac;
|
|
nengel@2
|
19 CABACContext cabac;
|
|
nengel@2
|
20 DECLARE_ALIGNED_16(EDSlice_spu, slice[2]);
|
|
nengel@2
|
21 DECLARE_ALIGNED_16(H264Mb, mb[2]);
|
|
nengel@2
|
22 DECLARE_ALIGNED_16(H264spe, spe);
|
|
nengel@2
|
23
|
|
nengel@2
|
24 DECLARE_ALIGNED_16(uint8_t, non_zero_count_table[2][MB_STRIDE][32]);
|
|
nengel@2
|
25 DECLARE_ALIGNED_16(uint8_t, mvd_table[2][2][8*MB_STRIDE][2]);
|
|
nengel@2
|
26 DECLARE_ALIGNED_16(uint8_t, direct_table[2][4*MB_STRIDE]);
|
|
nengel@2
|
27 DECLARE_ALIGNED_16(uint8_t, chroma_pred_mode_table[2][MB_STRIDE]);
|
|
nengel@2
|
28 DECLARE_ALIGNED_16(uint8_t, intra4x4_pred_mode_table[2][8*MB_STRIDE]);
|
|
nengel@2
|
29 DECLARE_ALIGNED_16(uint16_t,cbp_table[2][MB_STRIDE]);
|
|
nengel@2
|
30 DECLARE_ALIGNED_16(uint8_t, qscale_table[2][MB_STRIDE]);
|
|
nengel@2
|
31
|
|
nengel@2
|
32 DECLARE_ALIGNED_16(uint32_t, mb_type_table[2][MB_STRIDE]);
|
|
nengel@2
|
33 DECLARE_ALIGNED_16(int8_t, ref_index_table[2][2][4*MB_STRIDE]);
|
|
nengel@2
|
34 DECLARE_ALIGNED_16(int16_t, motion_val_table[2][2][4*4*MB_WIDTH][2]);
|
|
nengel@2
|
35
|
|
nengel@2
|
36 DECLARE_ALIGNED(128, uint8_t, bytestream_ls[4096]);
|
|
nengel@2
|
37 DECLARE_ALIGNED_16(uint32_t, list1_mb_type_table[2][MB_STRIDE]);
|
|
nengel@2
|
38 DECLARE_ALIGNED_16(int8_t, list1_ref_index_table[2][2][4*MB_STRIDE]);
|
|
nengel@2
|
39
|
|
nengel@2
|
40 DECLARE_ALIGNED_16(spe_pos, dma_temp); //dma temp for sending
|
|
nengel@2
|
41 //mb position of neighbouring spes
|
|
nengel@2
|
42 DECLARE_ALIGNED_16(volatile spe_pos, src_spe); //written by SPE_ID -1
|
|
nengel@2
|
43 static int total_lines;
|
|
nengel@2
|
44
|
|
nengel@2
|
45 static inline int dep_resolved(H264spe *p){
|
|
nengel@2
|
46 int spe_id = p->spe_id;
|
|
nengel@2
|
47 volatile int lines_proc = src_spe.count;
|
|
nengel@2
|
48 if (spe_id==0)
|
|
nengel@2
|
49 return (total_lines < lines_proc-1 +p->mb_height)? 1:0;
|
|
nengel@2
|
50 else
|
|
nengel@2
|
51 return (total_lines < lines_proc-1)? 1:0;
|
|
nengel@2
|
52 }
|
|
nengel@2
|
53
|
|
nengel@2
|
54 static void update_tgt_spe_dep(H264spe *p, int end){
|
|
nengel@2
|
55 // if (end ){
|
|
nengel@2
|
56 total_lines++;
|
|
nengel@2
|
57 spe_pos* dma_spe = &dma_temp;
|
|
nengel@2
|
58 spe_pos* tgt_spe = p->tgt_spe + (unsigned) &src_spe; //located in target spe local store
|
|
nengel@2
|
59 dma_spe->count = end? total_lines+1: total_lines;
|
|
nengel@2
|
60 spu_dma_barrier_put(dma_spe, (unsigned) tgt_spe, sizeof(dma_temp), ED_put);
|
|
nengel@2
|
61 // }
|
|
nengel@2
|
62
|
|
nengel@2
|
63 }
|
|
nengel@2
|
64
|
|
nengel@2
|
65 static int init_cabac(H264spe *p, H264Cabac_spu *hc){
|
|
nengel@2
|
66 hc->mb_height = p->mb_height;
|
|
nengel@2
|
67 hc->mb_width = p->mb_width;
|
|
nengel@2
|
68 hc->b_stride = 4*p->mb_width;
|
|
nengel@2
|
69 hc->mb_stride = p->mb_stride;
|
|
nengel@2
|
70
|
|
nengel@2
|
71 for(int i=0; i<16; i++){
|
|
nengel@2
|
72 #define T(x) (x>>2) | ((x<<2) & 0xF)
|
|
nengel@2
|
73 hc->zigzag_scan[i] = T(zigzag_scan[i]);
|
|
nengel@2
|
74 #undef T
|
|
nengel@2
|
75 }
|
|
nengel@2
|
76 for(int i=0; i<64; i++){
|
|
nengel@2
|
77 #define T(x) (x>>3) | ((x&7)<<3)
|
|
nengel@2
|
78 hc->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]);
|
|
nengel@2
|
79 #undef T
|
|
nengel@2
|
80 }
|
|
nengel@2
|
81 }
|
|
nengel@2
|
82
|
|
nengel@2
|
83 static void reset_cabac_buffers(){
|
|
nengel@2
|
84 memset(intra4x4_pred_mode_table, 0, sizeof(intra4x4_pred_mode_table));
|
|
nengel@2
|
85 memset(mvd_table, 0, sizeof(mvd_table));
|
|
nengel@2
|
86 memset(direct_table, 0, sizeof(direct_table));
|
|
nengel@2
|
87 memset(chroma_pred_mode_table, 0, sizeof(chroma_pred_mode_table));
|
|
nengel@2
|
88 memset(cbp_table, 0, sizeof(cbp_table));
|
|
nengel@2
|
89 memset(qscale_table, 0, sizeof(qscale_table));
|
|
nengel@2
|
90 memset(mb_type_table, 0, sizeof(mb_type_table));
|
|
nengel@2
|
91 memset(ref_index_table, 0, sizeof(ref_index_table));
|
|
nengel@2
|
92 memset(motion_val_table, 0, sizeof(motion_val_table));
|
|
nengel@2
|
93 }
|
|
nengel@2
|
94
|
|
nengel@2
|
95 static void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int bufsize){
|
|
nengel@2
|
96 int align = (unsigned) buf & 0xF;
|
|
nengel@2
|
97 int dma_size;
|
|
nengel@2
|
98
|
|
nengel@2
|
99 c->bytestream_ea_start=
|
|
nengel@2
|
100 c->bytestream_ea= buf;
|
|
nengel@2
|
101 c->bytestream_ea_end= buf + bufsize;
|
|
nengel@2
|
102 c->bufsize = bufsize;
|
|
nengel@2
|
103
|
|
nengel@2
|
104 if (bufsize + align >= sizeof(bytestream_ls)){
|
|
nengel@2
|
105 dma_size = sizeof(bytestream_ls);
|
|
nengel@2
|
106 c->bufsize = c->bufsize +align - sizeof(bytestream_ls);
|
|
nengel@2
|
107 }else{
|
|
nengel@2
|
108 int align_end = (bufsize+align) &0xF;
|
|
nengel@2
|
109 if (align_end)
|
|
nengel@2
|
110 dma_size = bufsize+align + 16-align_end;
|
|
nengel@2
|
111 else
|
|
nengel@2
|
112 dma_size = bufsize+align;
|
|
nengel@2
|
113 c->bufsize = 0;
|
|
nengel@2
|
114 }
|
|
nengel@2
|
115 // printf("%d\n", dma_size);
|
|
nengel@2
|
116 c->bytestream_end = &bytestream_ls[dma_size];
|
|
nengel@2
|
117 c->bytestream_start= c->bytestream = &bytestream_ls[align];
|
|
nengel@2
|
118 spu_dma_get(bytestream_ls, (unsigned) buf - align, dma_size, ED_get );
|
|
nengel@2
|
119 c->bytestream_ea_start=
|
|
nengel@2
|
120 c->bytestream_ea= buf + dma_size -align;
|
|
nengel@2
|
121
|
|
nengel@2
|
122 wait_dma_id(ED_get);
|
|
nengel@2
|
123
|
|
nengel@2
|
124 if (align %2){
|
|
nengel@2
|
125 c->low = (*c->bytestream++)<<18;
|
|
nengel@2
|
126 c->low+= (*c->bytestream++)<<10;
|
|
nengel@2
|
127 c->low+= ((*c->bytestream++)<<2) + 2;
|
|
nengel@2
|
128 }else {
|
|
nengel@2
|
129 c->low = (*c->bytestream++)<<18;
|
|
nengel@2
|
130 c->low+= (*c->bytestream++)<<10;
|
|
nengel@2
|
131 c->low+= (2<<8);
|
|
nengel@2
|
132 }
|
|
nengel@2
|
133
|
|
nengel@2
|
134 c->range= 0x1FE;
|
|
nengel@2
|
135 bytecount=0;
|
|
nengel@2
|
136 }
|
|
nengel@2
|
137
|
|
nengel@2
|
138 static void init_dequant8_coeff_table(EDSlice_spu *s, H264Cabac_spu *hc){
|
|
nengel@2
|
139 int i,q,x;
|
|
nengel@2
|
140 const int transpose = HAVE_ALTIVEC;
|
|
nengel@2
|
141 hc->dequant8_coeff[0] = hc->dequant8_buffer[0];
|
|
nengel@2
|
142 hc->dequant8_coeff[1] = hc->dequant8_buffer[1];
|
|
nengel@2
|
143
|
|
nengel@2
|
144 for(i=0; i<2; i++){
|
|
nengel@2
|
145 if(i && !memcmp(s->pps.scaling_matrix8[0], s->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
|
|
nengel@2
|
146 hc->dequant8_coeff[1] = hc->dequant8_buffer[0];
|
|
nengel@2
|
147 break;
|
|
nengel@2
|
148 }
|
|
nengel@2
|
149
|
|
nengel@2
|
150 for(q=0; q<52; q++){
|
|
nengel@2
|
151 int shift = div6[q];
|
|
nengel@2
|
152 int idx = rem6[q];
|
|
nengel@2
|
153 for(x=0; x<64; x++)
|
|
nengel@2
|
154 hc->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
|
|
nengel@2
|
155 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
|
|
nengel@2
|
156 s->pps.scaling_matrix8[i][x]) << shift;
|
|
nengel@2
|
157 }
|
|
nengel@2
|
158 }
|
|
nengel@2
|
159 }
|
|
nengel@2
|
160
|
|
nengel@2
|
161 static void init_dequant4_coeff_table(EDSlice_spu *s, H264Cabac_spu *hc){
|
|
nengel@2
|
162 int i,j,q,x;
|
|
nengel@2
|
163 const int transpose = HAVE_MMX | HAVE_ALTIVEC | HAVE_NEON;
|
|
nengel@2
|
164 for(i=0; i<6; i++ ){
|
|
nengel@2
|
165 hc->dequant4_coeff[i] = hc->dequant4_buffer[i];
|
|
nengel@2
|
166 for(j=0; j<i; j++){
|
|
nengel@2
|
167 if(!memcmp(s->pps.scaling_matrix4[j], s->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
|
|
nengel@2
|
168 hc->dequant4_coeff[i] = hc->dequant4_buffer[j];
|
|
nengel@2
|
169 break;
|
|
nengel@2
|
170 }
|
|
nengel@2
|
171 }
|
|
nengel@2
|
172 if(j<i)
|
|
nengel@2
|
173 continue;
|
|
nengel@2
|
174
|
|
nengel@2
|
175 for(q=0; q<52; q++){
|
|
nengel@2
|
176 int shift = div6[q] + 2;
|
|
nengel@2
|
177 int idx = rem6[q];
|
|
nengel@2
|
178 for(x=0; x<16; x++)
|
|
nengel@2
|
179 hc->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
|
|
nengel@2
|
180 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
|
|
nengel@2
|
181 s->pps.scaling_matrix4[i][x]) << shift;
|
|
nengel@2
|
182 }
|
|
nengel@2
|
183 }
|
|
nengel@2
|
184 }
|
|
nengel@2
|
185
|
|
nengel@2
|
186 static void init_dequant_tables(EDSlice_spu *s, H264Cabac_spu *hc){
|
|
nengel@2
|
187 int i,x;
|
|
nengel@2
|
188
|
|
nengel@2
|
189 init_dequant4_coeff_table(s, hc);
|
|
nengel@2
|
190 if(s->pps.transform_8x8_mode)
|
|
nengel@2
|
191 init_dequant8_coeff_table(s, hc);
|
|
nengel@2
|
192 if(s->transform_bypass){
|
|
nengel@2
|
193 for(i=0; i<6; i++)
|
|
nengel@2
|
194 for(x=0; x<16; x++)
|
|
nengel@2
|
195 hc->dequant4_coeff[i][0][x] = 1<<6;
|
|
nengel@2
|
196 if(s->pps.transform_8x8_mode)
|
|
nengel@2
|
197 for(i=0; i<2; i++)
|
|
nengel@2
|
198 for(x=0; x<64; x++)
|
|
nengel@2
|
199 hc->dequant8_coeff[i][0][x] = 1<<6;
|
|
nengel@2
|
200 }
|
|
nengel@2
|
201 }
|
|
nengel@2
|
202
|
|
nengel@2
|
203 static void init_entropy_buf(H264Cabac_spu *hc, EDSlice_spu *s){
|
|
nengel@2
|
204 hc->non_zero_count_top = non_zero_count_table[0];
|
|
nengel@2
|
205 hc->non_zero_count = non_zero_count_table[1];
|
|
nengel@2
|
206 hc->mvd_top[0] = mvd_table[0][0];
|
|
nengel@2
|
207 hc->mvd[0] = mvd_table[0][1];
|
|
nengel@2
|
208 hc->mvd_top[1] = mvd_table[1][0];
|
|
nengel@2
|
209 hc->mvd[1] = mvd_table[1][1];
|
|
nengel@2
|
210 hc->direct_top = direct_table[0];
|
|
nengel@2
|
211 hc->direct = direct_table[1];
|
|
nengel@2
|
212 hc->chroma_pred_mode_top = chroma_pred_mode_table[0];
|
|
nengel@2
|
213 hc->chroma_pred_mode = chroma_pred_mode_table[1];
|
|
nengel@2
|
214 hc->intra4x4_pred_mode_top = intra4x4_pred_mode_table[0];
|
|
nengel@2
|
215 hc->intra4x4_pred_mode = intra4x4_pred_mode_table[1];
|
|
nengel@2
|
216 hc->cbp_top = cbp_table[0];
|
|
nengel@2
|
217 hc->cbp = cbp_table[1];
|
|
nengel@2
|
218 hc->qscale_top = qscale_table[0] +1;
|
|
nengel@2
|
219 hc->qscale = qscale_table[1] +1;
|
|
nengel@2
|
220
|
|
nengel@2
|
221 hc->mb_type_top = mb_type_table[0]+1;
|
|
nengel@2
|
222 hc->mb_type = mb_type_table[1]+1;
|
|
nengel@2
|
223 hc->ref_index_top[0] = ref_index_table[0][0];
|
|
nengel@2
|
224 hc->ref_index_top[1] = ref_index_table[1][0];
|
|
nengel@2
|
225 hc->ref_index[0] = ref_index_table[0][1];
|
|
nengel@2
|
226 hc->ref_index[1] = ref_index_table[1][1];
|
|
nengel@2
|
227 hc->motion_val_top[0] = motion_val_table[0][0];
|
|
nengel@2
|
228 hc->motion_val_top[1] = motion_val_table[1][0];
|
|
nengel@2
|
229 hc->motion_val[0] = motion_val_table[0][1];
|
|
nengel@2
|
230 hc->motion_val[1] = motion_val_table[1][1];
|
|
nengel@2
|
231
|
|
nengel@2
|
232 int mb_stride = hc->mb_stride;
|
|
nengel@2
|
233
|
|
nengel@2
|
234 if (s->slice_type_nos == FF_B_TYPE){
|
|
nengel@2
|
235 while(!dep_resolved(&spe));
|
|
nengel@2
|
236 spu_dma_get(list1_mb_type_table[0], (unsigned) (s->list1.mb_type -1), mb_stride*sizeof(uint32_t), ED_get);
|
|
nengel@2
|
237 spu_dma_get(list1_ref_index_table[0][0], (unsigned) s->list1.ref_index[0], mb_stride*4*sizeof(int8_t), ED_get);
|
|
nengel@2
|
238 spu_dma_get(list1_ref_index_table[0][1], (unsigned) s->list1.ref_index[1], mb_stride*4*sizeof(int8_t), ED_get);
|
|
nengel@2
|
239 wait_dma_id(ED_get);
|
|
nengel@2
|
240 spu_dma_get(list1_mb_type_table[1], (unsigned) (s->list1.mb_type -1 + mb_stride), mb_stride*sizeof(uint32_t), ED_get);
|
|
nengel@2
|
241 spu_dma_get(list1_ref_index_table[1][0], (unsigned) (s->list1.ref_index[0] + 4*mb_stride), mb_stride*4*sizeof(int8_t), ED_get);
|
|
nengel@2
|
242 spu_dma_get(list1_ref_index_table[1][1], (unsigned) (s->list1.ref_index[1] + 4*mb_stride), mb_stride*4*sizeof(int8_t), ED_get);
|
|
nengel@2
|
243 hc->list1_mb_type = list1_mb_type_table[0]+1;
|
|
nengel@2
|
244 hc->list1_ref_index[0] = list1_ref_index_table[0][0];
|
|
nengel@2
|
245 hc->list1_ref_index[1] = list1_ref_index_table[0][1];
|
|
nengel@2
|
246 }
|
|
nengel@2
|
247
|
|
nengel@2
|
248 }
|
|
nengel@2
|
249
|
|
nengel@2
|
250 static void update_entropy_buf(H264Cabac_spu *hc, EDSlice_spu *s, int line){
|
|
nengel@2
|
251 int mb_stride = hc->mb_stride;
|
|
nengel@2
|
252 int mb_width = hc->mb_width;
|
|
nengel@2
|
253 int top = (line+1)%2;
|
|
nengel@2
|
254 int cur = line%2;
|
|
nengel@2
|
255 int bottom = (line+1)%2; //same as top, but to identify prebuffering of next line.
|
|
nengel@2
|
256
|
|
nengel@2
|
257 hc->non_zero_count_top = non_zero_count_table[top];
|
|
nengel@2
|
258 hc->non_zero_count = non_zero_count_table[cur];
|
|
nengel@2
|
259 hc->mvd_top[0] = mvd_table[0][top];
|
|
nengel@2
|
260 hc->mvd[0] = mvd_table[0][cur];
|
|
nengel@2
|
261 hc->mvd_top[1] = mvd_table[1][top];
|
|
nengel@2
|
262 hc->mvd[1] = mvd_table[1][cur];
|
|
nengel@2
|
263 hc->direct_top = direct_table[top];
|
|
nengel@2
|
264 hc->direct = direct_table[cur];
|
|
nengel@2
|
265 hc->chroma_pred_mode_top = chroma_pred_mode_table[top];
|
|
nengel@2
|
266 hc->chroma_pred_mode = chroma_pred_mode_table[cur];
|
|
nengel@2
|
267 hc->intra4x4_pred_mode_top = intra4x4_pred_mode_table[top];
|
|
nengel@2
|
268 hc->intra4x4_pred_mode = intra4x4_pred_mode_table[cur];
|
|
nengel@2
|
269 hc->cbp_top = cbp_table[top];
|
|
nengel@2
|
270 hc->cbp = cbp_table[cur];
|
|
nengel@2
|
271 hc->qscale_top = qscale_table[top] +1;
|
|
nengel@2
|
272 hc->qscale = qscale_table[cur] +1;
|
|
nengel@2
|
273
|
|
nengel@2
|
274 hc->mb_type_top = mb_type_table[top]+1;
|
|
nengel@2
|
275 hc->mb_type = mb_type_table[cur]+1;
|
|
nengel@2
|
276 hc->ref_index_top[0] = ref_index_table[0][top];
|
|
nengel@2
|
277 hc->ref_index_top[1] = ref_index_table[1][top];
|
|
nengel@2
|
278 hc->ref_index[0] = ref_index_table[0][cur];
|
|
nengel@2
|
279 hc->ref_index[1] = ref_index_table[1][cur];
|
|
nengel@2
|
280 hc->motion_val_top[0] = motion_val_table[0][top];
|
|
nengel@2
|
281 hc->motion_val_top[1] = motion_val_table[1][top];
|
|
nengel@2
|
282 hc->motion_val[0] = motion_val_table[0][cur];
|
|
nengel@2
|
283 hc->motion_val[1] = motion_val_table[1][cur];
|
|
nengel@2
|
284
|
|
nengel@2
|
285 wait_dma_id(ED_put);
|
|
nengel@2
|
286
|
|
nengel@2
|
287 spu_dma_put(mb_type_table[top], (unsigned) (s->pic.mb_type -1 + line*mb_stride), mb_stride*sizeof(uint32_t), ED_put);
|
|
nengel@2
|
288 spu_dma_put(ref_index_table[0][top], (unsigned) (s->pic.ref_index[0] + line*4*mb_stride), 4*mb_stride*sizeof(int8_t), ED_put);
|
|
nengel@2
|
289 spu_dma_put(ref_index_table[1][top], (unsigned) (s->pic.ref_index[1] + line*4*mb_stride), 4*mb_stride*sizeof(int8_t), ED_put);
|
|
nengel@2
|
290 spu_dma_put(motion_val_table[0][top], (unsigned) (s->pic.motion_val[0]+ line*16*mb_width), 16*mb_width*2*sizeof(int16_t), ED_put);
|
|
nengel@2
|
291 spu_dma_put(motion_val_table[1][top], (unsigned) (s->pic.motion_val[1]+ line*16*mb_width), 16*mb_width*2*sizeof(int16_t), ED_put);
|
|
nengel@2
|
292
|
|
nengel@2
|
293 if (s->slice_type_nos == FF_B_TYPE){
|
|
nengel@2
|
294 update_tgt_spe_dep(&spe, 0);
|
|
nengel@2
|
295 wait_dma_id(ED_get);
|
|
nengel@2
|
296
|
|
nengel@2
|
297 if (line + 2 < hc->mb_height){
|
|
nengel@2
|
298 while(!dep_resolved(&spe));
|
|
nengel@2
|
299 spu_dma_get(list1_mb_type_table[cur], (unsigned) (s->list1.mb_type -1 + (line+2)*mb_stride), mb_stride*sizeof(uint32_t), ED_get);
|
|
nengel@2
|
300 spu_dma_get(list1_ref_index_table[cur][0], (unsigned) (s->list1.ref_index[0] + (line+2)*4*mb_stride), mb_stride*4*sizeof(int8_t), ED_get);
|
|
nengel@2
|
301 spu_dma_get(list1_ref_index_table[cur][1], (unsigned) (s->list1.ref_index[1] + (line+2)*4*mb_stride), mb_stride*4*sizeof(int8_t), ED_get);
|
|
nengel@2
|
302 }
|
|
nengel@2
|
303 hc->list1_mb_type = list1_mb_type_table[bottom]+1;
|
|
nengel@2
|
304 hc->list1_ref_index[0] = list1_ref_index_table[bottom][0];
|
|
nengel@2
|
305 hc->list1_ref_index[1] = list1_ref_index_table[bottom][1];
|
|
nengel@2
|
306 }
|
|
nengel@2
|
307
|
|
nengel@2
|
308 }
|
|
nengel@2
|
309
|
|
nengel@2
|
310 // void printmbdiff(EDSlice_spu *s, H264Cabac_spu *hc, H264Mb *mp, H264Mb *ms){
|
|
nengel@2
|
311 //
|
|
nengel@2
|
312 // printf("mb_x %d, %d\n", mp->mb_x, ms->mb_x);
|
|
nengel@2
|
313 // printf("mb_y %d, %d\n", mp->mb_y, ms->mb_y);
|
|
nengel@2
|
314 // printf("mb_xy %d, %d\n", mp->mb_xy, ms->mb_xy);
|
|
nengel@2
|
315 // printf("top_mb_xy %d, %d\n", mp->top_mb_xy, ms->top_mb_xy);
|
|
nengel@2
|
316 // printf("left_mb_xy %d, %d\n", mp->left_mb_xy, ms->left_mb_xy);
|
|
nengel@2
|
317 // printf("chroma_pred_mode %d, %d\n", mp->chroma_pred_mode, ms->chroma_pred_mode);
|
|
nengel@2
|
318 // printf("intra16x16_pred_mode %d, %d\n", mp->intra16x16_pred_mode, ms->intra16x16_pred_mode);
|
|
nengel@2
|
319 // printf("topleft_samples %d, %d\n", mp->topleft_samples_available, ms->topleft_samples_available);
|
|
nengel@2
|
320 // printf("topright_samples %d, %d\n", mp->topright_samples_available, ms->topright_samples_available);
|
|
nengel@2
|
321 // printf("top_samples %d, %d\n", mp->top_samples_available, ms->top_samples_available);
|
|
nengel@2
|
322 // printf("left_samples %d, %d\n", mp->left_samples_available, ms->left_samples_available);
|
|
nengel@2
|
323 //
|
|
nengel@2
|
324 // if (memcmp(mp->intra4x4_pred_mode_cache, ms->intra4x4_pred_mode_cache, 40)){
|
|
nengel@2
|
325 // for (int i=0; i<5; i++){
|
|
nengel@2
|
326 // for (int j=0; j<8; j++){
|
|
nengel@2
|
327 // printf("%d, %d\t", mp->intra4x4_pred_mode_cache[i*8+j],ms->intra4x4_pred_mode_cache[i*8+j]);
|
|
nengel@2
|
328 // }
|
|
nengel@2
|
329 // printf("\n");
|
|
nengel@2
|
330 // }
|
|
nengel@2
|
331 // }
|
|
nengel@2
|
332 //
|
|
nengel@2
|
333 // if (memcmp(mp->non_zero_count_cache, ms->non_zero_count_cache, 48)){
|
|
nengel@2
|
334 // for (int i=0; i<6; i++){
|
|
nengel@2
|
335 // for (int j=0; j<8; j++){
|
|
nengel@2
|
336 // printf("%u, %u\t", mp->non_zero_count_cache[i*8+j],ms->non_zero_count_cache[i*8+j]);
|
|
nengel@2
|
337 // }
|
|
nengel@2
|
338 // printf("\n");
|
|
nengel@2
|
339 // }
|
|
nengel@2
|
340 // }
|
|
nengel@2
|
341 //
|
|
nengel@2
|
342 // if (memcmp(mp->sub_mb_type, ms->sub_mb_type, 8)){
|
|
nengel@2
|
343 // for (int i=0; i<4; i++){
|
|
nengel@2
|
344 // printf("%u, %u\t", mp->sub_mb_type[i], mp->sub_mb_type[i]);
|
|
nengel@2
|
345 // printf("\n");
|
|
nengel@2
|
346 // }
|
|
nengel@2
|
347 // }
|
|
nengel@2
|
348 //
|
|
nengel@2
|
349 // if (memcmp(mp->mv_cache, ms->mv_cache, 320)){
|
|
nengel@2
|
350 // for (int k=0; k<2; k++){
|
|
nengel@2
|
351 // for (int i=0; i<5; i++){
|
|
nengel@2
|
352 // for (int j=0; j<8; j++){
|
|
nengel@2
|
353 // printf("%d, %d, %d, %d\t", mp->mv_cache[k][i*8+j][0], mp->mv_cache[k][i*8+j][1], ms->mv_cache[k][i*8+j][0], ms->mv_cache[k][i*8+j][1]);
|
|
nengel@2
|
354 // }
|
|
nengel@2
|
355 // printf("\n");
|
|
nengel@2
|
356 // }
|
|
nengel@2
|
357 // }
|
|
nengel@2
|
358 // }
|
|
nengel@2
|
359 //
|
|
nengel@2
|
360 // if (memcmp(mp->ref_cache, ms->ref_cache, 80)){
|
|
nengel@2
|
361 // for (int k=0; k<2; k++){
|
|
nengel@2
|
362 // for (int i=0; i<5; i++){
|
|
nengel@2
|
363 // for (int j=0; j<8; j++){
|
|
nengel@2
|
364 // printf("%d, %d\t", mp->ref_cache[k][i*8+j], ms->ref_cache[k][i*8+j]);
|
|
nengel@2
|
365 // }
|
|
nengel@2
|
366 // printf("\n");
|
|
nengel@2
|
367 // }
|
|
nengel@2
|
368 // }
|
|
nengel@2
|
369 // }
|
|
nengel@2
|
370 //
|
|
nengel@2
|
371 // printf("cbp %d, %d\n", mp->cbp, ms->cbp);
|
|
nengel@2
|
372 // for (int i=0; i<hc->mb_stride; i++){
|
|
nengel@2
|
373 // printf("%d, ", hc->cbp[i]); fflush(0);
|
|
nengel@2
|
374 // }
|
|
nengel@2
|
375 // printf("\n");
|
|
nengel@2
|
376 //
|
|
nengel@2
|
377 // printf("mb_type %x, %x\n", mp->mb_type, ms->mb_type);
|
|
nengel@2
|
378 // printf("mb_type IS_INTRA %d, IS_INTRA16x16 %d, IS_DIRECT %d\n", IS_INTRA(ms->mb_type), IS_INTRA16x16(ms->mb_type), IS_DIRECT(ms->mb_type) );
|
|
nengel@2
|
379 // printf("left_type %d, %d\n", mp->left_type, ms->left_type);
|
|
nengel@2
|
380 // printf("top_type %d, %d\n", mp->top_type, ms->top_type);
|
|
nengel@2
|
381 // printf("qscale_mb_xy %d, %d\n", mp->qscale_mb_xy, ms->qscale_mb_xy);
|
|
nengel@2
|
382 // printf("qscale_left_mb_xy %d, %d\n", mp->qscale_left_mb_xy, ms->qscale_left_mb_xy);
|
|
nengel@2
|
383 // printf("qscale_top_mb_xy %d, %d\n", mp->qscale_top_mb_xy, ms->qscale_top_mb_xy);
|
|
nengel@2
|
384 // // for (int i=0; i<hc->mb_stride; i++){
|
|
nengel@2
|
385 // // printf("%d, ", qscale_table[0][i]); fflush(0);
|
|
nengel@2
|
386 // // }
|
|
nengel@2
|
387 //
|
|
nengel@2
|
388 // if (memcmp(mp->mb, ms->mb, 768)){
|
|
nengel@2
|
389 // for (int i=0; i<16; i++){
|
|
nengel@2
|
390 // for (int j=0; j<16; j++){
|
|
nengel@2
|
391 // printf("%d, %d\t", mp->mb[j + i*16], ms->ref_cache[j + i*16]);
|
|
nengel@2
|
392 // }
|
|
nengel@2
|
393 // printf("\n");
|
|
nengel@2
|
394 // }
|
|
nengel@2
|
395 // for (int i=0; i<8; i++){
|
|
nengel@2
|
396 // for (int j=0; j<8; j++){
|
|
nengel@2
|
397 // printf("%d, %d\t", mp->mb[256 + j + i*8], ms->ref_cache[j + i*8]);
|
|
nengel@2
|
398 // }
|
|
nengel@2
|
399 // printf("\n");
|
|
nengel@2
|
400 // }
|
|
nengel@2
|
401 // for (int i=0; i<8; i++){
|
|
nengel@2
|
402 // for (int j=0; j<8; j++){
|
|
nengel@2
|
403 // printf("%d, %d\t", mp->mb[320+ j + i*8], ms->ref_cache[j + i*8]);
|
|
nengel@2
|
404 // }
|
|
nengel@2
|
405 // printf("\n");
|
|
nengel@2
|
406 // }
|
|
nengel@2
|
407 // }
|
|
nengel@2
|
408 //
|
|
nengel@2
|
409 // if (memcmp(mp->bS, ms->bS, 32)){
|
|
nengel@2
|
410 // for (int k=0; k<2; k++){
|
|
nengel@2
|
411 // for (int i=0; i<4; i++){
|
|
nengel@2
|
412 // for (int j=0; j<4; j++){
|
|
nengel@2
|
413 // printf("%d, %d\t", mp->bS[k][i][j], mp->mv_cache[k][i][j]);
|
|
nengel@2
|
414 // }
|
|
nengel@2
|
415 // printf("\n");
|
|
nengel@2
|
416 // }
|
|
nengel@2
|
417 // }
|
|
nengel@2
|
418 // }
|
|
nengel@2
|
419 // if (memcmp(mp->edges, ms->edges, 4)){
|
|
nengel@2
|
420 // printf("edges %d, %d, %d, %d\n", mp->edges[0], ms->edges[0], mp->edges[1], ms->edges[1]);
|
|
nengel@2
|
421 // printf("deblock %d, %d\n", mp->deblock_mb, ms->deblock_mb);
|
|
nengel@2
|
422 // }
|
|
nengel@2
|
423 //
|
|
nengel@2
|
424 // printf("dequant4_coeff_y %d, %d\n", mp->dequant4_coeff_y, ms->dequant4_coeff_y);
|
|
nengel@2
|
425 // printf("dequant4_coeff_cb %d, %d\n", mp->dequant4_coeff_cb, ms->dequant4_coeff_cb);
|
|
nengel@2
|
426 // printf("dequant4_coeff_cr %d, %d\n", mp->dequant4_coeff_cr, ms->dequant4_coeff_cr);
|
|
nengel@2
|
427 // }
|
|
nengel@2
|
428 // DECLARE_ALIGNED_16(H264Mb, tmp);
|
|
nengel@2
|
429
|
|
nengel@2
|
430
|
|
nengel@2
|
431 int main(unsigned long long id, unsigned long long argp){
|
|
nengel@2
|
432 EDSlice_spu *s;
|
|
nengel@2
|
433 H264Cabac_spu *hc = &hcabac;
|
|
nengel@2
|
434 CABACContext *c = &cabac;
|
|
nengel@2
|
435 H264spe *p = &spe;
|
|
nengel@2
|
436
|
|
nengel@2
|
437 spu_write_out_mbox((unsigned) slice);
|
|
nengel@2
|
438 spu_dma_get(p, (unsigned) argp, sizeof(H264spe), ED_spe); //ID_slice is used out of convienience
|
|
nengel@2
|
439 wait_dma_id(ED_spe);
|
|
nengel@2
|
440
|
|
nengel@2
|
441 ff_init_cabac_states();
|
|
nengel@2
|
442 init_cabac(p, hc);
|
|
nengel@2
|
443 hc->blocking=0;
|
|
nengel@2
|
444 for(;;){
|
|
nengel@2
|
445 spu_read_in_mbox();
|
|
nengel@2
|
446 s = &slice[0];
|
|
nengel@2
|
447 reset_cabac_buffers();
|
|
nengel@2
|
448 init_entropy_buf(hc, s);
|
|
nengel@2
|
449
|
|
nengel@2
|
450 if (hc->blocking) wait_dma_id(ED_get);
|
|
nengel@2
|
451 //printf("framesize %d\n", s->byte_bufsize);fflush(0);
|
|
nengel@2
|
452 init_dequant_tables(s, hc);
|
|
nengel@2
|
453 ff_init_cabac_decoder( c, s->bytestream_start, s->byte_bufsize );
|
|
nengel@2
|
454 ff_h264_init_cabac_states(s, c);
|
|
nengel@2
|
455
|
|
nengel@2
|
456 int mb_slot=0;
|
|
nengel@2
|
457 for(int j=0; j<hc->mb_height; j++){
|
|
nengel@2
|
458 for(int i=0; i<hc->mb_width; i++){
|
|
nengel@2
|
459 int eos,ret;
|
|
nengel@2
|
460 H264Mb *m = &mb[mb_slot];
|
|
nengel@2
|
461 m->mb_x=i;
|
|
nengel@2
|
462 m->mb_y=j;
|
|
nengel@2
|
463 s->m = m;
|
|
nengel@2
|
464
|
|
nengel@2
|
465 ret = ff_h264_decode_mb_cabac(hc, s, c);
|
|
nengel@2
|
466
|
|
nengel@2
|
467 // spu_dma_get(&tmp, (unsigned) &s->mbs[j*hc->mb_width + i], sizeof(H264Mb), ED_get);
|
|
nengel@2
|
468 // wait_dma_id(ED_get);
|
|
nengel@2
|
469 // if (memcmp(&tmp, m, sizeof(H264Mb))){
|
|
nengel@2
|
470 // printf("coded pic num %d\n", s->coded_pic_num);
|
|
nengel@2
|
471 // printmbdiff(s, hc,&tmp, m);
|
|
nengel@2
|
472 // return 0;
|
|
nengel@2
|
473 // }
|
|
nengel@2
|
474 //printf("qscale %d\n", m->qscale_mb_xy);
|
|
nengel@2
|
475 if (!hc->blocking){
|
|
nengel@2
|
476 if (mb_slot){
|
|
nengel@2
|
477 spu_dma_put(m, (unsigned) &s->mbs[j*hc->mb_width + i], sizeof(H264Mb), ED_putmb1);
|
|
nengel@2
|
478 wait_dma_id(ED_putmb0);
|
|
nengel@2
|
479 }else {
|
|
nengel@2
|
480 spu_dma_put(m, (unsigned) &s->mbs[j*hc->mb_width + i], sizeof(H264Mb), ED_putmb0);
|
|
nengel@2
|
481 wait_dma_id(ED_putmb1);
|
|
nengel@2
|
482 }
|
|
nengel@2
|
483 mb_slot++; mb_slot%=2;
|
|
nengel@2
|
484 }else {
|
|
nengel@2
|
485 spu_dma_put(m, (unsigned) &s->mbs[j*hc->mb_width + i], sizeof(H264Mb), ED_putmb0);
|
|
nengel@2
|
486 wait_dma_id(ED_putmb0);
|
|
nengel@2
|
487 }
|
|
nengel@2
|
488
|
|
nengel@2
|
489
|
|
nengel@2
|
490 eos = get_cabac_terminate( c);
|
|
nengel@2
|
491
|
|
nengel@2
|
492 if( ret < 0) {
|
|
nengel@2
|
493 fprintf(stderr, "error at %d bytecount\n", bytecount);
|
|
nengel@2
|
494 return -1;
|
|
nengel@2
|
495 }
|
|
nengel@2
|
496 }
|
|
nengel@2
|
497 update_entropy_buf(hc, s, j);
|
|
nengel@2
|
498 if (hc->blocking){ wait_dma_id(ED_get); wait_dma_id(ED_put);}
|
|
nengel@2
|
499 }
|
|
nengel@2
|
500 wait_dma_id(ED_put);
|
|
nengel@2
|
501 spu_write_out_mbox(1);
|
|
nengel@2
|
502
|
|
nengel@2
|
503 }
|
|
nengel@2
|
504
|
|
nengel@2
|
505 return 0;
|
|
nengel@2
|
506
|
|
nengel@2
|
507
|
|
nengel@2
|
508 }
|