Mercurial > cgi-bin > hgwebdir.cgi > PR > Applications > VSs > VSs__H264__App
view libavcodec/cell/spe_mbd.c @ 2:897f711a7157
rearrange to work with autoconf
| author | Nina Engelhardt <nengel@mailbox.tu-berlin.de> |
|---|---|
| date | Tue, 25 Sep 2012 15:55:33 +0200 |
| parents | |
| children |
line source
1 /*
2 * Copyright (c) 2009 TUDelft
3 *
4 * Cell Parallel SPU - 2DWave Macroblock Decoding.
5 */
7 /**
8 * @file libavcodec/cell/spe_mbd.c
9 * Cell Parallel SPU - 2DWave Macroblock Decoding
10 * @author C C Chi <c.c.chi@student.tudelft.nl>
11 *
12 * SIMD kernels
13 * H.264/AVC motion compensation
14 * @author Mauricio Alvarez <alvarez@ac.upc.edu>
15 * @author Albert Paradis <apar7632@hotmail.com>
16 */
19 /* Uncomment these lines to enable simulator statistics or to generate traces */
21 //#define ENABLE_SIMULATOR
22 //#define ENABLE_PARAVER_TRACING_CELL
24 #ifdef ENABLE_SIMULATOR
25 #include "/opt/ibm/systemsim-cell/include/callthru/spu/profile.h"
26 #endif
28 #ifdef ENABLE_TRACES
29 #include "spu_trace.h"
30 #endif
31 #include <unistd.h>
32 #include <stdio.h>
33 #include <spu_intrinsics.h>
34 #include <spu_mfcio.h>
35 #include <libsync.h>
36 #include <sys/time.h>
37 #include <assert.h>
39 //#include "dsputil_cell.h"
40 #include "types_spu.h"
41 #include "h264_intra_spu.h"
42 #include "h264_decode_mb_spu.h"
43 #include "h264_mc_spu.h"
44 #include "h264_tables.h"
45 #include "h264_dma.h"
48 /** functions for supporting tracing with paraver for the SPU
49 *
50 */
/**
 * Initialise Paraver tracing on this SPU by calling SPUtrace_init().
 *
 * Compiles to an empty function unless ENABLE_PARAVER_TRACING_CELL is defined.
 *
 * Deliberately defined without `inline`: a plain `inline` definition emits an
 * external symbol under gnu89 rules but only an inline definition under
 * C99 and later, so a call that is not inlined would fail to link there.
 * A normal external definition behaves the same in every dialect and is still
 * inlined by the optimizer.
 *
 * NOTE(review): spu_trace.h is included under ENABLE_TRACES, while this body
 * is guarded by ENABLE_PARAVER_TRACING_CELL -- confirm the build defines both.
 */
void trace_init_SPU(void){
#ifdef ENABLE_PARAVER_TRACING_CELL
    SPUtrace_init ();
#endif
}
/**
 * Shut down Paraver tracing on this SPU by calling SPUtrace_fini().
 *
 * Compiles to an empty function unless ENABLE_PARAVER_TRACING_CELL is defined.
 *
 * Defined without `inline` so that an external definition exists in every C
 * dialect (plain `inline` yields no external symbol under C99 and later, which
 * breaks linking whenever the call is not inlined).
 */
void trace_fini_SPU(void){
#ifdef ENABLE_PARAVER_TRACING_CELL
    SPUtrace_fini ();
#endif
}
/**
 * Emit one Paraver trace event by forwarding to SPUtrace_event().
 *
 * @param event  event value passed through to SPUtrace_event()
 * @param id     id value passed through to SPUtrace_event()
 *
 * When ENABLE_PARAVER_TRACING_CELL is not defined both arguments are ignored
 * and the function does nothing.
 *
 * Defined without `inline` so that an external definition exists in every C
 * dialect (plain `inline` yields no external symbol under C99 and later, which
 * breaks linking whenever the call is not inlined).
 */
void trace_event_SPU(int event, int id){
#ifdef ENABLE_PARAVER_TRACING_CELL
    SPUtrace_event (event, id);
#else
    /* silence unused-parameter warnings in non-tracing builds */
    (void) event;
    (void) id;
#endif
}
72 // helpers for collecting simulator statistics
/**
 * Reset the simulator profiling counters by calling prof_clear().
 *
 * Compiles to an empty function unless ENABLE_SIMULATOR is defined.
 *
 * Defined without `inline` so that an external definition exists in every C
 * dialect (plain `inline` yields no external symbol under C99 and later, which
 * breaks linking whenever the call is not inlined).
 */
void clear_statistic(void){
#ifdef ENABLE_SIMULATOR
    prof_clear();
#endif
}
/**
 * Start simulator profiling by calling prof_start().
 *
 * Compiles to an empty function unless ENABLE_SIMULATOR is defined.
 *
 * Defined without `inline` so that an external definition exists in every C
 * dialect (plain `inline` yields no external symbol under C99 and later, which
 * breaks linking whenever the call is not inlined).
 */
void start_statistic(void){
#ifdef ENABLE_SIMULATOR
    prof_start();
#endif
}
/**
 * Stop simulator profiling by calling prof_stop().
 *
 * Compiles to an empty function unless ENABLE_SIMULATOR is defined.
 *
 * Defined without `inline` so that an external definition exists in every C
 * dialect (plain `inline` yields no external symbol under C99 and later, which
 * breaks linking whenever the call is not inlined).
 */
void stop_statistic(void){
#ifdef ENABLE_SIMULATOR
    prof_stop();
#endif
}
91 H264Context_spu h_context; // struct that contain all the params to decode a macroblock
93 DECLARE_ALIGNED_16(spe_pos, dma_temp); //dma temp for sending
94 //mb position of neighbouring spes
95 DECLARE_ALIGNED_16(volatile spe_pos, src_spe); //written by SPE_ID -1
96 //DECLARE_ALIGNED_16(spe_pos, tgt_spe); //written by SPE_ID +1
98 /**
99 * Initializes the buffering of the mb data and associated mc data. The init_mb_buffer needs to
100 * be called before any get_next_mb and only once at the beginning of the slice.
101 *
102 * Note: init_mc_buffer and get_next_mb expect the width of the picture to be more than 2 mb's
103 */
104 #define TAG_OFFSET_MB MBD_buf1
105 #define TAG_OFFSET_MC MBD_mc_buf1
106 static void init_mb_buffer(H264Context_spu* h){
107 H264slice *s = h->s;
108 H264Mb *next_mb;
109 int mb_height = s->mb_height;
110 int mb_width = s->mb_width;
112 h->mc_idx =0;
114 h->mb_dec = 0;
115 h->mb_mc = 0;
116 h->mb_dma = 0;
118 h->curr_line %= mb_height;
119 h->next_mb_idx = h->curr_line * mb_width;
120 h->mb_id = h->curr_line * mb_width;
121 h->n_mc= h->curr_line * mb_width;
123 next_mb = s->blocks + h->mb_id;
124 spu_dma_get(&h->mb_buf[h->mb_dma], (unsigned) next_mb, sizeof(H264Mb), h->mb_dma + TAG_OFFSET_MB);
125 h->mb_dma++;
126 h->mb_id++;
128 next_mb = s->blocks + h->mb_id;
129 spu_dma_get(&h->mb_buf[h->mb_dma], (unsigned) next_mb, sizeof(H264Mb), h->mb_dma + TAG_OFFSET_MB);
130 h->mb_dma++;
131 h->mb_id++;
132 wait_dma_id(0 + TAG_OFFSET_MB);
134 H264Mb *mb = &h->mb_buf[0];
135 H264mc *mc = &h->mc_buf[0];
136 if(!IS_INTRA(mb->mb_type)){
137 calc_mc_params(mb, mc);
138 fill_ref_buf(h, mb, mc);
139 }
140 h->n_mc++;
141 h->mb_mc++;
142 }
144 static void *get_next_mb(H264Context_spu *h){
145 H264slice *s = h->s;
146 H264spe *spe = &h->spe;
147 H264Mb *mb_buf = h->mb_buf;
148 H264mc *mc_buf = h->mc_buf;
149 H264Mb *next_mb;
150 H264Mb *next_dma_mb;
152 if (h->curr_line >= s->mb_height)
153 return NULL;
155 if (h->mb_id < h->mb_total){
156 next_dma_mb = s->blocks + h->mb_id;
157 spu_dma_get(&mb_buf[h->mb_dma], (unsigned) next_dma_mb, sizeof(H264Mb), h->mb_dma + TAG_OFFSET_MB);
158 h->mb_dma = (h->mb_dma+1)%3;
159 h->mb_id++;
160 if (h->mb_id%s->mb_width ==0){
161 h->mb_id+=(spe->spe_total-1)*s->mb_width;
162 }
163 }
165 h->mc = &mc_buf[h->mc_idx];
166 wait_dma_id(h->mc_idx + TAG_OFFSET_MC);
167 h->mc_idx = (h->mc_idx+1)%2;
168 if (h->n_mc < h->mb_total){
169 wait_dma_id(h->mb_mc + TAG_OFFSET_MB);
170 H264Mb *mb = &mb_buf[h->mb_mc];
171 H264mc *mc = &mc_buf[h->mc_idx];
172 if(!IS_INTRA(mb->mb_type)){
173 calc_mc_params(mb, mc);
174 fill_ref_buf(h, mb, mc);
175 }
176 h->n_mc++;
177 if (h->n_mc%s->mb_width ==0){
178 h->n_mc+=(spe->spe_total-1)*s->mb_width;
179 }
180 }
181 h->next_mb_idx++;
182 if (h->next_mb_idx % s->mb_width ==0){
183 h->next_mb_idx+=(spe->spe_total-1)*s->mb_width;
184 h->curr_line+=spe->spe_total;
185 }
187 h->mb_mc = (h->mb_mc+1)%3;
188 next_mb = &mb_buf[h->mb_dec];
189 h->mb_dec = (h->mb_dec+1)%3;
190 return next_mb;
191 }
193 static void *get_next_mb_blocking(H264Context_spu *h){
194 H264slice *s = h->s;
195 H264spe *spe = &h->spe;
196 H264Mb *mb_buf = h->mb_buf;
197 H264mc *mc_buf = h->mc_buf;
198 H264Mb *next_mb;
199 H264Mb *next_dma_mb;
201 if (h->mb_id >= h->mb_total)
202 return NULL;
204 //printf("%d\n", h->mb_id);
205 next_dma_mb = s->blocks + h->mb_id;
206 spu_dma_get(&mb_buf[0], (unsigned) next_dma_mb, sizeof(H264Mb), MBD_buf1);
207 //h->mb_dma = (h->mb_dma+1)%3;
208 h->mb_id++;
209 if (h->mb_id%s->mb_width ==0){
210 h->mb_id+=(spe->spe_total-1)*s->mb_width;
211 }
212 wait_dma_id(MBD_buf1);
214 h->mc = &mc_buf[0];
215 //h->mc_idx = (h->mc_idx+1)%2;
216 //if (h->n_mc < h->mb_total){
217 H264Mb *mb = &mb_buf[0];
218 H264mc *mc = &mc_buf[0];
219 if(!IS_INTRA(mb->mb_type)){
220 calc_mc_params(mb, mc);
221 fill_ref_buf(h, mb, mc);
222 }
223 //h->n_mc++;
224 /*if (h->n_mc%s->mb_width ==0){
225 h->n_mc+=(spe->spe_total-1)*s->mb_width;
226 }*/
227 // wait_dma_id(MBD_mc_buf1);
229 // h->next_mb_idx++;
230 // if (h->next_mb_idx % s->mb_width ==0){
231 // h->next_mb_idx+=(spe->spe_total-1)*s->mb_width;
232 // h->curr_line+=spe->spe_total;
233 // }
235 // h->mb_mc = (h->mb_mc+1)%3;
236 next_mb = &mb_buf[0];
237 // h->mb_dec = (h->mb_dec+1)%3;
238 return next_mb;
239 }
242 #undef TAG_OFFSET_MB
243 #undef TAG_OFFSET_MC
244 static inline int dep_resolved(H264Context_spu *h){
245 H264slice *s = h->s;
246 int spe_id = h->spe.spe_id;
247 volatile int mb_proc_dep = src_spe.count;
248 if (spe_id==0)
249 return (h->mb_proc < mb_proc_dep-1 +s->mb_width)? 1:0;
250 else
251 return (h->mb_proc < mb_proc_dep-1)? 1:0;
252 }
254 void update_tgt_spe_dep(H264Context_spu *h, int end){
255 H264Mb *mb = h->mb;
256 H264slice *s = h->s;
257 H264spe *spe = &h->spe;
258 int mb_x = mb->mb_x;
260 if (end || (mb_x%2==0 && mb_x!=0) || mb_x==s->mb_width-1){
261 spe_pos* dma_spe = &dma_temp;
262 spe_pos* tgt_spe = (spe_pos*) ((unsigned) spe->tgt_spe + (unsigned) &src_spe); //located in target spe local store
263 dma_spe->count = end? h->mb_proc+1: h->mb_proc;
264 spu_dma_barrier_put(dma_spe, (unsigned) tgt_spe, sizeof(dma_temp), MBD_put);
265 }
266 h->mb_proc++;
267 }
270 int main(unsigned long long id, unsigned long long argp)
271 {
272 (void) id;
273 H264Context_spu* h = &h_context;
274 H264spe *spe_params = (H264spe *) (unsigned) argp;
276 spu_dma_get(&h->spe, (unsigned) spe_params, sizeof(H264spe), MBD_slice); //ID_slice is used out of convienience
277 wait_dma_id(MBD_slice);
279 //clear_statistic();
280 dsputil_h264_init_cell(&h->dsp);
281 ff_cropTbl_init();
282 init_pred_ptrs(&h->hpc);
284 //send slice_buf to ppe
285 spu_write_out_mbox((unsigned) h->slice_buf);
286 h->sl_idx=0;
287 // initialize tracing with paraver
288 //trace_init_SPU();
289 h->frames =0;
290 src_spe.count =0;
291 h->mb_proc = 0;
293 h->mb_id=0;
294 h->mc_idx=0;
295 h->mb_dec=0;
296 h->mb_mc=0;
297 h->mb_dma=0;
298 h->next_mb_idx=0;
300 h->blocking=0;
303 H264spe* p = &h->spe;
304 h->curr_line =p->spe_id;
305 h->mb_total = p->mb_height*p->mb_width;
306 int stride_y = 32;
307 int stride_c = 16;
308 //init block_offset array
309 init_block_offset(stride_y, stride_c);
310 for(;;){
311 spu_read_in_mbox();
313 h->s = &h->slice_buf[h->sl_idx];
314 h->sl_idx++; h->sl_idx%=2;
316 if (h->s->state< 0){
317 break;
318 }
320 {
321 if(!h->blocking){
322 init_mb_buffer(h);
323 while((h->mb=(H264Mb *)get_next_mb(h))){
324 while(!dep_resolved(h));
325 //printf("frame %d mbx %d\t mby %d id %d\n", h->frames, h->mb->mb_x, h->mb->mb_y, p- >spe_id);
326 hl_decode_mb_internal(h, stride_y, stride_c);
327 }
328 update_tgt_spe_dep(h, 1);
329 }else{
330 h->mb_id=0;
331 while((h->mb=(H264Mb *)get_next_mb_blocking(h))){
332 while(!dep_resolved(h));
333 //printf("frame %d mbx %d\t mby %d id %d\n", h->frames, h->mb->mb_x, h->mb->mb_y, p- >spe_id);
334 hl_decode_mb_internal(h, stride_y, stride_c);
335 }
336 update_tgt_spe_dep(h, 1);
337 }
339 }
341 h->frames++;
343 if (p->spe_id == ((h->frames*p->mb_height -1)%p->spe_total)){
344 //printf("spe %d, %d\n", atomic_read(p->rl_cnt), h->frames);
345 //MBSlice is copied beforehand.
346 //only inc cnt.
347 atomic_inc(p->rl_cnt);
348 }
349 {
350 atomic_dec(p->cnt);
351 }
352 }
354 return 0;
355 }
