view CoreController.c @ 276:1d7ea1b0f176

Dev_ML Working in sequential mode
author Sean Halle <seanhalle@yahoo.com>
date Mon, 04 Mar 2013 00:40:38 -0800
parents 40e7625e57bd
children 2fc69e6c14ea
line source
1 /*
2 * Copyright 2010 OpenSourceResearchInstitute
3 *
4 * Licensed under BSD
5 */
8 #include "PR.h"
10 #include <stdlib.h>
11 #include <stdio.h>
12 #include <time.h>
14 #include <pthread.h>
15 #include <sched.h>
17 //===================== Functions local to this file =======================
18 void *terminateCoreController(SlaveVP *currSlv);
20 inline void
21 doBackoff_for_TooLongToGetLock( int32 numTriesToGetLock, uint32 *seed1,
22 uint32 *seed2 );
23 inline void
24 doBackoff_for_TooLongWithNoWork( int32 numRepsWithNoWork, uint32 *seed1,
25 uint32 *seed2 );
27 //===========================================================================
30 /*The Core Controller is logically "beneath" the masterVP and slave VPs. Its
31 * job is to control which of those VPs the core animates. Any time one of
32 * those VPs suspends, the suspend-primitive switches the core over to
33 * animating the core controller. The core controller then follows a very
34 * basic pattern to choose which VP will get animated next, then switches
35 * the core over to animating that VP. So, all VPs switch the core to
36 * core controller, which then chooses which VP the core animates next.
37 *
38 *The way the core controller decides which VP to switch the core to next is:
39 * 1) There are a number of "animation slots", which the master VP fills up
40 * with slave VPs that are ready to be animated. So, the core controller
41 * just iterates through the animation slots. When the next slot has a
42 * slave VP in it, the core controller switches the core over to animate
43 * that slave.
44 * 2) When the core controller checks a animation slot, and it's empty,
45 * then the controller switches the core over to animating the master VP,
46 * whose job is to find more slave VPs ready, and assign those to
47 * animation slots.
48 *
49 *So, in effect, a animation slot functions as another layer of virtual
50 * processor. A slot has the logical meaning of being an animator that
51 * animates the slave assigned to it. However, the core controller sits
52 * below the slots, and sequences down them, assigning the actual physical
53 * core to each slot, in turn.
54 *The reason for having the animation slots and core controller is to
55 * amortize the overhead of switching to the master VP and running it. With
56 * multiple animation slots, the time to switch-to-master and the code in
57 * the animation master is divided by the number of animation slots.
58 *The core controller and animation slots are not fundamental parts of PR,
59 * but rather optimizations put into the shared-semantic-state version of
60 * PR. Other versions of PR will not have a core controller nor scheduling
61 * slots.
62 *
63 *The core controller "owns" the physical core, in effect, and is the
64 * function given to the pthread's creation call. Hence, it contains code
65 * related to pthread startup, synchronizing the controllers to all start
66 * at the same time-point, and pinning the pthreads to physical cores.
67 *
68 */
/*Pthread entry function for one physical core: pins itself to the core,
 * waits for global startup, then loops forever choosing which VP the core
 * animates next (see the block comment above for the full design).
 */
void *
coreController( void *paramsIn )
 {
   int32           thisCoresIdx;             //index of the core this pthread runs on
   int32           numRepetitionsWithNoWork; //consecutive loop passes with no work
   bool32          foundWork;
   AnimSlot       *animSlot;                 //the one animation slot this loop services
   volatile int32 *addrOfMasterLock; //thing pointed to is volatile, not ptr
// SlaveVP        *thisCoresMasterVP;

      //Variables used for pthread related things
   ThdParams      *thisCoresThdParams;
   cpu_set_t       coreMask; //used during pinning pthread to CPU core
   int32           errorCode;

      //Variables used during measurements (inside macro!)
   TSCountLowHigh  endSusp; //presumably written by the MEAS__ macros below -- TODO confirm

      //Variables used in random-backoff, for master-lock and waiting for work
      // (per-core, so each core gets its own backoff sequence)
   uint32_t seed1 = rand()%1000; // init random number generator for backoffs
   uint32_t seed2 = rand()%1000;

      //=============== Initializations ===================
   thisCoresThdParams = (ThdParams *)paramsIn;
   thisCoresIdx       = thisCoresThdParams->coreNum;

      //Assembly that saves addr of label of return instr -- addr used in assmbly
   recordCoreCtlrReturnLabelAddr((void**)&(_PRTopEnv->coreCtlrReturnPt));

      //TODO: DEBUG: check get correct pointer here
      //Start with the idle slave in the slot, so the first loop pass has a
      // valid VP to inspect before the master assigns real work
   animSlot = _PRTopEnv->allAnimSlots[ thisCoresIdx ][0];
   animSlot->slaveAssignedToSlot = _PRTopEnv->idleSlv[thisCoresIdx][ZERO];

   numRepetitionsWithNoWork = 0;
   addrOfMasterLock = &(_PRTopEnv->masterLock);
// thisCoresMasterVP = _PRTopEnv->masterVPs[ thisCoresIdx ];

      //==================== pthread related stuff ======================
      //pin the pthread to the core -- takes away Linux control
      //Linux requires pinning to be done inside the thread-function
      //Designate a core by a 1 in bit-position corresponding to the core
   CPU_ZERO(&coreMask); //initialize mask bits to zero
   CPU_SET(thisCoresThdParams->coreNum,&coreMask); //set bit repr the coreNum
   pthread_t selfThd = pthread_self();
   errorCode =
      pthread_setaffinity_np( selfThd, sizeof(coreMask), &coreMask);
   if(errorCode){ printf("\n pinning thd to core failed \n"); exit(0); }

      //make sure the controllers all start at same time, by making them wait
      // until the creator sets firstProcessReady and broadcasts the condition
   pthread_mutex_lock( &suspendLock );
   while( !(_PRTopEnv->firstProcessReady) )
    { pthread_cond_wait( &suspendCond, &suspendLock );
    }
   pthread_mutex_unlock( &suspendLock );

   HOLISTIC__CoreCtrl_Setup;

   DEBUG__printf1(TRUE, "started coreCtrlr", thisCoresIdx );

      //====================== The Core Controller ======================
   while(1)
    { //Assembly code switches the core between animating a VP and
      // animating this core controller.  The switch is done by
      // changing the stack-pointer and frame-pointer and then doing
      // an assembly jmp.  When reading this code, the effect is
      // that the "switchToSlv()" at the end of the loop is sort of a
      // "warp in time" -- the core disappears inside this, jmps to
      // animating a VP, and when that VP suspends, the suspend
      // jmps back.  This has the effect of "returning" from the
      // switchToSlv() call.  Then control loops back to here.
      //Alternatively, the VP suspend primitive could just not bother
      // returning from switchToSlv, and instead jmp directly to here.

      if(animSlot->slaveAssignedToSlot->typeOfVP == IdleVP)
       { //The Holistic stuff turns on idle slaves.. but can also be in mode
         // where have no idle slaves.. so, this IF statement can only be true
         // executed when HOLISTIC is turned on..
         numRepetitionsWithNoWork ++;
         HOLISTIC__Record_last_work;
       }

      HOLISTIC__Record_AppResponderInvocation_start;
      MEAS__Capture_Pre_Master_Lock_Point;

      int numTriesToGetLock = 0; int gotLock = 0;
      while( gotLock == FALSE ) //keep going until get master lock
       {
         //want to
         // reduce lock contention from cores with no work, so first
         // check if this is a core with no work, and busy wait if so.
         //Then, if it's been way too long without work, yield pthread
         //NOTE(review): pthread_yield() is a non-portable GNU extension;
         // <sched.h> is already included, so sched_yield() may be preferable
         if( numRepetitionsWithNoWork > NUM_REPS_W_NO_WORK_BEFORE_BACKOFF)
            doBackoff_for_TooLongWithNoWork( numRepetitionsWithNoWork, &seed1, &seed2 );
         if( numRepetitionsWithNoWork > NUM_REPS_W_NO_WORK_BEFORE_YIELD )
          { numRepetitionsWithNoWork = 0; pthread_yield(); }

            //Try to get the lock (atomic CAS: UNLOCKED -> LOCKED)
         gotLock = __sync_bool_compare_and_swap( addrOfMasterLock,
                                                 UNLOCKED, LOCKED );
         if( gotLock )
          { //At this point, have successfully gotten master lock.
            //So, break out of get-lock loop.
            break;
          }
         //Get here only when failed to get lock -- check if should do backoff

         numTriesToGetLock++; //if too many, means too much contention
         if( numTriesToGetLock > NUM_TRIES_BEFORE_DO_BACKOFF )
            doBackoff_for_TooLongToGetLock( numTriesToGetLock, &seed1, &seed2 );
         if( numTriesToGetLock > MASTERLOCK_RETRIES_BEFORE_YIELD )
          { numTriesToGetLock = 0; pthread_yield(); }
       } //while( gotLock == FALSE )
      MEAS__Capture_Post_Master_Lock_Point;

         //have master lock, perform master function, which manages request
         // handling and assigning work to this core's slot
      foundWork =
         masterFunction( animSlot );

      PR_int__release_master_lock();

      if( foundWork )
         numRepetitionsWithNoWork = 0;
      else
         numRepetitionsWithNoWork += 1;

         //now that master is done, have work in the slot, so switch to it
      HOLISTIC__Record_Work_start;

      switchToSlv(animSlot->slaveAssignedToSlot); //Slave suspend makes core "return" from this call
      flushRegisters(); //prevent GCC optimization from doing bad things

      MEAS__Capture_End_Susp_in_CoreCtlr_ForSys;
      HOLISTIC__Record_Work_end;
    }//while(1)
 }
/*Shutdown of PR involves several steps, of which this is the last.  This
 * function is jumped to from the asmTerminateCoreCtrl, which is in turn
 * called from endOSThreadFn, which is the top-level-fn of the shutdown
 * slaves.
 *
 *param currSlv  the shutdown slave that jumped here; it is recycled before
 *               the pthread exits, so it must not be touched afterwards.
 *Never returns: pthread_exit() terminates the calling thread.
 */
void *
terminateCoreCtlr(SlaveVP *currSlv)
 {
   //first, free shutdown Slv that jumped here, then end the pthread
   PR_int__recycle_slaveVP( currSlv );
   pthread_exit( NULL ); //does not return, so no return-statement is needed
 }
/*Multiply-with-carry style pseudo-random generator (two 16-bit MWC streams
 * combined into one 32-bit result), used to randomize backoff durations.
 *
 *NOTE(review): declared with an empty (unspecified) parameter list, yet the
 * backoff helpers call it as randomNumber(seed1, seed2) -- those arguments
 * are silently ignored.  The state actually advanced is the seed pair shared
 * in _PRTopEnv, so all cores step the SAME seeds without synchronization
 * (a data race; presumably tolerated since only backoff randomness depends
 * on it -- confirm this is intended).
 */
inline uint32_t
randomNumber()
 {
   _PRTopEnv->seed1 = (uint32)(36969 * (_PRTopEnv->seed1 & 65535) +
                               (_PRTopEnv->seed1 >> 16) );
   _PRTopEnv->seed2 = (uint32)(18000 * (_PRTopEnv->seed2 & 65535) +
                               (_PRTopEnv->seed2 >> 16) );
   return (_PRTopEnv->seed1 << 16) + _PRTopEnv->seed2;
 }
233 /*Busy-wait for a random number of cycles -- chooses number of cycles
234 * differently than for the too-many-tries-to-get-lock backoff
235 */
236 inline void
237 doBackoff_for_TooLongWithNoWork( int32 numRepsWithNoWork, uint32 *seed1,
238 uint32 *seed2 )
239 { int32 i, waitIterations;
240 volatile double fakeWorkVar; //busy-wait fake work
242 //Get a random number of iterations to busy-wait. The % is a simple
243 // way to set the maximum value that can be generated.
244 waitIterations =
245 randomNumber(seed1, seed2) %
246 (numRepsWithNoWork * numRepsWithNoWork * NUM_CORES);
247 for( i = 0; i < waitIterations; i++ )
248 { fakeWorkVar += (fakeWorkVar + 32.0) / 2.0; //busy-wait
249 }
250 }
252 /*Busy-waits for a random number of cycles -- chooses number of cycles
253 * differently than for the no-work backoff
254 */
255 inline void
256 doBackoff_for_TooLongToGetLock( int32 numTriesToGetLock, uint32 *seed1,
257 uint32 *seed2 )
258 { int32 i, waitIterations;
259 volatile double fakeWorkVar; //busy-wait fake work
261 waitIterations =
262 randomNumber(seed1, seed2) %
263 (numTriesToGetLock * GET_LOCK_BACKOFF_WEIGHT);
264 //addToHist( wait_iterations, coreLoopThdParams->wait_iterations_hist );
265 for( i = 0; i < waitIterations; i++ )
266 { fakeWorkVar += (fakeWorkVar + 32.0) / 2.0; //busy-wait
267 }
268 }
271 #ifdef DEBUG__TURN_ON_SEQUENTIAL_MODE
273 //===========================================================================
274 /*This sequential version does the same as threaded, except doesn't do the
275 * pin-threads part, nor the wait until setup complete and acquire master
276 * lock parts.
277 */
278 void *
279 coreCtlr_Seq( void *paramsIn )
280 {
281 int32 thisCoresIdx;
282 int32 numRepetitionsWithNoWork;
283 bool32 foundWork;
284 AnimSlot *animSlot;
285 volatile int32 *addrOfMasterLock; //thing pointed to is volatile, not ptr
286 //Variables used for pthread related things
287 ThdParams *thisCoresThdParams;
288 cpu_set_t coreMask; //used during pinning pthread to CPU core
289 int32 errorCode;
290 //Variables used during measurements (inside macro!)
291 TSCountLowHigh endSusp;
292 //Variables used in random-backoff, for master-lock and waiting for work
293 uint32_t seed1 = rand()%1000; // init random number generator for backoffs
294 uint32_t seed2 = rand()%1000;
297 //=============== Initializations ===================
298 // thisCoresThdParams = (ThdParams *)paramsIn;
299 // thisCoresIdx = thisCoresThdParams->coreNum;
300 thisCoresIdx = 0;
302 //Assembly that saves addr of label of return instr -- addr used in assmbly
303 recordCoreCtlrReturnLabelAddr((void**)&(_PRTopEnv->coreCtlrReturnPt));
305 //TODO: DEBUG: check get correct pointer here
306 animSlot = _PRTopEnv->allAnimSlots[ thisCoresIdx ][0];
307 animSlot->slaveAssignedToSlot = _PRTopEnv->idleSlv[thisCoresIdx][ZERO];
309 numRepetitionsWithNoWork = 0;
310 addrOfMasterLock = &(_PRTopEnv->masterLock);
312 //==================== pthread related stuff ======================
313 //pin the pthread to the core -- takes away Linux control
314 //Linux requires pinning to be done inside the thread-function
315 //Designate a core by a 1 in bit-position corresponding to the core
316 /*
317 CPU_ZERO(&coreMask); //initialize mask bits to zero
318 CPU_SET(thisCoresThdParams->coreNum,&coreMask); //set bit repr the coreNum
319 pthread_t selfThd = pthread_self();
320 errorCode =
321 pthread_setaffinity_np( selfThd, sizeof(coreMask), &coreMask);
322 if(errorCode){ printf("\n pinning thd to core failed \n"); exit(0); }
324 //make sure the controllers all start at same time, by making them wait
325 pthread_mutex_lock( &suspendLock );
326 while( !(_PRTopEnv->firstProcessReady) )
327 { pthread_cond_wait( &suspendCond, &suspendLock );
328 }
329 pthread_mutex_unlock( &suspendLock );
331 HOLISTIC__CoreCtrl_Setup;
333 DEBUG__printf1(TRUE, "started coreCtrlr", thisCoresIdx );
334 */
335 //====================== The Core Controller ======================
336 while(1)
337 { //Assembly code switches the core between animating a VP and
338 // animating this core controller. The switch is done by
339 // changing the stack-pointer and frame-pointer and then doing
340 // an assembly jmp. When reading this code, the effect is
341 // that the "switchToSlv()" at the end of the loop is sort of a
342 // "warp in time" -- the core disappears inside this, jmps to
343 // animating a VP, and when that VP suspends, the suspend
344 // jmps back. This has the effect of "returning" from the
345 // switchToSlv() call. Then control loops back to here.
346 //Alternatively, the VP suspend primitive could just not bother
347 // returning from switchToSlv, and instead jmp directly to here.
348 //core controller top of loop
349 if(animSlot->slaveAssignedToSlot->typeOfVP == IdleVP)
350 { //The Holistic stuff turns on idle slaves.. but can also be in mode
351 // where have no idle slaves.. so, this IF statement can only be true
352 // executed when HOLISTIC is turned on..
353 numRepetitionsWithNoWork ++;
354 HOLISTIC__Record_last_work;
355 }
359 HOLISTIC__Record_AppResponderInvocation_start;
360 MEAS__Capture_Pre_Master_Lock_Point;
362 int numTriesToGetLock = 0; int gotLock = 0;
363 while( gotLock == FALSE ) //keep going until get master lock
364 {
365 //want to
366 // reduce lock contention from cores with no work, so first
367 // check if this is a core with no work, and busy wait if so.
368 //Then, if it's been way too long without work, yield pthread
369 if( numRepetitionsWithNoWork > NUM_REPS_W_NO_WORK_BEFORE_BACKOFF)
370 doBackoff_for_TooLongWithNoWork( numRepetitionsWithNoWork, &seed1, &seed2 );
371 if( numRepetitionsWithNoWork > NUM_REPS_W_NO_WORK_BEFORE_YIELD )
372 { //This is sequential mode.. just return
373 return;
374 }
377 //Try to get the lock
378 gotLock = __sync_bool_compare_and_swap( addrOfMasterLock,
379 UNLOCKED, LOCKED );
380 if( gotLock )
381 { //At this point, have successfully gotten master lock.
382 //So, break out of get-lock loop.
383 break;
384 }
385 //Get here only when failed to get lock -- check in should do backoff
387 numTriesToGetLock++; //if too many, means too much contention
388 if( numTriesToGetLock > NUM_TRIES_BEFORE_DO_BACKOFF )
389 doBackoff_for_TooLongToGetLock( numTriesToGetLock, &seed1, &seed2 );
390 if( numTriesToGetLock > MASTERLOCK_RETRIES_BEFORE_YIELD )
391 { numTriesToGetLock = 0; pthread_yield(); }
392 } //while( currVP == NULL )
393 MEAS__Capture_Post_Master_Lock_Point;
395 //have master lock, perform master function, which manages request
396 // handling and assigning work to this core's slot
397 foundWork =
399 masterFunction( animSlot );
401 PR_int__release_master_lock();
403 if( foundWork )
404 numRepetitionsWithNoWork = 0;
405 else
406 numRepetitionsWithNoWork += 1;
408 //now that master is done, have work in the slot, so switch to it
409 HOLISTIC__Record_Work_start;
411 switchToSlv(animSlot->slaveAssignedToSlot); //Slave suspend makes core "return" from this call
412 flushRegisters(); //prevent GCC optimization from doing bad things
414 MEAS__Capture_End_Susp_in_CoreCtlr_ForSys;
415 HOLISTIC__Record_Work_end;
416 }//while(1)
417 }
419 #endif