VMS/VMS_Implementations/VMS_impls/VMS__MC_shared_impl

view CoreController.c @ 290:c63b498d0a00

Fixed post-merge issues -- renames and includes.
author Sean Halle <seanhalle@yahoo.com>
date Thu, 05 Sep 2013 18:49:48 -0700
parents 1d7ea1b0f176
children
line source
1 /*
2 * Copyright 2010 OpenSourceResearchInstitute
3 *
4 * Licensed under BSD
5 */
8 #include "PR.h"
10 #include <stdlib.h>
11 #include <stdio.h>
12 #include <time.h>
14 #include <pthread.h>
15 #include <sched.h>
17 //===================== Functions local to this file =======================
18 void *terminateCoreController(SlaveVP *currSlv);
20 inline void
21 doBackoff_for_TooLongToGetLock( int32 numTriesToGetLock, uint32 *seed1,
22 uint32 *seed2 );
23 inline void
24 doBackoff_for_TooLongWithNoWork( int32 numRepsWithNoWork, uint32 *seed1,
25 uint32 *seed2 );
27 //===========================================================================
30 /*The Core Controller is logically "beneath" the masterVP and slave VPs. Its
31 * job is to control which of those VPs the core animates. Any time one of
32 * those VPs suspends, the suspend-primitive switches the core over to
33 * animating the core controller. The core controller then follows a very
34 * basic pattern to choose which VP will get animated next, then switches
35 * the core over to animating that VP. So, all VPs switch the core to
36 * core controller, which then chooses which VP the core animates next.
37 *
38 *The way the core controller decides which VP to switch the core to next is:
39 * 1) There are a number of "animation slots", which the master VP fills up
40 * with slave VPs that are ready to be animated. So, the core controller
41 * just iterates through the animation slots. When the next slot has a
42 * slave VP in it, the core controller switches the core over to animate
43 * that slave.
44 * 2) When the core controller checks a animation slot, and it's empty,
45 * then the controller switches the core over to animating the master VP,
46 * whose job is to find more slave VPs ready, and assign those to
47 * animation slots.
48 *
49 *So, in effect, a animation slot functions as another layer of virtual
50 * processor. A slot has the logical meaning of being an animator that
51 * animates the slave assigned to it. However, the core controller sits
52 * below the slots, and sequences down them, assigning the actual physical
53 * core to each slot, in turn.
54 *The reason for having the animation slots and core controller is to
55 * amortize the overhead of switching to the master VP and running it. With
56 * multiple animation slots, the time to switch-to-master and the code in
57 * the animation master is divided by the number of animation slots.
58 *The core controller and animation slots are not fundamental parts of PR,
59 * but rather optimizations put into the shared-semantic-state version of
60 * PR. Other versions of PR will not have a core controller nor scheduling
61 * slots.
62 *
63 *The core controller "owns" the physical core, in effect, and is the
64 * function given to the pthread's creation call. Hence, it contains code
65 * related to pthread startup, synchronizing the controllers to all start
66 * at the same time-point, and pinning the pthreads to physical cores.
67 *
68 */
/*Top-level function given to pthread_create, one instance per physical core.
 *It pins its pthread to its core, waits for the global start signal, then
 * loops forever: acquire the master lock (with randomized backoff), run
 * masterFunction to fill this core's animation slot, release the lock, and
 * switch the core to the slave VP assigned to the slot. Control "returns"
 * from switchToSlv when that slave suspends (see the warp-in-time comment
 * inside the loop).
 *paramsIn: a ThdParams* carrying this core's index (coreNum).
 *Never returns in normal operation; the pthread ends via terminateCoreCtlr.
 */
void *
coreController( void *paramsIn )
{
   int32           thisCoresIdx;
   int32           numRepetitionsWithNoWork;
   bool32          foundWork;
   AnimSlot       *animSlot;
   volatile int32 *addrOfMasterLock; //thing pointed to is volatile, not ptr
// SlaveVP        *thisCoresMasterVP;
      //Variables used for pthread related things
   ThdParams      *thisCoresThdParams;
   cpu_set_t       coreMask;  //used during pinning pthread to CPU core
   int32           errorCode;
      //Variables used during measurements (inside macro!)
   TSCountLowHigh  endSusp;
      //Variables used in random-backoff, for master-lock and waiting for work
      //NOTE(review): rand() here reads shared C-library RNG state across
      // threads -- presumably srand() happens before threads start; confirm.
   uint32_t seed1 = rand()%1000; // init random number generator for backoffs
   uint32_t seed2 = rand()%1000;

      //=============== Initializations ===================
   thisCoresThdParams = (ThdParams *)paramsIn;
   thisCoresIdx       = thisCoresThdParams->coreNum;

      //Assembly that saves addr of label of return instr -- addr used in assmbly
   recordCoreCtlrReturnLabelAddr((void**)&(_PRTopEnv->coreCtlrReturnPt));

      //TODO: DEBUG: check get correct pointer here
   animSlot = _PRTopEnv->allAnimSlots[ thisCoresIdx ][0];
      //start the slot holding the idle slave, so first iteration's IdleVP
      // check below sees "no work" until the master assigns a real slave
   animSlot->slaveAssignedToSlot = _PRTopEnv->idleSlv[thisCoresIdx][ZERO];

   numRepetitionsWithNoWork = 0;
   addrOfMasterLock = &(_PRTopEnv->masterLock);
// thisCoresMasterVP = _PRTopEnv->masterVPs[ thisCoresIdx ];

      //==================== pthread related stuff ======================
      //pin the pthread to the core -- takes away Linux control
      //Linux requires pinning to be done inside the thread-function
      //Designate a core by a 1 in bit-position corresponding to the core
   CPU_ZERO(&coreMask); //initialize mask bits to zero
   CPU_SET(thisCoresThdParams->coreNum,&coreMask); //set bit repr the coreNum
   pthread_t selfThd = pthread_self();
   errorCode =
      pthread_setaffinity_np( selfThd, sizeof(coreMask), &coreMask);
   if(errorCode){ printf("\n pinning thd to core failed \n"); exit(0); }

      //make sure the controllers all start at same time, by making them wait
   pthread_mutex_lock( &suspendLock );
   while( !(_PRTopEnv->firstProcessReady) )
    { pthread_cond_wait( &suspendCond, &suspendLock );
    }
   pthread_mutex_unlock( &suspendLock );

   HOLISTIC__CoreCtrl_Setup;

   DEBUG__printf1(TRUE, "started coreCtrlr %d", thisCoresIdx );

      //====================== The Core Controller ======================
   while(1)
    { //Assembly code switches the core between animating a VP and
      // animating this core controller. The switch is done by
      // changing the stack-pointer and frame-pointer and then doing
      // an assembly jmp. When reading this code, the effect is
      // that the "switchToSlv()" at the end of the loop is sort of a
      // "warp in time" -- the core disappears inside this, jmps to
      // animating a VP, and when that VP suspends, the suspend
      // jmps back. This has the effect of "returning" from the
      // switchToSlv() call. Then control loops back to here.
      //Alternatively, the VP suspend primitive could just not bother
      // returning from switchToSlv, and instead jmp directly to here.

      if(animSlot->slaveAssignedToSlot->typeOfVP == IdleVP)
       { //The Holistic stuff turns on idle slaves.. but can also be in mode
         // where have no idle slaves.. so, this IF statement can only be true
         // executed when HOLISTIC is turned on..
         numRepetitionsWithNoWork ++;
         HOLISTIC__Record_last_work;
       }

      HOLISTIC__Record_AppResponderInvocation_start;
      MEAS__Capture_Pre_Master_Lock_Point;

      int numTriesToGetLock = 0; int gotLock = 0;
      while( gotLock == FALSE ) //keep going until get master lock
       {
         //want to
         // reduce lock contention from cores with no work, so first
         // check if this is a core with no work, and busy wait if so.
         //Then, if it's been way too long without work, yield pthread
         if( numRepetitionsWithNoWork > NUM_REPS_W_NO_WORK_BEFORE_BACKOFF)
            doBackoff_for_TooLongWithNoWork( numRepetitionsWithNoWork, &seed1, &seed2 );
         if( numRepetitionsWithNoWork > NUM_REPS_W_NO_WORK_BEFORE_YIELD )
          { numRepetitionsWithNoWork = 0; pthread_yield(); }

         //Try to get the lock -- atomic compare-and-swap on the shared word
         gotLock = __sync_bool_compare_and_swap( addrOfMasterLock,
                                                 UNLOCKED, LOCKED );
         if( gotLock )
          { //At this point, have successfully gotten master lock.
            //So, break out of get-lock loop.
            break;
          }
         //Get here only when failed to get lock -- check in should do backoff

         numTriesToGetLock++; //if too many, means too much contention
         if( numTriesToGetLock > NUM_TRIES_BEFORE_DO_BACKOFF )
            doBackoff_for_TooLongToGetLock( numTriesToGetLock, &seed1, &seed2 );
         if( numTriesToGetLock > MASTERLOCK_RETRIES_BEFORE_YIELD )
          { numTriesToGetLock = 0; pthread_yield(); }
       } //while( currVP == NULL )
      MEAS__Capture_Post_Master_Lock_Point;

      //have master lock, perform master function, which manages request
      // handling and assigning work to this core's slot
      foundWork =
         masterFunction( animSlot );

      PR_int__release_master_lock();

      if( foundWork )
         numRepetitionsWithNoWork = 0;
      else
         numRepetitionsWithNoWork += 1;

      //now that master is done, have work in the slot, so switch to it
      HOLISTIC__Record_Work_start;

      switchToSlv(animSlot->slaveAssignedToSlot); //Slave suspend makes core "return" from this call
      flushRegisters(); //prevent GCC optimization from doing bad things

      MEAS__Capture_End_Susp_in_CoreCtlr_ForSys;
      HOLISTIC__Record_Work_end;
    }//while(1)
}
/*Shutdown of PR involves several steps, of which this is the last. This
 * function is jumped to from the asmTerminateCoreCtrl, which is in turn
 * called from endOSThreadFn, which is the top-level-fn of the shutdown
 * slaves.
 *currSlv: the shutdown slave VP whose animation jumped here.
 *Never returns -- ends the calling pthread via pthread_exit.
 */
void *
terminateCoreCtlr(SlaveVP *currSlv)
{
   //first, free shutdown Slv that jumped here, then end the pthread
   //NOTE(review): the free is disabled -- presumably because this code may
   // still be executing on currSlv's stack when jumped to from assembly;
   // confirm before re-enabling, else use-after-free of the live stack.
// PR_int__free_slaveVP( currSlv );
   pthread_exit( NULL );
}
/*Pseudo-random number generator: two coupled multiply-with-carry (MWC)
 * generators (Marsaglia). Advances both caller-supplied seed words in place
 * and returns a 32-bit pseudo-random value built from them.
 *seed1, seed2: per-caller generator state; must be non-NULL and non-zero
 * for a useful stream (callers init them from rand()%1000).
 *Fixed: the old version ignored the arguments the backoff helpers pass and
 * mutated the SHARED _PRTopEnv->seed1/seed2 instead, so all core
 * controllers raced on the same two words and the per-thread seed plumbing
 * was dead code. Now the per-caller seeds are actually used.
 *Made static: it is only called by the backoff helpers in this file, and a
 * plain "inline" definition with no external definition is not guaranteed
 * to link under C99/C11 inline semantics.
 *(uint32_t matches the file's uint32 typedef -- coreController already
 * passes uint32_t* where uint32* is declared.)
 */
static inline uint32_t
randomNumber( uint32_t *seed1, uint32_t *seed2 )
{
   *seed1 = 36969 * (*seed1 & 65535) + (*seed1 >> 16);
   *seed2 = 18000 * (*seed2 & 65535) + (*seed2 >> 16);
   return (*seed1 << 16) + *seed2;
}
232 /*Busy-wait for a random number of cycles -- chooses number of cycles
233 * differently than for the too-many-tries-to-get-lock backoff
234 */
235 inline void
236 doBackoff_for_TooLongWithNoWork( int32 numRepsWithNoWork, uint32 *seed1,
237 uint32 *seed2 )
238 { int32 i, waitIterations;
239 volatile double fakeWorkVar; //busy-wait fake work
241 //Get a random number of iterations to busy-wait. The % is a simple
242 // way to set the maximum value that can be generated.
243 waitIterations =
244 randomNumber(seed1, seed2) %
245 (numRepsWithNoWork * numRepsWithNoWork * NUM_CORES);
246 for( i = 0; i < waitIterations; i++ )
247 { fakeWorkVar += (fakeWorkVar + 32.0) / 2.0; //busy-wait
248 }
249 }
251 /*Busy-waits for a random number of cycles -- chooses number of cycles
252 * differently than for the no-work backoff
253 */
254 inline void
255 doBackoff_for_TooLongToGetLock( int32 numTriesToGetLock, uint32 *seed1,
256 uint32 *seed2 )
257 { int32 i, waitIterations;
258 volatile double fakeWorkVar; //busy-wait fake work
260 waitIterations =
261 randomNumber(seed1, seed2) %
262 (numTriesToGetLock * GET_LOCK_BACKOFF_WEIGHT);
263 //addToHist( wait_iterations, coreLoopThdParams->wait_iterations_hist );
264 for( i = 0; i < waitIterations; i++ )
265 { fakeWorkVar += (fakeWorkVar + 32.0) / 2.0; //busy-wait
266 }
267 }
270 #ifdef DEBUG__TURN_ON_SEQUENTIAL_MODE
272 //===========================================================================
273 /*This sequential version does the same as threaded, except doesn't do the
274 * pin-threads part, nor the wait until setup complete and acquire master
275 * lock parts.
276 */
277 void *
278 coreCtlr_Seq( void *paramsIn )
279 {
280 int32 thisCoresIdx;
281 int32 numRepetitionsWithNoWork;
282 bool32 foundWork;
283 AnimSlot *animSlot;
284 volatile int32 *addrOfMasterLock; //thing pointed to is volatile, not ptr
285 //Variables used for pthread related things
286 ThdParams *thisCoresThdParams;
287 cpu_set_t coreMask; //used during pinning pthread to CPU core
288 int32 errorCode;
289 //Variables used during measurements (inside macro!)
290 TSCountLowHigh endSusp;
291 //Variables used in random-backoff, for master-lock and waiting for work
292 uint32_t seed1 = rand()%1000; // init random number generator for backoffs
293 uint32_t seed2 = rand()%1000;
296 //=============== Initializations ===================
297 // thisCoresThdParams = (ThdParams *)paramsIn;
298 // thisCoresIdx = thisCoresThdParams->coreNum;
299 thisCoresIdx = 0;
301 //Assembly that saves addr of label of return instr -- addr used in assmbly
302 recordCoreCtlrReturnLabelAddr((void**)&(_PRTopEnv->coreCtlrReturnPt));
304 //TODO: DEBUG: check get correct pointer here
305 animSlot = _PRTopEnv->allAnimSlots[ thisCoresIdx ][0];
306 animSlot->slaveAssignedToSlot = _PRTopEnv->idleSlv[thisCoresIdx][ZERO];
308 numRepetitionsWithNoWork = 0;
309 addrOfMasterLock = &(_PRTopEnv->masterLock);
311 //==================== pthread related stuff ======================
312 //pin the pthread to the core -- takes away Linux control
313 //Linux requires pinning to be done inside the thread-function
314 //Designate a core by a 1 in bit-position corresponding to the core
315 /*
316 CPU_ZERO(&coreMask); //initialize mask bits to zero
317 CPU_SET(thisCoresThdParams->coreNum,&coreMask); //set bit repr the coreNum
318 pthread_t selfThd = pthread_self();
319 errorCode =
320 pthread_setaffinity_np( selfThd, sizeof(coreMask), &coreMask);
321 if(errorCode){ printf("\n pinning thd to core failed \n"); exit(0); }
323 //make sure the controllers all start at same time, by making them wait
324 pthread_mutex_lock( &suspendLock );
325 while( !(_PRTopEnv->firstProcessReady) )
326 { pthread_cond_wait( &suspendCond, &suspendLock );
327 }
328 pthread_mutex_unlock( &suspendLock );
330 HOLISTIC__CoreCtrl_Setup;
332 DEBUG__printf1(TRUE, "started coreCtrlr", thisCoresIdx );
333 */
334 //====================== The Core Controller ======================
335 while(1)
336 { //Assembly code switches the core between animating a VP and
337 // animating this core controller. The switch is done by
338 // changing the stack-pointer and frame-pointer and then doing
339 // an assembly jmp. When reading this code, the effect is
340 // that the "switchToSlv()" at the end of the loop is sort of a
341 // "warp in time" -- the core disappears inside this, jmps to
342 // animating a VP, and when that VP suspends, the suspend
343 // jmps back. This has the effect of "returning" from the
344 // switchToSlv() call. Then control loops back to here.
345 //Alternatively, the VP suspend primitive could just not bother
346 // returning from switchToSlv, and instead jmp directly to here.
347 //core controller top of loop
348 if(animSlot->slaveAssignedToSlot->typeOfVP == IdleVP)
349 { //The Holistic stuff turns on idle slaves.. but can also be in mode
350 // where have no idle slaves.. so, this IF statement can only be true
351 // executed when HOLISTIC is turned on..
352 numRepetitionsWithNoWork ++;
353 HOLISTIC__Record_last_work;
354 }
358 HOLISTIC__Record_AppResponderInvocation_start;
359 MEAS__Capture_Pre_Master_Lock_Point;
361 int numTriesToGetLock = 0; int gotLock = 0;
362 while( gotLock == FALSE ) //keep going until get master lock
363 {
364 //want to
365 // reduce lock contention from cores with no work, so first
366 // check if this is a core with no work, and busy wait if so.
367 //Then, if it's been way too long without work, yield pthread
368 if( numRepetitionsWithNoWork > NUM_REPS_W_NO_WORK_BEFORE_BACKOFF)
369 doBackoff_for_TooLongWithNoWork( numRepetitionsWithNoWork, &seed1, &seed2 );
370 if( numRepetitionsWithNoWork > NUM_REPS_W_NO_WORK_BEFORE_YIELD )
371 { //This is sequential mode.. just return
372 return;
373 }
376 //Try to get the lock
377 gotLock = __sync_bool_compare_and_swap( addrOfMasterLock,
378 UNLOCKED, LOCKED );
379 if( gotLock )
380 { //At this point, have successfully gotten master lock.
381 //So, break out of get-lock loop.
382 break;
383 }
384 //Get here only when failed to get lock -- check in should do backoff
386 numTriesToGetLock++; //if too many, means too much contention
387 if( numTriesToGetLock > NUM_TRIES_BEFORE_DO_BACKOFF )
388 doBackoff_for_TooLongToGetLock( numTriesToGetLock, &seed1, &seed2 );
389 if( numTriesToGetLock > MASTERLOCK_RETRIES_BEFORE_YIELD )
390 { numTriesToGetLock = 0; pthread_yield(); }
391 } //while( currVP == NULL )
392 MEAS__Capture_Post_Master_Lock_Point;
394 //have master lock, perform master function, which manages request
395 // handling and assigning work to this core's slot
396 foundWork =
398 masterFunction( animSlot );
400 PR_int__release_master_lock();
402 if( foundWork )
403 numRepetitionsWithNoWork = 0;
404 else
405 numRepetitionsWithNoWork += 1;
407 //now that master is done, have work in the slot, so switch to it
408 HOLISTIC__Record_Work_start;
410 switchToSlv(animSlot->slaveAssignedToSlot); //Slave suspend makes core "return" from this call
411 flushRegisters(); //prevent GCC optimization from doing bad things
413 MEAS__Capture_End_Susp_in_CoreCtlr_ForSys;
414 HOLISTIC__Record_Work_end;
415 }//while(1)
416 }
418 #endif