VMS/VMS_Implementations/VMS_impls/VMS__MC_shared_impl

view CoreController.c @ 256:2bcf37fd50c6

Prev msg should have been: isolated valgrind with a new compiler switch; use SERVICES__TURN_ON_VALGRIND in order to compile valgrind support into vmalloc
author Sean Halle <seanhalle@yahoo.com>
date Mon, 10 Sep 2012 01:26:51 -0700
parents f1267bc7b342
children f5b110414453 999f2966a3e5
/*
 * Copyright 2010 OpenSourceStewardshipFoundation
 *
 * Licensed under BSD
 */

#include "VMS.h"

#include <stdlib.h>
#include <stdio.h>
#include <time.h>

#include <pthread.h>
#include <sched.h>

//===================== Functions local to this file =======================
void *terminateCoreCtlr( SlaveVP *currSlv );

inline void
doBackoff_for_TooLongToGetLock( int32 numTriesToGetLock, uint32 *seed1,
                                uint32 *seed2 );
inline void
doBackoff_for_TooLongWithNoWork( int32 numRepsWithNoWork, uint32 *seed1,
                                 uint32 *seed2 );

//===========================================================================
/*The Core Controller is logically "beneath" the masterVP and slave VPs. Its
 * job is to control which of those VPs the core animates. Any time one of
 * those VPs suspends, the suspend-primitive switches the core over to
 * animating the core controller. The core controller then follows a very
 * basic pattern to choose which VP will get animated next, then switches
 * the core over to animating that VP. So, all VPs switch the core to the
 * core controller, which then chooses which VP the core animates next.
 *
 *The way the core controller decides which VP to switch the core to next is:
 * 1) There are a number of "animation slots", which the master VP fills up
 *    with slave VPs that are ready to be animated. So, the core controller
 *    just iterates through the animation slots. When the next slot has a
 *    slave VP in it, the core controller switches the core over to animate
 *    that slave.
 * 2) When the core controller checks an animation slot and it's empty,
 *    the controller switches the core over to animating the master VP,
 *    whose job is to find more slave VPs that are ready, and assign those
 *    to animation slots.
 *
 *So, in effect, an animation slot functions as another layer of virtual
 * processor. A slot has the logical meaning of being an animator that
 * animates the slave assigned to it. However, the core controller sits
 * below the slots, and sequences down them, assigning the actual physical
 * core to each slot, in turn.
 *The reason for having the animation slots and core controller is to
 * amortize the overhead of switching to the master VP and running it. With
 * multiple animation slots, the cost of switching to the master and running
 * the animation-master code is divided by the number of animation slots.
 *The core controller and animation slots are not fundamental parts of VMS,
 * but rather optimizations put into the shared-semantic-state version of
 * VMS. Other versions of VMS will have neither a core controller nor
 * animation slots.
 *
 *The core controller "owns" the physical core, in effect, and is the
 * function given to the pthread's creation call. Hence, it contains code
 * related to pthread startup, synchronizing the controllers to all start
 * at the same time-point, and pinning the pthreads to physical cores.
 */
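
/*Not part of the original file: a minimal sketch of the pieces of AnimSlot
 * and SlaveVP that this controller relies on, inferred from their use below.
 * The real definitions live elsewhere (VMS.h); the field layout and any
 * details not visible in this file are assumptions.
 */
#if 0 //sketch only -- illustrative, not compiled
typedef struct
 {
   int32    needsSlaveAssigned;  //TRUE  => slot is empty, master must refill it
                                 //FALSE => slaveAssignedToSlot is ready to run
   SlaveVP *slaveAssignedToSlot; //slave this core animates when the slot is full
 }
AnimSlot_sketch;
   //The controller also reads slave->typeOfVP, comparing it to Idle, to
   // count passes in which a core was handed only idle slaves.
#endif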
void *
coreController( void *paramsIn )
{
   int32     thisCoresIdx;
   int32     numRepetitionsWithNoWork;
   SlaveVP  *currVP;
   AnimSlot *currSlot, **animSlots;
   int32     currSlotIdx;
   volatile int32 *addrOfMasterLock; //thing pointed to is volatile, not the ptr
   SlaveVP  *thisCoresMasterVP;

      //Variables used for pthread-related things
   ThdParams *thisCoresThdParams;
   cpu_set_t  coreMask;  //used during pinning of pthread to CPU core
   int32      errorCode;

      //Variables used during measurements
   TSCountLowHigh endSusp;

      //Variables used in random backoff, for master-lock and waiting for work
   uint32_t seed1 = rand()%1000; //init random number generator for backoffs
   uint32_t seed2 = rand()%1000;


      //=============== Initializations ===================
   thisCoresThdParams = (ThdParams *)paramsIn;
   thisCoresIdx       = thisCoresThdParams->coreNum;

      //Assembly that saves addr of label of return instr -- label in assembly
   recordCoreCtlrReturnLabelAddr( (void**)&(_VMSMasterEnv->coreCtlrReturnPt) );

   animSlots   = _VMSMasterEnv->allAnimSlots[thisCoresIdx];
   currSlotIdx = 0; //start at slot 0, go up until one empty, then do master
   numRepetitionsWithNoWork = 0;
   addrOfMasterLock  = &(_VMSMasterEnv->masterLock);
   thisCoresMasterVP = _VMSMasterEnv->masterVPs[thisCoresIdx];

      //==================== pthread related stuff ======================
      //Pin the pthread to the core -- takes away Linux control.
      //Linux requires pinning to be done inside the thread-function.
      //Designate a core by a 1 in the bit-position corresponding to the core.
   CPU_ZERO( &coreMask ); //initialize mask bits to zero
   CPU_SET( thisCoresThdParams->coreNum, &coreMask ); //set bit repr the coreNum
   pthread_t selfThd = pthread_self();
   errorCode =
      pthread_setaffinity_np( selfThd, sizeof(coreMask), &coreMask );
   if( errorCode ) { printf("\n pinning thd to core failed \n"); exit(0); }

      //make sure the controllers all start at the same time, by making them wait
   pthread_mutex_lock( &suspendLock );
   while( !(_VMSMasterEnv->setupComplete) )
    { pthread_cond_wait( &suspendCond, &suspendLock );
    }
   pthread_mutex_unlock( &suspendLock );

   HOLISTIC__CoreCtrl_Setup;

   DEBUG__printf1( TRUE, "started coreCtrlr", thisCoresIdx );

      //====================== The Core Controller ======================
   while(1) //An endless loop is just one way of doing the control structure
    {    //Assembly code switches the core between animating a VP and
         // animating this core controller. The switch is done by
         // changing the stack-pointer and frame-pointer and then doing
         // an assembly jmp. When reading this code, the effect is
         // that the "switchToSlv()" at the end of the loop is sort of a
         // "warp in time" -- the core disappears inside this, jmps to
         // animating a VP, and when that VP suspends, the suspend
         // jmps back. This has the effect of "returning" from the
         // switchToSlv() call. Then control loops back to here.
         //Alternatively, the VP suspend primitive could just not bother
         // returning from switchToSlv, and instead jmp directly to here.

      if( currSlotIdx >= NUM_ANIM_SLOTS ) goto switchToMaster;
      currSlot = animSlots[ currSlotIdx ];

      if( !currSlot->needsSlaveAssigned ) //slot does have slave assigned
       { if( currSlot->slaveAssignedToSlot->typeOfVP == Idle )
          { numRepetitionsWithNoWork ++;
          }
         else
          { numRepetitionsWithNoWork = 0; //reset back2back master count
          }
         currSlotIdx ++;
         currVP = currSlot->slaveAssignedToSlot;
         HOLISTIC__Record_last_work;
       }
      else //slot is empty, so switch to master
       {
      switchToMaster:
         currSlotIdx = 0; //doing switch to master, so start over at slot 0
         currVP = NULL;

         MEAS__Capture_Pre_Master_Lock_Point;
         HOLISTIC__Record_AppResponderInvocation_start;

         int numTriesToGetLock = 0; int gotLock = 0;
         while( currVP == NULL ) //keep going until get master lock
          {
               //At this point, first thing to do is get lock. But, want to
               // reduce lock contention from cores with no work, so first
               // check if this is a core with no work, and busy wait if so.
               //Then, if it's been way too long without work, yield pthread
            if( numRepetitionsWithNoWork > NUM_REPS_W_NO_WORK_BEFORE_BACKOFF )
               doBackoff_for_TooLongWithNoWork( numRepetitionsWithNoWork,
                                                &seed1, &seed2 );
            if( numRepetitionsWithNoWork > NUM_REPS_W_NO_WORK_BEFORE_YIELD )
             { numRepetitionsWithNoWork = 0; pthread_yield(); }

               //Now, try to get the lock
            gotLock = __sync_bool_compare_and_swap( addrOfMasterLock,
                                                    UNLOCKED, LOCKED );
            if( gotLock )
             {   //At this point, have run out of slaves, so tried to get
                 // the master lock, and have successfully gotten it.
                 //So, set the currVP to this core's masterVP and break out
                 // of the get-lock loop. Below, assembly code will switch
                 // the core over to animating the masterVP. When it's
                 // done, the masterVP will use assembly to switch the core
                 // back to animating this core controller
               currVP = thisCoresMasterVP;
               numRepetitionsWithNoWork += 1;
               break; //end while -- have a VP to animate now
             }
               //Get here only when failed to get lock
            numTriesToGetLock++; //if too many, means too much contention
            if( numTriesToGetLock > NUM_TRIES_BEFORE_DO_BACKOFF )
               doBackoff_for_TooLongToGetLock( numTriesToGetLock,
                                               &seed1, &seed2 );
            if( numTriesToGetLock > MASTERLOCK_RETRIES_BEFORE_YIELD )
             { numTriesToGetLock = 0; pthread_yield(); }
          }
         MEAS__Capture_Post_Master_Lock_Point;
       }
      HOLISTIC__Record_Work_start;

      switchToSlv( currVP ); //Slave suspend makes core "return" from this call
      flushRegisters();      //prevent GCC optimization from doing bad things

      HOLISTIC__Record_Work_end;

      MEAS__Capture_End_Susp_in_CoreCtlr_ForSys;
    } //while(1)
}
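
/*Not part of the original file: a minimal sketch of how coreController is
 * typically handed to pthread_create, one controller per core. It assumes
 * only the ThdParams.coreNum field used above; the helper name, the storage
 * for thread handles, and the allocation style are hypothetical.
 */
#if 0 //sketch only -- illustrative, not compiled
void
startCoreCtlrs_sketch()               //hypothetical helper name
 { int32      coreIdx;
   pthread_t  ctlrThds[NUM_CORES];    //hypothetical storage for thread handles
   ThdParams *thdParams;

   for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ )
    { thdParams = malloc( sizeof(ThdParams) );
      thdParams->coreNum = coreIdx;   //coreController pins itself to this core
      pthread_create( &ctlrThds[coreIdx], NULL, coreController,
                      (void *)thdParams );
    }
 }
#endif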

/*Shutdown of VMS involves several steps, of which this is the last. This
 * function is jumped to from the asmTerminateCoreCtrl, which is in turn
 * called from endOSThreadFn, which is the top-level-fn of the shutdown
 * slaves.
 */
void *
terminateCoreCtlr( SlaveVP *currSlv )
{
      //first, free shutdown Slv that jumped here, then end the pthread
   VMS_int__dissipate_slaveVP( currSlv );
   pthread_exit( NULL );
}

/*Marsaglia-style multiply-with-carry generator. The callers pass in per-core
 * seed state, so each core advances its own generator rather than having
 * multiple cores update the shared seeds in _VMSMasterEnv at once.
 */
inline uint32_t
randomNumber( uint32 *seed1, uint32 *seed2 )
{
   *seed1 = (uint32)( 36969 * (*seed1 & 65535) + (*seed1 >> 16) );
   *seed2 = (uint32)( 18000 * (*seed2 & 65535) + (*seed2 >> 16) );
   return (*seed1 << 16) + *seed2;
}

/*Busy-waits for a random number of cycles -- chooses the number of cycles
 * differently than the too-many-tries-to-get-lock backoff does
 */
inline void
doBackoff_for_TooLongWithNoWork( int32 numRepsWithNoWork, uint32 *seed1,
                                 uint32 *seed2 )
 { int32 i, waitIterations;
   volatile double fakeWorkVar = 0.0; //busy-wait fake work

      //Get a random number of iterations to busy-wait. The % is a simple
      // way to set the maximum value that can be generated.
   waitIterations =
      randomNumber( seed1, seed2 ) %
      (numRepsWithNoWork * numRepsWithNoWork * NUM_CORES);
   for( i = 0; i < waitIterations; i++ )
    { fakeWorkVar += (fakeWorkVar + 32.0) / 2.0; //busy-wait
    }
 }

/*Busy-waits for a random number of cycles -- chooses the number of cycles
 * differently than the no-work backoff does
 */
inline void
doBackoff_for_TooLongToGetLock( int32 numTriesToGetLock, uint32 *seed1,
                                uint32 *seed2 )
 { int32 i, waitIterations;
   volatile double fakeWorkVar = 0.0; //busy-wait fake work

   waitIterations =
      randomNumber( seed1, seed2 ) %
      (numTriesToGetLock * GET_LOCK_BACKOFF_WEIGHT);
   //addToHist( wait_iterations, coreLoopThdParams->wait_iterations_hist );
   for( i = 0; i < waitIterations; i++ )
    { fakeWorkVar += (fakeWorkVar + 32.0) / 2.0; //busy-wait
    }
 }
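
/*Not part of the original file: the tuning constants referenced by the
 * controller and the backoff helpers above are defined elsewhere (e.g., in
 * VMS.h or the build configuration). A sketch of the knobs, with purely
 * illustrative values -- the real values may differ:
 */
#if 0 //sketch only -- values are hypothetical
#define NUM_ANIM_SLOTS                       4 //slots the master refills per run
#define NUM_REPS_W_NO_WORK_BEFORE_BACKOFF   10 //idle passes before busy-wait backoff
#define NUM_REPS_W_NO_WORK_BEFORE_YIELD    100 //idle passes before pthread_yield
#define NUM_TRIES_BEFORE_DO_BACKOFF         10 //failed CAS tries before busy-wait backoff
#define MASTERLOCK_RETRIES_BEFORE_YIELD    100 //failed CAS tries before pthread_yield
#define GET_LOCK_BACKOFF_WEIGHT             20 //scales the get-lock backoff length
#endif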

#ifdef DEBUG__TURN_ON_SEQUENTIAL_MODE

//===========================================================================
/*This sequential version does the same as the threaded one, except it skips
 * the pin-threads part, the wait-until-setup-complete part, and the
 * acquire-master-lock part.
 */
void *
coreCtlr_Seq( void *paramsIn )
{
   int32     thisCoresIdx;
   int32     numRepetitionsWithNoWork;
   SlaveVP  *currVP;
   AnimSlot *currSlot, **animSlots;
   int32     currSlotIdx;
   int32    *addrOfMasterLock;
   SlaveVP  *thisCoresMasterVP;

      //=============== Initializations ===================
   thisCoresIdx = 0; //sequential version
   animSlots    = _VMSMasterEnv->allAnimSlots[thisCoresIdx];
   currSlotIdx  = 0; //start at slot 0, go up until one empty, then do master
   numRepetitionsWithNoWork = 0;
   addrOfMasterLock  = &(_VMSMasterEnv->masterLock);
   thisCoresMasterVP = _VMSMasterEnv->masterVPs[thisCoresIdx];

      //Assembly that saves addr of label of return instr -- label in assembly
   recordCoreCtlrReturnLabelAddr( (void**)&(_VMSMasterEnv->coreCtlrReturnPt) );


      //====================== The Core Controller ======================
   while(1)
    {
      if( currSlotIdx >= NUM_ANIM_SLOTS ) goto switchToMaster;
      currSlot = animSlots[ currSlotIdx ];

      if( !currSlot->needsSlaveAssigned ) //slot does have slave assigned
       { numRepetitionsWithNoWork = 0; //reset B2B master count
         currSlotIdx ++;
         currVP = currSlot->slaveAssignedToSlot;
       }
      else //slot is empty, so switch to master
       {
      switchToMaster:
         currSlotIdx = 0; //doing switch to master, so start over at slot 0

         currVP = thisCoresMasterVP;

         MEAS__Capture_Pre_Master_Lock_Point;  //back to back because
         MEAS__Capture_Post_Master_Lock_Point; // sequential version

         if( numRepetitionsWithNoWork > NUM_REPS_W_NO_WORK_BEFORE_YIELD )
          { printf("Lots of reps w/o work\n");
            exit(0); //if no work, no way to ever get it in sequential!
          }
         numRepetitionsWithNoWork += 1;
       }

      switchToSlv( currVP ); //Slave suspend makes core "return" from this call
      flushRegisters();      //prevent GCC optimization from doing bad things

      MEAS__Capture_End_Susp_in_CoreCtlr_ForSys;
    } //while(1)
}
#endif