VMS/VMS_Implementations/VMS_impls/VMS__MC_shared_impl

view CoreController.c @ 245:f1267bc7b342

added exceptions, make malloc in WL and App get master lock, added rand num
author Sean Halle <seanhalle@yahoo.com>
date Wed, 30 May 2012 14:23:47 -0700
parents 7ed97c961901
children 4c7414df4f0e
line source
1 /*
2 * Copyright 2010 OpenSourceStewardshipFoundation
3 *
4 * Licensed under BSD
5 */
8 #include "VMS.h"
10 #include <stdlib.h>
11 #include <stdio.h>
12 #include <time.h>
14 #include <pthread.h>
15 #include <sched.h>
17 //===================== Functions local to this file =======================
18 void *terminateCoreController(SlaveVP *currSlv);
20 inline void
21 doBackoff_for_TooLongToGetLock( int32 numTriesToGetLock, uint32 *seed1,
22 uint32 *seed2 );
23 inline void
24 doBackoff_for_TooLongWithNoWork( int32 numRepsWithNoWork, uint32 *seed1,
25 uint32 *seed2 );
27 //===========================================================================
30 /*The Core Controller is logically "beneath" the masterVP and slave VPs. Its
31 * job is to control which of those VPs the core animates. Any time one of
32 * those VPs suspends, the suspend-primitive switches the core over to
33 * animating the core controller. The core controller then follows a very
34 * basic pattern to choose which VP will get animated next, then switches
35 * the core over to animating that VP. So, all VPs switch the core to
36 * core controller, which then chooses which VP the core animates next.
37 *
38 *The way the core controller decides which VP to switch the core to next is:
39 * 1) There are a number of "animation slots", which the master VP fills up
40 * with slave VPs that are ready to be animated. So, the core controller
41 * just iterates through the animation slots. When the next slot has a
42 * slave VP in it, the core controller switches the core over to animate
43 * that slave.
44 * 2) When the core controller checks a animation slot, and it's empty,
45 * then the controller switches the core over to animating the master VP,
46 * whose job is to find more slave VPs ready, and assign those to
47 * animation slots.
48 *
49 *So, in effect, a animation slot functions as another layer of virtual
50 * processor. A slot has the logical meaning of being an animator that
51 * animates the slave assigned to it. However, the core controller sits
52 * below the slots, and sequences down them, assigning the actual physical
53 * core to each slot, in turn.
54 *The reason for having the animation slots and core controller is to
55 * amortize the overhead of switching to the master VP and running it. With
56 * multiple animation slots, the time to switch-to-master and the code in
57 * the animation master is divided by the number of animation slots.
58 *The core controller and animation slots are not fundamental parts of VMS,
59 * but rather optimizations put into the shared-semantic-state version of
60 * VMS. Other versions of VMS will not have a core controller nor scheduling
61 * slots.
62 *
63 *The core controller "owns" the physical core, in effect, and is the
64 * function given to the pthread's creation call. Hence, it contains code
65 * related to pthread startup, synchronizing the controllers to all start
66 * at the same time-point, and pinning the pthreads to physical cores.
67 *
68 */
69 void *
70 coreController( void *paramsIn )
71 {
72 int32 thisCoresIdx;
73 int32 numRepetitionsWithNoWork;
74 SlaveVP *currVP;
75 AnimSlot *currSlot, **animSlots;
76 int32 currSlotIdx;
77 volatile int32 *addrOfMasterLock; //thing pointed to is volatile, not ptr
78 SlaveVP *thisCoresMasterVP;
79 //Variables used for pthread related things
80 ThdParams *thisCoresThdParams;
81 cpu_set_t coreMask; //used during pinning pthread to CPU core
82 int32 errorCode;
83 //Variables used during measurements
84 TSCountLowHigh endSusp;
85 //Variables used in random-backoff, for master-lock and waiting for work
86 uint32_t seed1 = rand()%1000; // init random number generator for backoffs
87 uint32_t seed2 = rand()%1000;
90 //=============== Initializations ===================
91 thisCoresThdParams = (ThdParams *)paramsIn;
92 thisCoresIdx = thisCoresThdParams->coreNum;
94 //Assembly that saves addr of label of return instr -- label in assmbly
95 recordCoreCtlrReturnLabelAddr((void**)&(_VMSMasterEnv->coreCtlrReturnPt));
97 animSlots = _VMSMasterEnv->allAnimSlots[thisCoresIdx];
98 currSlotIdx = 0; //start at slot 0, go up until one empty, then do master
99 numRepetitionsWithNoWork = 0;
100 addrOfMasterLock = &(_VMSMasterEnv->masterLock);
101 thisCoresMasterVP = _VMSMasterEnv->masterVPs[thisCoresIdx];
103 //==================== pthread related stuff ======================
104 //pin the pthread to the core -- takes away Linux control
105 //Linux requires pinning to be done inside the thread-function
106 //Designate a core by a 1 in bit-position corresponding to the core
107 CPU_ZERO(&coreMask); //initialize mask bits to zero
108 CPU_SET(thisCoresThdParams->coreNum,&coreMask); //set bit repr the coreNum
109 pthread_t selfThd = pthread_self();
110 errorCode =
111 pthread_setaffinity_np( selfThd, sizeof(coreMask), &coreMask);
112 if(errorCode){ printf("\n pinning thd to core failed \n"); exit(0); }
114 //make sure the controllers all start at same time, by making them wait
115 pthread_mutex_lock( &suspendLock );
116 while( !(_VMSMasterEnv->setupComplete) )
117 { pthread_cond_wait( &suspendCond, &suspendLock );
118 }
119 pthread_mutex_unlock( &suspendLock );
121 HOLISTIC__CoreCtrl_Setup;
123 DEBUG__printf1(TRUE, "started coreCtrlr", thisCoresIdx );
125 //====================== The Core Controller ======================
126 while(1) //An endless loop is just one way of doing the control structure
127 { //Assembly code switches the core between animating a VP and
128 // animating this core controller. The switch is done by
129 // changing the stack-pointer and frame-pointer and then doing
130 // an assembly jmp. When reading this code, the effect is
131 // that the "switchToSlv()" at the end of the loop is sort of a
132 // "warp in time" -- the core disappears inside this, jmps to
133 // animating a VP, and when that VP suspends, the suspend
134 // jmps back. This has the effect of "returning" from the
135 // switchToSlv() call. Then control loops back to here.
136 //Alternatively, the VP suspend primitive could just not bother
137 // returning from switchToSlv, and instead jmp directly to here.
139 if( currSlotIdx >= NUM_ANIM_SLOTS ) goto switchToMaster;
140 currSlot = animSlots[ currSlotIdx ];
142 if( ! currSlot->needsSlaveAssigned ) //slot does have slave assigned
143 { if(currSlot->slaveAssignedToSlot->typeOfVP == Idle){
144 numRepetitionsWithNoWork ++;
145 } else {
146 numRepetitionsWithNoWork = 0; //reset back2back master count
147 }
148 currSlotIdx ++;
149 currVP = currSlot->slaveAssignedToSlot;
150 HOLISTIC__Record_last_work;
151 }
152 else //slot is empty, so switch to master
153 {
154 switchToMaster:
155 currSlotIdx = 0; //doing switch to master, so start over at slot 0
156 currVP = NULL;
158 MEAS__Capture_Pre_Master_Lock_Point;
159 HOLISTIC__Record_AppResponderInvocation_start;
161 int numTriesToGetLock = 0; int gotLock = 0;
162 while( currVP == NULL ) //keep going until get master lock
163 {
164 //At this point, first thing to do is get lock. But, want to
165 // reduce lock contention from cores with no work, so first
166 // check if this is a core with no work, and busy wait if so.
167 //Then, if it's been way too long without work, yield pthread
168 if( numRepetitionsWithNoWork > NUM_REPS_W_NO_WORK_BEFORE_BACKOFF)
169 doBackoff_for_TooLongWithNoWork( numRepetitionsWithNoWork, &seed1, &seed2 );
170 if( numRepetitionsWithNoWork > NUM_REPS_W_NO_WORK_BEFORE_YIELD )
171 { numRepetitionsWithNoWork = 0; pthread_yield(); }
174 //Now, try to get the lock
175 gotLock = __sync_bool_compare_and_swap( addrOfMasterLock,
176 UNLOCKED, LOCKED );
177 if( gotLock )
178 { //At this point, have run out of slaves, so tried to get
179 // the master lock, and have successfully gotten it.
180 //So, set the currVP to this core's masterVP and break out
181 // of the get-lock loop. Below, assembly code will switch
182 // the core over to animating the masterVP. When it's
183 // done, the masterVP will use assembly to switch the core
184 // back to animating this core controller
185 currVP = thisCoresMasterVP;
186 numRepetitionsWithNoWork += 1;
187 break; //end while -- have a VP to animate now
188 }
189 //Get here only when failed to get lock
191 numTriesToGetLock++; //if too many, means too much contention
192 if( numTriesToGetLock > NUM_TRIES_BEFORE_DO_BACKOFF )
193 doBackoff_for_TooLongToGetLock( numTriesToGetLock, &seed1, &seed2 );
194 if( numTriesToGetLock > MASTERLOCK_RETRIES_BEFORE_YIELD )
195 { numTriesToGetLock = 0; pthread_yield(); }
196 }
197 MEAS__Capture_Post_Master_Lock_Point;
198 }
200 HOLISTIC__Record_Work_start;
202 switchToSlv(currVP); //Slave suspend makes core "return" from this call
203 flushRegisters(); //prevent GCC optimization from doing bad things
205 HOLISTIC__Record_Work_end;
207 MEAS__Capture_End_Susp_in_CoreCtlr_ForSys;
209 }//while(1)
210 }
212 /*Shutdown of VMS involves several steps, of which this is the last. This
213 * function is jumped to from the asmTerminateCoreCtrl, which is in turn
214 * called from endOSThreadFn, which is the top-level-fn of the shutdown
215 * slaves.
216 */
217 void *
218 terminateCoreCtlr(SlaveVP *currSlv)
219 {
220 //first, free shutdown Slv that jumped here, then end the pthread
221 VMS_int__dissipate_slaveVP( currSlv );
222 pthread_exit( NULL );
223 }
226 /*Busy-wait for a random number of cycles -- chooses number of cycles
227 * differently than for the too-many-tries-to-get-lock backoff
228 */
229 inline void
230 doBackoff_for_TooLongWithNoWork( int32 numRepsWithNoWork, uint32 *seed1,
231 uint32 *seed2 )
232 { int32 i, waitIterations;
233 volatile double fakeWorkVar; //busy-wait fake work
235 //Get a random number of iterations to busy-wait. The % is a simple
236 // way to set the maximum value that can be generated.
237 waitIterations =
238 randomNumber(seed1, seed2) %
239 (numRepsWithNoWork * numRepsWithNoWork * NUM_CORES);
240 for( i = 0; i < waitIterations; i++ )
241 { fakeWorkVar += (fakeWorkVar + 32.0) / 2.0; //busy-wait
242 }
243 }
245 /*Busy-waits for a random number of cycles -- chooses number of cycles
246 * differently than for the no-work backoff
247 */
248 inline void
249 doBackoff_for_TooLongToGetLock( int32 numTriesToGetLock, uint32 *seed1,
250 uint32 *seed2 )
251 { int32 i, waitIterations;
252 volatile double fakeWorkVar; //busy-wait fake work
254 waitIterations =
255 randomNumber(seed1, seed2) %
256 (numTriesToGetLock * GET_LOCK_BACKOFF_WEIGHT);
257 //addToHist( wait_iterations, coreLoopThdParams->wait_iterations_hist );
258 for( i = 0; i < waitIterations; i++ )
259 { fakeWorkVar += (fakeWorkVar + 32.0) / 2.0; //busy-wait
260 }
261 }
264 #ifdef DEBUG__TURN_ON_SEQUENTIAL_MODE
266 //===========================================================================
267 /*This sequential version does the same as threaded, except doesn't do the
268 * pin-threads part, nor the wait until setup complete and acquire master
269 * lock parts.
270 */
271 void *
272 coreCtlr_Seq( void *paramsIn )
273 {
274 int32 thisCoresIdx;
275 int32 numRepetitionsWithNoWork;
276 SlaveVP *currVP;
277 AnimSlot *currSlot, **animSlots;
278 int32 currSlotIdx;
279 int32 *addrOfMasterLock;
280 SlaveVP *thisCoresMasterVP;
282 //=============== Initializations ===================
283 thisCoresIdx = 0; //sequential version
284 animSlots = _VMSMasterEnv->allAnimSlots[thisCoresIdx];
285 currSlotIdx = 0; //start at slot 0, go up until one empty, then do master
286 numRepetitionsWithNoWork = 0;
287 addrOfMasterLock = &(_VMSMasterEnv->masterLock);
288 thisCoresMasterVP = _VMSMasterEnv->masterVPs[thisCoresIdx];
290 //Assembly that saves addr of label of return instr -- label in assmbly
291 recordCoreCtlrReturnLabelAddr((void**)&(_VMSMasterEnv->coreCtlrReturnPt));
294 //====================== The Core Controller ======================
295 while(1)
296 {
297 if( currSlotIdx >= NUM_ANIM_SLOTS ) goto switchToMaster;
298 currSlot = animSlots[ currSlotIdx ];
300 if( ! currSlot->needsSlaveAssigned ) //slot does have slave assigned
301 { numRepetitionsWithNoWork = 0; //reset B2B master count
302 currSlotIdx ++;
303 currVP = currSlot->slaveAssignedToSlot;
304 }
305 else //slot is empty, so switch to master
306 {
307 switchToMaster:
308 currSlotIdx = 0; //doing switch to master, so start over at slot 0
310 currVP = thisCoresMasterVP;
312 MEAS__Capture_Pre_Master_Lock_Point; //back to back because
313 MEAS__Capture_Post_Master_Lock_Point; // sequential version
315 if( numRepetitionsWithNoWork > NUM_REPS_W_NO_WORK_BEFORE_YIELD )
316 { printf("Lots of reps w/o work\n");
317 exit(0); //if no work, no way to ever get it in sequential!
318 }
319 numRepetitionsWithNoWork += 1;
320 }
322 switchToSlv(currVP); //Slave suspend makes core "return" from this call
323 flushRegisters(); //prevent GCC optimization from doing bad things
325 MEAS__Capture_End_Susp_in_CoreCtlr_ForSys;
327 } //while(1)
328 }
329 #endif