Mercurial > cgi-bin > hgwebdir.cgi > VMS > VMS_Implementations > VMS_impls > VMS__MC_shared_impl
changeset 222:c88ce1db91ef Common_Ancestor
Compiles, but does not run properly -- and changed MasterLoop to SchedulingMaster
author | Some Random Person <seanhalle@yahoo.com> |
---|---|
date | Tue, 13 Mar 2012 10:02:06 -0700 |
parents | 8059fb8d5465 |
children | b0b93147adfb |
files | CoreController.c CoreLoop.c Defines/VMS_defs__MEAS.h Defines/VMS_defs__turn_on_and_off.h MasterLoop.c Probes/probes.c Probes/probes.h SchedulingMaster.c VMS.h VMS__startup_and_shutdown.c VMS_primitive_data_types.h vmalloc.c |
diffstat | 12 files changed, 703 insertions(+), 699 deletions(-) [+] |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/CoreController.c Tue Mar 13 10:02:06 2012 -0700 1.3 @@ -0,0 +1,333 @@ 1.4 +/* 1.5 + * Copyright 2010 OpenSourceStewardshipFoundation 1.6 + * 1.7 + * Licensed under BSD 1.8 + */ 1.9 + 1.10 + 1.11 +#include "VMS.h" 1.12 + 1.13 +#include <stdlib.h> 1.14 +#include <stdio.h> 1.15 +#include <time.h> 1.16 + 1.17 +#include <pthread.h> 1.18 +#include <sched.h> 1.19 + 1.20 +//===================== Functions local to this file ======================= 1.21 +void *terminateCoreController(SlaveVP *currSlv); 1.22 +inline void 1.23 +doBackoff_for_TooLongToGetLock( int32 numTriesToGetLock, uint32 *seed1, 1.24 + uint32 *seed2 ); 1.25 +inline void 1.26 +doBackoff_for_TooLongWithNoWork( int32 numRepsWithNoWork, uint32 *seed1, 1.27 + uint32 *seed2 ); 1.28 + 1.29 +//=========================================================================== 1.30 + 1.31 + 1.32 +/*The Core Controller is logically "beneath" the masterVP and slave VPs. Its 1.33 + * job is to control which of those VPs the core animates. Any time one of 1.34 + * those VPs suspends, the suspend-primitive switches the core over to 1.35 + * animating the core controller. The core controller then follows a very 1.36 + * basic pattern to choose which VP will get animated next, then switches 1.37 + * the core over to animating that VP. So, all VPs switch the core to 1.38 + * core controller, which then chooses which VP the core animates next. 1.39 + * 1.40 + *The way the core controller decides which VP to switch the core to next is: 1.41 + * 1) There are a number of "scheduling slots", which the master VP fills up 1.42 + * with slave VPs that are ready to be animated. So, the core controller 1.43 + * just iterates through the scheduling slots. When the next slot has a 1.44 + * slave VP in it, the core controller switches the core over to animate 1.45 + * that slave. 
1.46 + * 2) When the core controller checks a scheduling slot, and it's empty, 1.47 + * then the controller switches the core over to animating the master VP, 1.48 + * whose job is to find more slave VPs ready, and assign those to 1.49 + * scheduling slots. 1.50 + * 1.51 + *So, in effect, a scheduling slot functions as another layer of virtual 1.52 + * processor. A slot has the logical meaning of being an animator that 1.53 + * animates the slave assigned to it. However, the core controller sits 1.54 + * below the slots, and sequences down them, assigning the actual physical 1.55 + * core to each slot, in turn. 1.56 + *The reason for having the scheduling slots and core controller is to 1.57 + * amortize the overhead of switching to the master VP and running it. With 1.58 + * multiple scheduling slots, the time to switch-to-master and the code in 1.59 + * the master loop is divided by the number of scheduling slots. 1.60 + *The core controller and scheduling slots are not fundamental parts of VMS, 1.61 + * but rather optimizations put into the shared-semantic-state version of 1.62 + * VMS. Other versions of VMS will not have a core controller nor scheduling 1.63 + * slots. 1.64 + * 1.65 + *The core controller "owns" the physical core, in effect, and is the 1.66 + * function given to the pthread creation call. Hence, it contains code 1.67 + * related to pthread startup, synchronizing the controllers to all start 1.68 + * at the same time-point, and pinning the pthreads to physical cores. 
1.69 + * 1.70 + */ 1.71 +void * 1.72 +coreController( void *paramsIn ) 1.73 + { 1.74 + int32 thisCoresIdx; 1.75 + int32 numRepetitionsWithNoWork; 1.76 + SlaveVP *currVP; 1.77 + SchedSlot *currSlot, **schedSlots; 1.78 + int32 currSlotIdx; 1.79 + volatile int32 *addrOfMasterLock; //thing pointed to is volatile, not ptr 1.80 + SlaveVP *thisCoresMasterVP; 1.81 + //Variables used for pthread related things 1.82 + ThdParams *coreCtlrThdParams; 1.83 + cpu_set_t coreMask; //used during pinning pthread to CPU core 1.84 + int32 errorCode; 1.85 + //Variables used during measurements 1.86 + TSCountLowHigh endSusp; 1.87 + //Variables used in random-backoff, for master-lock and waiting for work 1.88 + uint32_t seed1 = rand()%1000; // init random number generator for retries 1.89 + uint32_t seed2 = rand()%1000; 1.90 + //Variable for work-stealing -- a gate protects a critical section 1.91 + volatile GateStruc gate; //on stack to avoid false-sharing 1.92 + 1.93 + 1.94 + //=============== Initializations =================== 1.95 + coreCtlrThdParams = (ThdParams *)paramsIn; 1.96 + thisCoresIdx = coreCtlrThdParams->coreNum; 1.97 + 1.98 + gate.gateClosed = FALSE; 1.99 + gate.preGateProgress = 0; 1.100 + gate.waitProgress = 0; 1.101 + gate.exitProgress = 0; 1.102 + //TODO: pad these to prevent false-sharing, and fix the race at startup 1.103 + _VMSMasterEnv->workStealingGates[ thisCoresIdx ] = (GateStruc*)&gate; 1.104 + 1.105 + //Assembly that saves addr of label of return instr -- label in assmbly 1.106 + recordCoreCtlrReturnLabelAddr((void**)&(_VMSMasterEnv->coreCtlrReturnPt)); 1.107 + 1.108 + schedSlots = _VMSMasterEnv->allSchedSlots[thisCoresIdx]; 1.109 + currSlotIdx = 0; //start at slot 0, go up until one empty, then do master 1.110 + numRepetitionsWithNoWork = 0; 1.111 + addrOfMasterLock = &(_VMSMasterEnv->masterLock); 1.112 + thisCoresMasterVP = _VMSMasterEnv->masterVPs[thisCoresIdx]; 1.113 + 1.114 + //==================== pthread related stuff ====================== 1.115 + 
//pin the pthread to the core 1.116 + //Linux requires pinning to be done inside the thread-function 1.117 + //Designate a core by a 1 in bit-position corresponding to the core 1.118 + CPU_ZERO(&coreMask); //initialize mask bits to zero 1.119 + CPU_SET(coreCtlrThdParams->coreNum,&coreMask); //set bit repr the coreNum 1.120 + pthread_t selfThd = pthread_self(); 1.121 + errorCode = 1.122 + pthread_setaffinity_np( selfThd, sizeof(coreMask), &coreMask); 1.123 + if(errorCode){ printf("\n pinning thd to core failed \n"); exit(0); } 1.124 + 1.125 + //make sure the controllers all start at same time, by making them wait 1.126 + pthread_mutex_lock( &suspendLock ); 1.127 + while( !(_VMSMasterEnv->setupComplete) ) 1.128 + { pthread_cond_wait( &suspendCond, &suspendLock ); 1.129 + } 1.130 + pthread_mutex_unlock( &suspendLock ); 1.131 + 1.132 + //====================== The Core Controller ====================== 1.133 + while(1) //An endless loop is just one way of doing the control structure 1.134 + { //Assembly code switches the core between animating a VP and 1.135 + // animating this core controller. The switch is done by 1.136 + // changing the stack-pointer and frame-pointer and then doing 1.137 + // an assembly jmp. When reading this code, the effect is 1.138 + // that the "switchToSlv()" at the end of the loop is sort of a 1.139 + // "warp in time" -- the core disappears inside this, jmps to 1.140 + // animating a VP, and when that VP suspends, the suspend 1.141 + // jmps back. This has the effect of "returning" from the 1.142 + // switchToSlv() call. Then control loops back to here. 1.143 + //Alternatively, the VP suspend primitive could just not bother 1.144 + // returning from switchToSlv, and instead jmp directly to here. 1.145 + 1.146 + if( currSlotIdx >= NUM_SCHED_SLOTS ) goto switchToMaster; 1.147 + currSlot = schedSlots[ currSlotIdx ]; 1.148 + 1.149 + 1.150 + if( ! 
currSlot->needsSlaveAssigned ) //slot does have slave assigned 1.151 + { numRepetitionsWithNoWork = 0; //reset B2B master count 1.152 + currSlotIdx ++; 1.153 + currVP = currSlot->slaveAssignedToSlot; 1.154 + } 1.155 + else //slot is empty, so switch to master 1.156 + { 1.157 + switchToMaster: 1.158 + currSlotIdx = 0; //doing switch to master, so start over at slot 0 1.159 + currVP = NULL; 1.160 + 1.161 + MEAS__Capture_Pre_Master_Lock_Point; 1.162 + 1.163 + int numTriesToGetLock = 0; int gotLock = 0; 1.164 + while( currVP == NULL ) //keep going until get master lock 1.165 + { 1.166 + //At this point, first thing to do is get lock. But, want to 1.167 + // reduce lock contention from cores with no work, so first 1.168 + // check if this is a core with no work, and busy wait if so. 1.169 + //Then, if it's been way too long without work, yield pthread 1.170 + if( numRepetitionsWithNoWork > NUM_REPS_W_NO_WORK_BEFORE_BACKOFF) 1.171 + doBackoff_for_TooLongWithNoWork( numRepetitionsWithNoWork, &seed1, &seed2 ); 1.172 + if( numRepetitionsWithNoWork > NUM_REPS_W_NO_WORK_BEFORE_YIELD ) 1.173 + { numRepetitionsWithNoWork = 0; pthread_yield(); } 1.174 + 1.175 + 1.176 + //Now, try to get the lock 1.177 + gotLock = __sync_bool_compare_and_swap( addrOfMasterLock, 1.178 + UNLOCKED, LOCKED ); 1.179 + if( gotLock ) 1.180 + { //At this point, have run out of slaves, so tried to get 1.181 + // the master lock, and have successfully gotten it. 1.182 + //So, set the currVP to this core's masterVP and break out 1.183 + // of the get-lock loop. Below, assembly code will switch 1.184 + // the core over to animating the masterVP. 
When it's 1.185 + // done, the masterVP will use assembly to switch the core 1.186 + // back to animating this core controller 1.187 + currVP = thisCoresMasterVP; 1.188 + numRepetitionsWithNoWork += 1; 1.189 + break; //end while -- have a VP to animate now 1.190 + } 1.191 + //Get here only when failed to get lock 1.192 + 1.193 + numTriesToGetLock++; //if too many, means too much contention 1.194 + if( numTriesToGetLock > NUM_TRIES_BEFORE_DO_BACKOFF ) 1.195 + doBackoff_for_TooLongToGetLock( numTriesToGetLock, &seed1, &seed2 ); 1.196 + if( numTriesToGetLock > MASTERLOCK_RETRIES_BEFORE_YIELD ) 1.197 + { numTriesToGetLock = 0; pthread_yield(); } 1.198 + } 1.199 + MEAS__Capture_Post_Master_Lock_Point; 1.200 + } 1.201 + 1.202 + 1.203 + switchToSlv(currVP); //Slave suspend makes core "return" from this call 1.204 + flushRegisters(); //prevent GCC optimization from doing bad things 1.205 + 1.206 + MEAS__Capture_End_Susp_in_CoreCtlr_ForSys; 1.207 + 1.208 + }//while(1) 1.209 + } 1.210 + 1.211 + 1.212 +void * 1.213 +terminateCoreCtlr(SlaveVP *currSlv) 1.214 + { 1.215 + //first free shutdown Slv that jumped here -- it first restores the 1.216 + // coreloop's stack, so addr of currSlv in stack frame is still correct 1.217 + VMS_int__dissipate_slaveVP( currSlv ); 1.218 + pthread_exit( NULL ); 1.219 + } 1.220 + 1.221 + 1.222 +/*Used by the backoff to pick a random amount of busy-wait. Can't use the 1.223 + * system rand because it takes much too long. 
1.224 + *Note, are passing pointers to the seeds, which are then modified 1.225 + */ 1.226 +inline uint32_t 1.227 +randomNumber(uint32_t* seed1, uint32_t* seed2) 1.228 + { 1.229 + *seed1 = 36969 * (*seed1 & 65535) + (*seed1 >> 16); 1.230 + *seed2 = 18000 * (*seed2 & 65535) + (*seed2 >> 16); 1.231 + return (*seed1 << 16) + *seed2; 1.232 + } 1.233 + 1.234 +/*Busy-wait for a random number of cycles -- chooses number of cycles 1.235 + * differently than for the too-many-tries-to-get-lock backoff 1.236 + */ 1.237 +inline void 1.238 +doBackoff_for_TooLongWithNoWork( int32 numRepsWithNoWork, uint32 *seed1, 1.239 + uint32 *seed2 ) 1.240 + { int32 i, waitIterations; 1.241 + volatile double fakeWorkVar; //busy-wait fake work 1.242 + 1.243 + waitIterations = 1.244 + randomNumber(seed1, seed2) % 1.245 + (numRepsWithNoWork * numRepsWithNoWork * NUM_CORES); 1.246 + for( i = 0; i < waitIterations; i++ ) 1.247 + { fakeWorkVar += (fakeWorkVar + 32.0) / 2.0; //busy-wait 1.248 + } 1.249 + } 1.250 + 1.251 +/*Busy-waits for a random number of cycles -- chooses number of cycles 1.252 + * differently than for the no-work backoff 1.253 + */ 1.254 +inline void 1.255 +doBackoff_for_TooLongToGetLock( int32 numTriesToGetLock, uint32 *seed1, 1.256 + uint32 *seed2 ) 1.257 + { int32 i, waitIterations; 1.258 + volatile double fakeWorkVar; //busy-wait fake work 1.259 + 1.260 + waitIterations = 1.261 + randomNumber(seed1, seed2) % 1.262 + (numTriesToGetLock * NUM_TRIES_TO_GET_LOCK_BACKOFF_WEIGHT); 1.263 + //addToHist( wait_iterations, coreLoopThdParams->wait_iterations_hist ); 1.264 + for( i = 0; i < waitIterations; i++ ) 1.265 + { fakeWorkVar += (fakeWorkVar + 32.0) / 2.0; //busy-wait 1.266 + } 1.267 + } 1.268 + 1.269 + 1.270 +#ifdef DEBUG__TURN_ON_SEQUENTIAL_MODE 1.271 + 1.272 +//=========================================================================== 1.273 +/*This sequential version does the same as threaded, except doesn't do the 1.274 + * pin-threads part, nor the wait until setup complete 
and acquire master 1.275 + * lock parts. 1.276 + */ 1.277 +void * 1.278 +coreCtlr_Seq( void *paramsIn ) 1.279 + { 1.280 + int32 thisCoresIdx; 1.281 + int32 numRepetitionsWithNoWork; 1.282 + SlaveVP *currVP; 1.283 + SchedSlot *currSlot, **schedSlots; 1.284 + int32 currSlotIdx; 1.285 + int32 *addrOfMasterLock; 1.286 + SlaveVP *thisCoresMasterVP; 1.287 + 1.288 + //=============== Initializations =================== 1.289 + schedSlots = _VMSMasterEnv->allSchedSlots[thisCoresIdx]; 1.290 + currSlotIdx = 0; //start at slot 0, go up until one empty, then do master 1.291 + numRepetitionsWithNoWork = 0; 1.292 + addrOfMasterLock = &(_VMSMasterEnv->masterLock); 1.293 + thisCoresMasterVP = _VMSMasterEnv->masterVPs[thisCoresIdx]; 1.294 + 1.295 + thisCoresIdx = 0; //sequential version 1.296 + 1.297 + //Assembly that saves addr of label of return instr -- label in assmbly 1.298 + recordCoreCtlrReturnLabelAddr((void**)&(_VMSMasterEnv->coreCtlrReturnPt)); 1.299 + 1.300 + 1.301 + //====================== The Core Controller ====================== 1.302 + while(1) 1.303 + { 1.304 + if( currSlotIdx >= NUM_SCHED_SLOTS ) goto switchToMaster; 1.305 + currSlot = schedSlots[ currSlotIdx ]; 1.306 + 1.307 + if( ! currSlot->needsSlaveAssigned ) //slot does have slave assigned 1.308 + { numRepetitionsWithNoWork = 0; //reset B2B master count 1.309 + currSlotIdx ++; 1.310 + currVP = currSlot->slaveAssignedToSlot; 1.311 + } 1.312 + else //slot is empty, so switch to master 1.313 + { 1.314 + switchToMaster: 1.315 + currSlotIdx = 0; //doing switch to master, so start over at slot 0 1.316 + 1.317 + currVP = thisCoresMasterVP; 1.318 + 1.319 + MEAS__Capture_Pre_Master_Lock_Point; //back to back because 1.320 + MEAS__Capture_Post_Master_Lock_Point; // sequential version 1.321 + 1.322 + if( numRepetitionsWithNoWork > NUM_REPS_W_NO_WORK_BEFORE_YIELD ) 1.323 + { printf("Lots of reps w/o work\n"); 1.324 + exit(0); //if no work, no way to ever get it in sequential! 
1.325 + } 1.326 + numRepetitionsWithNoWork += 1; 1.327 + } 1.328 + 1.329 + switchToSlv(currVP); //Slave suspend makes core "return" from this call 1.330 + flushRegisters(); //prevent GCC optimization from doing bad things 1.331 + 1.332 + MEAS__Capture_End_Susp_in_CoreCtlr_ForSys; 1.333 + 1.334 + } //while(1) 1.335 + } 1.336 +#endif
2.1 --- a/CoreLoop.c Mon Mar 12 05:38:07 2012 -0700 2.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 2.3 @@ -1,332 +0,0 @@ 2.4 -/* 2.5 - * Copyright 2010 OpenSourceStewardshipFoundation 2.6 - * 2.7 - * Licensed under BSD 2.8 - */ 2.9 - 2.10 - 2.11 -#include "VMS.h" 2.12 - 2.13 -#include <stdlib.h> 2.14 -#include <stdio.h> 2.15 -#include <time.h> 2.16 - 2.17 -#include <pthread.h> 2.18 -#include <sched.h> 2.19 - 2.20 -//===================== Functions local to this file ======================= 2.21 -void *terminateCoreController(SlaveVP *currSlv); 2.22 -inline void 2.23 -doBackoff_for_TooLongToGetLock( int32 numTriesToGetLock, uint32 *seed1, 2.24 - uint32 *seed2 ); 2.25 -inline void 2.26 -doBackoff_for_TooLongWithNoWork( int32 numRepsWithNoWork, uint32 *seed1, 2.27 - uint32 *seed2 ); 2.28 -//=========================================================================== 2.29 - 2.30 - 2.31 -/*The Core Controller is logically "beneath" the masterVP and slave VPs. Its 2.32 - * job is to control which of those VPs the core animates. Any time one of 2.33 - * those VPs suspends, the suspend-primitive switches the core over to 2.34 - * animating the core controller. The core controller then follows a very 2.35 - * basic pattern to choose which VP will get animated next, then switches 2.36 - * the core over to animating that VP. So, all VPs switch the core to 2.37 - * core controller, which then chooses which VP the core animates next. 2.38 - * 2.39 - *The way the core controller decides which VP to switch the core to next is: 2.40 - * 1) There are a number of "scheduling slots", which the master VP fills up 2.41 - * with slave VPs that are ready to be animated. So, the core controller 2.42 - * just iterates through the scheduling slots. When the next slot has a 2.43 - * slave VP in it, the core controller switches the core over to animate 2.44 - * that slave. 
2.45 - * 2) When the core controller checks a scheduling slot, and it's empty, 2.46 - * then the controller switches the core over to animating the master VP, 2.47 - * whose job is to find more slave VPs ready, and assign those to 2.48 - * scheduling slots. 2.49 - * 2.50 - *So, in effect, a scheduling slot functions as another layer of virtual 2.51 - * processor. A slot has the logical meaning of being an animator that 2.52 - * animates the slave assigned to it. However, the core controller sits 2.53 - * below the slots, and sequences down them, assigning the actual physical 2.54 - * core to each slot, in turn. 2.55 - *The reason for having the scheduling slots and core controller is to 2.56 - * amortize the overhead of switching to the master VP and running it. With 2.57 - * multiple scheduling slots, the time to switch-to-master and the code in 2.58 - * the master loop is divided by the number of scheduling slots. 2.59 - *The core controller and scheduling slots are not fundamental parts of VMS, 2.60 - * but rather optimizations put into the shared-semantic-state version of 2.61 - * VMS. Other versions of VMS will not have a core controller nor scheduling 2.62 - * slots. 2.63 - * 2.64 - *The core controller "owns" the physical core, in effect, and is the 2.65 - * function given to the pthread creation call. Hence, it contains code 2.66 - * related to pthread startup, synchronizing the controllers to all start 2.67 - * at the same time-point, and pinning the pthreads to physical cores. 
2.68 - * 2.69 - */ 2.70 -void * 2.71 -coreController( void *paramsIn ) 2.72 - { 2.73 - int32 thisCoresIdx; 2.74 - int32 numRepetitionsWithNoWork; 2.75 - SlaveVP *currVP; 2.76 - SchedSlot *currSlot, **schedSlots; 2.77 - int32 currSlotIdx; 2.78 - volatile int32 *addrOfMasterLock; //thing pointed to is volatile, not ptr 2.79 - SlaveVP *thisCoresMasterVP; 2.80 - //Variables used for pthread related things 2.81 - ThdParams *coreCtlrThdParams; 2.82 - cpu_set_t coreMask; //used during pinning pthread to CPU core 2.83 - int32 errorCode; 2.84 - //Variables used during measurements 2.85 - TSCountLowHigh endSusp; 2.86 - //Variables used in random-backoff, for master-lock and waiting for work 2.87 - uint32_t seed1 = rand()%1000; // init random number generator for retries 2.88 - uint32_t seed2 = rand()%1000; 2.89 - //Variable for work-stealing -- a gate protects a critical section 2.90 - volatile GateStruc gate; //on stack to avoid false-sharing 2.91 - 2.92 - 2.93 - //=============== Initializations =================== 2.94 - coreCtlrThdParams = (ThdParams *)paramsIn; 2.95 - thisCoresIdx = coreCtlrThdParams->coreNum; 2.96 - 2.97 - gate.gateClosed = FALSE; 2.98 - gate.preGateProgress = 0; 2.99 - gate.waitProgress = 0; 2.100 - gate.exitProgress = 0; 2.101 - //TODO: pad these to prevent false-sharing, and fix the race at startup 2.102 - _VMSMasterEnv->workStealingGates[ thisCoresIdx ] = (GateStruc*)&gate; 2.103 - 2.104 - //Assembly that saves addr of label of return instr -- label in assmbly 2.105 - recordCoreCtlrReturnLabelAddr((void**)&(_VMSMasterEnv->coreCtlrReturnPt)); 2.106 - 2.107 - schedSlots = _VMSMasterEnv->allSchedSlots[thisCoresIdx]; 2.108 - currSlotIdx = 0; //start at slot 0, go up until one empty, then do master 2.109 - numRepetitionsWithNoWork = 0; 2.110 - addrOfMasterLock = &(_VMSMasterEnv->masterLock); 2.111 - thisCoresMasterVP = _VMSMasterEnv->masterVPs[thisCoresIdx]; 2.112 - 2.113 - //==================== pthread related stuff ====================== 2.114 - 
//pin the pthread to the core 2.115 - //Linux requires pinning to be done inside the thread-function 2.116 - //Designate a core by a 1 in bit-position corresponding to the core 2.117 - CPU_ZERO(&coreMask); //initialize mask bits to zero 2.118 - CPU_SET(coreCtlrThdParams->coreNum,&coreMask); //set bit repr the coreNum 2.119 - pthread_t selfThd = pthread_self(); 2.120 - errorCode = 2.121 - pthread_setaffinity_np( selfThd, sizeof(coreMask), &coreMask); 2.122 - if(errorCode){ printf("\n pinning thd to core failed \n"); exit(0); } 2.123 - 2.124 - //make sure the controllers all start at same time, by making them wait 2.125 - pthread_mutex_lock( &suspendLock ); 2.126 - while( !(_VMSMasterEnv->setupComplete) ) 2.127 - { pthread_cond_wait( &suspendCond, &suspendLock ); 2.128 - } 2.129 - pthread_mutex_unlock( &suspendLock ); 2.130 - 2.131 - //====================== The Core Controller ====================== 2.132 - while(1) //An endless loop is just one way of doing the control structure 2.133 - { //Assembly code switches the core between animating a VP and 2.134 - // animating this core controller. The switch is done by 2.135 - // changing the stack-pointer and frame-pointer and then doing 2.136 - // an assembly jmp. When reading this code, the effect is 2.137 - // that the "switchToSlv()" at the end of the loop is sort of a 2.138 - // "warp in time" -- the core disappears inside this, jmps to 2.139 - // animating a VP, and when that VP suspends, the suspend 2.140 - // jmps back. This has the effect of "returning" from the 2.141 - // switchToSlv() call. Then control loops back to here. 2.142 - //Alternatively, the VP suspend primitive could just not bother 2.143 - // returning from switchToSlv, and instead jmp directly to here. 2.144 - 2.145 - if( currSlotIdx >= NUM_SCHED_SLOTS ) goto switchToMaster; 2.146 - currSlot = schedSlots[ currSlotIdx ]; 2.147 - 2.148 - 2.149 - if( ! 
currSlot->needsSlaveAssigned ) //slot does have slave assigned 2.150 - { numRepetitionsWithNoWork = 0; //reset B2B master count 2.151 - currSlotIdx ++; 2.152 - currVP = currSlot->slaveAssignedToSlot; 2.153 - } 2.154 - else //slot is empty, so switch to master 2.155 - { 2.156 - switchToMaster: 2.157 - currSlotIdx = 0; //doing switch to master, so start over at slot 0 2.158 - currVP = NULL; 2.159 - 2.160 - MEAS__Capture_Pre_Master_Lock_Point; 2.161 - 2.162 - int numTriesToGetLock = 0; int gotLock = 0; 2.163 - while( currVP == NULL ) //keep going until get master lock 2.164 - { 2.165 - //At this point, first thing to do is get lock. But, want to 2.166 - // reduce lock contention from cores with no work, so first 2.167 - // check if this is a core with no work, and busy wait if so. 2.168 - //Then, if it's been way too long without work, yield pthread 2.169 - if( numRepetitionsWithNoWork > NUM_REPS_W_NO_WORK_BEFORE_BACKOFF) 2.170 - doBackoff_for_TooLongWithNoWork( numRepetitionsWithNoWork, &seed1, &seed2 ); 2.171 - if( numRepetitionsWithNoWork > NUM_REPS_W_NO_WORK_BEFORE_YIELD ) 2.172 - { numRepetitionsWithNoWork = 0; pthread_yield(); } 2.173 - 2.174 - 2.175 - //Now, try to get the lock 2.176 - gotLock = __sync_bool_compare_and_swap( addrOfMasterLock, 2.177 - UNLOCKED, LOCKED ); 2.178 - if( gotLock ) 2.179 - { //At this point, have run out of slaves, so tried to get 2.180 - // the master lock, and have successfully gotten it. 2.181 - //So, set the currVP to this core's masterVP and break out 2.182 - // of the get-lock loop. Below, assembly code will switch 2.183 - // the core over to animating the masterVP. 
When it's 2.184 - // done, the masterVP will use assembly to switch the core 2.185 - // back to animating this core controller 2.186 - currVP = thisCoresMasterVP; 2.187 - numRepetitionsWithNoWork += 1; 2.188 - break; //end while -- have a VP to animate now 2.189 - } 2.190 - //Get here only when failed to get lock 2.191 - 2.192 - numTriesToGetLock++; //if too many, means too much contention 2.193 - if( numTriesToGetLock > NUM_TRIES_BEFORE_DO_BACKOFF ) 2.194 - doBackoff_for_TooLongToGetLock( numTriesToGetLock, &seed1, &seed2 ); 2.195 - if( numTriesToGetLock > MASTERLOCK_RETRIES_BEFORE_YIELD ) 2.196 - { numTriesToGetLock = 0; pthread_yield(); } 2.197 - } 2.198 - MEAS__Capture_Post_Master_Lock_Point; 2.199 - } 2.200 - 2.201 - 2.202 - switchToSlv(currVP); //Slave suspend makes core "return" from this call 2.203 - flushRegisters(); //prevent GCC optimization from doing bad things 2.204 - 2.205 - MEAS__Capture_End_Susp_in_CoreCtlr_ForSys; 2.206 - 2.207 - }//while(1) 2.208 - } 2.209 - 2.210 - 2.211 -void * 2.212 -terminateCoreController(SlaveVP *currSlv) 2.213 - { 2.214 - //first free shutdown Slv that jumped here -- it first restores the 2.215 - // coreloop's stack, so addr of currSlv in stack frame is still correct 2.216 - VMS_int__dissipate_slaveVP( currSlv ); 2.217 - pthread_exit( NULL ); 2.218 - } 2.219 - 2.220 - 2.221 -/*Used by the backoff to pick a random amount of busy-wait. Can't use the 2.222 - * system rand because it takes much too long. 
2.223 - *Note, are passing pointers to the seeds, which are then modified 2.224 - */ 2.225 -inline uint32_t 2.226 -randomNumber(uint32_t* seed1, uint32_t* seed2) 2.227 - { 2.228 - *seed1 = 36969 * (*seed1 & 65535) + (*seed1 >> 16); 2.229 - *seed2 = 18000 * (*seed2 & 65535) + (*seed2 >> 16); 2.230 - return (*seed1 << 16) + *seed2; 2.231 - } 2.232 - 2.233 -/*Busy-wait for a random number of cycles -- chooses number of cycles 2.234 - * differently than for the too-many-tries-to-get-lock backoff 2.235 - */ 2.236 -inline void 2.237 -doBackoff_for_TooLongWithNoWork( int32 numRepsWithNoWork, uint32 *seed1, 2.238 - uint32 *seed2 ) 2.239 - { int32 i, waitIterations; 2.240 - volatile double fakeWorkVar; //busy-wait fake work 2.241 - 2.242 - waitIterations = 2.243 - randomNumber(seed1, seed2) % 2.244 - (numRepsWithNoWork * numRepsWithNoWork * NUM_CORES); 2.245 - for( i = 0; i < waitIterations; i++ ) 2.246 - { fakeWorkVar += (fakeWorkVar + 32.0) / 2.0; //busy-wait 2.247 - } 2.248 - } 2.249 - 2.250 -/*Busy-waits for a random number of cycles -- chooses number of cycles 2.251 - * differently than for the no-work backoff 2.252 - */ 2.253 -inline void 2.254 -doBackoff_for_TooLongToGetLock( int32 numTriesToGetLock, uint32 *seed1, 2.255 - uint32 *seed2 ) 2.256 - { int32 i, waitIterations; 2.257 - volatile double fakeWorkVar; //busy-wait fake work 2.258 - 2.259 - waitIterations = 2.260 - randomNumber(seed1, seed2) % 2.261 - (numTriesToGetLock * NUM_TRIES_TO_GET_LOCK_BACKOFF_WEIGHT); 2.262 - //addToHist( wait_iterations, coreLoopThdParams->wait_iterations_hist ); 2.263 - for( i = 0; i < waitIterations; i++ ) 2.264 - { fakeWorkVar += (fakeWorkVar + 32.0) / 2.0; //busy-wait 2.265 - } 2.266 - } 2.267 - 2.268 - 2.269 -#ifdef DEBUG__TURN_ON_SEQUENTIAL_MODE 2.270 - 2.271 -//=========================================================================== 2.272 -/*This sequential version does the same as threaded, except doesn't do the 2.273 - * pin-threads part, nor the wait until setup complete 
and acquire master 2.274 - * lock parts. 2.275 - */ 2.276 -void * 2.277 -coreCtlr_Seq( void *paramsIn ) 2.278 - { 2.279 - int32 thisCoresIdx; 2.280 - int32 numRepetitionsWithNoWork; 2.281 - SlaveVP *currVP; 2.282 - SchedSlot *currSlot, **schedSlots; 2.283 - int32 currSlotIdx; 2.284 - int32 *addrOfMasterLock; 2.285 - SlaveVP *thisCoresMasterVP; 2.286 - 2.287 - //=============== Initializations =================== 2.288 - schedSlots = _VMSMasterEnv->allSchedSlots[thisCoresIdx]; 2.289 - currSlotIdx = 0; //start at slot 0, go up until one empty, then do master 2.290 - numRepetitionsWithNoWork = 0; 2.291 - addrOfMasterLock = &(_VMSMasterEnv->masterLock); 2.292 - thisCoresMasterVP = _VMSMasterEnv->masterVPs[thisCoresIdx]; 2.293 - 2.294 - thisCoresIdx = 0; //sequential version 2.295 - 2.296 - //Assembly that saves addr of label of return instr -- label in assmbly 2.297 - recordCoreCtlrReturnLabelAddr((void**)&(_VMSMasterEnv->coreCtlrReturnPt)); 2.298 - 2.299 - 2.300 - //====================== The Core Controller ====================== 2.301 - while(1) 2.302 - { 2.303 - if( currSlotIdx >= NUM_SCHED_SLOTS ) goto switchToMaster; 2.304 - currSlot = schedSlots[ currSlotIdx ]; 2.305 - 2.306 - if( ! currSlot->needsSlaveAssigned ) //slot does have slave assigned 2.307 - { numRepetitionsWithNoWork = 0; //reset B2B master count 2.308 - currSlotIdx ++; 2.309 - currVP = currSlot->slaveAssignedToSlot; 2.310 - } 2.311 - else //slot is empty, so switch to master 2.312 - { 2.313 - switchToMaster: 2.314 - currSlotIdx = 0; //doing switch to master, so start over at slot 0 2.315 - 2.316 - currVP = thisCoresMasterVP; 2.317 - 2.318 - MEAS__Capture_Pre_Master_Lock_Point; //back to back because 2.319 - MEAS__Capture_Post_Master_Lock_Point; // sequential version 2.320 - 2.321 - if( numRepetitionsWithNoWork > NUM_REPS_W_NO_WORK_BEFORE_YIELD ) 2.322 - { printf("Lots of reps w/o work\n"); 2.323 - exit(0); //if no work, no way to ever get it in sequential! 
2.324 - } 2.325 - numRepetitionsWithNoWork += 1; 2.326 - } 2.327 - 2.328 - switchToSlv(currVP); //Slave suspend makes core "return" from this call 2.329 - flushRegisters(); //prevent GCC optimization from doing bad things 2.330 - 2.331 - MEAS__Capture_End_Susp_in_CoreCtlr_ForSys; 2.332 - 2.333 - } //while(1) 2.334 - } 2.335 -#endif
3.1 --- a/Defines/VMS_defs__MEAS.h Mon Mar 12 05:38:07 2012 -0700 3.2 +++ b/Defines/VMS_defs__MEAS.h Tue Mar 13 10:02:06 2012 -0700 3.3 @@ -6,8 +6,8 @@ 3.4 * 3.5 */ 3.6 3.7 -#ifndef _VMS_DEFS_MEAS_H 3.8 -#define _VMS_DEFS_MEAS_H 3.9 +#ifndef _VMS_DEFS_MEAS_H 3.10 +#define _VMS_DEFS_MEAS_H 3.11 #define _GNU_SOURCE 3.12 3.13 //================== Macros define types of meas want ===================== 3.14 @@ -321,5 +321,5 @@ 3.15 3.16 3.17 //=========================================================================== 3.18 -#endif /* _VMS_DEFS_H */ 3.19 +#endif /* _VMS_DEFS_MEAS_H */ 3.20
4.1 --- a/Defines/VMS_defs__turn_on_and_off.h Mon Mar 12 05:38:07 2012 -0700 4.2 +++ b/Defines/VMS_defs__turn_on_and_off.h Tue Mar 13 10:02:06 2012 -0700 4.3 @@ -15,7 +15,7 @@ 4.4 * It still does co-routines and all the mechanisms are the same, it just 4.5 * has only a single thread and animates Slvs one at a time 4.6 */ 4.7 -//#define DEBUG__TURN_ON_SEQUENTIAL_MODE 4.8 +#define DEBUG__TURN_ON_SEQUENTIAL_MODE 4.9 4.10 4.11 /*turns on the probe-instrumentation in the application -- when not
5.1 --- a/MasterLoop.c Mon Mar 12 05:38:07 2012 -0700 5.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 5.3 @@ -1,349 +0,0 @@ 5.4 -/* 5.5 - * Copyright 2010 OpenSourceStewardshipFoundation 5.6 - * 5.7 - * Licensed under BSD 5.8 - */ 5.9 - 5.10 - 5.11 - 5.12 -#include <stdio.h> 5.13 -#include <stddef.h> 5.14 - 5.15 -#include "VMS.h" 5.16 - 5.17 - 5.18 -//=========================================================================== 5.19 -void inline 5.20 -stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ, 5.21 - SlaveVP *masterVP ); 5.22 - 5.23 -//=========================================================================== 5.24 - 5.25 - 5.26 - 5.27 -/*This code is animated by the virtual Master processor. 5.28 - * 5.29 - *Polls each sched slot exactly once, hands any requests made by a newly 5.30 - * done slave to the "request handler" plug-in function 5.31 - * 5.32 - *Any slots that need a Slv assigned are given to the "schedule" 5.33 - * plug-in function, which tries to assign a Slv (slave) to it. 5.34 - * 5.35 - *When all slots needing a processor have been given to the schedule plug-in, 5.36 - * a fraction of the slaves successfully scheduled are put into the 5.37 - * work queue, then a continuation of this function is put in, then the rest 5.38 - * of the Slvs that were successfully scheduled. 5.39 - * 5.40 - *The first thing the continuation does is busy-wait until the previous 5.41 - * animation completes. This is because an (unlikely) continuation may 5.42 - * sneak through queue before previous continuation is done putting second 5.43 - * part of scheduled slaves in, which is the only race condition. 
5.44 - * 5.45 - */ 5.46 - 5.47 -/*May 29, 2010 -- birth a Master during init so that first core controller to 5.48 - * start running gets it and does all the stuff for a newly born -- 5.49 - * from then on, will be doing continuation, but do suspension self 5.50 - * directly at end of master loop 5.51 - *So VMS_WL__init just births the master virtual processor same way it births 5.52 - * all the others -- then does any extra setup needed and puts it into the 5.53 - * work queue. 5.54 - *However means have to make masterEnv a global static volatile the same way 5.55 - * did with readyToAnimateQ in core controller. -- for performance, put the 5.56 - * jump to the core controller directly in here, and have it directly jump back. 5.57 - * 5.58 - * 5.59 - *Aug 18, 2010 -- Going to a separate MasterVP for each core, to see if this 5.60 - * avoids the suspected bug in the system stack that causes bizarre faults 5.61 - * at random places in the system code. 5.62 - * 5.63 - *So, this function is coupled to each of the MasterVPs, -- meaning this 5.64 - * function can't rely on a particular stack and frame -- each MasterVP that 5.65 - * animates this function has a different one. 5.66 - * 5.67 - *At this point, the masterLoop does not write itself into the queue anymore, 5.68 - * instead, the coreCtlr acquires the masterLock when it has nothing to 5.69 - * animate, and then animates its own masterLoop. However, still try to put 5.70 - * several AppSlvs into the queue to amortize the startup cost of switching 5.71 - * to the MasterVP. Note, don't have to worry about latency of requests much 5.72 - * because most requests generate work for same core -- only latency issue 5.73 - * is case when other cores starved and one core's requests generate work 5.74 - * for them -- so keep max in queue to 3 or 4.. 
5.75 - */ 5.76 -void masterLoop( void *initData, SlaveVP *animatingSlv ) 5.77 - { 5.78 - int32 slotIdx, numSlotsFilled; 5.79 - SlaveVP *schedSlaveVP; 5.80 - SchedSlot *currSlot, **schedSlots; 5.81 - MasterEnv *masterEnv; 5.82 - VMSQueueStruc *readyToAnimateQ; 5.83 - 5.84 - Sched_Assigner slaveAssigner; 5.85 - RequestHandler requestHandler; 5.86 - void *semanticEnv; 5.87 - 5.88 - int32 thisCoresIdx; 5.89 - SlaveVP *masterVP; 5.90 - volatile SlaveVP *volatileMasterVP; 5.91 - 5.92 - volatileMasterVP = animatingSlv; 5.93 - masterVP = (SlaveVP*)volatileMasterVP; //used to force re-define after jmp 5.94 - 5.95 - //First animation of each MasterVP will in turn animate this part 5.96 - // of setup code.. (Slv creator sets up the stack as if this function 5.97 - // was called normally, but actually get here by jmp) 5.98 - //So, setup values about stack ptr, jmp pt and all that 5.99 - //masterVP->resumeInstrPtr = &&masterLoopStartPt; 5.100 - 5.101 - 5.102 - //Note, got rid of writing the stack and frame ptr up here, because 5.103 - // only one 5.104 - // core can ever animate a given MasterVP, so don't need to communicate 5.105 - // new frame and stack ptr to the MasterVP storage before a second 5.106 - // version of that MasterVP can get animated on a different core. 5.107 - //Also got rid of the busy-wait. 
5.108 - 5.109 - 5.110 - //masterLoopStartPt: 5.111 - while(1){ 5.112 - 5.113 - MEAS__Capture_Pre_Master_Point 5.114 - 5.115 - masterEnv = (MasterEnv*)_VMSMasterEnv; 5.116 - 5.117 - //GCC may optimize so doesn't always re-define from frame-storage 5.118 - masterVP = (SlaveVP*)volatileMasterVP; //just to make sure after jmp 5.119 - thisCoresIdx = masterVP->coreAnimatedBy; 5.120 - readyToAnimateQ = masterEnv->readyToAnimateQs[thisCoresIdx]; 5.121 - schedSlots = masterEnv->allSchedSlots[thisCoresIdx]; 5.122 - 5.123 - requestHandler = masterEnv->requestHandler; 5.124 - slaveAssigner = masterEnv->slaveAssigner; 5.125 - semanticEnv = masterEnv->semanticEnv; 5.126 - 5.127 - 5.128 - //Poll each slot's Done flag 5.129 - numSlotsFilled = 0; 5.130 - for( slotIdx = 0; slotIdx < NUM_SCHED_SLOTS; slotIdx++) 5.131 - { 5.132 - currSlot = schedSlots[ slotIdx ]; 5.133 - 5.134 - if( currSlot->workIsDone ) 5.135 - { 5.136 - currSlot->workIsDone = FALSE; 5.137 - currSlot->needsSlaveAssigned = TRUE; 5.138 - 5.139 - MEAS__startReqHdlr; 5.140 - 5.141 - //process the requests made by the slave (held inside slave struc) 5.142 - (*requestHandler)( currSlot->slaveAssignedToSlot, semanticEnv ); 5.143 - 5.144 - MEAS__endReqHdlr; 5.145 - } 5.146 - if( currSlot->needsSlaveAssigned ) 5.147 - { //give slot a new Slv 5.148 - schedSlaveVP = 5.149 - (*slaveAssigner)( semanticEnv, thisCoresIdx ); 5.150 - 5.151 - if( schedSlaveVP != NULL ) 5.152 - { currSlot->slaveAssignedToSlot = schedSlaveVP; 5.153 - schedSlaveVP->schedSlot = currSlot; 5.154 - currSlot->needsSlaveAssigned = FALSE; 5.155 - numSlotsFilled += 1; 5.156 - } 5.157 - } 5.158 - } 5.159 - 5.160 - 5.161 - #ifdef SYS__TURN_ON_WORK_STEALING 5.162 - //If no slots filled, means no more work, look for work to steal. 
5.163 - if( numSlotsFilled == 0 ) 5.164 - { gateProtected_stealWorkInto( currSlot, readyToAnimateQ, masterVP ); 5.165 - } 5.166 - #endif 5.167 - 5.168 - MEAS__Capture_Post_Master_Point; 5.169 - 5.170 - masterSwitchToCoreCtlr(animatingSlv); 5.171 - flushRegisters(); 5.172 - }//MasterLoop 5.173 - 5.174 - 5.175 - } 5.176 - 5.177 - 5.178 - 5.179 -/*This has a race condition -- the coreloops are accessing their own queues 5.180 - * at the same time that this work-stealer on a different core is trying to 5.181 - */ 5.182 -void inline 5.183 -stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ, 5.184 - SlaveVP *masterVP ) 5.185 - { 5.186 - SlaveVP *stolenSlv; 5.187 - int32 coreIdx, i; 5.188 - VMSQueueStruc *currQ; 5.189 - 5.190 - stolenSlv = NULL; 5.191 - coreIdx = masterVP->coreAnimatedBy; 5.192 - for( i = 0; i < NUM_CORES -1; i++ ) 5.193 - { 5.194 - if( coreIdx >= NUM_CORES -1 ) 5.195 - { coreIdx = 0; 5.196 - } 5.197 - else 5.198 - { coreIdx++; 5.199 - } 5.200 - currQ = _VMSMasterEnv->readyToAnimateQs[coreIdx]; 5.201 - if( numInVMSQ( currQ ) > 0 ) 5.202 - { stolenSlv = readVMSQ (currQ ); 5.203 - break; 5.204 - } 5.205 - } 5.206 - 5.207 - if( stolenSlv != NULL ) 5.208 - { currSlot->slaveAssignedToSlot = stolenSlv; 5.209 - stolenSlv->schedSlot = currSlot; 5.210 - currSlot->needsSlaveAssigned = FALSE; 5.211 - 5.212 - writeVMSQ( stolenSlv, readyToAnimateQ ); 5.213 - } 5.214 - } 5.215 - 5.216 -/*This algorithm makes the common case fast. Make the coreloop passive, 5.217 - * and show its progress. Make the stealer control a gate that coreloop 5.218 - * has to pass. 5.219 - *To avoid interference, only one stealer at a time. Use a global 5.220 - * stealer-lock. 5.221 - * 5.222 - *The pattern is based on a gate -- stealer shuts the gate, then monitors 5.223 - * to be sure any already past make it all the way out, before starting. 
5.224 - *So, have a "progress" measure just before the gate, then have two after it, 5.225 - * one is in a "waiting room" outside the gate, the other is at the exit. 5.226 - *Then, the stealer first shuts the gate, then checks the progress measure 5.227 - * outside it, then looks to see if the progress measure at the exit is the 5.228 - * same. If yes, it knows the protected area is empty 'cause no other way 5.229 - * to get in and the last to get in also exited. 5.230 - *If the progress measure at the exit is not the same, then the stealer goes 5.231 - * into a loop checking both the waiting-area and the exit progress-measures 5.232 - * until one of them shows the same as the measure outside the gate. Might 5.233 - * as well re-read the measure outside the gate each go around, just to be 5.234 - * sure. It is guaranteed that one of the two will eventually match the one 5.235 - * outside the gate. 5.236 - * 5.237 - *Here's an informal proof of correctness: 5.238 - *The gate can be closed at any point, and have only four cases: 5.239 - * 1) coreloop made it past the gate-closing but not yet past the exit 5.240 - * 2) coreloop made it past the pre-gate progress update but not yet past 5.241 - * the gate, 5.242 - * 3) coreloop is right before the pre-gate update 5.243 - * 4) coreloop is past the exit and far from the pre-gate update. 5.244 - * 5.245 - * Covering the cases in reverse order, 5.246 - * 4) is not a problem -- stealer will read pre-gate progress, see that it 5.247 - * matches exit progress, and the gate is closed, so stealer can proceed. 5.248 - * 3) stealer will read pre-gate progress just after coreloop updates it.. 5.249 - * so stealer goes into a loop until the coreloop causes wait-progress 5.250 - * to match pre-gate progress, so then stealer can proceed 5.251 - * 2) same as 3.. 
5.252 - * 1) stealer reads pre-gate progress, sees that it's different than exit, 5.253 - * so goes into loop until exit matches pre-gate, now it knows coreloop 5.254 - * is not in protected and cannot get back in, so can proceed. 5.255 - * 5.256 - *Implementation for the stealer: 5.257 - * 5.258 - *First, acquire the stealer lock -- only cores with no work to do will 5.259 - * compete to steal, so not a big performance penalty having only one -- 5.260 - * will rarely have multiple stealers in a system with plenty of work -- and 5.261 - * in a system with little work, it doesn't matter. 5.262 - * 5.263 - *Note, have single-reader, single-writer pattern for all variables used to 5.264 - * communicate between stealer and victims 5.265 - * 5.266 - *So, scan the queues of the core controllers, until find non-empty. Each core 5.267 - * has its own list that it scans. The list goes in order from closest to 5.268 - * furthest core, so it steals first from close cores. Later can add 5.269 - * taking info from the app about overlapping footprints, and scan all the 5.270 - * others then choose work with the most footprint overlap with the contents 5.271 - * of this core's cache. 5.272 - * 5.273 - *Now, have a victim want to take work from. So, shut the gate in that 5.274 - * coreloop, by setting the "gate closed" var on its stack to TRUE. 5.275 - *Then, read the core's pre-gate progress and compare to the core's exit 5.276 - * progress. 5.277 - *If same, can proceed to take work from the coreloop's queue. When done, 5.278 - * write FALSE to gate closed var. 5.279 - *If different, then enter a loop that reads the pre-gate progress, then 5.280 - * compares to exit progress then to wait progress. When one of two 5.281 - * matches, proceed. Take work from the coreloop's queue. When done, 5.282 - * write FALSE to the gate closed var. 
5.283 - * 5.284 - */ 5.285 -void inline 5.286 -gateProtected_stealWorkInto( SchedSlot *currSlot, 5.287 - VMSQueueStruc *myReadyToAnimateQ, 5.288 - SlaveVP *masterVP ) 5.289 - { 5.290 - SlaveVP *stolenSlv; 5.291 - int32 coreIdx, i, haveAVictim, gotLock; 5.292 - VMSQueueStruc *victimsQ; 5.293 - 5.294 - volatile GateStruc *vicGate; 5.295 - int32 coreMightBeInProtected; 5.296 - 5.297 - 5.298 - 5.299 - //see if any other cores have work available to steal 5.300 - haveAVictim = FALSE; 5.301 - coreIdx = masterVP->coreAnimatedBy; 5.302 - for( i = 0; i < NUM_CORES -1; i++ ) 5.303 - { 5.304 - if( coreIdx >= NUM_CORES -1 ) 5.305 - { coreIdx = 0; 5.306 - } 5.307 - else 5.308 - { coreIdx++; 5.309 - } 5.310 - victimsQ = _VMSMasterEnv->readyToAnimateQs[coreIdx]; 5.311 - if( numInVMSQ( victimsQ ) > 0 ) 5.312 - { haveAVictim = TRUE; 5.313 - vicGate = _VMSMasterEnv->workStealingGates[ coreIdx ]; 5.314 - break; 5.315 - } 5.316 - } 5.317 - if( !haveAVictim ) return; //no work to steal, exit 5.318 - 5.319 - //have a victim core, now get the stealer-lock 5.320 - gotLock =__sync_bool_compare_and_swap( &(_VMSMasterEnv->workStealingLock), 5.321 - UNLOCKED, LOCKED ); 5.322 - if( !gotLock ) return; //go back to core controller, which will re-start master 5.323 - 5.324 - 5.325 - //====== Start Gate-protection ======= 5.326 - vicGate->gateClosed = TRUE; 5.327 - coreMightBeInProtected= vicGate->preGateProgress != vicGate->exitProgress; 5.328 - while( coreMightBeInProtected ) 5.329 - { //wait until sure 5.330 - if( vicGate->preGateProgress == vicGate->waitProgress ) 5.331 - coreMightBeInProtected = FALSE; 5.332 - if( vicGate->preGateProgress == vicGate->exitProgress ) 5.333 - coreMightBeInProtected = FALSE; 5.334 - } 5.335 - 5.336 - stolenSlv = readVMSQ ( victimsQ ); 5.337 - 5.338 - vicGate->gateClosed = FALSE; 5.339 - //======= End Gate-protection ======= 5.340 - 5.341 - 5.342 - if( stolenSlv != NULL ) //victim could have been in protected and taken 5.343 - { currSlot->slaveAssignedToSlot = 
stolenSlv; 5.344 - stolenSlv->schedSlot = currSlot; 5.345 - currSlot->needsSlaveAssigned = FALSE; 5.346 - 5.347 - writeVMSQ( stolenSlv, myReadyToAnimateQ ); 5.348 - } 5.349 - 5.350 - //unlock the work stealing lock 5.351 - _VMSMasterEnv->workStealingLock = UNLOCKED; 5.352 - }
6.1 --- a/Probes/probes.c Mon Mar 12 05:38:07 2012 -0700 6.2 +++ b/Probes/probes.c Tue Mar 13 10:02:06 2012 -0700 6.3 @@ -298,7 +298,6 @@ 6.4 VMS_impl__print_stats_of_all_probes() 6.5 { 6.6 forAllInDynArrayDo( _VMSMasterEnv->dynIntervalProbesInfo, 6.7 - &VMS_impl__print_stats_of_probe ); 6.8 + (DynArrayFnPtr) &VMS_impl__print_stats_of_probe ); 6.9 fflush( stdout ); 6.10 } 6.11 -typedef void (*DynArrayFnPtr) ( void * ); //fn has to cast void *
7.1 --- a/Probes/probes.h Mon Mar 12 05:38:07 2012 -0700 7.2 +++ b/Probes/probes.h Tue Mar 13 10:02:06 2012 -0700 7.3 @@ -107,7 +107,7 @@ 7.4 VMS_impl__record_interval_end_in_probe( int32 probeID ); 7.5 7.6 void 7.7 -VMS_impl__print_stats_of_probe( IntervalProbe *probe ) 7.8 +VMS_impl__print_stats_of_probe( IntervalProbe *probe ); 7.9 7.10 void 7.11 VMS_impl__print_stats_of_all_probes();
8.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 8.2 +++ b/SchedulingMaster.c Tue Mar 13 10:02:06 2012 -0700 8.3 @@ -0,0 +1,349 @@ 8.4 +/* 8.5 + * Copyright 2010 OpenSourceStewardshipFoundation 8.6 + * 8.7 + * Licensed under BSD 8.8 + */ 8.9 + 8.10 + 8.11 + 8.12 +#include <stdio.h> 8.13 +#include <stddef.h> 8.14 + 8.15 +#include "VMS.h" 8.16 + 8.17 + 8.18 +//=========================================================================== 8.19 +void inline 8.20 +stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ, 8.21 + SlaveVP *masterVP ); 8.22 + 8.23 +//=========================================================================== 8.24 + 8.25 + 8.26 + 8.27 +/*This code is animated by the virtual Master processor. 8.28 + * 8.29 + *Polls each sched slot exactly once, hands any requests made by a newly 8.30 + * done slave to the "request handler" plug-in function 8.31 + * 8.32 + *Any slots that need a Slv assigned are given to the "schedule" 8.33 + * plug-in function, which tries to assign a Slv (slave) to it. 8.34 + * 8.35 + *When all slots needing a processor have been given to the schedule plug-in, 8.36 + * a fraction of the slaves successfully scheduled are put into the 8.37 + * work queue, then a continuation of this function is put in, then the rest 8.38 + * of the Slvs that were successfully scheduled. 8.39 + * 8.40 + *The first thing the continuation does is busy-wait until the previous 8.41 + * animation completes. This is because an (unlikely) continuation may 8.42 + * sneak through queue before previous continuation is done putting second 8.43 + * part of scheduled slaves in, which is the only race condition. 
8.44 + * 8.45 + */ 8.46 + 8.47 +/*May 29, 2010 -- birth a Master during init so that first core controller to 8.48 + * start running gets it and does all the stuff for a newly born -- 8.49 + * from then on, will be doing continuation, but do suspension self 8.50 + * directly at end of master loop 8.51 + *So VMS_WL__init just births the master virtual processor same way it births 8.52 + * all the others -- then does any extra setup needed and puts it into the 8.53 + * work queue. 8.54 + *However means have to make masterEnv a global static volatile the same way 8.55 + * did with readyToAnimateQ in core controller. -- for performance, put the 8.56 + * jump to the core controller directly in here, and have it directly jump back. 8.57 + * 8.58 + * 8.59 + *Aug 18, 2010 -- Going to a separate MasterVP for each core, to see if this 8.60 + * avoids the suspected bug in the system stack that causes bizarre faults 8.61 + * at random places in the system code. 8.62 + * 8.63 + *So, this function is coupled to each of the MasterVPs, -- meaning this 8.64 + * function can't rely on a particular stack and frame -- each MasterVP that 8.65 + * animates this function has a different one. 8.66 + * 8.67 + *At this point, the schedulingMaster does not write itself into the queue anymore, 8.68 + * instead, the coreCtlr acquires the masterLock when it has nothing to 8.69 + * animate, and then animates its own schedulingMaster. However, still try to put 8.70 + * several AppSlvs into the queue to amortize the startup cost of switching 8.71 + * to the MasterVP. Note, don't have to worry about latency of requests much 8.72 + * because most requests generate work for same core -- only latency issue 8.73 + * is case when other cores starved and one core's requests generate work 8.74 + * for them -- so keep max in queue to 3 or 4.. 
8.75 + */ 8.76 +void schedulingMaster( void *initData, SlaveVP *animatingSlv ) 8.77 + { 8.78 + int32 slotIdx, numSlotsFilled; 8.79 + SlaveVP *schedSlaveVP; 8.80 + SchedSlot *currSlot, **schedSlots; 8.81 + MasterEnv *masterEnv; 8.82 + VMSQueueStruc *readyToAnimateQ; 8.83 + 8.84 + Sched_Assigner slaveAssigner; 8.85 + RequestHandler requestHandler; 8.86 + void *semanticEnv; 8.87 + 8.88 + int32 thisCoresIdx; 8.89 + SlaveVP *masterVP; 8.90 + volatile SlaveVP *volatileMasterVP; 8.91 + 8.92 + volatileMasterVP = animatingSlv; 8.93 + masterVP = (SlaveVP*)volatileMasterVP; //used to force re-define after jmp 8.94 + 8.95 + //First animation of each MasterVP will in turn animate this part 8.96 + // of setup code.. (Slv creator sets up the stack as if this function 8.97 + // was called normally, but actually get here by jmp) 8.98 + //So, setup values about stack ptr, jmp pt and all that 8.99 + //masterVP->resumeInstrPtr = &&schedulingMasterStartPt; 8.100 + 8.101 + 8.102 + //Note, got rid of writing the stack and frame ptr up here, because 8.103 + // only one 8.104 + // core can ever animate a given MasterVP, so don't need to communicate 8.105 + // new frame and stack ptr to the MasterVP storage before a second 8.106 + // version of that MasterVP can get animated on a different core. 8.107 + //Also got rid of the busy-wait. 
8.108 + 8.109 + 8.110 + //schedulingMasterStartPt: 8.111 + while(1){ 8.112 + 8.113 + MEAS__Capture_Pre_Master_Point 8.114 + 8.115 + masterEnv = (MasterEnv*)_VMSMasterEnv; 8.116 + 8.117 + //GCC may optimize so doesn't always re-define from frame-storage 8.118 + masterVP = (SlaveVP*)volatileMasterVP; //just to make sure after jmp 8.119 + thisCoresIdx = masterVP->coreAnimatedBy; 8.120 + readyToAnimateQ = masterEnv->readyToAnimateQs[thisCoresIdx]; 8.121 + schedSlots = masterEnv->allSchedSlots[thisCoresIdx]; 8.122 + 8.123 + requestHandler = masterEnv->requestHandler; 8.124 + slaveAssigner = masterEnv->slaveAssigner; 8.125 + semanticEnv = masterEnv->semanticEnv; 8.126 + 8.127 + 8.128 + //Poll each slot's Done flag 8.129 + numSlotsFilled = 0; 8.130 + for( slotIdx = 0; slotIdx < NUM_SCHED_SLOTS; slotIdx++) 8.131 + { 8.132 + currSlot = schedSlots[ slotIdx ]; 8.133 + 8.134 + if( currSlot->workIsDone ) 8.135 + { 8.136 + currSlot->workIsDone = FALSE; 8.137 + currSlot->needsSlaveAssigned = TRUE; 8.138 + 8.139 + MEAS__startReqHdlr; 8.140 + 8.141 + //process the requests made by the slave (held inside slave struc) 8.142 + (*requestHandler)( currSlot->slaveAssignedToSlot, semanticEnv ); 8.143 + 8.144 + MEAS__endReqHdlr; 8.145 + } 8.146 + if( currSlot->needsSlaveAssigned ) 8.147 + { //give slot a new Slv 8.148 + schedSlaveVP = 8.149 + (*slaveAssigner)( semanticEnv, thisCoresIdx ); 8.150 + 8.151 + if( schedSlaveVP != NULL ) 8.152 + { currSlot->slaveAssignedToSlot = schedSlaveVP; 8.153 + schedSlaveVP->schedSlot = currSlot; 8.154 + currSlot->needsSlaveAssigned = FALSE; 8.155 + numSlotsFilled += 1; 8.156 + } 8.157 + } 8.158 + } 8.159 + 8.160 + 8.161 + #ifdef SYS__TURN_ON_WORK_STEALING 8.162 + //If no slots filled, means no more work, look for work to steal. 
8.163 + if( numSlotsFilled == 0 ) 8.164 + { gateProtected_stealWorkInto( currSlot, readyToAnimateQ, masterVP ); 8.165 + } 8.166 + #endif 8.167 + 8.168 + MEAS__Capture_Post_Master_Point; 8.169 + 8.170 + masterSwitchToCoreCtlr(animatingSlv); 8.171 + flushRegisters(); 8.172 + }//schedulingMaster loop 8.173 + 8.174 + 8.175 + } 8.176 + 8.177 + 8.178 + 8.179 +/*This has a race condition -- the coreloops are accessing their own queues 8.180 + * at the same time that this work-stealer on a different core is trying to 8.181 + * read from those same queues. */ 8.182 +void inline 8.183 +stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ, 8.184 + SlaveVP *masterVP ) 8.185 + { 8.186 + SlaveVP *stolenSlv; 8.187 + int32 coreIdx, i; 8.188 + VMSQueueStruc *currQ; 8.189 + 8.190 + stolenSlv = NULL; 8.191 + coreIdx = masterVP->coreAnimatedBy; 8.192 + for( i = 0; i < NUM_CORES -1; i++ ) 8.193 + { 8.194 + if( coreIdx >= NUM_CORES -1 ) 8.195 + { coreIdx = 0; 8.196 + } 8.197 + else 8.198 + { coreIdx++; 8.199 + } 8.200 + currQ = _VMSMasterEnv->readyToAnimateQs[coreIdx]; 8.201 + if( numInVMSQ( currQ ) > 0 ) 8.202 + { stolenSlv = readVMSQ (currQ ); 8.203 + break; 8.204 + } 8.205 + } 8.206 + 8.207 + if( stolenSlv != NULL ) 8.208 + { currSlot->slaveAssignedToSlot = stolenSlv; 8.209 + stolenSlv->schedSlot = currSlot; 8.210 + currSlot->needsSlaveAssigned = FALSE; 8.211 + 8.212 + writeVMSQ( stolenSlv, readyToAnimateQ ); 8.213 + } 8.214 + } 8.215 + 8.216 +/*This algorithm makes the common case fast. Make the coreloop passive, 8.217 + * and show its progress. Make the stealer control a gate that coreloop 8.218 + * has to pass. 8.219 + *To avoid interference, only one stealer at a time. Use a global 8.220 + * stealer-lock. 8.221 + * 8.222 + *The pattern is based on a gate -- stealer shuts the gate, then monitors 8.223 + * to be sure any already past make it all the way out, before starting. 
8.224 + *So, have a "progress" measure just before the gate, then have two after it, 8.225 + * one is in a "waiting room" outside the gate, the other is at the exit. 8.226 + *Then, the stealer first shuts the gate, then checks the progress measure 8.227 + * outside it, then looks to see if the progress measure at the exit is the 8.228 + * same. If yes, it knows the protected area is empty 'cause no other way 8.229 + * to get in and the last to get in also exited. 8.230 + *If the progress measure at the exit is not the same, then the stealer goes 8.231 + * into a loop checking both the waiting-area and the exit progress-measures 8.232 + * until one of them shows the same as the measure outside the gate. Might 8.233 + * as well re-read the measure outside the gate each go around, just to be 8.234 + * sure. It is guaranteed that one of the two will eventually match the one 8.235 + * outside the gate. 8.236 + * 8.237 + *Here's an informal proof of correctness: 8.238 + *The gate can be closed at any point, and have only four cases: 8.239 + * 1) coreloop made it past the gate-closing but not yet past the exit 8.240 + * 2) coreloop made it past the pre-gate progress update but not yet past 8.241 + * the gate, 8.242 + * 3) coreloop is right before the pre-gate update 8.243 + * 4) coreloop is past the exit and far from the pre-gate update. 8.244 + * 8.245 + * Covering the cases in reverse order, 8.246 + * 4) is not a problem -- stealer will read pre-gate progress, see that it 8.247 + * matches exit progress, and the gate is closed, so stealer can proceed. 8.248 + * 3) stealer will read pre-gate progress just after coreloop updates it.. 8.249 + * so stealer goes into a loop until the coreloop causes wait-progress 8.250 + * to match pre-gate progress, so then stealer can proceed 8.251 + * 2) same as 3.. 
8.252 + * 1) stealer reads pre-gate progress, sees that it's different than exit, 8.253 + * so goes into loop until exit matches pre-gate, now it knows coreloop 8.254 + * is not in protected and cannot get back in, so can proceed. 8.255 + * 8.256 + *Implementation for the stealer: 8.257 + * 8.258 + *First, acquire the stealer lock -- only cores with no work to do will 8.259 + * compete to steal, so not a big performance penalty having only one -- 8.260 + * will rarely have multiple stealers in a system with plenty of work -- and 8.261 + * in a system with little work, it doesn't matter. 8.262 + * 8.263 + *Note, have single-reader, single-writer pattern for all variables used to 8.264 + * communicate between stealer and victims 8.265 + * 8.266 + *So, scan the queues of the core controllers, until find non-empty. Each core 8.267 + * has its own list that it scans. The list goes in order from closest to 8.268 + * furthest core, so it steals first from close cores. Later can add 8.269 + * taking info from the app about overlapping footprints, and scan all the 8.270 + * others then choose work with the most footprint overlap with the contents 8.271 + * of this core's cache. 8.272 + * 8.273 + *Now, have a victim want to take work from. So, shut the gate in that 8.274 + * coreloop, by setting the "gate closed" var on its stack to TRUE. 8.275 + *Then, read the core's pre-gate progress and compare to the core's exit 8.276 + * progress. 8.277 + *If same, can proceed to take work from the coreloop's queue. When done, 8.278 + * write FALSE to gate closed var. 8.279 + *If different, then enter a loop that reads the pre-gate progress, then 8.280 + * compares to exit progress then to wait progress. When one of two 8.281 + * matches, proceed. Take work from the coreloop's queue. When done, 8.282 + * write FALSE to the gate closed var. 
8.283 + * 8.284 + */ 8.285 +void inline 8.286 +gateProtected_stealWorkInto( SchedSlot *currSlot, 8.287 + VMSQueueStruc *myReadyToAnimateQ, 8.288 + SlaveVP *masterVP ) 8.289 + { 8.290 + SlaveVP *stolenSlv; 8.291 + int32 coreIdx, i, haveAVictim, gotLock; 8.292 + VMSQueueStruc *victimsQ; 8.293 + 8.294 + volatile GateStruc *vicGate; 8.295 + int32 coreMightBeInProtected; 8.296 + 8.297 + 8.298 + 8.299 + //see if any other cores have work available to steal 8.300 + haveAVictim = FALSE; 8.301 + coreIdx = masterVP->coreAnimatedBy; 8.302 + for( i = 0; i < NUM_CORES -1; i++ ) 8.303 + { 8.304 + if( coreIdx >= NUM_CORES -1 ) 8.305 + { coreIdx = 0; 8.306 + } 8.307 + else 8.308 + { coreIdx++; 8.309 + } 8.310 + victimsQ = _VMSMasterEnv->readyToAnimateQs[coreIdx]; 8.311 + if( numInVMSQ( victimsQ ) > 0 ) 8.312 + { haveAVictim = TRUE; 8.313 + vicGate = _VMSMasterEnv->workStealingGates[ coreIdx ]; 8.314 + break; 8.315 + } 8.316 + } 8.317 + if( !haveAVictim ) return; //no work to steal, exit 8.318 + 8.319 + //have a victim core, now get the stealer-lock 8.320 + gotLock =__sync_bool_compare_and_swap( &(_VMSMasterEnv->workStealingLock), 8.321 + UNLOCKED, LOCKED ); 8.322 + if( !gotLock ) return; //go back to core controller, which will re-start master 8.323 + 8.324 + 8.325 + //====== Start Gate-protection ======= 8.326 + vicGate->gateClosed = TRUE; 8.327 + coreMightBeInProtected= vicGate->preGateProgress != vicGate->exitProgress; 8.328 + while( coreMightBeInProtected ) 8.329 + { //wait until sure 8.330 + if( vicGate->preGateProgress == vicGate->waitProgress ) 8.331 + coreMightBeInProtected = FALSE; 8.332 + if( vicGate->preGateProgress == vicGate->exitProgress ) 8.333 + coreMightBeInProtected = FALSE; 8.334 + } 8.335 + 8.336 + stolenSlv = readVMSQ ( victimsQ ); 8.337 + 8.338 + vicGate->gateClosed = FALSE; 8.339 + //======= End Gate-protection ======= 8.340 + 8.341 + 8.342 + if( stolenSlv != NULL ) //victim could have been in protected and taken 8.343 + { currSlot->slaveAssignedToSlot = 
stolenSlv; 8.344 + stolenSlv->schedSlot = currSlot; 8.345 + currSlot->needsSlaveAssigned = FALSE; 8.346 + 8.347 + writeVMSQ( stolenSlv, myReadyToAnimateQ ); 8.348 + } 8.349 + 8.350 + //unlock the work stealing lock 8.351 + _VMSMasterEnv->workStealingLock = UNLOCKED; 8.352 + }
9.1 --- a/VMS.h Mon Mar 12 05:38:07 2012 -0700 9.2 +++ b/VMS.h Tue Mar 13 10:02:06 2012 -0700 9.3 @@ -201,7 +201,7 @@ 9.4 9.5 void * coreController( void *paramsIn ); //standard PThreads fn prototype 9.6 void * coreCtlr_Seq( void *paramsIn ); //standard PThreads fn prototype 9.7 -void masterLoop( void *initData, SlaveVP *masterVP ); 9.8 +void schedulingMaster( void *initData, SlaveVP *masterVP ); 9.9 9.10 9.11 typedef struct 9.12 @@ -215,10 +215,11 @@ 9.13 9.14 volatile MasterEnv *_VMSMasterEnv __align_to_cacheline__; 9.15 9.16 -pthread_t coreCtlrThdHandles[ NUM_CORES ]; //pthread's virt-procr state 9.17 +pthread_t coreCtlrThdHandles[ NUM_CORES ]; //pthread's virt-procr state 9.18 ThdParams *coreCtlrThdParams [ NUM_CORES ]; 9.19 -pthread_mutex_t suspendLock = PTHREAD_MUTEX_INITIALIZER; 9.20 -pthread_cond_t suspendCond = PTHREAD_COND_INITIALIZER; 9.21 + 9.22 +pthread_mutex_t suspendLock; 9.23 +pthread_cond_t suspendCond; 9.24 9.25 //========================= Function Prototypes =========================== 9.26
10.1 --- a/VMS__startup_and_shutdown.c Mon Mar 12 05:38:07 2012 -0700 10.2 +++ b/VMS__startup_and_shutdown.c Tue Mar 13 10:02:06 2012 -0700 10.3 @@ -10,6 +10,7 @@ 10.4 #include <malloc.h> 10.5 #include <inttypes.h> 10.6 #include <sys/time.h> 10.7 +#include <pthread.h> 10.8 10.9 #include "VMS.h" 10.10 10.11 @@ -43,7 +44,7 @@ 10.12 * the master Slv into the work-queue, ready for first "call" 10.13 * 2) Semantic layer then does its own init, which creates the seed virt 10.14 * slave inside the semantic layer, ready to schedule it when 10.15 - * asked by the first run of the masterLoop. 10.16 + * asked by the first run of the schedulingMaster. 10.17 * 10.18 *This part is bit weird because VMS really wants to be "always there", and 10.19 * have applications attach and detach.. for now, this VMS is part of 10.20 @@ -51,7 +52,7 @@ 10.21 * 10.22 *The semantic layer is isolated from the VMS internals by making the 10.23 * semantic layer do setup to a state that it's ready with its 10.24 - * initial Slvs, ready to schedule them to slots when the masterLoop 10.25 + * initial Slvs, ready to schedule them to slots when the schedulingMaster 10.26 * asks. Without this pattern, the semantic layer's setup would 10.27 * have to modify slots directly to assign the initial virt-procrs, and put 10.28 * them into the readyToAnimateQ itself, breaking the isolation completely. 10.29 @@ -71,7 +72,7 @@ 10.30 { 10.31 #ifdef DEBUG__TURN_ON_SEQUENTIAL_MODE 10.32 create_masterEnv(); 10.33 - flushRegisters(); //? not sure why here -- merten added it..? 10.34 + printf( "\n\n Running in SEQUENTIAL mode \n\n" ); 10.35 #else 10.36 create_masterEnv(); 10.37 create_the_coreCtlr_OS_threads(); 10.38 @@ -292,7 +293,7 @@ 10.39 readyToAnimateQs[ coreIdx ] = makeVMSQ(); 10.40 10.41 //Q: should give masterVP core-specific info as its init data? 
10.42 - masterVPs[ coreIdx ] = VMS_int__create_slaveVP( (TopLevelFnPtr)&masterLoop, (void*)masterEnv ); 10.43 + masterVPs[ coreIdx ] = VMS_int__create_slaveVP( (TopLevelFnPtr)&schedulingMaster, (void*)masterEnv ); 10.44 masterVPs[ coreIdx ]->coreAnimatedBy = coreIdx; 10.45 allSchedSlots[ coreIdx ] = create_sched_slots(); //makes for one core 10.46 _VMSMasterEnv->numMasterInARow[ coreIdx ] = 0; 10.47 @@ -426,6 +427,8 @@ 10.48 //get lock, to lock out any threads still starting up -- they'll see 10.49 // that setupComplete is true before entering while loop, and so never 10.50 // wait on the condition 10.51 + pthread_mutex_init( &suspendLock, NULL ); 10.52 + pthread_cond_init( &suspendCond, NULL ); 10.53 pthread_mutex_lock( &suspendLock ); 10.54 _VMSMasterEnv->setupComplete = 1; 10.55 pthread_mutex_unlock( &suspendLock );
11.1 --- a/VMS_primitive_data_types.h Mon Mar 12 05:38:07 2012 -0700 11.2 +++ b/VMS_primitive_data_types.h Tue Mar 13 10:02:06 2012 -0700 11.3 @@ -7,8 +7,8 @@ 11.4 11.5 */ 11.6 11.7 -#ifndef _PRIMITIVE_DATA_TYPES_H 11.8 -#define _PRIMITIVE_DATA_TYPES_H 11.9 +#ifndef _PRIMITIVE_DATA_TYPES_H 11.10 +#define _PRIMITIVE_DATA_TYPES_H 11.11 11.12 11.13 /*For portability, need primitive data types that have a well defined
12.1 --- a/vmalloc.c Mon Mar 12 05:38:07 2012 -0700 12.2 +++ b/vmalloc.c Tue Mar 13 10:02:06 2012 -0700 12.3 @@ -15,7 +15,7 @@ 12.4 #include <math.h> 12.5 12.6 #include "VMS.h" 12.7 -#include "C_Libraries/Histogram/Histogram.h" 12.8 +#include "Histogram/Histogram.h" 12.9 12.10 #define MAX_UINT64 0xFFFFFFFFFFFFFFFF 12.11