VMS/VMS_Implementations/VMS_impls/VMS__MC_shared_impl

annotate CoreController.c @ 256:2bcf37fd50c6

Prev msg should have been: isolated valgrind with a new compiler switch use SERVICES__TURN_ON_VALGRIND in order to compile valgrind support into vmalloc
author Sean Halle <seanhalle@yahoo.com>
date Mon, 10 Sep 2012 01:26:51 -0700
parents f1267bc7b342
children f5b110414453 999f2966a3e5
rev   line source
seanhalle@222 1 /*
seanhalle@222 2 * Copyright 2010 OpenSourceStewardshipFoundation
seanhalle@222 3 *
seanhalle@222 4 * Licensed under BSD
seanhalle@222 5 */
seanhalle@222 6
seanhalle@222 7
seanhalle@222 8 #include "VMS.h"
seanhalle@222 9
seanhalle@222 10 #include <stdlib.h>
seanhalle@222 11 #include <stdio.h>
seanhalle@222 12 #include <time.h>
seanhalle@222 13
seanhalle@222 14 #include <pthread.h>
seanhalle@222 15 #include <sched.h>
seanhalle@222 16
seanhalle@222 17 //===================== Functions local to this file =======================
seanhalle@222 18 void *terminateCoreController(SlaveVP *currSlv);
seanhalle@230 19
seanhalle@222 20 inline void
seanhalle@222 21 doBackoff_for_TooLongToGetLock( int32 numTriesToGetLock, uint32 *seed1,
seanhalle@222 22 uint32 *seed2 );
seanhalle@222 23 inline void
seanhalle@222 24 doBackoff_for_TooLongWithNoWork( int32 numRepsWithNoWork, uint32 *seed1,
seanhalle@222 25 uint32 *seed2 );
seanhalle@222 26
seanhalle@222 27 //===========================================================================
seanhalle@222 28
seanhalle@222 29
seanhalle@222 30 /*The Core Controller is logically "beneath" the masterVP and slave VPs. Its
seanhalle@222 31 * job is to control which of those VPs the core animates. Any time one of
seanhalle@222 32 * those VPs suspends, the suspend-primitive switches the core over to
seanhalle@222 33 * animating the core controller. The core controller then follows a very
seanhalle@222 34 * basic pattern to choose which VP will get animated next, then switches
seanhalle@222 35 * the core over to animating that VP. So, all VPs switch the core to
seanhalle@222 36 * core controller, which then chooses which VP the core animates next.
seanhalle@222 37 *
seanhalle@222 38 *The way the core controller decides which VP to switch the core to next is:
seanhalle@230 39 * 1) There are a number of "animation slots", which the master VP fills up
seanhalle@222 40 * with slave VPs that are ready to be animated. So, the core controller
seanhalle@230 41 * just iterates through the animation slots. When the next slot has a
seanhalle@222 42 * slave VP in it, the core controller switches the core over to animate
seanhalle@222 43 * that slave.
seanhalle@230 44 * 2) When the core controller checks a animation slot, and it's empty,
seanhalle@222 45 * then the controller switches the core over to animating the master VP,
seanhalle@222 46 * whose job is to find more slave VPs ready, and assign those to
seanhalle@230 47 * animation slots.
seanhalle@222 48 *
seanhalle@230 49 *So, in effect, a animation slot functions as another layer of virtual
seanhalle@222 50 * processor. A slot has the logical meaning of being an animator that
seanhalle@222 51 * animates the slave assigned to it. However, the core controller sits
seanhalle@222 52 * below the slots, and sequences down them, assigning the actual physical
seanhalle@222 53 * core to each slot, in turn.
seanhalle@230 54 *The reason for having the animation slots and core controller is to
seanhalle@222 55 * amortize the overhead of switching to the master VP and running it. With
seanhalle@230 56 * multiple animation slots, the time to switch-to-master and the code in
seanhalle@232 57 * the animation master is divided by the number of animation slots.
seanhalle@230 58 *The core controller and animation slots are not fundamental parts of VMS,
seanhalle@222 59 * but rather optimizations put into the shared-semantic-state version of
seanhalle@222 60 * VMS. Other versions of VMS will not have a core controller nor scheduling
seanhalle@222 61 * slots.
seanhalle@222 62 *
seanhalle@222 63 *The core controller "owns" the physical core, in effect, and is the
seanhalle@230 64 * function given to the pthread's creation call. Hence, it contains code
seanhalle@222 65 * related to pthread startup, synchronizing the controllers to all start
seanhalle@222 66 * at the same time-point, and pinning the pthreads to physical cores.
seanhalle@222 67 *
seanhalle@222 68 */
seanhalle@222 69 void *
seanhalle@222 70 coreController( void *paramsIn )
seanhalle@222 71 {
seanhalle@222 72 int32 thisCoresIdx;
seanhalle@222 73 int32 numRepetitionsWithNoWork;
seanhalle@222 74 SlaveVP *currVP;
seanhalle@245 75 AnimSlot *currSlot, **animSlots;
seanhalle@222 76 int32 currSlotIdx;
seanhalle@222 77 volatile int32 *addrOfMasterLock; //thing pointed to is volatile, not ptr
seanhalle@222 78 SlaveVP *thisCoresMasterVP;
seanhalle@222 79 //Variables used for pthread related things
nengel@238 80 ThdParams *thisCoresThdParams;
seanhalle@222 81 cpu_set_t coreMask; //used during pinning pthread to CPU core
seanhalle@222 82 int32 errorCode;
seanhalle@222 83 //Variables used during measurements
seanhalle@222 84 TSCountLowHigh endSusp;
seanhalle@222 85 //Variables used in random-backoff, for master-lock and waiting for work
seanhalle@230 86 uint32_t seed1 = rand()%1000; // init random number generator for backoffs
seanhalle@222 87 uint32_t seed2 = rand()%1000;
seanhalle@222 88
seanhalle@222 89
seanhalle@222 90 //=============== Initializations ===================
nengel@238 91 thisCoresThdParams = (ThdParams *)paramsIn;
nengel@238 92 thisCoresIdx = thisCoresThdParams->coreNum;
seanhalle@222 93
seanhalle@222 94 //Assembly that saves addr of label of return instr -- label in assmbly
seanhalle@222 95 recordCoreCtlrReturnLabelAddr((void**)&(_VMSMasterEnv->coreCtlrReturnPt));
seanhalle@222 96
seanhalle@235 97 animSlots = _VMSMasterEnv->allAnimSlots[thisCoresIdx];
seanhalle@222 98 currSlotIdx = 0; //start at slot 0, go up until one empty, then do master
seanhalle@222 99 numRepetitionsWithNoWork = 0;
seanhalle@222 100 addrOfMasterLock = &(_VMSMasterEnv->masterLock);
seanhalle@222 101 thisCoresMasterVP = _VMSMasterEnv->masterVPs[thisCoresIdx];
seanhalle@222 102
seanhalle@222 103 //==================== pthread related stuff ======================
seanhalle@225 104 //pin the pthread to the core -- takes away Linux control
seanhalle@222 105 //Linux requires pinning to be done inside the thread-function
seanhalle@222 106 //Designate a core by a 1 in bit-position corresponding to the core
seanhalle@222 107 CPU_ZERO(&coreMask); //initialize mask bits to zero
nengel@238 108 CPU_SET(thisCoresThdParams->coreNum,&coreMask); //set bit repr the coreNum
seanhalle@222 109 pthread_t selfThd = pthread_self();
seanhalle@222 110 errorCode =
seanhalle@222 111 pthread_setaffinity_np( selfThd, sizeof(coreMask), &coreMask);
seanhalle@222 112 if(errorCode){ printf("\n pinning thd to core failed \n"); exit(0); }
seanhalle@222 113
seanhalle@222 114 //make sure the controllers all start at same time, by making them wait
seanhalle@227 115 pthread_mutex_lock( &suspendLock );
seanhalle@222 116 while( !(_VMSMasterEnv->setupComplete) )
seanhalle@222 117 { pthread_cond_wait( &suspendCond, &suspendLock );
seanhalle@222 118 }
seanhalle@222 119 pthread_mutex_unlock( &suspendLock );
seanhalle@235 120
nengel@238 121 HOLISTIC__CoreCtrl_Setup;
nengel@238 122
seanhalle@235 123 DEBUG__printf1(TRUE, "started coreCtrlr", thisCoresIdx );
nengel@238 124
seanhalle@222 125 //====================== The Core Controller ======================
seanhalle@222 126 while(1) //An endless loop is just one way of doing the control structure
seanhalle@222 127 { //Assembly code switches the core between animating a VP and
seanhalle@222 128 // animating this core controller. The switch is done by
seanhalle@222 129 // changing the stack-pointer and frame-pointer and then doing
seanhalle@222 130 // an assembly jmp. When reading this code, the effect is
seanhalle@222 131 // that the "switchToSlv()" at the end of the loop is sort of a
seanhalle@222 132 // "warp in time" -- the core disappears inside this, jmps to
seanhalle@222 133 // animating a VP, and when that VP suspends, the suspend
seanhalle@222 134 // jmps back. This has the effect of "returning" from the
seanhalle@222 135 // switchToSlv() call. Then control loops back to here.
seanhalle@222 136 //Alternatively, the VP suspend primitive could just not bother
seanhalle@222 137 // returning from switchToSlv, and instead jmp directly to here.
seanhalle@227 138
seanhalle@236 139 if( currSlotIdx >= NUM_ANIM_SLOTS ) goto switchToMaster;
seanhalle@235 140 currSlot = animSlots[ currSlotIdx ];
seanhalle@222 141
seanhalle@222 142 if( ! currSlot->needsSlaveAssigned ) //slot does have slave assigned
nengel@239 143 { if(currSlot->slaveAssignedToSlot->typeOfVP == Idle){
nengel@239 144 numRepetitionsWithNoWork ++;
nengel@239 145 } else {
nengel@239 146 numRepetitionsWithNoWork = 0; //reset back2back master count
nengel@239 147 }
seanhalle@222 148 currSlotIdx ++;
seanhalle@222 149 currVP = currSlot->slaveAssignedToSlot;
nengel@238 150 HOLISTIC__Record_last_work;
seanhalle@222 151 }
seanhalle@222 152 else //slot is empty, so switch to master
seanhalle@222 153 {
seanhalle@222 154 switchToMaster:
seanhalle@222 155 currSlotIdx = 0; //doing switch to master, so start over at slot 0
seanhalle@222 156 currVP = NULL;
seanhalle@222 157
seanhalle@222 158 MEAS__Capture_Pre_Master_Lock_Point;
nengel@238 159 HOLISTIC__Record_AppResponderInvocation_start;
seanhalle@222 160
seanhalle@222 161 int numTriesToGetLock = 0; int gotLock = 0;
seanhalle@222 162 while( currVP == NULL ) //keep going until get master lock
seanhalle@222 163 {
seanhalle@222 164 //At this point, first thing to do is get lock. But, want to
seanhalle@222 165 // reduce lock contention from cores with no work, so first
seanhalle@222 166 // check if this is a core with no work, and busy wait if so.
seanhalle@222 167 //Then, if it's been way too long without work, yield pthread
seanhalle@222 168 if( numRepetitionsWithNoWork > NUM_REPS_W_NO_WORK_BEFORE_BACKOFF)
seanhalle@222 169 doBackoff_for_TooLongWithNoWork( numRepetitionsWithNoWork, &seed1, &seed2 );
seanhalle@222 170 if( numRepetitionsWithNoWork > NUM_REPS_W_NO_WORK_BEFORE_YIELD )
seanhalle@222 171 { numRepetitionsWithNoWork = 0; pthread_yield(); }
seanhalle@222 172
seanhalle@222 173
seanhalle@222 174 //Now, try to get the lock
seanhalle@222 175 gotLock = __sync_bool_compare_and_swap( addrOfMasterLock,
seanhalle@222 176 UNLOCKED, LOCKED );
seanhalle@222 177 if( gotLock )
seanhalle@222 178 { //At this point, have run out of slaves, so tried to get
seanhalle@222 179 // the master lock, and have successfully gotten it.
seanhalle@222 180 //So, set the currVP to this core's masterVP and break out
seanhalle@222 181 // of the get-lock loop. Below, assembly code will switch
seanhalle@222 182 // the core over to animating the masterVP. When it's
seanhalle@222 183 // done, the masterVP will use assembly to switch the core
seanhalle@222 184 // back to animating this core controller
seanhalle@222 185 currVP = thisCoresMasterVP;
seanhalle@222 186 numRepetitionsWithNoWork += 1;
seanhalle@222 187 break; //end while -- have a VP to animate now
seanhalle@222 188 }
seanhalle@222 189 //Get here only when failed to get lock
seanhalle@222 190
seanhalle@222 191 numTriesToGetLock++; //if too many, means too much contention
seanhalle@222 192 if( numTriesToGetLock > NUM_TRIES_BEFORE_DO_BACKOFF )
seanhalle@222 193 doBackoff_for_TooLongToGetLock( numTriesToGetLock, &seed1, &seed2 );
seanhalle@222 194 if( numTriesToGetLock > MASTERLOCK_RETRIES_BEFORE_YIELD )
seanhalle@222 195 { numTriesToGetLock = 0; pthread_yield(); }
seanhalle@222 196 }
seanhalle@222 197 MEAS__Capture_Post_Master_Lock_Point;
seanhalle@222 198 }
seanhalle@222 199
nengel@238 200 HOLISTIC__Record_Work_start;
seanhalle@222 201
seanhalle@222 202 switchToSlv(currVP); //Slave suspend makes core "return" from this call
seanhalle@222 203 flushRegisters(); //prevent GCC optimization from doing bad things
seanhalle@222 204
nengel@238 205 HOLISTIC__Record_Work_end;
nengel@238 206
seanhalle@222 207 MEAS__Capture_End_Susp_in_CoreCtlr_ForSys;
seanhalle@222 208
seanhalle@222 209 }//while(1)
seanhalle@222 210 }
seanhalle@222 211
seanhalle@230 212 /*Shutdown of VMS involves several steps, of which this is the last. This
seanhalle@230 213 * function is jumped to from the asmTerminateCoreCtrl, which is in turn
seanhalle@230 214 * called from endOSThreadFn, which is the top-level-fn of the shutdown
seanhalle@230 215 * slaves.
seanhalle@230 216 */
seanhalle@222 217 void *
seanhalle@222 218 terminateCoreCtlr(SlaveVP *currSlv)
seanhalle@222 219 {
seanhalle@230 220 //first, free shutdown Slv that jumped here, then end the pthread
seanhalle@222 221 VMS_int__dissipate_slaveVP( currSlv );
seanhalle@222 222 pthread_exit( NULL );
seanhalle@222 223 }
seanhalle@222 224
seanhalle@246 225 inline uint32_t
seanhalle@246 226 randomNumber()
seanhalle@246 227 {
seanhalle@246 228 _VMSMasterEnv->seed1 = (uint32)(36969 * (_VMSMasterEnv->seed1 & 65535) +
seanhalle@246 229 (_VMSMasterEnv->seed1 >> 16) );
seanhalle@246 230 _VMSMasterEnv->seed2 = (uint32)(18000 * (_VMSMasterEnv->seed2 & 65535) +
seanhalle@246 231 (_VMSMasterEnv->seed2 >> 16) );
seanhalle@246 232 return (_VMSMasterEnv->seed1 << 16) + _VMSMasterEnv->seed2;
seanhalle@246 233 }
seanhalle@246 234
seanhalle@246 235
seanhalle@222 236
seanhalle@222 237 /*Busy-wait for a random number of cycles -- chooses number of cycles
seanhalle@222 238 * differently than for the too-many-tries-to-get-lock backoff
seanhalle@222 239 */
seanhalle@222 240 inline void
seanhalle@222 241 doBackoff_for_TooLongWithNoWork( int32 numRepsWithNoWork, uint32 *seed1,
seanhalle@222 242 uint32 *seed2 )
seanhalle@222 243 { int32 i, waitIterations;
seanhalle@222 244 volatile double fakeWorkVar; //busy-wait fake work
seanhalle@222 245
seanhalle@230 246 //Get a random number of iterations to busy-wait. The % is a simple
seanhalle@230 247 // way to set the maximum value that can be generated.
seanhalle@222 248 waitIterations =
seanhalle@222 249 randomNumber(seed1, seed2) %
seanhalle@222 250 (numRepsWithNoWork * numRepsWithNoWork * NUM_CORES);
seanhalle@222 251 for( i = 0; i < waitIterations; i++ )
seanhalle@222 252 { fakeWorkVar += (fakeWorkVar + 32.0) / 2.0; //busy-wait
seanhalle@222 253 }
seanhalle@222 254 }
seanhalle@222 255
seanhalle@222 256 /*Busy-waits for a random number of cycles -- chooses number of cycles
seanhalle@222 257 * differently than for the no-work backoff
seanhalle@222 258 */
seanhalle@222 259 inline void
seanhalle@222 260 doBackoff_for_TooLongToGetLock( int32 numTriesToGetLock, uint32 *seed1,
seanhalle@222 261 uint32 *seed2 )
seanhalle@222 262 { int32 i, waitIterations;
seanhalle@222 263 volatile double fakeWorkVar; //busy-wait fake work
seanhalle@222 264
seanhalle@222 265 waitIterations =
seanhalle@222 266 randomNumber(seed1, seed2) %
seanhalle@245 267 (numTriesToGetLock * GET_LOCK_BACKOFF_WEIGHT);
seanhalle@222 268 //addToHist( wait_iterations, coreLoopThdParams->wait_iterations_hist );
seanhalle@222 269 for( i = 0; i < waitIterations; i++ )
seanhalle@222 270 { fakeWorkVar += (fakeWorkVar + 32.0) / 2.0; //busy-wait
seanhalle@222 271 }
seanhalle@222 272 }
seanhalle@222 273
seanhalle@222 274
#ifdef DEBUG__TURN_ON_SEQUENTIAL_MODE

//===========================================================================
/*Sequential variant of the core controller.  It walks the animation slots
 * and falls back to the master VP the same way the threaded version does,
 * but leaves out three things: pinning the thread to a core, waiting for
 * setup to complete, and acquiring the master lock.
 *
 * paramsIn -- not read by this version; the core index is fixed at 0.
 *
 *The loop has no exit.  The process is ended via exit(0) when the master
 * has been chosen more than NUM_REPS_W_NO_WORK_BEFORE_YIELD times in a row
 * with no slave animated in between.
 *
 *NOTE(review): the MEAS__ names are macros defined outside this file and
 * may refer to the locals below by name -- keep the local names as they are.
 */
void *
coreCtlr_Seq( void *paramsIn )
 {
   int32      thisCoresIdx;
   int32      numRepetitionsWithNoWork;
   SlaveVP   *currVP;
   AnimSlot  *currSlot, **animSlots;
   int32      currSlotIdx;
   int32     *addrOfMasterLock;
   SlaveVP   *thisCoresMasterVP;
   int        slotHasSlave;  //set each pass: is a slave ready in this slot?

   //=============== Initializations ===================
   thisCoresIdx             = 0; //sequential version
   animSlots                = _VMSMasterEnv->allAnimSlots[thisCoresIdx];
   currSlotIdx              = 0; //start at slot 0, go up until one empty, then do master
   numRepetitionsWithNoWork = 0;
   addrOfMasterLock         = &(_VMSMasterEnv->masterLock);
   thisCoresMasterVP        = _VMSMasterEnv->masterVPs[thisCoresIdx];

      //Assembly that saves addr of label of return instr -- label in assembly
   recordCoreCtlrReturnLabelAddr((void**)&(_VMSMasterEnv->coreCtlrReturnPt));


   //====================== The Core Controller ======================
   for(;;)
    {
         //Only look at a slot when the index is still inside the slot array;
         // running off the end is handled the same as finding an empty slot
      slotHasSlave = 0;
      if( currSlotIdx < NUM_ANIM_SLOTS )
       { currSlot     = animSlots[ currSlotIdx ];
         slotHasSlave = ! currSlot->needsSlaveAssigned;
       }

      if( slotHasSlave ) //slot does have slave assigned
       { numRepetitionsWithNoWork = 0; //reset B2B master count
         currSlotIdx ++;
         currVP = currSlot->slaveAssignedToSlot;
       }
      else //slot is empty (or no slots left), so switch to master
       { currSlotIdx = 0; //doing switch to master, so start over at slot 0

         currVP = thisCoresMasterVP;

         MEAS__Capture_Pre_Master_Lock_Point;  //back to back because
         MEAS__Capture_Post_Master_Lock_Point; // sequential version

         if( numRepetitionsWithNoWork > NUM_REPS_W_NO_WORK_BEFORE_YIELD )
          { printf("Lots of reps w/o work\n");
            exit(0); //if no work, no way to ever get it in sequential!
          }
         numRepetitionsWithNoWork += 1;
       }

      switchToSlv(currVP); //Slave suspend makes core "return" from this call
      flushRegisters();    //prevent GCC optimization from doing bad things

      MEAS__Capture_End_Susp_in_CoreCtlr_ForSys;

    } //endless loop
 }
#endif