annotate MasterLoop.c @ 135:0b49fd35afc1

Distributed free is working: -the app sends a VMSSemReqst to its Master, which sends a request to a different Master -the Master sends the request directly -the request structure is freed by the sender once the request has been handled. There are still problems on shutdown: the shutdownVPs are all allocated by one Master, which is likely to be terminated.
author Merten Sach <msach@mailbox.tu-berlin.de>
date Fri, 16 Sep 2011 20:08:28 +0200
parents a9b72021f053
children 99343ffe1918
rev   line source
Me@0 1 /*
Me@38 2 * Copyright 2010 OpenSourceStewardshipFoundation
Me@43 3 *
Me@0 4 * Licensed under BSD
Me@0 5 */
Me@0 6
Me@0 7
Me@0 8
Me@0 9 #include <stdio.h>
Me@9 10 #include <stddef.h>
Me@0 11
Me@0 12 #include "VMS.h"
msach@77 13 #include "ProcrContext.h"
msach@134 14 #include "scheduling.h"
msach@135 15 #include "inter_VMS_requests.h"
msach@135 16 #include "inter_VMS_requests_handler.h"
Me@0 17
Me@55 18 //===========================================================================
Me@55 19 void inline
Me@55 20 stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ,
msach@127 21 VirtProcr *masterPr);
msach@127 22
msach@127 23 void inline
msach@127 24 handleInterMasterReq( InterMasterReqst *currReq, void *_semEnv,
msach@127 25 VirtProcr *masterPr);
msach@127 26
msach@127 27 void inline
msach@127 28 handleInterVMSCoreReq( InterVMSCoreReqst *currReq, VirtProcr *masterPr);
Me@55 29
Me@55 30 //===========================================================================
Me@55 31
Me@55 32
msach@69 33
Me@0 34 /*This code is animated by the virtual Master processor.
Me@0 35 *
Me@11 36 *Polls each sched slot exactly once, hands any requests made by a newly
Me@11 37 * done slave to the "request handler" plug-in function
Me@0 38 *
Me@11 39 *Any slots that need a virt procr assigned are given to the "schedule"
Me@11 40 * plug-in function, which tries to assign a virt procr (slave) to it.
Me@0 41 *
Me@11 42 *When all slots needing a processor have been given to the schedule plug-in,
Me@11 43 * a fraction of the procrs successfully scheduled are put into the
Me@11 44 * work queue, then a continuation of this function is put in, then the rest
Me@11 45 * of the virt procrs that were successfully scheduled.
Me@0 46 *
Me@11 47 *The first thing the continuation does is busy-wait until the previous
Me@11 48 * animation completes. This is because an (unlikely) continuation may
Me@11 49 * sneak through queue before previous continuation is done putting second
Me@11 50 * part of scheduled slaves in, which is the only race condition.
Me@0 51 *
Me@0 52 */
Me@0 53
Me@4 54 /*May 29, 2010 -- birth a Master during init so that first core loop to
Me@11 55 * start running gets it and does all the stuff for a newly born --
Me@11 56 * from then on, will be doing continuation, but do suspension self
Me@4 57 * directly at end of master loop
Me@4 58 *So VMS__init just births the master virtual processor same way it births
Me@4 59 * all the others -- then does any extra setup needed and puts it into the
Me@4 60 * work queue.
Me@120 61 *However means have to make masterEnv a global static volatile.
Me@31 62 *
Me@31 63 *
Me@31 64 *Aug 18, 2010 -- Going to a separate MasterVP for each core, to see if this
Me@31 65 * avoids the suspected bug in the system stack that causes bizarre faults
Me@31 66 * at random places in the system code.
Me@31 67 *
Me@31 68 *So, this function is coupled to each of the MasterVPs, -- meaning this
Me@31 69 * function can't rely on a particular stack and frame -- each MasterVP that
Me@120 70 * animates this function has a different stack.
Me@31 71 *
Me@31 72 *At this point, the masterLoop does not write itself into the queue anymore,
Me@31 73 * instead, the coreLoop acquires the masterLock when it has nothing to
Me@31 74 * animate, and then animates its own masterLoop. However, still try to put
Me@31 75 * several AppVPs into the queue to amortize the startup cost of switching
Me@31 76 * to the MasterVP. Note, don't have to worry about latency of requests much
Me@31 77 * because most requests generate work for same core -- only latency issue
Me@31 78 * is case when other cores starved and one core's requests generate work
Me@31 79 * for them -- so keep max in queue to 3 or 4..
Me@4 80 */
Me@31 81 void masterLoop( void *initData, VirtProcr *animatingPr )
Me@21 82 {
Me@55 83 int32 slotIdx, numSlotsFilled;
Me@21 84 VirtProcr *schedVirtPr;
Me@31 85 SchedSlot *currSlot, **schedSlots;
Me@0 86 MasterEnv *masterEnv;
Me@31 87 VMSQueueStruc *readyToAnimateQ;
Me@4 88
Me@0 89 SlaveScheduler slaveScheduler;
Me@0 90 RequestHandler requestHandler;
Me@31 91 void *semanticEnv;
Me@0 92
Me@55 93 int32 thisCoresIdx;
Me@31 94 VirtProcr *masterPr;
msach@69 95 volatile VirtProcr *volatileMasterPr;
msach@69 96
msach@69 97 volatileMasterPr = animatingPr;
msach@69 98 masterPr = (VirtProcr*)volatileMasterPr; //used to force re-define after jmp
msach@132 99 masterEnv = (MasterEnv*)_VMSMasterEnv;
Me@31 100
Me@31 101 //First animation of each MasterVP will in turn animate this part
Me@31 102 // of setup code.. (VP creator sets up the stack as if this function
Me@31 103 // was called normally, but actually get here by jmp)
Me@0 104
Me@120 105 //Sept 2011
Me@120 106 //Old code jumped directly to this point, but doesn't work on x64
Me@120 107 // So, just make this an endless loop, and do assembly function at end
Me@120 108 // that saves its own return addr, then jumps to core_loop.
Me@120 109 while(1)
msach@132 110 {
Me@38 111 //============================= MEASUREMENT STUFF ========================
Me@38 112 #ifdef MEAS__TIME_MASTER
Me@38 113 //Total Master time includes one coreloop time -- just assume the core
Me@120 114 // loop time is same for Master as is for AppVPs, even though it may be
Me@68 115 // smaller due to higher predictability of the fixed jmp.
Me@38 116 saveLowTimeStampCountInto( masterPr->startMasterTSCLow );
Me@38 117 #endif
Me@38 118 //========================================================================
Me@0 119
msach@132 120 //GCC may optimize so doesn't always re-define from frame-storage
msach@69 121 thisCoresIdx = masterPr->coreAnimatedBy;
msach@132 122 masterEnv->currentMasterProcrID = thisCoresIdx;
msach@69 123 readyToAnimateQ = masterEnv->readyToAnimateQs[thisCoresIdx];
msach@69 124 schedSlots = masterEnv->allSchedSlots[thisCoresIdx];
msach@69 125
msach@69 126 requestHandler = masterEnv->requestHandler;
msach@69 127 slaveScheduler = masterEnv->slaveScheduler;
msach@69 128 semanticEnv = masterEnv->semanticEnv;
msach@69 129
Me@119 130 //First, check for requests from other MasterVPs, and handle them
msach@135 131 InterMasterReqst* currReqst = masterEnv->interMasterRequestsFor[thisCoresIdx];
msach@135 132 while(currReqst)
msach@127 133 {
msach@135 134 handleInterMasterReq( currReqst, semanticEnv, masterPr );
msach@135 135 currReqst = currReqst->nextReqst;
msach@127 136 }
msach@135 137 masterEnv->interMasterRequestsFor[thisCoresIdx] = NULL;
msach@135 138
msach@135 139 //Second, check for own request that were handled for other MasterVPs
msach@135 140 currReqst = masterEnv->interMasterRequestsSentBy[thisCoresIdx];
msach@135 141 while(currReqst && currReqst->obsolete)
msach@135 142 {
msach@135 143 InterMasterReqst *nextReqst = currReqst->nextSentReqst;
msach@135 144 VMS__free(currReqst);
msach@135 145 currReqst = nextReqst;
msach@135 146 }
msach@135 147 masterEnv->interMasterRequestsSentBy[thisCoresIdx] = currReqst;
msach@127 148
Me@119 149 //Now, take care of the SlaveVPs
Me@120 150 //Go through the slots -- if Slave there newly suspended, handle its request
Me@120 151 // then, either way, ask assigner to fill each slot
Me@55 152 numSlotsFilled = 0;
Me@26 153 for( slotIdx = 0; slotIdx < NUM_SCHED_SLOTS; slotIdx++)
Me@0 154 {
Me@4 155 currSlot = schedSlots[ slotIdx ];
Me@0 156
Me@4 157 if( currSlot->workIsDone )
Me@0 158 {
Me@4 159 currSlot->workIsDone = FALSE;
Me@4 160 currSlot->needsProcrAssigned = TRUE;
Me@0 161
Me@0 162 //process requests from slave to master
Me@68 163 //====================== MEASUREMENT STUFF ===================
Me@68 164 #ifdef MEAS__TIME_PLUGIN
Me@68 165 int32 startStamp1, endStamp1;
Me@68 166 saveLowTimeStampCountInto( startStamp1 );
Me@68 167 #endif
Me@68 168 //============================================================
Me@21 169 (*requestHandler)( currSlot->procrAssignedToSlot, semanticEnv );
Me@68 170 //====================== MEASUREMENT STUFF ===================
Me@68 171 #ifdef MEAS__TIME_PLUGIN
Me@68 172 saveLowTimeStampCountInto( endStamp1 );
Me@68 173 addIntervalToHist( startStamp1, endStamp1,
Me@68 174 _VMSMasterEnv->reqHdlrLowTimeHist );
Me@68 175 addIntervalToHist( startStamp1, endStamp1,
Me@68 176 _VMSMasterEnv->reqHdlrHighTimeHist );
Me@68 177 #endif
Me@68 178 //============================================================
Me@0 179 }
Me@4 180 if( currSlot->needsProcrAssigned )
Me@4 181 { //give slot a new virt procr
Me@21 182 schedVirtPr =
Me@31 183 (*slaveScheduler)( semanticEnv, thisCoresIdx );
Me@0 184
Me@21 185 if( schedVirtPr != NULL )
Me@21 186 { currSlot->procrAssignedToSlot = schedVirtPr;
Me@26 187 schedVirtPr->schedSlot = currSlot;
msach@132 188 schedVirtPr->coreAnimatedBy = thisCoresIdx;
Me@26 189 currSlot->needsProcrAssigned = FALSE;
Me@55 190 numSlotsFilled += 1;
Me@55 191
Me@55 192 writeVMSQ( schedVirtPr, readyToAnimateQ );
Me@0 193 }
Me@0 194 }
Me@0 195 }
Me@0 196
Me@55 197
Me@55 198 #ifdef USE_WORK_STEALING
Me@55 199 //If no slots filled, means no more work, look for work to steal.
Me@55 200 if( numSlotsFilled == 0 )
Me@55 201 { gateProtected_stealWorkInto( currSlot, readyToAnimateQ, masterPr );
Me@55 202 }
Me@55 203 #endif
Me@26 204
Me@21 205
Me@38 206 #ifdef MEAS__TIME_MASTER
Me@38 207 saveLowTimeStampCountInto( masterPr->endMasterTSCLow );
Me@38 208 #endif
Me@38 209
msach@71 210 masterSwitchToCoreLoop(animatingPr);
msach@71 211 flushRegisters();
Me@119 212 }//while(1) MasterLoop
Me@0 213 }
Me@0 214
Me@119 215 /*This is for inter-master communication. Either the master itself or
Me@119 216 * the plugin sends one of these requests. Some are handled here, by the
Me@119 217 * master_loop, others are handed off to the plugin.
Me@119 218 */
Me@119 219 void inline
Me@120 220 handleInterMasterReq( InterMasterReqst *currReq, void *_semEnv,
Me@120 221 VirtProcr *masterPr )
msach@127 222 {
msach@127 223
msach@127 224 switch( currReq->reqType )
msach@127 225 {
msach@127 226 case destVMSCore:
msach@127 227 handleInterVMSCoreReq( (InterVMSCoreReqst *)currReq, masterPr);
Me@119 228 break;
Me@120 229 case destPlugin:
msach@127 230 _VMSMasterEnv->interPluginReqHdlr( ((InterPluginReqst *)currReq)->pluginReq,
msach@127 231 _semEnv );
msach@127 232 break;
Me@119 233 default:
Me@119 234 break;
Me@119 235 }
Me@119 236 }
Me@55 237
Me@119 238 void inline
msach@127 239 handleInterVMSCoreReq( InterVMSCoreReqst *currReq, VirtProcr *masterPr )
Me@119 240 {
Me@119 241 switch( currReq->reqType )
Me@119 242 {
msach@135 243 case transfer_free_ptr:
msach@135 244 handleTransferFree( currReq, masterPr );
msach@135 245 currReq->obsolete = 1; //now the sender can free the structure
msach@135 246 break;
msach@127 247 default:
msach@127 248 break;
Me@119 249 }
msach@127 250 }
msach@69 251
Me@119 252 /*Work Stealing Alg -- racy one
Me@119 253 *This algorithm has a race condition -- the coreloops are accessing their
Me@119 254 * own queues at the same time that this work-stealer on a different core
Me@119 255 * is trying to.
Me@119 256 *The second stealing alg, below, protects against this.
Me@55 257 */
Me@55 258 void inline
Me@55 259 stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ,
Me@55 260 VirtProcr *masterPr )
Me@55 261 {
Me@55 262 VirtProcr *stolenPr;
Me@55 263 int32 coreIdx, i;
Me@55 264 VMSQueueStruc *currQ;
Me@55 265
Me@55 266 stolenPr = NULL;
Me@55 267 coreIdx = masterPr->coreAnimatedBy;
Me@55 268 for( i = 0; i < NUM_CORES -1; i++ )
Me@55 269 {
Me@55 270 if( coreIdx >= NUM_CORES -1 )
Me@55 271 { coreIdx = 0;
Me@55 272 }
Me@55 273 else
Me@55 274 { coreIdx++;
Me@55 275 }
Me@55 276 currQ = _VMSMasterEnv->readyToAnimateQs[coreIdx];
Me@55 277 if( numInVMSQ( currQ ) > 0 )
Me@55 278 { stolenPr = readVMSQ (currQ );
Me@55 279 break;
Me@55 280 }
Me@55 281 }
Me@55 282
Me@55 283 if( stolenPr != NULL )
Me@55 284 { currSlot->procrAssignedToSlot = stolenPr;
Me@55 285 stolenPr->schedSlot = currSlot;
Me@55 286 currSlot->needsProcrAssigned = FALSE;
Me@55 287
Me@55 288 writeVMSQ( stolenPr, readyToAnimateQ );
Me@55 289 }
Me@55 290 }
Me@55 291
Me@119 292 /*Work Stealing alg -- protected one
Me@119 293 *This algorithm makes the common case fast. Make the coreloop passive,
Me@55 294 * and show its progress. Make the stealer control a gate that coreloop
Me@55 295 * has to pass.
Me@55 296 *To avoid interference, only one stealer at a time. Use a global
Me@55 297 * stealer-lock.
Me@55 298 *
Me@55 299 *The pattern is based on a gate -- stealer shuts the gate, then monitors
Me@55 300 * to be sure any already past make it all the way out, before starting.
Me@55 301 *So, have a "progress" measure just before the gate, then have two after it,
Me@55 302 * one is in a "waiting room" outside the gate, the other is at the exit.
Me@55 303 *Then, the stealer first shuts the gate, then checks the progress measure
Me@55 304 * outside it, then looks to see if the progress measure at the exit is the
Me@55 305 * same. If yes, it knows the protected area is empty 'cause no other way
Me@55 306 * to get in and the last to get in also exited.
Me@55 307 *If the progress measure at the exit is not the same, then the stealer goes
Me@55 308 * into a loop checking both the waiting-area and the exit progress-measures
Me@55 309 * until one of them shows the same as the measure outside the gate. Might
Me@55 310 * as well re-read the measure outside the gate each go around, just to be
Me@55 311 * sure. It is guaranteed that one of the two will eventually match the one
Me@55 312 * outside the gate.
Me@55 313 *
Me@55 314 *Here's an informal proof of correctness:
Me@55 315 *The gate can be closed at any point, and have only four cases:
Me@55 316 * 1) coreloop made it past the gate-closing but not yet past the exit
Me@55 317 * 2) coreloop made it past the pre-gate progress update but not yet past
Me@55 318 * the gate,
Me@55 319 * 3) coreloop is right before the pre-gate update
Me@55 320 * 4) coreloop is past the exit and far from the pre-gate update.
Me@55 321 *
Me@55 322 * Covering the cases in reverse order,
Me@55 323 * 4) is not a problem -- stealer will read pre-gate progress, see that it
Me@55 324 * matches exit progress, and the gate is closed, so stealer can proceed.
Me@55 325 * 3) stealer will read pre-gate progress just after coreloop updates it..
Me@55 326 * so stealer goes into a loop until the coreloop causes wait-progress
Me@55 327 * to match pre-gate progress, so then stealer can proceed
Me@55 328 * 2) same as 3..
Me@55 329 * 1) stealer reads pre-gate progress, sees that it's different than exit,
Me@55 330 * so goes into loop until exit matches pre-gate, now it knows coreloop
Me@55 331 * is not in protected and cannot get back in, so can proceed.
Me@55 332 *
Me@55 333 *Implementation for the stealer:
Me@55 334 *
Me@55 335 *First, acquire the stealer lock -- only cores with no work to do will
Me@55 336 * compete to steal, so not a big performance penalty having only one --
Me@55 337 * will rarely have multiple stealers in a system with plenty of work -- and
Me@55 338 * in a system with little work, it doesn't matter.
Me@55 339 *
Me@55 340 *Note, have single-reader, single-writer pattern for all variables used to
Me@55 341 * communicate between stealer and victims
Me@55 342 *
Me@55 343 *So, scan the queues of the core loops, until find non-empty. Each core
Me@55 344 * has its own list that it scans. The list goes in order from closest to
Me@55 345 * furthest core, so it steals first from close cores. Later can add
Me@55 346 * taking info from the app about overlapping footprints, and scan all the
Me@55 347 * others then choose work with the most footprint overlap with the contents
Me@55 348 * of this core's cache.
Me@55 349 *
Me@55 350 *Now, have a victim want to take work from. So, shut the gate in that
Me@55 351 * coreloop, by setting the "gate closed" var on its stack to TRUE.
Me@55 352 *Then, read the core's pre-gate progress and compare to the core's exit
Me@55 353 * progress.
Me@55 354 *If same, can proceed to take work from the coreloop's queue. When done,
Me@55 355 * write FALSE to gate closed var.
Me@55 356 *If different, then enter a loop that reads the pre-gate progress, then
Me@55 357 * compares to exit progress then to wait progress. When one of two
Me@55 358 * matches, proceed. Take work from the coreloop's queue. When done,
Me@55 359 * write FALSE to the gate closed var.
Me@55 360 *
Me@55 361 */
Me@55 362 void inline
Me@55 363 gateProtected_stealWorkInto( SchedSlot *currSlot,
Me@55 364 VMSQueueStruc *myReadyToAnimateQ,
Me@55 365 VirtProcr *masterPr )
Me@55 366 {
Me@55 367 VirtProcr *stolenPr;
Me@55 368 int32 coreIdx, i, haveAVictim, gotLock;
Me@55 369 VMSQueueStruc *victimsQ;
Me@55 370
Me@55 371 volatile GateStruc *vicGate;
Me@55 372 int32 coreMightBeInProtected;
Me@55 373
Me@55 374
Me@55 375
Me@55 376 //see if any other cores have work available to steal
Me@55 377 haveAVictim = FALSE;
Me@55 378 coreIdx = masterPr->coreAnimatedBy;
Me@55 379 for( i = 0; i < NUM_CORES -1; i++ )
Me@55 380 {
Me@55 381 if( coreIdx >= NUM_CORES -1 )
Me@55 382 { coreIdx = 0;
Me@55 383 }
Me@55 384 else
Me@55 385 { coreIdx++;
Me@55 386 }
Me@55 387 victimsQ = _VMSMasterEnv->readyToAnimateQs[coreIdx];
Me@55 388 if( numInVMSQ( victimsQ ) > 0 )
Me@55 389 { haveAVictim = TRUE;
Me@55 390 vicGate = _VMSMasterEnv->workStealingGates[ coreIdx ];
Me@55 391 break;
Me@55 392 }
Me@55 393 }
Me@55 394 if( !haveAVictim ) return; //no work to steal, exit
Me@55 395
Me@55 396 //have a victim core, now get the stealer-lock
Me@55 397 gotLock =__sync_bool_compare_and_swap( &(_VMSMasterEnv->workStealingLock),
Me@55 398 UNLOCKED, LOCKED );
Me@55 399 if( !gotLock ) return; //go back to core loop, which will re-start master
Me@55 400
Me@55 401
Me@55 402 //====== Start Gate-protection =======
Me@55 403 vicGate->gateClosed = TRUE;
Me@55 404 coreMightBeInProtected= vicGate->preGateProgress != vicGate->exitProgress;
Me@55 405 while( coreMightBeInProtected )
Me@55 406 { //wait until sure
Me@55 407 if( vicGate->preGateProgress == vicGate->waitProgress )
Me@55 408 coreMightBeInProtected = FALSE;
Me@55 409 if( vicGate->preGateProgress == vicGate->exitProgress )
Me@55 410 coreMightBeInProtected = FALSE;
Me@55 411 }
Me@55 412
Me@55 413 stolenPr = readVMSQ ( victimsQ );
Me@55 414
Me@55 415 vicGate->gateClosed = FALSE;
Me@55 416 //======= End Gate-protection =======
Me@55 417
Me@55 418
Me@119 419 if( stolenPr != NULL ) //victim could have been in protected and took it
Me@55 420 { currSlot->procrAssignedToSlot = stolenPr;
Me@55 421 stolenPr->schedSlot = currSlot;
Me@55 422 currSlot->needsProcrAssigned = FALSE;
Me@55 423
Me@55 424 writeVMSQ( stolenPr, myReadyToAnimateQ );
Me@55 425 }
Me@55 426
Me@55 427 //unlock the work stealing lock
Me@55 428 _VMSMasterEnv->workStealingLock = UNLOCKED;
Me@55 429 }