VMS/VMS_Implementations/VMS_impls/VMS__MC_shared_impl: 0b49fd35afc1 MasterLoop.c

view MasterLoop.c @ 135:0b49fd35afc1

Distributed free is working: - the app sends a VMSSemReqst to its Master, which sends a request to a different Master - the Master sends the request directly - the request structure is freed by the sender once the request has been handled. There are still problems on shutdown: the shutdownVPs are all allocated by one Master, which is likely to be terminated.

author	Merten Sach <msach@mailbox.tu-berlin.de>
date	Fri, 16 Sep 2011 20:08:28 +0200
parents	a9b72021f053
children	99343ffe1918

line source

2 * Copyright 2010 OpenSourceStewardshipFoundation

4 * Licensed under BSD

9 #include <stdio.h>

10 #include <stddef.h>

12 #include "VMS.h"

13 #include "ProcrContext.h"

14 #include "scheduling.h"

15 #include "inter_VMS_requests.h"

16 #include "inter_VMS_requests_handler.h"

18 //===========================================================================

19 void inline

20 stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ,

21 VirtProcr *masterPr);

23 void inline

24 handleInterMasterReq( InterMasterReqst *currReq, void *_semEnv,

25 VirtProcr *masterPr);

27 void inline

28 handleInterVMSCoreReq( InterVMSCoreReqst *currReq, VirtProcr *masterPr);

30 //===========================================================================

34 /*This code is animated by the virtual Master processor.

36 *Polls each sched slot exactly once, hands any requests made by a newly

37 * done slave to the "request handler" plug-in function

39 *Any slots that need a virt procr assigned are given to the "schedule"

40 * plug-in function, which tries to assign a virt procr (slave) to it.

42 *When all slots needing a processor have been given to the schedule plug-in,

43 * a fraction of the procrs successfully scheduled are put into the

44 * work queue, then a continuation of this function is put in, then the rest

45 * of the virt procrs that were successfully scheduled.

47 *The first thing the continuation does is busy-wait until the previous

48 * animation completes. This is because an (unlikely) continuation may

49 * sneak through queue before previous continuation is done putting second

50 * part of scheduled slaves in, which is the only race condition.

54 /*May 29, 2010 -- birth a Master during init so that first core loop to

55 * start running gets it and does all the stuff for a newly born --

56 * from then on, will be doing continuation, but do suspension self

57 * directly at end of master loop

58 *So VMS__init just births the master virtual processor same way it births

59 * all the others -- then does any extra setup needed and puts it into the

60 * work queue.

61 *However, this means masterEnv has to be made a global static volatile.

64 *Aug 18, 2010 -- Going to a separate MasterVP for each core, to see if this

65 * avoids the suspected bug in the system stack that causes bizarre faults

66 * at random places in the system code.

68 *So, this function is coupled to each of the MasterVPs, -- meaning this

69 * function can't rely on a particular stack and frame -- each MasterVP that

70 * animates this function has a different stack.

72 *At this point, the masterLoop does not write itself into the queue anymore,

73 * instead, the coreLoop acquires the masterLock when it has nothing to

74 * animate, and then animates its own masterLoop. However, still try to put

75 * several AppVPs into the queue to amortize the startup cost of switching

76 * to the MasterVP. Note, don't have to worry about latency of requests much

77 * because most requests generate work for same core -- only latency issue

78 * is case when other cores starved and one core's requests generate work

79 * for them -- so keep max in queue to 3 or 4..

81 void masterLoop( void *initData, VirtProcr *animatingPr )

83 int32 slotIdx, numSlotsFilled;

84 VirtProcr *schedVirtPr;

85 SchedSlot *currSlot, **schedSlots;

86 MasterEnv *masterEnv;

87 VMSQueueStruc *readyToAnimateQ;

89 SlaveScheduler slaveScheduler;

90 RequestHandler requestHandler;

91 void *semanticEnv;

93 int32 thisCoresIdx;

94 VirtProcr *masterPr;

95 volatile VirtProcr *volatileMasterPr;

97 volatileMasterPr = animatingPr;

98 masterPr = (VirtProcr*)volatileMasterPr; //used to force re-define after jmp

99 masterEnv = (MasterEnv*)_VMSMasterEnv;

101 //First animation of each MasterVP will in turn animate this part

102 // of setup code.. (VP creator sets up the stack as if this function

103 // was called normally, but actually get here by jmp)

105 //Sept 2011

106 //Old code jumped directly to this point, but doesn't work on x64

107 // So, just make this an endless loop, and do assembly function at end

108 // that saves its own return addr, then jumps to core_loop.

109 while(1)

110 {

111 //============================= MEASUREMENT STUFF ========================

112 #ifdef MEAS__TIME_MASTER

113 //Total Master time includes one coreloop time -- just assume the core

114 // loop time is same for Master as is for AppVPs, even though it may be

115 // smaller due to higher predictability of the fixed jmp.

116 saveLowTimeStampCountInto( masterPr->startMasterTSCLow );

117 #endif

118 //========================================================================

120 //GCC may optimize so doesn't always re-define from frame-storage

121 thisCoresIdx = masterPr->coreAnimatedBy;

122 masterEnv->currentMasterProcrID = thisCoresIdx;

123 readyToAnimateQ = masterEnv->readyToAnimateQs[thisCoresIdx];

124 schedSlots = masterEnv->allSchedSlots[thisCoresIdx];

126 requestHandler = masterEnv->requestHandler;

127 slaveScheduler = masterEnv->slaveScheduler;

128 semanticEnv = masterEnv->semanticEnv;

130 //First, check for requests from other MasterVPs, and handle them

131 InterMasterReqst* currReqst = masterEnv->interMasterRequestsFor[thisCoresIdx];

132 while(currReqst)

133 {

134 handleInterMasterReq( currReqst, semanticEnv, masterPr );

135 currReqst = currReqst->nextReqst;

136 }

137 masterEnv->interMasterRequestsFor[thisCoresIdx] = NULL;

139 //Second, check for own request that were handled for other MasterVPs

140 currReqst = masterEnv->interMasterRequestsSentBy[thisCoresIdx];

141 while(currReqst && currReqst->obsolete)

142 {

143 InterMasterReqst *nextReqst = currReqst->nextSentReqst;

144 VMS__free(currReqst);

145 currReqst = nextReqst;

146 }

147 masterEnv->interMasterRequestsSentBy[thisCoresIdx] = currReqst;

149 //Now, take care of the SlaveVPs

150 //Go through the slots -- if Slave there newly suspended, handle its request

151 // then, either way, ask assigner to fill each slot

152 numSlotsFilled = 0;

153 for( slotIdx = 0; slotIdx < NUM_SCHED_SLOTS; slotIdx++)

154 {

155 currSlot = schedSlots[ slotIdx ];

157 if( currSlot->workIsDone )

158 {

159 currSlot->workIsDone = FALSE;

160 currSlot->needsProcrAssigned = TRUE;

162 //process requests from slave to master

163 //====================== MEASUREMENT STUFF ===================

164 #ifdef MEAS__TIME_PLUGIN

165 int32 startStamp1, endStamp1;

166 saveLowTimeStampCountInto( startStamp1 );

167 #endif

168 //============================================================

169 (*requestHandler)( currSlot->procrAssignedToSlot, semanticEnv );

170 //====================== MEASUREMENT STUFF ===================

171 #ifdef MEAS__TIME_PLUGIN

172 saveLowTimeStampCountInto( endStamp1 );

173 addIntervalToHist( startStamp1, endStamp1,

174 _VMSMasterEnv->reqHdlrLowTimeHist );

175 addIntervalToHist( startStamp1, endStamp1,

176 _VMSMasterEnv->reqHdlrHighTimeHist );

177 #endif

178 //============================================================

179 }

180 if( currSlot->needsProcrAssigned )

181 { //give slot a new virt procr

182 schedVirtPr =

183 (*slaveScheduler)( semanticEnv, thisCoresIdx );

184

185 if( schedVirtPr != NULL )

186 { currSlot->procrAssignedToSlot = schedVirtPr;

187 schedVirtPr->schedSlot = currSlot;

188 schedVirtPr->coreAnimatedBy = thisCoresIdx;

189 currSlot->needsProcrAssigned = FALSE;

190 numSlotsFilled += 1;

191

192 writeVMSQ( schedVirtPr, readyToAnimateQ );

193 }

194 }

195 }

198 #ifdef USE_WORK_STEALING

199 //If no slots filled, means no more work, look for work to steal.

200 if( numSlotsFilled == 0 )

201 { gateProtected_stealWorkInto( currSlot, readyToAnimateQ, masterPr );

202 }

203 #endif

206 #ifdef MEAS__TIME_MASTER

207 saveLowTimeStampCountInto( masterPr->endMasterTSCLow );

208 #endif

210 masterSwitchToCoreLoop(animatingPr);

211 flushRegisters();

212 }//while(1) MasterLoop

215 /*This is for inter-master communication. Either the master itself or

216 * the plugin sends one of these requests. Some are handled here, by the

217 * master_loop, others are handed off to the plugin.

219 void inline

220 handleInterMasterReq( InterMasterReqst *currReq, void *_semEnv,

221 VirtProcr *masterPr )

223

224 switch( currReq->reqType )

225 {

226 case destVMSCore:

227 handleInterVMSCoreReq( (InterVMSCoreReqst *)currReq, masterPr);

228 break;

229 case destPlugin:

230 _VMSMasterEnv->interPluginReqHdlr( ((InterPluginReqst *)currReq)->pluginReq,

231 _semEnv );

232 break;

233 default:

234 break;

235 }

238 void inline

239 handleInterVMSCoreReq( InterVMSCoreReqst *currReq, VirtProcr *masterPr )

241 switch( currReq->reqType )

242 {

243 case transfer_free_ptr:

244 handleTransferFree( currReq, masterPr );

245 currReq->obsolete = 1; //now the sender can free the structure

246 break;

247 default:

248 break;

249 }

252 /*Work Stealing Alg -- racy one

253 *This algorithm has a race condition -- the coreloops are accessing their

254 * own queues at the same time that this work-stealer on a different core

255 * is trying to.

256 *The second stealing alg, below, protects against this.

258 void inline

259 stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ,

260 VirtProcr *masterPr )

262 VirtProcr *stolenPr;

263 int32 coreIdx, i;

264 VMSQueueStruc *currQ;

266 stolenPr = NULL;

267 coreIdx = masterPr->coreAnimatedBy;

268 for( i = 0; i < NUM_CORES -1; i++ )

269 {

270 if( coreIdx >= NUM_CORES -1 )

271 { coreIdx = 0;

272 }

273 else

274 { coreIdx++;

275 }

276 currQ = _VMSMasterEnv->readyToAnimateQs[coreIdx];

277 if( numInVMSQ( currQ ) > 0 )

278 { stolenPr = readVMSQ (currQ );

279 break;

280 }

281 }

283 if( stolenPr != NULL )

284 { currSlot->procrAssignedToSlot = stolenPr;

285 stolenPr->schedSlot = currSlot;

286 currSlot->needsProcrAssigned = FALSE;

288 writeVMSQ( stolenPr, readyToAnimateQ );

289 }

292 /*Work Stealing alg -- protected one

293 *This algorithm makes the common case fast. Make the coreloop passive,

294 * and show its progress. Make the stealer control a gate that coreloop

295 * has to pass.

296 *To avoid interference, only one stealer at a time. Use a global

297 * stealer-lock.

299 *The pattern is based on a gate -- stealer shuts the gate, then monitors

300 * to be sure any already past make it all the way out, before starting.

301 *So, have a "progress" measure just before the gate, then have two after it,

302 * one is in a "waiting room" outside the gate, the other is at the exit.

303 *Then, the stealer first shuts the gate, then checks the progress measure

304 * outside it, then looks to see if the progress measure at the exit is the

305 * same. If yes, it knows the protected area is empty 'cause no other way

306 * to get in and the last to get in also exited.

307 *If the progress measure at the exit is not the same, then the stealer goes

308 * into a loop checking both the waiting-area and the exit progress-measures

309 * until one of them shows the same as the measure outside the gate. Might

310 * as well re-read the measure outside the gate each go around, just to be

311 * sure. It is guaranteed that one of the two will eventually match the one

312 * outside the gate.

314 *Here's an informal proof of correctness:

315 *The gate can be closed at any point, and have only four cases:

316 * 1) coreloop made it past the gate-closing but not yet past the exit

317 * 2) coreloop made it past the pre-gate progress update but not yet past

318 * the gate,

319 * 3) coreloop is right before the pre-gate update

320 * 4) coreloop is past the exit and far from the pre-gate update.

322 * Covering the cases in reverse order,

323 * 4) is not a problem -- stealer will read pre-gate progress, see that it

324 * matches exit progress, and the gate is closed, so stealer can proceed.

325 * 3) stealer will read pre-gate progress just after coreloop updates it..

326 * so stealer goes into a loop until the coreloop causes wait-progress

327 * to match pre-gate progress, so then stealer can proceed

328 * 2) same as 3..

329 * 1) stealer reads pre-gate progress, sees that it's different than exit,

330 * so goes into loop until exit matches pre-gate, now it knows coreloop

331 * is not in protected and cannot get back in, so can proceed.

333 *Implementation for the stealer:

335 *First, acquire the stealer lock -- only cores with no work to do will

336 * compete to steal, so not a big performance penalty having only one --

337 * will rarely have multiple stealers in a system with plenty of work -- and

338 * in a system with little work, it doesn't matter.

340 *Note, have single-reader, single-writer pattern for all variables used to

341 * communicate between stealer and victims

343 *So, scan the queues of the core loops, until find non-empty. Each core

344 * has its own list that it scans. The list goes in order from closest to

345 * furthest core, so it steals first from close cores. Later can add

346 * taking info from the app about overlapping footprints, and scan all the

347 * others then choose work with the most footprint overlap with the contents

348 * of this core's cache.

350 *Now, have a victim want to take work from. So, shut the gate in that

351 * coreloop, by setting the "gate closed" var on its stack to TRUE.

352 *Then, read the core's pre-gate progress and compare to the core's exit

353 * progress.

354 *If same, can proceed to take work from the coreloop's queue. When done,

355 * write FALSE to gate closed var.

356 *If different, then enter a loop that reads the pre-gate progress, then

357 * compares to exit progress then to wait progress. When one of two

358 * matches, proceed. Take work from the coreloop's queue. When done,

359 * write FALSE to the gate closed var.

362 void inline

363 gateProtected_stealWorkInto( SchedSlot *currSlot,

364 VMSQueueStruc *myReadyToAnimateQ,

365 VirtProcr *masterPr )

367 VirtProcr *stolenPr;

368 int32 coreIdx, i, haveAVictim, gotLock;

369 VMSQueueStruc *victimsQ;

371 volatile GateStruc *vicGate;

372 int32 coreMightBeInProtected;

376 //see if any other cores have work available to steal

377 haveAVictim = FALSE;

378 coreIdx = masterPr->coreAnimatedBy;

379 for( i = 0; i < NUM_CORES -1; i++ )

380 {

381 if( coreIdx >= NUM_CORES -1 )

382 { coreIdx = 0;

383 }

384 else

385 { coreIdx++;

386 }

387 victimsQ = _VMSMasterEnv->readyToAnimateQs[coreIdx];

388 if( numInVMSQ( victimsQ ) > 0 )

389 { haveAVictim = TRUE;

390 vicGate = _VMSMasterEnv->workStealingGates[ coreIdx ];

391 break;

392 }

393 }

394 if( !haveAVictim ) return; //no work to steal, exit

396 //have a victim core, now get the stealer-lock

397 gotLock =__sync_bool_compare_and_swap( &(_VMSMasterEnv->workStealingLock),

398 UNLOCKED, LOCKED );

399 if( !gotLock ) return; //go back to core loop, which will re-start master

402 //====== Start Gate-protection =======

403 vicGate->gateClosed = TRUE;

404 coreMightBeInProtected= vicGate->preGateProgress != vicGate->exitProgress;

405 while( coreMightBeInProtected )

406 { //wait until sure

407 if( vicGate->preGateProgress == vicGate->waitProgress )

408 coreMightBeInProtected = FALSE;

409 if( vicGate->preGateProgress == vicGate->exitProgress )

410 coreMightBeInProtected = FALSE;

411 }

413 stolenPr = readVMSQ ( victimsQ );

415 vicGate->gateClosed = FALSE;

416 //======= End Gate-protection =======

419 if( stolenPr != NULL ) //victim could have been in protected and took it

420 { currSlot->procrAssignedToSlot = stolenPr;

421 stolenPr->schedSlot = currSlot;

422 currSlot->needsProcrAssigned = FALSE;

424 writeVMSQ( stolenPr, myReadyToAnimateQ );

425 }

427 //unlock the work stealing lock

428 _VMSMasterEnv->workStealingLock = UNLOCKED;