VMS/VMS_Implementations/VMS_impls/VMS__MC_shared_impl: df00af7eb307 VMS.c

view VMS.c @ 212:df00af7eb307

try 40 cores

author	Nina Engelhardt <nengel@mailbox.tu-berlin.de>
date	Fri, 09 Mar 2012 18:58:33 +0100
parents	f6d81915512c
children

line source

2 * Copyright 2010 OpenSourceStewardshipFoundation

4 * Licensed under BSD

7 #include <stdio.h>

8 #include <stdlib.h>

9 #include <string.h>

10 #include <malloc.h>

11 #include <inttypes.h>

12 #include <sys/time.h>

14 #include "VMS.h"

15 #include "ProcrContext.h"

16 #include "../../C_Libraries/Queue_impl/PrivateQueue.h"

17 #include "../../C_Libraries/Histogram/Histogram.h"

19 #include <unistd.h>

20 #include <fcntl.h>

21 #include <linux/types.h>

22 #include <linux/perf_event.h>

23 #include <errno.h>

24 #include <sys/syscall.h>

25 #include <linux/prctl.h>

//Attributes argument handed to pthread_create below; NULL selects the
// pthread defaults
#define thdAttrs NULL

//===========================================================================
//Forward declarations of functions referenced in this file
void
shutdownFn( void *dummy, VirtProcr *dummy2 );

SchedSlot **
create_sched_slots();

void
create_masterEnv();

void
create_the_coreLoop_OS_threads();

MallocProlog *
create_free_list();

void
endOSThreadFn( void *initData, VirtProcr *animatingPr );

//suspendLock is held while setupComplete is written, and suspend_cond is
// broadcast right after, in VMS__start_the_work_then_wait_until_done
pthread_mutex_t suspendLock = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t suspend_cond = PTHREAD_COND_INITIALIZER;

52 //===========================================================================

/*Start-up is split into two phases:
 * 1) The semantic layer calls VMS__init, which builds the master
 *    environment and the master virtual processors.
 * 2) The semantic layer then performs its own initialization, creating the
 *    seed virtual processor inside the semantic layer, so that it is ready
 *    to be scheduled when the masterLoop first asks for work.
 *
 *This is slightly awkward because VMS really wants to be "always there",
 * with applications attaching and detaching.  For now VMS is linked into
 * the application, so the VMS system starts up as part of running the app.
 *
 *The semantic layer stays isolated from VMS internals: it only prepares its
 * initial virtual processors and hands them to slots when the masterLoop
 * asks.  Without this pattern the semantic layer's setup would have to
 * write into the scheduling slots and the readyToAnimateQ directly, which
 * would break the isolation completely.
 *
 *The semantic layer creates the initial virtual processor(s), attaches its
 * own environment to the master env, and fills in the pointers to the
 * requestHandler and slaveScheduler plug-in functions.
 */

/*Allocates and populates the VMS data structures (the master environment
 * and the master virtual processors), then creates the OS threads that run
 * the core loops.
 */
void
VMS__init()
{
   //Order matters: the thread-creation step writes into _VMSMasterEnv,
   // which create_masterEnv allocates
   create_masterEnv();
   create_the_coreLoop_OS_threads();
}

#ifdef SEQUENTIAL

/*Initialization for the sequential build: identical to VMS__init except
 * that no core-loop OS threads are created.
 */
void
VMS__init_Seq()
{
   create_masterEnv();
}

#endif

101 void

102 create_masterEnv()

103 { MasterEnv *masterEnv;

104 VMSQueueStruc **readyToAnimateQs;

105 int coreIdx;

106 VirtProcr **masterVPs;

107 SchedSlot ***allSchedSlots; //ptr to array of ptrs

110 //Make the master env, which holds everything else

111 _VMSMasterEnv = malloc( sizeof(MasterEnv) );

113 //Very first thing put into the master env is the free-list, seeded

114 // with a massive initial chunk of memory.

115 //After this, all other mallocs are VMS__malloc.

116 _VMSMasterEnv->freeListHead = VMS_ext__create_free_list();

119 //============================= MEASUREMENT STUFF ========================

120 #ifdef MEAS__TIME_MALLOC

121 _VMSMasterEnv->mallocTimeHist = makeFixedBinHistExt( 100, 0, 30,

122 "malloc_time_hist");

123 _VMSMasterEnv->freeTimeHist = makeFixedBinHistExt( 100, 0, 30,

124 "free_time_hist");

125 #endif

126 #ifdef MEAS__TIME_PLUGIN

127 _VMSMasterEnv->reqHdlrLowTimeHist = makeFixedBinHistExt( 100, 0, 200,

128 "plugin_low_time_hist");

129 _VMSMasterEnv->reqHdlrHighTimeHist = makeFixedBinHistExt( 100, 0, 200,

130 "plugin_high_time_hist");

131 #endif

132 //========================================================================

134 //===================== Only VMS__malloc after this ====================

135 masterEnv = (MasterEnv*)_VMSMasterEnv;

137 //Make a readyToAnimateQ for each core loop

138 readyToAnimateQs = VMS__malloc( NUM_CORES * sizeof(VMSQueueStruc *) );

139 masterVPs = VMS__malloc( NUM_CORES * sizeof(VirtProcr *) );

141 //One array for each core, 3 in array, core's masterVP scheds all

142 allSchedSlots = VMS__malloc( NUM_CORES * sizeof(SchedSlot *) );

144 _VMSMasterEnv->numProcrsCreated = 0; //used by create procr

145 for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ )

146 {

147 readyToAnimateQs[ coreIdx ] = makeVMSQ();

148

149 //Q: should give masterVP core-specific info as its init data?

150 masterVPs[ coreIdx ] = VMS__create_procr( (VirtProcrFnPtr)&masterLoop, (void*)masterEnv );

151 masterVPs[ coreIdx ]->coreAnimatedBy = coreIdx;

152 masterVPs[ coreIdx ]->isMasterVP = TRUE;

153 allSchedSlots[ coreIdx ] = create_sched_slots(); //makes for one core

154 _VMSMasterEnv->numMasterInARow[ coreIdx ] = 0;

155 _VMSMasterEnv->workStealingGates[ coreIdx ] = NULL;

156 }

157 _VMSMasterEnv->readyToAnimateQs = readyToAnimateQs;

158 _VMSMasterEnv->masterVPs = masterVPs;

159 _VMSMasterEnv->masterLock = UNLOCKED;

160 _VMSMasterEnv->allSchedSlots = allSchedSlots;

161 _VMSMasterEnv->workStealingLock = UNLOCKED;

164 //Aug 19, 2010: no longer need to place initial masterVP into queue

165 // because coreLoop now controls -- animates its masterVP when no work

168 //============================= MEASUREMENT STUFF ========================

169 #ifdef STATS__TURN_ON_PROBES

170 _VMSMasterEnv->dynIntervalProbesInfo =

171 makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->intervalProbes), 200);

173 _VMSMasterEnv->probeNameHashTbl = makeHashTable( 1000, &VMS__free );

175 //put creation time directly into master env, for fast retrieval

176 struct timeval timeStamp;

177 gettimeofday( &(timeStamp), NULL);

178 _VMSMasterEnv->createPtInSecs =

179 timeStamp.tv_sec +(timeStamp.tv_usec/1000000.0);

180 #endif

181 #ifdef MEAS__TIME_MASTER_LOCK

182 _VMSMasterEnv->masterLockLowTimeHist = makeFixedBinHist( 50, 0, 2,

183 "master lock low time hist");

184 _VMSMasterEnv->masterLockHighTimeHist = makeFixedBinHist( 50, 0, 100,

185 "master lock high time hist");

186 #endif

188 MakeTheMeasHists();

191 #ifdef DETECT_LOOP_GRAPH

193 #endif

195 #ifdef MEAS__PERF_COUNTERS

196 struct perf_event_attr hw_event;

197 memset(&hw_event,0,sizeof(hw_event));

198 hw_event.type = PERF_TYPE_HARDWARE;

199 hw_event.size = sizeof(hw_event);

200 hw_event.disabled = 1;

201 hw_event.freq = 0;

202 hw_event.inherit = 1; /* children inherit it */

203 hw_event.pinned = 1; /* must always be on PMU */

204 hw_event.exclusive = 0; /* only group on PMU */

205 hw_event.exclude_user = 0; /* don't count user */

206 hw_event.exclude_kernel = 0; /* ditto kernel */

207 hw_event.exclude_hv = 0; /* ditto hypervisor */

208 hw_event.exclude_idle = 0; /* don't count when idle */

209 hw_event.mmap = 0; /* include mmap data */

210 hw_event.comm = 0; /* include comm data */

213 for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ )

214 {

215 hw_event.config = 0x0000000000000000; //cycles

216 _VMSMasterEnv->cycles_counter_fd[coreIdx] = syscall(__NR_perf_event_open, &hw_event,

217 0,//pid_t pid,

218 coreIdx,//int cpu,

219 -1,//int group_fd,

220 0//unsigned long flags

222 if (_VMSMasterEnv->cycles_counter_fd[coreIdx]<0){

223 fprintf(stderr,"On core %d: ",coreIdx);

224 perror("Failed to open cycles counter");

225 }

226 hw_event.config = 0x0000000000000001; //instrs

227 _VMSMasterEnv->instrs_counter_fd[coreIdx] = syscall(__NR_perf_event_open, &hw_event,

228 0,//pid_t pid,

229 coreIdx,//int cpu,

230 -1,//int group_fd,

231 0//unsigned long flags

233 if (_VMSMasterEnv->instrs_counter_fd[coreIdx]<0){

234 fprintf(stderr,"On core %d: ",coreIdx);

235 perror("Failed to open instrs counter");

236 }

237 }

238 //uint64 tmpc,tmpi;

239 //saveCyclesAndInstrs(0,tmpc,tmpi);

240 //printf("Start: cycles = %llu, instrs = %llu\n",tmpc,tmpi);

241 #endif

243 //========================================================================

247 SchedSlot **

248 create_sched_slots()

249 { SchedSlot **schedSlots;

250 int i;

252 schedSlots = VMS__malloc( NUM_SCHED_SLOTS * sizeof(SchedSlot *) );

254 for( i = 0; i < NUM_SCHED_SLOTS; i++ )

255 {

256 schedSlots[i] = VMS__malloc( sizeof(SchedSlot) );

258 //Set state to mean "handling requests done, slot needs filling"

259 schedSlots[i]->workIsDone = FALSE;

260 schedSlots[i]->needsProcrAssigned = TRUE;

261 }

262 return schedSlots;

266 void

267 freeSchedSlots( SchedSlot **schedSlots )

268 { int i;

269 for( i = 0; i < NUM_SCHED_SLOTS; i++ )

270 {

271 VMS__free( schedSlots[i] );

272 }

273 VMS__free( schedSlots );

277 void

278 create_the_coreLoop_OS_threads()

280 //========================================================================

281 // Create the Threads

282 int coreIdx, retCode;

284 //Need the threads to be created suspended, and wait for a signal

285 // before proceeding -- gives time after creating to initialize other

286 // stuff before the coreLoops set off.

287 _VMSMasterEnv->setupComplete = 0;

289 //Make the threads that animate the core loops

290 for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ )

291 { coreLoopThdParams[coreIdx] = VMS__malloc( sizeof(ThdParams) + CACHE_LINE ); //make sure there is no false sharing

292 coreLoopThdParams[coreIdx]->coreNum = coreIdx;

293 coreLoopThdParams[coreIdx]->sent_ctr = 0;

294 coreLoopThdParams[coreIdx]->ret_tsc = 0;

296 retCode =

297 pthread_create( &(coreLoopThdHandles[coreIdx]),

298 thdAttrs,

299 &coreLoop,

300 (void *)(coreLoopThdParams[coreIdx]) );

301 if(retCode){printf("ERROR creating thread: %d\n", retCode); exit(1);}

302 }

303 prctl(PR_TASK_PERF_EVENTS_ENABLE);

306 /*Semantic layer calls this when it want the system to start running..

308 *This starts the core loops running then waits for them to exit.

310 void

311 VMS__start_the_work_then_wait_until_done()

312 { int coreIdx;

313 //Start the core loops running

315 //tell the core loop threads that setup is complete

316 //get lock, to lock out any threads still starting up -- they'll see

317 // that setupComplete is true before entering while loop, and so never

318 // wait on the condition

319 pthread_mutex_lock( &suspendLock );

320 _VMSMasterEnv->setupComplete = 1;

321 pthread_mutex_unlock( &suspendLock );

322 pthread_cond_broadcast( &suspend_cond );

325 //wait for all to complete

326 for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ )

327 {

328 pthread_join( coreLoopThdHandles[coreIdx], NULL );

329 }

331 //NOTE: do not clean up VMS env here -- semantic layer has to have

332 // a chance to clean up its environment first, then do a call to free

333 // the Master env and rest of VMS locations

#ifdef SEQUENTIAL
/*Sequential counterpart of VMS__start_the_work_then_wait_until_done.  The
 * only pieces that differ between the one-OS-thread-per-core build and the
 * sequential build are VMS__init_Seq, this function, and coreLoop_Seq.
 */
void
VMS__start_the_work_then_wait_until_done_Seq()
{
   //There are no threads to un-suspend here: run the single, sequential
   // core loop directly in the calling (main) thread
   coreLoop_Seq( NULL );
   flushRegisters();
}
#endif

351 inline VirtProcr *

352 VMS__create_procr( VirtProcrFnPtr fnPtr, void *initialData )

353 { VirtProcr *newPr;

354 void *stackLocs;

356 newPr = VMS__malloc( sizeof(VirtProcr) );

357 stackLocs = VMS__malloc( VIRT_PROCR_STACK_SIZE );

358 if( stackLocs == 0 )

359 { perror("VMS__malloc stack"); exit(1); }

361 return create_procr_helper( newPr, fnPtr, initialData, stackLocs );

364 /* "ext" designates that it's for use outside the VMS system -- should only

365 * be called from main thread or other thread -- never from code animated by

366 * a VMS virtual processor.

368 inline VirtProcr *

369 VMS_ext__create_procr( VirtProcrFnPtr fnPtr, void *initialData )

370 { VirtProcr *newPr;

371 char *stackLocs;

373 newPr = malloc( sizeof(VirtProcr) );

374 stackLocs = malloc( VIRT_PROCR_STACK_SIZE );

375 if( stackLocs == 0 )

376 { perror("malloc stack"); exit(1); }

378 return create_procr_helper( newPr, fnPtr, initialData, stackLocs );

382 /*Anticipating multi-tasking

384 void *

385 VMS__give_sem_env_for( VirtProcr *animPr )

387 return _VMSMasterEnv->semanticEnv;

//===========================================================================
/*Suspends the given virtual processor and transfers control to the core
 * loop.
 *
 *Marks the processor's scheduling slot as having its work done, optionally
 * records measurements, then calls switchToCoreLoop.  The statements after
 * that call run once control comes back into this function.
 *
 *Historical note: an earlier design saved the address of a label inside
 * this function (ResumePt) in the procr struct as the pick-up point; that
 * assignment is now commented out below.
 *
 *The procr struct is reachable from the request in the slave that animated
 * the just-ended work-unit, so all the state is saved there and gets passed
 * along, inside the request handler, to the next work-unit for that procr.
 */
void
VMS__suspend_procr( VirtProcr *animatingPr )
{
   //The request to master will cause this suspended virt procr to get
   // scheduled again at some future point -- to resume, core loop jumps
   // to the resume point (below), which causes restore of saved regs and
   // "return" from this call.
   //animatingPr->nextInstrPt = &&ResumePt;

   //return ownership of the virt procr and sched slot to Master virt pr
   animatingPr->schedSlot->workIsDone = TRUE;

   //===========================  Measurement stuff ========================
   #ifdef MEAS__TIME_STAMP_SUSP
   //record time stamp: compare to time-stamp recorded below
   saveLowTimeStampCountInto( animatingPr->preSuspTSCLow );
   #endif
   #ifdef MEAS__PERF_COUNTERS
   //read the counters for the core animating this procr and hand them to
   // the registered counter handler
   uint64 cycles,instrs;
   saveCyclesAndInstrs(animatingPr->coreAnimatedBy,cycles, instrs);
   (*(_VMSMasterEnv->counterHandler))(HwResponderInvocation_start,animatingPr->procrID,animatingPr->numTimesScheduled,animatingPr,cycles,instrs);
   #endif
   //=======================================================================

   //hand control to the core loop; do not reorder around these two calls
   switchToCoreLoop(animatingPr);
   flushRegisters();

   //=======================================================================

   #ifdef MEAS__TIME_STAMP_SUSP
   //NOTE: only take low part of count -- do sanity check when take diff
   saveLowTimeStampCountInto( animatingPr->postSuspTSCLow );
   #endif

   return;
}

440 /*For this implementation of VMS, it may not make much sense to have the

441 * system of requests for creating a new processor done this way.. but over

442 * the scope of single-master, multi-master, mult-tasking, OS-implementing,

443 * distributed-memory, and so on, this gives VMS implementation a chance to

444 * do stuff before suspend, in the AppVP, and in the Master before the plugin

445 * is called, as well as in the lang-lib before this is called, and in the

446 * plugin. So, this gives both VMS and language implementations a chance to

447 * intercept at various points and do order-dependent stuff.

448 *Having a standard VMSNewPrReqData struc allows the language to create and

449 * free the struc, while VMS knows how to get the newPr if it wants it, and

450 * it lets the lang have lang-specific data related to creation transported

451 * to the plugin.

453 void

454 VMS__send_create_procr_req( void *semReqData, VirtProcr *reqstingPr )

455 { VMSReqst req;

457 req.reqType = createReq;

458 req.semReqData = semReqData;

459 req.nextReqst = reqstingPr->requests;

460 reqstingPr->requests = &req;

462 VMS__suspend_procr( reqstingPr );

467 *This adds a request to dissipate, then suspends the processor so that the

468 * request handler will receive the request. The request handler is what

469 * does the work of freeing memory and removing the processor from the

470 * semantic environment's data structures.

471 *The request handler also is what figures out when to shutdown the VMS

472 * system -- which causes all the core loop threads to die, and returns from

473 * the call that started up VMS to perform the work.

475 *This form is a bit misleading to understand if one is trying to figure out

476 * how VMS works -- it looks like a normal function call, but inside it

477 * sends a request to the request handler and suspends the processor, which

478 * jumps out of the VMS__dissipate_procr function, and out of all nestings

479 * above it, transferring the work of dissipating to the request handler,

480 * which then does the actual work -- causing the processor that animated

481 * the call of this function to disappear and the "hanging" state of this

482 * function to just poof into thin air -- the virtual processor's trace

483 * never returns from this call, but instead the virtual processor's trace

484 * gets suspended in this call and all the virt processor's state disap-

485 * pears -- making that suspend the last thing in the virt procr's trace.

487 void

488 VMS__send_dissipate_req( VirtProcr *procrToDissipate )

489 { VMSReqst req;

491 req.reqType = dissipate;

492 req.nextReqst = procrToDissipate->requests;

493 procrToDissipate->requests = &req;

495 VMS__suspend_procr( procrToDissipate );

499 /* "ext" designates that it's for use outside the VMS system -- should only

500 * be called from main thread or other thread -- never from code animated by

501 * a VMS virtual processor.

503 *Use this version to dissipate VPs created outside the VMS system.

505 void

506 VMS_ext__dissipate_procr( VirtProcr *procrToDissipate )

508 //NOTE: initialData was given to the processor, so should either have

509 // been alloc'd with VMS__malloc, or freed by the level above animPr.

510 //So, all that's left to free here is the stack and the VirtProcr struc

511 // itself

512 //Note, should not stack-allocate initial data -- no guarantee, in

513 // general that creating processor will outlive ones it creates.

514 free( procrToDissipate->startOfStack );

515 free( procrToDissipate );

520 /*This call's name indicates that request is malloc'd -- so req handler

521 * has to free any extra requests tacked on before a send, using this.

523 * This inserts the semantic-layer's request data into standard VMS carrier

524 * request data-struct that is mallocd. The sem request doesn't need to

525 * be malloc'd if this is called inside the same call chain before the

526 * send of the last request is called.

528 *The request handler has to call VMS__free_VMSReq for any of these

530 inline void

531 VMS__add_sem_request_in_mallocd_VMSReqst( void *semReqData,

532 VirtProcr *callingPr )

533 { VMSReqst *req;

535 req = VMS__malloc( sizeof(VMSReqst) );

536 req->reqType = semantic;

537 req->semReqData = semReqData;

538 req->nextReqst = callingPr->requests;

539 callingPr->requests = req;

542 /*This inserts the semantic-layer's request data into standard VMS carrier

543 * request data-struct is allocated on stack of this call & ptr to it sent

544 * to plugin

545 *Then it does suspend, to cause request to be sent.

547 inline void

548 VMS__send_sem_request( void *semReqData, VirtProcr *callingPr )

549 { VMSReqst req;

551 req.reqType = semantic;

552 req.semReqData = semReqData;

553 req.nextReqst = callingPr->requests;

554 callingPr->requests = &req;

556 VMS__suspend_procr( callingPr );

560 inline void

561 VMS__send_VMSSem_request( void *semReqData, VirtProcr *callingPr )

562 { VMSReqst req;

564 req.reqType = VMSSemantic;

565 req.semReqData = semReqData;

566 req.nextReqst = callingPr->requests; //gab any other preceeding

567 callingPr->requests = &req;

569 VMS__suspend_procr( callingPr );

575 VMSReqst *

576 VMS__take_next_request_out_of( VirtProcr *procrWithReq )

577 { VMSReqst *req;

579 req = procrWithReq->requests;

580 if( req == NULL ) return NULL;

582 procrWithReq->requests = procrWithReq->requests->nextReqst;

583 return req;

587 inline void *

588 VMS__take_sem_reqst_from( VMSReqst *req )

590 return req->semReqData;

595 /* This is for OS requests and VMS infrastructure requests, such as to create

596 * a probe -- a probe is inside the heart of VMS-core, it's not part of any

597 * language -- but it's also a semantic thing that's triggered from and used

598 * in the application.. so it crosses abstractions.. so, need some special

599 * pattern here for handling such requests.

600 * Doing this just like it were a second language sharing VMS-core.

602 * This is called from the language's request handler when it sees a request

603 * of type VMSSemReq

605 * TODO: Later change this, to give probes their own separate plugin & have

606 * VMS-core steer the request to appropriate plugin

607 * Do the same for OS calls -- look later at it..

609 void inline

610 VMS__handle_VMSSemReq( VMSReqst *req, VirtProcr *requestingPr, void *semEnv,

611 ResumePrFnPtr resumePrFnPtr )

612 { VMSSemReq *semReq;

613 IntervalProbe *newProbe;

615 semReq = req->semReqData;

617 newProbe = VMS__malloc( sizeof(IntervalProbe) );

618 newProbe->nameStr = VMS__strDup( semReq->nameStr );

619 newProbe->hist = NULL;

620 newProbe->schedChoiceWasRecorded = FALSE;

622 //This runs in masterVP, so no race-condition worries

623 newProbe->probeID =

624 addToDynArray( newProbe, _VMSMasterEnv->dynIntervalProbesInfo );

626 requestingPr->dataRetFromReq = newProbe;

628 (*resumePrFnPtr)( requestingPr, semEnv );

633 /*This must be called by the request handler plugin -- it cannot be called

634 * from the semantic library "dissipate processor" function -- instead, the

635 * semantic layer has to generate a request, and the plug-in calls this

636 * function.

637 *The reason is that this frees the virtual processor's stack -- which is

638 * still in use inside semantic library calls!

640 *This frees or recycles all the state owned by and comprising the VMS

641 * portion of the animating virtual procr. The request handler must first

642 * free any semantic data created for the processor that didn't use the

643 * VMS_malloc mechanism. Then it calls this, which first asks the malloc

644 * system to disown any state that did use VMS_malloc, and then frees the

645 * statck and the processor-struct itself.

646 *If the dissipated processor is the sole (remaining) owner of VMS__malloc'd

647 * state, then that state gets freed (or sent to recycling) as a side-effect

648 * of dis-owning it.

650 void

651 VMS__dissipate_procr( VirtProcr *animatingPr )

653 //dis-own all locations owned by this processor, causing to be freed

654 // any locations that it is (was) sole owner of

655 //TODO: implement VMS__malloc system, including "give up ownership"

658 //NOTE: initialData was given to the processor, so should either have

659 // been alloc'd with VMS__malloc, or freed by the level above animPr.

660 //So, all that's left to free here is the stack and the VirtProcr struc

661 // itself

662 //Note, should not stack-allocate initial data -- no guarantee, in

663 // general that creating processor will outlive ones it creates.

664 VMS__free( animatingPr->startOfStack );

665 VMS__free( animatingPr );

669 //TODO: look at architecting cleanest separation between request handler

670 // and master loop, for dissipate, create, shutdown, and other non-semantic

671 // requests. Issue is chain: one removes requests from AppVP, one dispatches

672 // on type of request, and one handles each type.. but some types require

673 // action from both request handler and master loop -- maybe just give the

674 // request handler calls like: VMS__handle_X_request_type

677 /*This is called by the semantic layer's request handler when it decides its

678 * time to shut down the VMS system. Calling this causes the core loop OS

679 * threads to exit, which unblocks the entry-point function that started up

680 * VMS, and allows it to grab the result and return to the original single-

681 * threaded application.

683 *The _VMSMasterEnv is needed by this shut down function, so the create-seed-

684 * and-wait function has to free a bunch of stuff after it detects the

685 * threads have all died: the masterEnv, the thread-related locations,

686 * masterVP any AppVPs that might still be allocated and sitting in the

687 * semantic environment, or have been orphaned in the _VMSWorkQ.

689 *NOTE: the semantic plug-in is expected to use VMS__malloc to get all the

690 * locations it needs, and give ownership to masterVP. Then, they will be

691 * automatically freed.

693 *In here,create one core-loop shut-down processor for each core loop and put

694 * them all directly into the readyToAnimateQ.

695 *Note, this function can ONLY be called after the semantic environment no

696 * longer cares if AppVPs get animated after the point this is called. In

697 * other words, this can be used as an abort, or else it should only be

698 * called when all AppVPs have finished dissipate requests -- only at that

699 * point is it sure that all results have completed.

701 void

702 VMS__shutdown()

703 { int coreIdx;

704 VirtProcr *shutDownPr;

706 //create the shutdown processors, one for each core loop -- put them

707 // directly into the Q -- each core will die when gets one

708 for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ )

709 { //Note, this is running in the master

710 shutDownPr = VMS__create_procr( &endOSThreadFn, NULL );

711 shutDownPr->isShutdownVP = TRUE;

712 writeVMSQ( shutDownPr, _VMSMasterEnv->readyToAnimateQs[coreIdx] );

713 }

714 #ifdef MEAS__PERF_COUNTERS

715 uint64 tmpc,tmpi;

716 saveCyclesAndInstrs(0,tmpc,tmpi);

717 //printf("End: cycles = %llu, instrs = %llu\n",tmpc,tmpi);

719 for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ ){

720 close(_VMSMasterEnv->cycles_counter_fd[coreIdx]);

721 close(_VMSMasterEnv->instrs_counter_fd[coreIdx]);

722 }

724 #endif

728 /*Am trying to be cute, avoiding IF statement in coreLoop that checks for

729 * a special shutdown procr. Ended up with extra-complex shutdown sequence.

730 *This function has the sole purpose of setting the stack and framePtr

731 * to the coreLoop's stack and framePtr.. it does that then jumps to the

732 * core loop's shutdown point -- might be able to just call Pthread_exit

733 * from here, but am going back to the pthread's stack and setting everything

734 * up just as if it never jumped out, before calling pthread_exit.

735 *The end-point of core loop will free the stack and so forth of the

736 * processor that animates this function, (this fn is transfering the

737 * animator of the AppVP that is in turn animating this function over

738 * to core loop function -- note that this slices out a level of virtual

739 * processors).

741 void

742 endOSThreadFn( void *initData, VirtProcr *animatingPr )

744 #ifdef SEQUENTIAL

745 asmTerminateCoreLoopSeq(animatingPr);

746 #else

747 asmTerminateCoreLoop(animatingPr);

748 #endif

752 /*This is called from the startup & shutdown

754 void

755 VMS__cleanup_at_end_of_shutdown()

757 //unused

758 //VMSQueueStruc **readyToAnimateQs;

759 //int coreIdx;

760 //VirtProcr **masterVPs;

761 //SchedSlot ***allSchedSlots; //ptr to array of ptrs

763 //Before getting rid of everything, print out any measurements made

764 //forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, (DynArrayFnPtr)&printHist );

765 //forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, (DynArrayFnPtr)&saveHistToFile);

766 //forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, &freeHist );

768 prctl(PR_TASK_PERF_EVENTS_DISABLE);

769 #ifdef MEAS__TIME_PLUGIN

770 printHist( _VMSMasterEnv->reqHdlrLowTimeHist );

771 saveHistToFile( _VMSMasterEnv->reqHdlrLowTimeHist );

772 printHist( _VMSMasterEnv->reqHdlrHighTimeHist );

773 saveHistToFile( _VMSMasterEnv->reqHdlrHighTimeHist );

774 freeHistExt( _VMSMasterEnv->reqHdlrLowTimeHist );

775 freeHistExt( _VMSMasterEnv->reqHdlrHighTimeHist );

776 #endif

777 #ifdef MEAS__TIME_MALLOC

778 printHist( _VMSMasterEnv->mallocTimeHist );

779 saveHistToFile( _VMSMasterEnv->mallocTimeHist );

780 printHist( _VMSMasterEnv->freeTimeHist );

781 saveHistToFile( _VMSMasterEnv->freeTimeHist );

782 freeHistExt( _VMSMasterEnv->mallocTimeHist );

783 freeHistExt( _VMSMasterEnv->freeTimeHist );

784 #endif

785 #ifdef MEAS__TIME_MASTER_LOCK

786 printHist( _VMSMasterEnv->masterLockLowTimeHist );

787 printHist( _VMSMasterEnv->masterLockHighTimeHist );

788 #endif

789 #ifdef MEAS__TIME_MASTER

790 printHist( _VMSMasterEnv->pluginTimeHist );

791 for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ )

792 {

793 freeVMSQ( readyToAnimateQs[ coreIdx ] );

794 //master VPs were created external to VMS, so use external free

795 VMS__dissipate_procr( masterVPs[ coreIdx ] );

797 freeSchedSlots( allSchedSlots[ coreIdx ] );

798 }

799 #endif

800 #ifdef MEAS__TIME_STAMP_SUSP

801 printHist( _VMSMasterEnv->pluginTimeHist );

802 for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ )

803 {

804 freeVMSQ( readyToAnimateQs[ coreIdx ] );

805 //master VPs were created external to VMS, so use external free

806 VMS__dissipate_procr( masterVPs[ coreIdx ] );

808 freeSchedSlots( allSchedSlots[ coreIdx ] );

809 }

810 #endif

812 //All the environment data has been allocated with VMS__malloc, so just

813 // free its internal big-chunk and all inside it disappear.

815 readyToAnimateQs = _VMSMasterEnv->readyToAnimateQs;

816 masterVPs = _VMSMasterEnv->masterVPs;

817 allSchedSlots = _VMSMasterEnv->allSchedSlots;

819 for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ )

820 {

821 freeVMSQ( readyToAnimateQs[ coreIdx ] );

822 //master VPs were created external to VMS, so use external free

823 VMS__dissipate_procr( masterVPs[ coreIdx ] );

824

825 freeSchedSlots( allSchedSlots[ coreIdx ] );

826 }

828 VMS__free( _VMSMasterEnv->readyToAnimateQs );

829 VMS__free( _VMSMasterEnv->masterVPs );

830 VMS__free( _VMSMasterEnv->allSchedSlots );

832 //============================= MEASUREMENT STUFF ========================

833 #ifdef STATS__TURN_ON_PROBES

834 freeDynArrayDeep( _VMSMasterEnv->dynIntervalProbesInfo, &VMS__free_probe);

835 #endif

836 //========================================================================

838 //These are the only two that use system free

839 VMS_ext__free_free_list( _VMSMasterEnv->freeListHead );

840 free( (void *)_VMSMasterEnv );

844 //================================

847 /*Later, improve this -- for now, just exits the application after printing

848 * the error message.

850 void

851 VMS__throw_exception( char *msgStr, VirtProcr *reqstPr, VMSExcp *excpData )

853 printf("%s",msgStr);

854 fflush(stdin);

855 exit(1);

/*Returns the 64-bit time-stamp counter value read with the rdtsc
 * instruction.
 *
 *A cpuid instruction (eax zeroed first) is issued ahead of rdtsc; the
 * original comment marks it as the serializing step.  rdtsc delivers the
 * low 32 bits in eax and the high 32 bits in edx, which are combined below.
 */
__inline__ uint64_t rdtsc(void){
   uint32_t lo, hi;
   __asm__ __volatile__ ( // serialize
   "xorl %%eax,%%eax \n cpuid"
   ::: "%rax", "%rbx", "%rcx", "%rdx");
   __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
   /* asm volatile("RDTSC;"
   "movl %%eax, %0;"
   "movl %%edx, %1;"
   : "=m" (lo), "=m" (hi)
   :
   : "%eax", "%edx"
   ); */
   //high half shifted up, low half OR-ed in
   return (uint64_t)hi << 32 | lo;
}

874 uint64 tsc_offset_send(ThdParams* thisCoresThdParams, uint64 initval){

875 uint64 ret_tsc_curr; //local copy of coreLoopThdParams->ret_tsc

876 uint64 ret_tsc_prev;

877 uint64 local_before;

878 uint64 local_after;

879

880 ret_tsc_prev = initval;

881 ret_tsc_curr = initval;

882 local_before = rdtsc();

883 thisCoresThdParams->sent_ctr++;

884 while(ret_tsc_curr == ret_tsc_prev)

885 ret_tsc_curr = thisCoresThdParams->ret_tsc;

886 local_after = rdtsc();

887 ret_tsc_prev = ret_tsc_curr;

888

889 int i;

890 for(i=0;i<3;++i){

891 local_before = rdtsc();

892 thisCoresThdParams->sent_ctr++;

893 while(ret_tsc_curr == ret_tsc_prev)

894 ret_tsc_curr = thisCoresThdParams->ret_tsc;

895 local_after = rdtsc();

896 int64 midpoint = local_before + (local_after-local_before)/2;

897 int64 difference;

898 if (midpoint > ret_tsc_curr)

899 difference = midpoint - (int64)ret_tsc_curr;

900 else

901 difference = (int64)ret_tsc_curr - midpoint;

902 //printf("TSC: %llu (Core %d) = %llu (Core %d) // difference=%llu\n",midpoint,thisCoresThdParams->coreNum,ret_tsc_curr,thisCoresThdParams->coreNum + 1,difference);

903 ret_tsc_prev = ret_tsc_curr;

904 }

905

906 return ret_tsc_curr;

909 int tsc_offset_resp(ThdParams* sendCoresThdParams, int initialctrval){

911 int send_ctr_curr = initialctrval;

912 int send_ctr_prev = initialctrval;

914

915 while(send_ctr_curr == send_ctr_prev)

916 send_ctr_curr = sendCoresThdParams->sent_ctr;

917 sendCoresThdParams->ret_tsc = rdtsc();

918 send_ctr_prev = send_ctr_curr;

919

920 int i;

921 for(i=0;i<3;++i){

922 while(send_ctr_curr == send_ctr_prev)

923 send_ctr_curr = sendCoresThdParams->sent_ctr;

924 sendCoresThdParams->ret_tsc = rdtsc();

925 send_ctr_prev = send_ctr_curr;

926 }

927 return send_ctr_curr;