# HG changeset patch # User Some Random Person # Date 1330899995 28800 # Node ID 0c83ea8adefce64c0b749801f9066ae4f837bc8c # Parent eaf7e4c58c9e0b9279d9cbf16e3a17c18b4a0bc1 Close to compilable version of common_ancestor -- still includes HW dep stuff diff -r eaf7e4c58c9e -r 0c83ea8adefc CoreLoop.c --- a/CoreLoop.c Wed Feb 22 11:39:12 2012 -0800 +++ b/CoreLoop.c Sun Mar 04 14:26:35 2012 -0800 @@ -6,7 +6,6 @@ #include "VMS.h" -#include "ProcrContext.h" #include #include @@ -15,14 +14,14 @@ #include #include -void *terminateCoreLoop(SlaveVP *currPr); +void *terminateCoreLoop(SlaveVP *currSlv); /*This is the loop that runs in the OS Thread pinned to each core - *Get virt procr from queue, - * save state of current animator, then load in state of virt procr, using - * jmp instr to switch the program-counter state -- making the virt procr + *Get Slv from queue, + * save state of current animator, then load in state of Slv, using + * jmp instr to switch the program-counter state -- making the Slv * the new animator. - *At some point, the virt procr will suspend itself by saving out its + *At some point, the Slv will suspend itself by saving out its * animator state (stack ptr, frame ptr, program counter) and switching * back to the OS Thread's animator state, which means restoring the * stack and frame and jumping to the core loop start point. 
@@ -34,7 +33,7 @@ { ThdParams *coreLoopThdParams; int thisCoresIdx; - SlaveVP *currPr; + SlaveVP *currSlv; VMSQueueStruc *readyToAnimateQ; cpu_set_t coreMask; //has 1 in bit positions of allowed cores int errorCode; @@ -78,7 +77,7 @@ if(errorCode){ printf("\nset affinity failure\n"); exit(0); } - //Save the return address in the SwitchVP function + //Save the return address in the SwitchSlv function saveCoreLoopReturnAddr((void**)&(_VMSMasterEnv->coreLoopReturnPt)); @@ -100,68 +99,55 @@ while( gate.gateClosed ) /*busy wait*/; } - currPr = (SlaveVP *) readVMSQ( readyToAnimateQ ); + currSlv = (SlaveVP *) readVMSQ( readyToAnimateQ ); //Set the coreloop's progress, so stealer can see it has made it out // of the protected area gate.exitProgress = gate.preGateProgress; #else - currPr = (SlaveVP *) readVMSQ( readyToAnimateQ ); + currSlv = (SlaveVP *) readVMSQ( readyToAnimateQ ); #endif - if( currPr != NULL ) _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0; + if( currSlv != NULL ) _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0; else { - //============================= MEASUREMENT STUFF ===================== - #ifdef MEAS__TIME_MASTER_LOCK - int32 startStamp, endStamp; - saveLowTimeStampCountInto( startStamp ); - #endif - //===================================================================== + MEAS__Capture_Pre_Master_Lock_Point; + int tries = 0; int gotLock = 0; - while( currPr == NULL ) //if queue was empty, enter get masterLock loop + while( currSlv == NULL ) //if queue was empty, enter get masterLock loop { //queue was empty, so get master lock gotLock = __sync_bool_compare_and_swap(&(_VMSMasterEnv->masterLock), UNLOCKED, LOCKED ); if( gotLock ) { //run own MasterVP -- jmps to coreLoops startPt when done - currPr = _VMSMasterEnv->masterVPs[thisCoresIdx]; + currSlv = _VMSMasterEnv->masterVPs[thisCoresIdx]; if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 ) { DEBUG( dbgB2BMaster,"Many back to back MasterVPs\n"); pthread_yield(); } 
_VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1; - break; //end while -- have a VP to animate now + break; //end while -- have a Slv to animate now } tries++; //if too many, means master on other core taking too long if( tries > MASTERLOCK_RETRIES ) { tries = 0; pthread_yield(); } } - //============================= MEASUREMENT STUFF ===================== - #ifdef MEAS__TIME_MASTER_LOCK - saveLowTimeStampCountInto( endStamp ); - addIntervalToHist( startStamp, endStamp, - _VMSMasterEnv->masterLockLowTimeHist ); - addIntervalToHist( startStamp, endStamp, - _VMSMasterEnv->masterLockHighTimeHist ); - #endif - //===================================================================== - + MEAS__Capture_Post_Master_Lock_Point; } - switchToVP(currPr); //The VPs return in here + switchToSlv(currSlv); //The Slvs return in here flushRegisters(); }//CoreLoop } void * -terminateCoreLoop(SlaveVP *currPr){ - //first free shutdown VP that jumped here -- it first restores the - // coreloop's stack, so addr of currPr in stack frame is still correct - VMS_int__dissipate_procr( currPr ); +terminateCoreLoop(SlaveVP *currSlv){ + //first free shutdown Slv that jumped here -- it first restores the + // coreloop's stack, so addr of currSlv in stack frame is still correct + VMS_int__dissipate_SlaveVP( currSlv ); pthread_exit( NULL ); } @@ -176,7 +162,7 @@ void * coreLoop_Seq( void *paramsIn ) { - SlaveVP *currPr; + SlaveVP *currSlv; VMSQueueStruc *readyToAnimateQ; ThdParams *coreLoopThdParams; @@ -186,7 +172,7 @@ // thisCoresIdx = coreLoopThdParams->coreNum; thisCoresIdx = 0; - //Save the return address in the SwitchVP function + //Save the return address in the SwitchSlv function saveCoreLoopReturnAddr(&(_VMSMasterEnv->coreLoopReturnPt)); @@ -195,19 +181,19 @@ //_VMSWorkQ must be a global, static volatile var, so not kept in reg, // which forces reloading the pointer after each jmp to this point readyToAnimateQ = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx]; - currPr = (SlaveVP *) 
readVMSQ( readyToAnimateQ ); - if( currPr == NULL ) + currSlv = (SlaveVP *) readVMSQ( readyToAnimateQ ); + if( currSlv == NULL ) { if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 ) { printf("too many back to back MasterVP\n"); exit(1); } _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1; - currPr = _VMSMasterEnv->masterVPs[thisCoresIdx]; + currSlv = _VMSMasterEnv->masterVPs[thisCoresIdx]; } else _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0; - switchToVP( currPr ); + switchToSlv( currSlv ); flushRegisters(); } } diff -r eaf7e4c58c9e -r 0c83ea8adefc MasterLoop.c --- a/MasterLoop.c Wed Feb 22 11:39:12 2012 -0800 +++ b/MasterLoop.c Sun Mar 04 14:26:35 2012 -0800 @@ -10,13 +10,12 @@ #include #include "VMS.h" -#include "ProcrContext.h" //=========================================================================== void inline stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ, - SlaveVP *masterPr ); + SlaveVP *masterVP ); //=========================================================================== @@ -27,13 +26,13 @@ *Polls each sched slot exactly once, hands any requests made by a newly * done slave to the "request handler" plug-in function * - *Any slots that need a virt procr assigned are given to the "schedule" - * plug-in function, which tries to assign a virt procr (slave) to it. + *Any slots that need a Slv assigned are given to the "schedule" + * plug-in function, which tries to assign a Slv (slave) to it. * *When all slots needing a processor have been given to the schedule plug-in, - * a fraction of the procrs successfully scheduled are put into the + * a fraction of the slaves successfully scheduled are put into the * work queue, then a continuation of this function is put in, then the rest - * of the virt procrs that were successfully scheduled. + * of the Slvs that were successfully scheduled. * *The first thing the continuation does is busy-wait until the previous * animation completes. 
This is because an (unlikely) continuation may @@ -46,7 +45,7 @@ * start running gets it and does all the stuff for a newly born -- * from then on, will be doing continuation, but do suspension self * directly at end of master loop - *So VMS__init just births the master virtual processor same way it births + *So VMS_WL__init just births the master virtual processor same way it births * all the others -- then does any extra setup needed and puts it into the * work queue. *However means have to make masterEnv a global static volatile the same way @@ -65,36 +64,36 @@ *At this point, the masterLoop does not write itself into the queue anymore, * instead, the coreLoop acquires the masterLock when it has nothing to * animate, and then animates its own masterLoop. However, still try to put - * several AppVPs into the queue to amortize the startup cost of switching + * several AppSlvs into the queue to amortize the startup cost of switching * to the MasterVP. Note, don't have to worry about latency of requests much * because most requests generate work for same core -- only latency issue * is case when other cores starved and one core's requests generate work * for them -- so keep max in queue to 3 or 4.. 
*/ -void masterLoop( void *initData, SlaveVP *animatingPr ) +void masterLoop( void *initData, SlaveVP *animatingSlv ) { int32 slotIdx, numSlotsFilled; - SlaveVP *schedVirtPr; + SlaveVP *schedSlaveVP; SchedSlot *currSlot, **schedSlots; MasterEnv *masterEnv; VMSQueueStruc *readyToAnimateQ; - Sched_Assigner slaveScheduler; + Sched_Assigner slaveAssigner; RequestHandler requestHandler; void *semanticEnv; int32 thisCoresIdx; - SlaveVP *masterPr; - volatile SlaveVP *volatileMasterPr; + SlaveVP *masterVP; + volatile SlaveVP *volatileMasterVP; - volatileMasterPr = animatingPr; - masterPr = (SlaveVP*)volatileMasterPr; //used to force re-define after jmp + volatileMasterVP = animatingSlv; + masterVP = (SlaveVP*)volatileMasterVP; //used to force re-define after jmp //First animation of each MasterVP will in turn animate this part - // of setup code.. (VP creator sets up the stack as if this function + // of setup code.. (Slv creator sets up the stack as if this function // was called normally, but actually get here by jmp) //So, setup values about stack ptr, jmp pt and all that - //masterPr->resumeInstrPtr = &&masterLoopStartPt; + //masterVP->resumeInstrPtr = &&masterLoopStartPt; //Note, got rid of writing the stack and frame ptr up here, because @@ -108,25 +107,18 @@ //masterLoopStartPt: while(1){ - //============================= MEASUREMENT STUFF ======================== - #ifdef MEAS__TIME_MASTER - //Total Master time includes one coreloop time -- just assume the core - // loop time is same for Master as for AppVPs, even though it may be - // smaller due to higher predictability of the fixed jmp. 
- saveLowTimeStampCountInto( masterPr->startMasterTSCLow ); - #endif - //======================================================================== + MEAS__Capture_Pre_Master_Point masterEnv = (MasterEnv*)_VMSMasterEnv; //GCC may optimize so doesn't always re-define from frame-storage - masterPr = (SlaveVP*)volatileMasterPr; //just to make sure after jmp - thisCoresIdx = masterPr->coreAnimatedBy; + masterVP = (SlaveVP*)volatileMasterVP; //just to make sure after jmp + thisCoresIdx = masterVP->coreAnimatedBy; readyToAnimateQ = masterEnv->readyToAnimateQs[thisCoresIdx]; schedSlots = masterEnv->allSchedSlots[thisCoresIdx]; requestHandler = masterEnv->requestHandler; - slaveScheduler = masterEnv->slaveSchedAssigner; + slaveAssigner = masterEnv->slaveAssigner; semanticEnv = masterEnv->semanticEnv; @@ -139,18 +131,18 @@ if( currSlot->workIsDone ) { currSlot->workIsDone = FALSE; - currSlot->needsProcrAssigned = TRUE; + currSlot->needsSlaveAssigned = TRUE; //process requests from slave to master //====================== MEASUREMENT STUFF =================== - #ifdef MEAS__TIME_PLUGIN + #ifdef MEAS__TURN_ON_PLUGIN_MEAS int32 startStamp1, endStamp1; saveLowTimeStampCountInto( startStamp1 ); #endif //============================================================ - (*requestHandler)( currSlot->procrAssignedToSlot, semanticEnv ); + (*requestHandler)( currSlot->slaveAssignedToSlot, semanticEnv ); //====================== MEASUREMENT STUFF =================== - #ifdef MEAS__TIME_PLUGIN + #ifdef MEAS__TURN_ON_PLUGIN_MEAS saveLowTimeStampCountInto( endStamp1 ); addIntervalToHist( startStamp1, endStamp1, _VMSMasterEnv->reqHdlrLowTimeHist ); @@ -159,18 +151,18 @@ #endif //============================================================ } - if( currSlot->needsProcrAssigned ) - { //give slot a new virt procr - schedVirtPr = - (*slaveScheduler)( semanticEnv, thisCoresIdx ); + if( currSlot->needsSlaveAssigned ) + { //give slot a new Slv + schedSlaveVP = + (*slaveAssigner)( semanticEnv, 
thisCoresIdx ); - if( schedVirtPr != NULL ) - { currSlot->procrAssignedToSlot = schedVirtPr; - schedVirtPr->schedSlot = currSlot; - currSlot->needsProcrAssigned = FALSE; + if( schedSlaveVP != NULL ) + { currSlot->slaveAssignedToSlot = schedSlaveVP; + schedSlaveVP->schedSlot = currSlot; + currSlot->needsSlaveAssigned = FALSE; numSlotsFilled += 1; - writeVMSQ( schedVirtPr, readyToAnimateQ ); + writeVMSQ( schedSlaveVP, readyToAnimateQ ); } } } @@ -179,16 +171,13 @@ #ifdef USE_WORK_STEALING //If no slots filled, means no more work, look for work to steal. if( numSlotsFilled == 0 ) - { gateProtected_stealWorkInto( currSlot, readyToAnimateQ, masterPr ); + { gateProtected_stealWorkInto( currSlot, readyToAnimateQ, masterVP ); } #endif + MEAS__Capture_Post_Master_Point; - #ifdef MEAS__TIME_MASTER - saveLowTimeStampCountInto( masterPr->endMasterTSCLow ); - #endif - - masterSwitchToCoreLoop(animatingPr); + masterSwitchToCoreLoop(animatingSlv); flushRegisters(); }//MasterLoop @@ -202,14 +191,14 @@ */ void inline stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ, - SlaveVP *masterPr ) + SlaveVP *masterVP ) { - SlaveVP *stolenPr; + SlaveVP *stolenSlv; int32 coreIdx, i; VMSQueueStruc *currQ; - stolenPr = NULL; - coreIdx = masterPr->coreAnimatedBy; + stolenSlv = NULL; + coreIdx = masterVP->coreAnimatedBy; for( i = 0; i < NUM_CORES -1; i++ ) { if( coreIdx >= NUM_CORES -1 ) @@ -220,17 +209,17 @@ } currQ = _VMSMasterEnv->readyToAnimateQs[coreIdx]; if( numInVMSQ( currQ ) > 0 ) - { stolenPr = readVMSQ (currQ ); + { stolenSlv = readVMSQ (currQ ); break; } } - if( stolenPr != NULL ) - { currSlot->procrAssignedToSlot = stolenPr; - stolenPr->schedSlot = currSlot; - currSlot->needsProcrAssigned = FALSE; + if( stolenSlv != NULL ) + { currSlot->slaveAssignedToSlot = stolenSlv; + stolenSlv->schedSlot = currSlot; + currSlot->needsSlaveAssigned = FALSE; - writeVMSQ( stolenPr, readyToAnimateQ ); + writeVMSQ( stolenSlv, readyToAnimateQ ); } } @@ -306,9 +295,9 @@ void inline 
gateProtected_stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *myReadyToAnimateQ, - SlaveVP *masterPr ) + SlaveVP *masterVP ) { - SlaveVP *stolenPr; + SlaveVP *stolenSlv; int32 coreIdx, i, haveAVictim, gotLock; VMSQueueStruc *victimsQ; @@ -319,7 +308,7 @@ //see if any other cores have work available to steal haveAVictim = FALSE; - coreIdx = masterPr->coreAnimatedBy; + coreIdx = masterVP->coreAnimatedBy; for( i = 0; i < NUM_CORES -1; i++ ) { if( coreIdx >= NUM_CORES -1 ) @@ -354,18 +343,18 @@ coreMightBeInProtected = FALSE; } - stolenPr = readVMSQ ( victimsQ ); + stolenSlv = readVMSQ ( victimsQ ); vicGate->gateClosed = FALSE; //======= End Gate-protection ======= - if( stolenPr != NULL ) //victim could have been in protected and taken - { currSlot->procrAssignedToSlot = stolenPr; - stolenPr->schedSlot = currSlot; - currSlot->needsProcrAssigned = FALSE; + if( stolenSlv != NULL ) //victim could have been in protected and taken + { currSlot->slaveAssignedToSlot = stolenSlv; + stolenSlv->schedSlot = currSlot; + currSlot->needsSlaveAssigned = FALSE; - writeVMSQ( stolenPr, myReadyToAnimateQ ); + writeVMSQ( stolenSlv, myReadyToAnimateQ ); } //unlock the work stealing lock diff -r eaf7e4c58c9e -r 0c83ea8adefc VMS.h --- a/VMS.h Wed Feb 22 11:39:12 2012 -0800 +++ b/VMS.h Sun Mar 04 14:26:35 2012 -0800 @@ -20,6 +20,10 @@ #include #include +#ifndef _LANG_NAME_ +#define _LANG_NAME_ "" +#endif + //================= Defines: included from separate files ================= // // Note: ALL defines are in other files, none are in here @@ -44,11 +48,15 @@ typedef struct _GateStruc GateStruc; -typedef SlaveVP * (*Sched_Assigner) ( void *, int ); //semEnv, coreIdx -typedef void (*RequestHandler) ( SlaveVP *, void * ); //prWReqst, semEnv -typedef void (*TopLevelFnPtr) ( void *, SlaveVP * ); //initData, animPr -typedef void TopLevelFn ( void *, SlaveVP * ); //initData, animPr -typedef void (*ResumeVPFnPtr) ( SlaveVP *, void * ); +typedef SlaveVP * (*Sched_Assigner) ( void *, int ); 
//semEnv, coreIdx +typedef void (*RequestHandler) ( SlaveVP *, void * ); //prWReqst, semEnv +typedef void (*TopLevelFnPtr) ( void *, SlaveVP * ); //initData, animSlv +typedef void TopLevelFn ( void *, SlaveVP * ); //initData, animSlv +typedef void (*ResumeSlvFnPtr) ( SlaveVP *, void * ); + +//============================ HW Dependent Fns ================================ + +#include "VMS__HW_dependent.h" //============================= Statistics ================================== @@ -83,7 +91,7 @@ typedef struct { enum VMSSemReqstType reqType; - SlaveVP *requestingPr; + SlaveVP *requestingSlv; char *nameStr; //for create probe } VMSSemReq; @@ -94,12 +102,12 @@ struct _SchedSlot { int workIsDone; - int needsProcrAssigned; - SlaveVP *procrAssignedToSlot; + int needsSlaveAssigned; + SlaveVP *slaveAssignedToSlot; }; //SchedSlot -/*WARNING: re-arranging this data structure could cause VP switching +/*WARNING: re-arranging this data structure could cause Slv switching * assembly code to fail -- hard-codes offsets of fields */ struct _SlaveVP @@ -117,23 +125,11 @@ SchedSlot *schedSlot; VMSReqst *requests; - void *semanticData; //this livesUSE_GNU here for the life of VP - void *dataRetFromReq;//values returned from plugin to VP go here + void *semanticData; //this livesUSE_GNU here for the life of Slv + void *dataRetFromReq;//values returned from plugin to Slv go here //=========== MEASUREMENT STUFF ========== - #ifdef MEAS__TIME_STAMP_SUSP - uint32 preSuspTSCLow; - uint32 postSuspTSCLow; - #endif - #ifdef MEAS__TIME_MASTER /* in SlaveVP because multiple masterVPs*/ - uint32 startMasterTSCLow;USE_GNU - uint32 endMasterTSCLow; - #endif - #ifdef MEAS__TIME_2011_SYS - TSCountLowHigh startSusp; - uint64 totalSuspCycles; - uint32 numGoodSusp; - #endif + MEAS__Insert_Meas_Fields_into_Slave; //======================================== float64 createPtInSecs; //have space but don't use on some configs @@ -141,18 +137,13 @@ //SlaveVP -/*WARNING: re-arranging this data structure 
could cause VP-switching +/*WARNING: re-arranging this data structure could cause Slv-switching * assembly code to fail -- hard-codes offsets of fields * (because -O3 messes with things otherwise) */ typedef struct { - union{ //adds padding to put masterLock on its own cache-line to elim - // false sharing (masterLock is most-accessed var in VMS) - volatile int32 masterLock; - char padding[CACHE_LINE_SZ]; - } masterLockUnion; - Sched_Assigner slaveSchedAssigner; + Sched_Assigner slaveAssigner; RequestHandler requestHandler; SchedSlot ***allSchedSlots; @@ -161,17 +152,19 @@ void *semanticEnv; void *OSEventStruc; //for future, when add I/O to BLIS - MallocArrays *freeLists; + MallocArrays *freeLists; int32 amtOfOutstandingMem; //total currently allocated void *coreLoopReturnPt;//addr to jump to to re-enter coreLoop int32 setupComplete; - //int32 numMasterInARow[NUM_CORES];//detect back-to-back masterVP + int32 numMasterInARow[NUM_CORES];//detect back-to-back masterVP + int32 masterLock __align_to_cacheline__; GateStruc *workStealingGates[ NUM_CORES ]; //concurrent work-steal int32 workStealingLock; - int32 numVPsCreated; //gives ordering to processor creation + int32 numSlavesCreated; //gives ordering to processor creation + int32 numSlavesAlive; //used to detect when to shutdown //=========== MEASUREMENT STUFF ============= IntervalProbe **intervalProbes; @@ -181,28 +174,12 @@ float64 createPtInSecs; Histogram **measHists; PrivDynArrayInfo *measHistsInfo; - #ifdef MEAS__TIME_PLUGIN - Histogram *reqHdlrLowTimeHist; - Histogram *reqHdlrHighTimeHist; - #endif - #ifdef MEAS__TIME_MALLOC - Histogram *mallocTimeHist; - Histogram *freeTimeHist; - #endif - #ifdef MEAS__TIME_MASTER_LOCK - Histogram *masterLockLowTimeHist; - Histogram *masterLockHighTimeHist; - #endif - #ifdef MEAS__TIME_2011_SYS - TSCountLowHigh startMaster; - uint64 totalMasterCycles; - uint32 numMasterAnimations; - TSCountLowHigh startReqHdlr; - uint64 totalPluginCycles; - uint32 numPluginAnimations; - 
uint64 cyclesTillStartMasterLoop; - TSCountLowHigh endMasterLoop; - #endif + MEAS__Insert_Susp_Meas_Fields_into_MasterEnv; + MEAS__Insert_Master_Meas_Fields_into_MasterEnv; + MEAS__Insert_Master_Lock_Meas_Fields_into_MasterEnv; + MEAS__Insert_Malloc_Meas_Fields_into_MasterEnv; + MEAS__Insert_Plugin_Meas_Fields_into_MasterEnv; + MEAS__Insert_System_Meas_Fields_into_MasterEnv; //========================================== } MasterEnv; @@ -237,28 +214,32 @@ } ThdParams; +//============================= Global Vars ================================ + pthread_t coreLoopThdHandles[ NUM_CORES ]; //pthread's virt-procr state ThdParams *coreLoopThdParams [ NUM_CORES ]; pthread_mutex_t suspendLock; pthread_cond_t suspend_cond; - - -//============================= Global Vars ================================ - volatile MasterEnv *_VMSMasterEnv __align_to_cacheline__; - - //========================= Function Prototypes =========================== +/* MEANING OF WL PI SS int + * These indicate which places the function is safe to use. They stand for: + * WL: Wrapper Library + * PI: Plugin + * SS: Startup and Shutdown + * int: internal to the VMS implementation + */ //========== Setup and shutdown ========== void -VMS_int__init(); +VMS_SS__init(); -Fix seed-procr creation -- put box around language, have lang register stuff +//Fix; +/*seed-procr creation -- put box around language, have lang register stuff with VMS. have main program explicitly INIT Lang! -- makes more sense to C programmers -- makes it clear that there's a transition. @@ -289,77 +270,83 @@ lang's sync constructs -- VMS uses message system to establish tie-pt, each lang defines what a tie-point means to it.. (work with the diff semantics?) 
+*/ void -VMS_WL__start_the_work_then_wait_until_done(); +VMS_SS__start_the_work_then_wait_until_done(); void -VMS_int__shutdown(); +VMS_SS__shutdown(); void -VMS_int__cleanup_at_end_of_shutdown(); +VMS_SS__cleanup_at_end_of_shutdown(); //============== =============== inline SlaveVP * -VMS_int__create_procr( TopLevelFnPtr fnPtr, void *dataParam ); +VMS_int__create_slaveVP( TopLevelFnPtr fnPtr, void *dataParam ); +#define VMS_PI__create_slaveVP VMS_int__create_slaveVP +#define VMS_WL__create_slaveVP VMS_int__create_slaveVP inline void -VMS_int__point_slave_to_Fn( SlaveVP *slaveVP, TopLevelFnPtr fnPtr, +VMS_int__point_slaveVP_to_Fn( SlaveVP *slaveVP, TopLevelFnPtr fnPtr, void *dataParam); +#define VMS_PI__point_slaveVP_to_Fn VMS_int__point_slaveVP_to_Fn +#define VMS_WL__point_slaveVP_to_Fn VMS_int__point_slaveVP_to_Fn void -VMS_int__save_return_addr_into_ptd_to_loc(void *ptrToReturnAddrHoldingLoc); - -void -VMS_int__write_return_addr_from_ptd_to_loc(void *ptrToReturnAddrHoldingLoc); - -void -VMS_int__dissipate_procr( SlaveVP *procrToDissipate ); +VMS_int__dissipate_SlaveVP( SlaveVP *slaveToDissipate ); +#define VMS_PI__dissipate_SlaveVP VMS_int__dissipateSlaveVP +//From WL, dissipate a SlaveVP by sending a request //Use this to create processor inside entry point & other places outside // the VMS system boundary (IE, not run in slave nor Master) SlaveVP * -VMS_ext__create_procr( TopLevelFnPtr fnPtr, void *dataParam ); +VMS_ext__create_slaveVP( TopLevelFnPtr fnPtr, void *dataParam ); void -VMS_ext__dissipate_procr( SlaveVP *procrToDissipate ); +VMS_ext__dissipate_slaveVP( SlaveVP *slaveToDissipate ); void -VMS_PI__throw_exception( char *msgStr, SlaveVP *reqstPr, VMSExcp *excpData ); +VMS_int__throw_exception( char *msgStr, SlaveVP *reqstSlv, VMSExcp *excpData ); +#define VMS_PI__throw_exception VMS_int__throw_exception +#define VMS_WL__throw_exception VMS_int__throw_exception void * -VMS_WL__give_sem_env_for( SlaveVP *animPr ); +VMS_int__give_sem_env_for( SlaveVP 
*animSlv ); +#define VMS_PI__give_sem_env_for VMS_int__give_sem_env_for +#define VMS_SS__give_sem_env_for VMS_int__give_sem_env_for +//No WL version -- not safe! if use in WL, be sure data rd & wr is stable //============== Request Related =============== void -VMS_int__suspend_procr( SlaveVP *callingPr ); +VMS_int__suspend_slaveVP_and_send_req( SlaveVP *callingSlv ); inline void -VMS_WL__add_sem_request_in_mallocd_VMSReqst( void *semReqData, SlaveVP *callingPr ); +VMS_WL__add_sem_request_in_mallocd_VMSReqst( void *semReqData, SlaveVP *callingSlv ); inline void -VMS_WL__send_sem_request( void *semReqData, SlaveVP *callingPr ); +VMS_WL__send_sem_request( void *semReqData, SlaveVP *callingSlv ); void -VMS_WL__send_create_procr_req( void *semReqData, SlaveVP *reqstingPr ); +VMS_WL__send_create_slaveVP_req( void *semReqData, SlaveVP *reqstingSlv ); void inline VMS_WL__send_dissipate_req( SlaveVP *prToDissipate ); inline void -VMS_WL__send_VMSSem_request( void *semReqData, SlaveVP *callingPr ); +VMS_WL__send_VMSSem_request( void *semReqData, SlaveVP *callingSlv ); VMSReqst * -VMS_PI__take_next_request_out_of( SlaveVP *procrWithReq ); +VMS_PI__take_next_request_out_of( SlaveVP *slaveWithReq ); inline void * VMS_PI__take_sem_reqst_from( VMSReqst *req ); void inline -VMS_PI__handle_VMSSemReq( VMSReqst *req, SlaveVP *requestingPr, void *semEnv, - ResumeVPFnPtr resumePrFnPtr ); +VMS_PI__handle_VMSSemReq( VMSReqst *req, SlaveVP *requestingSlv, void *semEnv, + ResumeSlvFnPtr resumeSlvFnPtr ); //======================== MEASUREMENT ====================== uint64 @@ -368,8 +355,6 @@ VMS_WL__give_num_plugin_animations(); - -#include "VMS__HW_dependent.h" #include "probes.h" #include "vutilities.h" diff -r eaf7e4c58c9e -r 0c83ea8adefc VMS__HW_dependent.c --- a/VMS__HW_dependent.c Wed Feb 22 11:39:12 2012 -0800 +++ b/VMS__HW_dependent.c Sun Mar 04 14:26:35 2012 -0800 @@ -12,7 +12,8 @@ *No need to save registers on old stack frame, because there's no old * animator state to return to 
*/ -VMS_int__point_slave_to_Fn( SlaveVP *slaveVP, TopLevelFnPtr fnPtr, +inline void +VMS_int__point_slaveVP_to_Fn( SlaveVP *slaveVP, TopLevelFnPtr fnPtr, void *dataParam) { void *stackPtr; diff -r eaf7e4c58c9e -r 0c83ea8adefc VMS__HW_dependent.h --- a/VMS__HW_dependent.h Wed Feb 22 11:39:12 2012 -0800 +++ b/VMS__HW_dependent.h Sun Mar 04 14:26:35 2012 -0800 @@ -6,28 +6,75 @@ * */ -#ifndef _ProcrContext_H -#define _ProcrContext_H +#ifndef _VMS__HW_DEPENDENT_H +#define _VMS__HW_DEPENDENT_H #define _GNU_SOURCE -void saveCoreLoopReturnAddr(void **returnAddress); +void +saveCoreLoopReturnAddr(void **returnAddress); -void switchToVP(SlaveVP *nextProcr); +void +switchToSlv(SlaveVP *nextSlave); -void switchToCoreLoop(SlaveVP *nextProcr); +void +switchToCoreLoop(SlaveVP *nextSlave); -void masterSwitchToCoreLoop(SlaveVP *nextProcr); +void +masterSwitchToCoreLoop(SlaveVP *nextSlave); -void startUpTopLevelFn(); +void +startUpTopLevelFn(); -void *asmTerminateCoreLoop(SlaveVP *currPr); +void * +asmTerminateCoreLoop(SlaveVP *currSlv); #define flushRegisters() \ asm volatile ("":::"%rbx", "%r12", "%r13","%r14","%r15") inline SlaveVP * -create_procr_helper( SlaveVP *newPr, TopLevelFnPtr fnPtr, +create_slaveVP_helper( SlaveVP *newSlv, TopLevelFnPtr fnPtr, void *dataParam, void *stackLocs ); -#endif /* _ProcrContext_H */ +void +VMS_int__save_return_into_ptd_to_loc_then_do_ret(void *ptdToLoc); +void +VMS_int__return_to_addr_in_ptd_to_loc(void *ptdToLoc); + +//=================== Macros to Capture Measurements ====================== +// +//===== RDTSC wrapper ===== +//Also runs with x86_64 code +#define saveTSCLowHigh(lowHighIn) \ + asm volatile("RDTSC; \ + movl %%eax, %0; \ + movl %%edx, %1;" \ + /* outputs */ : "=m" (lowHighIn.lowHigh[0]), "=m" (lowHighIn.lowHigh[1])\ + /* inputs */ : \ + /* clobber */ : "%eax", "%edx" \ + ); + +#define saveTimeStampCountInto(low, high) \ + asm volatile("RDTSC; \ + movl %%eax, %0; \ + movl %%edx, %1;" \ + /* outputs */ : "=m" (low), "=m" (high)\ + /* 
inputs */ : \ + /* clobber */ : "%eax", "%edx" \ + ); + +#define saveLowTimeStampCountInto(low) \ + asm volatile("RDTSC; \ + movl %%eax, %0;" \ + /* outputs */ : "=m" (low) \ + /* inputs */ : \ + /* clobber */ : "%eax", "%edx" \ + ); + + //For code that calculates normalization-offset between TSC counts of + // different cores. +//#define NUM_TSC_ROUND_TRIPS 10 + + +#endif /* _VMS__HW_DEPENDENT_H */ + diff -r eaf7e4c58c9e -r 0c83ea8adefc VMS__HW_dependent.s --- a/VMS__HW_dependent.s Wed Feb 22 11:39:12 2012 -0800 +++ b/VMS__HW_dependent.s Sun Mar 04 14:26:35 2012 -0800 @@ -16,13 +16,13 @@ // the top-level function, which was pointed to by the stack-ptr .globl startUpTopLevelFn startUpTopLevelFn: - movq %rdi , %rsi #get second argument from first argument of switchVP + movq %rdi , %rsi #get second argument from first argument of switchSlv movq 0x08(%rsp), %rdi #get first argument from stack movq (%rsp) , %rax #get top-level function's addr from stack jmp *%rax #jump to the top-level function -//Switches form CoreLoop to VP ether a normal VP or the Master Loop -//switch to virt procr's stack and frame ptr then jump to virt procr fn +//Switches form CoreLoop to Slv ether a normal Slv or the Master Loop +//switch to Slv's stack and frame ptr then jump to Slv fn /* SlaveVP offsets: * 0x10 stackPtr * 0x18 framePtr @@ -34,15 +34,15 @@ * 0x48 coreLoopReturnPt * 0x54 masterLock */ -.globl switchToVP -switchToVP: +.globl switchToSlv +switchToSlv: #SlaveVP in %rdi movq %rsp , 0x38(%rdi) #save core loop stack pointer movq %rbp , 0x30(%rdi) #save core loop frame pointer movq 0x10(%rdi), %rsp #restore stack pointer movq 0x18(%rdi), %rbp #restore frame pointer movq 0x20(%rdi), %rax #get jmp pointer - jmp *%rax #jmp to VP + jmp *%rax #jmp to Slv coreLoopReturn: ret @@ -62,7 +62,7 @@ .globl switchToCoreLoop switchToCoreLoop: #SlaveVP in %rdi - movq $VPReturn , 0x20(%rdi) #store return address + movq $SlvReturn , 0x20(%rdi) #store return address movq %rsp , 0x10(%rdi) #save stack 
pointer movq %rbp , 0x18(%rdi) #save frame pointer movq 0x38(%rdi), %rsp #restore stack pointer @@ -71,7 +71,7 @@ movq (%rcx) , %rcx movq 0x48(%rcx), %rax #get CoreLoopStartPt jmp *%rax #jmp to CoreLoop -VPReturn: +SlvReturn: ret @@ -108,10 +108,10 @@ //Switch to terminateCoreLoop //therefor switch to coreLoop context from master context -// no need to call because the stack is already set up for switchVP -// and virtPr is in %rdi +// no need to call because the stack is already set up for switchSlv +// and Slv is in %rdi // and both functions have the same argument. -// do not save register of VP because this function will never return +// do not save register of Slv because this function will never return /* SlaveVP offsets: * 0x10 stackPtr * 0x18 framePtr @@ -134,7 +134,7 @@ /* * This one for the sequential version is special. It discards the current stack - * and returns directly from the coreLoop after VMS__dissipate_procr was called + * and returns directly from the coreLoop after VMS_WL__dissipate_slaveVP was called */ .globl asmTerminateCoreLoopSeq asmTerminateCoreLoopSeq: @@ -142,7 +142,7 @@ movq 0x38(%rdi), %rsp #restore stack pointer movq 0x30(%rdi), %rbp #restore frame pointer #argument is in %rdi - call VMS__dissipate_procr + call VMS_int__dissipate_slaveVP movq %rbp , %rsp #goto the coreLoops stack pop %rbp #restore the old framepointer ret #return from core loop @@ -150,18 +150,18 @@ //Assembly code takes the return addr off the stack and saves // into the loc pointed to by rdi. 
The return addr is at 0x8(%rbp) for 64bit -.globl asm_save_ret_to_singleton -VMS_int__save_return_addr_into_ptd_to_loc: +.globl VMS_int__save_return_into_ptd_to_loc_then_do_ret +VMS_int__save_return_into_ptd_to_loc_then_do_ret: movq 0x8(%rbp), %rax #get ret address, rbp is the same as in the calling function - movq %rax, (%rdi) #write ret addr to endInstrAddr field + movq %rax, (%rdi) #write ret addr into addr passed as param field ret //Assembly code changes the return addr on the stack to the one -// pointed to by the parameter. The stack's return addr is at 0x8(%rbp) -.globl asm_write_ret_from_singleton -VMS_int__write_return_addr_from_ptd_to_loc: - movq (%rdi), %rax #get return addr - movq %rax, 0x8(%rbp) #write return addr to the stack of the caller +// pointed to by the parameter, then returns. Stack's return addr is at 0x8(%rbp) +.globl VMS_int__return_to_addr_in_ptd_to_loc +VMS_int__return_to_addr_in_ptd_to_loc: + movq (%rdi), %rax #get return addr from addr passed as param + movq %rax, 0x8(%rbp) #write return addr to the stack of the caller ret diff -r eaf7e4c58c9e -r 0c83ea8adefc VMS__PI.c --- a/VMS__PI.c Wed Feb 22 11:39:12 2012 -0800 +++ b/VMS__PI.c Sun Mar 04 14:26:35 2012 -0800 @@ -17,13 +17,13 @@ /* */ VMSReqst * -VMS_PI__take_next_request_out_of( SlaveVP *procrWithReq ) +VMS_PI__take_next_request_out_of( SlaveVP *slaveWithReq ) { VMSReqst *req; - req = procrWithReq->requests; + req = slaveWithReq->requests; if( req == NULL ) return NULL; - procrWithReq->requests = procrWithReq->requests->nextReqst; + slaveWithReq->requests = slaveWithReq->requests->nextReqst; return req; } @@ -51,8 +51,8 @@ * Do the same for OS calls -- look later at it.. 
*/ void inline -VMS_PI__handle_VMSSemReq( VMSReqst *req, SlaveVP *requestingPr, void *semEnv, - ResumeVPFnPtr resumePrFnPtr ) +VMS_PI__handle_VMSSemReq( VMSReqst *req, SlaveVP *requestingSlv, void *semEnv, + ResumeSlvFnPtr resumeSlvFnPtr ) { VMSSemReq *semReq; IntervalProbe *newProbe; @@ -67,9 +67,9 @@ newProbe->probeID = addToDynArray( newProbe, _VMSMasterEnv->dynIntervalProbesInfo ); - requestingPr->dataRetFromReq = newProbe; + requestingSlv->dataRetFromReq = newProbe; - (*resumePrFnPtr)( requestingPr, semEnv ); + (*resumeSlvFnPtr)( requestingSlv, semEnv ); } @@ -77,7 +77,7 @@ * the error message. */ void -VMS_PI__throw_exception( char *msgStr, SlaveVP *reqstPr, VMSExcp *excpData ) +VMS_PI__throw_exception( char *msgStr, SlaveVP *reqstSlv, VMSExcp *excpData ) { printf("%s",msgStr); fflush(stdin); diff -r eaf7e4c58c9e -r 0c83ea8adefc VMS__WL.c --- a/VMS__WL.c Wed Feb 22 11:39:12 2012 -0800 +++ b/VMS__WL.c Sun Mar 04 14:26:35 2012 -0800 @@ -14,38 +14,30 @@ #include "VMS.h" -/*Anticipating multi-tasking - */ -void * -VMS_WL__give_sem_env_for( SlaveVP *animPr ) - { - return _VMSMasterEnv->semanticEnv; - } - /*For this implementation of VMS, it may not make much sense to have the * system of requests for creating a new processor done this way.. but over * the scope of single-master, multi-master, mult-tasking, OS-implementing, * distributed-memory, and so on, this gives VMS implementation a chance to - * do stuff before suspend, in the AppVP, and in the Master before the plugin + * do stuff before suspend, in the SlaveVP, and in the Master before the plugin * is called, as well as in the lang-lib before this is called, and in the * plugin. So, this gives both VMS and language implementations a chance to * intercept at various points and do order-dependent stuff. 
*Having a standard VMSNewPrReqData struc allows the language to create and - * free the struc, while VMS knows how to get the newPr if it wants it, and + * free the struc, while VMS knows how to get the newSlv if it wants it, and * it lets the lang have lang-specific data related to creation transported * to the plugin. */ void -VMS_WL__send_create_procr_req( void *semReqData, SlaveVP *reqstingPr ) +VMS_WL__send_create_slaveVP_req( void *semReqData, SlaveVP *reqstingSlv ) { VMSReqst req; req.reqType = createReq; req.semReqData = semReqData; - req.nextReqst = reqstingPr->requests; - reqstingPr->requests = &req; + req.nextReqst = reqstingSlv->requests; + reqstingSlv->requests = &req; - VMS_int__suspend_procr( reqstingPr ); + VMS_int__suspend_slaveVP_and_send_req( reqstingSlv ); } @@ -61,24 +53,24 @@ *This form is a bit misleading to understand if one is trying to figure out * how VMS works -- it looks like a normal function call, but inside it * sends a request to the request handler and suspends the processor, which - * jumps out of the VMS__dissipate_procr function, and out of all nestings + * jumps out of the VMS_WL__dissipate_slaveVP function, and out of all nestings * above it, transferring the work of dissipating to the request handler, * which then does the actual work -- causing the processor that animated * the call of this function to disappear and the "hanging" state of this * function to just poof into thin air -- the virtual processor's trace * never returns from this call, but instead the virtual processor's trace * gets suspended in this call and all the virt processor's state disap- - * pears -- making that suspend the last thing in the virt procr's trace. + * pears -- making that suspend the last thing in the Slv's trace. 
*/ void -VMS_WL__send_dissipate_req( SlaveVP *procrToDissipate ) +VMS_WL__send_dissipate_req( SlaveVP *slaveToDissipate ) { VMSReqst req; req.reqType = dissipate; - req.nextReqst = procrToDissipate->requests; - procrToDissipate->requests = &req; + req.nextReqst = slaveToDissipate->requests; + slaveToDissipate->requests = &req; - VMS_int__suspend_procr( procrToDissipate ); + VMS_int__suspend_slaveVP_and_send_req( slaveToDissipate ); } @@ -95,14 +87,14 @@ */ inline void VMS_WL__add_sem_request_in_mallocd_VMSReqst( void *semReqData, - SlaveVP *callingPr ) + SlaveVP *callingSlv ) { VMSReqst *req; req = VMS_int__malloc( sizeof(VMSReqst) ); req->reqType = semantic; req->semReqData = semReqData; - req->nextReqst = callingPr->requests; - callingPr->requests = req; + req->nextReqst = callingSlv->requests; + callingSlv->requests = req; } /*This inserts the semantic-layer's request data into standard VMS carrier @@ -111,28 +103,28 @@ *Then it does suspend, to cause request to be sent. */ inline void -VMS_WL__send_sem_request( void *semReqData, SlaveVP *callingPr ) +VMS_WL__send_sem_request( void *semReqData, SlaveVP *callingSlv ) { VMSReqst req; req.reqType = semantic; req.semReqData = semReqData; - req.nextReqst = callingPr->requests; - callingPr->requests = &req; + req.nextReqst = callingSlv->requests; + callingSlv->requests = &req; - VMS_int__suspend_procr( callingPr ); + VMS_int__suspend_slaveVP_and_send_req( callingSlv ); } inline void -VMS_WL__send_VMSSem_request( void *semReqData, SlaveVP *callingPr ) +VMS_WL__send_VMSSem_request( void *semReqData, SlaveVP *callingSlv ) { VMSReqst req; req.reqType = VMSSemantic; req.semReqData = semReqData; - req.nextReqst = callingPr->requests; //gab any other preceeding - callingPr->requests = &req; + req.nextReqst = callingSlv->requests; //gab any other preceeding + callingSlv->requests = &req; - VMS_int__suspend_procr( callingPr ); + VMS_int__suspend_slaveVP_and_send_req( callingSlv ); } diff -r eaf7e4c58c9e -r 0c83ea8adefc 
VMS__int.c --- a/VMS__int.c Wed Feb 22 11:39:12 2012 -0800 +++ b/VMS__int.c Sun Mar 04 14:26:35 2012 -0800 @@ -15,18 +15,18 @@ inline SlaveVP * -VMS_int__create_procr( TopLevelFnPtr fnPtr, void *dataParam ) - { SlaveVP *newPr; +VMS_int__create_slaveVP( TopLevelFnPtr fnPtr, void *dataParam ) + { SlaveVP *newSlv; void *stackLocs; - newPr = VMS_int__malloc( sizeof(SlaveVP) ); + newSlv = VMS_int__malloc( sizeof(SlaveVP) ); stackLocs = VMS_int__malloc( VIRT_PROCR_STACK_SIZE ); if( stackLocs == 0 ) { perror("VMS__malloc stack"); exit(1); } - _VMSMasterEnv->numSlaves += 1; + _VMSMasterEnv->numSlavesAlive += 1; - return create_procr_helper( newPr, fnPtr, dataParam, stackLocs ); + return create_slaveVP_helper( newSlv, fnPtr, dataParam, stackLocs ); } /* "ext" designates that it's for use outside the VMS system -- should only @@ -34,59 +34,49 @@ * a VMS virtual processor. */ inline SlaveVP * -VMS_ext__create_procr( TopLevelFnPtr fnPtr, void *dataParam ) - { SlaveVP *newPr; +VMS_ext__create_slaveVP( TopLevelFnPtr fnPtr, void *dataParam ) + { SlaveVP *newSlv; char *stackLocs; - newPr = malloc( sizeof(SlaveVP) ); + newSlv = malloc( sizeof(SlaveVP) ); stackLocs = malloc( VIRT_PROCR_STACK_SIZE ); if( stackLocs == 0 ) { perror("malloc stack"); exit(1); } - return create_procr_helper( newPr, fnPtr, dataParam, stackLocs ); + _VMSMasterEnv->numSlavesAlive += 1; + + return create_slaveVP_helper(newSlv, fnPtr, dataParam, stackLocs); } //=========================================================================== /*there is a label inside this function -- save the addr of this label in - * the callingPr struc, as the pick-up point from which to start the next - * work-unit for that procr. If turns out have to save registers, then - * save them in the procr struc too. Then do assembly jump to the CoreLoop's - * "done with work-unit" label. The procr struc is in the request in the + * the callingSlv struc, as the pick-up point from which to start the next + * work-unit for that slave. 
If turns out have to save registers, then + * save them in the slave struc too. Then do assembly jump to the CoreLoop's + * "done with work-unit" label. The slave struc is in the request in the * slave that animated the just-ended work-unit, so all the state is saved * there, and will get passed along, inside the request handler, to the - * next work-unit for that procr. + * next work-unit for that slave. */ void -VMS_int__suspend_procr( SlaveVP *animatingPr ) +VMS_int__suspend_slaveVP_and_send_req( SlaveVP *animatingSlv ) { - //The request to master will cause this suspended virt procr to get + //The request to master will cause this suspended Slv to get // scheduled again at some future point -- to resume, core loop jumps // to the resume point (below), which causes restore of saved regs and // "return" from this call. - //animatingPr->resumeInstrPtr = &&ResumePt; + //animatingSlv->resumeInstrPtr = &&ResumePt; - //return ownership of the virt procr and sched slot to Master virt pr - animatingPr->schedSlot->workIsDone = TRUE; + //return ownership of the Slv and sched slot to Master virt pr + animatingSlv->schedSlot->workIsDone = TRUE; - //=========================== Measurement stuff ======================== - #ifdef MEAS__TIME_STAMP_SUSP - //record time stamp: compare to time-stamp recorded below - saveLowTimeStampCountInto( animatingPr->preSuspTSCLow ); - #endif - //======================================================================= - - switchToCoreLoop(animatingPr); + MEAS__Capture_Pre_Susp_Point; + switchToCoreLoop(animatingSlv); flushRegisters(); - - //======================================================================= - - #ifdef MEAS__TIME_STAMP_SUSP - //NOTE: only take low part of count -- do sanity check when take diff - saveLowTimeStampCountInto( animatingPr->postSuspTSCLow ); - #endif - + MEAS__Capture_Post_Susp_Point; + return; } @@ -95,19 +85,19 @@ * be called from main thread or other thread -- never from code animated by * a SlaveVP, nor from 
a masterVP. * - *Use this version to dissipate VPs created outside the VMS system. + *Use this version to dissipate Slvs created outside the VMS system. */ void -VMS_ext__dissipate_procr( SlaveVP *procrToDissipate ) +VMS_ext__dissipate_slaveVP( SlaveVP *slaveToDissipate ) { //NOTE: dataParam was given to the processor, so should either have - // been alloc'd with VMS__malloc, or freed by the level above animPr. + // been alloc'd with VMS__malloc, or freed by the level above animSlv. //So, all that's left to free here is the stack and the SlaveVP struc // itself //Note, should not stack-allocate the data param -- no guarantee, in // general that creating processor will outlive ones it creates. - free( procrToDissipate->startOfStack ); - free( procrToDissipate ); + free( slaveToDissipate->startOfStack ); + free( slaveToDissipate ); } @@ -130,26 +120,32 @@ * of dis-owning it. */ void -VMS_int__dissipate_procr( SlaveVP *animatingPr ) +VMS_int__dissipate_SlaveVP( SlaveVP *animatingSlv ) { //dis-own all locations owned by this processor, causing to be freed // any locations that it is (was) sole owner of -//TODO: implement VMS__malloc system, including "give up ownership" - _VMSMasterEnv->numSlaves -= 1; - if( _VMSMasterEnv->numSlaves == 0 ) + _VMSMasterEnv->numSlavesAlive -= 1; + if( _VMSMasterEnv->numSlavesAlive == 0 ) { //no more work, so shutdown - VMS_int__shutdown(); //note, creates 4 shut-down processors + VMS_SS__shutdown(); //note, creates 4 shut-down processors } //NOTE: dataParam was given to the processor, so should either have - // been alloc'd with VMS__malloc, or freed by the level above animPr. + // been alloc'd with VMS__malloc, or freed by the level above animSlv. //So, all that's left to free here is the stack and the SlaveVP struc // itself //Note, should not stack-allocate initial data -- no guarantee, in // general that creating processor will outlive ones it creates. 
- VMS_int__free( animatingPr->startOfStack ); - VMS_int__free( animatingPr ); + VMS_int__free( animatingSlv->startOfStack ); + VMS_int__free( animatingSlv ); } +/*Anticipating multi-tasking + */ +void * +VMS_int__give_sem_env_for( SlaveVP *animSlv ) + { + return _VMSMasterEnv->semanticEnv; + } diff -r eaf7e4c58c9e -r 0c83ea8adefc VMS__startup_and_shutdown.c --- a/VMS__startup_and_shutdown.c Wed Feb 22 11:39:12 2012 -0800 +++ b/VMS__startup_and_shutdown.c Sun Mar 04 14:26:35 2012 -0800 @@ -12,7 +12,7 @@ #include #include "VMS.h" -#include "VMS__HW_dependent.h" +//#include "VMS__HW_dependent.h" #define thdAttrs NULL @@ -34,7 +34,7 @@ create_free_list(); void -endOSThreadFn( void *initData, SlaveVP *animatingPr ); +endOSThreadFn( void *initData, SlaveVP *animatingSlv ); pthread_mutex_t suspendLock = PTHREAD_MUTEX_INITIALIZER; pthread_cond_t suspend_cond = PTHREAD_COND_INITIALIZER; @@ -43,9 +43,9 @@ /*Setup has two phases: * 1) Semantic layer first calls init_VMS, which creates masterEnv, and puts - * the master virt procr into the work-queue, ready for first "call" + * the master Slv into the work-queue, ready for first "call" * 2) Semantic layer then does its own init, which creates the seed virt - * procr inside the semantic layer, ready to schedule it when + * slave inside the semantic layer, ready to schedule it when * asked by the first run of the masterLoop. * *This part is bit weird because VMS really wants to be "always there", and @@ -54,15 +54,15 @@ * *The semantic layer is isolated from the VMS internals by making the * semantic layer do setup to a state that it's ready with its - * initial virt procrs, ready to schedule them to slots when the masterLoop + * initial Slvs, ready to schedule them to slots when the masterLoop * asks. Without this pattern, the semantic layer's setup would * have to modify slots directly to assign the initial virt-procrs, and put * them into the readyToAnimateQ itself, breaking the isolation completely. 
* * - *The semantic layer creates the initial virt procr(s), and adds its + *The semantic layer creates the initial Slv(s), and adds its * own environment to masterEnv, and fills in the pointers to - * the requestHandler and slaveScheduler plug-in functions + * the requestHandler and slaveAssigner plug-in functions */ /*This allocates VMS data structures, populates the master VMSProc, @@ -70,7 +70,7 @@ * layer. */ void -VMS_int__init() +VMS_SS__init() { #ifdef SEQUENTIAL @@ -97,24 +97,12 @@ //Very first thing put into the master env is the free-list, seeded // with a massive initial chunk of memory. //After this, all other mallocs are VMS__malloc. - _VMSMasterEnv->freeListHead = VMS_ext__create_free_list(); + _VMSMasterEnv->freeLists = VMS_ext__create_free_list(); - //============================= MEASUREMENT STUFF ======================== - #ifdef MEAS__TIME_MALLOC - _VMSMasterEnv->mallocTimeHist = makeFixedBinHistExt( 100, 0, 30, - "malloc_time_hist"); - _VMSMasterEnv->freeTimeHist = makeFixedBinHistExt( 100, 0, 30, - "free_time_hist"); - #endif - #ifdef MEAS__TIME_PLUGIN - _VMSMasterEnv->reqHdlrLowTimeHist = makeFixedBinHistExt( 100, 0, 200, - "plugin_low_time_hist"); - _VMSMasterEnv->reqHdlrHighTimeHist = makeFixedBinHistExt( 100, 0, 200, - "plugin_high_time_hist"); - #endif - //======================================================================== - + MEAS__Make_Meas_Hists_for_Malloc_Meas; + MEAS__Make_Meas_Hists_for_Plugin_Meas; + //===================== Only VMS__malloc after this ==================== masterEnv = (MasterEnv*)_VMSMasterEnv; @@ -125,15 +113,15 @@ //One array for each core, 3 in array, core's masterVP scheds all allSchedSlots = VMS_int__malloc( NUM_CORES * sizeof(SchedSlot *) ); - _VMSMasterEnv->numSlaves = 0; //used to detect shut-down condition + _VMSMasterEnv->numSlavesAlive = 0; //used to detect shut-down condition - _VMSMasterEnv->numVPsCreated = 0; //used by create procr to set ID + _VMSMasterEnv->numSlavesCreated = 0; //used by create 
slave to set ID for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ ) { readyToAnimateQs[ coreIdx ] = makeVMSQ(); //Q: should give masterVP core-specific info as its init data? - masterVPs[ coreIdx ] = VMS_int__create_procr( (TopLevelFnPtr)&masterLoop, (void*)masterEnv ); + masterVPs[ coreIdx ] = VMS_int__create_slaveVP( (TopLevelFnPtr)&masterLoop, (void*)masterEnv ); masterVPs[ coreIdx ]->coreAnimatedBy = coreIdx; allSchedSlots[ coreIdx ] = create_sched_slots(); //makes for one core _VMSMasterEnv->numMasterInARow[ coreIdx ] = 0; @@ -146,10 +134,6 @@ _VMSMasterEnv->workStealingLock = UNLOCKED; - //Aug 19, 2010: no longer need to place initial masterVP into queue - // because coreLoop now controls -- animates its masterVP when no work - - //============================= MEASUREMENT STUFF ======================== #ifdef STATS__TURN_ON_PROBES _VMSMasterEnv->dynIntervalProbesInfo = @@ -163,14 +147,10 @@ _VMSMasterEnv->createPtInSecs = timeStamp.tv_sec +(timeStamp.tv_usec/1000000.0); #endif - #ifdef MEAS__TIME_MASTER_LOCK - _VMSMasterEnv->masterLockLowTimeHist = makeFixedBinHist( 50, 0, 2, - "master lock low time hist"); - _VMSMasterEnv->masterLockHighTimeHist = makeFixedBinHist( 50, 0, 100, - "master lock high time hist"); - #endif - MakeTheMeasHists(); + MEAS__Make_Meas_Hists_for_Master_Lock_Meas + + MEAS__Make_Meas_Hists_for_Language(); //======================================================================== } @@ -187,7 +167,7 @@ //Set state to mean "handling requests done, slot needs filling" schedSlots[i]->workIsDone = FALSE; - schedSlots[i]->needsProcrAssigned = TRUE; + schedSlots[i]->needsSlaveAssigned = TRUE; } return schedSlots; } @@ -233,17 +213,17 @@ void -VMS_WL__register_request_handler( RequestHandler requestHandler ) +VMS_SS__register_request_handler( RequestHandler requestHandler ) { _VMSMasterEnv->requestHandler = requestHandler; } void -VMS_WL__register_sched_assigner( Sched_Assigner schedAssigner ) - { _VMSMasterEnv->slaveSchedAssigner = schedAssigner; 
+VMS_SS__register_sched_assigner( Sched_Assigner schedAssigner ) + { _VMSMasterEnv->slaveAssigner = schedAssigner; } -VMS_WL__register_semantic_env( void *semanticEnv ) +VMS_SS__register_semantic_env( void *semanticEnv ) { _VMSMasterEnv->semanticEnv = semanticEnv; } @@ -254,7 +234,7 @@ *Wrapper lib layer calls this when it wants the system to start running.. */ void -VMS_WL__start_the_work_then_wait_until_done() +VMS_SS__start_the_work_then_wait_until_done() { #ifdef SEQUENTIAL /*Only difference between version with an OS thread pinned to each core and @@ -293,7 +273,7 @@ //TODO: look at architecting cleanest separation between request handler // and master loop, for dissipate, create, shutdown, and other non-semantic -// requests. Issue is chain: one removes requests from AppVP, one dispatches +// requests. Issue is chain: one removes requests from AppSlv, one dispatches // on type of request, and one handles each type.. but some types require // action from both request handler and master loop -- maybe just give the // request handler calls like: VMS__handle_X_request_type @@ -308,7 +288,7 @@ *The _VMSMasterEnv is needed by this shut down function, so the create-seed- * and-wait function has to free a bunch of stuff after it detects the * threads have all died: the masterEnv, the thread-related locations, - * masterVP any AppVPs that might still be allocated and sitting in the + * masterVP any AppSlvs that might still be allocated and sitting in the * semantic environment, or have been orphaned in the _VMSWorkQ. * *NOTE: the semantic plug-in is expected to use VMS__malloc to get all the @@ -318,22 +298,22 @@ *In here,create one core-loop shut-down processor for each core loop and put * them all directly into the readyToAnimateQ. *Note, this function can ONLY be called after the semantic environment no - * longer cares if AppVPs get animated after the point this is called. In + * longer cares if AppSlvs get animated after the point this is called. 
In * other words, this can be used as an abort, or else it should only be - * called when all AppVPs have finished dissipate requests -- only at that + * called when all AppSlvs have finished dissipate requests -- only at that * point is it sure that all results have completed. */ void -VMS_int__shutdown() +VMS_SS__shutdown() { int coreIdx; - SlaveVP *shutDownPr; + SlaveVP *shutDownSlv; //create the shutdown processors, one for each core loop -- put them // directly into the Q -- each core will die when gets one for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ ) { //Note, this is running in the master - shutDownPr = VMS_int__create_procr( &endOSThreadFn, NULL ); - writeVMSQ( shutDownPr, _VMSMasterEnv->readyToAnimateQs[coreIdx] ); + shutDownSlv = VMS_int__create_slaveVP( &endOSThreadFn, NULL ); + writeVMSQ( shutDownSlv, _VMSMasterEnv->readyToAnimateQs[coreIdx] ); } } @@ -348,78 +328,37 @@ * up just as if it never jumped out, before calling pthread_exit. *The end-point of core loop will free the stack and so forth of the * processor that animates this function, (this fn is transfering the - * animator of the AppVP that is in turn animating this function over + * animator of the AppSlv that is in turn animating this function over * to core loop function -- note that this slices out a level of virtual * processors). 
*/ void -endOSThreadFn( void *initData, SlaveVP *animatingPr ) +endOSThreadFn( void *initData, SlaveVP *animatingSlv ) { -#ifdef SEQUENTIAL - asmTerminateCoreLoopSeq(animatingPr); -#else - asmTerminateCoreLoop(animatingPr); -#endif + #ifdef SEQUENTIAL + asmTerminateCoreLoopSeq(animatingSlv); + #else + asmTerminateCoreLoop(animatingSlv); + #endif } /*This is called from the startup & shutdown */ void -VMS_int__cleanup_at_end_of_shutdown() +VMS_SS__cleanup_at_end_of_shutdown() { - //unused - //VMSQueueStruc **readyToAnimateQs; - //int coreIdx; - //SlaveVP **masterVPs; - //SchedSlot ***allSchedSlots; //ptr to array of ptrs - //Before getting rid of everything, print out any measurements made forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, (DynArrayFnPtr)&printHist ); forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, (DynArrayFnPtr)&saveHistToFile); forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, &freeHist ); - #ifdef MEAS__TIME_PLUGIN - printHist( _VMSMasterEnv->reqHdlrLowTimeHist ); - saveHistToFile( _VMSMasterEnv->reqHdlrLowTimeHist ); - printHist( _VMSMasterEnv->reqHdlrHighTimeHist ); - saveHistToFile( _VMSMasterEnv->reqHdlrHighTimeHist ); - freeHistExt( _VMSMasterEnv->reqHdlrLowTimeHist ); - freeHistExt( _VMSMasterEnv->reqHdlrHighTimeHist ); - #endif - #ifdef MEAS__TIME_MALLOC - printHist( _VMSMasterEnv->mallocTimeHist ); - saveHistToFile( _VMSMasterEnv->mallocTimeHist ); - printHist( _VMSMasterEnv->freeTimeHist ); - saveHistToFile( _VMSMasterEnv->freeTimeHist ); - freeHistExt( _VMSMasterEnv->mallocTimeHist ); - freeHistExt( _VMSMasterEnv->freeTimeHist ); - #endif - #ifdef MEAS__TIME_MASTER_LOCK - printHist( _VMSMasterEnv->masterLockLowTimeHist ); - printHist( _VMSMasterEnv->masterLockHighTimeHist ); - #endif - #ifdef MEAS__TIME_MASTER - printHist( _VMSMasterEnv->pluginTimeHist ); - for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ ) - { - freeVMSQ( readyToAnimateQs[ coreIdx ] ); - //master VPs were created external to VMS, so use external free - 
VMS_int__dissipate_procr( masterVPs[ coreIdx ] ); - - freeSchedSlots( allSchedSlots[ coreIdx ] ); - } - #endif - #ifdef MEAS__TIME_STAMP_SUSP - printHist( _VMSMasterEnv->pluginTimeHist ); - for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ ) - { - freeVMSQ( readyToAnimateQs[ coreIdx ] ); - //master VPs were created external to VMS, so use external free - VMS_int__dissipate_procr( masterVPs[ coreIdx ] ); - - freeSchedSlots( allSchedSlots[ coreIdx ] ); - } - #endif + + MEAS__Print_Hists_for_Susp_Meas; + MEAS__Print_Hists_for_Master_Meas; + MEAS__Print_Hists_for_Master_Lock_Meas; + MEAS__Print_Hists_for_Malloc_Meas; + MEAS__Print_Hists_for_Plugin_Meas; + //All the environment data has been allocated with VMS__malloc, so just // free its internal big-chunk and all inside it disappear. @@ -431,24 +370,24 @@ for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ ) { freeVMSQ( readyToAnimateQs[ coreIdx ] ); - //master VPs were created external to VMS, so use external free - VMS__dissipate_procr( masterVPs[ coreIdx ] ); + //master Slvs were created external to VMS, so use external free + VMS_int__dissipate_slaveVP( masterVPs[ coreIdx ] ); freeSchedSlots( allSchedSlots[ coreIdx ] ); } - VMS__free( _VMSMasterEnv->readyToAnimateQs ); - VMS__free( _VMSMasterEnv->masterVPs ); - VMS__free( _VMSMasterEnv->allSchedSlots ); + VMS_int__free( _VMSMasterEnv->readyToAnimateQs ); + VMS_int__free( _VMSMasterEnv->masterVPs ); + VMS_int__free( _VMSMasterEnv->allSchedSlots ); //============================= MEASUREMENT STUFF ======================== #ifdef STATS__TURN_ON_PROBES - freeDynArrayDeep( _VMSMasterEnv->dynIntervalProbesInfo, &VMS__free_probe); + freeDynArrayDeep( _VMSMasterEnv->dynIntervalProbesInfo, &VMS_WL__free_probe); #endif //======================================================================== */ //These are the only two that use system free - VMS_ext__free_free_list( _VMSMasterEnv->freeListHead ); + VMS_ext__free_free_list( _VMSMasterEnv->freeLists ); free( (void 
*)_VMSMasterEnv ); } diff -r eaf7e4c58c9e -r 0c83ea8adefc VMS_defs__HW_specific.h --- a/VMS_defs__HW_specific.h Wed Feb 22 11:39:12 2012 -0800 +++ b/VMS_defs__HW_specific.h Sun Mar 04 14:26:35 2012 -0800 @@ -27,7 +27,7 @@ // stack size in virtual processors created #define VIRT_PROCR_STACK_SIZE 0x8000 /* 32K */ - // memory for VMS__malloc + // memory for VMS_WL__malloc #define MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE 0x40000000 /* 1G */ //Frequency of TS counts -- have to do tests to verify diff -r eaf7e4c58c9e -r 0c83ea8adefc VMS_defs__lang_specific.h --- a/VMS_defs__lang_specific.h Wed Feb 22 11:39:12 2012 -0800 +++ b/VMS_defs__lang_specific.h Sun Mar 04 14:26:35 2012 -0800 @@ -13,80 +13,8 @@ //=================== Language-specific Measurement Stuff =================== // -//TODO: Figure out way to move these into language dir.. -// wrap them in #ifdef MEAS__... +//TODO: move these into the language implementation directories // -#ifndef MAKE_HISTS_FOR_MEASUREMENTS -#define MakeTheMeasHists() -#endif - -//=========================================================================== -//VPThread -#ifdef VTHREAD - -#define createHistIdx 1 //note: starts at 1 -#define mutexLockHistIdx 2 -#define mutexUnlockHistIdx 3 -#define condWaitHistIdx 4 -#define condSignalHistIdx 5 - -#define MakeTheMeasHists() \ - _VMSMasterEnv->measHistsInfo = \ - makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \ - makeAMeasHist( createHistIdx, "create", 250, 0, 100 ) \ - makeAMeasHist( mutexLockHistIdx, "mutex_lock", 50, 0, 100 ) \ - makeAMeasHist( mutexUnlockHistIdx, "mutex_unlock", 50, 0, 100 ) \ - makeAMeasHist( condWaitHistIdx, "cond_wait", 50, 0, 100 ) \ - makeAMeasHist( condSignalHistIdx, "cond_signal", 50, 0, 100 ) - - -#define Meas_startCreate \ - int32 startStamp, endStamp; \ - saveLowTimeStampCountInto( startStamp ); \ - -#define Meas_endCreate \ - saveLowTimeStampCountInto( endStamp ); \ - addIntervalToHist( startStamp, endStamp, \ - _VMSMasterEnv->measHists[ 
createHistIdx ] ); - -#define Meas_startMutexLock \ - int32 startStamp, endStamp; \ - saveLowTimeStampCountInto( startStamp ); \ - -#define Meas_endMutexLock \ - saveLowTimeStampCountInto( endStamp ); \ - addIntervalToHist( startStamp, endStamp, \ - _VMSMasterEnv->measHists[ mutexLockHistIdx ] ); - -#define Meas_startMutexUnlock \ - int32 startStamp, endStamp; \ - saveLowTimeStampCountInto( startStamp ); \ - -#define Meas_endMutexUnlock \ - saveLowTimeStampCountInto( endStamp ); \ - addIntervalToHist( startStamp, endStamp, \ - _VMSMasterEnv->measHists[ mutexUnlockHistIdx ] ); - -#define Meas_startCondWait \ - int32 startStamp, endStamp; \ - saveLowTimeStampCountInto( startStamp ); \ - -#define Meas_endCondWait \ - saveLowTimeStampCountInto( endStamp ); \ - addIntervalToHist( startStamp, endStamp, \ - _VMSMasterEnv->measHists[ condWaitHistIdx ] ); - -#define Meas_startCondSignal \ - int32 startStamp, endStamp; \ - saveLowTimeStampCountInto( startStamp ); \ - -#define Meas_endCondSignal \ - saveLowTimeStampCountInto( endStamp ); \ - addIntervalToHist( startStamp, endStamp, \ - _VMSMasterEnv->measHists[ condSignalHistIdx ] ); - -#endif - //=========================================================================== @@ -97,7 +25,7 @@ #define spawnHistIdx 1 //note: starts at 1 #define syncHistIdx 2 -#define MakeTheMeasHists() \ +#define MEAS__Make_Meas_Hists_for_Language() \ _VMSMasterEnv->measHistsInfo = \ makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \ makeAMeasHist( spawnHistIdx, "Spawn", 50, 0, 200 ) \ @@ -133,7 +61,7 @@ #define ReceiveFromToHistIdx 3 #define ReceiveOfTypeHistIdx 4 -#define MakeTheMeasHists() \ +#define MEAS__Make_Meas_Hists_for_Language() \ _VMSMasterEnv->measHistsInfo = \ makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \ makeAMeasHist( SendFromToHistIdx, "SendFromTo", 50, 0, 100 ) \ diff -r eaf7e4c58c9e -r 0c83ea8adefc VMS_defs__main.h --- a/VMS_defs__main.h Wed Feb 22 11:39:12 2012 -0800 +++ 
b/VMS_defs__main.h Sun Mar 04 14:26:35 2012 -0800 @@ -6,8 +6,8 @@ * */ -#ifndef _VMS_DEFS_H -#define _VMS_DEFS_H +#ifndef _VMS_DEFS_MAIN_H +#define _VMS_DEFS_MAIN_H #define _GNU_SOURCE //=========================== VMS-wide defs =============================== @@ -19,7 +19,7 @@ // so these defs can be at the top, and writePrivQ defined later on.. #define writeVMSQ writePrivQ #define readVMSQ readPrivQ -#define makeVMSQ makeVMSPrivQ +#define makeVMSQ makePrivQ #define numInVMSQ numInPrivQ #define VMSQueueStruc PrivQueueStruc @@ -31,21 +31,21 @@ // //When SEQUENTIAL is defined, VMS does sequential exe in the main thread // It still does co-routines and all the mechanisms are the same, it just -// has only a single thread and animates VPs one at a time +// has only a single thread and animates Slvs one at a time //#define SEQUENTIAL //#define USE_WORK_STEALING //turns on the probe-instrumentation in the application -- when not // defined, the calls to the probe functions turn into comments -#define STATS__ENABLE_PROBES +//#define STATS__ENABLE_PROBES //#define TURN_ON_DEBUG_PROBES //These defines turn types of bug messages on and off // be sure debug messages are un-commented (next block of defines) #define dbgAppFlow TRUE /* Top level flow of application code -- general*/ #define dbgProbes FALSE /* for issues inside probes themselves*/ -#define dbgB2BMaster FALSE /* in coreloop, back to back master VPs*/ +#define dbgB2BMaster FALSE /* in coreloop, back to back master Slvs*/ #define dbgRqstHdlr FALSE /* in request handler code*/ //Comment or un- the substitute half to turn on/off types of debug message @@ -74,112 +74,310 @@ //================== Turn Measurement Things on and off ==================== -//#define MEAS__TIME_2011_SYS +//#define MEAS__TURN_ON_SYSTEM_MEAS -//define this if any MEAS__... 
below are -//#define MAKE_HISTS_FOR_MEASUREMENTS - //when MEAS__TIME_STAMP_SUSP is defined, causes code to be inserted and - // compiled-in that saves the low part of the time stamp count just before - // suspending a processor and just after resuming that processor. It is - // saved into a field added to VirtProcr. Have to sanity-check for - // rollover of low portion into high portion. -//#define MEAS__TIME_STAMP_SUSP -//#define MEAS__TIME_MASTER -//#define MEAS__TIME_PLUGIN -//#define MEAS__TIME_MALLOC -//#define MEAS__TIME_MASTER_LOCK +/*NOTE: define MEAS__TURN_ON_MAKE_HISTS if any other MEAS__... below are*/ +//#define MEAS__TURN_ON_MAKE_HISTS - //For code that calculates normalization-offset between TSC counts of - // different cores. -//#define NUM_TSC_ROUND_TRIPS 10 +//#define MEAS__TURN_ON_SUSP_MEAS +//#define MEAS__TURN_ON_MASTER_MEAS +//#define MEAS__TURN_ON_PLUGIN_MEAS +//#define MEAS__TURN_ON_MALLOC_MEAS +//#define MEAS__TURN_ON_MASTER_LOCK_MEAS + /*turn on/off subtraction of create measurements from plugin meas*/ +//#define MEAS__TURN_ON_EXCLUDE_CREATION_TIME -//=================== Macros to Capture Measurements ====================== -// -//===== RDTSC wrapper ===== -//Also runs with x86_64 code -#define saveTSCLowHigh(lowHighIn) \ - asm volatile("RDTSC; \ - movl %%eax, %0; \ - movl %%edx, %1;" \ - /* outputs */ : "=m" (lowHighIn.lowHigh[0]), "=m" (lowHighIn.lowHigh[1])\ - /* inputs */ : \ - /* clobber */ : "%eax", "%edx" \ - ); - -#define saveTimeStampCountInto(low, high) \ - asm volatile("RDTSC; \ - movl %%eax, %0; \ - movl %%edx, %1;" \ - /* outputs */ : "=m" (low), "=m" (high)\ - /* inputs */ : \ - /* clobber */ : "%eax", "%edx" \ - ); - -#define saveLowTimeStampCountInto(low) \ - asm volatile("RDTSC; \ - movl %%eax, %0;" \ - /* outputs */ : "=m" (low) \ - /* inputs */ : \ - /* clobber */ : "%eax", "%edx" \ - ); - //================== Macros define types of meas want ===================== -#ifdef MEAS__TIME_PLUGIN -#define Meas_startReqHdlr \ - 
int32 startStamp1, endStamp1; \ - saveLowTimeStampCountInto( startStamp1 ); +#ifdef MEAS__TURN_ON_SUSP_MEAS + #define MEAS__Insert_Susp_Meas_Fields_into_Slave \ + uint32 preSuspTSCLow; \ + uint32 postSuspTSCLow; -#define Meas_endReqHdlr \ - saveLowTimeStampCountInto( endStamp1 ); \ - addIntervalToHist( startStamp1, endStamp1, \ - _VMSMasterEnv->reqHdlrLowTimeHist ); \ - addIntervalToHist( startStamp1, endStamp1, \ - _VMSMasterEnv->reqHdlrHighTimeHist ); - -#elif defined MEAS__TIME_2011_SYS -#define Meas_startMasterLoop \ - TSCountLowHigh startStamp1, endStamp1; \ - saveTSCLowHigh( endStamp1 ); \ - _VMSMasterEnv->cyclesTillStartMasterLoop = \ - endStamp1.longVal - masterVP->startSusp.longVal; + #define MEAS__Insert_Susp_Meas_Fields_into_MasterEnv \ + Histogram *suspLowTimeHist; \ + Histogram *suspHighTimeHist; -#define Meas_startReqHdlr \ - saveTSCLowHigh( startStamp1 ); \ - _VMSMasterEnv->startReqHdlr.longVal = startStamp1.longVal; + #define MEAS__Make_Meas_Hists_for_Susp_Meas \ + _VMSMasterEnv->suspLowTimeHist = makeFixedBinHistExt( 100, 0, 200,\ + "master_low_time_hist");\ + _VMSMasterEnv->suspHighTimeHist = makeFixedBinHistExt( 100, 0, 200,\ + "master_high_time_hist"); + + //record time stamp: compare to time-stamp recorded below + #define MEAS__Capture_Pre_Susp_Point \ + saveLowTimeStampCountInto( animatingSlv->preSuspTSCLow ); + + //NOTE: only take low part of count -- do sanity check when take diff + #define MEAS__Capture_Post_Susp_Point \ + saveLowTimeStampCountInto( animatingSlv->postSuspTSCLow );\ + addIntervalToHist( preSuspTSCLow, postSuspTSCLow,\ + _VMSMasterEnv->suspLowTimeHist ); \ + addIntervalToHist( preSuspTSCLow, postSuspTSCLow,\ + _VMSMasterEnv->suspHighTimeHist ); -#define Meas_endReqHdlr + #define MEAS__Print_Hists_for_Susp_Meas \ + printHist( _VMSMasterEnv->pluginTimeHist ); + +#else + #define MEAS__Insert_Susp_Meas_Fields_into_Slave + #define MEAS__Insert_Susp_Meas_Fields_into_MasterEnv + #define MEAS__Make_Meas_Hists_for_Susp_Meas + #define 
MEAS__Capture_Pre_Susp_Point + #define MEAS__Capture_Post_Susp_Point + #define MEAS__Print_Hists_for_Susp_Meas +#endif -#define Meas_endMasterLoop \ - saveTSCLowHigh( startStamp1 ); \ - _VMSMasterEnv->endMasterLoop.longVal = startStamp1.longVal; +#ifdef MEAS__TURN_ON_MASTER_MEAS + #define MEAS__Insert_Master_Meas_Fields_into_Slave \ + uint32 startMasterTSCLow; \ + uint32 endMasterTSCLow; + + #define MEAS__Insert_Master_Meas_Fields_into_MasterEnv \ + Histogram *masterLowTimeHist; \ + Histogram *masterHighTimeHist; + + #define MEAS__Make_Meas_Hists_for_Master_Meas \ + _VMSMasterEnv->masterLowTimeHist = makeFixedBinHistExt( 100, 0, 200,\ + "master_low_time_hist");\ + _VMSMasterEnv->masterHighTimeHist = makeFixedBinHistExt( 100, 0, 200,\ + "master_high_time_hist"); + + //Total Master time includes one coreloop time -- just assume the core + // loop time is same for Master as for AppSlvs, even though it may be + // smaller due to higher predictability of the fixed jmp. + #define MEAS__Capture_Pre_Master_Point\ + saveLowTimeStampCountInto( masterVP->startMasterTSCLow ); + + #define MEAS__Capture_Post_Master_Point \ + saveLowTimeStampCountInto( masterVP->endMasterTSCLow );\ + addIntervalToHist( startMasterTSCLow, endMasterTSCLow,\ + _VMSMasterEnv->masterLowTimeHist ); \ + addIntervalToHist( startMasterTSCLow, endMasterTSCLow,\ + _VMSMasterEnv->masterHighTimeHist ); + + #define MEAS__Print_Hists_for_Master_Meas \ + printHist( _VMSMasterEnv->pluginTimeHist ); #else -#define Meas_startMasterLoop -#define Meas_startReqHdlr -#define Meas_endReqHdlr -#define Meas_endMasterLoop + #define MEAS__Insert_Master_Meas_Fields_into_Slave + #define MEAS__Insert_Master_Meas_Fields_into_MasterEnv + #define MEAS__Make_Meas_Hists_for_Master_Meas + #define MEAS__Capture_Pre_Master_Point + #define MEAS__Capture_Post_Master_Point + #define MEAS__Print_Hists_for_Master_Meas #endif + +#ifdef MEAS__TURN_ON_MASTER_LOCK_MEAS + #define MEAS__Insert_Master_Lock_Meas_Fields_into_MasterEnv \ + Histogram 
*masterLockLowTimeHist; \ + Histogram *masterLockHighTimeHist; + + #define MEAS__Make_Meas_Hists_for_Master_Lock_Meas \ + _VMSMasterEnv->masterLockLowTimeHist = makeFixedBinHist( 50, 0, 2, \ + "master lock low time hist");\ + _VMSMasterEnv->masterLockHighTimeHist = makeFixedBinHist( 50, 0, 100,\ + "master lock high time hist"); + + #define MEAS__Capture_Pre_Master_Lock_Point \ + int32 startStamp, endStamp; \ + saveLowTimeStampCountInto( startStamp ); + + #define MEAS__Capture_Post_Master_Lock_Point \ + saveLowTimeStampCountInto( endStamp ); \ + addIntervalToHist( startStamp, endStamp,\ + _VMSMasterEnv->masterLockLowTimeHist ); \ + addIntervalToHist( startStamp, endStamp,\ + _VMSMasterEnv->masterLockHighTimeHist ); + + #define MEAS__Print_Hists_for_Master_Lock_Meas \ + printHist( _VMSMasterEnv->masterLockLowTimeHist ); \ + printHist( _VMSMasterEnv->masterLockHighTimeHist ); + +#else + #define MEAS__Insert_Master_Lock_Meas_Fields_into_MasterEnv + #define MEAS__Make_Meas_Hists_for_Master_Lock_Meas + #define MEAS__Capture_Pre_Master_Lock_Point + #define MEAS__Capture_Post_Master_Lock_Point + #define MEAS__Print_Hists_for_Master_Lock_Meas +#endif + + +#ifdef MEAS__TURN_ON_MALLOC_MEAS + #define MEAS__Insert_Malloc_Meas_Fields_into_MasterEnv\ + Histogram *mallocTimeHist; \ + Histogram *freeTimeHist; + + #define MEAS__Make_Meas_Hists_for_Malloc_Meas \ + _VMSMasterEnv->mallocTimeHist = makeFixedBinHistExt( 100, 0, 30,\ + "malloc_time_hist");\ + _VMSMasterEnv->freeTimeHist = makeFixedBinHistExt( 100, 0, 30,\ + "free_time_hist"); + + #define MEAS__Capture_Pre_Malloc_Point \ + int32 startStamp, endStamp; \ + saveLowTimeStampCountInto( startStamp ); + + #define MEAS__Capture_Post_Malloc_Point \ + saveLowTimeStampCountInto( endStamp ); \ + addIntervalToHist( startStamp, endStamp,\ + _VMSMasterEnv->mallocTimeHist ); + + #define MEAS__Capture_Pre_Free_Point \ + int32 startStamp, endStamp; \ + saveLowTimeStampCountInto( startStamp ); + + #define MEAS__Capture_Post_Free_Point \ + 
saveLowTimeStampCountInto( endStamp ); \ + addIntervalToHist( startStamp, endStamp,\ + _VMSMasterEnv->freeTimeHist ); + + #define MEAS__Print_Hists_for_Malloc_Meas \ + printHist( _VMSMasterEnv->mallocTimeHist ); \ + saveHistToFile( _VMSMasterEnv->mallocTimeHist ); \ + printHist( _VMSMasterEnv->freeTimeHist ); \ + saveHistToFile( _VMSMasterEnv->freeTimeHist ); \ + freeHistExt( _VMSMasterEnv->mallocTimeHist ); \ + freeHistExt( _VMSMasterEnv->freeTimeHist ); + +#else + #define MEAS__Insert_Malloc_Meas_Fields_into_MasterEnv + #define MEAS__Make_Meas_Hists_for_Malloc_Meas + #define MEAS__Capture_Pre_Malloc_Point + #define MEAS__Capture_Post_Malloc_Point + #define MEAS__Capture_Pre_Free_Point + #define MEAS__Capture_Post_Free_Point + #define MEAS__Print_Hists_for_Malloc_Meas +#endif + + +#ifdef MEAS__TURN_ON_SYSTEM_MEAS + #define MEAS__Insert_System_Meas_Fields_into_Slave \ + TSCountLowHigh startSusp; \ + uint64 totalSuspCycles; \ + uint32 numGoodSusp; + + #define MEAS__Insert_System_Meas_Fields_into_MasterEnv \ + TSCountLowHigh startMaster; \ + uint64 totalMasterCycles; \ + uint32 numMasterAnimations; \ + TSCountLowHigh startReqHdlr; \ + uint64 totalPluginCycles; \ + uint32 numPluginAnimations; \ + uint64 cyclesTillStartMasterLoop; \ + TSCountLowHigh endMasterLoop; + +#else + #define MEAS__Insert_System_Meas_Fields_into_Slave + #define MEAS__Insert_System_Meas_Fields_into_MasterEnv +#endif + + +/*This macro's a bit weird -- the same macro is defined in three different + * ways, depending upon which defines are turned on + *That's because added the system meas, which interferes with plugin meas, + * but don't want to make plugin meas stop working.. 
this is compromise + */ +#ifdef MEAS__TURN_ON_PLUGIN_MEAS + #define MEAS__Insert_Plugin_Meas_Fields_into_MasterEnv \ + Histogram *reqHdlrLowTimeHist; \ + Histogram *reqHdlrHighTimeHist; + + #define MEAS__Make_Meas_Hists_for_Plugin_Meas \ + _VMSMasterEnv->reqHdlrLowTimeHist = makeFixedBinHistExt( 100, 0, 200,\ + "plugin_low_time_hist");\ + _VMSMasterEnv->reqHdlrHighTimeHist = makeFixedBinHistExt( 100, 0, 200,\ + "plugin_high_time_hist"); + + #define Meas_startReqHdlr \ + int32 startStamp1, endStamp1; \ + saveLowTimeStampCountInto( startStamp1 ); + + #define Meas_endReqHdlr \ + saveLowTimeStampCountInto( endStamp1 ); \ + addIntervalToHist( startStamp1, endStamp1, \ + _VMSMasterEnv->reqHdlrLowTimeHist ); \ + addIntervalToHist( startStamp1, endStamp1, \ + _VMSMasterEnv->reqHdlrHighTimeHist ); + + #define MEAS__Print_Hists_for_Plugin_Meas \ + printHist( _VMSMasterEnv->reqHdlrLowTimeHist ); \ + saveHistToFile( _VMSMasterEnv->reqHdlrLowTimeHist ); \ + printHist( _VMSMasterEnv->reqHdlrHighTimeHist ); \ + saveHistToFile( _VMSMasterEnv->reqHdlrHighTimeHist ); \ + freeHistExt( _VMSMasterEnv->reqHdlrLowTimeHist ); \ + freeHistExt( _VMSMasterEnv->reqHdlrHighTimeHist ); + +#elif defined MEAS__TURN_ON_SYSTEM_MEAS + #define MEAS__Insert_Plugin_Meas_Fields_into_MasterEnv + + #define MEAS__Make_Meas_Hists_for_Plugin_Meas + + #define Meas_startMasterLoop \ + TSCountLowHigh startStamp1, endStamp1; \ + saveTSCLowHigh( endStamp1 ); \ + _VMSMasterEnv->cyclesTillStartMasterLoop = \ + endStamp1.longVal - masterVP->startSusp.longVal; + + #define Meas_endMasterLoop \ + saveTSCLowHigh( startStamp1 ); \ + _VMSMasterEnv->endMasterLoop.longVal = startStamp1.longVal; + + #define Meas_startReqHdlr \ + saveTSCLowHigh( startStamp1 ); \ + _VMSMasterEnv->startReqHdlr.longVal = startStamp1.longVal; + + #define Meas_endReqHdlr + + #define MEAS__Print_Hists_for_Plugin_Meas + +#else + #define MEAS__Insert_Plugin_Meas_Fields_into_MasterEnv + #define MEAS__Make_Meas_Hists_for_Plugin_Meas + #define 
Meas_startMasterLoop + #define Meas_endMasterLoop + #define Meas_startReqHdlr + #define Meas_endReqHdlr + #define MEAS__Print_Hists_for_Plugin_Meas +#endif + + +//Experiment in two-step macros -- if doesn't work, insert each separately +#define MEAS__Insert_Meas_Fields_into_Slave \ + MEAS__Insert_Susp_Meas_Fields_into_Slave \ + MEAS__Insert_Master_Meas_Fields_into_Slave \ + MEAS__Insert_System_Meas_Fields_into_Slave + + //====================== Histogram Macros -- Create ======================== // // -#ifdef MAKE_HISTS_FOR_MEASUREMENTS -#define makeAMeasHist( idx, name, numBins, startVal, binWidth ) \ - makeHighestDynArrayIndexBeAtLeast( _VMSMasterEnv->measHistsInfo, idx ); \ - _VMSMasterEnv->measHists[idx] = \ + +//The language implementation should include a definition of this macro, +// which creates all the histograms the language uses to collect measurements +// of plugin operation -- so, if the language didn't define it, must +// define it here (as empty), to avoid compile error +#ifndef MEAS__Make_Meas_Hists_for_Language +#define MEAS__Make_Meas_Hists_for_Language() /*consume parens!*/ +#endif + + +#ifdef MEAS__TURN_ON_MAKE_HISTS + #define makeAMeasHist( idx, name, numBins, startVal, binWidth ) \ + makeHighestDynArrayIndexBeAtLeast( _VMSMasterEnv->measHistsInfo, idx ); \ + _VMSMasterEnv->measHists[idx] = \ makeFixedBinHist( numBins, startVal, binWidth, name ); #else -#define makeAMeasHist( idx, name, numBins, startVal, binWidth ) + #define makeAMeasHist( idx, name, numBins, startVal, binWidth ) #endif +//============================== Probes =================================== -#define MEAS__SUB_CREATE /*turn on/off subtraction of create from plugin*/ -#include "VMS_defs__lang_specific.h" - +//=========================================================================== #endif /* _VMS_DEFS_H */ diff -r eaf7e4c58c9e -r 0c83ea8adefc probes.c --- a/probes.c Wed Feb 22 11:39:12 2012 -0800 +++ b/probes.c Sun Mar 04 14:26:35 2012 -0800 @@ -13,100 +13,46 @@ 
//==================== Probes ================= -#ifdef STATS__USE_TSC_PROBES - -int32 -VMS__create_histogram_probe( int32 numBins, float32 startValue, - float32 binWidth, char *nameStr ) - { IntervalProbe *newProbe; - int32 idx; - FloatHist *hist; - - idx = VMS__create_single_interval_probe( nameStr ); - newProbe = _VMSMasterEnv->intervalProbes[ idx ]; - - hist = makeFloatHistogram( numBins, startValue, binWidth ); - newProbe->hist = hist; - return idx; - } - -void -VMS_impl__record_interval_start_in_probe( int32 probeID ) - { IntervalProbe *probe; - - probe = _VMSMasterEnv->intervalProbes[ probeID ]; - probe->startStamp = getTSCount(); - } - -void -VMS_impl__record_interval_end_in_probe( int32 probeID ) - { IntervalProbe *probe; - TSCount endStamp; - - endStamp = getTSCount(); - - probe = _VMSMasterEnv->intervalProbes[ probeID ]; - probe->endStamp = endStamp; - - if( probe->hist != NULL ) - { TSCount interval = probe->endStamp - probe->startStamp; - //if the interval is sane, then add to histogram - if( interval < probe->hist->endOfRange * 10 ) - addToFloatHist( interval, probe->hist ); - } - } - -void -VMS_impl__print_stats_of_probe( int32 probeID ) - { IntervalProbe *probe; - - probe = _VMSMasterEnv->intervalProbes[ probeID ]; - - if( probe->hist == NULL ) - { - printf("probe: %s, interval: %.6lf\n", probe->nameStr,probe->interval); - } - - else - { - printf( "probe: %s\n", probe->nameStr ); - printFloatHist( probe->hist ); - } - } -#else - /* * In practice, probe operations are called from the app, from inside slaves - * -- so have to be sure each probe is single-VP owned, and be sure that + * -- so have to be sure each probe is single-Slv owned, and be sure that * any place common structures are modified it's done inside the master. * So -- the only place common structures are modified is during creation. * after that, all mods are to individual instances. 
* * Thniking perhaps should change the semantics to be that probes are * attached to the virtual processor -- and then everything is guaranteed - * to be isolated -- except then can't take any intervals that span VPs, - * and would have to transfer the probes to Master env when VP dissipates.. + * to be isolated -- except then can't take any intervals that span Slvs, + * and would have to transfer the probes to Master env when Slv dissipates.. * gets messy.. * * For now, just making so that probe creation causes a suspend, so that * the dynamic array in the master env is only modified from the master * */ + +//============================ Helpers =========================== +inline void +doNothing() + { + } + + IntervalProbe * -create_generic_probe( char *nameStr, SlaveVP *animPr ) -{ +create_generic_probe( char *nameStr, SlaveVP *animSlv ) + { VMSSemReq reqData; reqData.reqType = createProbe; reqData.nameStr = nameStr; - VMS_WL__send_VMSSem_request( &reqData, animPr ); + VMS_WL__send_VMSSem_request( &reqData, animSlv ); - return animPr->dataRetFromReq; + return animSlv->dataRetFromReq; } /*Use this version from outside VMS -- it uses external malloc, and modifies - * dynamic array, so can't be animated in a slave VP + * dynamic array, so can't be animated in a slave Slv */ IntervalProbe * ext__create_generic_probe( char *nameStr ) @@ -125,24 +71,38 @@ return newProbe; } +//============================ Fns def in header ======================= -/*Only call from inside master or main startup/shutdown thread - */ -void -VMS_impl__free_probe( IntervalProbe *probe ) - { if( probe->hist != NULL ) freeDblHist( probe->hist ); - if( probe->nameStr != NULL) VMS_int__free( probe->nameStr ); - VMS_int__free( probe ); +int32 +VMS_impl__create_single_interval_probe( char *nameStr, SlaveVP *animSlv ) + { IntervalProbe *newProbe; + + newProbe = create_generic_probe( nameStr, animSlv ); + + return newProbe->probeID; } +int32 +VMS_impl__create_histogram_probe( int32 numBins, 
float64 startValue, + float64 binWidth, char *nameStr, SlaveVP *animSlv ) + { IntervalProbe *newProbe; + DblHist *hist; + + newProbe = create_generic_probe( nameStr, animSlv ); + + hist = makeDblHistogram( numBins, startValue, binWidth ); + newProbe->hist = hist; + return newProbe->probeID; + } + int32 -VMS_impl__record_time_point_into_new_probe( char *nameStr, SlaveVP *animPr) +VMS_impl__record_time_point_into_new_probe( char *nameStr, SlaveVP *animSlv) { IntervalProbe *newProbe; struct timeval *startStamp; float64 startSecs; - newProbe = create_generic_probe( nameStr, animPr ); + newProbe = create_generic_probe( nameStr, animSlv ); newProbe->endSecs = 0; gettimeofday( &(newProbe->startStamp), NULL); @@ -174,30 +134,19 @@ return newProbe->probeID; } -int32 -VMS_impl__create_single_interval_probe( char *nameStr, SlaveVP *animPr ) - { IntervalProbe *newProbe; - newProbe = create_generic_probe( nameStr, animPr ); - - return newProbe->probeID; +/*Only call from inside master or main startup/shutdown thread + */ +void +VMS_impl__free_probe( IntervalProbe *probe ) + { if( probe->hist != NULL ) freeDblHist( probe->hist ); + if( probe->nameStr != NULL) VMS_int__free( probe->nameStr ); + VMS_int__free( probe ); } -int32 -VMS_impl__create_histogram_probe( int32 numBins, float64 startValue, - float64 binWidth, char *nameStr, SlaveVP *animPr ) - { IntervalProbe *newProbe; - DblHist *hist; - - newProbe = create_generic_probe( nameStr, animPr ); - - hist = makeDblHistogram( numBins, startValue, binWidth ); - newProbe->hist = hist; - return newProbe->probeID; - } void -VMS_impl__index_probe_by_its_name( int32 probeID, SlaveVP *animPr ) +VMS_impl__index_probe_by_its_name( int32 probeID, SlaveVP *animSlv ) { IntervalProbe *probe; //TODO: fix this To be in Master -- race condition @@ -206,8 +155,9 @@ addValueIntoTable(probe->nameStr, probe, _VMSMasterEnv->probeNameHashTbl); } + IntervalProbe * -VMS_impl__get_probe_by_name( char *probeName, SlaveVP *animPr ) 
+VMS_impl__get_probe_by_name( char *probeName, SlaveVP *animSlv ) { //TODO: fix this To be in Master -- race condition return getValueFromTable( probeName, _VMSMasterEnv->probeNameHashTbl ); @@ -215,21 +165,21 @@ /*Everything is local to the animating procr, so no need for request, do - * work locally, in the anim Pr + * work locally, in the anim Slv */ void -VMS_impl__record_sched_choice_into_probe( int32 probeID, SlaveVP *animatingPr ) +VMS_impl__record_sched_choice_into_probe( int32 probeID, SlaveVP *animatingSlv ) { IntervalProbe *probe; probe = _VMSMasterEnv->intervalProbes[ probeID ]; probe->schedChoiceWasRecorded = TRUE; - probe->coreNum = animatingPr->coreAnimatedBy; - probe->procrID = animatingPr->procrID; - probe->procrCreateSecs = animatingPr->createPtInSecs; + probe->coreNum = animatingSlv->coreAnimatedBy; + probe->slaveID = animatingSlv->procrID; + probe->slaveCreateSecs = animatingSlv->createPtInSecs; } /*Everything is local to the animating procr, so no need for request, do - * work locally, in the anim Pr + * work locally, in the anim Slv */ void VMS_impl__record_interval_start_in_probe( int32 probeID ) @@ -237,44 +187,37 @@ DEBUG( dbgProbes, "record start of interval\n" ) probe = _VMSMasterEnv->intervalProbes[ probeID ]; - gettimeofday( &(probe->startStamp), NULL ); + probe->startStamp = getTSCount(); } /*Everything is local to the animating procr, so no need for request, do - * work locally, in the anim Pr + * work locally, in the anim Slv + * + *This should be safe to run inside SlaveVP -- weird behavior will be due + * to the logical error of having more than one interval open in overlapped. */ void VMS_impl__record_interval_end_in_probe( int32 probeID ) { IntervalProbe *probe; - struct timeval *endStamp, *startStamp; - float64 startSecs, endSecs; + TSCount endStamp; + endStamp = getTSCount(); + DEBUG( dbgProbes, "record end of interval\n" ) - //possible seg-fault if array resized by diff core right after this - // one gets probe..? 
Something like that? Might be safe.. don't care + probe = _VMSMasterEnv->intervalProbes[ probeID ]; - gettimeofday( &(probe->endStamp), NULL); - - //now turn into an interval held in a double - startStamp = &(probe->startStamp); - endStamp = &(probe->endStamp); - - startSecs = startStamp->tv_sec + ( startStamp->tv_usec / 1000000.0 ); - endSecs = endStamp->tv_sec + ( endStamp->tv_usec / 1000000.0 ); - - probe->interval = endSecs - startSecs; - probe->startSecs = startSecs; - probe->endSecs = endSecs; + probe->endStamp = endStamp; if( probe->hist != NULL ) - { + { TSCount interval = probe->endStamp - probe->startStamp; //if the interval is sane, then add to histogram - if( probe->interval < probe->hist->endOfRange * 10 ) - addToDblHist( probe->interval, probe->hist ); + if( interval < probe->hist->endOfRange * 10 ) + addToFloatHist( interval, probe->hist ); } } + void print_probe_helper( IntervalProbe *probe ) { @@ -283,7 +226,7 @@ if( probe->schedChoiceWasRecorded ) { printf( "coreNum: %d, procrID: %d, procrCreated: %0.6f | ", - probe->coreNum, probe->procrID, probe->procrCreateSecs ); + probe->coreNum, probe->slaveID, probe->slaveCreateSecs ); } if( probe->endSecs == 0 ) //just a single point in time @@ -318,22 +261,10 @@ } -inline void doNothing(){}; - -void -generic_print_probe( void *_probe ) - { - IntervalProbe *probe = (IntervalProbe *)_probe; - - //TODO segfault in printf - //print_probe_helper( probe ); - } - void VMS_impl__print_stats_of_all_probes() { forAllInDynArrayDo( _VMSMasterEnv->dynIntervalProbesInfo, - &generic_print_probe ); + &VMS_impl__print_stats_of_probe ); fflush( stdout ); } -#endif diff -r eaf7e4c58c9e -r 0c83ea8adefc probes.h --- a/probes.h Wed Feb 22 11:39:12 2012 -0800 +++ b/probes.h Sun Mar 04 14:26:35 2012 -0800 @@ -28,16 +28,16 @@ int32 schedChoiceWasRecorded; int32 coreNum; - int32 procrID; - float64 procrCreateSecs; + int32 slaveID; + float64 slaveCreateSecs; - #ifdef STATS__USE_TSC_PROBES + // #ifdef STATS__USE_TSC_PROBES TSCount 
startStamp; TSCount endStamp; - #else - struct timeval startStamp; - struct timeval endStamp; - #endif +// #else +// struct timeval startStamp; +// struct timeval endStamp; +// #endif float64 startSecs; float64 endSecs; float64 interval; @@ -45,136 +45,136 @@ }; +int32 +VMS_impl__create_single_interval_probe( char *nameStr, SlaveVP *animSlv ); + +int32 +VMS_impl__create_histogram_probe( int32 numBins, float64 startValue, + float64 binWidth, char *nameStr, SlaveVP *animSlv ); + +int32 +VMS_impl__record_time_point_into_new_probe( char *nameStr, SlaveVP *animSlv); + +int32 +VMS_ext_impl__record_time_point_into_new_probe( char *nameStr ); + +void +VMS_impl__free_probe( IntervalProbe *probe ); + +void +VMS_impl__index_probe_by_its_name( int32 probeID, SlaveVP *animSlv ); + +IntervalProbe * +VMS_impl__get_probe_by_name( char *probeName, SlaveVP *animSlv ); + +void +VMS_impl__record_sched_choice_into_probe( int32 probeID, SlaveVP *animSlv ); + +void +VMS_impl__record_interval_start_in_probe( int32 probeID ); + +void +VMS_impl__record_interval_end_in_probe( int32 probeID ); + +void +VMS_impl__print_stats_of_probe( int32 probeID ); + +void +VMS_impl__print_stats_of_all_probes(); + //======================== Probes ============================= // // Use macros to allow turning probes off with a #define switch +// This means probes have zero impact on performance when off +//============================================================= +#define VMS_App__record_time_point_into_new_probe VMS_WL__record_time_point_into_new_probe +#define VMS_ext__record_time_point_into_new_probe +#define VMS_App__create_single_interval_probe VMS_WL__create_single_interval_probe +#define VMS_App__create_histogram_probe VMS_WL__create_histogram_probe +#define VMS_App__index_probe_by_its_name VMS_WL__index_probe_by_its_name +#define VMS_App__get_probe_by_name VMS_WL__get_probe_by_name +#define VMS_App__record_sched_choice_into_probe VMS_WL__record_sched_choice_into_probe +#define 
VMS_App__record_interval_start_in_probe VMS_WL__record_interval_start_in_probe +#define VMS_App__record_interval_end_in_probe VMS_WL__record_interval_end_in_probe +#define VMS_App__print_stats_of_probe VMS_WL__print_stats_of_probe +#define VMS_App__print_stats_of_all_probes VMS_WL__print_stats_of_all_probes + #ifdef STATS__ENABLE_PROBES -int32 -VMS_impl__record_time_point_into_new_probe( char *nameStr,SlaveVP *animPr); -#define VMS__record_time_point_into_new_probe( nameStr, animPr ) \ - VMS_impl__record_time_point_in_new_probe( nameStr, animPr ) +#define VMS_WL__record_time_point_into_new_probe( nameStr, animSlv ) \ + VMS_impl__record_time_point_in_new_probe( nameStr, animSlv ) -int32 -VMS_ext_impl__record_time_point_into_new_probe( char *nameStr ); #define VMS_ext__record_time_point_into_new_probe( nameStr ) \ VMS_ext_impl__record_time_point_into_new_probe( nameStr ) +#define VMS_WL__create_single_interval_probe( nameStr, animSlv ) \ + VMS_impl__create_single_interval_probe( nameStr, animSlv ) -int32 -VMS_impl__create_single_interval_probe( char *nameStr, SlaveVP *animPr ); -#define VMS__create_single_interval_probe( nameStr, animPr ) \ - VMS_impl__create_single_interval_probe( nameStr, animPr ) - - -int32 -VMS_impl__create_histogram_probe( int32 numBins, float64 startValue, - float64 binWidth, char *nameStr, SlaveVP *animPr ); -#define VMS__create_histogram_probe( numBins, startValue, \ - binWidth, nameStr, animPr ) \ +#define VMS_WL__create_histogram_probe( numBins, startValue, \ + binWidth, nameStr, animSlv ) \ VMS_impl__create_histogram_probe( numBins, startValue, \ - binWidth, nameStr, animPr ) -void -VMS_impl__free_probe( IntervalProbe *probe ); -#define VMS__free_probe( probe ) \ + binWidth, nameStr, animSlv ) +#define VMS_int__free_probe( probe ) \ VMS_impl__free_probe( probe ) -void -VMS_impl__index_probe_by_its_name( int32 probeID, SlaveVP *animPr ); -#define VMS__index_probe_by_its_name( probeID, animPr ) \ - VMS_impl__index_probe_by_its_name( probeID, 
animPr ) +#define VMS_WL__index_probe_by_its_name( probeID, animSlv ) \ + VMS_impl__index_probe_by_its_name( probeID, animSlv ) -IntervalProbe * -VMS_impl__get_probe_by_name( char *probeName, SlaveVP *animPr ); -#define VMS__get_probe_by_name( probeID, animPr ) \ - VMS_impl__get_probe_by_name( probeName, animPr ) +#define VMS_WL__get_probe_by_name( probeID, animSlv ) \ + VMS_impl__get_probe_by_name( probeName, animSlv ) -void -VMS_impl__record_sched_choice_into_probe( int32 probeID, SlaveVP *animPr ); -#define VMS__record_sched_choice_into_probe( probeID, animPr ) \ - VMS_impl__record_sched_choice_into_probe( probeID, animPr ) +#define VMS_WL__record_sched_choice_into_probe( probeID, animSlv ) \ + VMS_impl__record_sched_choice_into_probe( probeID, animSlv ) -void -VMS_impl__record_interval_start_in_probe( int32 probeID ); -#define VMS__record_interval_start_in_probe( probeID ) \ +#define VMS_WL__record_interval_start_in_probe( probeID ) \ VMS_impl__record_interval_start_in_probe( probeID ) -void -VMS_impl__record_interval_end_in_probe( int32 probeID ); -#define VMS__record_interval_end_in_probe( probeID ) \ +#define VMS_WL__record_interval_end_in_probe( probeID ) \ VMS_impl__record_interval_end_in_probe( probeID ) -void -VMS_impl__print_stats_of_probe( int32 probeID ); -#define VMS__print_stats_of_probe( probeID ) \ +#define VMS_WL__print_stats_of_probe( probeID ) \ VMS_impl__print_stats_of_probe( probeID ) -void -VMS_impl__print_stats_of_all_probes(); -#define VMS__print_stats_of_all_probes() \ +#define VMS_WL__print_stats_of_all_probes() \ VMS_impl__print_stats_of_all_probes() #else -int32 -VMS_impl__record_time_point_into_new_probe( char *nameStr,SlaveVP *animPr); -#define VMS__record_time_point_into_new_probe( nameStr, animPr ) \ +#define VMS_WL__record_time_point_into_new_probe( nameStr, animSlv ) \ 0 /* do nothing */ -int32 -VMS_ext_impl__record_time_point_into_new_probe( char *nameStr ); #define VMS_ext__record_time_point_into_new_probe( nameStr ) \ 0 /* do 
nothing */ -int32 -VMS_impl__create_single_interval_probe( char *nameStr, SlaveVP *animPr ); -#define VMS__create_single_interval_probe( nameStr, animPr ) \ +#define VMS_WL__create_single_interval_probe( nameStr, animSlv ) \ 0 /* do nothing */ -int32 -VMS_impl__create_histogram_probe( int32 numBins, float64 startValue, - float64 binWidth, char *nameStr, SlaveVP *animPr ); -#define VMS__create_histogram_probe( numBins, startValue, \ - binWidth, nameStr, animPr ) \ +#define VMS_WL__create_histogram_probe( numBins, startValue, \ + binWidth, nameStr, animSlv ) \ 0 /* do nothing */ -void -VMS_impl__index_probe_by_its_name( int32 probeID, SlaveVP *animPr ); -#define VMS__index_probe_by_its_name( probeID, animPr ) \ +#define VMS_WL__index_probe_by_its_name( probeID, animSlv ) \ /* do nothing */ -IntervalProbe * -VMS_impl__get_probe_by_name( char *probeName, SlaveVP *animPr ); -#define VMS__get_probe_by_name( probeID, animPr ) \ +#define VMS_WL__get_probe_by_name( probeID, animSlv ) \ NULL /* do nothing */ -void -VMS_impl__record_sched_choice_into_probe( int32 probeID, SlaveVP *animPr ); -#define VMS__record_sched_choice_into_probe( probeID, animPr ) \ +#define VMS_WL__record_sched_choice_into_probe( probeID, animSlv ) \ /* do nothing */ -void -VMS_impl__record_interval_start_in_probe( int32 probeID ); -#define VMS__record_interval_start_in_probe( probeID ) \ +#define VMS_WL__record_interval_start_in_probe( probeID ) \ /* do nothing */ -void -VMS_impl__record_interval_end_in_probe( int32 probeID ); -#define VMS__record_interval_end_in_probe( probeID ) \ +#define VMS_WL__record_interval_end_in_probe( probeID ) \ /* do nothing */ -inline void doNothing(); -void -VMS_impl__print_stats_of_probe( int32 probeID ); -#define VMS__print_stats_of_probe( probeID ) \ - doNothing/* do nothing */ +#define VMS_WL__print_stats_of_probe( probeID ) \ + ; /* do nothing */ -void -VMS_impl__print_stats_of_all_probes(); -#define VMS__print_stats_of_all_probes \ - doNothing/* do nothing */ 
+#define VMS_WL__print_stats_of_all_probes() \ + ;/* do nothing */ #endif /* defined STATS__ENABLE_PROBES */ diff -r eaf7e4c58c9e -r 0c83ea8adefc vmalloc.c --- a/vmalloc.c Wed Feb 22 11:39:12 2012 -0800 +++ b/vmalloc.c Sun Mar 04 14:26:35 2012 -0800 @@ -11,46 +11,200 @@ #include #include #include +#include +#include #include "VMS.h" #include "C_Libraries/Histogram/Histogram.h" -/*Helper function - *Insert a newly generated free chunk into the first spot on the free list. - * The chunk is cast as a MallocProlog, so the various pointers in it are - * accessed with C's help -- and the size of the prolog is easily added to - * the pointer when a chunk is returned to the app -- so C handles changes - * in pointer sizes among machines. - * - *The list head is a normal MallocProlog struct -- identified by its - * prevChunkInFreeList being NULL -- the only one. - * - *The end of the list is identified by next chunk being NULL, as usual. +#define MAX_UINT64 0xFFFFFFFFFFFFFFFF + +//A MallocProlog is a head element if the HigherInMem variable is NULL +//A Chunk is free if the prevChunkInFreeList variable is NULL + +/* + * This calculates the container which fits the given size. */ -void inline -add_chunk_to_free_list( MallocProlog *chunk, MallocProlog *listHead ) - { - chunk->nextChunkInFreeList = listHead->nextChunkInFreeList; - if( chunk->nextChunkInFreeList != NULL ) //if not last in free list - chunk->nextChunkInFreeList->prevChunkInFreeList = chunk; - chunk->prevChunkInFreeList = listHead; - listHead->nextChunkInFreeList = chunk; - } +inline +uint32 getContainer(size_t size) +{ + return (log2(size)-LOG128)/LOG54; +} +/* + * Removes the first chunk of a freeList + * The chunk is removed but not set as free. There is no check if + * the free list is empty, so make sure this is not the case. 
+ */ +inline +MallocProlog *removeChunk(MallocArrays* freeLists, uint32 containerIdx) +{ + MallocProlog** container = &freeLists->bigChunks[containerIdx]; + MallocProlog* removedChunk = *container; + *container = removedChunk->nextChunkInFreeList; + + if(removedChunk->nextChunkInFreeList) + removedChunk->nextChunkInFreeList->prevChunkInFreeList = + (MallocProlog*)container; + + if(*container == NULL) + { + if(containerIdx < 64) + freeLists->bigChunksSearchVector[0] &= ~((uint64)1 << containerIdx); + else + freeLists->bigChunksSearchVector[1] &= ~((uint64)1 << (containerIdx-64)); + } + + return removedChunk; +} -/*This is sequential code, meant to only be called from the Master, not from - * any slave VPs. - *Search down list, checking size by the nextHigherInMem pointer, to find - * first chunk bigger than size needed. - *Shave off the extra and make it into a new free-list element, hook it in - * then return the address of the found element plus size of prolog. - * +/* + * Removes the first chunk of a freeList + * The chunk is removed but not set as free. There is no check if + * the free list is empty, so make sure this is not the case. + */ +inline +MallocProlog *removeSmallChunk(MallocArrays* freeLists, uint32 containerIdx) +{ + MallocProlog** container = &freeLists->smallChunks[containerIdx]; + MallocProlog* removedChunk = *container; + *container = removedChunk->nextChunkInFreeList; + + if(removedChunk->nextChunkInFreeList) + removedChunk->nextChunkInFreeList->prevChunkInFreeList = + (MallocProlog*)container; + + return removedChunk; +} + +inline +size_t getChunkSize(MallocProlog* chunk) +{ + return (uintptr_t)chunk->nextHigherInMem - + (uintptr_t)chunk - sizeof(MallocProlog); +} + +/* + * Removes a chunk from a free list. 
+ */ +inline +void extractChunk(MallocProlog* chunk, MallocArrays *freeLists) +{ + chunk->prevChunkInFreeList->nextChunkInFreeList = chunk->nextChunkInFreeList; + if(chunk->nextChunkInFreeList) + chunk->nextChunkInFreeList->prevChunkInFreeList = chunk->prevChunkInFreeList; + + //The last element in the list points to the container. If the container points + //to NULL the container is empty + if(*((void**)(chunk->prevChunkInFreeList)) == NULL && getChunkSize(chunk) >= BIG_LOWER_BOUND) + { + //Find the appropriate container because we do not know it + uint64 containerIdx = ((uintptr_t)chunk->prevChunkInFreeList - (uintptr_t)freeLists->bigChunks) >> 3; + if(containerIdx < (uint32)64) + freeLists->bigChunksSearchVector[0] &= ~((uint64)1 << containerIdx); + if(containerIdx < 128 && containerIdx >=64) + freeLists->bigChunksSearchVector[1] &= ~((uint64)1 << (containerIdx-64)); + + } +} + +/* + * Merges two chunks. + * Chunk A has to be before chunk B in memory. Both have to be removed from + * a free list + */ +inline +MallocProlog *mergeChunks(MallocProlog* chunkA, MallocProlog* chunkB) +{ + chunkA->nextHigherInMem = chunkB->nextHigherInMem; + chunkB->nextHigherInMem->nextLowerInMem = chunkA; + return chunkA; +} +/* + * Inserts a chunk into a free list. + */ +inline +void insertChunk(MallocProlog* chunk, MallocProlog** container) +{ + chunk->nextChunkInFreeList = *container; + chunk->prevChunkInFreeList = (MallocProlog*)container; + if(*container) + (*container)->prevChunkInFreeList = chunk; + *container = chunk; +} + +/* + * Divides the chunk so that a new chunk of newSize is created. + * There is no size check, so make sure the size value is valid.
+ */ +inline +MallocProlog *divideChunk(MallocProlog* chunk, size_t newSize) +{ + MallocProlog* newChunk = (MallocProlog*)((uintptr_t)chunk->nextHigherInMem - + newSize - sizeof(MallocProlog)); + + newChunk->nextLowerInMem = chunk; + newChunk->nextHigherInMem = chunk->nextHigherInMem; + + chunk->nextHigherInMem->nextLowerInMem = newChunk; + chunk->nextHigherInMem = newChunk; + + return newChunk; +} + +/* + * Search for chunk in the list of big chunks. Split the block if it's too big + */ +inline +MallocProlog *searchChunk(MallocArrays *freeLists, size_t sizeRequested, uint32 containerIdx) +{ + MallocProlog* foundChunk; + + uint64 searchVector = freeLists->bigChunksSearchVector[0]; + //set small chunk bits to zero + searchVector &= MAX_UINT64 << containerIdx; + containerIdx = __builtin_ffsl(searchVector); + + if(containerIdx == 0) + { + searchVector = freeLists->bigChunksSearchVector[1]; + containerIdx = __builtin_ffsl(searchVector); + if(containerIdx == 0) + { + printf("VMS malloc failed: low memory"); + exit(1); + } + containerIdx += 64; + } + containerIdx--; + + + foundChunk = removeChunk(freeLists, containerIdx); + size_t chunkSize = getChunkSize(foundChunk); + + //If the new chunk is larger than the requested size: split + if(chunkSize > sizeRequested + 2 * sizeof(MallocProlog) + BIG_LOWER_BOUND) + { + MallocProlog *newChunk = divideChunk(foundChunk,sizeRequested); + containerIdx = getContainer(getChunkSize(foundChunk)) - 1; + insertChunk(foundChunk,&freeLists->bigChunks[containerIdx]); + if(containerIdx < 64) + freeLists->bigChunksSearchVector[0] |= ((uint64)1 << containerIdx); + else + freeLists->bigChunksSearchVector[1] |= ((uint64)1 << (containerIdx-64)); + foundChunk = newChunk; + } + + return foundChunk; +} + + +/* + * This is sequential code, meant to only be called from the Master, not from + * any slave Slvs. 
*/ void *VMS_int__malloc( size_t sizeRequested ) - { MallocProlog *foundElem = NULL, *currElem, *newElem; - ssize_t amountExtra, sizeConsumed,sizeOfFound; - uint32 foundElemIsTopOfHeap; - + { //============================= MEASUREMENT STUFF ======================== #ifdef MEAS__TIME_MALLOC int32 startStamp, endStamp; @@ -58,312 +212,101 @@ #endif //======================================================================== - //step up the size to be aligned at 16-byte boundary, prob better ways - sizeRequested = (sizeRequested + 16) & ~15; - currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList; - - while( currElem != NULL ) - { //check if size of currElem is big enough - sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem); - amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog); - if( amountExtra > 0 ) - { //found it, get out of loop - foundElem = currElem; - currElem = NULL; - } - else - currElem = currElem->nextChunkInFreeList; - } + MallocArrays* freeLists = _VMSMasterEnv->freeLists; + MallocProlog* foundChunk; - if( foundElem == NULL ) - { ERROR("\nmalloc failed\n") - return (void *)NULL; //indicates malloc failed - } - //Using a kludge to identify the element that is the top chunk in the - // heap -- saving top-of-heap addr in head's nextHigherInMem -- and - // save addr of start of heap in head's nextLowerInMem - //Will handle top of Heap specially - foundElemIsTopOfHeap = foundElem->nextHigherInMem == - _VMSMasterEnv->freeListHead->nextHigherInMem; + //Return a small chunk if the requested size is smaller than 128B + if(sizeRequested <= LOWER_BOUND) + { + uint32 freeListIdx = (sizeRequested-1)/SMALL_CHUNK_SIZE; + if(freeLists->smallChunks[freeListIdx] == NULL) + foundChunk = searchChunk(freeLists, SMALL_CHUNK_SIZE*(freeListIdx+1), 0); + else + foundChunk = removeSmallChunk(freeLists, freeListIdx); + + //Mark as allocated + foundChunk->prevChunkInFreeList = NULL; + return foundChunk + 1; + } - //before shave off 
and try to insert new elem, remove found elem - //note, foundElem will never be the head, so always has valid prevChunk - foundElem->prevChunkInFreeList->nextChunkInFreeList = - foundElem->nextChunkInFreeList; - if( foundElem->nextChunkInFreeList != NULL ) - { foundElem->nextChunkInFreeList->prevChunkInFreeList = - foundElem->prevChunkInFreeList; - } - foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated + //Calculate the expected container. Start one higher to have a Chunk that's + //always big enough. + uint32 containerIdx = getContainer(sizeRequested); - //if enough, turn extra into new elem & insert it - if( amountExtra > 64 ) - { //make new elem by adding to addr of curr elem then casting - sizeConsumed = sizeof(MallocProlog) + sizeRequested; - newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed ); - newElem->nextLowerInMem = foundElem; //This is evil (but why?) - newElem->nextHigherInMem = foundElem->nextHigherInMem; //This is evil (but why?) - foundElem->nextHigherInMem = newElem; - if( ! 
foundElemIsTopOfHeap ) - { //there is no next higher for top of heap, so can't write to it - newElem->nextHigherInMem->nextLowerInMem = newElem; - } - add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead ); - } + if(freeLists->bigChunks[containerIdx] == NULL) + foundChunk = searchChunk(freeLists, sizeRequested, containerIdx); else - { - sizeConsumed = sizeOfFound; - } - _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed; - + foundChunk = removeChunk(freeLists, containerIdx); + + //Mark as allocated + foundChunk->prevChunkInFreeList = NULL; + //============================= MEASUREMENT STUFF ======================== #ifdef MEAS__TIME_MALLOC saveLowTimeStampCountInto( endStamp ); addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist ); #endif //======================================================================== - - //skip over the prolog by adding its size to the pointer return - return (void*)((uintptr_t)foundElem + sizeof(MallocProlog)); + + //skip over the prolog by adding its size to the pointer return + return foundChunk + 1; } -/*This is sequential code, meant to only be called from the Master, not from - * any slave VPs. - *Search down list, checking size by the nextHigherInMem pointer, to find - * first chunk bigger than size needed. - *Shave off the extra and make it into a new free-list element, hook it in - * then return the address of the found element plus size of prolog. - * - * The difference to the regular malloc is, that all the allocated chunks are - * aligned and padded to the size of a CACHE_LINE_SZ. Thus creating a new chunk - * before the aligned chunk. 
- */ -void *VMS_int__malloc_aligned( size_t sizeRequested ) - { MallocProlog *foundElem = NULL, *currElem, *newElem; - ssize_t amountExtra, sizeConsumed,sizeOfFound,prevAmount; - uint32 foundElemIsTopOfHeap; - - //============================= MEASUREMENT STUFF ======================== - #ifdef MEAS__TIME_MALLOC - uint32 startStamp, endStamp; - saveLowTimeStampCountInto( startStamp ); - #endif - //======================================================================== - - //step up the size to be multiple of the cache line size - sizeRequested = (sizeRequested + CACHE_LINE_SZ) & ~(CACHE_LINE_SZ-1); - currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList; - - while( currElem != NULL ) - { //check if size of currElem is big enough - sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem); - amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog); - if( amountExtra > 0 ) - { - //look if the found element is already aligned - if((((uintptr_t)currElem+sizeof(MallocProlog)) & (uintptr_t)(CACHE_LINE_SZ-1)) == 0){ - //found it, get out of loop - foundElem = currElem; - break; - }else{ - //find first aligned address and check if it's still big enough - //check also if the space before the aligned address is big enough - //for a new element - void *firstAlignedAddr = (void*)(((uintptr_t)currElem + 2*CACHE_LINE_SZ) & ~((uintptr_t)(CACHE_LINE_SZ-1))); - prevAmount = (uintptr_t)firstAlignedAddr - (uintptr_t)currElem; - sizeOfFound=(uintptr_t)currElem->nextHigherInMem -(uintptr_t)firstAlignedAddr + sizeof(MallocProlog); - amountExtra= sizeOfFound - sizeRequested - sizeof(MallocProlog); - if(prevAmount > 2*sizeof(MallocProlog) && amountExtra > 0 ){ - //found suitable element - //create new previous element and exit loop - MallocProlog *newAlignedElem = (MallocProlog*)firstAlignedAddr - 1; - - //insert new element into free list - if(currElem->nextChunkInFreeList != NULL) - currElem->nextChunkInFreeList->prevChunkInFreeList = 
newAlignedElem; - newAlignedElem->prevChunkInFreeList = currElem; - newAlignedElem->nextChunkInFreeList = currElem->nextChunkInFreeList; - currElem->nextChunkInFreeList = newAlignedElem; - - //set higherInMem and lowerInMem - newAlignedElem->nextHigherInMem = currElem->nextHigherInMem; - foundElemIsTopOfHeap = currElem->nextHigherInMem == - _VMSMasterEnv->freeListHead->nextHigherInMem; - if(!foundElemIsTopOfHeap) - currElem->nextHigherInMem->nextLowerInMem = newAlignedElem; - currElem->nextHigherInMem = newAlignedElem; - newAlignedElem->nextLowerInMem = currElem; - - //Found new element leaving loop - foundElem = newAlignedElem; - break; - } - } - - } - currElem = currElem->nextChunkInFreeList; - } - - if( foundElem == NULL ) - { ERROR("\nmalloc failed\n") - return (void *)NULL; //indicates malloc failed - } - //Using a kludge to identify the element that is the top chunk in the - // heap -- saving top-of-heap addr in head's nextHigherInMem -- and - // save addr of start of heap in head's nextLowerInMem - //Will handle top of Heap specially - foundElemIsTopOfHeap = foundElem->nextHigherInMem == - _VMSMasterEnv->freeListHead->nextHigherInMem; - - //before shave off and try to insert new elem, remove found elem - //note, foundElem will never be the head, so always has valid prevChunk - foundElem->prevChunkInFreeList->nextChunkInFreeList = - foundElem->nextChunkInFreeList; - if( foundElem->nextChunkInFreeList != NULL ) - { foundElem->nextChunkInFreeList->prevChunkInFreeList = - foundElem->prevChunkInFreeList; - } - foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated - - //if enough, turn extra into new elem & insert it - if( amountExtra > 64 ) - { //make new elem by adding to addr of curr elem then casting - sizeConsumed = sizeof(MallocProlog) + sizeRequested; - newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed ); - newElem->nextHigherInMem = foundElem->nextHigherInMem; - newElem->nextLowerInMem = foundElem; - 
foundElem->nextHigherInMem = newElem; - - if( ! foundElemIsTopOfHeap ) - { //there is no next higher for top of heap, so can't write to it - newElem->nextHigherInMem->nextLowerInMem = newElem; - } - add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead ); - } - else - { - sizeConsumed = sizeOfFound; - } - _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed; - - //============================= MEASUREMENT STUFF ======================== - #ifdef MEAS__TIME_MALLOC - saveLowTimeStampCountInto( endStamp ); - addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist ); - #endif - //======================================================================== - - //skip over the prolog by adding its size to the pointer return - return (void*)((uintptr_t)foundElem + sizeof(MallocProlog)); - } - - -/*This is sequential code -- only to be called from the Master - * When free, subtract the size of prolog from pointer, then cast it to a - * MallocProlog. Then check the nextLower and nextHigher chunks to see if - * one or both are also free, and coalesce if so, and if neither free, then - * add this one to free-list. +/* + * This is sequential code, meant to only be called from the Master, not from + * any slave Slvs. 
*/ void VMS_int__free( void *ptrToFree ) - { MallocProlog *elemToFree, *nextLowerElem, *nextHigherElem; - size_t sizeOfElem; - uint32 lowerExistsAndIsFree, higherExistsAndIsFree; - + { + //============================= MEASUREMENT STUFF ======================== #ifdef MEAS__TIME_MALLOC int32 startStamp, endStamp; saveLowTimeStampCountInto( startStamp ); #endif //======================================================================== - - if( ptrToFree < (void*)_VMSMasterEnv->freeListHead->nextLowerInMem || - ptrToFree > (void*)_VMSMasterEnv->freeListHead->nextHigherInMem ) - { //outside the range of data owned by VMS's malloc, so do nothing - return; - } - //subtract size of prolog to get pointer to prolog, then cast - elemToFree = (MallocProlog *)((uintptr_t)ptrToFree - sizeof(MallocProlog)); - sizeOfElem =(size_t)((uintptr_t)elemToFree->nextHigherInMem-(uintptr_t)elemToFree); - - if( elemToFree->prevChunkInFreeList != NULL ) - { printf( "error: freeing same element twice!" ); exit(1); - } - - _VMSMasterEnv->amtOfOutstandingMem -= sizeOfElem; - - nextLowerElem = elemToFree->nextLowerInMem; - nextHigherElem = elemToFree->nextHigherInMem; - - if( nextHigherElem == NULL ) - higherExistsAndIsFree = FALSE; - else //okay exists, now check if in the free-list by checking back ptr - higherExistsAndIsFree = (nextHigherElem->prevChunkInFreeList != NULL); - - if( nextLowerElem == NULL ) - lowerExistsAndIsFree = FALSE; - else //okay, it exists, now check if it's free - lowerExistsAndIsFree = (nextLowerElem->prevChunkInFreeList != NULL); - - - //now, know what exists and what's free - if( lowerExistsAndIsFree ) - { if( higherExistsAndIsFree ) - { //both exist and are free, so coalesce all three - //First, remove higher from free-list - nextHigherElem->prevChunkInFreeList->nextChunkInFreeList = - nextHigherElem->nextChunkInFreeList; - if( nextHigherElem->nextChunkInFreeList != NULL ) //end-of-list? 
- nextHigherElem->nextChunkInFreeList->prevChunkInFreeList = - nextHigherElem->prevChunkInFreeList; - //Now, fix-up sequence-in-mem list -- by side-effect, this also - // changes size of the lower elem, which is still in free-list - nextLowerElem->nextHigherInMem = nextHigherElem->nextHigherInMem; - if( nextHigherElem->nextHigherInMem != - _VMSMasterEnv->freeListHead->nextHigherInMem ) - nextHigherElem->nextHigherInMem->nextLowerInMem = nextLowerElem; - //notice didn't do anything to elemToFree -- it simply is no - // longer reachable from any of the lists. Wonder if could be a - // security leak because left valid addresses in it, - // but don't care for now. + + MallocArrays* freeLists = _VMSMasterEnv->freeLists; + MallocProlog *chunkToFree = (MallocProlog*)ptrToFree - 1; + uint32 containerIdx; + + //Check for free neighbors + if(chunkToFree->nextLowerInMem) + { + if(chunkToFree->nextLowerInMem->prevChunkInFreeList != NULL) + {//Chunk is not allocated + extractChunk(chunkToFree->nextLowerInMem, freeLists); + chunkToFree = mergeChunks(chunkToFree->nextLowerInMem, chunkToFree); } - else - { //lower is the only of the two that exists and is free, - //In this case, no adjustment to free-list, just change mem-list. 
- // By side-effect, changes size of the lower elem - nextLowerElem->nextHigherInMem = elemToFree->nextHigherInMem; - if( elemToFree->nextHigherInMem != - _VMSMasterEnv->freeListHead->nextHigherInMem ) - elemToFree->nextHigherInMem->nextLowerInMem = nextLowerElem; + } + if(chunkToFree->nextHigherInMem) + { + if(chunkToFree->nextHigherInMem->prevChunkInFreeList != NULL) + {//Chunk is not allocated + extractChunk(chunkToFree->nextHigherInMem, freeLists); + chunkToFree = mergeChunks(chunkToFree, chunkToFree->nextHigherInMem); } - } + } + + size_t chunkSize = getChunkSize(chunkToFree); + if(chunkSize < BIG_LOWER_BOUND) + { + containerIdx = (chunkSize/SMALL_CHUNK_SIZE)-1; + if(containerIdx > SMALL_CHUNK_COUNT-1) + containerIdx = SMALL_CHUNK_COUNT-1; + insertChunk(chunkToFree, &freeLists->smallChunks[containerIdx]); + } else - { //lower either doesn't exist or isn't free, so check higher - if( higherExistsAndIsFree ) - { //higher exists and is the only of the two free - //First, in free-list, replace higher elem with the one to free - elemToFree->nextChunkInFreeList=nextHigherElem->nextChunkInFreeList; - elemToFree->prevChunkInFreeList=nextHigherElem->prevChunkInFreeList; - elemToFree->prevChunkInFreeList->nextChunkInFreeList = elemToFree; - if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list? - elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree; - //Now chg mem-list. 
By side-effect, changes size of elemToFree - elemToFree->nextHigherInMem = nextHigherElem->nextHigherInMem; - if( elemToFree->nextHigherInMem != - _VMSMasterEnv->freeListHead->nextHigherInMem ) - elemToFree->nextHigherInMem->nextLowerInMem = elemToFree; - } - else - { //neither lower nor higher is availabe to coalesce so add to list - // this makes prev chunk ptr non-null, which indicates it's free - elemToFree->nextChunkInFreeList = - _VMSMasterEnv->freeListHead->nextChunkInFreeList; - _VMSMasterEnv->freeListHead->nextChunkInFreeList = elemToFree; - if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list? - elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree; - elemToFree->prevChunkInFreeList = _VMSMasterEnv->freeListHead; - } - } + { + containerIdx = getContainer(getChunkSize(chunkToFree)) - 1; + insertChunk(chunkToFree, &freeLists->bigChunks[containerIdx]); + if(containerIdx < 64) + freeLists->bigChunksSearchVector[0] |= (uint64)1 << containerIdx; + else + freeLists->bigChunksSearchVector[1] |= (uint64)1 << (containerIdx-64); + } + //============================= MEASUREMENT STUFF ======================== #ifdef MEAS__TIME_MALLOC saveLowTimeStampCountInto( endStamp ); @@ -373,82 +316,31 @@ } - -/*Allocates memory from the external system -- higher overhead - * - *Because of Linux's malloc throwing bizarre random faults when malloc is - * used inside a VMS virtual processor, have to pass this as a request and - * have the core loop do it when it gets around to it -- will look for these - * chores leftover from the previous animation of masterVP the next time it - * goes to animate the masterVP -- so it takes two separate masterVP - * animations, separated by work, to complete an external malloc or - * external free request. 
- * - *Thinking core loop accepts signals -- just looks if signal-location is - * empty or not -- +/* + * Designed to be called from the main thread outside of VMS, during init */ -void * -VMS__malloc_in_ext( size_t sizeRequested ) - { - /* - //This is running in the master, so no chance for multiple cores to be - // competing for the core's flag. - if( *(_VMSMasterEnv->coreLoopSignalAddr[ 0 ]) != 0 ) - { //something has already signalled to core loop, so save the signal - // and look, next time master animated, to see if can send it. - //Note, the addr to put a signal is in the coreloop's frame, so just - // checks it each time through -- make it volatile to avoid GCC - // optimizations -- it's a coreloop local var that only changes - // after jumping away. The signal includes the addr to send the - //return to -- even if just empty return completion-signal - // - //save the signal in some queue that the master looks at each time - // it starts up -- one loc says if empty for fast common case -- - //something like that -- want to hide this inside this call -- but - // think this has to come as a request -- req handler gives procr - // back to master loop, which gives it back to req handler at point - // it sees that core loop has sent return signal. Something like - // that. - saveTheSignal - - } - coreSigData->type = malloc; - coreSigData->sizeToMalloc = sizeRequested; - coreSigData->locToSignalCompletion = &figureOut; - _VMSMasterEnv->coreLoopSignals[ 0 ] = coreSigData; - */ - //just risk system-stack faults until get this figured out - return malloc( sizeRequested ); - } - - -/*Frees memory that was allocated in the external system -- higher overhead - * - *As noted in external malloc comment, this is clunky 'cause the free has - * to be called in the core loop. 
- */ -void -VMS__free_in_ext( void *ptrToFree ) - { - //just risk system-stack faults until get this figured out - free( ptrToFree ); - - //TODO: fix this -- so - } - - -/*Designed to be called from the main thread outside of VMS, during init - */ -MallocProlog * +MallocArrays * VMS_ext__create_free_list() - { MallocProlog *freeListHead, *firstChunk; - - //Note, this is running in the main thread -- all increases in malloc - // mem and all frees of it must be done in this thread, with the - // thread's original stack available - freeListHead = malloc( sizeof(MallocProlog) ); - firstChunk = malloc( MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE ); - if( firstChunk == NULL ) {printf("malloc error\n"); exit(1);} +{ + //Initialize containers for small chunks and fill with zeros + _VMSMasterEnv->freeLists = (MallocArrays*)malloc( sizeof(MallocArrays) ); + MallocArrays *freeLists = _VMSMasterEnv->freeLists; + + freeLists->smallChunks = + (MallocProlog**)malloc(SMALL_CHUNK_COUNT*sizeof(MallocProlog*)); + memset((void*)freeLists->smallChunks, + 0,SMALL_CHUNK_COUNT*sizeof(MallocProlog*)); + + //Calculate number of containers for big chunks + uint32 container = getContainer(MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE)+1; + freeLists->bigChunks = (MallocProlog**)malloc(container*sizeof(MallocProlog*)); + memset((void*)freeLists->bigChunks,0,container*sizeof(MallocProlog*)); + freeLists->containerCount = container; + + //Create first element in lastContainer + MallocProlog *firstChunk = malloc( MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE ); + if( firstChunk == NULL ) {printf("Can't allocate initial memory\n"); exit(1);} + freeLists->memSpace = firstChunk; //Touch memory to avoid page faults void *ptr,*endPtr; @@ -457,38 +349,47 @@ { *(char*)ptr = 0; } - - freeListHead->prevChunkInFreeList = NULL; - //Use this addr to free the heap when cleanup - freeListHead->nextLowerInMem = firstChunk; - //to identify top-of-heap elem, compare this addr to elem's next higher - freeListHead->nextHigherInMem = (void*)( 
(uintptr_t)firstChunk + - MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE); - freeListHead->nextChunkInFreeList = firstChunk; - - firstChunk->nextChunkInFreeList = NULL; - firstChunk->prevChunkInFreeList = freeListHead; - //next Higher has to be set to top of chunk, so can calc size in malloc - firstChunk->nextHigherInMem = (void*)( (uintptr_t)firstChunk + - MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE); - firstChunk->nextLowerInMem = NULL; //identifies as bott of heap - _VMSMasterEnv->amtOfOutstandingMem = 0; //none allocated yet - - return freeListHead; + firstChunk->nextLowerInMem = NULL; + firstChunk->nextHigherInMem = (MallocProlog*)((uintptr_t)firstChunk + + MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE - sizeof(MallocProlog)); + firstChunk->nextChunkInFreeList = NULL; + //previous element in the queue is the container + firstChunk->prevChunkInFreeList = &freeLists->bigChunks[container-2]; + + freeLists->bigChunks[container-2] = firstChunk; + //Insert into bit search list + if(container <= 65) + { + freeLists->bigChunksSearchVector[0] = ((uint64)1 << (container-2)); + freeLists->bigChunksSearchVector[1] = 0; + } + else + { + freeLists->bigChunksSearchVector[0] = 0; + freeLists->bigChunksSearchVector[1] = ((uint64)1 << (container-66)); + } + + //Create dummy chunk to mark the top of stack this is of course + //never freed + MallocProlog *dummyChunk = firstChunk->nextHigherInMem; + dummyChunk->nextHigherInMem = dummyChunk+1; + dummyChunk->nextLowerInMem = NULL; + dummyChunk->nextChunkInFreeList = NULL; + dummyChunk->prevChunkInFreeList = NULL; + + return freeLists; } /*Designed to be called from the main thread outside of VMS, during cleanup */ void -VMS_ext__free_free_list( MallocProlog *freeListHead ) +VMS_ext__free_free_list( MallocArrays *freeLists ) { - //stashed a ptr to the one and only bug chunk malloc'd from OS in the - // free list head's next lower in mem pointer - free( freeListHead->nextLowerInMem ); - - //don't free the head -- it'll be in an array eventually -- free whole - // 
array when all the free lists linked from it have already been freed + free(freeLists->memSpace); + free(freeLists->bigChunks); + free(freeLists->smallChunks); + } diff -r eaf7e4c58c9e -r 0c83ea8adefc vmalloc.h --- a/vmalloc.h Wed Feb 22 11:39:12 2012 -0800 +++ b/vmalloc.h Sun Mar 04 14:26:35 2012 -0800 @@ -14,6 +14,14 @@ #include #include "VMS_primitive_data_types.h" +#define SMALL_CHUNK_SIZE 32 +#define SMALL_CHUNK_COUNT 4 +#define LOWER_BOUND 128 //Biggest chunk size that is created for the small chunks +#define BIG_LOWER_BOUND 160 //Smallest chunk size that is created for the big chunks + +#define LOG54 0.3219280948873623 +#define LOG128 7 + typedef struct _MallocProlog MallocProlog; struct _MallocProlog @@ -24,6 +32,18 @@ MallocProlog *nextLowerInMem; }; //MallocProlog + + typedef struct MallocArrays MallocArrays; + + struct MallocArrays + { + MallocProlog **smallChunks; + MallocProlog **bigChunks; + uint64 bigChunksSearchVector[2]; + void *memSpace; + uint32 containerCount; + }; + //MallocArrays typedef struct { @@ -34,57 +54,38 @@ void * VMS_int__malloc( size_t sizeRequested ); +#define VMS_PI__malloc VMS_int__malloc +#define VMS_WL__malloc VMS_int__malloc /*TODO: Bug -- Not protected!! */ +#define VMS_App__malloc VMS_int__malloc /*TODO: Bug -- Not protected!! */ void * VMS_int__malloc_aligned( size_t sizeRequested ); +#define VMS_PI__malloc_aligned VMS_int__malloc_aligned +#define VMS_WL__malloc_aligned VMS_int__malloc_aligned void VMS_int__free( void *ptrToFree ); +#define VMS_PI__free VMS_int__free +#define VMS_WL__free VMS_int__free /*TODO: Bug -- Not protected!! */ +#define VMS_App__free VMS_int__free /*TODO: Bug -- Not protected!! 
*/ -#define VMS_PI__malloc VMS_int__malloc -#define VMS_PI__malloc_aligned VMS_int__malloc_aligned -#define VMS_PI__free VMS_int__free -/* For now, the PI is protected by master lock, so int malloc fine -void * -VMS_PI__malloc( size_t sizeRequested ); -void * -VMS_PI__malloc_aligned( size_t sizeRequested ); - -void -VMS_PI__free( void *ptrToFree ); -*/ - -//TODO: protect WL malloc from concurrency!! shared freelist can be corrupted -#define VMS_WL__malloc VMS_int__malloc -#define VMS_WL__malloc_aligned VMS_int__malloc_aligned -#define VMS_WL__free VMS_int__free -/* -void * -VMS_WL__malloc( size_t sizeRequested ); - -void * -VMS_WL__malloc_aligned( size_t sizeRequested ); - -void -VMS_WL__free( void *ptrToFree ); -*/ /*Allocates memory from the external system -- higher overhead */ void * -VMS__malloc_in_ext( size_t sizeRequested ); +VMS_ext__malloc_in_ext( size_t sizeRequested ); /*Frees memory that was allocated in the external system -- higher overhead */ void -VMS__free_in_ext( void *ptrToFree ); +VMS_ext__free_in_ext( void *ptrToFree ); -MallocProlog * +MallocArrays * VMS_ext__create_free_list(); void -VMS_ext__free_free_list( MallocProlog *freeListHead ); +VMS_ext__free_free_list(MallocArrays *freeLists ); #endif \ No newline at end of file diff -r eaf7e4c58c9e -r 0c83ea8adefc vutilities.h --- a/vutilities.h Wed Feb 22 11:39:12 2012 -0800 +++ b/vutilities.h Sun Mar 04 14:26:35 2012 -0800 @@ -8,8 +8,8 @@ */ -#ifndef _UTILITIES_H -#define _UTILITIES_H +#ifndef _VUTILITIES_H +#define _VUTILITIES_H #include #include "VMS_primitive_data_types.h"