# HG changeset patch # User Me@portablequad # Date 1325722211 28800 # Node ID c1784868dcea621af6e734b7e9a4bd6fbaac3e50 # Parent ad8213a8e9164e07fa301958c38d9972ddc65a7a testing hgeol -- see if it fixes line-ending issues -- commit line endings diff -r ad8213a8e916 -r c1784868dcea .hgeol --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.hgeol Wed Jan 04 16:10:11 2012 -0800 @@ -0,0 +1,12 @@ + +[patterns] +**.py = native +**.txt = native +**.c = native +**.h = native +**.cpp = native +**.java = native +**.sh = native +**.pl = native +**.jpg = bin +**.gif = bin diff -r ad8213a8e916 -r c1784868dcea CoreLoop.c --- a/CoreLoop.c Thu Oct 06 16:24:17 2011 +0200 +++ b/CoreLoop.c Wed Jan 04 16:10:11 2012 -0800 @@ -1,215 +1,215 @@ -/* - * Copyright 2010 OpenSourceStewardshipFoundation - * - * Licensed under BSD - */ - - -#include "VMS.h" -#include "Queue_impl/BlockingQueue.h" -#include "ProcrContext.h" - -#include -#include -#include - -#include -#include - -void *terminateCoreLoop(VirtProcr *currPr); - -/*This is the loop that runs in the OS Thread pinned to each core - *Get virt procr from queue, - * save state of current animator, then load in state of virt procr, using - * jmp instr to switch the program-counter state -- making the virt procr - * the new animator. - *At some point, the virt procr will suspend itself by saving out its - * animator state (stack ptr, frame ptr, program counter) and switching - * back to the OS Thread's animator state, which means restoring the - * stack and frame and jumping to the core loop start point. - *This cycle then repeats, until a special shutdown virtual processor is - * animated, which jumps to the end point at the bottom of core loop. - */ -void * -coreLoop( void *paramsIn ) - { - ThdParams *coreLoopThdParams; - int thisCoresIdx; - VirtProcr *currPr; - VMSQueueStruc *readyToAnimateQ; - cpu_set_t coreMask; //has 1 in bit positions of allowed cores - int errorCode; - - //work-stealing struc on stack to prevent false-sharing in cache-line - volatile GateStruc gate; - //preGateProgress, waitProgress, exitProgress, gateClosed; - - - coreLoopThdParams = (ThdParams *)paramsIn; - thisCoresIdx = coreLoopThdParams->coreNum; - - gate.gateClosed = FALSE; - gate.preGateProgress = 0; - gate.waitProgress = 0; - gate.exitProgress = 0; - _VMSMasterEnv->workStealingGates[ thisCoresIdx ] = (GateStruc*)&gate;//race @startup - - //wait until signalled that setup is complete - pthread_mutex_lock( &suspendLock ); - while( !(_VMSMasterEnv->setupComplete) ) - { - pthread_cond_wait( &suspend_cond, - &suspendLock ); - } - pthread_mutex_unlock( &suspendLock ); - - //printf( "\nCore unsuspended: %d\n", coreLoopThdParams->coreNum ); - - //set thread affinity - //Linux requires pinning thd to core inside thread-function - //Designate a core by a 1 in bit-position corresponding to the core - CPU_ZERO(&coreMask); - CPU_SET(coreLoopThdParams->coreNum,&coreMask); - //coreMask = 1L << coreLoopThdParams->coreNum; - - pthread_t selfThd = pthread_self(); - errorCode = - pthread_setaffinity_np( selfThd, sizeof(coreMask), &coreMask); - - if(errorCode){ printf("\nset affinity failure\n"); exit(0); } - - - //Save the return address in the SwitchVP function - saveCoreLoopReturnAddr((void**)&(_VMSMasterEnv->coreLoopReturnPt)); - - - while(1){ - - //Get virtual processor from queue - //The Q must be a global, static volatile var, so not kept in reg, - // which forces reloading the pointer after each jmp to this point - readyToAnimateQ = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx]; - - #ifdef USE_WORK_STEALING - //Alg for work-stealing designed to make common case fast. Comment - // in stealer code explains. - gate.preGateProgress++; - if( gate.gateClosed ) - { //now, set coreloop's progress, so stealer can see that core loop - // has made it into the waiting area. - gate.waitProgress = gate.preGateProgress; - while( gate.gateClosed ) /*busy wait*/; - } - - currPr = (VirtProcr *) readVMSQ( readyToAnimateQ ); - - //Set the coreloop's progress, so stealer can see it has made it out - // of the protected area - gate.exitProgress = gate.preGateProgress; - #else - currPr = (VirtProcr *) readVMSQ( readyToAnimateQ ); - #endif - - if( currPr != NULL ) _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0; - else - { - //============================= MEASUREMENT STUFF ===================== - #ifdef MEAS__TIME_MASTER_LOCK - int32 startStamp, endStamp; - saveLowTimeStampCountInto( startStamp ); - #endif - //===================================================================== - int tries = 0; int gotLock = 0; - while( currPr == NULL ) //if queue was empty, enter get masterLock loop - { //queue was empty, so get master lock - - gotLock = __sync_bool_compare_and_swap(&(_VMSMasterEnv->masterLock), - UNLOCKED, LOCKED ); - if( gotLock ) - { //run own MasterVP -- jmps to coreLoops startPt when done - currPr = _VMSMasterEnv->masterVPs[thisCoresIdx]; - if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 ) - { DEBUG( dbgB2BMaster,"Many back to back MasterVPs\n"); - pthread_yield(); - } - _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1; - break; //end while -- have a VP to animate now - } - - tries++; //if too many, means master on other core taking too long - if( tries > MASTERLOCK_RETRIES ) { tries = 0; pthread_yield(); } - } - //============================= MEASUREMENT STUFF ===================== - #ifdef MEAS__TIME_MASTER_LOCK - saveLowTimeStampCountInto( endStamp ); - addIntervalToHist( startStamp, endStamp, - _VMSMasterEnv->masterLockLowTimeHist ); - addIntervalToHist( startStamp, endStamp, - _VMSMasterEnv->masterLockHighTimeHist ); - #endif - //===================================================================== - - } - - - switchToVP(currPr); //The VPs return in here - flushRegisters(); - }//CoreLoop - } - - -void * -terminateCoreLoop(VirtProcr *currPr){ - //first free shutdown VP that jumped here -- it first restores the - // coreloop's stack, so addr of currPr in stack frame is still correct - VMS__dissipate_procr( currPr ); - pthread_exit( NULL ); -} - - - -#ifdef SEQUENTIAL - -//=========================================================================== -/*This sequential version is exact same as threaded, except doesn't do the - * pin-threads part, nor the wait until setup complete part. - */ -void * -coreLoop_Seq( void *paramsIn ) - { - VirtProcr *currPr; - VMSQueueStruc *readyToAnimateQ; - - ThdParams *coreLoopThdParams; - int thisCoresIdx; - - coreLoopThdParams = (ThdParams *)paramsIn; -// thisCoresIdx = coreLoopThdParams->coreNum; - thisCoresIdx = 0; - - //Save the return address in the SwitchVP function - saveCoreLoopReturnAddr(&(_VMSMasterEnv->coreLoopReturnPt)); - - - while(1){ - //Get virtual processor from queue - //_VMSWorkQ must be a global, static volatile var, so not kept in reg, - // which forces reloading the pointer after each jmp to this point - readyToAnimateQ = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx]; - currPr = (VirtProcr *) readVMSQ( readyToAnimateQ ); - if( currPr == NULL ) - { if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 ) - { printf("too many back to back MasterVP\n"); exit(1); } - _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1; - - currPr = _VMSMasterEnv->masterVPs[thisCoresIdx]; - } - else - _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0; - - - switchToVP( currPr ); - flushRegisters(); - } - } -#endif +/* + * Copyright 2010 OpenSourceStewardshipFoundation + * + * Licensed under BSD + */ + + +#include "VMS.h" +#include "Queue_impl/BlockingQueue.h" +#include "ProcrContext.h" + +#include +#include +#include + +#include +#include + +void *terminateCoreLoop(VirtProcr *currPr); + +/*This is the loop that runs in the OS Thread pinned to each core + *Get virt procr from queue, + * save state of current animator, then load in state of virt procr, using + * jmp instr to switch the program-counter state -- making the virt procr + * the new animator. + *At some point, the virt procr will suspend itself by saving out its + * animator state (stack ptr, frame ptr, program counter) and switching + * back to the OS Thread's animator state, which means restoring the + * stack and frame and jumping to the core loop start point. + *This cycle then repeats, until a special shutdown virtual processor is + * animated, which jumps to the end point at the bottom of core loop. + */ +void * +coreLoop( void *paramsIn ) + { + ThdParams *coreLoopThdParams; + int thisCoresIdx; + VirtProcr *currPr; + VMSQueueStruc *readyToAnimateQ; + cpu_set_t coreMask; //has 1 in bit positions of allowed cores + int errorCode; + + //work-stealing struc on stack to prevent false-sharing in cache-line + volatile GateStruc gate; + //preGateProgress, waitProgress, exitProgress, gateClosed; + + + coreLoopThdParams = (ThdParams *)paramsIn; + thisCoresIdx = coreLoopThdParams->coreNum; + + gate.gateClosed = FALSE; + gate.preGateProgress = 0; + gate.waitProgress = 0; + gate.exitProgress = 0; + _VMSMasterEnv->workStealingGates[ thisCoresIdx ] = (GateStruc*)&gate;//race @startup + + //wait until signalled that setup is complete + pthread_mutex_lock( &suspendLock ); + while( !(_VMSMasterEnv->setupComplete) ) + { + pthread_cond_wait( &suspend_cond, + &suspendLock ); + } + pthread_mutex_unlock( &suspendLock ); + + //printf( "\nCore unsuspended: %d\n", coreLoopThdParams->coreNum ); + + //set thread affinity + //Linux requires pinning thd to core inside thread-function + //Designate a core by a 1 in bit-position corresponding to the core + CPU_ZERO(&coreMask); + CPU_SET(coreLoopThdParams->coreNum,&coreMask); + //coreMask = 1L << coreLoopThdParams->coreNum; + + pthread_t selfThd = pthread_self(); + errorCode = + pthread_setaffinity_np( selfThd, sizeof(coreMask), &coreMask); + + if(errorCode){ printf("\nset affinity failure\n"); exit(0); } + + + //Save the return address in the SwitchVP function + saveCoreLoopReturnAddr((void**)&(_VMSMasterEnv->coreLoopReturnPt)); + + + while(1){ + + //Get virtual processor from queue + //The Q must be a global, static volatile var, so not kept in reg, + // which forces reloading the pointer after each jmp to this point + readyToAnimateQ = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx]; + + #ifdef USE_WORK_STEALING + //Alg for work-stealing designed to make common case fast. Comment + // in stealer code explains. + gate.preGateProgress++; + if( gate.gateClosed ) + { //now, set coreloop's progress, so stealer can see that core loop + // has made it into the waiting area. + gate.waitProgress = gate.preGateProgress; + while( gate.gateClosed ) /*busy wait*/; + } + + currPr = (VirtProcr *) readVMSQ( readyToAnimateQ ); + + //Set the coreloop's progress, so stealer can see it has made it out + // of the protected area + gate.exitProgress = gate.preGateProgress; + #else + currPr = (VirtProcr *) readVMSQ( readyToAnimateQ ); + #endif + + if( currPr != NULL ) _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0; + else + { + //============================= MEASUREMENT STUFF ===================== + #ifdef MEAS__TIME_MASTER_LOCK + int32 startStamp, endStamp; + saveLowTimeStampCountInto( startStamp ); + #endif + //===================================================================== + int tries = 0; int gotLock = 0; + while( currPr == NULL ) //if queue was empty, enter get masterLock loop + { //queue was empty, so get master lock + + gotLock = __sync_bool_compare_and_swap(&(_VMSMasterEnv->masterLock), + UNLOCKED, LOCKED ); + if( gotLock ) + { //run own MasterVP -- jmps to coreLoops startPt when done + currPr = _VMSMasterEnv->masterVPs[thisCoresIdx]; + if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 ) + { DEBUG( dbgB2BMaster,"Many back to back MasterVPs\n"); + pthread_yield(); + } + _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1; + break; //end while -- have a VP to animate now + } + + tries++; //if too many, means master on other core taking too long + if( tries > MASTERLOCK_RETRIES ) { tries = 0; pthread_yield(); } + } + //============================= MEASUREMENT STUFF ===================== + #ifdef MEAS__TIME_MASTER_LOCK + saveLowTimeStampCountInto( endStamp ); + addIntervalToHist( startStamp, endStamp, + _VMSMasterEnv->masterLockLowTimeHist ); + addIntervalToHist( startStamp, endStamp, + _VMSMasterEnv->masterLockHighTimeHist ); + #endif + //===================================================================== + + } + + + switchToVP(currPr); //The VPs return in here + flushRegisters(); + }//CoreLoop + } + + +void * +terminateCoreLoop(VirtProcr *currPr){ + //first free shutdown VP that jumped here -- it first restores the + // coreloop's stack, so addr of currPr in stack frame is still correct + VMS__dissipate_procr( currPr ); + pthread_exit( NULL ); +} + + + +#ifdef SEQUENTIAL + +//=========================================================================== +/*This sequential version is exact same as threaded, except doesn't do the + * pin-threads part, nor the wait until setup complete part. + */ +void * +coreLoop_Seq( void *paramsIn ) + { + VirtProcr *currPr; + VMSQueueStruc *readyToAnimateQ; + + ThdParams *coreLoopThdParams; + int thisCoresIdx; + + coreLoopThdParams = (ThdParams *)paramsIn; +// thisCoresIdx = coreLoopThdParams->coreNum; + thisCoresIdx = 0; + + //Save the return address in the SwitchVP function + saveCoreLoopReturnAddr(&(_VMSMasterEnv->coreLoopReturnPt)); + + + while(1){ + //Get virtual processor from queue + //_VMSWorkQ must be a global, static volatile var, so not kept in reg, + // which forces reloading the pointer after each jmp to this point + readyToAnimateQ = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx]; + currPr = (VirtProcr *) readVMSQ( readyToAnimateQ ); + if( currPr == NULL ) + { if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 ) + { printf("too many back to back MasterVP\n"); exit(1); } + _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1; + + currPr = _VMSMasterEnv->masterVPs[thisCoresIdx]; + } + else + _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0; + + + switchToVP( currPr ); + flushRegisters(); + } + } +#endif diff -r ad8213a8e916 -r c1784868dcea MasterLoop.c --- a/MasterLoop.c Thu Oct 06 16:24:17 2011 +0200 +++ b/MasterLoop.c Wed Jan 04 16:10:11 2012 -0800 @@ -1,373 +1,373 @@ -/* - * Copyright 2010 OpenSourceStewardshipFoundation - * - * Licensed under BSD - */ - - - -#include -#include - -#include "VMS.h" -#include "ProcrContext.h" - - -//=========================================================================== -void inline -stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ, - VirtProcr *masterPr ); - -//=========================================================================== - - - -/*This code is animated by the virtual Master processor. - * - *Polls each sched slot exactly once, hands any requests made by a newly - * done slave to the "request handler" plug-in function - * - *Any slots that need a virt procr assigned are given to the "schedule" - * plug-in function, which tries to assign a virt procr (slave) to it. - * - *When all slots needing a processor have been given to the schedule plug-in, - * a fraction of the procrs successfully scheduled are put into the - * work queue, then a continuation of this function is put in, then the rest - * of the virt procrs that were successfully scheduled. - * - *The first thing the continuation does is busy-wait until the previous - * animation completes. This is because an (unlikely) continuation may - * sneak through queue before previous continuation is done putting second - * part of scheduled slaves in, which is the only race condition. - * - */ - -/*May 29, 2010 -- birth a Master during init so that first core loop to - * start running gets it and does all the stuff for a newly born -- - * from then on, will be doing continuation, but do suspension self - * directly at end of master loop - *So VMS__init just births the master virtual processor same way it births - * all the others -- then does any extra setup needed and puts it into the - * work queue. - *However means have to make masterEnv a global static volatile the same way - * did with readyToAnimateQ in core loop. -- for performance, put the - * jump to the core loop directly in here, and have it directly jump back. - * - * - *Aug 18, 2010 -- Going to a separate MasterVP for each core, to see if this - * avoids the suspected bug in the system stack that causes bizarre faults - * at random places in the system code. - * - *So, this function is coupled to each of the MasterVPs, -- meaning this - * function can't rely on a particular stack and frame -- each MasterVP that - * animates this function has a different one. - * - *At this point, the masterLoop does not write itself into the queue anymore, - * instead, the coreLoop acquires the masterLock when it has nothing to - * animate, and then animates its own masterLoop. However, still try to put - * several AppVPs into the queue to amortize the startup cost of switching - * to the MasterVP. Note, don't have to worry about latency of requests much - * because most requests generate work for same core -- only latency issue - * is case when other cores starved and one core's requests generate work - * for them -- so keep max in queue to 3 or 4.. - */ -void masterLoop( void *initData, VirtProcr *animatingPr ) - { - int32 slotIdx, numSlotsFilled; - VirtProcr *schedVirtPr; - SchedSlot *currSlot, **schedSlots; - MasterEnv *masterEnv; - VMSQueueStruc *readyToAnimateQ; - - SlaveScheduler slaveScheduler; - RequestHandler requestHandler; - void *semanticEnv; - - int32 thisCoresIdx; - VirtProcr *masterPr; - volatile VirtProcr *volatileMasterPr; - - volatileMasterPr = animatingPr; - masterPr = (VirtProcr*)volatileMasterPr; //used to force re-define after jmp - - //First animation of each MasterVP will in turn animate this part - // of setup code.. (VP creator sets up the stack as if this function - // was called normally, but actually get here by jmp) - //So, setup values about stack ptr, jmp pt and all that - //masterPr->nextInstrPt = &&masterLoopStartPt; - - - //Note, got rid of writing the stack and frame ptr up here, because - // only one - // core can ever animate a given MasterVP, so don't need to communicate - // new frame and stack ptr to the MasterVP storage before a second - // version of that MasterVP can get animated on a different core. - //Also got rid of the busy-wait. - - - //masterLoopStartPt: - while(1){ - - //============================= MEASUREMENT STUFF ======================== - #ifdef MEAS__TIME_MASTER - //Total Master time includes one coreloop time -- just assume the core - // loop time is same for Master as for AppVPs, even though it may be - // smaller due to higher predictability of the fixed jmp. - saveLowTimeStampCountInto( masterPr->startMasterTSCLow ); - #endif - //======================================================================== - - masterEnv = (MasterEnv*)_VMSMasterEnv; - - //GCC may optimize so doesn't always re-define from frame-storage - masterPr = (VirtProcr*)volatileMasterPr; //just to make sure after jmp - thisCoresIdx = masterPr->coreAnimatedBy; - readyToAnimateQ = masterEnv->readyToAnimateQs[thisCoresIdx]; - schedSlots = masterEnv->allSchedSlots[thisCoresIdx]; - - requestHandler = masterEnv->requestHandler; - slaveScheduler = masterEnv->slaveScheduler; - semanticEnv = masterEnv->semanticEnv; - - - //Poll each slot's Done flag - numSlotsFilled = 0; - for( slotIdx = 0; slotIdx < NUM_SCHED_SLOTS; slotIdx++) - { - currSlot = schedSlots[ slotIdx ]; - - if( currSlot->workIsDone ) - { - currSlot->workIsDone = FALSE; - currSlot->needsProcrAssigned = TRUE; - - //process requests from slave to master - //====================== MEASUREMENT STUFF =================== - #ifdef MEAS__TIME_PLUGIN - int32 startStamp1, endStamp1; - saveLowTimeStampCountInto( startStamp1 ); - #endif - //============================================================ - (*requestHandler)( currSlot->procrAssignedToSlot, semanticEnv ); - //====================== MEASUREMENT STUFF =================== - #ifdef MEAS__TIME_PLUGIN - saveLowTimeStampCountInto( endStamp1 ); - addIntervalToHist( startStamp1, endStamp1, - _VMSMasterEnv->reqHdlrLowTimeHist ); - addIntervalToHist( startStamp1, endStamp1, - _VMSMasterEnv->reqHdlrHighTimeHist ); - #endif - //============================================================ - } - if( currSlot->needsProcrAssigned ) - { //give slot a new virt procr - schedVirtPr = - (*slaveScheduler)( semanticEnv, thisCoresIdx ); - - if( schedVirtPr != NULL ) - { currSlot->procrAssignedToSlot = schedVirtPr; - schedVirtPr->schedSlot = currSlot; - currSlot->needsProcrAssigned = FALSE; - numSlotsFilled += 1; - - writeVMSQ( schedVirtPr, readyToAnimateQ ); - } - } - } - - - #ifdef USE_WORK_STEALING - //If no slots filled, means no more work, look for work to steal. - if( numSlotsFilled == 0 ) - { gateProtected_stealWorkInto( currSlot, readyToAnimateQ, masterPr ); - } - #endif - - - #ifdef MEAS__TIME_MASTER - saveLowTimeStampCountInto( masterPr->endMasterTSCLow ); - #endif - - masterSwitchToCoreLoop(animatingPr); - flushRegisters(); - }//MasterLoop - - - } - - - -/*This has a race condition -- the coreloops are accessing their own queues - * at the same time that this work-stealer on a different core is trying to - */ -void inline -stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ, - VirtProcr *masterPr ) - { - VirtProcr *stolenPr; - int32 coreIdx, i; - VMSQueueStruc *currQ; - - stolenPr = NULL; - coreIdx = masterPr->coreAnimatedBy; - for( i = 0; i < NUM_CORES -1; i++ ) - { - if( coreIdx >= NUM_CORES -1 ) - { coreIdx = 0; - } - else - { coreIdx++; - } - currQ = _VMSMasterEnv->readyToAnimateQs[coreIdx]; - if( numInVMSQ( currQ ) > 0 ) - { stolenPr = readVMSQ (currQ ); - break; - } - } - - if( stolenPr != NULL ) - { currSlot->procrAssignedToSlot = stolenPr; - stolenPr->schedSlot = currSlot; - currSlot->needsProcrAssigned = FALSE; - - writeVMSQ( stolenPr, readyToAnimateQ ); - } - } - -/*This algorithm makes the common case fast. Make the coreloop passive, - * and show its progress. Make the stealer control a gate that coreloop - * has to pass. - *To avoid interference, only one stealer at a time. Use a global - * stealer-lock. - * - *The pattern is based on a gate -- stealer shuts the gate, then monitors - * to be sure any already past make it all the way out, before starting. - *So, have a "progress" measure just before the gate, then have two after it, - * one is in a "waiting room" outside the gate, the other is at the exit. - *Then, the stealer first shuts the gate, then checks the progress measure - * outside it, then looks to see if the progress measure at the exit is the - * same. If yes, it knows the protected area is empty 'cause no other way - * to get in and the last to get in also exited. - *If the progress measure at the exit is not the same, then the stealer goes - * into a loop checking both the waiting-area and the exit progress-measures - * until one of them shows the same as the measure outside the gate. Might - * as well re-read the measure outside the gate each go around, just to be - * sure. It is guaranteed that one of the two will eventually match the one - * outside the gate. - * - *Here's an informal proof of correctness: - *The gate can be closed at any point, and have only four cases: - * 1) coreloop made it past the gate-closing but not yet past the exit - * 2) coreloop made it past the pre-gate progress update but not yet past - * the gate, - * 3) coreloop is right before the pre-gate update - * 4) coreloop is past the exit and far from the pre-gate update. - * - * Covering the cases in reverse order, - * 4) is not a problem -- stealer will read pre-gate progress, see that it - * matches exit progress, and the gate is closed, so stealer can proceed. - * 3) stealer will read pre-gate progress just after coreloop updates it.. - * so stealer goes into a loop until the coreloop causes wait-progress - * to match pre-gate progress, so then stealer can proceed - * 2) same as 3.. - * 1) stealer reads pre-gate progress, sees that it's different than exit, - * so goes into loop until exit matches pre-gate, now it knows coreloop - * is not in protected and cannot get back in, so can proceed. - * - *Implementation for the stealer: - * - *First, acquire the stealer lock -- only cores with no work to do will - * compete to steal, so not a big performance penalty having only one -- - * will rarely have multiple stealers in a system with plenty of work -- and - * in a system with little work, it doesn't matter. - * - *Note, have single-reader, single-writer pattern for all variables used to - * communicate between stealer and victims - * - *So, scan the queues of the core loops, until find non-empty. Each core - * has its own list that it scans. The list goes in order from closest to - * furthest core, so it steals first from close cores. Later can add - * taking info from the app about overlapping footprints, and scan all the - * others then choose work with the most footprint overlap with the contents - * of this core's cache. - * - *Now, have a victim want to take work from. So, shut the gate in that - * coreloop, by setting the "gate closed" var on its stack to TRUE. - *Then, read the core's pre-gate progress and compare to the core's exit - * progress. - *If same, can proceed to take work from the coreloop's queue. When done, - * write FALSE to gate closed var. - *If different, then enter a loop that reads the pre-gate progress, then - * compares to exit progress then to wait progress. When one of two - * matches, proceed. Take work from the coreloop's queue. When done, - * write FALSE to the gate closed var. - * - */ -void inline -gateProtected_stealWorkInto( SchedSlot *currSlot, - VMSQueueStruc *myReadyToAnimateQ, - VirtProcr *masterPr ) - { - VirtProcr *stolenPr; - int32 coreIdx, i, haveAVictim, gotLock; - VMSQueueStruc *victimsQ; - - volatile GateStruc *vicGate; - int32 coreMightBeInProtected; - - - - //see if any other cores have work available to steal - haveAVictim = FALSE; - coreIdx = masterPr->coreAnimatedBy; - for( i = 0; i < NUM_CORES -1; i++ ) - { - if( coreIdx >= NUM_CORES -1 ) - { coreIdx = 0; - } - else - { coreIdx++; - } - victimsQ = _VMSMasterEnv->readyToAnimateQs[coreIdx]; - if( numInVMSQ( victimsQ ) > 0 ) - { haveAVictim = TRUE; - vicGate = _VMSMasterEnv->workStealingGates[ coreIdx ]; - break; - } - } - if( !haveAVictim ) return; //no work to steal, exit - - //have a victim core, now get the stealer-lock - gotLock =__sync_bool_compare_and_swap( &(_VMSMasterEnv->workStealingLock), - UNLOCKED, LOCKED ); - if( !gotLock ) return; //go back to core loop, which will re-start master - - - //====== Start Gate-protection ======= - vicGate->gateClosed = TRUE; - coreMightBeInProtected= vicGate->preGateProgress != vicGate->exitProgress; - while( coreMightBeInProtected ) - { //wait until sure - if( vicGate->preGateProgress == vicGate->waitProgress ) - coreMightBeInProtected = FALSE; - if( vicGate->preGateProgress == vicGate->exitProgress ) - coreMightBeInProtected = FALSE; - } - - stolenPr = readVMSQ ( victimsQ ); - - vicGate->gateClosed = FALSE; - //======= End Gate-protection ======= - - - if( stolenPr != NULL ) //victim could have been in protected and taken - { currSlot->procrAssignedToSlot = stolenPr; - stolenPr->schedSlot = currSlot; - currSlot->needsProcrAssigned = FALSE; - - writeVMSQ( stolenPr, myReadyToAnimateQ ); - } - - //unlock the work stealing lock - _VMSMasterEnv->workStealingLock = UNLOCKED; - } +/* + * Copyright 2010 OpenSourceStewardshipFoundation + * + * Licensed under BSD + */ + + + +#include +#include + +#include "VMS.h" +#include "ProcrContext.h" + + +//=========================================================================== +void inline +stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ, + VirtProcr *masterPr ); + +//=========================================================================== + + + +/*This code is animated by the virtual Master processor. + * + *Polls each sched slot exactly once, hands any requests made by a newly + * done slave to the "request handler" plug-in function + * + *Any slots that need a virt procr assigned are given to the "schedule" + * plug-in function, which tries to assign a virt procr (slave) to it. + * + *When all slots needing a processor have been given to the schedule plug-in, + * a fraction of the procrs successfully scheduled are put into the + * work queue, then a continuation of this function is put in, then the rest + * of the virt procrs that were successfully scheduled. + * + *The first thing the continuation does is busy-wait until the previous + * animation completes. This is because an (unlikely) continuation may + * sneak through queue before previous continuation is done putting second + * part of scheduled slaves in, which is the only race condition. + * + */ + +/*May 29, 2010 -- birth a Master during init so that first core loop to + * start running gets it and does all the stuff for a newly born -- + * from then on, will be doing continuation, but do suspension self + * directly at end of master loop + *So VMS__init just births the master virtual processor same way it births + * all the others -- then does any extra setup needed and puts it into the + * work queue. + *However means have to make masterEnv a global static volatile the same way + * did with readyToAnimateQ in core loop. -- for performance, put the + * jump to the core loop directly in here, and have it directly jump back. + * + * + *Aug 18, 2010 -- Going to a separate MasterVP for each core, to see if this + * avoids the suspected bug in the system stack that causes bizarre faults + * at random places in the system code. + * + *So, this function is coupled to each of the MasterVPs, -- meaning this + * function can't rely on a particular stack and frame -- each MasterVP that + * animates this function has a different one. + * + *At this point, the masterLoop does not write itself into the queue anymore, + * instead, the coreLoop acquires the masterLock when it has nothing to + * animate, and then animates its own masterLoop. However, still try to put + * several AppVPs into the queue to amortize the startup cost of switching + * to the MasterVP. Note, don't have to worry about latency of requests much + * because most requests generate work for same core -- only latency issue + * is case when other cores starved and one core's requests generate work + * for them -- so keep max in queue to 3 or 4.. + */ +void masterLoop( void *initData, VirtProcr *animatingPr ) + { + int32 slotIdx, numSlotsFilled; + VirtProcr *schedVirtPr; + SchedSlot *currSlot, **schedSlots; + MasterEnv *masterEnv; + VMSQueueStruc *readyToAnimateQ; + + SlaveScheduler slaveScheduler; + RequestHandler requestHandler; + void *semanticEnv; + + int32 thisCoresIdx; + VirtProcr *masterPr; + volatile VirtProcr *volatileMasterPr; + + volatileMasterPr = animatingPr; + masterPr = (VirtProcr*)volatileMasterPr; //used to force re-define after jmp + + //First animation of each MasterVP will in turn animate this part + // of setup code.. (VP creator sets up the stack as if this function + // was called normally, but actually get here by jmp) + //So, setup values about stack ptr, jmp pt and all that + //masterPr->nextInstrPt = &&masterLoopStartPt; + + + //Note, got rid of writing the stack and frame ptr up here, because + // only one + // core can ever animate a given MasterVP, so don't need to communicate + // new frame and stack ptr to the MasterVP storage before a second + // version of that MasterVP can get animated on a different core. + //Also got rid of the busy-wait. + + + //masterLoopStartPt: + while(1){ + + //============================= MEASUREMENT STUFF ======================== + #ifdef MEAS__TIME_MASTER + //Total Master time includes one coreloop time -- just assume the core + // loop time is same for Master as for AppVPs, even though it may be + // smaller due to higher predictability of the fixed jmp. + saveLowTimeStampCountInto( masterPr->startMasterTSCLow ); + #endif + //======================================================================== + + masterEnv = (MasterEnv*)_VMSMasterEnv; + + //GCC may optimize so doesn't always re-define from frame-storage + masterPr = (VirtProcr*)volatileMasterPr; //just to make sure after jmp + thisCoresIdx = masterPr->coreAnimatedBy; + readyToAnimateQ = masterEnv->readyToAnimateQs[thisCoresIdx]; + schedSlots = masterEnv->allSchedSlots[thisCoresIdx]; + + requestHandler = masterEnv->requestHandler; + slaveScheduler = masterEnv->slaveScheduler; + semanticEnv = masterEnv->semanticEnv; + + + //Poll each slot's Done flag + numSlotsFilled = 0; + for( slotIdx = 0; slotIdx < NUM_SCHED_SLOTS; slotIdx++) + { + currSlot = schedSlots[ slotIdx ]; + + if( currSlot->workIsDone ) + { + currSlot->workIsDone = FALSE; + currSlot->needsProcrAssigned = TRUE; + + //process requests from slave to master + //====================== MEASUREMENT STUFF =================== + #ifdef MEAS__TIME_PLUGIN + int32 startStamp1, endStamp1; + saveLowTimeStampCountInto( startStamp1 ); + #endif + //============================================================ + (*requestHandler)( currSlot->procrAssignedToSlot, semanticEnv ); + //====================== MEASUREMENT STUFF =================== + #ifdef MEAS__TIME_PLUGIN + saveLowTimeStampCountInto( endStamp1 ); + addIntervalToHist( startStamp1, endStamp1, + _VMSMasterEnv->reqHdlrLowTimeHist ); + addIntervalToHist( startStamp1, endStamp1, + _VMSMasterEnv->reqHdlrHighTimeHist ); + #endif + //============================================================ + } + if( currSlot->needsProcrAssigned ) + { //give slot a new virt procr + schedVirtPr = + (*slaveScheduler)( semanticEnv, thisCoresIdx ); + + if( schedVirtPr != NULL ) + { currSlot->procrAssignedToSlot = schedVirtPr; + schedVirtPr->schedSlot = currSlot; + currSlot->needsProcrAssigned = FALSE; + numSlotsFilled += 1; + + writeVMSQ( schedVirtPr, readyToAnimateQ ); + } + } + } + + + #ifdef USE_WORK_STEALING + //If no slots filled, means no more work, look for work to steal. + if( numSlotsFilled == 0 ) + { gateProtected_stealWorkInto( currSlot, readyToAnimateQ, masterPr ); + } + #endif + + + #ifdef MEAS__TIME_MASTER + saveLowTimeStampCountInto( masterPr->endMasterTSCLow ); + #endif + + masterSwitchToCoreLoop(animatingPr); + flushRegisters(); + }//MasterLoop + + + } + + + +/*This has a race condition -- the coreloops are accessing their own queues + * at the same time that this work-stealer on a different core is trying to + */ +void inline +stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ, + VirtProcr *masterPr ) + { + VirtProcr *stolenPr; + int32 coreIdx, i; + VMSQueueStruc *currQ; + + stolenPr = NULL; + coreIdx = masterPr->coreAnimatedBy; + for( i = 0; i < NUM_CORES -1; i++ ) + { + if( coreIdx >= NUM_CORES -1 ) + { coreIdx = 0; + } + else + { coreIdx++; + } + currQ = _VMSMasterEnv->readyToAnimateQs[coreIdx]; + if( numInVMSQ( currQ ) > 0 ) + { stolenPr = readVMSQ (currQ ); + break; + } + } + + if( stolenPr != NULL ) + { currSlot->procrAssignedToSlot = stolenPr; + stolenPr->schedSlot = currSlot; + currSlot->needsProcrAssigned = FALSE; + + writeVMSQ( stolenPr, readyToAnimateQ ); + } + } + +/*This algorithm makes the common case fast. Make the coreloop passive, + * and show its progress. Make the stealer control a gate that coreloop + * has to pass. + *To avoid interference, only one stealer at a time. Use a global + * stealer-lock. + * + *The pattern is based on a gate -- stealer shuts the gate, then monitors + * to be sure any already past make it all the way out, before starting. + *So, have a "progress" measure just before the gate, then have two after it, + * one is in a "waiting room" outside the gate, the other is at the exit. + *Then, the stealer first shuts the gate, then checks the progress measure + * outside it, then looks to see if the progress measure at the exit is the + * same. If yes, it knows the protected area is empty 'cause no other way + * to get in and the last to get in also exited. + *If the progress measure at the exit is not the same, then the stealer goes + * into a loop checking both the waiting-area and the exit progress-measures + * until one of them shows the same as the measure outside the gate. Might + * as well re-read the measure outside the gate each go around, just to be + * sure. It is guaranteed that one of the two will eventually match the one + * outside the gate. + * + *Here's an informal proof of correctness: + *The gate can be closed at any point, and have only four cases: + * 1) coreloop made it past the gate-closing but not yet past the exit + * 2) coreloop made it past the pre-gate progress update but not yet past + * the gate, + * 3) coreloop is right before the pre-gate update + * 4) coreloop is past the exit and far from the pre-gate update. + * + * Covering the cases in reverse order, + * 4) is not a problem -- stealer will read pre-gate progress, see that it + * matches exit progress, and the gate is closed, so stealer can proceed. + * 3) stealer will read pre-gate progress just after coreloop updates it.. + * so stealer goes into a loop until the coreloop causes wait-progress + * to match pre-gate progress, so then stealer can proceed + * 2) same as 3.. + * 1) stealer reads pre-gate progress, sees that it's different than exit, + * so goes into loop until exit matches pre-gate, now it knows coreloop + * is not in protected and cannot get back in, so can proceed. + * + *Implementation for the stealer: + * + *First, acquire the stealer lock -- only cores with no work to do will + * compete to steal, so not a big performance penalty having only one -- + * will rarely have multiple stealers in a system with plenty of work -- and + * in a system with little work, it doesn't matter. + * + *Note, have single-reader, single-writer pattern for all variables used to + * communicate between stealer and victims + * + *So, scan the queues of the core loops, until find non-empty. Each core + * has its own list that it scans. The list goes in order from closest to + * furthest core, so it steals first from close cores. Later can add + * taking info from the app about overlapping footprints, and scan all the + * others then choose work with the most footprint overlap with the contents + * of this core's cache. + * + *Now, have a victim want to take work from. So, shut the gate in that + * coreloop, by setting the "gate closed" var on its stack to TRUE. + *Then, read the core's pre-gate progress and compare to the core's exit + * progress. + *If same, can proceed to take work from the coreloop's queue. When done, + * write FALSE to gate closed var. + *If different, then enter a loop that reads the pre-gate progress, then + * compares to exit progress then to wait progress. When one of two + * matches, proceed. Take work from the coreloop's queue. When done, + * write FALSE to the gate closed var. + * + */ +void inline +gateProtected_stealWorkInto( SchedSlot *currSlot, + VMSQueueStruc *myReadyToAnimateQ, + VirtProcr *masterPr ) + { + VirtProcr *stolenPr; + int32 coreIdx, i, haveAVictim, gotLock; + VMSQueueStruc *victimsQ; + + volatile GateStruc *vicGate; + int32 coreMightBeInProtected; + + + + //see if any other cores have work available to steal + haveAVictim = FALSE; + coreIdx = masterPr->coreAnimatedBy; + for( i = 0; i < NUM_CORES -1; i++ ) + { + if( coreIdx >= NUM_CORES -1 ) + { coreIdx = 0; + } + else + { coreIdx++; + } + victimsQ = _VMSMasterEnv->readyToAnimateQs[coreIdx]; + if( numInVMSQ( victimsQ ) > 0 ) + { haveAVictim = TRUE; + vicGate = _VMSMasterEnv->workStealingGates[ coreIdx ]; + break; + } + } + if( !haveAVictim ) return; //no work to steal, exit + + //have a victim core, now get the stealer-lock + gotLock =__sync_bool_compare_and_swap( &(_VMSMasterEnv->workStealingLock), + UNLOCKED, LOCKED ); + if( !gotLock ) return; //go back to core loop, which will re-start master + + + //====== Start Gate-protection ======= + vicGate->gateClosed = TRUE; + coreMightBeInProtected= vicGate->preGateProgress != vicGate->exitProgress; + while( coreMightBeInProtected ) + { //wait until sure + if( vicGate->preGateProgress == vicGate->waitProgress ) + coreMightBeInProtected = FALSE; + if( vicGate->preGateProgress == vicGate->exitProgress ) + coreMightBeInProtected = FALSE; + } + + stolenPr = readVMSQ ( victimsQ ); + + vicGate->gateClosed = FALSE; + //======= End Gate-protection ======= + + + if( stolenPr != NULL ) //victim could have been in protected and taken + { currSlot->procrAssignedToSlot = stolenPr; + stolenPr->schedSlot = currSlot; + currSlot->needsProcrAssigned = FALSE; + + writeVMSQ( stolenPr, myReadyToAnimateQ ); + } + + //unlock the work stealing lock + _VMSMasterEnv->workStealingLock = UNLOCKED; + } diff -r ad8213a8e916 -r c1784868dcea ProcrContext.h --- a/ProcrContext.h Thu Oct 06 16:24:17 2011 +0200 +++ b/ProcrContext.h Wed Jan 04 16:10:11 2012 -0800 @@ -1,33 +1,33 @@ -/* - * Copyright 2009 OpenSourceStewardshipFoundation.org - * Licensed under GNU General Public License version 2 - * - * Author: seanhalle@yahoo.com - * - */ - -#ifndef _ProcrContext_H -#define _ProcrContext_H -#define _GNU_SOURCE - -void saveCoreLoopReturnAddr(void **returnAddress); - -void switchToVP(VirtProcr *nextProcr); - -void switchToCoreLoop(VirtProcr *nextProcr); - -void masterSwitchToCoreLoop(VirtProcr *nextProcr); - -void startVirtProcrFn(); - -void *asmTerminateCoreLoop(VirtProcr *currPr); - -#define flushRegisters() \ - asm volatile ("":::"%rbx", "%r12", "%r13","%r14","%r15") - -inline VirtProcr * -create_procr_helper( VirtProcr *newPr, VirtProcrFnPtr fnPtr, - void *initialData, void *stackLocs ); - -#endif /* _ProcrContext_H */ - +/* + * Copyright 2009 OpenSourceStewardshipFoundation.org + * Licensed under GNU General Public License version 2 + * + * Author: seanhalle@yahoo.com + * + */ + +#ifndef _ProcrContext_H +#define _ProcrContext_H +#define _GNU_SOURCE + +void saveCoreLoopReturnAddr(void **returnAddress); + +void switchToVP(VirtProcr *nextProcr); + +void switchToCoreLoop(VirtProcr *nextProcr); + +void masterSwitchToCoreLoop(VirtProcr *nextProcr); + +void startVirtProcrFn(); + +void *asmTerminateCoreLoop(VirtProcr *currPr); + +#define flushRegisters() \ + asm volatile ("":::"%rbx", "%r12", "%r13","%r14","%r15") + +inline VirtProcr * +create_procr_helper( VirtProcr *newPr, VirtProcrFnPtr fnPtr, + void *initialData, void *stackLocs ); + +#endif /* _ProcrContext_H */ + diff -r ad8213a8e916 -r c1784868dcea VMS.h --- a/VMS.h Thu Oct 06 16:24:17 2011 +0200 +++ b/VMS.h Wed Jan 04 16:10:11 2012 -0800 @@ -1,579 +1,579 @@ -/* - * Copyright 2009 OpenSourceStewardshipFoundation.org - * Licensed under GNU General Public License version 2 - * - * Author: seanhalle@yahoo.com - * - */ - -#ifndef _VMS_H -#define _VMS_H -#define _GNU_SOURCE - -#include "VMS_primitive_data_types.h" -#include "Queue_impl/PrivateQueue.h" -#include "Histogram/Histogram.h" -#include "DynArray/DynArray.h" -#include "Hash_impl/PrivateHash.h" -#include "vmalloc.h" - -#include -#include - - -//=============================== Debug =================================== -// -//When SEQUENTIAL is defined, VMS does sequential exe in the main thread -// It still does co-routines and all the mechanisms are the same, it just -// has only a single thread and animates VPs one at a time -//#define SEQUENTIAL - -//#define USE_WORK_STEALING - -//turns on the probe-instrumentation in the application -- when not -// defined, the calls to the probe functions turn into comments -#define STATS__ENABLE_PROBES -//#define TURN_ON_DEBUG_PROBES - -//These defines turn types of bug messages on and off -// be sure debug messages are un-commented (next block of defines) -#define dbgAppFlow TRUE /* Top level flow of application code -- general*/ -#define dbgProbes FALSE /* for issues inside probes themselves*/ -#define dbgB2BMaster FALSE /* in coreloop, back to back master VPs*/ -#define dbgRqstHdlr FALSE /* in request handler code*/ - -//Comment or un- the substitute half to turn on/off types of debug message -#define DEBUG( bool, msg) \ -// if( bool){ printf(msg); fflush(stdin);} -#define DEBUG1( bool, msg, param) \ -// if(bool){printf(msg, param); fflush(stdin);} -#define DEBUG2( bool, msg, p1, p2) \ -// if(bool) {printf(msg, p1, p2); fflush(stdin);} - -#define ERROR(msg) printf(msg); -#define ERROR1(msg, param) printf(msg, param); -#define ERROR2(msg, p1, p2) printf(msg, p1, p2); - -//=========================== STATS ======================= - - //when MEAS__TIME_STAMP_SUSP is defined, causes code to be inserted and - // compiled-in that saves the low part of the time stamp count just before - // suspending a processor and just after resuming that processorsrc/VPThread_lib/VMS/VMS.h:322: warning: previous declaration of ‘VMS__create_procr’ was here. It is - // saved into a field added to VirtProcr. Have to sanity-check for - // rollover of low portion into high portion. -//#define MEAS__TIME_STAMP_SUSP -//#define MEAS__TIME_MASTER -#define MEAS__TIME_PLUGIN -#define MEAS__TIME_MALLOC -//#define MEAS__TIME_MASTER_LOCK -#define MEAS__NUM_TIMES_TO_RUN 100000 - - //For code that calculates normalization-offset between TSC counts of - // different cores. -#define NUM_TSC_ROUND_TRIPS 10 - - -//========================= Hardware related Constants ===================== - //This value is the number of hardware threads in the shared memory - // machine -//#define NUM_CORES 8 - - // tradeoff amortizing master fixed overhead vs imbalance potential - // when work-stealing, can make bigger, at risk of losing cache affinity -#define NUM_SCHED_SLOTS 5 - -#define MIN_WORK_UNIT_CYCLES 20000 - -#define MASTERLOCK_RETRIES 10000 - - // stack size in virtual processors created -#define VIRT_PROCR_STACK_SIZE 0x8000 /* 32K */ - - // memory for VMS__malloc -#define MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE 0x10000000 /* 256M */ - -#define CACHE_LINE 64 -#define PAGE_SIZE 4096 - - -//============================== - -#define SUCCESS 0 - -#define writeVMSQ writePrivQ -#define readVMSQ readPrivQ -#define makeVMSQ makeVMSPrivQ -#define numInVMSQ numInPrivQ -#define VMSQueueStruc PrivQueueStruc - - - -//=========================================================================== -typedef unsigned long long TSCount; - -typedef struct _SchedSlot SchedSlot; -typedef struct _VMSReqst VMSReqst; -typedef struct _VirtProcr VirtProcr; -typedef struct _IntervalProbe IntervalProbe; -typedef struct _GateStruc GateStruc; - - -typedef VirtProcr * (*SlaveScheduler) ( void *, int ); //semEnv, coreIdx -typedef void (*RequestHandler) ( VirtProcr *, void * ); //prWReqst, semEnv -typedef void (*VirtProcrFnPtr) ( void *, VirtProcr * ); //initData, animPr -typedef void VirtProcrFn ( void *, VirtProcr * ); //initData, animPr -typedef void (*ResumePrFnPtr) ( VirtProcr *, void * ); - - -//============= Requests =========== -// - -enum VMSReqstType //avoid starting enums at 0, for debug reasons - { - semantic = 1, - createReq, - dissipate, - VMSSemantic //goes with VMSSemReqst below - }; - -struct _VMSReqst - { - enum VMSReqstType reqType;//used for dissipate and in future for IO requests - void *semReqData; - - VMSReqst *nextReqst; - }; -//VMSReqst - -enum VMSSemReqstType //These are equivalent to semantic requests, but for - { // VMS's services available directly to app, like OS - createProbe = 1, // and probe services -- like a VMS-wide built-in lang - openFile, - otherIO - }; - -typedef struct - { enum VMSSemReqstType reqType; - VirtProcr *requestingPr; - char *nameStr; //for create probe - } - VMSSemReq; - - -//==================== Core data structures =================== - -struct _SchedSlot - { - int workIsDone; - int needsProcrAssigned; - VirtProcr *procrAssignedToSlot; - }; -//SchedSlot - -/*WARNING: re-arranging this data structure could cause VP switching - * assembly code to fail -- hard-codes offsets of fields - */ -struct _VirtProcr - { int procrID; //for debugging -- count up each time create - int coreAnimatedBy; - void *startOfStack; - void *stackPtr; - void *framePtr; - void *nextInstrPt; - - void *coreLoopStartPt; //allows proto-runtime to be linked later - void *coreLoopFramePtr; //restore before jmp back to core loop - void *coreLoopStackPtr; //restore before jmp back to core loop - - void *initialData; - - SchedSlot *schedSlot; - VMSReqst *requests; - - void *semanticData; //this livesUSE_GNU here for the life of VP - void *dataRetFromReq;//values returned from plugin to VP go here - - //=========== MEASUREMENT STUFF ========== - #ifdef MEAS__TIME_STAMP_SUSP - unsigned int preSuspTSCLow; - unsigned int postSuspTSCLow; - #endif - #ifdef MEAS__TIME_MASTER /* in VirtProcr because multiple masterVPs*/ - unsigned int startMasterTSCLow;USE_GNU - unsigned int endMasterTSCLow; - #endif - //======================================== - - float64 createPtInSecs; //have space but don't use on some configs - }; -//VirtProcr - - -/*WARNING: re-arranging this data structure could cause VP-switching - * assembly code to fail -- hard-codes offsets of fields - * (because -O3 messes with things otherwise) - */ -typedef struct - { - SlaveScheduler slaveScheduler; - RequestHandler requestHandler; - - SchedSlot ***allSchedSlots; - VMSQueueStruc **readyToAnimateQs; - VirtProcr **masterVPs; - - void *semanticEnv; - void *OSEventStruc; //for future, when add I/O to BLIS - MallocProlog *freeListHead; - int32 amtOfOutstandingMem; //total currently allocated - - void *coreLoopReturnPt;//addr to jump to to re-enter coreLoop - - int32 setupComplete; - volatile int32 masterLock; - - int32 numMasterInARow[NUM_CORES];//detect back-to-back masterVP - GateStruc *workStealingGates[ NUM_CORES ]; //concurrent work-steal - int32 workStealingLock; - - int32 numProcrsCreated; //gives ordering to processor creation - - //=========== MEASUREMENT STUFF ============= - IntervalProbe **intervalProbes; - PrivDynArrayInfo *dynIntervalProbesInfo; - HashTable *probeNameHashTbl; - int32 masterCreateProbeID; - float64 createPtInSecs; - Histogram **measHists; - PrivDynArrayInfo *measHistsInfo; - #ifdef MEAS__TIME_PLUGIN - Histogram *reqHdlrLowTimeHist; - Histogram *reqHdlrHighTimeHist; - #endif - #ifdef MEAS__TIME_MALLOC - Histogram *mallocTimeHist; - Histogram *freeTimeHist; - #endif - #ifdef MEAS__TIME_MASTER_LOCK - Histogram *masterLockLowTimeHist; - Histogram *masterLockHighTimeHist; - #endif - } -MasterEnv; - -//========================= Extra Stuff Data Strucs ======================= -typedef struct - { - - } -VMSExcp; - -struct _GateStruc - { - int32 gateClosed; - int32 preGateProgress; - int32 waitProgress; - int32 exitProgress; - }; -//GateStruc - -//======================= OS Thread related =============================== - -void * coreLoop( void *paramsIn ); //standard PThreads fn prototype -void * coreLoop_Seq( void *paramsIn ); //standard PThreads fn prototype -void masterLoop( void *initData, VirtProcr *masterPr ); - - -typedef struct - { - void *endThdPt; - unsigned int coreNum; - } -ThdParams; - -pthread_t coreLoopThdHandles[ NUM_CORES ]; //pthread's virt-procr state -ThdParams *coreLoopThdParams [ NUM_CORES ]; -pthread_mutex_t suspendLock; -pthread_cond_t suspend_cond; - - - -//===================== Global Vars =================== - -volatile MasterEnv *_VMSMasterEnv; - - - - -//=========================== Function Prototypes ========================= - - -//========== Setup and shutdown ========== -void -VMS__init(); - -void -VMS__init_Seq(); - -void -VMS__start_the_work_then_wait_until_done(); - -void -VMS__start_the_work_then_wait_until_done_Seq(); - -inline VirtProcr * -VMS__create_procr( VirtProcrFnPtr fnPtr, void *initialData ); - -void -VMS__dissipate_procr( VirtProcr *procrToDissipate ); - - //Use this to create processor inside entry point & other places outside - // the VMS system boundary (IE, not run in slave nor Master) -VirtProcr * -VMS_ext__create_procr( VirtProcrFnPtr fnPtr, void *initialData ); - -void -VMS_ext__dissipate_procr( VirtProcr *procrToDissipate ); - -void -VMS__throw_exception( char *msgStr, VirtProcr *reqstPr, VMSExcp *excpData ); - -void -VMS__shutdown(); - -void -VMS__cleanup_at_end_of_shutdown(); - -void * -VMS__give_sem_env_for( VirtProcr *animPr ); - - -//============== Request Related =============== - -void -VMS__suspend_procr( VirtProcr *callingPr ); - -inline void -VMS__add_sem_request_in_mallocd_VMSReqst( void *semReqData, VirtProcr *callingPr ); - -inline void -VMS__send_sem_request( void *semReqData, VirtProcr *callingPr ); - -void -VMS__send_create_procr_req( void *semReqData, VirtProcr *reqstingPr ); - -void inline -VMS__send_dissipate_req( VirtProcr *prToDissipate ); - -inline void -VMS__send_VMSSem_request( void *semReqData, VirtProcr *callingPr ); - -VMSReqst * -VMS__take_next_request_out_of( VirtProcr *procrWithReq ); - -inline void * -VMS__take_sem_reqst_from( VMSReqst *req ); - -void inline -VMS__handle_VMSSemReq( VMSReqst *req, VirtProcr *requestingPr, void *semEnv, - ResumePrFnPtr resumePrFnPtr ); - -//======================== STATS ====================== - -//===== RDTSC wrapper ===== //Also runs with x86_64 code - -#define saveTimeStampCountInto(low, high) \ - asm volatile("RDTSC; \ - movl %%eax, %0; \ - movl %%edx, %1;" \ - /* outputs */ : "=m" (low), "=m" (high)\ - /* inputs */ : \ - /* clobber */ : "%eax", "%edx" \ - ); - -#define saveLowTimeStampCountInto(low) \ - asm volatile("RDTSC; \ - movl %%eax, %0;" \ - /* outputs */ : "=m" (low) \ - /* inputs */ : \ - /* clobber */ : "%eax", "%edx" \ - ); - -//==================== -#define makeAMeasHist( idx, name, numBins, startVal, binWidth ) \ - makeHighestDynArrayIndexBeAtLeast( _VMSMasterEnv->measHistsInfo, idx ); \ - _VMSMasterEnv->measHists[idx] = \ - makeFixedBinHist( numBins, startVal, binWidth, name ); - - -#define MEAS__SUB_CREATE /*turn on/off subtraction of create from plugin*/ - -#ifdef VPTHREAD - -//VPThread -#define createHistIdx 0 -#define mutexLockHistIdx 1 -#define mutexUnlockHistIdx 2 -#define condWaitHistIdx 3 -#define condSignalHistIdx 4 - -#define MakeTheMeasHists() \ - _VMSMasterEnv->measHistsInfo = \ - makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \ - makeAMeasHist( createHistIdx, "create", 250, 0, 100 ) \ - makeAMeasHist( mutexLockHistIdx, "mutex_lock", 50, 0, 100 ) \ - makeAMeasHist( mutexUnlockHistIdx, "mutex_unlock", 50, 0, 100 ) \ - makeAMeasHist( condWaitHistIdx, "cond_wait", 50, 0, 100 ) \ - makeAMeasHist( condSignalHistIdx, "cond_signal", 50, 0, 100 ) - -#endif - - -#ifdef VCILK - -//VCilk -#define spawnHistIdx 0 -#define syncHistIdx 1 - -#define MakeTheMeasHists() \ - _VMSMasterEnv->measHistsInfo = \ - makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \ - makeAMeasHist( spawnHistIdx, "Spawn", 50, 0, 200 ) \ - makeAMeasHist( syncHistIdx, "Sync", 50, 0, 200 ) - - -#endif - -#ifdef SSR - -//SSR -#define SendFromToHistIdx 0 -#define SendOfTypeHistIdx 1 -#define ReceiveFromToHistIdx 2 -#define ReceiveOfTypeHistIdx 3 - -#define MakeTheMeasHists() \ - _VMSMasterEnv->measHistsInfo = \ - makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \ - makeAMeasHist( SendFromToHistIdx, "SendFromTo", 50, 0, 100 ) \ - makeAMeasHist( SendOfTypeHistIdx, "SendOfType", 50, 0, 100 ) \ - makeAMeasHist( ReceiveFromToHistIdx,"ReceiveFromTo", 50, 0, 100 ) \ - makeAMeasHist( ReceiveOfTypeHistIdx,"ReceiveOfType", 50, 0, 100 ) - -#endif - -//=========================================================================== -//VPThread - - -#define Meas_startCreate \ - int32 startStamp, endStamp; \ - saveLowTimeStampCountInto( startStamp ); \ - -#define Meas_endCreate \ - saveLowTimeStampCountInto( endStamp ); \ - addIntervalToHist( startStamp, endStamp, \ - _VMSMasterEnv->measHists[ createHistIdx ] ); - -#define Meas_startMutexLock \ - int32 startStamp, endStamp; \ - saveLowTimeStampCountInto( startStamp ); \ - -#define Meas_endMutexLock \ - saveLowTimeStampCountInto( endStamp ); \ - addIntervalToHist( startStamp, endStamp, \ - _VMSMasterEnv->measHists[ mutexLockHistIdx ] ); - -#define Meas_startMutexUnlock \ - int32 startStamp, endStamp; \ - saveLowTimeStampCountInto( startStamp ); \ - -#define Meas_endMutexUnlock \ - saveLowTimeStampCountInto( endStamp ); \ - addIntervalToHist( startStamp, endStamp, \ - _VMSMasterEnv->measHists[ mutexUnlockHistIdx ] ); - -#define Meas_startCondWait \ - int32 startStamp, endStamp; \ - saveLowTimeStampCountInto( startStamp ); \ - -#define Meas_endCondWait \ - saveLowTimeStampCountInto( endStamp ); \ - addIntervalToHist( startStamp, endStamp, \ - _VMSMasterEnv->measHists[ condWaitHistIdx ] ); - -#define Meas_startCondSignal \ - int32 startStamp, endStamp; \ - saveLowTimeStampCountInto( startStamp ); \ - -#define Meas_endCondSignal \ - saveLowTimeStampCountInto( endStamp ); \ - addIntervalToHist( startStamp, endStamp, \ - _VMSMasterEnv->measHists[ condSignalHistIdx ] ); - -//=========================================================================== -// VCilk -#define Meas_startSpawn \ - int32 startStamp, endStamp; \ - saveLowTimeStampCountInto( startStamp ); \ - -#define Meas_endSpawn \ - saveLowTimeStampCountInto( endStamp ); \ - addIntervalToHist( startStamp, endStamp, \ - _VMSMasterEnv->measHists[ spawnHistIdx ] ); - -#define Meas_startSync \ - int32 startStamp, endStamp; \ - saveLowTimeStampCountInto( startStamp ); \ - -#define Meas_endSync \ - saveLowTimeStampCountInto( endStamp ); \ - addIntervalToHist( startStamp, endStamp, \ - _VMSMasterEnv->measHists[ syncHistIdx ] ); - -//=========================================================================== -// SSR -#define Meas_startSendFromTo \ - int32 startStamp, endStamp; \ - saveLowTimeStampCountInto( startStamp ); \ - -#define Meas_endSendFromTo \ - saveLowTimeStampCountInto( endStamp ); \ - addIntervalToHist( startStamp, endStamp, \ - _VMSMasterEnv->measHists[ SendFromToHistIdx ] ); - -#define Meas_startSendOfType \ - int32 startStamp, endStamp; \ - saveLowTimeStampCountInto( startStamp ); \ - -#define Meas_endSendOfType \ - saveLowTimeStampCountInto( endStamp ); \ - addIntervalToHist( startStamp, endStamp, \ - _VMSMasterEnv->measHists[ SendOfTypeHistIdx ] ); - -#define Meas_startReceiveFromTo \ - int32 startStamp, endStamp; \ - saveLowTimeStampCountInto( startStamp ); \ - -#define Meas_endReceiveFromTo \ - saveLowTimeStampCountInto( endStamp ); \ - addIntervalToHist( startStamp, endStamp, \ - _VMSMasterEnv->measHists[ ReceiveFromToHistIdx ] ); - -#define Meas_startReceiveOfType \ - int32 startStamp, endStamp; \ - saveLowTimeStampCountInto( startStamp ); \ - -#define Meas_endReceiveOfType \ - saveLowTimeStampCountInto( endStamp ); \ - addIntervalToHist( startStamp, endStamp, \ - _VMSMasterEnv->measHists[ReceiveOfTypeHistIdx ] ); - -//===== - -#include "ProcrContext.h" -#include "probes.h" -#include "vutilities.h" - -#endif /* _VMS_H */ - +/* + * Copyright 2009 OpenSourceStewardshipFoundation.org + * Licensed under GNU General Public License version 2 + * + * Author: seanhalle@yahoo.com + * + */ + +#ifndef _VMS_H +#define _VMS_H +#define _GNU_SOURCE + +#include "VMS_primitive_data_types.h" +#include "Queue_impl/PrivateQueue.h" +#include "Histogram/Histogram.h" +#include "DynArray/DynArray.h" +#include "Hash_impl/PrivateHash.h" +#include "vmalloc.h" + +#include +#include + + +//=============================== Debug =================================== +// +//When SEQUENTIAL is defined, VMS does sequential exe in the main thread +// It still does co-routines and all the mechanisms are the same, it just +// has only a single thread and animates VPs one at a time +//#define SEQUENTIAL + +//#define USE_WORK_STEALING + +//turns on the probe-instrumentation in the application -- when not +// defined, the calls to the probe functions turn into comments +#define STATS__ENABLE_PROBES +//#define TURN_ON_DEBUG_PROBES + +//These defines turn types of bug messages on and off +// be sure debug messages are un-commented (next block of defines) +#define dbgAppFlow TRUE /* Top level flow of application code -- general*/ +#define dbgProbes FALSE /* for issues inside probes themselves*/ +#define dbgB2BMaster FALSE /* in coreloop, back to back master VPs*/ +#define dbgRqstHdlr FALSE /* in request handler code*/ + +//Comment or un- the substitute half to turn on/off types of debug message +#define DEBUG( bool, msg) \ +// if( bool){ printf(msg); fflush(stdin);} +#define DEBUG1( bool, msg, param) \ +// if(bool){printf(msg, param); fflush(stdin);} +#define DEBUG2( bool, msg, p1, p2) \ +// if(bool) {printf(msg, p1, p2); fflush(stdin);} + +#define ERROR(msg) printf(msg); +#define ERROR1(msg, param) printf(msg, param); +#define ERROR2(msg, p1, p2) printf(msg, p1, p2); + +//=========================== STATS ======================= + + //when MEAS__TIME_STAMP_SUSP is defined, causes code to be inserted and + // compiled-in that saves the low part of the time stamp count just before + // suspending a processor and just after resuming that processorsrc/VPThread_lib/VMS/VMS.h:322: warning: previous declaration of ‘VMS__create_procr’ was here. It is + // saved into a field added to VirtProcr. Have to sanity-check for + // rollover of low portion into high portion. +//#define MEAS__TIME_STAMP_SUSP +//#define MEAS__TIME_MASTER +#define MEAS__TIME_PLUGIN +#define MEAS__TIME_MALLOC +//#define MEAS__TIME_MASTER_LOCK +#define MEAS__NUM_TIMES_TO_RUN 100000 + + //For code that calculates normalization-offset between TSC counts of + // different cores. +#define NUM_TSC_ROUND_TRIPS 10 + + +//========================= Hardware related Constants ===================== + //This value is the number of hardware threads in the shared memory + // machine +//#define NUM_CORES 8 + + // tradeoff amortizing master fixed overhead vs imbalance potential + // when work-stealing, can make bigger, at risk of losing cache affinity +#define NUM_SCHED_SLOTS 5 + +#define MIN_WORK_UNIT_CYCLES 20000 + +#define MASTERLOCK_RETRIES 10000 + + // stack size in virtual processors created +#define VIRT_PROCR_STACK_SIZE 0x8000 /* 32K */ + + // memory for VMS__malloc +#define MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE 0x10000000 /* 256M */ + +#define CACHE_LINE 64 +#define PAGE_SIZE 4096 + + +//============================== + +#define SUCCESS 0 + +#define writeVMSQ writePrivQ +#define readVMSQ readPrivQ +#define makeVMSQ makeVMSPrivQ +#define numInVMSQ numInPrivQ +#define VMSQueueStruc PrivQueueStruc + + + +//=========================================================================== +typedef unsigned long long TSCount; + +typedef struct _SchedSlot SchedSlot; +typedef struct _VMSReqst VMSReqst; +typedef struct _VirtProcr VirtProcr; +typedef struct _IntervalProbe IntervalProbe; +typedef struct _GateStruc GateStruc; + + +typedef VirtProcr * (*SlaveScheduler) ( void *, int ); //semEnv, coreIdx +typedef void (*RequestHandler) ( VirtProcr *, void * ); //prWReqst, semEnv +typedef void (*VirtProcrFnPtr) ( void *, VirtProcr * ); //initData, animPr +typedef void VirtProcrFn ( void *, VirtProcr * ); //initData, animPr +typedef void (*ResumePrFnPtr) ( VirtProcr *, void * ); + + +//============= Requests =========== +// + +enum VMSReqstType //avoid starting enums at 0, for debug reasons + { + semantic = 1, + createReq, + dissipate, + VMSSemantic //goes with VMSSemReqst below + }; + +struct _VMSReqst + { + enum VMSReqstType reqType;//used for dissipate and in future for IO requests + void *semReqData; + + VMSReqst *nextReqst; + }; +//VMSReqst + +enum VMSSemReqstType //These are equivalent to semantic requests, but for + { // VMS's services available directly to app, like OS + createProbe = 1, // and probe services -- like a VMS-wide built-in lang + openFile, + otherIO + }; + +typedef struct + { enum VMSSemReqstType reqType; + VirtProcr *requestingPr; + char *nameStr; //for create probe + } + VMSSemReq; + + +//==================== Core data structures =================== + +struct _SchedSlot + { + int workIsDone; + int needsProcrAssigned; + VirtProcr *procrAssignedToSlot; + }; +//SchedSlot + +/*WARNING: re-arranging this data structure could cause VP switching + * assembly code to fail -- hard-codes offsets of fields + */ +struct _VirtProcr + { int procrID; //for debugging -- count up each time create + int coreAnimatedBy; + void *startOfStack; + void *stackPtr; + void *framePtr; + void *nextInstrPt; + + void *coreLoopStartPt; //allows proto-runtime to be linked later + void *coreLoopFramePtr; //restore before jmp back to core loop + void *coreLoopStackPtr; //restore before jmp back to core loop + + void *initialData; + + SchedSlot *schedSlot; + VMSReqst *requests; + + void *semanticData; //this livesUSE_GNU here for the life of VP + void *dataRetFromReq;//values returned from plugin to VP go here + + //=========== MEASUREMENT STUFF ========== + #ifdef MEAS__TIME_STAMP_SUSP + unsigned int preSuspTSCLow; + unsigned int postSuspTSCLow; + #endif + #ifdef MEAS__TIME_MASTER /* in VirtProcr because multiple masterVPs*/ + unsigned int startMasterTSCLow;USE_GNU + unsigned int endMasterTSCLow; + #endif + //======================================== + + float64 createPtInSecs; //have space but don't use on some configs + }; +//VirtProcr + + +/*WARNING: re-arranging this data structure could cause VP-switching + * assembly code to fail -- hard-codes offsets of fields + * (because -O3 messes with things otherwise) + */ +typedef struct + { + SlaveScheduler slaveScheduler; + RequestHandler requestHandler; + + SchedSlot ***allSchedSlots; + VMSQueueStruc **readyToAnimateQs; + VirtProcr **masterVPs; + + void *semanticEnv; + void *OSEventStruc; //for future, when add I/O to BLIS + MallocProlog *freeListHead; + int32 amtOfOutstandingMem; //total currently allocated + + void *coreLoopReturnPt;//addr to jump to to re-enter coreLoop + + int32 setupComplete; + volatile int32 masterLock; + + int32 numMasterInARow[NUM_CORES];//detect back-to-back masterVP + GateStruc *workStealingGates[ NUM_CORES ]; //concurrent work-steal + int32 workStealingLock; + + int32 numProcrsCreated; //gives ordering to processor creation + + //=========== MEASUREMENT STUFF ============= + IntervalProbe **intervalProbes; + PrivDynArrayInfo *dynIntervalProbesInfo; + HashTable *probeNameHashTbl; + int32 masterCreateProbeID; + float64 createPtInSecs; + Histogram **measHists; + PrivDynArrayInfo *measHistsInfo; + #ifdef MEAS__TIME_PLUGIN + Histogram *reqHdlrLowTimeHist; + Histogram *reqHdlrHighTimeHist; + #endif + #ifdef MEAS__TIME_MALLOC + Histogram *mallocTimeHist; + Histogram *freeTimeHist; + #endif + #ifdef MEAS__TIME_MASTER_LOCK + Histogram *masterLockLowTimeHist; + Histogram *masterLockHighTimeHist; + #endif + } +MasterEnv; + +//========================= Extra Stuff Data Strucs ======================= +typedef struct + { + + } +VMSExcp; + +struct _GateStruc + { + int32 gateClosed; + int32 preGateProgress; + int32 waitProgress; + int32 exitProgress; + }; +//GateStruc + +//======================= OS Thread related =============================== + +void * coreLoop( void *paramsIn ); //standard PThreads fn prototype +void * coreLoop_Seq( void *paramsIn ); //standard PThreads fn prototype +void masterLoop( void *initData, VirtProcr *masterPr ); + + +typedef struct + { + void *endThdPt; + unsigned int coreNum; + } +ThdParams; + +pthread_t coreLoopThdHandles[ NUM_CORES ]; //pthread's virt-procr state +ThdParams *coreLoopThdParams [ NUM_CORES ]; +pthread_mutex_t suspendLock; +pthread_cond_t suspend_cond; + + + +//===================== Global Vars =================== + +volatile MasterEnv *_VMSMasterEnv; + + + + +//=========================== Function Prototypes ========================= + + +//========== Setup and shutdown ========== +void +VMS__init(); + +void +VMS__init_Seq(); + +void +VMS__start_the_work_then_wait_until_done(); + +void +VMS__start_the_work_then_wait_until_done_Seq(); + +inline VirtProcr * +VMS__create_procr( VirtProcrFnPtr fnPtr, void *initialData ); + +void +VMS__dissipate_procr( VirtProcr *procrToDissipate ); + + //Use this to create processor inside entry point & other places outside + // the VMS system boundary (IE, not run in slave nor Master) +VirtProcr * +VMS_ext__create_procr( VirtProcrFnPtr fnPtr, void *initialData ); + +void +VMS_ext__dissipate_procr( VirtProcr *procrToDissipate ); + +void +VMS__throw_exception( char *msgStr, VirtProcr *reqstPr, VMSExcp *excpData ); + +void +VMS__shutdown(); + +void +VMS__cleanup_at_end_of_shutdown(); + +void * +VMS__give_sem_env_for( VirtProcr *animPr ); + + +//============== Request Related =============== + +void +VMS__suspend_procr( VirtProcr *callingPr ); + +inline void +VMS__add_sem_request_in_mallocd_VMSReqst( void *semReqData, VirtProcr *callingPr ); + +inline void +VMS__send_sem_request( void *semReqData, VirtProcr *callingPr ); + +void +VMS__send_create_procr_req( void *semReqData, VirtProcr *reqstingPr ); + +void inline +VMS__send_dissipate_req( VirtProcr *prToDissipate ); + +inline void +VMS__send_VMSSem_request( void *semReqData, VirtProcr *callingPr ); + +VMSReqst * +VMS__take_next_request_out_of( VirtProcr *procrWithReq ); + +inline void * +VMS__take_sem_reqst_from( VMSReqst *req ); + +void inline +VMS__handle_VMSSemReq( VMSReqst *req, VirtProcr *requestingPr, void *semEnv, + ResumePrFnPtr resumePrFnPtr ); + +//======================== STATS ====================== + +//===== RDTSC wrapper ===== //Also runs with x86_64 code + +#define saveTimeStampCountInto(low, high) \ + asm volatile("RDTSC; \ + movl %%eax, %0; \ + movl %%edx, %1;" \ + /* outputs */ : "=m" (low), "=m" (high)\ + /* inputs */ : \ + /* clobber */ : "%eax", "%edx" \ + ); + +#define saveLowTimeStampCountInto(low) \ + asm volatile("RDTSC; \ + movl %%eax, %0;" \ + /* outputs */ : "=m" (low) \ + /* inputs */ : \ + /* clobber */ : "%eax", "%edx" \ + ); + +//==================== +#define makeAMeasHist( idx, name, numBins, startVal, binWidth ) \ + makeHighestDynArrayIndexBeAtLeast( _VMSMasterEnv->measHistsInfo, idx ); \ + _VMSMasterEnv->measHists[idx] = \ + makeFixedBinHist( numBins, startVal, binWidth, name ); + + +#define MEAS__SUB_CREATE /*turn on/off subtraction of create from plugin*/ + +#ifdef VPTHREAD + +//VPThread +#define createHistIdx 0 +#define mutexLockHistIdx 1 +#define mutexUnlockHistIdx 2 +#define condWaitHistIdx 3 +#define condSignalHistIdx 4 + +#define MakeTheMeasHists() \ + _VMSMasterEnv->measHistsInfo = \ + makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \ + makeAMeasHist( createHistIdx, "create", 250, 0, 100 ) \ + makeAMeasHist( mutexLockHistIdx, "mutex_lock", 50, 0, 100 ) \ + makeAMeasHist( mutexUnlockHistIdx, "mutex_unlock", 50, 0, 100 ) \ + makeAMeasHist( condWaitHistIdx, "cond_wait", 50, 0, 100 ) \ + makeAMeasHist( condSignalHistIdx, "cond_signal", 50, 0, 100 ) + +#endif + + +#ifdef VCILK + +//VCilk +#define spawnHistIdx 0 +#define syncHistIdx 1 + +#define MakeTheMeasHists() \ + _VMSMasterEnv->measHistsInfo = \ + makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \ + makeAMeasHist( spawnHistIdx, "Spawn", 50, 0, 200 ) \ + makeAMeasHist( syncHistIdx, "Sync", 50, 0, 200 ) + + +#endif + +#ifdef SSR + +//SSR +#define SendFromToHistIdx 0 +#define SendOfTypeHistIdx 1 +#define ReceiveFromToHistIdx 2 +#define ReceiveOfTypeHistIdx 3 + +#define MakeTheMeasHists() \ + _VMSMasterEnv->measHistsInfo = \ + makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \ + makeAMeasHist( SendFromToHistIdx, "SendFromTo", 50, 0, 100 ) \ + makeAMeasHist( SendOfTypeHistIdx, "SendOfType", 50, 0, 100 ) \ + makeAMeasHist( ReceiveFromToHistIdx,"ReceiveFromTo", 50, 0, 100 ) \ + makeAMeasHist( ReceiveOfTypeHistIdx,"ReceiveOfType", 50, 0, 100 ) + +#endif + +//=========================================================================== +//VPThread + + +#define Meas_startCreate \ + int32 startStamp, endStamp; \ + saveLowTimeStampCountInto( startStamp ); \ + +#define Meas_endCreate \ + saveLowTimeStampCountInto( endStamp ); \ + addIntervalToHist( startStamp, endStamp, \ + _VMSMasterEnv->measHists[ createHistIdx ] ); + +#define Meas_startMutexLock \ + int32 startStamp, endStamp; \ + saveLowTimeStampCountInto( startStamp ); \ + +#define Meas_endMutexLock \ + saveLowTimeStampCountInto( endStamp ); \ + addIntervalToHist( startStamp, endStamp, \ + _VMSMasterEnv->measHists[ mutexLockHistIdx ] ); + +#define Meas_startMutexUnlock \ + int32 startStamp, endStamp; \ + saveLowTimeStampCountInto( startStamp ); \ + +#define Meas_endMutexUnlock \ + saveLowTimeStampCountInto( endStamp ); \ + addIntervalToHist( startStamp, endStamp, \ + _VMSMasterEnv->measHists[ mutexUnlockHistIdx ] ); + +#define Meas_startCondWait \ + int32 startStamp, endStamp; \ + saveLowTimeStampCountInto( startStamp ); \ + +#define Meas_endCondWait \ + saveLowTimeStampCountInto( endStamp ); \ + addIntervalToHist( startStamp, endStamp, \ + _VMSMasterEnv->measHists[ condWaitHistIdx ] ); + +#define Meas_startCondSignal \ + int32 startStamp, endStamp; \ + saveLowTimeStampCountInto( startStamp ); \ + +#define Meas_endCondSignal \ + saveLowTimeStampCountInto( endStamp ); \ + addIntervalToHist( startStamp, endStamp, \ + _VMSMasterEnv->measHists[ condSignalHistIdx ] ); + +//=========================================================================== +// VCilk +#define Meas_startSpawn \ + int32 startStamp, endStamp; \ + saveLowTimeStampCountInto( startStamp ); \ + +#define Meas_endSpawn \ + saveLowTimeStampCountInto( endStamp ); \ + addIntervalToHist( startStamp, endStamp, \ + _VMSMasterEnv->measHists[ spawnHistIdx ] ); + +#define Meas_startSync \ + int32 startStamp, endStamp; \ + saveLowTimeStampCountInto( startStamp ); \ + +#define Meas_endSync \ + saveLowTimeStampCountInto( endStamp ); \ + addIntervalToHist( startStamp, endStamp, \ + _VMSMasterEnv->measHists[ syncHistIdx ] ); + +//=========================================================================== +// SSR +#define Meas_startSendFromTo \ + int32 startStamp, endStamp; \ + saveLowTimeStampCountInto( startStamp ); \ + +#define Meas_endSendFromTo \ + saveLowTimeStampCountInto( endStamp ); \ + addIntervalToHist( startStamp, endStamp, \ + _VMSMasterEnv->measHists[ SendFromToHistIdx ] ); + +#define Meas_startSendOfType \ + int32 startStamp, endStamp; \ + saveLowTimeStampCountInto( startStamp ); \ + +#define Meas_endSendOfType \ + saveLowTimeStampCountInto( endStamp ); \ + addIntervalToHist( startStamp, endStamp, \ + _VMSMasterEnv->measHists[ SendOfTypeHistIdx ] ); + +#define Meas_startReceiveFromTo \ + int32 startStamp, endStamp; \ + saveLowTimeStampCountInto( startStamp ); \ + +#define Meas_endReceiveFromTo \ + saveLowTimeStampCountInto( endStamp ); \ + addIntervalToHist( startStamp, endStamp, \ + _VMSMasterEnv->measHists[ ReceiveFromToHistIdx ] ); + +#define Meas_startReceiveOfType \ + int32 startStamp, endStamp; \ + saveLowTimeStampCountInto( startStamp ); \ + +#define Meas_endReceiveOfType \ + saveLowTimeStampCountInto( endStamp ); \ + addIntervalToHist( startStamp, endStamp, \ + _VMSMasterEnv->measHists[ReceiveOfTypeHistIdx ] ); + +//===== + +#include "ProcrContext.h" +#include "probes.h" +#include "vutilities.h" + +#endif /* _VMS_H */ + diff -r ad8213a8e916 -r c1784868dcea VMS_primitive_data_types.h --- a/VMS_primitive_data_types.h Thu Oct 06 16:24:17 2011 +0200 +++ b/VMS_primitive_data_types.h Wed Jan 04 16:10:11 2012 -0800 @@ -1,53 +1,53 @@ -/* - * Copyright 2009 OpenSourceStewardshipFoundation.org - * Licensed under GNU General Public License version 2 - * - * Author: seanhalle@yahoo.com - * - - */ - -#ifndef _BLIS_PRIMITIVE_DATA_TYPES_H -#define _BLIS_PRIMITIVE_DATA_TYPES_H - - -/*For portability, need primitive data types that have a well defined - * size, and well-defined layout into bytes - *To do this, provide BLIS standard aliases for all primitive data types - *These aliases must be used in all BLIS functions instead of the ANSI types - * - *These definitions will be replaced inside each specialization module - * according to the compiler used in that module and the hardware being - * specialized to. - */ -/* -#define int8 char -#define uint8 char -#define int16 short -#define uint16 unsigned short -#define int32 int -#define uint32 unsigned int -#define int64 long long -#define uint64 unsigned long long -#define float32 float -#define float64 double -*/ -typedef char bool8; -typedef char int8; -typedef char uint8; -typedef short int16; -typedef unsigned short uint16; -typedef int int32; -typedef unsigned int uint32; -typedef long long int64; -typedef unsigned long long uint64; -typedef float float32; -typedef double float64; -//typedef double double float128; -#define float128 double double - -#define TRUE 1 -#define FALSE 0 - -#endif /* _BLIS_PRIMITIVE_DATA_TYPES_H */ - +/* + * Copyright 2009 OpenSourceStewardshipFoundation.org + * Licensed under GNU General Public License version 2 + * + * Author: seanhalle@yahoo.com + * + + */ + +#ifndef _BLIS_PRIMITIVE_DATA_TYPES_H +#define _BLIS_PRIMITIVE_DATA_TYPES_H + + +/*For portability, need primitive data types that have a well defined + * size, and well-defined layout into bytes + *To do this, provide BLIS standard aliases for all primitive data types + *These aliases must be used in all BLIS functions instead of the ANSI types + * + *These definitions will be replaced inside each specialization module + * according to the compiler used in that module and the hardware being + * specialized to. + */ +/* +#define int8 char +#define uint8 char +#define int16 short +#define uint16 unsigned short +#define int32 int +#define uint32 unsigned int +#define int64 long long +#define uint64 unsigned long long +#define float32 float +#define float64 double +*/ +typedef char bool8; +typedef char int8; +typedef char uint8; +typedef short int16; +typedef unsigned short uint16; +typedef int int32; +typedef unsigned int uint32; +typedef long long int64; +typedef unsigned long long uint64; +typedef float float32; +typedef double float64; +//typedef double double float128; +#define float128 double double + +#define TRUE 1 +#define FALSE 0 + +#endif /* _BLIS_PRIMITIVE_DATA_TYPES_H */ + diff -r ad8213a8e916 -r c1784868dcea probes.h --- a/probes.h Thu Oct 06 16:24:17 2011 +0200 +++ b/probes.h Wed Jan 04 16:10:11 2012 -0800 @@ -1,195 +1,195 @@ -/* - * Copyright 2009 OpenSourceStewardshipFoundation.org - * Licensed under GNU General Public License version 2 - * - * Author: seanhalle@yahoo.com - * - */ - -#ifndef _PROBES_H -#define _PROBES_H -#define _GNU_SOURCE - -#include "VMS_primitive_data_types.h" - -#include - - - //when STATS__TURN_ON_PROBES is defined allows using probes to measure - // time intervals. The probes are macros that only compile to something - // when STATS__TURN_ON_PROBES is defined. The probes are saved in the - // master env -- but only when this is defined. - //The TSC probes use RDTSC instr, can be unreliable, Dbl uses gettimeofday -#define STATS__TURN_ON_PROBES -//#define STATS__USE_TSC_PROBES -#define STATS__USE_DBL_PROBES - -//typedef struct _IntervalProbe IntervalProbe; //in VMS.h - -struct _IntervalProbe - { - char *nameStr; - int32 probeID; - - int32 schedChoiceWasRecorded; - int32 coreNum; - int32 procrID; - float64 procrCreateSecs; - - #ifdef STATS__USE_TSC_PROBES - TSCount startStamp; - TSCount endStamp; - #else - struct timeval startStamp; - struct timeval endStamp; - #endif - float64 startSecs; - float64 endSecs; - float64 interval; - DblHist *hist;//if NULL, then is single interval probe - }; - - -//============================= Statistics ================================== - - //Frequency of TS counts - //TODO: change freq for each machine -#define TSCOUNT_FREQ 3180000000 - -inline TSCount getTSCount(); - - -//======================== Probes ============================= -// -// Use macros to allow turning probes off with a #define switch -#ifdef STATS__ENABLE_PROBES -int32 -VMS_impl__record_time_point_into_new_probe( char *nameStr,VirtProcr *animPr); -#define VMS__record_time_point_into_new_probe( nameStr, animPr ) \ - VMS_impl__record_time_point_in_new_probe( nameStr, animPr ) - -int32 -VMS_ext_impl__record_time_point_into_new_probe( char *nameStr ); -#define VMS_ext__record_time_point_into_new_probe( nameStr ) \ - VMS_ext_impl__record_time_point_into_new_probe( nameStr ) - - -int32 -VMS_impl__create_single_interval_probe( char *nameStr, VirtProcr *animPr ); -#define VMS__create_single_interval_probe( nameStr, animPr ) \ - VMS_impl__create_single_interval_probe( nameStr, animPr ) - - -int32 -VMS_impl__create_histogram_probe( int32 numBins, float64 startValue, - float64 binWidth, char *nameStr, VirtProcr *animPr ); -#define VMS__create_histogram_probe( numBins, startValue, \ - binWidth, nameStr, animPr ) \ - VMS_impl__create_histogram_probe( numBins, startValue, \ - binWidth, nameStr, animPr ) -void -VMS_impl__free_probe( IntervalProbe *probe ); -#define VMS__free_probe( probe ) \ - VMS_impl__free_probe( probe ) - -void -VMS_impl__index_probe_by_its_name( int32 probeID, VirtProcr *animPr ); -#define VMS__index_probe_by_its_name( probeID, animPr ) \ - VMS_impl__index_probe_by_its_name( probeID, animPr ) - -IntervalProbe * -VMS_impl__get_probe_by_name( char *probeName, VirtProcr *animPr ); -#define VMS__get_probe_by_name( probeID, animPr ) \ - VMS_impl__get_probe_by_name( probeName, animPr ) - -void -VMS_impl__record_sched_choice_into_probe( int32 probeID, VirtProcr *animPr ); -#define VMS__record_sched_choice_into_probe( probeID, animPr ) \ - VMS_impl__record_sched_choice_into_probe( probeID, animPr ) - -void -VMS_impl__record_interval_start_in_probe( int32 probeID ); -#define VMS__record_interval_start_in_probe( probeID ) \ - VMS_impl__record_interval_start_in_probe( probeID ) - -void -VMS_impl__record_interval_end_in_probe( int32 probeID ); -#define VMS__record_interval_end_in_probe( probeID ) \ - VMS_impl__record_interval_end_in_probe( probeID ) - -void -VMS_impl__print_stats_of_probe( int32 probeID ); -#define VMS__print_stats_of_probe( probeID ) \ - VMS_impl__print_stats_of_probe( probeID ) - -void -VMS_impl__print_stats_of_all_probes(); -#define VMS__print_stats_of_all_probes() \ - VMS_impl__print_stats_of_all_probes() - - -#else -int32 -VMS_impl__record_time_point_into_new_probe( char *nameStr,VirtProcr *animPr); -#define VMS__record_time_point_into_new_probe( nameStr, animPr ) \ - 0 /* do nothing */ - -int32 -VMS_ext_impl__record_time_point_into_new_probe( char *nameStr ); -#define VMS_ext__record_time_point_into_new_probe( nameStr ) \ - 0 /* do nothing */ - - -int32 -VMS_impl__create_single_interval_probe( char *nameStr, VirtProcr *animPr ); -#define VMS__create_single_interval_probe( nameStr, animPr ) \ - 0 /* do nothing */ - - -int32 -VMS_impl__create_histogram_probe( int32 numBins, float64 startValue, - float64 binWidth, char *nameStr, VirtProcr *animPr ); -#define VMS__create_histogram_probe( numBins, startValue, \ - binWidth, nameStr, animPr ) \ - 0 /* do nothing */ - -void -VMS_impl__index_probe_by_its_name( int32 probeID, VirtProcr *animPr ); -#define VMS__index_probe_by_its_name( probeID, animPr ) \ - /* do nothing */ - -IntervalProbe * -VMS_impl__get_probe_by_name( char *probeName, VirtProcr *animPr ); -#define VMS__get_probe_by_name( probeID, animPr ) \ - NULL /* do nothing */ - -void -VMS_impl__record_sched_choice_into_probe( int32 probeID, VirtProcr *animPr ); -#define VMS__record_sched_choice_into_probe( probeID, animPr ) \ - /* do nothing */ - -void -VMS_impl__record_interval_start_in_probe( int32 probeID ); -#define VMS__record_interval_start_in_probe( probeID ) \ - /* do nothing */ - -void -VMS_impl__record_interval_end_in_probe( int32 probeID ); -#define VMS__record_interval_end_in_probe( probeID ) \ - /* do nothing */ - -inline void doNothing(); -void -VMS_impl__print_stats_of_probe( int32 probeID ); -#define VMS__print_stats_of_probe( probeID ) \ - doNothing/* do nothing */ - -void -VMS_impl__print_stats_of_all_probes(); -#define VMS__print_stats_of_all_probes \ - doNothing/* do nothing */ - -#endif /* defined STATS__ENABLE_PROBES */ - -#endif /* _PROBES_H */ - +/* + * Copyright 2009 OpenSourceStewardshipFoundation.org + * Licensed under GNU General Public License version 2 + * + * Author: seanhalle@yahoo.com + * + */ + +#ifndef _PROBES_H +#define _PROBES_H +#define _GNU_SOURCE + +#include "VMS_primitive_data_types.h" + +#include + + + //when STATS__TURN_ON_PROBES is defined allows using probes to measure + // time intervals. The probes are macros that only compile to something + // when STATS__TURN_ON_PROBES is defined. The probes are saved in the + // master env -- but only when this is defined. + //The TSC probes use RDTSC instr, can be unreliable, Dbl uses gettimeofday +#define STATS__TURN_ON_PROBES +//#define STATS__USE_TSC_PROBES +#define STATS__USE_DBL_PROBES + +//typedef struct _IntervalProbe IntervalProbe; //in VMS.h + +struct _IntervalProbe + { + char *nameStr; + int32 probeID; + + int32 schedChoiceWasRecorded; + int32 coreNum; + int32 procrID; + float64 procrCreateSecs; + + #ifdef STATS__USE_TSC_PROBES + TSCount startStamp; + TSCount endStamp; + #else + struct timeval startStamp; + struct timeval endStamp; + #endif + float64 startSecs; + float64 endSecs; + float64 interval; + DblHist *hist;//if NULL, then is single interval probe + }; + + +//============================= Statistics ================================== + + //Frequency of TS counts + //TODO: change freq for each machine +#define TSCOUNT_FREQ 3180000000 + +inline TSCount getTSCount(); + + +//======================== Probes ============================= +// +// Use macros to allow turning probes off with a #define switch +#ifdef STATS__ENABLE_PROBES +int32 +VMS_impl__record_time_point_into_new_probe( char *nameStr,VirtProcr *animPr); +#define VMS__record_time_point_into_new_probe( nameStr, animPr ) \ + VMS_impl__record_time_point_in_new_probe( nameStr, animPr ) + +int32 +VMS_ext_impl__record_time_point_into_new_probe( char *nameStr ); +#define VMS_ext__record_time_point_into_new_probe( nameStr ) \ + VMS_ext_impl__record_time_point_into_new_probe( nameStr ) + + +int32 +VMS_impl__create_single_interval_probe( char *nameStr, VirtProcr *animPr ); +#define VMS__create_single_interval_probe( nameStr, animPr ) \ + VMS_impl__create_single_interval_probe( nameStr, animPr ) + + +int32 +VMS_impl__create_histogram_probe( int32 numBins, float64 startValue, + float64 binWidth, char *nameStr, VirtProcr *animPr ); +#define VMS__create_histogram_probe( numBins, startValue, \ + binWidth, nameStr, animPr ) \ + VMS_impl__create_histogram_probe( numBins, startValue, \ + binWidth, nameStr, animPr ) +void +VMS_impl__free_probe( IntervalProbe *probe ); +#define VMS__free_probe( probe ) \ + VMS_impl__free_probe( probe ) + +void +VMS_impl__index_probe_by_its_name( int32 probeID, VirtProcr *animPr ); +#define VMS__index_probe_by_its_name( probeID, animPr ) \ + VMS_impl__index_probe_by_its_name( probeID, animPr ) + +IntervalProbe * +VMS_impl__get_probe_by_name( char *probeName, VirtProcr *animPr ); +#define VMS__get_probe_by_name( probeID, animPr ) \ + VMS_impl__get_probe_by_name( probeName, animPr ) + +void +VMS_impl__record_sched_choice_into_probe( int32 probeID, VirtProcr *animPr ); +#define VMS__record_sched_choice_into_probe( probeID, animPr ) \ + VMS_impl__record_sched_choice_into_probe( probeID, animPr ) + +void +VMS_impl__record_interval_start_in_probe( int32 probeID ); +#define VMS__record_interval_start_in_probe( probeID ) \ + VMS_impl__record_interval_start_in_probe( probeID ) + +void +VMS_impl__record_interval_end_in_probe( int32 probeID ); +#define VMS__record_interval_end_in_probe( probeID ) \ + VMS_impl__record_interval_end_in_probe( probeID ) + +void +VMS_impl__print_stats_of_probe( int32 probeID ); +#define VMS__print_stats_of_probe( probeID ) \ + VMS_impl__print_stats_of_probe( probeID ) + +void +VMS_impl__print_stats_of_all_probes(); +#define VMS__print_stats_of_all_probes() \ + VMS_impl__print_stats_of_all_probes() + + +#else +int32 +VMS_impl__record_time_point_into_new_probe( char *nameStr,VirtProcr *animPr); +#define VMS__record_time_point_into_new_probe( nameStr, animPr ) \ + 0 /* do nothing */ + +int32 +VMS_ext_impl__record_time_point_into_new_probe( char *nameStr ); +#define VMS_ext__record_time_point_into_new_probe( nameStr ) \ + 0 /* do nothing */ + + +int32 +VMS_impl__create_single_interval_probe( char *nameStr, VirtProcr *animPr ); +#define VMS__create_single_interval_probe( nameStr, animPr ) \ + 0 /* do nothing */ + + +int32 +VMS_impl__create_histogram_probe( int32 numBins, float64 startValue, + float64 binWidth, char *nameStr, VirtProcr *animPr ); +#define VMS__create_histogram_probe( numBins, startValue, \ + binWidth, nameStr, animPr ) \ + 0 /* do nothing */ + +void +VMS_impl__index_probe_by_its_name( int32 probeID, VirtProcr *animPr ); +#define VMS__index_probe_by_its_name( probeID, animPr ) \ + /* do nothing */ + +IntervalProbe * +VMS_impl__get_probe_by_name( char *probeName, VirtProcr *animPr ); +#define VMS__get_probe_by_name( probeID, animPr ) \ + NULL /* do nothing */ + +void +VMS_impl__record_sched_choice_into_probe( int32 probeID, VirtProcr *animPr ); +#define VMS__record_sched_choice_into_probe( probeID, animPr ) \ + /* do nothing */ + +void +VMS_impl__record_interval_start_in_probe( int32 probeID ); +#define VMS__record_interval_start_in_probe( probeID ) \ + /* do nothing */ + +void +VMS_impl__record_interval_end_in_probe( int32 probeID ); +#define VMS__record_interval_end_in_probe( probeID ) \ + /* do nothing */ + +inline void doNothing(); +void +VMS_impl__print_stats_of_probe( int32 probeID ); +#define VMS__print_stats_of_probe( probeID ) \ + doNothing/* do nothing */ + +void +VMS_impl__print_stats_of_all_probes(); +#define VMS__print_stats_of_all_probes \ + doNothing/* do nothing */ + +#endif /* defined STATS__ENABLE_PROBES */ + +#endif /* _PROBES_H */ + diff -r ad8213a8e916 -r c1784868dcea vmalloc.c --- a/vmalloc.c Thu Oct 06 16:24:17 2011 +0200 +++ b/vmalloc.c Wed Jan 04 16:10:11 2012 -0800 @@ -1,495 +1,495 @@ -/* - * Copyright 2009 OpenSourceCodeStewardshipFoundation.org - * Licensed under GNU General Public License version 2 - * - * Author: seanhalle@yahoo.com - * - * Created on November 14, 2009, 9:07 PM - */ - -#include -#include -#include -#include - -#include "VMS.h" -#include "Histogram/Histogram.h" - -/*Helper function - *Insert a newly generated free chunk into the first spot on the free list. - * The chunk is cast as a MallocProlog, so the various pointers in it are - * accessed with C's help -- and the size of the prolog is easily added to - * the pointer when a chunk is returned to the app -- so C handles changes - * in pointer sizes among machines. - * - *The list head is a normal MallocProlog struct -- identified by its - * prevChunkInFreeList being NULL -- the only one. - * - *The end of the list is identified by next chunk being NULL, as usual. - */ -void inline -add_chunk_to_free_list( MallocProlog *chunk, MallocProlog *listHead ) - { - chunk->nextChunkInFreeList = listHead->nextChunkInFreeList; - if( chunk->nextChunkInFreeList != NULL ) //if not last in free list - chunk->nextChunkInFreeList->prevChunkInFreeList = chunk; - chunk->prevChunkInFreeList = listHead; - listHead->nextChunkInFreeList = chunk; - } - - -/*This is sequential code, meant to only be called from the Master, not from - * any slave VPs. - *Search down list, checking size by the nextHigherInMem pointer, to find - * first chunk bigger than size needed. - *Shave off the extra and make it into a new free-list element, hook it in - * then return the address of the found element plus size of prolog. - * - *Will find a - */ -void *VMS__malloc( size_t sizeRequested ) - { MallocProlog *foundElem = NULL, *currElem, *newElem; - ssize_t amountExtra, sizeConsumed,sizeOfFound; - uint32 foundElemIsTopOfHeap; - - //============================= MEASUREMENT STUFF ======================== - #ifdef MEAS__TIME_MALLOC - int32 startStamp, endStamp; - saveLowTimeStampCountInto( startStamp ); - #endif - //======================================================================== - - //step up the size to be aligned at 16-byte boundary, prob better ways - sizeRequested = (sizeRequested + 16) & ~15; - currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList; - - while( currElem != NULL ) - { //check if size of currElem is big enough - sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem); - amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog); - if( amountExtra > 0 ) - { //found it, get out of loop - foundElem = currElem; - currElem = NULL; - } - else - currElem = currElem->nextChunkInFreeList; - } - - if( foundElem == NULL ) - { ERROR("\nmalloc failed\n") - return (void *)NULL; //indicates malloc failed - } - //Using a kludge to identify the element that is the top chunk in the - // heap -- saving top-of-heap addr in head's nextHigherInMem -- and - // save addr of start of heap in head's nextLowerInMem - //Will handle top of Heap specially - foundElemIsTopOfHeap = foundElem->nextHigherInMem == - _VMSMasterEnv->freeListHead->nextHigherInMem; - - //before shave off and try to insert new elem, remove found elem - //note, foundElem will never be the head, so always has valid prevChunk - foundElem->prevChunkInFreeList->nextChunkInFreeList = - foundElem->nextChunkInFreeList; - if( foundElem->nextChunkInFreeList != NULL ) - { foundElem->nextChunkInFreeList->prevChunkInFreeList = - foundElem->prevChunkInFreeList; - } - foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated - - //if enough, turn extra into new elem & insert it - if( amountExtra > 64 ) - { //make new elem by adding to addr of curr elem then casting - sizeConsumed = sizeof(MallocProlog) + sizeRequested; - newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed ); - newElem->nextLowerInMem = foundElem; //This is evil (but why?) - newElem->nextHigherInMem = foundElem->nextHigherInMem; //This is evil (but why?) - foundElem->nextHigherInMem = newElem; - if( ! foundElemIsTopOfHeap ) - { //there is no next higher for top of heap, so can't write to it - newElem->nextHigherInMem->nextLowerInMem = newElem; - } - add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead ); - } - else - { - sizeConsumed = sizeOfFound; - } - _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed; - - //============================= MEASUREMENT STUFF ======================== - #ifdef MEAS__TIME_MALLOC - saveLowTimeStampCountInto( endStamp ); - addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist ); - #endif - //======================================================================== - - //skip over the prolog by adding its size to the pointer return - return (void*)((uintptr_t)foundElem + sizeof(MallocProlog)); - } - -/*This is sequential code, meant to only be called from the Master, not from - * any slave VPs. - *Search down list, checking size by the nextHigherInMem pointer, to find - * first chunk bigger than size needed. - *Shave off the extra and make it into a new free-list element, hook it in - * then return the address of the found element plus size of prolog. - * - * The difference to the regular malloc is, that all the allocated chunks are - * aligned and padded to the size of a CACHE_LINE. Thus creating a new chunk - * before the aligned chunk. - */ -void *VMS__malloc_aligned( size_t sizeRequested ) - { MallocProlog *foundElem = NULL, *currElem, *newElem; - ssize_t amountExtra, sizeConsumed,sizeOfFound,prevAmount; - uint32 foundElemIsTopOfHeap; - - //============================= MEASUREMENT STUFF ======================== - #ifdef MEAS__TIME_MALLOC - uint32 startStamp, endStamp; - saveLowTimeStampCountInto( startStamp ); - #endif - //======================================================================== - - //step up the size to be multiple of the cache line size - sizeRequested = (sizeRequested + CACHE_LINE) & ~(CACHE_LINE-1); - currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList; - - while( currElem != NULL ) - { //check if size of currElem is big enough - sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem); - amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog); - if( amountExtra > 0 ) - { - //look if the found element is already aligned - if((((uintptr_t)currElem+sizeof(MallocProlog)) & (uintptr_t)(CACHE_LINE-1)) == 0){ - //found it, get out of loop - foundElem = currElem; - break; - }else{ - //find first aligned address and check if it's still big enough - //check also if the space before the aligned address is big enough - //for a new element - void *firstAlignedAddr = (void*)(((uintptr_t)currElem + 2*CACHE_LINE) & ~((uintptr_t)(CACHE_LINE-1))); - prevAmount = (uintptr_t)firstAlignedAddr - (uintptr_t)currElem; - sizeOfFound=(uintptr_t)currElem->nextHigherInMem -(uintptr_t)firstAlignedAddr + sizeof(MallocProlog); - amountExtra= sizeOfFound - sizeRequested - sizeof(MallocProlog); - if(prevAmount > 2*sizeof(MallocProlog) && amountExtra > 0 ){ - //found suitable element - //create new previous element and exit loop - MallocProlog *newAlignedElem = (MallocProlog*)firstAlignedAddr - 1; - - //insert new element into free list - if(currElem->nextChunkInFreeList != NULL) - currElem->nextChunkInFreeList->prevChunkInFreeList = newAlignedElem; - newAlignedElem->prevChunkInFreeList = currElem; - newAlignedElem->nextChunkInFreeList = currElem->nextChunkInFreeList; - currElem->nextChunkInFreeList = newAlignedElem; - - //set higherInMem and lowerInMem - newAlignedElem->nextHigherInMem = currElem->nextHigherInMem; - foundElemIsTopOfHeap = currElem->nextHigherInMem == - _VMSMasterEnv->freeListHead->nextHigherInMem; - if(!foundElemIsTopOfHeap) - currElem->nextHigherInMem->nextLowerInMem = newAlignedElem; - currElem->nextHigherInMem = newAlignedElem; - newAlignedElem->nextLowerInMem = currElem; - - //Found new element leaving loop - foundElem = newAlignedElem; - break; - } - } - - } - currElem = currElem->nextChunkInFreeList; - } - - if( foundElem == NULL ) - { ERROR("\nmalloc failed\n") - return (void *)NULL; //indicates malloc failed - } - //Using a kludge to identify the element that is the top chunk in the - // heap -- saving top-of-heap addr in head's nextHigherInMem -- and - // save addr of start of heap in head's nextLowerInMem - //Will handle top of Heap specially - foundElemIsTopOfHeap = foundElem->nextHigherInMem == - _VMSMasterEnv->freeListHead->nextHigherInMem; - - //before shave off and try to insert new elem, remove found elem - //note, foundElem will never be the head, so always has valid prevChunk - foundElem->prevChunkInFreeList->nextChunkInFreeList = - foundElem->nextChunkInFreeList; - if( foundElem->nextChunkInFreeList != NULL ) - { foundElem->nextChunkInFreeList->prevChunkInFreeList = - foundElem->prevChunkInFreeList; - } - foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated - - //if enough, turn extra into new elem & insert it - if( amountExtra > 64 ) - { //make new elem by adding to addr of curr elem then casting - sizeConsumed = sizeof(MallocProlog) + sizeRequested; - newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed ); - newElem->nextHigherInMem = foundElem->nextHigherInMem; - newElem->nextLowerInMem = foundElem; - foundElem->nextHigherInMem = newElem; - - if( ! foundElemIsTopOfHeap ) - { //there is no next higher for top of heap, so can't write to it - newElem->nextHigherInMem->nextLowerInMem = newElem; - } - add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead ); - } - else - { - sizeConsumed = sizeOfFound; - } - _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed; - - //============================= MEASUREMENT STUFF ======================== - #ifdef MEAS__TIME_MALLOC - saveLowTimeStampCountInto( endStamp ); - addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist ); - #endif - //======================================================================== - - //skip over the prolog by adding its size to the pointer return - return (void*)((uintptr_t)foundElem + sizeof(MallocProlog)); - } - - -/*This is sequential code -- only to be called from the Master - * When free, subtract the size of prolog from pointer, then cast it to a - * MallocProlog. Then check the nextLower and nextHigher chunks to see if - * one or both are also free, and coalesce if so, and if neither free, then - * add this one to free-list. - */ -void -VMS__free( void *ptrToFree ) - { MallocProlog *elemToFree, *nextLowerElem, *nextHigherElem; - size_t sizeOfElem; - uint32 lowerExistsAndIsFree, higherExistsAndIsFree; - - //============================= MEASUREMENT STUFF ======================== - #ifdef MEAS__TIME_MALLOC - int32 startStamp, endStamp; - saveLowTimeStampCountInto( startStamp ); - #endif - //======================================================================== - - if( ptrToFree < (void*)_VMSMasterEnv->freeListHead->nextLowerInMem || - ptrToFree > (void*)_VMSMasterEnv->freeListHead->nextHigherInMem ) - { //outside the range of data owned by VMS's malloc, so do nothing - return; - } - //subtract size of prolog to get pointer to prolog, then cast - elemToFree = (MallocProlog *)((uintptr_t)ptrToFree - sizeof(MallocProlog)); - sizeOfElem =(size_t)((uintptr_t)elemToFree->nextHigherInMem-(uintptr_t)elemToFree); - - if( elemToFree->prevChunkInFreeList != NULL ) - { printf( "error: freeing same element twice!" ); exit(1); - } - - _VMSMasterEnv->amtOfOutstandingMem -= sizeOfElem; - - nextLowerElem = elemToFree->nextLowerInMem; - nextHigherElem = elemToFree->nextHigherInMem; - - if( nextHigherElem == NULL ) - higherExistsAndIsFree = FALSE; - else //okay exists, now check if in the free-list by checking back ptr - higherExistsAndIsFree = (nextHigherElem->prevChunkInFreeList != NULL); - - if( nextLowerElem == NULL ) - lowerExistsAndIsFree = FALSE; - else //okay, it exists, now check if it's free - lowerExistsAndIsFree = (nextLowerElem->prevChunkInFreeList != NULL); - - - //now, know what exists and what's free - if( lowerExistsAndIsFree ) - { if( higherExistsAndIsFree ) - { //both exist and are free, so coalesce all three - //First, remove higher from free-list - nextHigherElem->prevChunkInFreeList->nextChunkInFreeList = - nextHigherElem->nextChunkInFreeList; - if( nextHigherElem->nextChunkInFreeList != NULL ) //end-of-list? - nextHigherElem->nextChunkInFreeList->prevChunkInFreeList = - nextHigherElem->prevChunkInFreeList; - //Now, fix-up sequence-in-mem list -- by side-effect, this also - // changes size of the lower elem, which is still in free-list - nextLowerElem->nextHigherInMem = nextHigherElem->nextHigherInMem; - if( nextHigherElem->nextHigherInMem != - _VMSMasterEnv->freeListHead->nextHigherInMem ) - nextHigherElem->nextHigherInMem->nextLowerInMem = nextLowerElem; - //notice didn't do anything to elemToFree -- it simply is no - // longer reachable from any of the lists. Wonder if could be a - // security leak because left valid addresses in it, - // but don't care for now. - } - else - { //lower is the only of the two that exists and is free, - //In this case, no adjustment to free-list, just change mem-list. - // By side-effect, changes size of the lower elem - nextLowerElem->nextHigherInMem = elemToFree->nextHigherInMem; - if( elemToFree->nextHigherInMem != - _VMSMasterEnv->freeListHead->nextHigherInMem ) - elemToFree->nextHigherInMem->nextLowerInMem = nextLowerElem; - } - } - else - { //lower either doesn't exist or isn't free, so check higher - if( higherExistsAndIsFree ) - { //higher exists and is the only of the two free - //First, in free-list, replace higher elem with the one to free - elemToFree->nextChunkInFreeList=nextHigherElem->nextChunkInFreeList; - elemToFree->prevChunkInFreeList=nextHigherElem->prevChunkInFreeList; - elemToFree->prevChunkInFreeList->nextChunkInFreeList = elemToFree; - if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list? - elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree; - //Now chg mem-list. By side-effect, changes size of elemToFree - elemToFree->nextHigherInMem = nextHigherElem->nextHigherInMem; - if( elemToFree->nextHigherInMem != - _VMSMasterEnv->freeListHead->nextHigherInMem ) - elemToFree->nextHigherInMem->nextLowerInMem = elemToFree; - } - else - { //neither lower nor higher is availabe to coalesce so add to list - // this makes prev chunk ptr non-null, which indicates it's free - elemToFree->nextChunkInFreeList = - _VMSMasterEnv->freeListHead->nextChunkInFreeList; - _VMSMasterEnv->freeListHead->nextChunkInFreeList = elemToFree; - if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list? - elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree; - elemToFree->prevChunkInFreeList = _VMSMasterEnv->freeListHead; - } - } - //============================= MEASUREMENT STUFF ======================== - #ifdef MEAS__TIME_MALLOC - saveLowTimeStampCountInto( endStamp ); - addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->freeTimeHist ); - #endif - //======================================================================== - - } - - -/*Allocates memory from the external system -- higher overhead - * - *Because of Linux's malloc throwing bizarre random faults when malloc is - * used inside a VMS virtual processor, have to pass this as a request and - * have the core loop do it when it gets around to it -- will look for these - * chores leftover from the previous animation of masterVP the next time it - * goes to animate the masterVP -- so it takes two separate masterVP - * animations, separated by work, to complete an external malloc or - * external free request. - * - *Thinking core loop accepts signals -- just looks if signal-location is - * empty or not -- - */ -void * -VMS__malloc_in_ext( size_t sizeRequested ) - { - /* - //This is running in the master, so no chance for multiple cores to be - // competing for the core's flag. - if( *(_VMSMasterEnv->coreLoopSignalAddr[ 0 ]) != 0 ) - { //something has already signalled to core loop, so save the signal - // and look, next time master animated, to see if can send it. - //Note, the addr to put a signal is in the coreloop's frame, so just - // checks it each time through -- make it volatile to avoid GCC - // optimizations -- it's a coreloop local var that only changes - // after jumping away. The signal includes the addr to send the - //return to -- even if just empty return completion-signal - // - //save the signal in some queue that the master looks at each time - // it starts up -- one loc says if empty for fast common case -- - //something like that -- want to hide this inside this call -- but - // think this has to come as a request -- req handler gives procr - // back to master loop, which gives it back to req handler at point - // it sees that core loop has sent return signal. Something like - // that. - saveTheSignal - - } - coreSigData->type = malloc; - coreSigData->sizeToMalloc = sizeRequested; - coreSigData->locToSignalCompletion = &figureOut; - _VMSMasterEnv->coreLoopSignals[ 0 ] = coreSigData; - */ - //just risk system-stack faults until get this figured out - return malloc( sizeRequested ); - } - - -/*Frees memory that was allocated in the external system -- higher overhead - * - *As noted in external malloc comment, this is clunky 'cause the free has - * to be called in the core loop. - */ -void -VMS__free_in_ext( void *ptrToFree ) - { - //just risk system-stack faults until get this figured out - free( ptrToFree ); - - //TODO: fix this -- so - } - - -/*Designed to be called from the main thread outside of VMS, during init - */ -MallocProlog * -VMS_ext__create_free_list() - { MallocProlog *freeListHead, *firstChunk; - - //Note, this is running in the main thread -- all increases in malloc - // mem and all frees of it must be done in this thread, with the - // thread's original stack available - freeListHead = malloc( sizeof(MallocProlog) ); - firstChunk = malloc( MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE ); - if( firstChunk == NULL ) {printf("malloc error\n"); exit(1);} - - //Touch memory to avoid page faults - void *ptr,*endPtr; - endPtr = (void*)firstChunk+MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE; - for(ptr = firstChunk; ptr < endPtr; ptr+=PAGE_SIZE) - { - *(char*)ptr = 0; - } - - freeListHead->prevChunkInFreeList = NULL; - //Use this addr to free the heap when cleanup - freeListHead->nextLowerInMem = firstChunk; - //to identify top-of-heap elem, compare this addr to elem's next higher - freeListHead->nextHigherInMem = (void*)( (uintptr_t)firstChunk + - MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE); - freeListHead->nextChunkInFreeList = firstChunk; - - firstChunk->nextChunkInFreeList = NULL; - firstChunk->prevChunkInFreeList = freeListHead; - //next Higher has to be set to top of chunk, so can calc size in malloc - firstChunk->nextHigherInMem = (void*)( (uintptr_t)firstChunk + - MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE); - firstChunk->nextLowerInMem = NULL; //identifies as bott of heap - - _VMSMasterEnv->amtOfOutstandingMem = 0; //none allocated yet - - return freeListHead; - } - - -/*Designed to be called from the main thread outside of VMS, during cleanup - */ -void -VMS_ext__free_free_list( MallocProlog *freeListHead ) - { - //stashed a ptr to the one and only bug chunk malloc'd from OS in the - // free list head's next lower in mem pointer - free( freeListHead->nextLowerInMem ); - - //don't free the head -- it'll be in an array eventually -- free whole - // array when all the free lists linked from it have already been freed - } - +/* + * Copyright 2009 OpenSourceCodeStewardshipFoundation.org + * Licensed under GNU General Public License version 2 + * + * Author: seanhalle@yahoo.com + * + * Created on November 14, 2009, 9:07 PM + */ + +#include +#include +#include +#include + +#include "VMS.h" +#include "Histogram/Histogram.h" + +/*Helper function + *Insert a newly generated free chunk into the first spot on the free list. + * The chunk is cast as a MallocProlog, so the various pointers in it are + * accessed with C's help -- and the size of the prolog is easily added to + * the pointer when a chunk is returned to the app -- so C handles changes + * in pointer sizes among machines. + * + *The list head is a normal MallocProlog struct -- identified by its + * prevChunkInFreeList being NULL -- the only one. + * + *The end of the list is identified by next chunk being NULL, as usual. + */ +void inline +add_chunk_to_free_list( MallocProlog *chunk, MallocProlog *listHead ) + { + chunk->nextChunkInFreeList = listHead->nextChunkInFreeList; + if( chunk->nextChunkInFreeList != NULL ) //if not last in free list + chunk->nextChunkInFreeList->prevChunkInFreeList = chunk; + chunk->prevChunkInFreeList = listHead; + listHead->nextChunkInFreeList = chunk; + } + + +/*This is sequential code, meant to only be called from the Master, not from + * any slave VPs. + *Search down list, checking size by the nextHigherInMem pointer, to find + * first chunk bigger than size needed. + *Shave off the extra and make it into a new free-list element, hook it in + * then return the address of the found element plus size of prolog. + * + *Will find a + */ +void *VMS__malloc( size_t sizeRequested ) + { MallocProlog *foundElem = NULL, *currElem, *newElem; + ssize_t amountExtra, sizeConsumed,sizeOfFound; + uint32 foundElemIsTopOfHeap; + + //============================= MEASUREMENT STUFF ======================== + #ifdef MEAS__TIME_MALLOC + int32 startStamp, endStamp; + saveLowTimeStampCountInto( startStamp ); + #endif + //======================================================================== + + //step up the size to be aligned at 16-byte boundary, prob better ways + sizeRequested = (sizeRequested + 16) & ~15; + currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList; + + while( currElem != NULL ) + { //check if size of currElem is big enough + sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem); + amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog); + if( amountExtra > 0 ) + { //found it, get out of loop + foundElem = currElem; + currElem = NULL; + } + else + currElem = currElem->nextChunkInFreeList; + } + + if( foundElem == NULL ) + { ERROR("\nmalloc failed\n") + return (void *)NULL; //indicates malloc failed + } + //Using a kludge to identify the element that is the top chunk in the + // heap -- saving top-of-heap addr in head's nextHigherInMem -- and + // save addr of start of heap in head's nextLowerInMem + //Will handle top of Heap specially + foundElemIsTopOfHeap = foundElem->nextHigherInMem == + _VMSMasterEnv->freeListHead->nextHigherInMem; + + //before shave off and try to insert new elem, remove found elem + //note, foundElem will never be the head, so always has valid prevChunk + foundElem->prevChunkInFreeList->nextChunkInFreeList = + foundElem->nextChunkInFreeList; + if( foundElem->nextChunkInFreeList != NULL ) + { foundElem->nextChunkInFreeList->prevChunkInFreeList = + foundElem->prevChunkInFreeList; + } + foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated + + //if enough, turn extra into new elem & insert it + if( amountExtra > 64 ) + { //make new elem by adding to addr of curr elem then casting + sizeConsumed = sizeof(MallocProlog) + sizeRequested; + newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed ); + newElem->nextLowerInMem = foundElem; //This is evil (but why?) + newElem->nextHigherInMem = foundElem->nextHigherInMem; //This is evil (but why?) + foundElem->nextHigherInMem = newElem; + if( ! foundElemIsTopOfHeap ) + { //there is no next higher for top of heap, so can't write to it + newElem->nextHigherInMem->nextLowerInMem = newElem; + } + add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead ); + } + else + { + sizeConsumed = sizeOfFound; + } + _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed; + + //============================= MEASUREMENT STUFF ======================== + #ifdef MEAS__TIME_MALLOC + saveLowTimeStampCountInto( endStamp ); + addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist ); + #endif + //======================================================================== + + //skip over the prolog by adding its size to the pointer return + return (void*)((uintptr_t)foundElem + sizeof(MallocProlog)); + } + +/*This is sequential code, meant to only be called from the Master, not from + * any slave VPs. + *Search down list, checking size by the nextHigherInMem pointer, to find + * first chunk bigger than size needed. + *Shave off the extra and make it into a new free-list element, hook it in + * then return the address of the found element plus size of prolog. + * + * The difference to the regular malloc is, that all the allocated chunks are + * aligned and padded to the size of a CACHE_LINE. Thus creating a new chunk + * before the aligned chunk. + */ +void *VMS__malloc_aligned( size_t sizeRequested ) + { MallocProlog *foundElem = NULL, *currElem, *newElem; + ssize_t amountExtra, sizeConsumed,sizeOfFound,prevAmount; + uint32 foundElemIsTopOfHeap; + + //============================= MEASUREMENT STUFF ======================== + #ifdef MEAS__TIME_MALLOC + uint32 startStamp, endStamp; + saveLowTimeStampCountInto( startStamp ); + #endif + //======================================================================== + + //step up the size to be multiple of the cache line size + sizeRequested = (sizeRequested + CACHE_LINE) & ~(CACHE_LINE-1); + currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList; + + while( currElem != NULL ) + { //check if size of currElem is big enough + sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem); + amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog); + if( amountExtra > 0 ) + { + //look if the found element is already aligned + if((((uintptr_t)currElem+sizeof(MallocProlog)) & (uintptr_t)(CACHE_LINE-1)) == 0){ + //found it, get out of loop + foundElem = currElem; + break; + }else{ + //find first aligned address and check if it's still big enough + //check also if the space before the aligned address is big enough + //for a new element + void *firstAlignedAddr = (void*)(((uintptr_t)currElem + 2*CACHE_LINE) & ~((uintptr_t)(CACHE_LINE-1))); + prevAmount = (uintptr_t)firstAlignedAddr - (uintptr_t)currElem; + sizeOfFound=(uintptr_t)currElem->nextHigherInMem -(uintptr_t)firstAlignedAddr + sizeof(MallocProlog); + amountExtra= sizeOfFound - sizeRequested - sizeof(MallocProlog); + if(prevAmount > 2*sizeof(MallocProlog) && amountExtra > 0 ){ + //found suitable element + //create new previous element and exit loop + MallocProlog *newAlignedElem = (MallocProlog*)firstAlignedAddr - 1; + + //insert new element into free list + if(currElem->nextChunkInFreeList != NULL) + currElem->nextChunkInFreeList->prevChunkInFreeList = newAlignedElem; + newAlignedElem->prevChunkInFreeList = currElem; + newAlignedElem->nextChunkInFreeList = currElem->nextChunkInFreeList; + currElem->nextChunkInFreeList = newAlignedElem; + + //set higherInMem and lowerInMem + newAlignedElem->nextHigherInMem = currElem->nextHigherInMem; + foundElemIsTopOfHeap = currElem->nextHigherInMem == + _VMSMasterEnv->freeListHead->nextHigherInMem; + if(!foundElemIsTopOfHeap) + currElem->nextHigherInMem->nextLowerInMem = newAlignedElem; + currElem->nextHigherInMem = newAlignedElem; + newAlignedElem->nextLowerInMem = currElem; + + //Found new element leaving loop + foundElem = newAlignedElem; + break; + } + } + + } + currElem = currElem->nextChunkInFreeList; + } + + if( foundElem == NULL ) + { ERROR("\nmalloc failed\n") + return (void *)NULL; //indicates malloc failed + } + //Using a kludge to identify the element that is the top chunk in the + // heap -- saving top-of-heap addr in head's nextHigherInMem -- and + // save addr of start of heap in head's nextLowerInMem + //Will handle top of Heap specially + foundElemIsTopOfHeap = foundElem->nextHigherInMem == + _VMSMasterEnv->freeListHead->nextHigherInMem; + + //before shave off and try to insert new elem, remove found elem + //note, foundElem will never be the head, so always has valid prevChunk + foundElem->prevChunkInFreeList->nextChunkInFreeList = + foundElem->nextChunkInFreeList; + if( foundElem->nextChunkInFreeList != NULL ) + { foundElem->nextChunkInFreeList->prevChunkInFreeList = + foundElem->prevChunkInFreeList; + } + foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated + + //if enough, turn extra into new elem & insert it + if( amountExtra > 64 ) + { //make new elem by adding to addr of curr elem then casting + sizeConsumed = sizeof(MallocProlog) + sizeRequested; + newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed ); + newElem->nextHigherInMem = foundElem->nextHigherInMem; + newElem->nextLowerInMem = foundElem; + foundElem->nextHigherInMem = newElem; + + if( ! foundElemIsTopOfHeap ) + { //there is no next higher for top of heap, so can't write to it + newElem->nextHigherInMem->nextLowerInMem = newElem; + } + add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead ); + } + else + { + sizeConsumed = sizeOfFound; + } + _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed; + + //============================= MEASUREMENT STUFF ======================== + #ifdef MEAS__TIME_MALLOC + saveLowTimeStampCountInto( endStamp ); + addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist ); + #endif + //======================================================================== + + //skip over the prolog by adding its size to the pointer return + return (void*)((uintptr_t)foundElem + sizeof(MallocProlog)); + } + + +/*This is sequential code -- only to be called from the Master + * When free, subtract the size of prolog from pointer, then cast it to a + * MallocProlog. Then check the nextLower and nextHigher chunks to see if + * one or both are also free, and coalesce if so, and if neither free, then + * add this one to free-list. + */ +void +VMS__free( void *ptrToFree ) + { MallocProlog *elemToFree, *nextLowerElem, *nextHigherElem; + size_t sizeOfElem; + uint32 lowerExistsAndIsFree, higherExistsAndIsFree; + + //============================= MEASUREMENT STUFF ======================== + #ifdef MEAS__TIME_MALLOC + int32 startStamp, endStamp; + saveLowTimeStampCountInto( startStamp ); + #endif + //======================================================================== + + if( ptrToFree < (void*)_VMSMasterEnv->freeListHead->nextLowerInMem || + ptrToFree > (void*)_VMSMasterEnv->freeListHead->nextHigherInMem ) + { //outside the range of data owned by VMS's malloc, so do nothing + return; + } + //subtract size of prolog to get pointer to prolog, then cast + elemToFree = (MallocProlog *)((uintptr_t)ptrToFree - sizeof(MallocProlog)); + sizeOfElem =(size_t)((uintptr_t)elemToFree->nextHigherInMem-(uintptr_t)elemToFree); + + if( elemToFree->prevChunkInFreeList != NULL ) + { printf( "error: freeing same element twice!" ); exit(1); + } + + _VMSMasterEnv->amtOfOutstandingMem -= sizeOfElem; + + nextLowerElem = elemToFree->nextLowerInMem; + nextHigherElem = elemToFree->nextHigherInMem; + + if( nextHigherElem == NULL ) + higherExistsAndIsFree = FALSE; + else //okay exists, now check if in the free-list by checking back ptr + higherExistsAndIsFree = (nextHigherElem->prevChunkInFreeList != NULL); + + if( nextLowerElem == NULL ) + lowerExistsAndIsFree = FALSE; + else //okay, it exists, now check if it's free + lowerExistsAndIsFree = (nextLowerElem->prevChunkInFreeList != NULL); + + + //now, know what exists and what's free + if( lowerExistsAndIsFree ) + { if( higherExistsAndIsFree ) + { //both exist and are free, so coalesce all three + //First, remove higher from free-list + nextHigherElem->prevChunkInFreeList->nextChunkInFreeList = + nextHigherElem->nextChunkInFreeList; + if( nextHigherElem->nextChunkInFreeList != NULL ) //end-of-list? + nextHigherElem->nextChunkInFreeList->prevChunkInFreeList = + nextHigherElem->prevChunkInFreeList; + //Now, fix-up sequence-in-mem list -- by side-effect, this also + // changes size of the lower elem, which is still in free-list + nextLowerElem->nextHigherInMem = nextHigherElem->nextHigherInMem; + if( nextHigherElem->nextHigherInMem != + _VMSMasterEnv->freeListHead->nextHigherInMem ) + nextHigherElem->nextHigherInMem->nextLowerInMem = nextLowerElem; + //notice didn't do anything to elemToFree -- it simply is no + // longer reachable from any of the lists. Wonder if could be a + // security leak because left valid addresses in it, + // but don't care for now. + } + else + { //lower is the only of the two that exists and is free, + //In this case, no adjustment to free-list, just change mem-list. + // By side-effect, changes size of the lower elem + nextLowerElem->nextHigherInMem = elemToFree->nextHigherInMem; + if( elemToFree->nextHigherInMem != + _VMSMasterEnv->freeListHead->nextHigherInMem ) + elemToFree->nextHigherInMem->nextLowerInMem = nextLowerElem; + } + } + else + { //lower either doesn't exist or isn't free, so check higher + if( higherExistsAndIsFree ) + { //higher exists and is the only of the two free + //First, in free-list, replace higher elem with the one to free + elemToFree->nextChunkInFreeList=nextHigherElem->nextChunkInFreeList; + elemToFree->prevChunkInFreeList=nextHigherElem->prevChunkInFreeList; + elemToFree->prevChunkInFreeList->nextChunkInFreeList = elemToFree; + if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list? + elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree; + //Now chg mem-list. By side-effect, changes size of elemToFree + elemToFree->nextHigherInMem = nextHigherElem->nextHigherInMem; + if( elemToFree->nextHigherInMem != + _VMSMasterEnv->freeListHead->nextHigherInMem ) + elemToFree->nextHigherInMem->nextLowerInMem = elemToFree; + } + else + { //neither lower nor higher is availabe to coalesce so add to list + // this makes prev chunk ptr non-null, which indicates it's free + elemToFree->nextChunkInFreeList = + _VMSMasterEnv->freeListHead->nextChunkInFreeList; + _VMSMasterEnv->freeListHead->nextChunkInFreeList = elemToFree; + if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list? + elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree; + elemToFree->prevChunkInFreeList = _VMSMasterEnv->freeListHead; + } + } + //============================= MEASUREMENT STUFF ======================== + #ifdef MEAS__TIME_MALLOC + saveLowTimeStampCountInto( endStamp ); + addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->freeTimeHist ); + #endif + //======================================================================== + + } + + +/*Allocates memory from the external system -- higher overhead + * + *Because of Linux's malloc throwing bizarre random faults when malloc is + * used inside a VMS virtual processor, have to pass this as a request and + * have the core loop do it when it gets around to it -- will look for these + * chores leftover from the previous animation of masterVP the next time it + * goes to animate the masterVP -- so it takes two separate masterVP + * animations, separated by work, to complete an external malloc or + * external free request. + * + *Thinking core loop accepts signals -- just looks if signal-location is + * empty or not -- + */ +void * +VMS__malloc_in_ext( size_t sizeRequested ) + { + /* + //This is running in the master, so no chance for multiple cores to be + // competing for the core's flag. + if( *(_VMSMasterEnv->coreLoopSignalAddr[ 0 ]) != 0 ) + { //something has already signalled to core loop, so save the signal + // and look, next time master animated, to see if can send it. + //Note, the addr to put a signal is in the coreloop's frame, so just + // checks it each time through -- make it volatile to avoid GCC + // optimizations -- it's a coreloop local var that only changes + // after jumping away. The signal includes the addr to send the + //return to -- even if just empty return completion-signal + // + //save the signal in some queue that the master looks at each time + // it starts up -- one loc says if empty for fast common case -- + //something like that -- want to hide this inside this call -- but + // think this has to come as a request -- req handler gives procr + // back to master loop, which gives it back to req handler at point + // it sees that core loop has sent return signal. Something like + // that. + saveTheSignal + + } + coreSigData->type = malloc; + coreSigData->sizeToMalloc = sizeRequested; + coreSigData->locToSignalCompletion = &figureOut; + _VMSMasterEnv->coreLoopSignals[ 0 ] = coreSigData; + */ + //just risk system-stack faults until get this figured out + return malloc( sizeRequested ); + } + + +/*Frees memory that was allocated in the external system -- higher overhead + * + *As noted in external malloc comment, this is clunky 'cause the free has + * to be called in the core loop. + */ +void +VMS__free_in_ext( void *ptrToFree ) + { + //just risk system-stack faults until get this figured out + free( ptrToFree ); + + //TODO: fix this -- so + } + + +/*Designed to be called from the main thread outside of VMS, during init + */ +MallocProlog * +VMS_ext__create_free_list() + { MallocProlog *freeListHead, *firstChunk; + + //Note, this is running in the main thread -- all increases in malloc + // mem and all frees of it must be done in this thread, with the + // thread's original stack available + freeListHead = malloc( sizeof(MallocProlog) ); + firstChunk = malloc( MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE ); + if( firstChunk == NULL ) {printf("malloc error\n"); exit(1);} + + //Touch memory to avoid page faults + void *ptr,*endPtr; + endPtr = (void*)firstChunk+MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE; + for(ptr = firstChunk; ptr < endPtr; ptr+=PAGE_SIZE) + { + *(char*)ptr = 0; + } + + freeListHead->prevChunkInFreeList = NULL; + //Use this addr to free the heap when cleanup + freeListHead->nextLowerInMem = firstChunk; + //to identify top-of-heap elem, compare this addr to elem's next higher + freeListHead->nextHigherInMem = (void*)( (uintptr_t)firstChunk + + MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE); + freeListHead->nextChunkInFreeList = firstChunk; + + firstChunk->nextChunkInFreeList = NULL; + firstChunk->prevChunkInFreeList = freeListHead; + //next Higher has to be set to top of chunk, so can calc size in malloc + firstChunk->nextHigherInMem = (void*)( (uintptr_t)firstChunk + + MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE); + firstChunk->nextLowerInMem = NULL; //identifies as bott of heap + + _VMSMasterEnv->amtOfOutstandingMem = 0; //none allocated yet + + return freeListHead; + } + + +/*Designed to be called from the main thread outside of VMS, during cleanup + */ +void +VMS_ext__free_free_list( MallocProlog *freeListHead ) + { + //stashed a ptr to the one and only bug chunk malloc'd from OS in the + // free list head's next lower in mem pointer + free( freeListHead->nextLowerInMem ); + + //don't free the head -- it'll be in an array eventually -- free whole + // array when all the free lists linked from it have already been freed + } + diff -r ad8213a8e916 -r c1784868dcea vmalloc.h --- a/vmalloc.h Thu Oct 06 16:24:17 2011 +0200 +++ b/vmalloc.h Wed Jan 04 16:10:11 2012 -0800 @@ -1,61 +1,61 @@ -/* - * Copyright 2009 OpenSourceCodeStewardshipFoundation.org - * Licensed under GNU General Public License version 2 - * - * Author: seanhalle@yahoo.com - * - * Created on November 14, 2009, 9:07 PM - */ - -#ifndef _VMALLOC_H -#define _VMALLOC_H - -#include -#include -#include "VMS_primitive_data_types.h" - -typedef struct _MallocProlog MallocProlog; - -struct _MallocProlog - { - MallocProlog *nextChunkInFreeList; - MallocProlog *prevChunkInFreeList; - MallocProlog *nextHigherInMem; - MallocProlog *nextLowerInMem; - }; -//MallocProlog - -typedef struct - { - MallocProlog *firstChunkInFreeList; - int32 numInList; //TODO not used - } -FreeListHead; - -void * -VMS__malloc( size_t sizeRequested ); - -void * -VMS__malloc_aligned( size_t sizeRequested ); - -void -VMS__free( void *ptrToFree ); - -/*Allocates memory from the external system -- higher overhead - */ -void * -VMS__malloc_in_ext( size_t sizeRequested ); - -/*Frees memory that was allocated in the external system -- higher overhead - */ -void -VMS__free_in_ext( void *ptrToFree ); - - -MallocProlog * -VMS_ext__create_free_list(); - -void -VMS_ext__free_free_list( MallocProlog *freeListHead ); - +/* + * Copyright 2009 OpenSourceCodeStewardshipFoundation.org + * Licensed under GNU General Public License version 2 + * + * Author: seanhalle@yahoo.com + * + * Created on November 14, 2009, 9:07 PM + */ + +#ifndef _VMALLOC_H +#define _VMALLOC_H + +#include +#include +#include "VMS_primitive_data_types.h" + +typedef struct _MallocProlog MallocProlog; + +struct _MallocProlog + { + MallocProlog *nextChunkInFreeList; + MallocProlog *prevChunkInFreeList; + MallocProlog *nextHigherInMem; + MallocProlog *nextLowerInMem; + }; +//MallocProlog + +typedef struct + { + MallocProlog *firstChunkInFreeList; + int32 numInList; //TODO not used + } +FreeListHead; + +void * +VMS__malloc( size_t sizeRequested ); + +void * +VMS__malloc_aligned( size_t sizeRequested ); + +void +VMS__free( void *ptrToFree ); + +/*Allocates memory from the external system -- higher overhead + */ +void * +VMS__malloc_in_ext( size_t sizeRequested ); + +/*Frees memory that was allocated in the external system -- higher overhead + */ +void +VMS__free_in_ext( void *ptrToFree ); + + +MallocProlog * +VMS_ext__create_free_list(); + +void +VMS_ext__free_free_list( MallocProlog *freeListHead ); + #endif \ No newline at end of file diff -r ad8213a8e916 -r c1784868dcea vutilities.c --- a/vutilities.c Thu Oct 06 16:24:17 2011 +0200 +++ b/vutilities.c Wed Jan 04 16:10:11 2012 -0800 @@ -1,25 +1,25 @@ -/* - * Copyright 2009 OpenSourceCodeStewardshipFoundation.org - * Licensed under GNU General Public License version 2 - * - * Author: seanhalle@yahoo.com - * - * Created on November 14, 2009, 9:07 PM - */ - -#include -#include - -#include "VMS.h" - - -inline char * -VMS__strDup( char *str ) - { char *retStr; - - retStr = VMS__malloc( strlen(str) + 1 ); - if( str == NULL ) return str; - strcpy( retStr, str ); - - return retStr; - } +/* + * Copyright 2009 OpenSourceCodeStewardshipFoundation.org + * Licensed under GNU General Public License version 2 + * + * Author: seanhalle@yahoo.com + * + * Created on November 14, 2009, 9:07 PM + */ + +#include +#include + +#include "VMS.h" + + +inline char * +VMS__strDup( char *str ) + { char *retStr; + + retStr = VMS__malloc( strlen(str) + 1 ); + if( str == NULL ) return str; + strcpy( retStr, str ); + + return retStr; + } diff -r ad8213a8e916 -r c1784868dcea vutilities.h --- a/vutilities.h Thu Oct 06 16:24:17 2011 +0200 +++ b/vutilities.h Wed Jan 04 16:10:11 2012 -0800 @@ -1,20 +1,20 @@ -/* - * Copyright 2009 OpenSourceCodeStewardshipFoundation.org - * Licensed under GNU General Public License version 2 - * - * Author: seanhalle@yahoo.com - * - * Created on November 14, 2009, 9:07 PM - */ - - -#ifndef _UTILITIES_H -#define _UTILITIES_H - -#include -#include "VMS_primitive_data_types.h" - -inline char * -VMS__strDup( char *str ); - -#endif +/* + * Copyright 2009 OpenSourceCodeStewardshipFoundation.org + * Licensed under GNU General Public License version 2 + * + * Author: seanhalle@yahoo.com + * + * Created on November 14, 2009, 9:07 PM + */ + + +#ifndef _UTILITIES_H +#define _UTILITIES_H + +#include +#include "VMS_primitive_data_types.h" + +inline char * +VMS__strDup( char *str ); + +#endif