# HG changeset patch # User Me # Date 1288497276 25200 # Node ID 8f7141a9272e10a0e7cd3c9d23d7db394510e410 # Parent 5388f1c2da6f17bfa3ba1bbbac67ea2062f7891a Added VMS__malloc and probes, and major re-factoring to separate mallocs diff -r 5388f1c2da6f -r 8f7141a9272e CoreLoop.c --- a/CoreLoop.c Thu Oct 14 17:07:23 2010 -0700 +++ b/CoreLoop.c Sat Oct 30 20:54:36 2010 -0700 @@ -83,11 +83,14 @@ CoreLoopStartPt: //Get virtual processor from queue - //_VMSWorkQ must be a global, static volatile var, so not kept in reg, + //The Q must be a global, static volatile var, so not kept in reg, // which forces reloading the pointer after each jmp to this point readyToAnimateQ = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx]; currPr = (VirtProcr *) readSRSWQ_NonBlocking( readyToAnimateQ ); + + if( currPr != NULL ) _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0; + int tries = 0; int gotLock = 0; while( currPr == NULL ) { //no VPs ready to animate, so run MasterVP --later make "try Master" @@ -101,24 +104,20 @@ //check if get the MasterLock gotLock = __sync_bool_compare_and_swap( &(_VMSMasterEnv->masterLock), \ UNLOCKED, LOCKED ); - if( gotLock ) - { - //run own MasterVP -- when its done, unlocks MasterLock and + { //run own MasterVP -- when its done, unlocks MasterLock and // jumps back to coreLoops's startPt currPr = _VMSMasterEnv->masterVPs[thisCoresIdx]; + if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 10000 ) + printf("10000 back to back MasterVP\n"); + _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1; break; //end while -- have a VP to animate now } - //Aug 24, 2010 -- changed so each core loop only gets work scheduled - // by its own master, so now stay in loop until get lock -// currPr = (VirtProcr *) readSRSWQ_NonBlocking( readyToAnimateQ ); tries++; -// if( tries % 10000 == 0 ) printf("empty tries: %d\n", tries/10000 ); - if( tries % READYTOANIMATE_RETRIES == 0 ) pthread_yield(); + if( tries > READYTOANIMATE_RETRIES ) { tries = 0; pthread_yield(); } } -// 
currPr->coreAnimatedBy = coreLoopThdParams->coreNum; //switch to virt procr's stack and frame ptr then jump to virt procr fn void *stackPtr, *framePtr, *jmpPt, *coreLoopFramePtrAddr, \ @@ -205,11 +204,17 @@ readyToAnimateQ = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx]; currPr = (VirtProcr *) readSRSWQ_NonBlocking( readyToAnimateQ ); if( currPr == NULL ) + { if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] ) + printf("back to back MasterVP\n"); + _VMSMasterEnv->numMasterInARow[thisCoresIdx] = TRUE; currPr = _VMSMasterEnv->masterVPs[thisCoresIdx]; - + } + else + _VMSMasterEnv->numMasterInARow[thisCoresIdx] = FALSE; -// printf("core %d loop procr addr: %d\n", coreLoopThdParams->coreNum, \ -// (int)currPr ); fflush(stdin); + PRINT2_DEBUG("core %d loop procr addr: %d\n",\ + coreLoopThdParams->coreNum, \ + (int)currPr ) //switch to virt procr's stack and frame ptr then jump to virt procr void *stackPtr, *framePtr, *jmpPt, *coreLoopFramePtrAddr, \ diff -r 5388f1c2da6f -r 8f7141a9272e MasterLoop.c --- a/MasterLoop.c Thu Oct 14 17:07:23 2010 -0700 +++ b/MasterLoop.c Sat Oct 30 20:54:36 2010 -0700 @@ -185,4 +185,3 @@ );//can probably make clobber list empty -- but safe for now } - diff -r 5388f1c2da6f -r 8f7141a9272e VMS.c --- a/VMS.c Thu Oct 14 17:07:23 2010 -0700 +++ b/VMS.c Sat Oct 30 20:54:36 2010 -0700 @@ -6,7 +6,9 @@ #include #include +#include #include +#include #include "VMS.h" #include "Queue_impl/BlockingQueue.h" @@ -28,6 +30,10 @@ void create_the_coreLoop_OS_threads(); +MallocProlog * +create_free_list(); + + pthread_mutex_t suspendLock = PTHREAD_MUTEX_INITIALIZER; pthread_cond_t suspend_cond = PTHREAD_COND_INITIALIZER; @@ -100,67 +106,47 @@ allSchedSlots = malloc( NUM_CORES * sizeof(SchedSlot *) ); for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ ) - { + { //running in main thread -- normal malloc inside makeSRSWQ readyToAnimateQs[ coreIdx ] = makeSRSWQ(); - //Q: should give masterVP core-specific into as its init data? 
- masterVPs[ coreIdx ] = VMS__create_procr( &masterLoop, masterEnv ); + //Q: should give masterVP core-specific info as its init data? + masterVPs[ coreIdx ] = VMS_ext__create_procr( &masterLoop, masterEnv ); masterVPs[ coreIdx ]->coreAnimatedBy = coreIdx; allSchedSlots[ coreIdx ] = create_sched_slots(); //makes for one core + _VMSMasterEnv->numMasterInARow[ coreIdx ] = FALSE; } _VMSMasterEnv->readyToAnimateQs = readyToAnimateQs; _VMSMasterEnv->masterVPs = masterVPs; + _VMSMasterEnv->masterLock = UNLOCKED; _VMSMasterEnv->allSchedSlots = allSchedSlots; - + _VMSMasterEnv->numProcrsCreated = 0; //Aug 19, 2010: no longer need to place initial masterVP into queue // because coreLoop now controls -- animates its masterVP when no work + _VMSMasterEnv->freeListHead = VMS__create_free_list(); + _VMSMasterEnv->amtOfOutstandingMem = 0; //none allocated yet - //==================== malloc substitute ======================== - // - //Testing whether malloc is using thread-local storage and therefore - // causing unreliable behavior. - //Just allocate a massive chunk of memory and roll own malloc/free and - // make app use VMS__malloc_to, which will suspend and perform malloc - // in the master, taking from this massive chunk. 
+ //============================= MEASUREMENT STUFF ======================== + #ifdef STATS__TURN_ON_PROBES + //creates intervalProbes array and sets pointer to it in masterEnv too + _VMSMasterEnv->dynIntervalProbesInfo = + makeDynArrayOfSize( &(_VMSMasterEnv->intervalProbes), 20 ); -// initFreeList(); + _VMSMasterEnv->probeNameHashTbl = makeHashTable( 1000, NULL ); + _VMSMasterEnv->masterCreateProbeID = + VMS_ext__record_time_point_into_new_probe( "masterCreateProbe" ); + //Also put creation time directly into master env, for fast retrieval + struct timeval timeStamp; + gettimeofday( &(timeStamp), NULL); + _VMSMasterEnv->createPtInSecs = + timeStamp.tv_sec +(timeStamp.tv_usec/1000000.0); + #endif + //======================================================================== } -/* -void -initMasterMalloc() - { - _VMSMasterEnv->mallocChunk = malloc( MASSIVE_MALLOC_SIZE ); - - //The free-list element is the first several locations of an - // allocated chunk -- the address given to the application is pre- - // pended with both the ownership structure and the free-list struc. - //So, write the values of these into the first locations of - // mallocChunk -- which marks it as free & puts in its size. 
- listElem = (FreeListElem *)_VMSMasterEnv->mallocChunk; - listElem->size = MASSIVE_MALLOC_SIZE - NUM_PREPEND_BYTES - listElem->next = NULL; - } - -void -dissipateMasterMalloc() - { - //Just foo code -- to get going -- doing as if free list were link-list - currElem = _VMSMasterEnv->freeList; - while( currElem != NULL ) - { - nextElem = currElem->next; - masterFree( currElem ); - currElem = nextElem; - } - free( _VMSMasterEnv->freeList ); - } - */ - SchedSlot ** create_sched_slots() { SchedSlot **schedSlots; @@ -213,7 +199,7 @@ thdAttrs, &coreLoop, (void *)(coreLoopThdParams[coreIdx]) ); - if(retCode){printf("ERROR creating thread: %d\n", retCode); exit(0);} + if(retCode){printf("ERROR creating thread: %d\n", retCode); exit(1);} } } @@ -225,12 +211,6 @@ VMS__start_the_work_then_wait_until_done() { int coreIdx; //Start the core loops running -//=========================================================================== - TSCount startCount, endCount; - unsigned long long count = 0, freq = 0; - double runTime; - - startCount = getTSCount(); //tell the core loop threads that setup is complete //get lock, to lock out any threads still starting up -- they'll see @@ -251,14 +231,6 @@ //NOTE: do not clean up VMS env here -- semantic layer has to have // a chance to clean up its environment first, then do a call to free // the Master env and rest of VMS locations - - - endCount = getTSCount(); - count = endCount - startCount; - - runTime = (double)count / (double)TSCOUNT_FREQ; - - printf("\n Time startup to shutdown: %f\n", runTime); fflush( stdin ); } /*Only difference between version with an OS thread pinned to each core and @@ -285,37 +257,73 @@ * animator state to return to -- * */ -VirtProcr * -VMS__create_procr( VirtProcrFnPtr fnPtr, void *initialData ) - { VirtProcr *newPr; - char *stackLocs, *stackPtr; +inline VirtProcr * +create_procr_helper( VirtProcr *newPr, VirtProcrFnPtr fnPtr, + void *initialData, char *stackLocs ) + { + char *stackPtr; - newPr = malloc( 
sizeof(VirtProcr) ); - newPr->procrID = numProcrsCreated++; + newPr->procrID = _VMSMasterEnv->numProcrsCreated++; newPr->nextInstrPt = fnPtr; newPr->initialData = initialData; newPr->requests = NULL; newPr->schedSlot = NULL; -// newPr->coreLoopStartPt = _VMSMasterEnv->coreLoopStartPt; //fnPtr takes two params -- void *initData & void *animProcr //alloc stack locations, make stackPtr be the highest addr minus room // for 2 params + return addr. Return addr (NULL) is in loc pointed to // by stackPtr, initData at stackPtr + 4 bytes, animatingPr just above - stackLocs = malloc( VIRT_PROCR_STACK_SIZE ); - if(stackLocs == 0) - {perror("malloc stack"); exit(1);} - newPr->startOfStack = stackLocs; stackPtr = ( (char *)stackLocs + VIRT_PROCR_STACK_SIZE - 0x10 ); + //setup __cdecl on stack -- coreloop will switch to stackPtr before jmp *( (int *)stackPtr + 2 ) = (int) newPr; //rightmost param -- 32bit pointer *( (int *)stackPtr + 1 ) = (int) initialData; //next param to left newPr->stackPtr = stackPtr; //core loop will switch to this, then newPr->framePtr = stackPtr; //suspend loop will save new stack & frame ptr + //============================= MEASUREMENT STUFF ======================== + #ifdef STATS__TURN_ON_PROBES + struct timeval timeStamp; + gettimeofday( &(timeStamp), NULL); + newPr->createPtInSecs = timeStamp.tv_sec +(timeStamp.tv_usec/1000000.0); + #endif + //======================================================================== + return newPr; } +inline VirtProcr * +VMS__create_procr( VirtProcrFnPtr fnPtr, void *initialData ) + { VirtProcr *newPr; + char *stackLocs; + + newPr = VMS__malloc( sizeof(VirtProcr) ); + stackLocs = VMS__malloc( VIRT_PROCR_STACK_SIZE ); + if( stackLocs == 0 ) + { perror("VMS__malloc stack"); exit(1); } + newPr->startOfStack = stackLocs; + + return create_procr_helper( newPr, fnPtr, initialData, stackLocs ); + } + +/* "ext" designates that it's for use outside the VMS system -- should only + * be called from main thread or other thread 
-- never from code animated by + * a VMS virtual processor. + */ +inline VirtProcr * +VMS_ext__create_procr( VirtProcrFnPtr fnPtr, void *initialData ) + { VirtProcr *newPr; + char *stackLocs; + + newPr = malloc( sizeof(VirtProcr) ); + stackLocs = malloc( VIRT_PROCR_STACK_SIZE ); + if( stackLocs == 0 ) + { perror("malloc stack"); exit(1); } + newPr->startOfStack = stackLocs; + + return create_procr_helper( newPr, fnPtr, initialData, stackLocs ); + } + /*there is a label inside this function -- save the addr of this label in * the callingPr struc, as the pick-up point from which to start the next @@ -339,7 +347,6 @@ //return ownership of the virt procr and sched slot to Master virt pr animatingPr->schedSlot->workIsDone = TRUE; -// coreIdx = callingPr->coreAnimatedBy; stackPtrAddr = &(animatingPr->stackPtr); framePtrAddr = &(animatingPr->framePtr); @@ -390,6 +397,31 @@ +/*For this implementation of VMS, it may not make much sense to have the + * system of requests for creating a new processor done this way.. but over + * the scope of single-master, multi-master, multi-tasking, OS-implementing, + * distributed-memory, and so on, this gives VMS implementation a chance to + * do stuff before suspend, in the AppVP, and in the Master before the plugin + * is called, as well as in the lang-lib before this is called, and in the + * plugin. So, this gives both VMS and language implementations a chance to + * intercept at various points and do order-dependent stuff. + *Having a standard VMSNewPrReqData struc allows the language to create and + * free the struc, while VMS knows how to get the newPr if it wants it, and + * it lets the lang have lang-specific data related to creation transported + * to the plugin. 
+ */ +void +VMS__send_create_procr_req( void *semReqData, VirtProcr *reqstingPr ) + { VMSReqst req; + + req.reqType = createReq; + req.semReqData = semReqData; + req.nextReqst = reqstingPr->requests; + reqstingPr->requests = &req; + + VMS__suspend_procr( reqstingPr ); + } + /* *This adds a request to dissipate, then suspends the processor so that the @@ -414,80 +446,93 @@ */ void VMS__dissipate_procr( VirtProcr *procrToDissipate ) - { VMSReqst *req; + { VMSReqst req; - req = malloc( sizeof(VMSReqst) ); -// req->virtProcrFrom = callingPr; - req->reqType = dissipate; - req->nextReqst = procrToDissipate->requests; - procrToDissipate->requests = req; - + req.reqType = dissipate; + req.nextReqst = procrToDissipate->requests; + procrToDissipate->requests = &req; + VMS__suspend_procr( procrToDissipate ); -} + } + + +/* "ext" designates that it's for use outside the VMS system -- should only + * be called from main thread or other thread -- never from code animated by + * a VMS virtual processor. + * + *Use this version to dissipate VPs created outside the VMS system. + */ +void +VMS_ext__dissipate_procr( VirtProcr *procrToDissipate ) + { + //NOTE: initialData was given to the processor, so should either have + // been alloc'd with VMS__malloc, or freed by the level above animPr. + //So, all that's left to free here is the stack and the VirtProcr struc + // itself + //Note, should not stack-allocate initial data -- no guarantee, in + // general that creating processor will outlive ones it creates. 
+ free( procrToDissipate->startOfStack ); + free( procrToDissipate ); + } + /*This inserts the semantic-layer's request data into standard VMS carrier + * request data-struct is allocated on stack of this call & ptr to it sent + * to plugin */ inline void VMS__add_sem_request( void *semReqData, VirtProcr *callingPr ) - { VMSReqst *req; + { VMSReqst req; - req = malloc( sizeof(VMSReqst) ); -// req->virtProcrFrom = callingPr; - req->reqType = semantic; - req->semReqData = semReqData; - req->nextReqst = callingPr->requests; - callingPr->requests = req; + req.reqType = semantic; + req.semReqData = semReqData; + req.nextReqst = callingPr->requests; + callingPr->requests = &req; } +/*This inserts the semantic-layer's request data into standard VMS carrier + * request data-struct is allocated on stack of this call & ptr to it sent + * to plugin + *Then it does suspend, to cause request to be sent. + */ +inline void +VMS__send_sem_request( void *semReqData, VirtProcr *callingPr ) + { VMSReqst req; -/*Use this to get first request before starting request handler's loop + req.reqType = semantic; + req.semReqData = semReqData; + req.nextReqst = callingPr->requests; + callingPr->requests = &req; + + VMS__suspend_procr( callingPr ); + } + + +inline void +VMS__send_VMSSem_request( void *semReqData, VirtProcr *callingPr ) + { VMSReqst req; + + req.reqType = VMSSemantic; + req.semReqData = semReqData; + req.nextReqst = callingPr->requests; //grab any other preceding + callingPr->requests = &req; + + VMS__suspend_procr( callingPr ); + } + + +/* */ VMSReqst * -VMS__take_top_request_from( VirtProcr *procrWithReq ) - { VMSReqst *req; - - req = procrWithReq->requests; - if( req == NULL ) return req; - - procrWithReq->requests = procrWithReq->requests->nextReqst; - return req; - } - -/*A subtle bug due to freeing then accessing "next" after freed caused this - * form of call to be put in -- so call this at end of request handler loop - * that iterates through the requests. 
- */ -VMSReqst * -VMS__free_top_and_give_next_request_from( VirtProcr *procrWithReq ) +VMS__take_next_request_out_of( VirtProcr *procrWithReq ) { VMSReqst *req; req = procrWithReq->requests; if( req == NULL ) return NULL; procrWithReq->requests = procrWithReq->requests->nextReqst; - VMS__free_request( req ); - return procrWithReq->requests; - } - - -//TODO: add a semantic-layer supplied "freer" for the semantic-data portion -// of a request -- IE call with both a virt procr and a fn-ptr to request -// freer (also maybe put sem request freer as a field in virt procr?) -//MeasVMS relies right now on this only freeing VMS layer of request -- the -// semantic portion of request is alloc'd and freed by request handler -void -VMS__free_request( VMSReqst *req ) - { - free( req ); - } - - - -inline int -VMS__isSemanticReqst( VMSReqst *req ) - { - return ( req->reqType == semantic ); + return req; } @@ -497,36 +542,44 @@ return req->semReqData; } -inline int -VMS__isDissipateReqst( VMSReqst *req ) - { - return ( req->reqType == dissipate ); - } -inline int -VMS__isCreateReqst( VMSReqst *req ) - { - return ( req->reqType == regCreated ); - } -void -VMS__send_req_to_register_new_procr(VirtProcr *newPr, VirtProcr *reqstingPr) - { VMSReqst *req; +/* This is for OS requests and VMS infrastructure requests, such as to create + * a probe -- a probe is inside the heart of VMS-core, it's not part of any + * language -- but it's also a semantic thing that's triggered from and used + * in the application.. so it crosses abstractions.. so, need some special + * pattern here for handling such requests. 
+ * This is called from the language's request handler when it sees a request + * of type VMSSemReq + */ +void inline +VMS__handle_VMSSemReq( VMSReqst *req, VirtProcr *requestingPr, void *semEnv, + ResumePrFnPtr resumePrFnPtr ) + { VMSSemReq *semReq; + IntervalProbe *newProbe; + int32 nameLen; - req = malloc( sizeof(VMSReqst) ); - req->reqType = regCreated; - req->semReqData = newPr; - req->nextReqst = reqstingPr->requests; - reqstingPr->requests = req; + semReq = req->semReqData; - VMS__suspend_procr( reqstingPr ); + newProbe = VMS__malloc( sizeof(IntervalProbe) ); + nameLen = strlen( semReq->nameStr ); + newProbe->nameStr = VMS__malloc( nameLen ); + memcpy( newProbe->nameStr, semReq->nameStr, nameLen ); + newProbe->hist = NULL; + newProbe->schedChoiceWasRecorded = FALSE; + newProbe->probeID = + addToDynArray( newProbe, _VMSMasterEnv->dynIntervalProbesInfo ); + + requestingPr->dataReturnedFromReq = newProbe; + + (*resumePrFnPtr)( requestingPr, semEnv ); } /*This must be called by the request handler plugin -- it cannot be called * from the semantic library "dissipate processor" function -- instead, the - * semantic layer has to generate a request for the plug-in to call this + * semantic layer has to generate a request, and the plug-in calls this * function. *The reason is that this frees the virtual processor's stack -- which is * still in use inside semantic library calls! @@ -548,15 +601,15 @@ // any locations that it is (was) sole owner of //TODO: implement VMS__malloc system, including "give up ownership" - //The dissipate request might still be attached, so remove and free it - VMS__free_top_and_give_next_request_from( animatingPr ); //NOTE: initialData was given to the processor, so should either have // been alloc'd with VMS__malloc, or freed by the level above animPr. 
//So, all that's left to free here is the stack and the VirtProcr struc // itself - free( animatingPr->startOfStack ); - free( animatingPr ); + //Note, should not stack-allocate initial data -- no guarantee, in + // general that creating processor will outlive ones it creates. + VMS__free( animatingPr->startOfStack ); + VMS__free( animatingPr ); } @@ -603,7 +656,7 @@ //create the shutdown processors, one for each core loop -- put them // directly into the Q -- each core will die when gets one for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ ) - { + { //Note, this is running in the master shutDownPr = VMS__create_procr( &endOSThreadFn, NULL ); writeSRSWQ( shutDownPr, _VMSMasterEnv->readyToAnimateQs[coreIdx] ); } @@ -664,8 +717,8 @@ for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ ) { freeSRSWQ( readyToAnimateQs[ coreIdx ] ); - - VMS__handle_dissipate_reqst( masterVPs[ coreIdx ] ); + //master VPs were created external to VMS, so use external free + VMS_ext__dissipate_procr( masterVPs[ coreIdx ] ); freeSchedSlots( allSchedSlots[ coreIdx ] ); } @@ -673,20 +726,15 @@ free( _VMSMasterEnv->readyToAnimateQs ); free( _VMSMasterEnv->masterVPs ); free( _VMSMasterEnv->allSchedSlots ); + + VMS_ext__free_free_list( _VMSMasterEnv->freeListHead ); + + //============================= MEASUREMENT STUFF ======================== + #ifdef STATS__TURN_ON_PROBES + freeDynArrayDeep( _VMSMasterEnv->dynIntervalProbesInfo, &free ); + #endif + //======================================================================== free( _VMSMasterEnv ); } - -//=========================================================================== - -inline TSCount getTSCount() - { unsigned int low, high; - TSCount out; - - saveTimeStampCountInto( low, high ); - out = high; - out = (out << 32) + low; - return out; - } - diff -r 5388f1c2da6f -r 8f7141a9272e VMS.h --- a/VMS.h Thu Oct 14 17:07:23 2010 -0700 +++ b/VMS.h Sat Oct 30 20:54:36 2010 -0700 @@ -13,17 +13,31 @@ #include "VMS_primitive_data_types.h" #include 
"Queue_impl/BlockingQueue.h" #include "Histogram/Histogram.h" +#include "DynArray/DynArray.h" +#include "Hash_impl/PrivateHash.h" +#include "vmalloc.h" + #include +#include + +//=============================== Debug =================================== //When SEQUENTIAL is defined, VMS does sequential exe in the main thread // It still does co-routines and all the mechanisms are the same, it just // has only a single thread and animates VPs one at a time //#define SEQUENTIAL -#define PRINT_DEBUG(msg) //printf(msg); fflush(stdin); +#define PRINT_DEBUG(msg)// printf(msg); fflush(stdin); #define PRINT1_DEBUG(msg, param) //printf(msg, param); fflush(stdin); #define PRINT2_DEBUG(msg, p1, p2) //printf(msg, p1, p2); fflush(stdin); +#define PRINT_ERROR(msg) printf(msg); fflush(stdin); +#define PRINT1_ERROR(msg, param) printf(msg, param); fflush(stdin); +#define PRINT2_ERROR(msg, p1, p2) printf(msg, p1, p2); fflush(stdin); + + +//=========================== STATS ======================= + //when MEAS__TIME_STAMP_SUSP is defined, causes code to be inserted and // compiled-in that saves the low part of the time stamp count just before // suspending a processor and just after resuming that processor. 
It is @@ -35,6 +49,8 @@ #define NUM_TSC_ROUND_TRIPS 10 + +//========================= Hardware related Constants ===================== //This value is the number of hardware threads in the shared memory // machine #define NUM_CORES 4 @@ -47,39 +63,75 @@ #define READYTOANIMATE_RETRIES 10000 // stack -#define VIRT_PROCR_STACK_SIZE 0x10000 +#define VIRT_PROCR_STACK_SIZE 0x4000 - //256M of total memory for VMS__malloc -#define MASSIVE_MALLOC_SIZE 0x10000000 + // memory for VMS__malloc -- 256M +#define MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE 0x10000000 -#define NUM_PREPEND_BYTES sizeof(FreeListElem) + sizeof(ownerElem); + +//============================== #define SUCCESS 0 -#define writeVMSQ writeCASQ -#define readVMSQ readCASQ -#define makeVMSQ makeCASQ -#define VMSQueueStruc CASQueueStruc +#define writeVMSQ writeSRSWQ +#define readVMSQ readSRSWQ +#define makeVMSQ makeSRSWQ +#define VMSQueueStruc SRSWQueueStruc -//#define thdAttrs NULL //For PThreads -typedef struct _SchedSlot SchedSlot; -typedef struct _VMSReqst VMSReqst; -typedef struct _VirtProcr VirtProcr; + +//=========================================================================== +typedef unsigned long long TSCount; + +typedef struct _SchedSlot SchedSlot; +typedef struct _VMSReqst VMSReqst; +typedef struct _VirtProcr VirtProcr; +typedef struct _IntervalProbe IntervalProbe; typedef VirtProcr * (*SlaveScheduler) ( void *, int ); //semEnv, coreIdx typedef void (*RequestHandler) ( VirtProcr *, void * ); //prWReqst, semEnv typedef void (*VirtProcrFnPtr) ( void *, VirtProcr * ); //initData, animPr typedef void VirtProcrFn ( void *, VirtProcr * ); //initData, animPr +typedef void (*ResumePrFnPtr) ( VirtProcr *, void * ); + + +//============= Requests =========== +// + +enum VMSReqstType //avoid starting enums at 0, for debug reasons + { + semantic = 1, + createReq, + dissipate, + VMSSemantic //goes with VMSSemReqst below + }; + +struct _VMSReqst + { + enum VMSReqstType reqType;//used for dissipate and in future for IO 
requests + void *semReqData; + + VMSReqst *nextReqst; + }; +//VMSReqst + +enum VMSSemReqstType //These are equivalent to semantic requests, but for + { // VMS's services available directly to app, like OS + createProbe = 1, // and probe services -- like a VMS-wide built-in lang + openFile, + otherIO + }; typedef struct - { - void *endThdPt; - unsigned int coreNum; + { enum VMSSemReqstType reqType; + VirtProcr *requestingPr; + char *nameStr; //for create probe } -ThdParams; + VMSSemReq; +//==================== Core data structures =================== + struct _SchedSlot { int workIsDone; @@ -87,24 +139,6 @@ VirtProcr *procrAssignedToSlot; }; //SchedSlot - -enum ReqstType - { - semantic = 1, - dissipate, - regCreated, - IO - }; - -struct _VMSReqst - { -// VirtProcr *virtProcrFrom; - enum ReqstType reqType;//used for dissipate and in future for IO requests - void *semReqData; - - VMSReqst *nextReqst; - }; -//VMSReqst struct _VirtProcr { int procrID; //for debugging -- count up each time create @@ -123,9 +157,10 @@ SchedSlot *schedSlot; VMSReqst *requests; - void *semanticData; + void *semanticData; //this lives here for the life of VP + void *dataReturnedFromReq;//values returned from plugin to VP go here - //============================= MEASUREMENT STUFF ======================== + //=========== MEASUREMENT STUFF ========== #ifdef MEAS__TIME_STAMP_SUSP unsigned int preSuspTSCLow; unsigned int postSuspTSCLow; @@ -134,12 +169,12 @@ unsigned int startMasterTSCLow; unsigned int endMasterTSCLow; #endif - //======================================================================== + + float64 createPtInSecs; //have space but don't use on some configs }; //VirtProcr - typedef struct { SlaveScheduler slaveScheduler; @@ -151,35 +186,61 @@ void *semanticEnv; void *OSEventStruc; //for future, when add I/O to BLIS + MallocProlog *freeListHead; + int32 amtOfOutstandingMem; //total currently allocated void *coreLoopStartPt;//addr to jump to to re-enter coreLoop void *coreLoopEndPt; 
//addr to jump to to shut down a coreLoop - int setupComplete; - int masterLock; + int32 setupComplete; + int32 masterLock; + int32 numMasterInARow[NUM_CORES];//detect back-to-back masterVP + int32 numProcrsCreated; //gives ordering to processor creation + + //=========== MEASUREMENT STUFF ============= + IntervalProbe **intervalProbes; + DynArrayInfo *dynIntervalProbesInfo; + HashTable *probeNameHashTbl; + int32 masterCreateProbeID; + float64 createPtInSecs; } MasterEnv; -//========================================================== + + +//======================= OS Thread related =============================== void * coreLoop( void *paramsIn ); //standard PThreads fn prototype void * coreLoop_Seq( void *paramsIn ); //standard PThreads fn prototype void masterLoop( void *initData, VirtProcr *masterPr ); -//===================== Global Vars =================== - +typedef struct + { + void *endThdPt; + unsigned int coreNum; + } +ThdParams; pthread_t coreLoopThdHandles[ NUM_CORES ]; //pthread's virt-procr state ThdParams *coreLoopThdParams [ NUM_CORES ]; pthread_mutex_t suspendLock; pthread_cond_t suspend_cond; + + +//===================== Global Vars =================== + volatile MasterEnv *_VMSMasterEnv; -//========================== + + + +//=========================== Function Prototypes ========================= + +//============== Setup and shutdown ============= void VMS__init(); @@ -195,16 +256,28 @@ VirtProcr * VMS__create_procr( VirtProcrFnPtr fnPtr, void *initialData ); + //Use this to create processor inside entry point & other places outside + // the VMS system boundary (IE, not run in slave nor Master) +VirtProcr * +VMS_ext__create_procr( VirtProcrFnPtr fnPtr, void *initialData ); + VirtProcr * VMS__create_the_shutdown_procr(); -//========================== +void +VMS__cleanup_after_shutdown(); + + +//============== Request Related =============== + +void +VMS__suspend_procr( VirtProcr *callingPr ); + inline void VMS__add_sem_request( void *semReqData, 
VirtProcr *callingPr ); void -VMS__send_req_to_register_new_procr( VirtProcr *newPrToRegister, - VirtProcr *reqstingPr ); +VMS__send_create_procr_req( void *semReqData, VirtProcr *reqstingPr ); void VMS__free_request( VMSReqst *req ); @@ -216,7 +289,7 @@ VMS__take_top_request_from( VirtProcr *reqstingPr ); VMSReqst * -VMS__free_top_and_give_next_request_from( VirtProcr *procrWithReq ); +VMS__take_next_request_out_of( VirtProcr *procrWithReq ); inline void * VMS__take_sem_reqst_from( VMSReqst *req ); @@ -232,25 +305,15 @@ //========================== -void -VMS__suspend_procr( VirtProcr *callingPr ); - -void +void inline VMS__dissipate_procr( VirtProcr *prToDissipate ); void VMS__handle_dissipate_reqst( VirtProcr *procrToDissipate ); -void -VMS__cleanup_after_shutdown(); -//============================= Statistics ================================== -typedef unsigned long long TSCount; - - //Frequency of TS counts - //TODO: change freq for each machine -#define TSCOUNT_FREQ 3180000000 +//===================== RDTSC wrapper ================== #define saveTimeStampCountInto(low, high) \ asm volatile("RDTSC; \ @@ -269,11 +332,9 @@ /* clobber */ : "%eax", "%edx" \ ); -inline TSCount getTSCount(); +//======================== STATS ====================== -//===================== Debug ========================== -int numProcrsCreated; - +#include "probes.h" #endif /* _VMS_H */ diff -r 5388f1c2da6f -r 8f7141a9272e probes.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/probes.c Sat Oct 30 20:54:36 2010 -0700 @@ -0,0 +1,342 @@ +/* + * Copyright 2010 OpenSourceStewardshipFoundation + * + * Licensed under BSD + */ + +#include +#include +#include +#include + +#include "VMS.h" +#include "Queue_impl/BlockingQueue.h" +#include "Histogram/Histogram.h" + + +//================================ STATS ==================================== + +inline TSCount getTSCount() + { unsigned int low, high; + TSCount out; + + saveTimeStampCountInto( low, high ); + out = high; + out = (out << 32) 
+ low; + return out; + } + + + +//==================== Probes ================= +#ifdef STATS__USE_TSC_PROBES +int32 +VMS__create_single_interval_probe( char *nameStr ) + { IntervalProbe *newProbe; + int32 idx; + + newProbe = malloc( sizeof(IntervalProbe) ); + newProbe->nameStr = nameStr; //caller frees if not constant on stack + newProbe->hist = NULL; + idx = addToDynArray( newProbe, _VMSMasterEnv->dynIntervalProbesInfo ); + return idx; + } + +int32 +VMS__create_histogram_probe( int32 numBins, float32 startValue, + float32 binWidth, char *nameStr ) + { IntervalProbe *newProbe; + int32 idx; + FloatHist *hist; + + idx = VMS__create_single_interval_probe( nameStr ); + newProbe = _VMSMasterEnv->intervalProbes[ idx ]; + + hist = makeFloatHistogram( numBins, startValue, binWidth ); + newProbe->hist = hist; + return idx; + } + +void +VMS_impl__record_interval_start_in_probe( int32 probeID ) + { IntervalProbe *probe; + + probe = _VMSMasterEnv->intervalProbes[ probeID ]; + probe->startStamp = getTSCount(); + } + +void +VMS_impl__record_interval_end_in_probe( int32 probeID ) + { IntervalProbe *probe; + TSCount endStamp; + + endStamp = getTSCount(); + + probe = _VMSMasterEnv->intervalProbes[ probeID ]; + probe->endStamp = endStamp; + + if( probe->hist != NULL ) + { TSCount interval = probe->endStamp - probe->startStamp; + //if the interval is sane, then add to histogram + if( interval < probe->hist->endOfRange * 10 ) + addToFloatHist( interval, probe->hist ); + } + } + +void +VMS_impl__print_stats_of_probe( int32 probeID ) + { IntervalProbe *probe; + + probe = _VMSMasterEnv->intervalProbes[ probeID ]; + + if( probe->hist == NULL ) + { + printf("probe: %s, interval: %.6lf\n", probe->nameStr,probe->interval); + } + + else + { + printf( "probe: %s\n", probe->nameStr ); + printFloatHist( probe->hist ); + } + } +#else +#ifdef STATS__USE_DBL_PROBES + +/* + * In practice, probe operations are called from the app, from inside slaves + * -- so have to be sure each probe is single-VP 
owned, and be sure that + * any place common structures are modified it's done inside the master. + * So -- the only place common structures are modified is during creation. + * after that, all mods are to individual instances. + * + * Thniking perhaps should change the semantics to be that probes are + * attached to the virtual processor -- and then everything is guaranteed + * to be isolated -- except then can't take any intervals that span VPs, + * and would have to transfer the probes to Master env when VP dissipates.. + * gets messy.. + * + * For now, just making so that probe creation causes a suspend, so that + * the dynamic array in the master env is only modified from the master + * + */ +IntervalProbe * +create_generic_probe( char *nameStr, VirtProcr *animPr ) + { IntervalProbe *newProbe; + int32 idx; + VMSSemReq reqData; + + reqData.reqType = createProbe; + reqData.nameStr = nameStr; + + VMS__send_VMSSem_request( reqData, animPr ); + + return animPr->dataReturnedFromReq; + } + +int32 +VMS_impl__record_time_point_into_new_probe( char *nameStr, VirtProcr *animPr ) + { IntervalProbe *newProbe; + struct timeval *startStamp; + float64 startSecs; + + newProbe = create_generic_probe( nameStr, animPr ); + newProbe->endSecs = 0; + + gettimeofday( &(newProbe->startStamp), NULL); + + //turn into a double + startStamp = &(newProbe->startStamp); + startSecs = startStamp->tv_sec + ( startStamp->tv_usec / 1000000.0 ); + newProbe->startSecs = startSecs; + + return newProbe->probeID; + } + +int32 +VMS_impl__create_single_interval_probe( char *nameStr, VirtProcr *animPr ) + { IntervalProbe *newProbe; + + newProbe = create_generic_probe( nameStr, animPr ); + + return newProbe->probeID; + } + +int32 +VMS_impl__create_histogram_probe( int32 numBins, float64 startValue, + float64 binWidth, char *nameStr, VirtProcr *animPr ) + { IntervalProbe *newProbe; + DblHist *hist; + + newProbe = create_generic_probe( nameStr, animPr ); + + hist = makeDblHistogram( numBins, startValue, 
binWidth ); + newProbe->hist = hist; + return newProbe->probeID; + } + +void +VMS_impl__index_probe_by_its_name( int32 probeID, VirtProcr *animPr ) + { IntervalProbe *probe; + + //TODO: fix this To be in Master -- race condition + probe = _VMSMasterEnv->intervalProbes[ probeID ]; + + addValueIntoTable(probe->nameStr, probe, _VMSMasterEnv->probeNameHashTbl); + } + +IntervalProbe * +VMS_impl__get_probe_by_name( char *probeName, VirtProcr *animPr ) + { + //TODO: fix this To be in Master -- race condition + return getValueFromTable( probeName, _VMSMasterEnv->probeNameHashTbl ); + } + + +/*Everything is local to the animating procr, so no need for request, do + * work locally, in the anim Pr + */ +void +VMS_impl__record_sched_choice_into_probe( int32 probeID, VirtProcr *animatingPr ) + { IntervalProbe *probe; + + probe = _VMSMasterEnv->intervalProbes[ probeID ]; + probe->schedChoiceWasRecorded = TRUE; + probe->coreNum = animatingPr->coreAnimatedBy; + probe->procrID = animatingPr->procrID; + probe->procrCreateSecs = 0; + } + +/*Everything is local to the animating procr, so no need for request, do + * work locally, in the anim Pr + */ +void +VMS_impl__record_interval_start_in_probe( int32 probeID ) + { IntervalProbe *probe; + + probe = _VMSMasterEnv->intervalProbes[ probeID ]; + gettimeofday( &(probe->startStamp), NULL ); + } + + +/*Everything is local to the animating procr, so no need for request, do + * work locally, in the anim Pr + */ +void +VMS_impl__record_interval_end_in_probe( int32 probeID ) + { IntervalProbe *probe; + struct timeval *endStamp, *startStamp; + double startSecs, endSecs; + + //possible seg-fault if array resized by diff core right after this + // one gets probe..? Something like that? Might be safe.. 
don't care + probe = _VMSMasterEnv->intervalProbes[ probeID ]; + gettimeofday( &(probe->endStamp), NULL); + + //now turn into an interval held in a double + startStamp = &(probe->startStamp); + endStamp = &(probe->endStamp); + + startSecs = startStamp->tv_sec + ( startStamp->tv_usec / 1000000.0 ); + endSecs = endStamp->tv_sec + ( endStamp->tv_usec / 1000000.0 ); + + probe->interval = endSecs - startSecs; + probe->startSecs = startSecs; + probe->endSecs = endSecs; + + if( probe->hist != NULL ) + { + //if the interval is sane, then add to histogram + if( probe->interval < probe->hist->endOfRange * 10 ) + addToDblHist( probe->interval, probe->hist ); + } + } + +void +print_probe_helper( IntervalProbe *probe ) + { + printf( "\nprobe: %s, ", probe->nameStr ); + + if( probe->schedChoiceWasRecorded ) + { printf( "coreNum: %d, procrID: %d, procrCreated: %.6lf | ", + probe->coreNum, probe->procrID, probe->procrCreateSecs ); + } + + if( probe->endSecs == 0 ) //just a single point in time + { + printf( " time point: %.6lf\n", + probe->startSecs - _VMSMasterEnv->createPtInSecs ); + } + else if( probe->hist == NULL ) //just an interval + { + printf( " startSecs: %.6lf, interval: %.6lf\n", + probe->startSecs - _VMSMasterEnv->createPtInSecs, probe->interval); + } + else //a full histogram of intervals + { + printDblHist( probe->hist ); + } + } + +//TODO: change so pass around pointer to probe instead of its array-index.. +// will eliminate chance for timing of resize to cause problems with the +// lookup -- even though don't think it actually can cause problems.. +// there's no need to pass index around -- have hash table for names, and +// only need it once, then have ptr to probe.. the thing about enum the +// index and use that as name is clunky in practice -- just hash. 
+void +VMS_impl__print_stats_of_probe( int32 probeID ) + { IntervalProbe *probe; + + probe = _VMSMasterEnv->intervalProbes[ probeID ]; + + print_probe_helper( probe ); + } + + + +void +generic_print_probe( void *_probe ) + { IntervalProbe *probe; + + probe = (IntervalProbe *)_probe; + print_probe_helper( probe ); + } + +void +VMS_impl__print_stats_of_all_probes() + { IntervalProbe *probe; + + forAllInDynArrayDo( _VMSMasterEnv->dynIntervalProbesInfo, + &generic_print_probe ); + fflush( stdout ); + } +#endif +#endif + +/* Junk left over from when trying the different ways to get time stamps.. + struct timeval tim; + gettimeofday(&tim, NULL); + double t1=tim.tv_sec+(tim.tv_usec/1000000.0); + + clock_t startClockStamp = clock(); + + TSCount startMultStamp = getTSCount(); +*/ + +/* + TSCount endMultStamp = getTSCount(); + + dividerParams->numTSCsToExe = endMultStamp - startMultStamp; + printf("\ntime to execute: %d\n", endMultStamp - startMultStamp); + + //================================================================== + clock_t endClockStamp = clock(); + printf("%.4lf seconds of processing\n", + (endClockStamp - startClockStamp)/(double)CLOCKS_PER_SEC); + + //================================================================== + gettimeofday(&tim, NULL); + double t2=tim.tv_sec+(tim.tv_usec/1000000.0); + printf("%.6lf seconds elapsed\n", t2-t1); +*/ diff -r 5388f1c2da6f -r 8f7141a9272e probes.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/probes.h Sat Oct 30 20:54:36 2010 -0700 @@ -0,0 +1,193 @@ +/* + * Copyright 2009 OpenSourceStewardshipFoundation.org + * Licensed under GNU General Public License version 2 + * + * Author: seanhalle@yahoo.com + * + */ + +#ifndef _PROBES_H +#define _PROBES_H +#define __USE_GNU + +#include "VMS_primitive_data_types.h" + +#include + + //turns on the probe-instrumentation in the application -- when not + // defined, the calls to the probe functions turn into comments +//#define STATS__ENABLE_PROBES + + //when STATS__TURN_ON_PROBES 
is defined allows using probes to measure + // time intervals. The probes are macros that only compile to something + // when STATS__TURN_ON_PROBES is defined. The probes are saved in the + // master env -- but only when this is defined. + //The TSC probes use RDTSC instr, can be unreliable, Dbl uses gettimeofday +#define STATS__TURN_ON_PROBES +//#define STATS__USE_TSC_PROBES +#define STATS__USE_DBL_PROBES + +//typedef struct _IntervalProbe IntervalProbe; //in VMS.h + +struct _IntervalProbe + { + char *nameStr; + int32 probeID; + + int32 schedChoiceWasRecorded; + int32 coreNum; + int32 procrID; + float64 procrCreateSecs; + + #ifdef STATS__USE_TSC_PROBES + TSCount startStamp; + TSCount endStamp; + #else + struct timeval startStamp; + struct timeval endStamp; + #endif + float64 startSecs; + float64 endSecs; + float64 interval; + DblHist *hist;//if NULL, then is single interval probe + }; + + +//============================= Statistics ================================== + + //Frequency of TS counts + //TODO: change freq for each machine +#define TSCOUNT_FREQ 3180000000 + +inline TSCount getTSCount(); + + +//======================== Probes ============================= +// +// Use macros to allow turning probes off with a #define switch +#ifdef STATS__ENABLE_PROBES +int32 +VMS_impl__record_time_point_into_new_probe( char *nameStr,VirtProcr *animPr); +#define VMS__record_time_point_into_new_probe( nameStr, animPr ) \ + VMS_impl__record_time_point_in_new_probe( nameStr, animPr ) + +int32 +VMS_ext_impl__record_time_point_into_new_probe( char *nameStr, VirtProcr *animPr); +#define VMS_ext__record_time_point_into_new_probe( nameStr ) \ + VMS_ext_impl__record_time_point_into_new_probe_impl( nameStr ) + + +int32 +VMS_impl__create_single_interval_probe( char *nameStr, VirtProcr *animPr ); +#define VMS__create_single_interval_probe( nameStr, animPr ) \ + VMS_impl__create_single_interval_probe( nameStr, animPr ) + + +int32 +VMS_impl__create_histogram_probe( int32 numBins, 
float64 startValue, + float64 binWidth, char *nameStr, VirtProcr *animPr ); +#define VMS__create_histogram_probe( numBins, startValue, \ + binWidth, nameStr, animPr ) \ + VMS_impl__create_histogram_probe( numBins, startValue, \ + binWidth, nameStr, animPr ) + +void +VMS_impl__index_probe_by_its_name( int32 probeID, VirtProcr *animPr ); +#define VMS__index_probe_by_its_name( probeID, animPr ) \ + VMS_impl__index_probe_by_its_name( probeID, animPr ) + +IntervalProbe * +VMS_impl__get_probe_by_name( char *probeName, VirtProcr *animPr ); +#define VMS__get_probe_by_name( probeID, animPr ) \ + VMS_impl__get_probe_by_name( probeName, animPr ) + +void +VMS_impl__record_sched_choice_into_probe( int32 probeID, VirtProcr *animPr ); +#define VMS__record_sched_choice_into_probe( probeID, animPr ) \ + VMS_impl__record_sched_choice_into_probe( probeID, animPr ) + +void +VMS_impl__record_interval_start_in_probe( int32 probeID ); +#define VMS__record_interval_start_in_probe( probeID ) \ + VMS_impl__record_interval_start_in_probe( probeID ) + +void +VMS_impl__record_interval_end_in_probe( int32 probeID ); +#define VMS__record_interval_end_in_probe( probeID ) \ + VMS_impl__record_interval_end_in_probe( probeID ) + +void +VMS_impl__print_stats_of_probe( int32 probeID ); +#define VMS__print_stats_of_probe( probeID ) \ + VMS_impl__print_stats_of_probe( probeID ) + +void +VMS_impl__print_stats_of_all_probes(); +#define VMS__print_stats_of_all_probes \ + VMS_impl__print_stats_of_all_probes + + +#else +int32 +VMS_impl__record_time_point_into_new_probe( char *nameStr,VirtProcr *animPr); +#define VMS__record_time_point_into_new_probe( nameStr, animPr ) \ + 0 /* do nothing */ + +int32 +VMS_ext_impl__record_time_point_into_new_probe( char *nameStr, VirtProcr *animPr); +#define VMS_ext__record_time_point_into_new_probe( nameStr ) \ + 0 /* do nothing */ + + +int32 +VMS_impl__create_single_interval_probe( char *nameStr, VirtProcr *animPr ); +#define VMS__create_single_interval_probe( nameStr, 
animPr ) \ + 0 /* do nothing */ + + +int32 +VMS_impl__create_histogram_probe( int32 numBins, float64 startValue, + float64 binWidth, char *nameStr, VirtProcr *animPr ); +#define VMS__create_histogram_probe( numBins, startValue, \ + binWidth, nameStr, animPr ) \ + 0 /* do nothing */ + +void +VMS_impl__index_probe_by_its_name( int32 probeID, VirtProcr *animPr ); +#define VMS__index_probe_by_its_name( probeID, animPr ) \ + /* do nothing */ + +IntervalProbe * +VMS_impl__get_probe_by_name( char *probeName, VirtProcr *animPr ); +#define VMS__get_probe_by_name( probeID, animPr ) \ + NULL /* do nothing */ + +void +VMS_impl__record_sched_choice_into_probe( int32 probeID, VirtProcr *animPr ); +#define VMS__record_sched_choice_into_probe( probeID, animPr ) \ + /* do nothing */ + +void +VMS_impl__record_interval_start_in_probe( int32 probeID ); +#define VMS__record_interval_start_in_probe( probeID ) \ + /* do nothing */ + +void +VMS_impl__record_interval_end_in_probe( int32 probeID ); +#define VMS__record_interval_end_in_probe( probeID ) \ + /* do nothing */ + +void +VMS_impl__print_stats_of_probe( int32 probeID ); +#define VMS__print_stats_of_probe( probeID ) \ + /* do nothing */ + +void +VMS_impl__print_stats_of_all_probes(); +#define VMS__print_stats_of_all_probes \ + /* do nothing */ + +#endif /* defined STATS__ENABLE_PROBES */ + +#endif /* _PROBES_H */ + diff -r 5388f1c2da6f -r 8f7141a9272e vmalloc.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/vmalloc.c Sat Oct 30 20:54:36 2010 -0700 @@ -0,0 +1,256 @@ +/* + * Copyright 2009 OpenSourceCodeStewardshipFoundation.org + * Licensed under GNU General Public License version 2 + * + * Author: seanhalle@yahoo.com + * + * Created on November 14, 2009, 9:07 PM + */ + +#include + +#include "VMS.h" + +/*Helper function + *Insert a newly generated free chunk into the first spot on the free list. 
+ * The chunk is cast as a MallocProlog, so the various pointers in it are + * accessed with C's help -- and the size of the prolog is easily added to + * the pointer when a chunk is returned to the app -- so C handles changes + * in pointer sizes among machines. + * + *The list head is a normal MallocProlog struct -- identified by its + * prevChunkInFreeList being NULL -- the only one. + * + *The end of the list is identified by next chunk being NULL, as usual. + */ +void inline +add_chunk_to_free_list( MallocProlog *chunk, MallocProlog *listHead ) + { + chunk->nextChunkInFreeList = listHead->nextChunkInFreeList; + if( chunk->nextChunkInFreeList != NULL ) //if not last in free list + chunk->nextChunkInFreeList->prevChunkInFreeList = chunk; + chunk->prevChunkInFreeList = listHead; + listHead->nextChunkInFreeList = chunk; + } + + +/*This is sequential code, meant to only be called from the Master, not from + * any slave VPs. + *Search down list, checking size by the nextHigherInMem pointer, to find + * first chunk bigger than size needed. + *Shave off the extra and make it into a new free-list element, hook it in + * then return the address of the found element plus size of prolog. 
+ * + *Will find a + */ +void * +VMS__malloc( int32 sizeRequested ) + { MallocProlog *foundElem = NULL, *currElem, *newElem; + int32 amountExtra, foundElemIsTopOfHeap, sizeConsumed,sizeOfFound; + + //step up the size to be aligned at 16-byte boundary, prob better ways + sizeRequested = ((sizeRequested + 16) >> 4) << 4; + currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList; + + while( currElem != NULL ) + { //check if size of currElem is big enough + sizeOfFound=(int32)((char*)currElem->nextHigherInMem -(char*)currElem); + amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog); + if( amountExtra > 0 ) + { //found it, get out of loop + foundElem = currElem; + currElem = NULL; + } + else + currElem = currElem->nextChunkInFreeList; + } + + if( foundElem == NULL ) + { PRINT_ERROR("\nmalloc failed\n") + return NULL; //indicates malloc failed + } + //Using a kludge to identify the element that is the top chunk in the + // heap -- saving top-of-heap addr in head's nextHigherInMem -- and + // save addr of start of heap in head's nextLowerInMem + //Will handle top of Heap specially + foundElemIsTopOfHeap = foundElem->nextHigherInMem == + _VMSMasterEnv->freeListHead->nextHigherInMem; + + //before shave off and try to insert new elem, remove found elem + //note, foundElem will never be the head, so always has valid prevChunk + foundElem->prevChunkInFreeList->nextChunkInFreeList = + foundElem->nextChunkInFreeList; + if( foundElem->nextChunkInFreeList != NULL ) + { foundElem->nextChunkInFreeList->prevChunkInFreeList = + foundElem->prevChunkInFreeList; + } + foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated + + //if enough, turn extra into new elem & insert it + if( amountExtra > 64 ) + { //make new elem by adding to addr of curr elem then casting + sizeConsumed = sizeof(MallocProlog) + sizeRequested; + newElem = (MallocProlog *)( (char *)foundElem + sizeConsumed ); + newElem->nextHigherInMem = foundElem->nextHigherInMem; + 
newElem->nextLowerInMem = foundElem; + foundElem->nextHigherInMem = newElem; + + if( ! foundElemIsTopOfHeap ) + { //there is no next higher for top of heap, so can't write to it + newElem->nextHigherInMem->nextLowerInMem = newElem; + } + add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead ); + } + else + { + sizeConsumed = sizeOfFound; + } + _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed; + + //skip over the prolog by adding its size to the pointer return + return (void *)((char *)foundElem + sizeof(MallocProlog)); + } + + +/*This is sequential code -- only to be called from the Master + * When free, subtract the size of prolog from pointer, then cast it to a + * MallocProlog. Then check the nextLower and nextHigher chunks to see if + * one or both are also free, and coalesce if so, and if neither free, then + * add this one to free-list. + */ +void +VMS__free( void *ptrToFree ) + { MallocProlog *elemToFree, *nextLowerElem, *nextHigherElem; + int32 lowerExistsAndIsFree, higherExistsAndIsFree, sizeOfElem; + + if( ptrToFree < _VMSMasterEnv->freeListHead->nextLowerInMem || + ptrToFree > _VMSMasterEnv->freeListHead->nextHigherInMem ) + { //outside the range of data owned by VMS's malloc, so do nothing + return; + } + //subtract size of prolog to get pointer to prolog, then cast + elemToFree = (MallocProlog *)((char *)ptrToFree - sizeof(MallocProlog)); + sizeOfElem =(int32)((char*)elemToFree->nextHigherInMem-(char*)elemToFree); + _VMSMasterEnv->amtOfOutstandingMem -= sizeOfElem; + + nextLowerElem = elemToFree->nextLowerInMem; + nextHigherElem = elemToFree->nextHigherInMem; + + if( nextHigherElem == NULL ) + higherExistsAndIsFree = FALSE; + else //okay exists, now check if in the free-list by checking back ptr + higherExistsAndIsFree = (nextHigherElem->prevChunkInFreeList != NULL); + + if( nextLowerElem == NULL ) + lowerExistsAndIsFree = FALSE; + else //okay, it exists, now check if it's free + lowerExistsAndIsFree = (nextLowerElem->prevChunkInFreeList != 
NULL); + + + //now, know what exists and what's free + if( lowerExistsAndIsFree ) + { if( higherExistsAndIsFree ) + { //both exist and are free, so coalesce all three + //First, remove higher from free-list + nextHigherElem->prevChunkInFreeList->nextChunkInFreeList = + nextHigherElem->nextChunkInFreeList; + if( nextHigherElem->nextChunkInFreeList != NULL ) //end-of-list? + nextHigherElem->nextChunkInFreeList->prevChunkInFreeList = + nextHigherElem->prevChunkInFreeList; + //Now, fix-up sequence-in-mem list -- by side-effect, this also + // changes size of the lower elem, which is still in free-list + nextLowerElem->nextHigherInMem = nextHigherElem->nextHigherInMem; + if( nextHigherElem->nextHigherInMem != + _VMSMasterEnv->freeListHead->nextHigherInMem ) + nextHigherElem->nextHigherInMem->nextLowerInMem = nextLowerElem; + //notice didn't do anything to elemToFree -- it simply is no + // longer reachable from any of the lists. Wonder if could be a + // security leak because left valid addresses in it, + // but don't care for now. + } + else + { //lower is the only of the two that exists and is free, + //In this case, no adjustment to free-list, just change mem-list. + // By side-effect, changes size of the lower elem + nextLowerElem->nextHigherInMem = elemToFree->nextHigherInMem; + if( elemToFree->nextHigherInMem != + _VMSMasterEnv->freeListHead->nextHigherInMem ) + elemToFree->nextHigherInMem->nextLowerInMem = nextLowerElem; + } + } + else + { //lower either doesn't exist or isn't free, so check higher + if( higherExistsAndIsFree ) + { //higher exists and is the only of the two free + //First, in free-list, replace higher elem with the one to free + elemToFree->nextChunkInFreeList=nextHigherElem->nextChunkInFreeList; + elemToFree->prevChunkInFreeList=nextHigherElem->prevChunkInFreeList; + elemToFree->prevChunkInFreeList->nextChunkInFreeList = elemToFree; + if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list? 
+ elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree; + //Now chg mem-list. By side-effect, changes size of elemToFree + elemToFree->nextHigherInMem = nextHigherElem->nextHigherInMem; + if( elemToFree->nextHigherInMem != + _VMSMasterEnv->freeListHead->nextHigherInMem ) + elemToFree->nextHigherInMem->nextLowerInMem = elemToFree; + } + else + { //neither lower nor higher is availabe to coalesce so add to list + // this makes prev chunk ptr non-null, which indicates it's free + elemToFree->nextChunkInFreeList = + _VMSMasterEnv->freeListHead->nextChunkInFreeList; + _VMSMasterEnv->freeListHead->nextChunkInFreeList = elemToFree; + if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list? + elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree; + elemToFree->prevChunkInFreeList = _VMSMasterEnv->freeListHead; + } + } + + } + + +/*Designed to be called from the main thread outside of VMS, during init + */ +MallocProlog * +VMS__create_free_list() + { MallocProlog *freeListHead, *firstChunk; + + //Note, this is running in the main thread -- all increases in malloc + // mem and all frees of it must be done in this thread, with the + // thread's original stack available + freeListHead = malloc( sizeof(MallocProlog) ); + firstChunk = malloc( MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE ); + if( firstChunk == NULL ) {printf("malloc error\n"); exit(1);} + + freeListHead->prevChunkInFreeList = NULL; + //Use this addr to free the heap when cleanup + freeListHead->nextLowerInMem = firstChunk; + //to identify top-of-heap elem, compare this addr to elem's next higher + freeListHead->nextHigherInMem = (char *)firstChunk + + MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE; + freeListHead->nextChunkInFreeList = firstChunk; + + firstChunk->nextChunkInFreeList = NULL; + firstChunk->prevChunkInFreeList = freeListHead; + //next Higher has to be set to top of chunk, so can calc size in malloc + firstChunk->nextHigherInMem = (char *)firstChunk + + MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE; + 
firstChunk->nextLowerInMem = NULL; //identifies as bott of heap + + return freeListHead; + } + + +/*Designed to be called from the main thread outside of VMS, during cleanup + */ +void +VMS_ext__free_free_list( MallocProlog *freeListHead ) + { + //stashed a ptr to the one and only bug chunk malloc'd from OS in the + // free list head's next lower in mem pointer + free( freeListHead->nextLowerInMem ); + + //don't free the head -- it'll be in an array eventually -- free whole + // array when all the free lists linked from it have already been freed + } + diff -r 5388f1c2da6f -r 8f7141a9272e vmalloc.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/vmalloc.h Sat Oct 30 20:54:36 2010 -0700 @@ -0,0 +1,41 @@ +/* + * Copyright 2009 OpenSourceCodeStewardshipFoundation.org + * Licensed under GNU General Public License version 2 + * + * Author: seanhalle@yahoo.com + * + * Created on November 14, 2009, 9:07 PM + */ + +#include +#include "VMS_primitive_data_types.h" + +typedef struct _MallocProlog MallocProlog; + +struct _MallocProlog + { + MallocProlog *nextChunkInFreeList; + MallocProlog *prevChunkInFreeList; + MallocProlog *nextHigherInMem; + MallocProlog *nextLowerInMem; + }; +//MallocProlog + +typedef struct + { + MallocProlog *firstChunkInFreeList; + int32 numInList; + } +FreeListHead; + +void * +VMS__malloc( int32 sizeRequested ); + +void +VMS__free( void *ptrToFree ); + +MallocProlog * +VMS__create_free_list(); + +void +VMS_ext__free_free_list( MallocProlog *freeListHead );