# HG changeset patch # User Nina Engelhardt # Date 1325872505 -3600 # Node ID 7523ee70d66cdcd30608f9009586d230676564e8 # Parent 3bd35fc83c618c1c331bcb49fcaa1b0c3c3cc352# Parent dd0aa7f62780ab7844aded5955dd0bdb68c2b47e merge changes from default diff -r 3bd35fc83c61 -r 7523ee70d66c .hgeol --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.hgeol Fri Jan 06 18:55:05 2012 +0100 @@ -0,0 +1,12 @@ + +[patterns] +**.py = native +**.txt = native +**.c = native +**.h = native +**.cpp = native +**.java = native +**.sh = native +**.pl = native +**.jpg = bin +**.gif = bin diff -r 3bd35fc83c61 -r 7523ee70d66c .hgsub --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.hgsub Fri Jan 06 18:55:05 2012 +0100 @@ -0,0 +1,5 @@ +DynArray = ../C/DynArray +Hash_impl = ../C/Hash_impl +Histogram = ../C/Histogram +Queue_impl = ../C/Queue_impl + diff -r 3bd35fc83c61 -r 7523ee70d66c .hgsubstate --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.hgsubstate Fri Jan 06 18:55:05 2012 +0100 @@ -0,0 +1,4 @@ +20f0996a84002efdfb2e260488b70ad415f2b4cf DynArray +8bafd14e9fde67082fb08186463a4803bc25b428 Hash_impl +20410d90dabbfee1fff8a3b48c6cf6090603e8ea Histogram +7c9e00ff1bf452be98dce61cfa42c01939964d7d Queue_impl diff -r 3bd35fc83c61 -r 7523ee70d66c .hgtags --- a/.hgtags Wed Jan 04 16:40:10 2012 +0100 +++ b/.hgtags Fri Jan 06 18:55:05 2012 +0100 @@ -1,6 +1,1 @@ 9c3107044f86c36fea3a8f72f64910b1363555be Dec27_2010_about_to_add_sched_record -97e26095c01fef53783d2d89e11575856ce243c5 V0 -21c95d402fe66570067000e484bfec9a5d92c9d0 malloc_touch -42d015d48eebc7ba49b24fa5f24cb1e66244e5e7 malloc_touch -42d015d48eebc7ba49b24fa5f24cb1e66244e5e7 malloc_touch -0000000000000000000000000000000000000000 malloc_touch diff -r 3bd35fc83c61 -r 7523ee70d66c CoreLoop.c --- a/CoreLoop.c Wed Jan 04 16:40:10 2012 +0100 +++ b/CoreLoop.c Fri Jan 06 18:55:05 2012 +0100 @@ -1,232 +1,232 @@ -/* - * Copyright 2010 OpenSourceStewardshipFoundation - * - * Licensed under BSD - */ - - -#include "VMS.h" -#include "Queue_impl/BlockingQueue.h" -#include "ProcrContext.h" - -#include -#include -#include - -#include -#include - -void *terminateCoreLoop(VirtProcr *currPr); - -/*This is the loop that runs in the OS Thread pinned to each core - *Get virt procr from queue, - * save state of current animator, then load in state of virt procr, using - * jmp instr to switch the program-counter state -- making the virt procr - * the new animator. - *At some point, the virt procr will suspend itself by saving out its - * animator state (stack ptr, frame ptr, program counter) and switching - * back to the OS Thread's animator state, which means restoring the - * stack and frame and jumping to the core loop start point. - *This cycle then repeats, until a special shutdown virtual processor is - * animated, which jumps to the end point at the bottom of core loop. - */ -void * -coreLoop( void *paramsIn ) - { - ThdParams *coreLoopThdParams; - int thisCoresIdx; - VirtProcr *currPr = NULL; - VMSQueueStruc *readyToAnimateQ; - cpu_set_t coreMask; //has 1 in bit positions of allowed cores - int errorCode; - - //work-stealing struc on stack to prevent false-sharing in cache-line - volatile GateStruc gate; - //preGateProgress, waitProgress, exitProgress, gateClosed; - - - coreLoopThdParams = (ThdParams *)paramsIn; - thisCoresIdx = coreLoopThdParams->coreNum; - - gate.gateClosed = FALSE; - gate.preGateProgress = 0; - gate.waitProgress = 0; - gate.exitProgress = 0; - _VMSMasterEnv->workStealingGates[ thisCoresIdx ] = (GateStruc*)&gate;//race @startup - - //wait until signalled that setup is complete - pthread_mutex_lock( &suspendLock ); - while( !(_VMSMasterEnv->setupComplete) ) - { - pthread_cond_wait( &suspend_cond, - &suspendLock ); - } - pthread_mutex_unlock( &suspendLock ); - - //printf( "\nCore unsuspended: %d\n", coreLoopThdParams->coreNum ); - - //set thread affinity - //Linux requires pinning thd to core inside thread-function - //Designate a core by a 1 in bit-position corresponding to the core - CPU_ZERO(&coreMask); - CPU_SET(coreLoopThdParams->coreNum,&coreMask); - //coreMask = 1L << coreLoopThdParams->coreNum; - - pthread_t selfThd = pthread_self(); - errorCode = - pthread_setaffinity_np( selfThd, sizeof(coreMask), &coreMask); - - if(errorCode){ printf("\nset affinity failure\n"); exit(0); } - - - //Save the return address in the SwitchVP function - saveCoreLoopReturnAddr((void**)&(_VMSMasterEnv->coreLoopReturnPt)); - - - while(1){ - #ifdef MEAS__PERF_COUNTERS - //end work - - if(currPr!=NULL){ - int lastRecordIdx = currPr->counter_history_array_info->numInArray -1; - CounterRecord* lastRecord = currPr->counter_history[lastRecordIdx]; - //lastRecord->work_core = thisCoresIdx; - saveCyclesAndInstrs(thisCoresIdx,lastRecord->suspend_cycles,lastRecord->suspend_instrs); - } - - #endif - //Get virtual processor from queue - //The Q must be a global, static volatile var, so not kept in reg, - // which forces reloading the pointer after each jmp to this point - readyToAnimateQ = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx]; - - #ifdef USE_WORK_STEALING - //Alg for work-stealing designed to make common case fast. Comment - // in stealer code explains. - gate.preGateProgress++; - if( gate.gateClosed ) - { //now, set coreloop's progress, so stealer can see that core loop - // has made it into the waiting area. - gate.waitProgress = gate.preGateProgress; - while( gate.gateClosed ) /*busy wait*/; - } - - currPr = (VirtProcr *) readVMSQ( readyToAnimateQ ); - - //Set the coreloop's progress, so stealer can see it has made it out - // of the protected area - gate.exitProgress = gate.preGateProgress; - #else - currPr = (VirtProcr *) readVMSQ( readyToAnimateQ ); - #endif - - if( currPr != NULL ) _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0; - else - { - //============================= MEASUREMENT STUFF ===================== - #ifdef MEAS__TIME_MASTER_LOCK - int32 startStamp, endStamp; - saveLowTimeStampCountInto( startStamp ); - #endif - //===================================================================== - int tries = 0; int gotLock = 0; - while( currPr == NULL ) //if queue was empty, enter get masterLock loop - { //queue was empty, so get master lock - - gotLock = __sync_bool_compare_and_swap(&(_VMSMasterEnv->masterLock), - UNLOCKED, LOCKED ); - if( gotLock ) - { //run own MasterVP -- jmps to coreLoops startPt when done - currPr = _VMSMasterEnv->masterVPs[thisCoresIdx]; - if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 ) - { DEBUG( dbgB2BMaster,"Many back to back MasterVPs\n"); - pthread_yield(); - } - _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1; - break; //end while -- have a VP to animate now - } - - tries++; //if too many, means master on other core taking too long - if( tries > MASTERLOCK_RETRIES ) { tries = 0; pthread_yield(); } - } - //============================= MEASUREMENT STUFF ===================== - #ifdef MEAS__TIME_MASTER_LOCK - saveLowTimeStampCountInto( endStamp ); - addIntervalToHist( startStamp, endStamp, - _VMSMasterEnv->masterLockLowTimeHist ); - addIntervalToHist( startStamp, endStamp, - _VMSMasterEnv->masterLockHighTimeHist ); - #endif - //===================================================================== - - } - - #ifdef MEAS__PERF_COUNTERS - //start work - int lastRecordIdx = currPr->counter_history_array_info->numInArray -1; - CounterRecord* lastRecord = currPr->counter_history[lastRecordIdx]; - lastRecord->work_core = thisCoresIdx; - saveCyclesAndInstrs(thisCoresIdx,lastRecord->start_work_cycles,lastRecord->start_work_instrs); - #endif - switchToVP(currPr); //The VPs return in here - - flushRegisters(); - }//CoreLoop - } - - -void * -terminateCoreLoop(VirtProcr *currPr){ - //first free shutdown VP that jumped here -- it first restores the - // coreloop's stack, so addr of currPr in stack frame is still correct - VMS__dissipate_procr( currPr ); - pthread_exit( NULL ); -} - - - -#ifdef SEQUENTIAL - -//=========================================================================== -/*This sequential version is exact same as threaded, except doesn't do the - * pin-threads part, nor the wait until setup complete part. - */ -void * -coreLoop_Seq( void *paramsIn ) - { - VirtProcr *currPr; - VMSQueueStruc *readyToAnimateQ; - - ThdParams *coreLoopThdParams; - int thisCoresIdx; - - coreLoopThdParams = (ThdParams *)paramsIn; -// thisCoresIdx = coreLoopThdParams->coreNum; - thisCoresIdx = 0; - - //Save the return address in the SwitchVP function - saveCoreLoopReturnAddr(&(_VMSMasterEnv->coreLoopReturnPt)); - - - while(1){ - //Get virtual processor from queue - //_VMSWorkQ must be a global, static volatile var, so not kept in reg, - // which forces reloading the pointer after each jmp to this point - readyToAnimateQ = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx]; - currPr = (VirtProcr *) readVMSQ( readyToAnimateQ ); - if( currPr == NULL ) - { if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 ) - { printf("too many back to back MasterVP\n"); exit(1); } - _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1; - - currPr = _VMSMasterEnv->masterVPs[thisCoresIdx]; - } - else - _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0; - - - switchToVP( currPr ); - flushRegisters(); - } - } -#endif +/* + * Copyright 2010 OpenSourceStewardshipFoundation + * + * Licensed under BSD + */ + + +#include "VMS.h" +#include "Queue_impl/BlockingQueue.h" +#include "ProcrContext.h" + +#include +#include +#include + +#include +#include + +void *terminateCoreLoop(VirtProcr *currPr); + +/*This is the loop that runs in the OS Thread pinned to each core + *Get virt procr from queue, + * save state of current animator, then load in state of virt procr, using + * jmp instr to switch the program-counter state -- making the virt procr + * the new animator. + *At some point, the virt procr will suspend itself by saving out its + * animator state (stack ptr, frame ptr, program counter) and switching + * back to the OS Thread's animator state, which means restoring the + * stack and frame and jumping to the core loop start point. + *This cycle then repeats, until a special shutdown virtual processor is + * animated, which jumps to the end point at the bottom of core loop. + */ +void * +coreLoop( void *paramsIn ) + { + ThdParams *coreLoopThdParams; + int thisCoresIdx; + VirtProcr *currPr = NULL; + VMSQueueStruc *readyToAnimateQ; + cpu_set_t coreMask; //has 1 in bit positions of allowed cores + int errorCode; + + //work-stealing struc on stack to prevent false-sharing in cache-line + volatile GateStruc gate; + //preGateProgress, waitProgress, exitProgress, gateClosed; + + + coreLoopThdParams = (ThdParams *)paramsIn; + thisCoresIdx = coreLoopThdParams->coreNum; + + gate.gateClosed = FALSE; + gate.preGateProgress = 0; + gate.waitProgress = 0; + gate.exitProgress = 0; + _VMSMasterEnv->workStealingGates[ thisCoresIdx ] = (GateStruc*)&gate;//race @startup + + //wait until signalled that setup is complete + pthread_mutex_lock( &suspendLock ); + while( !(_VMSMasterEnv->setupComplete) ) + { + pthread_cond_wait( &suspend_cond, + &suspendLock ); + } + pthread_mutex_unlock( &suspendLock ); + + //printf( "\nCore unsuspended: %d\n", coreLoopThdParams->coreNum ); + + //set thread affinity + //Linux requires pinning thd to core inside thread-function + //Designate a core by a 1 in bit-position corresponding to the core + CPU_ZERO(&coreMask); + CPU_SET(coreLoopThdParams->coreNum,&coreMask); + //coreMask = 1L << coreLoopThdParams->coreNum; + + pthread_t selfThd = pthread_self(); + errorCode = + pthread_setaffinity_np( selfThd, sizeof(coreMask), &coreMask); + + if(errorCode){ printf("\nset affinity failure\n"); exit(0); } + + + //Save the return address in the SwitchVP function + saveCoreLoopReturnAddr((void**)&(_VMSMasterEnv->coreLoopReturnPt)); + + + while(1){ + #ifdef MEAS__PERF_COUNTERS + //end work + + if(currPr!=NULL){ + int lastRecordIdx = currPr->counter_history_array_info->numInArray -1; + CounterRecord* lastRecord = currPr->counter_history[lastRecordIdx]; + //lastRecord->work_core = thisCoresIdx; + saveCyclesAndInstrs(thisCoresIdx,lastRecord->suspend_cycles,lastRecord->suspend_instrs); + } + + #endif + //Get virtual processor from queue + //The Q must be a global, static volatile var, so not kept in reg, + // which forces reloading the pointer after each jmp to this point + readyToAnimateQ = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx]; + + #ifdef USE_WORK_STEALING + //Alg for work-stealing designed to make common case fast. Comment + // in stealer code explains. + gate.preGateProgress++; + if( gate.gateClosed ) + { //now, set coreloop's progress, so stealer can see that core loop + // has made it into the waiting area. + gate.waitProgress = gate.preGateProgress; + while( gate.gateClosed ) /*busy wait*/; + } + + currPr = (VirtProcr *) readVMSQ( readyToAnimateQ ); + + //Set the coreloop's progress, so stealer can see it has made it out + // of the protected area + gate.exitProgress = gate.preGateProgress; + #else + currPr = (VirtProcr *) readVMSQ( readyToAnimateQ ); + #endif + + if( currPr != NULL ) _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0; + else + { + //============================= MEASUREMENT STUFF ===================== + #ifdef MEAS__TIME_MASTER_LOCK + int32 startStamp, endStamp; + saveLowTimeStampCountInto( startStamp ); + #endif + //===================================================================== + int tries = 0; int gotLock = 0; + while( currPr == NULL ) //if queue was empty, enter get masterLock loop + { //queue was empty, so get master lock + + gotLock = __sync_bool_compare_and_swap(&(_VMSMasterEnv->masterLock), + UNLOCKED, LOCKED ); + if( gotLock ) + { //run own MasterVP -- jmps to coreLoops startPt when done + currPr = _VMSMasterEnv->masterVPs[thisCoresIdx]; + if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 ) + { DEBUG( dbgB2BMaster,"Many back to back MasterVPs\n"); + pthread_yield(); + } + _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1; + break; //end while -- have a VP to animate now + } + + tries++; //if too many, means master on other core taking too long + if( tries > MASTERLOCK_RETRIES ) { tries = 0; pthread_yield(); } + } + //============================= MEASUREMENT STUFF ===================== + #ifdef MEAS__TIME_MASTER_LOCK + saveLowTimeStampCountInto( endStamp ); + addIntervalToHist( startStamp, endStamp, + _VMSMasterEnv->masterLockLowTimeHist ); + addIntervalToHist( startStamp, endStamp, + _VMSMasterEnv->masterLockHighTimeHist ); + #endif + //===================================================================== + + } + + #ifdef MEAS__PERF_COUNTERS + //start work + int lastRecordIdx = currPr->counter_history_array_info->numInArray -1; + CounterRecord* lastRecord = currPr->counter_history[lastRecordIdx]; + lastRecord->work_core = thisCoresIdx; + saveCyclesAndInstrs(thisCoresIdx,lastRecord->start_work_cycles,lastRecord->start_work_instrs); + #endif + switchToVP(currPr); //The VPs return in here + + flushRegisters(); + }//CoreLoop + } + + +void * +terminateCoreLoop(VirtProcr *currPr){ + //first free shutdown VP that jumped here -- it first restores the + // coreloop's stack, so addr of currPr in stack frame is still correct + VMS__dissipate_procr( currPr ); + pthread_exit( NULL ); +} + + + +#ifdef SEQUENTIAL + +//=========================================================================== +/*This sequential version is exact same as threaded, except doesn't do the + * pin-threads part, nor the wait until setup complete part. + */ +void * +coreLoop_Seq( void *paramsIn ) + { + VirtProcr *currPr; + VMSQueueStruc *readyToAnimateQ; + + ThdParams *coreLoopThdParams; + int thisCoresIdx; + + coreLoopThdParams = (ThdParams *)paramsIn; +// thisCoresIdx = coreLoopThdParams->coreNum; + thisCoresIdx = 0; + + //Save the return address in the SwitchVP function + saveCoreLoopReturnAddr(&(_VMSMasterEnv->coreLoopReturnPt)); + + + while(1){ + //Get virtual processor from queue + //_VMSWorkQ must be a global, static volatile var, so not kept in reg, + // which forces reloading the pointer after each jmp to this point + readyToAnimateQ = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx]; + currPr = (VirtProcr *) readVMSQ( readyToAnimateQ ); + if( currPr == NULL ) + { if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 ) + { printf("too many back to back MasterVP\n"); exit(1); } + _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1; + + currPr = _VMSMasterEnv->masterVPs[thisCoresIdx]; + } + else + _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0; + + + switchToVP( currPr ); + flushRegisters(); + } + } +#endif diff -r 3bd35fc83c61 -r 7523ee70d66c MasterLoop.c --- a/MasterLoop.c Wed Jan 04 16:40:10 2012 +0100 +++ b/MasterLoop.c Fri Jan 06 18:55:05 2012 +0100 @@ -90,8 +90,6 @@ volatileMasterPr = animatingPr; masterPr = (VirtProcr*)volatileMasterPr; //used to force re-define after jmp - bulb b = new_bulb(); - numSlotsFilled=1; //First animation of each MasterVP will in turn animate this part // of setup code.. (VP creator sets up the stack as if this function // was called normally, but actually get here by jmp) @@ -131,9 +129,6 @@ slaveScheduler = masterEnv->slaveScheduler; semanticEnv = masterEnv->semanticEnv; - #ifdef DETECT_LOOP_GRAPH - - #endif //Poll each slot's Done flag numSlotsFilled = 0; @@ -221,9 +216,7 @@ lastRecord->start_assign_instrs = tmp_instrs; saveCyclesAndInstrs(thisCoresIdx,lastRecord->end_assign_cycles,lastRecord->end_assign_instrs); #endif - #ifdef DETECT_LOOP_GRAPH - #endif writeVMSQ( schedVirtPr, readyToAnimateQ ); } } diff -r 3bd35fc83c61 -r 7523ee70d66c ProcrContext.h --- a/ProcrContext.h Wed Jan 04 16:40:10 2012 +0100 +++ b/ProcrContext.h Fri Jan 06 18:55:05 2012 +0100 @@ -1,33 +1,33 @@ -/* - * Copyright 2009 OpenSourceStewardshipFoundation.org - * Licensed under GNU General Public License version 2 - * - * Author: seanhalle@yahoo.com - * - */ - -#ifndef _ProcrContext_H -#define _ProcrContext_H -#define _GNU_SOURCE - -void saveCoreLoopReturnAddr(void **returnAddress); - -void switchToVP(VirtProcr *nextProcr); - -void switchToCoreLoop(VirtProcr *nextProcr); - -void masterSwitchToCoreLoop(VirtProcr *nextProcr); - -void startVirtProcrFn(); - -void *asmTerminateCoreLoop(VirtProcr *currPr); - -#define flushRegisters() \ - asm volatile ("":::"%rbx", "%r12", "%r13","%r14","%r15") - -inline VirtProcr * -create_procr_helper( VirtProcr *newPr, VirtProcrFnPtr fnPtr, - void *initialData, void *stackLocs ); - -#endif /* _ProcrContext_H */ - +/* + * Copyright 2009 OpenSourceStewardshipFoundation.org + * Licensed under GNU General Public License version 2 + * + * Author: seanhalle@yahoo.com + * + */ + +#ifndef _ProcrContext_H +#define _ProcrContext_H +#define _GNU_SOURCE + +void saveCoreLoopReturnAddr(void **returnAddress); + +void switchToVP(VirtProcr *nextProcr); + +void switchToCoreLoop(VirtProcr *nextProcr); + +void masterSwitchToCoreLoop(VirtProcr *nextProcr); + +void startVirtProcrFn(); + +void *asmTerminateCoreLoop(VirtProcr *currPr); + +#define flushRegisters() \ + asm volatile ("":::"%rbx", "%r12", "%r13","%r14","%r15") + +inline VirtProcr * +create_procr_helper( VirtProcr *newPr, VirtProcrFnPtr fnPtr, + void *initialData, void *stackLocs ); + +#endif /* _ProcrContext_H */ + diff -r 3bd35fc83c61 -r 7523ee70d66c VMS.c --- a/VMS.c Wed Jan 04 16:40:10 2012 +0100 +++ b/VMS.c Fri Jan 06 18:55:05 2012 +0100 @@ -118,15 +118,15 @@ //============================= MEASUREMENT STUFF ======================== #ifdef MEAS__TIME_MALLOC - _VMSMasterEnv->mallocTimeHist = makeFixedBinHistExt( 100, 0, 100, + _VMSMasterEnv->mallocTimeHist = makeFixedBinHistExt( 100, 0, 30, "malloc_time_hist"); - _VMSMasterEnv->freeTimeHist = makeFixedBinHistExt( 80, 0, 100, + _VMSMasterEnv->freeTimeHist = makeFixedBinHistExt( 100, 0, 30, "free_time_hist"); #endif #ifdef MEAS__TIME_PLUGIN - _VMSMasterEnv->reqHdlrLowTimeHist = makeFixedBinHistExt( 1000, 0, 100, + _VMSMasterEnv->reqHdlrLowTimeHist = makeFixedBinHistExt( 100, 0, 200, "plugin_low_time_hist"); - _VMSMasterEnv->reqHdlrHighTimeHist = makeFixedBinHistExt( 1000, 0, 100, + _VMSMasterEnv->reqHdlrHighTimeHist = makeFixedBinHistExt( 100, 0, 200, "plugin_high_time_hist"); #endif //======================================================================== @@ -462,9 +462,8 @@ * it lets the lang have lang-specific data related to creation transported * to the plugin. */ -__attribute__ ((noinline)) void -VMS__send_create_procr_req( void *semReqData, VirtProcr *reqstingPr ) - +void +VMS__send_create_procr_req( void *semReqData, VirtProcr *reqstingPr ) { VMSReqst req; req.reqType = createReq; @@ -497,8 +496,8 @@ * gets suspended in this call and all the virt processor's state disap- * pears -- making that suspend the last thing in the virt procr's trace. */ -__attribute__ ((noinline)) void -VMS__send_dissipate_req( VirtProcr *procrToDissipate ) +void +VMS__send_dissipate_req( VirtProcr *procrToDissipate ) { VMSReqst req; req.reqType = dissipate; @@ -557,8 +556,8 @@ * to plugin *Then it does suspend, to cause request to be sent. */ -/*inline*/__attribute__ ((noinline)) void -VMS__send_sem_request( void *semReqData, VirtProcr *callingPr ) +inline void +VMS__send_sem_request( void *semReqData, VirtProcr *callingPr ) { VMSReqst req; req.reqType = semantic; @@ -570,9 +569,8 @@ } -/*inline*/ __attribute__ ((noinline)) void -VMS__send_VMSSem_request( void *semReqData, VirtProcr *callingPr ) - +inline void +VMS__send_VMSSem_request( void *semReqData, VirtProcr *callingPr ) { VMSReqst req; req.reqType = VMSSemantic; @@ -777,7 +775,7 @@ //Before getting rid of everything, print out any measurements made //forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, (DynArrayFnPtr)&printHist ); //forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, (DynArrayFnPtr)&saveHistToFile); - //forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, &freeHistExt ); + //forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, &freeHist ); #ifdef MEAS__TIME_PLUGIN diff -r 3bd35fc83c61 -r 7523ee70d66c VMS.h --- a/VMS.h Wed Jan 04 16:40:10 2012 +0100 +++ b/VMS.h Fri Jan 06 18:55:05 2012 +0100 @@ -279,9 +279,6 @@ CounterRecord** counter_history; PrivDynArrayInfo* counter_history_array_info; #endif - #ifdef DETECT_LOOP_GRAPH - - #endif } MasterEnv; @@ -380,16 +377,16 @@ inline void VMS__add_sem_request_in_mallocd_VMSReqst( void *semReqData, VirtProcr *callingPr ); -/*inline*/ __attribute__ ((noinline)) void +inline void VMS__send_sem_request( void *semReqData, VirtProcr *callingPr ); void VMS__send_create_procr_req( void *semReqData, VirtProcr *reqstingPr ); -void /*inline**/ __attribute__ ((noinline)) +void inline VMS__send_dissipate_req( VirtProcr *prToDissipate ); -/*inline**/ __attribute__ ((noinline)) void +inline void VMS__send_VMSSem_request( void *semReqData, VirtProcr *callingPr ); VMSReqst * @@ -461,11 +458,11 @@ #ifdef VPTHREAD //VPThread -#define createHistIdx 1 -#define mutexLockHistIdx 2 -#define mutexUnlockHistIdx 3 -#define condWaitHistIdx 4 -#define condSignalHistIdx 5 +#define createHistIdx 0 +#define mutexLockHistIdx 1 +#define mutexUnlockHistIdx 2 +#define condWaitHistIdx 3 +#define condSignalHistIdx 4 #define MakeTheMeasHists() \ _VMSMasterEnv->measHistsInfo = \ @@ -482,8 +479,8 @@ #ifdef VCILK //VCilk -#define spawnHistIdx 1 -#define syncHistIdx 2 +#define spawnHistIdx 0 +#define syncHistIdx 1 #define MakeTheMeasHists() \ _VMSMasterEnv->measHistsInfo = \ @@ -497,10 +494,10 @@ #ifdef SSR //SSR -#define SendFromToHistIdx 1 -#define SendOfTypeHistIdx 2 -#define ReceiveFromToHistIdx 3 -#define ReceiveOfTypeHistIdx 4 +#define SendFromToHistIdx 0 +#define SendOfTypeHistIdx 1 +#define ReceiveFromToHistIdx 2 +#define ReceiveOfTypeHistIdx 3 #define MakeTheMeasHists() \ _VMSMasterEnv->measHistsInfo = \ diff -r 3bd35fc83c61 -r 7523ee70d66c VMS__DESIGN_NOTES.txt --- a/VMS__DESIGN_NOTES.txt Wed Jan 04 16:40:10 2012 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2 +0,0 @@ - -Implement VMS this way: diff -r 3bd35fc83c61 -r 7523ee70d66c VMS_primitive_data_types.h --- a/VMS_primitive_data_types.h Wed Jan 04 16:40:10 2012 +0100 +++ b/VMS_primitive_data_types.h Fri Jan 06 18:55:05 2012 +0100 @@ -1,53 +1,53 @@ -/* - * Copyright 2009 OpenSourceStewardshipFoundation.org - * Licensed under GNU General Public License version 2 - * - * Author: seanhalle@yahoo.com - * - - */ - -#ifndef _BLIS_PRIMITIVE_DATA_TYPES_H -#define _BLIS_PRIMITIVE_DATA_TYPES_H - - -/*For portability, need primitive data types that have a well defined - * size, and well-defined layout into bytes - *To do this, provide BLIS standard aliases for all primitive data types - *These aliases must be used in all BLIS functions instead of the ANSI types - * - *These definitions will be replaced inside each specialization module - * according to the compiler used in that module and the hardware being - * specialized to. - */ -/* -#define int8 char -#define uint8 char -#define int16 short -#define uint16 unsigned short -#define int32 int -#define uint32 unsigned int -#define int64 long long -#define uint64 unsigned long long -#define float32 float -#define float64 double -*/ -typedef char bool8; -typedef char int8; -typedef char uint8; -typedef short int16; -typedef unsigned short uint16; -typedef int int32; -typedef unsigned int uint32; -typedef long long int64; -typedef unsigned long long uint64; -typedef float float32; -typedef double float64; -//typedef double double float128; -#define float128 double double - -#define TRUE 1 -#define FALSE 0 - -#endif /* _BLIS_PRIMITIVE_DATA_TYPES_H */ - +/* + * Copyright 2009 OpenSourceStewardshipFoundation.org + * Licensed under GNU General Public License version 2 + * + * Author: seanhalle@yahoo.com + * + + */ + +#ifndef _BLIS_PRIMITIVE_DATA_TYPES_H +#define _BLIS_PRIMITIVE_DATA_TYPES_H + + +/*For portability, need primitive data types that have a well defined + * size, and well-defined layout into bytes + *To do this, provide BLIS standard aliases for all primitive data types + *These aliases must be used in all BLIS functions instead of the ANSI types + * + *These definitions will be replaced inside each specialization module + * according to the compiler used in that module and the hardware being + * specialized to. + */ +/* +#define int8 char +#define uint8 char +#define int16 short +#define uint16 unsigned short +#define int32 int +#define uint32 unsigned int +#define int64 long long +#define uint64 unsigned long long +#define float32 float +#define float64 double +*/ +typedef char bool8; +typedef char int8; +typedef char uint8; +typedef short int16; +typedef unsigned short uint16; +typedef int int32; +typedef unsigned int uint32; +typedef long long int64; +typedef unsigned long long uint64; +typedef float float32; +typedef double float64; +//typedef double double float128; +#define float128 double double + +#define TRUE 1 +#define FALSE 0 + +#endif /* _BLIS_PRIMITIVE_DATA_TYPES_H */ + diff -r 3bd35fc83c61 -r 7523ee70d66c probes.h --- a/probes.h Wed Jan 04 16:40:10 2012 +0100 +++ b/probes.h Fri Jan 06 18:55:05 2012 +0100 @@ -1,195 +1,195 @@ -/* - * Copyright 2009 OpenSourceStewardshipFoundation.org - * Licensed under GNU General Public License version 2 - * - * Author: seanhalle@yahoo.com - * - */ - -#ifndef _PROBES_H -#define _PROBES_H -#define _GNU_SOURCE - -#include "VMS_primitive_data_types.h" - -#include - - - //when STATS__TURN_ON_PROBES is defined allows using probes to measure - // time intervals. The probes are macros that only compile to something - // when STATS__TURN_ON_PROBES is defined. The probes are saved in the - // master env -- but only when this is defined. - //The TSC probes use RDTSC instr, can be unreliable, Dbl uses gettimeofday -#define STATS__TURN_ON_PROBES -//#define STATS__USE_TSC_PROBES -#define STATS__USE_DBL_PROBES - -//typedef struct _IntervalProbe IntervalProbe; //in VMS.h - -struct _IntervalProbe - { - char *nameStr; - int32 probeID; - - int32 schedChoiceWasRecorded; - int32 coreNum; - int32 procrID; - float64 procrCreateSecs; - - #ifdef STATS__USE_TSC_PROBES - TSCount startStamp; - TSCount endStamp; - #else - struct timeval startStamp; - struct timeval endStamp; - #endif - float64 startSecs; - float64 endSecs; - float64 interval; - DblHist *hist;//if NULL, then is single interval probe - }; - - -//============================= Statistics ================================== - - //Frequency of TS counts - //TODO: change freq for each machine -#define TSCOUNT_FREQ 3180000000 - -inline TSCount getTSCount(); - - -//======================== Probes ============================= -// -// Use macros to allow turning probes off with a #define switch -#ifdef STATS__ENABLE_PROBES -int32 -VMS_impl__record_time_point_into_new_probe( char *nameStr,VirtProcr *animPr); -#define VMS__record_time_point_into_new_probe( nameStr, animPr ) \ - VMS_impl__record_time_point_in_new_probe( nameStr, animPr ) - -int32 -VMS_ext_impl__record_time_point_into_new_probe( char *nameStr ); -#define VMS_ext__record_time_point_into_new_probe( nameStr ) \ - VMS_ext_impl__record_time_point_into_new_probe( nameStr ) - - -int32 -VMS_impl__create_single_interval_probe( char *nameStr, VirtProcr *animPr ); -#define VMS__create_single_interval_probe( nameStr, animPr ) \ - VMS_impl__create_single_interval_probe( nameStr, animPr ) - - -int32 -VMS_impl__create_histogram_probe( int32 numBins, float64 startValue, - float64 binWidth, char *nameStr, VirtProcr *animPr ); -#define VMS__create_histogram_probe( numBins, startValue, \ - binWidth, nameStr, animPr ) \ - VMS_impl__create_histogram_probe( numBins, startValue, \ - binWidth, nameStr, animPr ) -void -VMS_impl__free_probe( IntervalProbe *probe ); -#define VMS__free_probe( probe ) \ - VMS_impl__free_probe( probe ) - -void -VMS_impl__index_probe_by_its_name( int32 probeID, VirtProcr *animPr ); -#define VMS__index_probe_by_its_name( probeID, animPr ) \ - VMS_impl__index_probe_by_its_name( probeID, animPr ) - -IntervalProbe * -VMS_impl__get_probe_by_name( char *probeName, VirtProcr *animPr ); -#define VMS__get_probe_by_name( probeID, animPr ) \ - VMS_impl__get_probe_by_name( probeName, animPr ) - -void -VMS_impl__record_sched_choice_into_probe( int32 probeID, VirtProcr *animPr ); -#define VMS__record_sched_choice_into_probe( probeID, animPr ) \ - VMS_impl__record_sched_choice_into_probe( probeID, animPr ) - -void -VMS_impl__record_interval_start_in_probe( int32 probeID ); -#define VMS__record_interval_start_in_probe( probeID ) \ - VMS_impl__record_interval_start_in_probe( probeID ) - -void -VMS_impl__record_interval_end_in_probe( int32 probeID ); -#define VMS__record_interval_end_in_probe( probeID ) \ - VMS_impl__record_interval_end_in_probe( probeID ) - -void -VMS_impl__print_stats_of_probe( int32 probeID ); -#define VMS__print_stats_of_probe( probeID ) \ - VMS_impl__print_stats_of_probe( probeID ) - -void -VMS_impl__print_stats_of_all_probes(); -#define VMS__print_stats_of_all_probes() \ - VMS_impl__print_stats_of_all_probes() - - -#else -int32 -VMS_impl__record_time_point_into_new_probe( char *nameStr,VirtProcr *animPr); -#define VMS__record_time_point_into_new_probe( nameStr, animPr ) \ - 0 /* do nothing */ - -int32 -VMS_ext_impl__record_time_point_into_new_probe( char *nameStr ); -#define VMS_ext__record_time_point_into_new_probe( nameStr ) \ - 0 /* do nothing */ - - -int32 -VMS_impl__create_single_interval_probe( char *nameStr, VirtProcr *animPr ); -#define VMS__create_single_interval_probe( nameStr, animPr ) \ - 0 /* do nothing */ - - -int32 -VMS_impl__create_histogram_probe( int32 numBins, float64 startValue, - float64 binWidth, char *nameStr, VirtProcr *animPr ); -#define VMS__create_histogram_probe( numBins, startValue, \ - binWidth, nameStr, animPr ) \ - 0 /* do nothing */ - -void -VMS_impl__index_probe_by_its_name( int32 probeID, VirtProcr *animPr ); -#define VMS__index_probe_by_its_name( probeID, animPr ) \ - /* do nothing */ - -IntervalProbe * -VMS_impl__get_probe_by_name( char *probeName, VirtProcr *animPr ); -#define VMS__get_probe_by_name( probeID, animPr ) \ - NULL /* do nothing */ - -void -VMS_impl__record_sched_choice_into_probe( int32 probeID, VirtProcr *animPr ); -#define VMS__record_sched_choice_into_probe( probeID, animPr ) \ - /* do nothing */ - -void -VMS_impl__record_interval_start_in_probe( int32 probeID ); -#define VMS__record_interval_start_in_probe( probeID ) \ - /* do nothing */ - -void -VMS_impl__record_interval_end_in_probe( int32 probeID ); -#define VMS__record_interval_end_in_probe( probeID ) \ - /* do nothing */ - -inline void doNothing(); -void -VMS_impl__print_stats_of_probe( int32 probeID ); -#define VMS__print_stats_of_probe( probeID ) \ - doNothing/* do nothing */ - -void -VMS_impl__print_stats_of_all_probes(); -#define VMS__print_stats_of_all_probes \ - doNothing/* do nothing */ - -#endif /* defined STATS__ENABLE_PROBES */ - -#endif /* _PROBES_H */ - +/* + * Copyright 2009 OpenSourceStewardshipFoundation.org + * Licensed under GNU General Public License version 2 + * + * Author: seanhalle@yahoo.com + * + */ + +#ifndef _PROBES_H +#define _PROBES_H +#define _GNU_SOURCE + +#include "VMS_primitive_data_types.h" + +#include + + + //when STATS__TURN_ON_PROBES is defined allows using probes to measure + // time intervals. The probes are macros that only compile to something + // when STATS__TURN_ON_PROBES is defined. The probes are saved in the + // master env -- but only when this is defined. + //The TSC probes use RDTSC instr, can be unreliable, Dbl uses gettimeofday +#define STATS__TURN_ON_PROBES +//#define STATS__USE_TSC_PROBES +#define STATS__USE_DBL_PROBES + +//typedef struct _IntervalProbe IntervalProbe; //in VMS.h + +struct _IntervalProbe + { + char *nameStr; + int32 probeID; + + int32 schedChoiceWasRecorded; + int32 coreNum; + int32 procrID; + float64 procrCreateSecs; + + #ifdef STATS__USE_TSC_PROBES + TSCount startStamp; + TSCount endStamp; + #else + struct timeval startStamp; + struct timeval endStamp; + #endif + float64 startSecs; + float64 endSecs; + float64 interval; + DblHist *hist;//if NULL, then is single interval probe + }; + + +//============================= Statistics ================================== + + //Frequency of TS counts + //TODO: change freq for each machine +#define TSCOUNT_FREQ 3180000000 + +inline TSCount getTSCount(); + + +//======================== Probes ============================= +// +// Use macros to allow turning probes off with a #define switch +#ifdef STATS__ENABLE_PROBES +int32 +VMS_impl__record_time_point_into_new_probe( char *nameStr,VirtProcr *animPr); +#define VMS__record_time_point_into_new_probe( nameStr, animPr ) \ + VMS_impl__record_time_point_in_new_probe( nameStr, animPr ) + +int32 +VMS_ext_impl__record_time_point_into_new_probe( char *nameStr ); +#define VMS_ext__record_time_point_into_new_probe( nameStr ) \ + VMS_ext_impl__record_time_point_into_new_probe( nameStr ) + + +int32 +VMS_impl__create_single_interval_probe( char *nameStr, VirtProcr *animPr ); +#define VMS__create_single_interval_probe( nameStr, animPr ) \ + VMS_impl__create_single_interval_probe( nameStr, animPr ) + + +int32 +VMS_impl__create_histogram_probe( int32 numBins, float64 startValue, + float64 binWidth, char *nameStr, VirtProcr *animPr ); +#define VMS__create_histogram_probe( numBins, startValue, \ + binWidth, nameStr, animPr ) \ + VMS_impl__create_histogram_probe( numBins, startValue, \ + binWidth, nameStr, animPr ) +void +VMS_impl__free_probe( IntervalProbe *probe ); +#define VMS__free_probe( probe ) \ + VMS_impl__free_probe( probe ) + +void +VMS_impl__index_probe_by_its_name( int32 probeID, VirtProcr *animPr ); +#define VMS__index_probe_by_its_name( probeID, animPr ) \ + VMS_impl__index_probe_by_its_name( probeID, animPr ) + +IntervalProbe * +VMS_impl__get_probe_by_name( char *probeName, VirtProcr *animPr ); +#define VMS__get_probe_by_name( probeID, animPr ) \ + VMS_impl__get_probe_by_name( probeName, animPr ) + +void +VMS_impl__record_sched_choice_into_probe( int32 probeID, VirtProcr *animPr ); +#define VMS__record_sched_choice_into_probe( probeID, animPr ) \ + VMS_impl__record_sched_choice_into_probe( probeID, animPr ) + +void +VMS_impl__record_interval_start_in_probe( int32 probeID ); +#define VMS__record_interval_start_in_probe( probeID ) \ + VMS_impl__record_interval_start_in_probe( probeID ) + +void +VMS_impl__record_interval_end_in_probe( int32 probeID ); +#define VMS__record_interval_end_in_probe( probeID ) \ + VMS_impl__record_interval_end_in_probe( probeID ) + +void +VMS_impl__print_stats_of_probe( int32 probeID ); +#define VMS__print_stats_of_probe( probeID ) \ + VMS_impl__print_stats_of_probe( probeID ) + +void +VMS_impl__print_stats_of_all_probes(); +#define VMS__print_stats_of_all_probes() \ + VMS_impl__print_stats_of_all_probes() + + +#else +int32 +VMS_impl__record_time_point_into_new_probe( char *nameStr,VirtProcr *animPr); +#define VMS__record_time_point_into_new_probe( nameStr, animPr ) \ + 0 /* do nothing */ + +int32 +VMS_ext_impl__record_time_point_into_new_probe( char *nameStr ); +#define VMS_ext__record_time_point_into_new_probe( nameStr ) \ + 0 /* do nothing */ + + +int32 +VMS_impl__create_single_interval_probe( char *nameStr, VirtProcr *animPr ); +#define VMS__create_single_interval_probe( nameStr, animPr ) \ + 0 /* do nothing */ + + +int32 +VMS_impl__create_histogram_probe( int32 numBins, float64 startValue, + float64 binWidth, char *nameStr, VirtProcr *animPr ); +#define VMS__create_histogram_probe( numBins, startValue, \ + binWidth, nameStr, animPr ) \ + 0 /* do nothing */ + +void +VMS_impl__index_probe_by_its_name( int32 probeID, VirtProcr *animPr ); +#define VMS__index_probe_by_its_name( probeID, animPr ) \ + /* do nothing */ + +IntervalProbe * +VMS_impl__get_probe_by_name( char *probeName, VirtProcr *animPr ); +#define VMS__get_probe_by_name( probeID, animPr ) \ + NULL /* do nothing */ + +void +VMS_impl__record_sched_choice_into_probe( int32 probeID, VirtProcr *animPr ); +#define VMS__record_sched_choice_into_probe( probeID, animPr ) \ + /* do nothing */ + +void +VMS_impl__record_interval_start_in_probe( int32 probeID ); +#define VMS__record_interval_start_in_probe( probeID ) \ + /* do nothing */ + +void +VMS_impl__record_interval_end_in_probe( int32 probeID ); +#define VMS__record_interval_end_in_probe( probeID ) \ + /* do nothing */ + +inline void doNothing(); +void +VMS_impl__print_stats_of_probe( int32 probeID ); +#define VMS__print_stats_of_probe( probeID ) \ + doNothing/* do nothing */ + +void +VMS_impl__print_stats_of_all_probes(); +#define VMS__print_stats_of_all_probes \ + doNothing/* do nothing */ + +#endif /* defined STATS__ENABLE_PROBES */ + +#endif /* _PROBES_H */ + diff -r 3bd35fc83c61 -r 7523ee70d66c vmalloc.c --- a/vmalloc.c Wed Jan 04 16:40:10 2012 +0100 +++ b/vmalloc.c Fri Jan 06 18:55:05 2012 +0100 @@ -1,495 +1,495 @@ -/* - * Copyright 2009 OpenSourceCodeStewardshipFoundation.org - * Licensed under GNU General Public License version 2 - * - * Author: seanhalle@yahoo.com - * - * Created on November 14, 2009, 9:07 PM - */ - -#include -#include -#include -#include - -#include "VMS.h" -#include "Histogram/Histogram.h" - -/*Helper function - *Insert a newly generated free chunk into the first spot on the free list. - * The chunk is cast as a MallocProlog, so the various pointers in it are - * accessed with C's help -- and the size of the prolog is easily added to - * the pointer when a chunk is returned to the app -- so C handles changes - * in pointer sizes among machines. - * - *The list head is a normal MallocProlog struct -- identified by its - * prevChunkInFreeList being NULL -- the only one. - * - *The end of the list is identified by next chunk being NULL, as usual. - */ -void inline -add_chunk_to_free_list( MallocProlog *chunk, MallocProlog *listHead ) - { - chunk->nextChunkInFreeList = listHead->nextChunkInFreeList; - if( chunk->nextChunkInFreeList != NULL ) //if not last in free list - chunk->nextChunkInFreeList->prevChunkInFreeList = chunk; - chunk->prevChunkInFreeList = listHead; - listHead->nextChunkInFreeList = chunk; - } - - -/*This is sequential code, meant to only be called from the Master, not from - * any slave VPs. - *Search down list, checking size by the nextHigherInMem pointer, to find - * first chunk bigger than size needed. - *Shave off the extra and make it into a new free-list element, hook it in - * then return the address of the found element plus size of prolog. - * - *Will find a - */ -void *VMS__malloc( size_t sizeRequested ) - { MallocProlog *foundElem = NULL, *currElem, *newElem; - ssize_t amountExtra, sizeConsumed,sizeOfFound; - uint32 foundElemIsTopOfHeap; - - //============================= MEASUREMENT STUFF ======================== - #ifdef MEAS__TIME_MALLOC - int32 startStamp, endStamp; - saveLowTimeStampCountInto( startStamp ); - #endif - //======================================================================== - - //step up the size to be aligned at 16-byte boundary, prob better ways - sizeRequested = (sizeRequested + 16) & ~15; - currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList; - - while( currElem != NULL ) - { //check if size of currElem is big enough - sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem); - amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog); - if( amountExtra > 0 ) - { //found it, get out of loop - foundElem = currElem; - currElem = NULL; - } - else - currElem = currElem->nextChunkInFreeList; - } - - if( foundElem == NULL ) - { ERROR("\nmalloc failed\n") - return (void *)NULL; //indicates malloc failed - } - //Using a kludge to identify the element that is the top chunk in the - // heap -- saving top-of-heap addr in head's nextHigherInMem -- and - // save addr of start of heap in head's nextLowerInMem - //Will handle top of Heap specially - foundElemIsTopOfHeap = foundElem->nextHigherInMem == - _VMSMasterEnv->freeListHead->nextHigherInMem; - - //before shave off and try to insert new elem, remove found elem - //note, foundElem will never be the head, so always has valid prevChunk - foundElem->prevChunkInFreeList->nextChunkInFreeList = - foundElem->nextChunkInFreeList; - if( foundElem->nextChunkInFreeList != NULL ) - { foundElem->nextChunkInFreeList->prevChunkInFreeList = - foundElem->prevChunkInFreeList; - } - foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated - - //if enough, turn extra into new elem & insert it - if( amountExtra > 64 ) - { //make new elem by adding to addr of curr elem then casting - sizeConsumed = sizeof(MallocProlog) + sizeRequested; - newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed ); - newElem->nextLowerInMem = foundElem; //This is evil (but why?) - newElem->nextHigherInMem = foundElem->nextHigherInMem; //This is evil (but why?) - foundElem->nextHigherInMem = newElem; - if( ! foundElemIsTopOfHeap ) - { //there is no next higher for top of heap, so can't write to it - newElem->nextHigherInMem->nextLowerInMem = newElem; - } - add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead ); - } - else - { - sizeConsumed = sizeOfFound; - } - _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed; - - //============================= MEASUREMENT STUFF ======================== - #ifdef MEAS__TIME_MALLOC - saveLowTimeStampCountInto( endStamp ); - addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist ); - #endif - //======================================================================== - - //skip over the prolog by adding its size to the pointer return - return (void*)((uintptr_t)foundElem + sizeof(MallocProlog)); - } - -/*This is sequential code, meant to only be called from the Master, not from - * any slave VPs. - *Search down list, checking size by the nextHigherInMem pointer, to find - * first chunk bigger than size needed. - *Shave off the extra and make it into a new free-list element, hook it in - * then return the address of the found element plus size of prolog. - * - * The difference to the regular malloc is, that all the allocated chunks are - * aligned and padded to the size of a CACHE_LINE. Thus creating a new chunk - * before the aligned chunk. - */ -void *VMS__malloc_aligned( size_t sizeRequested ) - { MallocProlog *foundElem = NULL, *currElem, *newElem; - ssize_t amountExtra, sizeConsumed,sizeOfFound,prevAmount; - uint32 foundElemIsTopOfHeap; - - //============================= MEASUREMENT STUFF ======================== - #ifdef MEAS__TIME_MALLOC - uint32 startStamp, endStamp; - saveLowTimeStampCountInto( startStamp ); - #endif - //======================================================================== - - //step up the size to be multiple of the cache line size - sizeRequested = (sizeRequested + CACHE_LINE) & ~(CACHE_LINE-1); - currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList; - - while( currElem != NULL ) - { //check if size of currElem is big enough - sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem); - amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog); - if( amountExtra > 0 ) - { - //look if the found element is already aligned - if((((uintptr_t)currElem+sizeof(MallocProlog)) & (uintptr_t)(CACHE_LINE-1)) == 0){ - //found it, get out of loop - foundElem = currElem; - break; - }else{ - //find first aligned address and check if it's still big enough - //check also if the space before the aligned address is big enough - //for a new element - void *firstAlignedAddr = (void*)(((uintptr_t)currElem + 2*CACHE_LINE) & ~((uintptr_t)(CACHE_LINE-1))); - prevAmount = (uintptr_t)firstAlignedAddr - (uintptr_t)currElem; - sizeOfFound=(uintptr_t)currElem->nextHigherInMem -(uintptr_t)firstAlignedAddr + sizeof(MallocProlog); - amountExtra= sizeOfFound - sizeRequested - sizeof(MallocProlog); - if(prevAmount > 2*sizeof(MallocProlog) && amountExtra > 0 ){ - //found suitable element - //create new previous element and exit loop - MallocProlog *newAlignedElem = (MallocProlog*)firstAlignedAddr - 1; - - //insert new element into free list - if(currElem->nextChunkInFreeList != NULL) - currElem->nextChunkInFreeList->prevChunkInFreeList = newAlignedElem; - newAlignedElem->prevChunkInFreeList = currElem; - newAlignedElem->nextChunkInFreeList = currElem->nextChunkInFreeList; - currElem->nextChunkInFreeList = newAlignedElem; - - //set higherInMem and lowerInMem - newAlignedElem->nextHigherInMem = currElem->nextHigherInMem; - foundElemIsTopOfHeap = currElem->nextHigherInMem == - _VMSMasterEnv->freeListHead->nextHigherInMem; - if(!foundElemIsTopOfHeap) - currElem->nextHigherInMem->nextLowerInMem = newAlignedElem; - currElem->nextHigherInMem = newAlignedElem; - newAlignedElem->nextLowerInMem = currElem; - - //Found new element leaving loop - foundElem = newAlignedElem; - break; - } - } - - } - currElem = currElem->nextChunkInFreeList; - } - - if( foundElem == NULL ) - { ERROR("\nmalloc failed\n") - return (void *)NULL; //indicates malloc failed - } - //Using a kludge to identify the element that is the top chunk in the - // heap -- saving top-of-heap addr in head's nextHigherInMem -- and - // save addr of start of heap in head's nextLowerInMem - //Will handle top of Heap specially - foundElemIsTopOfHeap = foundElem->nextHigherInMem == - _VMSMasterEnv->freeListHead->nextHigherInMem; - - //before shave off and try to insert new elem, remove found elem - //note, foundElem will never be the head, so always has valid prevChunk - foundElem->prevChunkInFreeList->nextChunkInFreeList = - foundElem->nextChunkInFreeList; - if( foundElem->nextChunkInFreeList != NULL ) - { foundElem->nextChunkInFreeList->prevChunkInFreeList = - foundElem->prevChunkInFreeList; - } - foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated - - //if enough, turn extra into new elem & insert it - if( amountExtra > 64 ) - { //make new elem by adding to addr of curr elem then casting - sizeConsumed = sizeof(MallocProlog) + sizeRequested; - newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed ); - newElem->nextHigherInMem = foundElem->nextHigherInMem; - newElem->nextLowerInMem = foundElem; - foundElem->nextHigherInMem = newElem; - - if( ! foundElemIsTopOfHeap ) - { //there is no next higher for top of heap, so can't write to it - newElem->nextHigherInMem->nextLowerInMem = newElem; - } - add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead ); - } - else - { - sizeConsumed = sizeOfFound; - } - _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed; - - //============================= MEASUREMENT STUFF ======================== - #ifdef MEAS__TIME_MALLOC - saveLowTimeStampCountInto( endStamp ); - addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist ); - #endif - //======================================================================== - - //skip over the prolog by adding its size to the pointer return - return (void*)((uintptr_t)foundElem + sizeof(MallocProlog)); - } - - -/*This is sequential code -- only to be called from the Master - * When free, subtract the size of prolog from pointer, then cast it to a - * MallocProlog. Then check the nextLower and nextHigher chunks to see if - * one or both are also free, and coalesce if so, and if neither free, then - * add this one to free-list. - */ -void -VMS__free( void *ptrToFree ) - { MallocProlog *elemToFree, *nextLowerElem, *nextHigherElem; - size_t sizeOfElem; - uint32 lowerExistsAndIsFree, higherExistsAndIsFree; - - //============================= MEASUREMENT STUFF ======================== - #ifdef MEAS__TIME_MALLOC - int32 startStamp, endStamp; - saveLowTimeStampCountInto( startStamp ); - #endif - //======================================================================== - - if( ptrToFree < (void*)_VMSMasterEnv->freeListHead->nextLowerInMem || - ptrToFree > (void*)_VMSMasterEnv->freeListHead->nextHigherInMem ) - { //outside the range of data owned by VMS's malloc, so do nothing - return; - } - //subtract size of prolog to get pointer to prolog, then cast - elemToFree = (MallocProlog *)((uintptr_t)ptrToFree - sizeof(MallocProlog)); - sizeOfElem =(size_t)((uintptr_t)elemToFree->nextHigherInMem-(uintptr_t)elemToFree); - - if( elemToFree->prevChunkInFreeList != NULL ) - { printf( "error: freeing same element twice!" ); exit(1); - } - - _VMSMasterEnv->amtOfOutstandingMem -= sizeOfElem; - - nextLowerElem = elemToFree->nextLowerInMem; - nextHigherElem = elemToFree->nextHigherInMem; - - if( nextHigherElem == NULL ) - higherExistsAndIsFree = FALSE; - else //okay exists, now check if in the free-list by checking back ptr - higherExistsAndIsFree = (nextHigherElem->prevChunkInFreeList != NULL); - - if( nextLowerElem == NULL ) - lowerExistsAndIsFree = FALSE; - else //okay, it exists, now check if it's free - lowerExistsAndIsFree = (nextLowerElem->prevChunkInFreeList != NULL); - - - //now, know what exists and what's free - if( lowerExistsAndIsFree ) - { if( higherExistsAndIsFree ) - { //both exist and are free, so coalesce all three - //First, remove higher from free-list - nextHigherElem->prevChunkInFreeList->nextChunkInFreeList = - nextHigherElem->nextChunkInFreeList; - if( nextHigherElem->nextChunkInFreeList != NULL ) //end-of-list? - nextHigherElem->nextChunkInFreeList->prevChunkInFreeList = - nextHigherElem->prevChunkInFreeList; - //Now, fix-up sequence-in-mem list -- by side-effect, this also - // changes size of the lower elem, which is still in free-list - nextLowerElem->nextHigherInMem = nextHigherElem->nextHigherInMem; - if( nextHigherElem->nextHigherInMem != - _VMSMasterEnv->freeListHead->nextHigherInMem ) - nextHigherElem->nextHigherInMem->nextLowerInMem = nextLowerElem; - //notice didn't do anything to elemToFree -- it simply is no - // longer reachable from any of the lists. Wonder if could be a - // security leak because left valid addresses in it, - // but don't care for now. - } - else - { //lower is the only of the two that exists and is free, - //In this case, no adjustment to free-list, just change mem-list. - // By side-effect, changes size of the lower elem - nextLowerElem->nextHigherInMem = elemToFree->nextHigherInMem; - if( elemToFree->nextHigherInMem != - _VMSMasterEnv->freeListHead->nextHigherInMem ) - elemToFree->nextHigherInMem->nextLowerInMem = nextLowerElem; - } - } - else - { //lower either doesn't exist or isn't free, so check higher - if( higherExistsAndIsFree ) - { //higher exists and is the only of the two free - //First, in free-list, replace higher elem with the one to free - elemToFree->nextChunkInFreeList=nextHigherElem->nextChunkInFreeList; - elemToFree->prevChunkInFreeList=nextHigherElem->prevChunkInFreeList; - elemToFree->prevChunkInFreeList->nextChunkInFreeList = elemToFree; - if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list? - elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree; - //Now chg mem-list. By side-effect, changes size of elemToFree - elemToFree->nextHigherInMem = nextHigherElem->nextHigherInMem; - if( elemToFree->nextHigherInMem != - _VMSMasterEnv->freeListHead->nextHigherInMem ) - elemToFree->nextHigherInMem->nextLowerInMem = elemToFree; - } - else - { //neither lower nor higher is availabe to coalesce so add to list - // this makes prev chunk ptr non-null, which indicates it's free - elemToFree->nextChunkInFreeList = - _VMSMasterEnv->freeListHead->nextChunkInFreeList; - _VMSMasterEnv->freeListHead->nextChunkInFreeList = elemToFree; - if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list? - elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree; - elemToFree->prevChunkInFreeList = _VMSMasterEnv->freeListHead; - } - } - //============================= MEASUREMENT STUFF ======================== - #ifdef MEAS__TIME_MALLOC - saveLowTimeStampCountInto( endStamp ); - addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->freeTimeHist ); - #endif - //======================================================================== - - } - - -/*Allocates memory from the external system -- higher overhead - * - *Because of Linux's malloc throwing bizarre random faults when malloc is - * used inside a VMS virtual processor, have to pass this as a request and - * have the core loop do it when it gets around to it -- will look for these - * chores leftover from the previous animation of masterVP the next time it - * goes to animate the masterVP -- so it takes two separate masterVP - * animations, separated by work, to complete an external malloc or - * external free request. - * - *Thinking core loop accepts signals -- just looks if signal-location is - * empty or not -- - */ -void * -VMS__malloc_in_ext( size_t sizeRequested ) - { - /* - //This is running in the master, so no chance for multiple cores to be - // competing for the core's flag. - if( *(_VMSMasterEnv->coreLoopSignalAddr[ 0 ]) != 0 ) - { //something has already signalled to core loop, so save the signal - // and look, next time master animated, to see if can send it. - //Note, the addr to put a signal is in the coreloop's frame, so just - // checks it each time through -- make it volatile to avoid GCC - // optimizations -- it's a coreloop local var that only changes - // after jumping away. The signal includes the addr to send the - //return to -- even if just empty return completion-signal - // - //save the signal in some queue that the master looks at each time - // it starts up -- one loc says if empty for fast common case -- - //something like that -- want to hide this inside this call -- but - // think this has to come as a request -- req handler gives procr - // back to master loop, which gives it back to req handler at point - // it sees that core loop has sent return signal. Something like - // that. - saveTheSignal - - } - coreSigData->type = malloc; - coreSigData->sizeToMalloc = sizeRequested; - coreSigData->locToSignalCompletion = &figureOut; - _VMSMasterEnv->coreLoopSignals[ 0 ] = coreSigData; - */ - //just risk system-stack faults until get this figured out - return malloc( sizeRequested ); - } - - -/*Frees memory that was allocated in the external system -- higher overhead - * - *As noted in external malloc comment, this is clunky 'cause the free has - * to be called in the core loop. - */ -void -VMS__free_in_ext( void *ptrToFree ) - { - //just risk system-stack faults until get this figured out - free( ptrToFree ); - - //TODO: fix this -- so - } - - -/*Designed to be called from the main thread outside of VMS, during init - */ -MallocProlog * -VMS_ext__create_free_list() - { MallocProlog *freeListHead, *firstChunk; - - //Note, this is running in the main thread -- all increases in malloc - // mem and all frees of it must be done in this thread, with the - // thread's original stack available - freeListHead = malloc( sizeof(MallocProlog) ); - firstChunk = malloc( MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE ); - if( firstChunk == NULL ) {printf("malloc error\n"); exit(1);} - - //Touch memory to avoid page faults - void *ptr,*endPtr; - endPtr = (void*)firstChunk+MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE; - for(ptr = firstChunk; ptr < endPtr; ptr+=PAGE_SIZE) - { - *(char*)ptr = 0; - } - - freeListHead->prevChunkInFreeList = NULL; - //Use this addr to free the heap when cleanup - freeListHead->nextLowerInMem = firstChunk; - //to identify top-of-heap elem, compare this addr to elem's next higher - freeListHead->nextHigherInMem = (void*)( (uintptr_t)firstChunk + - MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE); - freeListHead->nextChunkInFreeList = firstChunk; - - firstChunk->nextChunkInFreeList = NULL; - firstChunk->prevChunkInFreeList = freeListHead; - //next Higher has to be set to top of chunk, so can calc size in malloc - firstChunk->nextHigherInMem = (void*)( (uintptr_t)firstChunk + - MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE); - firstChunk->nextLowerInMem = NULL; //identifies as bott of heap - - _VMSMasterEnv->amtOfOutstandingMem = 0; //none allocated yet - - return freeListHead; - } - - -/*Designed to be called from the main thread outside of VMS, during cleanup - */ -void -VMS_ext__free_free_list( MallocProlog *freeListHead ) - { - //stashed a ptr to the one and only bug chunk malloc'd from OS in the - // free list head's next lower in mem pointer - free( freeListHead->nextLowerInMem ); - - //don't free the head -- it'll be in an array eventually -- free whole - // array when all the free lists linked from it have already been freed - } - +/* + * Copyright 2009 OpenSourceCodeStewardshipFoundation.org + * Licensed under GNU General Public License version 2 + * + * Author: seanhalle@yahoo.com + * + * Created on November 14, 2009, 9:07 PM + */ + +#include +#include +#include +#include + +#include "VMS.h" +#include "Histogram/Histogram.h" + +/*Helper function + *Insert a newly generated free chunk into the first spot on the free list. + * The chunk is cast as a MallocProlog, so the various pointers in it are + * accessed with C's help -- and the size of the prolog is easily added to + * the pointer when a chunk is returned to the app -- so C handles changes + * in pointer sizes among machines. + * + *The list head is a normal MallocProlog struct -- identified by its + * prevChunkInFreeList being NULL -- the only one. + * + *The end of the list is identified by next chunk being NULL, as usual. + */ +void inline +add_chunk_to_free_list( MallocProlog *chunk, MallocProlog *listHead ) + { + chunk->nextChunkInFreeList = listHead->nextChunkInFreeList; + if( chunk->nextChunkInFreeList != NULL ) //if not last in free list + chunk->nextChunkInFreeList->prevChunkInFreeList = chunk; + chunk->prevChunkInFreeList = listHead; + listHead->nextChunkInFreeList = chunk; + } + + +/*This is sequential code, meant to only be called from the Master, not from + * any slave VPs. + *Search down list, checking size by the nextHigherInMem pointer, to find + * first chunk bigger than size needed. + *Shave off the extra and make it into a new free-list element, hook it in + * then return the address of the found element plus size of prolog. + * + *Will find a + */ +void *VMS__malloc( size_t sizeRequested ) + { MallocProlog *foundElem = NULL, *currElem, *newElem; + ssize_t amountExtra, sizeConsumed,sizeOfFound; + uint32 foundElemIsTopOfHeap; + + //============================= MEASUREMENT STUFF ======================== + #ifdef MEAS__TIME_MALLOC + int32 startStamp, endStamp; + saveLowTimeStampCountInto( startStamp ); + #endif + //======================================================================== + + //step up the size to be aligned at 16-byte boundary, prob better ways + sizeRequested = (sizeRequested + 16) & ~15; + currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList; + + while( currElem != NULL ) + { //check if size of currElem is big enough + sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem); + amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog); + if( amountExtra > 0 ) + { //found it, get out of loop + foundElem = currElem; + currElem = NULL; + } + else + currElem = currElem->nextChunkInFreeList; + } + + if( foundElem == NULL ) + { ERROR("\nmalloc failed\n") + return (void *)NULL; //indicates malloc failed + } + //Using a kludge to identify the element that is the top chunk in the + // heap -- saving top-of-heap addr in head's nextHigherInMem -- and + // save addr of start of heap in head's nextLowerInMem + //Will handle top of Heap specially + foundElemIsTopOfHeap = foundElem->nextHigherInMem == + _VMSMasterEnv->freeListHead->nextHigherInMem; + + //before shave off and try to insert new elem, remove found elem + //note, foundElem will never be the head, so always has valid prevChunk + foundElem->prevChunkInFreeList->nextChunkInFreeList = + foundElem->nextChunkInFreeList; + if( foundElem->nextChunkInFreeList != NULL ) + { foundElem->nextChunkInFreeList->prevChunkInFreeList = + foundElem->prevChunkInFreeList; + } + foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated + + //if enough, turn extra into new elem & insert it + if( amountExtra > 64 ) + { //make new elem by adding to addr of curr elem then casting + sizeConsumed = sizeof(MallocProlog) + sizeRequested; + newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed ); + newElem->nextLowerInMem = foundElem; //This is evil (but why?) + newElem->nextHigherInMem = foundElem->nextHigherInMem; //This is evil (but why?) + foundElem->nextHigherInMem = newElem; + if( ! foundElemIsTopOfHeap ) + { //there is no next higher for top of heap, so can't write to it + newElem->nextHigherInMem->nextLowerInMem = newElem; + } + add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead ); + } + else + { + sizeConsumed = sizeOfFound; + } + _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed; + + //============================= MEASUREMENT STUFF ======================== + #ifdef MEAS__TIME_MALLOC + saveLowTimeStampCountInto( endStamp ); + addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist ); + #endif + //======================================================================== + + //skip over the prolog by adding its size to the pointer return + return (void*)((uintptr_t)foundElem + sizeof(MallocProlog)); + } + +/*This is sequential code, meant to only be called from the Master, not from + * any slave VPs. + *Search down list, checking size by the nextHigherInMem pointer, to find + * first chunk bigger than size needed. + *Shave off the extra and make it into a new free-list element, hook it in + * then return the address of the found element plus size of prolog. + * + * The difference to the regular malloc is, that all the allocated chunks are + * aligned and padded to the size of a CACHE_LINE. Thus creating a new chunk + * before the aligned chunk. + */ +void *VMS__malloc_aligned( size_t sizeRequested ) + { MallocProlog *foundElem = NULL, *currElem, *newElem; + ssize_t amountExtra, sizeConsumed,sizeOfFound,prevAmount; + uint32 foundElemIsTopOfHeap; + + //============================= MEASUREMENT STUFF ======================== + #ifdef MEAS__TIME_MALLOC + uint32 startStamp, endStamp; + saveLowTimeStampCountInto( startStamp ); + #endif + //======================================================================== + + //step up the size to be multiple of the cache line size + sizeRequested = (sizeRequested + CACHE_LINE) & ~(CACHE_LINE-1); + currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList; + + while( currElem != NULL ) + { //check if size of currElem is big enough + sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem); + amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog); + if( amountExtra > 0 ) + { + //look if the found element is already aligned + if((((uintptr_t)currElem+sizeof(MallocProlog)) & (uintptr_t)(CACHE_LINE-1)) == 0){ + //found it, get out of loop + foundElem = currElem; + break; + }else{ + //find first aligned address and check if it's still big enough + //check also if the space before the aligned address is big enough + //for a new element + void *firstAlignedAddr = (void*)(((uintptr_t)currElem + 2*CACHE_LINE) & ~((uintptr_t)(CACHE_LINE-1))); + prevAmount = (uintptr_t)firstAlignedAddr - (uintptr_t)currElem; + sizeOfFound=(uintptr_t)currElem->nextHigherInMem -(uintptr_t)firstAlignedAddr + sizeof(MallocProlog); + amountExtra= sizeOfFound - sizeRequested - sizeof(MallocProlog); + if(prevAmount > 2*sizeof(MallocProlog) && amountExtra > 0 ){ + //found suitable element + //create new previous element and exit loop + MallocProlog *newAlignedElem = (MallocProlog*)firstAlignedAddr - 1; + + //insert new element into free list + if(currElem->nextChunkInFreeList != NULL) + currElem->nextChunkInFreeList->prevChunkInFreeList = newAlignedElem; + newAlignedElem->prevChunkInFreeList = currElem; + newAlignedElem->nextChunkInFreeList = currElem->nextChunkInFreeList; + currElem->nextChunkInFreeList = newAlignedElem; + + //set higherInMem and lowerInMem + newAlignedElem->nextHigherInMem = currElem->nextHigherInMem; + foundElemIsTopOfHeap = currElem->nextHigherInMem == + _VMSMasterEnv->freeListHead->nextHigherInMem; + if(!foundElemIsTopOfHeap) + currElem->nextHigherInMem->nextLowerInMem = newAlignedElem; + currElem->nextHigherInMem = newAlignedElem; + newAlignedElem->nextLowerInMem = currElem; + + //Found new element leaving loop + foundElem = newAlignedElem; + break; + } + } + + } + currElem = currElem->nextChunkInFreeList; + } + + if( foundElem == NULL ) + { ERROR("\nmalloc failed\n") + return (void *)NULL; //indicates malloc failed + } + //Using a kludge to identify the element that is the top chunk in the + // heap -- saving top-of-heap addr in head's nextHigherInMem -- and + // save addr of start of heap in head's nextLowerInMem + //Will handle top of Heap specially + foundElemIsTopOfHeap = foundElem->nextHigherInMem == + _VMSMasterEnv->freeListHead->nextHigherInMem; + + //before shave off and try to insert new elem, remove found elem + //note, foundElem will never be the head, so always has valid prevChunk + foundElem->prevChunkInFreeList->nextChunkInFreeList = + foundElem->nextChunkInFreeList; + if( foundElem->nextChunkInFreeList != NULL ) + { foundElem->nextChunkInFreeList->prevChunkInFreeList = + foundElem->prevChunkInFreeList; + } + foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated + + //if enough, turn extra into new elem & insert it + if( amountExtra > 64 ) + { //make new elem by adding to addr of curr elem then casting + sizeConsumed = sizeof(MallocProlog) + sizeRequested; + newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed ); + newElem->nextHigherInMem = foundElem->nextHigherInMem; + newElem->nextLowerInMem = foundElem; + foundElem->nextHigherInMem = newElem; + + if( ! foundElemIsTopOfHeap ) + { //there is no next higher for top of heap, so can't write to it + newElem->nextHigherInMem->nextLowerInMem = newElem; + } + add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead ); + } + else + { + sizeConsumed = sizeOfFound; + } + _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed; + + //============================= MEASUREMENT STUFF ======================== + #ifdef MEAS__TIME_MALLOC + saveLowTimeStampCountInto( endStamp ); + addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist ); + #endif + //======================================================================== + + //skip over the prolog by adding its size to the pointer return + return (void*)((uintptr_t)foundElem + sizeof(MallocProlog)); + } + + +/*This is sequential code -- only to be called from the Master + * When free, subtract the size of prolog from pointer, then cast it to a + * MallocProlog. Then check the nextLower and nextHigher chunks to see if + * one or both are also free, and coalesce if so, and if neither free, then + * add this one to free-list. + */ +void +VMS__free( void *ptrToFree ) + { MallocProlog *elemToFree, *nextLowerElem, *nextHigherElem; + size_t sizeOfElem; + uint32 lowerExistsAndIsFree, higherExistsAndIsFree; + + //============================= MEASUREMENT STUFF ======================== + #ifdef MEAS__TIME_MALLOC + int32 startStamp, endStamp; + saveLowTimeStampCountInto( startStamp ); + #endif + //======================================================================== + + if( ptrToFree < (void*)_VMSMasterEnv->freeListHead->nextLowerInMem || + ptrToFree > (void*)_VMSMasterEnv->freeListHead->nextHigherInMem ) + { //outside the range of data owned by VMS's malloc, so do nothing + return; + } + //subtract size of prolog to get pointer to prolog, then cast + elemToFree = (MallocProlog *)((uintptr_t)ptrToFree - sizeof(MallocProlog)); + sizeOfElem =(size_t)((uintptr_t)elemToFree->nextHigherInMem-(uintptr_t)elemToFree); + + if( elemToFree->prevChunkInFreeList != NULL ) + { printf( "error: freeing same element twice!" ); exit(1); + } + + _VMSMasterEnv->amtOfOutstandingMem -= sizeOfElem; + + nextLowerElem = elemToFree->nextLowerInMem; + nextHigherElem = elemToFree->nextHigherInMem; + + if( nextHigherElem == NULL ) + higherExistsAndIsFree = FALSE; + else //okay exists, now check if in the free-list by checking back ptr + higherExistsAndIsFree = (nextHigherElem->prevChunkInFreeList != NULL); + + if( nextLowerElem == NULL ) + lowerExistsAndIsFree = FALSE; + else //okay, it exists, now check if it's free + lowerExistsAndIsFree = (nextLowerElem->prevChunkInFreeList != NULL); + + + //now, know what exists and what's free + if( lowerExistsAndIsFree ) + { if( higherExistsAndIsFree ) + { //both exist and are free, so coalesce all three + //First, remove higher from free-list + nextHigherElem->prevChunkInFreeList->nextChunkInFreeList = + nextHigherElem->nextChunkInFreeList; + if( nextHigherElem->nextChunkInFreeList != NULL ) //end-of-list? + nextHigherElem->nextChunkInFreeList->prevChunkInFreeList = + nextHigherElem->prevChunkInFreeList; + //Now, fix-up sequence-in-mem list -- by side-effect, this also + // changes size of the lower elem, which is still in free-list + nextLowerElem->nextHigherInMem = nextHigherElem->nextHigherInMem; + if( nextHigherElem->nextHigherInMem != + _VMSMasterEnv->freeListHead->nextHigherInMem ) + nextHigherElem->nextHigherInMem->nextLowerInMem = nextLowerElem; + //notice didn't do anything to elemToFree -- it simply is no + // longer reachable from any of the lists. Wonder if could be a + // security leak because left valid addresses in it, + // but don't care for now. + } + else + { //lower is the only of the two that exists and is free, + //In this case, no adjustment to free-list, just change mem-list. + // By side-effect, changes size of the lower elem + nextLowerElem->nextHigherInMem = elemToFree->nextHigherInMem; + if( elemToFree->nextHigherInMem != + _VMSMasterEnv->freeListHead->nextHigherInMem ) + elemToFree->nextHigherInMem->nextLowerInMem = nextLowerElem; + } + } + else + { //lower either doesn't exist or isn't free, so check higher + if( higherExistsAndIsFree ) + { //higher exists and is the only of the two free + //First, in free-list, replace higher elem with the one to free + elemToFree->nextChunkInFreeList=nextHigherElem->nextChunkInFreeList; + elemToFree->prevChunkInFreeList=nextHigherElem->prevChunkInFreeList; + elemToFree->prevChunkInFreeList->nextChunkInFreeList = elemToFree; + if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list? + elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree; + //Now chg mem-list. By side-effect, changes size of elemToFree + elemToFree->nextHigherInMem = nextHigherElem->nextHigherInMem; + if( elemToFree->nextHigherInMem != + _VMSMasterEnv->freeListHead->nextHigherInMem ) + elemToFree->nextHigherInMem->nextLowerInMem = elemToFree; + } + else + { //neither lower nor higher is availabe to coalesce so add to list + // this makes prev chunk ptr non-null, which indicates it's free + elemToFree->nextChunkInFreeList = + _VMSMasterEnv->freeListHead->nextChunkInFreeList; + _VMSMasterEnv->freeListHead->nextChunkInFreeList = elemToFree; + if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list? + elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree; + elemToFree->prevChunkInFreeList = _VMSMasterEnv->freeListHead; + } + } + //============================= MEASUREMENT STUFF ======================== + #ifdef MEAS__TIME_MALLOC + saveLowTimeStampCountInto( endStamp ); + addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->freeTimeHist ); + #endif + //======================================================================== + + } + + +/*Allocates memory from the external system -- higher overhead + * + *Because of Linux's malloc throwing bizarre random faults when malloc is + * used inside a VMS virtual processor, have to pass this as a request and + * have the core loop do it when it gets around to it -- will look for these + * chores leftover from the previous animation of masterVP the next time it + * goes to animate the masterVP -- so it takes two separate masterVP + * animations, separated by work, to complete an external malloc or + * external free request. + * + *Thinking core loop accepts signals -- just looks if signal-location is + * empty or not -- + */ +void * +VMS__malloc_in_ext( size_t sizeRequested ) + { + /* + //This is running in the master, so no chance for multiple cores to be + // competing for the core's flag. + if( *(_VMSMasterEnv->coreLoopSignalAddr[ 0 ]) != 0 ) + { //something has already signalled to core loop, so save the signal + // and look, next time master animated, to see if can send it. + //Note, the addr to put a signal is in the coreloop's frame, so just + // checks it each time through -- make it volatile to avoid GCC + // optimizations -- it's a coreloop local var that only changes + // after jumping away. The signal includes the addr to send the + //return to -- even if just empty return completion-signal + // + //save the signal in some queue that the master looks at each time + // it starts up -- one loc says if empty for fast common case -- + //something like that -- want to hide this inside this call -- but + // think this has to come as a request -- req handler gives procr + // back to master loop, which gives it back to req handler at point + // it sees that core loop has sent return signal. Something like + // that. + saveTheSignal + + } + coreSigData->type = malloc; + coreSigData->sizeToMalloc = sizeRequested; + coreSigData->locToSignalCompletion = &figureOut; + _VMSMasterEnv->coreLoopSignals[ 0 ] = coreSigData; + */ + //just risk system-stack faults until get this figured out + return malloc( sizeRequested ); + } + + +/*Frees memory that was allocated in the external system -- higher overhead + * + *As noted in external malloc comment, this is clunky 'cause the free has + * to be called in the core loop. + */ +void +VMS__free_in_ext( void *ptrToFree ) + { + //just risk system-stack faults until get this figured out + free( ptrToFree ); + + //TODO: fix this -- so + } + + +/*Designed to be called from the main thread outside of VMS, during init + */ +MallocProlog * +VMS_ext__create_free_list() + { MallocProlog *freeListHead, *firstChunk; + + //Note, this is running in the main thread -- all increases in malloc + // mem and all frees of it must be done in this thread, with the + // thread's original stack available + freeListHead = malloc( sizeof(MallocProlog) ); + firstChunk = malloc( MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE ); + if( firstChunk == NULL ) {printf("malloc error\n"); exit(1);} + + //Touch memory to avoid page faults + void *ptr,*endPtr; + endPtr = (void*)firstChunk+MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE; + for(ptr = firstChunk; ptr < endPtr; ptr+=PAGE_SIZE) + { + *(char*)ptr = 0; + } + + freeListHead->prevChunkInFreeList = NULL; + //Use this addr to free the heap when cleanup + freeListHead->nextLowerInMem = firstChunk; + //to identify top-of-heap elem, compare this addr to elem's next higher + freeListHead->nextHigherInMem = (void*)( (uintptr_t)firstChunk + + MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE); + freeListHead->nextChunkInFreeList = firstChunk; + + firstChunk->nextChunkInFreeList = NULL; + firstChunk->prevChunkInFreeList = freeListHead; + //next Higher has to be set to top of chunk, so can calc size in malloc + firstChunk->nextHigherInMem = (void*)( (uintptr_t)firstChunk + + MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE); + firstChunk->nextLowerInMem = NULL; //identifies as bott of heap + + _VMSMasterEnv->amtOfOutstandingMem = 0; //none allocated yet + + return freeListHead; + } + + +/*Designed to be called from the main thread outside of VMS, during cleanup + */ +void +VMS_ext__free_free_list( MallocProlog *freeListHead ) + { + //stashed a ptr to the one and only bug chunk malloc'd from OS in the + // free list head's next lower in mem pointer + free( freeListHead->nextLowerInMem ); + + //don't free the head -- it'll be in an array eventually -- free whole + // array when all the free lists linked from it have already been freed + } + diff -r 3bd35fc83c61 -r 7523ee70d66c vmalloc.h --- a/vmalloc.h Wed Jan 04 16:40:10 2012 +0100 +++ b/vmalloc.h Fri Jan 06 18:55:05 2012 +0100 @@ -1,61 +1,61 @@ -/* - * Copyright 2009 OpenSourceCodeStewardshipFoundation.org - * Licensed under GNU General Public License version 2 - * - * Author: seanhalle@yahoo.com - * - * Created on November 14, 2009, 9:07 PM - */ - -#ifndef _VMALLOC_H -#define _VMALLOC_H - -#include -#include -#include "VMS_primitive_data_types.h" - -typedef struct _MallocProlog MallocProlog; - -struct _MallocProlog - { - MallocProlog *nextChunkInFreeList; - MallocProlog *prevChunkInFreeList; - MallocProlog *nextHigherInMem; - MallocProlog *nextLowerInMem; - }; -//MallocProlog - -typedef struct - { - MallocProlog *firstChunkInFreeList; - int32 numInList; //TODO not used - } -FreeListHead; - -void * -VMS__malloc( size_t sizeRequested ); - -void * -VMS__malloc_aligned( size_t sizeRequested ); - -void -VMS__free( void *ptrToFree ); - -/*Allocates memory from the external system -- higher overhead - */ -void * -VMS__malloc_in_ext( size_t sizeRequested ); - -/*Frees memory that was allocated in the external system -- higher overhead - */ -void -VMS__free_in_ext( void *ptrToFree ); - - -MallocProlog * -VMS_ext__create_free_list(); - -void -VMS_ext__free_free_list( MallocProlog *freeListHead ); - +/* + * Copyright 2009 OpenSourceCodeStewardshipFoundation.org + * Licensed under GNU General Public License version 2 + * + * Author: seanhalle@yahoo.com + * + * Created on November 14, 2009, 9:07 PM + */ + +#ifndef _VMALLOC_H +#define _VMALLOC_H + +#include +#include +#include "VMS_primitive_data_types.h" + +typedef struct _MallocProlog MallocProlog; + +struct _MallocProlog + { + MallocProlog *nextChunkInFreeList; + MallocProlog *prevChunkInFreeList; + MallocProlog *nextHigherInMem; + MallocProlog *nextLowerInMem; + }; +//MallocProlog + +typedef struct + { + MallocProlog *firstChunkInFreeList; + int32 numInList; //TODO not used + } +FreeListHead; + +void * +VMS__malloc( size_t sizeRequested ); + +void * +VMS__malloc_aligned( size_t sizeRequested ); + +void +VMS__free( void *ptrToFree ); + +/*Allocates memory from the external system -- higher overhead + */ +void * +VMS__malloc_in_ext( size_t sizeRequested ); + +/*Frees memory that was allocated in the external system -- higher overhead + */ +void +VMS__free_in_ext( void *ptrToFree ); + + +MallocProlog * +VMS_ext__create_free_list(); + +void +VMS_ext__free_free_list( MallocProlog *freeListHead ); + #endif \ No newline at end of file diff -r 3bd35fc83c61 -r 7523ee70d66c vutilities.c --- a/vutilities.c Wed Jan 04 16:40:10 2012 +0100 +++ b/vutilities.c Fri Jan 06 18:55:05 2012 +0100 @@ -1,25 +1,25 @@ -/* - * Copyright 2009 OpenSourceCodeStewardshipFoundation.org - * Licensed under GNU General Public License version 2 - * - * Author: seanhalle@yahoo.com - * - * Created on November 14, 2009, 9:07 PM - */ - -#include -#include - -#include "VMS.h" - - -inline char * -VMS__strDup( char *str ) - { char *retStr; - - retStr = VMS__malloc( strlen(str) + 1 ); - if( str == NULL ) return str; - strcpy( retStr, str ); - - return retStr; - } +/* + * Copyright 2009 OpenSourceCodeStewardshipFoundation.org + * Licensed under GNU General Public License version 2 + * + * Author: seanhalle@yahoo.com + * + * Created on November 14, 2009, 9:07 PM + */ + +#include +#include + +#include "VMS.h" + + +inline char * +VMS__strDup( char *str ) + { char *retStr; + + retStr = VMS__malloc( strlen(str) + 1 ); + if( str == NULL ) return str; + strcpy( retStr, str ); + + return retStr; + } diff -r 3bd35fc83c61 -r 7523ee70d66c vutilities.h --- a/vutilities.h Wed Jan 04 16:40:10 2012 +0100 +++ b/vutilities.h Fri Jan 06 18:55:05 2012 +0100 @@ -1,20 +1,20 @@ -/* - * Copyright 2009 OpenSourceCodeStewardshipFoundation.org - * Licensed under GNU General Public License version 2 - * - * Author: seanhalle@yahoo.com - * - * Created on November 14, 2009, 9:07 PM - */ - - -#ifndef _UTILITIES_H -#define _UTILITIES_H - -#include -#include "VMS_primitive_data_types.h" - -inline char * -VMS__strDup( char *str ); - -#endif +/* + * Copyright 2009 OpenSourceCodeStewardshipFoundation.org + * Licensed under GNU General Public License version 2 + * + * Author: seanhalle@yahoo.com + * + * Created on November 14, 2009, 9:07 PM + */ + + +#ifndef _UTILITIES_H +#define _UTILITIES_H + +#include +#include "VMS_primitive_data_types.h" + +inline char * +VMS__strDup( char *str ); + +#endif