# HG changeset patch # User Merten Sach # Date 1309801543 -7200 # Node ID 521c75d64cefc56351e2bd42530f80b575131c52 # Parent fe5ec83f1bafb4db5ff5b7178c6d7192a62e51b1 Version before optimization diff -r fe5ec83f1baf -r 521c75d64cef ProcrContext.c --- a/ProcrContext.c Wed Jun 22 16:12:27 2011 +0200 +++ b/ProcrContext.c Mon Jul 04 19:45:43 2011 +0200 @@ -54,10 +54,10 @@ //============================= MEASUREMENT STUFF ======================== #ifdef STATS__TURN_ON_PROBES - struct timeval timeStamp; - gettimeofday( &(timeStamp), NULL); - newPr->createPtInSecs = timeStamp.tv_sec +(timeStamp.tv_usec/1000000.0) - - _VMSMasterEnv->createPtInSecs; + //struct timeval timeStamp; + //gettimeofday( &(timeStamp), NULL); + //newPr->createPtInSecs = timeStamp.tv_sec +(timeStamp.tv_usec/1000000.0) - + // _VMSMasterEnv->createPtInSecs; #endif //======================================================================== diff -r fe5ec83f1baf -r 521c75d64cef ProcrContext.h --- a/ProcrContext.h Wed Jun 22 16:12:27 2011 +0200 +++ b/ProcrContext.h Mon Jul 04 19:45:43 2011 +0200 @@ -20,6 +20,8 @@ void startVirtProcrFn(); +void *asmTerminateCoreLoop(VirtProcr *currPr); + #define flushRegisters() \ asm volatile ("":::"%rbx", "%r12", "%r13","%r14","%r15") diff -r fe5ec83f1baf -r 521c75d64cef VMS.c --- a/VMS.c Wed Jun 22 16:12:27 2011 +0200 +++ b/VMS.c Mon Jul 04 19:45:43 2011 +0200 @@ -110,7 +110,7 @@ //============================= MEASUREMENT STUFF ======================== #ifdef MEAS__TIME_MALLOC - _VMSMasterEnv->mallocTimeHist = makeFixedBinHistExt( 50, 0, 100, + _VMSMasterEnv->mallocTimeHist = makeFixedBinHistExt( 100, 0, 100, "malloc time hist"); _VMSMasterEnv->freeTimeHist = makeFixedBinHistExt( 50, 0, 100, "free time hist"); @@ -671,13 +671,15 @@ void VMS__cleanup_at_end_of_shutdown() { - VMSQueueStruc **readyToAnimateQs; - int coreIdx; - VirtProcr **masterVPs; - SchedSlot ***allSchedSlots; //ptr to array of ptrs + //unused + //VMSQueueStruc **readyToAnimateQs; + //int coreIdx; + 
//VirtProcr **masterVPs; + //SchedSlot ***allSchedSlots; //ptr to array of ptrs //Before getting rid of everything, print out any measurements made forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, (DynArrayFnPtr)&printHist ); + forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, (DynArrayFnPtr)&saveHistToFile); //forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, &freeHistExt ); #ifdef MEAS__TIME_PLUGIN printHist( _VMSMasterEnv->reqHdlrLowTimeHist ); diff -r fe5ec83f1baf -r 521c75d64cef VMS.h --- a/VMS.h Wed Jun 22 16:12:27 2011 +0200 +++ b/VMS.h Mon Jul 04 19:45:43 2011 +0200 @@ -87,11 +87,13 @@ #define MASTERLOCK_RETRIES 10000 // stack size in virtual processors created -#define VIRT_PROCR_STACK_SIZE 0x4000 /* 16K */ +#define VIRT_PROCR_STACK_SIZE 0x8000 /* 32K */ // memory for VMS__malloc #define MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE 0x10000000 /* 256M */ +#define CACHE_LINE 64 + //============================== @@ -371,6 +373,10 @@ inline void * VMS__take_sem_reqst_from( VMSReqst *req ); +void inline +VMS__handle_VMSSemReq( VMSReqst *req, VirtProcr *requestingPr, void *semEnv, + ResumePrFnPtr resumePrFnPtr ); + //======================== STATS ====================== //===== RDTSC wrapper ===== //Also runs with x86_64 code @@ -413,11 +419,11 @@ #define MakeTheMeasHists() \ _VMSMasterEnv->measHistsInfo = \ makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \ - makeAMeasHist( createHistIdx, "Create", 50, 0, 100 ) \ - makeAMeasHist( mutexLockHistIdx, "mutex lock", 50, 0, 100 ) \ - makeAMeasHist( mutexUnlockHistIdx, "mutex unlock", 50, 0, 100 ) \ - makeAMeasHist( condWaitHistIdx, "cond wait", 50, 0, 100 ) \ - makeAMeasHist( condSignalHistIdx, "cond signal", 50, 0, 100 ) + makeAMeasHist( createHistIdx, "create", 50, 0, 1000 ) \ + makeAMeasHist( mutexLockHistIdx, "mutex_lock", 50, 0, 100 ) \ + makeAMeasHist( mutexUnlockHistIdx, "mutex_unlock", 50, 0, 100 ) \ + makeAMeasHist( condWaitHistIdx, "cond_wait", 50, 0, 100 ) \ + makeAMeasHist( condSignalHistIdx, 
"cond_signal", 50, 0, 100 ) #endif diff -r fe5ec83f1baf -r 521c75d64cef probes.h --- a/probes.h Wed Jun 22 16:12:27 2011 +0200 +++ b/probes.h Mon Jul 04 19:45:43 2011 +0200 @@ -124,8 +124,8 @@ void VMS_impl__print_stats_of_all_probes(); -#define VMS__print_stats_of_all_probes \ - VMS_impl__print_stats_of_all_probes +#define VMS__print_stats_of_all_probes() \ + VMS_impl__print_stats_of_all_probes() #else diff -r fe5ec83f1baf -r 521c75d64cef vmalloc.c --- a/vmalloc.c Wed Jun 22 16:12:27 2011 +0200 +++ b/vmalloc.c Mon Jul 04 19:45:43 2011 +0200 @@ -60,7 +60,7 @@ //======================================================================== //step up the size to be aligned at 16-byte boundary, prob better ways - sizeRequested = ((sizeRequested + 16) >> 4) << 4; + sizeRequested = (sizeRequested + 16) & ~15; currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList; while( currElem != NULL ) @@ -75,6 +75,135 @@ else currElem = currElem->nextChunkInFreeList; } + + if( foundElem == NULL ) + { ERROR("\nmalloc failed\n") + return (void *)NULL; //indicates malloc failed + } + //Using a kludge to identify the element that is the top chunk in the + // heap -- saving top-of-heap addr in head's nextHigherInMem -- and + // save addr of start of heap in head's nextLowerInMem + //Will handle top of Heap specially + foundElemIsTopOfHeap = foundElem->nextHigherInMem == + _VMSMasterEnv->freeListHead->nextHigherInMem; + + //before shave off and try to insert new elem, remove found elem + //note, foundElem will never be the head, so always has valid prevChunk + foundElem->prevChunkInFreeList->nextChunkInFreeList = + foundElem->nextChunkInFreeList; + if( foundElem->nextChunkInFreeList != NULL ) + { foundElem->nextChunkInFreeList->prevChunkInFreeList = + foundElem->prevChunkInFreeList; + } + foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated + + //if enough, turn extra into new elem & insert it + if( amountExtra > 64 ) + { //make new elem by adding to addr of curr 
elem then casting + sizeConsumed = sizeof(MallocProlog) + sizeRequested; + newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed ); + newElem->nextLowerInMem = foundElem; //This is evil (but why?) + newElem->nextHigherInMem = foundElem->nextHigherInMem; //This is evil (but why?) + foundElem->nextHigherInMem = newElem; + if( ! foundElemIsTopOfHeap ) + { //there is no next higher for top of heap, so can't write to it + newElem->nextHigherInMem->nextLowerInMem = newElem; + } + add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead ); + } + else + { + sizeConsumed = sizeOfFound; + } + _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed; + + //============================= MEASUREMENT STUFF ======================== + #ifdef MEAS__TIME_MALLOC + saveLowTimeStampCountInto( endStamp ); + addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist ); + #endif + //======================================================================== + + //skip over the prolog by adding its size to the pointer return + return (void*)((uintptr_t)foundElem + sizeof(MallocProlog)); + } + +/*This is sequential code, meant to only be called from the Master, not from + * any slave VPs. + *Search down list, checking size by the nextHigherInMem pointer, to find + * first chunk bigger than size needed. + *Shave off the extra and make it into a new free-list element, hook it in + * then return the address of the found element plus size of prolog. + * + * The difference from the regular malloc is that all the allocated chunks are + * aligned to CACHE_LINE and padded to a multiple of it. This may mean creating + * a new chunk before the aligned chunk. 
+ */ +void *VMS__malloc_aligned( size_t sizeRequested ) + { MallocProlog *foundElem = NULL, *currElem, *newElem; + ssize_t amountExtra, sizeConsumed,sizeOfFound,prevAmount; + uint32 foundElemIsTopOfHeap; + + //============================= MEASUREMENT STUFF ======================== + #ifdef MEAS__TIME_MALLOC + uint32 startStamp, endStamp; + saveLowTimeStampCountInto( startStamp ); + #endif + //======================================================================== + + //step up the size to be multiple of the cache line size + sizeRequested = (sizeRequested + CACHE_LINE) & ~(CACHE_LINE-1); + currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList; + + while( currElem != NULL ) + { //check if size of currElem is big enough + sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem); + amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog); + if( amountExtra > 0 ) + { + //look if the found element is already aligned + if((((uintptr_t)currElem+sizeof(MallocProlog)) & (uintptr_t)(CACHE_LINE-1)) == 0){ + //found it, get out of loop + foundElem = currElem; + break; + }else{ + //find first aligned address and check if it's still big enough + //check also if the space before the aligned address is big enough + //for a new element + void *firstAlignedAddr = (void*)(((uintptr_t)currElem + 2*CACHE_LINE) & ~((uintptr_t)(CACHE_LINE-1))); + prevAmount = (uintptr_t)firstAlignedAddr - (uintptr_t)currElem; + sizeOfFound=(uintptr_t)currElem->nextHigherInMem -(uintptr_t)firstAlignedAddr + sizeof(MallocProlog); + amountExtra= sizeOfFound - sizeRequested - sizeof(MallocProlog); + if(prevAmount > 2*sizeof(MallocProlog) && amountExtra > 0 ){ + //found suitable element + //create new previous element and exit loop + MallocProlog *newAlignedElem = (MallocProlog*)firstAlignedAddr - 1; + + //insert new element into free list + if(currElem->nextChunkInFreeList != NULL) + currElem->nextChunkInFreeList->prevChunkInFreeList = newAlignedElem; + 
newAlignedElem->prevChunkInFreeList = currElem; + newAlignedElem->nextChunkInFreeList = currElem->nextChunkInFreeList; + currElem->nextChunkInFreeList = newAlignedElem; + + //set higherInMem and lowerInMem + newAlignedElem->nextHigherInMem = currElem->nextHigherInMem; + foundElemIsTopOfHeap = currElem->nextHigherInMem == + _VMSMasterEnv->freeListHead->nextHigherInMem; + if(!foundElemIsTopOfHeap) + currElem->nextHigherInMem->nextLowerInMem = newAlignedElem; + currElem->nextHigherInMem = newAlignedElem; + newAlignedElem->nextLowerInMem = currElem; + + //Found new element leaving loop + foundElem = newAlignedElem; + break; + } + } + + } + currElem = currElem->nextChunkInFreeList; + } if( foundElem == NULL ) { ERROR("\nmalloc failed\n") diff -r fe5ec83f1baf -r 521c75d64cef vmalloc.h --- a/vmalloc.h Wed Jun 22 16:12:27 2011 +0200 +++ b/vmalloc.h Mon Jul 04 19:45:43 2011 +0200 @@ -35,6 +35,9 @@ void * VMS__malloc( size_t sizeRequested ); +void * +VMS__malloc_aligned( size_t sizeRequested ); + void VMS__free( void *ptrToFree );