# HG changeset patch # User Merten Sach # Date 1314717073 -7200 # Node ID 07e679ee209589f5816fc6557faf4fab8bc72dd6 # Parent 35547e66b971f82b72b431bdb929b8f53960900d Bit index search with simple chop off diff -r 35547e66b971 -r 07e679ee2095 vmalloc.c --- a/vmalloc.c Mon Aug 29 17:09:00 2011 +0200 +++ b/vmalloc.c Tue Aug 30 17:11:13 2011 +0200 @@ -32,15 +32,6 @@ } /* - * This calculates the size of a given container - */ -inline -size_t getContainerSize(uint32 idx) -{ - return pow(CHUNK_INCREASE_RATE,idx)*LOWER_BOUND; -} - -/* * Removes the first chunk of a freeList * The chunk is removed but not set as free. There is no check if * the free list is empty, so make sure this is not the case. @@ -87,9 +78,10 @@ } inline -void recycleSmallChunks(MallocProlog** container) +size_t getChunkSize(MallocProlog* chunk) { - //TODO recycle small chunks + return (uintptr_t)chunk->nextHigherInMem - + (uintptr_t)chunk - sizeof(MallocProlog); } /* @@ -104,13 +96,15 @@ //The last element in the list points to the container. If the container points //to NULL the container is empty - if(*((void**)(chunk->prevChunkInFreeList)) == NULL) + if(*((void**)(chunk->prevChunkInFreeList)) == NULL && getChunkSize(chunk) >= 160); { - uint32 containerIdx = ((uintptr_t)chunk->prevChunkInFreeList - (uintptr_t)freeLists->bigChunks) >> 6; + //Find the approppiate container because we do not know it + uint64 containerIdx = ((uintptr_t)chunk->prevChunkInFreeList - (uintptr_t)freeLists->bigChunks) >> 3; if(containerIdx < (uint32)64) freeLists->bigChunksSearchVector[0] &= ~((uint64)1 << containerIdx); if(containerIdx < 128 && containerIdx >=64) freeLists->bigChunksSearchVector[1] &= ~((uint64)1 << (containerIdx-64)); + } } @@ -123,6 +117,7 @@ MallocProlog *mergeChunks(MallocProlog* chunkA, MallocProlog* chunkB) { chunkA->nextHigherInMem = chunkB->nextHigherInMem; + chunkB->nextHigherInMem->nextLowerInMem = chunkA; return chunkA; } /* @@ -151,49 +146,60 @@ newChunk->nextLowerInMem = chunk; newChunk->nextHigherInMem = chunk->nextHigherInMem; + chunk->nextHigherInMem->nextLowerInMem = newChunk; chunk->nextHigherInMem = newChunk; - chunk->nextHigherInMem->nextLowerInMem = newChunk; return newChunk; } +/* + * Search for chunk in the list of big chunks. Split the block if it's too big + */ inline -size_t getChunkSize(MallocProlog* chunk) +MallocProlog *searchChunk(MallocArrays *freeLists, size_t sizeRequested, uint32 containerIdx) { - return (uintptr_t)chunk->nextHigherInMem - - (uintptr_t)chunk - sizeof(MallocProlog); + MallocProlog* foundChunk; + + uint64 searchVector = freeLists->bigChunksSearchVector[0]; + //set small chunk bits to zero + searchVector &= MAX_UINT64 << containerIdx; + containerIdx = __builtin_ffsl(searchVector); + + if(containerIdx == 0) + { + searchVector = freeLists->bigChunksSearchVector[1]; + containerIdx = __builtin_ffsl(searchVector); + if(containerIdx == 0) + { + printf("VMS malloc failed: low memory"); + exit(1); + } + containerIdx += 64; + } + containerIdx--; + + + foundChunk = removeChunk(freeLists, containerIdx); + size_t chunkSize = getChunkSize(foundChunk); + + //If the new chunk is larger than the requested size: split + if(chunkSize > sizeRequested + 2 * sizeof(MallocProlog) + 160) + { + MallocProlog *newChunk = divideChunk(foundChunk,sizeRequested); + containerIdx = getContainer(getChunkSize(foundChunk)) - 1; + insertChunk(foundChunk,&freeLists->bigChunks[containerIdx]); + if(containerIdx < 64) + freeLists->bigChunksSearchVector[0] |= ((uint64)1 << containerIdx); + else + freeLists->bigChunksSearchVector[1] |= ((uint64)1 << (containerIdx-64)); + foundChunk = newChunk; + } + + return foundChunk; } /* - * This makes 5 Chunks of the requested size. - */ -inline -void makeChunks(size_t size, MallocProlog **container) -{ - MallocArrays *freeLists = _VMSMasterEnv->freeLists; - size_t addedSize = 5*(size + sizeof(MallocProlog)); - - uint32 containerIdx = getContainer(addedSize)+4; - while(freeLists->bigChunks[containerIdx] == NULL ) - containerIdx++; - - MallocProlog *foundChunk = removeChunk(freeLists, containerIdx); - - int i; - for(i=0; i<5; i++) - { - insertChunk(divideChunk(foundChunk,size), container); - } - containerIdx = getContainer(getChunkSize(foundChunk)); - insertChunk(foundChunk, &freeLists->bigChunks[containerIdx]); - if(containerIdx < 64) - freeLists->bigChunksSearchVector[0] |= ((uint64)1 << containerIdx); - else - freeLists->bigChunksSearchVector[1] |= ((uint64)1 << (containerIdx-64)); -} - -/* * This is sequential code, meant to only be called from the Master, not from * any slave VPs. */ @@ -207,67 +213,30 @@ //======================================================================== MallocArrays* freeLists = _VMSMasterEnv->freeLists; + MallocProlog* foundChunk; //Return a small chunk if the requested size is smaller than 128B if(sizeRequested <= LOWER_BOUND) { - uint32 freeListIdx = (sizeRequested-1)/32; - if(freeLists->smallChunkCount[freeListIdx] == 0) - { - makeChunks((freeListIdx+1)*32, &freeLists->smallChunks[freeListIdx]); - freeLists->smallChunkCount[freeListIdx] += 5; - } + uint32 freeListIdx = (sizeRequested-1)/SMALL_CHUNK_SIZE; + if(freeLists->smallChunks[freeListIdx] == NULL) + foundChunk = searchChunk(freeLists, SMALL_CHUNK_SIZE*(freeListIdx+1), 0); + else + foundChunk = removeSmallChunk(freeLists, freeListIdx); - freeLists->smallChunkCount[freeListIdx]--; - return removeSmallChunk(freeLists, freeListIdx) + 1; + //Mark as allocated + foundChunk->prevChunkInFreeList = NULL; + return foundChunk + 1; } //Calculate the expected container. Start one higher to have a Chunk that's //always big enough. - uint32 containerIdx = getContainer(sizeRequested) + 1; + uint32 containerIdx = getContainer(sizeRequested); - MallocProlog* foundChunk; if(freeLists->bigChunks[containerIdx] == NULL) - { - uint64 searchVector = freeLists->bigChunksSearchVector[0]; - //set small chunk bits to zero - searchVector &= MAX_UINT64 << containerIdx; - containerIdx = __builtin_ffsl(searchVector); - - if(containerIdx == 0) - { - searchVector = freeLists->bigChunksSearchVector[1]; - containerIdx = __builtin_ffsl(searchVector); - if(containerIdx == 0) - { - printf("VMS malloc failed: low memory"); - exit(1); - } - containerIdx += 64; - } - containerIdx--; - - - foundChunk = removeChunk(freeLists, containerIdx); - size_t chunkSize = getChunkSize(foundChunk); - - //If the new chunk is larger than the requested size: split - if(chunkSize > sizeRequested + 2 * sizeof(MallocProlog) + LOWER_BOUND) - { - MallocProlog *newChunk = divideChunk(foundChunk,sizeRequested); - containerIdx = getContainer(getChunkSize(foundChunk)); - insertChunk(foundChunk,&freeLists->bigChunks[containerIdx]); - if(containerIdx < 64) - freeLists->bigChunksSearchVector[0] |= ((uint64)1 << containerIdx); - else - freeLists->bigChunksSearchVector[1] |= ((uint64)1 << (containerIdx-64)); - foundChunk = newChunk; - } - } + foundChunk = searchChunk(freeLists, sizeRequested, containerIdx); else - { - foundChunk = removeChunk(freeLists, containerIdx); - } + foundChunk = removeChunk(freeLists, containerIdx); //Mark as allocated foundChunk->prevChunkInFreeList = NULL; @@ -290,7 +259,7 @@ void VMS__free( void *ptrToFree ) { - + //============================= MEASUREMENT STUFF ======================== #ifdef MEAS__TIME_MALLOC int32 startStamp, endStamp; @@ -301,50 +270,42 @@ MallocArrays* freeLists = _VMSMasterEnv->freeLists; MallocProlog *chunkToFree = (MallocProlog*)ptrToFree - 1; uint32 containerIdx; - - size_t chunkSize = getChunkSize(chunkToFree); - if(chunkSize <= LOWER_BOUND) - { - containerIdx = (chunkSize-1)/32; - insertChunk(chunkToFree,&freeLists->smallChunks[containerIdx]); - freeLists->smallChunkCount[containerIdx]++; - - if(freeLists->smallChunkCount[containerIdx] > 20) - recycleSmallChunks(&freeLists->smallChunks[containerIdx]); - - return; - } //Check for free neighbors if(chunkToFree->nextLowerInMem) { if(chunkToFree->nextLowerInMem->prevChunkInFreeList != NULL) {//Chunk is not allocated - if(getChunkSize(chunkToFree->nextLowerInMem) > LOWER_BOUND) - { - extractChunk(chunkToFree->nextLowerInMem, freeLists); - chunkToFree = mergeChunks(chunkToFree->nextLowerInMem, chunkToFree); - } + extractChunk(chunkToFree->nextLowerInMem, freeLists); + chunkToFree = mergeChunks(chunkToFree->nextLowerInMem, chunkToFree); } } if(chunkToFree->nextHigherInMem) { if(chunkToFree->nextHigherInMem->prevChunkInFreeList != NULL) {//Chunk is not allocated - if(getChunkSize(chunkToFree->nextHigherInMem) > LOWER_BOUND) - { - extractChunk(chunkToFree->nextHigherInMem, freeLists); - chunkToFree = mergeChunks(chunkToFree, chunkToFree->nextHigherInMem); - } + extractChunk(chunkToFree->nextHigherInMem, freeLists); + chunkToFree = mergeChunks(chunkToFree, chunkToFree->nextHigherInMem); } } - - containerIdx = getContainer(getChunkSize(chunkToFree)); - insertChunk(chunkToFree, &freeLists->bigChunks[containerIdx]); - if(containerIdx < 64) - freeLists->bigChunksSearchVector[0] |= (uint64)1 << containerIdx; + + size_t chunkSize = getChunkSize(chunkToFree); + if(chunkSize < 160) + { + containerIdx = (chunkSize/SMALL_CHUNK_SIZE)-1; + if(containerIdx > 3) + containerIdx = 3; + insertChunk(chunkToFree, &freeLists->smallChunks[containerIdx]); + } else - freeLists->bigChunksSearchVector[1] |= (uint64)1 << (containerIdx-64); + { + containerIdx = getContainer(getChunkSize(chunkToFree)) - 1; + insertChunk(chunkToFree, &freeLists->bigChunks[containerIdx]); + if(containerIdx < 64) + freeLists->bigChunksSearchVector[0] |= (uint64)1 << containerIdx; + else + freeLists->bigChunksSearchVector[1] |= (uint64)1 << (containerIdx-64); + } //============================= MEASUREMENT STUFF ======================== #ifdef MEAS__TIME_MALLOC @@ -355,71 +316,8 @@ } - -/*Allocates memory from the external system -- higher overhead - * - *Because of Linux's malloc throwing bizarre random faults when malloc is - * used inside a VMS virtual processor, have to pass this as a request and - * have the core loop do it when it gets around to it -- will look for these - * chores leftover from the previous animation of masterVP the next time it - * goes to animate the masterVP -- so it takes two separate masterVP - * animations, separated by work, to complete an external malloc or - * external free request. - * - *Thinking core loop accepts signals -- just looks if signal-location is - * empty or not -- - */ -void * -VMS__malloc_in_ext( size_t sizeRequested ) - { - /* - //This is running in the master, so no chance for multiple cores to be - // competing for the core's flag. - if( *(_VMSMasterEnv->coreLoopSignalAddr[ 0 ]) != 0 ) - { //something has already signalled to core loop, so save the signal - // and look, next time master animated, to see if can send it. - //Note, the addr to put a signal is in the coreloop's frame, so just - // checks it each time through -- make it volatile to avoid GCC - // optimizations -- it's a coreloop local var that only changes - // after jumping away. The signal includes the addr to send the - //return to -- even if just empty return completion-signal - // - //save the signal in some queue that the master looks at each time - // it starts up -- one loc says if empty for fast common case -- - //something like that -- want to hide this inside this call -- but - // think this has to come as a request -- req handler gives procr - // back to master loop, which gives it back to req handler at point - // it sees that core loop has sent return signal. Something like - // that. - saveTheSignal - - } - coreSigData->type = malloc; - coreSigData->sizeToMalloc = sizeRequested; - coreSigData->locToSignalCompletion = &figureOut; - _VMSMasterEnv->coreLoopSignals[ 0 ] = coreSigData; - */ - //just risk system-stack faults until get this figured out - return malloc( sizeRequested ); - } - - -/*Frees memory that was allocated in the external system -- higher overhead - * - *As noted in external malloc comment, this is clunky 'cause the free has - * to be called in the core loop. - */ -void -VMS__free_in_ext( void *ptrToFree ) - { - //just risk system-stack faults until get this figured out - free( ptrToFree ); - - //TODO: fix this -- so - } - - -/*Designed to be called from the main thread outside of VMS, during init +/* + * Designed to be called from the main thread outside of VMS, during init */ MallocArrays * VMS_ext__create_free_list() @@ -432,8 +330,6 @@ (MallocProlog**)malloc(SMALL_CHUNK_COUNT*sizeof(MallocProlog*)); memset((void*)freeLists->smallChunks, 0,SMALL_CHUNK_COUNT*sizeof(MallocProlog*)); - memset((void*)freeLists->smallChunkCount, - 0,SMALL_CHUNK_COUNT*sizeof(uint32)); //Calculate number of containers for big chunks uint32 container = getContainer(MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE)+1; @@ -443,7 +339,7 @@ //Create first element in lastContainer MallocProlog *firstChunk = malloc( MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE ); - if( firstChunk == NULL ) {printf("malloc error\n"); exit(1);} + if( firstChunk == NULL ) {printf("Can't allocate initial memory\n"); exit(1);} freeLists->memSpace = firstChunk; firstChunk->nextLowerInMem = NULL; @@ -451,14 +347,14 @@ MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE - sizeof(MallocProlog*)); firstChunk->nextChunkInFreeList = NULL; //previous element in the queue is the container - firstChunk->prevChunkInFreeList = freeLists->bigChunks[container-1]; + firstChunk->prevChunkInFreeList = &freeLists->bigChunks[container-2]; - freeLists->bigChunks[container-1] = firstChunk; + freeLists->bigChunks[container-2] = firstChunk; //Insert into bit search list - if(container <= 64) - freeLists->bigChunksSearchVector[0] |= ((uint64)1 << (container-1)); + if(container <= 65) + freeLists->bigChunksSearchVector[0] |= ((uint64)1 << (container-2)); else - freeLists->bigChunksSearchVector[1] |= ((uint64)1 << (container-65)); + freeLists->bigChunksSearchVector[1] |= ((uint64)1 << (container-66)); //Create dummy chunk to mark the top of stack this is of course //never freed diff -r 35547e66b971 -r 07e679ee2095 vmalloc.h --- a/vmalloc.h Mon Aug 29 17:09:00 2011 +0200 +++ b/vmalloc.h Tue Aug 30 17:11:13 2011 +0200 @@ -21,8 +21,8 @@ #define MAX_SMALL_CHUNKS 20 #define SMALL_CHUNKS_ALLOCATION 5 -#define LOG54 0.096910013 -#define LOG128 2.10720997 +#define LOG54 0.0969100130080564 +#define LOG128 2.1072099696478684 typedef struct _MallocProlog MallocProlog; @@ -40,7 +40,6 @@ struct MallocArrays { MallocProlog **smallChunks; - uint32 smallChunkCount[SMALL_CHUNK_COUNT]; MallocProlog **bigChunks; uint64 bigChunksSearchVector[2]; void *memSpace;