Mercurial > cgi-bin > hgwebdir.cgi > VMS > VMS_Implementations > VMS_impls > VMS__MC_shared_impl
diff vmalloc.c @ 117:07e679ee2095
Bit index search with simple chop off
| author | Merten Sach <msach@mailbox.tu-berlin.de> |
|---|---|
| date | Tue, 30 Aug 2011 17:11:13 +0200 |
| parents | 35547e66b971 |
| children | e29bb31df078 |
line diff
1.1 --- a/vmalloc.c Mon Aug 29 17:09:00 2011 +0200 1.2 +++ b/vmalloc.c Tue Aug 30 17:11:13 2011 +0200 1.3 @@ -32,15 +32,6 @@ 1.4 } 1.5 1.6 /* 1.7 - * This calculates the size of a given container 1.8 - */ 1.9 -inline 1.10 -size_t getContainerSize(uint32 idx) 1.11 -{ 1.12 - return pow(CHUNK_INCREASE_RATE,idx)*LOWER_BOUND; 1.13 -} 1.14 - 1.15 -/* 1.16 * Removes the first chunk of a freeList 1.17 * The chunk is removed but not set as free. There is no check if 1.18 * the free list is empty, so make sure this is not the case. 1.19 @@ -87,9 +78,10 @@ 1.20 } 1.21 1.22 inline 1.23 -void recycleSmallChunks(MallocProlog** container) 1.24 +size_t getChunkSize(MallocProlog* chunk) 1.25 { 1.26 - //TODO recycle small chunks 1.27 + return (uintptr_t)chunk->nextHigherInMem - 1.28 + (uintptr_t)chunk - sizeof(MallocProlog); 1.29 } 1.30 1.31 /* 1.32 @@ -104,13 +96,15 @@ 1.33 1.34 //The last element in the list points to the container. If the container points 1.35 //to NULL the container is empty 1.36 - if(*((void**)(chunk->prevChunkInFreeList)) == NULL) 1.37 + if(*((void**)(chunk->prevChunkInFreeList)) == NULL && getChunkSize(chunk) >= 160); 1.38 { 1.39 - uint32 containerIdx = ((uintptr_t)chunk->prevChunkInFreeList - (uintptr_t)freeLists->bigChunks) >> 6; 1.40 + //Find the approppiate container because we do not know it 1.41 + uint64 containerIdx = ((uintptr_t)chunk->prevChunkInFreeList - (uintptr_t)freeLists->bigChunks) >> 3; 1.42 if(containerIdx < (uint32)64) 1.43 freeLists->bigChunksSearchVector[0] &= ~((uint64)1 << containerIdx); 1.44 if(containerIdx < 128 && containerIdx >=64) 1.45 freeLists->bigChunksSearchVector[1] &= ~((uint64)1 << (containerIdx-64)); 1.46 + 1.47 } 1.48 } 1.49 1.50 @@ -123,6 +117,7 @@ 1.51 MallocProlog *mergeChunks(MallocProlog* chunkA, MallocProlog* chunkB) 1.52 { 1.53 chunkA->nextHigherInMem = chunkB->nextHigherInMem; 1.54 + chunkB->nextHigherInMem->nextLowerInMem = chunkA; 1.55 return chunkA; 1.56 } 1.57 /* 1.58 @@ -151,49 +146,60 @@ 1.59 newChunk->nextLowerInMem = chunk; 1.60 newChunk->nextHigherInMem = chunk->nextHigherInMem; 1.61 1.62 + chunk->nextHigherInMem->nextLowerInMem = newChunk; 1.63 chunk->nextHigherInMem = newChunk; 1.64 - chunk->nextHigherInMem->nextLowerInMem = newChunk; 1.65 1.66 return newChunk; 1.67 } 1.68 1.69 +/* 1.70 + * Search for chunk in the list of big chunks. Split the block if it's too big 1.71 + */ 1.72 inline 1.73 -size_t getChunkSize(MallocProlog* chunk) 1.74 +MallocProlog *searchChunk(MallocArrays *freeLists, size_t sizeRequested, uint32 containerIdx) 1.75 { 1.76 - return (uintptr_t)chunk->nextHigherInMem - 1.77 - (uintptr_t)chunk - sizeof(MallocProlog); 1.78 + MallocProlog* foundChunk; 1.79 + 1.80 + uint64 searchVector = freeLists->bigChunksSearchVector[0]; 1.81 + //set small chunk bits to zero 1.82 + searchVector &= MAX_UINT64 << containerIdx; 1.83 + containerIdx = __builtin_ffsl(searchVector); 1.84 + 1.85 + if(containerIdx == 0) 1.86 + { 1.87 + searchVector = freeLists->bigChunksSearchVector[1]; 1.88 + containerIdx = __builtin_ffsl(searchVector); 1.89 + if(containerIdx == 0) 1.90 + { 1.91 + printf("VMS malloc failed: low memory"); 1.92 + exit(1); 1.93 + } 1.94 + containerIdx += 64; 1.95 + } 1.96 + containerIdx--; 1.97 + 1.98 + 1.99 + foundChunk = removeChunk(freeLists, containerIdx); 1.100 + size_t chunkSize = getChunkSize(foundChunk); 1.101 + 1.102 + //If the new chunk is larger than the requested size: split 1.103 + if(chunkSize > sizeRequested + 2 * sizeof(MallocProlog) + 160) 1.104 + { 1.105 + MallocProlog *newChunk = divideChunk(foundChunk,sizeRequested); 1.106 + containerIdx = getContainer(getChunkSize(foundChunk)) - 1; 1.107 + insertChunk(foundChunk,&freeLists->bigChunks[containerIdx]); 1.108 + if(containerIdx < 64) 1.109 + freeLists->bigChunksSearchVector[0] |= ((uint64)1 << containerIdx); 1.110 + else 1.111 + freeLists->bigChunksSearchVector[1] |= ((uint64)1 << (containerIdx-64)); 1.112 + foundChunk = newChunk; 1.113 + } 1.114 + 1.115 + return foundChunk; 1.116 } 1.117 1.118 1.119 /* 1.120 - * This makes 5 Chunks of the requested size. 1.121 - */ 1.122 -inline 1.123 -void makeChunks(size_t size, MallocProlog **container) 1.124 -{ 1.125 - MallocArrays *freeLists = _VMSMasterEnv->freeLists; 1.126 - size_t addedSize = 5*(size + sizeof(MallocProlog)); 1.127 - 1.128 - uint32 containerIdx = getContainer(addedSize)+4; 1.129 - while(freeLists->bigChunks[containerIdx] == NULL ) 1.130 - containerIdx++; 1.131 - 1.132 - MallocProlog *foundChunk = removeChunk(freeLists, containerIdx); 1.133 - 1.134 - int i; 1.135 - for(i=0; i<5; i++) 1.136 - { 1.137 - insertChunk(divideChunk(foundChunk,size), container); 1.138 - } 1.139 - containerIdx = getContainer(getChunkSize(foundChunk)); 1.140 - insertChunk(foundChunk, &freeLists->bigChunks[containerIdx]); 1.141 - if(containerIdx < 64) 1.142 - freeLists->bigChunksSearchVector[0] |= ((uint64)1 << containerIdx); 1.143 - else 1.144 - freeLists->bigChunksSearchVector[1] |= ((uint64)1 << (containerIdx-64)); 1.145 -} 1.146 - 1.147 -/* 1.148 * This is sequential code, meant to only be called from the Master, not from 1.149 * any slave VPs. 1.150 */ 1.151 @@ -207,67 +213,30 @@ 1.152 //======================================================================== 1.153 1.154 MallocArrays* freeLists = _VMSMasterEnv->freeLists; 1.155 + MallocProlog* foundChunk; 1.156 1.157 //Return a small chunk if the requested size is smaller than 128B 1.158 if(sizeRequested <= LOWER_BOUND) 1.159 { 1.160 - uint32 freeListIdx = (sizeRequested-1)/32; 1.161 - if(freeLists->smallChunkCount[freeListIdx] == 0) 1.162 - { 1.163 - makeChunks((freeListIdx+1)*32, &freeLists->smallChunks[freeListIdx]); 1.164 - freeLists->smallChunkCount[freeListIdx] += 5; 1.165 - } 1.166 + uint32 freeListIdx = (sizeRequested-1)/SMALL_CHUNK_SIZE; 1.167 + if(freeLists->smallChunks[freeListIdx] == NULL) 1.168 + foundChunk = searchChunk(freeLists, SMALL_CHUNK_SIZE*(freeListIdx+1), 0); 1.169 + else 1.170 + foundChunk = removeSmallChunk(freeLists, freeListIdx); 1.171 1.172 - freeLists->smallChunkCount[freeListIdx]--; 1.173 - return removeSmallChunk(freeLists, freeListIdx) + 1; 1.174 + //Mark as allocated 1.175 + foundChunk->prevChunkInFreeList = NULL; 1.176 + return foundChunk + 1; 1.177 } 1.178 1.179 //Calculate the expected container. Start one higher to have a Chunk that's 1.180 //always big enough. 1.181 - uint32 containerIdx = getContainer(sizeRequested) + 1; 1.182 + uint32 containerIdx = getContainer(sizeRequested); 1.183 1.184 - MallocProlog* foundChunk; 1.185 if(freeLists->bigChunks[containerIdx] == NULL) 1.186 - { 1.187 - uint64 searchVector = freeLists->bigChunksSearchVector[0]; 1.188 - //set small chunk bits to zero 1.189 - searchVector &= MAX_UINT64 << containerIdx; 1.190 - containerIdx = __builtin_ffsl(searchVector); 1.191 - 1.192 - if(containerIdx == 0) 1.193 - { 1.194 - searchVector = freeLists->bigChunksSearchVector[1]; 1.195 - containerIdx = __builtin_ffsl(searchVector); 1.196 - if(containerIdx == 0) 1.197 - { 1.198 - printf("VMS malloc failed: low memory"); 1.199 - exit(1); 1.200 - } 1.201 - containerIdx += 64; 1.202 - } 1.203 - containerIdx--; 1.204 - 1.205 - 1.206 - foundChunk = removeChunk(freeLists, containerIdx); 1.207 - size_t chunkSize = getChunkSize(foundChunk); 1.208 - 1.209 - //If the new chunk is larger than the requested size: split 1.210 - if(chunkSize > sizeRequested + 2 * sizeof(MallocProlog) + LOWER_BOUND) 1.211 - { 1.212 - MallocProlog *newChunk = divideChunk(foundChunk,sizeRequested); 1.213 - containerIdx = getContainer(getChunkSize(foundChunk)); 1.214 - insertChunk(foundChunk,&freeLists->bigChunks[containerIdx]); 1.215 - if(containerIdx < 64) 1.216 - freeLists->bigChunksSearchVector[0] |= ((uint64)1 << containerIdx); 1.217 - else 1.218 - freeLists->bigChunksSearchVector[1] |= ((uint64)1 << (containerIdx-64)); 1.219 - foundChunk = newChunk; 1.220 - } 1.221 - } 1.222 + foundChunk = searchChunk(freeLists, sizeRequested, containerIdx); 1.223 else 1.224 - { 1.225 - foundChunk = removeChunk(freeLists, containerIdx); 1.226 - } 1.227 + foundChunk = removeChunk(freeLists, containerIdx); 1.228 1.229 //Mark as allocated 1.230 foundChunk->prevChunkInFreeList = NULL; 1.231 @@ -290,7 +259,7 @@ 1.232 void 1.233 VMS__free( void *ptrToFree ) 1.234 { 1.235 - 1.236 + 1.237 //============================= MEASUREMENT STUFF ======================== 1.238 #ifdef MEAS__TIME_MALLOC 1.239 int32 startStamp, endStamp; 1.240 @@ -301,50 +270,42 @@ 1.241 MallocArrays* freeLists = _VMSMasterEnv->freeLists; 1.242 MallocProlog *chunkToFree = (MallocProlog*)ptrToFree - 1; 1.243 uint32 containerIdx; 1.244 - 1.245 - size_t chunkSize = getChunkSize(chunkToFree); 1.246 - if(chunkSize <= LOWER_BOUND) 1.247 - { 1.248 - containerIdx = (chunkSize-1)/32; 1.249 - insertChunk(chunkToFree,&freeLists->smallChunks[containerIdx]); 1.250 - freeLists->smallChunkCount[containerIdx]++; 1.251 - 1.252 - if(freeLists->smallChunkCount[containerIdx] > 20) 1.253 - recycleSmallChunks(&freeLists->smallChunks[containerIdx]); 1.254 - 1.255 - return; 1.256 - } 1.257 1.258 //Check for free neighbors 1.259 if(chunkToFree->nextLowerInMem) 1.260 { 1.261 if(chunkToFree->nextLowerInMem->prevChunkInFreeList != NULL) 1.262 {//Chunk is not allocated 1.263 - if(getChunkSize(chunkToFree->nextLowerInMem) > LOWER_BOUND) 1.264 - { 1.265 - extractChunk(chunkToFree->nextLowerInMem, freeLists); 1.266 - chunkToFree = mergeChunks(chunkToFree->nextLowerInMem, chunkToFree); 1.267 - } 1.268 + extractChunk(chunkToFree->nextLowerInMem, freeLists); 1.269 + chunkToFree = mergeChunks(chunkToFree->nextLowerInMem, chunkToFree); 1.270 } 1.271 } 1.272 if(chunkToFree->nextHigherInMem) 1.273 { 1.274 if(chunkToFree->nextHigherInMem->prevChunkInFreeList != NULL) 1.275 {//Chunk is not allocated 1.276 - if(getChunkSize(chunkToFree->nextHigherInMem) > LOWER_BOUND) 1.277 - { 1.278 - extractChunk(chunkToFree->nextHigherInMem, freeLists); 1.279 - chunkToFree = mergeChunks(chunkToFree, chunkToFree->nextHigherInMem); 1.280 - } 1.281 + extractChunk(chunkToFree->nextHigherInMem, freeLists); 1.282 + chunkToFree = mergeChunks(chunkToFree, chunkToFree->nextHigherInMem); 1.283 } 1.284 } 1.285 - 1.286 - containerIdx = getContainer(getChunkSize(chunkToFree)); 1.287 - insertChunk(chunkToFree, &freeLists->bigChunks[containerIdx]); 1.288 - if(containerIdx < 64) 1.289 - freeLists->bigChunksSearchVector[0] |= (uint64)1 << containerIdx; 1.290 + 1.291 + size_t chunkSize = getChunkSize(chunkToFree); 1.292 + if(chunkSize < 160) 1.293 + { 1.294 + containerIdx = (chunkSize/SMALL_CHUNK_SIZE)-1; 1.295 + if(containerIdx > 3) 1.296 + containerIdx = 3; 1.297 + insertChunk(chunkToFree, &freeLists->smallChunks[containerIdx]); 1.298 + } 1.299 else 1.300 - freeLists->bigChunksSearchVector[1] |= (uint64)1 << (containerIdx-64); 1.301 + { 1.302 + containerIdx = getContainer(getChunkSize(chunkToFree)) - 1; 1.303 + insertChunk(chunkToFree, &freeLists->bigChunks[containerIdx]); 1.304 + if(containerIdx < 64) 1.305 + freeLists->bigChunksSearchVector[0] |= (uint64)1 << containerIdx; 1.306 + else 1.307 + freeLists->bigChunksSearchVector[1] |= (uint64)1 << (containerIdx-64); 1.308 + } 1.309 1.310 //============================= MEASUREMENT STUFF ======================== 1.311 #ifdef MEAS__TIME_MALLOC 1.312 @@ -355,71 +316,8 @@ 1.313 1.314 } 1.315 1.316 - 1.317 -/*Allocates memory from the external system -- higher overhead 1.318 - * 1.319 - *Because of Linux's malloc throwing bizarre random faults when malloc is 1.320 - * used inside a VMS virtual processor, have to pass this as a request and 1.321 - * have the core loop do it when it gets around to it -- will look for these 1.322 - * chores leftover from the previous animation of masterVP the next time it 1.323 - * goes to animate the masterVP -- so it takes two separate masterVP 1.324 - * animations, separated by work, to complete an external malloc or 1.325 - * external free request. 1.326 - * 1.327 - *Thinking core loop accepts signals -- just looks if signal-location is 1.328 - * empty or not -- 1.329 - */ 1.330 -void * 1.331 -VMS__malloc_in_ext( size_t sizeRequested ) 1.332 - { 1.333 - /* 1.334 - //This is running in the master, so no chance for multiple cores to be 1.335 - // competing for the core's flag. 1.336 - if( *(_VMSMasterEnv->coreLoopSignalAddr[ 0 ]) != 0 ) 1.337 - { //something has already signalled to core loop, so save the signal 1.338 - // and look, next time master animated, to see if can send it. 1.339 - //Note, the addr to put a signal is in the coreloop's frame, so just 1.340 - // checks it each time through -- make it volatile to avoid GCC 1.341 - // optimizations -- it's a coreloop local var that only changes 1.342 - // after jumping away. The signal includes the addr to send the 1.343 - //return to -- even if just empty return completion-signal 1.344 - // 1.345 - //save the signal in some queue that the master looks at each time 1.346 - // it starts up -- one loc says if empty for fast common case -- 1.347 - //something like that -- want to hide this inside this call -- but 1.348 - // think this has to come as a request -- req handler gives procr 1.349 - // back to master loop, which gives it back to req handler at point 1.350 - // it sees that core loop has sent return signal. Something like 1.351 - // that. 1.352 - saveTheSignal 1.353 - 1.354 - } 1.355 - coreSigData->type = malloc; 1.356 - coreSigData->sizeToMalloc = sizeRequested; 1.357 - coreSigData->locToSignalCompletion = &figureOut; 1.358 - _VMSMasterEnv->coreLoopSignals[ 0 ] = coreSigData; 1.359 - */ 1.360 - //just risk system-stack faults until get this figured out 1.361 - return malloc( sizeRequested ); 1.362 - } 1.363 - 1.364 - 1.365 -/*Frees memory that was allocated in the external system -- higher overhead 1.366 - * 1.367 - *As noted in external malloc comment, this is clunky 'cause the free has 1.368 - * to be called in the core loop. 1.369 - */ 1.370 -void 1.371 -VMS__free_in_ext( void *ptrToFree ) 1.372 - { 1.373 - //just risk system-stack faults until get this figured out 1.374 - free( ptrToFree ); 1.375 - 1.376 - //TODO: fix this -- so 1.377 - } 1.378 - 1.379 - 1.380 -/*Designed to be called from the main thread outside of VMS, during init 1.381 +/* 1.382 + * Designed to be called from the main thread outside of VMS, during init 1.383 */ 1.384 MallocArrays * 1.385 VMS_ext__create_free_list() 1.386 @@ -432,8 +330,6 @@ 1.387 (MallocProlog**)malloc(SMALL_CHUNK_COUNT*sizeof(MallocProlog*)); 1.388 memset((void*)freeLists->smallChunks, 1.389 0,SMALL_CHUNK_COUNT*sizeof(MallocProlog*)); 1.390 - memset((void*)freeLists->smallChunkCount, 1.391 - 0,SMALL_CHUNK_COUNT*sizeof(uint32)); 1.392 1.393 //Calculate number of containers for big chunks 1.394 uint32 container = getContainer(MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE)+1; 1.395 @@ -443,7 +339,7 @@ 1.396 1.397 //Create first element in lastContainer 1.398 MallocProlog *firstChunk = malloc( MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE ); 1.399 - if( firstChunk == NULL ) {printf("malloc error\n"); exit(1);} 1.400 + if( firstChunk == NULL ) {printf("Can't allocate initial memory\n"); exit(1);} 1.401 freeLists->memSpace = firstChunk; 1.402 1.403 firstChunk->nextLowerInMem = NULL; 1.404 @@ -451,14 +347,14 @@ 1.405 MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE - sizeof(MallocProlog*)); 1.406 firstChunk->nextChunkInFreeList = NULL; 1.407 //previous element in the queue is the container 1.408 - firstChunk->prevChunkInFreeList = freeLists->bigChunks[container-1]; 1.409 + firstChunk->prevChunkInFreeList = &freeLists->bigChunks[container-2]; 1.410 1.411 - freeLists->bigChunks[container-1] = firstChunk; 1.412 + freeLists->bigChunks[container-2] = firstChunk; 1.413 //Insert into bit search list 1.414 - if(container <= 64) 1.415 - freeLists->bigChunksSearchVector[0] |= ((uint64)1 << (container-1)); 1.416 + if(container <= 65) 1.417 + freeLists->bigChunksSearchVector[0] |= ((uint64)1 << (container-2)); 1.418 else 1.419 - freeLists->bigChunksSearchVector[1] |= ((uint64)1 << (container-65)); 1.420 + freeLists->bigChunksSearchVector[1] |= ((uint64)1 << (container-66)); 1.421 1.422 //Create dummy chunk to mark the top of stack this is of course 1.423 //never freed
