annotate vmalloc.c @ 175:de5e7c522f1f

aligned memory allocation
author Merten Sach <msach@mailbox.tu-berlin.de>
date Tue, 20 Dec 2011 19:17:11 +0100
parents c3f458403cd6
children 0320b49ca013
rev   line source
Me@50 1 /*
Me@50 2 * Copyright 2009 OpenSourceCodeStewardshipFoundation.org
Me@50 3 * Licensed under GNU General Public License version 2
Me@50 4 *
Me@50 5 * Author: seanhalle@yahoo.com
Me@50 6 *
Me@50 7 * Created on November 14, 2009, 9:07 PM
Me@50 8 */
Me@50 9
Me@50 10 #include <malloc.h>
msach@76 11 #include <inttypes.h>
Me@53 12 #include <stdlib.h>
msach@76 13 #include <stdio.h>
msach@101 14 #include <string.h>
msach@101 15 #include <math.h>
Me@50 16
Me@50 17 #include "VMS.h"
Me@68 18 #include "Histogram/Histogram.h"
Me@50 19
msach@115 20 #define MAX_UINT64 0xFFFFFFFFFFFFFFFF
msach@115 21
msach@102 22 //A MallocProlog is a head element if the HigherInMem variable is NULL
msach@102 23 //A Chunk is free if the prevChunkInFreeList variable is NULL
msach@102 24
msach@101 25 /*
msach@101 26 * This calculates the container which fits the given size.
Me@50 27 */
msach@101 28 inline
msach@101 29 uint32 getContainer(size_t size)
msach@101 30 {
msach@118 31 return (log2(size)-LOG128)/LOG54;
msach@101 32 }
msach@101 33
msach@101 34 /*
msach@101 35 * Removes the first chunk of a freeList
msach@101 36 * The chunk is removed but not set as free. There is no check if
msach@101 37 * the free list is empty, so make sure this is not the case.
msach@101 38 */
msach@101 39 inline
msach@116 40 MallocProlog *removeChunk(MallocArrays* freeLists, uint32 containerIdx)
msach@101 41 {
msach@116 42 MallocProlog** container = &freeLists->bigChunks[containerIdx];
msach@116 43 MallocProlog* removedChunk = *container;
msach@116 44 *container = removedChunk->nextChunkInFreeList;
msach@116 45
msach@116 46 if(removedChunk->nextChunkInFreeList)
msach@116 47 removedChunk->nextChunkInFreeList->prevChunkInFreeList =
msach@116 48 (MallocProlog*)container;
msach@116 49
msach@116 50 if(*container == NULL)
msach@116 51 {
msach@116 52 if(containerIdx < 64)
msach@116 53 freeLists->bigChunksSearchVector[0] &= ~((uint64)1 << containerIdx);
msach@116 54 else
msach@116 55 freeLists->bigChunksSearchVector[1] &= ~((uint64)1 << (containerIdx-64));
msach@116 56 }
msach@116 57
msach@116 58 return removedChunk;
msach@116 59 }
msach@116 60
msach@116 61 /*
msach@116 62 * Removes the first chunk of a freeList
msach@116 63 * The chunk is removed but not set as free. There is no check if
msach@116 64 * the free list is empty, so make sure this is not the case.
msach@116 65 */
msach@116 66 inline
msach@116 67 MallocProlog *removeSmallChunk(MallocArrays* freeLists, uint32 containerIdx)
msach@116 68 {
msach@116 69 MallocProlog** container = &freeLists->smallChunks[containerIdx];
msach@116 70 MallocProlog* removedChunk = *container;
msach@101 71 *container = removedChunk->nextChunkInFreeList;
msach@101 72
msach@101 73 if(removedChunk->nextChunkInFreeList)
msach@101 74 removedChunk->nextChunkInFreeList->prevChunkInFreeList =
msach@101 75 (MallocProlog*)container;
msach@101 76
msach@101 77 return removedChunk;
msach@101 78 }
msach@101 79
msach@102 80 inline
msach@117 81 size_t getChunkSize(MallocProlog* chunk)
msach@102 82 {
msach@117 83 return (uintptr_t)chunk->nextHigherInMem -
msach@117 84 (uintptr_t)chunk - sizeof(MallocProlog);
msach@102 85 }
msach@102 86
msach@101 87 /*
msach@101 88 * Removes a chunk from a free list.
msach@101 89 */
msach@101 90 inline
msach@115 91 void extractChunk(MallocProlog* chunk, MallocArrays *freeLists)
msach@101 92 {
msach@102 93 chunk->prevChunkInFreeList->nextChunkInFreeList = chunk->nextChunkInFreeList;
msach@102 94 if(chunk->nextChunkInFreeList)
msach@102 95 chunk->nextChunkInFreeList->prevChunkInFreeList = chunk->prevChunkInFreeList;
msach@116 96
msach@116 97 //The last element in the list points to the container. If the container points
msach@116 98 //to NULL the container is empty
msach@142 99 if(*((void**)(chunk->prevChunkInFreeList)) == NULL && getChunkSize(chunk) >= BIG_LOWER_BOUND)
msach@115 100 {
msach@117 101 //Find the approppiate container because we do not know it
msach@117 102 uint64 containerIdx = ((uintptr_t)chunk->prevChunkInFreeList - (uintptr_t)freeLists->bigChunks) >> 3;
msach@116 103 if(containerIdx < (uint32)64)
msach@116 104 freeLists->bigChunksSearchVector[0] &= ~((uint64)1 << containerIdx);
msach@116 105 if(containerIdx < 128 && containerIdx >=64)
msach@116 106 freeLists->bigChunksSearchVector[1] &= ~((uint64)1 << (containerIdx-64));
msach@117 107
msach@115 108 }
msach@101 109 }
msach@101 110
msach@101 111 /*
msach@101 112 * Merges two chunks.
msach@102 113 * Chunk A has to be before chunk B in memory. Both have to be removed from
msach@102 114 * a free list
msach@101 115 */
msach@101 116 inline
msach@101 117 MallocProlog *mergeChunks(MallocProlog* chunkA, MallocProlog* chunkB)
msach@101 118 {
msach@102 119 chunkA->nextHigherInMem = chunkB->nextHigherInMem;
msach@117 120 chunkB->nextHigherInMem->nextLowerInMem = chunkA;
msach@102 121 return chunkA;
msach@101 122 }
msach@101 123 /*
msach@101 124 * Inserts a chunk into a free list.
msach@101 125 */
msach@101 126 inline
msach@101 127 void insertChunk(MallocProlog* chunk, MallocProlog** container)
msach@101 128 {
msach@101 129 chunk->nextChunkInFreeList = *container;
msach@101 130 chunk->prevChunkInFreeList = (MallocProlog*)container;
msach@101 131 if(*container)
msach@101 132 (*container)->prevChunkInFreeList = chunk;
msach@101 133 *container = chunk;
msach@101 134 }
msach@101 135
msach@101 136 /*
msach@101 137 * Divides the chunk that a new chunk of newSize is created.
msach@101 138 * There is no size check, so make sure the size value is valid.
msach@101 139 */
msach@101 140 inline
msach@101 141 MallocProlog *divideChunk(MallocProlog* chunk, size_t newSize)
msach@101 142 {
msach@101 143 MallocProlog* newChunk = (MallocProlog*)((uintptr_t)chunk->nextHigherInMem -
msach@175 144 newSize);
msach@175 145
msach@175 146 newChunk = __align_adress((uintptr_t)newChunk+CACHELINE_SIZE-1)-sizeof(MallocProlog);
msach@101 147
msach@101 148 newChunk->nextLowerInMem = chunk;
msach@101 149 newChunk->nextHigherInMem = chunk->nextHigherInMem;
msach@101 150
msach@117 151 chunk->nextHigherInMem->nextLowerInMem = newChunk;
msach@101 152 chunk->nextHigherInMem = newChunk;
msach@101 153
msach@101 154 return newChunk;
msach@101 155 }
msach@101 156
msach@117 157 /*
msach@117 158 * Search for chunk in the list of big chunks. Split the block if it's too big
msach@117 159 */
msach@101 160 inline
msach@117 161 MallocProlog *searchChunk(MallocArrays *freeLists, size_t sizeRequested, uint32 containerIdx)
msach@101 162 {
msach@117 163 MallocProlog* foundChunk;
msach@117 164
msach@117 165 uint64 searchVector = freeLists->bigChunksSearchVector[0];
msach@117 166 //set small chunk bits to zero
msach@117 167 searchVector &= MAX_UINT64 << containerIdx;
msach@117 168 containerIdx = __builtin_ffsl(searchVector);
msach@117 169
msach@117 170 if(containerIdx == 0)
msach@117 171 {
msach@117 172 searchVector = freeLists->bigChunksSearchVector[1];
msach@117 173 containerIdx = __builtin_ffsl(searchVector);
msach@117 174 if(containerIdx == 0)
msach@117 175 {
msach@117 176 printf("VMS malloc failed: low memory");
msach@117 177 exit(1);
msach@117 178 }
msach@117 179 containerIdx += 64;
msach@117 180 }
msach@117 181 containerIdx--;
msach@117 182
msach@117 183
msach@117 184 foundChunk = removeChunk(freeLists, containerIdx);
msach@117 185 size_t chunkSize = getChunkSize(foundChunk);
msach@117 186
msach@117 187 //If the new chunk is larger than the requested size: split
msach@125 188 if(chunkSize > sizeRequested + 2 * sizeof(MallocProlog) + BIG_LOWER_BOUND)
msach@117 189 {
msach@117 190 MallocProlog *newChunk = divideChunk(foundChunk,sizeRequested);
msach@117 191 containerIdx = getContainer(getChunkSize(foundChunk)) - 1;
msach@117 192 insertChunk(foundChunk,&freeLists->bigChunks[containerIdx]);
msach@117 193 if(containerIdx < 64)
msach@117 194 freeLists->bigChunksSearchVector[0] |= ((uint64)1 << containerIdx);
msach@117 195 else
msach@117 196 freeLists->bigChunksSearchVector[1] |= ((uint64)1 << (containerIdx-64));
msach@117 197 foundChunk = newChunk;
msach@117 198 }
msach@117 199
msach@117 200 return foundChunk;
msach@101 201 }
msach@101 202
msach@102 203
msach@102 204 /*
msach@101 205 * This is sequential code, meant to only be called from the Master, not from
msach@101 206 * any slave VPs.
msach@101 207 */
msach@101 208 void *VMS__malloc( size_t sizeRequested )
msach@101 209 {
msach@101 210 //============================= MEASUREMENT STUFF ========================
msach@101 211 #ifdef MEAS__TIME_MALLOC
msach@101 212 int32 startStamp, endStamp;
msach@101 213 saveLowTimeStampCountInto( startStamp );
msach@101 214 #endif
msach@101 215 //========================================================================
msach@101 216
msach@175 217 sizeRequested += (2*CACHELINE_SIZE); //Allocate more than cacheline_size to avoid false sharing
msach@174 218
msach@115 219 MallocArrays* freeLists = _VMSMasterEnv->freeLists;
msach@117 220 MallocProlog* foundChunk;
msach@101 221
msach@101 222 //Return a small chunk if the requested size is smaller than 128B
msach@102 223 if(sizeRequested <= LOWER_BOUND)
msach@102 224 {
msach@117 225 uint32 freeListIdx = (sizeRequested-1)/SMALL_CHUNK_SIZE;
msach@117 226 if(freeLists->smallChunks[freeListIdx] == NULL)
msach@117 227 foundChunk = searchChunk(freeLists, SMALL_CHUNK_SIZE*(freeListIdx+1), 0);
msach@117 228 else
msach@117 229 foundChunk = removeSmallChunk(freeLists, freeListIdx);
msach@102 230
msach@117 231 //Mark as allocated
msach@117 232 foundChunk->prevChunkInFreeList = NULL;
msach@117 233 return foundChunk + 1;
msach@102 234 }
msach@102 235
msach@101 236 //Calculate the expected container. Start one higher to have a Chunk that's
msach@101 237 //always big enough.
msach@117 238 uint32 containerIdx = getContainer(sizeRequested);
msach@101 239
msach@115 240 if(freeLists->bigChunks[containerIdx] == NULL)
msach@117 241 foundChunk = searchChunk(freeLists, sizeRequested, containerIdx);
msach@101 242 else
msach@117 243 foundChunk = removeChunk(freeLists, containerIdx);
msach@101 244
msach@101 245 //Mark as allocated
msach@101 246 foundChunk->prevChunkInFreeList = NULL;
msach@101 247
msach@101 248 //============================= MEASUREMENT STUFF ========================
msach@101 249 #ifdef MEAS__TIME_MALLOC
msach@101 250 saveLowTimeStampCountInto( endStamp );
msach@101 251 addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist );
msach@101 252 #endif
msach@101 253 //========================================================================
msach@102 254
msach@101 255 //skip over the prolog by adding its size to the pointer return
msach@102 256 return foundChunk + 1;
Me@50 257 }
Me@50 258
msach@101 259 /*
msach@101 260 * This is sequential code, meant to only be called from the Master, not from
Me@50 261 * any slave VPs.
Me@50 262 */
msach@101 263 void
msach@101 264 VMS__free( void *ptrToFree )
msach@101 265 {
msach@117 266
Me@65 267 //============================= MEASUREMENT STUFF ========================
Me@65 268 #ifdef MEAS__TIME_MALLOC
Me@65 269 int32 startStamp, endStamp;
Me@65 270 saveLowTimeStampCountInto( startStamp );
Me@65 271 #endif
Me@65 272 //========================================================================
Me@65 273
msach@102 274 MallocArrays* freeLists = _VMSMasterEnv->freeLists;
msach@101 275 MallocProlog *chunkToFree = (MallocProlog*)ptrToFree - 1;
msach@102 276 uint32 containerIdx;
msach@101 277
msach@101 278 //Check for free neighbors
msach@101 279 if(chunkToFree->nextLowerInMem)
msach@101 280 {
msach@101 281 if(chunkToFree->nextLowerInMem->prevChunkInFreeList != NULL)
msach@101 282 {//Chunk is not allocated
msach@117 283 extractChunk(chunkToFree->nextLowerInMem, freeLists);
msach@117 284 chunkToFree = mergeChunks(chunkToFree->nextLowerInMem, chunkToFree);
Me@50 285 }
msach@101 286 }
msach@102 287 if(chunkToFree->nextHigherInMem)
msach@102 288 {
msach@102 289 if(chunkToFree->nextHigherInMem->prevChunkInFreeList != NULL)
msach@102 290 {//Chunk is not allocated
msach@117 291 extractChunk(chunkToFree->nextHigherInMem, freeLists);
msach@117 292 chunkToFree = mergeChunks(chunkToFree, chunkToFree->nextHigherInMem);
msach@102 293 }
msach@102 294 }
msach@117 295
msach@117 296 size_t chunkSize = getChunkSize(chunkToFree);
msach@125 297 if(chunkSize < BIG_LOWER_BOUND)
msach@117 298 {
msach@117 299 containerIdx = (chunkSize/SMALL_CHUNK_SIZE)-1;
msach@125 300 if(containerIdx > SMALL_CHUNK_COUNT-1)
msach@125 301 containerIdx = SMALL_CHUNK_COUNT-1;
msach@117 302 insertChunk(chunkToFree, &freeLists->smallChunks[containerIdx]);
msach@117 303 }
msach@116 304 else
msach@117 305 {
msach@117 306 containerIdx = getContainer(getChunkSize(chunkToFree)) - 1;
msach@117 307 insertChunk(chunkToFree, &freeLists->bigChunks[containerIdx]);
msach@117 308 if(containerIdx < 64)
msach@117 309 freeLists->bigChunksSearchVector[0] |= (uint64)1 << containerIdx;
msach@117 310 else
msach@117 311 freeLists->bigChunksSearchVector[1] |= (uint64)1 << (containerIdx-64);
msach@117 312 }
msach@102 313
Me@65 314 //============================= MEASUREMENT STUFF ========================
Me@65 315 #ifdef MEAS__TIME_MALLOC
Me@65 316 saveLowTimeStampCountInto( endStamp );
Me@65 317 addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->freeTimeHist );
Me@65 318 #endif
Me@65 319 //========================================================================
Me@50 320
Me@50 321 }
Me@50 322
msach@117 323 /*
msach@117 324 * Designed to be called from the main thread outside of VMS, during init
Me@50 325 */
msach@101 326 MallocArrays *
Me@53 327 VMS_ext__create_free_list()
msach@101 328 {
msach@101 329 //Initialize containers for small chunks and fill with zeros
msach@101 330 _VMSMasterEnv->freeLists = (MallocArrays*)malloc( sizeof(MallocArrays) );
msach@101 331 MallocArrays *freeLists = _VMSMasterEnv->freeLists;
msach@101 332
msach@101 333 freeLists->smallChunks =
msach@101 334 (MallocProlog**)malloc(SMALL_CHUNK_COUNT*sizeof(MallocProlog*));
msach@101 335 memset((void*)freeLists->smallChunks,
msach@101 336 0,SMALL_CHUNK_COUNT*sizeof(MallocProlog*));
msach@101 337
msach@101 338 //Calculate number of containers for big chunks
msach@101 339 uint32 container = getContainer(MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE)+1;
msach@101 340 freeLists->bigChunks = (MallocProlog**)malloc(container*sizeof(MallocProlog*));
msach@101 341 memset((void*)freeLists->bigChunks,0,container*sizeof(MallocProlog*));
msach@101 342 freeLists->containerCount = container;
msach@101 343
msach@101 344 //Create first element in lastContainer
msach@101 345 MallocProlog *firstChunk = malloc( MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE );
msach@117 346 if( firstChunk == NULL ) {printf("Can't allocate initial memory\n"); exit(1);}
msach@102 347 freeLists->memSpace = firstChunk;
msach@79 348
msach@126 349 //Touch memory to avoid page faults
msach@126 350 void *ptr,*endPtr;
msach@126 351 endPtr = (void*)firstChunk+MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE;
msach@126 352 for(ptr = firstChunk; ptr < endPtr; ptr+=PAGE_SIZE)
msach@126 353 {
msach@126 354 *(char*)ptr = 0;
msach@126 355 }
msach@126 356
msach@101 357 firstChunk->nextLowerInMem = NULL;
msach@101 358 firstChunk->nextHigherInMem = (MallocProlog*)((uintptr_t)firstChunk +
msach@142 359 MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE - sizeof(MallocProlog));
msach@101 360 firstChunk->nextChunkInFreeList = NULL;
msach@101 361 //previous element in the queue is the container
msach@117 362 firstChunk->prevChunkInFreeList = &freeLists->bigChunks[container-2];
Me@53 363
msach@117 364 freeLists->bigChunks[container-2] = firstChunk;
msach@115 365 //Insert into bit search list
msach@117 366 if(container <= 65)
msach@142 367 {
msach@142 368 freeLists->bigChunksSearchVector[0] = ((uint64)1 << (container-2));
msach@142 369 freeLists->bigChunksSearchVector[1] = 0;
msach@142 370 }
msach@116 371 else
msach@142 372 {
msach@142 373 freeLists->bigChunksSearchVector[0] = 0;
msach@142 374 freeLists->bigChunksSearchVector[1] = ((uint64)1 << (container-66));
msach@142 375 }
msach@101 376
msach@101 377 //Create dummy chunk to mark the top of stack this is of course
msach@101 378 //never freed
msach@101 379 MallocProlog *dummyChunk = firstChunk->nextHigherInMem;
msach@101 380 dummyChunk->nextHigherInMem = dummyChunk+1;
msach@101 381 dummyChunk->nextLowerInMem = NULL;
msach@101 382 dummyChunk->nextChunkInFreeList = NULL;
msach@101 383 dummyChunk->prevChunkInFreeList = NULL;
msach@101 384
msach@101 385 return freeLists;
Me@50 386 }
Me@50 387
Me@50 388
Me@50 389 /*Designed to be called from the main thread outside of VMS, during cleanup
Me@50 390 */
Me@50 391 void
msach@102 392 VMS_ext__free_free_list( MallocArrays *freeLists )
Me@50 393 {
msach@102 394 free(freeLists->memSpace);
msach@102 395 free(freeLists->bigChunks);
msach@102 396 free(freeLists->smallChunks);
msach@102 397
Me@50 398 }
Me@50 399