diff vmalloc.c @ 209:0c83ea8adefc

Close to compilable version of common_ancestor -- still includes HW dep stuff
author Some Random Person <seanhalle@yahoo.com>
date Sun, 04 Mar 2012 14:26:35 -0800
parents eaf7e4c58c9e
children a18539c0bc37
line diff
     1.1 --- a/vmalloc.c	Wed Feb 22 11:39:12 2012 -0800
     1.2 +++ b/vmalloc.c	Sun Mar 04 14:26:35 2012 -0800
     1.3 @@ -11,46 +11,200 @@
     1.4  #include <inttypes.h>
     1.5  #include <stdlib.h>
     1.6  #include <stdio.h>
     1.7 +#include <string.h>
     1.8 +#include <math.h>
     1.9  
    1.10  #include "VMS.h"
    1.11  #include "C_Libraries/Histogram/Histogram.h"
    1.12  
    1.13 -/*Helper function
    1.14 - *Insert a newly generated free chunk into the first spot on the free list.
    1.15 - * The chunk is cast as a MallocProlog, so the various pointers in it are
    1.16 - * accessed with C's help -- and the size of the prolog is easily added to
    1.17 - * the pointer when a chunk is returned to the app -- so C handles changes
    1.18 - * in pointer sizes among machines.
    1.19 - *
    1.20 - *The list head is a normal MallocProlog struct -- identified by its
    1.21 - * prevChunkInFreeList being NULL -- the only one.
    1.22 - *
    1.23 - *The end of the list is identified by next chunk being NULL, as usual.
    1.24 +#define MAX_UINT64 0xFFFFFFFFFFFFFFFF
    1.25 +
    1.26 +//A MallocProlog is a head element if the HigherInMem variable is NULL
    1.27 +//A Chunk is free if the prevChunkInFreeList variable is NULL
    1.28 +
    1.29 +/*
    1.30 + * This calculates the container which fits the given size.
    1.31   */
    1.32 -void inline
    1.33 -add_chunk_to_free_list( MallocProlog *chunk, MallocProlog *listHead )
    1.34 - { 
    1.35 -   chunk->nextChunkInFreeList     = listHead->nextChunkInFreeList;
    1.36 -   if( chunk->nextChunkInFreeList != NULL ) //if not last in free list
    1.37 -      chunk->nextChunkInFreeList->prevChunkInFreeList = chunk;
    1.38 -   chunk->prevChunkInFreeList     = listHead;
    1.39 -   listHead->nextChunkInFreeList  = chunk;
    1.40 - }
    1.41 +inline
    1.42 +uint32 getContainer(size_t size)
    1.43 +{
    1.44 +    return (log2(size)-LOG128)/LOG54;
    1.45 +}
    1.46  
    1.47 +/*
    1.48 + * Removes the first chunk of a freeList
    1.49 + * The chunk is removed but not set as free. There is no check if
    1.50 + * the free list is empty, so make sure this is not the case.
    1.51 + */
    1.52 +inline
    1.53 +MallocProlog *removeChunk(MallocArrays* freeLists, uint32 containerIdx)
    1.54 +{
    1.55 +    MallocProlog** container = &freeLists->bigChunks[containerIdx];
    1.56 +    MallocProlog*  removedChunk = *container;
    1.57 +    *container = removedChunk->nextChunkInFreeList;
    1.58 +    
    1.59 +    if(removedChunk->nextChunkInFreeList)
    1.60 +        removedChunk->nextChunkInFreeList->prevChunkInFreeList = 
    1.61 +                (MallocProlog*)container;
    1.62 +    
    1.63 +    if(*container == NULL)
    1.64 +    {
    1.65 +       if(containerIdx < 64)
    1.66 +           freeLists->bigChunksSearchVector[0] &= ~((uint64)1 << containerIdx); 
    1.67 +       else
    1.68 +           freeLists->bigChunksSearchVector[1] &= ~((uint64)1 << (containerIdx-64));
    1.69 +    }
    1.70 +    
    1.71 +    return removedChunk;
    1.72 +}
    1.73  
    1.74 -/*This is sequential code, meant to only be called from the Master, not from
    1.75 - * any slave VPs.
    1.76 - *Search down list, checking size by the nextHigherInMem pointer, to find
    1.77 - * first chunk bigger than size needed.
    1.78 - *Shave off the extra and make it into a new free-list element, hook it in
    1.79 - * then return the address of the found element plus size of prolog.
    1.80 - *
    1.81 +/*
    1.82 + * Removes the first chunk of a freeList
    1.83 + * The chunk is removed but not set as free. There is no check if
    1.84 + * the free list is empty, so make sure this is not the case.
    1.85 + */
    1.86 +inline
    1.87 +MallocProlog *removeSmallChunk(MallocArrays* freeLists, uint32 containerIdx)
    1.88 +{
    1.89 +    MallocProlog** container = &freeLists->smallChunks[containerIdx];
    1.90 +    MallocProlog*  removedChunk = *container;
    1.91 +    *container = removedChunk->nextChunkInFreeList;
    1.92 +    
    1.93 +    if(removedChunk->nextChunkInFreeList)
    1.94 +        removedChunk->nextChunkInFreeList->prevChunkInFreeList = 
    1.95 +                (MallocProlog*)container;
    1.96 +    
    1.97 +    return removedChunk;
    1.98 +}
    1.99 +
   1.100 +inline
   1.101 +size_t getChunkSize(MallocProlog* chunk)
   1.102 +{
   1.103 +    return (uintptr_t)chunk->nextHigherInMem -
   1.104 +            (uintptr_t)chunk - sizeof(MallocProlog);
   1.105 +}
   1.106 +
   1.107 +/*
   1.108 + * Removes a chunk from a free list.
   1.109 + */
   1.110 +inline
   1.111 +void extractChunk(MallocProlog* chunk, MallocArrays *freeLists)
   1.112 +{
   1.113 +   chunk->prevChunkInFreeList->nextChunkInFreeList = chunk->nextChunkInFreeList;
   1.114 +   if(chunk->nextChunkInFreeList)
   1.115 +       chunk->nextChunkInFreeList->prevChunkInFreeList = chunk->prevChunkInFreeList;
   1.116 +   
   1.117 +   //The last element in the list points to the container. If the container points
   1.118 +   //to NULL the container is empty
   1.119 +   if(*((void**)(chunk->prevChunkInFreeList)) == NULL && getChunkSize(chunk) >= BIG_LOWER_BOUND)
   1.120 +   {
   1.121 +       //Find the approppiate container because we do not know it
   1.122 +       uint64 containerIdx = ((uintptr_t)chunk->prevChunkInFreeList - (uintptr_t)freeLists->bigChunks) >> 3;
   1.123 +       if(containerIdx < (uint32)64)
   1.124 +           freeLists->bigChunksSearchVector[0] &= ~((uint64)1 << containerIdx); 
   1.125 +       if(containerIdx < 128 && containerIdx >=64)
   1.126 +           freeLists->bigChunksSearchVector[1] &= ~((uint64)1 << (containerIdx-64)); 
   1.127 +       
   1.128 +   }
   1.129 +}
   1.130 +
   1.131 +/*
   1.132 + * Merges two chunks.
   1.133 + * Chunk A has to be before chunk B in memory. Both have to be removed from
   1.134 + * a free list
   1.135 + */
   1.136 +inline
   1.137 +MallocProlog *mergeChunks(MallocProlog* chunkA, MallocProlog* chunkB)
   1.138 +{
   1.139 +    chunkA->nextHigherInMem = chunkB->nextHigherInMem;
   1.140 +    chunkB->nextHigherInMem->nextLowerInMem = chunkA;
   1.141 +    return chunkA;
   1.142 +}
   1.143 +/*
   1.144 + * Inserts a chunk into a free list.
   1.145 + */
   1.146 +inline
   1.147 +void insertChunk(MallocProlog* chunk, MallocProlog** container)
   1.148 +{
   1.149 +    chunk->nextChunkInFreeList = *container;
   1.150 +    chunk->prevChunkInFreeList = (MallocProlog*)container;
   1.151 +    if(*container)
   1.152 +        (*container)->prevChunkInFreeList = chunk;
   1.153 +    *container = chunk;
   1.154 +}
   1.155 +
   1.156 +/*
   1.157 + * Divides the chunk that a new chunk of newSize is created.
   1.158 + * There is no size check, so make sure the size value is valid.
   1.159 + */
   1.160 +inline
   1.161 +MallocProlog *divideChunk(MallocProlog* chunk, size_t newSize)
   1.162 +{
   1.163 +    MallocProlog* newChunk = (MallocProlog*)((uintptr_t)chunk->nextHigherInMem -
   1.164 +            newSize - sizeof(MallocProlog));
   1.165 +    
   1.166 +    newChunk->nextLowerInMem  = chunk;
   1.167 +    newChunk->nextHigherInMem = chunk->nextHigherInMem;
   1.168 +    
   1.169 +    chunk->nextHigherInMem->nextLowerInMem = newChunk;
   1.170 +    chunk->nextHigherInMem = newChunk;
   1.171 +    
   1.172 +    return newChunk;
   1.173 +}
   1.174 +
   1.175 +/* 
   1.176 + * Search for chunk in the list of big chunks. Split the block if it's too big
   1.177 + */
   1.178 +inline
   1.179 +MallocProlog *searchChunk(MallocArrays *freeLists, size_t sizeRequested, uint32 containerIdx)
   1.180 +{
   1.181 +    MallocProlog* foundChunk;
   1.182 +    
   1.183 +    uint64 searchVector = freeLists->bigChunksSearchVector[0];
   1.184 +    //set small chunk bits to zero
   1.185 +    searchVector &= MAX_UINT64 << containerIdx;
   1.186 +    containerIdx = __builtin_ffsl(searchVector);
   1.187 +
   1.188 +    if(containerIdx == 0)
   1.189 +    {
   1.190 +       searchVector = freeLists->bigChunksSearchVector[1];
   1.191 +       containerIdx = __builtin_ffsl(searchVector);
   1.192 +       if(containerIdx == 0)
   1.193 +       {
   1.194 +           printf("VMS malloc failed: low memory");
   1.195 +           exit(1);   
   1.196 +       }
   1.197 +       containerIdx += 64;
   1.198 +    }
   1.199 +    containerIdx--;
   1.200 +    
   1.201 +
   1.202 +    foundChunk = removeChunk(freeLists, containerIdx);
   1.203 +    size_t chunkSize     = getChunkSize(foundChunk);
   1.204 +
   1.205 +    //If the new chunk is larger than the requested size: split
   1.206 +    if(chunkSize > sizeRequested + 2 * sizeof(MallocProlog) + BIG_LOWER_BOUND)
   1.207 +    {
   1.208 +       MallocProlog *newChunk = divideChunk(foundChunk,sizeRequested);
   1.209 +       containerIdx = getContainer(getChunkSize(foundChunk)) - 1;
   1.210 +       insertChunk(foundChunk,&freeLists->bigChunks[containerIdx]);
   1.211 +       if(containerIdx < 64)
   1.212 +           freeLists->bigChunksSearchVector[0] |= ((uint64)1 << containerIdx);
   1.213 +       else
   1.214 +           freeLists->bigChunksSearchVector[1] |= ((uint64)1 << (containerIdx-64));
   1.215 +       foundChunk = newChunk;
   1.216 +    } 
   1.217 +    
   1.218 +    return foundChunk;
   1.219 +}
   1.220 +
   1.221 +
   1.222 +/*
   1.223 + * This is sequential code, meant to only be called from the Master, not from
   1.224 + * any slave Slvs.
   1.225   */
   1.226  void *VMS_int__malloc( size_t sizeRequested )
   1.227 - { MallocProlog *foundElem = NULL, *currElem, *newElem;
   1.228 -   ssize_t        amountExtra, sizeConsumed,sizeOfFound;
   1.229 -   uint32        foundElemIsTopOfHeap;
   1.230 -
   1.231 + {     
   1.232     //============================= MEASUREMENT STUFF ========================
   1.233     #ifdef MEAS__TIME_MALLOC
   1.234     int32 startStamp, endStamp;
   1.235 @@ -58,312 +212,101 @@
   1.236     #endif
   1.237     //========================================================================
   1.238     
   1.239 -      //step up the size to be aligned at 16-byte boundary, prob better ways
   1.240 -   sizeRequested = (sizeRequested + 16) & ~15;
   1.241 -   currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList;
   1.242 -
   1.243 -   while( currElem != NULL )
   1.244 -    {    //check if size of currElem is big enough
   1.245 -      sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem);
   1.246 -      amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog);
   1.247 -      if( amountExtra > 0 )
   1.248 -       {    //found it, get out of loop
   1.249 -         foundElem = currElem;
   1.250 -         currElem = NULL;
   1.251 -       }
   1.252 -      else
   1.253 -         currElem = currElem->nextChunkInFreeList;
   1.254 -    }
   1.255 +   MallocArrays* freeLists = _VMSMasterEnv->freeLists;
   1.256 +   MallocProlog* foundChunk;
   1.257     
   1.258 -   if( foundElem == NULL )
   1.259 -    { ERROR("\nmalloc failed\n")
   1.260 -      return (void *)NULL;  //indicates malloc failed
   1.261 -    }
   1.262 -      //Using a kludge to identify the element that is the top chunk in the
   1.263 -      // heap -- saving top-of-heap addr in head's nextHigherInMem -- and
   1.264 -      // save addr of start of heap in head's nextLowerInMem
   1.265 -      //Will handle top of Heap specially
   1.266 -   foundElemIsTopOfHeap = foundElem->nextHigherInMem ==
   1.267 -                          _VMSMasterEnv->freeListHead->nextHigherInMem;
   1.268 +   //Return a small chunk if the requested size is smaller than 128B
   1.269 +   if(sizeRequested <= LOWER_BOUND)
   1.270 +   {
   1.271 +       uint32 freeListIdx = (sizeRequested-1)/SMALL_CHUNK_SIZE;
   1.272 +       if(freeLists->smallChunks[freeListIdx] == NULL)
   1.273 +           foundChunk = searchChunk(freeLists, SMALL_CHUNK_SIZE*(freeListIdx+1), 0);
   1.274 +       else
   1.275 +           foundChunk = removeSmallChunk(freeLists, freeListIdx);
   1.276 +       
   1.277 +       //Mark as allocated
   1.278 +       foundChunk->prevChunkInFreeList = NULL;      
   1.279 +       return foundChunk + 1;
   1.280 +   }
   1.281     
   1.282 -      //before shave off and try to insert new elem, remove found elem
   1.283 -      //note, foundElem will never be the head, so always has valid prevChunk
   1.284 -   foundElem->prevChunkInFreeList->nextChunkInFreeList =
   1.285 -                                              foundElem->nextChunkInFreeList;
   1.286 -   if( foundElem->nextChunkInFreeList != NULL )
   1.287 -    { foundElem->nextChunkInFreeList->prevChunkInFreeList =
   1.288 -                                              foundElem->prevChunkInFreeList;
   1.289 -    }
   1.290 -   foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated
   1.291 +   //Calculate the expected container. Start one higher to have a Chunk that's
   1.292 +   //always big enough.
   1.293 +   uint32 containerIdx = getContainer(sizeRequested);
   1.294     
   1.295 -      //if enough, turn extra into new elem & insert it
   1.296 -   if( amountExtra > 64 )
   1.297 -    {   //make new elem by adding to addr of curr elem then casting
   1.298 -        sizeConsumed = sizeof(MallocProlog) + sizeRequested; 
   1.299 -        newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed );
   1.300 -        newElem->nextLowerInMem    = foundElem; //This is evil (but why?) 
   1.301 -        newElem->nextHigherInMem   = foundElem->nextHigherInMem; //This is evil (but why?)
   1.302 -        foundElem->nextHigherInMem = newElem;
   1.303 -        if( ! foundElemIsTopOfHeap )
   1.304 -        {  //there is no next higher for top of heap, so can't write to it
   1.305 -           newElem->nextHigherInMem->nextLowerInMem = newElem;
   1.306 -        }
   1.307 -        add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead );
   1.308 -    }
   1.309 +   if(freeLists->bigChunks[containerIdx] == NULL)
   1.310 +       foundChunk = searchChunk(freeLists, sizeRequested, containerIdx); 
   1.311     else
   1.312 -    {
   1.313 -      sizeConsumed = sizeOfFound;
   1.314 -    }
   1.315 -  _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed;
   1.316 -
   1.317 +       foundChunk = removeChunk(freeLists, containerIdx); 
   1.318 +   
   1.319 +   //Mark as allocated
   1.320 +   foundChunk->prevChunkInFreeList = NULL;      
   1.321 +   
   1.322     //============================= MEASUREMENT STUFF ========================
   1.323     #ifdef MEAS__TIME_MALLOC
   1.324     saveLowTimeStampCountInto( endStamp );
   1.325     addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist );
   1.326     #endif
   1.327     //========================================================================
   1.328 -
   1.329 -      //skip over the prolog by adding its size to the pointer return
   1.330 -   return (void*)((uintptr_t)foundElem + sizeof(MallocProlog));
   1.331 +   
   1.332 +   //skip over the prolog by adding its size to the pointer return
   1.333 +   return foundChunk + 1;
   1.334   }
   1.335  
   1.336 -/*This is sequential code, meant to only be called from the Master, not from
   1.337 - * any slave VPs.
   1.338 - *Search down list, checking size by the nextHigherInMem pointer, to find
   1.339 - * first chunk bigger than size needed.
   1.340 - *Shave off the extra and make it into a new free-list element, hook it in
   1.341 - * then return the address of the found element plus size of prolog.
   1.342 - *
   1.343 - * The difference to the regular malloc is, that all the allocated chunks are
   1.344 - * aligned and padded to the size of a CACHE_LINE_SZ. Thus creating a new chunk
   1.345 - * before the aligned chunk.
   1.346 - */
   1.347 -void *VMS_int__malloc_aligned( size_t sizeRequested )
   1.348 - { MallocProlog *foundElem = NULL, *currElem, *newElem;
   1.349 -   ssize_t        amountExtra, sizeConsumed,sizeOfFound,prevAmount;
   1.350 -   uint32        foundElemIsTopOfHeap;
   1.351 -
   1.352 -   //============================= MEASUREMENT STUFF ========================
   1.353 -   #ifdef MEAS__TIME_MALLOC
   1.354 -   uint32 startStamp, endStamp;
   1.355 -   saveLowTimeStampCountInto( startStamp );
   1.356 -   #endif
   1.357 -   //========================================================================
   1.358 -   
   1.359 -      //step up the size to be multiple of the cache line size
   1.360 -   sizeRequested = (sizeRequested + CACHE_LINE_SZ) & ~(CACHE_LINE_SZ-1);
   1.361 -   currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList;
   1.362 -
   1.363 -   while( currElem != NULL )
   1.364 -    {    //check if size of currElem is big enough
   1.365 -      sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem);
   1.366 -      amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog);
   1.367 -      if( amountExtra > 0 )
   1.368 -       {    
   1.369 -         //look if the found element is already aligned
   1.370 -         if((((uintptr_t)currElem+sizeof(MallocProlog)) & (uintptr_t)(CACHE_LINE_SZ-1)) == 0){
   1.371 -             //found it, get out of loop
   1.372 -             foundElem = currElem;
   1.373 -             break;
   1.374 -         }else{
   1.375 -             //find first aligned address and check if it's still big enough
   1.376 -             //check also if the space before the aligned address is big enough
   1.377 -             //for a new element
   1.378 -             void *firstAlignedAddr = (void*)(((uintptr_t)currElem + 2*CACHE_LINE_SZ) & ~((uintptr_t)(CACHE_LINE_SZ-1)));
   1.379 -             prevAmount = (uintptr_t)firstAlignedAddr - (uintptr_t)currElem;
   1.380 -             sizeOfFound=(uintptr_t)currElem->nextHigherInMem -(uintptr_t)firstAlignedAddr + sizeof(MallocProlog);
   1.381 -             amountExtra= sizeOfFound - sizeRequested - sizeof(MallocProlog);
   1.382 -             if(prevAmount > 2*sizeof(MallocProlog) && amountExtra > 0 ){
   1.383 -                 //found suitable element
   1.384 -                 //create new previous element and exit loop
   1.385 -                 MallocProlog *newAlignedElem = (MallocProlog*)firstAlignedAddr - 1;
   1.386 -                 
   1.387 -                 //insert new element into free list
   1.388 -                 if(currElem->nextChunkInFreeList != NULL)
   1.389 -                     currElem->nextChunkInFreeList->prevChunkInFreeList = newAlignedElem;                     
   1.390 -                 newAlignedElem->prevChunkInFreeList = currElem;
   1.391 -                 newAlignedElem->nextChunkInFreeList = currElem->nextChunkInFreeList;
   1.392 -                 currElem->nextChunkInFreeList = newAlignedElem;
   1.393 -                 
   1.394 -                 //set higherInMem and lowerInMem
   1.395 -                 newAlignedElem->nextHigherInMem = currElem->nextHigherInMem;
   1.396 -                 foundElemIsTopOfHeap = currElem->nextHigherInMem ==
   1.397 -                          _VMSMasterEnv->freeListHead->nextHigherInMem;
   1.398 -                 if(!foundElemIsTopOfHeap)
   1.399 -                     currElem->nextHigherInMem->nextLowerInMem = newAlignedElem;
   1.400 -                 currElem->nextHigherInMem = newAlignedElem;
   1.401 -                 newAlignedElem->nextLowerInMem = currElem;
   1.402 -                 
   1.403 -                 //Found new element leaving loop
   1.404 -                 foundElem = newAlignedElem;
   1.405 -                 break;
   1.406 -             }
   1.407 -         }
   1.408 -         
   1.409 -       }
   1.410 -       currElem = currElem->nextChunkInFreeList;
   1.411 -    }
   1.412 -
   1.413 -   if( foundElem == NULL )
   1.414 -    { ERROR("\nmalloc failed\n")
   1.415 -      return (void *)NULL;  //indicates malloc failed
   1.416 -    }
   1.417 -      //Using a kludge to identify the element that is the top chunk in the
   1.418 -      // heap -- saving top-of-heap addr in head's nextHigherInMem -- and
   1.419 -      // save addr of start of heap in head's nextLowerInMem
   1.420 -      //Will handle top of Heap specially
   1.421 -   foundElemIsTopOfHeap = foundElem->nextHigherInMem ==
   1.422 -                          _VMSMasterEnv->freeListHead->nextHigherInMem;
   1.423 -
   1.424 -      //before shave off and try to insert new elem, remove found elem
   1.425 -      //note, foundElem will never be the head, so always has valid prevChunk
   1.426 -   foundElem->prevChunkInFreeList->nextChunkInFreeList =
   1.427 -                                              foundElem->nextChunkInFreeList;
   1.428 -   if( foundElem->nextChunkInFreeList != NULL )
   1.429 -    { foundElem->nextChunkInFreeList->prevChunkInFreeList =
   1.430 -                                              foundElem->prevChunkInFreeList;
   1.431 -    }
   1.432 -   foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated
   1.433 -   
   1.434 -      //if enough, turn extra into new elem & insert it
   1.435 -   if( amountExtra > 64 )
   1.436 -    {    //make new elem by adding to addr of curr elem then casting
   1.437 -      sizeConsumed = sizeof(MallocProlog) + sizeRequested;
   1.438 -      newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed );
   1.439 -      newElem->nextHigherInMem   = foundElem->nextHigherInMem;
   1.440 -      newElem->nextLowerInMem    = foundElem;
   1.441 -      foundElem->nextHigherInMem = newElem;
   1.442 -      
   1.443 -      if( ! foundElemIsTopOfHeap )
   1.444 -       {    //there is no next higher for top of heap, so can't write to it
   1.445 -         newElem->nextHigherInMem->nextLowerInMem = newElem;
   1.446 -       }
   1.447 -      add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead );
   1.448 -    }
   1.449 -   else
   1.450 -    {
   1.451 -      sizeConsumed = sizeOfFound;
   1.452 -    }
   1.453 -  _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed;
   1.454 -
   1.455 -   //============================= MEASUREMENT STUFF ========================
   1.456 -   #ifdef MEAS__TIME_MALLOC
   1.457 -   saveLowTimeStampCountInto( endStamp );
   1.458 -   addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist );
   1.459 -   #endif
   1.460 -   //========================================================================
   1.461 -
   1.462 -      //skip over the prolog by adding its size to the pointer return
   1.463 -   return (void*)((uintptr_t)foundElem + sizeof(MallocProlog));
   1.464 - }
   1.465 -
   1.466 -
   1.467 -/*This is sequential code -- only to be called from the Master
   1.468 - * When free, subtract the size of prolog from pointer, then cast it to a
   1.469 - * MallocProlog.  Then check the nextLower and nextHigher chunks to see if
   1.470 - * one or both are also free, and coalesce if so, and if neither free, then
   1.471 - * add this one to free-list.
   1.472 +/*
   1.473 + * This is sequential code, meant to only be called from the Master, not from
   1.474 + * any slave Slvs.
   1.475   */
   1.476  void
   1.477  VMS_int__free( void *ptrToFree )
   1.478 - { MallocProlog *elemToFree, *nextLowerElem, *nextHigherElem;
   1.479 -   size_t         sizeOfElem;
   1.480 -   uint32         lowerExistsAndIsFree, higherExistsAndIsFree;
   1.481 -
   1.482 + {
   1.483 +    
   1.484     //============================= MEASUREMENT STUFF ========================
   1.485     #ifdef MEAS__TIME_MALLOC
   1.486     int32 startStamp, endStamp;
   1.487     saveLowTimeStampCountInto( startStamp );
   1.488     #endif
   1.489     //========================================================================
   1.490 -
   1.491 -   if( ptrToFree < (void*)_VMSMasterEnv->freeListHead->nextLowerInMem ||
   1.492 -       ptrToFree > (void*)_VMSMasterEnv->freeListHead->nextHigherInMem )
   1.493 -    {    //outside the range of data owned by VMS's malloc, so do nothing
   1.494 -      return;
   1.495 -    }
   1.496 -      //subtract size of prolog to get pointer to prolog, then cast
   1.497 -   elemToFree = (MallocProlog *)((uintptr_t)ptrToFree - sizeof(MallocProlog));
   1.498 -   sizeOfElem =(size_t)((uintptr_t)elemToFree->nextHigherInMem-(uintptr_t)elemToFree);
   1.499 -
   1.500 -   if( elemToFree->prevChunkInFreeList != NULL )
   1.501 -    { printf( "error: freeing same element twice!" ); exit(1);
   1.502 -    }
   1.503 -
   1.504 -   _VMSMasterEnv->amtOfOutstandingMem -= sizeOfElem;
   1.505 -
   1.506 -   nextLowerElem  = elemToFree->nextLowerInMem;
   1.507 -   nextHigherElem = elemToFree->nextHigherInMem;
   1.508 -
   1.509 -   if( nextHigherElem == NULL )
   1.510 -      higherExistsAndIsFree = FALSE;
   1.511 -   else //okay exists, now check if in the free-list by checking back ptr
   1.512 -      higherExistsAndIsFree = (nextHigherElem->prevChunkInFreeList != NULL);
   1.513 -    
   1.514 -   if( nextLowerElem == NULL )
   1.515 -      lowerExistsAndIsFree = FALSE;
   1.516 -   else //okay, it exists, now check if it's free
   1.517 -      lowerExistsAndIsFree = (nextLowerElem->prevChunkInFreeList != NULL);
   1.518 -    
   1.519 -
   1.520 -      //now, know what exists and what's free
   1.521 -   if( lowerExistsAndIsFree )
   1.522 -    { if( higherExistsAndIsFree )
   1.523 -       {    //both exist and are free, so coalesce all three
   1.524 -            //First, remove higher from free-list
   1.525 -         nextHigherElem->prevChunkInFreeList->nextChunkInFreeList =
   1.526 -                                         nextHigherElem->nextChunkInFreeList;
   1.527 -         if( nextHigherElem->nextChunkInFreeList != NULL ) //end-of-list?
   1.528 -            nextHigherElem->nextChunkInFreeList->prevChunkInFreeList =
   1.529 -                                         nextHigherElem->prevChunkInFreeList;
   1.530 -            //Now, fix-up sequence-in-mem list -- by side-effect, this also
   1.531 -            // changes size of the lower elem, which is still in free-list
   1.532 -         nextLowerElem->nextHigherInMem = nextHigherElem->nextHigherInMem;
   1.533 -         if( nextHigherElem->nextHigherInMem !=
   1.534 -             _VMSMasterEnv->freeListHead->nextHigherInMem )
   1.535 -            nextHigherElem->nextHigherInMem->nextLowerInMem = nextLowerElem;
   1.536 -            //notice didn't do anything to elemToFree -- it simply is no
   1.537 -            // longer reachable from any of the lists.  Wonder if could be a
   1.538 -            // security leak because left valid addresses in it,
   1.539 -            // but don't care for now.
   1.540 +   
   1.541 +   MallocArrays* freeLists = _VMSMasterEnv->freeLists;
   1.542 +   MallocProlog *chunkToFree = (MallocProlog*)ptrToFree - 1;
   1.543 +   uint32 containerIdx;
   1.544 +   
   1.545 +   //Check for free neighbors
   1.546 +   if(chunkToFree->nextLowerInMem)
   1.547 +   {
   1.548 +       if(chunkToFree->nextLowerInMem->prevChunkInFreeList != NULL)
   1.549 +       {//Chunk is not allocated
   1.550 +           extractChunk(chunkToFree->nextLowerInMem, freeLists);
   1.551 +           chunkToFree = mergeChunks(chunkToFree->nextLowerInMem, chunkToFree);
   1.552         }
   1.553 -      else
   1.554 -       {    //lower is the only of the two that exists and is free,
   1.555 -            //In this case, no adjustment to free-list, just change mem-list.
   1.556 -            // By side-effect, changes size of the lower elem
   1.557 -         nextLowerElem->nextHigherInMem = elemToFree->nextHigherInMem;
   1.558 -         if( elemToFree->nextHigherInMem !=
   1.559 -             _VMSMasterEnv->freeListHead->nextHigherInMem )
   1.560 -            elemToFree->nextHigherInMem->nextLowerInMem = nextLowerElem;
   1.561 +   }
   1.562 +   if(chunkToFree->nextHigherInMem)
   1.563 +   {
   1.564 +       if(chunkToFree->nextHigherInMem->prevChunkInFreeList != NULL)
   1.565 +       {//Chunk is not allocated
   1.566 +           extractChunk(chunkToFree->nextHigherInMem, freeLists);
   1.567 +           chunkToFree = mergeChunks(chunkToFree, chunkToFree->nextHigherInMem);
   1.568         }
   1.569 -    }
   1.570 +   }
   1.571 +   
   1.572 +   size_t chunkSize = getChunkSize(chunkToFree);
   1.573 +   if(chunkSize < BIG_LOWER_BOUND)
   1.574 +   {
   1.575 +       containerIdx =  (chunkSize/SMALL_CHUNK_SIZE)-1;
   1.576 +       if(containerIdx > SMALL_CHUNK_COUNT-1)
   1.577 +           containerIdx = SMALL_CHUNK_COUNT-1;
   1.578 +       insertChunk(chunkToFree, &freeLists->smallChunks[containerIdx]);
   1.579 +   }
   1.580     else
   1.581 -    {    //lower either doesn't exist or isn't free, so check higher
   1.582 -      if( higherExistsAndIsFree )
   1.583 -       {    //higher exists and is the only of the two free
   1.584 -            //First, in free-list, replace higher elem with the one to free
   1.585 -         elemToFree->nextChunkInFreeList=nextHigherElem->nextChunkInFreeList;
   1.586 -         elemToFree->prevChunkInFreeList=nextHigherElem->prevChunkInFreeList;
   1.587 -         elemToFree->prevChunkInFreeList->nextChunkInFreeList = elemToFree;
   1.588 -         if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list?
   1.589 -            elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree;
   1.590 -            //Now chg mem-list. By side-effect, changes size of elemToFree
   1.591 -         elemToFree->nextHigherInMem = nextHigherElem->nextHigherInMem;
   1.592 -         if( elemToFree->nextHigherInMem !=
   1.593 -             _VMSMasterEnv->freeListHead->nextHigherInMem )
   1.594 -            elemToFree->nextHigherInMem->nextLowerInMem = elemToFree;
   1.595 -       }
   1.596 -      else
   1.597 -       {    //neither lower nor higher is availabe to coalesce so add to list
   1.598 -            // this makes prev chunk ptr non-null, which indicates it's free
   1.599 -         elemToFree->nextChunkInFreeList =
   1.600 -                            _VMSMasterEnv->freeListHead->nextChunkInFreeList;
   1.601 -         _VMSMasterEnv->freeListHead->nextChunkInFreeList = elemToFree;
   1.602 -         if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list?
   1.603 -            elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree;
   1.604 -         elemToFree->prevChunkInFreeList = _VMSMasterEnv->freeListHead;
   1.605 -       }
   1.606 -    }
   1.607 +   {
   1.608 +       containerIdx = getContainer(getChunkSize(chunkToFree)) - 1;
   1.609 +       insertChunk(chunkToFree, &freeLists->bigChunks[containerIdx]);
   1.610 +       if(containerIdx < 64)
   1.611 +           freeLists->bigChunksSearchVector[0] |= (uint64)1 << containerIdx;
   1.612 +       else
   1.613 +           freeLists->bigChunksSearchVector[1] |= (uint64)1 << (containerIdx-64);
   1.614 +   }   
   1.615 +   
   1.616     //============================= MEASUREMENT STUFF ========================
   1.617     #ifdef MEAS__TIME_MALLOC
   1.618     saveLowTimeStampCountInto( endStamp );
   1.619 @@ -373,82 +316,31 @@
   1.620  
   1.621   }
   1.622  
   1.623 -
   1.624 -/*Allocates memory from the external system -- higher overhead
   1.625 - *
   1.626 - *Because of Linux's malloc throwing bizarre random faults when malloc is
   1.627 - * used inside a VMS virtual processor, have to pass this as a request and
   1.628 - * have the core loop do it when it gets around to it -- will look for these
   1.629 - * chores leftover from the previous animation of masterVP the next time it
   1.630 - * goes to animate the masterVP -- so it takes two separate masterVP
   1.631 - * animations, separated by work, to complete an external malloc or
   1.632 - * external free request.
   1.633 - *
   1.634 - *Thinking core loop accepts signals -- just looks if signal-location is
   1.635 - * empty or not --
   1.636 +/*
   1.637 + * Designed to be called from the main thread outside of VMS, during init
   1.638   */
   1.639 -void *
   1.640 -VMS__malloc_in_ext( size_t sizeRequested )
   1.641 - {
   1.642 - /*
   1.643 -      //This is running in the master, so no chance for multiple cores to be
   1.644 -      // competing for the core's flag.
   1.645 -   if(  *(_VMSMasterEnv->coreLoopSignalAddr[ 0 ]) != 0 )
   1.646 -    {    //something has already signalled to core loop, so save the signal
   1.647 -         // and look, next time master animated, to see if can send it.
   1.648 -         //Note, the addr to put a signal is in the coreloop's frame, so just
   1.649 -         // checks it each time through -- make it volatile to avoid GCC
   1.650 -         // optimizations -- it's a coreloop local var that only changes
   1.651 -         // after jumping away.  The signal includes the addr to send the
   1.652 -         //return to -- even if just empty return completion-signal
   1.653 -         //
   1.654 -         //save the signal in some queue that the master looks at each time
   1.655 -         // it starts up -- one loc says if empty for fast common case --
   1.656 -         //something like that -- want to hide this inside this call -- but
   1.657 -         // think this has to come as a request -- req handler gives procr
   1.658 -         // back to master loop, which gives it back to req handler at point
   1.659 -         // it sees that core loop has sent return signal.  Something like
   1.660 -         // that.
   1.661 -      saveTheSignal
   1.662 -
   1.663 -    }
   1.664 -  coreSigData->type = malloc;
   1.665 -  coreSigData->sizeToMalloc = sizeRequested;
   1.666 -  coreSigData->locToSignalCompletion = &figureOut;
   1.667 -   _VMSMasterEnv->coreLoopSignals[ 0 ] = coreSigData;
   1.668 -  */
   1.669 -      //just risk system-stack faults until get this figured out
   1.670 -   return malloc( sizeRequested );
   1.671 - }
   1.672 -
   1.673 -
   1.674 -/*Frees memory that was allocated in the external system -- higher overhead
   1.675 - *
   1.676 - *As noted in external malloc comment, this is clunky 'cause the free has
   1.677 - * to be called in the core loop.
   1.678 - */
   1.679 -void
   1.680 -VMS__free_in_ext( void *ptrToFree )
   1.681 - {
   1.682 -      //just risk system-stack faults until get this figured out
   1.683 -   free( ptrToFree );
   1.684 -
   1.685 -      //TODO: fix this -- so 
   1.686 - }
   1.687 -
   1.688 -
   1.689 -/*Designed to be called from the main thread outside of VMS, during init
   1.690 - */
   1.691 -MallocProlog *
   1.692 +MallocArrays *
   1.693  VMS_ext__create_free_list()
   1.694 - { MallocProlog *freeListHead, *firstChunk;
   1.695 -
   1.696 -      //Note, this is running in the main thread -- all increases in malloc
   1.697 -      // mem and all frees of it must be done in this thread, with the
   1.698 -      // thread's original stack available
   1.699 -   freeListHead = malloc( sizeof(MallocProlog) );
   1.700 -   firstChunk   = malloc( MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE );
   1.701 -   if( firstChunk == NULL ) {printf("malloc error\n"); exit(1);}
   1.702 +{     
   1.703 +   //Initialize containers for small chunks and fill with zeros
   1.704 +   _VMSMasterEnv->freeLists = (MallocArrays*)malloc( sizeof(MallocArrays) );
   1.705 +   MallocArrays *freeLists = _VMSMasterEnv->freeLists;
   1.706 +   
   1.707 +   freeLists->smallChunks = 
   1.708 +           (MallocProlog**)malloc(SMALL_CHUNK_COUNT*sizeof(MallocProlog*));
   1.709 +   memset((void*)freeLists->smallChunks,
   1.710 +           0,SMALL_CHUNK_COUNT*sizeof(MallocProlog*));
   1.711 +   
   1.712 +   //Calculate number of containers for big chunks
   1.713 +   uint32 container = getContainer(MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE)+1;
   1.714 +   freeLists->bigChunks = (MallocProlog**)malloc(container*sizeof(MallocProlog*));
   1.715 +   memset((void*)freeLists->bigChunks,0,container*sizeof(MallocProlog*));
   1.716 +   freeLists->containerCount = container;
   1.717 +   
   1.718 +   //Create first element in lastContainer 
   1.719 +   MallocProlog *firstChunk = malloc( MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE );
   1.720 +   if( firstChunk == NULL ) {printf("Can't allocate initial memory\n"); exit(1);}
   1.721 +   freeLists->memSpace = firstChunk;
   1.722     
   1.723     //Touch memory to avoid page faults
   1.724     void *ptr,*endPtr; 
   1.725 @@ -457,38 +349,47 @@
   1.726     {
   1.727         *(char*)ptr = 0;
   1.728     }
   1.729 -
   1.730 -   freeListHead->prevChunkInFreeList = NULL;
   1.731 -      //Use this addr to free the heap when cleanup
   1.732 -   freeListHead->nextLowerInMem      = firstChunk;
   1.733 -      //to identify top-of-heap elem, compare this addr to elem's next higher
   1.734 -   freeListHead->nextHigherInMem     = (void*)( (uintptr_t)firstChunk +
   1.735 -                                         MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE);
   1.736 -   freeListHead->nextChunkInFreeList = firstChunk;
   1.737 -
   1.738 -   firstChunk->nextChunkInFreeList   = NULL;
   1.739 -   firstChunk->prevChunkInFreeList   = freeListHead;
   1.740 -      //next Higher has to be set to top of chunk, so can calc size in malloc
   1.741 -   firstChunk->nextHigherInMem       = (void*)( (uintptr_t)firstChunk +
   1.742 -                                         MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE);
   1.743 -   firstChunk->nextLowerInMem        = NULL; //identifies as bott of heap
   1.744     
   1.745 -   _VMSMasterEnv->amtOfOutstandingMem = 0; //none allocated yet
   1.746 -
   1.747 -   return freeListHead;
   1.748 +   firstChunk->nextLowerInMem = NULL;
   1.749 +   firstChunk->nextHigherInMem = (MallocProlog*)((uintptr_t)firstChunk +
   1.750 +                        MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE - sizeof(MallocProlog));
   1.751 +   firstChunk->nextChunkInFreeList = NULL;
   1.752 +   //previous element in the queue is the container
   1.753 +   firstChunk->prevChunkInFreeList = &freeLists->bigChunks[container-2];
   1.754 +   
   1.755 +   freeLists->bigChunks[container-2] = firstChunk;
   1.756 +   //Insert into bit search list
   1.757 +   if(container <= 65)
   1.758 +   {
   1.759 +       freeLists->bigChunksSearchVector[0] = ((uint64)1 << (container-2));
   1.760 +       freeLists->bigChunksSearchVector[1] = 0;
   1.761 +   }   
   1.762 +   else
   1.763 +   {
   1.764 +       freeLists->bigChunksSearchVector[0] = 0;
   1.765 +       freeLists->bigChunksSearchVector[1] = ((uint64)1 << (container-66));
   1.766 +   }
   1.767 +   
   1.768 +   //Create dummy chunk to mark the top of stack this is of course
   1.769 +   //never freed
   1.770 +   MallocProlog *dummyChunk = firstChunk->nextHigherInMem;
   1.771 +   dummyChunk->nextHigherInMem = dummyChunk+1;
   1.772 +   dummyChunk->nextLowerInMem  = NULL;
   1.773 +   dummyChunk->nextChunkInFreeList = NULL;
   1.774 +   dummyChunk->prevChunkInFreeList = NULL;
   1.775 +   
   1.776 +   return freeLists;
   1.777   }
   1.778  
   1.779  
   1.780  /*Designed to be called from the main thread outside of VMS, during cleanup
   1.781   */
   1.782  void
   1.783 -VMS_ext__free_free_list( MallocProlog *freeListHead )
   1.784 +VMS_ext__free_free_list( MallocArrays *freeLists )
   1.785   {    
   1.786 -      //stashed a ptr to the one and only bug chunk malloc'd from OS in the
   1.787 -      // free list head's next lower in mem pointer
   1.788 -   free( freeListHead->nextLowerInMem );
   1.789 -
   1.790 -   //don't free the head -- it'll be in an array eventually -- free whole
   1.791 -   // array when all the free lists linked from it have already been freed
   1.792 +   free(freeLists->memSpace);
   1.793 +   free(freeLists->bigChunks);
   1.794 +   free(freeLists->smallChunks);
   1.795 +   
   1.796   }
   1.797