diff vmalloc.c @ 117:07e679ee2095

Bit index search with simple chop off
author Merten Sach <msach@mailbox.tu-berlin.de>
date Tue, 30 Aug 2011 17:11:13 +0200
parents 35547e66b971
children e29bb31df078
line diff
     1.1 --- a/vmalloc.c	Mon Aug 29 17:09:00 2011 +0200
     1.2 +++ b/vmalloc.c	Tue Aug 30 17:11:13 2011 +0200
     1.3 @@ -32,15 +32,6 @@
     1.4  }
     1.5  
     1.6  /*
     1.7 - * This calculates the size of a given container
     1.8 - */
     1.9 -inline
    1.10 -size_t getContainerSize(uint32 idx)
    1.11 -{
    1.12 -    return pow(CHUNK_INCREASE_RATE,idx)*LOWER_BOUND;
    1.13 -}
    1.14 -
    1.15 -/*
    1.16   * Removes the first chunk of a freeList
    1.17   * The chunk is removed but not set as free. There is no check if
    1.18   * the free list is empty, so make sure this is not the case.
    1.19 @@ -87,9 +78,10 @@
    1.20  }
    1.21  
    1.22  inline
    1.23 -void recycleSmallChunks(MallocProlog** container)
    1.24 +size_t getChunkSize(MallocProlog* chunk)
    1.25  {
    1.26 -    //TODO recycle small chunks
    1.27 +    return (uintptr_t)chunk->nextHigherInMem -
    1.28 +            (uintptr_t)chunk - sizeof(MallocProlog);
    1.29  }
    1.30  
    1.31  /*
    1.32 @@ -104,13 +96,15 @@
    1.33     
    1.34     //The last element in the list points to the container. If the container points
    1.35     //to NULL the container is empty
    1.36 -   if(*((void**)(chunk->prevChunkInFreeList)) == NULL)
    1.37 +   if(*((void**)(chunk->prevChunkInFreeList)) == NULL && getChunkSize(chunk) >= 160);
    1.38     {
    1.39 -       uint32 containerIdx = ((uintptr_t)chunk->prevChunkInFreeList - (uintptr_t)freeLists->bigChunks) >> 6;
    1.40 +       //Find the approppiate container because we do not know it
    1.41 +       uint64 containerIdx = ((uintptr_t)chunk->prevChunkInFreeList - (uintptr_t)freeLists->bigChunks) >> 3;
    1.42         if(containerIdx < (uint32)64)
    1.43             freeLists->bigChunksSearchVector[0] &= ~((uint64)1 << containerIdx); 
    1.44         if(containerIdx < 128 && containerIdx >=64)
    1.45             freeLists->bigChunksSearchVector[1] &= ~((uint64)1 << (containerIdx-64)); 
    1.46 +       
    1.47     }
    1.48  }
    1.49  
    1.50 @@ -123,6 +117,7 @@
    1.51  MallocProlog *mergeChunks(MallocProlog* chunkA, MallocProlog* chunkB)
    1.52  {
    1.53      chunkA->nextHigherInMem = chunkB->nextHigherInMem;
    1.54 +    chunkB->nextHigherInMem->nextLowerInMem = chunkA;
    1.55      return chunkA;
    1.56  }
    1.57  /*
    1.58 @@ -151,49 +146,60 @@
    1.59      newChunk->nextLowerInMem  = chunk;
    1.60      newChunk->nextHigherInMem = chunk->nextHigherInMem;
    1.61      
    1.62 +    chunk->nextHigherInMem->nextLowerInMem = newChunk;
    1.63      chunk->nextHigherInMem = newChunk;
    1.64 -    chunk->nextHigherInMem->nextLowerInMem = newChunk;
    1.65      
    1.66      return newChunk;
    1.67  }
    1.68  
    1.69 +/* 
    1.70 + * Search for chunk in the list of big chunks. Split the block if it's too big
    1.71 + */
    1.72  inline
    1.73 -size_t getChunkSize(MallocProlog* chunk)
    1.74 +MallocProlog *searchChunk(MallocArrays *freeLists, size_t sizeRequested, uint32 containerIdx)
    1.75  {
    1.76 -    return (uintptr_t)chunk->nextHigherInMem -
    1.77 -            (uintptr_t)chunk - sizeof(MallocProlog);
    1.78 +    MallocProlog* foundChunk;
    1.79 +    
    1.80 +    uint64 searchVector = freeLists->bigChunksSearchVector[0];
    1.81 +    //set small chunk bits to zero
    1.82 +    searchVector &= MAX_UINT64 << containerIdx;
    1.83 +    containerIdx = __builtin_ffsl(searchVector);
    1.84 +
    1.85 +    if(containerIdx == 0)
    1.86 +    {
    1.87 +       searchVector = freeLists->bigChunksSearchVector[1];
    1.88 +       containerIdx = __builtin_ffsl(searchVector);
    1.89 +       if(containerIdx == 0)
    1.90 +       {
    1.91 +           printf("VMS malloc failed: low memory");
    1.92 +           exit(1);   
    1.93 +       }
    1.94 +       containerIdx += 64;
    1.95 +    }
    1.96 +    containerIdx--;
    1.97 +    
    1.98 +
    1.99 +    foundChunk = removeChunk(freeLists, containerIdx);
   1.100 +    size_t chunkSize     = getChunkSize(foundChunk);
   1.101 +
   1.102 +    //If the new chunk is larger than the requested size: split
   1.103 +    if(chunkSize > sizeRequested + 2 * sizeof(MallocProlog) + 160)
   1.104 +    {
   1.105 +       MallocProlog *newChunk = divideChunk(foundChunk,sizeRequested);
   1.106 +       containerIdx = getContainer(getChunkSize(foundChunk)) - 1;
   1.107 +       insertChunk(foundChunk,&freeLists->bigChunks[containerIdx]);
   1.108 +       if(containerIdx < 64)
   1.109 +           freeLists->bigChunksSearchVector[0] |= ((uint64)1 << containerIdx);
   1.110 +       else
   1.111 +           freeLists->bigChunksSearchVector[1] |= ((uint64)1 << (containerIdx-64));
   1.112 +       foundChunk = newChunk;
   1.113 +    } 
   1.114 +    
   1.115 +    return foundChunk;
   1.116  }
   1.117  
   1.118  
   1.119  /*
   1.120 - * This makes 5 Chunks of the requested size.
   1.121 - */
   1.122 -inline
   1.123 -void makeChunks(size_t size, MallocProlog **container)
   1.124 -{
   1.125 -    MallocArrays *freeLists = _VMSMasterEnv->freeLists;
   1.126 -    size_t addedSize = 5*(size + sizeof(MallocProlog));
   1.127 -    
   1.128 -    uint32 containerIdx = getContainer(addedSize)+4;
   1.129 -    while(freeLists->bigChunks[containerIdx] == NULL )
   1.130 -        containerIdx++;
   1.131 -    
   1.132 -    MallocProlog *foundChunk = removeChunk(freeLists, containerIdx);
   1.133 -    
   1.134 -    int i;
   1.135 -    for(i=0; i<5; i++)
   1.136 -    {
   1.137 -        insertChunk(divideChunk(foundChunk,size), container);
   1.138 -    }
   1.139 -    containerIdx = getContainer(getChunkSize(foundChunk));
   1.140 -    insertChunk(foundChunk, &freeLists->bigChunks[containerIdx]);
   1.141 -    if(containerIdx < 64)
   1.142 -        freeLists->bigChunksSearchVector[0] |= ((uint64)1 << containerIdx);
   1.143 -    else
   1.144 -        freeLists->bigChunksSearchVector[1] |= ((uint64)1 << (containerIdx-64));
   1.145 -}
   1.146 -
   1.147 -/*
   1.148   * This is sequential code, meant to only be called from the Master, not from
   1.149   * any slave VPs.
   1.150   */
   1.151 @@ -207,67 +213,30 @@
   1.152     //========================================================================
   1.153     
   1.154     MallocArrays* freeLists = _VMSMasterEnv->freeLists;
   1.155 +   MallocProlog* foundChunk;
   1.156     
   1.157     //Return a small chunk if the requested size is smaller than 128B
   1.158     if(sizeRequested <= LOWER_BOUND)
   1.159     {
   1.160 -       uint32 freeListIdx = (sizeRequested-1)/32;
   1.161 -       if(freeLists->smallChunkCount[freeListIdx] == 0)
   1.162 -       {
   1.163 -           makeChunks((freeListIdx+1)*32, &freeLists->smallChunks[freeListIdx]);
   1.164 -           freeLists->smallChunkCount[freeListIdx] += 5;
   1.165 -       }
   1.166 +       uint32 freeListIdx = (sizeRequested-1)/SMALL_CHUNK_SIZE;
   1.167 +       if(freeLists->smallChunks[freeListIdx] == NULL)
   1.168 +           foundChunk = searchChunk(freeLists, SMALL_CHUNK_SIZE*(freeListIdx+1), 0);
   1.169 +       else
   1.170 +           foundChunk = removeSmallChunk(freeLists, freeListIdx);
   1.171         
   1.172 -       freeLists->smallChunkCount[freeListIdx]--;
   1.173 -       return removeSmallChunk(freeLists, freeListIdx) + 1;
   1.174 +       //Mark as allocated
   1.175 +       foundChunk->prevChunkInFreeList = NULL;      
   1.176 +       return foundChunk + 1;
   1.177     }
   1.178     
   1.179     //Calculate the expected container. Start one higher to have a Chunk that's
   1.180     //always big enough.
   1.181 -   uint32 containerIdx = getContainer(sizeRequested) + 1;
   1.182 +   uint32 containerIdx = getContainer(sizeRequested);
   1.183     
   1.184 -   MallocProlog* foundChunk;
   1.185     if(freeLists->bigChunks[containerIdx] == NULL)
   1.186 -   {
   1.187 -       uint64 searchVector = freeLists->bigChunksSearchVector[0];
   1.188 -       //set small chunk bits to zero
   1.189 -       searchVector &= MAX_UINT64 << containerIdx;
   1.190 -       containerIdx = __builtin_ffsl(searchVector);
   1.191 -       
   1.192 -       if(containerIdx == 0)
   1.193 -       {
   1.194 -           searchVector = freeLists->bigChunksSearchVector[1];
   1.195 -           containerIdx = __builtin_ffsl(searchVector);
   1.196 -           if(containerIdx == 0)
   1.197 -           {
   1.198 -               printf("VMS malloc failed: low memory");
   1.199 -               exit(1);   
   1.200 -           }
   1.201 -           containerIdx += 64;
   1.202 -       }
   1.203 -       containerIdx--;
   1.204 -       
   1.205 -       
   1.206 -       foundChunk = removeChunk(freeLists, containerIdx);
   1.207 -       size_t chunkSize     = getChunkSize(foundChunk);
   1.208 -       
   1.209 -       //If the new chunk is larger than the requested size: split
   1.210 -       if(chunkSize > sizeRequested + 2 * sizeof(MallocProlog) + LOWER_BOUND)
   1.211 -       {
   1.212 -           MallocProlog *newChunk = divideChunk(foundChunk,sizeRequested);
   1.213 -           containerIdx = getContainer(getChunkSize(foundChunk));
   1.214 -           insertChunk(foundChunk,&freeLists->bigChunks[containerIdx]);
   1.215 -           if(containerIdx < 64)
   1.216 -               freeLists->bigChunksSearchVector[0] |= ((uint64)1 << containerIdx);
   1.217 -           else
   1.218 -               freeLists->bigChunksSearchVector[1] |= ((uint64)1 << (containerIdx-64));
   1.219 -           foundChunk = newChunk;
   1.220 -       }        
   1.221 -   }
   1.222 +       foundChunk = searchChunk(freeLists, sizeRequested, containerIdx); 
   1.223     else
   1.224 -   {
   1.225 -        foundChunk = removeChunk(freeLists, containerIdx); 
   1.226 -   }
   1.227 +       foundChunk = removeChunk(freeLists, containerIdx); 
   1.228     
   1.229     //Mark as allocated
   1.230     foundChunk->prevChunkInFreeList = NULL;      
   1.231 @@ -290,7 +259,7 @@
   1.232  void
   1.233  VMS__free( void *ptrToFree )
   1.234   {
   1.235 -
   1.236 +    
   1.237     //============================= MEASUREMENT STUFF ========================
   1.238     #ifdef MEAS__TIME_MALLOC
   1.239     int32 startStamp, endStamp;
   1.240 @@ -301,50 +270,42 @@
   1.241     MallocArrays* freeLists = _VMSMasterEnv->freeLists;
   1.242     MallocProlog *chunkToFree = (MallocProlog*)ptrToFree - 1;
   1.243     uint32 containerIdx;
   1.244 -
   1.245 -   size_t chunkSize = getChunkSize(chunkToFree);
   1.246 -   if(chunkSize <= LOWER_BOUND)
   1.247 -   {
   1.248 -       containerIdx = (chunkSize-1)/32;
   1.249 -       insertChunk(chunkToFree,&freeLists->smallChunks[containerIdx]);
   1.250 -       freeLists->smallChunkCount[containerIdx]++;
   1.251 -       
   1.252 -       if(freeLists->smallChunkCount[containerIdx] > 20)
   1.253 -           recycleSmallChunks(&freeLists->smallChunks[containerIdx]);
   1.254 -       
   1.255 -       return;
   1.256 -   }
   1.257     
   1.258     //Check for free neighbors
   1.259     if(chunkToFree->nextLowerInMem)
   1.260     {
   1.261         if(chunkToFree->nextLowerInMem->prevChunkInFreeList != NULL)
   1.262         {//Chunk is not allocated
   1.263 -           if(getChunkSize(chunkToFree->nextLowerInMem) > LOWER_BOUND)
   1.264 -           {
   1.265 -               extractChunk(chunkToFree->nextLowerInMem, freeLists);
   1.266 -               chunkToFree = mergeChunks(chunkToFree->nextLowerInMem, chunkToFree);
   1.267 -           }
   1.268 +           extractChunk(chunkToFree->nextLowerInMem, freeLists);
   1.269 +           chunkToFree = mergeChunks(chunkToFree->nextLowerInMem, chunkToFree);
   1.270         }
   1.271     }
   1.272     if(chunkToFree->nextHigherInMem)
   1.273     {
   1.274         if(chunkToFree->nextHigherInMem->prevChunkInFreeList != NULL)
   1.275         {//Chunk is not allocated
   1.276 -           if(getChunkSize(chunkToFree->nextHigherInMem) > LOWER_BOUND)
   1.277 -           {
   1.278 -               extractChunk(chunkToFree->nextHigherInMem, freeLists);
   1.279 -               chunkToFree = mergeChunks(chunkToFree, chunkToFree->nextHigherInMem);
   1.280 -           }
   1.281 +           extractChunk(chunkToFree->nextHigherInMem, freeLists);
   1.282 +           chunkToFree = mergeChunks(chunkToFree, chunkToFree->nextHigherInMem);
   1.283         }
   1.284     }
   1.285 -
   1.286 -   containerIdx = getContainer(getChunkSize(chunkToFree));
   1.287 -   insertChunk(chunkToFree, &freeLists->bigChunks[containerIdx]);
   1.288 -   if(containerIdx < 64)
   1.289 -       freeLists->bigChunksSearchVector[0] |= (uint64)1 << containerIdx;
   1.290 +   
   1.291 +   size_t chunkSize = getChunkSize(chunkToFree);
   1.292 +   if(chunkSize < 160)
   1.293 +   {
   1.294 +       containerIdx =  (chunkSize/SMALL_CHUNK_SIZE)-1;
   1.295 +       if(containerIdx > 3)
   1.296 +           containerIdx = 3;
   1.297 +       insertChunk(chunkToFree, &freeLists->smallChunks[containerIdx]);
   1.298 +   }
   1.299     else
   1.300 -       freeLists->bigChunksSearchVector[1] |= (uint64)1 << (containerIdx-64);
   1.301 +   {
   1.302 +       containerIdx = getContainer(getChunkSize(chunkToFree)) - 1;
   1.303 +       insertChunk(chunkToFree, &freeLists->bigChunks[containerIdx]);
   1.304 +       if(containerIdx < 64)
   1.305 +           freeLists->bigChunksSearchVector[0] |= (uint64)1 << containerIdx;
   1.306 +       else
   1.307 +           freeLists->bigChunksSearchVector[1] |= (uint64)1 << (containerIdx-64);
   1.308 +   }   
   1.309     
   1.310     //============================= MEASUREMENT STUFF ========================
   1.311     #ifdef MEAS__TIME_MALLOC
   1.312 @@ -355,71 +316,8 @@
   1.313  
   1.314   }
   1.315  
   1.316 -
   1.317 -/*Allocates memory from the external system -- higher overhead
   1.318 - *
   1.319 - *Because of Linux's malloc throwing bizarre random faults when malloc is
   1.320 - * used inside a VMS virtual processor, have to pass this as a request and
   1.321 - * have the core loop do it when it gets around to it -- will look for these
   1.322 - * chores leftover from the previous animation of masterVP the next time it
   1.323 - * goes to animate the masterVP -- so it takes two separate masterVP
   1.324 - * animations, separated by work, to complete an external malloc or
   1.325 - * external free request.
   1.326 - *
   1.327 - *Thinking core loop accepts signals -- just looks if signal-location is
   1.328 - * empty or not --
   1.329 - */
   1.330 -void *
   1.331 -VMS__malloc_in_ext( size_t sizeRequested )
   1.332 - {
   1.333 - /*
   1.334 -      //This is running in the master, so no chance for multiple cores to be
   1.335 -      // competing for the core's flag.
   1.336 -   if(  *(_VMSMasterEnv->coreLoopSignalAddr[ 0 ]) != 0 )
   1.337 -    {    //something has already signalled to core loop, so save the signal
   1.338 -         // and look, next time master animated, to see if can send it.
   1.339 -         //Note, the addr to put a signal is in the coreloop's frame, so just
   1.340 -         // checks it each time through -- make it volatile to avoid GCC
   1.341 -         // optimizations -- it's a coreloop local var that only changes
   1.342 -         // after jumping away.  The signal includes the addr to send the
   1.343 -         //return to -- even if just empty return completion-signal
   1.344 -         //
   1.345 -         //save the signal in some queue that the master looks at each time
   1.346 -         // it starts up -- one loc says if empty for fast common case --
   1.347 -         //something like that -- want to hide this inside this call -- but
   1.348 -         // think this has to come as a request -- req handler gives procr
   1.349 -         // back to master loop, which gives it back to req handler at point
   1.350 -         // it sees that core loop has sent return signal.  Something like
   1.351 -         // that.
   1.352 -      saveTheSignal
   1.353 -
   1.354 -    }
   1.355 -  coreSigData->type = malloc;
   1.356 -  coreSigData->sizeToMalloc = sizeRequested;
   1.357 -  coreSigData->locToSignalCompletion = &figureOut;
   1.358 -   _VMSMasterEnv->coreLoopSignals[ 0 ] = coreSigData;
   1.359 -  */
   1.360 -      //just risk system-stack faults until get this figured out
   1.361 -   return malloc( sizeRequested );
   1.362 - }
   1.363 -
   1.364 -
   1.365 -/*Frees memory that was allocated in the external system -- higher overhead
   1.366 - *
   1.367 - *As noted in external malloc comment, this is clunky 'cause the free has
   1.368 - * to be called in the core loop.
   1.369 - */
   1.370 -void
   1.371 -VMS__free_in_ext( void *ptrToFree )
   1.372 - {
   1.373 -      //just risk system-stack faults until get this figured out
   1.374 -   free( ptrToFree );
   1.375 -
   1.376 -      //TODO: fix this -- so 
   1.377 - }
   1.378 -
   1.379 -
   1.380 -/*Designed to be called from the main thread outside of VMS, during init
   1.381 +/*
   1.382 + * Designed to be called from the main thread outside of VMS, during init
   1.383   */
   1.384  MallocArrays *
   1.385  VMS_ext__create_free_list()
   1.386 @@ -432,8 +330,6 @@
   1.387             (MallocProlog**)malloc(SMALL_CHUNK_COUNT*sizeof(MallocProlog*));
   1.388     memset((void*)freeLists->smallChunks,
   1.389             0,SMALL_CHUNK_COUNT*sizeof(MallocProlog*));
   1.390 -   memset((void*)freeLists->smallChunkCount,
   1.391 -           0,SMALL_CHUNK_COUNT*sizeof(uint32));
   1.392     
   1.393     //Calculate number of containers for big chunks
   1.394     uint32 container = getContainer(MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE)+1;
   1.395 @@ -443,7 +339,7 @@
   1.396     
   1.397     //Create first element in lastContainer 
   1.398     MallocProlog *firstChunk = malloc( MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE );
   1.399 -   if( firstChunk == NULL ) {printf("malloc error\n"); exit(1);}
   1.400 +   if( firstChunk == NULL ) {printf("Can't allocate initial memory\n"); exit(1);}
   1.401     freeLists->memSpace = firstChunk;
   1.402     
   1.403     firstChunk->nextLowerInMem = NULL;
   1.404 @@ -451,14 +347,14 @@
   1.405                          MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE - sizeof(MallocProlog*));
   1.406     firstChunk->nextChunkInFreeList = NULL;
   1.407     //previous element in the queue is the container
   1.408 -   firstChunk->prevChunkInFreeList = freeLists->bigChunks[container-1];
   1.409 +   firstChunk->prevChunkInFreeList = &freeLists->bigChunks[container-2];
   1.410     
   1.411 -   freeLists->bigChunks[container-1] = firstChunk;
   1.412 +   freeLists->bigChunks[container-2] = firstChunk;
   1.413     //Insert into bit search list
   1.414 -   if(container <= 64)
   1.415 -       freeLists->bigChunksSearchVector[0] |= ((uint64)1 << (container-1));
   1.416 +   if(container <= 65)
   1.417 +       freeLists->bigChunksSearchVector[0] |= ((uint64)1 << (container-2));
   1.418     else
   1.419 -       freeLists->bigChunksSearchVector[1] |= ((uint64)1 << (container-65));
   1.420 +       freeLists->bigChunksSearchVector[1] |= ((uint64)1 << (container-66));
   1.421     
   1.422     //Create dummy chunk to mark the top of stack this is of course
   1.423     //never freed