diff vmalloc.c @ 132:dbfc8382d546

distributed memory allocation interface - unfinished
author Merten Sach <msach@mailbox.tu-berlin.de>
date Fri, 16 Sep 2011 14:25:49 +0200
parents 21c95d402fe6
children a9b72021f053
line diff
     1.1 --- a/vmalloc.c	Wed Sep 07 17:45:05 2011 +0200
     1.2 +++ b/vmalloc.c	Fri Sep 16 14:25:49 2011 +0200
     1.3 @@ -12,7 +12,7 @@
     1.4  #include <stdlib.h>
     1.5  #include <stdio.h>
     1.6  
     1.7 -#include "VMS.h"
     1.8 +#include "ProcrContext.h"
     1.9  #include "Histogram/Histogram.h"
    1.10  
    1.11  /*Helper function
    1.12 @@ -37,18 +37,37 @@
    1.13     listHead->nextChunkInFreeList  = chunk;
    1.14   }
    1.15  
    1.16 +/*
    1.17 + * This function is called by code that is part of the master loop.
    1.18 + * It reads the ID of the animating core from the MasterEnv and passes it to
    1.19 + * VMS__malloc_on_core, which performs the actual allocation.
    1.20 + */
    1.21 +void *
    1.22 +VMS__malloc( size_t sizeRequested)
    1.23 +{   //pass the master's current ID along as the procrID of the allocation
    1.24 +    return VMS__malloc_on_core(sizeRequested, _VMSMasterEnv->currentMasterProcrID);
    1.25 +}
    1.26  
    1.27 -/*This is sequential code, meant to only be called from the Master, not from
    1.28 - * any slave VPs.
    1.29 +/*
    1.30 + * This is called by the plugin. The call to VMS__malloc_on_core runs on the
    1.31 + * slave VP's stack, so there is no switch to the VMS runtime.
    1.32 + */
    1.33 +void *
    1.34 +VMS__malloc_in_lib(size_t sizeRequested, VirtProcr *VProcr)
    1.35 +{   //the core animating VProcr is passed as the procrID of the allocation
    1.36 +    return VMS__malloc_on_core(sizeRequested, VProcr->coreAnimatedBy);
    1.37 +}
    1.38 +
    1.39 +/*
    1.40   *Search down list, checking size by the nextHigherInMem pointer, to find
    1.41   * first chunk bigger than size needed.
    1.42   *Shave off the extra and make it into a new free-list element, hook it in
    1.43   * then return the address of the found element plus size of prolog.
    1.44 - *
    1.45 - *Will find a
    1.46   */
    1.47 -void *VMS__malloc( size_t sizeRequested )
    1.48 +void *
    1.49 +VMS__malloc_on_core( size_t sizeRequested, int procrID)
    1.50   { MallocProlog *foundElem = NULL, *currElem, *newElem;
    1.51 +   MallocPrologAllocated *returnElem;
    1.52     ssize_t        amountExtra, sizeConsumed,sizeOfFound;
    1.53     uint32        foundElemIsTopOfHeap;
    1.54  
    1.55 @@ -61,7 +80,8 @@
    1.56     
    1.57        //step up the size to be aligned at 16-byte boundary, prob better ways
    1.58     sizeRequested = (sizeRequested + 16) & ~15;
    1.59 -   currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList;
    1.60 +   currElem = (_VMSMasterEnv->freeListHead[_VMSMasterEnv->currentMasterProcrID])
    1.61 +                        ->nextChunkInFreeList;
    1.62  
    1.63     while( currElem != NULL )
    1.64      {    //check if size of currElem is big enough
    1.65 @@ -95,16 +115,18 @@
    1.66      { foundElem->nextChunkInFreeList->prevChunkInFreeList =
    1.67                                                foundElem->prevChunkInFreeList;
    1.68      }
    1.69 -   foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated
    1.70 +   returnElem = (MallocPrologAllocated*)foundElem;
    1.71 +   returnElem->prevChunkInFreeList = NULL;//indicates elem currently allocated
    1.72 +   returnElem->procrID = procrID;
    1.73     
    1.74        //if enough, turn extra into new elem & insert it
    1.75     if( amountExtra > 64 )
    1.76      {   //make new elem by adding to addr of curr elem then casting
    1.77          sizeConsumed = sizeof(MallocProlog) + sizeRequested; 
    1.78 -        newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed );
    1.79 -        newElem->nextLowerInMem    = foundElem; //This is evil (but why?) 
    1.80 -        newElem->nextHigherInMem   = foundElem->nextHigherInMem; //This is evil (but why?)
    1.81 -        foundElem->nextHigherInMem = newElem;
    1.82 +        newElem = (MallocProlog *)( (uintptr_t)returnElem + sizeConsumed );
    1.83 +        newElem->nextLowerInMem    = returnElem; //This is evil (but why?) 
    1.84 +        newElem->nextHigherInMem   = returnElem->nextHigherInMem; //This is evil (but why?)
    1.85 +        returnElem->nextHigherInMem = newElem;
    1.86          if( ! foundElemIsTopOfHeap )
    1.87          {  //there is no next higher for top of heap, so can't write to it
    1.88             newElem->nextHigherInMem->nextLowerInMem = newElem;
    1.89 @@ -125,139 +147,46 @@
    1.90     //========================================================================
    1.91  
    1.92        //skip over the prolog by adding its size to the pointer return
    1.93 -   return (void*)((uintptr_t)foundElem + sizeof(MallocProlog));
    1.94 +   return (void*)((uintptr_t)returnElem + sizeof(MallocProlog));
    1.95   }
    1.96  
    1.97 -/*This is sequential code, meant to only be called from the Master, not from
    1.98 - * any slave VPs.
    1.99 - *Search down list, checking size by the nextHigherInMem pointer, to find
   1.100 - * first chunk bigger than size needed.
   1.101 - *Shave off the extra and make it into a new free-list element, hook it in
   1.102 - * then return the address of the found element plus size of prolog.
   1.103 - *
   1.104 - * The difference to the regular malloc is, that all the allocated chunks are
   1.105 - * aligned and padded to the size of a CACHE_LINE. Thus creating a new chunk
   1.106 - * before the aligned chunk.
   1.107 +/*
   1.108 + * This free is called from a master loop. It checks whether the chunk was
   1.109 + * allocated on the same core. If it was, it calls VMS__free_on_core;
   1.110 + * otherwise it should send a message to the responsible core (not yet implemented).
   1.111   */
   1.112 -void *VMS__malloc_aligned( size_t sizeRequested )
   1.113 - { MallocProlog *foundElem = NULL, *currElem, *newElem;
   1.114 -   ssize_t        amountExtra, sizeConsumed,sizeOfFound,prevAmount;
   1.115 -   uint32        foundElemIsTopOfHeap;
   1.116 +void
   1.117 +VMS__free(void *ptrToFree)
   1.118 +{   //prolog is expected directly below ptrToFree (assumes MallocPrologAllocated and MallocProlog have equal size -- TODO confirm)
   1.119 +    MallocPrologAllocated *chunk = (MallocPrologAllocated*)ptrToFree - 1;
   1.120 +    if(chunk->procrID == _VMSMasterEnv->currentMasterProcrID)
   1.121 +    {   //chunk was allocated under this master's ID, so free it directly
   1.122 +        VMS__free_on_core(ptrToFree, _VMSMasterEnv->currentMasterProcrID);
   1.123 +    }
   1.124 +    else
   1.125 +    {
   1.126 +        //Request from other Core -- TODO: not implemented, chunk is not freed here
   1.127 +    }
   1.128 +}
   1.129  
   1.130 -   //============================= MEASUREMENT STUFF ========================
   1.131 -   #ifdef MEAS__TIME_MALLOC
   1.132 -   uint32 startStamp, endStamp;
   1.133 -   saveLowTimeStampCountInto( startStamp );
   1.134 -   #endif
   1.135 -   //========================================================================
   1.136 -   
   1.137 -      //step up the size to be multiple of the cache line size
   1.138 -   sizeRequested = (sizeRequested + CACHE_LINE) & ~(CACHE_LINE-1);
   1.139 -   currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList;
   1.140 -
   1.141 -   while( currElem != NULL )
   1.142 -    {    //check if size of currElem is big enough
   1.143 -      sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem);
   1.144 -      amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog);
   1.145 -      if( amountExtra > 0 )
   1.146 -       {    
   1.147 -         //look if the found element is already aligned
   1.148 -         if((((uintptr_t)currElem+sizeof(MallocProlog)) & (uintptr_t)(CACHE_LINE-1)) == 0){
   1.149 -             //found it, get out of loop
   1.150 -             foundElem = currElem;
   1.151 -             break;
   1.152 -         }else{
   1.153 -             //find first aligned address and check if it's still big enough
   1.154 -             //check also if the space before the aligned address is big enough
   1.155 -             //for a new element
   1.156 -             void *firstAlignedAddr = (void*)(((uintptr_t)currElem + 2*CACHE_LINE) & ~((uintptr_t)(CACHE_LINE-1)));
   1.157 -             prevAmount = (uintptr_t)firstAlignedAddr - (uintptr_t)currElem;
   1.158 -             sizeOfFound=(uintptr_t)currElem->nextHigherInMem -(uintptr_t)firstAlignedAddr + sizeof(MallocProlog);
   1.159 -             amountExtra= sizeOfFound - sizeRequested - sizeof(MallocProlog);
   1.160 -             if(prevAmount > 2*sizeof(MallocProlog) && amountExtra > 0 ){
   1.161 -                 //found suitable element
   1.162 -                 //create new previous element and exit loop
   1.163 -                 MallocProlog *newAlignedElem = (MallocProlog*)firstAlignedAddr - 1;
   1.164 -                 
   1.165 -                 //insert new element into free list
   1.166 -                 if(currElem->nextChunkInFreeList != NULL)
   1.167 -                     currElem->nextChunkInFreeList->prevChunkInFreeList = newAlignedElem;                     
   1.168 -                 newAlignedElem->prevChunkInFreeList = currElem;
   1.169 -                 newAlignedElem->nextChunkInFreeList = currElem->nextChunkInFreeList;
   1.170 -                 currElem->nextChunkInFreeList = newAlignedElem;
   1.171 -                 
   1.172 -                 //set higherInMem and lowerInMem
   1.173 -                 newAlignedElem->nextHigherInMem = currElem->nextHigherInMem;
   1.174 -                 foundElemIsTopOfHeap = currElem->nextHigherInMem ==
   1.175 -                          _VMSMasterEnv->freeListHead->nextHigherInMem;
   1.176 -                 if(!foundElemIsTopOfHeap)
   1.177 -                     currElem->nextHigherInMem->nextLowerInMem = newAlignedElem;
   1.178 -                 currElem->nextHigherInMem = newAlignedElem;
   1.179 -                 newAlignedElem->nextLowerInMem = currElem;
   1.180 -                 
   1.181 -                 //Found new element leaving loop
   1.182 -                 foundElem = newAlignedElem;
   1.183 -                 break;
   1.184 -             }
   1.185 -         }
   1.186 -         
   1.187 -       }
   1.188 -       currElem = currElem->nextChunkInFreeList;
   1.189 +/*
   1.190 + * This free is called for the plugins. It checks whether the chunk was
   1.191 + * allocated on the same core. If it was, it calls VMS__free_on_core;
   1.192 + * otherwise it should send a message to the responsible core (not yet implemented).
   1.193 + */
   1.194 +void
   1.195 +VMS__free_in_lib(void *ptrToFree, VirtProcr *VProc)
   1.196 +{   //prolog is expected directly below ptrToFree (assumes MallocPrologAllocated and MallocProlog have equal size -- TODO confirm)
   1.197 +    MallocPrologAllocated *chunk = (MallocPrologAllocated*)ptrToFree - 1;
   1.198 +    if(chunk->procrID == VProc->coreAnimatedBy)
   1.199 +    {   //chunk was allocated under the ID of the core animating VProc
   1.200 +        VMS__free_on_core(ptrToFree, VProc->coreAnimatedBy);
   1.201      }
   1.202 -
   1.203 -   if( foundElem == NULL )
   1.204 -    { ERROR("\nmalloc failed\n")
   1.205 -      return (void *)NULL;  //indicates malloc failed
   1.206 +    else
   1.207 +    {
   1.208 +        //Request from other Core -- TODO: not implemented, chunk is not freed here
   1.209      }
   1.210 -      //Using a kludge to identify the element that is the top chunk in the
   1.211 -      // heap -- saving top-of-heap addr in head's nextHigherInMem -- and
   1.212 -      // save addr of start of heap in head's nextLowerInMem
   1.213 -      //Will handle top of Heap specially
   1.214 -   foundElemIsTopOfHeap = foundElem->nextHigherInMem ==
   1.215 -                          _VMSMasterEnv->freeListHead->nextHigherInMem;
   1.216 -
   1.217 -      //before shave off and try to insert new elem, remove found elem
   1.218 -      //note, foundElem will never be the head, so always has valid prevChunk
   1.219 -   foundElem->prevChunkInFreeList->nextChunkInFreeList =
   1.220 -                                              foundElem->nextChunkInFreeList;
   1.221 -   if( foundElem->nextChunkInFreeList != NULL )
   1.222 -    { foundElem->nextChunkInFreeList->prevChunkInFreeList =
   1.223 -                                              foundElem->prevChunkInFreeList;
   1.224 -    }
   1.225 -   foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated
   1.226 -   
   1.227 -      //if enough, turn extra into new elem & insert it
   1.228 -   if( amountExtra > 64 )
   1.229 -    {    //make new elem by adding to addr of curr elem then casting
   1.230 -      sizeConsumed = sizeof(MallocProlog) + sizeRequested;
   1.231 -      newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed );
   1.232 -      newElem->nextHigherInMem   = foundElem->nextHigherInMem;
   1.233 -      newElem->nextLowerInMem    = foundElem;
   1.234 -      foundElem->nextHigherInMem = newElem;
   1.235 -      
   1.236 -      if( ! foundElemIsTopOfHeap )
   1.237 -       {    //there is no next higher for top of heap, so can't write to it
   1.238 -         newElem->nextHigherInMem->nextLowerInMem = newElem;
   1.239 -       }
   1.240 -      add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead );
   1.241 -    }
   1.242 -   else
   1.243 -    {
   1.244 -      sizeConsumed = sizeOfFound;
   1.245 -    }
   1.246 -  _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed;
   1.247 -
   1.248 -   //============================= MEASUREMENT STUFF ========================
   1.249 -   #ifdef MEAS__TIME_MALLOC
   1.250 -   saveLowTimeStampCountInto( endStamp );
   1.251 -   addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist );
   1.252 -   #endif
   1.253 -   //========================================================================
   1.254 -
   1.255 -      //skip over the prolog by adding its size to the pointer return
   1.256 -   return (void*)((uintptr_t)foundElem + sizeof(MallocProlog));
   1.257 - }
   1.258 -
   1.259 +}
   1.260  
   1.261  /*This is sequential code -- only to be called from the Master
   1.262   * When free, subtract the size of prolog from pointer, then cast it to a
   1.263 @@ -266,7 +195,7 @@
   1.264   * add this one to free-list.
   1.265   */
   1.266  void
   1.267 -VMS__free( void *ptrToFree )
   1.268 +VMS__free_on_core( void *ptrToFree, int procrID)
   1.269   { MallocProlog *elemToFree, *nextLowerElem, *nextHigherElem;
   1.270     size_t         sizeOfElem;
   1.271     uint32         lowerExistsAndIsFree, higherExistsAndIsFree;
   1.272 @@ -443,7 +372,7 @@
   1.273  MallocProlog *
   1.274  VMS_ext__create_free_list()
   1.275   { MallocProlog *freeListHead, *firstChunk;
   1.276 -
   1.277 + 
   1.278        //Note, this is running in the main thread -- all increases in malloc
   1.279        // mem and all frees of it must be done in this thread, with the
   1.280        // thread's original stack available