diff vmalloc.c @ 78:521c75d64cef

Version before optimization
author Merten Sach <msach@mailbox.tu-berlin.de>
date Mon, 04 Jul 2011 19:45:43 +0200
parents 9ddbb071142d
children 97e26095c01f
line diff
     1.1 --- a/vmalloc.c	Wed Jun 22 16:12:27 2011 +0200
     1.2 +++ b/vmalloc.c	Mon Jul 04 19:45:43 2011 +0200
     1.3 @@ -60,7 +60,7 @@
     1.4     //========================================================================
     1.5     
     1.6        //step up the size to be aligned at 16-byte boundary, prob better ways
     1.7 -   sizeRequested = ((sizeRequested + 16) >> 4) << 4;
     1.8 +   sizeRequested = (sizeRequested + 16) & ~15;
     1.9     currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList;
    1.10  
    1.11     while( currElem != NULL )
    1.12 @@ -75,6 +75,135 @@
    1.13        else
    1.14           currElem = currElem->nextChunkInFreeList;
    1.15      }
    1.16 +   
    1.17 +   if( foundElem == NULL )
    1.18 +    { ERROR("\nmalloc failed\n")
    1.19 +      return (void *)NULL;  //indicates malloc failed
    1.20 +    }
    1.21 +      //Using a kludge to identify the element that is the top chunk in the
    1.22 +      // heap -- saving top-of-heap addr in head's nextHigherInMem -- and
    1.23 +      // save addr of start of heap in head's nextLowerInMem
    1.24 +      //Will handle top of Heap specially
    1.25 +   foundElemIsTopOfHeap = foundElem->nextHigherInMem ==
    1.26 +                          _VMSMasterEnv->freeListHead->nextHigherInMem;
    1.27 +   
    1.28 +      //before shave off and try to insert new elem, remove found elem
    1.29 +      //note, foundElem will never be the head, so always has valid prevChunk
    1.30 +   foundElem->prevChunkInFreeList->nextChunkInFreeList =
    1.31 +                                              foundElem->nextChunkInFreeList;
    1.32 +   if( foundElem->nextChunkInFreeList != NULL )
    1.33 +    { foundElem->nextChunkInFreeList->prevChunkInFreeList =
    1.34 +                                              foundElem->prevChunkInFreeList;
    1.35 +    }
    1.36 +   foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated
    1.37 +   
    1.38 +      //if enough, turn extra into new elem & insert it
    1.39 +   if( amountExtra > 64 )
    1.40 +    {   //make new elem by adding to addr of curr elem then casting
    1.41 +        sizeConsumed = sizeof(MallocProlog) + sizeRequested; 
    1.42 +        newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed );
    1.43 +        newElem->nextLowerInMem    = foundElem; //This is evil (but why?) 
    1.44 +        newElem->nextHigherInMem   = foundElem->nextHigherInMem; //This is evil (but why?)
    1.45 +        foundElem->nextHigherInMem = newElem;
    1.46 +        if( ! foundElemIsTopOfHeap )
    1.47 +        {  //there is no next higher for top of heap, so can't write to it
    1.48 +           newElem->nextHigherInMem->nextLowerInMem = newElem;
    1.49 +        }
    1.50 +        add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead );
    1.51 +    }
    1.52 +   else
    1.53 +    {
    1.54 +      sizeConsumed = sizeOfFound;
    1.55 +    }
    1.56 +  _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed;
    1.57 +
    1.58 +   //============================= MEASUREMENT STUFF ========================
    1.59 +   #ifdef MEAS__TIME_MALLOC
    1.60 +   saveLowTimeStampCountInto( endStamp );
    1.61 +   addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist );
    1.62 +   #endif
    1.63 +   //========================================================================
    1.64 +
    1.65 +      //skip over the prolog by adding its size to the pointer return
    1.66 +   return (void*)((uintptr_t)foundElem + sizeof(MallocProlog));
    1.67 + }
    1.68 +
    1.69 +/*This is sequential code, meant to only be called from the Master, not from
    1.70 + * any slave VPs.
    1.71 + *Search down list, checking size by the nextHigherInMem pointer, to find
    1.72 + * first chunk bigger than size needed.
    1.73 + *Shave off the extra and make it into a new free-list element, hook it in
    1.74 + * then return the address of the found element plus size of prolog.
    1.75 + *
    1.76 + * The difference to the regular malloc is, that all the allocated chunks are
    1.77 + * aligned and padded to the size of a CACHE_LINE. Thus creating a new chunk
    1.78 + * before the aligned chunk.
    1.79 + */
    1.80 +void *VMS__malloc_aligned( size_t sizeRequested )
    1.81 + { MallocProlog *foundElem = NULL, *currElem, *newElem;
    1.82 +   ssize_t        amountExtra, sizeConsumed,sizeOfFound,prevAmount;
    1.83 +   uint32        foundElemIsTopOfHeap;
    1.84 +
    1.85 +   //============================= MEASUREMENT STUFF ========================
    1.86 +   #ifdef MEAS__TIME_MALLOC
    1.87 +   uint32 startStamp, endStamp;
    1.88 +   saveLowTimeStampCountInto( startStamp );
    1.89 +   #endif
    1.90 +   //========================================================================
    1.91 +   
    1.92 +      //step up the size to be multiple of the cache line size
    1.93 +   sizeRequested = (sizeRequested + CACHE_LINE) & ~(CACHE_LINE-1);
    1.94 +   currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList;
    1.95 +
    1.96 +   while( currElem != NULL )
    1.97 +    {    //check if size of currElem is big enough
    1.98 +      sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem);
    1.99 +      amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog);
   1.100 +      if( amountExtra > 0 )
   1.101 +       {    
   1.102 +         //look if the found element is already aligned
   1.103 +         if((((uintptr_t)currElem+sizeof(MallocProlog)) & (uintptr_t)(CACHE_LINE-1)) == 0){
   1.104 +             //found it, get out of loop
   1.105 +             foundElem = currElem;
   1.106 +             break;
   1.107 +         }else{
   1.108 +             //find first aligned address and check if it's still big enough
   1.109 +             //check also if the space before the aligned address is big enough
   1.110 +             //for a new element
   1.111 +             void *firstAlignedAddr = (void*)(((uintptr_t)currElem + 2*CACHE_LINE) & ~((uintptr_t)(CACHE_LINE-1)));
   1.112 +             prevAmount = (uintptr_t)firstAlignedAddr - (uintptr_t)currElem;
   1.113 +             sizeOfFound=(uintptr_t)currElem->nextHigherInMem -(uintptr_t)firstAlignedAddr + sizeof(MallocProlog);
   1.114 +             amountExtra= sizeOfFound - sizeRequested - sizeof(MallocProlog);
   1.115 +             if(prevAmount > 2*sizeof(MallocProlog) && amountExtra > 0 ){
   1.116 +                 //found suitable element
   1.117 +                 //create new previous element and exit loop
   1.118 +                 MallocProlog *newAlignedElem = (MallocProlog*)firstAlignedAddr - 1;
   1.119 +                 
   1.120 +                 //insert new element into free list
   1.121 +                 if(currElem->nextChunkInFreeList != NULL)
   1.122 +                     currElem->nextChunkInFreeList->prevChunkInFreeList = newAlignedElem;                     
   1.123 +                 newAlignedElem->prevChunkInFreeList = currElem;
   1.124 +                 newAlignedElem->nextChunkInFreeList = currElem->nextChunkInFreeList;
   1.125 +                 currElem->nextChunkInFreeList = newAlignedElem;
   1.126 +                 
   1.127 +                 //set higherInMem and lowerInMem
   1.128 +                 newAlignedElem->nextHigherInMem = currElem->nextHigherInMem;
   1.129 +                 foundElemIsTopOfHeap = currElem->nextHigherInMem ==
   1.130 +                          _VMSMasterEnv->freeListHead->nextHigherInMem;
   1.131 +                 if(!foundElemIsTopOfHeap)
   1.132 +                     currElem->nextHigherInMem->nextLowerInMem = newAlignedElem;
   1.133 +                 currElem->nextHigherInMem = newAlignedElem;
   1.134 +                 newAlignedElem->nextLowerInMem = currElem;
   1.135 +                 
   1.136 +                 //Found new element leaving loop
   1.137 +                 foundElem = newAlignedElem;
   1.138 +                 break;
   1.139 +             }
   1.140 +         }
   1.141 +         
   1.142 +       }
   1.143 +       currElem = currElem->nextChunkInFreeList;
   1.144 +    }
   1.145  
   1.146     if( foundElem == NULL )
   1.147      { ERROR("\nmalloc failed\n")