changeset 78:521c75d64cef

Version before optimization
author Merten Sach <msach@mailbox.tu-berlin.de>
date Mon, 04 Jul 2011 19:45:43 +0200
parents fe5ec83f1baf
children 97e26095c01f
files ProcrContext.c ProcrContext.h VMS.c VMS.h probes.h vmalloc.c vmalloc.h
diffstat 7 files changed, 160 insertions(+), 18 deletions(-) [+]
line diff
     1.1 --- a/ProcrContext.c	Wed Jun 22 16:12:27 2011 +0200
     1.2 +++ b/ProcrContext.c	Mon Jul 04 19:45:43 2011 +0200
     1.3 @@ -54,10 +54,10 @@
     1.4  
     1.5     //============================= MEASUREMENT STUFF ========================
     1.6     #ifdef STATS__TURN_ON_PROBES
     1.7 -   struct timeval timeStamp;
     1.8 -   gettimeofday( &(timeStamp), NULL);
     1.9 -   newPr->createPtInSecs = timeStamp.tv_sec +(timeStamp.tv_usec/1000000.0) -
    1.10 -                                               _VMSMasterEnv->createPtInSecs;
    1.11 +   //struct timeval timeStamp;
    1.12 +   //gettimeofday( &(timeStamp), NULL);
    1.13 +   //newPr->createPtInSecs = timeStamp.tv_sec +(timeStamp.tv_usec/1000000.0) -
    1.14 +   //                                            _VMSMasterEnv->createPtInSecs;
    1.15     #endif
    1.16     //========================================================================
    1.17  
     2.1 --- a/ProcrContext.h	Wed Jun 22 16:12:27 2011 +0200
     2.2 +++ b/ProcrContext.h	Mon Jul 04 19:45:43 2011 +0200
     2.3 @@ -20,6 +20,8 @@
     2.4  
     2.5  void startVirtProcrFn();
     2.6  
     2.7 +void *asmTerminateCoreLoop(VirtProcr *currPr);
     2.8 +
     2.9  #define flushRegisters() \
    2.10          asm volatile ("":::"%rbx", "%r12", "%r13","%r14","%r15")
    2.11  
     3.1 --- a/VMS.c	Wed Jun 22 16:12:27 2011 +0200
     3.2 +++ b/VMS.c	Mon Jul 04 19:45:43 2011 +0200
     3.3 @@ -110,7 +110,7 @@
     3.4  
     3.5     //============================= MEASUREMENT STUFF ========================
     3.6     #ifdef MEAS__TIME_MALLOC
     3.7 -   _VMSMasterEnv->mallocTimeHist  = makeFixedBinHistExt( 50, 0, 100,
     3.8 +   _VMSMasterEnv->mallocTimeHist  = makeFixedBinHistExt( 100, 0, 100,
     3.9                                                         "malloc time hist");
    3.10     _VMSMasterEnv->freeTimeHist  = makeFixedBinHistExt( 50, 0, 100,
    3.11                                                         "free time hist");
    3.12 @@ -671,13 +671,15 @@
    3.13  void
    3.14  VMS__cleanup_at_end_of_shutdown()
    3.15   { 
    3.16 -   VMSQueueStruc **readyToAnimateQs;
    3.17 -   int              coreIdx;
    3.18 -   VirtProcr      **masterVPs;
    3.19 -   SchedSlot     ***allSchedSlots; //ptr to array of ptrs
    3.20 +   //unused
    3.21 +   //VMSQueueStruc **readyToAnimateQs;
    3.22 +   //int              coreIdx;
    3.23 +   //VirtProcr      **masterVPs;
    3.24 +   //SchedSlot     ***allSchedSlots; //ptr to array of ptrs
    3.25  
    3.26        //Before getting rid of everything, print out any measurements made
    3.27     forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, (DynArrayFnPtr)&printHist );
    3.28 +   forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, (DynArrayFnPtr)&saveHistToFile);
    3.29     //forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, &freeHistExt );
    3.30     #ifdef MEAS__TIME_PLUGIN
    3.31     printHist( _VMSMasterEnv->reqHdlrLowTimeHist );
     4.1 --- a/VMS.h	Wed Jun 22 16:12:27 2011 +0200
     4.2 +++ b/VMS.h	Mon Jul 04 19:45:43 2011 +0200
     4.3 @@ -87,11 +87,13 @@
     4.4  #define MASTERLOCK_RETRIES 10000
     4.5  
     4.6     // stack size in virtual processors created
     4.7 -#define VIRT_PROCR_STACK_SIZE 0x4000 /* 16K */
     4.8 +#define VIRT_PROCR_STACK_SIZE 0x8000 /* 32K */
     4.9  
    4.10     // memory for VMS__malloc
    4.11  #define MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE 0x10000000 /* 256M */
    4.12  
    4.13 +#define CACHE_LINE 64
    4.14 +
    4.15  
    4.16  //==============================
    4.17  
    4.18 @@ -371,6 +373,10 @@
    4.19  inline void *
    4.20  VMS__take_sem_reqst_from( VMSReqst *req );
    4.21  
    4.22 +void inline
    4.23 +VMS__handle_VMSSemReq( VMSReqst *req, VirtProcr *requestingPr, void *semEnv,
    4.24 +                       ResumePrFnPtr resumePrFnPtr );
    4.25 +
    4.26  //======================== STATS ======================
    4.27  
    4.28  //===== RDTSC wrapper ===== //Also runs with x86_64 code
    4.29 @@ -413,11 +419,11 @@
    4.30  #define MakeTheMeasHists() \
    4.31     _VMSMasterEnv->measHistsInfo = \
    4.32                makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \
    4.33 -   makeAMeasHist( createHistIdx,      "Create",        50, 0, 100 ) \
    4.34 -   makeAMeasHist( mutexLockHistIdx,   "mutex lock",    50, 0, 100 ) \
    4.35 -   makeAMeasHist( mutexUnlockHistIdx, "mutex unlock",  50, 0, 100 ) \
    4.36 -   makeAMeasHist( condWaitHistIdx,    "cond wait",     50, 0, 100 ) \
    4.37 -   makeAMeasHist( condSignalHistIdx,  "cond signal",   50, 0, 100 )
    4.38 +   makeAMeasHist( createHistIdx,      "create",        50, 0, 1000 ) \
    4.39 +   makeAMeasHist( mutexLockHistIdx,   "mutex_lock",    50, 0, 100 ) \
    4.40 +   makeAMeasHist( mutexUnlockHistIdx, "mutex_unlock",  50, 0, 100 ) \
    4.41 +   makeAMeasHist( condWaitHistIdx,    "cond_wait",     50, 0, 100 ) \
    4.42 +   makeAMeasHist( condSignalHistIdx,  "cond_signal",   50, 0, 100 )
    4.43  
    4.44  #endif
    4.45  
     5.1 --- a/probes.h	Wed Jun 22 16:12:27 2011 +0200
     5.2 +++ b/probes.h	Mon Jul 04 19:45:43 2011 +0200
     5.3 @@ -124,8 +124,8 @@
     5.4  
     5.5  void
     5.6  VMS_impl__print_stats_of_all_probes();
     5.7 -#define VMS__print_stats_of_all_probes \
     5.8 -        VMS_impl__print_stats_of_all_probes
     5.9 +#define VMS__print_stats_of_all_probes() \
    5.10 +        VMS_impl__print_stats_of_all_probes()
    5.11  
    5.12  
    5.13  #else
     6.1 --- a/vmalloc.c	Wed Jun 22 16:12:27 2011 +0200
     6.2 +++ b/vmalloc.c	Mon Jul 04 19:45:43 2011 +0200
     6.3 @@ -60,7 +60,7 @@
     6.4     //========================================================================
     6.5     
     6.6        //step up the size to be aligned at 16-byte boundary, prob better ways
     6.7 -   sizeRequested = ((sizeRequested + 16) >> 4) << 4;
     6.8 +   sizeRequested = (sizeRequested + 16) & ~15;
     6.9     currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList;
    6.10  
    6.11     while( currElem != NULL )
    6.12 @@ -75,6 +75,135 @@
    6.13        else
    6.14           currElem = currElem->nextChunkInFreeList;
    6.15      }
    6.16 +   
    6.17 +   if( foundElem == NULL )
    6.18 +    { ERROR("\nmalloc failed\n")
    6.19 +      return (void *)NULL;  //indicates malloc failed
    6.20 +    }
    6.21 +      //Using a kludge to identify the element that is the top chunk in the
    6.22 +      // heap -- saving top-of-heap addr in head's nextHigherInMem -- and
    6.23 +      // save addr of start of heap in head's nextLowerInMem
    6.24 +      //Will handle top of Heap specially
    6.25 +   foundElemIsTopOfHeap = foundElem->nextHigherInMem ==
    6.26 +                          _VMSMasterEnv->freeListHead->nextHigherInMem;
    6.27 +   
    6.28 +      //before shaving off the extra and trying to insert it as a new elem, remove found elem
    6.29 +      //note, foundElem will never be the head, so always has valid prevChunk
    6.30 +   foundElem->prevChunkInFreeList->nextChunkInFreeList =
    6.31 +                                              foundElem->nextChunkInFreeList;
    6.32 +   if( foundElem->nextChunkInFreeList != NULL )
    6.33 +    { foundElem->nextChunkInFreeList->prevChunkInFreeList =
    6.34 +                                              foundElem->prevChunkInFreeList;
    6.35 +    }
    6.36 +   foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated
    6.37 +   
    6.38 +      //if enough, turn extra into new elem & insert it
    6.39 +   if( amountExtra > 64 )
    6.40 +    {   //make new elem by adding to addr of curr elem then casting
    6.41 +        sizeConsumed = sizeof(MallocProlog) + sizeRequested; 
    6.42 +        newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed );
    6.43 +        newElem->nextLowerInMem    = foundElem; //This is evil (but why?) 
    6.44 +        newElem->nextHigherInMem   = foundElem->nextHigherInMem; //This is evil (but why?)
    6.45 +        foundElem->nextHigherInMem = newElem;
    6.46 +        if( ! foundElemIsTopOfHeap )
    6.47 +        {  //there is no next higher for top of heap, so can't write to it
    6.48 +           newElem->nextHigherInMem->nextLowerInMem = newElem;
    6.49 +        }
    6.50 +        add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead );
    6.51 +    }
    6.52 +   else
    6.53 +    {
    6.54 +      sizeConsumed = sizeOfFound;
    6.55 +    }
    6.56 +  _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed;
    6.57 +
    6.58 +   //============================= MEASUREMENT STUFF ========================
    6.59 +   #ifdef MEAS__TIME_MALLOC
    6.60 +   saveLowTimeStampCountInto( endStamp );
    6.61 +   addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist );
    6.62 +   #endif
    6.63 +   //========================================================================
    6.64 +
    6.65 +      //skip over the prolog by adding its size to the pointer return
    6.66 +   return (void*)((uintptr_t)foundElem + sizeof(MallocProlog));
    6.67 + }
    6.68 +
    6.69 +/*This is sequential code, meant to only be called from the Master, not from
    6.70 + * any slave VPs.
    6.71 + *Search down list, checking size by the nextHigherInMem pointer, to find
    6.72 + * first chunk bigger than size needed.
    6.73 + *Shave off the extra and make it into a new free-list element, hook it in
    6.74 + * then return the address of the found element plus size of prolog.
    6.75 + *
    6.76 + * The difference from the regular malloc is that all allocated chunks are
    6.77 + * aligned and padded to the size of a CACHE_LINE. A free chunk that is not
    6.78 + * already aligned is split, leaving a separate chunk before the aligned one.
    6.79 + */
    6.80 +void *VMS__malloc_aligned( size_t sizeRequested )
    6.81 + { MallocProlog *foundElem = NULL, *currElem, *newElem;
    6.82 +   ssize_t        amountExtra, sizeConsumed,sizeOfFound,prevAmount;
    6.83 +   uint32        foundElemIsTopOfHeap;
    6.84 +
    6.85 +   //============================= MEASUREMENT STUFF ========================
    6.86 +   #ifdef MEAS__TIME_MALLOC
    6.87 +   uint32 startStamp, endStamp;
    6.88 +   saveLowTimeStampCountInto( startStamp );
    6.89 +   #endif
    6.90 +   //========================================================================
    6.91 +   
    6.92 +      //step up the size to be multiple of the cache line size
    6.93 +   sizeRequested = (sizeRequested + CACHE_LINE) & ~(CACHE_LINE-1);
    6.94 +   currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList;
    6.95 +
    6.96 +   while( currElem != NULL )
    6.97 +    {    //check if size of currElem is big enough
    6.98 +      sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem);
    6.99 +      amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog);
   6.100 +      if( amountExtra > 0 )
   6.101 +       {    
   6.102 +         //check whether the found element is already aligned
   6.103 +         if((((uintptr_t)currElem+sizeof(MallocProlog)) & (uintptr_t)(CACHE_LINE-1)) == 0){
   6.104 +             //found it, get out of loop
   6.105 +             foundElem = currElem;
   6.106 +             break;
   6.107 +         }else{
   6.108 +             //find first aligned address and check if it's still big enough
   6.109 +             //check also if the space before the aligned address is big enough
   6.110 +             //for a new element
   6.111 +             void *firstAlignedAddr = (void*)(((uintptr_t)currElem + 2*CACHE_LINE) & ~((uintptr_t)(CACHE_LINE-1)));
   6.112 +             prevAmount = (uintptr_t)firstAlignedAddr - (uintptr_t)currElem;
   6.113 +             sizeOfFound=(uintptr_t)currElem->nextHigherInMem -(uintptr_t)firstAlignedAddr + sizeof(MallocProlog);
   6.114 +             amountExtra= sizeOfFound - sizeRequested - sizeof(MallocProlog);
   6.115 +             if(prevAmount > 2*sizeof(MallocProlog) && amountExtra > 0 ){
   6.116 +                 //found suitable element
   6.117 +                 //create new previous element and exit loop
   6.118 +                 MallocProlog *newAlignedElem = (MallocProlog*)firstAlignedAddr - 1;
   6.119 +                 
   6.120 +                 //insert new element into free list
   6.121 +                 if(currElem->nextChunkInFreeList != NULL)
   6.122 +                     currElem->nextChunkInFreeList->prevChunkInFreeList = newAlignedElem;                     
   6.123 +                 newAlignedElem->prevChunkInFreeList = currElem;
   6.124 +                 newAlignedElem->nextChunkInFreeList = currElem->nextChunkInFreeList;
   6.125 +                 currElem->nextChunkInFreeList = newAlignedElem;
   6.126 +                 
   6.127 +                 //set higherInMem and lowerInMem
   6.128 +                 newAlignedElem->nextHigherInMem = currElem->nextHigherInMem;
   6.129 +                 foundElemIsTopOfHeap = currElem->nextHigherInMem ==
   6.130 +                          _VMSMasterEnv->freeListHead->nextHigherInMem;
   6.131 +                 if(!foundElemIsTopOfHeap)
   6.132 +                     currElem->nextHigherInMem->nextLowerInMem = newAlignedElem;
   6.133 +                 currElem->nextHigherInMem = newAlignedElem;
   6.134 +                 newAlignedElem->nextLowerInMem = currElem;
   6.135 +                 
   6.136 +                 //Found new element, leaving loop
   6.137 +                 foundElem = newAlignedElem;
   6.138 +                 break;
   6.139 +             }
   6.140 +         }
   6.141 +         
   6.142 +       }
   6.143 +       currElem = currElem->nextChunkInFreeList;
   6.144 +    }
   6.145  
   6.146     if( foundElem == NULL )
   6.147      { ERROR("\nmalloc failed\n")
     7.1 --- a/vmalloc.h	Wed Jun 22 16:12:27 2011 +0200
     7.2 +++ b/vmalloc.h	Mon Jul 04 19:45:43 2011 +0200
     7.3 @@ -35,6 +35,9 @@
     7.4  void *
     7.5  VMS__malloc( size_t sizeRequested );
     7.6  
     7.7 +void *
     7.8 +VMS__malloc_aligned( size_t sizeRequested );
     7.9 +
    7.10  void
    7.11  VMS__free( void *ptrToFree );
    7.12