changeset 173:bfaebdf60df3 false_sharing

coreLoop: All written variables are now on the local stack or in a separate cache line
author Merten Sach <msach@mailbox.tu-berlin.de>
date Tue, 20 Dec 2011 15:39:30 +0100
parents 6ba4c9d86232
children c3f458403cd6
files CoreLoop.c ProcrContext.c VMS.c VMS.h contextSwitch.s
diffstat 5 files changed, 28 insertions(+), 24 deletions(-) [+]
line diff
     1.1 --- a/CoreLoop.c	Tue Dec 20 15:08:29 2011 +0100
     1.2 +++ b/CoreLoop.c	Tue Dec 20 15:39:30 2011 +0100
     1.3 @@ -41,6 +41,7 @@
     1.4     int             errorCode;
     1.5     TSCountLowHigh  endSusp;
     1.6     uint64          numCycles;
     1.7 +   int32            numMasterInARow = 0;
     1.8  
     1.9        //work-stealing struc on stack to prevent false-sharing in cache-line
    1.10     volatile GateStruc gate;
    1.11 @@ -112,7 +113,7 @@
    1.12     currVP = (VirtProcr *) readVMSQ( readyToAnimateQ );
    1.13     #endif
    1.14  
    1.15 -   if( currVP != NULL ) _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0;
    1.16 +   if( currVP != NULL ) numMasterInARow = 0;
    1.17     else
    1.18      {
    1.19        //============================= MEASUREMENT STUFF =====================
    1.20 @@ -125,16 +126,16 @@
    1.21        while( currVP == NULL ) //if queue was empty, enter get masterLock loop
    1.22         {    //queue was empty, so get master lock
    1.23  
    1.24 -         gotLock = __sync_bool_compare_and_swap(&(_VMSMasterEnv->masterLock),
    1.25 +         gotLock = __sync_bool_compare_and_swap(&(_VMSMasterEnv->masterLockUnion.masterLock),
    1.26                                                            UNLOCKED, LOCKED );
    1.27           if( gotLock )
    1.28            {    //run own MasterVP -- jmps to coreLoops startPt when done
    1.29              currVP = _VMSMasterEnv->masterVPs[thisCoresIdx];
    1.30 -            if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 )
    1.31 +            if( numMasterInARow > 1000 )
    1.32               {       DEBUG( dbgB2BMaster,"Many back to back MasterVPs\n");
    1.33                 pthread_yield();
    1.34               }
    1.35 -            _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1;
    1.36 +            numMasterInARow += 1;
    1.37              break;  //end while -- have a VP to animate now
    1.38            }
    1.39  
     2.1 --- a/ProcrContext.c	Tue Dec 20 15:08:29 2011 +0100
     2.2 +++ b/ProcrContext.c	Tue Dec 20 15:39:30 2011 +0100
     2.3 @@ -28,7 +28,7 @@
     2.4     newPr->schedSlot    = NULL;
     2.5  
     2.6     /*
     2.7 -    * Hardware dependent part           
     2.8 +    * Hardware dependent part, because of x86_64 calling convention         
     2.9      */
    2.10     //instead of calling the function directly, call a wrapper function to fetch
    2.11     //arguments from stack
     3.1 --- a/VMS.c	Tue Dec 20 15:08:29 2011 +0100
     3.2 +++ b/VMS.c	Tue Dec 20 15:39:30 2011 +0100
     3.3 @@ -100,7 +100,7 @@
     3.4  
     3.5  
     3.6        //Make the master env, which holds everything else
     3.7 -   _VMSMasterEnv = malloc(   sizeof(MasterEnv) );
     3.8 +   posix_memalign((void*)&_VMSMasterEnv, CACHELINE_SIZE, sizeof(MasterEnv) );
     3.9     memset( _VMSMasterEnv, 0, sizeof(MasterEnv) );
    3.10  
    3.11          //Very first thing put into the master env is the free-list, seeded
    3.12 @@ -143,12 +143,12 @@
    3.13        masterVPs[ coreIdx ] = VMS__create_procr( (VirtProcrFnPtr)&masterLoop, (void*)masterEnv );
    3.14        masterVPs[ coreIdx ]->coreAnimatedBy = coreIdx;
    3.15        allSchedSlots[ coreIdx ] = create_sched_slots(); //makes for one core
    3.16 -      _VMSMasterEnv->numMasterInARow[ coreIdx ] = 0;
    3.17 +      //_VMSMasterEnv->numMasterInARow[ coreIdx ] = 0; //moved to coreLoops stack, reason: avoid false sharing
    3.18        _VMSMasterEnv->workStealingGates[ coreIdx ] = NULL;
    3.19      }
    3.20     _VMSMasterEnv->readyToAnimateQs = readyToAnimateQs;
    3.21     _VMSMasterEnv->masterVPs        = masterVPs;
    3.22 -   _VMSMasterEnv->masterLock       = UNLOCKED;
    3.23 +   _VMSMasterEnv->masterLockUnion.masterLock       = UNLOCKED;
    3.24     _VMSMasterEnv->allSchedSlots    = allSchedSlots;
    3.25     _VMSMasterEnv->workStealingLock = UNLOCKED;
    3.26  
    3.27 @@ -288,8 +288,8 @@
    3.28   { VirtProcr *newPr;
    3.29     void      *stackLocs;
    3.30  
    3.31 -   newPr      = VMS__malloc( sizeof(VirtProcr) );
    3.32 -   stackLocs  = VMS__malloc( VIRT_PROCR_STACK_SIZE );
    3.33 +   posix_memalign((void*)&newPr, CACHELINE_SIZE, sizeof(VirtProcr) ); //align to cacheline
    3.34 +   posix_memalign(&stackLocs, CACHELINE_SIZE, VIRT_PROCR_STACK_SIZE ); //align to cacheline
    3.35     if( stackLocs == 0 )
    3.36      { perror("VMS__malloc stack"); exit(1); }
    3.37  
    3.38 @@ -590,8 +590,8 @@
    3.39        // itself
    3.40        //Note, should not stack-allocate initial data -- no guarantee, in
    3.41        // general that creating processor will outlive ones it creates.
    3.42 -   VMS__free( animatingPr->startOfStack );
    3.43 -   VMS__free( animatingPr );
    3.44 +   //VMS__free( animatingPr->startOfStack );
    3.45 +   //VMS__free( animatingPr );
    3.46   }
    3.47  
    3.48  
     4.1 --- a/VMS.h	Tue Dec 20 15:08:29 2011 +0100
     4.2 +++ b/VMS.h	Tue Dec 20 15:39:30 2011 +0100
     4.3 @@ -143,6 +143,11 @@
     4.4   */
     4.5  typedef struct
     4.6   {
     4.7 +   union{ //padded to its own cache region, because this variable is written
     4.8 +          //frequently by different cores, which would otherwise invalidate
     4.9 +          //the cache line holding the rest of this structure (false sharing)
     4.9 +        volatile int32   masterLock;
    4.10 +        char             padding[256];    
    4.11 +   } masterLockUnion;
    4.12     SlaveScheduler   slaveScheduler;
    4.13     RequestHandler   requestHandler;
    4.14     
    4.15 @@ -158,9 +163,7 @@
    4.16     void            *coreLoopReturnPt;//addr to jump to to re-enter coreLoop
    4.17  
    4.18     int32            setupComplete;
    4.19 -   volatile int32   masterLock;
    4.20 -
    4.21 -   int32            numMasterInARow[NUM_CORES];//detect back-to-back masterVP
    4.22 +   //int32            numMasterInARow[NUM_CORES];//detect back-to-back masterVP
    4.23     GateStruc       *workStealingGates[ NUM_CORES ]; //concurrent work-steal
    4.24     int32            workStealingLock;
    4.25     
     5.1 --- a/contextSwitch.s	Tue Dec 20 15:08:29 2011 +0100
     5.2 +++ b/contextSwitch.s	Tue Dec 20 15:39:30 2011 +0100
     5.3 @@ -31,8 +31,8 @@
     5.4   * 0x38 coreLoopStackPtr
     5.5   *
     5.6   * _VMSMasterEnv  offsets:
     5.7 - * 0x48 coreLoopReturnPt
     5.8 - * 0x54 masterLock
     5.9 + * 0x148 coreLoopReturnPt
    5.10 + * 0x00 masterLock
    5.11   */
    5.12  .globl switchToVP
    5.13  switchToVP:
    5.14 @@ -56,8 +56,8 @@
    5.15   * 0x38 coreLoopStackPtr
    5.16   *
    5.17   * _VMSMasterEnv  offsets:
    5.18 - * 0x48 coreLoopReturnPt
    5.19 - * 0x54 masterLock
    5.20 + * 0x148 coreLoopReturnPt
    5.21 + * 0x00 masterLock
    5.22   */
    5.23  .globl switchToCoreLoop
    5.24  switchToCoreLoop:
    5.25 @@ -69,7 +69,7 @@
    5.26      movq    0x30(%rdi), %rbp         #restore frame pointer
    5.27      movq    $_VMSMasterEnv, %rcx
    5.28      movq    (%rcx)    , %rcx
    5.29 -    movq    0x48(%rcx), %rax         #get CoreLoopStartPt
    5.30 +    movq    0x148(%rcx), %rax         #get CoreLoopStartPt
    5.31      jmp     *%rax                    #jmp to CoreLoop
    5.32  VPReturn:
    5.33      ret
    5.34 @@ -86,8 +86,8 @@
    5.35   * 0x38 coreLoopStackPtr
    5.36   *
    5.37   * _VMSMasterEnv  offsets:
    5.38 - * 0x48 coreLoopReturnPt
    5.39 - * 0x54 masterLock
    5.40 + * 0x148 coreLoopReturnPt
    5.41 + * 0x00 masterLock
    5.42   */
    5.43  .globl masterSwitchToCoreLoop
    5.44  masterSwitchToCoreLoop:
    5.45 @@ -99,8 +99,8 @@
    5.46      movq    0x30(%rdi), %rbp         #restore frame pointer
    5.47      movq    $_VMSMasterEnv, %rcx
    5.48      movq    (%rcx)    , %rcx
    5.49 -    movq    0x48(%rcx), %rax         #get CoreLoopStartPt
    5.50 -    movl    $0x0      , 0x54(%rcx)   #release lock
    5.51 +    movq    0x148(%rcx), %rax         #get CoreLoopStartPt
    5.52 +    movl    $0x0      , 0x00(%rcx)   #release lock
    5.53      jmp     *%rax                    #jmp to CoreLoop
    5.54  MasterReturn:
    5.55      ret