Mercurial > cgi-bin > hgwebdir.cgi > VMS > VMS_Implementations > VMS_impls > VMS__MC_shared_impl
changeset 173:bfaebdf60df3 false_sharing
coreLoop: All written variables are now on the local stack or in a separate cache line
| author | Merten Sach <msach@mailbox.tu-berlin.de> |
|---|---|
| date | Tue, 20 Dec 2011 15:39:30 +0100 |
| parents | 6ba4c9d86232 |
| children | c3f458403cd6 |
| files | CoreLoop.c ProcrContext.c VMS.c VMS.h contextSwitch.s |
| diffstat | 5 files changed, 28 insertions(+), 24 deletions(-) [+] |
line diff
1.1 --- a/CoreLoop.c Tue Dec 20 15:08:29 2011 +0100 1.2 +++ b/CoreLoop.c Tue Dec 20 15:39:30 2011 +0100 1.3 @@ -41,6 +41,7 @@ 1.4 int errorCode; 1.5 TSCountLowHigh endSusp; 1.6 uint64 numCycles; 1.7 + int32 numMasterInARow = 0; 1.8 1.9 //work-stealing struc on stack to prevent false-sharing in cache-line 1.10 volatile GateStruc gate; 1.11 @@ -112,7 +113,7 @@ 1.12 currVP = (VirtProcr *) readVMSQ( readyToAnimateQ ); 1.13 #endif 1.14 1.15 - if( currVP != NULL ) _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0; 1.16 + if( currVP != NULL ) numMasterInARow = 0; 1.17 else 1.18 { 1.19 //============================= MEASUREMENT STUFF ===================== 1.20 @@ -125,16 +126,16 @@ 1.21 while( currVP == NULL ) //if queue was empty, enter get masterLock loop 1.22 { //queue was empty, so get master lock 1.23 1.24 - gotLock = __sync_bool_compare_and_swap(&(_VMSMasterEnv->masterLock), 1.25 + gotLock = __sync_bool_compare_and_swap(&(_VMSMasterEnv->masterLockUnion.masterLock), 1.26 UNLOCKED, LOCKED ); 1.27 if( gotLock ) 1.28 { //run own MasterVP -- jmps to coreLoops startPt when done 1.29 currVP = _VMSMasterEnv->masterVPs[thisCoresIdx]; 1.30 - if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 ) 1.31 + if( numMasterInARow > 1000 ) 1.32 { DEBUG( dbgB2BMaster,"Many back to back MasterVPs\n"); 1.33 pthread_yield(); 1.34 } 1.35 - _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1; 1.36 + numMasterInARow += 1; 1.37 break; //end while -- have a VP to animate now 1.38 } 1.39
2.1 --- a/ProcrContext.c Tue Dec 20 15:08:29 2011 +0100 2.2 +++ b/ProcrContext.c Tue Dec 20 15:39:30 2011 +0100 2.3 @@ -28,7 +28,7 @@ 2.4 newPr->schedSlot = NULL; 2.5 2.6 /* 2.7 - * Hardware dependent part 2.8 + * Hardware dependent part, because of x86_64 calling convention 2.9 */ 2.10 //instead of calling the function directly, call a wrapper function to fetch 2.11 //arguments from stack
3.1 --- a/VMS.c Tue Dec 20 15:08:29 2011 +0100 3.2 +++ b/VMS.c Tue Dec 20 15:39:30 2011 +0100 3.3 @@ -100,7 +100,7 @@ 3.4 3.5 3.6 //Make the master env, which holds everything else 3.7 - _VMSMasterEnv = malloc( sizeof(MasterEnv) ); 3.8 + posix_memalign((void*)&_VMSMasterEnv, CACHELINE_SIZE, sizeof(MasterEnv) ); 3.9 memset( _VMSMasterEnv, 0, sizeof(MasterEnv) ); 3.10 3.11 //Very first thing put into the master env is the free-list, seeded 3.12 @@ -143,12 +143,12 @@ 3.13 masterVPs[ coreIdx ] = VMS__create_procr( (VirtProcrFnPtr)&masterLoop, (void*)masterEnv ); 3.14 masterVPs[ coreIdx ]->coreAnimatedBy = coreIdx; 3.15 allSchedSlots[ coreIdx ] = create_sched_slots(); //makes for one core 3.16 - _VMSMasterEnv->numMasterInARow[ coreIdx ] = 0; 3.17 + //_VMSMasterEnv->numMasterInARow[ coreIdx ] = 0; //moved to coreLoops stack, reason: avoid false sharing 3.18 _VMSMasterEnv->workStealingGates[ coreIdx ] = NULL; 3.19 } 3.20 _VMSMasterEnv->readyToAnimateQs = readyToAnimateQs; 3.21 _VMSMasterEnv->masterVPs = masterVPs; 3.22 - _VMSMasterEnv->masterLock = UNLOCKED; 3.23 + _VMSMasterEnv->masterLockUnion.masterLock = UNLOCKED; 3.24 _VMSMasterEnv->allSchedSlots = allSchedSlots; 3.25 _VMSMasterEnv->workStealingLock = UNLOCKED; 3.26 3.27 @@ -288,8 +288,8 @@ 3.28 { VirtProcr *newPr; 3.29 void *stackLocs; 3.30 3.31 - newPr = VMS__malloc( sizeof(VirtProcr) ); 3.32 - stackLocs = VMS__malloc( VIRT_PROCR_STACK_SIZE ); 3.33 + posix_memalign((void*)&newPr, CACHELINE_SIZE, sizeof(VirtProcr) ); //align to cacheline 3.34 + posix_memalign(&stackLocs, CACHELINE_SIZE, VIRT_PROCR_STACK_SIZE ); //align to cacheline 3.35 if( stackLocs == 0 ) 3.36 { perror("VMS__malloc stack"); exit(1); } 3.37 3.38 @@ -590,8 +590,8 @@ 3.39 // itself 3.40 //Note, should not stack-allocate initial data -- no guarantee, in 3.41 // general that creating processor will outlive ones it creates. 
3.42 - VMS__free( animatingPr->startOfStack ); 3.43 - VMS__free( animatingPr ); 3.44 + //VMS__free( animatingPr->startOfStack ); 3.45 + //VMS__free( animatingPr ); 3.46 } 3.47 3.48
4.1 --- a/VMS.h Tue Dec 20 15:08:29 2011 +0100 4.2 +++ b/VMS.h Tue Dec 20 15:39:30 2011 +0100 4.3 @@ -143,6 +143,11 @@ 4.4 */ 4.5 typedef struct 4.6 { 4.7 + union{ //added padding, because this variable is written a lot by different cores 4.8 + //thus invalidating a lot of the stucture 4.9 + volatile int32 masterLock; 4.10 + char padding[256]; 4.11 + } masterLockUnion; 4.12 SlaveScheduler slaveScheduler; 4.13 RequestHandler requestHandler; 4.14 4.15 @@ -158,9 +163,7 @@ 4.16 void *coreLoopReturnPt;//addr to jump to to re-enter coreLoop 4.17 4.18 int32 setupComplete; 4.19 - volatile int32 masterLock; 4.20 - 4.21 - int32 numMasterInARow[NUM_CORES];//detect back-to-back masterVP 4.22 + //int32 numMasterInARow[NUM_CORES];//detect back-to-back masterVP 4.23 GateStruc *workStealingGates[ NUM_CORES ]; //concurrent work-steal 4.24 int32 workStealingLock; 4.25
5.1 --- a/contextSwitch.s Tue Dec 20 15:08:29 2011 +0100 5.2 +++ b/contextSwitch.s Tue Dec 20 15:39:30 2011 +0100 5.3 @@ -31,8 +31,8 @@ 5.4 * 0x38 coreLoopStackPtr 5.5 * 5.6 * _VMSMasterEnv offsets: 5.7 - * 0x48 coreLoopReturnPt 5.8 - * 0x54 masterLock 5.9 + * 0x148 coreLoopReturnPt 5.10 + * 0x00 masterLock 5.11 */ 5.12 .globl switchToVP 5.13 switchToVP: 5.14 @@ -56,8 +56,8 @@ 5.15 * 0x38 coreLoopStackPtr 5.16 * 5.17 * _VMSMasterEnv offsets: 5.18 - * 0x48 coreLoopReturnPt 5.19 - * 0x54 masterLock 5.20 + * 0x148 coreLoopReturnPt 5.21 + * 0x00 masterLock 5.22 */ 5.23 .globl switchToCoreLoop 5.24 switchToCoreLoop: 5.25 @@ -69,7 +69,7 @@ 5.26 movq 0x30(%rdi), %rbp #restore frame pointer 5.27 movq $_VMSMasterEnv, %rcx 5.28 movq (%rcx) , %rcx 5.29 - movq 0x48(%rcx), %rax #get CoreLoopStartPt 5.30 + movq 0x148(%rcx), %rax #get CoreLoopStartPt 5.31 jmp *%rax #jmp to CoreLoop 5.32 VPReturn: 5.33 ret 5.34 @@ -86,8 +86,8 @@ 5.35 * 0x38 coreLoopStackPtr 5.36 * 5.37 * _VMSMasterEnv offsets: 5.38 - * 0x48 coreLoopReturnPt 5.39 - * 0x54 masterLock 5.40 + * 0x148 coreLoopReturnPt 5.41 + * 0x00 masterLock 5.42 */ 5.43 .globl masterSwitchToCoreLoop 5.44 masterSwitchToCoreLoop: 5.45 @@ -99,8 +99,8 @@ 5.46 movq 0x30(%rdi), %rbp #restore frame pointer 5.47 movq $_VMSMasterEnv, %rcx 5.48 movq (%rcx) , %rcx 5.49 - movq 0x48(%rcx), %rax #get CoreLoopStartPt 5.50 - movl $0x0 , 0x54(%rcx) #release lock 5.51 + movq 0x148(%rcx), %rax #get CoreLoopStartPt 5.52 + movl $0x0 , 0x00(%rcx) #release lock 5.53 jmp *%rax #jmp to CoreLoop 5.54 MasterReturn: 5.55 ret
