# HG changeset patch # User Merten Sach # Date 1324391970 -3600 # Node ID bfaebdf60df3045cb7f9abc4ba9db49a8a349626 # Parent 6ba4c9d862324f9da01cc0ffccfb16b5b9fc7819 coreLoop: All written variables are now on local stack or in separate cache line diff -r 6ba4c9d86232 -r bfaebdf60df3 CoreLoop.c --- a/CoreLoop.c Tue Dec 20 15:08:29 2011 +0100 +++ b/CoreLoop.c Tue Dec 20 15:39:30 2011 +0100 @@ -41,6 +41,7 @@ int errorCode; TSCountLowHigh endSusp; uint64 numCycles; + int32 numMasterInARow = 0; //work-stealing struc on stack to prevent false-sharing in cache-line volatile GateStruc gate; @@ -112,7 +113,7 @@ currVP = (VirtProcr *) readVMSQ( readyToAnimateQ ); #endif - if( currVP != NULL ) _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0; + if( currVP != NULL ) numMasterInARow = 0; else { //============================= MEASUREMENT STUFF ===================== @@ -125,16 +126,16 @@ while( currVP == NULL ) //if queue was empty, enter get masterLock loop { //queue was empty, so get master lock - gotLock = __sync_bool_compare_and_swap(&(_VMSMasterEnv->masterLock), + gotLock = __sync_bool_compare_and_swap(&(_VMSMasterEnv->masterLockUnion.masterLock), UNLOCKED, LOCKED ); if( gotLock ) { //run own MasterVP -- jmps to coreLoops startPt when done currVP = _VMSMasterEnv->masterVPs[thisCoresIdx]; - if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 ) + if( numMasterInARow > 1000 ) { DEBUG( dbgB2BMaster,"Many back to back MasterVPs\n"); pthread_yield(); } - _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1; + numMasterInARow += 1; break; //end while -- have a VP to animate now } diff -r 6ba4c9d86232 -r bfaebdf60df3 ProcrContext.c --- a/ProcrContext.c Tue Dec 20 15:08:29 2011 +0100 +++ b/ProcrContext.c Tue Dec 20 15:39:30 2011 +0100 @@ -28,7 +28,7 @@ newPr->schedSlot = NULL; /* - * Hardware dependent part + * Hardware dependent part, because of x86_64 calling convention */ //instead of calling the function directly, call a wrapper function to fetch //arguments from stack diff -r 
6ba4c9d86232 -r bfaebdf60df3 VMS.c --- a/VMS.c Tue Dec 20 15:08:29 2011 +0100 +++ b/VMS.c Tue Dec 20 15:39:30 2011 +0100 @@ -100,7 +100,7 @@ //Make the master env, which holds everything else - _VMSMasterEnv = malloc( sizeof(MasterEnv) ); + posix_memalign((void*)&_VMSMasterEnv, CACHELINE_SIZE, sizeof(MasterEnv) ); memset( _VMSMasterEnv, 0, sizeof(MasterEnv) ); //Very first thing put into the master env is the free-list, seeded @@ -143,12 +143,12 @@ masterVPs[ coreIdx ] = VMS__create_procr( (VirtProcrFnPtr)&masterLoop, (void*)masterEnv ); masterVPs[ coreIdx ]->coreAnimatedBy = coreIdx; allSchedSlots[ coreIdx ] = create_sched_slots(); //makes for one core - _VMSMasterEnv->numMasterInARow[ coreIdx ] = 0; + //_VMSMasterEnv->numMasterInARow[ coreIdx ] = 0; //moved to coreLoops stack, reason: avoid false sharing _VMSMasterEnv->workStealingGates[ coreIdx ] = NULL; } _VMSMasterEnv->readyToAnimateQs = readyToAnimateQs; _VMSMasterEnv->masterVPs = masterVPs; - _VMSMasterEnv->masterLock = UNLOCKED; + _VMSMasterEnv->masterLockUnion.masterLock = UNLOCKED; _VMSMasterEnv->allSchedSlots = allSchedSlots; _VMSMasterEnv->workStealingLock = UNLOCKED; @@ -288,8 +288,8 @@ { VirtProcr *newPr; void *stackLocs; - newPr = VMS__malloc( sizeof(VirtProcr) ); - stackLocs = VMS__malloc( VIRT_PROCR_STACK_SIZE ); + posix_memalign((void*)&newPr, CACHELINE_SIZE, sizeof(VirtProcr) ); //align to cacheline + posix_memalign(&stackLocs, CACHELINE_SIZE, VIRT_PROCR_STACK_SIZE ); //align to cacheline if( stackLocs == 0 ) { perror("VMS__malloc stack"); exit(1); } @@ -590,8 +590,8 @@ // itself //Note, should not stack-allocate initial data -- no guarantee, in // general that creating processor will outlive ones it creates. 
- VMS__free( animatingPr->startOfStack ); - VMS__free( animatingPr ); + //VMS__free( animatingPr->startOfStack ); + //VMS__free( animatingPr ); } diff -r 6ba4c9d86232 -r bfaebdf60df3 VMS.h --- a/VMS.h Tue Dec 20 15:08:29 2011 +0100 +++ b/VMS.h Tue Dec 20 15:39:30 2011 +0100 @@ -143,6 +143,11 @@ */ typedef struct { + union{ //added padding, because this variable is written a lot by different cores + //thus invalidating a lot of the structure + volatile int32 masterLock; + char padding[256]; + } masterLockUnion; SlaveScheduler slaveScheduler; RequestHandler requestHandler; @@ -158,9 +163,7 @@ void *coreLoopReturnPt;//addr to jump to to re-enter coreLoop int32 setupComplete; - volatile int32 masterLock; - - int32 numMasterInARow[NUM_CORES];//detect back-to-back masterVP + //int32 numMasterInARow[NUM_CORES];//detect back-to-back masterVP GateStruc *workStealingGates[ NUM_CORES ]; //concurrent work-steal int32 workStealingLock; diff -r 6ba4c9d86232 -r bfaebdf60df3 contextSwitch.s --- a/contextSwitch.s Tue Dec 20 15:08:29 2011 +0100 +++ b/contextSwitch.s Tue Dec 20 15:39:30 2011 +0100 @@ -31,8 +31,8 @@ * 0x38 coreLoopStackPtr * * _VMSMasterEnv offsets: - * 0x48 coreLoopReturnPt - * 0x54 masterLock + * 0x148 coreLoopReturnPt + * 0x00 masterLock */ .globl switchToVP switchToVP: @@ -56,8 +56,8 @@ * 0x38 coreLoopStackPtr * * _VMSMasterEnv offsets: - * 0x48 coreLoopReturnPt - * 0x54 masterLock + * 0x148 coreLoopReturnPt + * 0x00 masterLock */ .globl switchToCoreLoop switchToCoreLoop: @@ -69,7 +69,7 @@ movq 0x30(%rdi), %rbp #restore frame pointer movq $_VMSMasterEnv, %rcx movq (%rcx) , %rcx - movq 0x48(%rcx), %rax #get CoreLoopStartPt + movq 0x148(%rcx), %rax #get CoreLoopStartPt jmp *%rax #jmp to CoreLoop VPReturn: ret @@ -86,8 +86,8 @@ * 0x38 coreLoopStackPtr * * _VMSMasterEnv offsets: - * 0x48 coreLoopReturnPt - * 0x54 masterLock + * 0x148 coreLoopReturnPt + * 0x00 masterLock */ .globl masterSwitchToCoreLoop masterSwitchToCoreLoop: @@ -99,8 +99,8 @@ movq 0x30(%rdi), %rbp 
#restore frame pointer movq $_VMSMasterEnv, %rcx movq (%rcx) , %rcx - movq 0x48(%rcx), %rax #get CoreLoopStartPt - movl $0x0 , 0x54(%rcx) #release lock + movq 0x148(%rcx), %rax #get CoreLoopStartPt + movl $0x0 , 0x00(%rcx) #release lock jmp *%rax #jmp to CoreLoop MasterReturn: ret