# HG changeset patch # User Merten Sach # Date 1306772921 -7200 # Node ID 5ff1631c26ed929c476f71dc037bc2ca3a38202a # Parent 11bfe9d136ed2a011f18f222fa1e6523934c2805 working O3 version diff -r 11bfe9d136ed -r 5ff1631c26ed CoreLoop.c --- a/CoreLoop.c Wed May 11 16:13:24 2011 +0200 +++ b/CoreLoop.c Mon May 30 18:28:41 2011 +0200 @@ -14,6 +14,7 @@ #include #include +void *terminateCoreLoop(VirtProcr *currPr); /*This is the loop that runs in the OS Thread pinned to each core *Get virt procr from queue, @@ -78,8 +79,7 @@ //To get label addr in non-gcc compiler, can trick it by making a call // to a fn that does asm that pulls the "return" // addr off the stack and stores it in a pointed-to location. - _VMSMasterEnv->coreLoopEndPt = &&CoreLoopEndPt; - _VMSMasterEnv->coreLoopStartPt = &&CoreLoopStartPt; + saveCoreLoopReturnAddr(&(_VMSMasterEnv->coreLoopStartPt)); //Core loop has no values live upon CoreLoopStartPt except // _VMSMasterEnv @@ -90,7 +90,8 @@ // Get to work! -- virt procr jumps back here when suspends //Note, have to restore the frame-pointer before jump to here, to get // this code to work right (readyToAnimateQ and so forth are frame-ptr relative) -CoreLoopStartPt: + + while(1){ //Get virtual processor from queue //The Q must be a global, static volatile var, so not kept in reg, @@ -159,19 +160,21 @@ } - SwitchToVP( currPr ) + switchToVP(currPr); + flushRegisters(); + }//CoreLoop + } - - //=========== jmp to here when want to shut down the VMS system ========== - CoreLoopEndPt: - //first free shutdown VP that jumped here -- it first restores the - // coreloop's stack, so addr of currPr in stack frame is still correct +void * +terminateCoreLoop(VirtProcr *currPr){ + //first free shutdown VP that jumped here -- it first restores the + // coreloop's stack, so addr of currPr in stack frame is still correct VMS__dissipate_procr( currPr ); pthread_exit( NULL ); - } +} - +#ifdef SEQUENTIAL 
//=========================================================================== /*This sequential version is exact same as threaded, except doesn't do the @@ -237,3 +240,5 @@ VMS__dissipate_procr( currPr ); //free shutdown pr, that jmpd here return; } + +#endif diff -r 11bfe9d136ed -r 5ff1631c26ed MasterLoop.c --- a/MasterLoop.c Wed May 11 16:13:24 2011 +0200 +++ b/MasterLoop.c Mon May 30 18:28:41 2011 +0200 @@ -10,6 +10,7 @@ #include #include "VMS.h" +#include "SwitchAnimators.h" //=========================================================================== @@ -93,7 +94,7 @@ // of setup code.. (VP creator sets up the stack as if this function // was called normally, but actually get here by jmp) //So, setup values about stack ptr, jmp pt and all that - masterPr->nextInstrPt = &&masterLoopStartPt; + //masterPr->nextInstrPt = &&masterLoopStartPt; //Note, got rid of writing the stack and frame ptr up here, because @@ -104,7 +105,9 @@ //Also got rid of the busy-wait. - masterLoopStartPt: + //masterLoopStartPt: + while(1){ + //============================= MEASUREMENT STUFF ======================== #ifdef MEAS__TIME_MASTER //Total Master time includes one coreloop time -- just assume the core @@ -185,35 +188,11 @@ saveLowTimeStampCountInto( masterPr->endMasterTSCLow ); #endif + masterSwitchToCoreLoop(animatingPr); + flushRegisters(); + }//MasterLoop -/* VirtProcr offsets: - * 0xc stackPtr - * 0x10 framePtr - * 0x14 nextInstrPt - * 0x1c coreLoopFramePtr - * 0x20 coreLoopStackPtr - * - * _VMSMasterEnv offsets: - * 0x24 coreLoopStartPt - * 0x28 coreLoopEndPt - * 0x30 masterLock - */ -// masterSwitchToCoreLoop( masterPr ) - asm volatile("movl %0, %%ebx; \ - movl %1, %%ecx; \ - movl %%esp, 0x0c(%%ecx); \ - movl %%ebp, 0x10(%%ecx); \ - movl 0x24(%%ebx), %%eax; \ - movl 0x20(%%ecx), %%esp; \ - movl 0x1c(%%ecx), %%ebp; \ - movl $0x0, 0x30(%%ebx); \ - jmp %%eax" \ - /* outputs */ : \ - /* inputs */ : "g"(_VMSMasterEnv), "g"(masterPr) \ - /* clobber */ : "memory", "%eax", "%ebx", 
"%ecx", "%edx", "%edi", "%esi" \ - ); - } diff -r 11bfe9d136ed -r 5ff1631c26ed SwitchAnimators.h --- a/SwitchAnimators.h Wed May 11 16:13:24 2011 +0200 +++ b/SwitchAnimators.h Mon May 30 18:28:41 2011 +0200 @@ -10,6 +10,17 @@ #define _SwitchAnimators_H #define __USE_GNU +void saveCoreLoopReturnAddr(void *returnAddress); + +void switchToVP(VirtProcr *nextProcr); + +void switchToCoreLoop(VirtProcr *nextProcr); + +void masterSwitchToCoreLoop(VirtProcr *nextProcr); + +#define flushRegisters() \ + asm volatile ("":::"%eax","%ebx", "%ecx", "%edx","%edi","%esi") + /*Isolating code for switching between animators within these macros -- at * some point will make switches to compile for 32 bit or for 64 bit, which * having these isolated will make cleaner @@ -63,113 +74,11 @@ *%%ebp is the frame-ptr register and %%esp is the stack-ptr register */ -//=========================== MasterVP to CoreLoop ========================== -// - //Save stack ptr and frame, restore CoreLoop's stack and frame, - // and clear the MasterLock - //GCC's -O3 messes with this -- go through generated -- protect somehow - // -#define masterSwitchToCoreLoop( masterPr ) \ - void *stackPtrAddr, *framePtrAddr; \ - volatile void *masterLockAddr; \ - void *jmpPt, *coreLoopFramePtr, *coreLoopStackPtr; \ -\ - masterLockAddr = &(_VMSMasterEnv->masterLock); \ -\ - jmpPt = _VMSMasterEnv->coreLoopStartPt; \ - coreLoopStackPtr = masterPr->coreLoopStackPtr; \ - coreLoopFramePtr = masterPr->coreLoopFramePtr; \ -\ - asm volatile("mov %0, %%ecx; \ - mov %1, %%ebx; \ - mov %%ebx, %%eax; \ - add $0x10, %%eax; \ - movl %%esp, (%%eax); \ - mov %%ebx, %%eax; \ - add $0x14, %%eax; \ - movl %%ebp, (%%eax); \ - movl %2, %%eax; \ - movl %3, %%esp; \ - movl %4, %%ebp; \ - movl $0x0, (%%ecx); \ - jmp %%eax" \ - /* outputs */ : "=g"(masterLockAddr) \ - /* inputs */ : "g"(masterPr), "g" (jmpPt), "g" (coreLoopStackPtr), \ - "g" (coreLoopFramePtr) \ - /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi" \ - ); - 
-// asm volatile("movl %0, %%eax; \ - movl %%esp, (%%eax); \ - movl %1, %%eax; \ - movl %%ebp, (%%eax); \ - movl %2, %%ebx; \ - movl %3, %%eax; \ - movl %4, %%esp; \ - movl %5, %%ebp; \ - movl $0x0, (%%ebx); \ - jmp %%eax;" \ - /* outputs */ : "=g" (stackPtrAddr), "=g" (framePtrAddr), \ - "=g"(masterLockAddr) \ - /* inputs */ : "g" (jmpPt), "g"(coreLoopStackPtr), "g"(coreLoopFramePtr)\ - /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi" \ - );//can probably make clobber list empty -- but safe for now //=========================== SlaveVP to CoreLoop =========================== // -#define SwitchToCoreLoop( animatingPr ) \ - void *jmpPt, *coreLoopStackPtr; \ - void *coreLoopFramePtr; \ -\ - jmpPt = _VMSMasterEnv->coreLoopStartPt; \ - coreLoopStackPtr = animatingPr->coreLoopStackPtr; \ - coreLoopFramePtr = animatingPr->coreLoopFramePtr; \ -\ - asm volatile("mov %0,%%ebx; \ - mov %%ebx, %%eax; \ - add $0x10, %%eax; \ - movl %%esp, (%%eax); \ - mov %%ebx, %%eax; \ - add $0x14, %%eax; \ - movl %%ebp, (%%eax); \ - movl %1, %%eax; \ - movl %2, %%esp; \ - movl %3, %%ebp; \ - jmp %%eax" \ - /* outputs */ : \ - /* inputs */ : "g"(animatingPr), "g" (jmpPt), "g" (coreLoopStackPtr), \ - "g" (coreLoopFramePtr) \ - /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi" \ - ); - - /*Save the virt procr's stack and frame ptrs*/ \ -// asm volatile("movl %0, %%eax; \ - movl %%esp, (%%eax); \ - movl %1, %%eax; \ - movl %%ebp, (%%eax) "\ - /* outputs */ : "=g" (stackPtrAddr), "=g" (framePtrAddr) \ - /* inputs */ : \ - /* clobber */ : "%eax" \ - ); \ -\ - /*restore coreloop's frame ptr, then jump back to "start" of core loop*/\ - /*Note, GCC compiles to assembly that saves esp and ebp in the stack*/ \ - /* frame -- so have to explicitly do assembly that saves to memory*/ \ - asm volatile("movl %0, %%eax; \ - movl %1, %%esp; \ - movl %2, %%ebp; \ - jmp %%eax " \ - /* outputs */ : \ - /* inputs */ : "m" (jmpPt), "m"(coreLoopStackPtr), 
"m"(coreLoopFramePtr)\ - /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx", "%edi","%esi" \ - ); - //list everything as clobbered to force GCC to save all - // live vars that are in regs on stack before this - // assembly, so that stack pointer is correct, before jmp - - //============================== CoreLoop to VP ============================= // @@ -184,53 +93,6 @@ // stack, so "movl %%ebp, %0" saves to the temp, NOT the data-struc! - //switch to virt procr's stack and frame ptr then jump to virt procr fn -/* VirtProcr offsets: - * 0xc stackPtr - * 0x10 framePtr - * 0x14 nextInstrPt - * 0x1c coreLoopFramePtr - * 0x20 coreLoopStackPtr - * - * _VMSMasterEnv offsets: - * 0x24 coreLoopStartPt - * 0x28 coreLoopEndPt - * 0x30 masterLock - */ -#define SwitchToVP( currPr ) \ - asm volatile("movl %0, %%ebx; \ - movl %%esp, 0x20(%%ebx); \ - movl %%ebp, 0x1c(%%ebx); \ - movl 0x14(%%ebx), %%eax; \ - movl 0x0c(%%ebx), %%esp; \ - movl 0x10(%%ebx), %%ebp; \ - jmp *%%eax" \ - /* outputs */ : \ - /* inputs */ : "g"(currPr) \ - /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi" \ - ); - -// void *stackPtr, *framePtr, *jmpPt; \ -\ - stackPtr = currPr->stackPtr; \ - framePtr = currPr->framePtr; \ - jmpPt = currPr->nextInstrPt; \ -\ - asm volatile("mov %0,%%ebx; \ - mov %%ebx, %%eax; \ - add $0x1c, %%eax; \ - movl %%esp, (%%eax); \ - mov %%ebx, %%eax; \ - add $0x20, %%eax; \ - movl %%ebp, (%%eax); \ - movl %1, %%eax; \ - movl %2, %%esp; \ - movl %3, %%ebp; \ - jmp %%eax" \ - /* outputs */ : \ - /* inputs */ : "g"(currPr), "g" (jmpPt), "g" (stackPtr), "g" (framePtr) \ - /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi" \ - ); #endif /* _SwitchAnimators_H */ diff -r 11bfe9d136ed -r 5ff1631c26ed VMS.c --- a/VMS.c Wed May 11 16:13:24 2011 +0200 +++ b/VMS.c Mon May 30 18:28:41 2011 +0200 @@ -11,6 +11,7 @@ #include #include "VMS.h" +#include "SwitchAnimators.h" #include "Queue_impl/BlockingQueue.h" #include "Histogram/Histogram.h" @@ 
-76,6 +77,8 @@ create_the_coreLoop_OS_threads(); } +#ifdef SEQUENTIAL + /*To initialize the sequential version, just don't create the threads */ void @@ -84,6 +87,8 @@ create_masterEnv(); } +#endif + void create_masterEnv() { MasterEnv *masterEnv; @@ -261,6 +266,7 @@ // the Master env and rest of VMS locations } +#ifdef SEQUENTIAL /*Only difference between version with an OS thread pinned to each core and * the sequential version of VMS is VMS__init_Seq, this, and coreLoop_Seq. */ @@ -272,8 +278,7 @@ coreLoop_Seq( NULL ); } - - +#endif /*Create stack, then create __cdecl structure on it and put initialData and * pointer to the new structure instance into the parameter positions on @@ -378,7 +383,7 @@ // scheduled again at some future point -- to resume, core loop jumps // to the resume point (below), which causes restore of saved regs and // "return" from this call. - animatingPr->nextInstrPt = &&ResumePt; + //animatingPr->nextInstrPt = &&ResumePt; //return ownership of the virt procr and sched slot to Master virt pr animatingPr->schedSlot->workIsDone = TRUE; @@ -390,51 +395,11 @@ #endif //======================================================================= -/* VirtProcr offsets: - * 0xc stackPtr - * 0x10 framePtr - * 0x14 nextInstrPt - * 0x1c coreLoopFramePtr - * 0x20 coreLoopStackPtr - * - * _VMSMasterEnv offsets: - * 0x24 coreLoopStartPt - * 0x28 coreLoopEndPt - * 0x30 masterLock - */ -// SwitchToCoreLoop( animatingPr ) - asm volatile("movl %0, %%ebx; \ - movl %1, %%ecx; \ - movl %%esp, 0x0c(%%ecx); \ - movl %%ebp, 0x10(%%ecx); \ - movl 0x24(%%ebx), %%eax; \ - movl 0x20(%%ecx), %%esp; \ - movl 0x1c(%%ecx), %%ebp; \ - jmp %%eax" \ - /* outputs */ : \ - /* inputs */ : "g"(_VMSMasterEnv), "g"(animatingPr) \ - /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi" \ - ); - -// asm volatile("mov %0,%%ebx; \ - mov %%ebx, %%eax; \ - add $0xc, %%eax; \ - movl %%esp, (%%eax); \ - mov %%ebx, %%eax; \ - add $0x10, %%eax; \ - movl %%ebp, (%%eax); \ - 
movl %1, %%eax; \ - movl %2, %%esp; \ - movl %3, %%ebp; \ - jmp %%eax" \ - /* outputs */ : \ - /* inputs */ : "g"(animatingPr), "g" (jmpPt), "g" (coreLoopStackPtr), \ - "g" (coreLoopFramePtr) \ - /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi" \ - ); + switchToCoreLoop(animatingPr); + flushRegisters(); //======================================================================= -ResumePt: + #ifdef MEAS__TIME_STAMP_SUSP //NOTE: only take low part of count -- do sanity check when take diff saveLowTimeStampCountInto( animatingPr->postSuspTSCLow ); @@ -738,21 +703,8 @@ */ void endOSThreadFn( void *initData, VirtProcr *animatingPr ) - { void *jmpPt, *coreLoopStackPtr, *coreLoopFramePtr; - - jmpPt = _VMSMasterEnv->coreLoopEndPt; - coreLoopStackPtr = animatingPr->coreLoopStackPtr; - coreLoopFramePtr = animatingPr->coreLoopFramePtr; - - - asm volatile("movl %0, %%eax; \ - movl %1, %%esp; \ - movl %2, %%ebp; \ - jmp %%eax " \ - /* outputs */ : \ - /* inputs */ : "m" (jmpPt), "m"(coreLoopStackPtr), "m"(coreLoopFramePtr)\ - /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx", "%edi","%esi" \ - ); + { + asmTerminateCoreLoop(animatingPr); } diff -r 11bfe9d136ed -r 5ff1631c26ed VMS.h --- a/VMS.h Wed May 11 16:13:24 2011 +0200 +++ b/VMS.h Mon May 30 18:28:41 2011 +0200 @@ -229,7 +229,7 @@ void *coreLoopEndPt; //addr to jump to to shut down a coreLoop int32 setupComplete; - int32 masterLock; + volatile int32 masterLock; int32 numMasterInARow[NUM_CORES];//detect back-to-back masterVP GateStruc *workStealingGates[ NUM_CORES ]; //concurrent work-steal diff -r 11bfe9d136ed -r 5ff1631c26ed contextSwitch.s --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/contextSwitch.s Mon May 30 18:28:41 2011 +0200 @@ -0,0 +1,128 @@ +#include "VMS.h" + +.data + + +.text + +//Save return label address for the coreLoop to pointer +//Arguments: Pointer to variable holding address +.globl saveCoreLoopReturnAddr +saveCoreLoopReturnAddr: + movl 0x4(%esp) , %eax #load pointer + 
movl $coreLoopReturn, %ecx #load label address + movl %ecx, (%eax) #save address + ret + + + +//Switches from CoreLoop to a VP, either a normal VP or the Master Loop +//switch to virt procr's stack and frame ptr then jump to virt procr fn +/* VirtProcr offsets: + * 0xc stackPtr + * 0x10 framePtr + * 0x14 nextInstrPt + * 0x1c coreLoopFramePtr + * 0x20 coreLoopStackPtr + * + * _VMSMasterEnv offsets: + * 0x24 coreLoopStartPt + * 0x28 coreLoopEndPt + * 0x30 masterLock + */ +.globl switchToVP +switchToVP: + movl 0x4(%esp) , %ecx #get VirtProcr + movl %esp , 0x20(%ecx) #save core loop stack pointer + movl %ebp , 0x1c(%ecx) #save core loop frame pointer + movl 0x0c(%ecx), %esp #restore stack pointer + movl 0x10(%ecx), %ebp #restore frame pointer + movl 0x14(%ecx), %eax #get jmp pointer + jmp *%eax #jmp to VP +coreLoopReturn: + ret + + +//switches to core loop. saves return address +/* VirtProcr offsets: + * 0xc stackPtr + * 0x10 framePtr + * 0x14 nextInstrPt + * 0x1c coreLoopFramePtr + * 0x20 coreLoopStackPtr + * + * _VMSMasterEnv offsets: + * 0x24 coreLoopStartPt + * 0x28 coreLoopEndPt + * 0x30 masterLock + */ +.globl switchToCoreLoop +switchToCoreLoop: + movl 0x4(%esp) , %ecx #get VirtProcr + movl $VPReturn , 0x14(%ecx) #store return address + movl %esp , 0x0c(%ecx) #save stack pointer + movl %ebp , 0x10(%ecx) #save frame pointer + movl 0x20(%ecx), %esp #restore stack pointer + movl 0x1c(%ecx), %ebp #restore frame pointer + movl $_VMSMasterEnv, %ecx + movl (%ecx) , %ecx + movl 0x24(%ecx), %eax #get CoreLoopStartPt + jmp *%eax #jmp to CoreLoop +VPReturn: + ret + + + +//switches to core loop from master. 
saves return address +/* VirtProcr offsets: + * 0xc stackPtr + * 0x10 framePtr + * 0x14 nextInstrPt + * 0x1c coreLoopFramePtr + * 0x20 coreLoopStackPtr + * + * _VMSMasterEnv offsets: + * 0x24 coreLoopStartPt + * 0x28 coreLoopEndPt + * 0x30 masterLock + */ +.globl masterSwitchToCoreLoop +masterSwitchToCoreLoop: + movl 0x4(%esp) , %ecx #get VirtProcr + movl $MasterReturn, 0x14(%ecx) #store return address + movl %esp , 0x0c(%ecx) #save stack pointer + movl %ebp , 0x10(%ecx) #save frame pointer + movl 0x20(%ecx), %esp #restore stack pointer + movl 0x1c(%ecx), %ebp #restore frame pointer + movl $_VMSMasterEnv, %ecx + movl (%ecx) , %ecx + movl 0x24(%ecx), %eax #get CoreLoopStartPt + movl $0x0 , 0x30(%ecx) #release lock + jmp *%eax #jmp to CoreLoop +MasterReturn: + ret + + +//Switch to terminateCoreLoop +//no need to call because the stack is already set up for switchToVP +//do not save registers of VP because this function will never return +/* VirtProcr offsets: + * 0xc stackPtr + * 0x10 framePtr + * 0x14 nextInstrPt + * 0x1c coreLoopFramePtr + * 0x20 coreLoopStackPtr + * + * _VMSMasterEnv offsets: + * 0x24 coreLoopStartPt + * 0x28 coreLoopEndPt + * 0x30 masterLock + */ +.globl asmTerminateCoreLoop +asmTerminateCoreLoop: + movl 0x4(%esp) , %ecx #get VirtProcr + movl 0x20(%ecx), %esp #restore stack pointer + movl 0x1c(%ecx), %ebp #restore frame pointer + movl $terminateCoreLoop, %eax + jmp *%eax #jmp to terminateCoreLoop +