# HG changeset patch # User Merten Sach # Date 1306492540 -7200 # Node ID f9b60012fd74c5d4eaf994285a6015a197a7082b # Parent 11bfe9d136ed2a011f18f222fa1e6523934c2805 working ucontext version diff -r 11bfe9d136ed -r f9b60012fd74 CoreLoop.c --- a/CoreLoop.c Wed May 11 16:13:24 2011 +0200 +++ b/CoreLoop.c Fri May 27 12:35:40 2011 +0200 @@ -27,7 +27,7 @@ *This cycle then repeats, until a special shutdown virtual processor is * animated, which jumps to the end point at the bottom of core loop. */ -void * +void coreLoop( void *paramsIn ) { ThdParams *coreLoopThdParams; @@ -36,8 +36,9 @@ VMSQueueStruc *readyToAnimateQ; unsigned long coreMask; //has 1 in bit positions of allowed cores int errorCode; + ucontext_t coreLoopContext; - //work-stealing struc on stack to prevent false-sharing in cache-line + //work-stealing struc on stack to prevent false-sharing in cache-line volatile GateStruc gate; //preGateProgress, waitProgress, exitProgress, gateClosed; @@ -78,8 +79,8 @@ //To get label addr in non-gcc compiler, can trick it by making a call // to a fn that does asm that pulls the "return" // addr off the stack and stores it in a pointed-to location. - _VMSMasterEnv->coreLoopEndPt = &&CoreLoopEndPt; - _VMSMasterEnv->coreLoopStartPt = &&CoreLoopStartPt; + //VMSMasterEnv->coreLoopEndPt = &&CoreLoopEndPt; + //VMSMasterEnv->coreLoopStartPt = &&CoreLoopStartPt; //Core loop has no values live upon CoreLoopStartPt except // _VMSMasterEnv @@ -90,7 +91,7 @@ // Get to work! -- virt procr jumps back here when suspends //Note, have to restore the frame-pointer before jump to here, to get // this code to work right (readyToAnimateQ and so forth are frame-ptr relative) -CoreLoopStartPt: + while(1){ //Get virtual processor from queue //The Q must be a global, static volatile var, so not kept in reg, @@ -117,7 +118,8 @@ currPr = (VirtProcr *) readVMSQ( readyToAnimateQ ); #endif - if( currPr != NULL ) _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0; + if( currPr != NULL ) + _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0; else { //============================= MEASUREMENT STUFF ===================== @@ -127,7 +129,7 @@ #endif //===================================================================== int tries = 0; int gotLock = 0; - while( currPr == NULL ) //if queue was empty, enter get masterLock loop + while( currPr == NULL ) //if queue was empty, enter loop to get MasterLock { //queue was empty, so get master lock gotLock = __sync_bool_compare_and_swap(&(_VMSMasterEnv->masterLock), @@ -158,21 +160,33 @@ } + //SwitchToVP( currPr ) + //Swap to current VProc + currPr->savedCoreLoopStatus = &coreLoopContext; + coreLoopContext.uc_link=NULL; + swapcontext(&coreLoopContext,currPr->savedVPStatus); + } + - SwitchToVP( currPr ) - - //=========== jmp to here when want to shut down the VMS system ========== - CoreLoopEndPt: - //first free shutdown VP that jumped here -- it first restores the - // coreloop's stack, so addr of currPr in stack frame is still correct + //first free shutdown VP that jumped here -- it first restores the + // coreloop's stack, so addr of currPr in stack frame is still correct VMS__dissipate_procr( currPr ); pthread_exit( NULL ); } +void terminateCoreLoop(VirtProcr *currPr) +{ + //=========== jmp to here when want to shut down the VMS system ========== + //first free shutdown VP that jumped here -- it first restores the + // coreloop's stack, so addr of currPr in stack frame is still correct + VMS__dissipate_procr( currPr ); + pthread_exit( NULL ); +} +#ifdef SEQUENTIAL //=========================================================================== /*This sequential version is exact same as threaded, except doesn't do the * pin-threads part, nor the wait until setup complete part. @@ -237,3 +251,4 @@ VMS__dissipate_procr( currPr ); //free shutdown pr, that jmpd here return; } +#endif diff -r 11bfe9d136ed -r f9b60012fd74 MasterLoop.c --- a/MasterLoop.c Wed May 11 16:13:24 2011 +0200 +++ b/MasterLoop.c Fri May 27 12:35:40 2011 +0200 @@ -20,7 +20,6 @@ //=========================================================================== - /*This code is animated by the virtual Master processor. * *Polls each sched slot exactly once, hands any requests made by a newly @@ -84,16 +83,12 @@ int32 thisCoresIdx; VirtProcr *masterPr; - volatile VirtProcr *volatileMasterPr; - - volatileMasterPr = animatingPr; - masterPr = (VirtProcr*)volatileMasterPr; //used to force re-define after jmp //First animation of each MasterVP will in turn animate this part // of setup code.. (VP creator sets up the stack as if this function // was called normally, but actually get here by jmp) //So, setup values about stack ptr, jmp pt and all that - masterPr->nextInstrPt = &&masterLoopStartPt; + masterPr = animatingPr; //just to make sure after jmp //Note, got rid of writing the stack and frame ptr up here, because @@ -102,9 +97,21 @@ // new frame and stack ptr to the MasterVP storage before a second // version of that MasterVP can get animated on a different core. //Also got rid of the busy-wait. + + masterEnv = (MasterEnv*)_VMSMasterEnv; + + thisCoresIdx = masterPr->coreAnimatedBy; + readyToAnimateQ = masterEnv->readyToAnimateQs[thisCoresIdx]; + schedSlots = masterEnv->allSchedSlots[thisCoresIdx]; + + requestHandler = masterEnv->requestHandler; + slaveScheduler = masterEnv->slaveScheduler; + semanticEnv = masterEnv->semanticEnv; + - masterLoopStartPt: + while(1){ + //============================= MEASUREMENT STUFF ======================== #ifdef MEAS__TIME_MASTER //Total Master time includes one coreloop time -- just assume the core @@ -114,20 +121,7 @@ #endif //======================================================================== - masterEnv = (MasterEnv*)_VMSMasterEnv; - - //GCC may optimize so doesn't always re-define from frame-storage - masterPr = (VirtProcr*)volatileMasterPr; //just to make sure after jmp - thisCoresIdx = masterPr->coreAnimatedBy; - readyToAnimateQ = masterEnv->readyToAnimateQs[thisCoresIdx]; - schedSlots = masterEnv->allSchedSlots[thisCoresIdx]; - - requestHandler = masterEnv->requestHandler; - slaveScheduler = masterEnv->slaveScheduler; - semanticEnv = masterEnv->semanticEnv; - - - //Poll each slot's Done flag + //Poll each slot's Done flag numSlotsFilled = 0; for( slotIdx = 0; slotIdx < NUM_SCHED_SLOTS; slotIdx++) { @@ -185,39 +179,16 @@ saveLowTimeStampCountInto( masterPr->endMasterTSCLow ); #endif - - -/* VirtProcr offsets: - * 0xc stackPtr - * 0x10 framePtr - * 0x14 nextInstrPt - * 0x1c coreLoopFramePtr - * 0x20 coreLoopStackPtr - * - * _VMSMasterEnv offsets: - * 0x24 coreLoopStartPt - * 0x28 coreLoopEndPt - * 0x30 masterLock - */ -// masterSwitchToCoreLoop( masterPr ) - asm volatile("movl %0, %%ebx; \ - movl %1, %%ecx; \ - movl %%esp, 0x0c(%%ecx); \ - movl %%ebp, 0x10(%%ecx); \ - movl 0x24(%%ebx), %%eax; \ - movl 0x20(%%ecx), %%esp; \ - movl 0x1c(%%ecx), %%ebp; \ - movl $0x0, 0x30(%%ebx); \ - jmp %%eax" \ - /* outputs */ : \ - /* inputs */ : "g"(_VMSMasterEnv), "g"(masterPr) \ - /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi" \ - ); + //Clear lock + _VMSMasterEnv->masterLock = 0; + + //Swap to coreLoop + swapcontext(masterPr->savedVPStatus,masterPr->savedCoreLoopStatus); + } } - /*This has a race condition -- the coreloops are accessing their own queues * at the same time that this work-stealer on a different core is trying to */ diff -r 11bfe9d136ed -r f9b60012fd74 VMS.c --- a/VMS.c Wed May 11 16:13:24 2011 +0200 +++ b/VMS.c Fri May 27 12:35:40 2011 +0200 @@ -261,6 +261,7 @@ // the Master env and rest of VMS locations } +#ifdef SEQUENTIAL /*Only difference between version with an OS thread pinned to each core and * the sequential version of VMS is VMS__init_Seq, this, and coreLoop_Seq. */ @@ -272,6 +273,7 @@ coreLoop_Seq( NULL ); } +#endif @@ -287,28 +289,30 @@ */ inline VirtProcr * create_procr_helper( VirtProcr *newPr, VirtProcrFnPtr fnPtr, - void *initialData, char *stackLocs ) + void *initialData, char *stackLocs, + ucontext_t *contextBuf) { - char *stackPtr; - + char *stackPtr = ( (char *)stackLocs + VIRT_PROCR_STACK_SIZE - 0x4); + + getcontext(contextBuf); + contextBuf->uc_stack.ss_sp=stackLocs; + contextBuf->uc_stack.ss_size=VIRT_PROCR_STACK_SIZE; + contextBuf->uc_stack.ss_flags=0; + makecontext(contextBuf, fnPtr, 2, initialData, newPr); + contextBuf->uc_link=NULL; + + newPr->procrID = _VMSMasterEnv->numProcrsCreated++; + newPr->savedVPStatus = contextBuf; + newPr->initialData = initialData; newPr->startOfStack = stackLocs; - newPr->procrID = _VMSMasterEnv->numProcrsCreated++; - newPr->nextInstrPt = fnPtr; - newPr->initialData = initialData; newPr->requests = NULL; newPr->schedSlot = NULL; - - //fnPtr takes two params -- void *initData & void *animProcr - //alloc stack locations, make stackPtr be the highest addr minus room - // for 2 params + return addr. Return addr (NULL) is in loc pointed to - // by stackPtr, initData at stackPtr + 4 bytes, animatingPr just above - stackPtr = ( (char *)stackLocs + VIRT_PROCR_STACK_SIZE - 0x10 ); //setup __cdecl on stack -- coreloop will switch to stackPtr before jmp - *( (int *)stackPtr + 2 ) = (int) newPr; //rightmost param -- 32bit pointer - *( (int *)stackPtr + 1 ) = (int) initialData; //next param to left - newPr->stackPtr = stackPtr; //core loop will switch to this, then - newPr->framePtr = stackPtr; //suspend loop will save new stack & frame ptr + //*( (int *)stackPtr + 2 ) = (int) newPr; //rightmost param -- 32bit pointer + //*( (int *)stackPtr + 1 ) = (int) initialData; //next param to left + //newPr->stackPtr = stackPtr; //core loop will switch to this, then + //newPr->framePtr = stackPtr; //suspend loop will save new stack & frame ptr //============================= MEASUREMENT STUFF ======================== #ifdef STATS__TURN_ON_PROBES @@ -326,13 +330,16 @@ VMS__create_procr( VirtProcrFnPtr fnPtr, void *initialData ) { VirtProcr *newPr; char *stackLocs; + ucontext_t *contextBuffer; newPr = VMS__malloc( sizeof(VirtProcr) ); stackLocs = VMS__malloc( VIRT_PROCR_STACK_SIZE ); + contextBuffer = VMS__malloc(sizeof(ucontext_t)); + if( stackLocs == 0 ) { perror("VMS__malloc stack"); exit(1); } - return create_procr_helper( newPr, fnPtr, initialData, stackLocs ); + return create_procr_helper( newPr, fnPtr, initialData, stackLocs, contextBuffer ); } /* "ext" designates that it's for use outside the VMS system -- should only @@ -343,13 +350,16 @@ VMS_ext__create_procr( VirtProcrFnPtr fnPtr, void *initialData ) { VirtProcr *newPr; char *stackLocs; + ucontext_t *contextBuffer; newPr = malloc( sizeof(VirtProcr) ); stackLocs = malloc( VIRT_PROCR_STACK_SIZE ); + contextBuffer = VMS__malloc(sizeof(ucontext_t)); + if( stackLocs == 0 ) { perror("malloc stack"); exit(1); } - return create_procr_helper( newPr, fnPtr, initialData, stackLocs ); + return create_procr_helper( newPr, fnPtr, initialData, stackLocs, contextBuffer ); } @@ -373,14 +383,7 @@ void VMS__suspend_procr( VirtProcr *animatingPr ) { - - //The request to master will cause this suspended virt procr to get - // scheduled again at some future point -- to resume, core loop jumps - // to the resume point (below), which causes restore of saved regs and - // "return" from this call. - animatingPr->nextInstrPt = &&ResumePt; - - //return ownership of the virt procr and sched slot to Master virt pr + //return ownership of the virt procr and sched slot to Master virt pr animatingPr->schedSlot->workIsDone = TRUE; //=========================== Measurement stuff ======================== @@ -389,57 +392,14 @@ saveLowTimeStampCountInto( animatingPr->preSuspTSCLow ); #endif //======================================================================= - -/* VirtProcr offsets: - * 0xc stackPtr - * 0x10 framePtr - * 0x14 nextInstrPt - * 0x1c coreLoopFramePtr - * 0x20 coreLoopStackPtr - * - * _VMSMasterEnv offsets: - * 0x24 coreLoopStartPt - * 0x28 coreLoopEndPt - * 0x30 masterLock - */ -// SwitchToCoreLoop( animatingPr ) - asm volatile("movl %0, %%ebx; \ - movl %1, %%ecx; \ - movl %%esp, 0x0c(%%ecx); \ - movl %%ebp, 0x10(%%ecx); \ - movl 0x24(%%ebx), %%eax; \ - movl 0x20(%%ecx), %%esp; \ - movl 0x1c(%%ecx), %%ebp; \ - jmp %%eax" \ - /* outputs */ : \ - /* inputs */ : "g"(_VMSMasterEnv), "g"(animatingPr) \ - /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi" \ - ); - -// asm volatile("mov %0,%%ebx; \ - mov %%ebx, %%eax; \ - add $0xc, %%eax; \ - movl %%esp, (%%eax); \ - mov %%ebx, %%eax; \ - add $0x10, %%eax; \ - movl %%ebp, (%%eax); \ - movl %1, %%eax; \ - movl %2, %%esp; \ - movl %3, %%ebp; \ - jmp %%eax" \ - /* outputs */ : \ - /* inputs */ : "g"(animatingPr), "g" (jmpPt), "g" (coreLoopStackPtr), \ - "g" (coreLoopFramePtr) \ - /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi" \ - ); + + swapcontext(animatingPr->savedVPStatus,animatingPr->savedCoreLoopStatus); //======================================================================= -ResumePt: #ifdef MEAS__TIME_STAMP_SUSP //NOTE: only take low part of count -- do sanity check when take diff saveLowTimeStampCountInto( animatingPr->postSuspTSCLow ); #endif - return; } @@ -739,21 +699,15 @@ void endOSThreadFn( void *initData, VirtProcr *animatingPr ) { void *jmpPt, *coreLoopStackPtr, *coreLoopFramePtr; - - jmpPt = _VMSMasterEnv->coreLoopEndPt; - coreLoopStackPtr = animatingPr->coreLoopStackPtr; - coreLoopFramePtr = animatingPr->coreLoopFramePtr; - - - asm volatile("movl %0, %%eax; \ - movl %1, %%esp; \ - movl %2, %%ebp; \ - jmp %%eax " \ - /* outputs */ : \ - /* inputs */ : "m" (jmpPt), "m"(coreLoopStackPtr), "m"(coreLoopFramePtr)\ - /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx", "%edi","%esi" \ - ); - } + ucontext_t* contextBuf = animatingPr->savedCoreLoopStatus; + void *stackPtr = contextBuf->uc_mcontext.gregs[REG_ESP]; + + getcontext(contextBuf); + contextBuf->uc_stack.ss_sp=stackPtr; + makecontext(contextBuf,&terminateCoreLoop,1,animatingPr); + contextBuf->uc_link=NULL; + swapcontext(animatingPr->savedVPStatus,contextBuf); +} /*This is called from the startup & shutdown diff -r 11bfe9d136ed -r f9b60012fd74 VMS.h --- a/VMS.h Wed May 11 16:13:24 2011 +0200 +++ b/VMS.h Fri May 27 12:35:40 2011 +0200 @@ -17,9 +17,9 @@ #include "Hash_impl/PrivateHash.h" #include "vmalloc.h" -//#include #include #include +#include //=============================== Debug =================================== @@ -51,9 +51,9 @@ #define DEBUG2( bool, msg, p1, p2) \ // if(bool) {printf(msg, p1, p2); fflush(stdin);} -#define ERROR(msg) printf(msg); //fflush(stdin); -#define ERROR1(msg, param) printf(msg, param); fflush(stdin); -#define ERROR2(msg, p1, p2) printf(msg, p1, p2); fflush(stdin); +#define ERROR(msg) printf(msg); +#define ERROR1(msg, param) printf(msg, param); +#define ERROR2(msg, p1, p2) printf(msg, p1, p2); //=========================== STATS ======================= @@ -175,13 +175,15 @@ { int procrID; //for debugging -- count up each time create int coreAnimatedBy; void *startOfStack; - void *stackPtr; - void *framePtr; - void *nextInstrPt; + //void *stackPtr; + //void *framePtr; + //void *nextInstrPt; + ucontext_t *savedVPStatus; - void *coreLoopStartPt; //allows proto-runtime to be linked later - void *coreLoopFramePtr; //restore before jmp back to core loop - void *coreLoopStackPtr; //restore before jmp back to core loop + //void *coreLoopStartPt; //allows proto-runtime to be linked later + //void *coreLoopFramePtr; //restore before jmp back to core loop + //void *coreLoopStackPtr; //restore before jmp back to core loop + ucontext_t *savedCoreLoopStatus; void *initialData; @@ -225,11 +227,11 @@ MallocProlog *freeListHead; int32 amtOfOutstandingMem; //total currently allocated - void *coreLoopStartPt;//addr to jump to to re-enter coreLoop - void *coreLoopEndPt; //addr to jump to to shut down a coreLoop + //void *coreLoopStartPt;//addr to jump to to re-enter coreLoop + //void *coreLoopEndPt; //addr to jump to to shut down a coreLoop int32 setupComplete; - int32 masterLock; + volatile int32 masterLock; int32 numMasterInARow[NUM_CORES];//detect back-to-back masterVP GateStruc *workStealingGates[ NUM_CORES ]; //concurrent work-steal @@ -278,11 +280,11 @@ //======================= OS Thread related =============================== -void * coreLoop( void *paramsIn ); //standard PThreads fn prototype +void coreLoop( void *paramsIn ); //standard PThreads fn prototype void * coreLoop_Seq( void *paramsIn ); //standard PThreads fn prototype +void terminateCoreLoop(VirtProcr *currPr); void masterLoop( void *initData, VirtProcr *masterPr ); - typedef struct { void *endThdPt; @@ -296,7 +298,6 @@ pthread_cond_t suspend_cond; - //===================== Global Vars =================== volatile MasterEnv *_VMSMasterEnv; @@ -548,6 +549,18 @@ //===== +//======= Utilities ====== +/* + * This Macro makes the gcc reload all variables from Stack after this function. + * This is necessary because we jmp into the masterLoop and the variables are + * kept in Registers for optimization. + */ +#define flushRegisters() \ + asm volatile ("" /*no instr needed*/ \ + ::: /*no output and input*/ \ + "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi") +//======== + #include "SwitchAnimators.h" #include "probes.h" #include "vutilities.h"