Mercurial > cgi-bin > hgwebdir.cgi > VMS > VMS_Implementations > VMS_impls > VMS__MC_shared_impl
changeset 26:668278fa7a63
Sequential -- just starting to add sequential version
| author | Me |
|---|---|
| date | Mon, 26 Jul 2010 15:25:53 -0700 |
| parents | c556193f7211 |
| children | 5a2068cbc28b |
| files | CoreLoop.c CoreLoop_Seq.c MasterLoop.c VMS.c VMS.h VMS_Seq.c |
| diffstat | 6 files changed, 644 insertions(+), 85 deletions(-) [+] |
line diff
1.1 --- a/CoreLoop.c Sat Jul 24 08:58:47 2010 -0700 1.2 +++ b/CoreLoop.c Mon Jul 26 15:25:53 2010 -0700 1.3 @@ -9,7 +9,9 @@ 1.4 #include "Queue_impl/BlockingQueue.h" 1.5 1.6 #include <stdio.h> 1.7 +#include <stdlib.h> 1.8 #include <time.h> 1.9 + 1.10 #include <pthread.h> 1.11 #include <sched.h> 1.12 1.13 @@ -26,7 +28,7 @@ 1.14 { 1.15 ThdParams *coreLoopThdParams; 1.16 VirtProcr *currPr; 1.17 - CASQueueStruc *workQ; 1.18 + VMSQueueStruc *workQ; 1.19 unsigned long coreMask; //has 1 in bit positions of allowed cores 1.20 int errorCode; 1.21 1.22 @@ -34,29 +36,37 @@ 1.23 coreLoopThdParams = (ThdParams *)paramsIn; 1.24 1.25 //wait until signalled that setup is complete 1.26 - pthread_mutex_lock( _VMSMasterEnv->suspend_mutex ); 1.27 + pthread_mutex_lock( &suspendLock ); 1.28 while( !(_VMSMasterEnv->setupComplete) ) 1.29 { 1.30 - pthread_cond_wait( _VMSMasterEnv->suspend_cond, 1.31 - _VMSMasterEnv->suspend_mutex ); 1.32 + pthread_cond_wait( &suspend_cond, 1.33 + &suspendLock ); 1.34 } 1.35 - pthread_mutex_unlock( _VMSMasterEnv->suspend_mutex ); 1.36 + pthread_mutex_unlock( &suspendLock ); 1.37 + 1.38 + printf( "\nCore unsuspended: %d\n", coreLoopThdParams->coreNum ); 1.39 1.40 //set thread affinity 1.41 //Linux requires pinning thd to core inside thread-function 1.42 //Designate a core by a 1 in bit-position corresponding to the core 1.43 - coreMask = 1 << coreLoopThdParams->coreNum 1.44 +// cpu_set_t cpuMask; 1.45 +// CPU_ZERO( &cpuMask ); 1.46 +// CPU_SET( coreLoopThdParams->coreNum, &cpuMask ); 1.47 + 1.48 + coreMask = 1 << coreLoopThdParams->coreNum; 1.49 + 1.50 + pthread_t selfThd = pthread_self(); 1.51 errorCode = 1.52 - pthread_setaffinity_np( pthread_self(), sizeof(coreMask), coreMask); 1.53 + pthread_setaffinity_np( selfThd, sizeof(coreMask), &coreMask); 1.54 1.55 - if(errorCode){ printf("\nset affinity failure\n"); exit(); } 1.56 + if(errorCode){ printf("\nset affinity failure\n"); exit(0); } 1.57 1.58 1.59 //Save addr of "end core loop" label - jump to it to 
shut down coreloop 1.60 //To get label addr in non-gcc compiler, can trick it by making a call 1.61 // to a fn that does asm that pulls the "return" 1.62 // addr off the stack and stores it in a pointed-to location. 1.63 - _VMSMasterEnv->coreLoopShutDownPt = &&EndCoreLoop; 1.64 + _VMSMasterEnv->coreLoopShutDownPt = &&CoreLoopEndPt; 1.65 1.66 //Core loop has no values live upon CoreLoopStartPt except workQ 1.67 // every value in the code is defined by a statement in core loop, 1.68 @@ -72,7 +82,7 @@ 1.69 //_VMSWorkQ must be a global, static volatile var, so not kept in reg, 1.70 // which forces reloading the pointer after each jmp to this point 1.71 workQ = _VMSWorkQ; 1.72 - currPr = (VirtProcr *) readCASQ( workQ ); 1.73 + currPr = (VirtProcr *) readVMSQ( workQ ); 1.74 1.75 // printf("core %d loop procr addr: %d\n", coreLoopThdParams->coreNum, \ 1.76 // (int)currPr ); fflush(stdin); 1.77 @@ -116,6 +126,6 @@ 1.78 //======================================================================== 1.79 1.80 //jmp to here when want to shut down the VMS system 1.81 - EndCoreLoop: 1.82 + CoreLoopEndPt: 1.83 pthread_exit( NULL ); 1.84 }
2.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 2.2 +++ b/CoreLoop_Seq.c Mon Jul 26 15:25:53 2010 -0700 2.3 @@ -0,0 +1,97 @@ 2.4 +/* 2.5 + * Copyright 2010 OpenSourceCodeStewardshipFoundation 2.6 + * 2.7 + * Licensed under BSD 2.8 + */ 2.9 + 2.10 + 2.11 +#include "VMS.h" 2.12 +#include "Queue_impl/BlockingQueue.h" 2.13 + 2.14 +#include <stdio.h> 2.15 +#include <stdlib.h> 2.16 +#include <time.h> 2.17 + 2.18 + 2.19 + 2.20 +/*This is the loop that runs in the PThread pinned to each core 2.21 + * get work-unit struc from queue, 2.22 + * call function-ptr, passing it pointer to data 2.23 + * transfer return value to slave's "requests" pointer 2.24 + * write the slave's "Done" flag and repeat. 2.25 + */ 2.26 +//pthread_create requires ptr to func that takes void * and returns void * 2.27 +void * 2.28 +coreLoop_Seq( void *paramsIn ) 2.29 + { 2.30 + VirtProcr *currPr; 2.31 + VMSQueueStruc *workQ; 2.32 + 2.33 + 2.34 + //Save addr of "end core loop" label - jump to it to shut down coreloop 2.35 + //To get label addr in non-gcc compiler, can trick it by making a call 2.36 + // to a fn that does asm that pulls the "return" 2.37 + // addr off the stack and stores it in a pointed-to location. 2.38 + _VMSMasterEnv->coreLoopShutDownPt = &&CoreLoopEndPt; 2.39 + 2.40 + //Core loop has no values live upon CoreLoopStartPt except workQ 2.41 + // every value in the code is defined by a statement in core loop, 2.42 + // after the start point -- with the one exception of _VMSWorkQ 2.43 + 2.44 + 2.45 + // Get to work! 
-- virt procr jumps back here when done or suspends 2.46 + //Note, have to restore the frame-pointer before jump to here, to get 2.47 + // this code to work right (workQ and so forth are frame-ptr relative) 2.48 +CoreLoopStartPt: 2.49 + 2.50 + //Get virtual processor from queue 2.51 + //_VMSWorkQ must be a global, static volatile var, so not kept in reg, 2.52 + // which forces reloading the pointer after each jmp to this point 2.53 + workQ = _VMSWorkQ; 2.54 + currPr = (VirtProcr *) readVMSQ( workQ ); 2.55 + 2.56 +// printf("core %d loop procr addr: %d\n", coreLoopThdParams->coreNum, \ 2.57 +// (int)currPr ); fflush(stdin); 2.58 + currPr->coreLoopStartPt = &&CoreLoopStartPt; //to be sure.(GCC specific) 2.59 + 2.60 + currPr->coreAnimatedBy = coreLoopThdParams->coreNum; 2.61 + 2.62 + //switch to virt procr's stack and frame ptr then jump to virt procr 2.63 + void *stackPtr, *framePtr, *jmpPt, *coreLoopFramePtrAddr, \ 2.64 + *coreLoopStackPtrAddr; 2.65 + 2.66 + stackPtr = currPr->stackPtr; 2.67 + framePtr = currPr->framePtr; 2.68 + jmpPt = currPr->nextInstrPt; 2.69 + coreLoopFramePtrAddr = &(currPr->coreLoopFramePtr); 2.70 + coreLoopStackPtrAddr = &(currPr->coreLoopStackPtr); 2.71 + 2.72 + //Save the core loop's stack and frame pointers into virt procr struct 2.73 + // then switch to stack ptr and frame ptr of virt procr & jmp to it 2.74 + //This was a pain to get right because GCC converts the "(jmpPt)" to 2.75 + // frame-relative mem-op -- so generated machine code first changed the 2.76 + // frame pointer, then tried to jump to an addr stored on stack, which 2.77 + // it accessed as an offset from frame-ptr! (wrong frame-ptr now) 2.78 + //Explicitly loading into eax before changing frame-ptr fixed it 2.79 + //Also, it turns "(currPr->coreLoopFramePtr)" into a temporary on the 2.80 + // stack, so "movl %%ebp, %0" saves to the temp, NOT the data-struc! 
2.81 + asm volatile("movl %0, %%eax; \ 2.82 + movl %%esp, (%%eax); \ 2.83 + movl %1, %%eax; \ 2.84 + movl %%ebp, (%%eax); \ 2.85 + movl %2, %%eax; \ 2.86 + movl %3, %%esp; \ 2.87 + movl %4, %%ebp; \ 2.88 + jmp %%eax" \ 2.89 + /* outputs */ : "=g"(coreLoopStackPtrAddr), \ 2.90 + "=g"(coreLoopFramePtrAddr) \ 2.91 + /* inputs */ : "g" (jmpPt), "g" (stackPtr), "g" (framePtr) \ 2.92 + /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi" \ 2.93 + ); 2.94 + 2.95 + //======================================================================== 2.96 + 2.97 + //jmp to here when want to shut down the VMS system 2.98 + CoreLoopEndPt: 2.99 + return; 2.100 + }
3.1 --- a/MasterLoop.c Sat Jul 24 08:58:47 2010 -0700 3.2 +++ b/MasterLoop.c Mon Jul 26 15:25:53 2010 -0700 3.3 @@ -6,7 +6,6 @@ 3.4 3.5 3.6 3.7 -#include <windows.h> 3.8 #include <stdio.h> 3.9 #include <malloc.h> 3.10 #include <stddef.h> 3.11 @@ -48,11 +47,11 @@ 3.12 */ 3.13 void masterLoop( void *initData, VirtProcr *masterPr ) 3.14 { 3.15 - int slotIdx, numFilled, numInFirstChunk, filledSlotIdx; 3.16 + int slotIdx, numFilled, filledSlotIdx, masterHasBeenQueued; 3.17 VirtProcr *schedVirtPr; 3.18 SchedSlot *currSlot, **schedSlots, **filledSlots; 3.19 MasterEnv *masterEnv; 3.20 - CASQueueStruc *workQ; 3.21 + VMSQueueStruc *workQ; 3.22 void *jmpPt, *stackPtrAddr, *framePtrAddr, *stillRunningAddr; 3.23 void *coreLoopFramePtr, *coreLoopStackPtr, *semanticEnv; 3.24 3.25 @@ -65,7 +64,26 @@ 3.26 // of setup code.. 3.27 masterPr->nextInstrPt = &&masterLoopStartPt; 3.28 3.29 - 3.30 + //The second time MasterVP comes out of queue, the first animation of 3.31 + // it hasn't written the stackPtr and framePtr yet -- but the second 3.32 + // animation has already had its stackPtr and framePtr set to the old 3.33 + // value by the coreLoop. Fix this by writing the correct stack and 3.34 + // frame pointers here, at which point they're correct in the first 3.35 + // animation of MasterVP. 
3.36 + //TODO: remove writing stackPtr and framePtr at the bottom, for eff 3.37 + stackPtrAddr = &(masterPr->stackPtr); 3.38 + framePtrAddr = &(masterPr->framePtr); 3.39 + 3.40 + asm volatile("movl %0, %%eax; \ 3.41 + movl %%esp, (%%eax); \ 3.42 + movl %1, %%eax; \ 3.43 + movl %%ebp, (%%eax); " 3.44 + /* outputs */ : "=g" (stackPtrAddr), "=g" (framePtrAddr) \ 3.45 + /* inputs */ : \ 3.46 + /* clobber */ : "memory", "%eax", "%ebx" \ 3.47 + ); 3.48 + 3.49 + 3.50 masterLoopStartPt: 3.51 3.52 //if another reference to same Master VirtProcr still going, busy-wait 3.53 @@ -88,10 +106,11 @@ 3.54 semanticEnv = masterEnv->semanticEnv; 3.55 3.56 //prepare for scheduling 3.57 - masterEnv->numFilled = 0; 3.58 + numFilled = 0; 3.59 + masterHasBeenQueued = FALSE; 3.60 3.61 //Poll each slot's Done flag -- slot 0 reserved for master, start at 1 3.62 - for( slotIdx = 1; slotIdx < NUM_SCHED_SLOTS; slotIdx++) 3.63 + for( slotIdx = 0; slotIdx < NUM_SCHED_SLOTS; slotIdx++) 3.64 { 3.65 currSlot = schedSlots[ slotIdx ]; 3.66 3.67 @@ -110,50 +129,63 @@ 3.68 3.69 if( schedVirtPr != NULL ) 3.70 { currSlot->procrAssignedToSlot = schedVirtPr; 3.71 - schedVirtPr->schedSlot = currSlot; 3.72 + schedVirtPr->schedSlot = currSlot; 3.73 + currSlot->needsProcrAssigned = FALSE; 3.74 3.75 - filledSlots[ masterEnv->numFilled ] = currSlot; 3.76 - masterEnv->numFilled += 1; 3.77 + filledSlots[ numFilled ] = currSlot; 3.78 + numFilled += 1; 3.79 3.80 - currSlot->needsProcrAssigned = FALSE; 3.81 + writeVMSQ( schedVirtPr, workQ ); 3.82 + if( numFilled == masterEnv->numToPrecede ) 3.83 + { 3.84 + writeVMSQ( masterEnv->masterVirtPr, workQ ); 3.85 + masterHasBeenQueued = TRUE; 3.86 + } 3.87 + 3.88 } 3.89 } 3.90 } 3.91 3.92 + if( !masterHasBeenQueued ) 3.93 + { 3.94 + writeVMSQ( masterEnv->masterVirtPr, workQ ); 3.95 + } 3.96 + 3.97 + //Adjust the number to precede, for next round -- assume rate of 3.98 + // finishing work is stable -- which is a bad assumption! 
But, just 3.99 + // want something working for the moment, look at dynamic behavior 3.100 + // later 3.101 +//TODO: look at dynamic behavior -- time-average numToPrecede or something 3.102 + if( numFilled < NUM_CORES - 1 ) 3.103 + { 3.104 + masterEnv->numToPrecede = 0; 3.105 + } 3.106 + else 3.107 + { masterEnv->numToPrecede = numFilled - NUM_CORES + 1; 3.108 + } 3.109 +/* 3.110 //put some scheduled slaves in, then Master continuation, then rest 3.111 //Adjust position of master such that it maintains close to a fixed 3.112 // ratio --> make NUM_CORES - 1 slots or fewer come after the master 3.113 - numFilled = masterEnv->numFilled; 3.114 - 3.115 - int numPrecede = numFilled; 3.116 - int numFollow = NUM_CORES - 1; 3.117 - 3.118 - if( numFilled < numFollow ) 3.119 - { numFollow = numFilled; 3.120 - numPrecede = 0; 3.121 - } 3.122 - else 3.123 - { numPrecede -= numFollow; 3.124 - } 3.125 - 3.126 + 3.127 for( filledSlotIdx = 0; filledSlotIdx < numPrecede; filledSlotIdx++) 3.128 { 3.129 - writeCASQ( filledSlots[ filledSlotIdx ]->procrAssignedToSlot, workQ ); 3.130 + writeVMSQ( filledSlots[ filledSlotIdx ]->procrAssignedToSlot, workQ ); 3.131 } 3.132 3.133 //enqueue continuation of this loop 3.134 // note that After this enqueue, continuation might sneak through 3.135 - writeCASQ( masterEnv->masterVirtPr, workQ ); 3.136 + writeVMSQ( masterEnv->masterVirtPr, workQ ); 3.137 3.138 for( filledSlotIdx = numPrecede; 3.139 filledSlotIdx < numFilled; 3.140 filledSlotIdx++) 3.141 { 3.142 - writeCASQ( filledSlots[ filledSlotIdx ]->procrAssignedToSlot, workQ ); 3.143 + writeVMSQ( filledSlots[ filledSlotIdx ]->procrAssignedToSlot, workQ ); 3.144 } 3.145 3.146 masterEnv->numFilled = 0; 3.147 - 3.148 +*/ 3.149 3.150 //Save stack ptr and frame -- don't need to, take out later, but safe 3.151 // Also, wait to set stillRunning to FALSE until just before jump, to
4.1 --- a/VMS.c Sat Jul 24 08:58:47 2010 -0700 4.2 +++ b/VMS.c Mon Jul 26 15:25:53 2010 -0700 4.3 @@ -12,6 +12,8 @@ 4.4 #include "Queue_impl/BlockingQueue.h" 4.5 4.6 4.7 +#define thdAttrs NULL 4.8 + 4.9 //=========================================================================== 4.10 void 4.11 shutdownFn( void *dummy, VirtProcr *dummy2 ); 4.12 @@ -19,6 +21,9 @@ 4.13 void 4.14 create_sched_slots( MasterEnv *masterEnv ); 4.15 4.16 +pthread_mutex_t suspendLock = PTHREAD_MUTEX_INITIALIZER; 4.17 +pthread_cond_t suspend_cond = PTHREAD_COND_INITIALIZER; 4.18 + 4.19 //=========================================================================== 4.20 4.21 /*Setup has two phases: 4.22 @@ -52,10 +57,10 @@ 4.23 void 4.24 VMS__init() 4.25 { MasterEnv *masterEnv; 4.26 - CASQueueStruc *workQ; 4.27 + VMSQueueStruc *workQ; 4.28 4.29 //Make the central work-queue 4.30 - _VMSWorkQ = makeCASQ(); 4.31 + _VMSWorkQ = makeVMSQ(); 4.32 workQ = _VMSWorkQ; 4.33 4.34 _VMSMasterEnv = malloc( sizeof(MasterEnv) ); 4.35 @@ -66,33 +71,25 @@ 4.36 4.37 create_sched_slots( masterEnv ); 4.38 4.39 - //Set slot 0 to be the master virt procr & set flags just in case 4.40 - masterEnv->schedSlots[0]->needsProcrAssigned = FALSE; //says don't touch 4.41 - masterEnv->schedSlots[0]->workIsDone = FALSE; //says don't touch 4.42 - masterEnv->schedSlots[0]->procrAssignedToSlot = masterEnv->masterVirtPr; 4.43 - masterEnv->masterVirtPr->schedSlot = masterEnv->schedSlots[0]; 4.44 masterEnv->stillRunning = FALSE; 4.45 + masterEnv->numToPrecede = NUM_CORES; 4.46 4.47 //First core loop to start up gets this, which will schedule seed Pr 4.48 //TODO: debug: check address of masterVirtPr 4.49 - writeCASQ( masterEnv->masterVirtPr, workQ ); 4.50 + writeVMSQ( masterEnv->masterVirtPr, workQ ); 4.51 4.52 numProcrsCreated = 1; 4.53 4.54 //======================================================================== 4.55 // Create the Threads 4.56 int coreIdx, retCode; 4.57 - #define thdAttrs NULL 4.58 - 4.59 - 
_VMSMasterEnv->setupComplete = 0; 4.60 - _VMSMasterEnv->suspend_mutex = PTHREAD_MUTEX_INITIALIZER; 4.61 - _VMSMasterEnv->suspend_cond = PTHREAD_COND_INITIALIZER; 4.62 - 4.63 + 4.64 //Need the threads to be created suspended, and wait for a signal 4.65 // before proceeding -- gives time after creating to initialize other 4.66 // stuff before the coreLoops set off. 4.67 - 4.68 - //Make params given to the win threads that animate the core loops 4.69 + _VMSMasterEnv->setupComplete = 0; 4.70 + 4.71 + //Make the threads that animate the core loops 4.72 for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ ) 4.73 { coreLoopThdParams[coreIdx] = malloc( sizeof(ThdParams) ); 4.74 coreLoopThdParams[coreIdx]->coreNum = coreIdx; 4.75 @@ -102,10 +99,8 @@ 4.76 thdAttrs, 4.77 &coreLoop, 4.78 (void *)(coreLoopThdParams[coreIdx]) ); 4.79 - if(!retCode){printf("ERROR creating thread: %d\n", retCode); exit();} 4.80 + if(retCode){printf("ERROR creating thread: %d\n", retCode); exit(0);} 4.81 } 4.82 - 4.83 - 4.84 } 4.85 4.86 void 4.87 @@ -148,10 +143,10 @@ 4.88 //get lock, to lock out any threads still starting up -- they'll see 4.89 // that setupComplete is true before entering while loop, and so never 4.90 // wait on the condition 4.91 - pthread_mutex_lock( _VMSMasterEnv->suspend_mutex ); 4.92 + pthread_mutex_lock( &suspendLock ); 4.93 _VMSMasterEnv->setupComplete = 1; 4.94 - pthread_mutex_unlock( _VMSMasterEnv->suspend_mutex ); 4.95 - pthread_cond_broadcast( _VMSMasterEnv->suspend_cond ); 4.96 + pthread_mutex_unlock( &suspendLock ); 4.97 + pthread_cond_broadcast( &suspend_cond ); 4.98 4.99 4.100 //wait for all to complete 4.101 @@ -200,6 +195,8 @@ 4.102 // for 2 params + return addr. 
Return addr (NULL) is in loc pointed to 4.103 // by stackPtr, initData at stackPtr + 4 bytes, animatingPr just above 4.104 stackLocs = malloc( VIRT_PROCR_STACK_SIZE ); 4.105 + if(stackLocs == 0) 4.106 + {perror("malloc stack"); exit(1);} 4.107 newPr->startOfStack = stackLocs; 4.108 stackPtr = ( (char *)stackLocs + VIRT_PROCR_STACK_SIZE - 0x10 ); 4.109 //setup __cdecl on stack -- coreloop will switch to stackPtr before jmp 4.110 @@ -212,7 +209,7 @@ 4.111 } 4.112 4.113 4.114 - /*there is a label inside this function -- save the addr of this label in 4.115 +/*there is a label inside this function -- save the addr of this label in 4.116 * the callingPr struc, as the pick-up point from which to start the next 4.117 * work-unit for that procr. If turns out have to save registers, then 4.118 * save them in the procr struc too. Then do assembly jump to the CoreLoop's 4.119 @@ -238,25 +235,32 @@ 4.120 4.121 stackPtrAddr = &(callingPr->stackPtr); 4.122 framePtrAddr = &(callingPr->framePtr); 4.123 - 4.124 + 4.125 jmpPt = callingPr->coreLoopStartPt; 4.126 coreLoopFramePtr = callingPr->coreLoopFramePtr;//need this only 4.127 coreLoopStackPtr = callingPr->coreLoopStackPtr;//shouldn't need -- safety 4.128 4.129 - //Save the virt procr's stack and frame ptrs, restore coreloop's frame 4.130 - // ptr, then jump back to "start" of core loop 4.131 - //Note, GCC compiles to assembly that saves esp and ebp in the stack 4.132 - // frame -- so have to explicitly do assembly that saves to memory 4.133 + //Eclipse's compilation sequence complains -- so break into two 4.134 + // separate in-line assembly pieces 4.135 + //Save the virt procr's stack and frame ptrs, 4.136 asm volatile("movl %0, %%eax; \ 4.137 movl %%esp, (%%eax); \ 4.138 movl %1, %%eax; \ 4.139 - movl %%ebp, (%%eax); \ 4.140 - movl %2, %%eax; \ 4.141 - movl %3, %%esp; \ 4.142 - movl %4, %%ebp; \ 4.143 + movl %%ebp, (%%eax) "\ 4.144 + /* outputs */ : "=g" (stackPtrAddr), "=g" (framePtrAddr) \ 4.145 + /* inputs */ : \ 4.146 + /* 
clobber */ : "%eax" \ 4.147 + ); 4.148 + 4.149 + //restore coreloop's frame ptr, then jump back to "start" of core loop 4.150 + //Note, GCC compiles to assembly that saves esp and ebp in the stack 4.151 + // frame -- so have to explicitly do assembly that saves to memory 4.152 + asm volatile("movl %0, %%eax; \ 4.153 + movl %1, %%esp; \ 4.154 + movl %2, %%ebp; \ 4.155 jmp %%eax " \ 4.156 - /* outputs */ : "=g" (stackPtrAddr), "=g" (framePtrAddr) \ 4.157 - /* inputs */ : "g" (jmpPt), "g"(coreLoopStackPtr), "g"(coreLoopFramePtr)\ 4.158 + /* outputs */ : \ 4.159 + /* inputs */ : "m" (jmpPt), "m"(coreLoopStackPtr), "m"(coreLoopFramePtr)\ 4.160 /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx", "%edi","%esi" \ 4.161 ); //list everything as clobbered to force GCC to save all 4.162 // live vars that are in regs on stack before this 4.163 @@ -356,6 +360,8 @@ 4.164 //TODO: add a semantic-layer supplied "freer" for the semantic-data portion 4.165 // of a request -- IE call with both a virt procr and a fn-ptr to request 4.166 // freer (also maybe put sem request freer as a field in virt procr?) 4.167 +//VMSHW relies right now on this only freeing VMS layer of request -- the 4.168 +// semantic portion of request is alloc'd and freed by request handler 4.169 void 4.170 VMS__free_request( VMSReqst *req ) 4.171 { 4.172 @@ -500,7 +506,7 @@ 4.173 shutdownFn( void *dummy, VirtProcr *animatingPr ) 4.174 { int coreIdx; 4.175 VirtProcr *shutDownPr; 4.176 - CASQueueStruc *workQ = _VMSWorkQ; 4.177 + VMSQueueStruc *workQ = _VMSWorkQ; 4.178 4.179 //free all the locations owned within the VMS system 4.180 //TODO: write VMS__malloc and free.. 
-- take the DKU malloc as starting pt 4.181 @@ -510,7 +516,7 @@ 4.182 { 4.183 shutDownPr = VMS__create_procr( NULL, NULL ); 4.184 shutDownPr->nextInstrPt = _VMSMasterEnv->coreLoopShutDownPt; 4.185 - writeCASQ( shutDownPr, workQ ); 4.186 + writeVMSQ( shutDownPr, workQ ); 4.187 } 4.188 4.189 //This is an issue: the animating processor of this function may not
5.1 --- a/VMS.h Sat Jul 24 08:58:47 2010 -0700 5.2 +++ b/VMS.h Mon Jul 26 15:25:53 2010 -0700 5.3 @@ -12,7 +12,7 @@ 5.4 5.5 #include "VMS_primitive_data_types.h" 5.6 #include "Queue_impl/BlockingQueue.h" 5.7 -#include "pthread.h" 5.8 +#include <pthread.h> 5.9 5.10 //This value is the number of hardware threads in the shared memory 5.11 // machine 5.12 @@ -22,14 +22,19 @@ 5.13 #define NUM_SCHED_SLOTS (2 * NUM_CORES + 1) 5.14 5.15 //128K stack.. compromise, want 10K virtPr 5.16 -#define VIRT_PROCR_STACK_SIZE 0x100000 5.17 +#define VIRT_PROCR_STACK_SIZE 0x10000 5.18 5.19 #define SUCCESS 0 5.20 5.21 +#define writeVMSQ writePThdQ 5.22 +#define readVMSQ readPThdQ 5.23 +#define makeVMSQ makePThdQ 5.24 +#define VMSQueueStruc PThdQueueStruc 5.25 + 5.26 //#define thdAttrs NULL //For PThreads 5.27 5.28 typedef struct _SchedSlot SchedSlot; 5.29 -typedef struct _VMSReqst VMSReqst; 5.30 +typedef struct _VMSReqst VMSReqst; 5.31 typedef struct _VirtProcr VirtProcr; 5.32 5.33 typedef VirtProcr * (*SlaveScheduler) ( void * ); //semEnv 5.34 @@ -103,9 +108,9 @@ 5.35 5.36 SchedSlot **schedSlots; 5.37 SchedSlot **filledSlots; 5.38 - int numFilled; 5.39 + int numToPrecede; 5.40 5.41 - int stillRunning; 5.42 + volatile int stillRunning; 5.43 5.44 VirtProcr *masterVirtPr; 5.45 5.46 @@ -114,9 +119,7 @@ 5.47 5.48 void *coreLoopShutDownPt; //addr to jump to to shut down a coreLoop 5.49 5.50 - int setupComplete; 5.51 - pthread_mutex_t suspend_mutex; 5.52 - pthread_cond_t suspend_cond; 5.53 + int setupComplete; 5.54 } 5.55 MasterEnv; 5.56 5.57 @@ -130,15 +133,17 @@ 5.58 //===================== Global Vars =================== 5.59 5.60 5.61 -pthread_t coreLoopThdHandles[ NUM_CORES ]; //pthread's virt-procr state 5.62 -ThdParams *coreLoopThdParams[ NUM_CORES ]; 5.63 +pthread_t coreLoopThdHandles[ NUM_CORES ]; //pthread's virt-procr state 5.64 +ThdParams *coreLoopThdParams [ NUM_CORES ]; 5.65 +pthread_mutex_t suspendLock; 5.66 +pthread_cond_t suspend_cond; 5.67 5.68 volatile MasterEnv 
*_VMSMasterEnv; 5.69 5.70 //workQ is global, static, and volatile so that core loop has its location 5.71 // hard coded, and reloads every time through the loop -- that way don't 5.72 // need to save any regs used by core loop 5.73 -volatile CASQueueStruc *_VMSWorkQ; 5.74 +volatile VMSQueueStruc *_VMSWorkQ; 5.75 5.76 //========================== 5.77 void
6.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 6.2 +++ b/VMS_Seq.c Mon Jul 26 15:25:53 2010 -0700 6.3 @@ -0,0 +1,409 @@ 6.4 +/* 6.5 + * Copyright 2010 OpenSourceCodeStewardshipFoundation 6.6 + * 6.7 + * Licensed under BSD 6.8 + */ 6.9 + 6.10 +#include <stdio.h> 6.11 +#include <stdlib.h> 6.12 +#include <malloc.h> 6.13 + 6.14 +#include "VMS.h" 6.15 +#include "Queue_impl/BlockingQueue.h" 6.16 + 6.17 + 6.18 +#define thdAttrs NULL 6.19 + 6.20 +//=========================================================================== 6.21 +void 6.22 +shutdownFnSeq( void *dummy, VirtProcr *dummy2 ); 6.23 + 6.24 +void 6.25 +create_sched_slots( MasterEnv *masterEnv ); 6.26 + 6.27 +//=========================================================================== 6.28 + 6.29 +/*Setup has two phases: 6.30 + * 1) Semantic layer first calls init_VMS, which creates masterEnv, and puts 6.31 + * the master virt procr into the work-queue, ready for first "call" 6.32 + * 2) Semantic layer then does its own init, which creates the seed virt 6.33 + * procr inside the semantic layer, ready to schedule it when 6.34 + * asked by the first run of the masterLoop. 6.35 + * 6.36 + *This part is bit weird because VMS really wants to be "always there", and 6.37 + * have applications attach and detach.. for now, this VMS is part of 6.38 + * the app, so the VMS system starts up as part of running the app. 6.39 + * 6.40 + *The semantic layer is isolated from the VMS internals by making the 6.41 + * semantic layer do setup to a state that it's ready with its 6.42 + * initial virt procrs, ready to schedule them to slots when the masterLoop 6.43 + * asks. Without this pattern, the semantic layer's setup would 6.44 + * have to modify slots directly to assign the initial virt-procrs, and put 6.45 + * them into the workQ itself, breaking the isolation completely. 
6.46 + * 6.47 + * 6.48 + *The semantic layer creates the initial virt procr(s), and adds its 6.49 + * own environment to masterEnv, and fills in the pointers to 6.50 + * the requestHandler and slaveScheduler plug-in functions 6.51 + */ 6.52 + 6.53 +/*This allocates VMS data structures, populates the master VMSProc, 6.54 + * and master environment, and returns the master environment to the semantic 6.55 + * layer. 6.56 + */ 6.57 +void 6.58 +VMS__init_Seq() 6.59 + { MasterEnv *masterEnv; 6.60 + VMSQueueStruc *workQ; 6.61 + 6.62 + //Make the central work-queue 6.63 + _VMSWorkQ = makeVMSQ(); 6.64 + workQ = _VMSWorkQ; 6.65 + 6.66 + _VMSMasterEnv = malloc( sizeof(MasterEnv) ); 6.67 + masterEnv = _VMSMasterEnv; 6.68 + 6.69 + //create the master virtual processor 6.70 + masterEnv->masterVirtPr = VMS__create_procr( &masterLoop, masterEnv ); 6.71 + 6.72 + create_sched_slots( masterEnv ); 6.73 + 6.74 + masterEnv->stillRunning = FALSE; 6.75 + masterEnv->numToPrecede = NUM_CORES; 6.76 + 6.77 + //First core loop to start up gets this, which will schedule seed Pr 6.78 + //TODO: debug: check address of masterVirtPr 6.79 + writeVMSQ( masterEnv->masterVirtPr, workQ ); 6.80 + 6.81 + numProcrsCreated = 1; 6.82 + 6.83 + //======================================================================== 6.84 + // Create the Threads 6.85 + 6.86 + } 6.87 + 6.88 + 6.89 +/*Semantic layer calls this when it wants the system to start running.. 6.90 + * 6.91 + *This starts the core loops running then waits for them to exit. 
6.92 + */ 6.93 +void 6.94 +VMS__start_the_work_then_wait_until_done_Seq() 6.95 + { int coreIdx; 6.96 + //Start the core loops running 6.97 +//=========================================================================== 6.98 + TSCount startCount, endCount; 6.99 + unsigned long long count = 0, freq = 0; 6.100 + double runTime; 6.101 + 6.102 + startCount = getTSCount(); 6.103 + 6.104 + //Instead of un-suspending threads, just call the one and only 6.105 + // core loop, in the main thread. 6.106 + coreLoop_Seq( NULL ); 6.107 + 6.108 + //NOTE: do not clean up VMS env here -- semantic layer has to have 6.109 + // a chance to clean up its environment first, then do a call to free 6.110 + // the Master env and rest of VMS locations 6.111 + 6.112 + 6.113 + endCount = getTSCount(); 6.114 + count = endCount - startCount; 6.115 + 6.116 + runTime = (double)count / (double)TSCOUNT_FREQ; 6.117 + 6.118 + printf("\n Time startup to shutdown: %f\n", runTime); fflush( stdin ); 6.119 + } 6.120 + 6.121 + 6.122 + 6.123 + 6.124 + 6.125 +/*This is equivalent to "jump back to core loop" -- it's mainly only used 6.126 + * just after adding dissipate request to a processor -- so the semantic 6.127 + * layer is the only place it will be seen and/or used. 6.128 + * 6.129 + *It does almost the same thing as suspend, except don't need to save the 6.130 + * stack nor set the nextInstrPt 6.131 + * 6.132 + *As of June 30, 2010 just implementing as a call to suspend -- just sugar 6.133 + */ 6.134 +void 6.135 +VMS__return_from_fn( VirtProcr *animatingPr ) 6.136 + { 6.137 + VMS__suspend_procr( animatingPr ); 6.138 + } 6.139 + 6.140 + 6.141 +/*Not sure yet the form going to put "dissipate" in, so this is the third 6.142 + * possibility -- the semantic layer can just make a macro that looks like 6.143 + * a call to its name, then expands to a call to this. 6.144 + * 6.145 + *As of June 30, 2010 this looks like the top choice.. 
6.146 + * 6.147 + *This adds a request to dissipate, then suspends the processor so that the 6.148 + * request handler will receive the request. The request handler is what 6.149 + * does the work of freeing memory and removing the processor from the 6.150 + * semantic environment's data structures. 6.151 + *The request handler also is what figures out when to shutdown the VMS 6.152 + * system -- which causes all the core loop threads to die, and returns from 6.153 + * the call that started up VMS to perform the work. 6.154 + * 6.155 + *This form is a bit misleading to understand if one is trying to figure out 6.156 + * how VMS works -- it looks like a normal function call, but inside it 6.157 + * sends a request to the request handler and suspends the processor, which 6.158 + * jumps out of the VMS__dissipate_procr function, and out of all nestings 6.159 + * above it, transferring the work of dissipating to the request handler, 6.160 + * which then does the actual work -- causing the processor that animated 6.161 + * the call of this function to disappear and the "hanging" state of this 6.162 + * function to just poof into thin air -- the virtual processor's trace 6.163 + * never returns from this call, but instead the virtual processor's trace 6.164 + * gets suspended in this call and all the virt processor's state disap- 6.165 + * pears -- making that suspend the last thing in the virt procr's trace. 
6.166 + */ 6.167 +void 6.168 +VMS__dissipate_procr( VirtProcr *procrToDissipate ) 6.169 + { VMSReqst *req; 6.170 + 6.171 + req = malloc( sizeof(VMSReqst) ); 6.172 +// req->virtProcrFrom = callingPr; 6.173 + req->reqType = dissipate; 6.174 + req->nextReqst = procrToDissipate->requests; 6.175 + procrToDissipate->requests = req; 6.176 + 6.177 + VMS__suspend_procr( procrToDissipate ); 6.178 +} 6.179 + 6.180 + 6.181 +/*This inserts the semantic-layer's request data into standard VMS carrier 6.182 + */ 6.183 +inline void 6.184 +VMS__add_sem_request( void *semReqData, VirtProcr *callingPr ) 6.185 + { VMSReqst *req; 6.186 + 6.187 + req = malloc( sizeof(VMSReqst) ); 6.188 +// req->virtProcrFrom = callingPr; 6.189 + req->reqType = semantic; 6.190 + req->semReqData = semReqData; 6.191 + req->nextReqst = callingPr->requests; 6.192 + callingPr->requests = req; 6.193 + } 6.194 + 6.195 + 6.196 + 6.197 +//TODO: add a semantic-layer supplied "freer" for the semantic-data portion 6.198 +// of a request -- IE call with both a virt procr and a fn-ptr to request 6.199 +// freer (or maybe put request freer as a field in virt procr?) 6.200 +void 6.201 +VMS__remove_and_free_top_request( VirtProcr *procrWithReq ) 6.202 + { VMSReqst *req; 6.203 + 6.204 + req = procrWithReq->requests; 6.205 + procrWithReq->requests = procrWithReq->requests->nextReqst; 6.206 + free( req ); 6.207 + } 6.208 + 6.209 + 6.210 +//TODO: add a semantic-layer supplied "freer" for the semantic-data portion 6.211 +// of a request -- IE call with both a virt procr and a fn-ptr to request 6.212 +// freer (also maybe put sem request freer as a field in virt procr?) 
6.213 +//VMSHW relies right now on this only freeing VMS layer of request -- the 6.214 +// semantic portion of request is alloc'd and freed by request handler 6.215 +void 6.216 +VMS__free_request( VMSReqst *req ) 6.217 + { 6.218 + free( req ); 6.219 + } 6.220 + 6.221 +VMSReqst * 6.222 +VMS__take_top_request_from( VirtProcr *procrWithReq ) 6.223 + { VMSReqst *req; 6.224 + 6.225 + req = procrWithReq->requests; 6.226 + if( req == NULL ) return req; 6.227 + 6.228 + procrWithReq->requests = procrWithReq->requests->nextReqst; 6.229 + return req; 6.230 + } 6.231 + 6.232 +inline int 6.233 +VMS__isSemanticReqst( VMSReqst *req ) 6.234 + { 6.235 + return ( req->reqType == semantic ); 6.236 + } 6.237 + 6.238 + 6.239 +inline void * 6.240 +VMS__take_sem_reqst_from( VMSReqst *req ) 6.241 + { 6.242 + return req->semReqData; 6.243 + } 6.244 + 6.245 +inline int 6.246 +VMS__isDissipateReqst( VMSReqst *req ) 6.247 + { 6.248 + return ( req->reqType == dissipate ); 6.249 + } 6.250 + 6.251 +inline int 6.252 +VMS__isCreateReqst( VMSReqst *req ) 6.253 + { 6.254 + return ( req->reqType == regCreated ); 6.255 + } 6.256 + 6.257 +void 6.258 +VMS__send_register_new_procr_request(VirtProcr *newPr, VirtProcr *reqstingPr) 6.259 + { VMSReqst *req; 6.260 + 6.261 + req = malloc( sizeof(VMSReqst) ); 6.262 + req->reqType = regCreated; 6.263 + req->semReqData = newPr; 6.264 + req->nextReqst = reqstingPr->requests; 6.265 + reqstingPr->requests = req; 6.266 + 6.267 + VMS__suspend_procr( reqstingPr ); 6.268 + } 6.269 + 6.270 + 6.271 +/*The semantic layer figures out when the work is done ( perhaps by a call 6.272 + * in the application to "work all done", or perhaps all the virtual 6.273 + * processors have dissipated.. a.s.o. ) 6.274 + * 6.275 + *The semantic layer is responsible for making sure all work has fully 6.276 + * completed before using this to shutdown the VMS system. 
6.277 + * 6.278 + *After the semantic layer has determined it wants to shut down, the 6.279 + * next time the Master Loop calls the scheduler plug-in, the scheduler 6.280 + * then calls this function and returns the virtual processor it gets back. 6.281 + * 6.282 + *When the shut-down processor runs, it first frees all locations malloc'd to 6.283 + * the VMS system (that wasn't 6.284 + * specified as return-locations). Then it creates one core-loop shut-down 6.285 + * processor for each core loop and puts them all into the workQ. When a 6.286 + * core loop animates a core loop shut-down processor, it causes exit-thread 6.287 + * to run, and when all core loop threads have exited, then the "wait for 6.288 + * work to finish" in the main thread is woken, and the function-call that 6.289 + * started all the work returns. 6.290 + * 6.291 + *The function animated by this processor performs the shut-down work. 6.292 + */ 6.293 +VirtProcr * 6.294 +VMS__create_the_shutdown_procr() 6.295 + { 6.296 + return VMS__create_procr( &shutdownFn, NULL ); 6.297 + } 6.298 + 6.299 + 6.300 +/*This must be called by the request handler plugin -- it cannot be called 6.301 + * from the semantic library "dissipate processor" function -- instead, the 6.302 + * semantic layer has to generate a request for the plug-in to call this 6.303 + * function. 6.304 + *The reason is that this frees the virtual processor's stack -- which is 6.305 + * still in use inside semantic library calls! 6.306 + * 6.307 + *This frees or recycles all the state owned by and comprising the VMS 6.308 + * portion of the animating virtual procr. The request handler must first 6.309 + * free any semantic data created for the processor that didn't use the 6.310 + * VMS_malloc mechanism. Then it calls this, which first asks the malloc 6.311 + * system to disown any state that did use VMS_malloc, and then frees the 6.312 + * statck and the processor-struct itself. 
6.313 + *If the dissipated processor is the sole (remaining) owner of VMS__malloc'd 6.314 + * state, then that state gets freed (or sent to recycling) as a side-effect 6.315 + * of dis-owning it. 6.316 + */ 6.317 +void 6.318 +VMS__free_procr_locs( VirtProcr *animatingPr ) 6.319 + { 6.320 + //dis-own all locations owned by this processor, causing to be freed 6.321 + // any locations that it is (was) sole owner of 6.322 + //TODO: implement VMS__malloc system, including "give up ownership" 6.323 + 6.324 + //The dissipate request might still be attached, so remove and free it 6.325 + VMS__remove_and_free_top_request( animatingPr ); 6.326 + free( animatingPr->startOfStack ); 6.327 + 6.328 + //NOTE: initialData was given to the processor, so should either have 6.329 + // been alloc'd with VMS__malloc, or freed by the level above animPr. 6.330 + //So, all that's left to free here is the stack and the VirtProcr struc 6.331 + // itself 6.332 + free( animatingPr->startOfStack ); 6.333 + free( animatingPr ); 6.334 + } 6.335 + 6.336 + 6.337 + 6.338 +/*This is the function run by the special "shut-down" processor 6.339 + * 6.340 + *The _VMSMasterEnv is needed by this shut down function, so the "wait" 6.341 + * function run in the main loop has to free it, and the thread-related 6.342 + * locations (coreLoopThdParams a.s.o.). 6.343 + *However, the semantic environment and all data malloc'd to VMS can be 6.344 + * freed here. 6.345 + * 6.346 + *NOTE: the semantic plug-in is expected to use VMS__malloc to get all the 6.347 + * locations it needs -- they will be automatically freed by the standard 6.348 + * "free all owned locations" 6.349 + * 6.350 + *Free any locations malloc'd to the VMS system (that weren't 6.351 + * specified as return-locations). 6.352 + *Then create one core-loop shut-down processor for each core loop and puts 6.353 + * them all into the workQ. 
6.354 + */ 6.355 +void 6.356 +shutdownFn( void *dummy, VirtProcr *animatingPr ) 6.357 + { int coreIdx; 6.358 + VirtProcr *shutDownPr; 6.359 + VMSQueueStruc *workQ = _VMSWorkQ; 6.360 + 6.361 + //free all the locations owned within the VMS system 6.362 + //TODO: write VMS__malloc and free.. -- take the DKU malloc as starting pt 6.363 + 6.364 + //make the core loop shut-down processors and put them into the workQ 6.365 + for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ ) 6.366 + { 6.367 + shutDownPr = VMS__create_procr( NULL, NULL ); 6.368 + shutDownPr->nextInstrPt = _VMSMasterEnv->coreLoopShutDownPt; 6.369 + writeVMSQ( shutDownPr, workQ ); 6.370 + } 6.371 + 6.372 + //This is an issue: the animating processor of this function may not 6.373 + // get its request handled before all the cores have shutdown. 6.374 + //TODO: after all the threads stop, clean out the MasterEnv, the 6.375 + // SemanticEnv, and the workQ before returning. 6.376 + VMS__dissipate_procr( animatingPr ); //will never come back from this 6.377 + } 6.378 + 6.379 + 6.380 +/*This has to free anything allocated during VMS_init, and any other alloc'd 6.381 + * locations that might be left over. 6.382 + */ 6.383 +void 6.384 +VMS__shutdown() 6.385 + { int i; 6.386 + 6.387 + free( _VMSWorkQ ); 6.388 + free( _VMSMasterEnv->filledSlots ); 6.389 + for( i = 0; i < NUM_SCHED_SLOTS; i++ ) 6.390 + { 6.391 + free( _VMSMasterEnv->schedSlots[i] ); 6.392 + } 6.393 + 6.394 + free( _VMSMasterEnv->schedSlots); 6.395 + VMS__free_procr_locs( _VMSMasterEnv->masterVirtPr ); 6.396 + 6.397 + free( _VMSMasterEnv ); 6.398 + } 6.399 + 6.400 + 6.401 +//=========================================================================== 6.402 + 6.403 +inline TSCount getTSCount() 6.404 + { unsigned int low, high; 6.405 + TSCount out; 6.406 + 6.407 + saveTimeStampCountInto( low, high ); 6.408 + out = high; 6.409 + out = (out << 32) + low; 6.410 + return out; 6.411 + } 6.412 +
