changeset 31:e69579a0e797 Pin2Core

Works multi-core.. pinned VP to a core loop
author Me
date Wed, 01 Sep 2010 08:23:39 -0700
parents c8823e0bb2b4
children 609c7222e4a6 17d20e5cf924
files CoreLoop.c MasterLoop.c VMS.c VMS.h
diffstat 4 files changed, 236 insertions(+), 191 deletions(-) [+]
line diff
     1.1 --- a/CoreLoop.c	Mon Aug 09 02:24:31 2010 -0700
     1.2 +++ b/CoreLoop.c	Wed Sep 01 08:23:39 2010 -0700
     1.3 @@ -30,14 +30,16 @@
     1.4   */
     1.5  void *
     1.6  coreLoop( void *paramsIn )
     1.7 - {   
     1.8 + { 
     1.9     ThdParams      *coreLoopThdParams;
    1.10 +   int             thisCoresIdx;
    1.11     VirtProcr      *currPr;
    1.12 -   VMSQueueStruc  *workQ;
    1.13 +   SRSWQueueStruc *readyToAnimateQ;
    1.14     unsigned long   coreMask;  //has 1 in bit positions of allowed cores
    1.15     int             errorCode;
    1.16     
    1.17     coreLoopThdParams = (ThdParams *)paramsIn;
    1.18 +   thisCoresIdx = coreLoopThdParams->coreNum;
    1.19  
    1.20        //wait until signalled that setup is complete
    1.21     pthread_mutex_lock(   &suspendLock );
    1.22 @@ -66,26 +68,57 @@
    1.23        //To get label addr in non-gcc compiler, can trick it by making a call
    1.24        // to a fn that does asm that pulls the "return"
    1.25        // addr off the stack and stores it in a pointed-to location.
    1.26 -   _VMSMasterEnv->coreLoopEndPt = &&CoreLoopEndPt;
    1.27 +   _VMSMasterEnv->coreLoopEndPt   = &&CoreLoopEndPt;
    1.28 +   _VMSMasterEnv->coreLoopStartPt = &&CoreLoopStartPt;
    1.29     
    1.30 -      //Core loop has no values live upon CoreLoopStartPt except workQ
    1.31 +      //Core loop has no values live upon CoreLoopStartPt except
    1.32 +      // _VMSMasterEnv
    1.33        // every value in the code is defined by a statement in core loop,
    1.34 -      // after the start point -- with the one exception of _VMSWorkQ
    1.35 +      // after the start point -- with the one exception of _VMSMasterEnv
    1.36   
    1.37     
    1.38        // Get to work!  --  virt procr jumps back here when suspends
    1.39        //Note, have to restore the frame-pointer before jump to here, to get
    1.40 -      // this code to work right (workQ and so forth are frame-ptr relative)
    1.41 +      // this code to work right (readyToAnimateQ and so forth are frame-ptr relative)
    1.42  CoreLoopStartPt:
    1.43     
    1.44        //Get virtual processor from queue
    1.45        //_VMSWorkQ must be a global, static volatile var, so not kept in reg,
    1.46        // which forces reloading the pointer after each jmp to this point
    1.47 -   workQ  = _VMSWorkQ;
    1.48 -   currPr = (VirtProcr *) readVMSQ( workQ );
    1.49 +   readyToAnimateQ  = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx];
    1.50  
    1.51 -   currPr->coreLoopStartPt = &&CoreLoopStartPt;  //to be sure -- chg for perf
    1.52 -   currPr->coreAnimatedBy  = coreLoopThdParams->coreNum;
    1.53 +   currPr = (VirtProcr *) readSRSWQ_NonBlocking( readyToAnimateQ );
    1.54 +   int tries = 0; int gotLock = 0;
    1.55 +   while( currPr == NULL )
    1.56 +    {    //no VPs ready to animate, so run MasterVP --later make "try Master"
    1.57 +         // VPs & put one in every queue at strategic point -- so have work
    1.58 +         // avail if don't get lock & short-circuit out of it if master has
    1.59 +         // recently run on another core
    1.60 +         //TODO: perf -- "try Master" VP that checks if should run Master Fn
    1.61 +         //But just letting queue run empty is quickest to see if pinning VP
    1.62 +         // to core will solve the bizarre random seg-faults in system stack.
    1.63 +
    1.64 +         //check if get the MasterLock
    1.65 +      gotLock = __sync_bool_compare_and_swap( &(_VMSMasterEnv->masterLock), \
    1.66 +                                                 UNLOCKED, LOCKED );
    1.67 +
    1.68 +      if( gotLock )
    1.69 +       {
    1.70 +            //run own MasterVP -- when its done, unlocks MasterLock and
    1.71 +            // jumps back to coreLoop's startPt
    1.72 +         currPr = _VMSMasterEnv->masterVPs[thisCoresIdx];
    1.73 +         break;  //end while -- have a VP to animate now
    1.74 +       }
    1.75 +         //Aug 24, 2010 -- changed so each core loop only gets work scheduled
    1.76 +         // by its own master, so now stay in loop until get lock
    1.77 +//      currPr = (VirtProcr *) readSRSWQ_NonBlocking( readyToAnimateQ );
    1.78 +      
    1.79 +      tries++;
    1.80 +//      if( tries % 10000 == 0 ) printf("empty tries: %d\n", tries/10000 );
    1.81 +      if( tries % READYTOANIMATE_RETRIES == 0 ) pthread_yield();
    1.82 +    }
    1.83 +   
    1.84 +//   currPr->coreAnimatedBy  = coreLoopThdParams->coreNum;
    1.85  
    1.86        //switch to virt procr's stack and frame ptr then jump to virt procr fn
    1.87     void *stackPtr, *framePtr, *jmpPt, *coreLoopFramePtrAddr, \
    1.88 @@ -139,36 +172,44 @@
    1.89  coreLoop_Seq( void *paramsIn )
    1.90   {
    1.91     VirtProcr      *currPr;
    1.92 -   VMSQueueStruc  *workQ;
    1.93 +   SRSWQueueStruc *readyToAnimateQ;
    1.94 +   
    1.95 +   ThdParams      *coreLoopThdParams;
    1.96 +   int             thisCoresIdx;
    1.97 +   
    1.98 +   coreLoopThdParams = (ThdParams *)paramsIn;
    1.99 +//   thisCoresIdx = coreLoopThdParams->coreNum;
   1.100 +   thisCoresIdx = 0;
   1.101  
   1.102  
   1.103        //Save addr of "end core loop" label - jump to it to shut down coreloop
   1.104        //To get label addr in non-gcc compiler, can trick it by making a call
   1.105        // to a fn that does asm that pulls the "return"
   1.106        // addr off the stack and stores it in a pointed-to location.
   1.107 -   _VMSMasterEnv->coreLoopEndPt = &&CoreLoopEndPt;
   1.108 +   _VMSMasterEnv->coreLoopStartPt = &&SeqCoreLoopStartPt;
   1.109 +   _VMSMasterEnv->coreLoopEndPt   = &&SeqCoreLoopEndPt;
   1.110  
   1.111 -      //Core loop has no values live upon CoreLoopStartPt except workQ
   1.112 +      //Core loop has no values live upon CoreLoopStartPt except readyToAnimateQ
   1.113        // every value in the code is defined by a statement in core loop,
   1.114        // after the start point -- with the one exception of _VMSWorkQ
   1.115  
   1.116  
   1.117        // Get to work!  --  virt procr jumps back here when done or suspends
   1.118        //Note, have to restore the frame-pointer before jump to here, to get
   1.119 -      // this code to work right (workQ and so forth are frame-ptr relative)
   1.120 -CoreLoopStartPt:
   1.121 +      // this code to work right (readyToAnimateQ and so forth are frame-ptr relative)
   1.122 +SeqCoreLoopStartPt:
   1.123  
   1.124        //Get virtual processor from queue
   1.125        //_VMSWorkQ must be a global, static volatile var, so not kept in reg,
   1.126        // which forces reloading the pointer after each jmp to this point
   1.127 -   workQ  = _VMSWorkQ;
   1.128 -   currPr = (VirtProcr *) readVMSQ( workQ );
   1.129 +   readyToAnimateQ  = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx];
   1.130 +   currPr = (VirtProcr *) readSRSWQ_NonBlocking( readyToAnimateQ );
   1.131 +   if( currPr == NULL )
   1.132 +      currPr = _VMSMasterEnv->masterVPs[thisCoresIdx];
   1.133 +   
   1.134  
   1.135  //   printf("core %d loop procr addr: %d\n", coreLoopThdParams->coreNum, \
   1.136  //       (int)currPr ); fflush(stdin);
   1.137 -   currPr->coreLoopStartPt = &&CoreLoopStartPt;  //to be sure.(GCC specific)
   1.138 -
   1.139 -//   currPr->coreAnimatedBy  = coreLoopThdParams->coreNum;
   1.140  
   1.141        //switch to virt procr's stack and frame ptr then jump to virt procr
   1.142     void *stackPtr, *framePtr, *jmpPt, *coreLoopFramePtrAddr, \
   1.143 @@ -210,7 +251,7 @@
   1.144        // to exit, and so the entry point function, which has been waiting for
   1.145        // all the threads to die will proceed, gather the result, and
   1.146        // return to the calling application.
   1.147 -   CoreLoopEndPt:
   1.148 +SeqCoreLoopEndPt:
   1.149     VMS__handle_dissipate_reqst( currPr ); //free shutdown pr, that jmpd here
   1.150     return;
   1.151   }
     2.1 --- a/MasterLoop.c	Mon Aug 09 02:24:31 2010 -0700
     2.2 +++ b/MasterLoop.c	Wed Sep 01 08:23:39 2010 -0700
     2.3 @@ -42,74 +42,77 @@
     2.4   * all the others -- then does any extra setup needed and puts it into the
     2.5   * work queue.
     2.6   *However means have to make masterEnv a global static volatile the same way
     2.7 - * did with workQ in core loop.  -- for performance, put the
     2.8 + * did with readyToAnimateQ in core loop.  -- for performance, put the
     2.9   * jump to the core loop directly in here, and have it directly jump back.
    2.10 + *
    2.11 + *
    2.12 + *Aug 18, 2010 -- Going to a separate MasterVP for each core, to see if this
    2.13 + * avoids the suspected bug in the system stack that causes bizarre faults
    2.14 + * at random places in the system code.
    2.15 + *
    2.16 + *So, this function is coupled to each of the MasterVPs, -- meaning this
    2.17 + * function can't rely on a particular stack and frame -- each MasterVP that
    2.18 + * animates this function has a different one.
    2.19 + *
    2.20 + *At this point, the masterLoop does not write itself into the queue anymore,
    2.21 + * instead, the coreLoop acquires the masterLock when it has nothing to
    2.22 + * animate, and then animates its own masterLoop.  However, still try to put
    2.23 + * several AppVPs into the queue to amortize the startup cost of switching
    2.24 + * to the MasterVP.  Note, don't have to worry about latency of requests much
    2.25 + * because most requests generate work for same core -- only latency issue
    2.26 + * is case when other cores starved and one core's requests generate work
    2.27 + * for them -- so keep max in queue to 3 or 4..
    2.28   */
    2.29 -void masterLoop( void *initData, VirtProcr *masterPr )
    2.30 +void masterLoop( void *initData, VirtProcr *animatingPr )
    2.31   { 
    2.32 -   int             slotIdx, numFilled, filledSlotIdx, masterHasBeenQueued;
    2.33 +   int             slotIdx;
    2.34     VirtProcr      *schedVirtPr;
    2.35 -   SchedSlot      *currSlot, **schedSlots, **filledSlots;
    2.36 +   SchedSlot      *currSlot, **schedSlots;
    2.37     MasterEnv      *masterEnv;
    2.38 -   VMSQueueStruc  *workQ;
    2.39 -   void           *jmpPt, *stackPtrAddr, *framePtrAddr, *stillRunningAddr;
    2.40 -   void           *coreLoopFramePtr, *coreLoopStackPtr, *semanticEnv;
    2.41 +   VMSQueueStruc  *readyToAnimateQ;
    2.42     
    2.43     SlaveScheduler  slaveScheduler;
    2.44     RequestHandler  requestHandler;
    2.45 +   void           *semanticEnv;
    2.46  
    2.47 -      //this will run as the first virt processor in workQ, and will be a
    2.48 -      // new born -- so will do all the GCC-generated allocating space on
    2.49 -      // the stack owned by master virt procr -- and will run this last bit
    2.50 -      // of setup code..
    2.51 +   int             thisCoresIdx;
    2.52 +   VirtProcr      *masterPr;
    2.53 +   volatile        VirtProcr *volatileMasterPr;
    2.54 +   
    2.55 +   volatileMasterPr = animatingPr;
    2.56 +   masterPr         = volatileMasterPr; //used to force re-define after jmp
    2.57 +
    2.58 +      //First animation of each MasterVP will in turn animate this part
    2.59 +      // of setup code.. (VP creator sets up the stack as if this function
    2.60 +      // was called normally, but actually get here by jmp)
    2.61 +      //So, setup values about stack ptr, jmp pt and all that
    2.62     masterPr->nextInstrPt = &&masterLoopStartPt;
    2.63  
    2.64 -      //The second time MasterVP comes out of queue, the first animation of
    2.65 -      // it hasn't written the stackPtr and framePtr yet -- but the second
    2.66 -      // animation has already had its stackPtr and framePtr set to the old
    2.67 -      // value by the coreLoop.  Fix this by writing the correct stack and
    2.68 -      // frame pointers here, at which point they're correct in the first
    2.69 -      // animation of MasterVP.
    2.70 -      //TODO: remove writing stackPtr and framePtr at the bottom, for eff
    2.71 -   stackPtrAddr      = &(masterPr->stackPtr);
    2.72 -   framePtrAddr      = &(masterPr->framePtr);
    2.73  
    2.74 -   asm volatile("movl %0,     %%eax;  \
    2.75 -                 movl %%esp, (%%eax); \
    2.76 -                 movl %1,     %%eax;  \
    2.77 -                 movl %%ebp, (%%eax); "
    2.78 -   /* outputs */ : "=g" (stackPtrAddr), "=g" (framePtrAddr)                 \
    2.79 -   /* inputs  */ :                                                          \
    2.80 -   /* clobber */ : "memory", "%eax", "%ebx"                                 \
    2.81 -                );
    2.82 +      //Note, got rid of writing the stack and frame ptr up here, because
    2.83 +      // only one
    2.84 +      // core can ever animate a given MasterVP, so don't need to communicate
    2.85 +      // new frame and stack ptr to the MasterVP storage before a second
    2.86 +      // version of that MasterVP can get animated on a different core.
    2.87 +      //Also got rid of the busy-wait.
    2.88  
    2.89 -
    2.90 +   
    2.91     masterLoopStartPt:
    2.92  
    2.93 -      //if another reference to same Master VirtProcr still going, busy-wait
    2.94 -      //Could put this lower, but don't want to think about shared stack..
    2.95 -   while( _VMSMasterEnv->stillRunning ) /*busy wait*/ ;
    2.96 -      //TODO: want to do busy-wait as assembly, to be sure stack not touched?
    2.97 +   masterEnv        = _VMSMasterEnv;
    2.98     
    2.99 -      //this is the only master running now, set flag again
   2.100 -   _VMSMasterEnv->stillRunning = TRUE;
   2.101 -   masterEnv = _VMSMasterEnv;
   2.102 +//TODO: check that compiles so that always re-define from frame-storage
   2.103 +   masterPr         = volatileMasterPr;  //just to make sure after jmp
   2.104 +   thisCoresIdx     = masterPr->coreAnimatedBy;
   2.105 +   readyToAnimateQ  = masterEnv->readyToAnimateQs[thisCoresIdx];
   2.106 +   schedSlots       = masterEnv->allSchedSlots[thisCoresIdx];
   2.107  
   2.108 -      //TODO: gdb -- check that a volatile _VMSMasterEnv and _VMSWorkQ means
   2.109 -      // all these will be re-filled every time jump here..
   2.110 -   workQ            = _VMSWorkQ;
   2.111     requestHandler   = masterEnv->requestHandler;
   2.112     slaveScheduler   = masterEnv->slaveScheduler;
   2.113 -   schedSlots       = masterEnv->schedSlots;
   2.114 -   filledSlots      = masterEnv->filledSlots;
   2.115 -   masterPr         = masterEnv->masterVirtPr;  //post-jmp clobbered, re-load
   2.116     semanticEnv      = masterEnv->semanticEnv;
   2.117  
   2.118 -      //prepare for scheduling
   2.119 -   numFilled = 0;
   2.120 -   masterHasBeenQueued = FALSE;
   2.121  
   2.122 -      //Poll each slot's Done flag -- slot 0 reserved for master, start at 1
   2.123 +      //Poll each slot's Done flag
   2.124     for( slotIdx = 0; slotIdx < NUM_SCHED_SLOTS; slotIdx++)
   2.125      {
   2.126        currSlot = schedSlots[ slotIdx ];
   2.127 @@ -125,63 +128,31 @@
   2.128        if( currSlot->needsProcrAssigned )
   2.129         {    //give slot a new virt procr
   2.130           schedVirtPr =
   2.131 -          (*slaveScheduler)( semanticEnv );
   2.132 +          (*slaveScheduler)( semanticEnv, thisCoresIdx );
   2.133           
   2.134           if( schedVirtPr != NULL )
   2.135            { currSlot->procrAssignedToSlot = schedVirtPr;
   2.136              schedVirtPr->schedSlot        = currSlot;
   2.137              currSlot->needsProcrAssigned  = FALSE;
   2.138  
   2.139 -            filledSlots[ numFilled ]      = currSlot;
   2.140 -
   2.141 -            writeVMSQ( schedVirtPr, workQ );
   2.142 -            numFilled += 1;
   2.143 -            
   2.144 -            if( numFilled == masterEnv->numToPrecede )
   2.145 -             {
   2.146 -               writeVMSQ( masterEnv->masterVirtPr, workQ );
   2.147 -               masterHasBeenQueued = TRUE;
   2.148 -             }
   2.149 -
   2.150 +            writeSRSWQ( schedVirtPr, readyToAnimateQ );
   2.151            }
   2.152         }
   2.153      }
   2.154  
   2.155 -   if( !masterHasBeenQueued )
   2.156 -    {
   2.157 -      writeVMSQ( masterEnv->masterVirtPr, workQ );
   2.158 -    }
   2.159  
   2.160 -      //Adjust the number to precede, for next round -- assume rate of
   2.161 -      // finishing work is stable -- which is a bad assumption!  But, just
   2.162 -      // want something working for the moment, look at dynamic behavior
   2.163 -      // later
   2.164 -//TODO: look at dynamic behavior -- time-average numToPrecede or something
   2.165 -   if( numFilled < NUM_CORES - 1 )
   2.166 -    { 
   2.167 -      masterEnv->numToPrecede = 1;
   2.168 -    }
   2.169 -   else
   2.170 -    { masterEnv->numToPrecede = numFilled - NUM_CORES + 1;
   2.171 -    }
   2.172 -
   2.173 -      //Save stack ptr and frame -- don't need to, take out later, but safe
   2.174 -      // Also, wait to set stillRunning to FALSE until just before jump, to
   2.175 -      // be safe -- although the two simulatneously animated MasterLoops
   2.176 -      // are on different cores, so have different stacks, so no worries
   2.177 -      // there.
   2.178 -      //Restore CoreLoop's stack frame (and stack pointer, to be safe)
   2.179 +      //Save stack ptr and frame, restore CoreLoop's stack and frame,
   2.180 +      // and clear the MasterLock
   2.181        //TODO: cafefully verify don't need to force saving anything to stack
   2.182        // before jumping back to core loop.
   2.183 +   void           *stackPtrAddr, *framePtrAddr, *masterLockAddr;
   2.184 +   void           *jmpPt, *coreLoopFramePtr, *coreLoopStackPtr;
   2.185 +
   2.186     stackPtrAddr      = &(masterPr->stackPtr);
   2.187     framePtrAddr      = &(masterPr->framePtr);
   2.188 -   stillRunningAddr  = &(_VMSMasterEnv->stillRunning); //when race condition
   2.189 -      //arises, stillRunning is shared between the two cores both animating
   2.190 -      // MasterLoop -- but those two cores have different esp & ebp, so safe
   2.191 -      // to change stack and frame pointer here, without one messing up other
   2.192 -      // one
   2.193 +   masterLockAddr    = &(_VMSMasterEnv->masterLock);
   2.194  
   2.195 -   jmpPt             = masterPr->coreLoopStartPt;
   2.196 +   jmpPt             = _VMSMasterEnv->coreLoopStartPt;
   2.197     coreLoopFramePtr  = masterPr->coreLoopFramePtr;//need this only
   2.198     coreLoopStackPtr  = masterPr->coreLoopStackPtr;//shouldn't need -- safety
   2.199     
   2.200 @@ -196,7 +167,7 @@
   2.201                   movl $0x0, (%%ebx);  \
   2.202                   jmp  %%eax;"         \
   2.203     /* outputs */ : "=g" (stackPtrAddr), "=g" (framePtrAddr),                \
   2.204 -                   "=g"(stillRunningAddr)                                   \
   2.205 +                   "=g"(masterLockAddr)                                   \
   2.206     /* inputs  */ : "g" (jmpPt), "g"(coreLoopStackPtr), "g"(coreLoopFramePtr)\
   2.207     /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi" \
   2.208                  );//can probably make clobber list empty -- but safe for now
     3.1 --- a/VMS.c	Mon Aug 09 02:24:31 2010 -0700
     3.2 +++ b/VMS.c	Wed Sep 01 08:23:39 2010 -0700
     3.3 @@ -18,8 +18,8 @@
     3.4  void
     3.5  shutdownFn( void *dummy, VirtProcr *dummy2 );
     3.6  
     3.7 -void
     3.8 -create_sched_slots( MasterEnv *masterEnv );
     3.9 +SchedSlot **
    3.10 +create_sched_slots();
    3.11  
    3.12  void
    3.13  create_masterEnv();
    3.14 @@ -48,7 +48,7 @@
    3.15   * initial virt procrs, ready to schedule them to slots when the masterLoop
    3.16   * asks.  Without this pattern, the semantic layer's setup would
    3.17   * have to modify slots directly to assign the initial virt-procrs, and put
    3.18 - * them into the workQ itself, breaking the isolation completely.
    3.19 + * them into the readyToAnimateQ itself, breaking the isolation completely.
    3.20   *
    3.21   * 
    3.22   *The semantic layer creates the initial virt procr(s), and adds its
    3.23 @@ -77,29 +77,45 @@
    3.24  
    3.25  void
    3.26  create_masterEnv()
    3.27 - { MasterEnv  *masterEnv;
    3.28 -   VMSQueueStruc *workQ;
    3.29 -
    3.30 -      //Make the central work-queue
    3.31 -   _VMSWorkQ = makeVMSQ();
    3.32 -   workQ     = _VMSWorkQ;
    3.33 -
    3.34 + { MasterEnv       *masterEnv;
    3.35 +   SRSWQueueStruc **readyToAnimateQs;
    3.36 +   int              coreIdx;
    3.37 +   VirtProcr      **masterVPs;
    3.38 +   SchedSlot     ***allSchedSlots; //ptr to array of ptrs
    3.39 +   
    3.40 +      //Make the master env, which holds everything else
    3.41     _VMSMasterEnv = malloc( sizeof(MasterEnv) );
    3.42     masterEnv     = _VMSMasterEnv;
    3.43 +      //Need to set start pt here 'cause used by seed procr, which is created
    3.44 +      // before the first core loop starts up. -- not sure how yet..
    3.45 +//   masterEnv->coreLoopStartPt = ;
    3.46 +//   masterEnv->coreLoopEndPt   = ;
    3.47 +   
    3.48 +      //Make a readyToAnimateQ for each core loop
    3.49 +   readyToAnimateQs = malloc( NUM_CORES * sizeof(SRSWQueueStruc *) );
    3.50 +   masterVPs        = malloc( NUM_CORES * sizeof(VirtProcr *) );
    3.51  
    3.52 -      //create the master virtual processor
    3.53 -   masterEnv->masterVirtPr = VMS__create_procr( &masterLoop, masterEnv );
    3.54 +      //One array for each core, 3 in array, core's masterVP scheds all
    3.55 +   allSchedSlots    = malloc( NUM_CORES * sizeof(SchedSlot *) );
    3.56  
    3.57 -   create_sched_slots( masterEnv );
    3.58 +   for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ )
    3.59 +    {
    3.60 +      readyToAnimateQs[ coreIdx ] = makeSRSWQ();
    3.61 +      
    3.62 +         //Q: should give masterVP core-specific info as its init data?
    3.63 +      masterVPs[ coreIdx ] = VMS__create_procr( &masterLoop, masterEnv );
    3.64 +      masterVPs[ coreIdx ]->coreAnimatedBy = coreIdx;
    3.65 +      allSchedSlots[ coreIdx ] = create_sched_slots(); //makes for one core
    3.66 +    }
    3.67 +   _VMSMasterEnv->readyToAnimateQs = readyToAnimateQs;
    3.68 +   _VMSMasterEnv->masterVPs        = masterVPs;
    3.69 +   _VMSMasterEnv->allSchedSlots    = allSchedSlots;
    3.70  
    3.71 -   masterEnv->stillRunning = FALSE;
    3.72 -   masterEnv->numToPrecede = NUM_CORES;
    3.73  
    3.74 -      //First core loop to start up gets this, which will schedule seed Pr
    3.75 -      //TODO: debug: check address of masterVirtPr
    3.76 -   writeVMSQ( masterEnv->masterVirtPr, workQ );
    3.77  
    3.78 -   numProcrsCreated = 1;  //global counter for debugging
    3.79 +      //Aug 19, 2010:  no longer need to place initial masterVP into queue
    3.80 +      // because coreLoop now controls -- animates its masterVP when no work
    3.81 +
    3.82  
    3.83     //==================== malloc substitute ========================
    3.84     //
    3.85 @@ -143,15 +159,12 @@
    3.86   }
    3.87   */
    3.88  
    3.89 -void
    3.90 -create_sched_slots( MasterEnv *masterEnv )
    3.91 - { SchedSlot  **schedSlots, **filledSlots;
    3.92 +SchedSlot **
    3.93 +create_sched_slots()
    3.94 + { SchedSlot  **schedSlots;
    3.95     int i;
    3.96  
    3.97     schedSlots  = malloc( NUM_SCHED_SLOTS * sizeof(SchedSlot *) );
    3.98 -   filledSlots = malloc( NUM_SCHED_SLOTS * sizeof(SchedSlot *) );
    3.99 -   masterEnv->schedSlots  = schedSlots;
   3.100 -   masterEnv->filledSlots = filledSlots;
   3.101  
   3.102     for( i = 0; i < NUM_SCHED_SLOTS; i++ )
   3.103      {
   3.104 @@ -161,6 +174,18 @@
   3.105        schedSlots[i]->workIsDone         = FALSE;
   3.106        schedSlots[i]->needsProcrAssigned = TRUE;
   3.107      }
   3.108 +   return schedSlots;
   3.109 + }
   3.110 +
   3.111 +
   3.112 +void
   3.113 +freeSchedSlots( SchedSlot **schedSlots )
   3.114 + { int i;
   3.115 +   for( i = 0; i < NUM_SCHED_SLOTS; i++ )
   3.116 +    {
   3.117 +      free( schedSlots[i] );
   3.118 +    }
   3.119 +   free( schedSlots );
   3.120   }
   3.121  
   3.122  
   3.123 @@ -267,6 +292,8 @@
   3.124     newPr->procrID     = numProcrsCreated++;
   3.125     newPr->nextInstrPt = fnPtr;
   3.126     newPr->initialData = initialData;
   3.127 +   newPr->requests    = NULL;
   3.128 +//   newPr->coreLoopStartPt = _VMSMasterEnv->coreLoopStartPt;
   3.129  
   3.130        //fnPtr takes two params -- void *initData & void *animProcr
   3.131        //alloc stack locations, make stackPtr be the highest addr minus room
   3.132 @@ -314,7 +341,7 @@
   3.133     stackPtrAddr      = &(callingPr->stackPtr);
   3.134     framePtrAddr      = &(callingPr->framePtr);
   3.135  
   3.136 -   jmpPt             = callingPr->coreLoopStartPt;
   3.137 +   jmpPt             = _VMSMasterEnv->coreLoopStartPt;
   3.138     coreLoopFramePtr  = callingPr->coreLoopFramePtr;//need this only
   3.139     coreLoopStackPtr  = callingPr->coreLoopStackPtr;//shouldn't need -- safety
   3.140  
   3.141 @@ -350,21 +377,6 @@
   3.142  
   3.143  
   3.144  
   3.145 -/*This is equivalent to "jump back to core loop" -- it's mainly only used
   3.146 - * just after adding dissipate request to a processor -- so the semantic
   3.147 - * layer is the only place it will be seen and/or used.
   3.148 - *
   3.149 - *It does almost the same thing as suspend, except don't need to save the
   3.150 - * stack nor set the nextInstrPt
   3.151 - *
   3.152 - *As of June 30, 2010  just implementing as a call to suspend -- just sugar
   3.153 - */
   3.154 -void
   3.155 -VMS__return_from_fn( VirtProcr *animatingPr )
   3.156 - {
   3.157 -   VMS__suspend_procr( animatingPr );
   3.158 - }
   3.159 -
   3.160  
   3.161  /*Not sure yet the form going to put "dissipate" in, so this is the third
   3.162   * possibility -- the semantic layer can just make a macro that looks like
   3.163 @@ -439,7 +451,7 @@
   3.164  //TODO: add a semantic-layer supplied "freer" for the semantic-data portion
   3.165  // of a request -- IE call with both a virt procr and a fn-ptr to request
   3.166  // freer (also maybe put sem request freer as a field in virt procr?)
   3.167 -//VMSHW relies right now on this only freeing VMS layer of request -- the
   3.168 +//SSR relies right now on this only freeing VMS layer of request -- the
   3.169  // semantic portion of request is alloc'd and freed by request handler
   3.170  void
   3.171  VMS__free_request( VMSReqst *req )
   3.172 @@ -453,11 +465,23 @@
   3.173  
   3.174     req = procrWithReq->requests;
   3.175     if( req == NULL ) return req;
   3.176 -   
   3.177 +
   3.178     procrWithReq->requests = procrWithReq->requests->nextReqst;
   3.179     return req;
   3.180   }
   3.181  
   3.182 +VMSReqst *
   3.183 +VMS__free_top_and_give_next_request_from( VirtProcr *procrWithReq )
   3.184 + { VMSReqst *req;
   3.185 +
   3.186 +   req = procrWithReq->requests;
   3.187 +   if( req == NULL ) return req;
   3.188 +
   3.189 +   procrWithReq->requests = procrWithReq->requests->nextReqst;
   3.190 +   VMS__free_request( req );
   3.191 +   return procrWithReq->requests;
   3.192 + }
   3.193 +
   3.194  inline int
   3.195  VMS__isSemanticReqst( VMSReqst *req )
   3.196   {
   3.197 @@ -562,7 +586,7 @@
   3.198   * the core loop threads have all exited)
   3.199   *
   3.200   *In here,create one core-loop shut-down processor for each core loop and put
   3.201 - * them all directly into the workQ.
   3.202 + * them all directly into the readyToAnimateQ.
   3.203   *Note, this function can ONLY be called after the semantic environment no
   3.204   * longer cares if AppVPs get animated after the point this is called.  In
   3.205   * other words, this can be used as an abort, or else it should only be
   3.206 @@ -573,15 +597,13 @@
   3.207  VMS__handle_shutdown_reqst( void *dummy, VirtProcr *animatingPr )
   3.208   { int coreIdx;
   3.209     VirtProcr *shutDownPr;
   3.210 -   VMSQueueStruc *workQ = _VMSWorkQ;
   3.211  
   3.212        //create the shutdown processors, one for each core loop -- put them
   3.213 -      // directly into _VMSWorkQ -- each core will die when gets one, so
   3.214 -      // the system distributes them evenly itself.
   3.215 +      // directly into the Q -- each core will die when gets one
   3.216     for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ )
   3.217      {
   3.218        shutDownPr = VMS__create_procr( &endOSThreadFn, NULL );
   3.219 -      writeVMSQ( shutDownPr, workQ );
   3.220 +      writeSRSWQ( shutDownPr, _VMSMasterEnv->readyToAnimateQs[coreIdx] );
   3.221      }
   3.222  
   3.223   }
   3.224 @@ -620,26 +642,36 @@
   3.225   }
   3.226  
   3.227  
   3.228 -
   3.229 -/*This is called after the threads have shut down and control as returned
   3.230 +/*This is called after the threads have shut down and control has returned
   3.231   * to the semantic layer, in the entry point function in the main thread.
   3.232   * It has to free anything allocated during VMS_init, and any other alloc'd
   3.233   * locations that might be left over.
   3.234   */
   3.235  void
   3.236  VMS__cleanup_after_shutdown()
   3.237 - { int i;
   3.238 - 
   3.239 -   free( _VMSWorkQ );
   3.240 -   free( _VMSMasterEnv->filledSlots );
   3.241 -   for( i = 0; i < NUM_SCHED_SLOTS; i++ )
   3.242 + { 
   3.243 +   SRSWQueueStruc **readyToAnimateQs;
   3.244 +   int              coreIdx;
   3.245 +   VirtProcr      **masterVPs;
   3.246 +   SchedSlot     ***allSchedSlots; //ptr to array of ptrs
   3.247 +
   3.248 +   readyToAnimateQs = _VMSMasterEnv->readyToAnimateQs;
   3.249 +   masterVPs        = _VMSMasterEnv->masterVPs;
   3.250 +   allSchedSlots    = _VMSMasterEnv->allSchedSlots;
   3.251 +   
   3.252 +   for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ )
   3.253      {
   3.254 -      free( _VMSMasterEnv->schedSlots[i] );
   3.255 +      freeSRSWQ( readyToAnimateQs[ coreIdx ] );
   3.256 +
   3.257 +      VMS__handle_dissipate_reqst( masterVPs[ coreIdx ] );
   3.258 +      
   3.259 +      freeSchedSlots( allSchedSlots[ coreIdx ] );
   3.260      }
   3.261 +   
   3.262 +   free( _VMSMasterEnv->readyToAnimateQs );
   3.263 +   free( _VMSMasterEnv->masterVPs );
   3.264 +   free( _VMSMasterEnv->allSchedSlots );
   3.265  
   3.266 -   free( _VMSMasterEnv->schedSlots);
   3.267 -   VMS__handle_dissipate_reqst( _VMSMasterEnv->masterVirtPr );
   3.268 -   
   3.269     free( _VMSMasterEnv );
   3.270   }
   3.271  
     4.1 --- a/VMS.h	Mon Aug 09 02:24:31 2010 -0700
     4.2 +++ b/VMS.h	Wed Sep 01 08:23:39 2010 -0700
     4.3 @@ -14,6 +14,11 @@
     4.4  #include "Queue_impl/BlockingQueue.h"
     4.5  #include <pthread.h>
     4.6  
     4.7 +   //When DEBUG is defined, VMS does sequential exe in the main thread
     4.8 +   // It still does co-routines and all the mechanisms are the same, it just
     4.9 +   // has only a single thread and animates VPs one at a time
    4.10 +//#define DEBUG
    4.11 +
    4.12     //This value is the number of hardware threads in the shared memory
    4.13     // machine
    4.14  #define NUM_CORES        4
    4.15 @@ -22,8 +27,10 @@
    4.16  //#define NUM_SCHED_SLOTS  (2 * NUM_CORES + 1)
    4.17  #define NUM_SCHED_SLOTS  3
    4.18  
    4.19 -   // 8K stack
    4.20 -#define VIRT_PROCR_STACK_SIZE 0x20000
    4.21 +#define READYTOANIMATE_RETRIES 10000
    4.22 +
    4.23 +   // stack
    4.24 +#define VIRT_PROCR_STACK_SIZE 0x10000
    4.25  
    4.26     //256M of total memory for VMS application to VMS__malloc
    4.27  #define MASSIVE_MALLOC_SIZE 0x10000000
    4.28 @@ -43,7 +50,7 @@
    4.29  typedef struct _VMSReqst   VMSReqst;
    4.30  typedef struct _VirtProcr  VirtProcr;
    4.31  
    4.32 -typedef VirtProcr * (*SlaveScheduler)  ( void * );        //semEnv
    4.33 +typedef VirtProcr * (*SlaveScheduler)  ( void *, int );   //semEnv, coreIdx
    4.34  typedef void  (*RequestHandler)  ( VirtProcr *, void * ); //prWReqst, semEnv
    4.35  typedef void  (*VirtProcrFnPtr)  ( void *, VirtProcr * ); //initData, animPr
    4.36  typedef void    VirtProcrFn      ( void *, VirtProcr * ); //initData, animPr
    4.37 @@ -109,25 +116,21 @@
    4.38  
    4.39  typedef struct
    4.40   {
    4.41 -   SlaveScheduler slaveScheduler;
    4.42 -   RequestHandler requestHandler;
    4.43 +   SlaveScheduler   slaveScheduler;
    4.44 +   RequestHandler   requestHandler;
    4.45     
    4.46 -   SchedSlot    **schedSlots;
    4.47 -   SchedSlot    **filledSlots;
    4.48 -   int            numToPrecede;
    4.49 -   
    4.50 -   volatile int   stillRunning;
    4.51 -   
    4.52 -   VirtProcr     *masterVirtPr;
    4.53 +   SchedSlot     ***allSchedSlots;
    4.54 +   SRSWQueueStruc **readyToAnimateQs;
    4.55 +   VirtProcr      **masterVPs;
    4.56  
    4.57 -   void          *semanticEnv;
    4.58 -   void          *OSEventStruc;    //for future, when add I/O to BLIS
    4.59 +   void            *semanticEnv;
    4.60 +   void            *OSEventStruc;   //for future, when add I/O to BLIS
    4.61  
    4.62 -   void          *coreLoopEndPt; //addr to jump to to shut down a coreLoop
    4.63 +   void            *coreLoopStartPt;//addr to jump to to re-enter coreLoop
    4.64 +   void            *coreLoopEndPt;  //addr to jump to to shut down a coreLoop
    4.65  
    4.66 -   int            setupComplete;
    4.67 -
    4.68 -   void          *mallocChunk;
    4.69 +   int              setupComplete;
    4.70 +   int              masterLock;
    4.71   }
    4.72  MasterEnv;
    4.73  
    4.74 @@ -149,11 +152,6 @@
    4.75  
    4.76  volatile MasterEnv      *_VMSMasterEnv;
    4.77  
    4.78 -   //workQ is global, static, and volatile so that core loop has its location
    4.79 -   // hard coded, and reloads every time through the loop -- that way don't
    4.80 -   // need to save any regs used by core loop
    4.81 -volatile VMSQueueStruc  *_VMSWorkQ;
    4.82 -
    4.83  //==========================
    4.84  void
    4.85  VMS__init();
    4.86 @@ -190,6 +188,9 @@
    4.87  VMSReqst *
    4.88  VMS__take_top_request_from( VirtProcr *reqstingPr );
    4.89  
    4.90 +VMSReqst *
    4.91 +VMS__free_top_and_give_next_request_from( VirtProcr *procrWithReq );
    4.92 +
    4.93  inline void *
    4.94  VMS__take_sem_reqst_from( VMSReqst *req );
    4.95