# HG changeset patch
# User Me
# Date 1283354619 25200
# Node ID e69579a0e7976d48febaa60141a53af90e760139
# Parent  c8823e0bb2b430713eeb99a088a8ea6d2e826a3f
Works multi-core..  pinned VP to a core loop

diff -r c8823e0bb2b4 -r e69579a0e797 CoreLoop.c
--- a/CoreLoop.c	Mon Aug 09 02:24:31 2010 -0700
+++ b/CoreLoop.c	Wed Sep 01 08:23:39 2010 -0700
@@ -30,14 +30,16 @@
  */
 void *
 coreLoop( void *paramsIn )
- {   
+ { 
    ThdParams      *coreLoopThdParams;
+   int             thisCoresIdx;
    VirtProcr      *currPr;
-   VMSQueueStruc  *workQ;
+   SRSWQueueStruc *readyToAnimateQ;
    unsigned long   coreMask;  //has 1 in bit positions of allowed cores
    int             errorCode;
    
    coreLoopThdParams = (ThdParams *)paramsIn;
+   thisCoresIdx = coreLoopThdParams->coreNum;
 
       //wait until signalled that setup is complete
    pthread_mutex_lock(   &suspendLock );
@@ -66,26 +68,57 @@
       //To get label addr in non-gcc compiler, can trick it by making a call
       // to a fn that does asm that pulls the "return"
       // addr off the stack and stores it in a pointed-to location.
-   _VMSMasterEnv->coreLoopEndPt = &&CoreLoopEndPt;
+   _VMSMasterEnv->coreLoopEndPt   = &&CoreLoopEndPt;
+   _VMSMasterEnv->coreLoopStartPt = &&CoreLoopStartPt;
    
-      //Core loop has no values live upon CoreLoopStartPt except workQ
+      //Core loop has no values live upon CoreLoopStartPt except
+      // _VMSMasterEnv
       // every value in the code is defined by a statement in core loop,
-      // after the start point -- with the one exception of _VMSWorkQ
+      // after the start point -- with the one exception of _VMSMasterEnv
  
    
       // Get to work!  --  virt procr jumps back here when suspends
       //Note, have to restore the frame-pointer before jump to here, to get
-      // this code to work right (workQ and so forth are frame-ptr relative)
+      // this code to work right (readyToAnimateQ and so forth are frame-ptr relative)
 CoreLoopStartPt:
    
       //Get virtual processor from queue
       //_VMSWorkQ must be a global, static volatile var, so not kept in reg,
       // which forces reloading the pointer after each jmp to this point
-   workQ  = _VMSWorkQ;
-   currPr = (VirtProcr *) readVMSQ( workQ );
+   readyToAnimateQ  = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx];
 
-   currPr->coreLoopStartPt = &&CoreLoopStartPt;  //to be sure -- chg for perf
-   currPr->coreAnimatedBy  = coreLoopThdParams->coreNum;
+   currPr = (VirtProcr *) readSRSWQ_NonBlocking( readyToAnimateQ );
+   int tries = 0; int gotLock = 0;
+   while( currPr == NULL )
+    {    //no VPs ready to animate, so run MasterVP --later make "try Master"
+         // VPs & put one in every queue at strategic point -- so have work
+         // avail if don't get lock & short-circuit out of it if master has
+         // recently run on another core
+         //TODO: perf -- "try Master" VP that checks if should run Master Fn
+         //But just letting queue run empty is quickest to see if pinning VP
+         // to core will solve the bizarre random seg-faults in system stack.
+
+         //check if get the MasterLock
+      gotLock = __sync_bool_compare_and_swap( &(_VMSMasterEnv->masterLock), \
+                                                 UNLOCKED, LOCKED );
+
+      if( gotLock )
+       {
+            //run own MasterVP -- when it's done, it unlocks MasterLock and
+            // jumps back to coreLoop's startPt
+         currPr = _VMSMasterEnv->masterVPs[thisCoresIdx];
+         break;  //end while -- have a VP to animate now
+       }
+         //Aug 24, 2010 -- changed so each core loop only gets work scheduled
+         // by its own master, so now stay in loop until get lock
+//      currPr = (VirtProcr *) readSRSWQ_NonBlocking( readyToAnimateQ );
+      
+      tries++;
+//      if( tries % 10000 == 0 ) printf("empty tries: %d\n", tries/10000 );
+      if( tries % READYTOANIMATE_RETRIES == 0 ) pthread_yield();
+    }
+   
+//   currPr->coreAnimatedBy  = coreLoopThdParams->coreNum;
 
       //switch to virt procr's stack and frame ptr then jump to virt procr fn
    void *stackPtr, *framePtr, *jmpPt, *coreLoopFramePtrAddr, \
@@ -139,36 +172,44 @@
 coreLoop_Seq( void *paramsIn )
  {
    VirtProcr      *currPr;
-   VMSQueueStruc  *workQ;
+   SRSWQueueStruc *readyToAnimateQ;
+   
+   ThdParams      *coreLoopThdParams;
+   int             thisCoresIdx;
+   
+   coreLoopThdParams = (ThdParams *)paramsIn;
+//   thisCoresIdx = coreLoopThdParams->coreNum;
+   thisCoresIdx = 0;
 
 
       //Save addr of "end core loop" label - jump to it to shut down coreloop
       //To get label addr in non-gcc compiler, can trick it by making a call
       // to a fn that does asm that pulls the "return"
       // addr off the stack and stores it in a pointed-to location.
-   _VMSMasterEnv->coreLoopEndPt = &&CoreLoopEndPt;
+   _VMSMasterEnv->coreLoopStartPt = &&SeqCoreLoopStartPt;
+   _VMSMasterEnv->coreLoopEndPt   = &&SeqCoreLoopEndPt;
 
-      //Core loop has no values live upon CoreLoopStartPt except workQ
+      //Nothing is live upon SeqCoreLoopStartPt except _VMSMasterEnv, thisCoresIdx
       // every value in the code is defined by a statement in core loop,
       // after the start point -- with the one exception of _VMSWorkQ
 
 
       // Get to work!  --  virt procr jumps back here when done or suspends
       //Note, have to restore the frame-pointer before jump to here, to get
-      // this code to work right (workQ and so forth are frame-ptr relative)
-CoreLoopStartPt:
+      // this code to work right (readyToAnimateQ and so forth are frame-ptr relative)
+SeqCoreLoopStartPt:
 
       //Get virtual processor from queue
       //_VMSWorkQ must be a global, static volatile var, so not kept in reg,
       // which forces reloading the pointer after each jmp to this point
-   workQ  = _VMSWorkQ;
-   currPr = (VirtProcr *) readVMSQ( workQ );
+   readyToAnimateQ  = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx];
+   currPr = (VirtProcr *) readSRSWQ_NonBlocking( readyToAnimateQ );
+   if( currPr == NULL )
+      currPr = _VMSMasterEnv->masterVPs[thisCoresIdx];
+   
 
 //   printf("core %d loop procr addr: %d\n", coreLoopThdParams->coreNum, \
 //       (int)currPr ); fflush(stdin);
-   currPr->coreLoopStartPt = &&CoreLoopStartPt;  //to be sure.(GCC specific)
-
-//   currPr->coreAnimatedBy  = coreLoopThdParams->coreNum;
 
       //switch to virt procr's stack and frame ptr then jump to virt procr
    void *stackPtr, *framePtr, *jmpPt, *coreLoopFramePtrAddr, \
@@ -210,7 +251,7 @@
       // to exit, and so the entry point function, which has been waiting for
       // all the threads to die will proceed, gather the result, and
       // return to the calling application.
-   CoreLoopEndPt:
+SeqCoreLoopEndPt:
    VMS__handle_dissipate_reqst( currPr ); //free shutdown pr, that jmpd here
    return;
  }
diff -r c8823e0bb2b4 -r e69579a0e797 MasterLoop.c
--- a/MasterLoop.c	Mon Aug 09 02:24:31 2010 -0700
+++ b/MasterLoop.c	Wed Sep 01 08:23:39 2010 -0700
@@ -42,74 +42,77 @@
  * all the others -- then does any extra setup needed and puts it into the
  * work queue.
  *However means have to make masterEnv a global static volatile the same way
- * did with workQ in core loop.  -- for performance, put the
+ * did with readyToAnimateQ in core loop.  -- for performance, put the
  * jump to the core loop directly in here, and have it directly jump back.
+ *
+ *
+ *Aug 18, 2010 -- Going to a separate MasterVP for each core, to see if this
+ * avoids the suspected bug in the system stack that causes bizarre faults
+ * at random places in the system code.
+ *
+ *So, this function is coupled to each of the MasterVPs, -- meaning this
+ * function can't rely on a particular stack and frame -- each MasterVP that
+ * animates this function has a different one.
+ *
+ *At this point, the masterLoop does not write itself into the queue anymore,
+ * instead, the coreLoop acquires the masterLock when it has nothing to
+ * animate, and then animates its own masterLoop.  However, still try to put
+ * several AppVPs into the queue to amortize the startup cost of switching
+ * to the MasterVP.  Note, don't have to worry about latency of requests much
+ * because most requests generate work for same core -- only latency issue
+ * is case when other cores starved and one core's requests generate work
+ * for them -- so keep max in queue to 3 or 4..
  */
-void masterLoop( void *initData, VirtProcr *masterPr )
+void masterLoop( void *initData, VirtProcr *animatingPr )
  { 
-   int             slotIdx, numFilled, filledSlotIdx, masterHasBeenQueued;
+   int             slotIdx;
    VirtProcr      *schedVirtPr;
-   SchedSlot      *currSlot, **schedSlots, **filledSlots;
+   SchedSlot      *currSlot, **schedSlots;
    MasterEnv      *masterEnv;
-   VMSQueueStruc  *workQ;
-   void           *jmpPt, *stackPtrAddr, *framePtrAddr, *stillRunningAddr;
-   void           *coreLoopFramePtr, *coreLoopStackPtr, *semanticEnv;
+   VMSQueueStruc  *readyToAnimateQ;
    
    SlaveScheduler  slaveScheduler;
    RequestHandler  requestHandler;
+   void           *semanticEnv;
 
-      //this will run as the first virt processor in workQ, and will be a
-      // new born -- so will do all the GCC-generated allocating space on
-      // the stack owned by master virt procr -- and will run this last bit
-      // of setup code..
+   int             thisCoresIdx;
+   VirtProcr      *masterPr;
+   volatile        VirtProcr *volatileMasterPr;
+   
+   volatileMasterPr = animatingPr;
+   masterPr         = volatileMasterPr; //used to force re-define after jmp
+
+      //First animation of each MasterVP will in turn animate this part
+      // of setup code.. (VP creator sets up the stack as if this function
+      // was called normally, but actually get here by jmp)
+      //So, setup values about stack ptr, jmp pt and all that
    masterPr->nextInstrPt = &&masterLoopStartPt;
 
-      //The second time MasterVP comes out of queue, the first animation of
-      // it hasn't written the stackPtr and framePtr yet -- but the second
-      // animation has already had its stackPtr and framePtr set to the old
-      // value by the coreLoop.  Fix this by writing the correct stack and
-      // frame pointers here, at which point they're correct in the first
-      // animation of MasterVP.
-      //TODO: remove writing stackPtr and framePtr at the bottom, for eff
-   stackPtrAddr      = &(masterPr->stackPtr);
-   framePtrAddr      = &(masterPr->framePtr);
 
-   asm volatile("movl %0,     %%eax;  \
-                 movl %%esp, (%%eax); \
-                 movl %1,     %%eax;  \
-                 movl %%ebp, (%%eax); "
-   /* outputs */ : "=g" (stackPtrAddr), "=g" (framePtrAddr)                 \
-   /* inputs  */ :                                                          \
-   /* clobber */ : "memory", "%eax", "%ebx"                                 \
-                );
+      //Note, got rid of writing the stack and frame ptr up here, because
+      // only one core can ever animate a given MasterVP, so don't need to
+      // communicate new frame and stack ptr to the MasterVP storage before
+      // a second version of that MasterVP can get animated on a different
+      // core.
+      //Also got rid of the busy-wait.
 
-
+   
    masterLoopStartPt:
 
-      //if another reference to same Master VirtProcr still going, busy-wait
-      //Could put this lower, but don't want to think about shared stack..
-   while( _VMSMasterEnv->stillRunning ) /*busy wait*/ ;
-      //TODO: want to do busy-wait as assembly, to be sure stack not touched?
+   masterEnv        = _VMSMasterEnv;
    
-      //this is the only master running now, set flag again
-   _VMSMasterEnv->stillRunning = TRUE;
-   masterEnv = _VMSMasterEnv;
+//TODO: verify re-load from frame after jmp; volatile above is on pointee, not ptr
+   masterPr         = volatileMasterPr;  //just to make sure after jmp
+   thisCoresIdx     = masterPr->coreAnimatedBy;
+   readyToAnimateQ  = masterEnv->readyToAnimateQs[thisCoresIdx];
+   schedSlots       = masterEnv->allSchedSlots[thisCoresIdx];
 
-      //TODO: gdb -- check that a volatile _VMSMasterEnv and _VMSWorkQ means
-      // all these will be re-filled every time jump here..
-   workQ            = _VMSWorkQ;
    requestHandler   = masterEnv->requestHandler;
    slaveScheduler   = masterEnv->slaveScheduler;
-   schedSlots       = masterEnv->schedSlots;
-   filledSlots      = masterEnv->filledSlots;
-   masterPr         = masterEnv->masterVirtPr;  //post-jmp clobbered, re-load
    semanticEnv      = masterEnv->semanticEnv;
 
-      //prepare for scheduling
-   numFilled = 0;
-   masterHasBeenQueued = FALSE;
 
-      //Poll each slot's Done flag -- slot 0 reserved for master, start at 1
+      //Poll each slot's Done flag
    for( slotIdx = 0; slotIdx < NUM_SCHED_SLOTS; slotIdx++)
     {
       currSlot = schedSlots[ slotIdx ];
@@ -125,63 +128,31 @@
       if( currSlot->needsProcrAssigned )
        {    //give slot a new virt procr
          schedVirtPr =
-          (*slaveScheduler)( semanticEnv );
+          (*slaveScheduler)( semanticEnv, thisCoresIdx );
          
          if( schedVirtPr != NULL )
           { currSlot->procrAssignedToSlot = schedVirtPr;
             schedVirtPr->schedSlot        = currSlot;
             currSlot->needsProcrAssigned  = FALSE;
 
-            filledSlots[ numFilled ]      = currSlot;
-
-            writeVMSQ( schedVirtPr, workQ );
-            numFilled += 1;
-            
-            if( numFilled == masterEnv->numToPrecede )
-             {
-               writeVMSQ( masterEnv->masterVirtPr, workQ );
-               masterHasBeenQueued = TRUE;
-             }
-
+            writeSRSWQ( schedVirtPr, readyToAnimateQ );
           }
        }
     }
 
-   if( !masterHasBeenQueued )
-    {
-      writeVMSQ( masterEnv->masterVirtPr, workQ );
-    }
 
-      //Adjust the number to precede, for next round -- assume rate of
-      // finishing work is stable -- which is a bad assumption!  But, just
-      // want something working for the moment, look at dynamic behavior
-      // later
-//TODO: look at dynamic behavior -- time-average numToPrecede or something
-   if( numFilled < NUM_CORES - 1 )
-    { 
-      masterEnv->numToPrecede = 1;
-    }
-   else
-    { masterEnv->numToPrecede = numFilled - NUM_CORES + 1;
-    }
-
-      //Save stack ptr and frame -- don't need to, take out later, but safe
-      // Also, wait to set stillRunning to FALSE until just before jump, to
-      // be safe -- although the two simulatneously animated MasterLoops
-      // are on different cores, so have different stacks, so no worries
-      // there.
-      //Restore CoreLoop's stack frame (and stack pointer, to be safe)
+      //Save stack ptr and frame, restore CoreLoop's stack and frame,
+      // and clear the MasterLock
       //TODO: cafefully verify don't need to force saving anything to stack
       // before jumping back to core loop.
+   void           *stackPtrAddr, *framePtrAddr, *masterLockAddr;
+   void           *jmpPt, *coreLoopFramePtr, *coreLoopStackPtr;
+
    stackPtrAddr      = &(masterPr->stackPtr);
    framePtrAddr      = &(masterPr->framePtr);
-   stillRunningAddr  = &(_VMSMasterEnv->stillRunning); //when race condition
-      //arises, stillRunning is shared between the two cores both animating
-      // MasterLoop -- but those two cores have different esp & ebp, so safe
-      // to change stack and frame pointer here, without one messing up other
-      // one
+   masterLockAddr    = &(_VMSMasterEnv->masterLock);
 
-   jmpPt             = masterPr->coreLoopStartPt;
+   jmpPt             = _VMSMasterEnv->coreLoopStartPt;
    coreLoopFramePtr  = masterPr->coreLoopFramePtr;//need this only
    coreLoopStackPtr  = masterPr->coreLoopStackPtr;//shouldn't need -- safety
    
@@ -196,7 +167,7 @@
                  movl $0x0, (%%ebx);  \
                  jmp  %%eax;"         \
    /* outputs */ : "=g" (stackPtrAddr), "=g" (framePtrAddr),                \
-                   "=g"(stillRunningAddr)                                   \
+                   "=g"(masterLockAddr)                                   \
    /* inputs  */ : "g" (jmpPt), "g"(coreLoopStackPtr), "g"(coreLoopFramePtr)\
    /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi" \
                 );//can probably make clobber list empty -- but safe for now
diff -r c8823e0bb2b4 -r e69579a0e797 VMS.c
--- a/VMS.c	Mon Aug 09 02:24:31 2010 -0700
+++ b/VMS.c	Wed Sep 01 08:23:39 2010 -0700
@@ -18,8 +18,8 @@
 void
 shutdownFn( void *dummy, VirtProcr *dummy2 );
 
-void
-create_sched_slots( MasterEnv *masterEnv );
+SchedSlot **
+create_sched_slots();
 
 void
 create_masterEnv();
@@ -48,7 +48,7 @@
  * initial virt procrs, ready to schedule them to slots when the masterLoop
  * asks.  Without this pattern, the semantic layer's setup would
  * have to modify slots directly to assign the initial virt-procrs, and put
- * them into the workQ itself, breaking the isolation completely.
+ * them into the readyToAnimateQ itself, breaking the isolation completely.
  *
  * 
  *The semantic layer creates the initial virt procr(s), and adds its
@@ -77,29 +77,45 @@
 
 void
 create_masterEnv()
- { MasterEnv  *masterEnv;
-   VMSQueueStruc *workQ;
-
-      //Make the central work-queue
-   _VMSWorkQ = makeVMSQ();
-   workQ     = _VMSWorkQ;
-
+ { MasterEnv       *masterEnv;
+   SRSWQueueStruc **readyToAnimateQs;
+   int              coreIdx;
+   VirtProcr      **masterVPs;
+   SchedSlot     ***allSchedSlots; //ptr to array of ptrs
+   
+      //Make the master env, which holds everything else
    _VMSMasterEnv = malloc( sizeof(MasterEnv) );
    masterEnv     = _VMSMasterEnv;
+      //Need to set start pt here 'cause used by seed procr, which is created
+      // before the first core loop starts up. -- not sure how yet..
+//   masterEnv->coreLoopStartPt = ;
+//   masterEnv->coreLoopEndPt   = ;
+   
+      //Make a readyToAnimateQ for each core loop
+   readyToAnimateQs = malloc( NUM_CORES * sizeof(SRSWQueueStruc *) );
+   masterVPs        = malloc( NUM_CORES * sizeof(VirtProcr *) );
 
-      //create the master virtual processor
-   masterEnv->masterVirtPr = VMS__create_procr( &masterLoop, masterEnv );
+      //One array of NUM_SCHED_SLOTS slots per core, core's masterVP scheds all
+   allSchedSlots    = malloc( NUM_CORES * sizeof(SchedSlot *) );
 
-   create_sched_slots( masterEnv );
+   for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ )
+    {
+      readyToAnimateQs[ coreIdx ] = makeSRSWQ();
+      
+         //Q: should give masterVP core-specific info as its init data?
+      masterVPs[ coreIdx ] = VMS__create_procr( &masterLoop, masterEnv );
+      masterVPs[ coreIdx ]->coreAnimatedBy = coreIdx;
+      allSchedSlots[ coreIdx ] = create_sched_slots(); //makes for one core
+    }
+   _VMSMasterEnv->readyToAnimateQs = readyToAnimateQs;
+   _VMSMasterEnv->masterVPs        = masterVPs;
+   _VMSMasterEnv->allSchedSlots    = allSchedSlots;
 
-   masterEnv->stillRunning = FALSE;
-   masterEnv->numToPrecede = NUM_CORES;
 
-      //First core loop to start up gets this, which will schedule seed Pr
-      //TODO: debug: check address of masterVirtPr
-   writeVMSQ( masterEnv->masterVirtPr, workQ );
 
-   numProcrsCreated = 1;  //global counter for debugging
+      //Aug 19, 2010:  no longer need to place initial masterVP into queue
+      // because coreLoop now controls -- animates its masterVP when no work
+
 
    //==================== malloc substitute ========================
    //
@@ -143,15 +159,12 @@
  }
  */
 
-void
-create_sched_slots( MasterEnv *masterEnv )
- { SchedSlot  **schedSlots, **filledSlots;
+SchedSlot **
+create_sched_slots()
+ { SchedSlot  **schedSlots;
    int i;
 
    schedSlots  = malloc( NUM_SCHED_SLOTS * sizeof(SchedSlot *) );
-   filledSlots = malloc( NUM_SCHED_SLOTS * sizeof(SchedSlot *) );
-   masterEnv->schedSlots  = schedSlots;
-   masterEnv->filledSlots = filledSlots;
 
    for( i = 0; i < NUM_SCHED_SLOTS; i++ )
     {
@@ -161,6 +174,18 @@
       schedSlots[i]->workIsDone         = FALSE;
       schedSlots[i]->needsProcrAssigned = TRUE;
     }
+   return schedSlots;
+ }
+
+/*Frees the NUM_SCHED_SLOTS slots the array points to, then the array itself*/
+void
+freeSchedSlots( SchedSlot **schedSlots )
+ { int i;
+   for( i = 0; i < NUM_SCHED_SLOTS; i++ )
+    {
+      free( schedSlots[i] );
+    }
+   free( schedSlots );   //array of slot ptrs goes last, after its elements
+ }
 
 
@@ -267,6 +292,8 @@
    newPr->procrID     = numProcrsCreated++;
    newPr->nextInstrPt = fnPtr;
    newPr->initialData = initialData;
+   newPr->requests    = NULL;
+//   newPr->coreLoopStartPt = _VMSMasterEnv->coreLoopStartPt;
 
       //fnPtr takes two params -- void *initData & void *animProcr
       //alloc stack locations, make stackPtr be the highest addr minus room
@@ -314,7 +341,7 @@
    stackPtrAddr      = &(callingPr->stackPtr);
    framePtrAddr      = &(callingPr->framePtr);
 
-   jmpPt             = callingPr->coreLoopStartPt;
+   jmpPt             = _VMSMasterEnv->coreLoopStartPt;
    coreLoopFramePtr  = callingPr->coreLoopFramePtr;//need this only
    coreLoopStackPtr  = callingPr->coreLoopStackPtr;//shouldn't need -- safety
 
@@ -350,21 +377,6 @@
 
 
 
-/*This is equivalent to "jump back to core loop" -- it's mainly only used
- * just after adding dissipate request to a processor -- so the semantic
- * layer is the only place it will be seen and/or used.
- *
- *It does almost the same thing as suspend, except don't need to save the
- * stack nor set the nextInstrPt
- *
- *As of June 30, 2010  just implementing as a call to suspend -- just sugar
- */
-void
-VMS__return_from_fn( VirtProcr *animatingPr )
- {
-   VMS__suspend_procr( animatingPr );
- }
-
 
 /*Not sure yet the form going to put "dissipate" in, so this is the third
  * possibility -- the semantic layer can just make a macro that looks like
@@ -439,7 +451,7 @@
 //TODO: add a semantic-layer supplied "freer" for the semantic-data portion
 // of a request -- IE call with both a virt procr and a fn-ptr to request
 // freer (also maybe put sem request freer as a field in virt procr?)
-//VMSHW relies right now on this only freeing VMS layer of request -- the
+//SSR relies right now on this only freeing VMS layer of request -- the
 // semantic portion of request is alloc'd and freed by request handler
 void
 VMS__free_request( VMSReqst *req )
@@ -453,11 +465,23 @@
 
    req = procrWithReq->requests;
    if( req == NULL ) return req;
-   
+
    procrWithReq->requests = procrWithReq->requests->nextReqst;
    return req;
  }
 
+VMSReqst *   //returns request now on top (not removed); NULL when list empty
+VMS__free_top_and_give_next_request_from( VirtProcr *procrWithReq )
+ { VMSReqst *req;
+
+   req = procrWithReq->requests;
+   if( req == NULL ) return req;   //no requests -- nothing to free
+      //unlink the top request, free it, then hand back the one after it
+   procrWithReq->requests = procrWithReq->requests->nextReqst;
+   VMS__free_request( req );
+   return procrWithReq->requests;
+ }
+
 inline int
 VMS__isSemanticReqst( VMSReqst *req )
  {
@@ -562,7 +586,7 @@
  * the core loop threads have all exited)
  *
  *In here,create one core-loop shut-down processor for each core loop and put
+ * them directly into the cores' readyToAnimateQs, one per core.
+ * them all directly into the readyToAnimateQ.
  *Note, this function can ONLY be called after the semantic environment no
  * longer cares if AppVPs get animated after the point this is called.  In
  * other words, this can be used as an abort, or else it should only be
@@ -573,15 +597,13 @@
 VMS__handle_shutdown_reqst( void *dummy, VirtProcr *animatingPr )
  { int coreIdx;
    VirtProcr *shutDownPr;
-   VMSQueueStruc *workQ = _VMSWorkQ;
 
       //create the shutdown processors, one for each core loop -- put them
-      // directly into _VMSWorkQ -- each core will die when gets one, so
-      // the system distributes them evenly itself.
+      // directly into each core's own Q -- each core will die when it gets one
    for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ )
     {
       shutDownPr = VMS__create_procr( &endOSThreadFn, NULL );
-      writeVMSQ( shutDownPr, workQ );
+      writeSRSWQ( shutDownPr, _VMSMasterEnv->readyToAnimateQs[coreIdx] );
     }
 
  }
@@ -620,26 +642,36 @@
  }
 
 
-
-/*This is called after the threads have shut down and control as returned
+/*This is called after the threads have shut down and control has returned
  * to the semantic layer, in the entry point function in the main thread.
  * It has to free anything allocated during VMS_init, and any other alloc'd
  * locations that might be left over.
  */
 void
 VMS__cleanup_after_shutdown()
- { int i;
- 
-   free( _VMSWorkQ );
-   free( _VMSMasterEnv->filledSlots );
-   for( i = 0; i < NUM_SCHED_SLOTS; i++ )
+ { 
+   SRSWQueueStruc **readyToAnimateQs;
+   int              coreIdx;
+   VirtProcr      **masterVPs;
+   SchedSlot     ***allSchedSlots; //ptr to array of ptrs
+
+   readyToAnimateQs = _VMSMasterEnv->readyToAnimateQs;
+   masterVPs        = _VMSMasterEnv->masterVPs;
+   allSchedSlots    = _VMSMasterEnv->allSchedSlots;
+   
+   for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ )
     {
-      free( _VMSMasterEnv->schedSlots[i] );
+      freeSRSWQ( readyToAnimateQs[ coreIdx ] );
+
+      VMS__handle_dissipate_reqst( masterVPs[ coreIdx ] );
+      
+      freeSchedSlots( allSchedSlots[ coreIdx ] );
     }
+   
+   free( _VMSMasterEnv->readyToAnimateQs );
+   free( _VMSMasterEnv->masterVPs );
+   free( _VMSMasterEnv->allSchedSlots );
 
-   free( _VMSMasterEnv->schedSlots);
-   VMS__handle_dissipate_reqst( _VMSMasterEnv->masterVirtPr );
-   
    free( _VMSMasterEnv );
  }
 
diff -r c8823e0bb2b4 -r e69579a0e797 VMS.h
--- a/VMS.h	Mon Aug 09 02:24:31 2010 -0700
+++ b/VMS.h	Wed Sep 01 08:23:39 2010 -0700
@@ -14,6 +14,11 @@
 #include "Queue_impl/BlockingQueue.h"
 #include <pthread.h>
 
+   //When DEBUG is defined, VMS does sequential exe in the main thread
+   // It still does co-routines and all the mechanisms are the same, it just
+   // has only a single thread and animates VPs one at a time
+//#define DEBUG
+
    //This value is the number of hardware threads in the shared memory
    // machine
 #define NUM_CORES        4
@@ -22,8 +27,10 @@
 //#define NUM_SCHED_SLOTS  (2 * NUM_CORES + 1)
 #define NUM_SCHED_SLOTS  3
 
-   // 8K stack
-#define VIRT_PROCR_STACK_SIZE 0x20000
+#define READYTOANIMATE_RETRIES 10000
+
+   // 64K stack
+#define VIRT_PROCR_STACK_SIZE 0x10000
 
    //256M of total memory for VMS application to VMS__malloc
 #define MASSIVE_MALLOC_SIZE 0x10000000
@@ -43,7 +50,7 @@
 typedef struct _VMSReqst   VMSReqst;
 typedef struct _VirtProcr  VirtProcr;
 
-typedef VirtProcr * (*SlaveScheduler)  ( void * );        //semEnv
+typedef VirtProcr * (*SlaveScheduler)  ( void *, int );   //semEnv, coreIdx
 typedef void  (*RequestHandler)  ( VirtProcr *, void * ); //prWReqst, semEnv
 typedef void  (*VirtProcrFnPtr)  ( void *, VirtProcr * ); //initData, animPr
 typedef void    VirtProcrFn      ( void *, VirtProcr * ); //initData, animPr
@@ -109,25 +116,21 @@
 
 typedef struct
  {
-   SlaveScheduler slaveScheduler;
-   RequestHandler requestHandler;
+   SlaveScheduler   slaveScheduler;
+   RequestHandler   requestHandler;
    
-   SchedSlot    **schedSlots;
-   SchedSlot    **filledSlots;
-   int            numToPrecede;
-   
-   volatile int   stillRunning;
-   
-   VirtProcr     *masterVirtPr;
+   SchedSlot     ***allSchedSlots;
+   SRSWQueueStruc **readyToAnimateQs;
+   VirtProcr      **masterVPs;
 
-   void          *semanticEnv;
-   void          *OSEventStruc;    //for future, when add I/O to BLIS
+   void            *semanticEnv;
+   void            *OSEventStruc;   //for future, when add I/O to BLIS
 
-   void          *coreLoopEndPt; //addr to jump to to shut down a coreLoop
+   void            *coreLoopStartPt;//addr to jump to to re-enter coreLoop
+   void            *coreLoopEndPt;  //addr to jump to to shut down a coreLoop
 
-   int            setupComplete;
-
-   void          *mallocChunk;
+   int              setupComplete;
+   int              masterLock;
  }
 MasterEnv;
 
@@ -149,11 +152,6 @@
 
 volatile MasterEnv      *_VMSMasterEnv;
 
-   //workQ is global, static, and volatile so that core loop has its location
-   // hard coded, and reloads every time through the loop -- that way don't
-   // need to save any regs used by core loop
-volatile VMSQueueStruc  *_VMSWorkQ;
-
 //==========================
 void
 VMS__init();
@@ -190,6 +188,9 @@
 VMSReqst *
 VMS__take_top_request_from( VirtProcr *reqstingPr );
 
+VMSReqst *
+VMS__free_top_and_give_next_request_from( VirtProcr *procrWithReq );
+
 inline void *
 VMS__take_sem_reqst_from( VMSReqst *req );