# HG changeset patch
# User Me
# Date 1280183153 25200
# Node ID 668278fa7a630e991b057a386901217e6bfa073c
# Parent  c556193f7211ca3759f3e563179de364e2614850
Sequential -- just starting to add sequential version

diff -r c556193f7211 -r 668278fa7a63 CoreLoop.c
--- a/CoreLoop.c	Sat Jul 24 08:58:47 2010 -0700
+++ b/CoreLoop.c	Mon Jul 26 15:25:53 2010 -0700
@@ -9,7 +9,9 @@
 #include "Queue_impl/BlockingQueue.h"

 #include
+#include
 #include
+
 #include
 #include
@@ -26,7 +28,7 @@
 { ThdParams *coreLoopThdParams;
   VirtProcr *currPr;
-  CASQueueStruc *workQ;
+  VMSQueueStruc *workQ;
   unsigned long coreMask; //has 1 in bit positions of allowed cores
   int errorCode;
@@ -34,29 +36,37 @@
   coreLoopThdParams = (ThdParams *)paramsIn;

   //wait until signalled that setup is complete
-  pthread_mutex_lock( _VMSMasterEnv->suspend_mutex );
+  pthread_mutex_lock( &suspendLock );
   while( !(_VMSMasterEnv->setupComplete) )
    {
-     pthread_cond_wait( _VMSMasterEnv->suspend_cond,
-                        _VMSMasterEnv->suspend_mutex );
+     pthread_cond_wait( &suspend_cond,
+                        &suspendLock );
    }
-  pthread_mutex_unlock( _VMSMasterEnv->suspend_mutex );
+  pthread_mutex_unlock( &suspendLock );
+
+  printf( "\nCore unsuspended: %d\n", coreLoopThdParams->coreNum );

   //set thread affinity
   //Linux requires pinning thd to core inside thread-function
   //Designate a core by a 1 in bit-position corresponding to the core
-  coreMask = 1 << coreLoopThdParams->coreNum
+//  cpu_set_t cpuMask;
+//  CPU_ZERO( &cpuMask );
+//  CPU_SET( coreLoopThdParams->coreNum, &cpuMask );
+
+  coreMask = 1 << coreLoopThdParams->coreNum;
+
+  pthread_t selfThd = pthread_self();
   errorCode =
-   pthread_setaffinity_np( pthread_self(), sizeof(coreMask), coreMask);
+   pthread_setaffinity_np( selfThd, sizeof(coreMask), &coreMask);

-  if(errorCode){ printf("\nset affinity failure\n"); exit(); }
+  if(errorCode){ printf("\nset affinity failure\n"); exit(0); }

   //Save addr of "end core loop" label - jump to it to shut down coreloop
   //To get label addr in non-gcc compiler, can trick it by making
a call
   // to a fn that does asm that pulls the "return"
   // addr off the stack and stores it in a pointed-to location.
-  _VMSMasterEnv->coreLoopShutDownPt = &&EndCoreLoop;
+  _VMSMasterEnv->coreLoopShutDownPt = &&CoreLoopEndPt;

   //Core loop has no values live upon CoreLoopStartPt except workQ
   // every value in the code is defined by a statement in core loop,
@@ -72,7 +82,7 @@
   //_VMSWorkQ must be a global, static volatile var, so not kept in reg,
   // which forces reloading the pointer after each jmp to this point
   workQ = _VMSWorkQ;
-  currPr = (VirtProcr *) readCASQ( workQ );
+  currPr = (VirtProcr *) readVMSQ( workQ );

// printf("core %d loop procr addr: %d\n", coreLoopThdParams->coreNum, \
//        (int)currPr ); fflush(stdin);
@@ -116,6 +126,6 @@
   //========================================================================

   //jmp to here when want to shut down the VMS system
-  EndCoreLoop:
+  CoreLoopEndPt:
   pthread_exit( NULL );
 }
diff -r c556193f7211 -r 668278fa7a63 CoreLoop_Seq.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/CoreLoop_Seq.c	Mon Jul 26 15:25:53 2010 -0700
@@ -0,0 +1,97 @@
+/*
+ * Copyright 2010 OpenSourceCodeStewardshipFoundation
+ *
+ * Licensed under BSD
+ */
+
+
+#include "VMS.h"
+#include "Queue_impl/BlockingQueue.h"
+
+#include
+#include
+#include
+
+
+
+/*This is the loop that runs in the PThread pinned to each core
+ * get work-unit struc from queue,
+ * call function-ptr, passing it pointer to data
+ * transfer return value to slave's "requests" pointer
+ * write the slave's "Done" flag and repeat.
+ */
+//pthread_create requires ptr to func that takes void * and returns void *
+void *
+coreLoop_Seq( void *paramsIn )
+ { VirtProcr *currPr;
+   VMSQueueStruc *workQ;
+
+
+   //Save addr of "end core loop" label - jump to it to shut down coreloop
+   //To get label addr in non-gcc compiler, can trick it by making a call
+   // to a fn that does asm that pulls the "return"
+   // addr off the stack and stores it in a pointed-to location.
+  _VMSMasterEnv->coreLoopShutDownPt = &&CoreLoopEndPt;
+
+  //Core loop has no values live upon CoreLoopStartPt except workQ
+  // every value in the code is defined by a statement in core loop,
+  // after the start point -- with the one exception of _VMSWorkQ
+
+
+  // Get to work! -- virt procr jumps back here when done or suspends
+  //Note, have to restore the frame-pointer before jump to here, to get
+  // this code to work right (workQ and so forth are frame-ptr relative)
+CoreLoopStartPt:
+
+  //Get virtual processor from queue
+  //_VMSWorkQ must be a global, static volatile var, so not kept in reg,
+  // which forces reloading the pointer after each jmp to this point
+  workQ = _VMSWorkQ;
+  currPr = (VirtProcr *) readVMSQ( workQ );
+
+// printf("seq core loop procr addr: %d\n", (int)currPr ); fflush(stdout);
+  currPr->coreLoopStartPt = &&CoreLoopStartPt; //to be sure.(GCC specific)
+
+  currPr->coreAnimatedBy = 0; //seq version has no ThdParams -- only core 0
+
+  //switch to virt procr's stack and frame ptr then jump to virt procr
+  void *stackPtr, *framePtr, *jmpPt, *coreLoopFramePtrAddr, \
+       *coreLoopStackPtrAddr;
+
+  stackPtr = currPr->stackPtr;
+  framePtr = currPr->framePtr;
+  jmpPt    = currPr->nextInstrPt;
+  coreLoopFramePtrAddr = &(currPr->coreLoopFramePtr);
+  coreLoopStackPtrAddr = &(currPr->coreLoopStackPtr);
+
+  //Save the core loop's stack and frame pointers into virt procr struct
+  // then switch to stack ptr and frame ptr of virt procr & jmp to it
+  //This was a pain to get right because GCC converts the "(jmpPt)" to
+  // frame-relative mem-op -- so generated machine code first changed the
+  // frame pointer, then tried to jump to an addr stored on stack, which
+  // it accessed as an offset from frame-ptr! (wrong frame-ptr now)
+  //Explicitly loading into eax before changing frame-ptr fixed it
+  //Also, it turns "(currPr->coreLoopFramePtr)" into a temporary on the
+  // stack, so "movl %%ebp, %0" saves to the temp, NOT the data-struc!
+  asm volatile("movl %0, %%eax; \
+                movl %%esp, (%%eax); \
+                movl %1, %%eax; \
+                movl %%ebp, (%%eax); \
+                movl %2, %%eax; \
+                movl %3, %%esp; \
+                movl %4, %%ebp; \
+                jmp %%eax" \
+  /* outputs */ : "=g"(coreLoopStackPtrAddr), \
+                  "=g"(coreLoopFramePtrAddr) \
+  /* inputs  */ : "g" (jmpPt), "g" (stackPtr), "g" (framePtr) \
+  /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi" \
+              );
+
+  //========================================================================
+
+  //jmp to here when want to shut down the VMS system
+ CoreLoopEndPt:
+  return NULL; //fn returns void *, so must return a value
+ }
diff -r c556193f7211 -r 668278fa7a63 MasterLoop.c
--- a/MasterLoop.c	Sat Jul 24 08:58:47 2010 -0700
+++ b/MasterLoop.c	Mon Jul 26 15:25:53 2010 -0700
@@ -6,7 +6,6 @@

-#include
 #include
 #include
 #include
@@ -48,11 +47,11 @@
 */
 void
 masterLoop( void *initData, VirtProcr *masterPr )
 {
-  int slotIdx, numFilled, numInFirstChunk, filledSlotIdx;
+  int slotIdx, numFilled, filledSlotIdx, masterHasBeenQueued;
   VirtProcr *schedVirtPr;
   SchedSlot *currSlot, **schedSlots, **filledSlots;
   MasterEnv *masterEnv;
-  CASQueueStruc *workQ;
+  VMSQueueStruc *workQ;
   void *jmpPt, *stackPtrAddr, *framePtrAddr, *stillRunningAddr;
   void *coreLoopFramePtr, *coreLoopStackPtr, *semanticEnv;
@@ -65,7 +64,26 @@
   // of setup code..
   masterPr->nextInstrPt = &&masterLoopStartPt;

-
+  //The second time MasterVP comes out of queue, the first animation of
+  // it hasn't written the stackPtr and framePtr yet -- but the second
+  // animation has already had its stackPtr and framePtr set to the old
+  // value by the coreLoop. Fix this by writing the correct stack and
+  // frame pointers here, at which point they're correct in the first
+  // animation of MasterVP.
+ //TODO: remove writing stackPtr and framePtr at the bottom, for eff + stackPtrAddr = &(masterPr->stackPtr); + framePtrAddr = &(masterPr->framePtr); + + asm volatile("movl %0, %%eax; \ + movl %%esp, (%%eax); \ + movl %1, %%eax; \ + movl %%ebp, (%%eax); " + /* outputs */ : "=g" (stackPtrAddr), "=g" (framePtrAddr) \ + /* inputs */ : \ + /* clobber */ : "memory", "%eax", "%ebx" \ + ); + + masterLoopStartPt: //if another reference to same Master VirtProcr still going, busy-wait @@ -88,10 +106,11 @@ semanticEnv = masterEnv->semanticEnv; //prepare for scheduling - masterEnv->numFilled = 0; + numFilled = 0; + masterHasBeenQueued = FALSE; //Poll each slot's Done flag -- slot 0 reserved for master, start at 1 - for( slotIdx = 1; slotIdx < NUM_SCHED_SLOTS; slotIdx++) + for( slotIdx = 0; slotIdx < NUM_SCHED_SLOTS; slotIdx++) { currSlot = schedSlots[ slotIdx ]; @@ -110,50 +129,63 @@ if( schedVirtPr != NULL ) { currSlot->procrAssignedToSlot = schedVirtPr; - schedVirtPr->schedSlot = currSlot; + schedVirtPr->schedSlot = currSlot; + currSlot->needsProcrAssigned = FALSE; - filledSlots[ masterEnv->numFilled ] = currSlot; - masterEnv->numFilled += 1; + filledSlots[ numFilled ] = currSlot; + numFilled += 1; - currSlot->needsProcrAssigned = FALSE; + writeVMSQ( schedVirtPr, workQ ); + if( numFilled == masterEnv->numToPrecede ) + { + writeVMSQ( masterEnv->masterVirtPr, workQ ); + masterHasBeenQueued = TRUE; + } + } } } + if( !masterHasBeenQueued ) + { + writeVMSQ( masterEnv->masterVirtPr, workQ ); + } + + //Adjust the number to precede, for next round -- assume rate of + // finishing work is stable -- which is a bad assumption! 
But, just + // want something working for the moment, look at dynamic behavior + // later +//TODO: look at dynamic behavior -- time-average numToPrecede or something + if( numFilled < NUM_CORES - 1 ) + { + masterEnv->numToPrecede = 0; + } + else + { masterEnv->numToPrecede = numFilled - NUM_CORES + 1; + } +/* //put some scheduled slaves in, then Master continuation, then rest //Adjust position of master such that it maintains close to a fixed // ratio --> make NUM_CORES - 1 slots or fewer come after the master - numFilled = masterEnv->numFilled; - - int numPrecede = numFilled; - int numFollow = NUM_CORES - 1; - - if( numFilled < numFollow ) - { numFollow = numFilled; - numPrecede = 0; - } - else - { numPrecede -= numFollow; - } - + for( filledSlotIdx = 0; filledSlotIdx < numPrecede; filledSlotIdx++) { - writeCASQ( filledSlots[ filledSlotIdx ]->procrAssignedToSlot, workQ ); + writeVMSQ( filledSlots[ filledSlotIdx ]->procrAssignedToSlot, workQ ); } //enqueue continuation of this loop // note that After this enqueue, continuation might sneak through - writeCASQ( masterEnv->masterVirtPr, workQ ); + writeVMSQ( masterEnv->masterVirtPr, workQ ); for( filledSlotIdx = numPrecede; filledSlotIdx < numFilled; filledSlotIdx++) { - writeCASQ( filledSlots[ filledSlotIdx ]->procrAssignedToSlot, workQ ); + writeVMSQ( filledSlots[ filledSlotIdx ]->procrAssignedToSlot, workQ ); } masterEnv->numFilled = 0; - +*/ //Save stack ptr and frame -- don't need to, take out later, but safe // Also, wait to set stillRunning to FALSE until just before jump, to diff -r c556193f7211 -r 668278fa7a63 VMS.c --- a/VMS.c Sat Jul 24 08:58:47 2010 -0700 +++ b/VMS.c Mon Jul 26 15:25:53 2010 -0700 @@ -12,6 +12,8 @@ #include "Queue_impl/BlockingQueue.h" +#define thdAttrs NULL + //=========================================================================== void shutdownFn( void *dummy, VirtProcr *dummy2 ); @@ -19,6 +21,9 @@ void create_sched_slots( MasterEnv *masterEnv ); +pthread_mutex_t suspendLock = 
PTHREAD_MUTEX_INITIALIZER; +pthread_cond_t suspend_cond = PTHREAD_COND_INITIALIZER; + //=========================================================================== /*Setup has two phases: @@ -52,10 +57,10 @@ void VMS__init() { MasterEnv *masterEnv; - CASQueueStruc *workQ; + VMSQueueStruc *workQ; //Make the central work-queue - _VMSWorkQ = makeCASQ(); + _VMSWorkQ = makeVMSQ(); workQ = _VMSWorkQ; _VMSMasterEnv = malloc( sizeof(MasterEnv) ); @@ -66,33 +71,25 @@ create_sched_slots( masterEnv ); - //Set slot 0 to be the master virt procr & set flags just in case - masterEnv->schedSlots[0]->needsProcrAssigned = FALSE; //says don't touch - masterEnv->schedSlots[0]->workIsDone = FALSE; //says don't touch - masterEnv->schedSlots[0]->procrAssignedToSlot = masterEnv->masterVirtPr; - masterEnv->masterVirtPr->schedSlot = masterEnv->schedSlots[0]; masterEnv->stillRunning = FALSE; + masterEnv->numToPrecede = NUM_CORES; //First core loop to start up gets this, which will schedule seed Pr //TODO: debug: check address of masterVirtPr - writeCASQ( masterEnv->masterVirtPr, workQ ); + writeVMSQ( masterEnv->masterVirtPr, workQ ); numProcrsCreated = 1; //======================================================================== // Create the Threads int coreIdx, retCode; - #define thdAttrs NULL - - _VMSMasterEnv->setupComplete = 0; - _VMSMasterEnv->suspend_mutex = PTHREAD_MUTEX_INITIALIZER; - _VMSMasterEnv->suspend_cond = PTHREAD_COND_INITIALIZER; - + //Need the threads to be created suspended, and wait for a signal // before proceeding -- gives time after creating to initialize other // stuff before the coreLoops set off. 
- - //Make params given to the win threads that animate the core loops + _VMSMasterEnv->setupComplete = 0; + + //Make the threads that animate the core loops for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ ) { coreLoopThdParams[coreIdx] = malloc( sizeof(ThdParams) ); coreLoopThdParams[coreIdx]->coreNum = coreIdx; @@ -102,10 +99,8 @@ thdAttrs, &coreLoop, (void *)(coreLoopThdParams[coreIdx]) ); - if(!retCode){printf("ERROR creating thread: %d\n", retCode); exit();} + if(retCode){printf("ERROR creating thread: %d\n", retCode); exit(0);} } - - } void @@ -148,10 +143,10 @@ //get lock, to lock out any threads still starting up -- they'll see // that setupComplete is true before entering while loop, and so never // wait on the condition - pthread_mutex_lock( _VMSMasterEnv->suspend_mutex ); + pthread_mutex_lock( &suspendLock ); _VMSMasterEnv->setupComplete = 1; - pthread_mutex_unlock( _VMSMasterEnv->suspend_mutex ); - pthread_cond_broadcast( _VMSMasterEnv->suspend_cond ); + pthread_mutex_unlock( &suspendLock ); + pthread_cond_broadcast( &suspend_cond ); //wait for all to complete @@ -200,6 +195,8 @@ // for 2 params + return addr. Return addr (NULL) is in loc pointed to // by stackPtr, initData at stackPtr + 4 bytes, animatingPr just above stackLocs = malloc( VIRT_PROCR_STACK_SIZE ); + if(stackLocs == 0) + {perror("malloc stack"); exit(1);} newPr->startOfStack = stackLocs; stackPtr = ( (char *)stackLocs + VIRT_PROCR_STACK_SIZE - 0x10 ); //setup __cdecl on stack -- coreloop will switch to stackPtr before jmp @@ -212,7 +209,7 @@ } - /*there is a label inside this function -- save the addr of this label in +/*there is a label inside this function -- save the addr of this label in * the callingPr struc, as the pick-up point from which to start the next * work-unit for that procr. If turns out have to save registers, then * save them in the procr struc too. 
Then do assembly jump to the CoreLoop's @@ -238,25 +235,32 @@ stackPtrAddr = &(callingPr->stackPtr); framePtrAddr = &(callingPr->framePtr); - + jmpPt = callingPr->coreLoopStartPt; coreLoopFramePtr = callingPr->coreLoopFramePtr;//need this only coreLoopStackPtr = callingPr->coreLoopStackPtr;//shouldn't need -- safety - //Save the virt procr's stack and frame ptrs, restore coreloop's frame - // ptr, then jump back to "start" of core loop - //Note, GCC compiles to assembly that saves esp and ebp in the stack - // frame -- so have to explicitly do assembly that saves to memory + //Eclipse's compilation sequence complains -- so break into two + // separate in-line assembly pieces + //Save the virt procr's stack and frame ptrs, asm volatile("movl %0, %%eax; \ movl %%esp, (%%eax); \ movl %1, %%eax; \ - movl %%ebp, (%%eax); \ - movl %2, %%eax; \ - movl %3, %%esp; \ - movl %4, %%ebp; \ + movl %%ebp, (%%eax) "\ + /* outputs */ : "=g" (stackPtrAddr), "=g" (framePtrAddr) \ + /* inputs */ : \ + /* clobber */ : "%eax" \ + ); + + //restore coreloop's frame ptr, then jump back to "start" of core loop + //Note, GCC compiles to assembly that saves esp and ebp in the stack + // frame -- so have to explicitly do assembly that saves to memory + asm volatile("movl %0, %%eax; \ + movl %1, %%esp; \ + movl %2, %%ebp; \ jmp %%eax " \ - /* outputs */ : "=g" (stackPtrAddr), "=g" (framePtrAddr) \ - /* inputs */ : "g" (jmpPt), "g"(coreLoopStackPtr), "g"(coreLoopFramePtr)\ + /* outputs */ : \ + /* inputs */ : "m" (jmpPt), "m"(coreLoopStackPtr), "m"(coreLoopFramePtr)\ /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx", "%edi","%esi" \ ); //list everything as clobbered to force GCC to save all // live vars that are in regs on stack before this @@ -356,6 +360,8 @@ //TODO: add a semantic-layer supplied "freer" for the semantic-data portion // of a request -- IE call with both a virt procr and a fn-ptr to request // freer (also maybe put sem request freer as a field in virt procr?) 
+//VMSHW relies right now on this only freeing VMS layer of request -- the
+// semantic portion of request is alloc'd and freed by request handler
 void
 VMS__free_request( VMSReqst *req )
 {
@@ -500,7 +506,7 @@
 shutdownFn( void *dummy, VirtProcr *animatingPr )
 { int coreIdx;
   VirtProcr *shutDownPr;
-  CASQueueStruc *workQ = _VMSWorkQ;
+  VMSQueueStruc *workQ = _VMSWorkQ;

   //free all the locations owned within the VMS system
   //TODO: write VMS__malloc and free.. -- take the DKU malloc as starting pt
@@ -510,7 +516,7 @@
    {
      shutDownPr = VMS__create_procr( NULL, NULL );
      shutDownPr->nextInstrPt = _VMSMasterEnv->coreLoopShutDownPt;
-     writeCASQ( shutDownPr, workQ );
+     writeVMSQ( shutDownPr, workQ );
    }

   //This is an issue: the animating processor of this function may not
diff -r c556193f7211 -r 668278fa7a63 VMS.h
--- a/VMS.h	Sat Jul 24 08:58:47 2010 -0700
+++ b/VMS.h	Mon Jul 26 15:25:53 2010 -0700
@@ -12,7 +12,7 @@
 #include "VMS_primitive_data_types.h"
 #include "Queue_impl/BlockingQueue.h"

-#include "pthread.h"
+#include <pthread.h>

 //This value is the number of hardware threads in the shared memory
 // machine
@@ -22,14 +22,19 @@
 #define NUM_SCHED_SLOTS (2 * NUM_CORES + 1)

 //128K stack..
compromise, want 10K virtPr -#define VIRT_PROCR_STACK_SIZE 0x100000 +#define VIRT_PROCR_STACK_SIZE 0x10000 #define SUCCESS 0 +#define writeVMSQ writePThdQ +#define readVMSQ readPThdQ +#define makeVMSQ makePThdQ +#define VMSQueueStruc PThdQueueStruc + //#define thdAttrs NULL //For PThreads typedef struct _SchedSlot SchedSlot; -typedef struct _VMSReqst VMSReqst; +typedef struct _VMSReqst VMSReqst; typedef struct _VirtProcr VirtProcr; typedef VirtProcr * (*SlaveScheduler) ( void * ); //semEnv @@ -103,9 +108,9 @@ SchedSlot **schedSlots; SchedSlot **filledSlots; - int numFilled; + int numToPrecede; - int stillRunning; + volatile int stillRunning; VirtProcr *masterVirtPr; @@ -114,9 +119,7 @@ void *coreLoopShutDownPt; //addr to jump to to shut down a coreLoop - int setupComplete; - pthread_mutex_t suspend_mutex; - pthread_cond_t suspend_cond; + int setupComplete; } MasterEnv; @@ -130,15 +133,17 @@ //===================== Global Vars =================== -pthread_t coreLoopThdHandles[ NUM_CORES ]; //pthread's virt-procr state -ThdParams *coreLoopThdParams[ NUM_CORES ]; +pthread_t coreLoopThdHandles[ NUM_CORES ]; //pthread's virt-procr state +ThdParams *coreLoopThdParams [ NUM_CORES ]; +pthread_mutex_t suspendLock; +pthread_cond_t suspend_cond; volatile MasterEnv *_VMSMasterEnv; //workQ is global, static, and volatile so that core loop has its location // hard coded, and reloads every time through the loop -- that way don't // need to save any regs used by core loop -volatile CASQueueStruc *_VMSWorkQ; +volatile VMSQueueStruc *_VMSWorkQ; //========================== void diff -r c556193f7211 -r 668278fa7a63 VMS_Seq.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VMS_Seq.c Mon Jul 26 15:25:53 2010 -0700 @@ -0,0 +1,409 @@ +/* + * Copyright 2010 OpenSourceCodeStewardshipFoundation + * + * Licensed under BSD + */ + +#include +#include +#include + +#include "VMS.h" +#include "Queue_impl/BlockingQueue.h" + + +#define thdAttrs NULL + 
+//=========================================================================== +void +shutdownFnSeq( void *dummy, VirtProcr *dummy2 ); + +void +create_sched_slots( MasterEnv *masterEnv ); + +//=========================================================================== + +/*Setup has two phases: + * 1) Semantic layer first calls init_VMS, which creates masterEnv, and puts + * the master virt procr into the work-queue, ready for first "call" + * 2) Semantic layer then does its own init, which creates the seed virt + * procr inside the semantic layer, ready to schedule it when + * asked by the first run of the masterLoop. + * + *This part is bit weird because VMS really wants to be "always there", and + * have applications attach and detach.. for now, this VMS is part of + * the app, so the VMS system starts up as part of running the app. + * + *The semantic layer is isolated from the VMS internals by making the + * semantic layer do setup to a state that it's ready with its + * initial virt procrs, ready to schedule them to slots when the masterLoop + * asks. Without this pattern, the semantic layer's setup would + * have to modify slots directly to assign the initial virt-procrs, and put + * them into the workQ itself, breaking the isolation completely. + * + * + *The semantic layer creates the initial virt procr(s), and adds its + * own environment to masterEnv, and fills in the pointers to + * the requestHandler and slaveScheduler plug-in functions + */ + +/*This allocates VMS data structures, populates the master VMSProc, + * and master environment, and returns the master environment to the semantic + * layer. 
+ */
+void
+VMS__init_Seq()
+ { MasterEnv *masterEnv;
+   VMSQueueStruc *workQ;
+
+   //Make the central work-queue
+   _VMSWorkQ = makeVMSQ();
+   workQ = _VMSWorkQ;
+
+   _VMSMasterEnv = malloc( sizeof(MasterEnv) );
+   masterEnv = _VMSMasterEnv;
+
+   //create the master virtual processor
+   masterEnv->masterVirtPr = VMS__create_procr( &masterLoop, masterEnv );
+
+   create_sched_slots( masterEnv );
+
+   masterEnv->stillRunning = FALSE;
+   masterEnv->numToPrecede = NUM_CORES;
+
+   //First core loop to start up gets this, which will schedule seed Pr
+   //TODO: debug: check address of masterVirtPr
+   writeVMSQ( masterEnv->masterVirtPr, workQ );
+
+   numProcrsCreated = 1;
+
+   //========================================================================
+   // Create the Threads
+
+ }
+
+
+/*Semantic layer calls this when it wants the system to start running..
+ *
+ *This starts the core loops running then waits for them to exit.
+ */
+void
+VMS__start_the_work_then_wait_until_done_Seq()
+ { int coreIdx;
+   //Start the core loops running
+//===========================================================================
+   TSCount startCount, endCount;
+   unsigned long long count = 0, freq = 0;
+   double runTime;
+
+   startCount = getTSCount();
+
+   //Instead of un-suspending threads, just call the one and only
+   // core loop, in the main thread.
+   coreLoop_Seq( NULL );
+
+   //NOTE: do not clean up VMS env here -- semantic layer has to have
+   // a chance to clean up its environment first, then do a call to free
+   // the Master env and rest of VMS locations
+
+
+   endCount = getTSCount();
+   count = endCount - startCount;
+
+   runTime = (double)count / (double)TSCOUNT_FREQ;
+
+   printf("\n Time startup to shutdown: %f\n", runTime); fflush( stdout );
+ }
+
+
+
+
+
+/*This is equivalent to "jump back to core loop" -- it's mainly only used
+ * just after adding dissipate request to a processor -- so the semantic
+ * layer is the only place it will be seen and/or used.
+ * + *It does almost the same thing as suspend, except don't need to save the + * stack nor set the nextInstrPt + * + *As of June 30, 2010 just implementing as a call to suspend -- just sugar + */ +void +VMS__return_from_fn( VirtProcr *animatingPr ) + { + VMS__suspend_procr( animatingPr ); + } + + +/*Not sure yet the form going to put "dissipate" in, so this is the third + * possibility -- the semantic layer can just make a macro that looks like + * a call to its name, then expands to a call to this. + * + *As of June 30, 2010 this looks like the top choice.. + * + *This adds a request to dissipate, then suspends the processor so that the + * request handler will receive the request. The request handler is what + * does the work of freeing memory and removing the processor from the + * semantic environment's data structures. + *The request handler also is what figures out when to shutdown the VMS + * system -- which causes all the core loop threads to die, and returns from + * the call that started up VMS to perform the work. + * + *This form is a bit misleading to understand if one is trying to figure out + * how VMS works -- it looks like a normal function call, but inside it + * sends a request to the request handler and suspends the processor, which + * jumps out of the VMS__dissipate_procr function, and out of all nestings + * above it, transferring the work of dissipating to the request handler, + * which then does the actual work -- causing the processor that animated + * the call of this function to disappear and the "hanging" state of this + * function to just poof into thin air -- the virtual processor's trace + * never returns from this call, but instead the virtual processor's trace + * gets suspended in this call and all the virt processor's state disap- + * pears -- making that suspend the last thing in the virt procr's trace. 
+ */ +void +VMS__dissipate_procr( VirtProcr *procrToDissipate ) + { VMSReqst *req; + + req = malloc( sizeof(VMSReqst) ); +// req->virtProcrFrom = callingPr; + req->reqType = dissipate; + req->nextReqst = procrToDissipate->requests; + procrToDissipate->requests = req; + + VMS__suspend_procr( procrToDissipate ); +} + + +/*This inserts the semantic-layer's request data into standard VMS carrier + */ +inline void +VMS__add_sem_request( void *semReqData, VirtProcr *callingPr ) + { VMSReqst *req; + + req = malloc( sizeof(VMSReqst) ); +// req->virtProcrFrom = callingPr; + req->reqType = semantic; + req->semReqData = semReqData; + req->nextReqst = callingPr->requests; + callingPr->requests = req; + } + + + +//TODO: add a semantic-layer supplied "freer" for the semantic-data portion +// of a request -- IE call with both a virt procr and a fn-ptr to request +// freer (or maybe put request freer as a field in virt procr?) +void +VMS__remove_and_free_top_request( VirtProcr *procrWithReq ) + { VMSReqst *req; + + req = procrWithReq->requests; + procrWithReq->requests = procrWithReq->requests->nextReqst; + free( req ); + } + + +//TODO: add a semantic-layer supplied "freer" for the semantic-data portion +// of a request -- IE call with both a virt procr and a fn-ptr to request +// freer (also maybe put sem request freer as a field in virt procr?) 
+//VMSHW relies right now on this only freeing VMS layer of request -- the +// semantic portion of request is alloc'd and freed by request handler +void +VMS__free_request( VMSReqst *req ) + { + free( req ); + } + +VMSReqst * +VMS__take_top_request_from( VirtProcr *procrWithReq ) + { VMSReqst *req; + + req = procrWithReq->requests; + if( req == NULL ) return req; + + procrWithReq->requests = procrWithReq->requests->nextReqst; + return req; + } + +inline int +VMS__isSemanticReqst( VMSReqst *req ) + { + return ( req->reqType == semantic ); + } + + +inline void * +VMS__take_sem_reqst_from( VMSReqst *req ) + { + return req->semReqData; + } + +inline int +VMS__isDissipateReqst( VMSReqst *req ) + { + return ( req->reqType == dissipate ); + } + +inline int +VMS__isCreateReqst( VMSReqst *req ) + { + return ( req->reqType == regCreated ); + } + +void +VMS__send_register_new_procr_request(VirtProcr *newPr, VirtProcr *reqstingPr) + { VMSReqst *req; + + req = malloc( sizeof(VMSReqst) ); + req->reqType = regCreated; + req->semReqData = newPr; + req->nextReqst = reqstingPr->requests; + reqstingPr->requests = req; + + VMS__suspend_procr( reqstingPr ); + } + + +/*The semantic layer figures out when the work is done ( perhaps by a call + * in the application to "work all done", or perhaps all the virtual + * processors have dissipated.. a.s.o. ) + * + *The semantic layer is responsible for making sure all work has fully + * completed before using this to shutdown the VMS system. + * + *After the semantic layer has determined it wants to shut down, the + * next time the Master Loop calls the scheduler plug-in, the scheduler + * then calls this function and returns the virtual processor it gets back. + * + *When the shut-down processor runs, it first frees all locations malloc'd to + * the VMS system (that wasn't + * specified as return-locations). Then it creates one core-loop shut-down + * processor for each core loop and puts them all into the workQ. 
When a
+ * core loop animates a core loop shut-down processor, it causes exit-thread
+ * to run, and when all core loop threads have exited, then the "wait for
+ * work to finish" in the main thread is woken, and the function-call that
+ * started all the work returns.
+ *
+ *The function animated by this processor performs the shut-down work.
+ */
+VirtProcr *
+VMS__create_the_shutdown_procr()
+ {
+   return VMS__create_procr( &shutdownFn, NULL );
+ }
+
+
+/*This must be called by the request handler plugin -- it cannot be called
+ * from the semantic library "dissipate processor" function -- instead, the
+ * semantic layer has to generate a request for the plug-in to call this
+ * function.
+ *The reason is that this frees the virtual processor's stack -- which is
+ * still in use inside semantic library calls!
+ *
+ *This frees or recycles all the state owned by and comprising the VMS
+ * portion of the animating virtual procr. The request handler must first
+ * free any semantic data created for the processor that didn't use the
+ * VMS_malloc mechanism. Then it calls this, which first asks the malloc
+ * system to disown any state that did use VMS_malloc, and then frees the
+ * stack and the processor-struct itself.
+ *If the dissipated processor is the sole (remaining) owner of VMS__malloc'd
+ * state, then that state gets freed (or sent to recycling) as a side-effect
+ * of dis-owning it.
+ */
+void
+VMS__free_procr_locs( VirtProcr *animatingPr )
+ {
+   //dis-own all locations owned by this processor, causing to be freed
+   // any locations that it is (was) sole owner of
+   //TODO: implement VMS__malloc system, including "give up ownership"
+
+   //The dissipate request might still be attached, so remove and free it
+   VMS__remove_and_free_top_request( animatingPr );
+
+   //NOTE: initialData was given to the processor, so should either have
+   // been alloc'd with VMS__malloc, or freed by the level above animPr.
+ //So, all that's left to free here is the stack and the VirtProcr struc + // itself + free( animatingPr->startOfStack ); + free( animatingPr ); + } + + + +/*This is the function run by the special "shut-down" processor + * + *The _VMSMasterEnv is needed by this shut down function, so the "wait" + * function run in the main loop has to free it, and the thread-related + * locations (coreLoopThdParams a.s.o.). + *However, the semantic environment and all data malloc'd to VMS can be + * freed here. + * + *NOTE: the semantic plug-in is expected to use VMS__malloc to get all the + * locations it needs -- they will be automatically freed by the standard + * "free all owned locations" + * + *Free any locations malloc'd to the VMS system (that weren't + * specified as return-locations). + *Then create one core-loop shut-down processor for each core loop and puts + * them all into the workQ. + */ +void +shutdownFn( void *dummy, VirtProcr *animatingPr ) + { int coreIdx; + VirtProcr *shutDownPr; + VMSQueueStruc *workQ = _VMSWorkQ; + + //free all the locations owned within the VMS system + //TODO: write VMS__malloc and free.. -- take the DKU malloc as starting pt + + //make the core loop shut-down processors and put them into the workQ + for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ ) + { + shutDownPr = VMS__create_procr( NULL, NULL ); + shutDownPr->nextInstrPt = _VMSMasterEnv->coreLoopShutDownPt; + writeVMSQ( shutDownPr, workQ ); + } + + //This is an issue: the animating processor of this function may not + // get its request handled before all the cores have shutdown. + //TODO: after all the threads stop, clean out the MasterEnv, the + // SemanticEnv, and the workQ before returning. + VMS__dissipate_procr( animatingPr ); //will never come back from this + } + + +/*This has to free anything allocated during VMS_init, and any other alloc'd + * locations that might be left over. 
+ */ +void +VMS__shutdown() + { int i; + + free( _VMSWorkQ ); + free( _VMSMasterEnv->filledSlots ); + for( i = 0; i < NUM_SCHED_SLOTS; i++ ) + { + free( _VMSMasterEnv->schedSlots[i] ); + } + + free( _VMSMasterEnv->schedSlots); + VMS__free_procr_locs( _VMSMasterEnv->masterVirtPr ); + + free( _VMSMasterEnv ); + } + + +//=========================================================================== + +inline TSCount getTSCount() + { unsigned int low, high; + TSCount out; + + saveTimeStampCountInto( low, high ); + out = high; + out = (out << 32) + low; + return out; + } +