# HG changeset patch # User Sean Halle # Date 1351061177 25200 # Node ID dafae55597ce56490c2d3412da8d7f931835f223 # Parent 999f2966a3e55b69f45caf7d2e1f32fe69706ab2 Getting closer -- added PRServ as built-in langlet (but still just copy) about to rework a lot of the Master code.. possibly eliminate core controller diff -r 999f2966a3e5 -r dafae55597ce AnimationMaster.c --- a/AnimationMaster.c Wed Sep 19 23:12:44 2012 -0700 +++ b/AnimationMaster.c Tue Oct 23 23:46:17 2012 -0700 @@ -10,7 +10,10 @@ #include #include "PR.h" +#include "VSs_impl/VSs.h" +inline void +replaceWithNewSlotSlv( SlaveVP *requestingSlv, PRProcessEnv *processEnv ); /*The animationMaster embodies most of the animator of the language. The @@ -37,7 +40,7 @@ * */ - + //===================== The versions of the Animation Master ================= // //============================================================================== @@ -105,11 +108,11 @@ *There is a separate masterVP for each core, but a single semantic * environment shared by all cores. Each core also has its own scheduling * slots, which are used to communicate slaves between animationMaster and - * coreController. There is only one global variable, _PRMasterEnv, which + * coreController. There is only one global variable, _PRTopEnv, which * holds the semantic env and other things shared by the different * masterVPs. The request handler and Assigner are registered with * the animationMaster by the language's init function, and a pointer to - * each is in the _PRMasterEnv. (There are also some pthread related global + * each is in the _PRTopEnv. (There are also some pthread related global * vars, but they're only used during init of PR). *PR gains control over the cores by essentially "turning off" the OS's * scheduler, using pthread pin-to-core commands. @@ -122,7 +125,7 @@ * based application. *The masterVPs share a single system-wide master-lock, so only one * masterVP may be animated at a time. 
- *The core controllers access _PRMasterEnv to get the masterVP, and when + *The core controllers access _PRTopEnv to get the masterVP, and when * they start, the slots are all empty, so they run their associated core's * masterVP. The first of those to get the master lock sees the seed slave * in the shared semantic environment, so when it runs the Assigner, that @@ -160,7 +163,7 @@ int32 thisCoresIdx; //======================== Initializations ======================== - masterEnv = (MasterEnv*)_VMSMasterEnv; + masterEnv = (MasterEnv*)_PRTopEnv; thisCoresIdx = masterVP->coreAnimatedBy; animSlots = masterEnv->allAnimSlots[thisCoresIdx]; @@ -196,12 +199,12 @@ SlaveVP *currSlave = currSlot->slaveAssignedToSlot; justAddedReqHdlrChg(); - //handle the request, either by VMS or by the language + //handle the request, either by PR or by the language if( currSlave->requests->reqType != LangReq ) - { //The request is a standard VMS one, not one defined by the - // language, so VMS handles it, then queues slave to be assigned - handleReqInVMS( currSlave ); - writePrivQ( currSlave, VMSReadyQ ); //Q slave to be assigned below + { //The request is a standard PR one, not one defined by the + // language, so PR handles it, then queues slave to be assigned + handleReqInPR( currSlave ); + writePrivQ( currSlave, PRReadyQ ); //Q slave to be assigned below } else { MEAS__startReqHdlr; @@ -272,7 +275,7 @@ //#endif //======================== Initializations ======================== - masterEnv = (MasterEnv*)_PRMasterEnv; + masterEnv = (MasterEnv*)_PRTopEnv; thisCoresIdx = masterVP->coreAnimatedBy; animSlots = masterEnv->allAnimSlots[thisCoresIdx]; @@ -498,7 +501,7 @@ //#endif //======================== Initializations ======================== - masterEnv = (MasterEnv*)_PRMasterEnv; + masterEnv = (MasterEnv*)_PRTopEnv; thisCoresIdx = masterVP->coreAnimatedBy; animSlots = masterEnv->allAnimSlots[thisCoresIdx]; @@ -614,37 +617,22 @@ //#ifdef MODE__MULTI_PROCESS void animationMaster( void 
*initData, SlaveVP *masterVP ) { + int32 slotIdx; +// int32 numSlotsFilled; + AnimSlot *currSlot; //Used while scanning and filling animation slots - int32 slotIdx, numSlotsFilled; - AnimSlot *currSlot, **animSlots; - SlaveVP *assignedSlaveVP; //the slave chosen by the assigner + AnimSlot **animSlots; //Local copies, for performance MasterEnv *masterEnv; - SlaveAssigner slaveAssigner; - RequestHandler requestHandler; - PRSemEnv *semanticEnv; int32 thisCoresIdx; - - SlaveVP *slave; - PRProcess *process; - PRConstrEnvHolder *constrEnvHolder; - int32 langMagicNumber; //======================== Initializations ======================== - masterEnv = (MasterEnv*)_PRMasterEnv; + masterEnv = (MasterEnv*)_PRTopEnv; thisCoresIdx = masterVP->coreAnimatedBy; animSlots = masterEnv->allAnimSlots[thisCoresIdx]; - - requestHandler = masterEnv->requestHandler; - slaveAssigner = masterEnv->slaveAssigner; - semanticEnv = masterEnv->semanticEnv; - - //initialize, for non-multi-lang, non multi-proc case - // default handler gets put into master env by a registration call by lang - endTaskHandler = masterEnv->defaultTaskHandler; - + HOLISTIC__Insert_Master_Global_Vars; //======================== animationMaster ======================== @@ -653,15 +641,36 @@ //Having two cases makes this logic complex.. can be finishing either, and // then the next available work may be either.. so really have two distinct // loops that are inter-twined.. 
- while(1){ - - MEAS__Capture_Pre_Master_Point + while(1) + { + MEAS__Capture_Pre_Master_Point + + for( slotIdx = 0; slotIdx < NUM_ANIM_SLOTS; slotIdx++) + { + currSlot = animSlots[ slotIdx ]; - //Scan the animation slots - numSlotsFilled = 0; - for( slotIdx = 0; slotIdx < NUM_ANIM_SLOTS; slotIdx++) - { - currSlot = animSlots[ slotIdx ]; + masterFunction_multiLang( currSlot ); + } + + MEAS__Capture_Post_Master_Point; + + masterSwitchToCoreCtlr( masterVP ); //returns when ctlr switches back to master + flushRegisters(); + } + } +#endif //MODE__MULTI_LANG +#endif //MODE__MULTI_PROCESS + +inline +void +masterFunction_multiLang( AnimSlot *currSlot ) + { //Scan the animation slots + int32 magicNumber; + SlaveVP *slave; + SlaveVP *assignedSlaveVP; + PRSemEnv *semanticEnv; + PRReqst *req; + RequestHandler requestHandler; //Check if newly-done slave in slot, which will need request handled if( currSlot->workIsDone ) @@ -674,34 +683,71 @@ //process the request made by the slave (held inside slave struc) slave = currSlot->slaveAssignedToSlot; - //check if the completed work was a task.. - if( slave->taskMetaInfo->isATask ) - { - if( slave->reqst->type == TaskEnd ) - { //do task end handler, which is registered separately - //note, end hdlr may use semantic data from reqst.. - //get end-task handler - //taskEndHandler = lookup( slave->reqst->langMagicNumber, processEnv ); - taskEndHandler = slave->taskMetaInfo->endTaskHandler; - - (*taskEndHandler)( slave, semanticEnv ); - - goto AssignWork; - } - else //is a task, and just suspended - { //turn slot slave into free task slave & make replacement - if( slave->typeOfVP == TaskSlotSlv ) changeSlvType(); - - //goto normal slave request handling - goto SlaveReqHandling; - } + //check if the slave was doing a task.. 
+ //Action depends on both on the request type, and whether it's on + // a generic slave vs a suspended task + if( slave->metaTask->taskType == AtomicTask || + slave->metaTask->taskType == SuspendedTask ) + { + switch( slave->request->reqType ) + { case TaskEnd: + { PRHandle_EndTask( slave ); //if free task slave, update count, put into recycle Q -- do handler before lang's handler + + //do task end handler, which is registered separately + //note, end hdlr may use semantic data from reqst.. + //get end-task handler + + RequestHandler + taskEndHandler = slave->metaTask->reqHandler; + semanticEnv = PR_int__give_sem_env_for_slave( slave, + slave->request->langMagicNumber ); + (*taskEndHandler)( slave, semanticEnv ); + + goto AssignWork; + } + case TaskCreate: + { PRHandle_CreateTask( slave ); + justCopied_check; + RequestHandler + taskCreateHandler = slave->metaTask->reqHandler; + semanticEnv = PR_int__give_sem_env_for_slave( slave, + slave->request->langMagicNumber ); + (*taskCreateHandler)( slave, semanticEnv ); + + want_to_resume_creating_slave; + goto AssignWork; + } + default: + { //is a task, and just suspended, so tied to a free task slave + //First turn slot slave into free task slave & make replacement + if( slave->typeOfVP == TaskSlotSlv ) + replaceWithNewSlotSlv( slave, slave->processSlaveIsIn->processEnv ); + + //goto normal slave request handling + goto SlaveReqHandling; + } + } } else //is a slave that suspended { SlaveReqHandling: - (*requestHandler)( slave, semanticEnv ); //(note: indirect Fn call more efficient when use fewer params, instead re-fetch from slave) - + //Q: put the switch in inline call, to clean up code? 
+ req = slave->request; + switch( req->reqType ) + { case SlvCreate: PRHandle_CreateSlave( slave ); break; + case SlvDissipate: PRHandle_Dissipate( slave ); break; + case Service: PR_int__handle_PRServiceReq( slave ); break; //resume into PR's own semantic env + case Hardware: //for future expansion + case IO: //for future expansion + case OSCall: //for future expansion + case Language: //normal sem request + magicNumber = slave->request->langMagicNumber; + semanticEnv = PR_PI__give_sem_env_for( slave, magicNumber ); + requestHandler = semanticEnv->requestHdlr; + (*requestHandler)( slave, semanticEnv ); //(note: indirect Fn call more efficient when use fewer params, instead re-fetch from slave) + } + HOLISTIC__Record_AppResponder_end; MEAS__endReqHdlr; @@ -709,14 +755,14 @@ } } //if has suspended slave that needs handling - //if slot empty, hand to Assigner to fill with a slave + //End up here when the slot did not have ended work in it (no req) + //So, here, if slot empty, look for work to fill the slot if( currSlot->needsSlaveAssigned ) - { //Scan sem environs, looking for one with ready work. - // call the Assigner for that sem Env, to give slot a new slave - HOLISTIC__Record_Assigner_start; + { HOLISTIC__Record_Assigner_start; AssignWork: - + //Scan sem environs, looking for semEnv with ready work. + // call the Assigner for that sem Env, to get a slave for the slot assignedSlaveVP = assignWork( semanticEnv, currSlot ); //put the chosen slave into slot, and adjust flags and state @@ -724,185 +770,245 @@ { currSlot->slaveAssignedToSlot = assignedSlaveVP; assignedSlaveVP->animSlotAssignedTo = currSlot; currSlot->needsSlaveAssigned = FALSE; - numSlotsFilled += 1; } else - { - currSlot->needsSlaveAssigned = TRUE; //local write + { currSlot->needsSlaveAssigned = TRUE; //local write } HOLISTIC__Record_Assigner_end; }//if slot needs slave assigned - }//for( slotIdx.. 
+ } - MEAS__Capture_Post_Master_Point; +//========================================================================== +/*When a task in a slot slave suspends, the slot slave has to be changed to + * a free task slave, then the slot slave replaced. The replacement can be + * either a recycled free task slave that finished it's task and has been + * idle in the recycle queue, or else create a new slave to be the slot slave. + *The master only calls this with a slot slave that needs to be replaced. + */ +inline void +replaceWithNewSlotSlv( SlaveVP *requestingSlv, PRProcessEnv *processEnv ) + { SlaveVP *newSlotSlv; + VSsSemData *semData; + + fixMe__still_VSs_stuff_in_here; + //get a new slave to be the slot slave + newSlotSlv = readPrivQ( processEnv->freeTaskSlvRecycleQ ); + if( newSlotSlv == NULL ) + { newSlotSlv = PR_int__create_slaveVP( &idle_fn, NULL, processEnv, 0); + //just made a new free task slave, so count it + processEnv->numLiveFreeTaskSlvs += 1; + } - masterSwitchToCoreCtlr( masterVP ); //returns when ctlr switches back to master - flushRegisters(); - }//while(1) + //set slave values to make it the slot slave + newSlotSlv->metaTask = NULL; + newSlotSlv->typeOfVP = TaskSlotSlv; + newSlotSlv->needsTaskAssigned = TRUE; + + //a slot slave is pinned to a particular slot on a particular core + //Note, this happens before the request is seen by handler, so nothing + // has had a chance to change the coreAnimatedBy or anything else.. 
+ newSlotSlv->animSlotAssignedTo = requestingSlv->animSlotAssignedTo; + newSlotSlv->coreAnimatedBy = requestingSlv->coreAnimatedBy; + + //put it into the slot slave matrix + int32 slotNum = requestingSlv->animSlotAssignedTo->slotIdx; + int32 coreNum = requestingSlv->coreAnimatedBy; + processEnv->slotTaskSlvs[coreNum][slotNum] = newSlotSlv; + + //Fix up requester, to be an extra slave now (but not an ended one) + // because it's active, doesn't go into freeTaskSlvRecycleQ + requestingSlv->typeOfVP = FreeTaskSlv; } -#endif //MODE__MULTI_LANG -#endif //MODE__MULTI_PROCESS -/*This does three things: - * 1) ask for a slave ready to resume - * 2) if none, then ask for a task, and assign to the slot slave - * 3) if none, then prune former task slaves waiting to be recycled. - * - //Have two separate assigners in each semantic env, - // which keeps its own work in its own structures.. the master, here, - // searches through the semantic environs, takes the first that has work - // available, and whatever it returns is assigned to the slot.. - //However, also have an override assigner.. because static analysis tools know - // which languages are grouped together.. and the override enables them to - // generate a custom assigner that uses info from all the languages in a - // unified way.. Don't really expect this to happen, but making it possible. + +/*This does: + * 1) searches the semantic environments for one with work ready + * if finds one, asks its assigner to return work + * 2) checks what kind of work: new task, resuming task, resuming slave + * if new task, gets the slot slave and assigns task to it and returns slave + * else, gets the slave attached to the metaTask and returns that. + * 3) if no work found, then prune former task slaves waiting to be recycled. + * If no work and no slaves to prune, check for shutdown conditions. + * + * Semantic env keeps its own work in its own structures, and has its own + * assigner. 
It chooses + * However, include a switch that switches-in an override assigner, which + * sees all the work in all the semantic env's. This is most likely + * generated by static tools and included in the executable. That means it + * has to be called via a registered pointer from here. The idea is that + * the static tools know which languages are grouped together.. and the + * override enables them to generate a custom assigner that uses info from + * all the languages in a unified way.. Don't really expect this to happen, + * but am making it possible. */ inline SlaveVP * -assignWork( PRProcessEnv *processEnv, AnimSlot *slot ) - { SlaveVP *returnSlv; - //VSsSemEnv *semEnv; - //VSsSemData *semData; - int32 coreNum, slotNum; - PRTaskMetaInfo *newTaskStub; - SlaveVP *freeTaskSlv; +assignWork( PRProcess *process, AnimSlot *slot ) + { SlaveVP *returnSlv; + //VSsSemEnv *semEnv; + //VSsSemData *semData; + int32 coreNum, slotNum; + PRMetaTask *newMetaTask, *assignedMetaTask; + SlaveVP *freeTaskSlv; + coreNum = slot->coreSlotIsOn; - //master has to handle slot slaves.. so either assigner returns - // taskMetaInfo or else two assigners, one for slaves, other for tasks.. 
- semEnvs = processEnv->semEnvs; - numEnvs = processEnv->numSemEnvs; - for( envIdx = 0; envIdx < numEnvs; envIdx++ ) + if( _PRTopEnv->overrideAssigner != NULL ) + { assignedMetaTask = (*_PRTopEnv->overrideAssigner)( process, slot ); + if( assignedMetaTask != NULL ) + { + //have work, so reset Done flag (caused by work generated on other core) + if( process->coreIsDone[coreNum] == TRUE ) //reads are higher perf + process->coreIsDone[coreNum] = FALSE; //don't just write always + + switch( assignedMetaTask->taskType ) + { case GenericSlave: goto AssignSlave; + case ResumedTask: goto AssignSlave; + case NewTask: goto AssignNewTask; + case default: PR_int__throw_exception( "unknown task type ret by assigner" ); + } + } + else + goto NoWork; + } + + //If here, then no override assigner, so search semantic envs for work + int32 envIdx, numEnvs; PRSemEnv **semEnvs, *semEnv; SlaveAssigner assigner; + semEnvs = process->semEnvs; + numEnvs = process->numSemEnvs; + for( envIdx = 0; envIdx < numEnvs; envIdx++ ) //keep semEnvs in hash AND array { semEnv = semEnvs[envIdx]; if( semEnv->hasWork ) { assigner = semEnv->assigner; - retTaskMetaInfo = (*assigner)( semEnv, slot ); + assignedMetaTask = (*assigner)( semEnv, slot ); - return retTaskMetaInfo; //quit, have work + //have work, so reset Done flag (caused by work generated on other core) + if( process->coreIsDone[coreNum] == TRUE ) //reads are higher perf + process->coreIsDone[coreNum] = FALSE; //don't just write always + + switch( assignedMetaTask->taskType ) + { case GenericSlave: goto AssignSlave; + case ResumedTask: goto AssignSlave; + case NewTask: goto AssignNewTask; + case default: PR_int__throw_exception( "unknown task type ret by assigner" ); + } } } - coreNum = slot->coreSlotIsOn; - slotNum = slot->slotIdx; - - //first try to get a ready slave - returnSlv = getReadySlave(); + NoWork: + //No work, if reach here.. 
+ //no task, so prune the recycle pool of free task slaves + freeTaskSlv = readPrivQ( process->freeTaskSlvRecycleQ ); + if( freeTaskSlv != NULL ) + { //delete, so that bound the num extras, and deliver shutdown cond + deleteExtraneousFreeTaskSlv( freeTaskSlv, process ); + //then return NULL + returnSlv = NULL; + + goto ReturnTheSlv; + } + else + { //candidate for shutdown.. all extras dissipated, and no tasks + // and no ready to resume slaves, so no way to generate + // more work (on this core -- other core might have work still) + if( process->numLiveFreeTaskSlvs == 0 && + process->numLiveGenericSlvs == 0 ) + { //This core sees no way to generate more tasks, so say it + if( process->coreIsDone[coreNum] == FALSE ) + { process->numCoresDone += 1; + process->coreIsDone[coreNum] = TRUE; + #ifdef DEBUG__TURN_ON_SEQUENTIAL_MODE + process->shutdownInitiated = TRUE; + + #else + if( process->numCoresDone == NUM_CORES ) + { //means no cores have work, and none can generate more + process->shutdownInitiated = TRUE; + } + #endif + } + } + //check if shutdown has been initiated by this or other core + if( process->shutdownInitiated ) + { returnSlv = PR_SS__create_shutdown_slave(); + } + else + returnSlv = NULL; - if( returnSlv != NULL ) - { returnSlv->coreAnimatedBy = coreNum; - - //have work, so reset Done flag (when work generated on other core) - if( processEnv->coreIsDone[coreNum] == TRUE ) //reads are higher perf - processEnv->coreIsDone[coreNum] = FALSE; //don't just write always + goto ReturnTheSlv; + } //if( freeTaskSlv != NULL ) + + + AssignSlave: + { //get slave pointed to by meta task. + returnSlv = assignedMetaTask->slaveAssignedTo; + + returnSlv->coreAnimatedBy = coreNum; goto ReturnTheSlv; } - - //were no slaves, so try to get a ready task.. - newTaskStub = getTaskStub(); - - if( newTaskStub != NULL ) + + AssignNewTask: { //get the slot slave to assign the task to.. 
- returnSlv = processEnv->slotTaskSlvs[coreNum][slotNum]; + coreNum = slot->coreSlotIsOn; + slotNum = slot->slotIdx; + returnSlv = process->slotTaskSlvs[coreNum][slotNum]; //point slave to task's function, and mark slave as having task PR_int__reset_slaveVP_to_TopLvlFn( returnSlv, - newTaskStub->taskType->fn, newTaskStub->args ); - returnSlv->taskStub = newTaskStub; - newTaskStub->slaveAssignedTo = returnSlv; + assignedMetaTask->topLevelFn, assignedMetaTask->initData ); + returnSlv->metaTask = assignedMetaTask; + assignedMetaTask->slaveAssignedTo = returnSlv; returnSlv->needsTaskAssigned = FALSE; //slot slave is a "Task" slave type //have work, so reset Done flag, if was set - if( processEnv->coreIsDone[coreNum] == TRUE ) //reads are higher perf - processEnv->coreIsDone[coreNum] = FALSE; //don't just write always + if( process->coreIsDone[coreNum] == TRUE ) //reads are higher perf + process->coreIsDone[coreNum] = FALSE; //don't just write always goto ReturnTheSlv; } - else - { //no task, so prune the recycle pool of free task slaves - freeTaskSlv = readPrivQ( processEnv->freeTaskSlvRecycleQ ); - if( freeTaskSlv != NULL ) - { //delete to bound the num extras, and deliver shutdown cond - handleDissipate( freeTaskSlv, processEnv ); - //then return NULL - returnSlv = NULL; - - goto ReturnTheSlv; - } - else - { //candidate for shutdown.. 
if all extras dissipated, and no tasks - // and no ready to resume slaves, then no way to generate - // more tasks (on this core -- other core might have task still) - if( processEnv->numLiveExtraTaskSlvs == 0 && - processEnv->numLiveThreadSlvs == 0 ) - { //This core sees no way to generate more tasks, so say it - if( processEnv->coreIsDone[coreNum] == FALSE ) - { processEnv->numCoresDone += 1; - processEnv->coreIsDone[coreNum] = TRUE; - #ifdef DEBUG__TURN_ON_SEQUENTIAL_MODE - processEnv->shutdownInitiated = TRUE; - - #else - if( processEnv->numCoresDone == NUM_CORES ) - { //means no cores have work, and none can generate more - processEnv->shutdownInitiated = TRUE; - } - #endif - } - } - //check if shutdown has been initiated by this or other core - if(processEnv->shutdownInitiated) - { returnSlv = PR_SS__create_shutdown_slave(); - } - else - returnSlv = NULL; - - goto ReturnTheSlv; //don't need, but completes pattern - } //if( freeTaskSlv != NULL ) - } //if( newTaskStub == NULL ) - //outcome: 1)slave was just pointed to task, 2)no tasks, so slave NULL ReturnTheSlv: //All paths goto here.. to provide single point for holistic.. 
#ifdef HOLISTIC__TURN_ON_OBSERVE_UCC if( returnSlv == NULL ) - { returnSlv = processEnv->idleSlv[coreNum][slotNum]; + { returnSlv = process->idleSlv[coreNum][slotNum]; //things that would normally happen in resume(), but idle VPs // never go there - returnSlv->assignCount++; //gives each idle unit a unique ID + returnSlv->numTimesAssignedToASlot++; //gives each idle unit a unique ID Unit newU; newU.vp = returnSlv->slaveID; - newU.task = returnSlv->assignCount; - addToListOfArrays(Unit,newU,processEnv->unitList); + newU.task = returnSlv->numTimesAssignedToASlot; + addToListOfArrays(Unit,newU,process->unitList); - if (returnSlv->assignCount > 1) //make a dependency from prev idle unit + if (returnSlv->numTimesAssignedToASlot > 1) //make a dependency from prev idle unit { Dependency newD; // to this one newD.from_vp = returnSlv->slaveID; - newD.from_task = returnSlv->assignCount - 1; + newD.from_task = returnSlv->numTimesAssignedToASlot - 1; newD.to_vp = returnSlv->slaveID; - newD.to_task = returnSlv->assignCount; - addToListOfArrays(Dependency, newD ,processEnv->ctlDependenciesList); + newD.to_task = returnSlv->numTimesAssignedToASlot; + addToListOfArrays(Dependency, newD ,process->ctlDependenciesList); } } else //have a slave will be assigned to the slot { //assignSlv->numTimesAssigned++; //get previous occupant of the slot Unit prev_in_slot = - processEnv->last_in_slot[coreNum * NUM_ANIM_SLOTS + slotNum]; + process->last_in_slot[coreNum * NUM_ANIM_SLOTS + slotNum]; if(prev_in_slot.vp != 0) //if not first slave in slot, make dependency { Dependency newD; // is a hardware dependency newD.from_vp = prev_in_slot.vp; newD.from_task = prev_in_slot.task; newD.to_vp = returnSlv->slaveID; - newD.to_task = returnSlv->assignCount; - addToListOfArrays(Dependency,newD,processEnv->hwArcs); + newD.to_task = returnSlv->numTimesAssignedToASlot; + addToListOfArrays(Dependency,newD,process->hwArcs); } prev_in_slot.vp = returnSlv->slaveID; //make new slave the new previous - 
prev_in_slot.task = returnSlv->assignCount; - processEnv->last_in_slot[coreNum * NUM_ANIM_SLOTS + slotNum] = + prev_in_slot.task = returnSlv->numTimesAssignedToASlot; + process->last_in_slot[coreNum * NUM_ANIM_SLOTS + slotNum] = prev_in_slot; } #endif @@ -910,92 +1016,150 @@ return( returnSlv ); } - -//================================================================= - //#else //is MODE__MULTI_LANG - //For multi-lang mode, first, get the constraint-env holder out of - // the process, which is in the slave. - //Second, get the magic number out of the request, use it to look up - // the constraint Env within the constraint-env holder. - //Then get the request handler out of the constr env - constrEnvHolder = slave->process->constrEnvHolder; - reqst = slave->request; - langMagicNumber = reqst->langMagicNumber; - semanticEnv = lookup( langMagicNumber, constrEnvHolder ); //a macro - if( slave->reqst->type == taskEnd ) //end-task is special - { //need to know what lang's task ended - taskEndHandler = semanticEnv->taskEndHandler; - (*taskEndHandler)( slave, reqst, semanticEnv ); //can put semantic data into task end reqst, for continuation, etc - //this is a slot slave, get a new task for it - if( !existsOverrideAssigner )//if exists, is set above, before loop - { //search for task assigner that has work - for( a = 0; a < num_assigners; a++ ) - { if( taskAssigners[a]->hasWork ) - { newTaskAssigner = taskAssigners[a]; - (*newTaskAssigner)( slave, semanticEnv ); - goto GotTask; - } - } - goto NoTasks; - } - - GotTask: - continue; //have work, so do next iter of loop, don't call slave assigner - } - if( slave->typeOfVP == taskSlotSlv ) changeSlvType();//is suspended task - //now do normal suspended slave request handler - requestHandler = semanticEnv->requestHandler; - //#endif - - } - //If make it here, then was no task for this slot - //slot empty, hand to Assigner to fill with a slave - if( currSlot->needsSlaveAssigned ) - { //Call plugin's Assigner to give slot a new 
slave - HOLISTIC__Record_Assigner_start; - - //#ifdef MODE__MULTI_LANG - NoTasks: - //First, choose an Assigner.. - //There are several Assigners, one for each langlet.. they all - // indicate whether they have work available.. just pick the first - // one that has work.. Or, if there's a Unified Assigner, call - // that one.. So, go down array, checking.. - if( !existsOverrideAssigner ) - { for( a = 0; a < num_assigners; a++ ) - { if( assigners[a]->hasWork ) - { slaveAssigner = assigners[a]; - goto GotAssigner; - } - } - //no work, so just continue to next iter of scan loop - continue; - } - //when exists override, the assigner is set, once, above, so do nothing - GotAssigner: - //#endif - - assignedSlaveVP = - (*slaveAssigner)( semanticEnv, currSlot ); - - //put the chosen slave into slot, and adjust flags and state - if( assignedSlaveVP != NULL ) - { currSlot->slaveAssignedToSlot = assignedSlaveVP; - assignedSlaveVP->animSlotAssignedTo = currSlot; - currSlot->needsSlaveAssigned = FALSE; - numSlotsFilled += 1; - - HOLISTIC__Record_Assigner_end; - } - }//if slot needs slave assigned - }//for( slotIdx.. - - MEAS__Capture_Post_Master_Point; +/*In creator, only PR related things happen, and things in the langlet whose + * creator construct was used. + *Other langlet still gets a chance to create semData -- but by registering a + * "createSemData" handler in the semEnv. When a construct of the langlet + * calls "PR__give_sem_data()", if there is no semData for that langlet, + * the PR will call the creator in the langlet's semEnv, place whatever it + * makes as the semData in that slave for that langlet, and return that semData + * + *So, as far as counting things, a langlet is only allowed to count creation + * of slaves it creates itself.. may have to change this later.. add a way for + * langlet to register a trigger Fn called each time a slave gets created.. + * need more experience with what langlets will do at create time.. 
think Cilk + * has interesting create behavior.. not sure how that will differ in light + * of true tasks and langlet approach. Look at it after all done and start + * modifying the langs to be langlets.. + * + *PR itself needs to create the slave, then update numLiveSlaves in process, + * copy processID from requestor to newly created + */ +PRHandle_CreateSlave( PRReqst *req, SlaveVP *requestingSlv ) + { SlaveVP *newSlv; + PRMetaTask metaTask; + PRProcess *process; + + process = requestingSlv->processSlaveIsIn; + newSlv = PR_int__create_slaveVP(); + newSlv->typeOfVP = GenericSlv; + newSlv->processSlaveIsIn = process; + process->numLiveGenericSlaves += 1; + metaTask = PR_int__create_slave_meta_task(); + metaTask->taskID = req->ID; + metaTask->taskType = GenericSlave; - masterSwitchToCoreCtlr( masterVP ); - flushRegisters(); - DEBUG__printf(FALSE,"came back after switch to core -- so lock released!"); - }//while(1) + (*req->handler)(newSlv); } +/*The dissipate handler has to update the number of slaves of the type, within + * the process, and call the langlet handler linked into the request, + * and after that returns, then call the PR function that frees the slave state + * (or recycles the slave). + * + *The PR function that frees the slave state has to also free all of the + * semData in the slave.. or else reset all of the semDatas.. by, say, marking + * them, then in PR__give_semData( magicNum ) call the langlet registered + * "resetSemData" Fn. 
+ */ +PRHandle_Dissipate( SlaveVP *slave ) + { PRProcess *process; + void *semEnv; + + process = slave->processSlaveIsIn; + + //do the language's dissipate handler + semEnv = PR_int__give_sem_env_for( slave, slave->request->langMagicNumber ); + (*slave->request->handler)( slave, semEnv ); + + process->numLiveGenericSlaves -= 1; + PR_int__dissipate_slaveVP_multilang( slave ); //recycles and resets semDatas + + //check End Of Process Condition + if( process->numLiveTasks == 0 && + process->numLiveGenericSlaves == 0 ) + signalEndOfProcess; + } + +/*Create task is a special form, that has PR behavior in addition to plugin + * behavior. Master calls this first, and this in turn calls the plugin's + * create task handler. + */ +inline void +PRHandle_CreateTask( TopLevelFn topLevelFn, void *initData, PRReqst *req, + SlaveVP *requestingSlv ) + { PRMetaTask *metaTask; + PRProcess *process; + void *semEnv, _langMetaTask; + PRLangMetaTask *langMetaTask; + + process = requestingSlv->processSlaveIsIn; + + metaTask = PR_int__create_meta_task( req ); + metaTask->taskID = req->ID; //may be NULL + metaTask->topLevelFn = topLevelFn; + metaTask->initData = initData; + + process->numLiveTasks += 1; + + //plugin tracks tasks ready, and has its own assigner, so task doesn't + // come back from lang's handler -- it's consumed and stays in semEnv. 
+ //But handler gives back the language-specific meta-task it creates, and + // then hook that into the PR meta-task + //(Could also do PRMetaTask as a prolog -- make a Fn that takes the size + // of the lang's metaTask, and alloc's that plus the prolog and returns + // ptr to position just above the prolog) + semEnv = PR_int__give_semEnv_of_req( req, requestingSlv ); //magic num in req + _langMetaTask = (*requestingSlv->request->handler)(req, semEnv); + langMetaTask = (PRLangMetaTask *)_langMetaTask; + metaTask->langMetaTask = langMetaTask; + langMetaTask->protoMetaTask = metaTask; + + return; + } + +/*When a task ends, are two scenarios: 1) task ran to completion, or 2) task + * suspended at some point in its code. + *For 1, just decr count of live tasks (and check for end condition) -- the + * master loop will decide what goes into the slot freed up by this task end, + * so, here, don't worry about assigning a new task to the slot slave. + *For 2, the task's slot slave has been converted to a free task slave, which + * now has nothing more to do, so send it to the recycle Q (which includes + * freeing all the semData and meta task structs alloc'd for it). Then + * decrement the live task count and check end condition. + * + *PR has to update count of live tasks, and check end of process condition. + * There are constructs that wait for a process to end, so when end detected, + * have to resume what's waiting.. + *Thing is, the wait is used in "main", so it's an OS thread. That means + * PR internals have to do OS thread signaling. Want to do that in the + * core controller, which has the original stack of an OS thread. + * + *So here, when detect process end, signal to the core controller, which will + * then do the condition variable notify to the OS thread that's waiting. 
+ */ +inline void +PRHandle_EndTask( SlaveVP *requestingSlv ) + { void *semEnv; + PRReqst *req; + PRMetaTask *metaTask; + PRProcess *process; + + req = requestingSlv->request; + semEnv = PR_int__give_semEnv_of_req( req, requestingSlv ); //magic num in req + metaTask = req->metaTask; + //Want to keep PRMetaTask hidden from plugin, so extract semReq.. + (*req->handler)( metaTask, req->semReq, semEnv ); + + recycleFreeTaskSlave( requestingSlv ); + + process->numLiveTasks -= 1; + + //check End Of Process Condition + if( process->numLiveTasks == 0 && + process->numLiveGenericSlaves == 0 ) + signalEndOfProcessToCoreCtlr; + } + + \ No newline at end of file diff -r 999f2966a3e5 -r dafae55597ce CoreController.c --- a/CoreController.c Wed Sep 19 23:12:44 2012 -0700 +++ b/CoreController.c Tue Oct 23 23:46:17 2012 -0700 @@ -92,13 +92,13 @@ thisCoresIdx = thisCoresThdParams->coreNum; //Assembly that saves addr of label of return instr -- label in assmbly - recordCoreCtlrReturnLabelAddr((void**)&(_PRMasterEnv->coreCtlrReturnPt)); + recordCoreCtlrReturnLabelAddr((void**)&(_PRTopEnv->coreCtlrReturnPt)); - animSlots = _PRMasterEnv->allAnimSlots[thisCoresIdx]; + animSlots = _PRTopEnv->allAnimSlots[thisCoresIdx]; currSlotIdx = 0; //start at slot 0, go up until one empty, then do master numRepetitionsWithNoWork = 0; - addrOfMasterLock = &(_PRMasterEnv->masterLock); - thisCoresMasterVP = _PRMasterEnv->masterVPs[thisCoresIdx]; + addrOfMasterLock = &(_PRTopEnv->masterLock); + thisCoresMasterVP = _PRTopEnv->masterVPs[thisCoresIdx]; //==================== pthread related stuff ====================== //pin the pthread to the core -- takes away Linux control @@ -113,7 +113,7 @@ //make sure the controllers all start at same time, by making them wait pthread_mutex_lock( &suspendLock ); - while( !(_PRMasterEnv->setupComplete) ) + while( !(_PRTopEnv->setupComplete) ) { pthread_cond_wait( &suspendCond, &suspendLock ); } pthread_mutex_unlock( &suspendLock ); @@ -225,11 +225,11 @@ inline uint32_t 
randomNumber() { - _PRMasterEnv->seed1 = (uint32)(36969 * (_PRMasterEnv->seed1 & 65535) + - (_PRMasterEnv->seed1 >> 16) ); - _PRMasterEnv->seed2 = (uint32)(18000 * (_PRMasterEnv->seed2 & 65535) + - (_PRMasterEnv->seed2 >> 16) ); - return (_PRMasterEnv->seed1 << 16) + _PRMasterEnv->seed2; + _PRTopEnv->seed1 = (uint32)(36969 * (_PRTopEnv->seed1 & 65535) + + (_PRTopEnv->seed1 >> 16) ); + _PRTopEnv->seed2 = (uint32)(18000 * (_PRTopEnv->seed2 & 65535) + + (_PRTopEnv->seed2 >> 16) ); + return (_PRTopEnv->seed1 << 16) + _PRTopEnv->seed2; } @@ -292,14 +292,14 @@ //=============== Initializations =================== thisCoresIdx = 0; //sequential version - animSlots = _PRMasterEnv->allAnimSlots[thisCoresIdx]; + animSlots = _PRTopEnv->allAnimSlots[thisCoresIdx]; currSlotIdx = 0; //start at slot 0, go up until one empty, then do master numRepetitionsWithNoWork = 0; - addrOfMasterLock = &(_PRMasterEnv->masterLock); - thisCoresMasterVP = _PRMasterEnv->masterVPs[thisCoresIdx]; + addrOfMasterLock = &(_PRTopEnv->masterLock); + thisCoresMasterVP = _PRTopEnv->masterVPs[thisCoresIdx]; //Assembly that saves addr of label of return instr -- label in assmbly - recordCoreCtlrReturnLabelAddr((void**)&(_PRMasterEnv->coreCtlrReturnPt)); + recordCoreCtlrReturnLabelAddr((void**)&(_PRTopEnv->coreCtlrReturnPt)); //====================== The Core Controller ====================== diff -r 999f2966a3e5 -r dafae55597ce Defines/MEAS__macros_to_be_moved_to_langs.h --- a/Defines/MEAS__macros_to_be_moved_to_langs.h Wed Sep 19 23:12:44 2012 -0700 +++ b/Defines/MEAS__macros_to_be_moved_to_langs.h Tue Oct 23 23:46:17 2012 -0700 @@ -26,8 +26,8 @@ #define syncHistIdx 2 #define MEAS__Make_Meas_Hists_for_Language() \ - _PRMasterEnv->measHistsInfo = \ - makePrivDynArrayOfSize( (void***)&(_PRMasterEnv->measHists), 200); \ + _PRTopEnv->measHistsInfo = \ + makePrivDynArrayOfSize( (void***)&(_PRTopEnv->measHists), 200); \ makeAMeasHist( spawnHistIdx, "Spawn", 50, 0, 200 ) \ makeAMeasHist( syncHistIdx, "Sync", 50, 
0, 200 ) @@ -39,7 +39,7 @@ #define Meas_endSpawn \ saveLowTimeStampCountInto( endStamp ); \ addIntervalToHist( startStamp, endStamp, \ - _PRMasterEnv->measHists[ spawnHistIdx ] ); + _PRTopEnv->measHists[ spawnHistIdx ] ); #define Meas_startSync \ int32 startStamp, endStamp; \ @@ -48,7 +48,7 @@ #define Meas_endSync \ saveLowTimeStampCountInto( endStamp ); \ addIntervalToHist( startStamp, endStamp, \ - _PRMasterEnv->measHists[ syncHistIdx ] ); + _PRTopEnv->measHists[ syncHistIdx ] ); #endif //=========================================================================== diff -r 999f2966a3e5 -r dafae55597ce Defines/PR_defs__HW_constants.h --- a/Defines/PR_defs__HW_constants.h Wed Sep 19 23:12:44 2012 -0700 +++ b/Defines/PR_defs__HW_constants.h Tue Oct 23 23:46:17 2012 -0700 @@ -16,10 +16,14 @@ // machine #define NUM_CORES 4 - // tradeoff amortizing master fixed overhead vs imbalance potential + //tradeoff amortizing master fixed overhead vs imbalance potential // when work-stealing, can make bigger, at risk of losing cache affinity #define NUM_ANIM_SLOTS 1 + //number of PRSemEnv structs created inside a process -- can't start more + // than this many langlets inside a single process +#define NUM_SEM_ENVS_IN_PROCESS 64 + //These are for backoff inside core-loop, which reduces lock contention #define NUM_REPS_W_NO_WORK_BEFORE_YIELD 10 #define NUM_REPS_W_NO_WORK_BEFORE_BACKOFF 2 @@ -36,6 +40,8 @@ //Frequency of TS counts -- have to do tests to verify //NOTE: turn off (in BIOS) TURBO-BOOST and SPEED-STEP else won't be const #define TSCOUNT_FREQ 3180000000 +#define TSC_LOW_CYCLES 27 +#define TSC_LOWHI_CYCLES 45 #define CACHE_LINE_SZ 256 #define PAGE_SIZE 4096 diff -r 999f2966a3e5 -r dafae55597ce HW_Dependent_Primitives/PR__HW_measurement.c --- a/HW_Dependent_Primitives/PR__HW_measurement.c Wed Sep 19 23:12:44 2012 -0700 +++ b/HW_Dependent_Primitives/PR__HW_measurement.c Tue Oct 23 23:46:17 2012 -0700 @@ -27,25 +27,25 @@ { hw_event.type = PERF_TYPE_HARDWARE; hw_event.config = 
PERF_COUNT_HW_CPU_CYCLES; //cycles - _PRMasterEnv->cycles_counter_fd[coreIdx] = syscall(__NR_perf_event_open, &hw_event, + _PRTopEnv->cycles_counter_fd[coreIdx] = syscall(__NR_perf_event_open, &hw_event, 0,//pid_t pid, coreIdx,//int cpu, -1,//int group_fd, 0//unsigned long flags ); - if (_PRMasterEnv->cycles_counter_fd[coreIdx]<0){ + if (_PRTopEnv->cycles_counter_fd[coreIdx]<0){ fprintf(stderr,"On core %d: ",coreIdx); perror("Failed to open cycles counter"); } hw_event.type = PERF_TYPE_HARDWARE; hw_event.config = PERF_COUNT_HW_INSTRUCTIONS; //instrs - _PRMasterEnv->instrs_counter_fd[coreIdx] = syscall(__NR_perf_event_open, &hw_event, + _PRTopEnv->instrs_counter_fd[coreIdx] = syscall(__NR_perf_event_open, &hw_event, 0,//pid_t pid, coreIdx,//int cpu, -1,//int group_fd, 0//unsigned long flags ); - if (_PRMasterEnv->instrs_counter_fd[coreIdx]<0){ + if (_PRTopEnv->instrs_counter_fd[coreIdx]<0){ fprintf(stderr,"On core %d: ",coreIdx); perror("Failed to open instrs counter"); } @@ -53,13 +53,13 @@ hw_event.config = PERF_COUNT_HW_CACHE_L1D << 0 | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16); //cache misses - _PRMasterEnv->cachem_counter_fd[coreIdx] = syscall(__NR_perf_event_open, &hw_event, + _PRTopEnv->cachem_counter_fd[coreIdx] = syscall(__NR_perf_event_open, &hw_event, 0,//pid_t pid, coreIdx,//int cpu, -1,//int group_fd, 0//unsigned long flags ); - if (_PRMasterEnv->cachem_counter_fd[coreIdx]<0){ + if (_PRTopEnv->cachem_counter_fd[coreIdx]<0){ fprintf(stderr,"On core %d: ",coreIdx); perror("Failed to open cache miss counter"); exit(1); diff -r 999f2966a3e5 -r dafae55597ce HW_Dependent_Primitives/PR__primitives_asm.s --- a/HW_Dependent_Primitives/PR__primitives_asm.s Wed Sep 19 23:12:44 2012 -0700 +++ b/HW_Dependent_Primitives/PR__primitives_asm.s Tue Oct 23 23:46:17 2012 -0700 @@ -51,7 +51,7 @@ * 0x18 coreCtlrFramePtr * 0x20 coreCtlrStackPtr * - * _PRMasterEnv offsets: + * _PRTopEnv offsets: * 0x00 coreCtlrReturnPt * 0x100 masterLock */ 
@@ -76,7 +76,7 @@ * 0x18 coreCtlrFramePtr * 0x20 coreCtlrStackPtr * - * _PRMasterEnv offsets: + * _PRTopEnv offsets: * 0x00 coreCtlrReturnPt * 0x100 masterLock */ @@ -88,8 +88,8 @@ movq %rbp , 0x08(%rdi) #save frame pointer movq 0x20(%rdi), %rsp #restore stack pointer movq 0x18(%rdi), %rbp #restore frame pointer - movq $_PRMasterEnv, %rcx - movq (%rcx), %rcx #_PRMasterEnv is pointer to struct + movq $_PRTopEnv, %rcx + movq (%rcx), %rcx #_PRTopEnv is pointer to struct movq 0x00(%rcx), %rax #get CoreCtlrStartPt jmp *%rax #jmp to CoreCtlr SlvReturn: @@ -106,7 +106,7 @@ * 0x18 coreCtlrFramePtr * 0x20 coreCtlrStackPtr * - * _PRMasterEnv offsets: + * _PRTopEnv offsets: * 0x00 coreCtlrReturnPt * 0x100 masterLock */ @@ -118,8 +118,8 @@ movq %rbp , 0x08(%rdi) #save frame pointer movq 0x20(%rdi), %rsp #restore stack pointer movq 0x18(%rdi), %rbp #restore frame pointer - movq $_PRMasterEnv, %rcx - movq (%rcx), %rcx #_PRMasterEnv is pointer to struct + movq $_PRTopEnv, %rcx + movq (%rcx), %rcx #_PRTopEnv is pointer to struct movq 0x00(%rcx), %rax #get CoreCtlr return pt movl $0x0 , 0x100(%rcx) #release lock jmp *%rax #jmp to CoreCtlr @@ -142,7 +142,7 @@ * 0x18 coreCtlrFramePtr * 0x20 coreCtlrStackPtr * - * _PRMasterEnv offsets: + * _PRTopEnv offsets: * 0x00 coreCtlrReturnPt * 0x100 masterLock */ diff -r 999f2966a3e5 -r dafae55597ce PR.h --- a/PR.h Wed Sep 19 23:12:44 2012 -0700 +++ b/PR.h Tue Oct 23 23:46:17 2012 -0700 @@ -32,15 +32,17 @@ // typedef unsigned long long TSCount; -typedef struct _AnimSlot AnimSlot; -typedef struct _PRReqst PRReqst; +typedef struct _AnimSlot AnimSlot; +typedef struct _PRReqst PRReqst; typedef struct _SlaveVP SlaveVP; typedef struct _MasterVP MasterVP; typedef struct _IntervalProbe IntervalProbe; +typedef struct _PRMetaTask PRMetaTask; typedef SlaveVP *(*SlaveAssigner) ( void *, AnimSlot*); //semEnv, slot for HW info typedef void (*RequestHandler) ( SlaveVP *, void * ); //prWReqst, semEnv +typedef void (*IndivReqHandler)( SlaveVP *, void * ); 
//prWReqst, semEnv typedef void (*TopLevelFnPtr) ( void *, SlaveVP * ); //initData, animSlv typedef void TopLevelFn ( void *, SlaveVP * ); //initData, animSlv typedef void (*ResumeSlvFnPtr) ( SlaveVP *, void * ); @@ -57,25 +59,41 @@ //============= Request Related =========== // -enum PRReqstType //avoid starting enums at 0, for debug reasons +enum PRReqstType //avoid starting enums at 0, for debug reasons { - semantic = 1, - createReq, - dissipate, - PRSemantic //goes with PRSemReqst below + TaskCreate = 1, + TaskEnd, + SlvCreate, + SlvDissipate, + Language, + Service, //To invoke a PR provided equivalent of a language request (ex: probe) + Hardware, + IO, + OSCall }; struct _PRReqst { - enum PRReqstType reqType;//used for dissipate and in future for IO requests - void *semReqData; - + enum PRReqstType reqType;//used for special forms that have PR behavior + void *semReq; + PRProcess *processReqIsIn; + int32 langMagicNumber; + PRMetaTask *metaTask; + TopLevelFn topLevelFn; + void *initData; + int32 *ID; + + //The request handling structure is a bit messy.. for special forms, + // such as create and dissipate, the language inserts pointer to handler + // fn directly into the request.. 
might change to this for all requests + IndivReqHandler handler; //pointer to handler fn for create, dissip, etc + PRReqst *nextReqst; }; //PRReqst -enum PRSemReqstType //These are equivalent to semantic requests, but for - { // PR's services available directly to app, like OS +enum PRServReqType //These are equivalent to semantic requests, but for + { // PR's services available directly to app, like OS make_probe = 1, // and probe services -- like a PR-wide built-in lang throw_excp, openFile, @@ -83,13 +101,13 @@ }; typedef struct - { enum PRSemReqstType reqType; + { enum PRServReqType reqType; SlaveVP *requestingSlv; char *nameStr; //for create probe char *msgStr; //for exception void *exceptionData; } - PRSemReq; +PRServReq; //==================== Core data structures =================== @@ -114,9 +132,8 @@ enum VPtype { TaskSlotSlv = 1,//Slave tied to an anim slot, only animates tasks - TaskExtraSlv, //When a suspended task ends, the slave becomes this - PersistentSlv, //the VP is explicitly seen in the app code, or task suspends - Slave, //to be removed + TaskFreeSlv, //When a suspended task ends, the slave becomes this + GenericSlv, //the VP is explicitly seen in the app code, or task suspends Master, Shutdown, Idle @@ -135,22 +152,27 @@ //============ below this, no fields are used in asm ============= + void *startOfStack; //used to free, and to point slave to Fn + PRProcess *processSlaveIsIn; + PRMetaTask *metaTask; + enum VPtype typeOfVP; //Slave vs Master vs Shutdown.. int slaveID; //each slave given a globally unique ID int coreAnimatedBy; - void *startOfStack; //used to free, and to point slave to Fn - enum VPtype typeOfVP; //Slave vs Master vs Shutdown.. 
- int assignCount; //Each assign is for one work-unit, so IDs it + int numTimesAssignedToASlot; //Each assign is for one work-unit, so is an ID //note, a scheduling decision is uniquely identified by the triple: - // -- used in record & replay + // -- used in record & replay //for comm -- between master and coreCtlr & btwn wrapper lib and plugin AnimSlot *animSlotAssignedTo; - PRReqst *request; //wrapper lib puts in requests, plugin takes out + PRReqst *request; //wrapper lib puts in requests, plugin takes out void *dataRetFromReq;//Return vals from plugin to Wrapper Lib - //For using Slave as carrier for data + //For language specific data that needs to be in the slave void *semanticData; //Lang saves lang-specific things in slave here + //Task related stuff + bool needsTaskAssigned; + //=========== MEASUREMENT STUFF ========== MEAS__Insert_Meas_Fields_into_Slave; float64 createPtInSecs; //time VP created, in seconds @@ -172,14 +194,12 @@ //Basic PR infrastructure SlaveVP **masterVPs; AnimSlot ***allAnimSlots; + + PRProcess **processes; - //plugin related - PRSemEnv **langlets; - - //Slave creation -- global count of slaves existing, across langs and processes +//move to processEnv //Slave creation -- global count of slaves existing, across langs and processes int32 numSlavesCreated; //used to give unique ID to processor -//no reasonable way to do fail-safe when have mult langlets and processes.. 
have to detect for each langlet separately -// int32 numSlavesAlive; //used to detect fail-safe shutdown + int32 numTasksCreated; //to give unique ID to a task //Initialization related int32 setupComplete; //use while starting up coreCtlr @@ -192,14 +212,24 @@ uint32_t seed1; uint32_t seed2; + These_Prob_belong_in_PRPRocess; +// SlaveVP *slotTaskSlvs[NUM_CORES][NUM_ANIM_SLOTS]; +// int32 numLiveFreeTaskSlvs; +// int32 numLiveThreadSlvs; +// bool32 *coreIsDone; +// int32 numCoresDone; + +// SlaveVP* idleSlv[NUM_CORES][NUM_ANIM_SLOTS]; +// int shutdownInitiated; + //=========== MEASUREMENT STUFF ============= IntervalProbe **intervalProbes; - PtrToPrivDynArray *dynIntervalProbesInfo; + PrivDynArrayInfo *dynIntervalProbesInfo; HashTable *probeNameHashTbl; int32 masterCreateProbeID; float64 createPtInSecs; //real-clock time PR initialized Histogram **measHists; - PtrToPrivDynArray *measHistsInfo; + PrivDynArrayInfo *measHistsInfo; MEAS__Insert_Susp_Meas_Fields_into_MasterEnv; MEAS__Insert_Master_Meas_Fields_into_MasterEnv; MEAS__Insert_Master_Lock_Meas_Fields_into_MasterEnv; @@ -213,45 +243,112 @@ //===================== typedef struct - { int32 langletID; //acts as index into array of langlets in master env - void *langletSemEnv; - int32 langMagicNumber; - SlaveAssigner slaveAssigner; - RequestHandler requestHandler; - EndTaskHandler endTaskHandler; + { int32 langMagicNumber; //indexes into hash array of semEnvs in PRProcess + PRSemEnv *chainedSemEnv; //chains to semEnvs with same hash + void *langSemEnv; - //Tack slaves created, separately for each langlet (in each process) - int32 numSlavesCreated; //gives ordering to processor creation - int32 numSlavesAlive; //used to detect fail-safe shutdown + SlaveAssigner slaveAssigner; + RequestHandler requestHdlr; + + RequestHandler createTaskHdlr; + RequestHandler endTaskHdlr; + RequestHandler createSlaveHdlr; + RequestHandler dissipateSlaveHdlr; + RequestHandler semDataCreator; + RequestHandler semDataInitializer; + + + 
//Track slaves created, separately for each langlet? (in each process) +// int32 numSlavesCreated; //gives ordering to processor creation +// int32 numSlavesAlive; //used to detect fail-safe shutdown //when multi-lang, master polls sem env's to find one with work in it.. // in single-lang case, flag ignored, master always asks lang for work - int32 hasWork; + int32 hasWork; } PRSemEnv; -//===================== Top Processor level Data Strucs ====================== +//The semantic env of every langlet must start with these two fields, so that +// PR can cast the void * to this struct, in order to access these two fields typedef struct + { int32 langMagicNumber; + PRSemEnv *protoSemEnv; + } +PRLangSemEnv; + +//can cast any langlet's sem env to one of these, so PR can access values +typedef struct + { int32 langMagicNumber; + PRSemEnv *protoSemEnv; + } +PRServSemEnv; + +enum PRTaskType + { GenericSlave = 1, + AtomicTask, + SuspendedTask + }; + +struct _PRMetaTask { - + PRTaskType taskType; + RequestHandler reqHandler; //Lang-specific hdlr for create, end, etc + int32 *taskID; //is standard PR ID + SlaveVP *slaveAssignedTo; //no valid until task animated + TopLevelFn topLevelFn; //This is the Fn executes as the task + void *initData; //The data taken by the function + void *langMetaTask; + + //NOTE: info needed for "wait" functionality is inside lang's metaTask + }; +//PRMetaTask + +/*The language's meta task is cast to this struct, inside PR, then the + * back pointer to protoMetaTask is set. Keeps existence of PRMetaTask hidden + * from plugin -- so can change later. 
+ */ +typedef struct + { int32 langMagicNumber; + PRMetaTask *protoMetaTask; } -PRProcess; +PRLangMetaTask; + +typedef struct + { + void (*freeFn)(void *); + } +PRSemDataTemplate; + +typedef struct + { PRSemDataTemplate **semDatas; + PRSemDataTemplate **semDatasIter; + int32 numSemDatas; + } +PRSemDataHolder; +//===================== Top Process level Data Strucs ====================== + /*This structure holds all the information PR needs to manage a program. PR * stores information about what percent of CPU time the program is getting, * */ typedef struct - { //void *semEnv; - //RequestHdlrFnPtr requestHandler; - //SlaveAssignerFnPtr slaveAssigner; - int32 numSlavesLive; - void *resultToReturn; + { + PRSemEnv semEnvs[NUM_SEM_ENVS_IN_PROCESS]; //used as a hash table + PRSemEnv semEnvList[NUM_SEM_ENVS_IN_PROCESS]; //lines up the semEnvs, so can iterate through + int32 numSemEnvs; //must be less than num sem envs.. used to iterate through + + int32 numLiveGenericSlaves; + int32 numLiveFreeTaskSlaves; + int32 numLiveTasks; + bool32 coreIsDone[NUM_CORES][CACHE_LINE_SZ]; //Fixes false sharing + + void *resultToReturn; SlaveVP *seedSlv; - //These are used to coordinate within the main function..? + //These are used to coord with OS thread waiting for process to end bool32 executionIsComplete; - pthread_mutex_t doneLock; //? not sure need these..? 
+ pthread_mutex_t doneLock; pthread_cond_t doneCond; } PRProcess; @@ -280,7 +377,7 @@ //============================= Global Vars ================================ -volatile MasterEnv *_PRMasterEnv __align_to_cacheline__; +volatile MasterEnv *_PRTopEnv __align_to_cacheline__; //these are global, but only used for startup and shutdown pthread_t coreCtlrThdHandles[ NUM_CORES ]; //pthread's virt-procr state @@ -315,9 +412,6 @@ void PR__start(); -void -PR_SS__start_the_work_then_wait_until_done(); - SlaveVP* PR_SS__create_shutdown_slave(); @@ -328,8 +422,7 @@ PR_SS__cleanup_at_end_of_shutdown(); void -PR_SS__register_langlets_semEnv( PRSemEnv *semEnv, int32 VSs_MAGIC_NUMBER, - SlaveVP *seedVP ); +PR_SS__register_langlets_semEnv( PRSemEnv *semEnv, SlaveVP *seedVP, int32 VSs_MAGIC_NUMBER ); //============== =============== @@ -339,35 +432,45 @@ #define PR_PI__create_slaveVP PR_int__create_slaveVP #define PR_WL__create_slaveVP PR_int__create_slaveVP - //Use this to create processor inside entry point & other places outside - // the PR system boundary (IE, don't animate with a SlaveVP or MasterVP) +inline SlaveVP * -PR_ext__create_slaveVP( TopLevelFnPtr fnPtr, void *dataParam ); +PR_int__create_slot_slave(); -inline SlaveVP * +inline +SlaveVP * PR_int__create_slaveVP_helper( SlaveVP *newSlv, TopLevelFnPtr fnPtr, void *dataParam, void *stackLocs ); -inline void +inline +PRMetaTask * +PR_int__create_generic_slave_meta_task( void *initData ); + +inline +void PR_int__reset_slaveVP_to_TopLvlFn( SlaveVP *slaveVP, TopLevelFnPtr fnPtr, void *dataParam); -inline void +inline +void PR_int__point_slaveVP_to_OneParamFn( SlaveVP *slaveVP, void *fnPtr, void *param); -inline void +inline +void PR_int__point_slaveVP_to_TwoParamFn( SlaveVP *slaveVP, void *fnPtr, void *param1, void *param2); +inline void PR_int__dissipate_slaveVP( SlaveVP *slaveToDissipate ); #define PR_PI__dissipate_slaveVP PR_int__dissipate_slaveVP //WL: dissipate a SlaveVP by sending a request +inline void 
-PR_ext__dissipate_slaveVP( SlaveVP *slaveToDissipate ); +PR_int__dissipate_slaveVP_multilang( SlaveVP *slaveToDissipate ); +inline void PR_int__throw_exception( char *msgStr, SlaveVP *reqstSlv, PRExcp *excpData ); #define PR_PI__throw_exception PR_int__throw_exception @@ -375,17 +478,51 @@ PR_WL__throw_exception( char *msgStr, SlaveVP *reqstSlv, PRExcp *excpData ); #define PR_App__throw_exception PR_WL__throw_exception +inline void * -PR_int__give_sem_env_for( SlaveVP *animSlv ); -#define PR_PI__give_sem_env_for PR_int__give_sem_env_for -#define PR_SS__give_sem_env_for PR_int__give_sem_env_for -//No WL version -- not safe! if use in WL, be sure data rd & wr is stable +PR_int__give_sem_env_for_slave( SlaveVP *slave, int32 magicNumber ); +#define PR_PI__give_sem_env_for PR_int__give_sem_env_for_slave +#define PR_SS__give_sem_env_for_slave PR_int__give_sem_env_for_slave +//No WL version -- not safe! if use env in WL, be sure data rd & wr is stable +inline +PRSemEnv * +PR_int__give_proto_sem_env_for_slave( SlaveVP *slave, int32 magicNumber ); +#define PR_PI__give_proto_sem_env_for PR_int__give_proto_sem_env_for_slave +#define PR_SS__give_proto_sem_env_for_slave PR_int__give_proto_sem_env_for_slave +//No WL version -- not safe! if use env in WL, be sure data rd & wr is stable +inline +void * +PR_int__give_sem_env_from_process( PRProcess *process, int32 magicNumer ); +#define PR_PI__give_sem_env_from_process PR_int__give_sem_env_from_process +#define PR_SS__give_sem_env_from_process PR_int__give_sem_env_from_process +//#define PR_WL__give_sem_env_from_process PR_int__give_sem_env_from_process +//No WL version -- not safe! 
if use env in WL, be sure data rd & wr is stable +inline +void * +PR_int__give_sem_data( SlaveVP *slave, int32 magicNumer ); +#define PR_PI__give_sem_data PR_int__give_sem_data +#define PR_SS__give_sem_data PR_int__give_sem_data +#define PR_WL__give_sem_data PR_int__give_sem_data + + +#define PR_int__give_lang_meta_task( slave, magicNumber )\ + slave->metaTask->langMetaTask; +#define PR_PI__give_lang_meta_task PR_int__give_lang_meta_task +#define PR_SS__give_lang_meta_task PR_int__give_lang_meta_task +#define PR_WL__give_lang_meta_task PR_int__give_lang_meta_task + +inline +SlaveVP * +PR_PI__give_slave_assigned_to( PRLangMetaTask *langMetaTask ); + +void +idle_fn(void* data, SlaveVP *animatingSlv); inline void PR_int__get_master_lock(); -#define PR_int__release_master_lock() _PRMasterEnv->masterLock = UNLOCKED +#define PR_int__release_master_lock() _PRTopEnv->masterLock = UNLOCKED inline uint32_t PR_int__randomNumber(); @@ -393,13 +530,13 @@ //============== Request Related =============== void -PR_int__suspend_slaveVP_and_send_req( SlaveVP *callingSlv ); +PR_WL__suspend_slaveVP_and_send_req( SlaveVP *callingSlv ); inline void PR_WL__add_sem_request_in_mallocd_PRReqst( void *semReqData, SlaveVP *callingSlv ); inline void -PR_WL__send_sem_request( void *semReqData, SlaveVP *callingSlv ); +PR_WL__send_sem_request( void *semReq, SlaveVP *callingSlv, int32 magicNum ); void PR_WL__send_create_slaveVP_req( void *semReqData, SlaveVP *reqstingSlv ); @@ -408,7 +545,7 @@ PR_WL__send_dissipate_req( SlaveVP *prToDissipate ); inline void -PR_WL__send_PRSem_request( void *semReqData, SlaveVP *callingSlv ); +PR_WL__send_service_request( void *semReqData, SlaveVP *callingSlv ); PRReqst * PR_PI__take_next_request_out_of( SlaveVP *slaveWithReq ); @@ -419,7 +556,7 @@ #define PR_PI__take_sem_reqst_from( req ) req->semReqData void inline -PR_PI__handle_PRSemReq( PRReqst *req, SlaveVP *requestingSlv, void *semEnv, +PR_int__handle_PRServiceReq( PRReqst *req, SlaveVP *requestingSlv, void 
*semEnv, ResumeSlvFnPtr resumeSlvFnPtr ); //======================== MEASUREMENT ====================== @@ -434,6 +571,14 @@ PR_int__strDup( char *str ); +//========================= PR request handlers ======================== +void inline +handleMakeProbe( PRServReq *semReq, void *semEnv, ResumeSlvFnPtr resumeFn ); + +void inline +handleThrowException( PRServReq *semReq, void *semEnv, ResumeSlvFnPtr resumeFn ); +//======================================================================= + //========================= Probes ======================= #include "Services_Offered_by_PR/Measurement_and_Stats/probes.h" diff -r 999f2966a3e5 -r dafae55597ce PR__PI.c --- a/PR__PI.c Wed Sep 19 23:12:44 2012 -0700 +++ b/PR__PI.c Tue Oct 23 23:46:17 2012 -0700 @@ -22,13 +22,7 @@ * int: internal to the PR implementation */ -//========================= Local Declarations ======================== -void inline -handleMakeProbe( PRSemReq *semReq, void *semEnv, ResumeSlvFnPtr resumeFn ); -void inline -handleThrowException( PRSemReq *semReq, void *semEnv, ResumeSlvFnPtr resumeFn ); -//======================================================================= PRReqst * @@ -57,65 +51,5 @@ */ -/* This is for OS requests and PR infrastructure requests, such as to create - * a probe -- a probe is inside the heart of PR-core, it's not part of any - * language -- but it's also a semantic thing that's triggered from and used - * in the application.. so it crosses abstractions.. so, need some special - * pattern here for handling such requests. - * Doing this just like it were a second language sharing PR-core. - * - * This is called from the language's request handler when it sees a request - * of type PRSemReq - * - * TODO: Later change this, to give probes their own separate plugin & have - * PR-core steer the request to appropriate plugin - * Do the same for OS calls -- look later at it.. 
- */ -void inline -PR_PI__handle_PRSemReq( PRReqst *req, SlaveVP *requestingSlv, void *semEnv, - ResumeSlvFnPtr resumeFn ) - { PRSemReq *semReq; - semReq = PR_PI__take_sem_reqst_from(req); - if( semReq == NULL ) return; - switch( semReq->reqType ) //sem handlers are all in other file - { - case make_probe: handleMakeProbe( semReq, semEnv, resumeFn); - break; - case throw_excp: handleThrowException( semReq, semEnv, resumeFn); - break; - } - } -/* - */ -void inline -handleMakeProbe( PRSemReq *semReq, void *semEnv, ResumeSlvFnPtr resumeFn ) - { IntervalProbe *newProbe; - - newProbe = PR_int__malloc( sizeof(IntervalProbe) ); - newProbe->nameStr = PR_int__strDup( semReq->nameStr ); - newProbe->hist = NULL; - newProbe->schedChoiceWasRecorded = FALSE; - - //This runs in masterVP, so no race-condition worries - newProbe->probeID = - addToDynArray( newProbe, _PRMasterEnv->dynIntervalProbesInfo ); - - semReq->requestingSlv->dataRetFromReq = newProbe; - - //This in inside PR, while resume_slaveVP fn is inside language, so pass - // pointer from lang to here, then call it. - (*resumeFn)( semReq->requestingSlv, semEnv ); - } - -void inline -handleThrowException( PRSemReq *semReq, void *semEnv, ResumeSlvFnPtr resumeFn ) - { - PR_int__throw_exception( semReq->msgStr, semReq->requestingSlv, semReq->exceptionData ); - - (*resumeFn)( semReq->requestingSlv, semEnv ); - } - - - diff -r 999f2966a3e5 -r dafae55597ce PR__SS.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/PR__SS.c Tue Oct 23 23:46:17 2012 -0700 @@ -0,0 +1,697 @@ +/* + * Copyright 2010 OpenSourceStewardshipFoundation + * + * Licensed under BSD + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "PR.h" + + +#define thdAttrs NULL + + +/* MEANING OF WL PI SS int + * These indicate which places the function is safe to use. 
They stand for: + * WL: Wrapper Library + * PI: Plugin + * SS: Startup and Shutdown + * int: internal to the PR implementation + */ + + +//=========================================================================== +AnimSlot ** +create_anim_slots( int32 coreSlotsAreOn ); + +void +create_masterEnv(); + +void +create_the_coreCtlr_OS_threads(); + +MallocProlog * +create_free_list(); + +void +endOSThreadFn( void *initData, SlaveVP *animatingSlv ); + + +//=========================================================================== + +/*Setup has two phases: + * 1) Semantic layer first calls init_PR, which creates masterEnv, and puts + * the master Slv into the work-queue, ready for first "call" + * 2) Semantic layer then does its own init, which creates the seed virt + * slave inside the semantic layer, ready to assign it when + * asked by the first run of the animationMaster. + * + *This part is bit weird because PR really wants to be "always there", and + * have applications attach and detach.. for now, this PR is part of + * the app, so the PR system starts up as part of running the app. + * + *The semantic layer is isolated from the PR internals by making the + * semantic layer do setup to a state that it's ready with its + * initial Slvs, ready to assign them to slots when the animationMaster + * asks. Without this pattern, the semantic layer's setup would + * have to modify slots directly to assign the initial virt-procrs, and put + * them into the readyToAnimateQ itself, breaking the isolation completely. + * + * + *The semantic layer creates the initial Slv(s), and adds its + * own environment to masterEnv, and fills in the pointers to + * the requestHandler and slaveAssigner plug-in functions + */ + +/*This allocates PR data structures, populates the master PRProc, + * and master environment, and returns the master environment to the semantic + * layer. 
+ */ +void +PR__start() + { + #ifdef DEBUG__TURN_ON_SEQUENTIAL_MODE + create_masterEnv(); + printf( "\n\n Running in SEQUENTIAL mode \n\n" ); + #else + create_masterEnv(); + DEBUG__printf1(dbgInfra,"Offset of lock in masterEnv: %d ", (int32)offsetof(MasterEnv,masterLock) ); + create_the_coreCtlr_OS_threads(); + #endif + } + + +/*A process is represented by a structure that holds all the process-specific + * information: + *-] The hash-array containing the semantic environs of any langlets started + * inside the process. + *-] Flags used to detect the end of activity in the process + *-] Counter of num live slaves and num live tasks in the process + * + *PR automatically generates the seedVP when it creates the process, and + * inserts the processID of the newly created process into it. + */ +PRProcess * +PR__create_process( TopLevelFnPtr seed_Fn, void *seedData ) + { SlaveVP *seedSlv; + PRProcess *process; + PRMetaTask *metaTask; + PRSemEnv *semEnvs; + int32 idx; + + process = malloc( sizeof(PRProcess) ); + process->numSemEnvs = 0; + semEnvs = process->semEnvs; + for( idx = 0; idx < NUM_SEM_ENVS_IN_PROCESS; idx++ ) + { semEnvs[idx].langSemEnv = NULL; + semEnvs[idx].chainedSemEnv = NULL; + } + + //A Process starts with one slave, the seed slave + seedSlv = PR_int__create_slaveVP( seed_Fn, seedData ); + + seedSlv->processSlaveIsIn = process; + + //seed slave is a generic slave, so make a generic slave meta task for it + metaTask = PR_int__create_generic_slave_meta_task( seedData ); + seedSlv->metaTask = metaTask; + + process->numLiveGenericSlaves = 1; //count the seed + process->numLiveTasks = 0; + + PRServSemEnv * + servicesSemEnv = PR_SS__malloc( sizeof(PRServSemEnv) ); + PR_SS__register_langlets_semEnv( servicesSemEnv, seedSlv, PRSERV_MAGIC_NUMBER ); + + //resume seedVP into PR's built-in services language's semantic env + PRServ__resume_slaveVP( seedSlv, servicesSemEnv ); + + return process; + } + + +/*This gets the process struct out of the seedVP, then gets the 
semEnv-holding + * struct out of that, then inserts the semantic env into that struct, using + * the magic number as the key to the sem env placement. The master will + * use the magic number from a request to retrieve the semantic env appropriate + * for the construct that made the request. + */ +void +PR_SS__register_langlets_semEnv( void *_semEnv, SlaveVP *seedVP, int32 magicNum ) + { PRSemEnv *protoSemEnv; + PRProcess *process; + PRServSemEnv *semEnv = (PRServSemEnv *)_semEnv; + + process = seedVP->processSlaveIsIn; + + protoSemEnv = PR_int__create_proto_sem_env_in_process( process, magicNum ); + protoSemEnv->langSemEnv = semEnv; + protoSemEnv->langMagicNumber = magicNum; + protoSemEnv->hasWork = FALSE; + + semEnv->protoSemEnv = protoSemEnv; + } + +/*These store the pointer to handler into the semantic env -- semantic env + * found by using magic num to look it up in the process that the seedVP + * is inside of. + */ +void +PR_SS__register_create_task_handler( RequestHandler createTaskHandler, SlaveVP *seedVP, int32 magicNum ) + { PRSemEnv *semEnv; + + semEnv = PR_SS__give_proto_sem_env_for_slave( seedVP, magicNum ); + semEnv->createTaskHdlr = createTaskHandler; + } +void +PR_SS__register_end_task_handler( RequestHandler endTaskHandler, SlaveVP *seedVP, int32 magicNum ) + { PRSemEnv *semEnv; + + semEnv = PR_SS__give_proto_sem_env_for_slave( seedVP, magicNum ); + semEnv->endTaskHdlr = endTaskHandler; + } +void +PR_SS__register_create_slave_handler( RequestHandler createSlvHandler, SlaveVP *seedVP, int32 magicNum ) + { PRSemEnv *semEnv; + + semEnv = PR_SS__give_proto_sem_env_for_slave( seedVP, magicNum ); + semEnv->createSlaveHdlr = createSlvHandler; + } +void +PR_SS__register_dissipate_slave_handler( RequestHandler dissipateHandler, SlaveVP *seedVP, int32 magicNum ) + { PRSemEnv *semEnv; + + semEnv = PR_SS__give_proto_sem_env_for_slave( seedVP, magicNum ); + semEnv->dissipateSlaveHdlr = dissipateHandler; + } +void +PR_SS__register_request_handler( RequestHandler 
reqHandler, SlaveVP *seedVP, int32 magicNum ) + { PRSemEnv *semEnv; + + semEnv = PR_SS__give_proto_sem_env_for_slave( seedVP, magicNum ); + semEnv->requestHdlr = reqHandler; + } +void +PR_SS__register_assigner( SlaveAssigner assigner, SlaveVP *seedVP, int32 magicNum ) + { PRSemEnv *semEnv; + + semEnv = PR_SS__give_proto_sem_env_for_slave( seedVP, magicNum ); + semEnv->slaveAssigner = assigner; + } +void +PR_SS__register_sem_data_creator( SemDataCreator semDataCreator, + SlaveVP *seedVP, int32 magicNum ) + { PRSemEnv *semEnv; + + semEnv = PR_SS__give_proto_sem_env_for_slave( seedVP, magicNum ); + semEnv->semDataCreator = semDataCreator; + } +void +PR_SS__register_sem_data_initializer( SemDataInitializer semDataInitializer, + SlaveVP *seedVP, int32 magicNum ) + { PRSemEnv *semEnv; + + semEnv = PR_SS__give_proto_sem_env_for_slave( seedVP, magicNum ); + semEnv->semDataInitializer = semDataInitializer; + } + + +/*TODO: finish implementing + *This function returns information about the version of PR, the language + * the program is being run in, its version, and information on the + * hardware. 
+ */ +/* +char * +PR_App__give_environment_string() + { + //-------------------------- + fprintf(output, "#\n# >> Build information <<\n"); + fprintf(output, "# GCC VERSION: %d.%d.%d\n",__GNUC__,__GNUC_MINOR__,__GNUC_PATCHLEVEL__); + fprintf(output, "# Build Date: %s %s\n", __DATE__, __TIME__); + + fprintf(output, "#\n# >> Hardware information <<\n"); + fprintf(output, "# Hardware Architecture: "); + #ifdef __x86_64 + fprintf(output, "x86_64"); + #endif //__x86_64 + #ifdef __i386 + fprintf(output, "x86"); + #endif //__i386 + fprintf(output, "\n"); + fprintf(output, "# Number of Cores: %d\n", NUM_CORES); + //-------------------------- + + //PR Plugins + fprintf(output, "#\n# >> PR Plugins <<\n"); + fprintf(output, "# Language : "); + fprintf(output, _LANG_NAME_); + fprintf(output, "\n"); + //Meta info gets set by calls from the language during its init, + // and info registered by calls from inside the application + fprintf(output, "# Assigner: %s\n", _PRTopEnv->metaInfo->assignerInfo); + + //-------------------------- + //Application + fprintf(output, "#\n# >> Application <<\n"); + fprintf(output, "# Name: %s\n", _PRTopEnv->metaInfo->appInfo); + fprintf(output, "# Data Set:\n%s\n",_PRTopEnv->metaInfo->inputSet); + + //-------------------------- + } + */ + + +/*A pointer to the startup-function for the language is given as the last + * argument to the call. Use this to initialize a program in the language. + * This creates a data structure that encapsulates the bookkeeping info + * PR uses to track and schedule a program run. 
+ */ +/*PRProcess * +PR__spawn_program_on_data_in_Lang( TopLevelFnPtr seed_fn, void *data ) + { PRProcess *newProcess; + newProcess = malloc( sizeof(PRProcess) ); + + newProcess->doneLock = PTHREAD_MUTEX_INITIALIZER; + newProcess->doneCond = PTHREAD_COND_INITIALIZER; + newProcess->executionIsComplete = FALSE; + newProcess->numSlavesLive = 0; + + newProcess->dataForSeed = data; + newProcess->seedFnPtr = prog_seed_fn; + + //The language's spawn-process function fills in the plugin function-ptrs in + // the PRProcess struct, gives the struct to PR, which then makes and + // queues the seed SlaveVP, which starts processors made from the code being + // animated. + + (*langInitFnPtr)( newProcess ); + + return newProcess; + } +*/ + + +/*When all SlaveVPs owned by the program-run associated to the process have + * dissipated, then return from this call. There is no language to cleanup, + * and PR does not shutdown.. but the process bookkeeping structure, + * which is used by PR to track and schedule the program, is freed. + *The PRProcess structure is kept until this call collects the results from it, + * then freed. If the process is not done yet when PR gets this + * call, then this call waits.. the challenge here is that this call comes from + * a live OS thread that's outside PR.. so, inside here, it waits on a + * condition.. then it's a PR thread that signals this to wake up.. + *First checks whether the process is done, if yes, calls the clean-up fn then + * returns the result extracted from the PRProcess struct. + *If process not done yet, then performs a wait (in a loop to be sure the + * wakeup is not spurious, which can happen). PR registers the wait, and upon + * the process ending (last SlaveVP owned by it dissipates), then PR signals + * this to wakeup. This then calls the cleanup fn and returns the result. 
+ */ +/* +void * +PR_App__give_results_when_done_for( PRProcess *process ) + { void *result; + + pthread_mutex_lock( process->doneLock ); + while( !(process->executionIsComplete) ) + { + pthread_cond_wait( process->doneCond, + process->doneLock ); + } + pthread_mutex_unlock( process->doneLock ); + + result = process->resultToReturn; + + PR_int__cleanup_process_after_done( process ); + free( process ); //was malloc'd above, so free it here + + return result; + } +*/ + + +void +create_masterEnv() + { MasterEnv *masterEnv; + PRQueueStruc **readyToAnimateQs; + int coreIdx; + SlaveVP **masterVPs; + AnimSlot ***allAnimSlots; //ptr to array of ptrs + + + //Make the master env, which holds everything else + _PRTopEnv = malloc( sizeof(MasterEnv) ); + + //Very first thing put into the master env is the free-list, seeded + // with a massive initial chunk of memory. + //After this, all other mallocs are PR__malloc. + _PRTopEnv->freeLists = PR_ext__create_free_list(); + + + //===================== Only PR__malloc after this ==================== + masterEnv = (MasterEnv*)_PRTopEnv; + + //Make a readyToAnimateQ for each core controller + readyToAnimateQs = PR_int__malloc( NUM_CORES * sizeof(PRQueueStruc *) ); + masterVPs = PR_int__malloc( NUM_CORES * sizeof(SlaveVP *) ); + + //One array for each core, several in array, core's masterVP scheds all + allAnimSlots = PR_int__malloc( NUM_CORES * sizeof(AnimSlot *) ); + + _PRTopEnv->numSlavesAlive = 0; //used to detect shut-down condition + +//======================================== + + Copied__fixThis; + + semEnv->freeExtraTaskSlvQ = makePRQ(); + semEnv->numLiveExtraTaskSlvs = 0; //must be last + semEnv->numLiveThreadSlvs = 1; //must be last, counts the seed + + semEnv->shutdownInitiated = FALSE; + semEnv->coreIsDone = PR_int__malloc( NUM_CORES * sizeof( bool32 ) ); + + //For each animation slot, there is an idle slave, and an initial + // slave assigned as the current-task-slave. Create them here. 
+ int32 coreNum, slotNum; + SlaveVP *idleSlv, *slotTaskSlv; + for( coreNum = 0; coreNum < NUM_CORES; coreNum++ ) + { semEnv->coreIsDone[coreNum] = FALSE; //use during shutdown + + for( slotNum = 0; slotNum < NUM_ANIM_SLOTS; ++slotNum ) + { idleSlv = PR__create_slave_helper( &idle_fn, NULL, semEnv, 0); + idleSlv->coreAnimatedBy = coreNum; + idleSlv->animSlotAssignedTo = + _PRTopEnv->allAnimSlots[coreNum][slotNum]; + _PRTopEnv->idleSlv[coreNum][slotNum] = idleSlv; + + slotTaskSlv = PR_int__create_slot_slave( ); + slotTaskSlv->coreAnimatedBy = coreNum; + slotTaskSlv->animSlotAssignedTo = + _PRTopEnv->allAnimSlots[coreNum][slotNum]; + + slotTaskSlv->needsTaskAssigned = TRUE; + slotTaskSlv->slaveType = SlotTaskSlv; + _PRTopEnv->slotTaskSlvs[coreNum][slotNum] = slotTaskSlv; + } + } + + //create the recycle queue where free task slaves are put after their task ends + semEnv->freeTaskSlvRecycleQ = makePRQ(); + + + semEnv->numLiveFreeTaskSlvs = 0; + semEnv->numLiveGenericSlvs = 0; //none existent yet.. "create process" creates the seeds +//================================================================== + + _PRTopEnv->numSlavesCreated = 0; //used by create slave to set slave ID + for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ ) + { + readyToAnimateQs[ coreIdx ] = makePRQ(); + + //Q: should give masterVP core-specific info as its init data? 
+ masterVPs[ coreIdx ] = PR_int__create_slaveVP( (TopLevelFnPtr)&animationMaster, (void*)masterEnv ); + masterVPs[ coreIdx ]->coreAnimatedBy = coreIdx; + masterVPs[ coreIdx ]->typeOfVP = Master; + allAnimSlots[ coreIdx ] = create_anim_slots( coreIdx ); //makes for one core + } + _PRTopEnv->masterVPs = masterVPs; + _PRTopEnv->masterLock = UNLOCKED; + _PRTopEnv->seed1 = rand()%1000; // init random number generator + _PRTopEnv->seed2 = rand()%1000; // init random number generator + _PRTopEnv->allAnimSlots = allAnimSlots; + _PRTopEnv->measHistsInfo = NULL; + + //============================= MEASUREMENT STUFF ======================== + + MEAS__Make_Meas_Hists_for_Susp_Meas; + MEAS__Make_Meas_Hists_for_Master_Meas; + MEAS__Make_Meas_Hists_for_Master_Lock_Meas; + MEAS__Make_Meas_Hists_for_Malloc_Meas; + MEAS__Make_Meas_Hists_for_Plugin_Meas; + MEAS__Make_Meas_Hists_for_Language; + + PROBES__Create_Probe_Bookkeeping_Vars; + + HOLISTIC__Setup_Perf_Counters; + + //======================================================================== + } + +AnimSlot ** +create_anim_slots( int32 coreSlotsAreOn ) + { AnimSlot **animSlots; + int i; + + animSlots = PR_int__malloc( NUM_ANIM_SLOTS * sizeof(AnimSlot *) ); + + for( i = 0; i < NUM_ANIM_SLOTS; i++ ) + { + animSlots[i] = PR_int__malloc( sizeof(AnimSlot) ); + + //Set state to mean "handling requests done, slot needs filling" + animSlots[i]->workIsDone = FALSE; + animSlots[i]->needsSlaveAssigned = TRUE; + animSlots[i]->slotIdx = i; //quick retrieval of slot pos + animSlots[i]->coreSlotIsOn = coreSlotsAreOn; + } + return animSlots; + } + + +void +freeAnimSlots( AnimSlot **animSlots ) + { int i; + for( i = 0; i < NUM_ANIM_SLOTS; i++ ) + { + PR_int__free( animSlots[i] ); + } + PR_int__free( animSlots ); + } + + +void +create_the_coreCtlr_OS_threads() + { + //======================================================================== + // Create the Threads + int coreIdx, retCode; + + //Need the threads to be created suspended, and wait for 
a signal + // before proceeding -- gives time after creating to initialize other + // stuff before the coreCtlrs set off. + _PRTopEnv->setupComplete = 0; + + //initialize the cond used to make the new threads wait and sync up + //must do this before *creating* the threads.. + pthread_mutex_init( &suspendLock, NULL ); + pthread_cond_init( &suspendCond, NULL ); + + //Make the threads that animate the core controllers + for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ ) + { coreCtlrThdParams[coreIdx] = PR_int__malloc( sizeof(ThdParams) ); + coreCtlrThdParams[coreIdx]->coreNum = coreIdx; + + retCode = + pthread_create( &(coreCtlrThdHandles[coreIdx]), + thdAttrs, + &coreController, + (void *)(coreCtlrThdParams[coreIdx]) ); + if(retCode){printf("ERROR creating thread: %d\n", retCode); exit(1);} + } + } + + +/*This is what causes the PR system to initialize.. then waits for it to + * exit. + * + *Wrapper lib layer calls this when it wants the system to start running.. + */ +/* +void +PR_SS__start_the_work_then_wait_until_done() + { +#ifdef DEBUG__TURN_ON_SEQUENTIAL_MODE + //Only difference between version with an OS thread pinned to each core and + // the sequential version of PR is PR__init_Seq, this, and coreCtlr_Seq. + // + //Instead of un-suspending threads, just call the one and only + // core ctlr (sequential version), in the main thread. 
+ coreCtlr_Seq( NULL ); + flushRegisters(); +#else + int coreIdx; + //Start the core controllers running + + //tell the core controller threads that setup is complete + //get lock, to lock out any threads still starting up -- they'll see + // that setupComplete is true before entering while loop, and so never + // wait on the condition + pthread_mutex_lock( &suspendLock ); + _PRTopEnv->setupComplete = 1; + pthread_mutex_unlock( &suspendLock ); + pthread_cond_broadcast( &suspendCond ); + + + //wait for all to complete + for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ ) + { + pthread_join( coreCtlrThdHandles[coreIdx], NULL ); + } + + //NOTE: do not clean up PR env here -- semantic layer has to have + // a chance to clean up its environment first, then do a call to free + // the Master env and rest of PR locations +#endif + } +*/ + +SlaveVP* PR_SS__create_shutdown_slave() + { + SlaveVP* shutdownVP; + + shutdownVP = PR_int__create_slaveVP( &endOSThreadFn, NULL ); + shutdownVP->typeOfVP = Shutdown; + + return shutdownVP; + } + +//TODO: look at architecting cleanest separation between request handler +// and animation master, for dissipate, create, shutdown, and other non-semantic +// requests. Issue is chain: one removes requests from AppSlv, one dispatches +// on type of request, and one handles each type.. but some types require +// action from both request handler and animation master -- maybe just give the +// request handler calls like: PR__handle_X_request_type + + +/*This is called by the semantic layer's request handler when it decides its + * time to shut down the PR system. Calling this causes the core controller OS + * threads to exit, which unblocks the entry-point function that started up + * PR, and allows it to grab the result and return to the original single- + * threaded application. 
+ * + *The _PRTopEnv is needed by this shut down function, so the create-seed- + * and-wait function has to free a bunch of stuff after it detects the + * threads have all died: the masterEnv, the thread-related locations, + * masterVP any AppSlvs that might still be allocated and sitting in the + * semantic environment, or have been orphaned in the _PRWorkQ. + * + *NOTE: the semantic plug-in is expected to use PR__malloc to get all the + * locations it needs, and give ownership to masterVP. Then, they will be + * automatically freed. + * + *In here, create one core-loop shut-down processor for each core controller and put + * them all directly into the readyToAnimateQ. + *Note, this function can ONLY be called after the semantic environment no + * longer cares if AppSlvs get animated after the point this is called. In + * other words, this can be used as an abort, or else it should only be + * called when all AppSlvs have finished dissipate requests -- only at that + * point is it sure that all results have completed. + */ +void +PR_SS__shutdown() + { int32 coreIdx; + SlaveVP *shutDownSlv; + AnimSlot **animSlots; + //create the shutdown processors, one for each core controller -- put them + // directly into the Q -- each core will die when gets one + for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ ) + { //Note, this is running in the master + shutDownSlv = PR_SS__create_shutdown_slave(); + //last slave has dissipated, so no more in slots, so write + // shut down slave into first animating slot. + animSlots = _PRTopEnv->allAnimSlots[ coreIdx ]; + animSlots[0]->slaveAssignedToSlot = shutDownSlv; + animSlots[0]->needsSlaveAssigned = FALSE; + shutDownSlv->coreAnimatedBy = coreIdx; + shutDownSlv->animSlotAssignedTo = animSlots[ 0 ]; + } + } + + +/*Am trying to be cute, avoiding IF statement in coreCtlr that checks for + * a special shutdown slaveVP. Ended up with extra-complex shutdown sequence. 
+ *This function has the sole purpose of setting the stack and framePtr + * to the coreCtlr's stack and framePtr.. it does that then jumps to the + * core ctlr's shutdown point -- might be able to just call Pthread_exit + * from here, but am going back to the pthread's stack and setting everything + * up just as if it never jumped out, before calling pthread_exit. + *The end-point of core ctlr will free the stack and so forth of the + * processor that animates this function, (this fn is transfering the + * animator of the AppSlv that is in turn animating this function over + * to core controller function -- note that this slices out a level of virtual + * processors). + */ +void +endOSThreadFn( void *initData, SlaveVP *animatingSlv ) + { + #ifdef DEBUG__TURN_ON_SEQUENTIAL_MODE + asmTerminateCoreCtlrSeq(animatingSlv); + #else + asmTerminateCoreCtlr(animatingSlv); + #endif + } + + +/*This is called from the startup & shutdown + */ +void +PR_SS__cleanup_at_end_of_shutdown() + { + //Before getting rid of everything, print out any measurements made + if( _PRTopEnv->measHistsInfo != NULL ) + { forAllInDynArrayDo( _PRTopEnv->measHistsInfo, (DynArrayFnPtr)&printHist ); + forAllInDynArrayDo( _PRTopEnv->measHistsInfo, (DynArrayFnPtr)&saveHistToFile); + forAllInDynArrayDo( _PRTopEnv->measHistsInfo, (DynArrayFnPtr)&freeHist ); + } + + MEAS__Print_Hists_for_Susp_Meas; + MEAS__Print_Hists_for_Master_Meas; + MEAS__Print_Hists_for_Master_Lock_Meas; + MEAS__Print_Hists_for_Malloc_Meas; + MEAS__Print_Hists_for_Plugin_Meas; + + + //All the environment data has been allocated with PR__malloc, so just + // free its internal big-chunk and all inside it disappear. 
+/* + readyToAnimateQs = _PRTopEnv->readyToAnimateQs; + masterVPs = _PRTopEnv->masterVPs; + allAnimSlots = _PRTopEnv->allAnimSlots; + + for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ ) + { + freePRQ( readyToAnimateQs[ coreIdx ] ); + //master Slvs were created external to PR, so use external free + PR_int__dissipate_slaveVP( masterVPs[ coreIdx ] ); + + freeAnimSlots( allAnimSlots[ coreIdx ] ); + } + + PR_int__free( _PRTopEnv->readyToAnimateQs ); + PR_int__free( _PRTopEnv->masterVPs ); + PR_int__free( _PRTopEnv->allAnimSlots ); + + //============================= MEASUREMENT STUFF ======================== + #ifdef PROBES__TURN_ON_STATS_PROBES + freeDynArrayDeep( _PRTopEnv->dynIntervalProbesInfo, &PR_WL__free_probe); + #endif + //======================================================================== +*/ + //These are the only two that use system free + PR_ext__free_free_list( _PRTopEnv->freeLists ); + free( (void *)_PRTopEnv ); + } + + +//================================ + + diff -r 999f2966a3e5 -r dafae55597ce PR__WL.c --- a/PR__WL.c Wed Sep 19 23:12:44 2012 -0700 +++ b/PR__WL.c Tue Oct 23 23:46:17 2012 -0700 @@ -24,6 +24,20 @@ + +inline int32 * +PR__give_task_ID( SlaveVP *animSlv, int32 magicNumber ) + { + return animSlv->metaTask->taskID; + } + +SlaveVP * +PR__give_slave_of_task_ID( int32 *taskID, SlaveVP *animSlv ) + { + metaTask = lookup( taskID ); + return metaTask->slaveAssignedTo; + } + /*For this implementation of PR, it may not make much sense to have the * system of requests for creating a new processor done this way.. but over * the scope of single-master, multi-master, mult-tasking, OS-implementing, @@ -38,15 +52,18 @@ * to the plugin. 
*/ void -PR_WL__send_create_slaveVP_req( void *semReqData, SlaveVP *reqstingSlv ) +PR_WL__send_create_slaveVP_req( void *semReq, int32 *slvID, SlaveVP *reqstingSlv, + int32 magicNum ) { PRReqst req; - req.reqType = createReq; - req.semReqData = semReqData; - req.nextReqst = reqstingSlv->request; + req.reqType = SlvCreate; + req.ID = slvID; + req.langMagicNumber = magicNum; + req.semReq = semReq; +// req.nextReqst = reqstingSlv->request; reqstingSlv->request = &req; - PR_int__suspend_slaveVP_and_send_req( reqstingSlv ); + PR_WL__suspend_slaveVP_and_send_req( reqstingSlv ); } @@ -75,13 +92,42 @@ PR_WL__send_dissipate_req( SlaveVP *slaveToDissipate ) { PRReqst req; - req.reqType = dissipate; - req.nextReqst = slaveToDissipate->request; + req.reqType = SlvDissipate; +// req.nextReqst = slaveToDissipate->request; slaveToDissipate->request = &req; - PR_int__suspend_slaveVP_and_send_req( slaveToDissipate ); + PR_WL__suspend_slaveVP_and_send_req( slaveToDissipate ); } +inline +void +PR_WL__send_create_task_req( TopLevelFn fn, void *initData, void *semReq, + int32 *taskID, SlaveVP *animSlv, int32 magicNumber) + { PRReqst req; + + req.reqType = TaskCreate; + req.topLevelFn = fn; + req.initData = initData; + req.ID = taskID; + req.semReq = semReq; + req.langMagicNumber = magicNumber; + animSlv->request = &req; + + PR_WL__suspend_slaveVP_and_send_req( animSlv ); + } + +inline +void +PR_WL__send_end_task_request( void *semReq, SlaveVP *animSlv, int32 magicNum ) + { PRReqst req; + + req.reqType = TaskEnd; + req.semReq = semReq; + req.langMagicNumber = magicNum; + animSlv->request = &req; + + PR_WL__suspend_slaveVP_and_send_req( animSlv ); + } /*This call's name indicates that request is malloc'd -- so req handler @@ -100,43 +146,55 @@ { PRReqst *req; req = PR_int__malloc( sizeof(PRReqst) ); - req->reqType = semantic; - req->semReqData = semReqData; + req->reqType = Language; + req->semReq = semReqData; req->nextReqst = callingSlv->request; - callingSlv->request = req; + 
callingSlv->request = req; } -/*This inserts the semantic-layer's request data into standard PR carrier - * request data-struct is allocated on stack of this call & ptr to it sent +inline int32 * +PR_WL__create_taskID_of_size( int32 numInts ) + { int32 *taskID; + + taskID = PR_WL__malloc( sizeof(int32) + numInts * sizeof(int32) ); + taskID[0] = numInts; + return taskID; + } + +/*This inserts the semantic-layer's request data into standard PR carrier. + * PR Request data-struct is allocated on stack of this call & ptr to it sent * to plugin *Then it does suspend, to cause request to be sent. */ inline void -PR_WL__send_sem_request( void *semReqData, SlaveVP *callingSlv ) +PR_WL__send_sem_request( void *semReqData, SlaveVP *callingSlv, int32 magicNum ) { PRReqst req; - req.reqType = semantic; - req.semReqData = semReqData; + req.reqType = Language; + req.langMagicNumber = magicNum; + req. + req.semReq = semReqData; req.nextReqst = callingSlv->request; callingSlv->request = &req; - PR_int__suspend_slaveVP_and_send_req( callingSlv ); + PR_WL__suspend_slaveVP_and_send_req( callingSlv ); } -/*May 2012 Not sure what this is.. looks like old idea for PR semantic - * request +/*This sends a PRLang request -- for probe, exception, and so on.. 
+ * */ inline void -PR_WL__send_PRSem_request( void *semReqData, SlaveVP *callingSlv ) +PR_WL__send_service_request( void *semReqData, SlaveVP *callingSlv ) { PRReqst req; - req.reqType = PRSemantic; - req.semReqData = semReqData; - req.nextReqst = callingSlv->request; //gab any other preceeding + req.reqType = PRLang; + req.langMagicNumber = PRLang_MAGIC_NUMBER; + req.semReq = semReqData; + req.nextReqst = callingSlv->request; //grab any other preceeding callingSlv->request = &req; - PR_int__suspend_slaveVP_and_send_req( callingSlv ); + PR_WL__suspend_slaveVP_and_send_req( callingSlv ); } /*May 2012 @@ -146,15 +204,15 @@ void PR_WL__throw_exception( char *msgStr, SlaveVP *reqstSlv, PRExcp *excpData ) { PRReqst req; - PRSemReq semReq; + PRServReq semReq; req.reqType = PRSemantic; - req.semReqData = &semReq; + req.semReq = &semReq; req.nextReqst = reqstSlv->request; //gab any other preceeding reqstSlv->request = &req; semReq.msgStr = msgStr; semReq.exceptionData = excpData; - PR_int__suspend_slaveVP_and_send_req( reqstSlv ); + PR_WL__suspend_slaveVP_and_send_req( reqstSlv ); } diff -r 999f2966a3e5 -r dafae55597ce PR__int.c --- a/PR__int.c Wed Sep 19 23:12:44 2012 -0700 +++ b/PR__int.c Tue Oct 23 23:46:17 2012 -0700 @@ -22,41 +22,123 @@ * int: internal to the PR implementation */ +//=========================================================================== +// +//=========================================================================== inline SlaveVP * -PR_int__create_slaveVP( TopLevelFnPtr fnPtr, void *dataParam ) +PR_int__create_slaveVP( TopLevelFnPtr fnPtr, void *dataParam, PRProcess *process ) { SlaveVP *newSlv; - void *stackLocs; + void *stackLocs; + + PR_int__create_slaveVP_helper( fnPtr, dataParam ); + + process->numLiveGenericSlaves += 1; + + newSlv->needsTaskAssigned = TRUE; + newSlv->metaTask = NULL; + newSlv->typeOfVP = GenericSlave; + + return newSlv; + } + + +inline SlaveVP * +PR_int__create_slaveVP_helper( TopLevelFnPtr fnPtr, void *dataParam ) + { 
SlaveVP *newSlv; + void *stackLocs; newSlv = PR_int__malloc( sizeof(SlaveVP) ); stackLocs = PR_int__malloc( VIRT_PROCR_STACK_SIZE ); if( stackLocs == 0 ) { perror("PR_int__malloc stack"); exit(1); } - _PRMasterEnv->numSlavesAlive += 1; + newSlv->startOfStack = stackLocs; + newSlv->slaveID = _PRTopEnv->numSlavesCreated++; + newSlv->request = NULL; + newSlv->animSlotAssignedTo = NULL; + + newSlv->numTimesAssignedToASlot = 0; - return PR_int__create_slaveVP_helper( newSlv, fnPtr, dataParam, stackLocs ); + #ifdef MODE__MULTI_LANG + PRSemDataHolder * + semDataHolder = PR_int__malloc( sizeof(PRSemDataHolder) ); + newSlv->semanticData = semDataHolder; + #else + newSlv->semanticData = NULL; + #endif + + PR_int__reset_slaveVP_to_TopLvlFn( newSlv, fnPtr, dataParam ); + + //============================= MEASUREMENT STUFF ======================== + #ifdef PROBES__TURN_ON_STATS_PROBES + //TODO: make this TSCHiLow or generic equivalent + //struct timeval timeStamp; + //gettimeofday( &(timeStamp), NULL); + //newSlv->createPtInSecs = timeStamp.tv_sec +(timeStamp.tv_usec/1000000.0) - + // _PRTopEnv->createPtInSecs; + #endif + //======================================================================== + + return newSlv; } -/* "ext" designates that it's for use outside the PR system -- should only - * be called from main thread or other thread -- never from code animated by - * a PR virtual processor. 
- */ -inline SlaveVP * -PR_ext__create_slaveVP( TopLevelFnPtr fnPtr, void *dataParam ) - { SlaveVP *newSlv; - char *stackLocs; - - newSlv = malloc( sizeof(SlaveVP) ); - stackLocs = malloc( VIRT_PROCR_STACK_SIZE ); - if( stackLocs == 0 ) - { perror("malloc stack"); exit(1); } - - _PRMasterEnv->numSlavesAlive += 1; - - return PR_int__create_slaveVP_helper(newSlv, fnPtr, dataParam, stackLocs); +SlaveVP * +PR_int__create_slot_slave() + { + fixme; } +void idle_fn(void* data, SlaveVP *animatingSlv) + { + while(1) + { PR_WL__suspend_slaveVP_and_send_req( animatingSlv ); + } + } + + +PRMetaTask * +PR_int__create_generic_slave_meta_task( void *initData ) + { PRMetaTask *newStub; + + newStub = PR_PI__malloc( sizeof(PRMetaTask) ); + newStub->slaveAssignedTo = NULL; //set later + newStub->taskType = IS_A_GENERIC_SLV; + newStub->taskID = NULL; + + return newStub; + } + + +/* This is for OS requests and PR infrastructure requests, such as to create + * a probe -- a probe is inside the heart of PR-core, it's not part of any + * language -- but it's also a semantic thing that's triggered from and used + * in the application.. so it crosses abstractions.. so, need some special + * pattern here for handling such requests. + * Doing this just like it were a second language sharing PR-core. + * + * This is called from the language's request handler when it sees a request + * of type PRSemReq + * + * TODO: Later change this, to give probes their own separate plugin & have + * PR-core steer the request to appropriate plugin + * Do the same for OS calls -- look later at it.. 
+ */ +void inline +PR_int__handle_PRServiceReq( PRReqst *req, SlaveVP *requestingSlv, void *semEnv, + ResumeSlvFnPtr resumeFn ) + { PRServReq *semReq; + + semReq = PR_PI__take_sem_reqst_from(req); + if( semReq == NULL ) return; + switch( semReq->reqType ) //sem handlers are all in other file + { + case make_probe: handleMakeProbe( semReq, semEnv, resumeFn); + break; + case throw_excp: handleThrowException( semReq, semEnv, resumeFn); + break; + } + } //=========================================================================== /*there is a label inside this function -- save the addr of this label in @@ -69,7 +151,7 @@ * next work-unit for that slave. */ void -PR_int__suspend_slaveVP_and_send_req( SlaveVP *animatingSlv ) +PR_WL__suspend_slaveVP_and_send_req( SlaveVP *animatingSlv ) { //This suspended Slv will get assigned by Master again at some @@ -96,32 +178,6 @@ } -/* "ext" designates that it's for use outside the PR system -- should only - * be called from main thread or other thread -- never from code animated by - * a SlaveVP, nor from a masterVP. - * - *Use this version to dissipate Slvs created outside the PR system. - */ -void -PR_ext__dissipate_slaveVP( SlaveVP *slaveToDissipate ) - { - _PRMasterEnv->numSlavesAlive -= 1; - if( _PRMasterEnv->numSlavesAlive == 0 ) - { //no more work, so shutdown - PR_SS__shutdown(); //note, creates shut-down slaves on each core - } - - //NOTE: dataParam was given to the processor, so should either have - // been alloc'd with PR_int__malloc, or freed by the level above animSlv. - //So, all that's left to free here is the stack and the SlaveVP struc - // itself - //Note, should not stack-allocate the data param -- no guarantee, in - // general that creating processor will outlive ones it creates. 
- free( slaveToDissipate->startOfStack ); - free( slaveToDissipate ); - } - - /*This must be called by the request handler plugin -- it cannot be called * from the semantic library "dissipate processor" function -- instead, the @@ -143,11 +199,11 @@ void PR_int__dissipate_slaveVP( SlaveVP *animatingSlv ) { - DEBUG__printf2(dbgRqstHdlr, "PR int dissipate slaveID: %d, alive: %d",animatingSlv->slaveID, _PRMasterEnv->numSlavesAlive-1); + DEBUG__printf2(dbgRqstHdlr, "PR int dissipate slaveID: %d, alive: %d",animatingSlv->slaveID, _PRTopEnv->numSlavesAlive-1); //dis-own all locations owned by this processor, causing to be freed // any locations that it is (was) sole owner of - _PRMasterEnv->numSlavesAlive -= 1; - if( _PRMasterEnv->numSlavesAlive == 0 ) + _PRTopEnv->numSlavesAlive -= 1; + if( _PRTopEnv->numSlavesAlive == 0 ) { //no more work, so shutdown PR_SS__shutdown(); //note, creates shut-down processor on each core } @@ -162,41 +218,114 @@ PR_int__free( animatingSlv ); } +/*In multi-lang mode, there are multiple semData in the slave.. + * + *At some point want to recycle rather than free.. 
+ * + *For now, iterate through semData, call registered free-er on each, then + * free the basic slave + */ +void +PR_int__dissipate_slaveVP_multilang( SlaveVP *slave ) + { PRSemDataHolder *semDataHolder; + PRSemDataTemplate *semData; + int32 idx; + + semDataHolder = (PRSemDataHolder *)slave->semanticData; + for(idx = 0; idx < semDataHolder->numSemDatas; idx++) + { + semData = semDataHolder->semDatas[idx]; + (*(semData->freeFn))(semData); //this Fn is lang-spec + } + + PR_int__free( slave->startOfStack ); + PR_int__free( slave ); + } + +inline +void * +PR_int__give_semEnv_of_req( PRReqst *req, SlaveVP *requestingSlv ) + { + return PR_int__give_sem_env_for_process( requestingSlv->processSlaveIsIn, + req->langMagicNumber ); + } + /*Anticipating multi-tasking */ +inline void * -PR_int__give_sem_env_for( SlaveVP *animSlv ) - { - return _PRMasterEnv->semanticEnv; +PR_int__give_sem_env_for_slave( SlaveVP *slave, int32 magicNum ) + { + return PR_int__give_sem_env_for_process( slave->processSlaveIsIn, magicNum ); + } +inline +PRSemEnv * +PR_int__give_proto_sem_env_for_slave( SlaveVP *slave, int32 magicNum ) + { + return PR_int__give_proto_sem_env_for_process( slave->processSlaveIsIn, magicNum ); } -/* - * - */ -inline SlaveVP * -PR_int__create_slaveVP_helper( SlaveVP *newSlv, TopLevelFnPtr fnPtr, - void *dataParam, void *stackLocs ) - { - newSlv->startOfStack = stackLocs; - newSlv->slaveID = _PRMasterEnv->numSlavesCreated++; - newSlv->request = NULL; - newSlv->animSlotAssignedTo = NULL; - newSlv->typeOfVP = Slave; - newSlv->assignCount = 0; +inline +void * +PR_int__give_sem_env_for_process( PRProcess *process, int32 magicNum ) + { PRSemEnv *protoSemEnv; - PR_int__reset_slaveVP_to_TopLvlFn( newSlv, fnPtr, dataParam ); - - //============================= MEASUREMENT STUFF ======================== - #ifdef PROBES__TURN_ON_STATS_PROBES - //TODO: make this TSCHiLow or generic equivalent - //struct timeval timeStamp; - //gettimeofday( &(timeStamp), NULL); - 
//newSlv->createPtInSecs = timeStamp.tv_sec +(timeStamp.tv_usec/1000000.0) - - // _PRMasterEnv->createPtInSecs; - #endif - //======================================================================== + protoSemEnv = lookup_proto_sem_env_in_array( process->semEnvs, magicNum ); + return protoSemEnv->langSemEnv; + } +inline +PRSemEnv * +PR_int__give_proto_sem_env_for_process( PRProcess *process, int32 magicNum ) + { + return lookup_proto_sem_env_in_array( process->semEnvs, magicNum ); + } - return newSlv; +inline +PRSemEnv * +lookup_proto_sem_env_in_array( PRSemEnv *semEnvs, int32 magicNum ) + { PRSemEnv *retEnv; + int32 idx; + + idx = magicNum & 63; //mask off, leaving lowest 6 bits + retEnv = &(semEnvs[idx]); //is array of structs, so take addr + while( retEnv->langMagicNumber != magicNum ) //assume magicNums unique + { retEnv = retEnv->chainedSemEnv; + if( retEnv == NULL ) goto NotFound; + } + return retEnv; + + NotFound: + return NULL; + } + +inline +PRSemEnv * +PR_int__create_proto_sem_env_in_process( PRProcess process, int32 magicNum ) + { PRSemEnv *semEnvs; + PRSemEnv *retEnv, *newEnv; + int32 idx; + + semEnvs = process->semEnvs; + + idx = magicNum & 63; //mask upper bits off, leaving lowest 6 bits + retEnv = &(semEnvs[idx]); //is array of structs, so take addr + if( retEnv->langSemEnv == NULL ) + { //if env that's in array is empty, do nothing, drop down to return sequence + } + else //look for last environment in chain + { while( retEnv->chainedSemEnv != NULL ) + { retEnv = retEnv->chainedSemEnv; + } + //add a new proto sem env to the end of chain + newEnv = PR_int__malloc( sizeof(PRSemEnv) ); + newEnv->chainedSemEnv = NULL; + retEnv->chainedSemEnv = newEnv; + retEnv = newEnv; + } + + process->semEnvList[process->numSemEnvs] = retEnv; + process->numSemEnvs += 1; + return retEnv; } @@ -231,7 +360,7 @@ PR_int__get_master_lock() { int32 *addrOfMasterLock; - addrOfMasterLock = &(_PRMasterEnv->masterLock); + addrOfMasterLock = &(_PRTopEnv->masterLock); int 
numTriesToGetLock = 0; int gotLock = 0; @@ -263,11 +392,11 @@ inline uint32_t PR_int__randomNumber() { - _PRMasterEnv->seed1 = 36969 * (_PRMasterEnv->seed1 & 65535) + - (_PRMasterEnv->seed1 >> 16); - _PRMasterEnv->seed2 = 18000 * (_PRMasterEnv->seed2 & 65535) + - (_PRMasterEnv->seed2 >> 16); - return (_PRMasterEnv->seed1 << 16) + _PRMasterEnv->seed2; + _PRTopEnv->seed1 = 36969 * (_PRTopEnv->seed1 & 65535) + + (_PRTopEnv->seed1 >> 16); + _PRTopEnv->seed2 = 18000 * (_PRTopEnv->seed2 & 65535) + + (_PRTopEnv->seed2 >> 16); + return (_PRTopEnv->seed1 << 16) + _PRTopEnv->seed2; } diff -r 999f2966a3e5 -r dafae55597ce PR__startup_and_shutdown.c --- a/PR__startup_and_shutdown.c Wed Sep 19 23:12:44 2012 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,601 +0,0 @@ -/* - * Copyright 2010 OpenSourceStewardshipFoundation - * - * Licensed under BSD - */ - -#include -#include -#include -#include -#include -#include -#include - -#include "PR.h" - - -#define thdAttrs NULL - - -/* MEANING OF WL PI SS int - * These indicate which places the function is safe to use. They stand for: - * WL: Wrapper Library - * PI: Plugin - * SS: Startup and Shutdown - * int: internal to the PR implementation - */ - - -//=========================================================================== -AnimSlot ** -create_anim_slots( int32 coreSlotsAreOn ); - -void -create_masterEnv(); - -void -create_the_coreCtlr_OS_threads(); - -MallocProlog * -create_free_list(); - -void -endOSThreadFn( void *initData, SlaveVP *animatingSlv ); - - -//=========================================================================== - -/*Setup has two phases: - * 1) Semantic layer first calls init_PR, which creates masterEnv, and puts - * the master Slv into the work-queue, ready for first "call" - * 2) Semantic layer then does its own init, which creates the seed virt - * slave inside the semantic layer, ready to assign it when - * asked by the first run of the animationMaster. 
- * - *This part is bit weird because PR really wants to be "always there", and - * have applications attach and detach.. for now, this PR is part of - * the app, so the PR system starts up as part of running the app. - * - *The semantic layer is isolated from the PR internals by making the - * semantic layer do setup to a state that it's ready with its - * initial Slvs, ready to assign them to slots when the animationMaster - * asks. Without this pattern, the semantic layer's setup would - * have to modify slots directly to assign the initial virt-procrs, and put - * them into the readyToAnimateQ itself, breaking the isolation completely. - * - * - *The semantic layer creates the initial Slv(s), and adds its - * own environment to masterEnv, and fills in the pointers to - * the requestHandler and slaveAssigner plug-in functions - */ - -/*This allocates PR data structures, populates the master PRProc, - * and master environment, and returns the master environment to the semantic - * layer. - */ -void -PR__start() - { - #ifdef DEBUG__TURN_ON_SEQUENTIAL_MODE - create_masterEnv(); - printf( "\n\n Running in SEQUENTIAL mode \n\n" ); - #else - create_masterEnv(); - DEBUG__printf1(dbgInfra,"Offset of lock in masterEnv: %d ", (int32)offsetof(MasterEnv,masterLock) ); - create_the_coreCtlr_OS_threads(); - #endif - } - -/*This gets the process struct out of the seedVP, then gets the semEnv-holding - * struct out of that, then inserts the semantic env into that struct, using - * the magic number as the key to the sem env placement. The master will - * use the magic number from a request to retrieve the semantic env appropriate - * for the construct that made the request. 
- */ -void -PR__register_langlets_semEnv( PRSemEnv *semEnv, int32 magicNumber, - SlaveVP *seedVP ) - { PREnvHolder *envHolder; - PRProcess *process; - - process = seedVP->process; - envHolder = process->semEnvHolder; - - insert( magicNumber, semEnv, envHolder ); - } - - -/*TODO: finish implementing - *This function returns information about the version of PR, the language - * the program is being run in, its version, and information on the - * hardware. - */ -/* -char * -PR_App__give_environment_string() - { - //-------------------------- - fprintf(output, "#\n# >> Build information <<\n"); - fprintf(output, "# GCC VERSION: %d.%d.%d\n",__GNUC__,__GNUC_MINOR__,__GNUC_PATCHLEVEL__); - fprintf(output, "# Build Date: %s %s\n", __DATE__, __TIME__); - - fprintf(output, "#\n# >> Hardware information <<\n"); - fprintf(output, "# Hardware Architecture: "); - #ifdef __x86_64 - fprintf(output, "x86_64"); - #endif //__x86_64 - #ifdef __i386 - fprintf(output, "x86"); - #endif //__i386 - fprintf(output, "\n"); - fprintf(output, "# Number of Cores: %d\n", NUM_CORES); - //-------------------------- - - //PR Plugins - fprintf(output, "#\n# >> PR Plugins <<\n"); - fprintf(output, "# Language : "); - fprintf(output, _LANG_NAME_); - fprintf(output, "\n"); - //Meta info gets set by calls from the language during its init, - // and info registered by calls from inside the application - fprintf(output, "# Assigner: %s\n", _PRMasterEnv->metaInfo->assignerInfo); - - //-------------------------- - //Application - fprintf(output, "#\n# >> Application <<\n"); - fprintf(output, "# Name: %s\n", _PRMasterEnv->metaInfo->appInfo); - fprintf(output, "# Data Set:\n%s\n",_PRMasterEnv->metaInfo->inputSet); - - //-------------------------- - } - */ - - -/*A pointer to the startup-function for the language is given as the last - * argument to the call. Use this to initialize a program in the language. 
- * This creates a data structure that encapsulates the bookkeeping info - * PR uses to track and schedule a program run. - */ -PRProcess * -PR__spawn_program_on_data_in_Lang( TopLevelFnPtr seed_fn, void *data ) - { PRProcess *newProcess; - newProcess = malloc( sizeof(PRProcess) ); - - newProcess->doneLock = PTHREAD_MUTEX_INITIALIZER; - newProcess->doneCond = PTHREAD_COND_INITIALIZER; - newProcess->executionIsComplete = FALSE; - newProcess->numSlavesLive = 0; - - newProcess->dataForSeed = data; - newProcess->seedFnPtr = prog_seed_fn; - - //The language's spawn-process function fills in the plugin function-ptrs in - // the PRProcess struct, gives the struct to PR, which then makes and - // queues the seed SlaveVP, which starts processors made from the code being - // animated. - - (*langInitFnPtr)( newProcess ); - - return newProcess; - } - - -/*When all SlaveVPs owned by the program-run associated to the process have - * dissipated, then return from this call. There is no language to cleanup, - * and PR does not shutdown.. but the process bookkeeping structure, - * which is used by PR to track and schedule the program, is freed. - *The PRProcess structure is kept until this call collects the results from it, - * then freed. If the process is not done yet when PR gets this - * call, then this call waits.. the challenge here is that this call comes from - * a live OS thread that's outside PR.. so, inside here, it waits on a - * condition.. then it's a PR thread that signals this to wake up.. - *First checks whether the process is done, if yes, calls the clean-up fn then - * returns the result extracted from the PRProcess struct. - *If process not done yet, then performs a wait (in a loop to be sure the - * wakeup is not spurious, which can happen). PR registers the wait, and upon - * the process ending (last SlaveVP owned by it dissipates), then PR signals - * this to wakeup. This then calls the cleanup fn and returns the result. 
- */ -/* -void * -PR_App__give_results_when_done_for( PRProcess *process ) - { void *result; - - pthread_mutex_lock( process->doneLock ); - while( !(process->executionIsComplete) ) - { - pthread_cond_wait( process->doneCond, - process->doneLock ); - } - pthread_mutex_unlock( process->doneLock ); - - result = process->resultToReturn; - - PR_int__cleanup_process_after_done( process ); - free( process ); //was malloc'd above, so free it here - - return result; - } -*/ - -/*Turns off the PR system, and frees all data associated with it. Does this - * by creating shutdown SlaveVPs and inserting them into animation slots. - * Will probably have to wake up sleeping cores as part of this -- the fn that - * inserts the new SlaveVPs should handle the wakeup.. - */ -/* -void -PR_SS__shutdown(); //already defined -- look at it - -void -PR_App__shutdown() - { - for( cores ) - { slave = PR_int__create_new_SlaveVP( endOSThreadFn, NULL ); - PR_int__insert_slave_onto_core( SlaveVP *slave, coreNum ); - } - } -*/ - -/* PR_App__start_PR_running(); - - PRProcess matrixMultProcess; - - matrixMultProcess = - PR_App__spawn_program_on_data_in_Lang( &prog_seed_fn, data, Vthread_lang ); - - resMatrix = PR_App__give_results_when_done_for( matrixMultProcess ); - - PR_App__shutdown(); - */ - -void -create_masterEnv() - { MasterEnv *masterEnv; - PRQueueStruc **readyToAnimateQs; - int coreIdx; - SlaveVP **masterVPs; - AnimSlot ***allAnimSlots; //ptr to array of ptrs - - - //Make the master env, which holds everything else - _PRMasterEnv = malloc( sizeof(MasterEnv) ); - - //Very first thing put into the master env is the free-list, seeded - // with a massive initial chunk of memory. - //After this, all other mallocs are PR__malloc. 
- _PRMasterEnv->freeLists = PR_ext__create_free_list(); - - - //===================== Only PR__malloc after this ==================== - masterEnv = (MasterEnv*)_PRMasterEnv; - - //Make a readyToAnimateQ for each core controller - readyToAnimateQs = PR_int__malloc( NUM_CORES * sizeof(PRQueueStruc *) ); - masterVPs = PR_int__malloc( NUM_CORES * sizeof(SlaveVP *) ); - - //One array for each core, several in array, core's masterVP scheds all - allAnimSlots = PR_int__malloc( NUM_CORES * sizeof(AnimSlot *) ); - - _PRMasterEnv->numSlavesAlive = 0; //used to detect shut-down condition - -//======================================== - semEnv->shutdownInitiated = FALSE; - semEnv->coreIsDone = PR_int__malloc( NUM_CORES * sizeof( bool32 ) ); - - //For each animation slot, there is an idle slave, and an initial - // slave assigned as the current-task-slave. Create them here. - SlaveVP *idleSlv, *slotTaskSlv; - for( coreNum = 0; coreNum < NUM_CORES; coreNum++ ) - { semEnv->coreIsDone[coreNum] = FALSE; //use during shutdown - - for( slotNum = 0; slotNum < NUM_ANIM_SLOTS; ++slotNum ) - { idleSlv = VSs__create_slave_helper( &idle_fn, NULL, semEnv, 0); - idleSlv->coreAnimatedBy = coreNum; - idleSlv->animSlotAssignedTo = - _PRMasterEnv->allAnimSlots[coreNum][slotNum]; - semEnv->idleSlv[coreNum][slotNum] = idleSlv; - - slotTaskSlv = VSs__create_slave_helper( &idle_fn, NULL, semEnv, 0); - slotTaskSlv->coreAnimatedBy = coreNum; - slotTaskSlv->animSlotAssignedTo = - _PRMasterEnv->allAnimSlots[coreNum][slotNum]; - - semData = slotTaskSlv->semanticData; - semData->needsTaskAssigned = TRUE; - semData->slaveType = SlotTaskSlv; - semEnv->slotTaskSlvs[coreNum][slotNum] = slotTaskSlv; - } - } - - //create the recycle queue where free task slaves are put after their task ends - semEnv->freeTaskSlvRecycleQ = makePRQ(); - - - semEnv->numLiveExtraTaskSlvs = 0; - semEnv->numLiveThreadSlvs = 0; //none existent yet.. 
"create process" creates the seeds -//================================================================== - - _PRMasterEnv->numSlavesCreated = 0; //used by create slave to set slave ID - for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ ) - { - readyToAnimateQs[ coreIdx ] = makePRQ(); - - //Q: should give masterVP core-specific info as its init data? - masterVPs[ coreIdx ] = PR_int__create_slaveVP( (TopLevelFnPtr)&animationMaster, (void*)masterEnv ); - masterVPs[ coreIdx ]->coreAnimatedBy = coreIdx; - masterVPs[ coreIdx ]->typeOfVP = Master; - allAnimSlots[ coreIdx ] = create_anim_slots( coreIdx ); //makes for one core - } - _PRMasterEnv->masterVPs = masterVPs; - _PRMasterEnv->masterLock = UNLOCKED; - _PRMasterEnv->seed1 = rand()%1000; // init random number generator - _PRMasterEnv->seed2 = rand()%1000; // init random number generator - _PRMasterEnv->allAnimSlots = allAnimSlots; - _PRMasterEnv->measHistsInfo = NULL; - - //============================= MEASUREMENT STUFF ======================== - - MEAS__Make_Meas_Hists_for_Susp_Meas; - MEAS__Make_Meas_Hists_for_Master_Meas; - MEAS__Make_Meas_Hists_for_Master_Lock_Meas; - MEAS__Make_Meas_Hists_for_Malloc_Meas; - MEAS__Make_Meas_Hists_for_Plugin_Meas; - MEAS__Make_Meas_Hists_for_Language; - - PROBES__Create_Probe_Bookkeeping_Vars; - - HOLISTIC__Setup_Perf_Counters; - - //======================================================================== - } - -AnimSlot ** -create_anim_slots( int32 coreSlotsAreOn ) - { AnimSlot **animSlots; - int i; - - animSlots = PR_int__malloc( NUM_ANIM_SLOTS * sizeof(AnimSlot *) ); - - for( i = 0; i < NUM_ANIM_SLOTS; i++ ) - { - animSlots[i] = PR_int__malloc( sizeof(AnimSlot) ); - - //Set state to mean "handling requests done, slot needs filling" - animSlots[i]->workIsDone = FALSE; - animSlots[i]->needsSlaveAssigned = TRUE; - animSlots[i]->slotIdx = i; //quick retrieval of slot pos - animSlots[i]->coreSlotIsOn = coreSlotsAreOn; - } - return animSlots; - } - - -void -freeAnimSlots( AnimSlot 
**animSlots ) - { int i; - for( i = 0; i < NUM_ANIM_SLOTS; i++ ) - { - PR_int__free( animSlots[i] ); - } - PR_int__free( animSlots ); - } - - -void -create_the_coreCtlr_OS_threads() - { - //======================================================================== - // Create the Threads - int coreIdx, retCode; - - //Need the threads to be created suspended, and wait for a signal - // before proceeding -- gives time after creating to initialize other - // stuff before the coreCtlrs set off. - _PRMasterEnv->setupComplete = 0; - - //initialize the cond used to make the new threads wait and sync up - //must do this before *creating* the threads.. - pthread_mutex_init( &suspendLock, NULL ); - pthread_cond_init( &suspendCond, NULL ); - - //Make the threads that animate the core controllers - for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ ) - { coreCtlrThdParams[coreIdx] = PR_int__malloc( sizeof(ThdParams) ); - coreCtlrThdParams[coreIdx]->coreNum = coreIdx; - - retCode = - pthread_create( &(coreCtlrThdHandles[coreIdx]), - thdAttrs, - &coreController, - (void *)(coreCtlrThdParams[coreIdx]) ); - if(retCode){printf("ERROR creating thread: %d\n", retCode); exit(1);} - } - } - - -/*This is what causes the PR system to initialize.. then waits for it to - * exit. - * - *Wrapper lib layer calls this when it wants the system to start running.. - */ -/* -void -PR_SS__start_the_work_then_wait_until_done() - { -#ifdef DEBUG__TURN_ON_SEQUENTIAL_MODE - //Only difference between version with an OS thread pinned to each core and - // the sequential version of PR is PR__init_Seq, this, and coreCtlr_Seq. - // - //Instead of un-suspending threads, just call the one and only - // core ctlr (sequential version), in the main thread. 
- coreCtlr_Seq( NULL ); - flushRegisters(); -#else - int coreIdx; - //Start the core controllers running - - //tell the core controller threads that setup is complete - //get lock, to lock out any threads still starting up -- they'll see - // that setupComplete is true before entering while loop, and so never - // wait on the condition - pthread_mutex_lock( &suspendLock ); - _PRMasterEnv->setupComplete = 1; - pthread_mutex_unlock( &suspendLock ); - pthread_cond_broadcast( &suspendCond ); - - - //wait for all to complete - for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ ) - { - pthread_join( coreCtlrThdHandles[coreIdx], NULL ); - } - - //NOTE: do not clean up PR env here -- semantic layer has to have - // a chance to clean up its environment first, then do a call to free - // the Master env and rest of PR locations -#endif - } -*/ - -SlaveVP* PR_SS__create_shutdown_slave(){ - SlaveVP* shutdownVP; - - shutdownVP = PR_int__create_slaveVP( &endOSThreadFn, NULL ); - shutdownVP->typeOfVP = Shutdown; - - return shutdownVP; -} - -//TODO: look at architecting cleanest separation between request handler -// and animation master, for dissipate, create, shutdown, and other non-semantic -// requests. Issue is chain: one removes requests from AppSlv, one dispatches -// on type of request, and one handles each type.. but some types require -// action from both request handler and animation master -- maybe just give the -// request handler calls like: PR__handle_X_request_type - - -/*This is called by the semantic layer's request handler when it decides its - * time to shut down the PR system. Calling this causes the core controller OS - * threads to exit, which unblocks the entry-point function that started up - * PR, and allows it to grab the result and return to the original single- - * threaded application. 
- * - *The _PRMasterEnv is needed by this shut down function, so the create-seed- - * and-wait function has to free a bunch of stuff after it detects the - * threads have all died: the masterEnv, the thread-related locations, - * masterVP any AppSlvs that might still be allocated and sitting in the - * semantic environment, or have been orphaned in the _PRWorkQ. - * - *NOTE: the semantic plug-in is expected to use PR__malloc to get all the - * locations it needs, and give ownership to masterVP. Then, they will be - * automatically freed. - * - *In here,create one core-loop shut-down processor for each core controller and put - * them all directly into the readyToAnimateQ. - *Note, this function can ONLY be called after the semantic environment no - * longer cares if AppSlvs get animated after the point this is called. In - * other words, this can be used as an abort, or else it should only be - * called when all AppSlvs have finished dissipate requests -- only at that - * point is it sure that all results have completed. - */ -void -PR_SS__shutdown() - { int32 coreIdx; - SlaveVP *shutDownSlv; - AnimSlot **animSlots; - //create the shutdown processors, one for each core controller -- put them - // directly into the Q -- each core will die when gets one - for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ ) - { //Note, this is running in the master - shutDownSlv = PR_SS__create_shutdown_slave(); - //last slave has dissipated, so no more in slots, so write - // shut down slave into first animulng slot. - animSlots = _PRMasterEnv->allAnimSlots[ coreIdx ]; - animSlots[0]->slaveAssignedToSlot = shutDownSlv; - animSlots[0]->needsSlaveAssigned = FALSE; - shutDownSlv->coreAnimatedBy = coreIdx; - shutDownSlv->animSlotAssignedTo = animSlots[ 0 ]; - } - } - - -/*Am trying to be cute, avoiding IF statement in coreCtlr that checks for - * a special shutdown slaveVP. Ended up with extra-complex shutdown sequence. 
- *This function has the sole purpose of setting the stack and framePtr - * to the coreCtlr's stack and framePtr.. it does that then jumps to the - * core ctlr's shutdown point -- might be able to just call Pthread_exit - * from here, but am going back to the pthread's stack and setting everything - * up just as if it never jumped out, before calling pthread_exit. - *The end-point of core ctlr will free the stack and so forth of the - * processor that animates this function, (this fn is transfering the - * animator of the AppSlv that is in turn animating this function over - * to core controller function -- note that this slices out a level of virtual - * processors). - */ -void -endOSThreadFn( void *initData, SlaveVP *animatingSlv ) - { - #ifdef DEBUG__TURN_ON_SEQUENTIAL_MODE - asmTerminateCoreCtlrSeq(animatingSlv); - #else - asmTerminateCoreCtlr(animatingSlv); - #endif - } - - -/*This is called from the startup & shutdown - */ -void -PR_SS__cleanup_at_end_of_shutdown() - { - //Before getting rid of everything, print out any measurements made - if( _PRMasterEnv->measHistsInfo != NULL ) - { forAllInDynArrayDo( _PRMasterEnv->measHistsInfo, (DynArrayFnPtr)&printHist ); - forAllInDynArrayDo( _PRMasterEnv->measHistsInfo, (DynArrayFnPtr)&saveHistToFile); - forAllInDynArrayDo( _PRMasterEnv->measHistsInfo, (DynArrayFnPtr)&freeHist ); - } - - MEAS__Print_Hists_for_Susp_Meas; - MEAS__Print_Hists_for_Master_Meas; - MEAS__Print_Hists_for_Master_Lock_Meas; - MEAS__Print_Hists_for_Malloc_Meas; - MEAS__Print_Hists_for_Plugin_Meas; - - - //All the environment data has been allocated with PR__malloc, so just - // free its internal big-chunk and all inside it disappear. 
-/* - readyToAnimateQs = _PRMasterEnv->readyToAnimateQs; - masterVPs = _PRMasterEnv->masterVPs; - allAnimSlots = _PRMasterEnv->allAnimSlots; - - for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ ) - { - freePRQ( readyToAnimateQs[ coreIdx ] ); - //master Slvs were created external to PR, so use external free - PR_int__dissipate_slaveVP( masterVPs[ coreIdx ] ); - - freeAnimSlots( allAnimSlots[ coreIdx ] ); - } - - PR_int__free( _PRMasterEnv->readyToAnimateQs ); - PR_int__free( _PRMasterEnv->masterVPs ); - PR_int__free( _PRMasterEnv->allAnimSlots ); - - //============================= MEASUREMENT STUFF ======================== - #ifdef PROBES__TURN_ON_STATS_PROBES - freeDynArrayDeep( _PRMasterEnv->dynIntervalProbesInfo, &PR_WL__free_probe); - #endif - //======================================================================== -*/ - //These are the only two that use system free - PR_ext__free_free_list( _PRMasterEnv->freeLists ); - free( (void *)_PRMasterEnv ); - } - - -//================================ - - diff -r 999f2966a3e5 -r dafae55597ce PR_req_handlers.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/PR_req_handlers.c Tue Oct 23 23:46:17 2012 -0700 @@ -0,0 +1,57 @@ +/* + * Copyright 2010 OpenSourceStewardshipFoundation + * + * Licensed under BSD + */ + +#include +#include +#include +#include +#include +#include + +#include "PR.h" + + +/* MEANING OF WL PI SS int + * These indicate which places the function is safe to use. 
They stand for: + * WL: Wrapper Library + * PI: Plugin + * SS: Startup and Shutdown + * int: internal to the PR implementation + */ + + +/* + */ +void inline +handleMakeProbe( PRSemReq *semReq, void *semEnv, ResumeSlvFnPtr resumeFn ) + { IntervalProbe *newProbe; + + newProbe = PR_int__malloc( sizeof(IntervalProbe) ); + newProbe->nameStr = PR_int__strDup( semReq->nameStr ); + newProbe->hist = NULL; + newProbe->schedChoiceWasRecorded = FALSE; + + //This runs in masterVP, so no race-condition worries + newProbe->probeID = + addToDynArray( newProbe, _PRMasterEnv->dynIntervalProbesInfo ); + + semReq->requestingSlv->dataRetFromReq = newProbe; + + //This in inside PR, while resume_slaveVP fn is inside language, so pass + // pointer from lang to here, then call it. + (*resumeFn)( semReq->requestingSlv, semEnv ); + } + +void inline +handleThrowException( PRSemReq *semReq, void *semEnv, ResumeSlvFnPtr resumeFn ) + { + PR_int__throw_exception( semReq->msgStr, semReq->requestingSlv, semReq->exceptionData ); + + (*resumeFn)( semReq->requestingSlv, semEnv ); + } + + + diff -r 999f2966a3e5 -r dafae55597ce Services_Offered_by_PR/Measurement_and_Stats/MEAS__macros.h --- a/Services_Offered_by_PR/Measurement_and_Stats/MEAS__macros.h Wed Sep 19 23:12:44 2012 -0700 +++ b/Services_Offered_by_PR/Measurement_and_Stats/MEAS__macros.h Tue Oct 23 23:46:17 2012 -0700 @@ -26,7 +26,7 @@ #define MEAS__Capture_Post_Point( histName ) \ saveLowTimeStampCountInto( endStamp ); \ - addIntervalToHist( startStamp, endStamp, _PRMasterEnv->histName ); + addIntervalToHist( startStamp, endStamp, _PRTopEnv->histName ); @@ -43,9 +43,9 @@ Histogram *suspHighTimeHist; #define MEAS__Make_Meas_Hists_for_Susp_Meas \ - _PRMasterEnv->suspLowTimeHist = makeFixedBinHistExt( 100, 0, 200,\ + _PRTopEnv->suspLowTimeHist = makeFixedBinHistExt( 100, 0, 200,\ "master_low_time_hist");\ - _PRMasterEnv->suspHighTimeHist = makeFixedBinHistExt( 100, 0, 200,\ + _PRTopEnv->suspHighTimeHist = makeFixedBinHistExt( 100, 0, 200,\ 
"master_high_time_hist"); //record time stamp: compare to time-stamp recorded below @@ -56,12 +56,12 @@ #define MEAS__Capture_Post_Susp_Point \ saveLowTimeStampCountInto( animatingSlv->postSuspTSCLow );\ addIntervalToHist( preSuspTSCLow, postSuspTSCLow,\ - _PRMasterEnv->suspLowTimeHist ); \ + _PRTopEnv->suspLowTimeHist ); \ addIntervalToHist( preSuspTSCLow, postSuspTSCLow,\ - _PRMasterEnv->suspHighTimeHist ); + _PRTopEnv->suspHighTimeHist ); #define MEAS__Print_Hists_for_Susp_Meas \ - printHist( _PRMasterEnv->pluginTimeHist ); + printHist( _PRTopEnv->pluginTimeHist ); #else #define MEAS__Insert_Susp_Meas_Fields_into_Slave @@ -82,9 +82,9 @@ Histogram *masterHighTimeHist; #define MEAS__Make_Meas_Hists_for_Master_Meas \ - _PRMasterEnv->masterLowTimeHist = makeFixedBinHistExt( 100, 0, 200,\ + _PRTopEnv->masterLowTimeHist = makeFixedBinHistExt( 100, 0, 200,\ "master_low_time_hist");\ - _PRMasterEnv->masterHighTimeHist = makeFixedBinHistExt( 100, 0, 200,\ + _PRTopEnv->masterHighTimeHist = makeFixedBinHistExt( 100, 0, 200,\ "master_high_time_hist"); //Total Master time includes one coreloop time -- just assume the core @@ -96,12 +96,12 @@ #define MEAS__Capture_Post_Master_Point \ saveLowTimeStampCountInto( masterVP->endMasterTSCLow );\ addIntervalToHist( startMasterTSCLow, endMasterTSCLow,\ - _PRMasterEnv->masterLowTimeHist ); \ + _PRTopEnv->masterLowTimeHist ); \ addIntervalToHist( startMasterTSCLow, endMasterTSCLow,\ - _PRMasterEnv->masterHighTimeHist ); + _PRTopEnv->masterHighTimeHist ); #define MEAS__Print_Hists_for_Master_Meas \ - printHist( _PRMasterEnv->pluginTimeHist ); + printHist( _PRTopEnv->pluginTimeHist ); #else #define MEAS__Insert_Master_Meas_Fields_into_Slave @@ -119,9 +119,9 @@ Histogram *masterLockHighTimeHist; #define MEAS__Make_Meas_Hists_for_Master_Lock_Meas \ - _PRMasterEnv->masterLockLowTimeHist = makeFixedBinHist( 50, 0, 2, \ + _PRTopEnv->masterLockLowTimeHist = makeFixedBinHist( 50, 0, 2, \ "master lock low time hist");\ - 
_PRMasterEnv->masterLockHighTimeHist = makeFixedBinHist( 50, 0, 100,\ + _PRTopEnv->masterLockHighTimeHist = makeFixedBinHist( 50, 0, 100,\ "master lock high time hist"); #define MEAS__Capture_Pre_Master_Lock_Point \ @@ -131,13 +131,13 @@ #define MEAS__Capture_Post_Master_Lock_Point \ saveLowTimeStampCountInto( endStamp ); \ addIntervalToHist( startStamp, endStamp,\ - _PRMasterEnv->masterLockLowTimeHist ); \ + _PRTopEnv->masterLockLowTimeHist ); \ addIntervalToHist( startStamp, endStamp,\ - _PRMasterEnv->masterLockHighTimeHist ); + _PRTopEnv->masterLockHighTimeHist ); #define MEAS__Print_Hists_for_Master_Lock_Meas \ - printHist( _PRMasterEnv->masterLockLowTimeHist ); \ - printHist( _PRMasterEnv->masterLockHighTimeHist ); + printHist( _PRTopEnv->masterLockLowTimeHist ); \ + printHist( _PRTopEnv->masterLockHighTimeHist ); #else #define MEAS__Insert_Master_Lock_Meas_Fields_into_MasterEnv @@ -154,9 +154,9 @@ Histogram *freeTimeHist; #define MEAS__Make_Meas_Hists_for_Malloc_Meas \ - _PRMasterEnv->mallocTimeHist = makeFixedBinHistExt( 100, 0, 30,\ + _PRTopEnv->mallocTimeHist = makeFixedBinHistExt( 100, 0, 30,\ "malloc_time_hist");\ - _PRMasterEnv->freeTimeHist = makeFixedBinHistExt( 100, 0, 30,\ + _PRTopEnv->freeTimeHist = makeFixedBinHistExt( 100, 0, 30,\ "free_time_hist"); #define MEAS__Capture_Pre_Malloc_Point \ @@ -166,7 +166,7 @@ #define MEAS__Capture_Post_Malloc_Point \ saveLowTimeStampCountInto( endStamp ); \ addIntervalToHist( startStamp, endStamp,\ - _PRMasterEnv->mallocTimeHist ); + _PRTopEnv->mallocTimeHist ); #define MEAS__Capture_Pre_Free_Point \ int32 startStamp, endStamp; \ @@ -175,15 +175,15 @@ #define MEAS__Capture_Post_Free_Point \ saveLowTimeStampCountInto( endStamp ); \ addIntervalToHist( startStamp, endStamp,\ - _PRMasterEnv->freeTimeHist ); + _PRTopEnv->freeTimeHist ); #define MEAS__Print_Hists_for_Malloc_Meas \ - printHist( _PRMasterEnv->mallocTimeHist ); \ - saveHistToFile( _PRMasterEnv->mallocTimeHist ); \ - printHist( _PRMasterEnv->freeTimeHist 
); \ - saveHistToFile( _PRMasterEnv->freeTimeHist ); \ - freeHistExt( _PRMasterEnv->mallocTimeHist ); \ - freeHistExt( _PRMasterEnv->freeTimeHist ); + printHist( _PRTopEnv->mallocTimeHist ); \ + saveHistToFile( _PRTopEnv->mallocTimeHist ); \ + printHist( _PRTopEnv->freeTimeHist ); \ + saveHistToFile( _PRTopEnv->freeTimeHist ); \ + freeHistExt( _PRTopEnv->mallocTimeHist ); \ + freeHistExt( _PRTopEnv->freeTimeHist ); #else #define MEAS__Insert_Malloc_Meas_Fields_into_MasterEnv @@ -203,9 +203,9 @@ Histogram *reqHdlrHighTimeHist; #define MEAS__Make_Meas_Hists_for_Plugin_Meas \ - _PRMasterEnv->reqHdlrLowTimeHist = makeFixedBinHistExt( 100, 0, 200,\ + _PRTopEnv->reqHdlrLowTimeHist = makeFixedBinHistExt( 100, 0, 200,\ "plugin_low_time_hist");\ - _PRMasterEnv->reqHdlrHighTimeHist = makeFixedBinHistExt( 100, 0, 200,\ + _PRTopEnv->reqHdlrHighTimeHist = makeFixedBinHistExt( 100, 0, 200,\ "plugin_high_time_hist"); #define MEAS__startReqHdlr \ @@ -215,17 +215,17 @@ #define MEAS__endReqHdlr \ saveLowTimeStampCountInto( endStamp1 ); \ addIntervalToHist( startStamp1, endStamp1, \ - _PRMasterEnv->reqHdlrLowTimeHist ); \ + _PRTopEnv->reqHdlrLowTimeHist ); \ addIntervalToHist( startStamp1, endStamp1, \ - _PRMasterEnv->reqHdlrHighTimeHist ); + _PRTopEnv->reqHdlrHighTimeHist ); #define MEAS__Print_Hists_for_Plugin_Meas \ - printHist( _PRMasterEnv->reqHdlrLowTimeHist ); \ - saveHistToFile( _PRMasterEnv->reqHdlrLowTimeHist ); \ - printHist( _PRMasterEnv->reqHdlrHighTimeHist ); \ - saveHistToFile( _PRMasterEnv->reqHdlrHighTimeHist ); \ - freeHistExt( _PRMasterEnv->reqHdlrLowTimeHist ); \ - freeHistExt( _PRMasterEnv->reqHdlrHighTimeHist ); + printHist( _PRTopEnv->reqHdlrLowTimeHist ); \ + saveHistToFile( _PRTopEnv->reqHdlrLowTimeHist ); \ + printHist( _PRTopEnv->reqHdlrHighTimeHist ); \ + saveHistToFile( _PRTopEnv->reqHdlrHighTimeHist ); \ + freeHistExt( _PRTopEnv->reqHdlrLowTimeHist ); \ + freeHistExt( _PRTopEnv->reqHdlrHighTimeHist ); #else #define 
MEAS__Insert_Plugin_Meas_Fields_into_MasterEnv #define MEAS__Make_Meas_Hists_for_Plugin_Meas @@ -255,16 +255,16 @@ #define MEAS__startAnimationMaster_forSys \ TSCountLowHigh startStamp1, endStamp1; \ saveTSCLowHigh( endStamp1 ); \ - _PRMasterEnv->cyclesTillStartAnimationMaster = \ + _PRTopEnv->cyclesTillStartAnimationMaster = \ endStamp1.longVal - masterVP->startSusp.longVal; #define Meas_startReqHdlr_forSys \ saveTSCLowHigh( startStamp1 ); \ - _PRMasterEnv->startReqHdlr.longVal = startStamp1.longVal; + _PRTopEnv->startReqHdlr.longVal = startStamp1.longVal; #define MEAS__endAnimationMaster_forSys \ saveTSCLowHigh( startStamp1 ); \ - _PRMasterEnv->endAnimationMaster.longVal = startStamp1.longVal; + _PRTopEnv->endAnimationMaster.longVal = startStamp1.longVal; /*A TSC is stored in VP first thing inside wrapper-lib * Now, measures cycles from there to here @@ -279,7 +279,7 @@ currVP->numGoodSusp++; \ } \ /*recorded every time, but only read if currVP == MasterVP*/ \ - _PRMasterEnv->startMaster.longVal = endSusp.longVal; + _PRTopEnv->startMaster.longVal = endSusp.longVal; #else #define MEAS__Insert_System_Meas_Fields_into_Slave @@ -314,9 +314,9 @@ }; #define saveCyclesAndInstrs(core,cycles,instrs,cachem) do{ \ - int cycles_fd = _PRMasterEnv->cycles_counter_fd[core]; \ - int instrs_fd = _PRMasterEnv->instrs_counter_fd[core]; \ - int cachem_fd = _PRMasterEnv->cachem_counter_fd[core]; \ + int cycles_fd = _PRTopEnv->cycles_counter_fd[core]; \ + int instrs_fd = _PRTopEnv->instrs_counter_fd[core]; \ + int cachem_fd = _PRTopEnv->cachem_counter_fd[core]; \ int nread; \ \ nread = read(cycles_fd,&(cycles),sizeof(cycles)); \ @@ -348,7 +348,7 @@ #define HOLISTIC__CoreCtrl_Setup \ - CounterHandler counterHandler = _PRMasterEnv->counterHandler; \ + CounterHandler counterHandler = _PRTopEnv->counterHandler; \ SlaveVP *lastVPBeforeMaster = NULL; \ /*if(thisCoresThdParams->coreNum == 0){ \ uint64 initval = tsc_offset_send(thisCoresThdParams,0); \ @@ -376,12 +376,12 @@ uint64 
cycles,instrs,cachem; \ saveCyclesAndInstrs(thisCoresIdx,cycles, instrs,cachem); \ if(lastVPBeforeMaster){ \ - (*counterHandler)(AppResponderInvocation_start,lastVPBeforeMaster->slaveID,lastVPBeforeMaster->assignCount,lastVPBeforeMaster,cycles,instrs,cachem); \ + (*counterHandler)(AppResponderInvocation_start,lastVPBeforeMaster->slaveID,lastVPBeforeMaster->numTimesAssignedToASlot,lastVPBeforeMaster,cycles,instrs,cachem); \ lastVPBeforeMaster = NULL; \ } else { \ - _PRMasterEnv->start_master_lock[thisCoresIdx][0] = cycles; \ - _PRMasterEnv->start_master_lock[thisCoresIdx][1] = instrs; \ - _PRMasterEnv->start_master_lock[thisCoresIdx][2] = cachem; \ + _PRTopEnv->start_master_lock[thisCoresIdx][0] = cycles; \ + _PRTopEnv->start_master_lock[thisCoresIdx][1] = instrs; \ + _PRTopEnv->start_master_lock[thisCoresIdx][2] = cachem; \ } /* Request Handler may call resume() on the VP, but we want to @@ -396,7 +396,7 @@ */ #define HOLISTIC__Record_AppResponder_start \ vpid = currSlot->slaveAssignedToSlot->slaveID; \ - task = currSlot->slaveAssignedToSlot->assignCount; \ + task = currSlot->slaveAssignedToSlot->numTimesAssignedToASlot; \ uint64 cycles, instrs, cachem; \ saveCyclesAndInstrs(thisCoresIdx,cycles, instrs,cachem); \ (*counterHandler)(AppResponder_start,vpid,task,currSlot->slaveAssignedToSlot,cycles,instrs,cachem); @@ -429,30 +429,30 @@ uint64 cycles,instrs,cachem; \ saveCyclesAndInstrs(thisCoresIdx,cycles,instrs,cachem); \ if(empty){ \ - (*counterHandler)(AssignerInvocation_start,assignedSlaveVP->slaveID,assignedSlaveVP->assignCount,assignedSlaveVP,masterEnv->start_master_lock[thisCoresIdx][0],masterEnv->start_master_lock[thisCoresIdx][1],masterEnv->start_master_lock[thisCoresIdx][2]); \ + (*counterHandler)(AssignerInvocation_start,assignedSlaveVP->slaveID,assignedSlaveVP->numTimesAssignedToASlot,assignedSlaveVP,masterEnv->start_master_lock[thisCoresIdx][0],masterEnv->start_master_lock[thisCoresIdx][1],masterEnv->start_master_lock[thisCoresIdx][2]); \ } \ - 
(*counterHandler)(Timestamp_start,assignedSlaveVP->slaveID,assignedSlaveVP->assignCount,assignedSlaveVP,tsc,0,0); \ - (*counterHandler)(Assigner_start,assignedSlaveVP->slaveID,assignedSlaveVP->assignCount,assignedSlaveVP,tmp_cycles,tmp_instrs,tmp_cachem); \ - (*counterHandler)(Assigner_end,assignedSlaveVP->slaveID,assignedSlaveVP->assignCount,assignedSlaveVP,cycles,instrs,tmp_cachem); + (*counterHandler)(Timestamp_start,assignedSlaveVP->slaveID,assignedSlaveVP->numTimesAssignedToASlot,assignedSlaveVP,tsc,0,0); \ + (*counterHandler)(Assigner_start,assignedSlaveVP->slaveID,assignedSlaveVP->numTimesAssignedToASlot,assignedSlaveVP,tmp_cycles,tmp_instrs,tmp_cachem); \ + (*counterHandler)(Assigner_end,assignedSlaveVP->slaveID,assignedSlaveVP->numTimesAssignedToASlot,assignedSlaveVP,cycles,instrs,tmp_cachem); #define HOLISTIC__Record_Work_start \ if(currVP){ \ uint64 cycles,instrs,cachem; \ saveCyclesAndInstrs(thisCoresIdx,cycles, instrs,cachem); \ - (*counterHandler)(Work_start,currVP->slaveID,currVP->assignCount,currVP,cycles,instrs,cachem); \ + (*counterHandler)(Work_start,currVP->slaveID,currVP->numTimesAssignedToASlot,currVP,cycles,instrs,cachem); \ } #define HOLISTIC__Record_Work_end \ if(currVP){ \ uint64 cycles,instrs,cachem; \ saveCyclesAndInstrs(thisCoresIdx,cycles, instrs,cachem); \ - (*counterHandler)(Work_end,currVP->slaveID,currVP->assignCount,currVP,cycles,instrs,cachem); \ + (*counterHandler)(Work_end,currVP->slaveID,currVP->numTimesAssignedToASlot,currVP,cycles,instrs,cachem); \ } #define HOLISTIC__Record_HwResponderInvocation_start \ uint64 cycles,instrs,cachem; \ saveCyclesAndInstrs(animatingSlv->coreAnimatedBy,cycles, instrs,cachem); \ - (*(_PRMasterEnv->counterHandler))(HwResponderInvocation_start,animatingSlv->slaveID,animatingSlv->assignCount,animatingSlv,cycles,instrs,cachem); + (*(_PRTopEnv->counterHandler))(HwResponderInvocation_start,animatingSlv->slaveID,animatingSlv->numTimesAssignedToASlot,animatingSlv,cycles,instrs,cachem); #define 
getReturnAddressBeforeLibraryCall(vp_ptr, res_ptr) do{ \ @@ -502,8 +502,8 @@ #endif #define makeAMeasHist( idx, name, numBins, startVal, binWidth ) \ - makeHighestDynArrayIndexBeAtLeast( _PRMasterEnv->measHistsInfo, idx ); \ - _PRMasterEnv->measHists[idx] = \ + makeHighestDynArrayIndexBeAtLeast( _PRTopEnv->measHistsInfo, idx ); \ + _PRTopEnv->measHists[idx] = \ makeFixedBinHist( numBins, startVal, binWidth, name ); //============================== Probes =================================== diff -r 999f2966a3e5 -r dafae55597ce Services_Offered_by_PR/Measurement_and_Stats/probes.c --- a/Services_Offered_by_PR/Measurement_and_Stats/probes.c Wed Sep 19 23:12:44 2012 -0700 +++ b/Services_Offered_by_PR/Measurement_and_Stats/probes.c Tue Oct 23 23:46:17 2012 -0700 @@ -49,12 +49,12 @@ IntervalProbe * create_generic_probe( char *nameStr, SlaveVP *animSlv ) { - PRSemReq reqData; + PRServReq reqData; reqData.reqType = make_probe; reqData.nameStr = nameStr; - PR_WL__send_PRSem_request( &reqData, animSlv ); + PR_WL__send_service_request( &reqData, animSlv ); return animSlv->dataRetFromReq; } @@ -74,7 +74,7 @@ newProbe->hist = NULL; newProbe->schedChoiceWasRecorded = FALSE; newProbe->probeID = - addToDynArray( newProbe, _PRMasterEnv->dynIntervalProbesInfo ); + addToDynArray( newProbe, _PRTopEnv->dynIntervalProbesInfo ); return newProbe; } @@ -164,9 +164,9 @@ { IntervalProbe *probe; PR_int__get_master_lock(); - probe = _PRMasterEnv->intervalProbes[ probeID ]; + probe = _PRTopEnv->intervalProbes[ probeID ]; - addValueIntoTable(probe->nameStr, probe, _PRMasterEnv->probeNameHashTbl); + addValueIntoTable(probe->nameStr, probe, _PRTopEnv->probeNameHashTbl); PR_int__release_master_lock(); } @@ -175,7 +175,7 @@ PR_impl__get_probe_by_name( char *probeName, SlaveVP *animSlv ) { //TODO: fix this To be in Master -- race condition - return getValueFromTable( probeName, _PRMasterEnv->probeNameHashTbl ); + return getValueFromTable( probeName, _PRTopEnv->probeNameHashTbl ); } @@ -186,7 +186,7 
@@ PR_impl__record_sched_choice_into_probe( int32 probeID, SlaveVP *animatingSlv ) { IntervalProbe *probe; - probe = _PRMasterEnv->intervalProbes[ probeID ]; + probe = _PRTopEnv->intervalProbes[ probeID ]; probe->schedChoiceWasRecorded = TRUE; probe->coreNum = animatingSlv->coreAnimatedBy; probe->slaveID = animatingSlv->slaveID; @@ -201,7 +201,7 @@ { IntervalProbe *probe; DEBUG__printf( dbgProbes, "record start of interval" ) - probe = _PRMasterEnv->intervalProbes[ probeID ]; + probe = _PRTopEnv->intervalProbes[ probeID ]; //record *start* point as last thing, after lookup #ifdef PROBES__USE_TIME_OF_DAY_PROBES @@ -235,7 +235,7 @@ #endif - probe = _PRMasterEnv->intervalProbes[ probeID ]; + probe = _PRTopEnv->intervalProbes[ probeID ]; #ifdef PROBES__USE_TIME_OF_DAY_PROBES if( probe->hist != NULL ) @@ -272,12 +272,12 @@ if( probe->endSecs == 0 ) //just a single point in time { printf( " time point: %.6f\n", - probe->startSecs - _PRMasterEnv->createPtInSecs ); + probe->startSecs - _PRTopEnv->createPtInSecs ); } else if( probe->hist == NULL ) //just an interval { printf( " startSecs: %.6f interval: %.6f\n", - (probe->startSecs - _PRMasterEnv->createPtInSecs), probe->interval); + (probe->startSecs - _PRTopEnv->createPtInSecs), probe->interval); } else //a full histogram of intervals { @@ -289,7 +289,7 @@ PR_impl__print_stats_of_probe( IntervalProbe *probe ) { -// probe = _PRMasterEnv->intervalProbes[ probeID ]; +// probe = _PRTopEnv->intervalProbes[ probeID ]; print_probe_helper( probe ); } @@ -298,7 +298,7 @@ void PR_impl__print_stats_of_all_probes() { - forAllInDynArrayDo( _PRMasterEnv->dynIntervalProbesInfo, + forAllInDynArrayDo( _PRTopEnv->dynIntervalProbesInfo, (DynArrayFnPtr) &PR_impl__print_stats_of_probe ); fflush( stdout ); } diff -r 999f2966a3e5 -r dafae55597ce Services_Offered_by_PR/Measurement_and_Stats/probes.h --- a/Services_Offered_by_PR/Measurement_and_Stats/probes.h Wed Sep 19 23:12:44 2012 -0700 +++ 
b/Services_Offered_by_PR/Measurement_and_Stats/probes.h Tue Oct 23 23:46:17 2012 -0700 @@ -121,15 +121,15 @@ #ifdef PROBES__TURN_ON_STATS_PROBES #define PROBES__Create_Probe_Bookkeeping_Vars \ - _PRMasterEnv->dynIntervalProbesInfo = \ - makePrivDynArrayOfSize( (void***)&(_PRMasterEnv->intervalProbes), 200); \ + _PRTopEnv->dynIntervalProbesInfo = \ + makePrivDynArrayOfSize( (void***)&(_PRTopEnv->intervalProbes), 200); \ \ - _PRMasterEnv->probeNameHashTbl = makeHashTable( 1000, &PR_int__free ); \ + _PRTopEnv->probeNameHashTbl = makeHashTable( 1000, &PR_int__free ); \ \ /*put creation time directly into master env, for fast retrieval*/ \ struct timeval timeStamp; \ gettimeofday( &(timeStamp), NULL); \ - _PRMasterEnv->createPtInSecs = \ + _PRTopEnv->createPtInSecs = \ timeStamp.tv_sec +(timeStamp.tv_usec/1000000.0); #define PR_WL__record_time_point_into_new_probe( nameStr, animSlv ) \ diff -r 999f2966a3e5 -r dafae55597ce Services_Offered_by_PR/Memory_Handling/vmalloc.c --- a/Services_Offered_by_PR/Memory_Handling/vmalloc.c Wed Sep 19 23:12:44 2012 -0700 +++ b/Services_Offered_by_PR/Memory_Handling/vmalloc.c Tue Oct 23 23:46:17 2012 -0700 @@ -252,7 +252,7 @@ { MEAS__Capture_Pre_Malloc_Point - MallocArrays* freeLists = _PRMasterEnv->freeLists; + MallocArrays* freeLists = _PRTopEnv->freeLists; MallocProlog* foundChunk; //Return a small chunk if the requested size is smaller than 128B @@ -308,7 +308,7 @@ MEAS__Capture_Pre_Free_Point; - MallocArrays* freeLists = _PRMasterEnv->freeLists; + MallocArrays* freeLists = _PRTopEnv->freeLists; MallocProlog *chunkToFree = (MallocProlog*)ptrToFree - 1; uint32 containerIdx; @@ -366,8 +366,8 @@ PR_ext__create_free_list() { //Initialize containers for small chunks and fill with zeros - _PRMasterEnv->freeLists = (MallocArrays*)malloc( sizeof(MallocArrays) ); - MallocArrays *freeLists = _PRMasterEnv->freeLists; + _PRTopEnv->freeLists = (MallocArrays*)malloc( sizeof(MallocArrays) ); + MallocArrays *freeLists = _PRTopEnv->freeLists; 
freeLists->smallChunks = (MallocProlog**)malloc(SMALL_CHUNK_COUNT*sizeof(MallocProlog*)); diff -r 999f2966a3e5 -r dafae55597ce Services_Offered_by_PR/Memory_Handling/vmalloc.h --- a/Services_Offered_by_PR/Memory_Handling/vmalloc.h Wed Sep 19 23:12:44 2012 -0700 +++ b/Services_Offered_by_PR/Memory_Handling/vmalloc.h Tue Oct 23 23:46:17 2012 -0700 @@ -55,6 +55,7 @@ void * PR_int__malloc( size_t sizeRequested ); #define PR_PI__malloc PR_int__malloc +#define PR_SS__malloc PR_int__malloc void * PR_WL__malloc( int32 sizeRequested ); /*BUG: -- get master lock */ diff -r 999f2966a3e5 -r dafae55597ce Services_Offered_by_PR/Services_Language/PRServ.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Services_Offered_by_PR/Services_Language/PRServ.c Tue Oct 23 23:46:17 2012 -0700 @@ -0,0 +1,517 @@ +/* + * Copyright 2010 OpenSourceCodeStewardshipFoundation + * + * Licensed under BSD + */ + +#include +#include +#include + +#include "Queue_impl/PrivateQueue.h" +#include "Hash_impl/PrivateHash.h" + +#include "../../PR.h" +#include "PRServ.h" + +//========================================================================== +void +PRServ__init_Helper(); +//========================================================================== + + + +//=========================================================================== + + +/*These are the library functions *called in the application* + * + */ + + +//=========================================================================== + +int32 +PRServ__giveMinWorkUnitCycles( float32 percentOverhead ) + { + return MIN_WORK_UNIT_CYCLES; + } + +int32 +PRServ__giveIdealNumWorkUnits() + { + return NUM_ANIM_SLOTS * NUM_CORES; + } + +int32 +PRServ__give_number_of_cores_to_schedule_onto() + { + return NUM_CORES; + } + +/*For now, use TSC -- later, make these two macros with assembly that first + * saves jump point, and second jumps back several times to get reliable time + */ +void +PRServ__begin_primitive() + { PRServSemData *semData; + + semData = 
(PRServSemData *)PR_WL__give_sem_data( animSlv, PRServ_MAGIC_NUMBER); + + saveLowTimeStampCountInto( semData->primitiveStartTime ); + } + +/*Just quick and dirty for now -- make reliable later + * will want this to jump back several times -- to be sure cache is warm + * because don't want comm time included in calc-time measurement -- and + * also to throw out any "weird" values due to OS interrupt or TSC rollover + */ +int32 +PRServ__end_primitive_and_give_cycles( SlaveVP animSlv ) + { int32 endTime, startTime; + PRServSemData *semData; + + //TODO: fix by repeating time-measurement + saveLowTimeStampCountInto( endTime ); + semData = (PRServSemData *)PR_WL__give_sem_data( animSlv, PRServ_MAGIC_NUMBER); + startTime = semData->primitiveStartTime; + return (endTime - startTime - 2*TSC_LOW_CYCLES); + } + + + +//=========================================================================== + +SlaveVP * +PRServ__create_thread( TopLevelFnPtr fnPtr, void *initData, + SlaveVP *creatingThd ) + { + return PRServ__create_thread_w_ID_and_affinity( fnPtr, initData, NO_ID, + ANY_CORE, creatingThd ); + } + +SlaveVP * +PRServ__create_thread_w_ID( TopLevelFnPtr fnPtr, void *initData, int32 *thdID, + SlaveVP *creatingThd ) + { + return PRServ__create_thread_w_ID_and_affinity( fnPtr, initData, thdID, + ANY_CORE, creatingThd ); + } + + +SlaveVP * +PRServ__create_thread_w_ID_and_affinity( TopLevelFnPtr fnPtr, void *initData, + int32 *thdID, int32 coreToAssignOnto, SlaveVP *creatingThd ) + { PRServSemReq reqData; + + //the semantic request data is on the stack and disappears when this + // call returns -- it's guaranteed to remain in the VP's stack for as + // long as the VP is suspended. 
+ reqData.reqType = create_slave; //know type because in a PR create req + reqData.coreToAssignOnto = coreToAssignOnto; + + PR_WL__send_create_slaveVP_req( &reqData, fnPtr, initData, thdID, + creatingThd, PRServ_MAGIC_NUMBER ); + return creatingThd->dataRetFromReq; + } + +/*This is always the last thing done in the code animated by a thread VP. + * Normally, this would be the last line of the thread's top level function. + * But, if the thread exits from any point, it has to do so by calling + * this. + * + *It simply sends a dissipate request, which handles all the state cleanup. + */ +void +PRServ__end_thread( SlaveVP *thdToEnd ) + { + PR_WL__send_dissipate_req( thdToEnd, PRServ_MAGIC_NUMBER ); + } + + + +//=========================================================================== + + +//======================= task submit and end ============================== +/* + */ +void +PRServ__submit_task( PRServTaskType *taskType, void *args, SlaveVP *animSlv) + { PRServSemReq reqData; + + reqData.reqType = submit_task; + + reqData.taskType = taskType; + reqData.args = args; + reqData.callingSlv = animSlv; + + //Create task is a special form, so have to pass as parameters, the + // top-level-fn of task and the data for that fn, plus lang's req, + // animating slave, and lang's magic number + PR_WL__send_create_task_req( taskType->fn, args, &reqData, NO_ID, animSlv, PRServ_MAGIC_NUMBER ); + } + +void +PRServ__submit_task_with_ID( PRServTaskType *taskType, void *args, int32 *taskID, + SlaveVP *animSlv) + { PRServSemReq reqData; + + reqData.reqType = submit_task; + + reqData.taskType = taskType; + reqData.args = args; + reqData.callingSlv = animSlv; + + PR_WL__send_create_task_req( taskType->fn, args, &reqData, taskID, animSlv, PRServ_MAGIC_NUMBER ); + } + + +/*This call is the last to happen in every task. It causes the slave to + * suspend and get the next task out of the task-queue. Notice there is no + * assigner here.. only one slave, no slave ReadyQ, and so on.. 
+ *Can either make the assigner take the next task out of the taskQ, or can + * leave all as it is, and make task-end take the next task. + *Note: this fits the case in the new PR for no-context tasks, so will use + * the built-in taskQ of new PR, and should be local and much faster. + * + *The task-stub is saved in the animSlv, so the request handler will get it + * from there, along with the task-type which has arg types, and so on.. + * + * NOTE: if want, don't need to send the animating SlaveVP around.. + * instead, can make a single slave per core, and coreCtrlr looks up the + * slave from having the core number. + * + *But, to stay compatible with all the other PR languages, leave it in.. + */ +void +PRServ__end_task( SlaveVP *animSlv ) + { PRServSemReq reqData; + + reqData.reqType = end_task; + reqData.callingSlv = animSlv; + + PR_WL__send_end_task_request( &reqData, animSlv, PRServ_MAGIC_NUMBER ); + } + + +/*Waits for all tasks that are direct children to end, then resumes calling + * task or thread + */ +void +PRServ__taskwait(SlaveVP *animSlv) + { + PRServSemReq reqData; + + reqData.reqType = taskwait; + reqData.callingSlv = animSlv; + + PR_WL__send_sem_request( &reqData, animSlv, PRServ_MAGIC_NUMBER ); + } + + + +//========================== send and receive ============================ +// + +inline int32 * +PRServ__give_self_taskID( SlaveVP *animSlv ) + { + return PR__give_task_ID( animSlv, PRServ_MAGIC_NUMBER ); + } + +//================================ send =================================== + +void +PRServ__send_of_type_to( void *msg, const int32 type, int32 *receiverID, + SlaveVP *senderSlv ) + { PRServSemReq reqData; + + reqData.reqType = send_type_to; + + reqData.msg = msg; + reqData.msgType = type; + reqData.receiverID = receiverID; + reqData.senderSlv = senderSlv; + + reqData.nextReqInHashEntry = NULL; + + PR_WL__send_sem_request( &reqData, senderSlv, PRServ_MAGIC_NUMBER ); + + //When come back from suspend, no longer own data reachable from 
msg + } + +void +PRServ__send_from_to( void *msg, int32 *senderID, int32 *receiverID, SlaveVP *senderSlv ) + { PRServSemReq reqData; + + reqData.reqType = send_from_to; + + reqData.msg = msg; + reqData.senderID = senderID; + reqData.receiverID = receiverID; + reqData.senderSlv = senderSlv; + + reqData.nextReqInHashEntry = NULL; + + PR_WL__send_sem_request( &reqData, senderSlv, PRServ_MAGIC_NUMBER ); + } + + +//================================ receive ================================ + +/*The "type" version of send and receive creates a many-to-one relationship. + * The sender is anonymous, and many sends can stack up, waiting to be + * received. The same receiver can also have send from-to's + * waiting for it, and those will be kept separate from the "type" + * messages. + */ +void * +PRServ__receive_type_to( const int32 type, int32* receiverID, SlaveVP *receiverSlv ) + { DEBUG__printf1(dbgRqstHdlr,"WL: receive type to %d",receiverID[1] ); + PRServSemReq reqData; + + reqData.reqType = receive_type_to; + + reqData.msgType = type; + reqData.receiverID = receiverID; + reqData.receiverSlv = receiverSlv; + + reqData.nextReqInHashEntry = NULL; + + PR_WL__send_sem_request( &reqData, receiverSlv, PRServ_MAGIC_NUMBER ); + + return receiverSlv->dataRetFromReq; + } + + + +/*Call this at the point a receiving task wants in-coming data. + * Use this from-to form when know senderID -- it makes a direct channel + * between sender and receiver. 
+ */ +void * +PRServ__receive_from_to( int32 *senderID, int32 *receiverID, SlaveVP *receiverSlv ) + { + PRServSemReq reqData; + + reqData.reqType = receive_from_to; + + reqData.senderID = senderID; + reqData.receiverID = receiverID; + reqData.receiverSlv = receiverSlv; + + reqData.nextReqInHashEntry = NULL; + DEBUG__printf2(dbgRqstHdlr,"WL: receive from %d to: %d", reqData.senderID[1], reqData.receiverID[1]); + + PR_WL__send_sem_request( &reqData, receiverSlv, PRServ_MAGIC_NUMBER ); + + return receiverSlv->dataRetFromReq; + } + + + + +//========================================================================== +// +/*A function singleton is a function whose body executes exactly once, on a + * single core, no matter how many times the fuction is called and no + * matter how many cores or the timing of cores calling it. + * + *A data singleton is a ticket attached to data. That ticket can be used + * to get the data through the function exactly once, no matter how many + * times the data is given to the function, and no matter the timing of + * trying to get the data through from different cores. + */ + +/*asm function declarations*/ +void asm_save_ret_to_singleton(PRServSingleton *singletonPtrAddr); +void asm_write_ret_from_singleton(PRServSingleton *singletonPtrAddr); + +/*Fn singleton uses ID as index into array of singleton structs held in the + * semantic environment. + */ +void +PRServ__start_fn_singleton( int32 singletonID, SlaveVP *animSlv ) + { + PRServSemReq reqData; + + // + reqData.reqType = singleton_fn_start; + reqData.singletonID = singletonID; + + PR_WL__send_sem_request( &reqData, animSlv, PRServ_MAGIC_NUMBER ); + if( animSlv->dataRetFromReq ) //will be 0 or addr of label in end singleton + { + PRServSemEnv *semEnv = PR_WL__give_sem_env_for( animSlv, PRServ_MAGIC_NUMBER ); + asm_write_ret_from_singleton(&(semEnv->fnSingletons[ singletonID])); + } + } + +/*Data singleton hands addr of loc holding a pointer to a singleton struct. 
+ * The start_data_singleton makes the structure and puts its addr into the + * location. + */ +void +PRServ__start_data_singleton( PRServSingleton **singletonAddr, SlaveVP *animSlv ) + { + PRServSemReq reqData; + + if( *singletonAddr && (*singletonAddr)->hasFinished ) + goto JmpToEndSingleton; + + reqData.reqType = singleton_data_start; + reqData.singletonPtrAddr = singletonAddr; + + PR_WL__send_sem_request( &reqData, animSlv, PRServ_MAGIC_NUMBER ); + if( animSlv->dataRetFromReq ) //either 0 or end singleton's return addr + { //Assembly code changes the return addr on the stack to the one + // saved into the singleton by the end-singleton-fn + //The return addr is at 0x4(%%ebp) + JmpToEndSingleton: + asm_write_ret_from_singleton(*singletonAddr); + } + //now, simply return + //will exit either from the start singleton call or the end-singleton call + } + +/*Uses ID as index into array of flags. If flag already set, resumes from + * end-label. Else, sets flag and resumes normally. + * + *Note, this call cannot be inlined because the instr addr at the label + * inside is shared by all invocations of a given singleton ID. 
+ */ +void +PRServ__end_fn_singleton( int32 singletonID, SlaveVP *animSlv ) + { + PRServSemReq reqData; + + //don't need this addr until after at least one singleton has reached + // this function + PRServSemEnv *semEnv = PR_WL__give_sem_env_for( animSlv, PRServ_MAGIC_NUMBER ); + asm_write_ret_from_singleton(&(semEnv->fnSingletons[ singletonID])); + + reqData.reqType = singleton_fn_end; + reqData.singletonID = singletonID; + + PR_WL__send_sem_request( &reqData, animSlv, PRServ_MAGIC_NUMBER ); + +EndSingletonInstrAddr: + return; + } + +void +PRServ__end_data_singleton( PRServSingleton **singletonPtrAddr, SlaveVP *animSlv ) + { + PRServSemReq reqData; + + //don't need this addr until after singleton struct has reached + // this function for first time + //do assembly that saves the return addr of this fn call into the + // data singleton -- that data-singleton can only be given to exactly + // one instance in the code of this function. However, can use this + // function in different places for different data-singletons. + asm_save_ret_to_singleton(*singletonPtrAddr); + + reqData.reqType = singleton_data_end; + reqData.singletonPtrAddr = singletonPtrAddr; + + PR_WL__send_sem_request( &reqData, animSlv, PRServ_MAGIC_NUMBER ); + } + +/*This executes the function in the masterVP, so it executes in isolation + * from any other copies -- only one copy of the function can ever execute + * at a time. + * + *It suspends to the master, and the request handler takes the function + * pointer out of the request and calls it, then resumes the VP. + *Only very short functions should be called this way -- for longer-running + * isolation, use transaction-start and transaction-end, which run the code + * between as work-code. 
+ */ +void +PRServ__animate_short_fn_in_isolation( PtrToAtomicFn ptrToFnToExecInMaster, + void *data, SlaveVP *animSlv ) + { + PRServSemReq reqData; + + // + reqData.reqType = atomic; + reqData.fnToExecInMaster = ptrToFnToExecInMaster; + reqData.dataForFn = data; + + PR_WL__send_sem_request( &reqData, animSlv, PRServ_MAGIC_NUMBER ); + } + + +/*This suspends to the master. + *First, it looks at the VP's data, to see the highest transactionID that VP + * already has entered. If the current ID is not larger, it throws an + * exception stating a bug in the code. Otherwise it puts the current ID + * there, and adds the ID to a linked list of IDs entered -- the list is + * used to check that exits are properly ordered. + *Next it is uses transactionID as index into an array of transaction + * structures. + *If the "VP_currently_executing" field is non-null, then put requesting VP + * into queue in the struct. (At some point a holder will request + * end-transaction, which will take this VP from the queue and resume it.) + *If NULL, then write requesting into the field and resume. + */ +void +PRServ__start_transaction( int32 transactionID, SlaveVP *animSlv ) + { + PRServSemReq reqData; + + // + reqData.callingSlv = animSlv; + reqData.reqType = trans_start; + reqData.transID = transactionID; + + PR_WL__send_sem_request( &reqData, animSlv, PRServ_MAGIC_NUMBER ); + } + +/*This suspends to the master, then uses transactionID as index into an + * array of transaction structures. + *It looks at VP_currently_executing to be sure it's same as requesting VP. + * If different, throws an exception, stating there's a bug in the code. + *Next it looks at the queue in the structure. + *If it's empty, it sets VP_currently_executing field to NULL and resumes. + *If something in, gets it, sets VP_currently_executing to that VP, then + * resumes both. 
+ */ +void +PRServ__end_transaction( int32 transactionID, SlaveVP *animSlv ) + { + PRServSemReq reqData; + + // + reqData.callingSlv = animSlv; + reqData.reqType = trans_end; + reqData.transID = transactionID; + + PR_WL__send_sem_request( &reqData, animSlv, PRServ_MAGIC_NUMBER ); + } + +//======================== Internal ================================== +/* + */ + +SlaveVP * +PRServ__create_slave_with_affinity( TopLevelFnPtr fnPtr, void *initData, + SlaveVP *creatingSlv, int32 coreToAssignOnto ) + { PRServSemReq reqData; + + //the semantic request data is on the stack and disappears when this + // call returns -- it's guaranteed to remain in the VP's stack for as + // long as the VP is suspended. + reqData.reqType = create_slave_w_aff; //not used, May 2012 + reqData.coreToAssignOnto = coreToAssignOnto; + reqData.fnPtr = fnPtr; + reqData.initData = initData; + reqData.callingSlv = creatingSlv; + + PR_WL__send_create_slaveVP_req( &reqData, creatingSlv, PRServ_MAGIC_NUMBER ); + + return creatingSlv->dataRetFromReq; + } + diff -r 999f2966a3e5 -r dafae55597ce Services_Offered_by_PR/Services_Language/PRServ.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Services_Offered_by_PR/Services_Language/PRServ.h Tue Oct 23 23:46:17 2012 -0700 @@ -0,0 +1,296 @@ +/* + * Copyright 2009 OpenSourceStewardshipFoundation.org + * Licensed under GNU General Public License version 2 + * + * Author: seanhalle@yahoo.com + * + */ + +#ifndef _PRServ_H +#define _PRServ_H + +#include "Queue_impl/PrivateQueue.h" +#include "Hash_impl/PrivateHash.h" +#include "PR_impl/PR.h" +#include "Measurement/dependency.h" + + +//=========================================================================== + //uniquely identifies PRServ -- should be a jenkins char-hash of "PRServ" onto int32 +#define PRServ_MAGIC_NUMBER 0000000002 + +#define NUM_STRUCS_IN_SEM_ENV 1000 + + +//=========================================================================== +/*This header defines everything specific to the PRServ 
semantic plug-in + */ +typedef struct _PRServSemReq PRServSemReq; +//typedef struct _PRServTaskStub PRServTaskStub; +//typedef void (*PRServTaskFnPtr ) ( void *, SlaveVP *); +//typedef void (*PtrToAtomicFn ) ( void * ); //executed atomically in master +//=========================================================================== + + +//=========================================================================== + +/*This is placed into semData, used for dependencies and wait construct*/ +struct _PRServTaskStub + { + //====== The first fields must match PRLangMetaTask fields ====== + int32 langMagicNumber; //magic num must be 1st field of langMetaTask + PRMetaTask *protoMetaTask; //back-link must always be 2nd field + //====== end PRLangMetaTask fields ========= + + }; + + +enum PRServReqType + { + submit_task = 1, + end_task, + create_slave, + create_slave_w_aff, + dissipate_slave, + //=============================== + send_type_to, + receive_type_to, + send_from_to, + receive_from_to, + //=============================== + taskwait, + malloc_req, + free_req, + singleton_fn_start, + singleton_fn_end, + singleton_data_start, + singleton_data_end, + atomic, + trans_start, + trans_end + }; + +struct _PRServSemReq + { enum PRServReqType reqType; + SlaveVP *callingSlv; + PRServTaskType *taskType; + void *args; +// PRServTaskStub *taskStub; //not needed -- get via PR accessor from slv + + SlaveVP *senderSlv; + SlaveVP *receiverSlv; + int32 *senderID; + int32 *receiverID; + int32 msgType; + void *msg; + PRServSemReq *nextReqInHashEntry; +//In PRReq: int32 *taskID; + + TopLevelFnPtr fnPtr; + void *initData; + int32 coreToAssignOnto; + +//These, below, should move to util language.. 
+ int32 sizeToMalloc; + void *ptrToFree; + + int32 singletonID; + PRServSingleton **singletonPtrAddr; + + PtrToAtomicFn fnToExecInMaster; + void *dataForFn; + + int32 transID; + } +/* PRServSemReq */; + + +typedef struct + { PRSemEnv *protoSemEnv; + PrivQueueStruc *slavesReadyToResumeQ; //Shared (slaves not pinned) + PrivQueueStruc *freeTaskSlvRecycleQ; //Shared + PrivQueueStruc *taskReadyQ; //Shared (tasks not pinned) + HashTable *argPtrHashTbl; + HashTable *commHashTbl; + int32 nextCoreToGetNewSlv; + int32 primitiveStartTime; + + //fix limit on num with dynArray + PRServSingleton fnSingletons[NUM_STRUCS_IN_SEM_ENV]; + PRServTrans transactionStrucs[NUM_STRUCS_IN_SEM_ENV]; + + #ifdef HOLISTIC__TURN_ON_OBSERVE_UCC + ListOfArrays* unitList; + ListOfArrays* ctlDependenciesList; + ListOfArrays* commDependenciesList; + NtoN** ntonGroups; + PrivDynArrayInfo* ntonGroupsInfo; + ListOfArrays* dynDependenciesList; + Unit last_in_slot[NUM_CORES * NUM_ANIM_SLOTS]; + ListOfArrays* hwArcs; + #endif + + #ifdef HOLISTIC__TURN_ON_PERF_COUNTERS + ListOfArrays* counterList[NUM_CORES]; + #endif + } +PRServSemEnv; + + +typedef struct _TransListElem TransListElem; +struct _TransListElem + { + int32 transID; + TransListElem *nextTrans; + }; +//TransListElem + +/* PR now handles what this used to be used for +enum PRServSlvType + { FreeTaskSlv = 1, + SlotTaskSlv, + ThreadSlv + }; +*/ + +typedef struct + { + int32 highestTransEntered; + TransListElem *lastTransEntered; + int32 primitiveStartTime; +// PRServTaskStub *taskStub; //get from slave via PR accessor + } +PRServSemData; + +//=========================================================================== + +void +PRServ__create_seed_slave_and_do_work( TopLevelFnPtr fn, void *initData ); + +int32 +PRServ__giveMinWorkUnitCycles( float32 percentOverhead ); + +void +PRServ__begin_primitive(); + +int32 +PRServ__end_primitive_and_give_cycles(); + +int32 +PRServ__giveIdealNumWorkUnits(); + +int32 
+PRServ__give_number_of_cores_to_schedule_onto(); + +//======================= + +void +PRServ__start( SlaveVP *seedSlv ); + +void +PRServ__cleanup_after_shutdown(); + +//======================= + +SlaveVP * +PRServ__create_thread( TopLevelFnPtr fnPtr, void *initData, + SlaveVP *creatingThd ); + +void +PRServ__end_thread( SlaveVP *thdToEnd ); + +//======================= + +#define PRServ__malloc( numBytes, callingSlave ) PR_App__malloc( numBytes, callingSlave) + +#define PRServ__free(ptrToFree, callingSlave ) PR_App__free( ptrToFree, callingSlave ) + + +//======================= +void +PRServ__submit_task( PRServTaskType *taskType, void *args, SlaveVP *animSlv); + +inline int32 * +PRServ__create_taskID_of_size( int32 numInts, SlaveVP *animSlv ); + +void +PRServ__submit_task_with_ID( PRServTaskType *taskType, void *args, int32 *taskID, + SlaveVP *animSlv); + +void +PRServ__end_task( SlaveVP *animSlv ); + +//========================= +void +PRServ__taskwait(SlaveVP *animSlv); + + +inline int32 * +PRServ__give_self_taskID( SlaveVP *animSlv ); + +void +PRServ__send_of_type_to( void *msg, const int32 type, int32 *receiverID, + SlaveVP *senderSlv ); + +void +PRServ__send_from_to( void *msg, int32 *senderID, int32 *receiverID, SlaveVP *senderSlv ); + +void * +PRServ__receive_type_to( const int32 type, int32* receiverID, SlaveVP *receiverSlv ); + +void * +PRServ__receive_from_to( int32 *senderID, int32 *receiverID, SlaveVP *receiverSlv ); + +//======================= Concurrency Stuff ====================== +void +PRServ__start_fn_singleton( int32 singletonID, SlaveVP *animSlv ); + +void +PRServ__end_fn_singleton( int32 singletonID, SlaveVP *animSlv ); + +void +PRServ__start_data_singleton( PRServSingleton **singeltonAddr, SlaveVP *animSlv ); + +void +PRServ__end_data_singleton( PRServSingleton **singletonAddr, SlaveVP *animSlv ); + +void +PRServ__animate_short_fn_in_isolation( PtrToAtomicFn ptrToFnToExecInMaster, + void *data, SlaveVP *animSlv ); + +void 
+PRServ__start_transaction( int32 transactionID, SlaveVP *animSlv ); + +void +PRServ__end_transaction( int32 transactionID, SlaveVP *animSlv ); + + +//========================= Internal use only ============================= +void +PRServ__Request_Handler( SlaveVP *requestingSlv, void *_semEnv ); + +SlaveVP * +PRServ__assign_work_to_slot( void *_semEnv, AnimSlot *slot ); + +SlaveVP* +PRServ__create_slave_helper( TopLevelFnPtr fnPtr, void *initData, + PRServSemEnv *semEnv, int32 coreToAssignOnto ); + +PRMetaTask * +PR_int__create_generic_slave_meta_task( void *initData ); + + +SlaveVP * +PRServ__create_slave_with( TopLevelFnPtr fnPtr, void *initData, + SlaveVP *creatingSlv ); + +SlaveVP * +PRServ__create_slave_with_affinity( TopLevelFnPtr fnPtr, void *initData, + SlaveVP *creatingSlv, int32 coreToAssignOnto); + +//===================== Measurement of Lang Overheads ===================== +#include "Measurement/PRServ_Measurement.h" + +//=========================================================================== +#endif /* _PRServ_H */ + diff -r 999f2966a3e5 -r dafae55597ce Services_Offered_by_PR/Services_Language/PRServ_PluginFns.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Services_Offered_by_PR/Services_Language/PRServ_PluginFns.c Tue Oct 23 23:46:17 2012 -0700 @@ -0,0 +1,211 @@ +/* + * Copyright 2010 OpenSourceCodeStewardshipFoundation + * + * Licensed under BSD + */ + +#include +#include + +#include "Queue_impl/PrivateQueue.h" +#include "PRServ.h" +#include "PRServ_Request_Handlers.h" + +//=========================== Local Fn Prototypes =========================== +void +resume_slaveVP( SlaveVP *slave, PRServSemEnv *semEnv ); + +inline void +handleSemReq( PRReqst *req, SlaveVP *requestingSlv, PRServSemEnv *semEnv ); + +inline void +handleDissipate( SlaveVP *requestingSlv, PRServSemEnv *semEnv ); + +inline void +handleCreate( PRReqst *req, SlaveVP *requestingSlv, PRServSemEnv *semEnv ); + +//============================== Assigner 
================================== +// +/*The assigner is complicated by having both tasks and explicitly created + * VPs, and by tasks being able to suspend. + *It can't use an explicit slave to animate a task because of stack + * pollution. So, it has to keep the two kinds separate. + * + *Q: one assigner for both tasks and slaves, or separate? + * + *Simplest way for the assigner logic is with a Q for extra empty task + * slaves, and another Q for slaves of both types that are ready to resume. + * + *Keep a current task slave for each anim slot. The request handler manages + * it by pulling from the extraTaskSlvQ when a task suspends, or else + * creating a new task slave if taskSlvQ empty. + *Assigner only assigns a task to the current task slave for the slot. + *If no more tasks, then takes a ready to resume slave, if also none of them + * then dissipates extra task slaves (one per invocation). + *Shutdown condition is: must have no suspended tasks, and no suspended + * explicit slaves and no more tasks in taskQ. Will only have the masters + * plus a current task slave for each slot.. detects this condition. + * + *Having the two types of slave is part of having communications directly + * between tasks, and tasks to explicit slaves, which requires the ability + * to suspend both kinds, but also to keep explicit slave stacks clean from + * the junk tasks are allowed to leave behind. + */ +SlaveVP * +PRServ__assign_work_to_slot( void *_semEnv, AnimSlot *slot ) + { SlaveVP *returnSlv; + PRServSemEnv *semEnv; + int32 coreNum, slotNum; + PRMetaTask *returnMetaTask = NULL, *newTaskStub; + + coreNum = slot->coreSlotIsOn; + slotNum = slot->slotIdx; + + semEnv = (PRServSemEnv *)_semEnv; + + //Check for suspended slaves that are ready to resume + returnSlv = readPrivQ( semEnv->slavesReadyToResumeQ ); + if( returnSlv != NULL ) //Yes, have a slave, so return it. 
+ { returnSlv->coreAnimatedBy = coreNum; + returnMetaTask = returnSlv->metaTask; + goto ReturnTheMetaTask; + } + + newTaskStub = readPrivQ( semEnv->taskReadyQ ); + if( newTaskStub != NULL ) + { returnMetaTask = newTaskStub->protoMetaTask; + goto ReturnTheMetaTask; + } + +ReturnTheMetaTask: //doing gotos to here should help with holistic.. + + #ifdef HOLISTIC__TURN_ON_OBSERVE_UCC + //This no longer works -- should be moved into PR in master + //This assumes the task has already been assigned to a slave, which happens + // inside Master.. + if( returnMetaTask == NULL ) + { returnSlv = semEnv->process->idleSlv[coreNum][slotNum]; + + //things that would normally happen in resume(), but these VPs + // never go there + returnSlv->numTimesAssignedToASlot++; + Unit newU; + newU.vp = returnSlv->slaveID; + newU.task = returnSlv->numTimesAssignedToASlot; + addToListOfArrays(Unit,newU,semEnv->unitList); + + if (returnSlv->numTimesAssignedToASlot > 1) + { Dependency newD; + newD.from_vp = returnSlv->slaveID; + newD.from_task = returnSlv->numTimesAssignedToASlot - 1; + newD.to_vp = returnSlv->slaveID; + newD.to_task = returnSlv->numTimesAssignedToASlot; + addToListOfArrays(Dependency, newD, semEnv->ctlDependenciesList); + } + returnMetaTask = returnSlv->metaTask; + } + else //returnSlv != NULL + { //assignSlv->numTimesAssigned++; + Unit prev_in_slot = + semEnv->last_in_slot[coreNum * NUM_ANIM_SLOTS + slotNum]; + if(prev_in_slot.vp != 0) + { Dependency newD; + newD.from_vp = prev_in_slot.vp; + newD.from_task = prev_in_slot.task; + newD.to_vp = returnSlv->slaveID; + newD.to_task = returnSlv->numTimesAssignedToASlot; + addToListOfArrays(Dependency,newD,semEnv->hwArcs); + } + prev_in_slot.vp = returnSlv->slaveID; + prev_in_slot.task = returnSlv->numTimesAssignedToASlot; + semEnv->last_in_slot[coreNum * NUM_ANIM_SLOTS + slotNum] = + prev_in_slot; + } + #endif + return( returnMetaTask ); + } + + +//=========================== Request Handler ============================ +// +/* + * 
(Not inline because invoked indirectly via a pointer) + */ + +void +handleSemReq( PRReqst *req, SlaveVP *reqSlv, PRServSemEnv *semEnv ) + { PRServSemReq *semReq; + + semReq = PR_PI__take_sem_reqst_from(req); + if( semReq == NULL ) return; + switch( semReq->reqType ) //sem handlers are all in other file + { + case send_type_to: handleSendTypeTo( semReq, semEnv); + break; + case send_from_to: handleSendFromTo( semReq, semEnv); + break; + case receive_type_to: handleReceiveTypeTo(semReq, semEnv); + break; + case receive_from_to: handleReceiveFromTo(semReq, semEnv); + break; + case taskwait: handleTaskwait( semReq, reqSlv, semEnv); + break; + + //==================================================================== + case malloc_req: handleMalloc( semReq, reqSlv, semEnv); + break; + case free_req: handleFree( semReq, reqSlv, semEnv); + break; + case singleton_fn_start: handleStartFnSingleton(semReq, reqSlv, semEnv); + break; + case singleton_fn_end: handleEndFnSingleton( semReq, reqSlv, semEnv); + break; + case singleton_data_start:handleStartDataSingleton(semReq,reqSlv,semEnv); + break; + case singleton_data_end: handleEndDataSingleton(semReq, reqSlv, semEnv); + break; + case atomic: handleAtomic( semReq, reqSlv, semEnv); + break; + case trans_start: handleTransStart( semReq, reqSlv, semEnv); + break; + case trans_end: handleTransEnd( semReq, reqSlv, semEnv); + break; + } + } + + + + +//=========================== Helper ============================== +void +resume_slaveVP( SlaveVP *slave, PRServSemEnv *semEnv ) + { + //both suspended tasks and suspended explicit slaves resumed with this + writePrivQ( slave, semEnv->slavesReadyToResumeQ ); + if( semEnv->protoSemEnv->hasWork != TRUE ) + semEnv->protoSemEnv->hasWork = TRUE; + + #ifdef HOLISTIC__TURN_ON_PERF_COUNTERS +/* + int lastRecordIdx = slave->counter_history_array_info->numInArray -1; + CounterRecord* lastRecord = slave->counter_history[lastRecordIdx]; + saveLowTimeStampCountInto(lastRecord->unblocked_timestamp); 
+*/ + #endif + #ifdef HOLISTIC__TURN_ON_OBSERVE_UCC + slave->numTimesAssignedToASlot++; //Somewhere here! + Unit newU; + newU.vp = slave->slaveID; + newU.task = slave->numTimesAssignedToASlot; + addToListOfArrays(Unit,newU,semEnv->unitList); + + if (slave->numTimesAssignedToASlot > 1) + { Dependency newD; + newD.from_vp = slave->slaveID; + newD.from_task = slave->numTimesAssignedToASlot - 1; + newD.to_vp = slave->slaveID; + newD.to_task = slave->numTimesAssignedToASlot; + addToListOfArrays(Dependency, newD ,semEnv->ctlDependenciesList); + } + #endif + } diff -r 999f2966a3e5 -r dafae55597ce Services_Offered_by_PR/Services_Language/PRServ_Request_Handlers.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Services_Offered_by_PR/Services_Language/PRServ_Request_Handlers.c Tue Oct 23 23:46:17 2012 -0700 @@ -0,0 +1,1285 @@ +/* + * Copyright 2010 OpenSourceCodeStewardshipFoundation + * + * Licensed under BSD + */ + +#include +#include + +#include "PR_impl/PR.h" +#include "Queue_impl/PrivateQueue.h" +#include "Hash_impl/PrivateHash.h" +#include "PRServ.h" +#include "PRServ_Request_Handlers.h" + + + + +//=========================== Local Fn Prototypes =========================== +void +resume_slaveVP( SlaveVP *slave, PRServSemEnv *semEnv ); + + + +//========================================================================== +// Helpers +// + +/*Only clone the elements of req used in these reqst handlers + */ +PRServSemReq * +cloneReq( PRServSemReq *semReq ) + { PRServSemReq *clonedReq; + + clonedReq = PR_PI__malloc( sizeof(PRServSemReq) ); + clonedReq->reqType = semReq->reqType; + clonedReq->senderSlv = semReq->senderSlv; + clonedReq->receiverSlv= semReq->receiverSlv; + clonedReq->msg = semReq->msg; + clonedReq->nextReqInHashEntry = NULL; + + return clonedReq; + } + + + +HashEntry * +giveEntryElseInsertReqst32( int32 *key, PRServSemReq *semReq, + HashTable *commHashTbl ) + { HashEntry *entry; + PRServSemReq *waitingReq; + + entry = getEntryFromTable32( key, commHashTbl ); 
+ if( entry == NULL ) + { //no waiting sends or receives, so add this request and exit + // note: have to clone the request because it's on stack of sender + addValueIntoTable32( key, cloneReq( semReq ), commHashTbl ); + return NULL; + } + waitingReq = (PRServSemReq *)entry->content; + if( waitingReq == NULL ) //might happen when last waiting gets paired + { //no waiting sends or receives, so add this request and exit + entry->content = semReq; + return NULL; + } + return entry; + } + + +inline PRServPointerEntry * +create_pointer_entry( ) + { PRServPointerEntry *newEntry; + + newEntry = PR_PI__malloc( sizeof(PRServPointerEntry) ); + newEntry->hasEnabledNonFinishedWriter = FALSE; + newEntry->numEnabledNonDoneReaders = 0; + newEntry->waitersQ = makePrivQ(); + + return newEntry; + } + +/*malloc's space and initializes fields -- and COPIES the arg values + * to new space + */ +inline PRServTaskStub * +create_task_stub( PRServTaskType *taskType, void **args ) + { void **newArgs; + PRServTaskStub* newStub = PR_int__malloc( sizeof(PRMetaTask) + taskType->sizeOfArgs ); + newStub->numBlockingProp = taskType->numCtldArgs; + newStub->taskType = taskType; + newStub->ptrEntries = + PR_int__malloc( taskType->numCtldArgs * sizeof(PRServPointerEntry *) ); + newArgs = (void **)( (uint8 *)newStub + sizeof(PRMetaTask) ); + newStub->args = newArgs; + newStub->numLiveChildTasks = 0; + newStub->numLiveChildThreads = 0; + newStub->isEnded = FALSE; + + //Copy the arg-pointers.. can be more arguments than just the ones + // that StarSs uses to control ordering of task execution. 
+ memcpy( newArgs, args, taskType->sizeOfArgs ); + + return newStub; + } + +inline PRServTaskStubCarrier * +create_task_carrier( PRServTaskStub *taskStub, int32 argNum, int32 rdOrWrite ) + { PRServTaskStubCarrier *newCarrier; + + newCarrier = PR_PI__malloc( sizeof(PRServTaskStubCarrier) ); + newCarrier->taskStub = taskStub; + newCarrier->argNum = argNum; + newCarrier->isReader = rdOrWrite == READER; + } + + + +//=========================== ============================== + +/*Application invokes this via wrapper library, when it explicitly creates a + * thread with the "PRServ__create_thread()" command. + * + *Slave creation is a special form, so PR does handling before calling this. + * It does creation of the new slave, and hands it to this handler. + *This handler is registered with PR during PRServ__start(). + * + *So, here, create a task Stub that contains a marker stating this is a thread. + * Then, attach the task stub to the slave's meta Task via a PR command. + * + *When slave dissipates, PR will call the registered recycler for the task stub. + */ +inline void +handleCreateThd( PRReqst *req, SlaveVP *requestingSlv, SlaveVP *newSlv, PRServSemEnv *semEnv ) + { PRServSemReq *semReq; + PRServTaskStub *taskStub, *parentTaskStub; + + semReq = PR_PI__take_sem_reqst_from( req ); + + parentTaskStub = PR_PI__give_lang_meta_task( requestingSlv ); + parentTaskStub->numLiveChildThreads += 1; + + taskStub = create_thread_task_stub(); //only used for wait info + taskStub->parentTaskStub = parentTaskStub; + + //note, semantic data will be initialized by separate, registered + // initializer, at the point it is accessed the first time. + + //================= Assign the new thread to a core =================== + #ifdef DEBUG__TURN_ON_SEQUENTIAL_MODE + newSlv->coreAnimatedBy = 0; + + #else + //Assigning slaves to cores is part of SSR code.. 
+ int32 coreToAssignOnto = semReq->coreToAssignOnto; + if(coreToAssignOnto < 0 || coreToAssignOnto >= NUM_CORES ) + { //out-of-range, so round-robin assignment + newSlv->coreAnimatedBy = semEnv->nextCoreToGetNewSlv; + + if( semEnv->nextCoreToGetNewSlv >= NUM_CORES - 1 ) + semEnv->nextCoreToGetNewSlv = 0; + else + semEnv->nextCoreToGetNewSlv += 1; + } + else //core num in-range, so use it + { newSlv->coreAnimatedBy = coreToAssignOnto; + } + #endif + //======================================================================== + + + + DEBUG__printf2(dbgRqstHdlr,"Create from: %d, new VP: %d", + requestingSlv->slaveID, newSlv->slaveID) + + #ifdef HOLISTIC__TURN_ON_OBSERVE_UCC + Dependency newD; + newD.from_vp = requestingSlv->slaveID; + newD.from_task = requestingSlv->numTimesAssignedToASlot; + newD.to_vp = newSlv->slaveID; + newD.to_task = 1; + addToListOfArrays(Dependency,newD,semEnv->commDependenciesList); + #endif + + //For PRServ, caller needs ptr to created thread returned to it + requestingSlv->dataRetFromReq = newSlv; + resume_slaveVP(requestingSlv , semEnv ); + resume_slaveVP( newSlv, semEnv ); + } + +/*Initialize semantic data struct.. this initializer doesn't need any input, + * but some languages may need something from inside the request that was sent + * to create a slave.. in that case, just make initializer do the malloc then + * use the PR_PI__give_sem_data inside the create handler, and fill in the + * semData values there. 
+ */ +void * createInitialSemanticData( ) + { PRServSemData *semData; + + semData = PR_PI__malloc( sizeof(PRServSemData) ); + + semData->highestTransEntered = -1; + semData->lastTransEntered = NULL; + return semData; + } + +/*SlaveVP dissipate -- this is NOT task-end!, only call this to end explicitly + * created threads + */ +inline void +handleDissipate( SlaveVP *requestingSlv, PRServSemEnv *semEnv ) + { PRServSemData *semData; + PRServTaskStub *parentTaskStub, *ownTaskStub; + + DEBUG__printf1(dbgRqstHdlr,"Dissipate request from processor %d", + requestingSlv->slaveID) + + ownTaskStub = PR_PI__give_lang_meta_task( requestingSlv, PRServ_MAGIC_NUMBER ); + parentTaskStub = ownTaskStub->parentTaskStub; + parentTaskStub->numLiveChildThreads -= 1; //parent wasn't freed, even if ended + + //if all children ended, then free this task's stub + // else, keep stub around, and last child will free it (below) + if( ownTaskStub->numLiveChildTasks == 0 && + ownTaskStub->numLiveChildThreads == 0 ) + free_task_stub( ownTaskStub ); + else + ownTaskStub->isEnded = TRUE; //for children to see when they end + + //Now, check on parents waiting on child threads to end + if( parentTaskStub->isWaitingForChildThreadsToEnd && + parentTaskStub->numLiveChildThreads == 0 ) + { parentTaskStub->isWaitingForChildThreadsToEnd = FALSE; + if( parentTaskStub->isWaitingForChildTasksToEnd ) + return; //still waiting on tasks (should be impossible) + else //parent free to resume + resume_slaveVP( PR_PI__give_slave_assigned_to(parentTaskStub), semEnv ); + } + + //check if this is last child of ended parent (note, not possible to + // have more than one level of ancestor waiting to be freed) + if( parentTaskStub->isEnded && + parentTaskStub->numLiveChildTasks == 0 && + parentTaskStub->numLiveChildThreads == 0 ) + { free_task_stub( parentTaskStub ); //just stub, semData already freed + } + + FreeSlaveStateAndReturn: + //Used to free the semData and requesting slave's base state, but + // now PR does those 
things, so nothing more to do.. +//PR handles this: PR_PI__free( semData ); +//PR handles this: PR_PI__dissipate_slaveVP( requestingSlv ); + return; + } + +/*Register this with PR, during PRServ start + * + *At some point, may change PR so that it recycles semData, in which case this + * only gets called when a process shuts down.. at that point, PR will call + * dissipate on all the slaves it has in the recycle Q. + */ +void +freePRServSemData( void *_semData ) + { // + PR_PI__free( _semData ); + } + +void resetPRServSemData( void *_semData ) + { PRServSemData *semData = (PRServSemData *)_semData; + + semData->highestTransEntered = -1; + semData->lastTransEntered = NULL; + } + +//========================================================================== +// +// +/*Submit Task + * + *PR creates a PRMetaTask and passes it in. This handler adds language- + * specific stuff to it. The language-specific stuff is linked to the + * PRMetaTask, but if the task is suspended for any reason, the lang-specific + * part is moved to the semData of the slave that is animating the task. + *So, while the PRMetaTask is inside the creating language's semantic + * env, waiting to be assigned to a slave for animation, the lang-specific + * task info is accessed from the PRMetaTask. But once the task suspends, + * that lang-specific task info transfers to the slave's semData. All lang + * constructs that want to access it must get it from the semData. + *However, taskEnd still accesses the lang-specific task info from the + * PRMetaTask, whether it suspended or not.. and the task code can access + * data to be used within the application behavior via + * PR__give_task_info( animatingSlave ). + * + *Uses a hash table to match the arg-pointers to each other. So, an + * argument-pointer is one-to-one with a hash-table entry. + * + *If overlapping region detection is enabled, then a hash entry is one + * link in a ring of all entries that overlap each other. 
For example, + * say region A shared common addresses with region B, but the pointers + * to them are different, then the hash entries for the two would be + * linked in a ring. When a pointer is processed, all the pointers in + * the ring are processed (Doesn't differentiate independent siblings + * from parent-child or conjoined twins overlap..) + * NOT ENABLED AS OF MAY 25 2012 + * + *A hash entry has a queue of tasks that are waiting to access the + * pointed-to region. The queue goes in the order of creation of + * the tasks. Each entry in the queue has a pointer to the task-stub + * and whether the task reads-only vs writes to the hash-entry's region. + * + *A hash entry also has a count of the enabled but not yet finished readers + * of the region. It also has a flag that says whether a writer has been + * enabled and is not yet finished. + * + *There are two kinds of events that access a hash entry: creation of a + * task and end of a task. + * + * + * ========================== creation ========================== + * + *At creation, make a task-stub. Set the count of blocking propendents + * to the number of controlled arguments (a task can have + * arguments that are not controlled by the language, like simple integer + * inputs from the sequential portion. Note that all controlled arguments + * are pointers, and marked as controlled in the application code). + * + *The controlled arguments are then processed one by one. + *Processing an argument means getting the hash of the pointer. Then, + * looking up the hash entry. (If none, create one). + *With the hash entry: + * + *If the arg is a reader, and the entry does not have an enabled + * non-finished writer, and the queue is empty (could be prev readers, + * then a writer that got queued and now new readers that have to also be + * queued). + *The reader is free. So, decrement the blocking-propendent count in + * the task-stub. If the count is zero, then put the task-stub into the + * readyQ. 
+ *At the same time, increment the hash-entry's count of enabled and + * non-finished readers. + * + *Otherwise, the reader is put into the hash-entry's Q of waiters + * + *If the arg is a writer, plus the entry does not have a current writer, + * plus the number of enabled non-finished readers is zero, plus the Q is + * empty, then the writer is free. Mark the entry has having an + * enabled and non-finished writer. Decrement the blocking-propendent + * count in the writer's task-stub. If the count is zero, then put the + * task-stub into the readyQ. + * + *Otherwise, put the writer into the entry's Q of waiters. + * + *No matter what, if the hash entry was chained, put it at the start of + * the chain. (Means no-longer-used pointers accumulate at end of chain, + * decide garbage collection of no-longer-used pointers later) + * + */ +inline +void * +handleSubmitTask( PRServSemReq *semReq, PRServSemEnv *semEnv ) + { uint32 key[3]; + HashEntry *rawHashEntry; //has char *, but use with uint32 * + PRServPointerEntry *ptrEntry; //contents of hash table entry for an arg pointer + void **args; + PRServTaskStub *taskStub, *parentTaskStub; + PRServTaskType *taskType; + PRServTaskStubCarrier *taskCarrier; + + HashTable * + argPtrHashTbl = semEnv->argPtrHashTbl; + + + /* ========================== creation ========================== + *Make a task-stub. Set the count of blocking propendents + * to the number of controlled arguments (a task can have + * arguments that are not controlled by the language, like simple integer + * inputs from the sequential portion. Note that all controlled arguments + * are pointers, and marked as controlled in the application code). 
+ */ + args = semReq->args; + taskType = semReq->taskType; //this is PRServ task type struct + taskStub = create_task_stub( taskType, args );//copies arg ptrs + + taskStub->numBlockingProp = taskType->numCtldArgs; + //PR does this (metaTask contains taskID): taskStub->taskID = semReq->taskID; + + parentTaskStub = (PRServTaskStub *)PR_PI__give_lang_meta_task(semReq->callingSlv, PRServ_MAGIC_NUMBER); + taskStub->parentTaskStub = parentTaskStub; + parentTaskStub->numLiveChildTasks += 1; + + //DEBUG__printf3(dbgRqstHdlr,"Submit req from slaveID: %d, from task: %d, for task: %d", semReq->callingSlv->slaveID, parentSemData->taskStub->taskID[1], taskStub->taskID[1]) + DEBUG__printf2(dbgRqstHdlr,"Submit req from slaveID: %d, for task: %d", semReq->callingSlv->slaveID, taskStub->taskID[1]) + + /*=============== Process args ================= + *The controlled arguments are processed one by one. + *Processing an argument means getting the hash of the pointer. Then, + * looking up the hash entry. (If none, create one). + */ + int32 argNum; + for( argNum = 0; argNum < taskType->numCtldArgs; argNum++ ) + { + key[0] = 2; //two 32b values in key + *( (uint64*)&key[1]) = (uint64)args[argNum]; //write 64b into two 32b + + /*If the hash entry was chained, put it at the + * start of the chain. (Means no-longer-used pointers accumulate + * at end of chain, decide garbage collection later) */ + rawHashEntry = getEntryFromTable32( key, argPtrHashTbl ); + if( rawHashEntry == NULL ) + { //adding a value auto-creates the hash-entry + ptrEntry = create_pointer_entry(); + rawHashEntry = addValueIntoTable32( key, ptrEntry, argPtrHashTbl ); + } + else + { ptrEntry = (PRServPointerEntry *)rawHashEntry->content; + if( ptrEntry == NULL ) + { ptrEntry = create_pointer_entry(); + rawHashEntry = addValueIntoTable32(key, ptrEntry, argPtrHashTbl); + } + } + taskStub->ptrEntries[argNum] = ptrEntry; + + /*Have the hash entry. 
+ *If the arg is a reader and the entry does not have an enabled + * non-finished writer, and the queue is empty. */ + if( taskType->argTypes[argNum] == READER ) + { if( !ptrEntry->hasEnabledNonFinishedWriter && + isEmptyPrivQ( ptrEntry->waitersQ ) ) + { /*The reader is free. So, decrement the blocking-propendent + * count in the task-stub. If the count is zero, then put the + * task-stub into the readyQ. At the same time, increment + * the hash-entry's count of enabled and non-finished readers.*/ + taskStub->numBlockingProp -= 1; + if( taskStub->numBlockingProp == 0 ) + { writePrivQ( taskStub, semEnv->taskReadyQ ); + if( semEnv->protoSemEnv->hasWork != TRUE ) + semEnv->protoSemEnv->hasWork = TRUE; + } + ptrEntry->numEnabledNonDoneReaders += 1; + } + else + { /*Otherwise, the reader is put into the hash-entry's Q of + * waiters*/ + taskCarrier = create_task_carrier( taskStub, argNum, READER ); + writePrivQ( taskCarrier, ptrEntry->waitersQ ); + } + } + else //arg is a writer + { /*the arg is a writer, plus the entry does not have a current + * writer, plus the number of enabled non-finished readers is + * zero, (the Q must be empty, else bug!) then the writer is free*/ + if( !ptrEntry->hasEnabledNonFinishedWriter && + ptrEntry->numEnabledNonDoneReaders == 0 ) + { /*Mark the entry has having a enabled and non-finished writer. + * Decrement the blocking-propenden count in the writer's + * task-stub. 
If the count is zero, then put the task-stub + * into the readyQ.*/ + taskStub->numBlockingProp -= 1; + if( taskStub->numBlockingProp == 0 ) + { writePrivQ( taskStub, semEnv->taskReadyQ ); + if( semEnv->protoSemEnv->hasWork != TRUE ) + semEnv->protoSemEnv->hasWork = TRUE; + } + ptrEntry->hasEnabledNonFinishedWriter = TRUE; + } + else + {/*Otherwise, put the writer into the entry's Q of waiters.*/ + taskCarrier = create_task_carrier( taskStub, argNum, WRITER ); + writePrivQ( taskCarrier, ptrEntry->waitersQ ); + } + } + } //for argNum + + //resume the parent, creator + resume_slaveVP( semReq->callingSlv, semEnv ); + + return; + } + + +/* ========================== end of task =========================== + * + *At the end of a task, + *The task's controlled arguments are processed one by one. + *Processing an argument means getting the hash of the pointer. Then, + * looking up the hash entry (and putting the entry at the start of the + * chain, if there was a chain). + *With the hash entry: + * + *If the arg is a reader, then decrement the enabled and non-finished + * reader-count in the hash-entry. If the count becomes zero, then take + * the next entry from the Q. It should be a writer, or else there's a + * bug in this algorithm. + *Set the hash-entry to have an enabled non-finished writer. Decrement + * the blocking-propendent-count of the writer's task-stub. If the count + * has reached zero, then put the task-stub into the readyQ. + * + *If the arg is a writer, then clear the enabled non-finished writer flag + * of the hash-entry. Take the next entry from the waiters Q. + *If it is a writer, then turn the flag back on. Decrement the writer's + * blocking-propendent-count in its task-stub. If it becomes zero, then + * put the task-stub into the readyQ. + * + *If waiter is a reader, then do a loop, getting all waiting readers. + * For each, increment the hash-entry's count of enabled + * non-finished readers. 
Decrement the blocking propendents count of the + * reader's task-stub. If it reaches zero, then put the task-stub into the + * readyQ. + *Repeat until encounter a writer -- put that writer back into the Q. + * + *May 2012 -- not keeping track of how many references to a given ptrEntry + * exist, so no way to garbage collect.. + *TODO: Might be safe to delete an entry when task ends and waiterQ empty + * and no readers and no writers.. + */ +inline void +handleEndTask( void *langMetaTask, PRServSemReq *semReq, PRServSemEnv *semEnv ) + { PRServPointerEntry *ptrEntry; //contents of hash table entry for an arg pointer + void **args; + PRServSemData *endingSlvSemData; + PRServTaskStub *endingTaskStub, *waitingTaskStub, *parentStub; + PRServTaskType *endingTaskType; + PRServTaskStubCarrier *waitingTaskCarrier; + PRServPointerEntry **ptrEntries; + + +// endingTaskStub = (PRServTaskStub *)PR_PI__give_lang_spec_task_info( semReq->callingSlv ); + + endingTaskStub = (PRServTaskStub *)langMetaTask; + args = endingTaskStub->args; + endingTaskType = endingTaskStub->taskType; + ptrEntries = endingTaskStub->ptrEntries; //saved in stub when create + + DEBUG__printf2(dbgRqstHdlr,"EndTask req from slaveID: %d, task: %d",semReq->callingSlv->slaveID, endingTaskStub->taskID[1]) + + //"wait" functionality: Check if parent was waiting on this task + parentStub = endingTaskStub->parentTaskStub; + parentStub->numLiveChildTasks -= 1; + if( parentStub->isWaitingForChildTasksToEnd && + parentStub->numLiveChildTasks == 0) + { + parentStub->isWaitingForChildTasksToEnd = FALSE; + resume_slaveVP( PR_PI__give_slave_assigned_to(parentStub), semEnv ); + } + + //Check if parent ended, and this was last descendent, then free it + if( parentStub->isEnded && parentStub->numLiveChildTasks == 0 ) + { free_task_stub( parentStub ); + } + + + //Now, update state of dependents and start ready tasks + /*The task's controlled arguments are processed one by one. 
+ *Processing an argument means getting arg-pointer's entry. + */ + int32 argNum; + for( argNum = 0; argNum < endingTaskType->numCtldArgs; argNum++ ) + { + ptrEntry = ptrEntries[argNum]; + //check if the ending task was reader of this arg + if( endingTaskType->argTypes[argNum] == READER ) + { //then decrement the enabled and non-finished reader-count in + // the hash-entry. + ptrEntry->numEnabledNonDoneReaders -= 1; + + //If the count becomes zero, then take the next entry from the Q. + //It should be a writer, or else there's a bug in this algorithm. + if( ptrEntry->numEnabledNonDoneReaders == 0 ) + { waitingTaskCarrier = readPrivQ( ptrEntry->waitersQ ); + if( waitingTaskCarrier == NULL ) + { //TODO: looks safe to delete the ptr entry at this point + continue; //next iter of loop + } + if( waitingTaskCarrier->isReader ) + PR_App__throw_exception("READER waiting", NULL, NULL); + + waitingTaskStub = waitingTaskCarrier->taskStub; + + //Set the hash-entry to have an enabled non-finished writer. + ptrEntry->hasEnabledNonFinishedWriter = TRUE; + + // Decrement the blocking-propendent-count of the writer's + // task-stub. If the count has reached zero, then put the + // task-stub into the readyQ. + waitingTaskStub->numBlockingProp -= 1; + if( waitingTaskStub->numBlockingProp == 0 ) + { writePrivQ( waitingTaskStub, semEnv->taskReadyQ ); + if( semEnv->protoSemEnv->hasWork != TRUE ) + semEnv->protoSemEnv->hasWork = TRUE; + } + } + } + else //the ending task is a writer of this arg + { //clear the enabled non-finished writer flag of the hash-entry. + ptrEntry->hasEnabledNonFinishedWriter = FALSE; + + //Take the next waiter from the hash-entry's Q. + waitingTaskCarrier = readPrivQ( ptrEntry->waitersQ ); + if( waitingTaskCarrier == NULL ) + { //TODO: looks safe to delete ptr entry at this point + continue; //go to next iter of loop, done here. 
+ } + waitingTaskStub = waitingTaskCarrier->taskStub; + + //If task is a writer of this hash-entry's pointer + if( !waitingTaskCarrier->isReader ) + { // then turn the flag back on. + ptrEntry->hasEnabledNonFinishedWriter = TRUE; + //Decrement the writer's blocking-propendent-count in task-stub + // If it becomes zero, then put the task-stub into the readyQ. + waitingTaskStub->numBlockingProp -= 1; + if( waitingTaskStub->numBlockingProp == 0 ) + { writePrivQ( waitingTaskStub, semEnv->taskReadyQ ); + if( semEnv->protoSemEnv->hasWork != TRUE ) + semEnv->protoSemEnv->hasWork = TRUE; + } + } + else + { //Waiting task is a reader, so do a loop, of all waiting readers + // until encounter a writer or waitersQ is empty + while( TRUE ) //The checks guarantee have a waiting reader + { //Increment the hash-entry's count of enabled non-finished + // readers. + ptrEntry->numEnabledNonDoneReaders += 1; + + //Decrement the blocking propendents count of the reader's + // task-stub. If it reaches zero, then put the task-stub + // into the readyQ. 
+ waitingTaskStub->numBlockingProp -= 1; + if( waitingTaskStub->numBlockingProp == 0 ) + { writePrivQ( waitingTaskStub, semEnv->taskReadyQ ); + if( semEnv->protoSemEnv->hasWork != TRUE ) + semEnv->protoSemEnv->hasWork = TRUE; + } + //Get next waiting task + waitingTaskCarrier = peekPrivQ( ptrEntry->waitersQ ); + if( waitingTaskCarrier == NULL ) break; //no more waiting readers + if( !waitingTaskCarrier->isReader ) break; //no more waiting readers + waitingTaskCarrier = readPrivQ( ptrEntry->waitersQ ); + waitingTaskStub = waitingTaskCarrier->taskStub; + }//while waiter is a reader + }//if-else, first waiting task is a reader + }//if-else, check of ending task, whether writer or reader + }//for argnum in ending task + + + //done ending the task, if still has live children, then keep stub around + // else, free the stub and args copy + if( endingTaskStub->numLiveChildTasks == 0 && + endingTaskStub->numLiveChildThreads == 0 ) + { free_task_stub( endingTaskStub ); + } + + return; + } + + +inline void +free_task_stub( PRServTaskStub *stubToFree ) + { if(stubToFree->ptrEntries != NULL ) //a thread stub has NULL entry + { PR_PI__free( stubToFree->ptrEntries ); + } + PR_PI__free( stubToFree ); + } + +//========================== Task Comm handlers =========================== + + + +//============================ Send Handlers ============================== +/*Send of Type -- The semantic request has the receiving task ID and Type + * + *Messages of a given Type have to be kept separate.. so need a separate + * entry in the hash table for each pair: receiverID, Type + * + *Also, if same sender sends multiple before any get received, then need to + * stack the sends up -- even if a send waits until it's paired, several + * separate tasks can send to the same receiver, and doing hash on the + * receive task, so they will stack up. 
+ */ +inline void +handleSendTypeTo( PRServSemReq *semReq, PRServSemEnv *semEnv ) + { SlaveVP *senderSlv, *receiverSlv; + int32 *senderID, *receiverID; + int32 *key, keySz, receiverIDNumInt; + PRServSemReq *waitingReq; + HashEntry *entry; + HashTable *commHashTbl = semEnv->commHashTbl; + + receiverID = semReq->receiverID; //For "send", know both send & recv procrs + senderSlv = semReq->senderSlv; + + DEBUG__printf2(dbgRqstHdlr,"SendType req from sender slaveID: %d, recTask: %d", senderSlv->slaveID, receiverID[1]) + + + receiverIDNumInt = receiverID[0] + 1; //pos 0 doesn't include itself + keySz = receiverIDNumInt * sizeof(int32) + 2 * sizeof(int32); + key = PR_PI__malloc( keySz ); + key[0] = receiverIDNumInt + 1; //loc 0 is num int32 in key + memcpy( &key[1], receiverID, receiverIDNumInt * sizeof(int32) ); + key[ 1 + receiverIDNumInt ] = semReq->msgType; + + entry = giveEntryElseInsertReqst32( key, semReq, commHashTbl ); + if( entry == NULL ) //was just inserted, means task has to wait + { return; + } + + //if here, found a waiting request with same key + waitingReq = (PRServSemReq *)entry->content; + + //At this point, know have waiting request(s) -- either sends or recv + //Note, can only have max of one receive waiting, and cannot have both + // sends and receives waiting (they would have paired off) + // but can have multiple sends from diff sending VPs, all same msg-type + if( waitingReq->reqType == send_type_to ) + { //waiting request is another send, so stack this up on list + // but first clone the sending request so it persists. 
+ PRServSemReq *clonedReq = cloneReq( semReq ); + clonedReq-> nextReqInHashEntry = waitingReq->nextReqInHashEntry; + waitingReq->nextReqInHashEntry = clonedReq; + DEBUG__printf2( dbgRqstHdlr, "linked requests: %p, %p ", clonedReq,\ + waitingReq ) + return; + } + else + { + #ifdef HOLISTIC__TURN_ON_OBSERVE_UCC + Dependency newD; + newD.from_vp = senderID->slaveID; + newD.from_task = senderID->numTimesAssignedToASlot; + newD.to_vp = receiverID->slaveID; + newD.to_task = receiverID->numTimesAssignedToASlot +1; + //(newD,semEnv->commDependenciesList); + addToListOfArrays(Dependency,newD,semEnv->dynDependenciesList); + int32 groupId = semReq->msgType; + if(semEnv->ntonGroupsInfo->numInArray <= groupId){ + makeHighestDynArrayIndexBeAtLeast(semEnv->ntonGroupsInfo, groupId); + } + if(semEnv->ntonGroups[groupId] == NULL){ + semEnv->ntonGroups[groupId] = new_NtoN(groupId); + } + Unit u; + u.vp = senderID->slaveID; + u.task = senderID->numTimesAssignedToASlot; + addToListOfArrays(Unit,u,semEnv->ntonGroups[groupId]->senders); + u.vp = receiverID->slaveID; + u.task = receiverID->numTimesAssignedToASlot +1; + addToListOfArrays(Unit,u,semEnv->ntonGroups[groupId]->receivers); + #endif + + //set receiver slave, from the waiting request + receiverSlv = waitingReq->receiverSlv; + + //waiting request is a receive_type_to, so it pairs to this send + //First, remove the waiting receive request from the entry + entry->content = waitingReq->nextReqInHashEntry; + PR_PI__free( waitingReq ); //Don't use contents -- so free it + + if( entry->content == NULL ) + { //TODO: mod hash table to double-link, so can delete entry from + // table without hashing the key and looking it up again + deleteEntryFromTable32( (uint32*)entry->key, commHashTbl ); //frees hashEntry + } + + //attach msg that's in this send request to receiving task's Slv + // when comes back from suspend will have msg in dataRetFromReq + receiverSlv->dataRetFromReq = semReq->msg; + + //bring both processors back from suspend + 
resume_slaveVP( senderSlv, semEnv ); + resume_slaveVP( receiverSlv, semEnv ); + + return; + } + } + + +/*If Send or Receive are called within a task, it causes the task to suspend, + * which converts the slave animating it to a free slave and suspends that slave. + *Which means that send and receive operate upon slaves, no matter whether they + * were called from within a task or a slave. + * + *Looks like can make single handler for both kinds of send.. + */ +//TODO: combine both send handlers into single handler +inline void +handleSendFromTo( PRServSemReq *semReq, PRServSemEnv *semEnv) + { SlaveVP *senderSlv, *receiverSlv; + int32 *senderID, *receiverID; + int32 *key, keySz, receiverIDNumInt, senderIDNumInt; + PRServSemReq *waitingReq; + HashEntry *entry; + HashTable *commHashTbl = semEnv->commHashTbl; + + DEBUG__printf2(dbgRqstHdlr,"SendFromTo req from task %d to %d", + semReq->senderID[1],semReq->receiverID[1]) + + receiverID = semReq->receiverID; //For "send", know both send & recv procrs + senderID = semReq->senderID; + senderSlv = semReq->senderSlv; + + + receiverIDNumInt = receiverID[0] + 1; //include the count in the key + senderIDNumInt = senderID[0] + 1; + keySz = (receiverIDNumInt + senderIDNumInt) * sizeof(int32) + sizeof(int32); + key = PR_PI__malloc( keySz ); + key[0] = receiverIDNumInt + senderIDNumInt; + memcpy( &key[1], receiverID, receiverIDNumInt * sizeof(int32) ); + memcpy( &key[1 + receiverIDNumInt], senderID, senderIDNumInt * sizeof(int32) ); + + entry = giveEntryElseInsertReqst32( key, semReq, commHashTbl ); + if( entry == NULL ) //was just inserted, means task has to wait + { return; + } + + waitingReq = (PRServSemReq *)entry->content; + + //At this point, know have waiting request(s) -- either sends or recv + if( waitingReq->reqType == send_from_to ) + { printf("\n ERROR: shouldn't be two send-from-tos waiting \n"); + } + else + { //waiting request is a receive, so it completes pair with this send + #ifdef HOLISTIC__TURN_ON_OBSERVE_UCC + 
Dependency newD; + newD.from_vp = sendPr->slaveID; + newD.from_task = sendPr->numTimesAssignedToASlot; + newD.to_vp = receivePr->slaveID; + newD.to_task = receivePr->numTimesAssignedToASlot +1; + //addToListOfArraysDependency(newD,semEnv->commDependenciesList); + addToListOfArrays(Dependency,newD,semEnv->commDependenciesList); + #endif + + //set receiver slave, from the waiting request + receiverSlv = waitingReq->receiverSlv; + + //First, remove the waiting receive request from the entry + entry->content = waitingReq->nextReqInHashEntry; + PR_PI__free( waitingReq ); //Don't use contents -- so free it + + //can only be one waiting req for "from-to" semantics + if( entry->content != NULL ) + { + printf("\nERROR in handleSendFromTo\n"); + } + deleteEntryFromTable32( (uint32*)entry->key, commHashTbl ); //frees HashEntry + + //attach msg that's in this send request to receiving procr + // when comes back from suspend, will have msg in dataRetFromReq + receiverSlv->dataRetFromReq = semReq->msg; + + //bring both processors back from suspend + resume_slaveVP( senderSlv, semEnv ); + resume_slaveVP( receiverSlv, semEnv ); + + return; + } + } + + + +//============================== Receives =========================== +// + + +inline void +handleReceiveTypeTo( PRServSemReq *semReq, PRServSemEnv *semEnv) + { SlaveVP *senderSlv, *receiverSlv; + int32 *receiverID; + int32 *key, keySz, receiverIDNumInt; + PRServSemReq *waitingReq; + HashEntry *entry; + HashTable *commHashTbl = semEnv->commHashTbl; + + DEBUG__printf2(dbgRqstHdlr,"ReceiveType req to ID: %d type: %d",semReq->receiverID[1], semReq->msgType) + + receiverID = semReq->receiverID; //For "send", know both send & recv procrs + receiverSlv = semReq->receiverSlv; + + + //key is the receiverID plus the type -- have to copy them into key + receiverIDNumInt = receiverID[0] + 1; //pos 0 doesn't include itself + keySz = receiverIDNumInt * sizeof(int32) + 2 * sizeof(int32); + key = PR_PI__malloc( keySz ); + key[0] = 
receiverIDNumInt + 1; //loc 0 is num int32s in key + memcpy( &key[1], receiverID, receiverIDNumInt * sizeof(int32) ); + key[ 1 + receiverIDNumInt ] = semReq->msgType; + + entry = giveEntryElseInsertReqst32( key, semReq, commHashTbl );//clones + if( entry == NULL ) //was just inserted, means task has to wait + { return; + } + + waitingReq = (PRServSemReq *)entry->content; //previously cloned by insert + + //At this point, know have waiting request(s) -- should be send(s) + if( waitingReq->reqType == send_type_to ) + { + //set sending slave from the request + senderSlv = waitingReq->senderSlv; + + //waiting request is a send, so pair it with this receive + //first, remove the waiting send request from the list in entry + entry->content = waitingReq->nextReqInHashEntry; + if( entry->content == NULL ) + { deleteEntryFromTable32( (uint32*)entry->key, commHashTbl ); //frees HashEntry + } + + //attach msg that's in the send request to receiving procr + // when comes back from suspend, will have msg in dataRetFromReq + receiverSlv->dataRetFromReq = waitingReq->msg; + + //bring both processors back from suspend + PR_PI__free( waitingReq ); + + #ifdef HOLISTIC__TURN_ON_OBSERVE_UCC + Dependency newD; + newD.from_vp = sendPr->slaveID; + newD.from_task = sendPr->numTimesAssignedToASlot; + newD.to_vp = receivePr->slaveID; + newD.to_task = receivePr->numTimesAssignedToASlot +1; + //addToListOfArraysDependency(newD,semEnv->commDependenciesList); + addToListOfArrays(Dependency,newD,semEnv->dynDependenciesList); + int32 groupId = semReq->msgType; + if(semEnv->ntonGroupsInfo->numInArray <= groupId){ + makeHighestDynArrayIndexBeAtLeast(semEnv->ntonGroupsInfo, groupId); + } + if(semEnv->ntonGroups[groupId] == NULL){ + semEnv->ntonGroups[groupId] = new_NtoN(groupId); + } + Unit u; + u.vp = sendPr->slaveID; + u.task = sendPr->numTimesAssignedToASlot; + addToListOfArrays(Unit,u,semEnv->ntonGroups[groupId]->senders); + u.vp = receivePr->slaveID; + u.task = 
receivePr->numTimesAssignedToASlot +1; + addToListOfArrays(Unit,u,semEnv->ntonGroups[groupId]->receivers); + #endif + + resume_slaveVP( senderSlv, semEnv ); + resume_slaveVP( receiverSlv, semEnv ); + + return; + } + printf("\nLang Impl Error: Should never be two waiting receives!\n"); + } + + +/* + */ +inline void +handleReceiveFromTo( PRServSemReq *semReq, PRServSemEnv *semEnv) + { SlaveVP *senderSlv, *receiverSlv; + int32 *senderID, *receiverID; + int32 *key, keySz, receiverIDNumInt, senderIDNumInt; + PRServSemReq *waitingReq; + HashEntry *entry; + HashTable *commHashTbl = semEnv->commHashTbl; + + DEBUG__printf2(dbgRqstHdlr,"RecFromTo req from ID: %d to ID: %d",semReq->senderID[1],semReq->receiverID[1]) + + receiverID = semReq->receiverID; //For "send", know both send & recv procrs + senderID = semReq->senderID; + receiverSlv = semReq->receiverSlv; + + receiverIDNumInt = receiverID[0] + 1; //pos 0 doesn't include itself + senderIDNumInt = senderID[0] + 1; + keySz = (receiverIDNumInt + senderIDNumInt) * sizeof(int32) + sizeof(int32); + key = PR_PI__malloc( keySz ); + key[0] = receiverIDNumInt + senderIDNumInt; //loc 0 is num int32s in key + memcpy( &key[1], receiverID, receiverIDNumInt * sizeof(int32) ); + memcpy( &key[1 + receiverIDNumInt], senderID, senderIDNumInt * sizeof(int32)); + + entry = giveEntryElseInsertReqst32( key, semReq, commHashTbl ); + if( entry == NULL ) //was just inserted, means task has to wait + { return; + } + + waitingReq = (PRServSemReq *)entry->content; + + //At this point, know have a request to rendez-vous -- should be send + if( waitingReq->reqType == send_from_to ) + { //waiting request is a send, so pair it with this receive + #ifdef HOLISTIC__TURN_ON_OBSERVE_UCC + Dependency newD; + newD.from_vp = sendPr->slaveID; + newD.from_task = sendPr->numTimesAssignedToASlot; + newD.to_vp = receivePr->slaveID; + newD.to_task = receivePr->numTimesAssignedToASlot +1; + //addToListOfArraysDependency(newD,semEnv->commDependenciesList); + 
addToListOfArrays(Dependency,newD,semEnv->commDependenciesList); + #endif + + //have receiver slave, now set sender slave + senderSlv = waitingReq->senderSlv; + + //For from-to, should only ever be a single reqst waiting to be paired + entry->content = waitingReq->nextReqInHashEntry; + if( entry->content != NULL ) printf("\nERROR in handleRecvFromTo\n"); + deleteEntryFromTable32( (uint32*)entry->key, commHashTbl ); //frees entry too + + //attach msg that's in the send request to receiving procr + // when comes back from suspend, will have msg in dataRetFromReq + receiverSlv->dataRetFromReq = waitingReq->msg; + + //bring both processors back from suspend + PR_PI__free( waitingReq ); + + resume_slaveVP( senderSlv, semEnv ); + resume_slaveVP( receiverSlv, semEnv ); + + return; + } + printf("\nLang Impl Error: Should never be two waiting receives!\n"); + } + + +/*Waits for all tasks that are direct children to end, then resumes calling + * task or thread + */ +inline void +handleTaskwait( PRServSemReq *semReq, SlaveVP *requestingSlv, PRServSemEnv *semEnv) + { PRServTaskStub* taskStub; + + DEBUG__printf1(dbgRqstHdlr,"Taskwait request from processor %d", + requestingSlv->slaveID) + + taskStub = (PRServTaskStub *)PR_PI__give_lang_meta_task( requestingSlv, PRServ_MAGIC_NUMBER); + + if( taskStub->numLiveChildTasks == 0 ) + { //nobody to wait for, resume + resume_slaveVP( requestingSlv, semEnv ); + } + else //have to wait, mark waiting + { + taskStub->isWaitingForChildTasksToEnd = TRUE; + } + } + + +//========================================================================== +/* + */ +void +handleMalloc( PRServSemReq *semReq, SlaveVP *requestingSlv, PRServSemEnv *semEnv ) + { void *ptr; + + DEBUG__printf1(dbgRqstHdlr,"Malloc request from processor %d",requestingSlv->slaveID) + + ptr = PR_PI__malloc( semReq->sizeToMalloc ); + requestingSlv->dataRetFromReq = ptr; + resume_slaveVP( requestingSlv, semEnv ); + } + +/* + */ +void +handleFree( PRServSemReq *semReq, SlaveVP 
*requestingSlv, PRServSemEnv *semEnv ) + { + DEBUG__printf1(dbgRqstHdlr,"Free request from processor %d",requestingSlv->slaveID) + PR_PI__free( semReq->ptrToFree ); + resume_slaveVP( requestingSlv, semEnv ); + } + + +//=========================================================================== +// +/*Uses ID as index into array of flags. If flag already set, resumes from + * end-label. Else, sets flag and resumes normally. + */ +void inline +handleStartSingleton_helper( PRServSingleton *singleton, SlaveVP *reqstingSlv, + PRServSemEnv *semEnv ) + { + if( singleton->hasFinished ) + { //the code that sets the flag to true first sets the end instr addr + reqstingSlv->dataRetFromReq = singleton->endInstrAddr; + resume_slaveVP( reqstingSlv, semEnv ); + return; + } + else if( singleton->hasBeenStarted ) + { //singleton is in-progress in a diff slave, so wait for it to finish + writePrivQ(reqstingSlv, singleton->waitQ ); + return; + } + else + { //hasn't been started, so this is the first attempt at the singleton + singleton->hasBeenStarted = TRUE; + reqstingSlv->dataRetFromReq = 0x0; + resume_slaveVP( reqstingSlv, semEnv ); + return; + } + } +void inline +handleStartFnSingleton( PRServSemReq *semReq, SlaveVP *requestingSlv, + PRServSemEnv *semEnv ) + { PRServSingleton *singleton; + DEBUG__printf1(dbgRqstHdlr,"StartFnSingleton request from processor %d",requestingSlv->slaveID) + + singleton = &(semEnv->fnSingletons[ semReq->singletonID ]); + handleStartSingleton_helper( singleton, requestingSlv, semEnv ); + } +void inline +handleStartDataSingleton( PRServSemReq *semReq, SlaveVP *requestingSlv, + PRServSemEnv *semEnv ) + { PRServSingleton *singleton; + + DEBUG__printf1(dbgRqstHdlr,"StartDataSingleton request from processor %d",requestingSlv->slaveID) + if( *(semReq->singletonPtrAddr) == NULL ) + { singleton = PR_PI__malloc( sizeof(PRServSingleton) ); + singleton->waitQ = makePRQ(); + singleton->endInstrAddr = 0x0; + singleton->hasBeenStarted = FALSE; + 
singleton->hasFinished = FALSE; + *(semReq->singletonPtrAddr) = singleton; + } + else + singleton = *(semReq->singletonPtrAddr); + handleStartSingleton_helper( singleton, requestingSlv, semEnv ); + } + + +void inline +handleEndSingleton_helper( PRServSingleton *singleton, SlaveVP *requestingSlv, + PRServSemEnv *semEnv ) + { PrivQueueStruc *waitQ; + int32 numWaiting, i; + SlaveVP *resumingSlv; + + if( singleton->hasFinished ) + { //by definition, only one slave should ever be able to run end singleton + // so if this is true, is an error + ERROR1( "singleton code ran twice", requestingSlv ); + } + + singleton->hasFinished = TRUE; + waitQ = singleton->waitQ; + numWaiting = numInPrivQ( waitQ ); + for( i = 0; i < numWaiting; i++ ) + { //they will resume inside start singleton, then jmp to end singleton + resumingSlv = readPrivQ( waitQ ); + resumingSlv->dataRetFromReq = singleton->endInstrAddr; + resume_slaveVP( resumingSlv, semEnv ); + } + + resume_slaveVP( requestingSlv, semEnv ); + +} +void inline +handleEndFnSingleton( PRServSemReq *semReq, SlaveVP *requestingSlv, + PRServSemEnv *semEnv ) + { + PRServSingleton *singleton; + + DEBUG__printf1(dbgRqstHdlr,"EndFnSingleton request from processor %d",requestingSlv->slaveID) + + singleton = &(semEnv->fnSingletons[ semReq->singletonID ]); + handleEndSingleton_helper( singleton, requestingSlv, semEnv ); + } +void inline +handleEndDataSingleton( PRServSemReq *semReq, SlaveVP *requestingSlv, + PRServSemEnv *semEnv ) + { + PRServSingleton *singleton; + + DEBUG__printf1(dbgRqstHdlr,"EndDataSingleton request from processor %d",requestingSlv->slaveID) + + singleton = *(semReq->singletonPtrAddr); + handleEndSingleton_helper( singleton, requestingSlv, semEnv ); + } + + +/*This executes the function in the masterVP, take the function + * pointer out of the request and call it, then resume the VP. 
+ */ +void +handleAtomic( PRServSemReq *semReq, SlaveVP *requestingSlv, PRServSemEnv *semEnv ) + { + DEBUG__printf1(dbgRqstHdlr,"Atomic request from processor %d",requestingSlv->slaveID) + semReq->fnToExecInMaster( semReq->dataForFn ); + resume_slaveVP( requestingSlv, semEnv ); + } + +/*First, it looks at the VP's semantic data, to see the highest transactionID + * that VP + * already has entered. If the current ID is not larger, it throws an + * exception stating a bug in the code. + *Otherwise it puts the current ID + * there, and adds the ID to a linked list of IDs entered -- the list is + * used to check that exits are properly ordered. + *Next it is uses transactionID as index into an array of transaction + * structures. + *If the "VP_currently_executing" field is non-null, then put requesting VP + * into queue in the struct. (At some point a holder will request + * end-transaction, which will take this VP from the queue and resume it.) + *If NULL, then write requesting into the field and resume. + */ +void +handleTransStart( PRServSemReq *semReq, SlaveVP *requestingSlv, + PRServSemEnv *semEnv ) + { PRServSemData *semData; + TransListElem *nextTransElem; + + DEBUG__printf1(dbgRqstHdlr,"TransStart request from processor %d",requestingSlv->slaveID) + + //check ordering of entering transactions is correct + semData = requestingSlv->semanticData; + if( semData->highestTransEntered > semReq->transID ) + { //throw PR exception, which shuts down PR. 
+ PR_PI__throw_exception( "transID smaller than prev", requestingSlv, NULL); + //add this trans ID to the list of transactions entered -- check when + // end a transaction + semData->highestTransEntered = semReq->transID; + nextTransElem = PR_PI__malloc( sizeof(TransListElem) ); + nextTransElem->transID = semReq->transID; + nextTransElem->nextTrans = semData->lastTransEntered; + semData->lastTransEntered = nextTransElem; + + //get the structure for this transaction ID + PRServTrans * + transStruc = &(semEnv->transactionStrucs[ semReq->transID ]); + + if( transStruc->VPCurrentlyExecuting == NULL ) + { + transStruc->VPCurrentlyExecuting = requestingSlv; + resume_slaveVP( requestingSlv, semEnv ); + } + else + { //note, might make future things cleaner if save request with VP and + // add this trans ID to the linked list when gets out of queue. + // but don't need for now, and lazy.. + writePrivQ( requestingSlv, transStruc->waitingVPQ ); + } + } + + +/*Use the trans ID to get the transaction structure from the array. + *Look at VP_currently_executing to be sure it's same as requesting VP. + * If different, throw an exception, stating there's a bug in the code. + *Next, take the first element off the list of entered transactions. + * Check to be sure the ending transaction is the same ID as the next on + * the list. If not, incorrectly nested so throw an exception. + * + *Next, get from the queue in the structure. + *If it's empty, set VP_currently_executing field to NULL and resume + * requesting VP. + *If get something, set VP_currently_executing to the VP from the queue, then + * resume both. 
+ */ +void +handleTransEnd(PRServSemReq *semReq, SlaveVP *requestingSlv, PRServSemEnv *semEnv) + { PRServSemData *semData; + SlaveVP *waitingSlv; + PRServTrans *transStruc; + TransListElem *lastTrans; + + DEBUG__printf1(dbgRqstHdlr,"TransEnd request from processor %d",requestingSlv->slaveID) + + transStruc = &(semEnv->transactionStrucs[ semReq->transID ]); + + //make sure transaction ended in same VP as started it. + if( transStruc->VPCurrentlyExecuting != requestingSlv ) + { + PR_PI__throw_exception( "trans ended in diff VP", requestingSlv, NULL ); + } + + //make sure nesting is correct -- last ID entered should == this ID + semData = requestingSlv->semanticData; + lastTrans = semData->lastTransEntered; + if( lastTrans->transID != semReq->transID ) + { + PR_PI__throw_exception( "trans incorrectly nested", requestingSlv, NULL ); + } + + semData->lastTransEntered = semData->lastTransEntered->nextTrans; + + + waitingSlv = readPrivQ( transStruc->waitingVPQ ); + transStruc->VPCurrentlyExecuting = waitingSlv; + + if( waitingSlv != NULL ) + resume_slaveVP( waitingSlv, semEnv ); + + resume_slaveVP( requestingSlv, semEnv ); + } diff -r 999f2966a3e5 -r dafae55597ce Services_Offered_by_PR/Services_Language/PRServ_Request_Handlers.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Services_Offered_by_PR/Services_Language/PRServ_Request_Handlers.h Tue Oct 23 23:46:17 2012 -0700 @@ -0,0 +1,60 @@ +/* + * Copyright 2009 OpenSourceStewardshipFoundation.org + * Licensed under GNU General Public License version 2 + * + * Author: seanhalle@yahoo.com + * + */ + +#ifndef _PRServ_REQ_H +#define _PRServ_REQ_H + +#include "PRServ.h" + +/*This header defines everything specific to the PRServ semantic plug-in + */ + +inline void +handleSubmitTask( PRServSemReq *semReq, PRServSemEnv *semEnv); +inline void +handleEndTask( PRServSemReq *semReq, PRServSemEnv *semEnv); +inline void +handleSendTypeTo( PRServSemReq *semReq, PRServSemEnv *semEnv); +inline void +handleSendFromTo( PRServSemReq 
*semReq, PRServSemEnv *semEnv); +inline void +handleReceiveTypeTo( PRServSemReq *semReq, PRServSemEnv *semEnv); +inline void +handleReceiveFromTo( PRServSemReq *semReq, PRServSemEnv *semEnv); +inline void +handleTaskwait(PRServSemReq *semReq, SlaveVP *requestingSlv, PRServSemEnv *semEnv); + +inline void +handleMalloc( PRServSemReq *semReq, SlaveVP *requestingSlv, PRServSemEnv *semEnv); +inline void +handleFree( PRServSemReq *semReq, SlaveVP *requestingSlv, PRServSemEnv *semEnv ); +inline void +handleTransEnd(PRServSemReq *semReq, SlaveVP *requestingSlv, PRServSemEnv*semEnv); +inline void +handleTransStart( PRServSemReq *semReq, SlaveVP *requestingSlv, + PRServSemEnv *semEnv ); +inline void +handleAtomic( PRServSemReq *semReq, SlaveVP *requestingSlv, PRServSemEnv *semEnv); +inline void +handleStartFnSingleton( PRServSemReq *semReq, SlaveVP *reqstingSlv, + PRServSemEnv *semEnv ); +inline void +handleEndFnSingleton( PRServSemReq *semReq, SlaveVP *requestingSlv, + PRServSemEnv *semEnv ); +inline void +handleStartDataSingleton( PRServSemReq *semReq, SlaveVP *reqstingSlv, + PRServSemEnv *semEnv ); +inline void +handleEndDataSingleton( PRServSemReq *semReq, SlaveVP *requestingSlv, + PRServSemEnv *semEnv ); +inline void +free_task_stub( PRMetaTask *stubToFree ); + + +#endif /* _PRServ_REQ_H */ + diff -r 999f2966a3e5 -r dafae55597ce Services_Offered_by_PR/Services_Language/PRServ_SS.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Services_Offered_by_PR/Services_Language/PRServ_SS.c Tue Oct 23 23:46:17 2012 -0700 @@ -0,0 +1,235 @@ +/* + * Copyright 2010 OpenSourceCodeStewardshipFoundation + * + * Licensed under BSD + */ + +#include +#include +#include + +#include "Queue_impl/PrivateQueue.h" +#include "Hash_impl/PrivateHash.h" + +#include "PRServ.h" +#include "Measurement/PRServ_Counter_Recording.h" + +//========================================================================== + + + +//=========================================================================== + +/* + 
*/ +void +PRServ__start( SlaveVP *seedSlv ) + { PRServSemEnv *semEnv; + int32 i, coreNum, slotNum; + PRServSemData *semData; + PRServTaskStub *threadTaskStub, *parentTaskStub; + + semEnv = PR_WL__malloc( sizeof(PRServSemEnv) ); + + PR_SS__register_langlets_semEnv( semEnv, seedSlv, PRServ_MAGIC_NUMBER ); + + //seed slave is a thread slave, so make a thread's task stub for it + // and then make another to stand for the seed's parent task. Make + // the parent be already ended, and have one child (the seed). This + // will make the dissipate handler do the right thing when the seed + // is dissipated. + threadTaskStub = create_thread_task_stub( initData ); + parentTaskStub = create_thread_task_stub( NULL ); + parentTaskStub->isEnded = TRUE; + parentTaskStub->numLiveChildThreads = 1; //so dissipate works for seed + threadTaskStub->parentTaskStub = parentTaskStub; + + PR_SS__set_langMetaTask_for_seedSlv( threadTaskStub, seedSlv ); + + //Hook up the semantic layer's plug-ins to the Master virt procr + PR_SS__register_create_task_handler( &createTaskHandler, seedVP, PRServ_MAGIC_NUMBER ); + PR_SS__register_end_task_handler( &endTaskHandler, seedVP, PRServ_MAGIC_NUMBER ); + PR_SS__register_create_slave_handler( &createThreadHandler, seedVP, PRServ_MAGIC_NUMBER ); + PR_SS__register_dissipate_slave_handler( &endThreadHandler, seedVP, PRServ_MAGIC_NUMBER ); + PR_SS__register_request_handler( &PRServ__Request_Handler, seedVP, PRServ_MAGIC_NUMBER ); + PR_SS__register_assigner( &PRServ__assign_work_to_slot, seedVP, PRServ_MAGIC_NUMBER ); + RequestHandler createInitialSemDataFn; + RequestHandler resetSemDataFn; + + #ifdef HOLISTIC__TURN_ON_PERF_COUNTERS + _PRTopEnv->counterHandler = &PRServ__counter_handler; + PRServ__init_counter_data_structs(); + #endif + + + //create the ready queues, hash tables used for matching and so forth + semEnv->slavesReadyToResumeQ = makePRQ(); + semEnv->taskReadyQ = makePRQ(); + + semEnv->argPtrHashTbl = makeHashTable32( 16, &PR_int__free ); + 
semEnv->commHashTbl = makeHashTable32( 16, &PR_int__free ); + + semEnv->nextCoreToGetNewSlv = 0; + + + //TODO: bug -- turn these arrays into dyn arrays to eliminate limit + //semanticEnv->singletonHasBeenExecutedFlags = makeDynArrayInfo( ); + //semanticEnv->transactionStrucs = makeDynArrayInfo( ); + for( i = 0; i < NUM_STRUCS_IN_SEM_ENV; i++ ) + { + semEnv->fnSingletons[i].endInstrAddr = NULL; + semEnv->fnSingletons[i].hasBeenStarted = FALSE; + semEnv->fnSingletons[i].hasFinished = FALSE; + semEnv->fnSingletons[i].waitQ = makePRQ(); + semEnv->transactionStrucs[i].waitingVPQ = makePRQ(); + } + + + #ifdef HOLISTIC__TURN_ON_OBSERVE_UCC + semEnv->unitList = makeListOfArrays(sizeof(Unit),128); + semEnv->ctlDependenciesList = makeListOfArrays(sizeof(Dependency),128); + semEnv->commDependenciesList = makeListOfArrays(sizeof(Dependency),128); + semEnv->dynDependenciesList = makeListOfArrays(sizeof(Dependency),128); + semEnv->ntonGroupsInfo = makePrivDynArrayOfSize((void***)&(semEnv->ntonGroups),8); + + semEnv->hwArcs = makeListOfArrays(sizeof(Dependency),128); + memset(semEnv->last_in_slot,0,sizeof(NUM_CORES * NUM_ANIM_SLOTS * sizeof(Unit))); + #endif + } + + +/*Frees any memory allocated by PRServ__init() then calls PR_int__shutdown + */ +void +PRServ__cleanup_after_shutdown() + { PRServSemEnv *semanticEnv; + + semanticEnv = _PRTopEnv->semanticEnv; + + #ifdef HOLISTIC__TURN_ON_OBSERVE_UCC + //UCC + FILE* output; + int n; + char filename[255]; + for(n=0;n<255;n++) + { + sprintf(filename, "./counters/UCC.%d",n); + output = fopen(filename,"r"); + if(output) + { + fclose(output); + }else{ + break; + } + } + if(n<255){ + printf("Saving UCC to File: %s ...\n", filename); + output = fopen(filename,"w+"); + if(output!=NULL){ + set_dependency_file(output); + //fprintf(output,"digraph Dependencies {\n"); + //set_dot_file(output); + //FIXME: first line still depends on counters being enabled, replace w/ unit struct! 
+ //forAllInDynArrayDo(_PRTopEnv->counter_history_array_info, &print_dot_node_info ); + forAllInListOfArraysDo(semanticEnv->unitList, &print_unit_to_file); + forAllInListOfArraysDo( semanticEnv->commDependenciesList, &print_comm_dependency_to_file ); + forAllInListOfArraysDo( semanticEnv->ctlDependenciesList, &print_ctl_dependency_to_file ); + forAllInDynArrayDo(semanticEnv->ntonGroupsInfo,&print_nton_to_file); + //fprintf(output,"}\n"); + fflush(output); + + } else + printf("Opening UCC file failed. Please check that folder \"counters\" exists in run directory and has write permission.\n"); + } else { + printf("Could not open UCC file, please clean \"counters\" folder. (Must contain less than 255 files.)\n"); + } + //Loop Graph + for(n=0;n<255;n++) + { + sprintf(filename, "./counters/LoopGraph.%d",n); + output = fopen(filename,"r"); + if(output) + { + fclose(output); + }else{ + break; + } + } + if(n<255){ + printf("Saving LoopGraph to File: %s ...\n", filename); + output = fopen(filename,"w+"); + if(output!=NULL){ + set_dependency_file(output); + //fprintf(output,"digraph Dependencies {\n"); + //set_dot_file(output); + //FIXME: first line still depends on counters being enabled, replace w/ unit struct! + //forAllInDynArrayDo(_PRTopEnv->counter_history_array_info, &print_dot_node_info ); + forAllInListOfArraysDo( semanticEnv->unitList, &print_unit_to_file ); + forAllInListOfArraysDo( semanticEnv->commDependenciesList, &print_comm_dependency_to_file ); + forAllInListOfArraysDo( semanticEnv->ctlDependenciesList, &print_ctl_dependency_to_file ); + forAllInListOfArraysDo( semanticEnv->dynDependenciesList, &print_dyn_dependency_to_file ); + forAllInListOfArraysDo( semanticEnv->hwArcs, &print_hw_dependency_to_file ); + //fprintf(output,"}\n"); + fflush(output); + + } else + printf("Opening LoopGraph file failed. 
Please check that folder \"counters\" exists in run directory and has write permission.\n"); + } else { + printf("Could not open LoopGraph file, please clean \"counters\" folder. (Must contain less than 255 files.)\n"); + } + + + freeListOfArrays(semanticEnv->unitList); + freeListOfArrays(semanticEnv->commDependenciesList); + freeListOfArrays(semanticEnv->ctlDependenciesList); + freeListOfArrays(semanticEnv->dynDependenciesList); + + #endif +#ifdef HOLISTIC__TURN_ON_PERF_COUNTERS + for(n=0;n<255;n++) + { + sprintf(filename, "./counters/Counters.%d.csv",n); + output = fopen(filename,"r"); + if(output) + { + fclose(output); + }else{ + break; + } + } + if(n<255){ + printf("Saving Counter measurements to File: %s ...\n", filename); + output = fopen(filename,"w+"); + if(output!=NULL){ + set_counter_file(output); + int i; + for(i=0;icounterList[i], &print_counter_events_to_file ); + fflush(output); + } + + } else + printf("Opening UCC file failed. Please check that folder \"counters\" exists in run directory and has write permission.\n"); + } else { + printf("Could not open UCC file, please clean \"counters\" folder. (Must contain less than 255 files.)\n"); + } + +#endif +/* It's all allocated inside PR's big chunk -- that's about to be freed, so + * nothing to do here + + + for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ ) + { + PR_int__free( semanticEnv->readyVPQs[coreIdx]->startOfData ); + PR_int__free( semanticEnv->readyVPQs[coreIdx] ); + } + PR_int__free( semanticEnv->readyVPQs ); + + freeHashTable( semanticEnv->commHashTbl ); + PR_int__free( _PRTopEnv->semanticEnv ); + */ + PR_SS__cleanup_at_end_of_shutdown(); + } + +