# HG changeset patch
# User Sean Halle <seanhalle@yahoo.com>
# Date 1358208637 28800
# Node ID e6a68e7ea63fc1602fca10d6eb79f62d889246e7
# Parent  e5bd470b562b4b7df73fb22b1b5e2ebc3d84c92f
Removed the extra level of core controller -- now only one anim slot and master
called after every work unit

diff -r e5bd470b562b -r e6a68e7ea63f AnimationMaster.c
--- a/AnimationMaster.c	Mon Jan 14 15:31:23 2013 -0800
+++ b/AnimationMaster.c	Mon Jan 14 16:10:37 2013 -0800
@@ -22,11 +22,11 @@
 inline void PRHandle_CreateTask( PRReqst *req, SlaveVP *slave );
 inline void PRHandle_EndTask(    PRReqst *req, SlaveVP *slave );
 inline void PRHandle_CreateSlave(PRReqst *req, SlaveVP *slave );
-void        PRHandle_Dissipate(  PRReqst *req, SlaveVP *slave );
+void        PRHandle_EndSlave(  PRReqst *req, SlaveVP *slave );
 
 
 //inline void  masterFunction_SingleLang( PRLangEnv *protoLangEnv, AnimSlot *slot );
-inline void masterFunction_MultiLang( AnimSlot  *slot );
+inline void masterFunction( AnimSlot  *slot );
 inline PRProcess * pickAProcess( AnimSlot *slot );
 inline SlaveVP * assignWork( PRProcess *process, AnimSlot *slot );
 
@@ -78,461 +78,29 @@
    //Have three different modes, and the master behavior is different for
    // each, so jump to the loop that corresponds to the mode.
    //
-   switch(masterEnv->mode)
-    {
-/*
-    { case SingleLang: 
-         while(1)
-          {       MEAS__Capture_Pre_Master_Point
-            for( slotIdx = 0; slotIdx < NUM_ANIM_SLOTS; slotIdx++)
-             {
-               currSlot = animSlots[ slotIdx ];
+   while(1)
+    {       MEAS__Capture_Pre_Master_Point
+      for( slotIdx = 0; slotIdx < NUM_ANIM_SLOTS; slotIdx++)
+       {
+         currSlot = animSlots[ slotIdx ];
 
-               masterFunction_StandaloneSlavesOnly( masterEnv, currSlot );
-             }
-                  MEAS__Capture_Post_Master_Point;
-            masterSwitchToCoreCtlr( masterVP ); //returns when ctlr switches back to master
-            flushRegisters();
-          } 
-
-      case SingleLang:     
-       { PRLangEnv  *protoLangEnv =  _PRTopEnv->protoLangEnv;
-         while(1)
-          {       MEAS__Capture_Pre_Master_Point
-            for( slotIdx = 0; slotIdx < NUM_ANIM_SLOTS; slotIdx++)
-             {
-               currSlot = animSlots[ slotIdx ];
-
-               masterFunction_SingleLang( protoLangEnv, currSlot );
-             }
-                  MEAS__Capture_Post_Master_Point;
-            masterSwitchToCoreCtlr( masterVP ); //returns when ctlr switches back to master
-            flushRegisters();
-          }
+         masterFunction( currSlot );
        }
- */
-      case MultiLang:
-       { while(1)
-          {       MEAS__Capture_Pre_Master_Point
-            for( slotIdx = 0; slotIdx < NUM_ANIM_SLOTS; slotIdx++)
-             {
-               currSlot = animSlots[ slotIdx ];
-
-               masterFunction_MultiLang( currSlot );
-             }
-                  MEAS__Capture_Post_Master_Point;
-            masterSwitchToCoreCtlr( masterVP ); //returns when ctlr switches back to master
-            flushRegisters();
-          }
-       }
+            MEAS__Capture_Post_Master_Point;
+      masterSwitchToCoreCtlr( masterVP ); //returns when ctlr switches back to master
+      flushRegisters();
     }
  }
 
 
-        
-//=====================  The versions of the Animation Master  =================
-//
-//==============================================================================
-
-/* 1) This version is for a single language, that has only slaves, no tasks,
- *    such as Vthread or SSR.
- *This version is for when an application has only a single language, and
- * that language exposes slaves explicitly (as opposed to a task based 
- * language like pure dataflow).
- * 
- *
- *It scans the animation slots for just-completed slaves.
- * Each completed slave has a request in it.  So, the master hands each to
- * the plugin's request handler (there is only one plugin, because only one
- * lang).
- *Each request represents a language construct that has been encountered
- * by the application code in the slave. Passing the request to the
- * request handler is how that language construct's behavior gets invoked.
- * The request handler then performs the actions of the construct's
- * behavior. So, the request handler encodes the behavior of the 
- * language's parallelism constructs, and performs that when the master
- * hands it a slave containing a request to perform that construct.
- * 
- *On a shared-memory machine, the behavior of parallelism constructs
- * equals control, over order of execution of code.  Hence, the behavior
- * of the language constructs performed by the request handler is to 
- * choose the order that slaves get animated, and thereby control the
- * order that application code in the slaves executes.
- * 
- *To control order of animation of slaves, the request handler has a
- * language environment that holds data structures used to hold slaves
- * and choose when they're ready to be animated.
- *
- *Once a slave is marked as ready to be animated by the request handler,
- * it is the second plugin function, the Assigner, which chooses the core
- * the slave gets assigned to for animation.  Hence, the Assigner doesn't
- * perform any of the semantic behavior of language constructs, rather
- * it gives the language a chance to improve performance. The performance
- * of application code is strongly related to communication between
- * cores. On shared-memory machines, communication is caused during
- * execution of code, by memory accesses, and how much depends on contents
- * of caches connected to the core executing the code.  So, the placement
- * of slaves determines the communication caused during execution of the
- * slave's code.
- *The point of the Assigner, then, is to use application information during
- * execution of the program, to make choices about slave placement onto
- * cores, with the aim to put slaves close to caches containing the data
- * used by the slave's code.
- * 
- *==========================================================================
- *In summary, the animationMaster scans the slots, finds slaves
- * just-finished, which hold requests, pass those to the request handler,
- * along with the language environment, and the request handler then manages
- * the structures in the language env, which controls the order of
- * animation of slaves, and so embodies the behavior of the language
- * constructs.
- *The animationMaster then rescans the slots, offering each empty one to
- * the Assigner, along with the language environment.  The Assigner chooses
- * among the ready slaves in the language env, finding the one best suited
- * to be animated by that slot's associated core.
- * 
- *==========================================================================
- *Implementation Details:
- * 
- *There is a separate masterVP for each core, but a single language
- * environment shared by all cores.  Each core also has its own scheduling
- * slots, which are used to communicate slaves between animationMaster and
- * coreController.  There is only one global variable, _PRTopEnv, which
- * holds the language env and other things shared by the different
- * masterVPs.  The request handler and Assigner are registered with
- * the animationMaster by the language's init function, and a pointer to
- * each is in the _PRTopEnv. (There are also some pthread related global
- * vars, but they're only used during init of PR).
- *PR gains control over the cores by essentially "turning off" the OS's
- * scheduler, using pthread pin-to-core commands.
- *
- *The masterVPs are created during init, with this animationMaster as their
- * top level function.  The masterVPs use the same SlaveVP data structure,
- * even though they're not slave VPs.
- *A "seed slave" is also created during init -- this is equivalent to the
- * "main" function in C, and acts as the entry-point to the PR-language-
- * based application.
- *The masterVPs share a single system-wide master-lock, so only one
- * masterVP may be animated at a time.
- *The core controllers access _PRTopEnv to get the masterVP, and when
- * they start, the slots are all empty, so they run their associated core's
- * masterVP.  The first of those to get the master lock sees the seed slave
- * in the shared language environment, so when it runs the Assigner, that
- * returns the seed slave, which the animationMaster puts into a scheduling
- * slot then switches to the core controller.  That then switches the core
- * over to the seed slave, which then proceeds to execute language
- * constructs to create more slaves, and so on.  Each of those constructs
- * causes the seed slave to suspend, switching over to the core controller,
- * which eventually switches to the masterVP, which executes the 
- * request handler, which uses PR primitives to carry out the creation of
- * new slave VPs, which are marked as ready for the Assigner, and so on..
- * 
- *On animation slots, and system behavior:
- * A request may linger in an animation slot for a long time while
- * the slaves in the other slots are animated.  This only becomes a problem
- * when such a request is a choke-point in the constraints, and is needed
- * to free work for *other* cores.  To reduce this occurrence, the number
- * of animation slots should be kept low.  In balance, having multiple
- * animation slots amortizes the overhead of switching to the masterVP and
- * executing the animationMaster code, which drives for more than one. In
- * practice, the best balance should be discovered by profiling.
- */
-/*
-void masterFunction_StandaloneSlavesOnly( AnimSlot  *slot )
- { 
-   SlaveVP        *slave;
-   PRReqst        *req;
-   PRLangEnv      *langEnv = _PRTopEnv->langEnv;
-    
-   
-   //======================== animationMaster ========================
-      
-      //Check if newly-done slave in slot, which will need request handled
-   if( slot->workIsDone )
-    { slot->workIsDone = FALSE;
-      slot->needsWorkAssigned = TRUE;
-
-
-            HOLISTIC__Record_AppResponder_start;
-            MEAS__startReqHdlr;
-         //process the request made by the slave (held inside slave struc)
-      slave = slot->slaveAssignedToSlot;
-      req = slave->request;
-
-      //Handle task create and end first -- they're special cases..
-      switch( req->reqType )
-       { case SlvCreate:    PRHandle_CreateSlave( slave );           break;
-         case SlvDissipate: PRHandle_Dissipate( slave );             break;
-         case Service:      PR_int__handle_PRServiceReq( slave );    break; //resume into PR's own language env
-         case Hardware: //for future expansion
-         case IO:       //for future expansion
-         case OSCall:   //for future expansion
-            PR_int__throw_exception("Not implemented");             break;
-         case Language: //normal lang request
-          { 
-            (*langEnv->requestHdlr)( req->langReq, slave, langEnv );
-          }
-       }
-            HOLISTIC__Record_AppResponder_end;
-            MEAS__endReqHdlr;
-    }
-      //If slot empty, hand to Assigner to fill with a slave
-   if( slot->needsWorkAssigned )
-    {    //Call plugin's Assigner to give slot a new slave
-            HOLISTIC__Record_Assigner_start;
-
-      if( langEnv->hasWork )
-       {  (*langEnv->slaveAssigner)( langEnv, slot ); //calls PR fn that inserts work into slot
-         goto ReturnAfterAssigningWork; //quit for-loop, cause found work
-       }
-      else
-         goto NoWork;
-    }
-   
- NoWork:
-      //No work, if reach here..
-    { 
-   #ifdef HOLISTIC__TURN_ON_OBSERVE_UCC
-      coreNum = slot->coreSlotIsOn;
-      returnSlv = process->idleSlv[coreNum][slotNum]; 
-    
-         //things that would normally happen in resume(), but idle VPs
-         // never go there
-      returnSlv->numTimesAssignedToASlot++; //gives each idle unit a unique ID
-      Unit newU;
-      newU.vp = returnSlv->slaveNum;
-      newU.task = returnSlv->numTimesAssignedToASlot;
-      addToListOfArrays(Unit,newU,process->unitList);
-
-      if (returnSlv->numTimesAssignedToASlot > 1) //make a dependency from prev idle unit
-       { Dependency newD;             // to this one
-         newD.from_vp = returnSlv->slaveNum;
-         newD.from_task = returnSlv->numTimesAssignedToASlot - 1;
-         newD.to_vp = returnSlv->slaveNum;
-         newD.to_task = returnSlv->numTimesAssignedToASlot;
-         addToListOfArrays(Dependency, newD ,process->ctlDependenciesList);  
-       }
-   #endif
-            HOLISTIC__Record_Assigner_end;
-      return;
-    }
- 
- ReturnAfterAssigningWork:  //All paths goto here.. to provide single point for holistic..
-    {
-            HOLISTIC__Record_Assigner_end;
-      return;
-    }
- }
-*/
-
-
-/*This is the master when just multi-lang, but not multi-process mode is on.
- * This version has to handle both tasks and slaves, and do extra work of 
- * looking up the language env and handlers to use, for each completed bit of 
- * work.
- *It also has to search through the language envs to find one with work,
- * then ask that env's assigner to return a unit of that work.
- * 
- *The language is written to startup in the same way as if it were the only
- * language in the app, and it operates in the same way,
- * the only difference between single language and multi-lang is here, in the
- * master.
- *This invisibility to mode is why the language has to use registration calls
- * for everything during startup -- those calls do different things depending
- * on whether it's single-language or multi-language mode.
- * 
- *In this version of the master, work can either be a task or a resumed slave
- *Having two cases makes this logic complex.. can be finishing either, and
- * then the next available work may be either.. so really have two distinct 
- * loops that are inter-twined.. 
- * 
- *Some special cases:
- * A task-end is a special case for a few reasons (below).
- * A task-end can't block a slave (can't cause it to "logically suspend")
- * A task available for work can only be assigned to a special slave, which 
- *   has been set aside for doing tasks, one such task-slave is always 
- *   assigned to each slot. So, when a task ends, a new task is assigned to
- *   that slot's task-slave right away.  
- * But if no tasks are available, then have to switch over to looking at
- *   slaves to find one ready to resume, to find work for the slot.
- * If a task just suspends, not ends, then its task-slave is no longer 
- *   available to take new tasks, so a new task-slave has to be assigned to
- *   that slot.  Then the slave of the suspended task is turned into a free
- *   task-slave and request handling is done on it as if it were a slave 
- *   that suspended.
- * After request handling, do the same sequence of looking for a task to be
- *   work, and if none, look for a slave ready to resume, as work for the slot.
- * If a slave suspends, handle its request, then look for work.. first for a
- *   task to assign, and if none, slaves ready to resume.
- * Another special case is when task-end is done on a free task-slave.. in
- *   that case, the slave has no more work and no way to get more.. so place
- *   it into a recycle queue.
- * If no work is found of either type, then do a special thing to prune down
- *   the extra slaves in the recycle queue, just so don't get too many..
- * 
- *The multi-lang thing complicates matters..  
- *
- *For request handling, it means have to first fetch the language environment
- * of the language, and then do the request handler pointed to by that
- * language env.
- *For assigning, things get more complex because of competing goals..  One
- * goal is for language specific stuff to be used during assignment, so
- * assigner can make higher quality decisions..  but with multiple languages,
- * which only get mixed in the application, the assigners can't be written
- * with knowledge of each other.  So, they can only make localized decisions,
- * and so different language's assigners may interfere with each other..
- * 
- *So, have some possibilities available:
- *1) can have a fixed scheduler in the proto-runtime, that all the
- * languages give their work to..  (but then lose language-specific info, 
- * there is a standard PR format for assignment info, and the langauge 
- * attaches this to the work-unit when it gives it to PR.. also have issue
- * with HWSim, which uses a priority Q instead of FIFO, and requests can 
- * "undo" previous work put in, so request handlers need way to manipulate
- * the work-holding Q..) (this might be fudgeable with
- * HWSim, if the master did a lang-supplied callback each time it assigns a
- * unit to a slot..  then HWSim can keep exactly one unit of work in PR's
- * queue at a time..  but this is quite hack-like.. or perhaps HWSim supplies
- * a task-end handler that kicks the next unit of work from HWSim internal
- * priority queue, over to PR readyQ)
- *2) can have each language have its own language env, that holds its own
- * work, which is assigned by its own assigner.. then the master searches
- * through all the language envs to find one with work and asks it give work..
- * (this has downside of blinding assigners to each other.. but does work
- * for HWSim case)
- *3) could make PR have a different readyQ for each core, and ask the lang
- * to put work to the core it prefers.. but the work may be moved by PR if
- * needed, say if one core idles for too long. This is a hybrid approach, 
- * letting the language decide which core, but PR keeps the work and does it
- * FIFO style.. (this might als be fudgeable with HWSim, in similar fashion, 
- * but it would be complicated by having to track cores separately) 
- *
- *Choosing 2, to keep compatibility with single-lang mode..  it allows the same
- * assigner to be used for single-lang as for multi-lang..  the overhead of
- * the extra master search for work is part of the price of the flexibility,
- * but should be fairly small.. takes the first env that has work available, 
- * and whatever it returns is assigned to the slot..
- * 
- *As a hybrid, giving an option for a unified override assigner to be registered
- * and used..  This allows something like a static analysis to detect
- * which languages are grouped together, and then analyze the pattern of 
- * construct calls, and generate a custom assigner that uses info from all
- * the languages in a unified way..  Don't really expect this to happen, 
- * but making it possible.
- */
-/*
-inline
-void 
-masterFunction_SingleLang( PRLangEnv *protoLangEnv, AnimSlot *slot )
- {    //Scan the animation slots
-   SlaveVP        *slave;
-   PRReqst        *req;
-
-      //Check if newly-done slave in slot, which will need request handled
-   if( slot->workIsDone )
-    { slot->workIsDone = FALSE;
-      slot->needsWorkAssigned = TRUE;
-
-            HOLISTIC__Record_AppResponder_start; //TODO: update to check which process for each slot
-            MEAS__startReqHdlr;
-
-
-         //process the request made by the slave (held inside slave struc)
-      slave = slot->slaveAssignedToSlot;
-      req = slave->request;
-
-         //If the requesting slave is a slot slave, and request is not
-         // task-end, then turn it into a free task slave. 
-      if( slave->typeOfVP == SlotTaskSlv && req->reqType != TaskEnd )
-         PR_int__replace_with_new_slot_slv( slave );
-
-      //Handle task create and end first -- they're special cases..
-      switch( req->reqType )
-       { case TaskEnd: 
-          { //do PR handler, which calls lang's hdlr and does recycle of
-            // free task slave if needed -- PR handler checks for free task Slv
-            PRHandle_EndTask_SL( slave );                            break;
-          }
-         case TaskCreate:
-          { //Do PR's create-task handler, which calls the lang's hdlr
-            // PR handler checks for free task Slv
-            PRHandle_CreateTask_SL( slave );                         break;
-          }
-         case SlvCreate:    PRHandle_CreateSlave_SL( slave );        break;
-         case SlvDissipate: PRHandle_Dissipate_SL( slave );          break;
-         case Service:      PR_int__handle_PRServiceReq_SL( slave ); break; //resume into PR's own language env
-         case Hardware: //for future expansion
-         case IO:       //for future expansion
-         case OSCall:   //for future expansion
-            PR_int__throw_exception("Not implemented", slave, NULL); break;
-         case Language: //normal lang request
-          { 
-            (*protoLangEnv->requestHdlr)( req->langReq, slave, (void*)PR_int__give_lang_env(protoLangEnv ));
-          }
-       }
-              
-            MEAS__endReqHdlr;          
-            HOLISTIC__Record_AppResponder_end;
-    } //if have request to be handled
-
-      //If slot empty, hand to Assigner to fill with a slave
-   if( slot->needsWorkAssigned )
-    {    //Call plugin's Assigner to give slot a new slave
-            HOLISTIC__Record_Assigner_start;
-
-      if( protoLangEnv->hasWork )
-       {  (*protoLangEnv->slaveAssigner)( protoLangEnv, slot ); //calls PR fn that inserts work into slot
-         goto ReturnAfterAssigningWork; //quit for-loop, cause found work
-       }
-      else
-         goto NoWork;
-    }
-   
- NoWork:
-      //No work, if reach here..
-    { 
-   #ifdef HOLISTIC__TURN_ON_OBSERVE_UCC
-      coreNum = slot->coreSlotIsOn;
-      returnSlv = process->idleSlv[coreNum][slotNum]; 
-    
-         //things that would normally happen in resume(), but idle VPs
-         // never go there
-      returnSlv->numTimesAssignedToASlot++; //gives each idle unit a unique ID
-      Unit newU;
-      newU.vp = returnSlv->slaveNum;
-      newU.task = returnSlv->numTimesAssignedToASlot;
-      addToListOfArrays(Unit,newU,process->unitList);
-
-      if (returnSlv->numTimesAssignedToASlot > 1) //make a dependency from prev idle unit
-       { Dependency newD;             // to this one
-         newD.from_vp = returnSlv->slaveNum;
-         newD.from_task = returnSlv->numTimesAssignedToASlot - 1;
-         newD.to_vp = returnSlv->slaveNum;
-         newD.to_task = returnSlv->numTimesAssignedToASlot;
-         addToListOfArrays(Dependency, newD ,process->ctlDependenciesList);  
-       }
-   #endif
-            HOLISTIC__Record_Assigner_end;
-      return;
-    }
- 
- ReturnAfterAssigningWork:  //All paths goto here.. to provide single point for holistic..
-    {
-            HOLISTIC__Record_Assigner_end;
-      return;
-    }
- }
-*/
-
 inline
 void
-masterFunction_MultiLang( AnimSlot  *slot )
+masterFunction( AnimSlot  *slot )
  {    //Scan the animation slots
    int32           magicNumber;
    SlaveVP        *slave;
    PRLangEnv      *langEnv;
    PRReqst        *req;
-   RequestHandler  requestHandler;
    PRProcess      *process;
 
       //Check if newly-done slave in slot, which will need request handled
@@ -566,8 +134,8 @@
             PRHandle_CreateTask( req, slave );                       break;
           }
          case SlvCreate:    PRHandle_CreateSlave( req, slave );      break;
-         case SlvDissipate: PRHandle_Dissipate( req, slave );        break;
-         case Service:      PR_int__handle_PRServiceReq( slave );    break; //resume into PR's own language env
+         case SlvDissipate: PRHandle_EndSlave( req, slave );         break;
+         case Service:      PR_int__handle_PRServiceReq( slave );    break; //resumes into Service lang env
          case Hardware: //for future expansion
          case IO:       //for future expansion
          case OSCall:   //for future expansion
@@ -704,13 +272,13 @@
        }
    #endif
             HOLISTIC__Record_Assigner_end;
-      return;
+      return FALSE;
     }
  
  ReturnAfterAssigningWork:  //All paths goto here.. to provide single point for holistic..
     {
             HOLISTIC__Record_Assigner_end;
-      return;
+      return TRUE;
     }
  }
 
@@ -774,7 +342,7 @@
  */
 inline
 void
-PRHandle_Dissipate( PRReqst *req, SlaveVP *slave )
+PRHandle_EndSlave( PRReqst *req, SlaveVP *slave )
  { PRProcess *process;
    PRLangEnv *protoLangEnv;
    
diff -r e5bd470b562b -r e6a68e7ea63f CoreController.c
--- a/CoreController.c	Mon Jan 14 15:31:23 2013 -0800
+++ b/CoreController.c	Mon Jan 14 16:10:37 2013 -0800
@@ -71,8 +71,9 @@
  { 
    int32           thisCoresIdx;
    int32           numRepetitionsWithNoWork;
+   bool32          foundWork;
    SlaveVP        *currVP;
-   AnimSlot       *currSlot, **animSlots;
+   AnimSlot       *animSlot;
    int32           currSlotIdx;
    volatile int32 *addrOfMasterLock; //thing pointed to is volatile, not ptr
    SlaveVP        *thisCoresMasterVP;
@@ -94,8 +95,7 @@
       //Assembly that saves addr of label of return instr -- label in assmbly
    recordCoreCtlrReturnLabelAddr((void**)&(_PRTopEnv->coreCtlrReturnPt));
 
-   animSlots = _PRTopEnv->allAnimSlots[ thisCoresIdx ];
-   currSlotIdx = 0; //start at slot 0, go up until one empty, then do master
+   animSlot = _PRTopEnv->allAnimSlots[ thisCoresIdx ];
    numRepetitionsWithNoWork = 0;
    addrOfMasterLock = &(_PRTopEnv->masterLock);
    thisCoresMasterVP = _PRTopEnv->masterVPs[ thisCoresIdx ];
@@ -136,77 +136,67 @@
              //Alternatively, the VP suspend primitive could just not bother
              // returning from switchToSlv, and instead jmp directly to here.
             
-      if( currSlotIdx >= NUM_ANIM_SLOTS ) goto SwitchToMaster;
-      currSlot = animSlots[ currSlotIdx ];
+      if(animSlot->slaveAssignedToSlot->typeOfVP == Idle)
+       { //The Holistic stuff turns on idle slaves..  but can also be in mode
+         // where have no idle slaves..  so, the body of this IF statement can
+         // only be executed when HOLISTIC is turned on..
+         numRepetitionsWithNoWork ++;
+               HOLISTIC__Record_last_work;
+       } 
+      
 
-      if( ! currSlot->needsWorkAssigned ) //slot does have slave assigned
-       { if(currSlot->slaveAssignedToSlot->typeOfVP == Idle)
-          { numRepetitionsWithNoWork ++;
-          } 
-         else 
-          { numRepetitionsWithNoWork = 0;     //reset back2back master count
+
+            HOLISTIC__Record_AppResponderInvocation_start;
+            MEAS__Capture_Pre_Master_Lock_Point;
+
+      int numTriesToGetLock = 0; int gotLock = 0;
+      while( currVP == NULL ) //keep going until get master lock
+       { 
+            //At this point, first thing to do is get lock.  But, want to
+            // reduce lock contention from cores with no work, so first
+            // check if this is a core with no work, and busy wait if so.
+            //Then, if it's been way too long without work, yield pthread
+         if( numRepetitionsWithNoWork > NUM_REPS_W_NO_WORK_BEFORE_BACKOFF)
+            doBackoff_for_TooLongWithNoWork( numRepetitionsWithNoWork, &seed1, &seed2 );
+         if( numRepetitionsWithNoWork > NUM_REPS_W_NO_WORK_BEFORE_YIELD )
+          { numRepetitionsWithNoWork = 0; pthread_yield(); }
+
+
+            //Now, try to get the lock
+         gotLock = __sync_bool_compare_and_swap( addrOfMasterLock,
+                                                 UNLOCKED, LOCKED );
+         if( gotLock )
+          {    //At this point, have successfully gotten master lock.
+               //So, break out of get-lock loop.
+            break;  //end while -- have a VP to animate now
           }
-         currSlotIdx ++;
-         currVP = currSlot->slaveAssignedToSlot;
-         HOLISTIC__Record_last_work;
-       }
-      else //slot is empty, so switch to master
-       {
-       SwitchToMaster:
-         currSlotIdx = 0; //doing switch to master, so start over at slot 0
-         currVP = NULL;
+            //Get here only when failed to get lock
 
-               MEAS__Capture_Pre_Master_Lock_Point;
-               HOLISTIC__Record_AppResponderInvocation_start;
+         numTriesToGetLock++;   //if too many, means too much contention
+         if( numTriesToGetLock > NUM_TRIES_BEFORE_DO_BACKOFF ) 
+            doBackoff_for_TooLongToGetLock( numTriesToGetLock, &seed1, &seed2 );
+         if( numTriesToGetLock > MASTERLOCK_RETRIES_BEFORE_YIELD ) 
+          { numTriesToGetLock = 0; pthread_yield(); }
+       } //while( currVP == NULL )
+            MEAS__Capture_Post_Master_Lock_Point;
 
-         int numTriesToGetLock = 0; int gotLock = 0;
-         while( currVP == NULL ) //keep going until get master lock
-          { 
-               //At this point, first thing to do is get lock.  But, want to
-               // reduce lock contention from cores with no work, so first
-               // check if this is a core with no work, and busy wait if so.
-               //Then, if it's been way too long without work, yield pthread
-            if( numRepetitionsWithNoWork > NUM_REPS_W_NO_WORK_BEFORE_BACKOFF)
-               doBackoff_for_TooLongWithNoWork( numRepetitionsWithNoWork, &seed1, &seed2 );
-            if( numRepetitionsWithNoWork > NUM_REPS_W_NO_WORK_BEFORE_YIELD )
-             { numRepetitionsWithNoWork = 0; pthread_yield(); }
+         //have master lock, perform master function, which manages request
+         // handling and assigning work to this core's slot
+      foundWork =
+         masterFunction( animSlot );
+      if( foundWork )
+         numRepetitionsWithNoWork = 0;
+      else
+         numRepetitionsWithNoWork += 1;
 
-               
-               //Now, try to get the lock
-            gotLock = __sync_bool_compare_and_swap( addrOfMasterLock,
-                                                    UNLOCKED, LOCKED );
-            if( gotLock )
-             {    //At this point, have run out of slaves, so tried to get
-                  // the master lock, and have successfully gotten it.
-                  //So, set the currVP to this core's masterVP and break out
-                  // of the get-lock loop.  Below, assembly code will switch
-                  // the core over to animating the masterVP.  When it's 
-                  // done, the masterVP will use assembly to switch the core
-                  // back to animating this core controller
-               currVP = thisCoresMasterVP;
-               numRepetitionsWithNoWork += 1;
-               break;  //end while -- have a VP to animate now
-             }
-               //Get here only when failed to get lock
-
-            numTriesToGetLock++;   //if too many, means too much contention
-            if( numTriesToGetLock > NUM_TRIES_BEFORE_DO_BACKOFF ) 
-               doBackoff_for_TooLongToGetLock( numTriesToGetLock, &seed1, &seed2 );
-            if( numTriesToGetLock > MASTERLOCK_RETRIES_BEFORE_YIELD ) 
-             { numTriesToGetLock = 0; pthread_yield(); }
-          } //while( currVP == NULL )
-               MEAS__Capture_Post_Master_Lock_Point;
-       } //else
-
-        HOLISTIC__Record_Work_start;
+            HOLISTIC__Record_Work_start;
 
       switchToSlv(currVP); //Slave suspend makes core "return" from this call
       flushRegisters();    //prevent GCC optimization from doing bad things 
 
-        HOLISTIC__Record_Work_end;
-      
              MEAS__Capture_End_Susp_in_CoreCtlr_ForSys;
-          
+             HOLISTIC__Record_Work_end;
+           
     }//while(1)
  }