# HG changeset patch
# User Some Random Person <seanhalle@yahoo.com>
# Date 1331869674 25200
# Node ID 421bde2a07d70b7f68897d24ed6a912221641dc4
# Parent  88fd85921d7f20505f80c0f086aeccb2615ba106
Removed work-stealing and all references to gates; renamed MasterLoop to AnimationMaster in comments and measurement macros

diff -r 88fd85921d7f -r 421bde2a07d7 AnimationMaster.c
--- a/AnimationMaster.c	Thu Mar 15 20:35:18 2012 -0700
+++ b/AnimationMaster.c	Thu Mar 15 20:47:54 2012 -0700
@@ -190,204 +190,10 @@
        }
     }
 
-   
-   #ifdef SYS__TURN_ON_WORK_STEALING
-      /*If no slots filled, means no more work, look for work to steal. */
-   if( numSlotsFilled == 0 )
-    { gateProtected_stealWorkInto( currSlot, readyToAnimateQ, masterVP );
-    }
-   #endif
-
          MEAS__Capture_Post_Master_Point;
    
    masterSwitchToCoreCtlr( masterVP );
    flushRegisters();
-   }//MasterLoop
-
-
+   }//while(1) 
  }
 
-
-//===========================  Work Stealing  ==============================
-
-/*This is first of two work-stealing approaches.  It's not used, but left
- * in the code as a simple illustration of the principle.  This version
- * has a race condition -- the core controllers are accessing their own
- * animation slots at the same time that this work-stealer on a different
- * core is..
- *Because the core controllers run outside the master lock, this interaction
- * is not protected.
- */
-void inline
-stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ,
-               SlaveVP *masterVP )
- { 
-   SlaveVP   *stolenSlv;
-   int32        coreIdx, i;
-   VMSQueueStruc *currQ;
-
-   stolenSlv = NULL;
-   coreIdx = masterVP->coreAnimatedBy;
-   for( i = 0; i < NUM_CORES -1; i++ )
-    {
-      if( coreIdx >= NUM_CORES -1 )
-       { coreIdx = 0;
-       }
-      else
-       { coreIdx++;
-       }
-      //TODO: fix this for coreCtlr scans slots
-//      currQ = _VMSMasterEnv->readyToAnimateQs[coreIdx];
-      if( numInVMSQ( currQ ) > 0 )
-       { stolenSlv = readVMSQ (currQ );
-         break;
-       }
-    }
-
-   if( stolenSlv != NULL )
-    { currSlot->slaveAssignedToSlot = stolenSlv;
-      stolenSlv->schedSlot           = currSlot;
-      currSlot->needsSlaveAssigned  = FALSE;
-
-      writeVMSQ( stolenSlv, readyToAnimateQ );
-    }
- }
-
-/*This algorithm makes the common case fast.  Make the coreloop passive,
- * and show its progress.  Make the stealer control a gate that coreloop
- * has to pass.
- *To avoid interference, only one stealer at a time.  Use a global
- * stealer-lock, so only the stealer is slowed.
- *
- *The pattern is based on a gate -- stealer shuts the gate, then monitors
- * to be sure any already past make it all the way out, before starting.
- *So, have a "progress" measure just before the gate, then have two after it,
- * one is in a "waiting room" outside the gate, the other is at the exit.
- *Then, the stealer first shuts the gate, then checks the progress measure
- * outside it, then looks to see if the progress measure at the exit is the
- * same.  If yes, it knows the protected area is empty 'cause no other way
- * to get in and the last to get in also exited.
- *If the progress measure at the exit is not the same, then the stealer goes
- * into a loop checking both the waiting-area and the exit progress-measures
- * until one of them shows the same as the measure outside the gate.  Might
- * as well re-read the measure outside the gate each go around, just to be
- * sure.  It is guaranteed that one of the two will eventually match the one
- * outside the gate.
- *
- *Here's an informal proof of correctness:
- *The gate can be closed at any point, and have only four cases:
- *  1) coreloop made it past the gate-closing but not yet past the exit
- *  2) coreloop made it past the pre-gate progress update but not yet past
- *     the gate,
- *  3) coreloop is right before the pre-gate update
- *  4) coreloop is past the exit and far from the pre-gate update.
- *
- * Covering the cases in reverse order,
- *  4) is not a problem -- stealer will read pre-gate progress, see that it
- *     matches exit progress, and the gate is closed, so stealer can proceed.
- *  3) stealer will read pre-gate progress just after coreloop updates it..
- *     so stealer goes into a loop until the coreloop causes wait-progress
- *     to match pre-gate progress, so then stealer can proceed
- *  2) same as 3..
- *  1) stealer reads pre-gate progress, sees that it's different than exit,
- *     so goes into loop until exit matches pre-gate, now it knows coreloop
- *     is not in protected and cannot get back in, so can proceed.
- *
- *Implementation for the stealer:
- *
- *First, acquire the stealer lock -- only cores with no work to do will
- * compete to steal, so not a big performance penalty having only one --
- * will rarely have multiple stealers in a system with plenty of work -- and
- * in a system with little work, it doesn't matter.
- *
- *Note, have single-reader, single-writer pattern for all variables used to
- * communicate between stealer and victims
- *
- *So, scan the queues of the core controllers, until find non-empty.  Each core
- * has its own list that it scans.  The list goes in order from closest to
- * furthest core, so it steals first from close cores.  Later can add
- * taking info from the app about overlapping footprints, and scan all the
- * others then choose work with the most footprint overlap with the contents
- * of this core's cache.
- *
- *Now, have a victim want to take work from.  So, shut the gate in that
- * coreloop, by setting the "gate closed" var on its stack to TRUE.
- *Then, read the core's pre-gate progress and compare to the core's exit
- * progress.
- *If same, can proceed to take work from the coreloop's queue.  When done,
- * write FALSE to gate closed var.
- *If different, then enter a loop that reads the pre-gate progress, then
- * compares to exit progress then to wait progress.  When one of two
- * matches, proceed.  Take work from the coreloop's queue.  When done,
- * write FALSE to the gate closed var.
- * 
- */
-void inline
-gateProtected_stealWorkInto( SchedSlot *currSlot,
-                             VMSQueueStruc *myReadyToAnimateQ,
-                             SlaveVP *masterVP )
- {
-   SlaveVP     *stolenSlv;
-   int32          coreIdx, i, haveAVictim, gotLock;
-   VMSQueueStruc *victimsQ;
-
-   volatile GateStruc *vicGate;
-   int32               coreMightBeInProtected;
-
-
-
-      //see if any other cores have work available to steal
-   haveAVictim = FALSE;
-   coreIdx = masterVP->coreAnimatedBy;
-   for( i = 0; i < NUM_CORES -1; i++ )
-    {
-      if( coreIdx >= NUM_CORES -1 )
-       { coreIdx = 0;
-       }
-      else
-       { coreIdx++;
-       }
-      //TODO: fix this for coreCtlr scans slots
-//      victimsQ = _VMSMasterEnv->readyToAnimateQs[coreIdx];
-      if( numInVMSQ( victimsQ ) > 0 )
-       { haveAVictim = TRUE;
-         vicGate = _VMSMasterEnv->workStealingGates[ coreIdx ];
-         break;
-       }
-    }
-   if( !haveAVictim ) return;  //no work to steal, exit
-
-      //have a victim core, now get the stealer-lock
-   gotLock =__sync_bool_compare_and_swap( &(_VMSMasterEnv->workStealingLock),
-                                                          UNLOCKED, LOCKED );
-   if( !gotLock ) return; //go back to core controller, which will re-start master
-
-
-   //====== Start Gate-protection =======
-   vicGate->gateClosed = TRUE;
-   coreMightBeInProtected= vicGate->preGateProgress != vicGate->exitProgress;
-   while( coreMightBeInProtected )
-    {    //wait until sure
-      if( vicGate->preGateProgress == vicGate->waitProgress )
-         coreMightBeInProtected = FALSE;
-      if( vicGate->preGateProgress == vicGate->exitProgress )
-         coreMightBeInProtected = FALSE;
-    }
-
-   stolenSlv = readVMSQ ( victimsQ );
-
-   vicGate->gateClosed = FALSE;
-   //======= End Gate-protection  =======
-
-
-   if( stolenSlv != NULL )  //victim could have been in protected and taken
-    { currSlot->slaveAssignedToSlot = stolenSlv;
-      stolenSlv->schedSlot           = currSlot;
-      currSlot->needsSlaveAssigned  = FALSE;
-
-      writeVMSQ( stolenSlv, myReadyToAnimateQ );
-    }
-
-      //unlock the work stealing lock
-   _VMSMasterEnv->workStealingLock = UNLOCKED;
- }
diff -r 88fd85921d7f -r 421bde2a07d7 CoreController.c
--- a/CoreController.c	Thu Mar 15 20:35:18 2012 -0700
+++ b/CoreController.c	Thu Mar 15 20:47:54 2012 -0700
@@ -54,7 +54,7 @@
  *The reason for having the animation slots and core controller is to 
  * amortize the overhead of switching to the master VP and running it.  With
  * multiple animation slots, the time to switch-to-master and the code in
- * the master loop is divided by the number of animation slots.
+ * the animation master is divided by the number of animation slots.
  *The core controller and animation slots are not fundamental parts of VMS,
  * but rather optimizations put into the shared-semantic-state version of
  * VMS.  Other versions of VMS will not have a core controller nor scheduling
@@ -85,21 +85,12 @@
       //Variables used in random-backoff, for master-lock and waiting for work
    uint32_t seed1 = rand()%1000; // init random number generator for backoffs
    uint32_t seed2 = rand()%1000;
-      //Variable for work-stealing -- a gate protects a critical section
-   volatile GateStruc gate;      //on stack to avoid false-sharing
 
    
    //===============  Initializations ===================
    coreCtlrThdParams = (ThdParams *)paramsIn;
    thisCoresIdx = coreCtlrThdParams->coreNum;
 
-   gate.gateClosed      = FALSE;
-   gate.preGateProgress = 0;
-   gate.waitProgress    = 0;
-   gate.exitProgress    = 0;
-   //TODO: pad these to prevent false-sharing, and fix the race at startup
-   _VMSMasterEnv->workStealingGates[ thisCoresIdx ] = (GateStruc*)&gate;
-
       //Assembly that saves addr of label of return instr -- label in assmbly
    recordCoreCtlrReturnLabelAddr((void**)&(_VMSMasterEnv->coreCtlrReturnPt));
 
@@ -145,7 +136,6 @@
       if( currSlotIdx >= NUM_SCHED_SLOTS ) goto switchToMaster;
       currSlot = schedSlots[ currSlotIdx ];
 
-      
       if( ! currSlot->needsSlaveAssigned ) //slot does have slave assigned
        { numRepetitionsWithNoWork = 0;     //reset back2back master count
          currSlotIdx ++;
diff -r 88fd85921d7f -r 421bde2a07d7 Hardware_Dependent/VMS__primitives_asm.s
--- a/Hardware_Dependent/VMS__primitives_asm.s	Thu Mar 15 20:35:18 2012 -0700
+++ b/Hardware_Dependent/VMS__primitives_asm.s	Thu Mar 15 20:47:54 2012 -0700
@@ -77,7 +77,7 @@
 
 
 //switches to core controller from master. saves return address
-//Releases masterLock so the next MasterLoop can be executed
+//Releases masterLock so the next AnimationMaster can be executed
 /* SlaveVP  offsets:
  * 0x10  stackPtr
  * 0x18 framePtr
diff -r 88fd85921d7f -r 421bde2a07d7 Services_Offered_by_VMS/Measurement_and_Stats/MEAS__macros.h
--- a/Services_Offered_by_VMS/Measurement_and_Stats/MEAS__macros.h	Thu Mar 15 20:35:18 2012 -0700
+++ b/Services_Offered_by_VMS/Measurement_and_Stats/MEAS__macros.h	Thu Mar 15 20:47:54 2012 -0700
@@ -249,22 +249,22 @@
        TSCountLowHigh   startReqHdlr; \
        uint64           totalPluginCycles; \
        uint32           numPluginAnimations; \
-       uint64           cyclesTillStartMasterLoop; \
-       TSCountLowHigh   endMasterLoop;
+       uint64           cyclesTillStartAnimationMaster; \
+       TSCountLowHigh   endAnimationMaster;
 
-   #define MEAS__startMasterLoop_forSys \
+   #define MEAS__startAnimationMaster_forSys \
       TSCountLowHigh startStamp1, endStamp1; \
       saveTSCLowHigh( endStamp1 ); \
-      _VMSMasterEnv->cyclesTillStartMasterLoop = \
+      _VMSMasterEnv->cyclesTillStartAnimationMaster = \
       endStamp1.longVal - masterVP->startSusp.longVal;
 
    #define Meas_startReqHdlr_forSys \
         saveTSCLowHigh( startStamp1 ); \
         _VMSMasterEnv->startReqHdlr.longVal = startStamp1.longVal;
  
-   #define MEAS__endMasterLoop_forSys \
+   #define MEAS__endAnimationMaster_forSys \
       saveTSCLowHigh( startStamp1 ); \
-      _VMSMasterEnv->endMasterLoop.longVal = startStamp1.longVal;
+      _VMSMasterEnv->endAnimationMaster.longVal = startStamp1.longVal;
 
    /*A TSC is stored in VP first thing inside wrapper-lib
     * Now, measures cycles from there to here
@@ -285,9 +285,9 @@
    #define MEAS__Insert_System_Meas_Fields_into_Slave 
    #define MEAS__Insert_System_Meas_Fields_into_MasterEnv 
    #define MEAS__Make_Meas_Hists_for_System_Meas
-   #define MEAS__startMasterLoop_forSys 
+   #define MEAS__startAnimationMaster_forSys 
    #define MEAS__startReqHdlr_forSys
-   #define MEAS__endMasterLoop_forSys
+   #define MEAS__endAnimationMaster_forSys
    #define MEAS__Capture_End_Susp_in_CoreCtlr_ForSys
    #define MEAS__Print_Hists_for_System_Meas 
 #endif
diff -r 88fd85921d7f -r 421bde2a07d7 VMS.h
--- a/VMS.h	Thu Mar 15 20:35:18 2012 -0700
+++ b/VMS.h	Thu Mar 15 20:47:54 2012 -0700
@@ -41,7 +41,6 @@
 typedef struct _SlaveVP       SlaveVP;
 typedef struct _MasterVP      MasterVP;
 typedef struct _IntervalProbe IntervalProbe;
-typedef struct _GateStruc     GateStruc;
 
 
 typedef SlaveVP *(*SlaveAssigner)  ( void *, SchedSlot*); //semEnv, slot for HW info
@@ -171,12 +170,7 @@
       //Memory management related
    MallocArrays    *freeLists;
    int32            amtOfOutstandingMem;//total currently allocated
-   
-      //Work-stealing related
-   GateStruc       *workStealingGates[ NUM_CORES ]; //concurrent work-steal
-   int32            workStealingLock;
-   
-   
+      
       //=========== MEASUREMENT STUFF =============
        IntervalProbe   **intervalProbes;
        PrivDynArrayInfo *dynIntervalProbesInfo;
@@ -202,15 +196,6 @@
  }
 VMSExcp;
 
-struct _GateStruc
- {
-   int32 gateClosed;
-   int32 preGateProgress;
-   int32 waitProgress;
-   int32 exitProgress;
- };
-//GateStruc
-
 //=======================  OS Thread related  ===============================
 
 void * coreController( void *paramsIn );  //standard PThreads fn prototype
diff -r 88fd85921d7f -r 421bde2a07d7 VMS__startup_and_shutdown.c
--- a/VMS__startup_and_shutdown.c	Thu Mar 15 20:35:18 2012 -0700
+++ b/VMS__startup_and_shutdown.c	Thu Mar 15 20:47:54 2012 -0700
@@ -296,12 +296,10 @@
       masterVPs[ coreIdx ] = VMS_int__create_slaveVP( (TopLevelFnPtr)&animationMaster, (void*)masterEnv );
       masterVPs[ coreIdx ]->coreAnimatedBy = coreIdx;
       allSchedSlots[ coreIdx ] = create_sched_slots(); //makes for one core
-      _VMSMasterEnv->workStealingGates[ coreIdx ] = NULL;
     }
    _VMSMasterEnv->masterVPs        = masterVPs;
    _VMSMasterEnv->masterLock       = UNLOCKED;
    _VMSMasterEnv->allSchedSlots    = allSchedSlots;
-   _VMSMasterEnv->workStealingLock = UNLOCKED;
    _VMSMasterEnv->measHistsInfo = NULL; 
 
    //============================= MEASUREMENT STUFF ========================
@@ -441,10 +439,10 @@
 
 
 //TODO: look at architecting cleanest separation between request handler
-// and master loop, for dissipate, create, shutdown, and other non-semantic
+// and animation master, for dissipate, create, shutdown, and other non-semantic
 // requests.  Issue is chain: one removes requests from AppSlv, one dispatches
 // on type of request, and one handles each type..  but some types require
-// action from both request handler and master loop -- maybe just give the
+// action from both request handler and animation master -- maybe just give the
 // request handler calls like:  VMS__handle_X_request_type