changeset 232:421bde2a07d7 Common_Ancestor

REMOVED work-stealing, and all references to gates
author Some Random Person <seanhalle@yahoo.com>
date Thu, 15 Mar 2012 20:47:54 -0700
parents 88fd85921d7f
children a0ac58d8201c
files AnimationMaster.c CoreController.c Hardware_Dependent/VMS__primitives_asm.s Services_Offered_by_VMS/Measurement_and_Stats/MEAS__macros.h VMS.h VMS__startup_and_shutdown.c
diffstat 6 files changed, 14 insertions(+), 235 deletions(-) [+]
line diff
     1.1 --- a/AnimationMaster.c	Thu Mar 15 20:35:18 2012 -0700
     1.2 +++ b/AnimationMaster.c	Thu Mar 15 20:47:54 2012 -0700
     1.3 @@ -190,204 +190,10 @@
     1.4         }
     1.5      }
     1.6  
     1.7 -   
     1.8 -   #ifdef SYS__TURN_ON_WORK_STEALING
     1.9 -      /*If no slots filled, means no more work, look for work to steal. */
    1.10 -   if( numSlotsFilled == 0 )
    1.11 -    { gateProtected_stealWorkInto( currSlot, readyToAnimateQ, masterVP );
    1.12 -    }
    1.13 -   #endif
    1.14 -
    1.15           MEAS__Capture_Post_Master_Point;
    1.16     
    1.17     masterSwitchToCoreCtlr( masterVP );
    1.18     flushRegisters();
    1.19 -   }//MasterLoop
    1.20 -
    1.21 -
    1.22 +   }//while(1) 
    1.23   }
    1.24  
    1.25 -
    1.26 -//===========================  Work Stealing  ==============================
    1.27 -
    1.28 -/*This is first of two work-stealing approaches.  It's not used, but left
    1.29 - * in the code as a simple illustration of the principle.  This version
    1.30 - * has a race condition -- the core controllers are accessing their own
    1.31 - * animation slots at the same time that this work-stealer on a different
    1.32 - * core is..
    1.33 - *Because the core controllers run outside the master lock, this interaction
    1.34 - * is not protected.
    1.35 - */
    1.36 -void inline
    1.37 -stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ,
    1.38 -               SlaveVP *masterVP )
    1.39 - { 
    1.40 -   SlaveVP   *stolenSlv;
    1.41 -   int32        coreIdx, i;
    1.42 -   VMSQueueStruc *currQ;
    1.43 -
    1.44 -   stolenSlv = NULL;
    1.45 -   coreIdx = masterVP->coreAnimatedBy;
    1.46 -   for( i = 0; i < NUM_CORES -1; i++ )
    1.47 -    {
    1.48 -      if( coreIdx >= NUM_CORES -1 )
    1.49 -       { coreIdx = 0;
    1.50 -       }
    1.51 -      else
    1.52 -       { coreIdx++;
    1.53 -       }
    1.54 -      //TODO: fix this for coreCtlr scans slots
    1.55 -//      currQ = _VMSMasterEnv->readyToAnimateQs[coreIdx];
    1.56 -      if( numInVMSQ( currQ ) > 0 )
    1.57 -       { stolenSlv = readVMSQ (currQ );
    1.58 -         break;
    1.59 -       }
    1.60 -    }
    1.61 -
    1.62 -   if( stolenSlv != NULL )
    1.63 -    { currSlot->slaveAssignedToSlot = stolenSlv;
    1.64 -      stolenSlv->schedSlot           = currSlot;
    1.65 -      currSlot->needsSlaveAssigned  = FALSE;
    1.66 -
    1.67 -      writeVMSQ( stolenSlv, readyToAnimateQ );
    1.68 -    }
    1.69 - }
    1.70 -
    1.71 -/*This algorithm makes the common case fast.  Make the coreloop passive,
    1.72 - * and show its progress.  Make the stealer control a gate that coreloop
    1.73 - * has to pass.
    1.74 - *To avoid interference, only one stealer at a time.  Use a global
    1.75 - * stealer-lock, so only the stealer is slowed.
    1.76 - *
    1.77 - *The pattern is based on a gate -- stealer shuts the gate, then monitors
    1.78 - * to be sure any already past make it all the way out, before starting.
    1.79 - *So, have a "progress" measure just before the gate, then have two after it,
    1.80 - * one is in a "waiting room" outside the gate, the other is at the exit.
    1.81 - *Then, the stealer first shuts the gate, then checks the progress measure
    1.82 - * outside it, then looks to see if the progress measure at the exit is the
    1.83 - * same.  If yes, it knows the protected area is empty 'cause no other way
    1.84 - * to get in and the last to get in also exited.
    1.85 - *If the progress measure at the exit is not the same, then the stealer goes
    1.86 - * into a loop checking both the waiting-area and the exit progress-measures
    1.87 - * until one of them shows the same as the measure outside the gate.  Might
    1.88 - * as well re-read the measure outside the gate each go around, just to be
    1.89 - * sure.  It is guaranteed that one of the two will eventually match the one
    1.90 - * outside the gate.
    1.91 - *
    1.92 - *Here's an informal proof of correctness:
    1.93 - *The gate can be closed at any point, and have only four cases:
    1.94 - *  1) coreloop made it past the gate-closing but not yet past the exit
    1.95 - *  2) coreloop made it past the pre-gate progress update but not yet past
    1.96 - *     the gate,
    1.97 - *  3) coreloop is right before the pre-gate update
    1.98 - *  4) coreloop is past the exit and far from the pre-gate update.
    1.99 - *
   1.100 - * Covering the cases in reverse order,
   1.101 - *  4) is not a problem -- stealer will read pre-gate progress, see that it
   1.102 - *     matches exit progress, and the gate is closed, so stealer can proceed.
   1.103 - *  3) stealer will read pre-gate progress just after coreloop updates it..
   1.104 - *     so stealer goes into a loop until the coreloop causes wait-progress
   1.105 - *     to match pre-gate progress, so then stealer can proceed
   1.106 - *  2) same as 3..
   1.107 - *  1) stealer reads pre-gate progress, sees that it's different than exit,
   1.108 - *     so goes into loop until exit matches pre-gate, now it knows coreloop
   1.109 - *     is not in protected and cannot get back in, so can proceed.
   1.110 - *
   1.111 - *Implementation for the stealer:
   1.112 - *
   1.113 - *First, acquire the stealer lock -- only cores with no work to do will
   1.114 - * compete to steal, so not a big performance penalty having only one --
   1.115 - * will rarely have multiple stealers in a system with plenty of work -- and
   1.116 - * in a system with little work, it doesn't matter.
   1.117 - *
   1.118 - *Note, have single-reader, single-writer pattern for all variables used to
   1.119 - * communicate between stealer and victims
   1.120 - *
   1.121 - *So, scan the queues of the core controllers, until find non-empty.  Each core
   1.122 - * has its own list that it scans.  The list goes in order from closest to
   1.123 - * furthest core, so it steals first from close cores.  Later can add
   1.124 - * taking info from the app about overlapping footprints, and scan all the
   1.125 - * others then choose work with the most footprint overlap with the contents
   1.126 - * of this core's cache.
   1.127 - *
   1.128 - *Now, have a victim want to take work from.  So, shut the gate in that
   1.129 - * coreloop, by setting the "gate closed" var on its stack to TRUE.
   1.130 - *Then, read the core's pre-gate progress and compare to the core's exit
   1.131 - * progress.
   1.132 - *If same, can proceed to take work from the coreloop's queue.  When done,
   1.133 - * write FALSE to gate closed var.
   1.134 - *If different, then enter a loop that reads the pre-gate progress, then
   1.135 - * compares to exit progress then to wait progress.  When one of two
   1.136 - * matches, proceed.  Take work from the coreloop's queue.  When done,
   1.137 - * write FALSE to the gate closed var.
   1.138 - * 
   1.139 - */
   1.140 -void inline
   1.141 -gateProtected_stealWorkInto( SchedSlot *currSlot,
   1.142 -                             VMSQueueStruc *myReadyToAnimateQ,
   1.143 -                             SlaveVP *masterVP )
   1.144 - {
   1.145 -   SlaveVP     *stolenSlv;
   1.146 -   int32          coreIdx, i, haveAVictim, gotLock;
   1.147 -   VMSQueueStruc *victimsQ;
   1.148 -
   1.149 -   volatile GateStruc *vicGate;
   1.150 -   int32               coreMightBeInProtected;
   1.151 -
   1.152 -
   1.153 -
   1.154 -      //see if any other cores have work available to steal
   1.155 -   haveAVictim = FALSE;
   1.156 -   coreIdx = masterVP->coreAnimatedBy;
   1.157 -   for( i = 0; i < NUM_CORES -1; i++ )
   1.158 -    {
   1.159 -      if( coreIdx >= NUM_CORES -1 )
   1.160 -       { coreIdx = 0;
   1.161 -       }
   1.162 -      else
   1.163 -       { coreIdx++;
   1.164 -       }
   1.165 -      //TODO: fix this for coreCtlr scans slots
   1.166 -//      victimsQ = _VMSMasterEnv->readyToAnimateQs[coreIdx];
   1.167 -      if( numInVMSQ( victimsQ ) > 0 )
   1.168 -       { haveAVictim = TRUE;
   1.169 -         vicGate = _VMSMasterEnv->workStealingGates[ coreIdx ];
   1.170 -         break;
   1.171 -       }
   1.172 -    }
   1.173 -   if( !haveAVictim ) return;  //no work to steal, exit
   1.174 -
   1.175 -      //have a victim core, now get the stealer-lock
   1.176 -   gotLock =__sync_bool_compare_and_swap( &(_VMSMasterEnv->workStealingLock),
   1.177 -                                                          UNLOCKED, LOCKED );
   1.178 -   if( !gotLock ) return; //go back to core controller, which will re-start master
   1.179 -
   1.180 -
   1.181 -   //====== Start Gate-protection =======
   1.182 -   vicGate->gateClosed = TRUE;
   1.183 -   coreMightBeInProtected= vicGate->preGateProgress != vicGate->exitProgress;
   1.184 -   while( coreMightBeInProtected )
   1.185 -    {    //wait until sure
   1.186 -      if( vicGate->preGateProgress == vicGate->waitProgress )
   1.187 -         coreMightBeInProtected = FALSE;
   1.188 -      if( vicGate->preGateProgress == vicGate->exitProgress )
   1.189 -         coreMightBeInProtected = FALSE;
   1.190 -    }
   1.191 -
   1.192 -   stolenSlv = readVMSQ ( victimsQ );
   1.193 -
   1.194 -   vicGate->gateClosed = FALSE;
   1.195 -   //======= End Gate-protection  =======
   1.196 -
   1.197 -
   1.198 -   if( stolenSlv != NULL )  //victim could have been in protected and taken
   1.199 -    { currSlot->slaveAssignedToSlot = stolenSlv;
   1.200 -      stolenSlv->schedSlot           = currSlot;
   1.201 -      currSlot->needsSlaveAssigned  = FALSE;
   1.202 -
   1.203 -      writeVMSQ( stolenSlv, myReadyToAnimateQ );
   1.204 -    }
   1.205 -
   1.206 -      //unlock the work stealing lock
   1.207 -   _VMSMasterEnv->workStealingLock = UNLOCKED;
   1.208 - }
     2.1 --- a/CoreController.c	Thu Mar 15 20:35:18 2012 -0700
     2.2 +++ b/CoreController.c	Thu Mar 15 20:47:54 2012 -0700
     2.3 @@ -54,7 +54,7 @@
     2.4   *The reason for having the animation slots and core controller is to 
     2.5   * amortize the overhead of switching to the master VP and running it.  With
     2.6   * multiple animation slots, the time to switch-to-master and the code in
     2.7 - * the master loop is divided by the number of animation slots.
     2.8 + * the animation master is divided by the number of animation slots.
     2.9   *The core controller and animation slots are not fundamental parts of VMS,
    2.10   * but rather optimizations put into the shared-semantic-state version of
    2.11   * VMS.  Other versions of VMS will not have a core controller nor scheduling
    2.12 @@ -85,21 +85,12 @@
    2.13        //Variables used in random-backoff, for master-lock and waiting for work
    2.14     uint32_t seed1 = rand()%1000; // init random number generator for backoffs
    2.15     uint32_t seed2 = rand()%1000;
    2.16 -      //Variable for work-stealing -- a gate protects a critical section
    2.17 -   volatile GateStruc gate;      //on stack to avoid false-sharing
    2.18  
    2.19     
    2.20     //===============  Initializations ===================
    2.21     coreCtlrThdParams = (ThdParams *)paramsIn;
    2.22     thisCoresIdx = coreCtlrThdParams->coreNum;
    2.23  
    2.24 -   gate.gateClosed      = FALSE;
    2.25 -   gate.preGateProgress = 0;
    2.26 -   gate.waitProgress    = 0;
    2.27 -   gate.exitProgress    = 0;
    2.28 -   //TODO: pad these to prevent false-sharing, and fix the race at startup
    2.29 -   _VMSMasterEnv->workStealingGates[ thisCoresIdx ] = (GateStruc*)&gate;
    2.30 -
    2.31        //Assembly that saves addr of label of return instr -- label in assmbly
    2.32     recordCoreCtlrReturnLabelAddr((void**)&(_VMSMasterEnv->coreCtlrReturnPt));
    2.33  
    2.34 @@ -145,7 +136,6 @@
    2.35        if( currSlotIdx >= NUM_SCHED_SLOTS ) goto switchToMaster;
    2.36        currSlot = schedSlots[ currSlotIdx ];
    2.37  
    2.38 -      
    2.39        if( ! currSlot->needsSlaveAssigned ) //slot does have slave assigned
    2.40         { numRepetitionsWithNoWork = 0;     //reset back2back master count
    2.41           currSlotIdx ++;
     3.1 --- a/Hardware_Dependent/VMS__primitives_asm.s	Thu Mar 15 20:35:18 2012 -0700
     3.2 +++ b/Hardware_Dependent/VMS__primitives_asm.s	Thu Mar 15 20:47:54 2012 -0700
     3.3 @@ -77,7 +77,7 @@
     3.4  
     3.5  
     3.6  //switches to core controller from master. saves return address
     3.7 -//Releases masterLock so the next MasterLoop can be executed
     3.8 +//Releases masterLock so the next AnimationMaster can be executed
     3.9  /* SlaveVP  offsets:
    3.10   * 0x10  stackPtr
    3.11   * 0x18 framePtr
     4.1 --- a/Services_Offered_by_VMS/Measurement_and_Stats/MEAS__macros.h	Thu Mar 15 20:35:18 2012 -0700
     4.2 +++ b/Services_Offered_by_VMS/Measurement_and_Stats/MEAS__macros.h	Thu Mar 15 20:47:54 2012 -0700
     4.3 @@ -249,22 +249,22 @@
     4.4         TSCountLowHigh   startReqHdlr; \
     4.5         uint64           totalPluginCycles; \
     4.6         uint32           numPluginAnimations; \
     4.7 -       uint64           cyclesTillStartMasterLoop; \
     4.8 -       TSCountLowHigh   endMasterLoop;
     4.9 +       uint64           cyclesTillStartAnimationMaster; \
    4.10 +       TSCountLowHigh   endAnimationMaster;
    4.11  
    4.12 -   #define MEAS__startMasterLoop_forSys \
    4.13 +   #define MEAS__startAnimationMaster_forSys \
    4.14        TSCountLowHigh startStamp1, endStamp1; \
    4.15        saveTSCLowHigh( endStamp1 ); \
    4.16 -      _VMSMasterEnv->cyclesTillStartMasterLoop = \
    4.17 +      _VMSMasterEnv->cyclesTillStartAnimationMaster = \
    4.18        endStamp1.longVal - masterVP->startSusp.longVal;
    4.19  
    4.20     #define Meas_startReqHdlr_forSys \
    4.21          saveTSCLowHigh( startStamp1 ); \
    4.22          _VMSMasterEnv->startReqHdlr.longVal = startStamp1.longVal;
    4.23   
    4.24 -   #define MEAS__endMasterLoop_forSys \
    4.25 +   #define MEAS__endAnimationMaster_forSys \
    4.26        saveTSCLowHigh( startStamp1 ); \
    4.27 -      _VMSMasterEnv->endMasterLoop.longVal = startStamp1.longVal;
    4.28 +      _VMSMasterEnv->endAnimationMaster.longVal = startStamp1.longVal;
    4.29  
    4.30     /*A TSC is stored in VP first thing inside wrapper-lib
    4.31      * Now, measures cycles from there to here
    4.32 @@ -285,9 +285,9 @@
    4.33     #define MEAS__Insert_System_Meas_Fields_into_Slave 
    4.34     #define MEAS__Insert_System_Meas_Fields_into_MasterEnv 
    4.35     #define MEAS__Make_Meas_Hists_for_System_Meas
    4.36 -   #define MEAS__startMasterLoop_forSys 
    4.37 +   #define MEAS__startAnimationMaster_forSys 
    4.38     #define MEAS__startReqHdlr_forSys
    4.39 -   #define MEAS__endMasterLoop_forSys
    4.40 +   #define MEAS__endAnimationMaster_forSys
    4.41     #define MEAS__Capture_End_Susp_in_CoreCtlr_ForSys
    4.42     #define MEAS__Print_Hists_for_System_Meas 
    4.43  #endif
     5.1 --- a/VMS.h	Thu Mar 15 20:35:18 2012 -0700
     5.2 +++ b/VMS.h	Thu Mar 15 20:47:54 2012 -0700
     5.3 @@ -41,7 +41,6 @@
     5.4  typedef struct _SlaveVP       SlaveVP;
     5.5  typedef struct _MasterVP      MasterVP;
     5.6  typedef struct _IntervalProbe IntervalProbe;
     5.7 -typedef struct _GateStruc     GateStruc;
     5.8  
     5.9  
    5.10  typedef SlaveVP *(*SlaveAssigner)  ( void *, SchedSlot*); //semEnv, slot for HW info
    5.11 @@ -171,12 +170,7 @@
    5.12        //Memory management related
    5.13     MallocArrays    *freeLists;
    5.14     int32            amtOfOutstandingMem;//total currently allocated
    5.15 -   
    5.16 -      //Work-stealing related
    5.17 -   GateStruc       *workStealingGates[ NUM_CORES ]; //concurrent work-steal
    5.18 -   int32            workStealingLock;
    5.19 -   
    5.20 -   
    5.21 +      
    5.22        //=========== MEASUREMENT STUFF =============
    5.23         IntervalProbe   **intervalProbes;
    5.24         PrivDynArrayInfo *dynIntervalProbesInfo;
    5.25 @@ -202,15 +196,6 @@
    5.26   }
    5.27  VMSExcp;
    5.28  
    5.29 -struct _GateStruc
    5.30 - {
    5.31 -   int32 gateClosed;
    5.32 -   int32 preGateProgress;
    5.33 -   int32 waitProgress;
    5.34 -   int32 exitProgress;
    5.35 - };
    5.36 -//GateStruc
    5.37 -
    5.38  //=======================  OS Thread related  ===============================
    5.39  
    5.40  void * coreController( void *paramsIn );  //standard PThreads fn prototype
     6.1 --- a/VMS__startup_and_shutdown.c	Thu Mar 15 20:35:18 2012 -0700
     6.2 +++ b/VMS__startup_and_shutdown.c	Thu Mar 15 20:47:54 2012 -0700
     6.3 @@ -296,12 +296,10 @@
     6.4        masterVPs[ coreIdx ] = VMS_int__create_slaveVP( (TopLevelFnPtr)&animationMaster, (void*)masterEnv );
     6.5        masterVPs[ coreIdx ]->coreAnimatedBy = coreIdx;
     6.6        allSchedSlots[ coreIdx ] = create_sched_slots(); //makes for one core
     6.7 -      _VMSMasterEnv->workStealingGates[ coreIdx ] = NULL;
     6.8      }
     6.9     _VMSMasterEnv->masterVPs        = masterVPs;
    6.10     _VMSMasterEnv->masterLock       = UNLOCKED;
    6.11     _VMSMasterEnv->allSchedSlots    = allSchedSlots;
    6.12 -   _VMSMasterEnv->workStealingLock = UNLOCKED;
    6.13     _VMSMasterEnv->measHistsInfo = NULL; 
    6.14  
    6.15     //============================= MEASUREMENT STUFF ========================
    6.16 @@ -441,10 +439,10 @@
    6.17  
    6.18  
    6.19  //TODO: look at architecting cleanest separation between request handler
    6.20 -// and master loop, for dissipate, create, shutdown, and other non-semantic
    6.21 +// and animation master, for dissipate, create, shutdown, and other non-semantic
    6.22  // requests.  Issue is chain: one removes requests from AppSlv, one dispatches
    6.23  // on type of request, and one handles each type..  but some types require
    6.24 -// action from both request handler and master loop -- maybe just give the
    6.25 +// action from both request handler and animation master -- maybe just give the
    6.26  // request handler calls like:  VMS__handle_X_request_type
    6.27  
    6.28