diff SchedulingMaster.c @ 222:c88ce1db91ef

Compiles, but does not run properly -- and changed MasterLoop to SchedulingMaster
author Some Random Person <seanhalle@yahoo.com>
date Tue, 13 Mar 2012 10:02:06 -0700
parents
children b0b93147adfb
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/SchedulingMaster.c	Tue Mar 13 10:02:06 2012 -0700
     1.3 @@ -0,0 +1,349 @@
     1.4 +/*
     1.5 + * Copyright 2010  OpenSourceStewardshipFoundation
     1.6 + * 
     1.7 + * Licensed under BSD
     1.8 + */
     1.9 +
    1.10 +
    1.11 +
    1.12 +#include <stdio.h>
    1.13 +#include <stddef.h>
    1.14 +
    1.15 +#include "VMS.h"
    1.16 +
    1.17 +
    1.18 +//===========================================================================
    1.19 +void inline
    1.20 +stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ,
    1.21 +               SlaveVP *masterVP );
    1.22 +
    1.23 +//===========================================================================
    1.24 +
    1.25 +
    1.26 +
    1.27 +/*This code is animated by the virtual Master processor.
    1.28 + *
    1.29 + *Polls each sched slot exactly once, hands any requests made by a newly
    1.30 + * done slave to the "request handler" plug-in function
    1.31 + *
    1.32 + *Any slots that need a Slv assigned are given to the "schedule"
    1.33 + * plug-in function, which tries to assign a Slv (slave) to it.
    1.34 + *
    1.35 + *When all slots needing a processor have been given to the schedule plug-in,
    1.36 + * a fraction of the slaves successfully scheduled are put into the
    1.37 + * work queue, then a continuation of this function is put in, then the rest
    1.38 + * of the Slvs that were successfully scheduled.
    1.39 + *
    1.40 + *The first thing the continuation does is busy-wait until the previous
    1.41 + * animation completes.  This is because an (unlikely) continuation may
    1.42 + * sneak through queue before previous continuation is done putting second
    1.43 + * part of scheduled slaves in, which is the only race condition.
    1.44 + *
    1.45 + */
    1.46 +
    1.47 +/*May 29, 2010 -- birth a Master during init so that first core controller to
    1.48 + * start running gets it and does all the stuff for a newly born --
    1.49 + * from then on, will be doing continuation, but do suspension self
    1.50 + * directly at end of master loop
    1.51 + *So VMS_WL__init just births the master virtual processor same way it births
    1.52 + * all the others -- then does any extra setup needed and puts it into the
    1.53 + * work queue.
    1.54 + *However means have to make masterEnv a global static volatile the same way
    1.55 + * did with readyToAnimateQ in core controller.  -- for performance, put the
    1.56 + * jump to the core controller directly in here, and have it directly jump back.
    1.57 + *
    1.58 + *
    1.59 + *Aug 18, 2010 -- Going to a separate MasterVP for each core, to see if this
    1.60 + * avoids the suspected bug in the system stack that causes bizarre faults
    1.61 + * at random places in the system code.
    1.62 + *
    1.63 + *So, this function is coupled to each of the MasterVPs, -- meaning this
    1.64 + * function can't rely on a particular stack and frame -- each MasterVP that
    1.65 + * animates this function has a different one.
    1.66 + *
    1.67 + *At this point, the schedulingMaster does not write itself into the queue anymore,
    1.68 + * instead, the coreCtlr acquires the masterLock when it has nothing to
    1.69 + * animate, and then animates its own schedulingMaster.  However, still try to put
    1.70 + * several AppSlvs into the queue to amortize the startup cost of switching
    1.71 + * to the MasterVP.  Note, don't have to worry about latency of requests much
    1.72 + * because most requests generate work for same core -- only latency issue
    1.73 + * is case when other cores starved and one core's requests generate work
    1.74 + * for them -- so keep max in queue to 3 or 4..
    1.75 + */
    1.76 +void schedulingMaster( void *initData, SlaveVP *animatingSlv )
    1.77 + { 
    1.78 +   int32           slotIdx, numSlotsFilled;
    1.79 +   SlaveVP        *schedSlaveVP;
    1.80 +   SchedSlot      *currSlot, **schedSlots;
    1.81 +   MasterEnv      *masterEnv;
    1.82 +   VMSQueueStruc  *readyToAnimateQ;
    1.83 +   
    1.84 +   Sched_Assigner  slaveAssigner;
    1.85 +   RequestHandler  requestHandler;
    1.86 +   void           *semanticEnv;
    1.87 +
    1.88 +   int32           thisCoresIdx;
    1.89 +   SlaveVP        *masterVP;
    1.90 +   volatile        SlaveVP *volatileMasterVP;
    1.91 +   
    1.92 +   volatileMasterVP = animatingSlv;
    1.93 +   masterVP         = (SlaveVP*)volatileMasterVP; //used to force re-define after jmp
    1.94 +
    1.95 +      //First animation of each MasterVP will in turn animate this part
    1.96 +      // of setup code.. (Slv creator sets up the stack as if this function
    1.97 +      // was called normally, but actually get here by jmp)
    1.98 +      //So, setup values about stack ptr, jmp pt and all that
    1.99 +   //masterVP->resumeInstrPtr = &&schedulingMasterStartPt;
   1.100 +
   1.101 +
   1.102 +      //Note, got rid of writing the stack and frame ptr up here, because
   1.103 +      // only one
   1.104 +      // core can ever animate a given MasterVP, so don't need to communicate
   1.105 +      // new frame and stack ptr to the MasterVP storage before a second
   1.106 +      // version of that MasterVP can get animated on a different core.
   1.107 +      //Also got rid of the busy-wait.
   1.108 +
   1.109 +   
   1.110 +   //schedulingMasterStartPt:
   1.111 +   while(1){
   1.112 +       
   1.113 +      MEAS__Capture_Pre_Master_Point
   1.114 +
   1.115 +   masterEnv        = (MasterEnv*)_VMSMasterEnv;
   1.116 +   
   1.117 +      //GCC may optimize so doesn't always re-define from frame-storage
   1.118 +   masterVP         = (SlaveVP*)volatileMasterVP;  //just to make sure after jmp
   1.119 +   thisCoresIdx     = masterVP->coreAnimatedBy;
   1.120 +   readyToAnimateQ  = masterEnv->readyToAnimateQs[thisCoresIdx];
   1.121 +   schedSlots       = masterEnv->allSchedSlots[thisCoresIdx];
   1.122 +
   1.123 +   requestHandler   = masterEnv->requestHandler;
   1.124 +   slaveAssigner    = masterEnv->slaveAssigner;
   1.125 +   semanticEnv      = masterEnv->semanticEnv;
   1.126 +
   1.127 +
   1.128 +      //Poll each slot's Done flag
   1.129 +   numSlotsFilled = 0;
   1.130 +   for( slotIdx = 0; slotIdx < NUM_SCHED_SLOTS; slotIdx++)
   1.131 +    {
   1.132 +      currSlot = schedSlots[ slotIdx ];
   1.133 +
   1.134 +      if( currSlot->workIsDone )
   1.135 +       {
   1.136 +         currSlot->workIsDone         = FALSE;
   1.137 +         currSlot->needsSlaveAssigned = TRUE;
   1.138 +
   1.139 +               MEAS__startReqHdlr;
   1.140 +               
   1.141 +            //process the requests made by the slave (held inside slave struc)
   1.142 +         (*requestHandler)( currSlot->slaveAssignedToSlot, semanticEnv );
   1.143 +         
   1.144 +               MEAS__endReqHdlr;
   1.145 +       }
   1.146 +      if( currSlot->needsSlaveAssigned )
   1.147 +       {    //give slot a new Slv
   1.148 +         schedSlaveVP =
   1.149 +          (*slaveAssigner)( semanticEnv, thisCoresIdx );
   1.150 +         
   1.151 +         if( schedSlaveVP != NULL )
   1.152 +          { currSlot->slaveAssignedToSlot = schedSlaveVP;
   1.153 +            schedSlaveVP->schedSlot       = currSlot;
   1.154 +            currSlot->needsSlaveAssigned  = FALSE;
   1.155 +            numSlotsFilled               += 1;
   1.156 +          }
   1.157 +       }
   1.158 +    }
   1.159 +
   1.160 +   
   1.161 +   #ifdef SYS__TURN_ON_WORK_STEALING
   1.162 +      //If no slots filled, means no more work, look for work to steal.
   1.163 +   if( numSlotsFilled == 0 )
   1.164 +    { gateProtected_stealWorkInto( currSlot, readyToAnimateQ, masterVP );
   1.165 +    }
   1.166 +   #endif
   1.167 +
   1.168 +         MEAS__Capture_Post_Master_Point;
   1.169 +   
   1.170 +   masterSwitchToCoreCtlr(animatingSlv);
   1.171 +   flushRegisters();
   1.172 +   }//MasterLoop
   1.173 +
   1.174 +
   1.175 + }
   1.176 +
   1.177 +
   1.178 +
   1.179 +/*This has a race condition -- the coreloops are accessing their own queues
   1.180 + * at the same time that this work-stealer on a different core is trying to
   1.181 + */
   1.182 +void inline
   1.183 +stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ,
   1.184 +               SlaveVP *masterVP )
   1.185 + { 
   1.186 +   SlaveVP   *stolenSlv;
   1.187 +   int32        coreIdx, i;
   1.188 +   VMSQueueStruc *currQ;
   1.189 +
   1.190 +   stolenSlv = NULL;
   1.191 +   coreIdx = masterVP->coreAnimatedBy;
   1.192 +   for( i = 0; i < NUM_CORES -1; i++ )
   1.193 +    {
   1.194 +      if( coreIdx >= NUM_CORES -1 )
   1.195 +       { coreIdx = 0;
   1.196 +       }
   1.197 +      else
   1.198 +       { coreIdx++;
   1.199 +       }
   1.200 +      currQ = _VMSMasterEnv->readyToAnimateQs[coreIdx];
   1.201 +      if( numInVMSQ( currQ ) > 0 )
   1.202 +       { stolenSlv = readVMSQ (currQ );
   1.203 +         break;
   1.204 +       }
   1.205 +    }
   1.206 +
   1.207 +   if( stolenSlv != NULL )
   1.208 +    { currSlot->slaveAssignedToSlot = stolenSlv;
   1.209 +      stolenSlv->schedSlot           = currSlot;
   1.210 +      currSlot->needsSlaveAssigned  = FALSE;
   1.211 +
   1.212 +      writeVMSQ( stolenSlv, readyToAnimateQ );
   1.213 +    }
   1.214 + }
   1.215 +
   1.216 +/*This algorithm makes the common case fast.  Make the coreloop passive,
   1.217 + * and show its progress.  Make the stealer control a gate that coreloop
   1.218 + * has to pass.
   1.219 + *To avoid interference, only one stealer at a time.  Use a global
   1.220 + * stealer-lock.
   1.221 + *
   1.222 + *The pattern is based on a gate -- stealer shuts the gate, then monitors
   1.223 + * to be sure any already past make it all the way out, before starting.
   1.224 + *So, have a "progress" measure just before the gate, then have two after it,
   1.225 + * one is in a "waiting room" outside the gate, the other is at the exit.
   1.226 + *Then, the stealer first shuts the gate, then checks the progress measure
   1.227 + * outside it, then looks to see if the progress measure at the exit is the
   1.228 + * same.  If yes, it knows the protected area is empty 'cause no other way
   1.229 + * to get in and the last to get in also exited.
   1.230 + *If the progress measure at the exit is not the same, then the stealer goes
   1.231 + * into a loop checking both the waiting-area and the exit progress-measures
   1.232 + * until one of them shows the same as the measure outside the gate.  Might
   1.233 + * as well re-read the measure outside the gate each go around, just to be
   1.234 + * sure.  It is guaranteed that one of the two will eventually match the one
   1.235 + * outside the gate.
   1.236 + *
   1.237 + *Here's an informal proof of correctness:
   1.238 + *The gate can be closed at any point, and have only four cases:
   1.239 + *  1) coreloop made it past the gate-closing but not yet past the exit
   1.240 + *  2) coreloop made it past the pre-gate progress update but not yet past
   1.241 + *     the gate,
   1.242 + *  3) coreloop is right before the pre-gate update
   1.243 + *  4) coreloop is past the exit and far from the pre-gate update.
   1.244 + *
   1.245 + * Covering the cases in reverse order,
   1.246 + *  4) is not a problem -- stealer will read pre-gate progress, see that it
   1.247 + *     matches exit progress, and the gate is closed, so stealer can proceed.
   1.248 + *  3) stealer will read pre-gate progress just after coreloop updates it..
   1.249 + *     so stealer goes into a loop until the coreloop causes wait-progress
   1.250 + *     to match pre-gate progress, so then stealer can proceed
   1.251 + *  2) same as 3..
   1.252 + *  1) stealer reads pre-gate progress, sees that it's different than exit,
   1.253 + *     so goes into loop until exit matches pre-gate, now it knows coreloop
   1.254 + *     is not in protected and cannot get back in, so can proceed.
   1.255 + *
   1.256 + *Implementation for the stealer:
   1.257 + *
   1.258 + *First, acquire the stealer lock -- only cores with no work to do will
   1.259 + * compete to steal, so not a big performance penalty having only one --
   1.260 + * will rarely have multiple stealers in a system with plenty of work -- and
   1.261 + * in a system with little work, it doesn't matter.
   1.262 + *
   1.263 + *Note, have single-reader, single-writer pattern for all variables used to
   1.264 + * communicate between stealer and victims
   1.265 + *
   1.266 + *So, scan the queues of the core controllers, until find non-empty.  Each core
   1.267 + * has its own list that it scans.  The list goes in order from closest to
   1.268 + * furthest core, so it steals first from close cores.  Later can add
   1.269 + * taking info from the app about overlapping footprints, and scan all the
   1.270 + * others then choose work with the most footprint overlap with the contents
   1.271 + * of this core's cache.
   1.272 + *
   1.273 + *Now, have a victim want to take work from.  So, shut the gate in that
   1.274 + * coreloop, by setting the "gate closed" var on its stack to TRUE.
   1.275 + *Then, read the core's pre-gate progress and compare to the core's exit
   1.276 + * progress.
   1.277 + *If same, can proceed to take work from the coreloop's queue.  When done,
   1.278 + * write FALSE to gate closed var.
   1.279 + *If different, then enter a loop that reads the pre-gate progress, then
   1.280 + * compares to exit progress then to wait progress.  When one of two
   1.281 + * matches, proceed.  Take work from the coreloop's queue.  When done,
   1.282 + * write FALSE to the gate closed var.
   1.283 + * 
   1.284 + */
   1.285 +void inline
   1.286 +gateProtected_stealWorkInto( SchedSlot *currSlot,
   1.287 +                             VMSQueueStruc *myReadyToAnimateQ,
   1.288 +                             SlaveVP *masterVP )
   1.289 + {
   1.290 +   SlaveVP     *stolenSlv;
   1.291 +   int32          coreIdx, i, haveAVictim, gotLock;
   1.292 +   VMSQueueStruc *victimsQ;
   1.293 +
   1.294 +   volatile GateStruc *vicGate;
   1.295 +   int32               coreMightBeInProtected;
   1.296 +
   1.297 +
   1.298 +
   1.299 +      //see if any other cores have work available to steal
   1.300 +   haveAVictim = FALSE;
   1.301 +   coreIdx = masterVP->coreAnimatedBy;
   1.302 +   for( i = 0; i < NUM_CORES -1; i++ )
   1.303 +    {
   1.304 +      if( coreIdx >= NUM_CORES -1 )
   1.305 +       { coreIdx = 0;
   1.306 +       }
   1.307 +      else
   1.308 +       { coreIdx++;
   1.309 +       }
   1.310 +      victimsQ = _VMSMasterEnv->readyToAnimateQs[coreIdx];
   1.311 +      if( numInVMSQ( victimsQ ) > 0 )
   1.312 +       { haveAVictim = TRUE;
   1.313 +         vicGate = _VMSMasterEnv->workStealingGates[ coreIdx ];
   1.314 +         break;
   1.315 +       }
   1.316 +    }
   1.317 +   if( !haveAVictim ) return;  //no work to steal, exit
   1.318 +
   1.319 +      //have a victim core, now get the stealer-lock
   1.320 +   gotLock =__sync_bool_compare_and_swap( &(_VMSMasterEnv->workStealingLock),
   1.321 +                                                          UNLOCKED, LOCKED );
   1.322 +   if( !gotLock ) return; //go back to core controller, which will re-start master
   1.323 +
   1.324 +
   1.325 +   //====== Start Gate-protection =======
   1.326 +   vicGate->gateClosed = TRUE;
   1.327 +   coreMightBeInProtected= vicGate->preGateProgress != vicGate->exitProgress;
   1.328 +   while( coreMightBeInProtected )
   1.329 +    {    //wait until sure
   1.330 +      if( vicGate->preGateProgress == vicGate->waitProgress )
   1.331 +         coreMightBeInProtected = FALSE;
   1.332 +      if( vicGate->preGateProgress == vicGate->exitProgress )
   1.333 +         coreMightBeInProtected = FALSE;
   1.334 +    }
   1.335 +
   1.336 +   stolenSlv = readVMSQ ( victimsQ );
   1.337 +
   1.338 +   vicGate->gateClosed = FALSE;
   1.339 +   //======= End Gate-protection  =======
   1.340 +
   1.341 +
   1.342 +   if( stolenSlv != NULL )  //victim could have been in protected and taken
   1.343 +    { currSlot->slaveAssignedToSlot = stolenSlv;
   1.344 +      stolenSlv->schedSlot           = currSlot;
   1.345 +      currSlot->needsSlaveAssigned  = FALSE;
   1.346 +
   1.347 +      writeVMSQ( stolenSlv, myReadyToAnimateQ );
   1.348 +    }
   1.349 +
   1.350 +      //unlock the work stealing lock
   1.351 +   _VMSMasterEnv->workStealingLock = UNLOCKED;
   1.352 + }