Mercurial > cgi-bin > hgwebdir.cgi > VMS > VMS_Implementations > VMS_impls > VMS__MC_shared_impl
comparison SchedulingMaster.c @ 222:c88ce1db91ef
Compiles, but does not run properly -- and changed MasterLoop to SchedulingMaster
| author | Some Random Person <seanhalle@yahoo.com> |
|---|---|
| date | Tue, 13 Mar 2012 10:02:06 -0700 |
| parents | |
| children | b0b93147adfb |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:671a6194db2d |
|---|---|
| 1 /* | |
| 2 * Copyright 2010 OpenSourceStewardshipFoundation | |
| 3 * | |
| 4 * Licensed under BSD | |
| 5 */ | |
| 6 | |
| 7 | |
| 8 | |
| 9 #include <stdio.h> | |
| 10 #include <stddef.h> | |
| 11 | |
| 12 #include "VMS.h" | |
| 13 | |
| 14 | |
| 15 //=========================================================================== | |
| 16 void inline | |
| 17 stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ, | |
| 18 SlaveVP *masterVP ); | |
| 19 | |
| 20 //=========================================================================== | |
| 21 | |
| 22 | |
| 23 | |
| 24 /*This code is animated by the virtual Master processor. | |
| 25 * | |
| 26 *Polls each sched slot exactly once, hands any requests made by a newly | |
| 27 * done slave to the "request handler" plug-in function | |
| 28 * | |
| 29 *Any slots that need a Slv assigned are given to the "schedule" | |
| 30 * plug-in function, which tries to assign a Slv (slave) to it. | |
| 31 * | |
| 32 *When all slots needing a processor have been given to the schedule plug-in, | |
| 33 * a fraction of the slaves successfully scheduled are put into the | |
| 34 * work queue, then a continuation of this function is put in, then the rest | |
| 35 * of the Slvs that were successfully scheduled. | |
| 36 * | |
| 37 *The first thing the continuation does is busy-wait until the previous | |
| 38 * animation completes. This is because an (unlikely) continuation may | |
| 39 * sneak through queue before previous continuation is done putting second | |
| 40 * part of scheduled slaves in, which is the only race condition. | |
| 41 * | |
| 42 */ | |
| 43 | |
| 44 /*May 29, 2010 -- birth a Master during init so that first core controller to | |
| 45 * start running gets it and does all the stuff for a newly born -- | |
| 46 * from then on, will be doing continuation, but do suspension self | |
| 47 * directly at end of master loop | |
| 48 *So VMS_WL__init just births the master virtual processor same way it births | |
| 49 * all the others -- then does any extra setup needed and puts it into the | |
| 50 * work queue. | |
| 51 *However means have to make masterEnv a global static volatile the same way | |
| 52 * did with readyToAnimateQ in core controller. -- for performance, put the | |
| 53 * jump to the core controller directly in here, and have it directly jump back. | |
| 54 * | |
| 55 * | |
| 56 *Aug 18, 2010 -- Going to a separate MasterVP for each core, to see if this | |
| 57 * avoids the suspected bug in the system stack that causes bizarre faults | |
| 58 * at random places in the system code. | |
| 59 * | |
| 60 *So, this function is coupled to each of the MasterVPs, -- meaning this | |
| 61 * function can't rely on a particular stack and frame -- each MasterVP that | |
| 62 * animates this function has a different one. | |
| 63 * | |
| 64 *At this point, the schedulingMaster does not write itself into the queue anymore, | |
| 65 * instead, the coreCtlr acquires the masterLock when it has nothing to | |
| 66 * animate, and then animates its own schedulingMaster. However, still try to put | |
| 67 * several AppSlvs into the queue to amortize the startup cost of switching | |
| 68 * to the MasterVP. Note, don't have to worry about latency of requests much | |
| 69 * because most requests generate work for same core -- only latency issue | |
| 70 * is case when other cores starved and one core's requests generate work | |
| 71 * for them -- so keep max in queue to 3 or 4.. | |
| 72 */ | |
/*schedulingMaster -- the scheduling loop animated by each per-core MasterVP.
 *
 *Each pass over the loop body:
 * 1) re-reads this core's environment out of _VMSMasterEnv,
 * 2) polls every sched slot on this core -- for a slot whose slave has set
 *    workIsDone, hands that slave to the plugged-in request handler,
 * 3) asks the plugged-in slave-assigner to fill every slot that needs one,
 * 4) if no slot could be filled (no local work), optionally tries to steal
 *    work from another core's ready queue,
 * 5) switches back to the core controller; the next animation of this
 *    MasterVP resumes at the top of the while loop.
 *
 *Params:
 *  initData     -- unused here; present so the signature matches the
 *                  standard Slv entry-function shape (see VMS.h)
 *  animatingSlv -- the MasterVP animating this function; its coreAnimatedBy
 *                  field selects which core's queues and slots to service
 *
 *Never returns -- the closing brace after the while(1) is unreachable.
 */
void schedulingMaster( void *initData, SlaveVP *animatingSlv )
 {
   int32 slotIdx, numSlotsFilled;
   SlaveVP *schedSlaveVP;
   SchedSlot *currSlot, **schedSlots;
   MasterEnv *masterEnv;
   VMSQueueStruc *readyToAnimateQ;

   Sched_Assigner slaveAssigner;   //plug-in: picks a slave for an empty slot
   RequestHandler requestHandler;  //plug-in: processes a done slave's requests
   void *semanticEnv;              //opaque state passed through to plug-ins

   int32 thisCoresIdx;
   SlaveVP *masterVP;
   volatile SlaveVP *volatileMasterVP;

   //Stash the animating VP in a volatile so the compiler cannot cache it in
   // a register across the jmp-based re-entry (see comments above).
   volatileMasterVP = animatingSlv;
   masterVP = (SlaveVP*)volatileMasterVP; //used to force re-define after jmp

   //First animation of each MasterVP will in turn animate this part
   // of setup code.. (Slv creator sets up the stack as if this function
   // was called normally, but actually get here by jmp)
   //So, setup values about stack ptr, jmp pt and all that
   //masterVP->resumeInstrPtr = &&schedulingMasterStartPt;


   //Note, got rid of writing the stack and frame ptr up here, because
   // only one
   // core can ever animate a given MasterVP, so don't need to communicate
   // new frame and stack ptr to the MasterVP storage before a second
   // version of that MasterVP can get animated on a different core.
   //Also got rid of the busy-wait.


   //schedulingMasterStartPt:
   while(1){

      MEAS__Capture_Pre_Master_Point   //measurement hook -- no-op when off

      //Re-read everything each pass: plug-ins and queues live in the global
      // master env, which other code may have changed between animations.
      masterEnv = (MasterEnv*)_VMSMasterEnv;

      //GCC may optimize so doesn't always re-define from frame-storage
      masterVP = (SlaveVP*)volatileMasterVP; //just to make sure after jmp
      thisCoresIdx = masterVP->coreAnimatedBy;
      readyToAnimateQ = masterEnv->readyToAnimateQs[thisCoresIdx];
      schedSlots = masterEnv->allSchedSlots[thisCoresIdx];

      requestHandler = masterEnv->requestHandler;
      slaveAssigner = masterEnv->slaveAssigner;
      semanticEnv = masterEnv->semanticEnv;


      //Poll each slot's Done flag
      numSlotsFilled = 0;
      for( slotIdx = 0; slotIdx < NUM_SCHED_SLOTS; slotIdx++)
       {
         currSlot = schedSlots[ slotIdx ];

         if( currSlot->workIsDone )
          {
            currSlot->workIsDone = FALSE;
            currSlot->needsSlaveAssigned = TRUE;

            MEAS__startReqHdlr;

            //process the requests made by the slave (held inside slave struc)
            (*requestHandler)( currSlot->slaveAssignedToSlot, semanticEnv );

            MEAS__endReqHdlr;
          }
         if( currSlot->needsSlaveAssigned )
          { //give slot a new Slv
            schedSlaveVP =
               (*slaveAssigner)( semanticEnv, thisCoresIdx );

            //assigner returns NULL when it has no ready slave for this core
            if( schedSlaveVP != NULL )
             { currSlot->slaveAssignedToSlot = schedSlaveVP;
               schedSlaveVP->schedSlot = currSlot;
               currSlot->needsSlaveAssigned = FALSE;
               numSlotsFilled += 1;
             }
          }
       }


 #ifdef SYS__TURN_ON_WORK_STEALING
      //If no slots filled, means no more work, look for work to steal.
      //NOTE(review): currSlot here is just the LAST slot the poll loop
      // visited -- it is not guaranteed to have needsSlaveAssigned TRUE,
      // and is uninitialized if NUM_SCHED_SLOTS is 0.  Stealing into an
      // already-assigned slot would overwrite its slave -- TODO confirm
      // intended target slot.
      if( numSlotsFilled == 0 )
       { gateProtected_stealWorkInto( currSlot, readyToAnimateQ, masterVP );
       }
 #endif

      MEAS__Capture_Post_Master_Point;

      //Suspend this MasterVP and jump back to the core controller;
      // presumably the next animation resumes at the top of while(1) --
      // flushRegisters() appears to force locals back to frame storage
      // before the switch -- TODO confirm against VMS.h.
      masterSwitchToCoreCtlr(animatingSlv);
      flushRegisters();
    }//MasterLoop


 }
| 173 | |
| 174 | |
| 175 | |
| 176 /*This has a race condition -- the coreloops are accessing their own queues | |
| 177 * at the same time that this work-stealer on a different core is trying to | |
| 178 */ | |
| 179 void inline | |
| 180 stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ, | |
| 181 SlaveVP *masterVP ) | |
| 182 { | |
| 183 SlaveVP *stolenSlv; | |
| 184 int32 coreIdx, i; | |
| 185 VMSQueueStruc *currQ; | |
| 186 | |
| 187 stolenSlv = NULL; | |
| 188 coreIdx = masterVP->coreAnimatedBy; | |
| 189 for( i = 0; i < NUM_CORES -1; i++ ) | |
| 190 { | |
| 191 if( coreIdx >= NUM_CORES -1 ) | |
| 192 { coreIdx = 0; | |
| 193 } | |
| 194 else | |
| 195 { coreIdx++; | |
| 196 } | |
| 197 currQ = _VMSMasterEnv->readyToAnimateQs[coreIdx]; | |
| 198 if( numInVMSQ( currQ ) > 0 ) | |
| 199 { stolenSlv = readVMSQ (currQ ); | |
| 200 break; | |
| 201 } | |
| 202 } | |
| 203 | |
| 204 if( stolenSlv != NULL ) | |
| 205 { currSlot->slaveAssignedToSlot = stolenSlv; | |
| 206 stolenSlv->schedSlot = currSlot; | |
| 207 currSlot->needsSlaveAssigned = FALSE; | |
| 208 | |
| 209 writeVMSQ( stolenSlv, readyToAnimateQ ); | |
| 210 } | |
| 211 } | |
| 212 | |
| 213 /*This algorithm makes the common case fast. Make the coreloop passive, | |
| 214 * and show its progress. Make the stealer control a gate that coreloop | |
| 215 * has to pass. | |
| 216 *To avoid interference, only one stealer at a time. Use a global | |
| 217 * stealer-lock. | |
| 218 * | |
| 219 *The pattern is based on a gate -- stealer shuts the gate, then monitors | |
| 220 * to be sure any already past make it all the way out, before starting. | |
| 221 *So, have a "progress" measure just before the gate, then have two after it, | |
| 222 * one is in a "waiting room" outside the gate, the other is at the exit. | |
| 223 *Then, the stealer first shuts the gate, then checks the progress measure | |
| 224 * outside it, then looks to see if the progress measure at the exit is the | |
| 225 * same. If yes, it knows the protected area is empty 'cause no other way | |
| 226 * to get in and the last to get in also exited. | |
| 227 *If the progress measure at the exit is not the same, then the stealer goes | |
| 228 * into a loop checking both the waiting-area and the exit progress-measures | |
| 229 * until one of them shows the same as the measure outside the gate. Might | |
| 230 * as well re-read the measure outside the gate each go around, just to be | |
| 231 * sure. It is guaranteed that one of the two will eventually match the one | |
| 232 * outside the gate. | |
| 233 * | |
| 234 *Here's an informal proof of correctness: | |
| 235 *The gate can be closed at any point, and have only four cases: | |
| 236 * 1) coreloop made it past the gate-closing but not yet past the exit | |
| 237 * 2) coreloop made it past the pre-gate progress update but not yet past | |
| 238 * the gate, | |
| 239 * 3) coreloop is right before the pre-gate update | |
| 240 * 4) coreloop is past the exit and far from the pre-gate update. | |
| 241 * | |
| 242 * Covering the cases in reverse order, | |
| 243 * 4) is not a problem -- stealer will read pre-gate progress, see that it | |
| 244 * matches exit progress, and the gate is closed, so stealer can proceed. | |
| 245 * 3) stealer will read pre-gate progress just after coreloop updates it.. | |
| 246 * so stealer goes into a loop until the coreloop causes wait-progress | |
| 247 * to match pre-gate progress, so then stealer can proceed | |
| 248 * 2) same as 3.. | |
| 249 * 1) stealer reads pre-gate progress, sees that it's different than exit, | |
| 250 * so goes into loop until exit matches pre-gate, now it knows coreloop | |
| 251 * is not in protected and cannot get back in, so can proceed. | |
| 252 * | |
| 253 *Implementation for the stealer: | |
| 254 * | |
| 255 *First, acquire the stealer lock -- only cores with no work to do will | |
| 256 * compete to steal, so not a big performance penalty having only one -- | |
| 257 * will rarely have multiple stealers in a system with plenty of work -- and | |
| 258 * in a system with little work, it doesn't matter. | |
| 259 * | |
| 260 *Note, have single-reader, single-writer pattern for all variables used to | |
| 261 * communicate between stealer and victims | |
| 262 * | |
| 263 *So, scan the queues of the core controllers, until find non-empty. Each core | |
| 264 * has its own list that it scans. The list goes in order from closest to | |
| 265 * furthest core, so it steals first from close cores. Later can add | |
| 266 * taking info from the app about overlapping footprints, and scan all the | |
| 267 * others then choose work with the most footprint overlap with the contents | |
| 268 * of this core's cache. | |
| 269 * | |
| 270 *Now, have a victim want to take work from. So, shut the gate in that | |
| 271 * coreloop, by setting the "gate closed" var on its stack to TRUE. | |
| 272 *Then, read the core's pre-gate progress and compare to the core's exit | |
| 273 * progress. | |
| 274 *If same, can proceed to take work from the coreloop's queue. When done, | |
| 275 * write FALSE to gate closed var. | |
| 276 *If different, then enter a loop that reads the pre-gate progress, then | |
| 277 * compares to exit progress then to wait progress. When one of two | |
| 278 * matches, proceed. Take work from the coreloop's queue. When done, | |
| 279 * write FALSE to the gate closed var. | |
| 280 * | |
| 281 */ | |
/*Gate-protected work stealing -- see the long design comment above for the
 * full protocol and informal proof.
 *
 *Steps:
 * 1) scan other cores' queues round-robin for one with work (the victim),
 * 2) try to take the single global stealer-lock (CAS); give up if held,
 * 3) close the victim's gate and spin on its progress counters until the
 *    victim coreloop is provably outside its queue-access region,
 * 4) take one slave from the victim's queue, reopen the gate,
 * 5) assign the stolen slave to currSlot and enqueue it locally,
 * 6) release the stealer-lock.
 *
 *Params:
 *  currSlot           -- the empty sched slot to fill on this core
 *  myReadyToAnimateQ  -- this core's own ready queue
 *  masterVP           -- this core's MasterVP (gives the core index)
 *
 *Returns without side effects when no victim has work or the lock is held.
 */
void inline
gateProtected_stealWorkInto( SchedSlot *currSlot,
                             VMSQueueStruc *myReadyToAnimateQ,
                             SlaveVP *masterVP )
 {
   SlaveVP *stolenSlv;
   int32 coreIdx, i, haveAVictim, gotLock;
   VMSQueueStruc *victimsQ;

   //volatile: gate fields are written by the victim core; force re-reads
   volatile GateStruc *vicGate;
   int32 coreMightBeInProtected;



   //see if any other cores have work available to steal
   haveAVictim = FALSE;
   coreIdx = masterVP->coreAnimatedBy;
   for( i = 0; i < NUM_CORES -1; i++ )
    {
      if( coreIdx >= NUM_CORES -1 )
       { coreIdx = 0;
       }
      else
       { coreIdx++;
       }
      victimsQ = _VMSMasterEnv->readyToAnimateQs[coreIdx];
      if( numInVMSQ( victimsQ ) > 0 )
       { haveAVictim = TRUE;
         vicGate = _VMSMasterEnv->workStealingGates[ coreIdx ];
         break;
       }
    }
   if( !haveAVictim ) return; //no work to steal, exit

   //have a victim core, now get the stealer-lock
   gotLock =__sync_bool_compare_and_swap( &(_VMSMasterEnv->workStealingLock),
                                          UNLOCKED, LOCKED );
   if( !gotLock ) return; //go back to core controller, which will re-start master


   //====== Start Gate-protection =======
   //NOTE(review): gateClosed is a plain (non-atomic, unfenced) store; the
   // protocol relies on it becoming visible to the victim before the spin
   // below concludes -- TODO confirm a barrier isn't needed on the target.
   vicGate->gateClosed = TRUE;
   coreMightBeInProtected= vicGate->preGateProgress != vicGate->exitProgress;
   while( coreMightBeInProtected )
    { //wait until sure -- victim is out when either the waiting-room or the
      // exit progress counter catches up to the pre-gate counter
      if( vicGate->preGateProgress == vicGate->waitProgress )
          coreMightBeInProtected = FALSE;
      if( vicGate->preGateProgress == vicGate->exitProgress )
          coreMightBeInProtected = FALSE;
    }

   stolenSlv = readVMSQ ( victimsQ );

   vicGate->gateClosed = FALSE;
   //======= End Gate-protection =======


   if( stolenSlv != NULL ) //victim could have been in protected and taken
    { currSlot->slaveAssignedToSlot = stolenSlv;
      stolenSlv->schedSlot = currSlot;
      currSlot->needsSlaveAssigned = FALSE;

      //NOTE(review): the stolen slave is both assigned to the slot and put
      // into this core's queue -- presumably the coreloop animates from the
      // queue while the slot just tracks the assignment; verify this cannot
      // double-animate the slave.
      writeVMSQ( stolenSlv, myReadyToAnimateQ );
    }

   //unlock the work stealing lock (plain store; CAS above acquired it)
   _VMSMasterEnv->workStealingLock = UNLOCKED;
 }
