Me@0: /*
Me@38:  * Copyright 2010  OpenSourceStewardshipFoundation
Me@43:  * 
Me@0:  * Licensed under BSD
Me@0:  */
Me@0: 
Me@0: 
Me@0: 
Me@0: #include <stdio.h>
Me@0: #include <malloc.h>
Me@9: #include <stddef.h>
Me@0: 
Me@0: #include "VMS.h"
Me@0: 
Me@0: 
Me@0: 
Me@0: /*This code is animated by the virtual Master processor.
Me@0:  *
Me@11:  *Polls each sched slot exactly once, hands any requests made by a newly
Me@11:  * done slave to the "request handler" plug-in function
Me@0:  *
Me@11:  *Any slots that need a virt procr assigned are given to the "schedule"
Me@11:  * plug-in function, which tries to assign a virt procr (slave) to it.
Me@0:  *
Me@11:  *When all slots needing a processor have been given to the schedule plug-in,
Me@11:  * a fraction of the procrs successfully scheduled are put into the
Me@11:  * work queue, then a continuation of this function is put in, then the rest
Me@11:  * of the virt procrs that were successfully scheduled.
Me@0:  *
Me@11:  *The first thing the continuation does is busy-wait until the previous
Me@11:  * animation completes.  This is because an (unlikely) continuation may
Me@11:  * sneak through the queue before the previous continuation is done putting
Me@11:  * the second part of the scheduled slaves in, which is the only race condition.
Me@0:  *
Me@0:  */
Me@0: 
Me@4: /*May 29, 2010 -- birth a Master during init so that first core loop to
Me@11:  * start running gets it and does all the stuff for a newly born --
Me@11:  * from then on, will be doing continuation, but do suspension self
Me@4:  * directly at end of master loop
Me@4:  *So VMS__init just births the master virtual processor same way it births
Me@4:  * all the others -- then does any extra setup needed and puts it into the
Me@4:  * work queue.
Me@4:  *However, this means masterEnv has to be a global static volatile, the same
Me@31:  * way as was done with readyToAnimateQ in the core loop.  -- for performance,
Me@11:  * put the jump to the core loop directly in here, and have it jump directly back.
Me@31:  *
Me@31:  *
Me@31:  *Aug 18, 2010 -- Going to a separate MasterVP for each core, to see if this
Me@31:  * avoids the suspected bug in the system stack that causes bizarre faults
Me@31:  * at random places in the system code.
Me@31:  *
Me@31:  *So, this function is coupled to each of the MasterVPs, -- meaning this
Me@31:  * function can't rely on a particular stack and frame -- each MasterVP that
Me@31:  * animates this function has a different one.
Me@31:  *
Me@31:  *At this point, the masterLoop does not write itself into the queue anymore,
Me@31:  * instead, the coreLoop acquires the masterLock when it has nothing to
Me@31:  * animate, and then animates its own masterLoop.  However, still try to put
Me@31:  * several AppVPs into the queue to amortize the startup cost of switching
Me@31:  * to the MasterVP.  Note, don't have to worry much about latency of requests
Me@31:  * because most requests generate work for the same core -- the only latency
Me@31:  * issue is the case when other cores are starved and one core's requests
Me@31:  * generate work for them -- so keep the max in the queue to 3 or 4.
Me@4:  */
Me@31: void masterLoop( void *initData, VirtProcr *animatingPr ) //initData is not referenced anywhere in this body
Me@21:  { //One Master pass for the animating core: poll sched slots, refill them, then jmp back to core loop
Me@31:    int             slotIdx;         //index of the sched slot being polled
Me@21:    VirtProcr      *schedVirtPr;     //result of the slaveScheduler plug-in; NULL if it gave nothing
Me@31:    SchedSlot      *currSlot, **schedSlots; //slot being polled; this core's slots, indexed by slotIdx
Me@0:    MasterEnv      *masterEnv;       //local copy of _VMSMasterEnv
Me@31:    VMSQueueStruc  *readyToAnimateQ; //this core's queue; scheduled virt procrs are written into it
Me@4:    
Me@0:    SlaveScheduler  slaveScheduler;  //plug-in fn ptr, read from masterEnv
Me@0:    RequestHandler  requestHandler;  //plug-in fn ptr, read from masterEnv
Me@31:    void           *semanticEnv;     //passed unchanged to both plug-ins
Me@0: 
Me@31:    int             thisCoresIdx;    //masterPr->coreAnimatedBy; selects the per-core queue and slots
Me@31:    VirtProcr      *masterPr;        //the MasterVP animating this function (== animatingPr)
Me@31:    volatile        VirtProcr *volatileMasterPr;
Me@31:    //NOTE(review): "volatile" above qualifies the pointed-to VirtProcr, not the pointer variable -- confirm this forces the intended reload
Me@31:    volatileMasterPr = animatingPr;
Me@31:    masterPr         = volatileMasterPr; //used to force re-define after jmp
Me@31: 
Me@31:       //First animation of each MasterVP will in turn animate this part
Me@31:       // of setup code.. (VP creator sets up the stack as if this function
Me@31:       // was called normally, but actually get here by jmp)
Me@31:       //So, setup values about stack ptr, jmp pt and all that
Me@4:    masterPr->nextInstrPt = &&masterLoopStartPt; //address of the label below (GCC labels-as-values extension)
Me@0: 
Me@26: 
Me@31:       //Note, got rid of writing the stack and frame ptr up here, because
Me@31:       // only one
Me@31:       // core can ever animate a given MasterVP, so don't need to communicate
Me@31:       // new frame and stack ptr to the MasterVP storage before a second
Me@31:       // version of that MasterVP can get animated on a different core.
Me@31:       //Also got rid of the busy-wait.
Me@26: 
Me@31:    
Me@4:    masterLoopStartPt: //stored in nextInstrPt above; presumably where later animations re-enter -- confirm in core loop
Me@38:    //============================= MEASUREMENT STUFF ========================
Me@38:    #ifdef MEAS__TIME_MASTER
Me@38:       //Total Master time includes one coreloop time -- just assume the core
Me@38:       // loop time is same for Master as for AppVPs, even though it will be
Me@41:       // smaller due to high predictability of the fixed jmp.
Me@38:    saveLowTimeStampCountInto( masterPr->startMasterTSCLow );
Me@38:    #endif
Me@38:    //========================================================================
Me@0:    //NOTE(review): masterPr is used by the measurement code above before it is re-read from volatileMasterPr below -- confirm OK
Me@31:    masterEnv        = _VMSMasterEnv;
Me@4:    
Me@31: //TODO: check that compiles so that always re-define from frame-storage
Me@31:    masterPr         = volatileMasterPr;  //just to make sure after jmp
Me@31:    thisCoresIdx     = masterPr->coreAnimatedBy;
Me@31:    readyToAnimateQ  = masterEnv->readyToAnimateQs[thisCoresIdx];
Me@31:    schedSlots       = masterEnv->allSchedSlots[thisCoresIdx];
Me@4: 
Me@0:    requestHandler   = masterEnv->requestHandler;
Me@0:    slaveScheduler   = masterEnv->slaveScheduler;
Me@21:    semanticEnv      = masterEnv->semanticEnv;
Me@0: 
Me@0: 
Me@31:       //Poll each slot's Done flag -- every slot of this core is visited once per pass
Me@26:    for( slotIdx = 0; slotIdx < NUM_SCHED_SLOTS; slotIdx++)
Me@0:     {
Me@4:       currSlot = schedSlots[ slotIdx ];
Me@0: 
Me@4:       if( currSlot->workIsDone )
Me@0:        {
Me@4:          currSlot->workIsDone         = FALSE;
Me@4:          currSlot->needsProcrAssigned = TRUE;
Me@0: 
Me@0:             //process requests from slave to master
Me@21:          (*requestHandler)( currSlot->procrAssignedToSlot, semanticEnv );
Me@0:        }
Me@4:       if( currSlot->needsProcrAssigned ) //not an else: a slot just marked above is refilled in this same pass
Me@4:        {    //give slot a new virt procr
Me@21:          schedVirtPr =
Me@31:           (*slaveScheduler)( semanticEnv, thisCoresIdx );
Me@0:          //a NULL result leaves needsProcrAssigned set, so this test fires again the next time the slot is polled
Me@21:          if( schedVirtPr != NULL )
Me@21:           { currSlot->procrAssignedToSlot = schedVirtPr;
Me@26:             schedVirtPr->schedSlot        = currSlot;
Me@26:             currSlot->needsProcrAssigned  = FALSE;
Me@4:             //put the newly assigned virt procr into this core's readyToAnimateQ
Me@31:             writeSRSWQ( schedVirtPr, readyToAnimateQ );
Me@0:           }
Me@0:        }
Me@0:     }
Me@0: 
Me@26: 
Me@31:       //Save stack ptr and frame, restore CoreLoop's stack and frame,
Me@31:       // clear the MasterLock, and jmp to the core loop's start pt (32-bit x86 asm below)
Me@21:       //TODO: carefully verify don't need to force saving anything to stack
Me@21:       // before jumping back to core loop.
Me@31:    void           *stackPtrAddr, *framePtrAddr, *masterLockAddr;
Me@31:    void           *jmpPt, *coreLoopFramePtr, *coreLoopStackPtr;
Me@31:    //Addresses the asm stores through: asm operands %0, %1, %2 respectively
Me@21:    stackPtrAddr      = &(masterPr->stackPtr);
Me@21:    framePtrAddr      = &(masterPr->framePtr);
Me@31:    masterLockAddr    = &(_VMSMasterEnv->masterLock);
Me@21:    //Values the asm loads into registers: asm operands %3 (jmpPt), %5 (frame ptr), %4 (stack ptr)
Me@31:    jmpPt             = _VMSMasterEnv->coreLoopStartPt;
Me@21:    coreLoopFramePtr  = masterPr->coreLoopFramePtr;//need this only
Me@21:    coreLoopStackPtr  = masterPr->coreLoopStackPtr;//shouldn't need -- safety
Me@21:    //NOTE(review): %0-%2 are only read by the asm yet are declared as "=g" outputs -- likely belong in the input list; confirm
Me@38:    #ifdef MEAS__TIME_MASTER
Me@38:    saveLowTimeStampCountInto( masterPr->endMasterTSCLow );
Me@38:    #endif
Me@38:    //NOTE(review): "jmp %%eax" has no '*'; AT&T syntax normally writes an indirect jump as "jmp *%eax" -- confirm assembler output
Me@21:    asm volatile("movl %0,     %%eax;  \
Me@21:                  movl %%esp, (%%eax); \
Me@21:                  movl %1,     %%eax;  \
Me@21:                  movl %%ebp, (%%eax); \
Me@21:                  movl %2, %%ebx;      \
Me@21:                  movl %3, %%eax;      \
Me@21:                  movl %4, %%esp;      \
Me@21:                  movl %5, %%ebp;      \
Me@21:                  movl $0x0, (%%ebx);  \
Me@30:                  jmp  %%eax;"         \
Me@21:    /* outputs */ : "=g" (stackPtrAddr), "=g" (framePtrAddr),                \
Me@38:                    "=g"(masterLockAddr)                                     \
Me@21:    /* inputs  */ : "g" (jmpPt), "g"(coreLoopStackPtr), "g"(coreLoopFramePtr)\
Me@21:    /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi" \
Me@21:                 );//can probably make clobber list empty -- but safe for now
Me@0:  } //not reached by fall-through -- the asm above ends with a jmp
Me@0: 
Me@0: