/*
 * Copyright 2010  OpenSourceStewardshipFoundation
 * 
 * Licensed under BSD
 */



#include <stdio.h>
#include <malloc.h>
#include <stddef.h>

#include "VMS.h"



/*This code is animated by the virtual Master processor.
 *
 *Polls each sched slot exactly once, hands any requests made by a newly
 * done slave to the "request handler" plug-in function
 *
 *Any slots that need a virt procr assigned are given to the "schedule"
 * plug-in function, which tries to assign a virt procr (slave) to it.
 *
 *When all slots needing a processor have been given to the schedule plug-in,
 * a fraction of the procrs successfully scheduled are put into the
 * work queue, then a continuation of this function is put in, then the rest
 * of the virt procrs that were successfully scheduled.
 *
 *The first thing the continuation does is busy-wait until the previous
 * animation completes.  This is because, in an unlikely case, a continuation
 * may sneak through the queue before the previous continuation has finished
 * putting the second part of the scheduled slaves in, which is the only
 * race condition.
 *
 */

/*May 29, 2010 -- birth a Master during init so that the first core loop to
 * start running gets it and does all the stuff for a newly born one --
 * from then on, it will be doing continuation, but will suspend itself
 * directly at the end of the master loop.
 *So VMS__init just births the master virtual processor the same way it
 * births all the others -- then does any extra setup needed and puts it
 * into the work queue.
 *However, this means masterEnv has to be made a global static volatile, the
 * same way as was done with readyToAnimateQ in the core loop.  -- for
 * performance, put the jump to the core loop directly in here, and have it
 * directly jump back.
 *
 *
 *Aug 18, 2010 -- Going to a separate MasterVP for each core, to see if this
 * avoids the suspected bug in the system stack that causes bizarre faults
 * at random places in the system code.
 *
 *So, this function is coupled to each of the MasterVPs, -- meaning this
 * function can't rely on a particular stack and frame -- each MasterVP that
 * animates this function has a different one.
 *
 *At this point, the masterLoop does not write itself into the queue anymore,
 * instead, the coreLoop acquires the masterLock when it has nothing to
 * animate, and then animates its own masterLoop.  However, still try to put
 * several AppVPs into the queue to amortize the startup cost of switching
 * to the MasterVP.  Note, don't have to worry about latency of requests much
 * because most requests generate work for same core -- only latency issue
 * is case when other cores starved and one core's requests generate work
 * for them -- so keep max in queue to 3 or 4..
 */
/*masterLoop -- the function animated by a MasterVP.
 *
 *initData    -- not used by this function.
 *animatingPr -- the virtual processor animating this loop; its
 *               coreAnimatedBy field selects which ready-to-animate queue
 *               and which set of sched slots this pass works on.
 *
 *Each pass polls every sched slot once, hands the requests of finished
 * slaves to the request handler, asks the slave scheduler for a new virt
 * procr for every slot that needs one, and queues each procr it gets.
 *
 *The function never returns normally: a pass ends by saving this VP's stack
 * and frame pointers, switching to the core loop's stack and frame, clearing
 * the master lock, and jumping to the core loop's start point.
 * masterPr->nextInstrPt is set to masterLoopStartPt so that later animations
 * can enter directly at that label.
 */
void masterLoop( void *initData, VirtProcr *animatingPr )
 { 
   int             slotIdx;
   VirtProcr      *schedVirtPr;
   SchedSlot      *currSlot, **schedSlots;
   MasterEnv      *masterEnv;
   VMSQueueStruc  *readyToAnimateQ;
   
   SlaveScheduler  slaveScheduler;
   RequestHandler  requestHandler;
   void           *semanticEnv;

   int             thisCoresIdx;
   VirtProcr      *masterPr;
      //The pointer object itself must be volatile (not the pointee), so
      // that every read of it is a real load from this function's frame.
      // That is what lets masterPr be re-established after entering at
      // masterLoopStartPt by jmp, when register contents can't be trusted.
   VirtProcr      *volatile volatileMasterPr;

      //Operands for the switch back to the core loop at the end of a pass
   void           *stackPtrAddr, *framePtrAddr, *masterLockAddr;
   void           *jmpPt, *coreLoopFramePtr, *coreLoopStackPtr;

      //Declared up here, rather than right after the label below, because
      // a label must be followed by a statement, not by a declaration.
   #ifdef MEAS__TIME_MASTER
   int startStamp, endStamp;
   #endif
   
   volatileMasterPr = animatingPr;
   masterPr         = volatileMasterPr; //used to force re-define after jmp

      //First animation of each MasterVP will in turn animate this part
      // of setup code.. (VP creator sets up the stack as if this function
      // was called normally, but actually get here by jmp)
      //So, setup values about stack ptr, jmp pt and all that
   masterPr->nextInstrPt = &&masterLoopStartPt;


      //Note, got rid of writing the stack and frame ptr up here, because
      // only one
      // core can ever animate a given MasterVP, so don't need to communicate
      // new frame and stack ptr to the MasterVP storage before a second
      // version of that MasterVP can get animated on a different core.
      //Also got rid of the busy-wait.

   
   masterLoopStartPt:
   //============================= MEASUREMENT STUFF ========================
   #ifdef MEAS__TIME_MASTER
      //Total Master time includes one coreloop time -- just assume the core
      // loop time is same for Master as for AppVPs, even though it will be
      // smaller due to high predictability of the fixed jmp.
   saveLowTimeStampCountInto( startStamp );
   #endif
   //========================================================================

   masterEnv        = _VMSMasterEnv;
   
      //Re-load from frame storage -- the volatile pointer guarantees a
      // real memory read here, even when this point was reached by jmp
   masterPr         = volatileMasterPr;
   thisCoresIdx     = masterPr->coreAnimatedBy;
   readyToAnimateQ  = masterEnv->readyToAnimateQs[thisCoresIdx];
   schedSlots       = masterEnv->allSchedSlots[thisCoresIdx];

   requestHandler   = masterEnv->requestHandler;
   slaveScheduler   = masterEnv->slaveScheduler;
   semanticEnv      = masterEnv->semanticEnv;


      //Poll each slot's Done flag
   for( slotIdx = 0; slotIdx < NUM_SCHED_SLOTS; slotIdx++)
    {
      currSlot = schedSlots[ slotIdx ];

      if( currSlot->workIsDone )
       {
         currSlot->workIsDone         = FALSE;
         currSlot->needsProcrAssigned = TRUE;

            //process requests from slave to master
         (*requestHandler)( currSlot->procrAssignedToSlot, semanticEnv );
       }
      if( currSlot->needsProcrAssigned )
       {    //give slot a new virt procr
         schedVirtPr =
          (*slaveScheduler)( semanticEnv, thisCoresIdx );
         
            //A NULL result leaves needsProcrAssigned set, so the slot is
            // offered to the scheduler again on the next pass
         if( schedVirtPr != NULL )
          { currSlot->procrAssignedToSlot = schedVirtPr;
            schedVirtPr->schedSlot        = currSlot;
            currSlot->needsProcrAssigned  = FALSE;

            writeSRSWQ( schedVirtPr, readyToAnimateQ );
          }
       }
    }


      //Save stack ptr and frame, restore CoreLoop's stack and frame,
      // and clear the MasterLock
      //TODO: carefully verify don't need to force saving anything to stack
      // before jumping back to core loop.
   stackPtrAddr      = &(masterPr->stackPtr);
   framePtrAddr      = &(masterPr->framePtr);
   masterLockAddr    = &(_VMSMasterEnv->masterLock);

   jmpPt             = _VMSMasterEnv->coreLoopStartPt;
   coreLoopFramePtr  = masterPr->coreLoopFramePtr;//need this only
   coreLoopStackPtr  = masterPr->coreLoopStackPtr;//shouldn't need -- safety
   
   //============================= MEASUREMENT STUFF ========================
   #ifdef MEAS__TIME_MASTER
   saveLowTimeStampCountInto( endStamp );
   addIntervalToHist(startStamp,endStamp,_VMSMasterEnv->stats->masterTimeHist);
   #endif
   //========================================================================

      //All six operands are only READ by the asm, so all are inputs --
      // declaring the three addresses as outputs would let the compiler
      // discard the assignments made to them just above.
      //Every operand is copied into a scratch register BEFORE esp or ebp
      // is changed, because a "g" operand may be a stack slot addressed
      // relative to either of those two registers.
      //The master lock is cleared last, after this VP's stack and frame
      // ptrs are saved and the core loop's are in place.
   asm volatile("movl %0,     %%eax   \n\t" /*eax = &masterPr->stackPtr    */
                "movl %%esp, (%%eax)  \n\t" /*save this VP's stack ptr     */
                "movl %1,     %%eax   \n\t" /*eax = &masterPr->framePtr    */
                "movl %%ebp, (%%eax)  \n\t" /*save this VP's frame ptr     */
                "movl %2,     %%ebx   \n\t" /*ebx = &masterLock            */
                "movl %3,     %%eax   \n\t" /*eax = core loop start pt     */
                "movl %4,     %%ecx   \n\t" /*ecx = core loop's stack ptr  */
                "movl %5,     %%edx   \n\t" /*edx = core loop's frame ptr  */
                "movl %%ecx,  %%esp   \n\t" /*switch to core loop's stack  */
                "movl %%edx,  %%ebp   \n\t" /*switch to core loop's frame  */
                "movl $0x0,  (%%ebx)  \n\t" /*clear the master lock        */
                "jmp  *%%eax          \n\t" /*indirect jmp into core loop  */
   /* outputs */ :
   /* inputs  */ : "g" (stackPtrAddr), "g" (framePtrAddr),
                   "g" (masterLockAddr),
                   "g" (jmpPt), "g" (coreLoopStackPtr), "g" (coreLoopFramePtr)
   /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi"
                );//ecx and edx are now used as scratch, so must stay listed
 }


