/*
 * Copyright 2010  OpenSourceCodeStewardshipFoundation
 *
 * Licensed under BSD
 */

#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>

#include "VMS.h"
#include "Queue_impl/BlockingQueue.h"


/*Setup has two phases:
 * 1) Semantic layer first calls VMS__init, which creates masterEnv, and puts
 *    the master virt procr into the work-queue, ready for first "call"
 * 2) Semantic layer then does its own init, which creates the seed virt
 *    procr inside the semantic layer, ready to schedule it when
 *    asked by the first run of the masterLoop.
 *
 *This part is a bit weird because VMS really wants to be "always there",
 * and have applications attach and detach.  For now, this VMS is part of
 * the app, so the VMS system starts up as part of running the app.
 *
 *The semantic layer is isolated from the VMS internals by making the
 * semantic layer do setup to a state that it's ready with its
 * initial virt procrs, ready to schedule them to slots when the masterLoop
 * asks.  Without this pattern, the semantic layer's setup would
 * have to modify slots directly to assign the initial virt-procrs, and put
 * them into the workQ itself, breaking the isolation completely.
 *
 * 
 *The semantic layer creates the initial virt procr(s), and adds its
 * own environment to masterEnv, and fills in the pointers to
 * the requestHandler and slaveScheduler plug-in functions
 */

/*Forward declaration -- the definition is further down in this file, but
 * VMS__init calls it before that point.
 */
void
create_sched_slots( MasterEnv *masterEnv );


/*This allocates VMS data structures, populates the master VMSProc,
 * and master environment, and returns the master environment to the semantic
 * layer.
 */
void
VMS__init()
 { MasterEnv  *masterEnv;
   CASQueueStruc *workQ;

      //Make the central work-queue
   _VMSWorkQ = makeCASQ();
   workQ     = _VMSWorkQ;

   _VMSMasterEnv = malloc( sizeof(MasterEnv) );
   masterEnv     = _VMSMasterEnv;

      //create the master virtual processor
   masterEnv->masterVirtPr = VMS__create_procr( &masterLoop, masterEnv );

   create_sched_slots( masterEnv );

     //Set slot 0 to be the master virt procr & set flags just in case
   masterEnv->schedSlots[0]->needsProcrAssigned  = FALSE;  //says don't touch
   masterEnv->schedSlots[0]->workIsDone          = FALSE;  //says don't touch
   masterEnv->schedSlots[0]->procrAssignedToSlot = masterEnv->masterVirtPr;

      //First core loop to start up gets this, which will schedule seed Pr
      //TODO: debug: check address of masterVirtPr
//TODO: commented out for debugging -- put it back in!!
//   writeCASQ( masterEnv->masterVirtPr, workQ );

   numProcrsCreated = 1;
 }


/*Builds the two slot arrays of the master environment, each with
 * NUM_SCHED_SLOTS entries, and attaches them to masterEnv.
 *
 *Every entry of schedSlots gets a freshly allocated SchedSlot whose flags
 * say "handling requests done, slot needs filling".  The entries of
 * filledSlots are not pointed at anything here; they start out zeroed.
 *
 *If any allocation fails, a message is written to stderr and the process is
 * aborted, instead of writing through NULL.
 */
void
create_sched_slots( MasterEnv *masterEnv )
 { SchedSlot  **schedSlots, **filledSlots;
   int i;

   schedSlots  = malloc( NUM_SCHED_SLOTS * sizeof *schedSlots );
      //calloc, so that an entry read before being filled is NULL rather
      // than whatever malloc happened to leave there
   filledSlots = calloc( NUM_SCHED_SLOTS, sizeof *filledSlots );
   if( schedSlots == NULL || filledSlots == NULL )
    { fprintf( stderr,
               "create_sched_slots: out of memory allocating slot arrays\n" );
      abort();
    }
   masterEnv->schedSlots  = schedSlots;
   masterEnv->filledSlots = filledSlots;

   for( i = 0; i < NUM_SCHED_SLOTS; i++ )
    {
      schedSlots[i] = malloc( sizeof *schedSlots[i] );
      if( schedSlots[i] == NULL )
       { fprintf( stderr,
                  "create_sched_slots: out of memory allocating slot %d\n",
                  i );
         abort();
       }

         //Set state to mean "handling requests done, slot needs filling"
      schedSlots[i]->workIsDone         = FALSE;
      schedSlots[i]->needsProcrAssigned = TRUE;
    }
 }


/*Semantic layer calls this when it want the system to start running..
 *
 *This creates the core loops, pins them to physical cores, gives them the
 * pointer to the workQ, and starts them running.
 */
void
VMS__start()
 { int coreIdx;

   //TODO: Save "orig" stack pointer and frame ptr -- restore in VMS__end()
   //Create the win threads that animate the core loops
   for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ )
    {
      coreLoopThdParams[coreIdx] = (ThdParams *)malloc( sizeof(ThdParams) );
      coreLoopThdParams[coreIdx]->coreNum = coreIdx;

      coreLoopThdHandles[coreIdx] =
          CreateThread ( NULL, // Security attributes
                         0, // Stack size
                         coreLoop,
                         coreLoopThdParams[coreIdx],
                         CREATE_SUSPENDED,
                         &(coreLoopThdIds[coreIdx])
                        );
      ResumeThread( coreLoopThdHandles[coreIdx] ); //starts thread
    }
 }


/*Creates a new virt procr that will start out by running fnPtr.
 *
 *Allocates the VirtProcr struct and a 1 meg stack for it, then lays out a
 * __cdecl call frame at the top of that stack, so that jumping to fnPtr with
 * the stack pointer set to newPr->stackPtr looks, to fnPtr, the same as a
 * GCC generated function call:
 *    stackPtr + 0 :  return addr  (NULL -- there is nothing to return to)
 *    stackPtr + 4 :  initialData  (leftmost param)
 *    stackPtr + 8 :  newPr        (rightmost param, the animating procr)
 *No need to save registers on an old stack frame, because there's no old
 * animator state to return to.
 *
 *The struct starts out zeroed, so every field not set here (for example the
 * requests list that VMS__send_sem_request pushes onto) begins as 0 / NULL
 * instead of holding leftover heap contents.
 *
 *This layout stores pointers in int-sized stack locations, so it is only
 * correct on a target where pointers are 32 bits.
 *
 *NOTE(review): the base address of the stack allocation is not kept
 * anywhere in this function, so the stack cannot be freed later from what
 * is visible here -- confirm whether that is intended.
 *
 *Returns the new virt procr.  Never returns NULL: if either allocation
 * fails, a message is written to stderr and the process is aborted.
 */
VirtProcr *
VMS__create_procr( VirtProcrFnPtr fnPtr, void *initialData )
 { VirtProcr *newPr;
   char      *stackLocs, *stackPtr;
   enum { STACK_NUM_BYTES  = 0x100000, //1 meg -- default Win thread's size
          STACK_TOP_MARGIN = 0x10      //room for return addr + 2 params
        };

   newPr = calloc( 1, sizeof *newPr );
   if( newPr == NULL )
    { fprintf( stderr,
               "VMS__create_procr: out of memory allocating VirtProcr\n" );
      abort();
    }
   newPr->procrID     = numProcrsCreated++;
   newPr->nextInstrPt = fnPtr;
   newPr->initialData = initialData;

      //fnPtr takes two params -- void *initData & void *animProcr
      //alloc stack locations, make stackPtr be the highest addr minus room
      // for 2 params + return addr.
   stackLocs = malloc( STACK_NUM_BYTES );
   if( stackLocs == NULL )
    { fprintf( stderr,
               "VMS__create_procr: out of memory allocating stack\n" );
      abort();
    }
   stackPtr = stackLocs + STACK_NUM_BYTES - STACK_TOP_MARGIN;

      //setup __cdecl on stack -- coreloop will switch to stackPtr before jmp
   *( (int *)stackPtr     ) = 0;            //return addr -- NULL
   *( (int *)stackPtr + 2 ) = (int) newPr;  //rightmost param -- 32bit pointer
   *( (int *)stackPtr + 1 ) = (int) initialData;  //next  param to left
   newPr->stackPtr = stackPtr; //core loop will switch to this, then
   newPr->framePtr = stackPtr; //suspend loop will save new stack & frame ptr

   return newPr;
 }


/*This wraps the semantic layer's request data in the standard VMS carrier,
 * a SlaveReqst, and pushes that carrier onto the front of callingPr's
 * list of requests.
 *
 *semReqData is stored as given -- it is not copied or looked at here.
 *
 *If the carrier cannot be allocated, a message is written to stderr and the
 * process is aborted, instead of writing through NULL.
 */
inline void
VMS__send_sem_request( void *semReqData, VirtProcr *callingPr )
 { SlaveReqst *req;

   req = malloc( sizeof *req );
   if( req == NULL )
    { fprintf( stderr,
               "VMS__send_sem_request: out of memory allocating request\n" );
      abort();
    }
   req->slaveFrom      = callingPr;
   req->semReqData     = semReqData;

      //push on the front: the new request points at the old head of the list
   req->nextRequest    = callingPr->requests;
   callingPr->requests = req;
 }

 /*There is a label inside this function -- the addr of this label is saved
 * in the callingPr struc, as the pick-up point from which to start the next
 * work-unit for that procr.  If it turns out registers have to be saved,
 * then save them in the procr struc too.  Then do assembly jump to the
 * CoreLoop's "done with work-unit" label.  The procr struc is in the request
 * in the slave that animated the just-ended work-unit, so all the state is
 * saved there, and will get passed along, inside the request handler, to the
 * next work-unit for that procr.
 *
 *In terms of what the code below does:
 * 1) sets callingPr->nextInstrPt to the address of the ResumePt label
 *    (taken with GCC's && label-address extension)
 * 2) marks callingPr's sched slot as workIsDone
 * 3) in inline assembly: stores the current %esp and %ebp into
 *    callingPr->stackPtr and callingPr->framePtr, loads %esp and %ebp from
 *    callingPr->coreLoopStackPtr and callingPr->coreLoopFramePtr, and jumps
 *    to callingPr->coreLoopStartPt
 *
 *The assembly is 32-bit x86 (movl, %esp, %ebp, %eax), so this only builds
 * for such a target.
 */
void
VMS__suspend_processor( VirtProcr *callingPr )
 { void *jmpPt, *stackPtrAddr, *framePtrAddr, *coreLoopStackPtr;
   void *coreLoopFramePtr;
   int coreIdx;  //not used -- its only assignment is commented out below

      //The request to master will cause this suspended virt procr to get
      // scheduled again at some future point -- to resume, core loop jumps
      // to the resume point (below), which causes restore of saved regs and
      // "return" from this call.
   callingPr->nextInstrPt = &&ResumePt;

      //return ownership of the virt procr and sched slot to Master virt pr
   callingPr->schedSlot->workIsDone = TRUE;
//   coreIdx = callingPr->coreAnimatedBy;

      //Addresses of the fields the assembly stores %esp and %ebp into
   stackPtrAddr      = &(callingPr->stackPtr);
   framePtrAddr      = &(callingPr->framePtr);
   
      //Copy what the jump needs into locals, to hand to the assembly
   jmpPt             = callingPr->coreLoopStartPt;
   coreLoopFramePtr  = callingPr->coreLoopFramePtr;//need this only
   coreLoopStackPtr  = callingPr->coreLoopStackPtr;//shouldn't need -- safety

      //Save the virt procr's stack and frame ptrs, restore coreloop's frame
      // ptr, then jump back to "start" of core loop
      //Operands:  %0 = stackPtrAddr, %1 = framePtrAddr, %2 = jmpPt,
      //           %3 = coreLoopStackPtr, %4 = coreLoopFramePtr
      //NOTE(review): %0 and %1 are declared as outputs ("=g"), yet the
      // assembly reads them, as the addresses to store through -- check
      // against the GCC extended-asm rules whether they need to be inputs.
      //NOTE(review): "g" lets GCC pick a memory location for %3 and %4; if
      // it picks one addressed off %esp, then %4 is read after %esp has
      // already been replaced -- confirm the generated code.
      //NOTE(review): the jump through %eax is written without the '*' that
      // AT&T syntax uses for an indirect jump -- confirm the assembler
      // accepts it as intended.
   asm volatile("movl %0,     %%eax;  \
                 movl %%esp, (%%eax); \
                 movl %1,     %%eax;  \
                 movl %%ebp, (%%eax); \
                 movl %2, %%eax;      \
                 movl %3, %%esp;      \
                 movl %4, %%ebp;      \
                 jmp  %%eax    "      \
   /* outputs */ : "=g" (stackPtrAddr), "=g" (framePtrAddr) \
   /* inputs  */ : "g" (jmpPt), "g"(coreLoopStackPtr), "g"(coreLoopFramePtr)\
   /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx", "%edi","%esi"  \
                ); //list everything as clobbered to force GCC to save all
                   // live vars that are in regs on stack before this
                   // assembly, so that stack pointer is correct, before jmp

      //Address stored in callingPr->nextInstrPt above.  The intent is that
      // execution picks up here when the procr is resumed, and then returns
      // to whoever called suspend.
ResumePt:
   return;
 }

/*Placeholder -- there is no implementation yet.  The call does nothing with
 * animatingPr and simply returns.
 */
void
VMS__dissipate_animating_processor( VirtProcr *animatingPr )
 { (void)animatingPr;  //deliberately unused while this is still a stub
   return;
 }

/*Runs in the main thread, so all it can do is signal the core loops to shut
 * themselves down.
 *
 *The plan is for the master to decide when to shut down, once the semantic
 * layer tells it to -- say, when all the application virt procrs have
 * dissipated.  One option: the scheduling plug-in returns a special code,
 * and when the master sees it, it shuts the core loops down by scheduling a
 * special virt procr whose next instr pt is the core-end label.
 *
 *What is here so far: one such special virt procr is created per core, and
 * its nextInstrPt is pointed at _VMSMasterEnv->coreLoopShutDownPt, so that a
 * core loop jumping to animate it would land in its own shut-down code.
 *
 *NOTE(review): the created virt procrs are not put into the work-queue, nor
 * kept anywhere else visible in this function -- confirm how the core loops
 * are meant to get hold of them.
 */
void
VMS__shutdown()
 { int coresLeft;

   //TODO: restore the "orig" stack pointer and frame ptr saved in VMS__start
   for( coresLeft = NUM_CORES; coresLeft > 0; coresLeft-- )
    { VirtProcr *endPr = VMS__create_procr( NULL, NULL );

         //replace the NULL start point with the core loop's shut-down point
      endPr->nextInstrPt = _VMSMasterEnv->coreLoopShutDownPt;
    }
 }


/*Gets a time-stamp count as a single TSCount value.
 *
 *saveTimeStampCountInto hands the count back as two unsigned ints.  The high
 * one is shifted up by 32 bits and the low one is added in below it.
 */
inline TSCount getTSCount()
 { unsigned int lowHalf, highHalf;
   TSCount      stamp;

   saveTimeStampCountInto( lowHalf, highHalf );

   stamp   = highHalf;   //widen first, so the shift happens in TSCount
   stamp <<= 32;
   stamp  += lowHalf;
   return stamp;
 }