Me@0: /*
Me@0:  * Copyright 2010  OpenSourceCodeStewardshipFoundation
Me@0:  *
Me@0:  * Licensed under BSD
Me@0:  */
Me@0: 
Me@0: #include <stdio.h>
Me@0: #include <stdlib.h>
Me@0: #include <malloc.h>
Me@0: 
Me@0: #include "VMS.h"
Me@0: #include "Queue_impl/BlockingQueue.h"
Me@0: 
Me@0: 
Me@0: /*Setup has two phases:
Me@0:  * 1) Semantic layer first calls init_VMS, which creates masterEnv, and puts
Me@8:  *    the master virt procr into the work-queue, ready for first "call"
Me@8:  * 2) Semantic layer then does its own init, which creates the seed virt
Me@8:  *    procr inside the semantic layer, ready to schedule it when
Me@0:  *    asked by the first run of the masterLoop.
Me@0:  *
Me@0:  *This part is bit weird because VMS really wants to be "always there", and
Me@0:  * have applications attach and detach..  for now, this VMS is part of
Me@0:  * the app, so the VMS system starts up as part of running the app.
Me@0:  *
Me@8:  *The semantic layer is isolated from the VMS internals by making the
Me@8:  * semantic layer do setup to a state that it's ready with its
Me@8:  * initial virt procrs, ready to schedule them to slots when the masterLoop
Me@0:  * asks.  Without this pattern, the semantic layer's setup would
Me@8:  * have to modify slots directly to assign the initial virt-procrs, and put
Me@0:  * them into the workQ itself, breaking the isolation completely.
Me@0:  *
Me@0:  * 
Me@8:  *The semantic layer creates the initial virt procr(s), and adds its
Me@8:  * own environment to masterEnv, and fills in the pointers to
Me@0:  * the requestHandler and slaveScheduler plug-in functions
Me@8:  */
Me@8: 
Me@8: void
Me@8: create_sched_slots( MasterEnv *masterEnv );
Me@8: 
Me@8: 
Me@8: /*This allocates VMS data structures, populates the master VMSProc,
Me@0:  * and master environment, and returns the master environment to the semantic
Me@0:  * layer.
Me@0:  */
Me@8: void
Me@8: VMS__init()
Me@1:  { MasterEnv  *masterEnv;
Me@12:    CASQueueStruc *workQ;
Me@1: 
Me@0:       //Make the central work-queue
Me@12:    _VMSWorkQ = makeCASQ();
Me@1:    workQ     = _VMSWorkQ;
Me@0: 
Me@1:    _VMSMasterEnv = malloc( sizeof(MasterEnv) );
Me@1:    masterEnv     = _VMSMasterEnv;
Me@0: 
Me@8:       //create the master virtual processor
Me@8:    masterEnv->masterVirtPr = VMS__create_procr( &masterLoop, masterEnv );
Me@0: 
Me@1:    create_sched_slots( masterEnv );
Me@0: 
Me@8:      //Set slot 0 to be the master virt procr & set flags just in case
Me@8:    masterEnv->schedSlots[0]->needsProcrAssigned  = FALSE;  //says don't touch
Me@8:    masterEnv->schedSlots[0]->workIsDone          = FALSE;  //says don't touch
Me@1:    masterEnv->schedSlots[0]->procrAssignedToSlot = masterEnv->masterVirtPr;
Me@1: 
Me@1:       //First core loop to start up gets this, which will schedule seed Pr
Me@1:       //TODO: debug: check address of masterVirtPr
Me@12: //TODO: commented out for debugging -- put it back in!!
Me@12: //   writeCASQ( masterEnv->masterVirtPr, workQ );
Me@12: 
Me@12:    numProcrsCreated = 1;
Me@0:  }
Me@0: 
Me@0: 
Me@0: void
Me@1: create_sched_slots( MasterEnv *masterEnv )
Me@8:  { SchedSlot  **schedSlots, **filledSlots;
Me@0:    int i;
Me@0: 
Me@8:    schedSlots  = malloc( NUM_SCHED_SLOTS * sizeof(SchedSlot *) );
Me@8:    filledSlots = malloc( NUM_SCHED_SLOTS * sizeof(SchedSlot *) );
Me@8:    masterEnv->schedSlots  = schedSlots;
Me@8:    masterEnv->filledSlots = filledSlots;
Me@8: 
Me@1:    for( i = 0; i < NUM_SCHED_SLOTS; i++ )
Me@0:     {
Me@8:       schedSlots[i] = malloc( sizeof(SchedSlot) );
Me@8: 
Me@1:          //Set state to mean "handling requests done, slot needs filling"
Me@8:       schedSlots[i]->workIsDone         = FALSE;
Me@8:       schedSlots[i]->needsProcrAssigned = TRUE;
Me@0:     }
Me@0:  }
Me@0: 
Me@8: 
Me@0: /*Semantic layer calls this when it want the system to start running..
Me@0:  *
Me@0:  *This creates the core loops, pins them to physical cores, gives them the
Me@0:  * pointer to the workQ, and starts them running.
Me@0:  */
Me@12: void
Me@0: VMS__start()
Me@12:  { int coreIdx;
Me@0: 
Me@8:    //Create the win threads that animate the core loops
Me@8:    for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ )
Me@8:     {
Me@12:       coreLoopThdParams[coreIdx] = (ThdParams *)malloc( sizeof(ThdParams) );
Me@12:       coreLoopThdParams[coreIdx]->coreNum = coreIdx;
Me@0: 
Me@12:       coreLoopThdHandles[coreIdx] =
Me@12:           CreateThread ( NULL, // Security attributes
Me@12:                          0, // Stack size
Me@12:                          coreLoop,
Me@12:                          coreLoopThdParams[coreIdx],
Me@12:                          CREATE_SUSPENDED,
Me@12:                          &(coreLoopThdIds[coreIdx])
Me@12:                         );
Me@12:       ResumeThread( coreLoopThdHandles[coreIdx] ); //starts thread
Me@8:     }
Me@8:  }
Me@0: 
Me@0: 
Me@0: 
Me@8: /*Create stack, then create __cdecl structure on it and put initialData and
Me@8:  * pointer to the new structure instance into the parameter positions on
Me@8:  * the stack
Me@8:  *Then put function pointer into nextInstrPt -- the stack is setup in std
Me@8:  * call structure, so jumping to function ptr is same as a GCC generated
Me@8:  * function call
Me@8:  *No need to save registers on old stack frame, because there's no old
Me@8:  * animator state to return to --
Me@8:  *
Me@8:  */
Me@8: VirtProcr *
Me@8: VMS__create_procr( VirtProcrFnPtr fnPtr, void *initialData )
Me@8:  { VirtProcr *newPr;
Me@8:    char      *stackLocs, *stackPtr;
Me@8: 
Me@8:    newPr              = malloc( sizeof(VirtProcr) );
Me@12:    newPr->procrID     = numProcrsCreated++;
Me@8:    newPr->nextInstrPt = fnPtr;
Me@8:    newPr->initialData = initialData;
Me@8: 
Me@8:       //alloc stack locations, make stackPtr be the highest addr minus room
Me@8:       // for 2 params.  Put initData at stackPtr, animatingPr just above
Me@8:    stackLocs = malloc( 0x100000 ); //1 meg stack -- default Win thread's size
Me@8:    stackPtr = ( (char *)stackLocs + 0x100000 - 0x8 );
Me@8:       //setup __cdecl on stack -- coreloop will switch to stackPtr before jmp
Me@12:    *( (int *)stackPtr + 1) = (int) newPr;  //rightmost param -- 32bit pointer
Me@12:    *( (int *)stackPtr )    = (int) initialData;  //next  param to left
Me@8:    newPr->stackPtr = stackPtr; //core loop will switch to this, then
Me@8:    newPr->framePtr = stackPtr; //suspend loop will save new stack & frame ptr
Me@8: 
Me@8:    return newPr;
Me@8:  }
Me@8: 
Me@8: 
Me@8: /*This inserts the semantic-layer's data into the standard VMS carrier
Me@8:  */
Me@8: inline void
Me@8: VMS__send_sem_request( void *semReqData, VirtProcr *callingPr )
Me@8:  { SlaveReqst *req;
Me@8: 
Me@8:    req = malloc( sizeof(SlaveReqst) );
Me@8:    req->slaveFrom      = callingPr;
Me@8:    req->semReqData     = semReqData;
Me@8:    req->nextRequest    = callingPr->requests;
Me@8:    callingPr->requests = req;
Me@0:  }
Me@0: 
Me@0:  /*there is a label inside this function -- save the addr of this label in
Me@0:  * the callingPr struc, as the pick-up point from which to start the next
Me@0:  * work-unit for that procr.  If turns out have to save registers, then
Me@0:  * save them in the procr struc too.  Then do assembly jump to the CoreLoop's
Me@0:  * "done with work-unit" label.  The procr struc is in the request in the
Me@0:  * slave that animated the just-ended work-unit, so all the state is saved
Me@0:  * there, and will get passed along, inside the request handler, to the
Me@0:  * next work-unit for that procr.
Me@0:  */
Me@8: void
Me@1: VMS__suspend_processor( VirtProcr *callingPr )
Me@8:  { void *jmpPt, *stackPtr, *framePtr;
Me@0: 
Me@1:    callingPr->nextInstrPt = &&ResumePt;
Me@1: 
Me@1:       //return ownership of the virt procr and sched slot to Master virt pr
Me@1:    callingPr->schedSlot->workIsDone = TRUE;
Me@1: 
Me@1:    jmpPt    = callingPr->coreLoopStartPt;
Me@8:    stackPtr = &(callingPr->stackPtr);
Me@8:    framePtr = &(callingPr->framePtr);
Me@1: 
Me@1:       //put all regs in the clobber list to make sure GCC has saved all
Me@1:       // so safe to jump to core loop, where they *will* get clobbered
Me@8:    asm volatile("movl %%esp, %0; \
Me@8:                  movl %%ebp, %1; \
Me@8:                  jmp  %2         "
Me@8:    /* outputs */ : "=g" (stackPtr), "=g" (framePtr)
Me@1:    /* inputs  */ : "g" (jmpPt)
Me@1:    /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx", "%edi","%esi"
Me@12:                 ); //list everything as clobbered to force GCC to save all
Me@12:                    // live vars that are in regs on stack before this
Me@12:                    // assembly, so that stack pointer is correct, before jmp
Me@1: 
Me@1: ResumePt:
Me@0:    return;
Me@0:  }
Me@0: 
Me@8: void
Me@8: VMS__dissipate_animating_processor( VirtProcr *animatingPr )
Me@8:  {
Me@0: 
Me@1:  }
Me@1: 
Me@8: /*This runs in main thread -- so can only signal to the core loop to shut
Me@8:  * itself down --
Me@8:  *
Me@8:  *Want the master to decide when to shut down -- when semantic layer tells it
Me@8:  * to -- say, when all the application-virtual processors have dissipated.
Me@8:  *
Me@8:  *Maybe return a special code from scheduling plug-in..  master checks and
Me@8:  * when sees, it shuts down the core loops -- does this by scheduling a
Me@8:  * special virt processor whose next instr pt is the core-end label.
Me@8:  */
Me@8: void
Me@8: VMS__shutdown()
Me@8:  { int coreIdx;
Me@8:  
Me@8:    //Create the win threads that animate the core loops
Me@8:    for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ )
Me@8:     {
Me@1: 
Me@8:     }
Me@12:  }
Me@12: 
Me@12: 
Me@12: 
Me@12: inline TSCount getTSCount()
Me@12:  { unsigned int low, high;
Me@12:    TSCount  out;
Me@12: 
Me@12:    saveTimeStampCountInto( low, high );
Me@12:    out = high;
Me@12:    out = (out << 32) + low;
Me@12:    return out;
Me@12:  }
Me@12: