Me@0: /*
Me@0:  * Copyright 2010  OpenSourceCodeStewardshipFoundation
Me@0:  *
Me@0:  * Licensed under BSD
Me@0:  */
Me@0: 
Me@0: #include <stdio.h>
Me@0: #include <stdlib.h>
Me@0: #include <malloc.h>
Me@0: 
Me@0: #include "VMS.h"
Me@0: #include "Queue_impl/BlockingQueue.h"
Me@0: 
Me@0: 
Me@22: //===========================================================================
Me@22: void
Me@22: shutdownFn( void *dummy, VirtProcr *dummy2 );
Me@22: 
Me@22: void
Me@22: create_sched_slots( MasterEnv *masterEnv );
Me@22: 
Me@22: //===========================================================================
Me@22: 
Me@0: /*Setup has two phases:
Me@0:  * 1) Semantic layer first calls init_VMS, which creates masterEnv, and puts
Me@8:  *    the master virt procr into the work-queue, ready for first "call"
Me@8:  * 2) Semantic layer then does its own init, which creates the seed virt
Me@8:  *    procr inside the semantic layer, ready to schedule it when
Me@0:  *    asked by the first run of the masterLoop.
Me@0:  *
Me@0:  *This part is bit weird because VMS really wants to be "always there", and
Me@0:  * have applications attach and detach..  for now, this VMS is part of
Me@0:  * the app, so the VMS system starts up as part of running the app.
Me@0:  *
Me@8:  *The semantic layer is isolated from the VMS internals by making the
Me@8:  * semantic layer do setup to a state that it's ready with its
Me@8:  * initial virt procrs, ready to schedule them to slots when the masterLoop
Me@0:  * asks.  Without this pattern, the semantic layer's setup would
Me@8:  * have to modify slots directly to assign the initial virt-procrs, and put
Me@0:  * them into the workQ itself, breaking the isolation completely.
Me@0:  *
Me@0:  * 
Me@8:  *The semantic layer creates the initial virt procr(s), and adds its
Me@8:  * own environment to masterEnv, and fills in the pointers to
Me@0:  * the requestHandler and slaveScheduler plug-in functions
Me@8:  */
Me@8: 
Me@8: /*This allocates VMS data structures, populates the master VMSProc,
Me@0:  * and master environment, and returns the master environment to the semantic
Me@0:  * layer.
Me@0:  */
Me@8: void
Me@8: VMS__init()
Me@1:  { MasterEnv  *masterEnv;
Me@12:    CASQueueStruc *workQ;
Me@1: 
Me@0:       //Make the central work-queue
Me@12:    _VMSWorkQ = makeCASQ();
Me@1:    workQ     = _VMSWorkQ;
Me@0: 
Me@1:    _VMSMasterEnv = malloc( sizeof(MasterEnv) );
Me@1:    masterEnv     = _VMSMasterEnv;
Me@0: 
Me@8:       //create the master virtual processor
Me@8:    masterEnv->masterVirtPr = VMS__create_procr( &masterLoop, masterEnv );
Me@0: 
Me@1:    create_sched_slots( masterEnv );
Me@0: 
Me@8:      //Set slot 0 to be the master virt procr & set flags just in case
Me@8:    masterEnv->schedSlots[0]->needsProcrAssigned  = FALSE;  //says don't touch
Me@8:    masterEnv->schedSlots[0]->workIsDone          = FALSE;  //says don't touch
Me@1:    masterEnv->schedSlots[0]->procrAssignedToSlot = masterEnv->masterVirtPr;
Me@22:    masterEnv->masterVirtPr->schedSlot = masterEnv->schedSlots[0];
Me@22:    
Me@1:       //First core loop to start up gets this, which will schedule seed Pr
Me@1:       //TODO: debug: check address of masterVirtPr
Me@22:    writeCASQ( masterEnv->masterVirtPr, workQ );
Me@12: 
Me@12:    numProcrsCreated = 1;
Me@0:  }
Me@0: 
Me@0: 
Me@0: void
Me@1: create_sched_slots( MasterEnv *masterEnv )
Me@8:  { SchedSlot  **schedSlots, **filledSlots;
Me@0:    int i;
Me@0: 
Me@8:    schedSlots  = malloc( NUM_SCHED_SLOTS * sizeof(SchedSlot *) );
Me@8:    filledSlots = malloc( NUM_SCHED_SLOTS * sizeof(SchedSlot *) );
Me@8:    masterEnv->schedSlots  = schedSlots;
Me@8:    masterEnv->filledSlots = filledSlots;
Me@8: 
Me@1:    for( i = 0; i < NUM_SCHED_SLOTS; i++ )
Me@0:     {
Me@8:       schedSlots[i] = malloc( sizeof(SchedSlot) );
Me@8: 
Me@1:          //Set state to mean "handling requests done, slot needs filling"
Me@8:       schedSlots[i]->workIsDone         = FALSE;
Me@8:       schedSlots[i]->needsProcrAssigned = TRUE;
Me@0:     }
Me@0:  }
Me@0: 
Me@8: 
Me@0: /*Semantic layer calls this when it want the system to start running..
Me@0:  *
Me@0:  *This creates the core loops, pins them to physical cores, gives them the
Me@0:  * pointer to the workQ, and starts them running.
Me@0:  */
Me@12: void
Me@0: VMS__start()
Me@12:  { int coreIdx;
Me@0: 
Me@14:    //TODO: Save "orig" stack pointer and frame ptr -- restore in VMS__end()
Me@8:    //Create the win threads that animate the core loops
Me@8:    for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ )
Me@8:     {
Me@12:       coreLoopThdParams[coreIdx] = (ThdParams *)malloc( sizeof(ThdParams) );
Me@12:       coreLoopThdParams[coreIdx]->coreNum = coreIdx;
Me@0: 
Me@12:       coreLoopThdHandles[coreIdx] =
Me@12:           CreateThread ( NULL, // Security attributes
Me@12:                          0, // Stack size
Me@12:                          coreLoop,
Me@12:                          coreLoopThdParams[coreIdx],
Me@12:                          CREATE_SUSPENDED,
Me@12:                          &(coreLoopThdIds[coreIdx])
Me@12:                         );
Me@12:       ResumeThread( coreLoopThdHandles[coreIdx] ); //starts thread
Me@8:     }
Me@8:  }
Me@0: 
Me@0: 
Me@0: 
Me@8: /*Create stack, then create __cdecl structure on it and put initialData and
Me@8:  * pointer to the new structure instance into the parameter positions on
Me@8:  * the stack
Me@8:  *Then put function pointer into nextInstrPt -- the stack is setup in std
Me@8:  * call structure, so jumping to function ptr is same as a GCC generated
Me@8:  * function call
Me@8:  *No need to save registers on old stack frame, because there's no old
Me@8:  * animator state to return to --
Me@8:  *
Me@8:  */
Me@8: VirtProcr *
Me@8: VMS__create_procr( VirtProcrFnPtr fnPtr, void *initialData )
Me@8:  { VirtProcr *newPr;
Me@8:    char      *stackLocs, *stackPtr;
Me@8: 
Me@8:    newPr              = malloc( sizeof(VirtProcr) );
Me@12:    newPr->procrID     = numProcrsCreated++;
Me@8:    newPr->nextInstrPt = fnPtr;
Me@8:    newPr->initialData = initialData;
Me@8: 
Me@14:       //fnPtr takes two params -- void *initData & void *animProcr
Me@8:       //alloc stack locations, make stackPtr be the highest addr minus room
Me@14:       // for 2 params + return addr.  Return addr (NULL) is in loc pointed to
Me@14:       // by stackPtr, initData at stackPtr + 4 bytes, animatingPr just above
Me@22:    stackLocs = malloc( VIRT_PROCR_STACK_SIZE );
Me@22:    newPr->startOfStack = stackLocs;
Me@22:    stackPtr = ( (char *)stackLocs + VIRT_PROCR_STACK_SIZE - 0x10 );
Me@8:       //setup __cdecl on stack -- coreloop will switch to stackPtr before jmp
Me@22:    *( (int *)stackPtr + 2 ) = (int) newPr; //rightmost param -- 32bit pointer
Me@14:    *( (int *)stackPtr + 1 ) = (int) initialData;  //next  param to left
Me@8:    newPr->stackPtr = stackPtr; //core loop will switch to this, then
Me@8:    newPr->framePtr = stackPtr; //suspend loop will save new stack & frame ptr
Me@8: 
Me@8:    return newPr;
Me@8:  }
Me@8: 
Me@8: 
Me@0:  /*there is a label inside this function -- save the addr of this label in
Me@0:  * the callingPr struc, as the pick-up point from which to start the next
Me@0:  * work-unit for that procr.  If turns out have to save registers, then
Me@0:  * save them in the procr struc too.  Then do assembly jump to the CoreLoop's
Me@0:  * "done with work-unit" label.  The procr struc is in the request in the
Me@0:  * slave that animated the just-ended work-unit, so all the state is saved
Me@0:  * there, and will get passed along, inside the request handler, to the
Me@0:  * next work-unit for that procr.
Me@0:  */
Me@8: void
Me@22: VMS__suspend_procr( VirtProcr *callingPr )
Me@14:  { void *jmpPt, *stackPtrAddr, *framePtrAddr, *coreLoopStackPtr;
Me@14:    void *coreLoopFramePtr;
Me@0: 
Me@14:       //The request to master will cause this suspended virt procr to get
Me@14:       // scheduled again at some future point -- to resume, core loop jumps
Me@14:       // to the resume point (below), which causes restore of saved regs and
Me@14:       // "return" from this call.
Me@1:    callingPr->nextInstrPt = &&ResumePt;
Me@1: 
Me@1:       //return ownership of the virt procr and sched slot to Master virt pr
Me@1:    callingPr->schedSlot->workIsDone = TRUE;
Me@14: //   coreIdx = callingPr->coreAnimatedBy;
Me@1: 
Me@18:    stackPtrAddr      = &(callingPr->stackPtr);
Me@18:    framePtrAddr      = &(callingPr->framePtr);
Me@14:    
Me@14:    jmpPt             = callingPr->coreLoopStartPt;
Me@14:    coreLoopFramePtr  = callingPr->coreLoopFramePtr;//need this only
Me@18:    coreLoopStackPtr  = callingPr->coreLoopStackPtr;//shouldn't need -- safety
Me@1: 
Me@14:       //Save the virt procr's stack and frame ptrs, restore coreloop's frame
Me@14:       // ptr, then jump back to "start" of core loop
Me@22:       //Note, GCC compiles to assembly that saves esp and ebp in the stack
Me@22:       // frame -- so have to explicitly do assembly that saves to memory
Me@18:    asm volatile("movl %0,     %%eax;  \
Me@18:                  movl %%esp, (%%eax); \
Me@18:                  movl %1,     %%eax;  \
Me@18:                  movl %%ebp, (%%eax); \
Me@18:                  movl %2, %%eax;      \
Me@18:                  movl %3, %%esp;      \
Me@18:                  movl %4, %%ebp;      \
Me@18:                  jmp  %%eax    "      \
Me@18:    /* outputs */ : "=g" (stackPtrAddr), "=g" (framePtrAddr) \
Me@18:    /* inputs  */ : "g" (jmpPt), "g"(coreLoopStackPtr), "g"(coreLoopFramePtr)\
Me@18:    /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx", "%edi","%esi"  \
Me@12:                 ); //list everything as clobbered to force GCC to save all
Me@12:                    // live vars that are in regs on stack before this
Me@12:                    // assembly, so that stack pointer is correct, before jmp
Me@1: 
Me@1: ResumePt:
Me@0:    return;
Me@0:  }
Me@0: 
Me@22: 
Me@22: 
Me@22: /*This is equivalent to "jump back to core loop" -- it's mainly only used
Me@22:  * just after adding dissipate request to a processor -- so the semantic
Me@22:  * layer is the only place it will be seen and/or used.
Me@22:  *
Me@22:  *It does almost the same thing as suspend, except don't need to save the
Me@22:  * stack nor set the nextInstrPt
Me@22:  *
Me@22:  *As of June 30, 2010  just implementing as a call to suspend -- just sugar
Me@22:  */
Me@8: void
Me@22: VMS__return_from_fn( VirtProcr *animatingPr )
Me@8:  {
Me@22:    VMS__suspend_procr( animatingPr );
Me@1:  }
Me@1: 
Me@22: 
Me@22: /*Not sure yet the form going to put "dissipate" in, so this is the third
Me@22:  * possibility -- the semantic layer can just make a macro that looks like
Me@22:  * a call to its name, then expands to a call to this.
Me@8:  *
Me@22:  *As of June 30, 2010  this looks like the top choice..
Me@8:  *
Me@22:  *This adds a request to dissipate, then suspends the processor so that the
Me@22:  * request handler will receive the request.  The request handler is what
Me@22:  * does the work of freeing memory and removing the processor from the
Me@22:  * semantic environment's data structures.
Me@22:  *The request handler also is what figures out when to shutdown the VMS
Me@22:  * system -- which causes all the core loop threads to die, and returns from
Me@22:  * the call that started up VMS to perform the work.
Me@22:  *
Me@22:  *This form is a bit misleading to understand if one is trying to figure out
Me@22:  * how VMS works -- it looks like a normal function call, but inside it
Me@22:  * sends a request to the request handler and suspends the processor, which
Me@22:  * jumps out of the VMS__dissipate_procr function, and out of all nestings
Me@22:  * above it, transferring the work of dissipating to the request handler,
Me@22:  * which then does the actual work -- causing the processor that animated
Me@22:  * the call of this function to disappear and the "hanging" state of this
Me@22:  * function to just poof into thin air -- the virtual processor's trace
Me@22:  * never returns from this call, but instead the virtual processor's trace
Me@22:  * gets suspended in this call and all the virt processor's state disap-
Me@22:  * pears -- making that suspend the last thing in the virt procr's trace.
Me@8:  */
Me@8: void
Me@22: VMS__dissipate_procr( VirtProcr *procrToDissipate )
Me@22:  { VMSReqst *req;
Me@22: 
Me@22:    req = malloc( sizeof(VMSReqst) );
Me@22: //   req->virtProcrFrom      = callingPr;
Me@22:    req->reqType               = dissipate;
Me@22:    req->nextReqst             = procrToDissipate->requests;
Me@22:    procrToDissipate->requests = req;
Me@22:    
Me@22:    VMS__suspend_procr( procrToDissipate );
Me@22: }
Me@22: 
Me@22: 
Me@22: /*This inserts the semantic-layer's request data into standard VMS carrier
Me@22:  */
Me@22: inline void
Me@22: VMS__send_sem_request( void *semReqData, VirtProcr *callingPr )
Me@22:  { VMSReqst *req;
Me@22: 
Me@22:    req = malloc( sizeof(VMSReqst) );
Me@22: //   req->virtProcrFrom      = callingPr;
Me@22:    req->reqType        = semantic;
Me@22:    req->semReqData     = semReqData;
Me@22:    req->nextReqst      = callingPr->requests;
Me@22:    callingPr->requests = req;
Me@22:  }
Me@22: 
Me@22: 
Me@22: /*This creates a request of type "dissipate" -- which will cause the virt
Me@22:  * processor's state and owned locations to be freed
Me@22:  */
Me@22: inline void
Me@22: VMS__send_dissipate_request( VirtProcr *procrToDissipate )
Me@22:  { VMSReqst *req;
Me@22: 
Me@22:    req = malloc( sizeof(VMSReqst) );
Me@22: //   req->virtProcrFrom      = callingPr;
Me@22:    req->reqType               = dissipate;
Me@22:    req->nextReqst             = procrToDissipate->requests;
Me@22:    procrToDissipate->requests = req;
Me@22:  }
Me@22: 
Me@22: 
Me@22: //TODO: add a semantic-layer supplied "freer" for the semantic-data portion
Me@22: // of a request -- IE call with both a virt procr and a fn-ptr to request
Me@22: // freer (or maybe put request freer as a field in virt procr?)
Me@22: void
Me@22: VMS__remove_and_free_top_request( VirtProcr *procrWithReq )
Me@22:  { VMSReqst *req;
Me@22: 
Me@22:    req = procrWithReq->requests;
Me@22:    procrWithReq->requests = procrWithReq->requests->nextReqst;
Me@22:    free( req );
Me@22:  }
Me@22: 
Me@22: /*This must be called by the request handler plugin -- it cannot be called
Me@22:  * from the semantic library "dissipate processor" function -- instead, the
Me@22:  * semantic layer has to generate a request for the plug-in to call this
Me@22:  * function.
Me@22:  *The reason is that this frees the virtual processor's stack -- which is
Me@22:  * still in use inside semantic library calls!
Me@22:  *
Me@22:  *This frees or recycles all the state owned by and comprising the animating
Me@22:  * virtual procr.  It frees any state that was malloc'd by the VMS system
Me@22:  * itself, and asks the VMS system to dis-own any VMS__malloc'd locations.
Me@22:  *If the dissipated processor is the sole (remaining) owner of VMS__malloc'd
Me@22:  * state, then that state gets freed (or sent to recycling) as a side-effect
Me@22:  * of dis-owning it.
Me@22:  */
Me@22: void
Me@22: VMS__free_procr_locs( VirtProcr *animatingPr )
Me@22:  {
Me@22:       //dis-own all locations owned by this processor, causing to be freed
Me@22:       // any locations that it is (was) sole owner of
Me@22:    //TODO: implement VMS__malloc system, including "give up ownership"
Me@22: 
Me@22:    VMS__remove_and_free_top_request( animatingPr );
Me@22:    free( animatingPr->startOfStack );
Me@22:    
Me@22:       //NOTE: animatingPr->semanticData should either have been allocated
Me@22:       // with VMS__malloc, or else freed in the request handler plug-in.
Me@22:       //NOTE: initialData was given to the processor, so should either have
Me@22:       // been alloc'd with VMS__malloc, or freed by the level above animPr.
Me@22:       //So, all that's left to free here is the VirtProcr struc itself
Me@22:    free( animatingPr );
Me@22:  }
Me@22: 
Me@22: 
Me@22: /*The semantic layer figures out when the work is done ( perhaps by a call
Me@22:  * in the application to "work all done", or perhaps all the virtual
Me@22:  * processors have dissipated.. a.s.o. )
Me@22:  *
Me@22:  *The semantic layer is responsible for making sure all work has fully
Me@22:  * completed before using this to shutdown the VMS system.
Me@22:  *
Me@22:  *After the semantic layer has determined it wants to shut down, the
Me@22:  * next time the Master Loop calls the scheduler plug-in, the scheduler
Me@22:  * then calls this function and returns the virtual processor it gets back.
Me@22:  *
Me@22:  *When the shut-down processor runs, it first frees all locations malloc'd to
Me@22:  * the VMS system (that wasn't
Me@22:  * specified as return-locations).  Then it creates one core-loop shut-down
Me@22:  * processor for each core loop and puts them all into the workQ.  When a
Me@22:  * core loop animates a core loop shut-down processor, it causes exit-thread
Me@22:  * to run, and when all core loop threads have exited, then the "wait for
Me@22:  * work to finish" in the main thread is woken, and the function-call that
Me@22:  * started all the work returns.
Me@22:  *
Me@22:  *The function animated by this processor performs the shut-down work.
Me@22:  */
Me@22: VirtProcr *
Me@22: VMS__create_the_shutdown_procr()
Me@22:  {
Me@22:    return VMS__create_procr( &shutdownFn, NULL );
Me@22:  }
Me@22: 
Me@22: 
Me@22: /*This is the function run by the special "shut-down" processor
Me@22:  * 
Me@22:  *The _VMSMasterEnv is needed by this shut down function, so the "wait"
Me@22:  * function run in the main loop has to free it, and the thread-related
Me@22:  * locations (coreLoopThdParams a.s.o.).
Me@22:  *However, the semantic environment and all data malloc'd to VMS can be
Me@22:  * freed here.
Me@22:  *
Me@22:  *NOTE: the semantic plug-in is expected to use VMS__malloc to get all the
Me@22:  * locations it needs -- they will be automatically freed by the standard
Me@22:  * "free all owned locations"
Me@22:  *
Me@22:  *Free any locations malloc'd to the VMS system (that weren't
Me@22:  * specified as return-locations).
Me@22:  *Then create one core-loop shut-down processor for each core loop and puts
Me@22:  * them all into the workQ.
Me@22:  */
Me@22: void
Me@22: shutdownFn( void *dummy, VirtProcr *animatingPr )
Me@8:  { int coreIdx;
Me@14:    VirtProcr *shutDownPr;
Me@22:    CASQueueStruc *workQ = _VMSWorkQ;
Me@22: 
Me@22:       //free all the locations owned within the VMS system
Me@22:    //TODO: write VMS__malloc and free.. -- take the DKU malloc as starting pt
Me@22: 
Me@22:       //make the core loop shut-down processors and put them into the workQ
Me@8:    for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ )
Me@8:     {
Me@14:       shutDownPr = VMS__create_procr( NULL, NULL );
Me@14:       shutDownPr->nextInstrPt = _VMSMasterEnv->coreLoopShutDownPt;
Me@22:       writeCASQ( shutDownPr, workQ );
Me@8:     }
Me@22: 
Me@22:       //This is an issue: the animating processor of this function may not
Me@22:       // get its request handled before all the cores have shutdown.
Me@22:       //TODO: after all the threads stop, clean out the MasterEnv, the
Me@22:       // SemanticEnv, and the workQ before returning.
Me@22:    VMS__send_dissipate_request( animatingPr );
Me@22:    VMS__suspend_procr( animatingPr );  //will never come back from this
Me@12:  }
Me@12: 
Me@12: 
Me@12: 
Me@12: inline TSCount getTSCount()
Me@12:  { unsigned int low, high;
Me@12:    TSCount  out;
Me@12: 
Me@12:    saveTimeStampCountInto( low, high );
Me@12:    out = high;
Me@12:    out = (out << 32) + low;
Me@12:    return out;
Me@12:  }
Me@12: