# HG changeset patch
# User Me@portablequad
# Date 1315107711 25200
# Node ID d4c881c7f03af0b8666a7a3ce1f92de6f27fa6d0
# Parent  ac11b50220bd1e82394377612a316a247ecc4d9b
Added fn to send inter-master requests, and cleaned up code

diff -r ac11b50220bd -r d4c881c7f03a CoreLoop.c
--- a/CoreLoop.c	Tue Aug 30 21:55:04 2011 -0700
+++ b/CoreLoop.c	Sat Sep 03 20:41:51 2011 -0700
@@ -91,8 +91,8 @@
    readyToAnimateQ  = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx];
 
    #ifdef USE_WORK_STEALING
-      //Alg for work-stealing designed to make common case fast.  Comment
-      // in stealer code explains.
+      //protect access to readyToAnimateQ -- other cores also want access!
+      //Alg makes common case fast.  Comment in stealer code explains.
    gate.preGateProgress++;
    if( gate.gateClosed )
     {    //now, set coreloop's progress, so stealer can see that core loop
@@ -103,7 +103,7 @@
 
    currPr = (VirtProcr *) readVMSQ( readyToAnimateQ );
 
-      //Set the coreloop's progress, so stealer can see it has made it out
+      //Set coreloop's progress, so stealer can see this has made it out
       // of the protected area
    gate.exitProgress = gate.preGateProgress;
    #else
@@ -112,7 +112,8 @@
 
    if( currPr != NULL ) _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0;
    else
-    {
+    { //no more Slaves, get master lock and switch to master Pr
+	
       //============================= MEASUREMENT STUFF =====================
       #ifdef MEAS__TIME_MASTER_LOCK
       int32 startStamp, endStamp;
@@ -120,17 +121,17 @@
       #endif
       //=====================================================================
       int tries = 0; int gotLock = 0;
-      while( currPr == NULL ) //if queue was empty, enter get masterLock loop
-       {    //queue was empty, so get master lock
+      while( currPr == NULL ) 
+       {    //didn't get lock, so keep trying
 
          gotLock = __sync_bool_compare_and_swap(&(_VMSMasterEnv->masterLock),
                                                           UNLOCKED, LOCKED );
          if( gotLock )
-          {    //run own MasterVP -- jmps to coreLoops startPt when done
+          {    //run own MasterVP
             currPr = _VMSMasterEnv->masterVPs[thisCoresIdx];
             if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 )
              {       DEBUG( dbgB2BMaster,"Many back to back MasterVPs\n");
-               pthread_yield();
+               pthread_yield();   //this core has no slaves to schedule..
              }
             _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1;
             break;  //end while -- have a VP to animate now
@@ -152,7 +153,7 @@
     }
 
    
-   switchToVP(currPr); //The VPs return in here
+   switchToVP(currPr); //The VPs all return back to here
    flushRegisters();
    }//CoreLoop      
  }
@@ -160,7 +161,7 @@
 
 void *
 terminateCoreLoop(VirtProcr *currPr){
-   //first free shutdown VP that jumped here -- it first restores the
+   //first free the shutdown VP that jumped here -- it first restores the
    // coreloop's stack, so addr of currPr in stack frame is still correct
    VMS__dissipate_procr( currPr );
    pthread_exit( NULL );
diff -r ac11b50220bd -r d4c881c7f03a MasterLoop.c
--- a/MasterLoop.c	Tue Aug 30 21:55:04 2011 -0700
+++ b/MasterLoop.c	Sat Sep 03 20:41:51 2011 -0700
@@ -49,9 +49,7 @@
  *So VMS__init just births the master virtual processor same way it births
  * all the others -- then does any extra setup needed and puts it into the
  * work queue.
- *However means have to make masterEnv a global static volatile the same way
- * did with readyToAnimateQ in core loop.  -- for performance, put the
- * jump to the core loop directly in here, and have it directly jump back.
+ *However, this means masterEnv has to be a global static volatile.
  *
  *
  *Aug 18, 2010 -- Going to a separate MasterVP for each core, to see if this
@@ -60,7 +58,7 @@
  *
  *So, this function is coupled to each of the MasterVPs, -- meaning this
  * function can't rely on a particular stack and frame -- each MasterVP that
- * animates this function has a different one.
+ * animates this function has a different stack.
  *
  *At this point, the masterLoop does not write itself into the queue anymore,
  * instead, the coreLoop acquires the masterLock when it has nothing to
@@ -96,24 +94,17 @@
       //So, setup values about stack ptr, jmp pt and all that
    //masterPr->nextInstrPt = &&masterLoopStartPt;
 
-
-      //Note, got rid of writing the stack and frame ptr up here, because
-      // only one
-      // core can ever animate a given MasterVP, so don't need to communicate
-      // new frame and stack ptr to the MasterVP storage before a second
-      // version of that MasterVP can get animated on a different core.
-      //Also got rid of the busy-wait.
-
-   
-   //masterLoopStartPt:
-   //The animating materVP suspends at end of this loop, then later resumes and
-   // comes back here 
-   while(1){
+      //Sept 2011
+      //Old code jumped directly to this point, but that doesn't work on x64.
+      // So, just make this an endless loop, and call an assembly function at
+      // the end that saves its own return addr, then jumps to core_loop.
+   while(1)
+   {
        
    //============================= MEASUREMENT STUFF ========================
    #ifdef MEAS__TIME_MASTER
       //Total Master time includes one coreloop time -- just assume the core
-      // loop time is same for Master as for AppVPs, even though it may be
+      // loop time is same for Master as it is for AppVPs, even though it may be
       // smaller due to higher predictability of the fixed jmp.
    saveLowTimeStampCountInto( masterPr->startMasterTSCLow );
    #endif
@@ -122,7 +113,7 @@
    masterEnv        = (MasterEnv*)_VMSMasterEnv;
    
       //GCC may optimize so doesn't always re-define from frame-storage
-   masterPr         = (VirtProcr*)volatileMasterPr; //on stack, to be sure after jmp
+   masterPr         = (VirtProcr*)volatileMasterPr; //on stack, reload after jmp
    thisCoresIdx     = masterPr->coreAnimatedBy;
    readyToAnimateQ  = masterEnv->readyToAnimateQs[thisCoresIdx];
    schedSlots       = masterEnv->allSchedSlots[thisCoresIdx];
@@ -132,15 +123,15 @@
    semanticEnv      = masterEnv->semanticEnv;
 
       //First, check for requests from other MasterVPs, and handle them
-   if( masterEnv->requestsWaitingFor[thisCoresIdx] )
-    { masterReqQ = masterEnv->masterReqQs[thisCoresIdx];
-	  while( currReq = readVMSQ(masterReqQ) )
-       { handleMasterReq( currReq, semanticEnv, masterPr );
-       }	   
-	}
+   if( currReq = masterEnv->interMasterRequestsFor[thisCoresIdx] )
+    { do
+       { handleInterMasterReq( currReq, semanticEnv, masterPr );
+       }
+      while( currReq = currReq->nextReqst );
+    }
       //Now, take care of the SlaveVPs
-	  //Go through the slots -- if Slave there newly suspended, handle its request
-	  // then, either way, ask assigner to fill each slot
+      //Go through the slots -- if Slave there newly suspended, handle its request
+      // then, either way, ask assigner to fill each slot
    numSlotsFilled = 0;
    for( slotIdx = 0; slotIdx < NUM_SCHED_SLOTS; slotIdx++)
     {
@@ -208,14 +199,15 @@
  * master_loop, others are handed off to the plugin.
  */
 void inline
-handleMasterReq( MasterReq *currReq, void *_semEnv, VirtProcr *masterPr )
- {
-   switch( currReq->reqType )
-    { case interVMSReq:
-            handleInterVMSReq(     (InterVMSCoreReq *)currReq, masterPr);
+handleInterMasterReq( InterMasterReqst *currReq, void *_semEnv,
+                                                    VirtProcr *masterPr )
+ { switch( currReq->reqType )
+    { case destVMSCore:
+            handleInterVMSCoreReq( (InterVMSCoreReqst *)currReq, masterPr);
          break;
-      case interPluginReq:
-	       (*interPluginReqHdlr)(  (InterPluginReq  *)currReq, _semEnv );
+      case destPlugin:
+	       (*interPluginReqHdlr)( ((InterPluginReqst  *)currReq)->pluginReq,
+                                                                _semEnv );
          break;
       default:
          break;
@@ -223,15 +215,16 @@
  }
 
 void inline
-handleInterVMSReq( InterVMSCoreReq *currReq, VirtProcr *masterPr )
+handleInterVMSCoreReq( InterVMSCoreReqst *currReq, VirtProcr *masterPr ) //dispatch a VMS-core request on its sub-type; name matches the call in handleInterMasterReq
  { 
-   switch( currReq->reqType )
+   switch( currReq->secondReqType ) //reqType holds the InterMaster tag; the VMS-core sub-type is in secondReqType
     {
-      case transfer_free:        handleTransferFree( currReq, masterPr );
+      case transfer_free_ptr:    handleTransferFree( currReq, masterPr );
          break;
     }
  }
  
+ 
 
 /*Work Stealing Alg -- racy one
  *This algorithm has a race condition -- the coreloops are accessing their
diff -r ac11b50220bd -r d4c881c7f03a VMS.c
--- a/VMS.c	Tue Aug 30 21:55:04 2011 -0700
+++ b/VMS.c	Sat Sep 03 20:41:51 2011 -0700
@@ -497,6 +497,19 @@
    VMS__suspend_procr( callingPr );
  }
 
+void inline   //Posts reqData as the inter-master request for core targetMaster
+VMS__send_inter_plugin_req( void *reqData, int32 targetMaster, 
+                                            VirtProcr *requestingMaster ) //requestingMaster is not used in the body
+ { _VMSMasterEnv->interMasterRequestsFor[targetMaster] = //plain store; NOTE(review): a request already pending for targetMaster is overwritten, not chained -- confirm intended
+                                            (InterMasterReqst *) reqData; //NOTE(review): reqType and nextReqst are not set here -- presumably the caller fills them; confirm
+ }
+
+void inline   //Posts a VMS-core request as the inter-master request for core targetMaster
+VMS__send_inter_VMSCore_req( InterVMSCoreReqst *reqData,
+                        int32 targetMaster, VirtProcr *requestingMaster ) //requestingMaster is not used in the body
+ { _VMSMasterEnv->interMasterRequestsFor[targetMaster] = //plain store; NOTE(review): a request already pending for targetMaster is overwritten, not chained -- confirm intended
+                                            (InterMasterReqst *) reqData; //NOTE(review): reqType, secondReqType and nextReqst are not set here -- presumably the caller fills them; confirm
+ }
 
 /*
  */
diff -r ac11b50220bd -r d4c881c7f03a VMS.h
--- a/VMS.h	Tue Aug 30 21:55:04 2011 -0700
+++ b/VMS.h	Sat Sep 03 20:41:51 2011 -0700
@@ -113,7 +113,8 @@
 
 typedef struct _SchedSlot     SchedSlot;
 typedef struct _VMSReqst      VMSReqst;
 typedef struct _VirtProcr     VirtProcr;
+typedef struct _InterMasterReqst InterMasterReqst; //carrier for Master->Master requests; added alongside VirtProcr, which is still used below
 typedef struct _IntervalProbe IntervalProbe;
 typedef struct _GateStruc     GateStruc;
 
@@ -128,9 +128,12 @@
 //============= Requests ===========
 //
 
-enum VMSReqstType   //avoid starting enums at 0, for debug reasons
- {
-   semantic = 1,
+//VMS Request is the carrier for Slave to Master requests
+// it has an embedded sub-type request that is pulled out
+// inside the plugin's request handler
+enum VMSReqstType   //For Slave->Master requests
+ { 
+   semantic = 1,    //avoid starting enums at 0, for debug reasons
    createReq,
    dissipate,
    VMSSemantic      //goes with VMSSemReqst below
@@ -145,6 +148,9 @@
  };
 //VMSReqst
 
+//This is a sub-type of Slave->Master requests.
+// It's for Slaves to invoke built-in VMS-core functions that have language-like
+// behavior.
 enum VMSSemReqstType   //These are equivalent to semantic requests, but for
  {                     // VMS's services available directly to app, like OS
    createProbe = 1,    // and probe services -- like a VMS-wide built-in lang
@@ -157,7 +163,53 @@
    VirtProcr           *requestingPr;
    char                *nameStr;  //for create probe
  }
- VMSSemReq;
+VMSSemReq;
+
+//These are for Master to Master requests
+// They get re-cast to the appropriate sub-type of request
+enum InterMasterReqstType    //For Master->Master: tag that says which handler consumes the request
+ {
+   destVMSCore = 1,          //handled by the VMS-core handler; avoid starting enums at 0, for debug reasons
+   destPlugin                //the embedded pluginReq is handed to the plugin's inter-plugin handler
+ };
+
+struct _InterMasterReqst //Doing a trick to save space & time -- allocate
+ {  // space for a sub-type then cast first as InterMaster then as sub-type
+   enum InterMasterReqstType  reqType;  //selects which sub-type the request is re-cast to
+   InterMasterReqst *nextReqst;         //next request in the chain; the master loop follows this until it is NULL
+ };
+//InterMasterReqst  (defined above in typedef block)
+
+
+//These are a sub-type of InterMaster requests.  The inter-master req gets
+// re-cast to be of this type, after checking
+//This one is for requests between internals of VMS-core, such as malloc
+enum InterVMSCoreReqType   //sub-type tag, stored in InterVMSCoreReqst.secondReqType
+ {
+   transfer_free_ptr = 1     //dispatched to handleTransferFree; avoid starting enums at 0, for debug reasons
+ };
+
+typedef struct  //Doing a trick to save space & time -- allocate space
+ {              // for this, cast first as InterMaster then as this
+   enum InterMasterReqstType  reqType;  //duplicate InterMasterReqst at top
+   InterMasterReqst *nextReqst;         //same position as in struct _InterMasterReqst, so the re-cast lines up
+   
+   enum InterVMSCoreReqType  secondReqType; //which VMS-core service is requested
+   void                     *freePtr;  //pile up fields, add as needed
+ }
+InterVMSCoreReqst;
+
+
+//This is for requests between plugins on different cores
+// Here, after casting, the pluginReq is extracted and handed to plugin
+typedef struct  //Doing a trick to save space & time -- allocate space
+ {              // for this, cast first as InterMaster then as this
+   enum InterMasterReqstType  reqType;  //copy InterMasterReqst at top
+   InterMasterReqst          *nextReqst; //same position as in struct _InterMasterReqst, so the re-cast lines up
+   
+   void                      *pluginReq; //plugin will cast to appropriate type
+ }
+InterPluginReqst;
 
 
 //====================  Core data structures  ===================
@@ -209,9 +261,8 @@
 //VirtProcr
 
 
-/*WARNING: re-arranging this data structure could cause VP-switching
- *         assembly code to fail -- hard-codes offsets of fields
- *         (because -O3 messes with things otherwise)
+/*Master Env is the only global variable -- has entry points for any other
+ * data needed.  
  */
 typedef struct
  {
@@ -368,6 +419,14 @@
 inline void
 VMS__send_VMSSem_request( void *semReqData, VirtProcr *callingPr );
 
+
+void inline
+VMS__send_inter_plugin_req( void *reqData, int32 targetMaster, 
+                                            VirtProcr *requestingMaster );
+void inline
+VMS__send_inter_VMSCore_req( InterVMSCoreReqst *reqData, int32 targetMaster,
+                                           VirtProcr *requestingMaster );
+
 VMSReqst *
 VMS__take_next_request_out_of( VirtProcr *procrWithReq );
 
diff -r ac11b50220bd -r d4c881c7f03a inter_VMS_request_handlers.h
--- a/inter_VMS_request_handlers.h	Tue Aug 30 21:55:04 2011 -0700
+++ b/inter_VMS_request_handlers.h	Sat Sep 03 20:41:51 2011 -0700
@@ -16,7 +16,7 @@
  */
 
 inline void
-handleMakeMutex(  VPThdSemReq *semReq, VPThdSemEnv *semEnv);
+handleTransferFree( InterVMSCoreReqst *masterReq, VirtProcr *masterPr ); //handler for transfer_free_ptr requests; param type matches what the dispatcher passes
 
 
 #endif	/* _MASTER_REQ_H */