changeset 139:99798e4438a6 DistributedMalloc2

Merge of Malloc2 and inter master requests
author Merten Sach <msach@mailbox.tu-berlin.de>
date Mon, 19 Sep 2011 16:12:01 +0200
parents 90cbb7b803ee 99343ffe1918
children 2c8f3cf6c058
files VMS.c VMS.h inter_VMS_requests.c inter_VMS_requests.h vmalloc.c vmalloc.h
diffstat 17 files changed, 617 insertions(+), 245 deletions(-) [+]
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/.hgignore	Mon Sep 19 16:12:01 2011 +0200
     1.3 @@ -0,0 +1,3 @@
     1.4 +syntax: glob
     1.5 +
     1.6 +*.o
     2.1 --- a/CoreLoop.c	Wed Sep 07 19:36:46 2011 +0200
     2.2 +++ b/CoreLoop.c	Mon Sep 19 16:12:01 2011 +0200
     2.3 @@ -70,7 +70,6 @@
     2.4        //Designate a core by a 1 in bit-position corresponding to the core
     2.5     CPU_ZERO(&coreMask);
     2.6     CPU_SET(coreLoopThdParams->coreNum,&coreMask);
     2.7 -   //coreMask = 1L << coreLoopThdParams->coreNum;
     2.8  
     2.9     pthread_t selfThd = pthread_self();
    2.10     errorCode =
    2.11 @@ -91,8 +90,8 @@
    2.12     readyToAnimateQ  = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx];
    2.13  
    2.14     #ifdef USE_WORK_STEALING
    2.15 -      //Alg for work-stealing designed to make common case fast.  Comment
    2.16 -      // in stealer code explains.
    2.17 +      //protect access to readyToAnimateQ -- other cores also want access!
    2.18 +      //Alg makes common case fast.  Comment in stealer code explains.
    2.19     gate.preGateProgress++;
    2.20     if( gate.gateClosed )
    2.21      {    //now, set coreloop's progress, so stealer can see that core loop
    2.22 @@ -103,7 +102,7 @@
    2.23  
    2.24     currPr = (VirtProcr *) readVMSQ( readyToAnimateQ );
    2.25  
    2.26 -      //Set the coreloop's progress, so stealer can see it has made it out
    2.27 +      //Set coreloop's progress, so stealer can see this has made it out
    2.28        // of the protected area
    2.29     gate.exitProgress = gate.preGateProgress;
    2.30     #else
    2.31 @@ -112,7 +111,8 @@
    2.32  
    2.33     if( currPr != NULL ) _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0;
    2.34     else
    2.35 -    {
    2.36 +    { //no more Slaves, get master lock and switch to master Pr
    2.37 +	
    2.38        //============================= MEASUREMENT STUFF =====================
    2.39        #ifdef MEAS__TIME_MASTER_LOCK
    2.40        int32 startStamp, endStamp;
    2.41 @@ -120,17 +120,17 @@
    2.42        #endif
    2.43        //=====================================================================
    2.44        int tries = 0; int gotLock = 0;
    2.45 -      while( currPr == NULL ) //if queue was empty, enter get masterLock loop
    2.46 -       {    //queue was empty, so get master lock
    2.47 +      while( currPr == NULL ) 
    2.48 +       {    //didn't get lock, so keep trying
    2.49  
    2.50           gotLock = __sync_bool_compare_and_swap(&(_VMSMasterEnv->masterLock),
    2.51                                                            UNLOCKED, LOCKED );
    2.52           if( gotLock )
    2.53 -          {    //run own MasterVP -- jmps to coreLoops startPt when done
    2.54 +          {    //run own MasterVP
    2.55              currPr = _VMSMasterEnv->masterVPs[thisCoresIdx];
    2.56              if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 )
    2.57               {       DEBUG( dbgB2BMaster,"Many back to back MasterVPs\n");
    2.58 -               pthread_yield();
    2.59 +               pthread_yield();   //this core has no slaves to schedule..
    2.60               }
    2.61              _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1;
    2.62              break;  //end while -- have a VP to animate now
    2.63 @@ -152,7 +152,7 @@
    2.64      }
    2.65  
    2.66     
    2.67 -   switchToVP(currPr); //The VPs return in here
    2.68 +   switchToVP(currPr); //The VPs all return back to here
    2.69     flushRegisters();
    2.70     }//CoreLoop      
    2.71   }
    2.72 @@ -160,16 +160,11 @@
    2.73  
    2.74  void *
    2.75  terminateCoreLoop(VirtProcr *currPr){
    2.76 -   //first free shutdown VP that jumped here -- it first restores the
    2.77 -   // coreloop's stack, so addr of currPr in stack frame is still correct
    2.78 -   VMS__dissipate_procr( currPr );
    2.79 -   pthread_exit( NULL );
    2.80 +    pthread_exit( NULL );
    2.81  }
    2.82  
    2.83  
    2.84  
    2.85 -#ifdef SEQUENTIAL
    2.86 -
    2.87  //===========================================================================
    2.88  /*This sequential version is exact same as threaded, except doesn't do the
    2.89   * pin-threads part, nor the wait until setup complete part.
    2.90 @@ -188,7 +183,7 @@
    2.91     thisCoresIdx = 0;
    2.92  
    2.93     //Save the return address in the SwitchVP function
    2.94 -   saveCoreLoopReturnAddr(&(_VMSMasterEnv->coreLoopReturnPt));
    2.95 +   saveCoreLoopReturnAddr((void**)&(_VMSMasterEnv->coreLoopReturnPt));
    2.96  
    2.97     
    2.98     while(1){
    2.99 @@ -212,4 +207,3 @@
   2.100     flushRegisters();
   2.101     }
   2.102   }
   2.103 -#endif
     3.1 --- a/MasterLoop.c	Wed Sep 07 19:36:46 2011 +0200
     3.2 +++ b/MasterLoop.c	Mon Sep 19 16:12:01 2011 +0200
     3.3 @@ -11,12 +11,21 @@
     3.4  
     3.5  #include "VMS.h"
     3.6  #include "ProcrContext.h"
     3.7 -
     3.8 +#include "scheduling.h"
     3.9 +#include "inter_VMS_requests.h"
    3.10 +#include "inter_VMS_requests_handler.h"
    3.11  
    3.12  //===========================================================================
    3.13  void inline
    3.14  stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ,
    3.15 -               VirtProcr *masterPr );
    3.16 +               VirtProcr *masterPr);
    3.17 +
    3.18 +void inline
    3.19 +handleInterMasterReq( InterMasterReqst *currReq, void *_semEnv,
    3.20 +                                                    VirtProcr *masterPr);
    3.21 +
    3.22 +void inline
    3.23 +handleInterVMSCoreReq( InterVMSCoreReqst *currReq, VirtProcr *masterPr);
    3.24  
    3.25  //===========================================================================
    3.26  
    3.27 @@ -49,9 +58,7 @@
    3.28   *So VMS__init just births the master virtual processor same way it births
    3.29   * all the others -- then does any extra setup needed and puts it into the
    3.30   * work queue.
    3.31 - *However means have to make masterEnv a global static volatile the same way
    3.32 - * did with readyToAnimateQ in core loop.  -- for performance, put the
    3.33 - * jump to the core loop directly in here, and have it directly jump back.
    3.34 + *However, this means masterEnv has to be a global static volatile.
    3.35   *
    3.36   *
    3.37   *Aug 18, 2010 -- Going to a separate MasterVP for each core, to see if this
    3.38 @@ -60,7 +67,7 @@
    3.39   *
    3.40   *So, this function is coupled to each of the MasterVPs, -- meaning this
    3.41   * function can't rely on a particular stack and frame -- each MasterVP that
    3.42 - * animates this function has a different one.
    3.43 + * animates this function has a different stack.
    3.44   *
    3.45   *At this point, the masterLoop does not write itself into the queue anymore,
    3.46   * instead, the coreLoop acquires the masterLock when it has nothing to
    3.47 @@ -89,39 +96,30 @@
    3.48     
    3.49     volatileMasterPr = animatingPr;
    3.50     masterPr         = (VirtProcr*)volatileMasterPr; //used to force re-define after jmp
    3.51 +   masterEnv        = (MasterEnv*)_VMSMasterEnv;
    3.52  
    3.53        //First animation of each MasterVP will in turn animate this part
    3.54        // of setup code.. (VP creator sets up the stack as if this function
    3.55        // was called normally, but actually get here by jmp)
    3.56 -      //So, setup values about stack ptr, jmp pt and all that
    3.57 -   //masterPr->nextInstrPt = &&masterLoopStartPt;
    3.58  
    3.59 -
    3.60 -      //Note, got rid of writing the stack and frame ptr up here, because
    3.61 -      // only one
    3.62 -      // core can ever animate a given MasterVP, so don't need to communicate
    3.63 -      // new frame and stack ptr to the MasterVP storage before a second
    3.64 -      // version of that MasterVP can get animated on a different core.
    3.65 -      //Also got rid of the busy-wait.
    3.66 -
    3.67 -   
    3.68 -   //masterLoopStartPt:
    3.69 -   while(1){
    3.70 -       
    3.71 +      //Sept 2011
    3.72 +      //Old code jumped directly to this point, but that doesn't work on x64.
    3.73 +	  // So, just make this an endless loop, and call an assembly function at
    3.74 +	  // the end that saves its own return addr, then jumps to core_loop.
    3.75 +   while(1)
    3.76 +   {       
    3.77     //============================= MEASUREMENT STUFF ========================
    3.78     #ifdef MEAS__TIME_MASTER
    3.79        //Total Master time includes one coreloop time -- just assume the core
    3.80 -      // loop time is same for Master as for AppVPs, even though it may be
    3.81 +      // loop time is the same for Master as it is for AppVPs, even though it may be
    3.82        // smaller due to higher predictability of the fixed jmp.
    3.83     saveLowTimeStampCountInto( masterPr->startMasterTSCLow );
    3.84     #endif
    3.85     //========================================================================
    3.86  
    3.87 -   masterEnv        = (MasterEnv*)_VMSMasterEnv;
    3.88 -   
    3.89 -      //GCC may optimize so doesn't always re-define from frame-storage
    3.90 -   masterPr         = (VirtProcr*)volatileMasterPr;  //just to make sure after jmp
    3.91 +   //GCC may optimize so doesn't always re-define from frame-storage
    3.92     thisCoresIdx     = masterPr->coreAnimatedBy;
    3.93 +   masterEnv->currentMasterProcrID = thisCoresIdx;
    3.94     readyToAnimateQ  = masterEnv->readyToAnimateQs[thisCoresIdx];
    3.95     schedSlots       = masterEnv->allSchedSlots[thisCoresIdx];
    3.96  
    3.97 @@ -129,8 +127,28 @@
    3.98     slaveScheduler   = masterEnv->slaveScheduler;
    3.99     semanticEnv      = masterEnv->semanticEnv;
   3.100  
   3.101 -
   3.102 -      //Poll each slot's Done flag
   3.103 +      //First, check for requests from other MasterVPs, and handle them
   3.104 +   InterMasterReqst* currReqst = masterEnv->interMasterRequestsFor[thisCoresIdx];
   3.105 +   while(currReqst)
   3.106 +   {
   3.107 +       handleInterMasterReq( currReqst, semanticEnv, masterPr );
   3.108 +       currReqst = currReqst->nextReqst;
   3.109 +   }
   3.110 +   masterEnv->interMasterRequestsFor[thisCoresIdx] = NULL;
   3.111 +   
   3.112 +   //Second, check for own requests that were handled by other MasterVPs
   3.113 +   currReqst = masterEnv->interMasterRequestsSentBy[thisCoresIdx];
   3.114 +   while(currReqst && currReqst->obsolete)
   3.115 +   {
   3.116 +       InterMasterReqst *nextReqst = currReqst->nextSentReqst;
   3.117 +       VMS__free(currReqst);
   3.118 +       currReqst = nextReqst;
   3.119 +   }
   3.120 +   masterEnv->interMasterRequestsSentBy[thisCoresIdx] = currReqst;
   3.121 +   
   3.122 +      //Now, take care of the SlaveVPs
   3.123 +      //Go through the slots -- if Slave there newly suspended, handle its request
   3.124 +      // then, either way, ask assigner to fill each slot
   3.125     numSlotsFilled = 0;
   3.126     for( slotIdx = 0; slotIdx < NUM_SCHED_SLOTS; slotIdx++)
   3.127      {
   3.128 @@ -167,6 +185,7 @@
   3.129           if( schedVirtPr != NULL )
   3.130            { currSlot->procrAssignedToSlot = schedVirtPr;
   3.131              schedVirtPr->schedSlot        = currSlot;
   3.132 +            schedVirtPr->coreAnimatedBy   = thisCoresIdx;
   3.133              currSlot->needsProcrAssigned  = FALSE;
   3.134              numSlotsFilled               += 1;
   3.135              
   3.136 @@ -190,15 +209,56 @@
   3.137  
   3.138     masterSwitchToCoreLoop(animatingPr);
   3.139     flushRegisters();
   3.140 -   }//MasterLoop
   3.141 -
   3.142 -
   3.143 +   }//while(1)   MasterLoop
   3.144   }
   3.145  
   3.146 +/*This is for inter-master communication.  Either the master itself or
   3.147 + * the plugin sends one of these requests.  Some are handled here, by the
   3.148 + * master_loop, others are handed off to the plugin.
   3.149 + */
   3.150 +void inline
   3.151 +handleInterMasterReq( InterMasterReqst *currReq, void *_semEnv,
   3.152 +                                                    VirtProcr *masterPr )
   3.153 + { 
   3.154 +    
   3.155 +    switch( currReq->reqType )
   3.156 +    {  
   3.157 +      case destVMSCore:
   3.158 +         handleInterVMSCoreReq( (InterVMSCoreReqst *)currReq, masterPr);
   3.159 +         break;
   3.160 +      case destPlugin:
   3.161 +         _VMSMasterEnv->interPluginReqHdlr( ((InterPluginReqst  *)currReq)->pluginReq,
   3.162 +                                                                    _semEnv );
   3.163 +          break;
   3.164 +      default:
   3.165 +         break;
   3.166 +    }
   3.167 + }
   3.168  
   3.169 +void inline
   3.170 +handleInterVMSCoreReq( InterVMSCoreReqst *currReq, VirtProcr *masterPr )
   3.171 + { 
   3.172 +   switch( currReq->secondReqType )
   3.173 +    {
   3.174 +      case transfer_free_ptr:
   3.175 +          handleTransferFree( currReq, masterPr );
   3.176 +          currReq->obsolete = 1; //now the sender can free the structure
   3.177 +          break;
   3.178 +       case shutdownVP:
   3.179 +           currReq->obsolete = 1;
   3.180 +           handleShutdown(currReq, masterPr); 
   3.181 +           //The Execution of the MasterLoop ends here
   3.182 +           break;
   3.183 +      default:
   3.184 +          break;
   3.185 +    }
   3.186 +}
   3.187  
   3.188 -/*This has a race condition -- the coreloops are accessing their own queues
   3.189 - * at the same time that this work-stealer on a different core is trying to
   3.190 +/*Work Stealing Alg -- racy one
   3.191 + *This algorithm has a race condition -- the coreloops are accessing their
   3.192 + * own queues at the same time that this work-stealer on a different core
   3.193 + * is trying to.
   3.194 + *The second stealing alg, below, protects against this.
   3.195   */
   3.196  void inline
   3.197  stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ,
   3.198 @@ -234,7 +294,8 @@
   3.199      }
   3.200   }
   3.201  
   3.202 -/*This algorithm makes the common case fast.  Make the coreloop passive,
   3.203 +/*Work Stealing alg -- protected one
   3.204 + *This algorithm makes the common case fast.  Make the coreloop passive,
   3.205   * and show its progress.  Make the stealer control a gate that coreloop
   3.206   * has to pass.
   3.207   *To avoid interference, only one stealer at a time.  Use a global
   3.208 @@ -360,7 +421,7 @@
   3.209     //======= End Gate-protection  =======
   3.210  
   3.211  
   3.212 -   if( stolenPr != NULL )  //victim could have been in protected and taken
   3.213 +   if( stolenPr != NULL )  //victim could have been in protected area and taken it
   3.214      { currSlot->procrAssignedToSlot = stolenPr;
   3.215        stolenPr->schedSlot           = currSlot;
   3.216        currSlot->needsProcrAssigned  = FALSE;
     4.1 --- a/ProcrContext.c	Wed Sep 07 19:36:46 2011 +0200
     4.2 +++ b/ProcrContext.c	Mon Sep 19 16:12:01 2011 +0200
     4.3 @@ -4,6 +4,7 @@
     4.4  
     4.5  
     4.6  #include "VMS.h"
     4.7 +#include "ProcrContext.h"
     4.8  
     4.9  /*Create stack, then create __cdecl structure on it and put initialData and
    4.10   * pointer to the new structure instance into the parameter positions on
     5.1 --- a/ProcrContext.h	Wed Sep 07 19:36:46 2011 +0200
     5.2 +++ b/ProcrContext.h	Mon Sep 19 16:12:01 2011 +0200
     5.3 @@ -5,11 +5,56 @@
     5.4   * Author: seanhalle@yahoo.com
     5.5   * 
     5.6   */
     5.7 -
     5.8  #ifndef _ProcrContext_H
     5.9  #define	_ProcrContext_H
    5.10  #define _GNU_SOURCE
    5.11  
    5.12 +typedef struct _VirtProcr VirtProcr;
    5.13 +typedef struct _VMSReqst  VMSReqst;
    5.14 +typedef void  (*VirtProcrFnPtr)  ( void *, VirtProcr * ); //initData, animPr
    5.15 +
    5.16 +
    5.17 +#include "VMS_primitive_data_types.h"
    5.18 +#include "scheduling.h"
    5.19 +
    5.20 +/*WARNING: re-arranging this data structure could cause VP switching
    5.21 + *         assembly code to fail -- hard-codes offsets of fields
    5.22 + */
    5.23 +struct _VirtProcr
    5.24 + { int         procrID;  //for debugging -- count up each time create
    5.25 +   int         coreAnimatedBy;
    5.26 +   void       *startOfStack;
    5.27 +   void       *stackPtr;
    5.28 +   void       *framePtr;
    5.29 +   void       *nextInstrPt;
    5.30 +   
    5.31 +   void       *coreLoopStartPt;  //allows proto-runtime to be linked later
    5.32 +   void       *coreLoopFramePtr; //restore before jmp back to core loop
    5.33 +   void       *coreLoopStackPtr; //restore before jmp back to core loop
    5.34 +
    5.35 +   void       *initialData;
    5.36 +   
    5.37 +   SchedSlot  *schedSlot;
    5.38 +   VMSReqst   *requests;
    5.39 +
    5.40 +   void       *semanticData;
    5.41 +   void       *dataRetFromReq; //values returned from plugin to VP go here
    5.42 +
    5.43 +      //=========== MEASUREMENT STUFF ==========
    5.44 +   #ifdef MEAS__TIME_STAMP_SUSP
    5.45 +   unsigned int preSuspTSCLow;
    5.46 +   unsigned int postSuspTSCLow;
    5.47 +   #endif
    5.48 +   #ifdef MEAS__TIME_MASTER /* in VirtProcr because multiple masterVPs*/
    5.49 +   unsigned int startMasterTSCLow;
    5.50 +   unsigned int endMasterTSCLow;
    5.51 +   #endif
    5.52 +      //========================================
    5.53 +   
    5.54 +   float64      createPtInSecs;  //have space but don't use on some configs
    5.55 + };
    5.56 +//VirtProcr
    5.57 +
    5.58  void saveCoreLoopReturnAddr(void **returnAddress);
    5.59  
    5.60  void switchToVP(VirtProcr *nextProcr);
    5.61 @@ -20,10 +65,12 @@
    5.62  
    5.63  void startVirtProcrFn();
    5.64  
    5.65 -void *asmTerminateCoreLoop(VirtProcr *currPr);
    5.66 +void asmTerminateCoreLoop(VirtProcr *currPr);
    5.67 +
    5.68 +void asmTerminateCoreLoopSeq(VirtProcr *currPr);
    5.69  
    5.70  #define flushRegisters() \
    5.71 -        asm volatile ("":::"%rbx", "%r12", "%r13","%r14","%r15")
    5.72 +        asm volatile ("":::"%rbx", "%r12", "%r13","%r14","%r15");
    5.73  
    5.74  inline VirtProcr *
    5.75  create_procr_helper( VirtProcr *newPr,       VirtProcrFnPtr  fnPtr,
     6.1 --- a/VMS.c	Wed Sep 07 19:36:46 2011 +0200
     6.2 +++ b/VMS.c	Mon Sep 19 16:12:01 2011 +0200
     6.3 @@ -13,6 +13,7 @@
     6.4  
     6.5  #include "VMS.h"
     6.6  #include "ProcrContext.h"
     6.7 +#include "scheduling.h"
     6.8  #include "Queue_impl/BlockingQueue.h"
     6.9  #include "Histogram/Histogram.h"
    6.10  
    6.11 @@ -105,8 +106,14 @@
    6.12          //Very first thing put into the master env is the free-list, seeded
    6.13          // with a massive initial chunk of memory.
    6.14          //After this, all other mallocs are VMS__malloc.
    6.15 -   _VMSMasterEnv->freeLists        = VMS_ext__create_free_list();
    6.16 -
    6.17 +   int i;
    6.18 +   for(i=0; i<NUM_CORES; i++)
    6.19 +   {
    6.20 +       _VMSMasterEnv->freeLists[i]        = VMS_ext__create_free_list();
    6.21 +       _VMSMasterEnv->interMasterRequestsFor[i] = NULL;
    6.22 +       _VMSMasterEnv->interMasterRequestsSentBy[i] = NULL;
    6.23 +   }
    6.24 +   _VMSMasterEnv->currentMasterProcrID = 0;
    6.25  
    6.26     //============================= MEASUREMENT STUFF ========================
    6.27     #ifdef MEAS__TIME_MALLOC
    6.28 @@ -497,6 +504,19 @@
    6.29     VMS__suspend_procr( callingPr );
    6.30   }
    6.31  
    6.32 +void inline
    6.33 +VMS__send_inter_plugin_req( void *reqData, int32 targetMaster, 
    6.34 +                                            VirtProcr *requestingMaster )
    6.35 + { _VMSMasterEnv->interMasterRequestsFor[targetMaster] = 
    6.36 +                                            (InterMasterReqst *) reqData;
    6.37 + }
    6.38 +
    6.39 +void inline
    6.40 +VMS__send_inter_VMSCore_req( InterVMSCoreReqst *reqData,
    6.41 +                        int32 targetMaster, VirtProcr *requestingMaster )
    6.42 + { _VMSMasterEnv->interMasterRequestsFor[targetMaster] = 
    6.43 +                                            (InterMasterReqst *) reqData;
    6.44 + }
    6.45  
    6.46  /*
    6.47   */
    6.48 @@ -542,18 +562,27 @@
    6.49  
    6.50     semReq = req->semReqData;
    6.51  
    6.52 -   newProbe          = VMS__malloc( sizeof(IntervalProbe) );
    6.53 -   newProbe->nameStr = VMS__strDup( semReq->nameStr );
    6.54 -   newProbe->hist    = NULL;
    6.55 -   newProbe->schedChoiceWasRecorded = FALSE;
    6.56 +   switch(semReq->reqType){
    6.57 +       case createProbe:
    6.58 +           newProbe          = VMS__malloc( sizeof(IntervalProbe) );
    6.59 +           newProbe->nameStr = VMS__strDup( (char*)semReq->data );
    6.60 +           newProbe->hist    = NULL;
    6.61 +           newProbe->schedChoiceWasRecorded = FALSE;
    6.62  
    6.63 -      //This runs in masterVP, so no race-condition worries
    6.64 -   newProbe->probeID =
    6.65 -             addToDynArray( newProbe, _VMSMasterEnv->dynIntervalProbesInfo );
    6.66 -
    6.67 -   requestingPr->dataRetFromReq = newProbe;
    6.68 -
    6.69 -   (*resumePrFnPtr)( requestingPr, semEnv );
    6.70 +           //This runs in masterVP, so no race-condition worries
    6.71 +           newProbe->probeID =
    6.72 +                   addToDynArray( newProbe, _VMSMasterEnv->dynIntervalProbesInfo );
    6.73 +           requestingPr->dataRetFromReq = newProbe;
    6.74 +           break;
    6.75 +       case interMasterReqst:
    6.76 +           sendInterMasterReqst(semReq->receiverID,
    6.77 +                   (InterMasterReqst*)semReq->data);
    6.78 +           break;
    6.79 +       default:
    6.80 +           break;       
    6.81 +   }
    6.82 +   
    6.83 +   resumePrFnPtr( requestingPr, semEnv );
    6.84   }
    6.85  
    6.86  
    6.87 @@ -589,8 +618,9 @@
    6.88        // itself
    6.89        //Note, should not stack-allocate initial data -- no guarantee, in
    6.90        // general that creating processor will outlive ones it creates.
    6.91 -   VMS__free( animatingPr->startOfStack );
    6.92 -   VMS__free( animatingPr );
    6.93 +     
    6.94 +   VMS__free( animatingPr->startOfStack);
    6.95 +   VMS__free( animatingPr);
    6.96   }
    6.97  
    6.98  
    6.99 @@ -629,14 +659,12 @@
   6.100  void
   6.101  VMS__shutdown()
   6.102   { int coreIdx;
   6.103 -   VirtProcr *shutDownPr;
   6.104 -
   6.105 -      //create the shutdown processors, one for each core loop -- put them
   6.106 -      // directly into the Q -- each core will die when gets one
   6.107 +   //Send a shutdown Request to all MasterLoops.
   6.108     for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ )
   6.109      {    //Note, this is running in the master
   6.110 -      shutDownPr = VMS__create_procr( &endOSThreadFn, NULL );
   6.111 -      writeVMSQ( shutDownPr, _VMSMasterEnv->readyToAnimateQs[coreIdx] );
   6.112 +       InterVMSCoreReqst *shutdownReqst = VMS__malloc(sizeof(InterVMSCoreReqst));
   6.113 +       shutdownReqst->secondReqType = shutdownVP;
   6.114 +       sendInterMasterReqst(coreIdx, (InterMasterReqst*)shutdownReqst);
   6.115      }
   6.116  
   6.117   }
   6.118 @@ -655,6 +683,7 @@
   6.119   * to core loop function -- note that this slices out a level of virtual
   6.120   * processors).
   6.121   */
   6.122 +/*
   6.123  void
   6.124  endOSThreadFn( void *initData, VirtProcr *animatingPr )
   6.125   { 
   6.126 @@ -664,6 +693,7 @@
   6.127      asmTerminateCoreLoop(animatingPr);
   6.128  #endif
   6.129   }
   6.130 + */
   6.131  
   6.132  
   6.133  /*This is called from the startup & shutdown
   6.134 @@ -671,6 +701,9 @@
   6.135  void
   6.136  VMS__cleanup_at_end_of_shutdown()
   6.137   { 
   6.138 +   // Set to zero so that all data structures are freed correctly 
   6.139 +   _VMSMasterEnv->currentMasterProcrID = 0;
   6.140 +   
   6.141     //unused
   6.142     //VMSQueueStruc **readyToAnimateQs;
   6.143     //int              coreIdx;
   6.144 @@ -751,7 +784,9 @@
   6.145     //========================================================================
   6.146  */
   6.147        //These are the only two that use system free 
   6.148 -   VMS_ext__free_free_list( _VMSMasterEnv->freeLists );
   6.149 +   int i;
   6.150 +   for(i=0; i<NUM_CORES; i++)
   6.151 +        VMS_ext__free_free_list( _VMSMasterEnv->freeLists[i]);
   6.152     free( (void *)_VMSMasterEnv );
   6.153   }
   6.154  
     7.1 --- a/VMS.h	Wed Sep 07 19:36:46 2011 +0200
     7.2 +++ b/VMS.h	Mon Sep 19 16:12:01 2011 +0200
     7.3 @@ -5,21 +5,20 @@
     7.4   * Author: seanhalle@yahoo.com
     7.5   * 
     7.6   */
     7.7 -
     7.8  #ifndef _VMS_H
     7.9  #define	_VMS_H
    7.10  #define _GNU_SOURCE
    7.11  
    7.12 +#include <pthread.h>
    7.13 +#include <sys/time.h>
    7.14 +
    7.15  #include "VMS_primitive_data_types.h"
    7.16  #include "Queue_impl/PrivateQueue.h"
    7.17  #include "Histogram/Histogram.h"
    7.18  #include "DynArray/DynArray.h"
    7.19  #include "Hash_impl/PrivateHash.h"
    7.20  #include "vmalloc.h"
    7.21 -
    7.22 -#include <pthread.h>
    7.23 -#include <sys/time.h>
    7.24 -
    7.25 +#include "inter_VMS_requests.h"
    7.26  
    7.27  //===============================  Debug  ===================================
    7.28  //
    7.29 @@ -50,9 +49,9 @@
    7.30  #define DEBUG2( bool, msg, p1, p2) \
    7.31  //   if(bool) {printf(msg, p1, p2); fflush(stdin);}
    7.32  
    7.33 -#define ERROR(msg) printf(msg);
    7.34 -#define ERROR1(msg, param) printf(msg, param); 
    7.35 -#define ERROR2(msg, p1, p2) printf(msg, p1, p2);
    7.36 +#define ERROR(msg) printf(msg)
    7.37 +#define ERROR1(msg, param) printf(msg, param);
    7.38 +#define ERROR2(msg, p1, p2) printf(msg, p1, p2)
    7.39  
    7.40  //===========================  STATS =======================
    7.41  
    7.42 @@ -90,7 +89,7 @@
    7.43  #define VIRT_PROCR_STACK_SIZE 0x8000 /* 32K */
    7.44  
    7.45     // memory for VMS__malloc
    7.46 -#define MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE 0x10000000 /* 256M */
    7.47 +#define MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE 0x4000000 /* 64M */
    7.48  
    7.49  #define CACHE_LINE 64
    7.50  #define PAGE_SIZE 4096
    7.51 @@ -111,26 +110,23 @@
    7.52  //===========================================================================
    7.53  typedef unsigned long long TSCount;
    7.54  
    7.55 -typedef struct _SchedSlot     SchedSlot;
    7.56 -typedef struct _VMSReqst      VMSReqst;
    7.57 -typedef struct _VirtProcr     VirtProcr;
    7.58  typedef struct _IntervalProbe IntervalProbe;
    7.59  typedef struct _GateStruc     GateStruc;
    7.60  
    7.61  
    7.62  typedef VirtProcr * (*SlaveScheduler)  ( void *, int );   //semEnv, coreIdx
    7.63  typedef void  (*RequestHandler)  ( VirtProcr *, void * ); //prWReqst, semEnv
    7.64 -typedef void  (*VirtProcrFnPtr)  ( void *, VirtProcr * ); //initData, animPr
    7.65 -typedef void    VirtProcrFn      ( void *, VirtProcr * ); //initData, animPr
    7.66  typedef void  (*ResumePrFnPtr)   ( VirtProcr *, void * );
    7.67  
    7.68 -
    7.69  //============= Requests ===========
    7.70  //
    7.71  
    7.72 -enum VMSReqstType   //avoid starting enums at 0, for debug reasons
    7.73 - {
    7.74 -   semantic = 1,
    7.75 +//VMS Request is the carrier for Slave to Master requests
    7.76 +// it has an embedded sub-type request that is pulled out
    7.77 +// inside the plugin's request handler
    7.78 +enum VMSReqstType   //For Slave->Master requests
    7.79 + { 
    7.80 +   semantic = 1,    //avoid starting enums at 0, for debug reasons
    7.81     createReq,
    7.82     dissipate,
    7.83     VMSSemantic      //goes with VMSSemReqst below
    7.84 @@ -145,73 +141,30 @@
    7.85   };
    7.86  //VMSReqst
    7.87  
    7.88 +//This is a sub-type of Slave->Master requests.
    7.89 +// It's for Slaves to invoke built-in VMS-core functions that have language-like
    7.90 +// behavior.
    7.91  enum VMSSemReqstType   //These are equivalent to semantic requests, but for
    7.92   {                     // VMS's services available directly to app, like OS
    7.93     createProbe = 1,    // and probe services -- like a VMS-wide built-in lang
    7.94     openFile,
    7.95 -   otherIO
    7.96 +   otherIO,
    7.97 +   interMasterReqst
    7.98   };
    7.99  
   7.100  typedef struct
   7.101   { enum VMSSemReqstType reqType;
   7.102 -   VirtProcr           *requestingPr;
   7.103 -   char                *nameStr;  //for create probe
   7.104 +   //VirtProcr           *requestingPr;
   7.105 +   int                  receiverID; //for inter master requests
   7.106 +   void                *data;
   7.107   }
   7.108 - VMSSemReq;
   7.109 +VMSSemReq;
   7.110  
   7.111  
   7.112  //====================  Core data structures  ===================
   7.113  
   7.114 -struct _SchedSlot
   7.115 - {
   7.116 -   int         workIsDone;
   7.117 -   int         needsProcrAssigned;
   7.118 -   VirtProcr  *procrAssignedToSlot;
   7.119 - };
   7.120 -//SchedSlot
   7.121 -
   7.122 -/*WARNING: re-arranging this data structure could cause VP switching
   7.123 - *         assembly code to fail -- hard-codes offsets of fields
   7.124 - */
   7.125 -struct _VirtProcr
   7.126 - { int         procrID;  //for debugging -- count up each time create
   7.127 -   int         coreAnimatedBy;
   7.128 -   void       *startOfStack;
   7.129 -   void       *stackPtr;
   7.130 -   void       *framePtr;
   7.131 -   void       *nextInstrPt;
   7.132 -   
   7.133 -   void       *coreLoopStartPt;  //allows proto-runtime to be linked later
   7.134 -   void       *coreLoopFramePtr; //restore before jmp back to core loop
   7.135 -   void       *coreLoopStackPtr; //restore before jmp back to core loop
   7.136 -
   7.137 -   void       *initialData;
   7.138 -
   7.139 -   SchedSlot  *schedSlot;
   7.140 -   VMSReqst   *requests;
   7.141 -
   7.142 -   void       *semanticData; //this livesUSE_GNU here for the life of VP
   7.143 -   void       *dataRetFromReq;//values returned from plugin to VP go here
   7.144 -
   7.145 -      //=========== MEASUREMENT STUFF ==========
   7.146 -   #ifdef MEAS__TIME_STAMP_SUSP
   7.147 -   unsigned int preSuspTSCLow;
   7.148 -   unsigned int postSuspTSCLow;
   7.149 -   #endif
   7.150 -   #ifdef MEAS__TIME_MASTER /* in VirtProcr because multiple masterVPs*/
   7.151 -   unsigned int startMasterTSCLow;USE_GNU
   7.152 -   unsigned int endMasterTSCLow;
   7.153 -   #endif
   7.154 -      //========================================
   7.155 -   
   7.156 -   float64      createPtInSecs;  //have space but don't use on some configs
   7.157 - };
   7.158 -//VirtProcr
   7.159 -
   7.160 -
   7.161 -/*WARNING: re-arranging this data structure could cause VP-switching
   7.162 - *         assembly code to fail -- hard-codes offsets of fields
   7.163 - *         (because -O3 messes with things otherwise)
   7.164 +/*Master Env is the only global variable -- has entry points for any other
   7.165 + * data needed.  
   7.166   */
   7.167  typedef struct
   7.168   {
   7.169 @@ -219,44 +172,51 @@
   7.170     RequestHandler   requestHandler;
   7.171     
   7.172     SchedSlot     ***allSchedSlots;
   7.173 -   VMSQueueStruc **readyToAnimateQs;
   7.174 +   VMSQueueStruc  **readyToAnimateQs;
   7.175     VirtProcr      **masterVPs;
   7.176  
   7.177     void            *semanticEnv;
   7.178     void            *OSEventStruc;   //for future, when add I/O to BLIS
   7.179 -   MallocArrays    *freeLists;
   7.180 -   int32            amtOfOutstandingMem; //total currently allocated
   7.181  
   7.182     void            *coreLoopReturnPt;//addr to jump to to re-enter coreLoop
   7.183  
   7.184     int32            setupComplete;
   7.185     volatile int32   masterLock;
   7.186 +   
   7.187 +   MallocArrays    *freeLists[NUM_CORES];
   7.188 +   int32            amtOfOutstandingMem; //total currently allocated
   7.189  
   7.190     int32            numMasterInARow[NUM_CORES];//detect back-to-back masterVP
   7.191 -   GateStruc       *workStealingGates[ NUM_CORES ]; //concurrent work-steal
   7.192 +   GateStruc       *workStealingGates[NUM_CORES]; //concurrent work-steal
   7.193     int32            workStealingLock;
   7.194     
   7.195 -   int32            numProcrsCreated; //gives ordering to processor creation
   7.196 +   InterMasterReqst*  interMasterRequestsFor[NUM_CORES];
   7.197 +   InterMasterReqst*  interMasterRequestsSentBy[NUM_CORES];
   7.198 +   RequestHandler     interPluginReqHdlr;
   7.199 +   
   7.200 +   int32              numProcrsCreated; //gives ordering to processor creation
   7.201 +   
   7.202 +   int32              currentMasterProcrID;
   7.203  
   7.204        //=========== MEASUREMENT STUFF =============
   7.205 -   IntervalProbe  **intervalProbes;
   7.206 -   PrivDynArrayInfo    *dynIntervalProbesInfo;
   7.207 -   HashTable       *probeNameHashTbl;
   7.208 -   int32            masterCreateProbeID;
   7.209 -   float64          createPtInSecs;
   7.210 -   Histogram      **measHists;
   7.211 -   PrivDynArrayInfo *measHistsInfo;
   7.212 +   IntervalProbe    **intervalProbes;
   7.213 +   PrivDynArrayInfo  *dynIntervalProbesInfo;
   7.214 +   HashTable         *probeNameHashTbl;
   7.215 +   int32              masterCreateProbeID;
   7.216 +   float64            createPtInSecs;
   7.217 +   Histogram        **measHists;
   7.218 +   PrivDynArrayInfo  *measHistsInfo;
   7.219     #ifdef MEAS__TIME_PLUGIN
   7.220 -   Histogram       *reqHdlrLowTimeHist;
   7.221 -   Histogram       *reqHdlrHighTimeHist;
   7.222 +   Histogram         *reqHdlrLowTimeHist;
   7.223 +   Histogram         *reqHdlrHighTimeHist;
   7.224     #endif
   7.225     #ifdef MEAS__TIME_MALLOC
   7.226 -   Histogram       *mallocTimeHist;
   7.227 -   Histogram       *freeTimeHist;
   7.228 +   Histogram         *mallocTimeHist;
   7.229 +   Histogram         *freeTimeHist;
   7.230     #endif
   7.231     #ifdef MEAS__TIME_MASTER_LOCK
   7.232 -   Histogram       *masterLockLowTimeHist;
   7.233 -   Histogram       *masterLockHighTimeHist;
   7.234 +   Histogram         *masterLockLowTimeHist;
   7.235 +   Histogram         *masterLockHighTimeHist;
   7.236     #endif
   7.237   }
   7.238  MasterEnv;
   7.239 @@ -303,8 +263,6 @@
   7.240  volatile MasterEnv      *_VMSMasterEnv;
   7.241  
   7.242  
   7.243 -
   7.244 -
   7.245  //===========================  Function Prototypes  =========================
   7.246  
   7.247  
   7.248 @@ -368,6 +326,14 @@
   7.249  inline void
   7.250  VMS__send_VMSSem_request( void *semReqData, VirtProcr *callingPr );
   7.251  
   7.252 +
   7.253 +void inline
   7.254 +VMS__send_inter_plugin_req( void *reqData, int32 targetMaster, 
   7.255 +                                            VirtProcr *requestingMaster );
   7.256 +void inline
   7.257 +VMS__send_inter_VMSCore_req( InterVMSCoreReqst *reqData, int32 targetMaster,
   7.258 +                                           VirtProcr *requestingMaster );
   7.259 +
   7.260  VMSReqst *
   7.261  VMS__take_next_request_out_of( VirtProcr *procrWithReq );
   7.262  
   7.263 @@ -571,7 +537,6 @@
   7.264  
   7.265  //=====
   7.266  
   7.267 -#include "ProcrContext.h"
   7.268  #include "probes.h"
   7.269  #include "vutilities.h"
   7.270  
     8.1 --- a/contextSwitch.s	Wed Sep 07 19:36:46 2011 +0200
     8.2 +++ b/contextSwitch.s	Mon Sep 19 16:12:01 2011 +0200
     8.3 @@ -2,7 +2,17 @@
     8.4  
     8.5  
     8.6  .text
     8.7 -
     8.8 +/* VirtProcr  offsets:
     8.9 + * 0x10  stackPtr
    8.10 + * 0x18 framePtr
    8.11 + * 0x20 nextInstrPt
    8.12 + * 0x30 coreLoopFramePtr
    8.13 + * 0x38 coreLoopStackPtr
    8.14 + *
    8.15 + * _VMSMasterEnv  offsets:
    8.16 + * 0x38 coreLoopReturnPt
    8.17 + * 0x44 masterLock
    8.18 + */
    8.19  //Save return label address for the coreLoop to pointer
    8.20  //Arguments: Pointer to variable holding address
    8.21  .globl saveCoreLoopReturnAddr
    8.22 @@ -23,17 +33,6 @@
    8.23  
    8.24  //Switches form CoreLoop to VP ether a normal VP or the Master Loop
    8.25  //switch to virt procr's stack and frame ptr then jump to virt procr fn
    8.26 -/* VirtProcr  offsets:
    8.27 - * 0x10  stackPtr
    8.28 - * 0x18 framePtr
    8.29 - * 0x20 nextInstrPt
    8.30 - * 0x30 coreLoopFramePtr
    8.31 - * 0x38 coreLoopStackPtr
    8.32 - *
    8.33 - * _VMSMasterEnv  offsets:
    8.34 - * 0x48 coreLoopReturnPt
    8.35 - * 0x54 masterLock
    8.36 - */
    8.37  .globl switchToVP
    8.38  switchToVP:
    8.39      #VirtProcr in %rdi
    8.40 @@ -48,17 +47,6 @@
    8.41  
    8.42      
    8.43  //switches to core loop. saves return address
    8.44 -/* VirtProcr  offsets:
    8.45 - * 0x10  stackPtr
    8.46 - * 0x18 framePtr
    8.47 - * 0x20 nextInstrPt
    8.48 - * 0x30 coreLoopFramePtr
    8.49 - * 0x38 coreLoopStackPtr
    8.50 - *
    8.51 - * _VMSMasterEnv  offsets:
    8.52 - * 0x48 coreLoopReturnPt
    8.53 - * 0x54 masterLock
    8.54 - */
    8.55  .globl switchToCoreLoop
    8.56  switchToCoreLoop:
    8.57      #VirtProcr in %rdi
    8.58 @@ -69,7 +57,7 @@
    8.59      movq    0x30(%rdi), %rbp         #restore frame pointer
    8.60      movq    $_VMSMasterEnv, %rcx
    8.61      movq    (%rcx)    , %rcx
    8.62 -    movq    0x48(%rcx), %rax         #get CoreLoopStartPt
    8.63 +    movq    0x38(%rcx), %rax         #get CoreLoopStartPt
    8.64      jmp     *%rax                    #jmp to CoreLoop
    8.65  VPReturn:
    8.66      ret
    8.67 @@ -78,17 +66,6 @@
    8.68  
    8.69  //switches to core loop from master. saves return address
    8.70  //Releases masterLock so the next MasterLoop can be executed
    8.71 -/* VirtProcr  offsets:
    8.72 - * 0x10  stackPtr
    8.73 - * 0x18 framePtr
    8.74 - * 0x20 nextInstrPt
    8.75 - * 0x30 coreLoopFramePtr
    8.76 - * 0x38 coreLoopStackPtr
    8.77 - *
    8.78 - * _VMSMasterEnv  offsets:
    8.79 - * 0x48 coreLoopReturnPt
    8.80 - * 0x54 masterLock
    8.81 - */
    8.82  .globl masterSwitchToCoreLoop
    8.83  masterSwitchToCoreLoop:
    8.84      #VirtProcr in %rdi
    8.85 @@ -99,8 +76,8 @@
    8.86      movq    0x30(%rdi), %rbp         #restore frame pointer
    8.87      movq    $_VMSMasterEnv, %rcx
    8.88      movq    (%rcx)    , %rcx
    8.89 -    movq    0x48(%rcx), %rax         #get CoreLoopStartPt
    8.90 -    movl    $0x0      , 0x54(%rcx)   #release lock
    8.91 +    movq    0x38(%rcx), %rax         #get CoreLoopStartPt
    8.92 +    movl    $0x0      , 0x44(%rcx)   #release lock
    8.93      jmp     *%rax                    #jmp to CoreLoop
    8.94  MasterReturn:
    8.95      ret
    8.96 @@ -112,22 +89,14 @@
    8.97  // and virtPr is in %rdi
    8.98  // and both functions have the same argument.
    8.99  // do not save register of VP because this function will never return
   8.100 -/* VirtProcr  offsets:
   8.101 - * 0x10  stackPtr
   8.102 - * 0x18 framePtr
   8.103 - * 0x20 nextInstrPt
   8.104 - * 0x30 coreLoopFramePtr
   8.105 - * 0x38 coreLoopStackPtr
   8.106 - *
   8.107 - * _VMSMasterEnv  offsets:
   8.108 - * 0x48 coreLoopReturnPt
   8.109 - * 0x58 masterLock
   8.110 - */
   8.111  .globl asmTerminateCoreLoop
   8.112  asmTerminateCoreLoop:
   8.113      #VirtProcr in %rdi
   8.114      movq    0x38(%rdi), %rsp         #restore stack pointer
   8.115      movq    0x30(%rdi), %rbp         #restore frame pointer
   8.116 +    movq    $_VMSMasterEnv, %rcx
   8.117 +    movq    (%rcx)    , %rcx
   8.118 +    movl    $0x0      , 0x44(%rcx)   #release lock
   8.119      movq    $terminateCoreLoop, %rax
   8.120      jmp     *%rax                    #jmp to CoreLoop
   8.121  
   8.122 @@ -142,7 +111,6 @@
   8.123      movq    0x38(%rdi), %rsp         #restore stack pointer
   8.124      movq    0x30(%rdi), %rbp         #restore frame pointer
   8.125      #argument is in %rdi
   8.126 -    call    VMS__dissipate_procr
   8.127      movq    %rbp      , %rsp        #goto the coreLoops stack
   8.128      pop     %rbp        #restore the old framepointer
   8.129      ret                 #return from core loop
     9.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.2 +++ b/inter_VMS_requests.c	Mon Sep 19 16:12:01 2011 +0200
     9.3 @@ -0,0 +1,15 @@
     9.4 +#include "VMS.h"
     9.5 +#include "inter_VMS_requests.h"
     9.6 +
     9.7 +
     9.8 +void sendInterMasterReqst(int receiverID, InterMasterReqst* request) //links request into receiver's list and sender's sent-list
     9.9 +{
    9.10 +    request->reqType = destVMSCore; //NOTE(review): unconditionally tagged destVMSCore -- a destPlugin request sent this way gets re-tagged
    9.11 +    request->obsolete = 0;
    9.12 +    request->nextReqst = _VMSMasterEnv->interMasterRequestsFor[receiverID]; //push onto head of receiver's list; receiverID is not range-checked
    9.13 +    _VMSMasterEnv->interMasterRequestsFor[receiverID] = request;
    9.14 +    request->nextSentReqst = 
    9.15 +            _VMSMasterEnv->interMasterRequestsSentBy[_VMSMasterEnv->currentMasterProcrID]; //sender is identified via currentMasterProcrID
    9.16 +    _VMSMasterEnv->interMasterRequestsSentBy[_VMSMasterEnv->currentMasterProcrID]
    9.17 +            = request; //push onto head of the sender's sent-list
    9.18 +}
    9.19 \ No newline at end of file
    10.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.2 +++ b/inter_VMS_requests.h	Mon Sep 19 16:12:01 2011 +0200
    10.3 @@ -0,0 +1,74 @@
    10.4 +/*
    10.5 + *  Copyright 2011 OpenSourceStewardshipFoundation.org
    10.6 + *  Licensed under GNU General Public License version 2
    10.7 + *
    10.8 + * Author(s): seanhalle@yahoo.com
    10.9 + *
   10.10 + */
   10.11 +
   10.12 +#ifndef _MASTER_REQ_H
   10.13 +#define	_MASTER_REQ_H
   10.14 +
   10.15 +typedef struct _InterMasterReqst InterMasterReqst;
   10.16 +
   10.17 +//These are for Master to Master requests
   10.18 +// They get re-cast to the appropriate sub-type of request
   10.19 +enum InterMasterReqstType    //For Master->Master
   10.20 + {
   10.21 +   destVMSCore = 1,          //avoid starting enums at 0, for debug reasons
   10.22 +   destPlugin
   10.23 + };
   10.24 +
   10.25 +struct _InterMasterReqst //Doing a trick to save space & time -- allocate
   10.26 + {  // space for a sub-type then cast first as InterMaster then as sub-type
   10.27 +   enum InterMasterReqstType  reqType;
   10.28 +   InterMasterReqst *nextReqst;
   10.29 +   InterMasterReqst *nextSentReqst;
   10.30 +   int32 obsolete;
   10.31 + };
   10.32 +//InterMasterReqst  (defined above in typedef block)
   10.33 +
   10.34 +
   10.35 +//These are a sub-type of InterMaster requests.  The inter-master req gets
   10.36 +// re-cast to be of this type, after checking its reqType
   10.37 +//This one is for requests between internals of VMS-core, such as malloc
   10.38 +enum InterVMSCoreReqType   
   10.39 + {
   10.40 +   transfer_free_ptr = 1,     //avoid starting enums at 0, for debug reasons
   10.41 +   shutdownVP
   10.42 + };
   10.43 +
   10.44 +//Doing a trick to save space & time -- allocate space
   10.45 +// for this, cast first as InterMaster then as this
   10.46 +typedef struct  
   10.47 + {
   10.48 +   enum InterMasterReqstType  reqType;  //duplicate InterMasterReqst at top
   10.49 +   InterMasterReqst *nextReqst;
   10.50 +   InterMasterReqst *nextSentReqst;
   10.51 +   int32 obsolete;
   10.52 +   
   10.53 +   enum InterVMSCoreReqType  secondReqType;
   10.54 +   void                     *freePtr;  //pile up fields, add as needed
   10.55 + } InterVMSCoreReqst;
   10.56 +
   10.57 +//This is for requests between plugins on different cores.
   10.58 +// Here, after casting, the pluginReq is extracted and handed to the plugin.
   10.59 +//Doing a trick to save space & time -- allocate space
   10.60 +// for this, cast first as InterMaster then as this
   10.61 +typedef struct  
   10.62 + {
   10.63 +   enum InterMasterReqstType  reqType;  //copy InterMasterReqst at top
   10.64 +   InterMasterReqst          *nextReqst;
   10.65 +   //NOTE(review): nextSentReqst and obsolete of InterMasterReqst are NOT duplicated here, yet sendInterMasterReqst() writes both -- confirm layout before sending this type through it
   10.66 +   void                      *pluginReq; //plugin will cast to approp type
   10.67 + } InterPluginReqst;
   10.68 + 
   10.69 + /*
   10.70 +  * Sends an inter-master request.  This has to be called from the MasterLoop!
   10.71 +  * The caller has to allocate the request structure itself (e.g. with VMS__malloc).
   10.72 +  * The sending VP will free the structure once the request has been handled.
   10.73 +  */
   10.74 + void sendInterMasterReqst(int receiverID, InterMasterReqst* request);
   10.75 +
   10.76 +#endif	/* _MASTER_REQ_H */
   10.77 +
    11.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.2 +++ b/inter_VMS_requests_handler.c	Mon Sep 19 16:12:01 2011 +0200
    11.3 @@ -0,0 +1,41 @@
    11.4 +/*
    11.5 + * Copyright 2011  OpenSourceCodeStewardshipFoundation
    11.6 + *
    11.7 + * Licensed under GNU GPL version 2
    11.8 + */
    11.9 +
   11.10 +#include <stdio.h>
   11.11 +#include <stdlib.h>
   11.12 +
   11.13 +#include "VMS.h"
   11.14 +#include "ProcrContext.h"
   11.15 +#include "inter_VMS_requests.h"
   11.16 +#include "vmalloc.h"
   11.17 +
   11.18 +
   11.19 +
   11.20 +//==================================================================
   11.21 +/* The VMS__free in a different masterVP discovered that the chunk it was
   11.22 + * given was originally allocated by this masterVP, so it sent the
   11.23 + * chunk over.  Simply call VMS__free on the transferred pointer here.
   11.24 + */
   11.25 +inline void
   11.26 +handleTransferFree( InterVMSCoreReqst *masterReq, VirtProcr *masterPr ) //masterPr is not used in the body
   11.27 + {
   11.28 +    VMS__free( masterReq->freePtr ); //freePtr was filled in by the sending side
   11.29 + }
   11.30 +
   11.31 +
   11.32 +/*
   11.33 + * This starts the shutdown procedure by handing masterPr to the terminate routine.
   11.34 + */
   11.35 +inline void
   11.36 +handleShutdown( InterVMSCoreReqst *masterReq, VirtProcr *masterPr ) //masterReq is not used in the body
   11.37 +{
   11.38 +#ifdef SEQUENTIAL
   11.39 +    asmTerminateCoreLoopSeq(masterPr); //sequential-build variant
   11.40 +#else
   11.41 +    asmTerminateCoreLoop(masterPr); //asm routine: restores core loop stack, releases masterLock, jumps to terminateCoreLoop
   11.42 +#endif
   11.43 +}
   11.44 +
    12.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.2 +++ b/inter_VMS_requests_handler.h	Mon Sep 19 16:12:01 2011 +0200
    12.3 @@ -0,0 +1,26 @@
    12.4 +/*
    12.5 + *  Copyright 2011 OpenSourceStewardshipFoundation.org
    12.6 + *  Licensed under GNU General Public License version 2
    12.7 + *
    12.8 + * Author(s): seanhalle@yahoo.com
    12.9 + *
   12.10 + */
   12.11 +
   12.12 +#ifndef _MASTER_REQ_HANDLER_H
   12.13 +#define	_MASTER_REQ_HANDLER_H
   12.14 +
   12.15 +/*Defines everything specific to inter-master requests that
   12.16 + * are internal to VMS.
   12.17 + *The plugin has its own handlers for inter-master requests
   12.18 + * sent between plugin instances.
   12.19 + */
   12.20 +
   12.21 +inline void
   12.22 +handleTransferFree( InterVMSCoreReqst *masterReq, VirtProcr *masterPr );
   12.23 +
   12.24 +inline void
   12.25 +handleShutdown( InterVMSCoreReqst *masterReq, VirtProcr *masterPr );
   12.26 +
   12.27 +
   12.28 +#endif	/* _MASTER_REQ_HANDLER_H */
   12.29 +
    13.1 --- a/probes.c	Wed Sep 07 19:36:46 2011 +0200
    13.2 +++ b/probes.c	Mon Sep 19 16:12:01 2011 +0200
    13.3 @@ -113,7 +113,7 @@
    13.4     VMSSemReq reqData;
    13.5  
    13.6     reqData.reqType  = createProbe;
    13.7 -   reqData.nameStr  = nameStr;
    13.8 +   reqData.data  = (void*)nameStr;
    13.9  
   13.10     VMS__send_VMSSem_request( &reqData, animPr );
   13.11  
   13.12 @@ -338,7 +338,7 @@
   13.13  void
   13.14  generic_print_probe( void *_probe )
   13.15   { 
   13.16 -   IntervalProbe *probe = (IntervalProbe *)_probe;
   13.17 +   //IntervalProbe *probe = (IntervalProbe *)_probe;
   13.18     
   13.19     //TODO segfault in printf
   13.20     //print_probe_helper( probe );
    14.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    14.2 +++ b/scheduling.h	Mon Sep 19 16:12:01 2011 +0200
    14.3 @@ -0,0 +1,23 @@
    14.4 +/* 
    14.5 + * File:   scheduling.h
    14.6 + * Author: Merten Sach
    14.7 + *
    14.8 + * Created on September 16, 2011, 2:28 PM
    14.9 + */
   14.10 +
   14.11 +#ifndef SCHEDULING_H
   14.12 +#define	SCHEDULING_H
   14.13 +
   14.14 +#include "ProcrContext.h"
   14.15 +
   14.16 +typedef struct _SchedSlot SchedSlot;
   14.17 +
   14.18 +struct _SchedSlot
   14.19 + {
   14.20 +   int         workIsDone;
   14.21 +   int         needsProcrAssigned;
   14.22 +   VirtProcr  *procrAssignedToSlot;
   14.23 + };
   14.24 + 
   14.25 +#endif	/* SCHEDULING_H */
   14.26 +
    15.1 --- a/vmalloc.c	Wed Sep 07 19:36:46 2011 +0200
    15.2 +++ b/vmalloc.c	Mon Sep 19 16:12:01 2011 +0200
    15.3 @@ -19,6 +19,12 @@
    15.4  
    15.5  #define MAX_UINT64 0xFFFFFFFFFFFFFFFF
    15.6  
    15.7 +inline void
    15.8 +sendFreeReqst_lib(int receiverID, void *ptrToFree, VirtProcr *animPr);
    15.9 +
   15.10 +inline void
   15.11 +sendFreeReqst_master(int receiverID, void *ptrToFree);
   15.12 +
   15.13  //A MallocProlog is a head element if the HigherInMem variable is NULL
   15.14  //A Chunk is free if the prevChunkInFreeList variable is NULL
   15.15  
   15.16 @@ -198,12 +204,32 @@
   15.17      return foundChunk;
   15.18  }
   15.19  
   15.20 +/*
   15.21 + * This function is for code that is part of the master loop.
   15.22 + * It reads the current master's ID from the MasterEnv and forwards to the
   15.23 + * normal malloc in VMS__malloc_on_core, which uses that ID to pick the free list.
   15.24 + */
   15.25 +void *
   15.26 +VMS__malloc( size_t sizeRequested)
   15.27 +{
   15.28 +    return VMS__malloc_on_core(sizeRequested, _VMSMasterEnv->currentMasterProcrID);
   15.29 +}
   15.30 +
   15.31 +/*
   15.32 + * This is called by the plugin. The call to VMS__malloc_on_core is run on the
   15.33 + * slave VP's stack, so there is no switch to the VMS runtime.
   15.34 + */
   15.35 +void *
   15.36 +VMS__malloc_in_lib(size_t sizeRequested, VirtProcr *VProcr)
   15.37 +{
   15.38 +    return VMS__malloc_on_core(sizeRequested, VProcr->coreAnimatedBy); //free list is chosen by the core animating VProcr
   15.39 +}
   15.40  
   15.41  /*
   15.42   * This is sequential code, meant to only be called from the Master, not from
   15.43   * any slave VPs.
   15.44   */
   15.45 -void *VMS__malloc( size_t sizeRequested )
   15.46 +void *VMS__malloc_on_core( size_t sizeRequested, int procrID )
   15.47   {     
   15.48     //============================= MEASUREMENT STUFF ========================
   15.49     #ifdef MEAS__TIME_MALLOC
   15.50 @@ -212,8 +238,9 @@
   15.51     #endif
   15.52     //========================================================================
   15.53     
   15.54 -   MallocArrays* freeLists = _VMSMasterEnv->freeLists;
   15.55 +   MallocArrays* freeLists = _VMSMasterEnv->freeLists[procrID];
   15.56     MallocProlog* foundChunk;
   15.57 +   MallocPrologAllocated* returnChunk;
   15.58     
   15.59     //Return a small chunk if the requested size is smaller than 128B
   15.60     if(sizeRequested <= LOWER_BOUND)
   15.61 @@ -224,9 +251,10 @@
   15.62         else
   15.63             foundChunk = removeSmallChunk(freeLists, freeListIdx);
   15.64         
   15.65 -       //Mark as allocated
   15.66 -       foundChunk->prevChunkInFreeList = NULL;      
   15.67 -       return foundChunk + 1;
   15.68 +       returnChunk = (MallocPrologAllocated*)foundChunk;
   15.69 +       returnChunk->prevChunkInFreeList = NULL;//indicates elem currently allocated
   15.70 +       returnChunk->procrID = procrID;  
   15.71 +       return returnChunk + 1;
   15.72     }
   15.73     
   15.74     //Calculate the expected container. Start one higher to have a Chunk that's
   15.75 @@ -239,7 +267,9 @@
   15.76         foundChunk = removeChunk(freeLists, containerIdx); 
   15.77     
   15.78     //Mark as allocated
   15.79 -   foundChunk->prevChunkInFreeList = NULL;      
   15.80 +   returnChunk = (MallocPrologAllocated*)foundChunk;
   15.81 +   returnChunk->prevChunkInFreeList = NULL;//indicates elem currently allocated
   15.82 +   returnChunk->procrID = procrID;
   15.83     
   15.84     //============================= MEASUREMENT STUFF ========================
   15.85     #ifdef MEAS__TIME_MALLOC
   15.86 @@ -249,7 +279,79 @@
   15.87     //========================================================================
   15.88     
   15.89     //skip over the prolog by adding its size to the pointer return
   15.90 -   return foundChunk + 1;
   15.91 +   return returnChunk + 1;
   15.92 + }
   15.93 + 
   15.94 + /*
   15.95 + * This free is called from a master loop. It checks whether the chunk was
   15.96 + * allocated by the current master. If it was, it calls VMS__free_on_core;
   15.97 + * otherwise it sends a message to the responsible core.
   15.98 + */
   15.99 +void
  15.100 +VMS__free(void *ptrToFree)
  15.101 +{
  15.102 +    MallocPrologAllocated *chunk = (MallocPrologAllocated*)ptrToFree - 1; //step back to the prolog in front of the payload; NOTE(review): ptrToFree == NULL is not checked
  15.103 +    if(chunk->procrID == _VMSMasterEnv->currentMasterProcrID) //procrID was stamped into the prolog by VMS__malloc_on_core
  15.104 +    {
  15.105 +        VMS__free_on_core(ptrToFree, _VMSMasterEnv->currentMasterProcrID);
  15.106 +    }
  15.107 +    else
  15.108 +    {
  15.109 +        sendFreeReqst_master(chunk->procrID, ptrToFree); //owner differs: forward the pointer as an inter-master request
  15.110 +        
  15.111 +    }
  15.112 +}
  15.113 +
  15.114 +/*
  15.115 + * This free is called from the plugins. It checks whether the chunk was
  15.116 + * allocated on the core animating VProc. If it was, it calls VMS__free_on_core;
  15.117 + * otherwise it sends a message to the responsible core.
  15.118 + */
  15.119 +void
  15.120 +VMS__free_in_lib(void *ptrToFree, VirtProcr *VProc)
  15.121 +{
  15.122 +    MallocPrologAllocated *chunk = (MallocPrologAllocated*)ptrToFree - 1; //step back to the prolog in front of the payload; NOTE(review): ptrToFree == NULL is not checked
  15.123 +    if(chunk->procrID == VProc->coreAnimatedBy) //procrID was stamped into the prolog by VMS__malloc_on_core
  15.124 +    {
  15.125 +        VMS__free_on_core(ptrToFree, VProc->coreAnimatedBy);
  15.126 +    }
  15.127 +    else
  15.128 +    {
  15.129 +        sendFreeReqst_lib(chunk->procrID, ptrToFree, VProc); //owner differs: ask own master to forward the free
  15.130 +    }
  15.131 +}
  15.132 +
  15.133 +/* 
  15.134 + * This is called from a masterVP and requests a free from a different masterVP.
  15.135 + * The request structure itself is freed after the request has been handled.
  15.136 + */
  15.137 +inline void
  15.138 +sendFreeReqst_master(int receiverID, void *ptrToFree)
  15.139 +{
  15.140 +   InterVMSCoreReqst *freeReqst = VMS__malloc(sizeof(InterVMSCoreReqst)); //allocated from the current master's free list; result is not checked
  15.141 +   freeReqst->freePtr = ptrToFree;
  15.142 +   freeReqst->secondReqType = transfer_free_ptr;
  15.143 +
  15.144 +   sendInterMasterReqst(receiverID, (InterMasterReqst*)freeReqst); //fills in reqType, obsolete and both list links
  15.145 + }
  15.146 +
  15.147 +/*
  15.148 + * This is called if the free is issued from the plugin (on a slave VP). It asks
  15.149 + * the VP's own master to send an inter-master free request to receiverID.
  15.150 + */
  15.151 +inline void
  15.152 +sendFreeReqst_lib(int receiverID, void *ptrToFree, VirtProcr *animPr )
  15.153 +{
  15.154 +   VMSSemReq reqData;
  15.155 +   InterVMSCoreReqst *freeReqst = VMS__malloc_in_lib(sizeof(InterVMSCoreReqst), animPr); //runs on a slave VP: allocate via animPr's core, not currentMasterProcrID
  15.156 +   freeReqst->freePtr = ptrToFree;
  15.157 +   freeReqst->secondReqType = transfer_free_ptr;
  15.158 +
  15.159 +   reqData.reqType  = interMasterReqst;
  15.160 +   reqData.receiverID   = receiverID;
  15.161 +   reqData.data  = (void*)freeReqst; //master receives the prepared inter-master request through data
  15.162 +
  15.163 +   VMS__send_VMSSem_request( (void*)&reqData, animPr );
  15.164 + }
  15.165  
  15.166  /*
  15.167 @@ -257,7 +359,7 @@
  15.168   * any slave VPs.
  15.169   */
  15.170  void
  15.171 -VMS__free( void *ptrToFree )
  15.172 +VMS__free_on_core( void *ptrToFree, int procrID )
  15.173   {
  15.174      
  15.175     //============================= MEASUREMENT STUFF ========================
  15.176 @@ -267,7 +369,7 @@
  15.177     #endif
  15.178     //========================================================================
  15.179     
  15.180 -   MallocArrays* freeLists = _VMSMasterEnv->freeLists;
  15.181 +   MallocArrays* freeLists = _VMSMasterEnv->freeLists[procrID];
  15.182     MallocProlog *chunkToFree = (MallocProlog*)ptrToFree - 1;
  15.183     uint32 containerIdx;
  15.184     
  15.185 @@ -323,8 +425,7 @@
  15.186  VMS_ext__create_free_list()
  15.187  {     
  15.188     //Initialize containers for small chunks and fill with zeros
  15.189 -   _VMSMasterEnv->freeLists = (MallocArrays*)malloc( sizeof(MallocArrays) );
  15.190 -   MallocArrays *freeLists = _VMSMasterEnv->freeLists;
  15.191 +   MallocArrays *freeLists = (MallocArrays*)malloc( sizeof(MallocArrays) );
  15.192     
  15.193     freeLists->smallChunks = 
  15.194             (MallocProlog**)malloc(SMALL_CHUNK_COUNT*sizeof(MallocProlog*));
  15.195 @@ -355,14 +456,14 @@
  15.196                          MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE - sizeof(MallocProlog*));
  15.197     firstChunk->nextChunkInFreeList = NULL;
  15.198     //previous element in the queue is the container
  15.199 -   firstChunk->prevChunkInFreeList = &freeLists->bigChunks[container-2];
  15.200 +   firstChunk->prevChunkInFreeList = (MallocProlog*)&freeLists->bigChunks[container-2];
  15.201     
  15.202     freeLists->bigChunks[container-2] = firstChunk;
  15.203     //Insert into bit search list
  15.204     if(container <= 65)
  15.205 -       freeLists->bigChunksSearchVector[0] |= ((uint64)1 << (container-2));
  15.206 +       freeLists->bigChunksSearchVector[0] = ((uint64)1 << (container-2));
  15.207     else
  15.208 -       freeLists->bigChunksSearchVector[1] |= ((uint64)1 << (container-66));
  15.209 +       freeLists->bigChunksSearchVector[1] = ((uint64)1 << (container-66));
  15.210     
  15.211     //Create dummy chunk to mark the top of stack this is of course
  15.212     //never freed
  15.213 @@ -384,6 +485,6 @@
  15.214     free(freeLists->memSpace);
  15.215     free(freeLists->bigChunks);
  15.216     free(freeLists->smallChunks);
  15.217 -   
  15.218 +   free(freeLists);   
  15.219   }
  15.220  
    16.1 --- a/vmalloc.h	Wed Sep 07 19:36:46 2011 +0200
    16.2 +++ b/vmalloc.h	Mon Sep 19 16:12:01 2011 +0200
    16.3 @@ -6,13 +6,13 @@
    16.4   *
    16.5   * Created on November 14, 2009, 9:07 PM
    16.6   */
    16.7 -
    16.8  #ifndef _VMALLOC_H
    16.9  #define	_VMALLOC_H
   16.10  
   16.11  #include <malloc.h>
   16.12  #include <inttypes.h>
   16.13  #include "VMS_primitive_data_types.h"
   16.14 +#include "ProcrContext.h"
   16.15  
   16.16  #define SMALL_CHUNK_SIZE 32
   16.17  #define SMALL_CHUNK_COUNT 4
   16.18 @@ -30,12 +30,12 @@
   16.19     MallocProlog *prevChunkInFreeList;
   16.20     MallocProlog *nextHigherInMem;
   16.21     MallocProlog *nextLowerInMem;
   16.22 - };
   16.23 + }; 
   16.24  //MallocProlog
   16.25   
   16.26   typedef struct MallocArrays MallocArrays;
   16.27  
   16.28 - struct MallocArrays
   16.29 + struct MallocArrays 
   16.30   {
   16.31       MallocProlog **smallChunks;
   16.32       MallocProlog **bigChunks;
   16.33 @@ -43,24 +43,42 @@
   16.34       void         *memSpace;
   16.35       uint32       containerCount;
   16.36   };
   16.37 - //MallocArrays
   16.38 + //MallocArray
   16.39 +
   16.40 +
   16.41 + typedef struct //view of a chunk prolog while the chunk is allocated; VMS__malloc_on_core casts a MallocProlog* to this
   16.42 +{
   16.43 +     uintptr_t procrID; //index of the free list the chunk was taken from; NOTE(review): presumably overlays MallocProlog's first field -- confirm
   16.44 +     MallocProlog *prevChunkInFreeList; //set to NULL to mark the chunk as allocated
   16.45 +     MallocProlog *nextHigherInMem;
   16.46 +     MallocProlog *nextLowerInMem;
   16.47 + } MallocPrologAllocated;
   16.48 +
   16.49  
   16.50  typedef struct
   16.51   {
   16.52     MallocProlog *firstChunkInFreeList;
   16.53     int32         numInList; //TODO not used
   16.54 - }
   16.55 -FreeListHead;
   16.56 + } FreeListHead;
   16.57  
   16.58  void *
   16.59 -VMS__malloc( size_t sizeRequested );
   16.60 +VMS__malloc_on_core(size_t sizeRequested, int procrID);
   16.61  
   16.62  void *
   16.63 -VMS__malloc_aligned( size_t sizeRequested );
   16.64 +VMS__malloc(size_t sizeRequested);
   16.65 +
   16.66 +void *
   16.67 +VMS__malloc_in_lib(size_t sizeRequested, VirtProcr *VProc);
   16.68  
   16.69  void
   16.70  VMS__free( void *ptrToFree );
   16.71  
   16.72 +void
   16.73 +VMS__free_in_lib(void *ptrToFree, VirtProcr *VProc);
   16.74 +
   16.75 +void
   16.76 +VMS__free_on_core(void *ptrToFree, int procrID);
   16.77 +
   16.78  /*Allocates memory from the external system -- higher overhead
   16.79   */
   16.80  void *
   16.81 @@ -71,7 +89,6 @@
   16.82  void
   16.83  VMS__free_in_ext( void *ptrToFree );
   16.84  
   16.85 -
   16.86  MallocArrays *
   16.87  VMS_ext__create_free_list();
   16.88  
    17.1 --- a/vutilities.c	Wed Sep 07 19:36:46 2011 +0200
    17.2 +++ b/vutilities.c	Mon Sep 19 16:12:01 2011 +0200
    17.3 @@ -9,6 +9,7 @@
    17.4  
    17.5  #include <malloc.h>
    17.6  #include <stdlib.h>
    17.7 +#include <string.h>
    17.8  
    17.9  #include "VMS.h"
   17.10