changeset 209:0c83ea8adefc Common_Ancestor

Close to compilable version of common_ancestor -- still includes HW dep stuff
author Some Random Person <seanhalle@yahoo.com>
date Sun, 04 Mar 2012 14:26:35 -0800
parents eaf7e4c58c9e
children a18539c0bc37
files CoreLoop.c MasterLoop.c VMS.h VMS__HW_dependent.c VMS__HW_dependent.h VMS__HW_dependent.s VMS__PI.c VMS__WL.c VMS__int.c VMS__startup_and_shutdown.c VMS_defs__HW_specific.h VMS_defs__lang_specific.h VMS_defs__main.h probes.c probes.h vmalloc.c vmalloc.h vutilities.h
diffstat 18 files changed, 1163 insertions(+), 1269 deletions(-) [+]
line diff
     1.1 --- a/CoreLoop.c	Wed Feb 22 11:39:12 2012 -0800
     1.2 +++ b/CoreLoop.c	Sun Mar 04 14:26:35 2012 -0800
     1.3 @@ -6,7 +6,6 @@
     1.4  
     1.5  
     1.6  #include "VMS.h"
     1.7 -#include "ProcrContext.h"
     1.8  
     1.9  #include <stdlib.h>
    1.10  #include <stdio.h>
    1.11 @@ -15,14 +14,14 @@
    1.12  #include <pthread.h>
    1.13  #include <sched.h>
    1.14  
    1.15 -void *terminateCoreLoop(SlaveVP *currPr);
    1.16 +void *terminateCoreLoop(SlaveVP *currSlv);
    1.17  
    1.18  /*This is the loop that runs in the OS Thread pinned to each core
    1.19 - *Get virt procr from queue,
    1.20 - * save state of current animator, then load in state of virt procr, using
    1.21 - * jmp instr to switch the program-counter state -- making the virt procr
    1.22 + *Get Slv from queue,
    1.23 + * save state of current animator, then load in state of Slv, using
    1.24 + * jmp instr to switch the program-counter state -- making the Slv
    1.25   * the new animator.
    1.26 - *At some point, the virt procr will suspend itself by saving out its
    1.27 + *At some point, the Slv will suspend itself by saving out its
    1.28   * animator state (stack ptr, frame ptr, program counter) and switching
    1.29   * back to the OS Thread's animator state, which means restoring the
    1.30   * stack and frame and jumping to the core loop start point.
    1.31 @@ -34,7 +33,7 @@
    1.32   { 
    1.33     ThdParams      *coreLoopThdParams;
    1.34     int             thisCoresIdx;
    1.35 -   SlaveVP        *currPr;
    1.36 +   SlaveVP        *currSlv;
    1.37     VMSQueueStruc  *readyToAnimateQ;
    1.38     cpu_set_t       coreMask;  //has 1 in bit positions of allowed cores
    1.39     int             errorCode;
    1.40 @@ -78,7 +77,7 @@
    1.41     if(errorCode){ printf("\nset affinity failure\n"); exit(0); }
    1.42  
    1.43     
    1.44 -   //Save the return address in the SwitchVP function
    1.45 +   //Save the return address in the SwitchSlv function
    1.46     saveCoreLoopReturnAddr((void**)&(_VMSMasterEnv->coreLoopReturnPt));
    1.47  
    1.48     
    1.49 @@ -100,68 +99,55 @@
    1.50        while( gate.gateClosed ) /*busy wait*/;
    1.51      }
    1.52  
    1.53 -   currPr = (SlaveVP *) readVMSQ( readyToAnimateQ );
    1.54 +   currSlv = (SlaveVP *) readVMSQ( readyToAnimateQ );
    1.55  
    1.56        //Set the coreloop's progress, so stealer can see it has made it out
    1.57        // of the protected area
    1.58     gate.exitProgress = gate.preGateProgress;
    1.59     #else
    1.60 -   currPr = (SlaveVP *) readVMSQ( readyToAnimateQ );
    1.61 +   currSlv = (SlaveVP *) readVMSQ( readyToAnimateQ );
    1.62     #endif
    1.63  
    1.64 -   if( currPr != NULL ) _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0;
    1.65 +   if( currSlv != NULL ) _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0;
    1.66     else
    1.67      {
    1.68 -      //============================= MEASUREMENT STUFF =====================
    1.69 -      #ifdef MEAS__TIME_MASTER_LOCK
    1.70 -      int32 startStamp, endStamp;
    1.71 -      saveLowTimeStampCountInto( startStamp );
    1.72 -      #endif
    1.73 -      //=====================================================================
    1.74 +            MEAS__Capture_Pre_Master_Lock_Point;
    1.75 +            
    1.76        int tries = 0; int gotLock = 0;
    1.77 -      while( currPr == NULL ) //if queue was empty, enter get masterLock loop
    1.78 +      while( currSlv == NULL ) //if queue was empty, enter get masterLock loop
    1.79         {    //queue was empty, so get master lock
    1.80  
    1.81           gotLock = __sync_bool_compare_and_swap(&(_VMSMasterEnv->masterLock),
    1.82                                                            UNLOCKED, LOCKED );
    1.83           if( gotLock )
    1.84            {    //run own MasterVP -- jmps to coreLoops startPt when done
    1.85 -            currPr = _VMSMasterEnv->masterVPs[thisCoresIdx];
    1.86 +            currSlv = _VMSMasterEnv->masterVPs[thisCoresIdx];
    1.87              if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 )
    1.88               {       DEBUG( dbgB2BMaster,"Many back to back MasterVPs\n");
    1.89                 pthread_yield();
    1.90               }
    1.91              _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1;
    1.92 -            break;  //end while -- have a VP to animate now
    1.93 +            break;  //end while -- have a Slv to animate now
    1.94            }
    1.95  
    1.96           tries++;      //if too many, means master on other core taking too long
    1.97           if( tries > MASTERLOCK_RETRIES ) { tries = 0; pthread_yield(); }
    1.98         }
    1.99 -      //============================= MEASUREMENT STUFF =====================
   1.100 -      #ifdef MEAS__TIME_MASTER_LOCK
   1.101 -      saveLowTimeStampCountInto( endStamp );
   1.102 -      addIntervalToHist( startStamp, endStamp,
   1.103 -                         _VMSMasterEnv->masterLockLowTimeHist );
   1.104 -      addIntervalToHist( startStamp, endStamp,
   1.105 -                         _VMSMasterEnv->masterLockHighTimeHist );
   1.106 -      #endif
   1.107 -      //=====================================================================
   1.108 -
   1.109 +            MEAS__Capture_Post_Master_Lock_Point;
   1.110      }
   1.111  
   1.112     
   1.113 -   switchToVP(currPr); //The VPs return in here
   1.114 +   switchToSlv(currSlv); //The Slvs return in here
   1.115     flushRegisters();
   1.116     }//CoreLoop      
   1.117   }
   1.118  
   1.119  
   1.120  void *
   1.121 -terminateCoreLoop(SlaveVP *currPr){
   1.122 -   //first free shutdown VP that jumped here -- it first restores the
   1.123 -   // coreloop's stack, so addr of currPr in stack frame is still correct
   1.124 -   VMS_int__dissipate_procr( currPr );
   1.125 +terminateCoreLoop(SlaveVP *currSlv){
   1.126 +   //first free shutdown Slv that jumped here -- it first restores the
   1.127 +   // coreloop's stack, so addr of currSlv in stack frame is still correct
   1.128 +   VMS_int__dissipate_SlaveVP( currSlv );
   1.129     pthread_exit( NULL );
   1.130  }
   1.131  
   1.132 @@ -176,7 +162,7 @@
   1.133  void *
   1.134  coreLoop_Seq( void *paramsIn )
   1.135   {
   1.136 -   SlaveVP      *currPr;
   1.137 +   SlaveVP      *currSlv;
   1.138     VMSQueueStruc *readyToAnimateQ;
   1.139     
   1.140     ThdParams      *coreLoopThdParams;
   1.141 @@ -186,7 +172,7 @@
   1.142  //   thisCoresIdx = coreLoopThdParams->coreNum;
   1.143     thisCoresIdx = 0;
   1.144  
   1.145 -   //Save the return address in the SwitchVP function
   1.146 +   //Save the return address in the SwitchSlv function
   1.147     saveCoreLoopReturnAddr(&(_VMSMasterEnv->coreLoopReturnPt));
   1.148  
   1.149     
   1.150 @@ -195,19 +181,19 @@
   1.151        //_VMSWorkQ must be a global, static volatile var, so not kept in reg,
   1.152        // which forces reloading the pointer after each jmp to this point
   1.153     readyToAnimateQ  = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx];
   1.154 -   currPr = (SlaveVP *) readVMSQ( readyToAnimateQ );
   1.155 -   if( currPr == NULL )
   1.156 +   currSlv = (SlaveVP *) readVMSQ( readyToAnimateQ );
   1.157 +   if( currSlv == NULL )
   1.158      { if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 )
   1.159         { printf("too many back to back MasterVP\n"); exit(1); }
   1.160        _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1;
   1.161        
   1.162 -      currPr = _VMSMasterEnv->masterVPs[thisCoresIdx];
   1.163 +      currSlv = _VMSMasterEnv->masterVPs[thisCoresIdx];
   1.164      }
   1.165     else
   1.166        _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0;
   1.167  
   1.168  
   1.169 -   switchToVP( currPr );
   1.170 +   switchToSlv( currSlv );
   1.171     flushRegisters();
   1.172     }
   1.173   }
     2.1 --- a/MasterLoop.c	Wed Feb 22 11:39:12 2012 -0800
     2.2 +++ b/MasterLoop.c	Sun Mar 04 14:26:35 2012 -0800
     2.3 @@ -10,13 +10,12 @@
     2.4  #include <stddef.h>
     2.5  
     2.6  #include "VMS.h"
     2.7 -#include "ProcrContext.h"
     2.8  
     2.9  
    2.10  //===========================================================================
    2.11  void inline
    2.12  stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ,
    2.13 -               SlaveVP *masterPr );
    2.14 +               SlaveVP *masterVP );
    2.15  
    2.16  //===========================================================================
    2.17  
    2.18 @@ -27,13 +26,13 @@
    2.19   *Polls each sched slot exactly once, hands any requests made by a newly
    2.20   * done slave to the "request handler" plug-in function
    2.21   *
    2.22 - *Any slots that need a virt procr assigned are given to the "schedule"
    2.23 - * plug-in function, which tries to assign a virt procr (slave) to it.
    2.24 + *Any slots that need a Slv assigned are given to the "schedule"
    2.25 + * plug-in function, which tries to assign a Slv (slave) to it.
    2.26   *
    2.27   *When all slots needing a processor have been given to the schedule plug-in,
    2.28 - * a fraction of the procrs successfully scheduled are put into the
    2.29 + * a fraction of the slaves successfully scheduled are put into the
    2.30   * work queue, then a continuation of this function is put in, then the rest
    2.31 - * of the virt procrs that were successfully scheduled.
    2.32 + * of the Slvs that were successfully scheduled.
    2.33   *
    2.34   *The first thing the continuation does is busy-wait until the previous
    2.35   * animation completes.  This is because an (unlikely) continuation may
    2.36 @@ -46,7 +45,7 @@
    2.37   * start running gets it and does all the stuff for a newly born --
    2.38   * from then on, will be doing continuation, but do suspension self
    2.39   * directly at end of master loop
    2.40 - *So VMS__init just births the master virtual processor same way it births
    2.41 + *So VMS_WL__init just births the master virtual processor same way it births
    2.42   * all the others -- then does any extra setup needed and puts it into the
    2.43   * work queue.
    2.44   *However means have to make masterEnv a global static volatile the same way
    2.45 @@ -65,36 +64,36 @@
    2.46   *At this point, the masterLoop does not write itself into the queue anymore,
    2.47   * instead, the coreLoop acquires the masterLock when it has nothing to
    2.48   * animate, and then animates its own masterLoop.  However, still try to put
    2.49 - * several AppVPs into the queue to amortize the startup cost of switching
    2.50 + * several AppSlvs into the queue to amortize the startup cost of switching
    2.51   * to the MasterVP.  Note, don't have to worry about latency of requests much
    2.52   * because most requests generate work for same core -- only latency issue
    2.53   * is case when other cores starved and one core's requests generate work
    2.54   * for them -- so keep max in queue to 3 or 4..
    2.55   */
    2.56 -void masterLoop( void *initData, SlaveVP *animatingPr )
    2.57 +void masterLoop( void *initData, SlaveVP *animatingSlv )
    2.58   { 
    2.59     int32           slotIdx, numSlotsFilled;
    2.60 -   SlaveVP      *schedVirtPr;
    2.61 +   SlaveVP        *schedSlaveVP;
    2.62     SchedSlot      *currSlot, **schedSlots;
    2.63     MasterEnv      *masterEnv;
    2.64     VMSQueueStruc  *readyToAnimateQ;
    2.65     
    2.66 -   Sched_Assigner  slaveScheduler;
    2.67 +   Sched_Assigner  slaveAssigner;
    2.68     RequestHandler  requestHandler;
    2.69     void           *semanticEnv;
    2.70  
    2.71     int32           thisCoresIdx;
    2.72 -   SlaveVP      *masterPr;
    2.73 -   volatile        SlaveVP *volatileMasterPr;
    2.74 +   SlaveVP      *masterVP;
    2.75 +   volatile        SlaveVP *volatileMasterVP;
    2.76     
    2.77 -   volatileMasterPr = animatingPr;
    2.78 -   masterPr         = (SlaveVP*)volatileMasterPr; //used to force re-define after jmp
    2.79 +   volatileMasterVP = animatingSlv;
    2.80 +   masterVP         = (SlaveVP*)volatileMasterVP; //used to force re-define after jmp
    2.81  
    2.82        //First animation of each MasterVP will in turn animate this part
    2.83 -      // of setup code.. (VP creator sets up the stack as if this function
    2.84 +      // of setup code.. (Slv creator sets up the stack as if this function
    2.85        // was called normally, but actually get here by jmp)
    2.86        //So, setup values about stack ptr, jmp pt and all that
    2.87 -   //masterPr->resumeInstrPtr = &&masterLoopStartPt;
    2.88 +   //masterVP->resumeInstrPtr = &&masterLoopStartPt;
    2.89  
    2.90  
    2.91        //Note, got rid of writing the stack and frame ptr up here, because
    2.92 @@ -108,25 +107,18 @@
    2.93     //masterLoopStartPt:
    2.94     while(1){
    2.95         
    2.96 -   //============================= MEASUREMENT STUFF ========================
    2.97 -   #ifdef MEAS__TIME_MASTER
    2.98 -      //Total Master time includes one coreloop time -- just assume the core
    2.99 -      // loop time is same for Master as for AppVPs, even though it may be
   2.100 -      // smaller due to higher predictability of the fixed jmp.
   2.101 -   saveLowTimeStampCountInto( masterPr->startMasterTSCLow );
   2.102 -   #endif
   2.103 -   //========================================================================
   2.104 +      MEAS__Capture_Pre_Master_Point
   2.105  
   2.106     masterEnv        = (MasterEnv*)_VMSMasterEnv;
   2.107     
   2.108        //GCC may optimize so doesn't always re-define from frame-storage
   2.109 -   masterPr         = (SlaveVP*)volatileMasterPr;  //just to make sure after jmp
   2.110 -   thisCoresIdx     = masterPr->coreAnimatedBy;
   2.111 +   masterVP         = (SlaveVP*)volatileMasterVP;  //just to make sure after jmp
   2.112 +   thisCoresIdx     = masterVP->coreAnimatedBy;
   2.113     readyToAnimateQ  = masterEnv->readyToAnimateQs[thisCoresIdx];
   2.114     schedSlots       = masterEnv->allSchedSlots[thisCoresIdx];
   2.115  
   2.116     requestHandler   = masterEnv->requestHandler;
   2.117 -   slaveScheduler   = masterEnv->slaveSchedAssigner;
   2.118 +   slaveAssigner   = masterEnv->slaveAssigner;
   2.119     semanticEnv      = masterEnv->semanticEnv;
   2.120  
   2.121  
   2.122 @@ -139,18 +131,18 @@
   2.123        if( currSlot->workIsDone )
   2.124         {
   2.125           currSlot->workIsDone         = FALSE;
   2.126 -         currSlot->needsProcrAssigned = TRUE;
   2.127 +         currSlot->needsSlaveAssigned = TRUE;
   2.128  
   2.129              //process requests from slave to master
   2.130                 //====================== MEASUREMENT STUFF ===================
   2.131 -               #ifdef MEAS__TIME_PLUGIN
   2.132 +               #ifdef MEAS__TURN_ON_PLUGIN_MEAS
   2.133                 int32 startStamp1, endStamp1;
   2.134                 saveLowTimeStampCountInto( startStamp1 );
   2.135                 #endif
   2.136                 //============================================================
   2.137 -         (*requestHandler)( currSlot->procrAssignedToSlot, semanticEnv );
   2.138 +         (*requestHandler)( currSlot->slaveAssignedToSlot, semanticEnv );
   2.139                 //====================== MEASUREMENT STUFF ===================
   2.140 -               #ifdef MEAS__TIME_PLUGIN
   2.141 +               #ifdef MEAS__TURN_ON_PLUGIN_MEAS
   2.142                 saveLowTimeStampCountInto( endStamp1 );
   2.143                 addIntervalToHist( startStamp1, endStamp1,
   2.144                                          _VMSMasterEnv->reqHdlrLowTimeHist );
   2.145 @@ -159,18 +151,18 @@
   2.146                 #endif
   2.147                 //============================================================
   2.148         }
   2.149 -      if( currSlot->needsProcrAssigned )
   2.150 -       {    //give slot a new virt procr
   2.151 -         schedVirtPr =
   2.152 -          (*slaveScheduler)( semanticEnv, thisCoresIdx );
   2.153 +      if( currSlot->needsSlaveAssigned )
   2.154 +       {    //give slot a new Slv
   2.155 +         schedSlaveVP =
   2.156 +          (*slaveAssigner)( semanticEnv, thisCoresIdx );
   2.157           
   2.158 -         if( schedVirtPr != NULL )
   2.159 -          { currSlot->procrAssignedToSlot = schedVirtPr;
   2.160 -            schedVirtPr->schedSlot        = currSlot;
   2.161 -            currSlot->needsProcrAssigned  = FALSE;
   2.162 +         if( schedSlaveVP != NULL )
   2.163 +          { currSlot->slaveAssignedToSlot = schedSlaveVP;
   2.164 +            schedSlaveVP->schedSlot        = currSlot;
   2.165 +            currSlot->needsSlaveAssigned  = FALSE;
   2.166              numSlotsFilled               += 1;
   2.167              
   2.168 -            writeVMSQ( schedVirtPr, readyToAnimateQ );
   2.169 +            writeVMSQ( schedSlaveVP, readyToAnimateQ );
   2.170            }
   2.171         }
   2.172      }
   2.173 @@ -179,16 +171,13 @@
   2.174     #ifdef USE_WORK_STEALING
   2.175        //If no slots filled, means no more work, look for work to steal.
   2.176     if( numSlotsFilled == 0 )
   2.177 -    { gateProtected_stealWorkInto( currSlot, readyToAnimateQ, masterPr );
   2.178 +    { gateProtected_stealWorkInto( currSlot, readyToAnimateQ, masterVP );
   2.179      }
   2.180     #endif
   2.181  
   2.182 +         MEAS__Capture_Post_Master_Point;
   2.183     
   2.184 -   #ifdef MEAS__TIME_MASTER
   2.185 -   saveLowTimeStampCountInto( masterPr->endMasterTSCLow );
   2.186 -   #endif
   2.187 -
   2.188 -   masterSwitchToCoreLoop(animatingPr);
   2.189 +   masterSwitchToCoreLoop(animatingSlv);
   2.190     flushRegisters();
   2.191     }//MasterLoop
   2.192  
   2.193 @@ -202,14 +191,14 @@
   2.194   */
   2.195  void inline
   2.196  stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ,
   2.197 -               SlaveVP *masterPr )
   2.198 +               SlaveVP *masterVP )
   2.199   { 
   2.200 -   SlaveVP   *stolenPr;
   2.201 +   SlaveVP   *stolenSlv;
   2.202     int32        coreIdx, i;
   2.203     VMSQueueStruc *currQ;
   2.204  
   2.205 -   stolenPr = NULL;
   2.206 -   coreIdx = masterPr->coreAnimatedBy;
   2.207 +   stolenSlv = NULL;
   2.208 +   coreIdx = masterVP->coreAnimatedBy;
   2.209     for( i = 0; i < NUM_CORES -1; i++ )
   2.210      {
   2.211        if( coreIdx >= NUM_CORES -1 )
   2.212 @@ -220,17 +209,17 @@
   2.213         }
   2.214        currQ = _VMSMasterEnv->readyToAnimateQs[coreIdx];
   2.215        if( numInVMSQ( currQ ) > 0 )
   2.216 -       { stolenPr = readVMSQ (currQ );
   2.217 +       { stolenSlv = readVMSQ (currQ );
   2.218           break;
   2.219         }
   2.220      }
   2.221  
   2.222 -   if( stolenPr != NULL )
   2.223 -    { currSlot->procrAssignedToSlot = stolenPr;
   2.224 -      stolenPr->schedSlot           = currSlot;
   2.225 -      currSlot->needsProcrAssigned  = FALSE;
   2.226 +   if( stolenSlv != NULL )
   2.227 +    { currSlot->slaveAssignedToSlot = stolenSlv;
   2.228 +      stolenSlv->schedSlot           = currSlot;
   2.229 +      currSlot->needsSlaveAssigned  = FALSE;
   2.230  
   2.231 -      writeVMSQ( stolenPr, readyToAnimateQ );
   2.232 +      writeVMSQ( stolenSlv, readyToAnimateQ );
   2.233      }
   2.234   }
   2.235  
   2.236 @@ -306,9 +295,9 @@
   2.237  void inline
   2.238  gateProtected_stealWorkInto( SchedSlot *currSlot,
   2.239                               VMSQueueStruc *myReadyToAnimateQ,
   2.240 -                             SlaveVP *masterPr )
   2.241 +                             SlaveVP *masterVP )
   2.242   {
   2.243 -   SlaveVP     *stolenPr;
   2.244 +   SlaveVP     *stolenSlv;
   2.245     int32          coreIdx, i, haveAVictim, gotLock;
   2.246     VMSQueueStruc *victimsQ;
   2.247  
   2.248 @@ -319,7 +308,7 @@
   2.249  
   2.250        //see if any other cores have work available to steal
   2.251     haveAVictim = FALSE;
   2.252 -   coreIdx = masterPr->coreAnimatedBy;
   2.253 +   coreIdx = masterVP->coreAnimatedBy;
   2.254     for( i = 0; i < NUM_CORES -1; i++ )
   2.255      {
   2.256        if( coreIdx >= NUM_CORES -1 )
   2.257 @@ -354,18 +343,18 @@
   2.258           coreMightBeInProtected = FALSE;
   2.259      }
   2.260  
   2.261 -   stolenPr = readVMSQ ( victimsQ );
   2.262 +   stolenSlv = readVMSQ ( victimsQ );
   2.263  
   2.264     vicGate->gateClosed = FALSE;
   2.265     //======= End Gate-protection  =======
   2.266  
   2.267  
   2.268 -   if( stolenPr != NULL )  //victim could have been in protected and taken
   2.269 -    { currSlot->procrAssignedToSlot = stolenPr;
   2.270 -      stolenPr->schedSlot           = currSlot;
   2.271 -      currSlot->needsProcrAssigned  = FALSE;
   2.272 +   if( stolenSlv != NULL )  //victim could have been in protected and taken
   2.273 +    { currSlot->slaveAssignedToSlot = stolenSlv;
   2.274 +      stolenSlv->schedSlot           = currSlot;
   2.275 +      currSlot->needsSlaveAssigned  = FALSE;
   2.276  
   2.277 -      writeVMSQ( stolenPr, myReadyToAnimateQ );
   2.278 +      writeVMSQ( stolenSlv, myReadyToAnimateQ );
   2.279      }
   2.280  
   2.281        //unlock the work stealing lock
     3.1 --- a/VMS.h	Wed Feb 22 11:39:12 2012 -0800
     3.2 +++ b/VMS.h	Sun Mar 04 14:26:35 2012 -0800
     3.3 @@ -20,6 +20,10 @@
     3.4  #include <pthread.h>
     3.5  #include <sys/time.h>
     3.6  
     3.7 +#ifndef _LANG_NAME_
     3.8 +#define _LANG_NAME_ ""
     3.9 +#endif
    3.10 +
    3.11  //=================  Defines: included from separate files  =================
    3.12  //
    3.13  // Note: ALL defines are in other files, none are in here
    3.14 @@ -44,11 +48,15 @@
    3.15  typedef struct _GateStruc     GateStruc;
    3.16  
    3.17  
    3.18 -typedef SlaveVP * (*Sched_Assigner)  ( void *, int );   //semEnv, coreIdx
    3.19 -typedef void  (*RequestHandler)  ( SlaveVP *, void * ); //prWReqst, semEnv
    3.20 -typedef void  (*TopLevelFnPtr)  ( void *, SlaveVP * ); //initData, animPr
    3.21 -typedef void    TopLevelFn      ( void *, SlaveVP * ); //initData, animPr
    3.22 -typedef void  (*ResumeVPFnPtr)   ( SlaveVP *, void * );
    3.23 +typedef SlaveVP * (*Sched_Assigner) ( void *, int       ); //semEnv, coreIdx
    3.24 +typedef void      (*RequestHandler) ( SlaveVP *, void * ); //prWReqst, semEnv
    3.25 +typedef void      (*TopLevelFnPtr)  ( void *, SlaveVP * ); //initData, animSlv
    3.26 +typedef void        TopLevelFn      ( void *, SlaveVP * ); //initData, animSlv
    3.27 +typedef void      (*ResumeSlvFnPtr) ( SlaveVP *, void * );
    3.28 +
    3.29 +//============================ HW Dependent Fns ================================
    3.30 +
    3.31 +#include "VMS__HW_dependent.h"
    3.32  
    3.33  //============================= Statistics ==================================
    3.34  
    3.35 @@ -83,7 +91,7 @@
    3.36  
    3.37  typedef struct
    3.38   { enum VMSSemReqstType reqType;
    3.39 -   SlaveVP           *requestingPr;
    3.40 +   SlaveVP           *requestingSlv;
    3.41     char                *nameStr;  //for create probe
    3.42   }
    3.43   VMSSemReq;
    3.44 @@ -94,12 +102,12 @@
    3.45  struct _SchedSlot
    3.46   {
    3.47     int         workIsDone;
    3.48 -   int         needsProcrAssigned;
    3.49 -   SlaveVP  *procrAssignedToSlot;
    3.50 +   int         needsSlaveAssigned;
    3.51 +   SlaveVP  *slaveAssignedToSlot;
    3.52   };
    3.53  //SchedSlot
    3.54  
    3.55 -/*WARNING: re-arranging this data structure could cause VP switching
    3.56 +/*WARNING: re-arranging this data structure could cause Slv switching
    3.57   *         assembly code to fail -- hard-codes offsets of fields
    3.58   */
    3.59  struct _SlaveVP
    3.60 @@ -117,23 +125,11 @@
    3.61     SchedSlot  *schedSlot;
    3.62     VMSReqst   *requests;
    3.63  
    3.64 -   void       *semanticData; //this livesUSE_GNU here for the life of VP
    3.65 -   void       *dataRetFromReq;//values returned from plugin to VP go here
     3.66 +   void       *semanticData; //this lives here for the life of Slv
    3.67 +   void       *dataRetFromReq;//values returned from plugin to Slv go here
    3.68  
    3.69        //=========== MEASUREMENT STUFF ==========
    3.70 -       #ifdef MEAS__TIME_STAMP_SUSP
    3.71 -       uint32  preSuspTSCLow;
    3.72 -       uint32  postSuspTSCLow;
    3.73 -       #endif
    3.74 -       #ifdef MEAS__TIME_MASTER /* in SlaveVP because multiple masterVPs*/
    3.75 -       uint32  startMasterTSCLow;USE_GNU
    3.76 -       uint32  endMasterTSCLow;
    3.77 -       #endif
    3.78 -       #ifdef MEAS__TIME_2011_SYS
    3.79 -       TSCountLowHigh  startSusp;
    3.80 -       uint64  totalSuspCycles;
    3.81 -       uint32  numGoodSusp;
    3.82 -       #endif
    3.83 +       MEAS__Insert_Meas_Fields_into_Slave;
    3.84        //========================================
    3.85     
    3.86     float64      createPtInSecs;  //have space but don't use on some configs
    3.87 @@ -141,18 +137,13 @@
    3.88  //SlaveVP
    3.89  
    3.90  
    3.91 -/*WARNING: re-arranging this data structure could cause VP-switching
    3.92 +/*WARNING: re-arranging this data structure could cause Slv-switching
    3.93   *         assembly code to fail -- hard-codes offsets of fields
    3.94   *         (because -O3 messes with things otherwise)
    3.95   */
    3.96  typedef struct
    3.97   {
    3.98 -   union{ //adds padding to put masterLock on its own cache-line to elim
    3.99 -          // false sharing (masterLock is most-accessed var in VMS)
   3.100 -        volatile int32   masterLock;
   3.101 -        char             padding[CACHE_LINE_SZ];    
   3.102 -   } masterLockUnion;
   3.103 -   Sched_Assigner   slaveSchedAssigner;
   3.104 +   Sched_Assigner   slaveAssigner;
   3.105     RequestHandler   requestHandler;
   3.106     
   3.107     SchedSlot     ***allSchedSlots;
   3.108 @@ -161,17 +152,19 @@
   3.109  
   3.110     void            *semanticEnv;
   3.111     void            *OSEventStruc;   //for future, when add I/O to BLIS
   3.112 -   MallocArrays    *freeLists;
   3.113 +   MallocArrays   *freeLists;
   3.114     int32            amtOfOutstandingMem; //total currently allocated
   3.115  
   3.116     void            *coreLoopReturnPt;//addr to jump to to re-enter coreLoop
   3.117  
   3.118     int32            setupComplete;
   3.119 -   //int32            numMasterInARow[NUM_CORES];//detect back-to-back masterVP
   3.120 +   int32            numMasterInARow[NUM_CORES];//detect back-to-back masterVP
   3.121 +   int32            masterLock __align_to_cacheline__;
   3.122     GateStruc       *workStealingGates[ NUM_CORES ]; //concurrent work-steal
   3.123     int32            workStealingLock;
   3.124     
   3.125 -   int32            numVPsCreated; //gives ordering to processor creation
   3.126 +   int32            numSlavesCreated; //gives ordering to processor creation
   3.127 +   int32            numSlavesAlive;   //used to detect when to shutdown
   3.128  
   3.129        //=========== MEASUREMENT STUFF =============
   3.130         IntervalProbe   **intervalProbes;
   3.131 @@ -181,28 +174,12 @@
   3.132         float64           createPtInSecs;
   3.133         Histogram       **measHists;
   3.134         PrivDynArrayInfo *measHistsInfo;
   3.135 -       #ifdef MEAS__TIME_PLUGIN
   3.136 -       Histogram       *reqHdlrLowTimeHist;
   3.137 -       Histogram       *reqHdlrHighTimeHist;
   3.138 -       #endif
   3.139 -       #ifdef MEAS__TIME_MALLOC
   3.140 -       Histogram       *mallocTimeHist;
   3.141 -       Histogram       *freeTimeHist;
   3.142 -       #endif
   3.143 -       #ifdef MEAS__TIME_MASTER_LOCK
   3.144 -       Histogram       *masterLockLowTimeHist;
   3.145 -       Histogram       *masterLockHighTimeHist;
   3.146 -       #endif
   3.147 -       #ifdef MEAS__TIME_2011_SYS
   3.148 -       TSCountLowHigh   startMaster;
   3.149 -       uint64           totalMasterCycles;
   3.150 -       uint32           numMasterAnimations;
   3.151 -       TSCountLowHigh   startReqHdlr;
   3.152 -       uint64           totalPluginCycles;
   3.153 -       uint32           numPluginAnimations;
   3.154 -       uint64           cyclesTillStartMasterLoop;
   3.155 -       TSCountLowHigh   endMasterLoop;
   3.156 -       #endif
   3.157 +       MEAS__Insert_Susp_Meas_Fields_into_MasterEnv;
   3.158 +       MEAS__Insert_Master_Meas_Fields_into_MasterEnv;
   3.159 +       MEAS__Insert_Master_Lock_Meas_Fields_into_MasterEnv;
   3.160 +       MEAS__Insert_Malloc_Meas_Fields_into_MasterEnv;
   3.161 +       MEAS__Insert_Plugin_Meas_Fields_into_MasterEnv;
   3.162 +       MEAS__Insert_System_Meas_Fields_into_MasterEnv;
   3.163        //==========================================
   3.164   }
   3.165  MasterEnv;
   3.166 @@ -237,28 +214,32 @@
   3.167   }
   3.168  ThdParams;
   3.169  
   3.170 +//=============================  Global Vars ================================
   3.171 +
   3.172  pthread_t       coreLoopThdHandles[ NUM_CORES ];  //pthread's virt-procr state
   3.173  ThdParams      *coreLoopThdParams [ NUM_CORES ];
   3.174  pthread_mutex_t suspendLock;
   3.175  pthread_cond_t  suspend_cond;
   3.176  
   3.177 -
   3.178 -
   3.179 -//=============================  Global Vars ================================
   3.180 -
   3.181  volatile MasterEnv      *_VMSMasterEnv __align_to_cacheline__;
   3.182  
   3.183  
   3.184 -
   3.185 -
   3.186  //=========================  Function Prototypes  ===========================
   3.187  
   3.188 +/* MEANING OF   WL  PI  SS  int
   3.189 + * These indicate which places the function is safe to use.  They stand for:
   3.190 + * WL: Wrapper Library
   3.191 + * PI: Plugin 
   3.192 + * SS: Startup and Shutdown
   3.193 + * int: internal to the VMS implementation
   3.194 + */
   3.195  
   3.196  //========== Setup and shutdown ==========
   3.197  void
   3.198 -VMS_int__init();
   3.199 +VMS_SS__init();
   3.200  
   3.201 -Fix seed-procr creation -- put box around language, have lang register stuff
   3.202 +//Fix; 
   3.203 +/*seed-procr creation -- put box around language, have lang register stuff
   3.204          with VMS.
   3.205          have main program explicitly INIT Lang! -- makes more sense to
   3.206          C programmers -- makes it clear that there's a transition.
   3.207 @@ -289,77 +270,83 @@
   3.208          lang's sync constructs -- VMS uses message system to establish tie-pt,
   3.209          each lang defines what a tie-point means to it..  (work with the
   3.210          diff semantics?)
   3.211 +*/
   3.212  void
   3.213 -VMS_WL__start_the_work_then_wait_until_done();
   3.214 +VMS_SS__start_the_work_then_wait_until_done();
   3.215  
   3.216  void
   3.217 -VMS_int__shutdown();
   3.218 +VMS_SS__shutdown();
   3.219  
   3.220  void
   3.221 -VMS_int__cleanup_at_end_of_shutdown();
   3.222 +VMS_SS__cleanup_at_end_of_shutdown();
   3.223  
   3.224  
   3.225  //==============    ===============
   3.226  
   3.227  inline SlaveVP *
   3.228 -VMS_int__create_procr( TopLevelFnPtr fnPtr, void *dataParam );
   3.229 +VMS_int__create_slaveVP( TopLevelFnPtr fnPtr, void *dataParam );
   3.230 +#define VMS_PI__create_slaveVP VMS_int__create_slaveVP
   3.231 +#define VMS_WL__create_slaveVP VMS_int__create_slaveVP
   3.232  
   3.233  inline void
   3.234 -VMS_int__point_slave_to_Fn( SlaveVP *slaveVP, TopLevelFnPtr fnPtr,
   3.235 +VMS_int__point_slaveVP_to_Fn( SlaveVP *slaveVP, TopLevelFnPtr fnPtr,
   3.236                              void    *dataParam);
   3.237 +#define VMS_PI__point_slaveVP_to_Fn  VMS_int__point_slaveVP_to_Fn
   3.238 +#define VMS_WL__point_slaveVP_to_Fn  VMS_int__point_slaveVP_to_Fn
   3.239  
   3.240  void
   3.241 -VMS_int__save_return_addr_into_ptd_to_loc(void *ptrToReturnAddrHoldingLoc);
   3.242 -
   3.243 -void
   3.244 -VMS_int__write_return_addr_from_ptd_to_loc(void *ptrToReturnAddrHoldingLoc);
   3.245 -
   3.246 -void
   3.247 -VMS_int__dissipate_procr( SlaveVP *procrToDissipate );
   3.248 +VMS_int__dissipate_SlaveVP( SlaveVP *slaveToDissipate );
   3.249 +#define VMS_PI__dissipate_SlaveVP VMS_int__dissipate_SlaveVP //PI alias of the int version declared just above
   3.250 +//From WL, dissipate a SlaveVP by sending a request
   3.251  
   3.252     //Use this to create processor inside entry point & other places outside
   3.253     // the VMS system boundary (IE, not run in slave nor Master)
   3.254  SlaveVP *
   3.255 -VMS_ext__create_procr( TopLevelFnPtr fnPtr, void *dataParam );
   3.256 +VMS_ext__create_slaveVP( TopLevelFnPtr fnPtr, void *dataParam );
   3.257  
   3.258  void
   3.259 -VMS_ext__dissipate_procr( SlaveVP *procrToDissipate );
   3.260 +VMS_ext__dissipate_slaveVP( SlaveVP *slaveToDissipate );
   3.261  
   3.262  void
   3.263 -VMS_PI__throw_exception( char *msgStr, SlaveVP *reqstPr, VMSExcp *excpData );
   3.264 +VMS_int__throw_exception( char *msgStr, SlaveVP *reqstSlv, VMSExcp *excpData );
   3.265 +#define VMS_PI__throw_exception VMS_int__throw_exception
   3.266 +#define VMS_WL__throw_exception VMS_int__throw_exception
   3.267  
   3.268  void *
   3.269 -VMS_WL__give_sem_env_for( SlaveVP *animPr );
   3.270 +VMS_int__give_sem_env_for( SlaveVP *animSlv );
   3.271 +#define VMS_PI__give_sem_env_for  VMS_int__give_sem_env_for
   3.272 +#define VMS_SS__give_sem_env_for  VMS_int__give_sem_env_for
   3.273 +//No WL version -- not safe!  if use in WL, be sure data rd & wr is stable
   3.274  
   3.275  //==============  Request Related  ===============
   3.276  
   3.277  void
   3.278 -VMS_int__suspend_procr( SlaveVP *callingPr );
   3.279 +VMS_int__suspend_slaveVP_and_send_req( SlaveVP *callingSlv );
   3.280  
   3.281  inline void
   3.282 -VMS_WL__add_sem_request_in_mallocd_VMSReqst( void *semReqData, SlaveVP *callingPr );
   3.283 +VMS_WL__add_sem_request_in_mallocd_VMSReqst( void *semReqData, SlaveVP *callingSlv );
   3.284  
   3.285  inline void
   3.286 -VMS_WL__send_sem_request( void *semReqData, SlaveVP *callingPr );
   3.287 +VMS_WL__send_sem_request( void *semReqData, SlaveVP *callingSlv );
   3.288  
   3.289  void
   3.290 -VMS_WL__send_create_procr_req( void *semReqData, SlaveVP *reqstingPr );
   3.291 +VMS_WL__send_create_slaveVP_req( void *semReqData, SlaveVP *reqstingSlv );
   3.292  
   3.293  void inline
   3.294  VMS_WL__send_dissipate_req( SlaveVP *prToDissipate );
   3.295  
   3.296  inline void
   3.297 -VMS_WL__send_VMSSem_request( void *semReqData, SlaveVP *callingPr );
   3.298 +VMS_WL__send_VMSSem_request( void *semReqData, SlaveVP *callingSlv );
   3.299  
   3.300  VMSReqst *
   3.301 -VMS_PI__take_next_request_out_of( SlaveVP *procrWithReq );
   3.302 +VMS_PI__take_next_request_out_of( SlaveVP *slaveWithReq );
   3.303  
   3.304  inline void *
   3.305  VMS_PI__take_sem_reqst_from( VMSReqst *req );
   3.306  
   3.307  void inline
   3.308 -VMS_PI__handle_VMSSemReq( VMSReqst *req, SlaveVP *requestingPr, void *semEnv,
   3.309 -                       ResumeVPFnPtr resumePrFnPtr );
   3.310 +VMS_PI__handle_VMSSemReq( VMSReqst *req, SlaveVP *requestingSlv, void *semEnv,
   3.311 +                       ResumeSlvFnPtr resumeSlvFnPtr );
   3.312  
   3.313  //======================== MEASUREMENT ======================
   3.314  uint64
   3.315 @@ -368,8 +355,6 @@
   3.316  VMS_WL__give_num_plugin_animations();
   3.317  
   3.318  
   3.319 -
   3.320 -#include "VMS__HW_dependent.h"
   3.321  #include "probes.h"
   3.322  #include "vutilities.h"
   3.323  
     4.1 --- a/VMS__HW_dependent.c	Wed Feb 22 11:39:12 2012 -0800
     4.2 +++ b/VMS__HW_dependent.c	Sun Mar 04 14:26:35 2012 -0800
     4.3 @@ -12,7 +12,8 @@
     4.4   *No need to save registers on old stack frame, because there's no old
     4.5   * animator state to return to
     4.6   */
     4.7 -VMS_int__point_slave_to_Fn( SlaveVP *slaveVP, TopLevelFnPtr fnPtr,
     4.8 +inline void
     4.9 +VMS_int__point_slaveVP_to_Fn( SlaveVP *slaveVP, TopLevelFnPtr fnPtr,
    4.10                              void    *dataParam)
    4.11   { void  *stackPtr;
    4.12  
     5.1 --- a/VMS__HW_dependent.h	Wed Feb 22 11:39:12 2012 -0800
     5.2 +++ b/VMS__HW_dependent.h	Sun Mar 04 14:26:35 2012 -0800
     5.3 @@ -6,28 +6,75 @@
     5.4   * 
     5.5   */
     5.6  
     5.7 -#ifndef _ProcrContext_H
     5.8 -#define	_ProcrContext_H
     5.9 +#ifndef _VMS__HW_DEPENDENT_H
    5.10 +#define	_VMS__HW_DEPENDENT_H
    5.11  #define _GNU_SOURCE
    5.12  
    5.13 -void saveCoreLoopReturnAddr(void **returnAddress);
    5.14 +void 
    5.15 +saveCoreLoopReturnAddr(void **returnAddress);
    5.16  
    5.17 -void switchToVP(SlaveVP *nextProcr);
    5.18 +void 
    5.19 +switchToSlv(SlaveVP *nextSlave);
    5.20  
    5.21 -void switchToCoreLoop(SlaveVP *nextProcr);
    5.22 +void 
    5.23 +switchToCoreLoop(SlaveVP *nextSlave);
    5.24  
    5.25 -void masterSwitchToCoreLoop(SlaveVP *nextProcr);
    5.26 +void 
    5.27 +masterSwitchToCoreLoop(SlaveVP *nextSlave);
    5.28  
    5.29 -void startUpTopLevelFn();
    5.30 +void 
    5.31 +startUpTopLevelFn();
    5.32  
    5.33 -void *asmTerminateCoreLoop(SlaveVP *currPr);
    5.34 +void *
    5.35 +asmTerminateCoreLoop(SlaveVP *currSlv);
    5.36  
    5.37  #define flushRegisters() \
    5.38          asm volatile ("":::"%rbx", "%r12", "%r13","%r14","%r15")
    5.39  
    5.40  inline SlaveVP *
    5.41 -create_procr_helper( SlaveVP *newPr,       TopLevelFnPtr  fnPtr,
    5.42 +create_slaveVP_helper( SlaveVP *newSlv,       TopLevelFnPtr  fnPtr,
    5.43                       void      *dataParam, void           *stackLocs );
    5.44  
    5.45 -#endif	/* _ProcrContext_H */
    5.46 +void
    5.47 +VMS_int__save_return_into_ptd_to_loc_then_do_ret(void *ptdToLoc);
    5.48  
    5.49 +void
    5.50 +VMS_int__return_to_addr_in_ptd_to_loc(void *ptdToLoc);
    5.51 +
    5.52 +//===================  Macros to Capture Measurements  ======================
    5.53 +//
    5.54 +//===== RDTSC wrapper ===== 
    5.55 +//Also runs with x86_64 code
    5.56 +#define saveTSCLowHigh(lowHighIn) \
    5.57 +   asm volatile("RDTSC;                   \
    5.58 +                 movl %%eax, %0;          \
    5.59 +                 movl %%edx, %1;"         \
    5.60 +   /* outputs */ : "=m" (lowHighIn.lowHigh[0]), "=m" (lowHighIn.lowHigh[1])\
    5.61 +   /* inputs  */ :                        \
    5.62 +   /* clobber */ : "%eax", "%edx"         \
    5.63 +                );
    5.64 +
    5.65 +#define saveTimeStampCountInto(low, high) \
    5.66 +   asm volatile("RDTSC;                   \
    5.67 +                 movl %%eax, %0;          \
    5.68 +                 movl %%edx, %1;"         \
    5.69 +   /* outputs */ : "=m" (low), "=m" (high)\
    5.70 +   /* inputs  */ :                        \
    5.71 +   /* clobber */ : "%eax", "%edx"         \
    5.72 +                );
    5.73 +
    5.74 +#define saveLowTimeStampCountInto(low)    \
    5.75 +   asm volatile("RDTSC;                   \
    5.76 +                 movl %%eax, %0;"         \
    5.77 +   /* outputs */ : "=m" (low)             \
    5.78 +   /* inputs  */ :                        \
    5.79 +   /* clobber */ : "%eax", "%edx"         \
    5.80 +                );
    5.81 +
    5.82 +   //For code that calculates normalization-offset between TSC counts of
    5.83 +   // different cores.
    5.84 +//#define NUM_TSC_ROUND_TRIPS 10
    5.85 +
    5.86 +
    5.87 +#endif	/* _VMS__HW_DEPENDENT_H */
    5.88 +
     6.1 --- a/VMS__HW_dependent.s	Wed Feb 22 11:39:12 2012 -0800
     6.2 +++ b/VMS__HW_dependent.s	Sun Mar 04 14:26:35 2012 -0800
     6.3 @@ -16,13 +16,13 @@
     6.4  // the top-level function, which was pointed to by the stack-ptr
     6.5  .globl startUpTopLevelFn
     6.6  startUpTopLevelFn:
     6.7 -    movq    %rdi      , %rsi #get second argument from first argument of switchVP
     6.8 +    movq    %rdi      , %rsi #get second argument from first argument of switchSlv
     6.9      movq    0x08(%rsp), %rdi #get first argument from stack
    6.10      movq    (%rsp)    , %rax #get top-level function's addr from stack
    6.11      jmp     *%rax            #jump to the top-level function
    6.12  
    6.13 -//Switches form CoreLoop to VP ether a normal VP or the Master Loop
    6.14 -//switch to virt procr's stack and frame ptr then jump to virt procr fn
    6.15 +//Switches from CoreLoop to Slv, either a normal Slv or the Master Loop
    6.16 +//switch to Slv's stack and frame ptr then jump to Slv fn
    6.17  /* SlaveVP  offsets:
    6.18   * 0x10  stackPtr
    6.19   * 0x18 framePtr
    6.20 @@ -34,15 +34,15 @@
    6.21   * 0x48 coreLoopReturnPt
    6.22   * 0x54 masterLock
    6.23   */
    6.24 -.globl switchToVP
    6.25 -switchToVP:
    6.26 +.globl switchToSlv
    6.27 +switchToSlv:
    6.28      #SlaveVP in %rdi
    6.29      movq    %rsp      , 0x38(%rdi)   #save core loop stack pointer 
    6.30      movq    %rbp      , 0x30(%rdi)   #save core loop frame pointer
    6.31      movq    0x10(%rdi), %rsp         #restore stack pointer
    6.32      movq    0x18(%rdi), %rbp         #restore frame pointer
    6.33      movq    0x20(%rdi), %rax         #get jmp pointer
    6.34 -    jmp     *%rax                    #jmp to VP
    6.35 +    jmp     *%rax                    #jmp to Slv
    6.36  coreLoopReturn:
    6.37      ret
    6.38  
    6.39 @@ -62,7 +62,7 @@
    6.40  .globl switchToCoreLoop
    6.41  switchToCoreLoop:
    6.42      #SlaveVP in %rdi
    6.43 -    movq    $VPReturn , 0x20(%rdi)   #store return address
    6.44 +    movq    $SlvReturn , 0x20(%rdi)   #store return address
    6.45      movq    %rsp      , 0x10(%rdi)   #save stack pointer 
    6.46      movq    %rbp      , 0x18(%rdi)   #save frame pointer
    6.47      movq    0x38(%rdi), %rsp         #restore stack pointer
    6.48 @@ -71,7 +71,7 @@
    6.49      movq    (%rcx)    , %rcx
    6.50      movq    0x48(%rcx), %rax         #get CoreLoopStartPt
    6.51      jmp     *%rax                    #jmp to CoreLoop
    6.52 -VPReturn:
    6.53 +SlvReturn:
    6.54      ret
    6.55  
    6.56  
    6.57 @@ -108,10 +108,10 @@
    6.58  
    6.59  //Switch to terminateCoreLoop
    6.60  //therefore switch to coreLoop context from master context
    6.61 -// no need to call because the stack is already set up for switchVP
    6.62 -// and virtPr is in %rdi
    6.63 +// no need to call because the stack is already set up for switchSlv
    6.64 +// and Slv is in %rdi
    6.65  // and both functions have the same argument.
    6.66 -// do not save register of VP because this function will never return
    6.67 +// do not save register of Slv because this function will never return
    6.68  /* SlaveVP  offsets:
    6.69   * 0x10  stackPtr
    6.70   * 0x18 framePtr
    6.71 @@ -134,7 +134,7 @@
    6.72  
    6.73  /*
    6.74   * This one for the sequential version is special. It discards the current stack
    6.75 - * and returns directly from the coreLoop after VMS__dissipate_procr was called
    6.76 + * and returns directly from the coreLoop after VMS_WL__dissipate_slaveVP was called
    6.77   */
    6.78  .globl asmTerminateCoreLoopSeq
    6.79  asmTerminateCoreLoopSeq:
    6.80 @@ -142,7 +142,7 @@
    6.81      movq    0x38(%rdi), %rsp         #restore stack pointer
    6.82      movq    0x30(%rdi), %rbp         #restore frame pointer
    6.83      #argument is in %rdi
    6.84 -    call    VMS__dissipate_procr
    6.85 +    call    VMS_int__dissipate_slaveVP
    6.86      movq    %rbp      , %rsp        #goto the coreLoops stack
    6.87      pop     %rbp        #restore the old framepointer
    6.88      ret                 #return from core loop
    6.89 @@ -150,18 +150,18 @@
    6.90  
    6.91  //Assembly code takes the return addr off the stack and saves
    6.92  // into the loc pointed to by rdi.  The return addr is at 0x8(%rbp) for 64bit
    6.93 -.globl asm_save_ret_to_singleton
    6.94 -VMS_int__save_return_addr_into_ptd_to_loc:
    6.95 +.globl VMS_int__save_return_into_ptd_to_loc_then_do_ret
    6.96 +VMS_int__save_return_into_ptd_to_loc_then_do_ret:
    6.97      movq 0x8(%rbp),     %rax  #get ret address, rbp is the same as in the calling function
    6.98 -    movq     %rax,     (%rdi) #write ret addr to endInstrAddr field
    6.99 +    movq     %rax,     (%rdi) #write ret addr into addr passed as param field
   6.100      ret
   6.101  
   6.102  
   6.103  //Assembly code changes the return addr on the stack to the one
   6.104 -// pointed to by the parameter. The stack's return addr is at 0x8(%rbp)
   6.105 -.globl asm_write_ret_from_singleton
   6.106 -VMS_int__write_return_addr_from_ptd_to_loc:
   6.107 -    movq    (%rdi),    %rax      #get return addr
   6.108 -    movq      %rax,    0x8(%rbp) #write return addr to the stack of the caller
   6.109 +// pointed to by the parameter, then returns. Stack's return addr is at 0x8(%rbp)
   6.110 +.globl VMS_int__return_to_addr_in_ptd_to_loc
   6.111 +VMS_int__return_to_addr_in_ptd_to_loc:
   6.112 +    movq    (%rdi),    %rax  #get return addr from addr passed as param
   6.113 +    movq     %rax, 0x8(%rbp) #write return addr to the stack of the caller
   6.114      ret
   6.115  
     7.1 --- a/VMS__PI.c	Wed Feb 22 11:39:12 2012 -0800
     7.2 +++ b/VMS__PI.c	Sun Mar 04 14:26:35 2012 -0800
     7.3 @@ -17,13 +17,13 @@
     7.4  /*
     7.5   */
     7.6  VMSReqst *
     7.7 -VMS_PI__take_next_request_out_of( SlaveVP *procrWithReq )
     7.8 +VMS_PI__take_next_request_out_of( SlaveVP *slaveWithReq )
     7.9   { VMSReqst *req;
    7.10  
    7.11 -   req = procrWithReq->requests;
    7.12 +   req = slaveWithReq->requests;
    7.13     if( req == NULL ) return NULL;
    7.14  
    7.15 -   procrWithReq->requests = procrWithReq->requests->nextReqst;
    7.16 +   slaveWithReq->requests = slaveWithReq->requests->nextReqst;
    7.17     return req;
    7.18   }
    7.19  
    7.20 @@ -51,8 +51,8 @@
    7.21   * Do the same for OS calls -- look later at it..
    7.22   */
    7.23  void inline
    7.24 -VMS_PI__handle_VMSSemReq( VMSReqst *req, SlaveVP *requestingPr, void *semEnv,
    7.25 -                       ResumeVPFnPtr resumePrFnPtr )
    7.26 +VMS_PI__handle_VMSSemReq( VMSReqst *req, SlaveVP *requestingSlv, void *semEnv,
    7.27 +                       ResumeSlvFnPtr resumeSlvFnPtr )
    7.28   { VMSSemReq     *semReq;
    7.29     IntervalProbe *newProbe;
    7.30  
    7.31 @@ -67,9 +67,9 @@
    7.32     newProbe->probeID =
    7.33               addToDynArray( newProbe, _VMSMasterEnv->dynIntervalProbesInfo );
    7.34  
    7.35 -   requestingPr->dataRetFromReq = newProbe;
    7.36 +   requestingSlv->dataRetFromReq = newProbe;
    7.37  
    7.38 -   (*resumePrFnPtr)( requestingPr, semEnv );
    7.39 +   (*resumeSlvFnPtr)( requestingSlv, semEnv );
    7.40   }
    7.41  
    7.42  
    7.43 @@ -77,7 +77,7 @@
    7.44   * the error message.
    7.45   */
    7.46  void
    7.47 -VMS_PI__throw_exception( char *msgStr, SlaveVP *reqstPr, VMSExcp *excpData )
    7.48 +VMS_PI__throw_exception( char *msgStr, SlaveVP *reqstSlv, VMSExcp *excpData )
    7.49   {
    7.50     printf("%s",msgStr);
    7.51     fflush(stdin);
     8.1 --- a/VMS__WL.c	Wed Feb 22 11:39:12 2012 -0800
     8.2 +++ b/VMS__WL.c	Sun Mar 04 14:26:35 2012 -0800
     8.3 @@ -14,38 +14,30 @@
     8.4  #include "VMS.h"
     8.5  
     8.6  
     8.7 -/*Anticipating multi-tasking
     8.8 - */
     8.9 -void *
    8.10 -VMS_WL__give_sem_env_for( SlaveVP *animPr )
    8.11 - {
    8.12 -   return _VMSMasterEnv->semanticEnv;
    8.13 - }
    8.14 -
    8.15  
    8.16  /*For this implementation of VMS, it may not make much sense to have the
    8.17   * system of requests for creating a new processor done this way.. but over
    8.18   * the scope of single-master, multi-master, multi-tasking, OS-implementing,
    8.19   * distributed-memory, and so on, this gives VMS implementation a chance to
    8.20 - * do stuff before suspend, in the AppVP, and in the Master before the plugin
    8.21 + * do stuff before suspend, in the SlaveVP, and in the Master before the plugin
    8.22   * is called, as well as in the lang-lib before this is called, and in the
    8.23   * plugin.  So, this gives both VMS and language implementations a chance to
    8.24   * intercept at various points and do order-dependent stuff.
    8.25   *Having a standard VMSNewPrReqData struc allows the language to create and
    8.26 - * free the struc, while VMS knows how to get the newPr if it wants it, and
    8.27 + * free the struc, while VMS knows how to get the newSlv if it wants it, and
    8.28   * it lets the lang have lang-specific data related to creation transported
    8.29   * to the plugin.
    8.30   */
    8.31  void
    8.32 -VMS_WL__send_create_procr_req( void *semReqData, SlaveVP *reqstingPr )
    8.33 +VMS_WL__send_create_slaveVP_req( void *semReqData, SlaveVP *reqstingSlv )
    8.34   { VMSReqst req;
    8.35  
    8.36     req.reqType          = createReq;
    8.37     req.semReqData       = semReqData;
    8.38 -   req.nextReqst        = reqstingPr->requests;
    8.39 -   reqstingPr->requests = &req;
    8.40 +   req.nextReqst        = reqstingSlv->requests;
    8.41 +   reqstingSlv->requests = &req;
    8.42  
    8.43 -   VMS_int__suspend_procr( reqstingPr );
    8.44 +   VMS_int__suspend_slaveVP_and_send_req( reqstingSlv );
    8.45   }
    8.46  
    8.47  
    8.48 @@ -61,24 +53,24 @@
    8.49   *This form is a bit misleading to understand if one is trying to figure out
    8.50   * how VMS works -- it looks like a normal function call, but inside it
    8.51   * sends a request to the request handler and suspends the processor, which
    8.52 - * jumps out of the VMS__dissipate_procr function, and out of all nestings
    8.53 + * jumps out of the VMS_WL__dissipate_slaveVP function, and out of all nestings
    8.54   * above it, transferring the work of dissipating to the request handler,
    8.55   * which then does the actual work -- causing the processor that animated
    8.56   * the call of this function to disappear and the "hanging" state of this
    8.57   * function to just poof into thin air -- the virtual processor's trace
    8.58   * never returns from this call, but instead the virtual processor's trace
    8.59   * gets suspended in this call and all the virt processor's state disap-
    8.60 - * pears -- making that suspend the last thing in the virt procr's trace.
    8.61 + * pears -- making that suspend the last thing in the Slv's trace.
    8.62   */
    8.63  void
    8.64 -VMS_WL__send_dissipate_req( SlaveVP *procrToDissipate )
    8.65 +VMS_WL__send_dissipate_req( SlaveVP *slaveToDissipate )
    8.66   { VMSReqst req;
    8.67  
    8.68     req.reqType                = dissipate;
    8.69 -   req.nextReqst              = procrToDissipate->requests;
    8.70 -   procrToDissipate->requests = &req;
    8.71 +   req.nextReqst              = slaveToDissipate->requests;
    8.72 +   slaveToDissipate->requests = &req;
    8.73  
    8.74 -   VMS_int__suspend_procr( procrToDissipate );
    8.75 +   VMS_int__suspend_slaveVP_and_send_req( slaveToDissipate );
    8.76   }
    8.77  
    8.78  
    8.79 @@ -95,14 +87,14 @@
    8.80   */
    8.81  inline void
    8.82  VMS_WL__add_sem_request_in_mallocd_VMSReqst( void *semReqData,
    8.83 -                                          SlaveVP *callingPr )
    8.84 +                                          SlaveVP *callingSlv )
    8.85   { VMSReqst *req;
    8.86  
    8.87     req = VMS_int__malloc( sizeof(VMSReqst) );
    8.88     req->reqType         = semantic;
    8.89     req->semReqData      = semReqData;
    8.90 -   req->nextReqst       = callingPr->requests;
    8.91 -   callingPr->requests = req;
    8.92 +   req->nextReqst       = callingSlv->requests;
    8.93 +   callingSlv->requests = req;
    8.94   }
    8.95  
    8.96  /*This inserts the semantic-layer's request data into standard VMS carrier
    8.97 @@ -111,28 +103,28 @@
    8.98   *Then it does suspend, to cause request to be sent.
    8.99   */
   8.100  inline void
   8.101 -VMS_WL__send_sem_request( void *semReqData, SlaveVP *callingPr )
   8.102 +VMS_WL__send_sem_request( void *semReqData, SlaveVP *callingSlv )
   8.103   { VMSReqst req;
   8.104  
   8.105     req.reqType         = semantic;
   8.106     req.semReqData      = semReqData;
   8.107 -   req.nextReqst       = callingPr->requests;
   8.108 -   callingPr->requests = &req;
   8.109 +   req.nextReqst       = callingSlv->requests;
   8.110 +   callingSlv->requests = &req;
   8.111     
   8.112 -   VMS_int__suspend_procr( callingPr );
   8.113 +   VMS_int__suspend_slaveVP_and_send_req( callingSlv );
   8.114   }
   8.115  
   8.116  
   8.117  inline void
   8.118 -VMS_WL__send_VMSSem_request( void *semReqData, SlaveVP *callingPr )
   8.119 +VMS_WL__send_VMSSem_request( void *semReqData, SlaveVP *callingSlv )
   8.120   { VMSReqst req;
   8.121  
   8.122     req.reqType         = VMSSemantic;
   8.123     req.semReqData      = semReqData;
   8.124 -   req.nextReqst       = callingPr->requests; //gab any other preceeding 
   8.125 -   callingPr->requests = &req;
   8.126 +   req.nextReqst       = callingSlv->requests; //grab any other preceding
   8.127 +   callingSlv->requests = &req;
   8.128  
   8.129 -   VMS_int__suspend_procr( callingPr );
   8.130 +   VMS_int__suspend_slaveVP_and_send_req( callingSlv );
   8.131   }
   8.132  
   8.133  
     9.1 --- a/VMS__int.c	Wed Feb 22 11:39:12 2012 -0800
     9.2 +++ b/VMS__int.c	Sun Mar 04 14:26:35 2012 -0800
     9.3 @@ -15,18 +15,18 @@
     9.4  
     9.5  
     9.6  inline SlaveVP *
     9.7 -VMS_int__create_procr( TopLevelFnPtr fnPtr, void *dataParam )
     9.8 - { SlaveVP *newPr;
     9.9 +VMS_int__create_slaveVP( TopLevelFnPtr fnPtr, void *dataParam )
    9.10 + { SlaveVP *newSlv;
    9.11     void      *stackLocs;
    9.12  
    9.13 -   newPr      = VMS_int__malloc( sizeof(SlaveVP) );
    9.14 +   newSlv      = VMS_int__malloc( sizeof(SlaveVP) );
    9.15     stackLocs  = VMS_int__malloc( VIRT_PROCR_STACK_SIZE );
    9.16     if( stackLocs == 0 )
    9.17      { perror("VMS__malloc stack"); exit(1); }
    9.18  
    9.19 -   _VMSMasterEnv->numSlaves += 1;
    9.20 +   _VMSMasterEnv->numSlavesAlive += 1;
    9.21  
    9.22 -   return create_procr_helper( newPr, fnPtr, dataParam, stackLocs );
    9.23 +   return create_slaveVP_helper( newSlv, fnPtr, dataParam, stackLocs );
    9.24   }
    9.25  
    9.26  /* "ext" designates that it's for use outside the VMS system -- should only
    9.27 @@ -34,59 +34,49 @@
    9.28   * a VMS virtual processor.
    9.29   */
    9.30  inline SlaveVP *
    9.31 -VMS_ext__create_procr( TopLevelFnPtr fnPtr, void *dataParam )
    9.32 - { SlaveVP *newPr;
    9.33 +VMS_ext__create_slaveVP( TopLevelFnPtr fnPtr, void *dataParam )
    9.34 + { SlaveVP *newSlv;
    9.35     char      *stackLocs;
    9.36  
    9.37 -   newPr      = malloc( sizeof(SlaveVP) );
    9.38 +   newSlv      = malloc( sizeof(SlaveVP) );
    9.39     stackLocs  = malloc( VIRT_PROCR_STACK_SIZE );
    9.40     if( stackLocs == 0 )
    9.41      { perror("malloc stack"); exit(1); }
    9.42  
    9.43 -   return create_procr_helper( newPr, fnPtr, dataParam, stackLocs );
    9.44 +   _VMSMasterEnv->numSlavesAlive += 1;
    9.45 +
    9.46 +   return create_slaveVP_helper(newSlv, fnPtr, dataParam, stackLocs);
    9.47   }
    9.48  
    9.49  
    9.50  //===========================================================================
    9.51  /*there is a label inside this function -- save the addr of this label in
    9.52 - * the callingPr struc, as the pick-up point from which to start the next
    9.53 - * work-unit for that procr.  If turns out have to save registers, then
    9.54 - * save them in the procr struc too.  Then do assembly jump to the CoreLoop's
    9.55 - * "done with work-unit" label.  The procr struc is in the request in the
    9.56 + * the callingSlv struc, as the pick-up point from which to start the next
    9.57 + * work-unit for that slave.  If turns out have to save registers, then
    9.58 + * save them in the slave struc too.  Then do assembly jump to the CoreLoop's
    9.59 + * "done with work-unit" label.  The slave struc is in the request in the
    9.60   * slave that animated the just-ended work-unit, so all the state is saved
    9.61   * there, and will get passed along, inside the request handler, to the
    9.62 - * next work-unit for that procr.
    9.63 + * next work-unit for that slave.
    9.64   */
    9.65  void
    9.66 -VMS_int__suspend_procr( SlaveVP *animatingPr )
    9.67 +VMS_int__suspend_slaveVP_and_send_req( SlaveVP *animatingSlv )
    9.68   { 
    9.69  
    9.70 -      //The request to master will cause this suspended virt procr to get
    9.71 +      //The request to master will cause this suspended Slv to get
    9.72        // scheduled again at some future point -- to resume, core loop jumps
    9.73        // to the resume point (below), which causes restore of saved regs and
    9.74        // "return" from this call.
    9.75 -   //animatingPr->resumeInstrPtr = &&ResumePt;
    9.76 +   //animatingSlv->resumeInstrPtr = &&ResumePt;
    9.77  
    9.78 -      //return ownership of the virt procr and sched slot to Master virt pr
    9.79 -   animatingPr->schedSlot->workIsDone = TRUE;
    9.80 +      //return ownership of the Slv and sched slot to Master virt pr
    9.81 +   animatingSlv->schedSlot->workIsDone = TRUE;
    9.82  
    9.83 -   //===========================  Measurement stuff ========================
    9.84 -   #ifdef MEAS__TIME_STAMP_SUSP
    9.85 -      //record time stamp: compare to time-stamp recorded below
    9.86 -   saveLowTimeStampCountInto( animatingPr->preSuspTSCLow );
    9.87 -   #endif
    9.88 -   //=======================================================================
    9.89 -
    9.90 -   switchToCoreLoop(animatingPr);
    9.91 +         MEAS__Capture_Pre_Susp_Point;
    9.92 +   switchToCoreLoop(animatingSlv);
    9.93     flushRegisters();
    9.94 -
    9.95 -   //=======================================================================
    9.96 -
    9.97 -   #ifdef MEAS__TIME_STAMP_SUSP
    9.98 -      //NOTE: only take low part of count -- do sanity check when take diff
    9.99 -   saveLowTimeStampCountInto( animatingPr->postSuspTSCLow );
   9.100 -   #endif
   9.101 -
   9.102 +         MEAS__Capture_Post_Susp_Point;
   9.103 +		 
   9.104     return;
   9.105   }
   9.106  
   9.107 @@ -95,19 +85,19 @@
   9.108   * be called from main thread or other thread -- never from code animated by
   9.109   * a SlaveVP, nor from a masterVP.
   9.110   *
   9.111 - *Use this version to dissipate VPs created outside the VMS system.
   9.112 + *Use this version to dissipate Slvs created outside the VMS system.
   9.113   */
   9.114  void
   9.115 -VMS_ext__dissipate_procr( SlaveVP *procrToDissipate )
   9.116 +VMS_ext__dissipate_slaveVP( SlaveVP *slaveToDissipate )
   9.117   {
   9.118        //NOTE: dataParam was given to the processor, so should either have
   9.119 -      // been alloc'd with VMS__malloc, or freed by the level above animPr.
   9.120 +      // been alloc'd with VMS__malloc, or freed by the level above animSlv.
   9.121        //So, all that's left to free here is the stack and the SlaveVP struc
   9.122        // itself
   9.123        //Note, should not stack-allocate the data param -- no guarantee, in
   9.124        // general that creating processor will outlive ones it creates.
   9.125 -   free( procrToDissipate->startOfStack );
   9.126 -   free( procrToDissipate );
   9.127 +   free( slaveToDissipate->startOfStack );
   9.128 +   free( slaveToDissipate );
   9.129   }
   9.130  
   9.131  
   9.132 @@ -130,26 +120,32 @@
   9.133   * of dis-owning it.
   9.134   */
   9.135  void
   9.136 -VMS_int__dissipate_procr( SlaveVP *animatingPr )
   9.137 +VMS_int__dissipate_SlaveVP( SlaveVP *animatingSlv )
   9.138   {
   9.139        //dis-own all locations owned by this processor, causing to be freed
   9.140        // any locations that it is (was) sole owner of
   9.141 -//TODO: implement VMS__malloc system, including "give up ownership"
   9.142  
   9.143 -   _VMSMasterEnv->numSlaves -= 1;
   9.144 -   if( _VMSMasterEnv->numSlaves == 0 )
   9.145 +   _VMSMasterEnv->numSlavesAlive -= 1;
   9.146 +   if( _VMSMasterEnv->numSlavesAlive == 0 )
   9.147      {    //no more work, so shutdown
   9.148 -      VMS_int__shutdown();  //note, creates 4 shut-down processors
   9.149 +      VMS_SS__shutdown();  //note, creates 4 shut-down processors
   9.150      }
   9.151  
   9.152        //NOTE: dataParam was given to the processor, so should either have
   9.153 -      // been alloc'd with VMS__malloc, or freed by the level above animPr.
   9.154 +      // been alloc'd with VMS__malloc, or freed by the level above animSlv.
   9.155        //So, all that's left to free here is the stack and the SlaveVP struc
   9.156        // itself
   9.157        //Note, should not stack-allocate initial data -- no guarantee, in
   9.158        // general that creating processor will outlive ones it creates.
   9.159 -   VMS_int__free( animatingPr->startOfStack );
   9.160 -   VMS_int__free( animatingPr );
   9.161 +   VMS_int__free( animatingSlv->startOfStack );
   9.162 +   VMS_int__free( animatingSlv );
   9.163   }
   9.164  
   9.165 +/*Anticipating multi-tasking
   9.166 + */
   9.167 +void *
   9.168 +VMS_int__give_sem_env_for( SlaveVP *animSlv )
   9.169 + {
   9.170 +   return _VMSMasterEnv->semanticEnv;
   9.171 + }
   9.172  
    10.1 --- a/VMS__startup_and_shutdown.c	Wed Feb 22 11:39:12 2012 -0800
    10.2 +++ b/VMS__startup_and_shutdown.c	Sun Mar 04 14:26:35 2012 -0800
    10.3 @@ -12,7 +12,7 @@
    10.4  #include <sys/time.h>
    10.5  
    10.6  #include "VMS.h"
    10.7 -#include "VMS__HW_dependent.h"
    10.8 +//#include "VMS__HW_dependent.h"
    10.9  
   10.10  
   10.11  #define thdAttrs NULL
   10.12 @@ -34,7 +34,7 @@
   10.13  create_free_list();
   10.14  
   10.15  void
   10.16 -endOSThreadFn( void *initData, SlaveVP *animatingPr );
   10.17 +endOSThreadFn( void *initData, SlaveVP *animatingSlv );
   10.18  
   10.19  pthread_mutex_t suspendLock = PTHREAD_MUTEX_INITIALIZER;
   10.20  pthread_cond_t  suspend_cond  = PTHREAD_COND_INITIALIZER;
   10.21 @@ -43,9 +43,9 @@
   10.22  
   10.23  /*Setup has two phases:
   10.24   * 1) Semantic layer first calls init_VMS, which creates masterEnv, and puts
   10.25 - *    the master virt procr into the work-queue, ready for first "call"
   10.26 + *    the master Slv into the work-queue, ready for first "call"
   10.27   * 2) Semantic layer then does its own init, which creates the seed virt
   10.28 - *    procr inside the semantic layer, ready to schedule it when
   10.29 + *    slave inside the semantic layer, ready to schedule it when
   10.30   *    asked by the first run of the masterLoop.
   10.31   *
   10.32   *This part is a bit weird because VMS really wants to be "always there", and
   10.33 @@ -54,15 +54,15 @@
   10.34   *
   10.35   *The semantic layer is isolated from the VMS internals by making the
   10.36   * semantic layer do setup to a state that it's ready with its
   10.37 - * initial virt procrs, ready to schedule them to slots when the masterLoop
   10.38 + * initial Slvs, ready to schedule them to slots when the masterLoop
   10.39   * asks.  Without this pattern, the semantic layer's setup would
   10.40   * have to modify slots directly to assign the initial virt-procrs, and put
   10.41   * them into the readyToAnimateQ itself, breaking the isolation completely.
   10.42   *
   10.43   * 
   10.44 - *The semantic layer creates the initial virt procr(s), and adds its
   10.45 + *The semantic layer creates the initial Slv(s), and adds its
   10.46   * own environment to masterEnv, and fills in the pointers to
   10.47 - * the requestHandler and slaveScheduler plug-in functions
   10.48 + * the requestHandler and slaveAssigner plug-in functions
   10.49   */
   10.50  
   10.51  /*This allocates VMS data structures, populates the master VMSProc,
   10.52 @@ -70,7 +70,7 @@
   10.53   * layer.
   10.54   */
   10.55  void
   10.56 -VMS_int__init()
   10.57 +VMS_SS__init()
   10.58   {
   10.59  
   10.60  #ifdef SEQUENTIAL
   10.61 @@ -97,24 +97,12 @@
   10.62          //Very first thing put into the master env is the free-list, seeded
   10.63          // with a massive initial chunk of memory.
   10.64          //After this, all other mallocs are VMS__malloc.
   10.65 -   _VMSMasterEnv->freeListHead        = VMS_ext__create_free_list();
   10.66 +   _VMSMasterEnv->freeLists        = VMS_ext__create_free_list();
   10.67  
   10.68  
   10.69 -   //============================= MEASUREMENT STUFF ========================
   10.70 -   #ifdef MEAS__TIME_MALLOC
   10.71 -   _VMSMasterEnv->mallocTimeHist  = makeFixedBinHistExt( 100, 0, 30,
   10.72 -                                                       "malloc_time_hist");
   10.73 -   _VMSMasterEnv->freeTimeHist  = makeFixedBinHistExt( 100, 0, 30,
   10.74 -                                                       "free_time_hist");
   10.75 -   #endif
   10.76 -   #ifdef MEAS__TIME_PLUGIN
   10.77 -   _VMSMasterEnv->reqHdlrLowTimeHist  = makeFixedBinHistExt( 100, 0, 200,
   10.78 -                                                     "plugin_low_time_hist");
   10.79 -   _VMSMasterEnv->reqHdlrHighTimeHist  = makeFixedBinHistExt( 100, 0, 200,
   10.80 -                                                    "plugin_high_time_hist");
   10.81 -   #endif
   10.82 -   //========================================================================
   10.83 -
   10.84 +         MEAS__Make_Meas_Hists_for_Malloc_Meas;
   10.85 +         MEAS__Make_Meas_Hists_for_Plugin_Meas;
   10.86 +   
   10.87     //===================== Only VMS__malloc after this ====================
   10.88     masterEnv     = (MasterEnv*)_VMSMasterEnv;
   10.89     
   10.90 @@ -125,15 +113,15 @@
   10.91        //One array for each core, 3 in array, core's masterVP scheds all
   10.92     allSchedSlots    = VMS_int__malloc( NUM_CORES * sizeof(SchedSlot *) );
   10.93  
   10.94 -   _VMSMasterEnv->numSlaves = 0;  //used to detect shut-down condition
   10.95 +   _VMSMasterEnv->numSlavesAlive = 0;  //used to detect shut-down condition
   10.96  
   10.97 -   _VMSMasterEnv->numVPsCreated = 0;  //used by create procr to set ID
   10.98 +   _VMSMasterEnv->numSlavesCreated = 0;  //used by create slave to set ID
   10.99     for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ )
  10.100      {    
  10.101        readyToAnimateQs[ coreIdx ] = makeVMSQ();
  10.102        
  10.103           //Q: should give masterVP core-specific info as its init data?
  10.104 -      masterVPs[ coreIdx ] = VMS_int__create_procr( (TopLevelFnPtr)&masterLoop, (void*)masterEnv );
  10.105 +      masterVPs[ coreIdx ] = VMS_int__create_slaveVP( (TopLevelFnPtr)&masterLoop, (void*)masterEnv );
  10.106        masterVPs[ coreIdx ]->coreAnimatedBy = coreIdx;
  10.107        allSchedSlots[ coreIdx ] = create_sched_slots(); //makes for one core
  10.108        _VMSMasterEnv->numMasterInARow[ coreIdx ] = 0;
  10.109 @@ -146,10 +134,6 @@
  10.110     _VMSMasterEnv->workStealingLock = UNLOCKED;
  10.111  
  10.112  
  10.113 -      //Aug 19, 2010:  no longer need to place initial masterVP into queue
  10.114 -      // because coreLoop now controls -- animates its masterVP when no work
  10.115 -
  10.116 -
  10.117     //============================= MEASUREMENT STUFF ========================
  10.118     #ifdef STATS__TURN_ON_PROBES
  10.119     _VMSMasterEnv->dynIntervalProbesInfo =
  10.120 @@ -163,14 +147,10 @@
  10.121     _VMSMasterEnv->createPtInSecs =
  10.122                             timeStamp.tv_sec +(timeStamp.tv_usec/1000000.0);
  10.123     #endif
  10.124 -   #ifdef MEAS__TIME_MASTER_LOCK
  10.125 -   _VMSMasterEnv->masterLockLowTimeHist  = makeFixedBinHist( 50, 0, 2,
  10.126 -                                                "master lock low time hist");
  10.127 -   _VMSMasterEnv->masterLockHighTimeHist  = makeFixedBinHist( 50, 0, 100,
  10.128 -                                               "master lock high time hist");
  10.129 -   #endif
  10.130     
  10.131 -   MakeTheMeasHists();
  10.132 +   MEAS__Make_Meas_Hists_for_Master_Lock_Meas
  10.133 +   
  10.134 +   MEAS__Make_Meas_Hists_for_Language();
  10.135     //========================================================================
  10.136   }
  10.137  
  10.138 @@ -187,7 +167,7 @@
  10.139  
  10.140           //Set state to mean "handling requests done, slot needs filling"
  10.141        schedSlots[i]->workIsDone         = FALSE;
  10.142 -      schedSlots[i]->needsProcrAssigned = TRUE;
  10.143 +      schedSlots[i]->needsSlaveAssigned = TRUE;
  10.144      }
  10.145     return schedSlots;
  10.146   }
  10.147 @@ -233,17 +213,17 @@
  10.148  
  10.149  
  10.150  void
  10.151 -VMS_WL__register_request_handler( RequestHandler requestHandler )
  10.152 +VMS_SS__register_request_handler( RequestHandler requestHandler )
  10.153   { _VMSMasterEnv->requestHandler = requestHandler;
  10.154   }
  10.155  
  10.156  
  10.157  void
  10.158 -VMS_WL__register_sched_assigner( Sched_Assigner schedAssigner )
  10.159 - { _VMSMasterEnv->slaveSchedAssigner = schedAssigner;
  10.160 +VMS_SS__register_sched_assigner( Sched_Assigner schedAssigner )
  10.161 + { _VMSMasterEnv->slaveAssigner = schedAssigner;
  10.162   }
  10.163  
  10.164 -VMS_WL__register_semantic_env( void *semanticEnv )
  10.165 +VMS_SS__register_semantic_env( void *semanticEnv )
  10.166   { _VMSMasterEnv->semanticEnv = semanticEnv;
  10.167   }
  10.168  
  10.169 @@ -254,7 +234,7 @@
  10.170   *Wrapper lib layer calls this when it wants the system to start running..
  10.171   */
  10.172  void
  10.173 -VMS_WL__start_the_work_then_wait_until_done()
  10.174 +VMS_SS__start_the_work_then_wait_until_done()
  10.175   { 
  10.176  #ifdef SEQUENTIAL
  10.177     /*Only difference between version with an OS thread pinned to each core and
  10.178 @@ -293,7 +273,7 @@
  10.179  
  10.180  //TODO: look at architecting cleanest separation between request handler
  10.181  // and master loop, for dissipate, create, shutdown, and other non-semantic
  10.182 -// requests.  Issue is chain: one removes requests from AppVP, one dispatches
  10.183 +// requests.  Issue is chain: one removes requests from AppSlv, one dispatches
  10.184  // on type of request, and one handles each type..  but some types require
  10.185  // action from both request handler and master loop -- maybe just give the
  10.186  // request handler calls like:  VMS__handle_X_request_type
  10.187 @@ -308,7 +288,7 @@
  10.188   *The _VMSMasterEnv is needed by this shut down function, so the create-seed-
  10.189   * and-wait function has to free a bunch of stuff after it detects the
  10.190   * threads have all died: the masterEnv, the thread-related locations,
  10.191 - * masterVP any AppVPs that might still be allocated and sitting in the
  10.192 + * masterVP any AppSlvs that might still be allocated and sitting in the
  10.193   * semantic environment, or have been orphaned in the _VMSWorkQ.
  10.194   * 
  10.195   *NOTE: the semantic plug-in is expected to use VMS__malloc to get all the
  10.196 @@ -318,22 +298,22 @@
  10.197   *In here, create one core-loop shut-down processor for each core loop and put
  10.198   * them all directly into the readyToAnimateQ.
  10.199   *Note, this function can ONLY be called after the semantic environment no
  10.200 - * longer cares if AppVPs get animated after the point this is called.  In
  10.201 + * longer cares if AppSlvs get animated after the point this is called.  In
  10.202   * other words, this can be used as an abort, or else it should only be
  10.203 - * called when all AppVPs have finished dissipate requests -- only at that
  10.204 + * called when all AppSlvs have finished dissipate requests -- only at that
  10.205   * point is it sure that all results have completed.
  10.206   */
  10.207  void
  10.208 -VMS_int__shutdown()
  10.209 +VMS_SS__shutdown()
  10.210   { int coreIdx;
  10.211 -   SlaveVP *shutDownPr;
  10.212 +   SlaveVP *shutDownSlv;
  10.213  
  10.214        //create the shutdown processors, one for each core loop -- put them
  10.215        // directly into the Q -- each core will die when it gets one
  10.216     for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ )
  10.217      {    //Note, this is running in the master
  10.218 -      shutDownPr = VMS_int__create_procr( &endOSThreadFn, NULL );
  10.219 -      writeVMSQ( shutDownPr, _VMSMasterEnv->readyToAnimateQs[coreIdx] );
  10.220 +      shutDownSlv = VMS_int__create_slaveVP( &endOSThreadFn, NULL );
  10.221 +      writeVMSQ( shutDownSlv, _VMSMasterEnv->readyToAnimateQs[coreIdx] );
  10.222      }
  10.223  
  10.224   }
  10.225 @@ -348,78 +328,37 @@
  10.226   * up just as if it never jumped out, before calling pthread_exit.
  10.227   *The end-point of core loop will free the stack and so forth of the
  10.228   * processor that animates this function, (this fn is transferring the
  10.229 - * animator of the AppVP that is in turn animating this function over
  10.230 + * animator of the AppSlv that is in turn animating this function over
  10.231   * to core loop function -- note that this slices out a level of virtual
  10.232   * processors).
  10.233   */
  10.234  void
  10.235 -endOSThreadFn( void *initData, SlaveVP *animatingPr )
  10.236 +endOSThreadFn( void *initData, SlaveVP *animatingSlv )
  10.237   { 
  10.238 -#ifdef SEQUENTIAL
  10.239 -    asmTerminateCoreLoopSeq(animatingPr);
  10.240 -#else
  10.241 -    asmTerminateCoreLoop(animatingPr);
  10.242 -#endif
  10.243 +   #ifdef SEQUENTIAL
  10.244 +    asmTerminateCoreLoopSeq(animatingSlv);
  10.245 +   #else
  10.246 +    asmTerminateCoreLoop(animatingSlv);
  10.247 +   #endif
  10.248   }
  10.249  
  10.250  
  10.251  /*This is called from the startup & shutdown
  10.252   */
  10.253  void
  10.254 -VMS_int__cleanup_at_end_of_shutdown()
  10.255 +VMS_SS__cleanup_at_end_of_shutdown()
  10.256   { 
  10.257 -   //unused
  10.258 -   //VMSQueueStruc **readyToAnimateQs;
  10.259 -   //int              coreIdx;
  10.260 -   //SlaveVP      **masterVPs;
  10.261 -   //SchedSlot     ***allSchedSlots; //ptr to array of ptrs
  10.262 -
  10.263        //Before getting rid of everything, print out any measurements made
  10.264     forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, (DynArrayFnPtr)&printHist );
  10.265     forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, (DynArrayFnPtr)&saveHistToFile);
  10.266     forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, &freeHist );
  10.267 -   #ifdef MEAS__TIME_PLUGIN
  10.268 -   printHist( _VMSMasterEnv->reqHdlrLowTimeHist );
  10.269 -   saveHistToFile( _VMSMasterEnv->reqHdlrLowTimeHist );
  10.270 -   printHist( _VMSMasterEnv->reqHdlrHighTimeHist );
  10.271 -   saveHistToFile( _VMSMasterEnv->reqHdlrHighTimeHist );
  10.272 -   freeHistExt( _VMSMasterEnv->reqHdlrLowTimeHist );
  10.273 -   freeHistExt( _VMSMasterEnv->reqHdlrHighTimeHist );
  10.274 -   #endif
  10.275 -   #ifdef MEAS__TIME_MALLOC
  10.276 -   printHist( _VMSMasterEnv->mallocTimeHist   );
  10.277 -   saveHistToFile( _VMSMasterEnv->mallocTimeHist   );
  10.278 -   printHist( _VMSMasterEnv->freeTimeHist     );
  10.279 -   saveHistToFile( _VMSMasterEnv->freeTimeHist     );
  10.280 -   freeHistExt( _VMSMasterEnv->mallocTimeHist );
  10.281 -   freeHistExt( _VMSMasterEnv->freeTimeHist   );
  10.282 -   #endif
  10.283 -   #ifdef MEAS__TIME_MASTER_LOCK
  10.284 -   printHist( _VMSMasterEnv->masterLockLowTimeHist );
  10.285 -   printHist( _VMSMasterEnv->masterLockHighTimeHist );
  10.286 -   #endif
  10.287 -   #ifdef MEAS__TIME_MASTER
  10.288 -   printHist( _VMSMasterEnv->pluginTimeHist );
  10.289 -   for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ )
  10.290 -    {
  10.291 -      freeVMSQ( readyToAnimateQs[ coreIdx ] );
  10.292 -         //master VPs were created external to VMS, so use external free
  10.293 -      VMS_int__dissipate_procr( masterVPs[ coreIdx ] );
  10.294 -
  10.295 -      freeSchedSlots( allSchedSlots[ coreIdx ] );
  10.296 -    }
  10.297 -   #endif
  10.298 -   #ifdef MEAS__TIME_STAMP_SUSP
  10.299 -   printHist( _VMSMasterEnv->pluginTimeHist );
  10.300 -   for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ )
  10.301 -    {
  10.302 -      freeVMSQ( readyToAnimateQs[ coreIdx ] );
  10.303 -         //master VPs were created external to VMS, so use external free
  10.304 -      VMS_int__dissipate_procr( masterVPs[ coreIdx ] );
  10.305 -
  10.306 -      freeSchedSlots( allSchedSlots[ coreIdx ] );
  10.307 -    }
  10.308 -   #endif
  10.309 +   
  10.310 +   MEAS__Print_Hists_for_Susp_Meas;
  10.311 +   MEAS__Print_Hists_for_Master_Meas;
  10.312 +   MEAS__Print_Hists_for_Master_Lock_Meas;
  10.313 +   MEAS__Print_Hists_for_Malloc_Meas;
  10.314 +   MEAS__Print_Hists_for_Plugin_Meas;
  10.315 +   
  10.316  
  10.317        //All the environment data has been allocated with VMS__malloc, so just
  10.318        // free its internal big-chunk and all inside it disappear.
  10.319 @@ -431,24 +370,24 @@
  10.320     for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ )
  10.321      {
  10.322        freeVMSQ( readyToAnimateQs[ coreIdx ] );
  10.323 -         //master VPs were created external to VMS, so use external free
  10.324 -      VMS__dissipate_procr( masterVPs[ coreIdx ] );
  10.325 +         //master Slvs were created external to VMS, so use external free
  10.326 +      VMS_int__dissipate_slaveVP( masterVPs[ coreIdx ] );
  10.327        
  10.328        freeSchedSlots( allSchedSlots[ coreIdx ] );
  10.329      }
  10.330     
  10.331 -   VMS__free( _VMSMasterEnv->readyToAnimateQs );
  10.332 -   VMS__free( _VMSMasterEnv->masterVPs );
  10.333 -   VMS__free( _VMSMasterEnv->allSchedSlots );
  10.334 +   VMS_int__free( _VMSMasterEnv->readyToAnimateQs );
  10.335 +   VMS_int__free( _VMSMasterEnv->masterVPs );
  10.336 +   VMS_int__free( _VMSMasterEnv->allSchedSlots );
  10.337     
  10.338     //============================= MEASUREMENT STUFF ========================
  10.339     #ifdef STATS__TURN_ON_PROBES
  10.340 -   freeDynArrayDeep( _VMSMasterEnv->dynIntervalProbesInfo, &VMS__free_probe);
  10.341 +   freeDynArrayDeep( _VMSMasterEnv->dynIntervalProbesInfo, &VMS_WL__free_probe);
  10.342     #endif
  10.343     //========================================================================
  10.344  */
  10.345        //These are the only two that use system free 
  10.346 -   VMS_ext__free_free_list( _VMSMasterEnv->freeListHead );
  10.347 +   VMS_ext__free_free_list( _VMSMasterEnv->freeLists );
  10.348     free( (void *)_VMSMasterEnv );
  10.349   }
  10.350  
    11.1 --- a/VMS_defs__HW_specific.h	Wed Feb 22 11:39:12 2012 -0800
    11.2 +++ b/VMS_defs__HW_specific.h	Sun Mar 04 14:26:35 2012 -0800
    11.3 @@ -27,7 +27,7 @@
    11.4     // stack size in virtual processors created
    11.5  #define VIRT_PROCR_STACK_SIZE 0x8000 /* 32K */
    11.6  
    11.7 -   // memory for VMS__malloc
    11.8 +   // memory for VMS_WL__malloc
    11.9  #define MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE 0x40000000 /* 1G */
   11.10  
   11.11     //Frequency of TS counts -- have to do tests to verify
    12.1 --- a/VMS_defs__lang_specific.h	Wed Feb 22 11:39:12 2012 -0800
    12.2 +++ b/VMS_defs__lang_specific.h	Sun Mar 04 14:26:35 2012 -0800
    12.3 @@ -13,80 +13,8 @@
    12.4  
    12.5  //===================  Language-specific Measurement Stuff ===================
    12.6  //
    12.7 -//TODO:  Figure out way to move these into language dir..
    12.8 -//   wrap them in #ifdef MEAS__...
    12.9 +//TODO:  move these into the language implementation directories
   12.10  //
   12.11 -#ifndef MAKE_HISTS_FOR_MEASUREMENTS
   12.12 -#define MakeTheMeasHists() 
   12.13 -#endif
   12.14 -
   12.15 -//===========================================================================
   12.16 -//VPThread
   12.17 -#ifdef VTHREAD
   12.18 -
   12.19 -#define createHistIdx      1  //note: starts at 1
   12.20 -#define mutexLockHistIdx   2
   12.21 -#define mutexUnlockHistIdx 3
   12.22 -#define condWaitHistIdx    4
   12.23 -#define condSignalHistIdx  5
   12.24 -
   12.25 -#define MakeTheMeasHists() \
   12.26 -   _VMSMasterEnv->measHistsInfo = \
   12.27 -              makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \
   12.28 -   makeAMeasHist( createHistIdx,      "create",        250, 0, 100 ) \
   12.29 -   makeAMeasHist( mutexLockHistIdx,   "mutex_lock",    50, 0, 100 ) \
   12.30 -   makeAMeasHist( mutexUnlockHistIdx, "mutex_unlock",  50, 0, 100 ) \
   12.31 -   makeAMeasHist( condWaitHistIdx,    "cond_wait",     50, 0, 100 ) \
   12.32 -   makeAMeasHist( condSignalHistIdx,  "cond_signal",   50, 0, 100 )
   12.33 -
   12.34 -   
   12.35 -#define Meas_startCreate \
   12.36 -    int32 startStamp, endStamp; \
   12.37 -    saveLowTimeStampCountInto( startStamp ); \
   12.38 -
   12.39 -#define Meas_endCreate \
   12.40 -    saveLowTimeStampCountInto( endStamp ); \
   12.41 -    addIntervalToHist( startStamp, endStamp, \
   12.42 -                                 _VMSMasterEnv->measHists[ createHistIdx ] );
   12.43 -
   12.44 -#define Meas_startMutexLock \
   12.45 -    int32 startStamp, endStamp; \
   12.46 -    saveLowTimeStampCountInto( startStamp ); \
   12.47 -
   12.48 -#define Meas_endMutexLock \
   12.49 -    saveLowTimeStampCountInto( endStamp ); \
   12.50 -    addIntervalToHist( startStamp, endStamp, \
   12.51 -                              _VMSMasterEnv->measHists[ mutexLockHistIdx ] );
   12.52 -
   12.53 -#define Meas_startMutexUnlock \
   12.54 -    int32 startStamp, endStamp; \
   12.55 -    saveLowTimeStampCountInto( startStamp ); \
   12.56 -
   12.57 -#define Meas_endMutexUnlock \
   12.58 -    saveLowTimeStampCountInto( endStamp ); \
   12.59 -    addIntervalToHist( startStamp, endStamp, \
   12.60 -                            _VMSMasterEnv->measHists[ mutexUnlockHistIdx ] );
   12.61 -
   12.62 -#define Meas_startCondWait \
   12.63 -    int32 startStamp, endStamp; \
   12.64 -    saveLowTimeStampCountInto( startStamp ); \
   12.65 -
   12.66 -#define Meas_endCondWait \
   12.67 -    saveLowTimeStampCountInto( endStamp ); \
   12.68 -    addIntervalToHist( startStamp, endStamp, \
   12.69 -                               _VMSMasterEnv->measHists[ condWaitHistIdx ] );
   12.70 -
   12.71 -#define Meas_startCondSignal \
   12.72 -    int32 startStamp, endStamp; \
   12.73 -    saveLowTimeStampCountInto( startStamp ); \
   12.74 -
   12.75 -#define Meas_endCondSignal \
   12.76 -    saveLowTimeStampCountInto( endStamp ); \
   12.77 -    addIntervalToHist( startStamp, endStamp, \
   12.78 -                             _VMSMasterEnv->measHists[ condSignalHistIdx ] );
   12.79 -
   12.80 -#endif
   12.81 -
   12.82  
   12.83  
   12.84  //===========================================================================
   12.85 @@ -97,7 +25,7 @@
   12.86  #define spawnHistIdx      1 //note: starts at 1
   12.87  #define syncHistIdx       2
   12.88  
   12.89 -#define MakeTheMeasHists() \
   12.90 +#define MEAS__Make_Meas_Hists_for_Language() \
   12.91     _VMSMasterEnv->measHistsInfo = \
   12.92            makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \
   12.93      makeAMeasHist( spawnHistIdx,      "Spawn",        50, 0, 200 ) \
   12.94 @@ -133,7 +61,7 @@
   12.95  #define ReceiveFromToHistIdx   3
   12.96  #define ReceiveOfTypeHistIdx   4
   12.97  
   12.98 -#define MakeTheMeasHists() \
   12.99 +#define MEAS__Make_Meas_Hists_for_Language() \
  12.100     _VMSMasterEnv->measHistsInfo = \
  12.101                makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \
  12.102      makeAMeasHist( SendFromToHistIdx,   "SendFromTo",    50, 0, 100 ) \
    13.1 --- a/VMS_defs__main.h	Wed Feb 22 11:39:12 2012 -0800
    13.2 +++ b/VMS_defs__main.h	Sun Mar 04 14:26:35 2012 -0800
    13.3 @@ -6,8 +6,8 @@
    13.4   * 
    13.5   */
    13.6  
    13.7 -#ifndef _VMS_DEFS_H
    13.8 -#define	_VMS_DEFS_H
    13.9 +#ifndef _VMS_DEFS_MAIN_H
   13.10 +#define	_VMS_DEFS_MAIN_H
   13.11  #define _GNU_SOURCE
   13.12  
   13.13  //===========================  VMS-wide defs  ===============================
   13.14 @@ -19,7 +19,7 @@
   13.15     // so these defs can be at the top, and writePrivQ defined later on..
   13.16  #define writeVMSQ     writePrivQ
   13.17  #define readVMSQ      readPrivQ
   13.18 -#define makeVMSQ      makeVMSPrivQ
   13.19 +#define makeVMSQ      makePrivQ
   13.20  #define numInVMSQ     numInPrivQ
   13.21  #define VMSQueueStruc PrivQueueStruc
   13.22  
   13.23 @@ -31,21 +31,21 @@
   13.24  //
   13.25  //When SEQUENTIAL is defined, VMS does sequential exe in the main thread
   13.26  // It still does co-routines and all the mechanisms are the same, it just
   13.27 -// has only a single thread and animates VPs one at a time
   13.28 +// has only a single thread and animates Slvs one at a time
   13.29  //#define SEQUENTIAL
   13.30  
   13.31  //#define USE_WORK_STEALING
   13.32  
   13.33  //turns on the probe-instrumentation in the application -- when not
   13.34  // defined, the calls to the probe functions turn into comments
   13.35 -#define STATS__ENABLE_PROBES
   13.36 +//#define STATS__ENABLE_PROBES
   13.37  //#define TURN_ON_DEBUG_PROBES
   13.38  
   13.39  //These defines turn types of debug messages on and off
   13.40  // be sure debug messages are un-commented (next block of defines)
   13.41  #define dbgAppFlow   TRUE /* Top level flow of application code -- general*/
   13.42  #define dbgProbes    FALSE /* for issues inside probes themselves*/
   13.43 -#define dbgB2BMaster FALSE /* in coreloop, back to back master VPs*/
   13.44 +#define dbgB2BMaster FALSE /* in coreloop, back to back master Slvs*/
   13.45  #define dbgRqstHdlr  FALSE /* in request handler code*/
   13.46  
   13.47  //Comment or un- the substitute half to turn on/off types of debug message
   13.48 @@ -74,112 +74,310 @@
   13.49  
   13.50  //==================  Turn Measurement Things on and off ====================
   13.51  
   13.52 -//#define MEAS__TIME_2011_SYS
   13.53 +//#define MEAS__TURN_ON_SYSTEM_MEAS
   13.54  
   13.55 -//define this if any MEAS__... below are
   13.56 -//#define MAKE_HISTS_FOR_MEASUREMENTS
   13.57 -   //when MEAS__TIME_STAMP_SUSP is defined, causes code to be inserted and
   13.58 -   // compiled-in that saves the low part of the time stamp count just before
   13.59 -   // suspending a processor and just after resuming that processor.  It is
   13.60 -   // saved into a field added to VirtProcr.  Have to sanity-check for
   13.61 -   // rollover of low portion into high portion.
   13.62 -//#define MEAS__TIME_STAMP_SUSP
   13.63 -//#define MEAS__TIME_MASTER
   13.64 -//#define MEAS__TIME_PLUGIN
   13.65 -//#define MEAS__TIME_MALLOC
   13.66 -//#define MEAS__TIME_MASTER_LOCK
   13.67 +/*NOTE: define MEAS__TURN_ON_MAKE_HISTS if any other MEAS__... below are defined*/
   13.68 +//#define MEAS__TURN_ON_MAKE_HISTS
   13.69  
   13.70 -   //For code that calculates normalization-offset between TSC counts of
   13.71 -   // different cores.
   13.72 -//#define NUM_TSC_ROUND_TRIPS 10
   13.73 +//#define MEAS__TURN_ON_SUSP_MEAS
   13.74 +//#define MEAS__TURN_ON_MASTER_MEAS
   13.75 +//#define MEAS__TURN_ON_PLUGIN_MEAS
   13.76 +//#define MEAS__TURN_ON_MALLOC_MEAS
   13.77 +//#define MEAS__TURN_ON_MASTER_LOCK_MEAS
   13.78  
   13.79 +   /*turn on/off subtraction of create measurements from plugin meas*/
   13.80 +//#define MEAS__TURN_ON_EXCLUDE_CREATION_TIME 
   13.81  
   13.82  
   13.83 -//===================  Macros to Capture Measurements  ======================
   13.84 -//
   13.85 -//===== RDTSC wrapper ===== 
   13.86 -//Also runs with x86_64 code
   13.87 -#define saveTSCLowHigh(lowHighIn) \
   13.88 -   asm volatile("RDTSC;                   \
   13.89 -                 movl %%eax, %0;          \
   13.90 -                 movl %%edx, %1;"         \
   13.91 -   /* outputs */ : "=m" (lowHighIn.lowHigh[0]), "=m" (lowHighIn.lowHigh[1])\
   13.92 -   /* inputs  */ :                        \
   13.93 -   /* clobber */ : "%eax", "%edx"         \
   13.94 -                );
   13.95 -
   13.96 -#define saveTimeStampCountInto(low, high) \
   13.97 -   asm volatile("RDTSC;                   \
   13.98 -                 movl %%eax, %0;          \
   13.99 -                 movl %%edx, %1;"         \
  13.100 -   /* outputs */ : "=m" (low), "=m" (high)\
  13.101 -   /* inputs  */ :                        \
  13.102 -   /* clobber */ : "%eax", "%edx"         \
  13.103 -                );
  13.104 -
  13.105 -#define saveLowTimeStampCountInto(low)    \
  13.106 -   asm volatile("RDTSC;                   \
  13.107 -                 movl %%eax, %0;"         \
  13.108 -   /* outputs */ : "=m" (low)             \
  13.109 -   /* inputs  */ :                        \
  13.110 -   /* clobber */ : "%eax", "%edx"         \
  13.111 -                );
  13.112 -
  13.113  
  13.114  //==================  Macros define types of meas want  =====================
  13.115 -#ifdef MEAS__TIME_PLUGIN
  13.116  
  13.117 -#define Meas_startReqHdlr \
  13.118 -        int32 startStamp1, endStamp1; \
  13.119 -        saveLowTimeStampCountInto( startStamp1 );
  13.120 +#ifdef MEAS__TURN_ON_SUSP_MEAS
  13.121 +   #define MEAS__Insert_Susp_Meas_Fields_into_Slave \
  13.122 +       uint32  preSuspTSCLow; \
  13.123 +       uint32  postSuspTSCLow;
  13.124  
  13.125 -#define Meas_endReqHdlr \
  13.126 -        saveLowTimeStampCountInto( endStamp1 ); \
  13.127 -        addIntervalToHist( startStamp1, endStamp1, \
  13.128 -                           _VMSMasterEnv->reqHdlrLowTimeHist ); \
  13.129 -        addIntervalToHist( startStamp1, endStamp1, \
  13.130 -                           _VMSMasterEnv->reqHdlrHighTimeHist );
  13.131 -               
  13.132 -#elif defined MEAS__TIME_2011_SYS
  13.133 -#define Meas_startMasterLoop \
  13.134 -        TSCountLowHigh startStamp1, endStamp1; \
  13.135 -        saveTSCLowHigh( endStamp1 ); \
  13.136 -        _VMSMasterEnv->cyclesTillStartMasterLoop = \
  13.137 -        endStamp1.longVal - masterVP->startSusp.longVal;
  13.138 +   #define MEAS__Insert_Susp_Meas_Fields_into_MasterEnv \
  13.139 +       Histogram       *suspLowTimeHist; \
  13.140 +       Histogram       *suspHighTimeHist;
  13.141  
  13.142 -#define Meas_startReqHdlr \
  13.143 -        saveTSCLowHigh( startStamp1 ); \
  13.144 -        _VMSMasterEnv->startReqHdlr.longVal = startStamp1.longVal;
  13.145 +   #define MEAS__Make_Meas_Hists_for_Susp_Meas \
  13.146 +      _VMSMasterEnv->suspLowTimeHist  = makeFixedBinHistExt( 100, 0, 200,\
  13.147 +                                                    "master_low_time_hist");\
  13.148 +      _VMSMasterEnv->suspHighTimeHist  = makeFixedBinHistExt( 100, 0, 200,\
  13.149 +                                                    "master_high_time_hist");
  13.150 +      
  13.151 +      //record time stamp: compare to time-stamp recorded below
  13.152 +   #define MEAS__Capture_Pre_Susp_Point \
  13.153 +      saveLowTimeStampCountInto( animatingSlv->preSuspTSCLow );
  13.154 +   
  13.155 +      //NOTE: only take low part of count -- do sanity check when take diff
  13.156 +   #define MEAS__Capture_Post_Susp_Point \
  13.157 +      saveLowTimeStampCountInto( animatingSlv->postSuspTSCLow );\
  13.158 +      addIntervalToHist( preSuspTSCLow, postSuspTSCLow,\
  13.159 +                         _VMSMasterEnv->suspLowTimeHist ); \
  13.160 +      addIntervalToHist( preSuspTSCLow, postSuspTSCLow,\
  13.161 +                         _VMSMasterEnv->suspHighTimeHist );
  13.162  
  13.163 -#define Meas_endReqHdlr 
  13.164 +   #define MEAS__Print_Hists_for_Susp_Meas \
  13.165 +      printHist( _VMSMasterEnv->pluginTimeHist );
  13.166 +      
  13.167 +#else
  13.168 +   #define MEAS__Insert_Susp_Meas_Fields_into_Slave     
  13.169 +   #define MEAS__Insert_Susp_Meas_Fields_into_MasterEnv 
  13.170 +   #define MEAS__Make_Meas_Hists_for_Susp_Meas 
  13.171 +   #define MEAS__Capture_Pre_Susp_Point
  13.172 +   #define MEAS__Capture_Post_Susp_Point   
  13.173 +   #define MEAS__Print_Hists_for_Susp_Meas 
  13.174 +#endif
  13.175  
  13.176 -#define Meas_endMasterLoop \
  13.177 -        saveTSCLowHigh( startStamp1 ); \
  13.178 -        _VMSMasterEnv->endMasterLoop.longVal = startStamp1.longVal;
  13.179 +#ifdef MEAS__TURN_ON_MASTER_MEAS
  13.180 +   #define MEAS__Insert_Master_Meas_Fields_into_Slave \
  13.181 +       uint32  startMasterTSCLow; \
  13.182 +       uint32  endMasterTSCLow;
  13.183 +
  13.184 +   #define MEAS__Insert_Master_Meas_Fields_into_MasterEnv \
  13.185 +       Histogram       *masterLowTimeHist; \
  13.186 +       Histogram       *masterHighTimeHist;
  13.187 +
  13.188 +   #define MEAS__Make_Meas_Hists_for_Master_Meas \
  13.189 +      _VMSMasterEnv->masterLowTimeHist  = makeFixedBinHistExt( 100, 0, 200,\
  13.190 +                                                    "master_low_time_hist");\
  13.191 +      _VMSMasterEnv->masterHighTimeHist  = makeFixedBinHistExt( 100, 0, 200,\
  13.192 +                                                    "master_high_time_hist");
  13.193 +
  13.194 +      //Total Master time includes one coreloop time -- just assume the core
  13.195 +      // loop time is same for Master as for AppSlvs, even though it may be
  13.196 +      // smaller due to higher predictability of the fixed jmp.
  13.197 +   #define MEAS__Capture_Pre_Master_Point\
  13.198 +      saveLowTimeStampCountInto( masterVP->startMasterTSCLow );
  13.199 +
  13.200 +   #define MEAS__Capture_Post_Master_Point \
  13.201 +      saveLowTimeStampCountInto( masterVP->endMasterTSCLow );\
  13.202 +      addIntervalToHist( startMasterTSCLow, endMasterTSCLow,\
  13.203 +                         _VMSMasterEnv->masterLowTimeHist ); \
  13.204 +      addIntervalToHist( startMasterTSCLow, endMasterTSCLow,\
  13.205 +                         _VMSMasterEnv->masterHighTimeHist );
  13.206 +
  13.207 +   #define MEAS__Print_Hists_for_Master_Meas \
  13.208 +      printHist( _VMSMasterEnv->pluginTimeHist );
  13.209  
  13.210  #else
  13.211 -#define Meas_startMasterLoop 
  13.212 -#define Meas_startReqHdlr 
  13.213 -#define Meas_endReqHdlr 
  13.214 -#define Meas_endMasterLoop
  13.215 +   #define MEAS__Insert_Master_Meas_Fields_into_Slave
  13.216 +   #define MEAS__Insert_Master_Meas_Fields_into_MasterEnv 
  13.217 +   #define MEAS__Make_Meas_Hists_for_Master_Meas
  13.218 +   #define MEAS__Capture_Pre_Master_Point 
  13.219 +   #define MEAS__Capture_Post_Master_Point 
  13.220 +   #define MEAS__Print_Hists_for_Master_Meas 
  13.221  #endif
  13.222  
  13.223 +      
  13.224 +#ifdef MEAS__TURN_ON_MASTER_LOCK_MEAS
  13.225 +   #define MEAS__Insert_Master_Lock_Meas_Fields_into_MasterEnv \
  13.226 +       Histogram       *masterLockLowTimeHist; \
  13.227 +       Histogram       *masterLockHighTimeHist;
  13.228 +
  13.229 +   #define MEAS__Make_Meas_Hists_for_Master_Lock_Meas \
  13.230 +      _VMSMasterEnv->masterLockLowTimeHist  = makeFixedBinHist( 50, 0, 2, \
  13.231 +                                               "master lock low time hist");\
  13.232 +      _VMSMasterEnv->masterLockHighTimeHist  = makeFixedBinHist( 50, 0, 100,\
  13.233 +                                               "master lock high time hist");
  13.234 +
  13.235 +   #define MEAS__Capture_Pre_Master_Lock_Point \
  13.236 +      int32 startStamp, endStamp; \
  13.237 +      saveLowTimeStampCountInto( startStamp );
  13.238 +
  13.239 +   #define MEAS__Capture_Post_Master_Lock_Point \
  13.240 +      saveLowTimeStampCountInto( endStamp ); \
  13.241 +      addIntervalToHist( startStamp, endStamp,\
  13.242 +                         _VMSMasterEnv->masterLockLowTimeHist ); \
  13.243 +      addIntervalToHist( startStamp, endStamp,\
  13.244 +                         _VMSMasterEnv->masterLockHighTimeHist );
  13.245 +
  13.246 +   #define MEAS__Print_Hists_for_Master_Lock_Meas \
  13.247 +      printHist( _VMSMasterEnv->masterLockLowTimeHist ); \
  13.248 +      printHist( _VMSMasterEnv->masterLockHighTimeHist );
  13.249 +      
  13.250 +#else
  13.251 +   #define MEAS__Insert_Master_Lock_Meas_Fields_into_MasterEnv
  13.252 +   #define MEAS__Make_Meas_Hists_for_Master_Lock_Meas
  13.253 +   #define MEAS__Capture_Pre_Master_Lock_Point 
  13.254 +   #define MEAS__Capture_Post_Master_Lock_Point 
  13.255 +   #define MEAS__Print_Hists_for_Master_Lock_Meas
  13.256 +#endif
  13.257 +
  13.258 +
  13.259 +#ifdef MEAS__TURN_ON_MALLOC_MEAS
  13.260 +   #define MEAS__Insert_Malloc_Meas_Fields_into_MasterEnv\
  13.261 +       Histogram       *mallocTimeHist; \
  13.262 +       Histogram       *freeTimeHist;
  13.263 +
  13.264 +   #define MEAS__Make_Meas_Hists_for_Malloc_Meas \
  13.265 +      _VMSMasterEnv->mallocTimeHist  = makeFixedBinHistExt( 100, 0, 30,\
  13.266 +                                                       "malloc_time_hist");\
  13.267 +      _VMSMasterEnv->freeTimeHist  = makeFixedBinHistExt( 100, 0, 30,\
  13.268 +                                                       "free_time_hist");
  13.269 +
  13.270 +   #define MEAS__Capture_Pre_Malloc_Point \
  13.271 +      int32 startStamp, endStamp; \
  13.272 +      saveLowTimeStampCountInto( startStamp );
  13.273 +
  13.274 +   #define MEAS__Capture_Post_Malloc_Point \
  13.275 +      saveLowTimeStampCountInto( endStamp ); \
  13.276 +      addIntervalToHist( startStamp, endStamp,\
  13.277 +                         _VMSMasterEnv->mallocTimeHist ); 
  13.278 +
  13.279 +   #define MEAS__Capture_Pre_Free_Point \
  13.280 +      int32 startStamp, endStamp; \
  13.281 +      saveLowTimeStampCountInto( startStamp );
  13.282 +
  13.283 +   #define MEAS__Capture_Post_Free_Point \
  13.284 +      saveLowTimeStampCountInto( endStamp ); \
  13.285 +      addIntervalToHist( startStamp, endStamp,\
  13.286 +                         _VMSMasterEnv->freeTimeHist ); 
  13.287 +
  13.288 +   #define MEAS__Print_Hists_for_Malloc_Meas \
  13.289 +      printHist( _VMSMasterEnv->mallocTimeHist   ); \
  13.290 +      saveHistToFile( _VMSMasterEnv->mallocTimeHist   ); \
  13.291 +      printHist( _VMSMasterEnv->freeTimeHist     ); \
  13.292 +      saveHistToFile( _VMSMasterEnv->freeTimeHist     ); \
  13.293 +      freeHistExt( _VMSMasterEnv->mallocTimeHist ); \
  13.294 +      freeHistExt( _VMSMasterEnv->freeTimeHist   );
  13.295 +      
  13.296 +#else
  13.297 +   #define MEAS__Insert_Malloc_Meas_Fields_into_MasterEnv
  13.298 +   #define MEAS__Make_Meas_Hists_for_Malloc_Meas 
  13.299 +   #define MEAS__Capture_Pre_Malloc_Point
  13.300 +   #define MEAS__Capture_Post_Malloc_Point
  13.301 +   #define MEAS__Capture_Pre_Free_Point
  13.302 +   #define MEAS__Capture_Post_Free_Point
  13.303 +   #define MEAS__Print_Hists_for_Malloc_Meas 
  13.304 +#endif
  13.305 +
  13.306 +      
  13.307 +#ifdef MEAS__TURN_ON_SYSTEM_MEAS
  13.308 +   #define MEAS__Insert_System_Meas_Fields_into_Slave \
  13.309 +      TSCountLowHigh  startSusp; \
  13.310 +      uint64  totalSuspCycles; \
  13.311 +      uint32  numGoodSusp;
  13.312 +
  13.313 +   #define MEAS__Insert_System_Meas_Fields_into_MasterEnv \
  13.314 +       TSCountLowHigh   startMaster; \
  13.315 +       uint64           totalMasterCycles; \
  13.316 +       uint32           numMasterAnimations; \
  13.317 +       TSCountLowHigh   startReqHdlr; \
  13.318 +       uint64           totalPluginCycles; \
  13.319 +       uint32           numPluginAnimations; \
  13.320 +       uint64           cyclesTillStartMasterLoop; \
  13.321 +       TSCountLowHigh   endMasterLoop; 
  13.322 +
  13.323 +#else
  13.324 +   #define MEAS__Insert_System_Meas_Fields_into_Slave 
  13.325 +   #define MEAS__Insert_System_Meas_Fields_into_MasterEnv 
  13.326 +#endif
  13.327 +
  13.328 +
  13.329 +/*This macro's a bit weird -- the same macro is defined in three different
  13.330 + * ways, depending upon which defines are turned on
  13.331 + *That's because the system meas was added, which interferes with plugin
  13.332 + * meas, but plugin meas should not stop working -- this is a compromise
  13.333 + */
  13.334 +#ifdef MEAS__TURN_ON_PLUGIN_MEAS 
  13.335 +   #define MEAS__Insert_Plugin_Meas_Fields_into_MasterEnv \
  13.336 +      Histogram       *reqHdlrLowTimeHist; \
  13.337 +      Histogram       *reqHdlrHighTimeHist;
  13.338 +          
  13.339 +   #define MEAS__Make_Meas_Hists_for_Plugin_Meas \
  13.340 +      _VMSMasterEnv->reqHdlrLowTimeHist  = makeFixedBinHistExt( 100, 0, 200,\
  13.341 +                                                    "plugin_low_time_hist");\
  13.342 +      _VMSMasterEnv->reqHdlrHighTimeHist  = makeFixedBinHistExt( 100, 0, 200,\
  13.343 +                                                    "plugin_high_time_hist");
  13.344 +
  13.345 +   #define Meas_startReqHdlr \
  13.346 +      int32 startStamp1, endStamp1; \
  13.347 +      saveLowTimeStampCountInto( startStamp1 );
  13.348 +
  13.349 +   #define Meas_endReqHdlr \
  13.350 +      saveLowTimeStampCountInto( endStamp1 ); \
  13.351 +      addIntervalToHist( startStamp1, endStamp1, \
  13.352 +                           _VMSMasterEnv->reqHdlrLowTimeHist ); \
  13.353 +      addIntervalToHist( startStamp1, endStamp1, \
  13.354 +                           _VMSMasterEnv->reqHdlrHighTimeHist );
  13.355 +
  13.356 +   #define MEAS__Print_Hists_for_Plugin_Meas \
  13.357 +      printHist( _VMSMasterEnv->reqHdlrLowTimeHist ); \
  13.358 +      saveHistToFile( _VMSMasterEnv->reqHdlrLowTimeHist ); \
  13.359 +      printHist( _VMSMasterEnv->reqHdlrHighTimeHist ); \
  13.360 +      saveHistToFile( _VMSMasterEnv->reqHdlrHighTimeHist ); \
  13.361 +      freeHistExt( _VMSMasterEnv->reqHdlrLowTimeHist ); \
  13.362 +      freeHistExt( _VMSMasterEnv->reqHdlrHighTimeHist );
  13.363 +               
  13.364 +#elif defined MEAS__TURN_ON_SYSTEM_MEAS
  13.365 +   #define MEAS__Insert_Plugin_Meas_Fields_into_MasterEnv 
  13.366 +
  13.367 +   #define MEAS__Make_Meas_Hists_for_Plugin_Meas 
  13.368 +
  13.369 +   #define Meas_startMasterLoop \
  13.370 +      TSCountLowHigh startStamp1, endStamp1; \
  13.371 +      saveTSCLowHigh( endStamp1 ); \
  13.372 +      _VMSMasterEnv->cyclesTillStartMasterLoop = \
  13.373 +      endStamp1.longVal - masterVP->startSusp.longVal;
  13.374 +
  13.375 +   #define Meas_endMasterLoop \
  13.376 +      saveTSCLowHigh( startStamp1 ); \
  13.377 +      _VMSMasterEnv->endMasterLoop.longVal = startStamp1.longVal;
  13.378 +
  13.379 +   #define Meas_startReqHdlr \
  13.380 +      saveTSCLowHigh( startStamp1 ); \
  13.381 +      _VMSMasterEnv->startReqHdlr.longVal = startStamp1.longVal;
  13.382 +
  13.383 +   #define Meas_endReqHdlr 
  13.384 +
  13.385 +   #define MEAS__Print_Hists_for_Plugin_Meas 
  13.386 +
  13.387 +#else
  13.388 +   #define MEAS__Insert_Plugin_Meas_Fields_into_MasterEnv
  13.389 +   #define MEAS__Make_Meas_Hists_for_Plugin_Meas
  13.390 +   #define Meas_startMasterLoop 
  13.391 +   #define Meas_endMasterLoop
  13.392 +   #define Meas_startReqHdlr 
  13.393 +   #define Meas_endReqHdlr 
  13.394 +   #define MEAS__Print_Hists_for_Plugin_Meas 
  13.395 +#endif
  13.396 +
  13.397 +
  13.398 +//Experiment in two-step macros -- if it doesn't work, insert each separately
  13.399 +#define MEAS__Insert_Meas_Fields_into_Slave  \
  13.400 +   MEAS__Insert_Susp_Meas_Fields_into_Slave \
  13.401 +   MEAS__Insert_Master_Meas_Fields_into_Slave \
  13.402 +   MEAS__Insert_System_Meas_Fields_into_Slave
  13.403 +
  13.404 +
  13.405  //======================  Histogram Macros -- Create ========================
  13.406  //
  13.407  //
  13.408 -#ifdef MAKE_HISTS_FOR_MEASUREMENTS
  13.409 -#define makeAMeasHist( idx, name, numBins, startVal, binWidth ) \
  13.410 -   makeHighestDynArrayIndexBeAtLeast( _VMSMasterEnv->measHistsInfo, idx ); \
  13.411 -   _VMSMasterEnv->measHists[idx] =  \
  13.412 +
  13.413 +//The language implementation should include a definition of this macro,
  13.414 +// which creates all the histograms the language uses to collect measurements
  13.415 +// of plugin operation -- so, if the language didn't define it, must
  13.416 +// define it here (as empty), to avoid compile error
  13.417 +#ifndef MEAS__Make_Meas_Hists_for_Language
  13.418 +#define MEAS__Make_Meas_Hists_for_Language() /*consume parens!*/
  13.419 +#endif
  13.420 +
  13.421 +              
  13.422 +#ifdef MEAS__TURN_ON_MAKE_HISTS
  13.423 +   #define makeAMeasHist( idx, name, numBins, startVal, binWidth ) \
  13.424 +      makeHighestDynArrayIndexBeAtLeast( _VMSMasterEnv->measHistsInfo, idx ); \
  13.425 +      _VMSMasterEnv->measHists[idx] =  \
  13.426                         makeFixedBinHist( numBins, startVal, binWidth, name );
  13.427  #else
  13.428 -#define makeAMeasHist( idx, name, numBins, startVal, binWidth )
  13.429 +   #define makeAMeasHist( idx, name, numBins, startVal, binWidth )
  13.430  #endif
  13.431  
  13.432 +//==============================  Probes  ===================================
  13.433  
  13.434 -#define MEAS__SUB_CREATE  /*turn on/off subtraction of create from plugin*/
  13.435  
  13.436 -#include "VMS_defs__lang_specific.h"
  13.437 -
  13.438 +//===========================================================================
  13.439  #endif	/* _VMS_DEFS_H */
  13.440  
    14.1 --- a/probes.c	Wed Feb 22 11:39:12 2012 -0800
    14.2 +++ b/probes.c	Sun Mar 04 14:26:35 2012 -0800
    14.3 @@ -13,100 +13,46 @@
    14.4  
    14.5  
    14.6  //====================  Probes =================
    14.7 -#ifdef STATS__USE_TSC_PROBES
    14.8 -
    14.9 -int32
   14.10 -VMS__create_histogram_probe( int32 numBins, float32 startValue,
   14.11 -                             float32 binWidth, char *nameStr )
   14.12 - { IntervalProbe *newProbe;
   14.13 -   int32 idx;
   14.14 -   FloatHist *hist;
   14.15 -
   14.16 -   idx = VMS__create_single_interval_probe( nameStr );
   14.17 -   newProbe =  _VMSMasterEnv->intervalProbes[ idx ];
   14.18 -
   14.19 -   hist =  makeFloatHistogram( numBins, startValue, binWidth );
   14.20 -   newProbe->hist = hist;
   14.21 -   return idx;
   14.22 - }
   14.23 -
   14.24 -void
   14.25 -VMS_impl__record_interval_start_in_probe( int32 probeID )
   14.26 - { IntervalProbe *probe;
   14.27 -
   14.28 -   probe = _VMSMasterEnv->intervalProbes[ probeID ];
   14.29 -   probe->startStamp = getTSCount();
   14.30 - }
   14.31 -
   14.32 -void
   14.33 -VMS_impl__record_interval_end_in_probe( int32 probeID )
   14.34 - { IntervalProbe *probe;
   14.35 -   TSCount endStamp;
   14.36 -
   14.37 -   endStamp = getTSCount();
   14.38 -
   14.39 -   probe = _VMSMasterEnv->intervalProbes[ probeID ];
   14.40 -   probe->endStamp = endStamp;
   14.41 -
   14.42 -   if( probe->hist != NULL )
   14.43 -    { TSCount interval = probe->endStamp - probe->startStamp;
   14.44 -         //if the interval is sane, then add to histogram
   14.45 -      if( interval < probe->hist->endOfRange * 10 )
   14.46 -         addToFloatHist( interval, probe->hist );
   14.47 -    }
   14.48 - }
   14.49 -
   14.50 -void
   14.51 -VMS_impl__print_stats_of_probe( int32 probeID )
   14.52 - { IntervalProbe *probe;
   14.53 -
   14.54 -   probe = _VMSMasterEnv->intervalProbes[ probeID ];
   14.55 -
   14.56 -   if( probe->hist == NULL )
   14.57 -    {
   14.58 -      printf("probe: %s, interval: %.6lf\n", probe->nameStr,probe->interval);
   14.59 -    }
   14.60 -
   14.61 -   else
   14.62 -    {
   14.63 -      printf( "probe: %s\n", probe->nameStr );
   14.64 -      printFloatHist( probe->hist );
   14.65 -    }
   14.66 - }
   14.67 -#else
   14.68 -
   14.69  /*
   14.70   * In practice, probe operations are called from the app, from inside slaves
   14.71 - *  -- so have to be sure each probe is single-VP owned, and be sure that
   14.72 + *  -- so have to be sure each probe is single-Slv owned, and be sure that
   14.73   *  any place common structures are modified it's done inside the master.
   14.74   * So -- the only place common structures are modified is during creation.
   14.75   *  after that, all mods are to individual instances.
   14.76   *
   14.77   * Thinking perhaps should change the semantics to be that probes are
   14.78   *  attached to the virtual processor -- and then everything is guaranteed
   14.79 - *  to be isolated -- except then can't take any intervals that span VPs,
   14.80 - *  and would have to transfer the probes to Master env when VP dissipates..
   14.81 + *  to be isolated -- except then can't take any intervals that span Slvs,
   14.82 + *  and would have to transfer the probes to Master env when Slv dissipates..
   14.83   *  gets messy..
   14.84   *
   14.85   * For now, just making so that probe creation causes a suspend, so that
   14.86   *  the dynamic array in the master env is only modified from the master
   14.87   * 
   14.88   */
   14.89 +
   14.90 +//============================  Helpers ===========================
   14.91 +inline void 
   14.92 +doNothing()
   14.93 + {
   14.94 + }
   14.95 +
   14.96 +
   14.97  IntervalProbe *
   14.98 -create_generic_probe( char *nameStr, SlaveVP *animPr )
   14.99 -{
  14.100 +create_generic_probe( char *nameStr, SlaveVP *animSlv )
  14.101 + {
  14.102     VMSSemReq reqData;
  14.103  
  14.104     reqData.reqType  = createProbe;
  14.105     reqData.nameStr  = nameStr;
  14.106  
  14.107 -   VMS_WL__send_VMSSem_request( &reqData, animPr );
  14.108 +   VMS_WL__send_VMSSem_request( &reqData, animSlv );
  14.109  
  14.110 -   return animPr->dataRetFromReq;
  14.111 +   return animSlv->dataRetFromReq;
  14.112   }
  14.113  
  14.114  /*Use this version from outside VMS -- it uses external malloc, and modifies
  14.116 + * dynamic array, so can't be animated in a Slv
  14.116 + * dynamic array, so can't be animated in a slave Slv
  14.117   */
  14.118  IntervalProbe *
  14.119  ext__create_generic_probe( char *nameStr )
  14.120 @@ -125,24 +71,38 @@
  14.121     return newProbe;
  14.122   }
  14.123  
  14.124 +//============================ Fns def in header =======================
  14.125  
  14.126 -/*Only call from inside master or main startup/shutdown thread
  14.127 - */
  14.128 -void
  14.129 -VMS_impl__free_probe( IntervalProbe *probe )
  14.130 - { if( probe->hist != NULL )   freeDblHist( probe->hist );
  14.131 -   if( probe->nameStr != NULL) VMS_int__free( probe->nameStr );
  14.132 -   VMS_int__free( probe );
  14.133 +int32
  14.134 +VMS_impl__create_single_interval_probe( char *nameStr, SlaveVP *animSlv )
  14.135 + { IntervalProbe *newProbe;
  14.136 +
  14.137 +   newProbe = create_generic_probe( nameStr, animSlv );
  14.138 +   
  14.139 +   return newProbe->probeID;
  14.140   }
  14.141  
  14.142 +int32
  14.143 +VMS_impl__create_histogram_probe( int32   numBins, float64    startValue,
  14.144 +               float64 binWidth, char   *nameStr, SlaveVP *animSlv )
  14.145 + { IntervalProbe *newProbe;
  14.146 +   DblHist *hist;
  14.147 +
  14.148 +   newProbe = create_generic_probe( nameStr, animSlv );
  14.149 +   
  14.150 +   hist =  makeDblHistogram( numBins, startValue, binWidth );
  14.151 +   newProbe->hist = hist;
  14.152 +   return newProbe->probeID;
  14.153 + }
  14.154 +
  14.155  
  14.156  int32
  14.157 -VMS_impl__record_time_point_into_new_probe( char *nameStr, SlaveVP *animPr)
  14.158 +VMS_impl__record_time_point_into_new_probe( char *nameStr, SlaveVP *animSlv)
  14.159   { IntervalProbe *newProbe;
  14.160     struct timeval *startStamp;
  14.161     float64 startSecs;
  14.162  
  14.163 -   newProbe           = create_generic_probe( nameStr, animPr );
  14.164 +   newProbe           = create_generic_probe( nameStr, animSlv );
  14.165     newProbe->endSecs  = 0;
  14.166  
  14.167     gettimeofday( &(newProbe->startStamp), NULL);
  14.168 @@ -174,30 +134,19 @@
  14.169     return newProbe->probeID;
  14.170   }
  14.171  
  14.172 -int32
  14.173 -VMS_impl__create_single_interval_probe( char *nameStr, SlaveVP *animPr )
  14.174 - { IntervalProbe *newProbe;
  14.175  
  14.176 -   newProbe = create_generic_probe( nameStr, animPr );
  14.177 -   
  14.178 -   return newProbe->probeID;
  14.179 +/*Only call from inside master or main startup/shutdown thread
  14.180 + */
  14.181 +void
  14.182 +VMS_impl__free_probe( IntervalProbe *probe )
  14.183 + { if( probe->hist != NULL )   freeDblHist( probe->hist );
  14.184 +   if( probe->nameStr != NULL) VMS_int__free( probe->nameStr );
  14.185 +   VMS_int__free( probe );
  14.186   }
  14.187  
  14.188 -int32
  14.189 -VMS_impl__create_histogram_probe( int32   numBins, float64    startValue,
  14.190 -               float64 binWidth, char   *nameStr, SlaveVP *animPr )
  14.191 - { IntervalProbe *newProbe;
  14.192 -   DblHist *hist;
  14.193 -
  14.194 -   newProbe = create_generic_probe( nameStr, animPr );
  14.195 -   
  14.196 -   hist =  makeDblHistogram( numBins, startValue, binWidth );
  14.197 -   newProbe->hist = hist;
  14.198 -   return newProbe->probeID;
  14.199 - }
  14.200  
  14.201  void
  14.202 -VMS_impl__index_probe_by_its_name( int32 probeID, SlaveVP *animPr )
  14.203 +VMS_impl__index_probe_by_its_name( int32 probeID, SlaveVP *animSlv )
  14.204   { IntervalProbe *probe;
  14.205  
  14.206     //TODO: fix this To be in Master -- race condition
  14.207 @@ -206,8 +155,9 @@
  14.208     addValueIntoTable(probe->nameStr, probe, _VMSMasterEnv->probeNameHashTbl);
  14.209   }
  14.210  
  14.211 +
  14.212  IntervalProbe *
  14.213 -VMS_impl__get_probe_by_name( char *probeName, SlaveVP *animPr )
  14.214 +VMS_impl__get_probe_by_name( char *probeName, SlaveVP *animSlv )
  14.215   {
  14.216     //TODO: fix this To be in Master -- race condition
  14.217     return getValueFromTable( probeName, _VMSMasterEnv->probeNameHashTbl );
  14.218 @@ -215,21 +165,21 @@
  14.219  
  14.220  
  14.221  /*Everything is local to the animating procr, so no need for request, do
  14.222 - * work locally, in the anim Pr
  14.223 + * work locally, in the anim Slv
  14.224   */
  14.225  void
  14.226 -VMS_impl__record_sched_choice_into_probe( int32 probeID, SlaveVP *animatingPr )
  14.227 +VMS_impl__record_sched_choice_into_probe( int32 probeID, SlaveVP *animatingSlv )
  14.228   { IntervalProbe *probe;
  14.229   
  14.230     probe = _VMSMasterEnv->intervalProbes[ probeID ];
  14.231     probe->schedChoiceWasRecorded = TRUE;
  14.232 -   probe->coreNum = animatingPr->coreAnimatedBy;
  14.233 -   probe->procrID = animatingPr->procrID;
  14.234 -   probe->procrCreateSecs = animatingPr->createPtInSecs;
  14.235 +   probe->coreNum = animatingSlv->coreAnimatedBy;
  14.236 +   probe->slaveID = animatingSlv->procrID;
  14.237 +   probe->slaveCreateSecs = animatingSlv->createPtInSecs;
  14.238   }
  14.239  
  14.240  /*Everything is local to the animating procr, so no need for request, do
  14.241 - * work locally, in the anim Pr
  14.242 + * work locally, in the anim Slv
  14.243   */
  14.244  void
  14.245  VMS_impl__record_interval_start_in_probe( int32 probeID )
  14.246 @@ -237,44 +187,37 @@
  14.247  
  14.248           DEBUG( dbgProbes, "record start of interval\n" )
  14.249     probe = _VMSMasterEnv->intervalProbes[ probeID ];
  14.250 -   gettimeofday( &(probe->startStamp), NULL );
  14.251 +   probe->startStamp = getTSCount();
  14.252   }
  14.253  
  14.254  
  14.255  /*Everything is local to the animating procr, so no need for request, do
  14.256 - * work locally, in the anim Pr
  14.257 + * work locally, in the anim Slv
  14.258 + * 
  14.259 + *This should be safe to run inside SlaveVP -- weird behavior will be due
  14.260 + * to the logical error of having more than one overlapping interval open.
  14.261   */
  14.262  void
  14.263  VMS_impl__record_interval_end_in_probe( int32 probeID )
  14.264   { IntervalProbe *probe;
  14.265 -   struct timeval *endStamp, *startStamp;
  14.266 -   float64 startSecs, endSecs;
  14.267 +   TSCount endStamp;
  14.268  
  14.269 +   endStamp = getTSCount();
  14.270 +   
  14.271           DEBUG( dbgProbes, "record end of interval\n" )
  14.272 -      //possible seg-fault if array resized by diff core right after this
  14.273 -      // one gets probe..?  Something like that?  Might be safe.. don't care
  14.274 +
  14.275     probe = _VMSMasterEnv->intervalProbes[ probeID ];
  14.276 -   gettimeofday( &(probe->endStamp), NULL);
  14.277 -
  14.278 -      //now turn into an interval held in a double
  14.279 -   startStamp = &(probe->startStamp);
  14.280 -   endStamp   = &(probe->endStamp);
  14.281 -
  14.282 -   startSecs = startStamp->tv_sec + ( startStamp->tv_usec / 1000000.0 );
  14.283 -   endSecs   = endStamp->tv_sec   + ( endStamp->tv_usec / 1000000.0 );
  14.284 -
  14.285 -   probe->interval  = endSecs - startSecs;
  14.286 -   probe->startSecs = startSecs;
  14.287 -   probe->endSecs   = endSecs;
  14.288 +   probe->endStamp = endStamp;
  14.289  
  14.290     if( probe->hist != NULL )
  14.291 -    {
  14.292 +    { TSCount interval = probe->endStamp - probe->startStamp;
  14.293           //if the interval is sane, then add to histogram
  14.294 -      if( probe->interval < probe->hist->endOfRange * 10 )
  14.295 -         addToDblHist( probe->interval, probe->hist );
  14.296 +      if( interval < probe->hist->endOfRange * 10 )
  14.297 +         addToFloatHist( interval, probe->hist );
  14.298      }
  14.299   }
  14.300  
  14.301 +
  14.302  void
  14.303  print_probe_helper( IntervalProbe *probe )
  14.304   {
  14.305 @@ -283,7 +226,7 @@
  14.306     
  14.307     if( probe->schedChoiceWasRecorded )
  14.308      { printf( "coreNum: %d, procrID: %d, procrCreated: %0.6f | ",
  14.309 -              probe->coreNum, probe->procrID, probe->procrCreateSecs );
  14.310 +              probe->coreNum, probe->slaveID, probe->slaveCreateSecs );
  14.311      }
  14.312  
  14.313     if( probe->endSecs == 0 ) //just a single point in time
  14.314 @@ -318,22 +261,10 @@
  14.315   }
  14.316  
  14.317  
  14.318 -inline void doNothing(){};
  14.319 -
  14.320 -void
  14.321 -generic_print_probe( void *_probe )
  14.322 - { 
  14.323 -   IntervalProbe *probe = (IntervalProbe *)_probe;
  14.324 -   
  14.325 -   //TODO segfault in printf
  14.326 -   //print_probe_helper( probe );
  14.327 - }
  14.328 -
  14.329  void
  14.330  VMS_impl__print_stats_of_all_probes()
  14.331   {
  14.332     forAllInDynArrayDo( _VMSMasterEnv->dynIntervalProbesInfo,
  14.333 -                       &generic_print_probe );
  14.334 +                       &VMS_impl__print_stats_of_probe );
  14.335     fflush( stdout );
  14.336   }
  14.337 -#endif
    15.1 --- a/probes.h	Wed Feb 22 11:39:12 2012 -0800
    15.2 +++ b/probes.h	Sun Mar 04 14:26:35 2012 -0800
    15.3 @@ -28,16 +28,16 @@
    15.4  
    15.5     int32           schedChoiceWasRecorded;
    15.6     int32           coreNum;
    15.7 -   int32           procrID;
    15.8 -   float64         procrCreateSecs;
    15.9 +   int32           slaveID;
   15.10 +   float64         slaveCreateSecs;
   15.11  
   15.12 -   #ifdef STATS__USE_TSC_PROBES
   15.13 + //  #ifdef STATS__USE_TSC_PROBES
   15.14     TSCount    startStamp;
   15.15     TSCount    endStamp;
   15.16 -   #else
   15.17 -   struct timeval  startStamp;
   15.18 -   struct timeval  endStamp;
   15.19 -   #endif
   15.20 +//   #else
   15.21 +//   struct timeval  startStamp;
   15.22 +//   struct timeval  endStamp;
   15.23 +//   #endif
   15.24     float64         startSecs;
   15.25     float64         endSecs;
   15.26     float64         interval;
   15.27 @@ -45,136 +45,136 @@
   15.28   };
   15.29  
   15.30  
   15.31 +int32
   15.32 +VMS_impl__create_single_interval_probe( char *nameStr, SlaveVP *animSlv );
   15.33 +
   15.34 +int32
   15.35 +VMS_impl__create_histogram_probe( int32   numBins, float64    startValue,
   15.36 +               float64 binWidth, char    *nameStr, SlaveVP *animSlv );
   15.37 +
   15.38 +int32
   15.39 +VMS_impl__record_time_point_into_new_probe( char *nameStr, SlaveVP *animSlv);
   15.40 +
   15.41 +int32
   15.42 +VMS_ext_impl__record_time_point_into_new_probe( char *nameStr );
   15.43 +
   15.44 +void
   15.45 +VMS_impl__free_probe( IntervalProbe *probe );
   15.46 +
   15.47 +void
   15.48 +VMS_impl__index_probe_by_its_name( int32 probeID, SlaveVP *animSlv );
   15.49 +
   15.50 +IntervalProbe *
   15.51 +VMS_impl__get_probe_by_name( char *probeName, SlaveVP *animSlv );
   15.52 +
   15.53 +void
   15.54 +VMS_impl__record_sched_choice_into_probe( int32 probeID, SlaveVP *animSlv );
   15.55 +
   15.56 +void
   15.57 +VMS_impl__record_interval_start_in_probe( int32 probeID );
   15.58 +
   15.59 +void
   15.60 +VMS_impl__record_interval_end_in_probe( int32 probeID );
   15.61 +
   15.62 +void
   15.63 +VMS_impl__print_stats_of_probe( int32 probeID );
   15.64 +
   15.65 +void
   15.66 +VMS_impl__print_stats_of_all_probes();
   15.67 +
   15.68  
   15.69  //======================== Probes =============================
   15.70  //
   15.71  // Use macros to allow turning probes off with a #define switch
   15.72 +// This means probes have zero impact on performance when off
   15.73 +//=============================================================
   15.74 +#define VMS_App__record_time_point_into_new_probe VMS_WL__record_time_point_into_new_probe
   15.75 +#define VMS_ext__record_time_point_into_new_probe
   15.76 +#define VMS_App__create_single_interval_probe   VMS_WL__create_single_interval_probe
   15.77 +#define VMS_App__create_histogram_probe         VMS_WL__create_histogram_probe
   15.78 +#define VMS_App__index_probe_by_its_name        VMS_WL__index_probe_by_its_name
   15.79 +#define VMS_App__get_probe_by_name              VMS_WL__get_probe_by_name
   15.80 +#define VMS_App__record_sched_choice_into_probe VMS_WL__record_sched_choice_into_probe
   15.81 +#define VMS_App__record_interval_start_in_probe VMS_WL__record_interval_start_in_probe 
   15.82 +#define VMS_App__record_interval_end_in_probe   VMS_WL__record_interval_end_in_probe
   15.83 +#define VMS_App__print_stats_of_probe           VMS_WL__print_stats_of_probe
   15.84 +#define VMS_App__print_stats_of_all_probes      VMS_WL__print_stats_of_all_probes 
   15.85 +
   15.86  #ifdef STATS__ENABLE_PROBES
   15.87 -int32
   15.88 -VMS_impl__record_time_point_into_new_probe( char *nameStr,SlaveVP *animPr);
   15.89 -#define VMS__record_time_point_into_new_probe( nameStr, animPr ) \
   15.90 -        VMS_impl__record_time_point_in_new_probe( nameStr, animPr )
   15.91 +#define VMS_WL__record_time_point_into_new_probe( nameStr, animSlv ) \
   15.92 +        VMS_impl__record_time_point_in_new_probe( nameStr, animSlv )
   15.93  
   15.94 -int32
   15.95 -VMS_ext_impl__record_time_point_into_new_probe( char *nameStr );
   15.96  #define VMS_ext__record_time_point_into_new_probe( nameStr ) \
   15.97          VMS_ext_impl__record_time_point_into_new_probe( nameStr )
   15.98  
   15.99 +#define VMS_WL__create_single_interval_probe( nameStr, animSlv ) \
  15.100 +        VMS_impl__create_single_interval_probe( nameStr, animSlv )
  15.101  
  15.102 -int32
  15.103 -VMS_impl__create_single_interval_probe( char *nameStr, SlaveVP *animPr );
  15.104 -#define VMS__create_single_interval_probe( nameStr, animPr ) \
  15.105 -        VMS_impl__create_single_interval_probe( nameStr, animPr )
  15.106 -
  15.107 -
  15.108 -int32
  15.109 -VMS_impl__create_histogram_probe( int32   numBins, float64    startValue,
  15.110 -               float64 binWidth, char    *nameStr, SlaveVP *animPr );
  15.111 -#define VMS__create_histogram_probe(      numBins, startValue,              \
  15.112 -                                          binWidth, nameStr, animPr )       \
  15.113 +#define VMS_WL__create_histogram_probe(      numBins, startValue,              \
  15.114 +                                          binWidth, nameStr, animSlv )       \
  15.115          VMS_impl__create_histogram_probe( numBins, startValue,              \
  15.116 -                                          binWidth, nameStr, animPr )
  15.117 -void
  15.118 -VMS_impl__free_probe( IntervalProbe *probe );
  15.119 -#define VMS__free_probe( probe ) \
  15.120 +                                          binWidth, nameStr, animSlv )
  15.121 +#define VMS_int__free_probe( probe ) \
  15.122          VMS_impl__free_probe( probe )
  15.123  
  15.124 -void
  15.125 -VMS_impl__index_probe_by_its_name( int32 probeID, SlaveVP *animPr );
  15.126 -#define VMS__index_probe_by_its_name( probeID, animPr ) \
  15.127 -        VMS_impl__index_probe_by_its_name( probeID, animPr )
  15.128 +#define VMS_WL__index_probe_by_its_name( probeID, animSlv ) \
  15.129 +        VMS_impl__index_probe_by_its_name( probeID, animSlv )
  15.130  
  15.131 -IntervalProbe *
  15.132 -VMS_impl__get_probe_by_name( char *probeName, SlaveVP *animPr );
  15.133 -#define VMS__get_probe_by_name( probeID, animPr ) \
  15.134 -        VMS_impl__get_probe_by_name( probeName, animPr )
  15.135 +#define VMS_WL__get_probe_by_name( probeID, animSlv ) \
  15.136 +        VMS_impl__get_probe_by_name( probeName, animSlv )
  15.137  
  15.138 -void
  15.139 -VMS_impl__record_sched_choice_into_probe( int32 probeID, SlaveVP *animPr );
  15.140 -#define VMS__record_sched_choice_into_probe( probeID, animPr ) \
  15.141 -        VMS_impl__record_sched_choice_into_probe( probeID, animPr )
  15.142 +#define VMS_WL__record_sched_choice_into_probe( probeID, animSlv ) \
  15.143 +        VMS_impl__record_sched_choice_into_probe( probeID, animSlv )
  15.144  
  15.145 -void
  15.146 -VMS_impl__record_interval_start_in_probe( int32 probeID );
  15.147 -#define VMS__record_interval_start_in_probe( probeID ) \
  15.148 +#define VMS_WL__record_interval_start_in_probe( probeID ) \
  15.149          VMS_impl__record_interval_start_in_probe( probeID )
  15.150  
  15.151 -void
  15.152 -VMS_impl__record_interval_end_in_probe( int32 probeID );
  15.153 -#define VMS__record_interval_end_in_probe( probeID ) \
  15.154 +#define VMS_WL__record_interval_end_in_probe( probeID ) \
  15.155          VMS_impl__record_interval_end_in_probe( probeID )
  15.156  
  15.157 -void
  15.158 -VMS_impl__print_stats_of_probe( int32 probeID );
  15.159 -#define VMS__print_stats_of_probe( probeID ) \
  15.160 +#define VMS_WL__print_stats_of_probe( probeID ) \
  15.161          VMS_impl__print_stats_of_probe( probeID )
  15.162  
  15.163 -void
  15.164 -VMS_impl__print_stats_of_all_probes();
  15.165 -#define VMS__print_stats_of_all_probes() \
  15.166 +#define VMS_WL__print_stats_of_all_probes() \
  15.167          VMS_impl__print_stats_of_all_probes()
  15.168  
  15.169  
  15.170  #else
  15.171 -int32
  15.172 -VMS_impl__record_time_point_into_new_probe( char *nameStr,SlaveVP *animPr);
  15.173 -#define VMS__record_time_point_into_new_probe( nameStr, animPr ) \
  15.174 +#define VMS_WL__record_time_point_into_new_probe( nameStr, animSlv ) \
  15.175         0 /* do nothing */
  15.176  
  15.177 -int32
  15.178 -VMS_ext_impl__record_time_point_into_new_probe( char *nameStr );
  15.179  #define VMS_ext__record_time_point_into_new_probe( nameStr ) \
  15.180         0 /* do nothing */
  15.181  
  15.182  
  15.183 -int32
  15.184 -VMS_impl__create_single_interval_probe( char *nameStr, SlaveVP *animPr );
  15.185 -#define VMS__create_single_interval_probe( nameStr, animPr ) \
  15.186 +#define VMS_WL__create_single_interval_probe( nameStr, animSlv ) \
  15.187         0 /* do nothing */
  15.188  
  15.189  
  15.190 -int32
  15.191 -VMS_impl__create_histogram_probe( int32   numBins, float64    startValue,
  15.192 -               float64 binWidth, char    *nameStr, SlaveVP *animPr );
  15.193 -#define VMS__create_histogram_probe(      numBins, startValue,              \
  15.194 -                                          binWidth, nameStr, animPr )       \
  15.195 +#define VMS_WL__create_histogram_probe(      numBins, startValue,              \
  15.196 +                                          binWidth, nameStr, animSlv )       \
  15.197         0 /* do nothing */
  15.198  
  15.199 -void
  15.200 -VMS_impl__index_probe_by_its_name( int32 probeID, SlaveVP *animPr );
  15.201 -#define VMS__index_probe_by_its_name( probeID, animPr ) \
  15.202 +#define VMS_WL__index_probe_by_its_name( probeID, animSlv ) \
  15.203          /* do nothing */
  15.204  
  15.205 -IntervalProbe *
  15.206 -VMS_impl__get_probe_by_name( char *probeName, SlaveVP *animPr );
  15.207 -#define VMS__get_probe_by_name( probeID, animPr ) \
  15.208 +#define VMS_WL__get_probe_by_name( probeID, animSlv ) \
  15.209         NULL /* do nothing */
  15.210  
  15.211 -void
  15.212 -VMS_impl__record_sched_choice_into_probe( int32 probeID, SlaveVP *animPr );
  15.213 -#define VMS__record_sched_choice_into_probe( probeID, animPr ) \
  15.214 +#define VMS_WL__record_sched_choice_into_probe( probeID, animSlv ) \
  15.215          /* do nothing */
  15.216  
  15.217 -void
  15.218 -VMS_impl__record_interval_start_in_probe( int32 probeID );
  15.219 -#define VMS__record_interval_start_in_probe( probeID ) \
  15.220 +#define VMS_WL__record_interval_start_in_probe( probeID ) \
  15.221          /* do nothing */
  15.222  
  15.223 -void
  15.224 -VMS_impl__record_interval_end_in_probe( int32 probeID );
  15.225 -#define VMS__record_interval_end_in_probe( probeID ) \
  15.226 +#define VMS_WL__record_interval_end_in_probe( probeID ) \
  15.227          /* do nothing */
  15.228  
  15.229 -inline void doNothing();
  15.230 -void
  15.231 -VMS_impl__print_stats_of_probe( int32 probeID );
  15.232 -#define VMS__print_stats_of_probe( probeID ) \
  15.233 -        doNothing/* do nothing */
  15.234 +#define VMS_WL__print_stats_of_probe( probeID ) \
  15.235 +        ; /* do nothing */
  15.236  
  15.237 -void
  15.238 -VMS_impl__print_stats_of_all_probes();
  15.239 -#define VMS__print_stats_of_all_probes \
  15.240 -        doNothing/* do nothing */
  15.241 +#define VMS_WL__print_stats_of_all_probes() \
  15.242 +        ;/* do nothing */
  15.243  
  15.244  #endif   /* defined STATS__ENABLE_PROBES */
  15.245  
    16.1 --- a/vmalloc.c	Wed Feb 22 11:39:12 2012 -0800
    16.2 +++ b/vmalloc.c	Sun Mar 04 14:26:35 2012 -0800
    16.3 @@ -11,46 +11,200 @@
    16.4  #include <inttypes.h>
    16.5  #include <stdlib.h>
    16.6  #include <stdio.h>
    16.7 +#include <string.h>
    16.8 +#include <math.h>
    16.9  
   16.10  #include "VMS.h"
   16.11  #include "C_Libraries/Histogram/Histogram.h"
   16.12  
   16.13 -/*Helper function
   16.14 - *Insert a newly generated free chunk into the first spot on the free list.
   16.15 - * The chunk is cast as a MallocProlog, so the various pointers in it are
   16.16 - * accessed with C's help -- and the size of the prolog is easily added to
   16.17 - * the pointer when a chunk is returned to the app -- so C handles changes
   16.18 - * in pointer sizes among machines.
   16.19 - *
   16.20 - *The list head is a normal MallocProlog struct -- identified by its
   16.21 - * prevChunkInFreeList being NULL -- the only one.
   16.22 - *
   16.23 - *The end of the list is identified by next chunk being NULL, as usual.
   16.24 +#define MAX_UINT64 0xFFFFFFFFFFFFFFFF
   16.25 +
   16.26 +//A MallocProlog is a head element if the HigherInMem variable is NULL
   16.27 +//A Chunk is allocated (not in a free list) if the prevChunkInFreeList variable is NULL
   16.28 +
   16.29 +/*
   16.30 + * This calculates the container which fits the given size.
   16.31   */
   16.32 -void inline
   16.33 -add_chunk_to_free_list( MallocProlog *chunk, MallocProlog *listHead )
   16.34 - { 
   16.35 -   chunk->nextChunkInFreeList     = listHead->nextChunkInFreeList;
   16.36 -   if( chunk->nextChunkInFreeList != NULL ) //if not last in free list
   16.37 -      chunk->nextChunkInFreeList->prevChunkInFreeList = chunk;
   16.38 -   chunk->prevChunkInFreeList     = listHead;
   16.39 -   listHead->nextChunkInFreeList  = chunk;
   16.40 - }
   16.41 +inline
   16.42 +uint32 getContainer(size_t size)
   16.43 +{
   16.44 +    return (log2(size)-LOG128)/LOG54;
   16.45 +}
   16.46  
   16.47 +/*
   16.48 + * Removes the first chunk of a freeList
   16.49 + * The chunk is removed but not marked as allocated. There is no check if
   16.50 + * the free list is empty, so make sure this is not the case.
   16.51 + */
   16.52 +inline
   16.53 +MallocProlog *removeChunk(MallocArrays* freeLists, uint32 containerIdx)
   16.54 +{
   16.55 +    MallocProlog** container = &freeLists->bigChunks[containerIdx];
   16.56 +    MallocProlog*  removedChunk = *container;
   16.57 +    *container = removedChunk->nextChunkInFreeList;
   16.58 +    
   16.59 +    if(removedChunk->nextChunkInFreeList)
   16.60 +        removedChunk->nextChunkInFreeList->prevChunkInFreeList = 
   16.61 +                (MallocProlog*)container;
   16.62 +    
   16.63 +    if(*container == NULL)
   16.64 +    {
   16.65 +       if(containerIdx < 64)
   16.66 +           freeLists->bigChunksSearchVector[0] &= ~((uint64)1 << containerIdx); 
   16.67 +       else
   16.68 +           freeLists->bigChunksSearchVector[1] &= ~((uint64)1 << (containerIdx-64));
   16.69 +    }
   16.70 +    
   16.71 +    return removedChunk;
   16.72 +}
   16.73  
   16.74 -/*This is sequential code, meant to only be called from the Master, not from
   16.75 - * any slave VPs.
   16.76 - *Search down list, checking size by the nextHigherInMem pointer, to find
   16.77 - * first chunk bigger than size needed.
   16.78 - *Shave off the extra and make it into a new free-list element, hook it in
   16.79 - * then return the address of the found element plus size of prolog.
   16.80 - *
   16.81 +/*
   16.82 + * Removes the first chunk of a small-chunk freeList
   16.83 + * The chunk is removed but not marked as allocated. There is no check if
   16.84 + * the free list is empty, so make sure this is not the case.
   16.85 + */
   16.86 +inline
   16.87 +MallocProlog *removeSmallChunk(MallocArrays* freeLists, uint32 containerIdx)
   16.88 +{
   16.89 +    MallocProlog** container = &freeLists->smallChunks[containerIdx];
   16.90 +    MallocProlog*  removedChunk = *container;
   16.91 +    *container = removedChunk->nextChunkInFreeList;
   16.92 +    
   16.93 +    if(removedChunk->nextChunkInFreeList)
   16.94 +        removedChunk->nextChunkInFreeList->prevChunkInFreeList = 
   16.95 +                (MallocProlog*)container;
   16.96 +    
   16.97 +    return removedChunk;
   16.98 +}
   16.99 +
  16.100 +inline
  16.101 +size_t getChunkSize(MallocProlog* chunk)
  16.102 +{
  16.103 +    return (uintptr_t)chunk->nextHigherInMem -
  16.104 +            (uintptr_t)chunk - sizeof(MallocProlog);
  16.105 +}
  16.106 +
  16.107 +/*
  16.108 + * Removes a chunk from a free list.
  16.109 + */
  16.110 +inline
  16.111 +void extractChunk(MallocProlog* chunk, MallocArrays *freeLists)
  16.112 +{
  16.113 +   chunk->prevChunkInFreeList->nextChunkInFreeList = chunk->nextChunkInFreeList;
  16.114 +   if(chunk->nextChunkInFreeList)
  16.115 +       chunk->nextChunkInFreeList->prevChunkInFreeList = chunk->prevChunkInFreeList;
  16.116 +   
  16.117 +   //The last element in the list points to the container. If the container points
  16.118 +   //to NULL the container is empty
  16.119 +   if(*((void**)(chunk->prevChunkInFreeList)) == NULL && getChunkSize(chunk) >= BIG_LOWER_BOUND)
  16.120 +   {
  16.121 +       //Find the appropriate container because we do not know it
  16.122 +       uint64 containerIdx = ((uintptr_t)chunk->prevChunkInFreeList - (uintptr_t)freeLists->bigChunks) >> 3;
  16.123 +       if(containerIdx < (uint32)64)
  16.124 +           freeLists->bigChunksSearchVector[0] &= ~((uint64)1 << containerIdx); 
  16.125 +       if(containerIdx < 128 && containerIdx >=64)
  16.126 +           freeLists->bigChunksSearchVector[1] &= ~((uint64)1 << (containerIdx-64)); 
  16.127 +       
  16.128 +   }
  16.129 +}
  16.130 +
  16.131 +/*
  16.132 + * Merges two chunks.
  16.133 + * Chunk A has to be before chunk B in memory. Both have to be removed from
  16.134 + * a free list
  16.135 + */
  16.136 +inline
  16.137 +MallocProlog *mergeChunks(MallocProlog* chunkA, MallocProlog* chunkB)
  16.138 +{
  16.139 +    chunkA->nextHigherInMem = chunkB->nextHigherInMem;
  16.140 +    chunkB->nextHigherInMem->nextLowerInMem = chunkA;
  16.141 +    return chunkA;
  16.142 +}
  16.143 +/*
  16.144 + * Inserts a chunk into a free list.
  16.145 + */
  16.146 +inline
  16.147 +void insertChunk(MallocProlog* chunk, MallocProlog** container)
  16.148 +{
  16.149 +    chunk->nextChunkInFreeList = *container;
  16.150 +    chunk->prevChunkInFreeList = (MallocProlog*)container;
  16.151 +    if(*container)
  16.152 +        (*container)->prevChunkInFreeList = chunk;
  16.153 +    *container = chunk;
  16.154 +}
  16.155 +
  16.156 +/*
  16.157 + * Divides the chunk so that a new chunk of newSize is created.
  16.158 + * There is no size check, so make sure the size value is valid.
  16.159 + */
  16.160 +inline
  16.161 +MallocProlog *divideChunk(MallocProlog* chunk, size_t newSize)
  16.162 +{
  16.163 +    MallocProlog* newChunk = (MallocProlog*)((uintptr_t)chunk->nextHigherInMem -
  16.164 +            newSize - sizeof(MallocProlog));
  16.165 +    
  16.166 +    newChunk->nextLowerInMem  = chunk;
  16.167 +    newChunk->nextHigherInMem = chunk->nextHigherInMem;
  16.168 +    
  16.169 +    chunk->nextHigherInMem->nextLowerInMem = newChunk;
  16.170 +    chunk->nextHigherInMem = newChunk;
  16.171 +    
  16.172 +    return newChunk;
  16.173 +}
  16.174 +
  16.175 +/* 
  16.176 + * Search for chunk in the list of big chunks. Split the block if it's too big
  16.177 + */
  16.178 +inline
  16.179 +MallocProlog *searchChunk(MallocArrays *freeLists, size_t sizeRequested, uint32 containerIdx)
  16.180 +{
  16.181 +    MallocProlog* foundChunk;
  16.182 +    
  16.183 +    uint64 searchVector = freeLists->bigChunksSearchVector[0];
  16.184 +    //set small chunk bits to zero
  16.185 +    searchVector &= MAX_UINT64 << containerIdx;
  16.186 +    containerIdx = __builtin_ffsl(searchVector);
  16.187 +
  16.188 +    if(containerIdx == 0)
  16.189 +    {
  16.190 +       searchVector = freeLists->bigChunksSearchVector[1];
  16.191 +       containerIdx = __builtin_ffsl(searchVector);
  16.192 +       if(containerIdx == 0)
  16.193 +       {
  16.194 +           printf("VMS malloc failed: low memory");
  16.195 +           exit(1);   
  16.196 +       }
  16.197 +       containerIdx += 64;
  16.198 +    }
  16.199 +    containerIdx--;
  16.200 +    
  16.201 +
  16.202 +    foundChunk = removeChunk(freeLists, containerIdx);
  16.203 +    size_t chunkSize     = getChunkSize(foundChunk);
  16.204 +
  16.205 +    //If the new chunk is larger than the requested size: split
  16.206 +    if(chunkSize > sizeRequested + 2 * sizeof(MallocProlog) + BIG_LOWER_BOUND)
  16.207 +    {
  16.208 +       MallocProlog *newChunk = divideChunk(foundChunk,sizeRequested);
  16.209 +       containerIdx = getContainer(getChunkSize(foundChunk)) - 1;
  16.210 +       insertChunk(foundChunk,&freeLists->bigChunks[containerIdx]);
  16.211 +       if(containerIdx < 64)
  16.212 +           freeLists->bigChunksSearchVector[0] |= ((uint64)1 << containerIdx);
  16.213 +       else
  16.214 +           freeLists->bigChunksSearchVector[1] |= ((uint64)1 << (containerIdx-64));
  16.215 +       foundChunk = newChunk;
  16.216 +    } 
  16.217 +    
  16.218 +    return foundChunk;
  16.219 +}
  16.220 +
  16.221 +
  16.222 +/*
  16.223 + * This is sequential code, meant to only be called from the Master, not from
  16.224 + * any slave Slvs.
  16.225   */
  16.226  void *VMS_int__malloc( size_t sizeRequested )
  16.227 - { MallocProlog *foundElem = NULL, *currElem, *newElem;
  16.228 -   ssize_t        amountExtra, sizeConsumed,sizeOfFound;
  16.229 -   uint32        foundElemIsTopOfHeap;
  16.230 -
  16.231 + {     
  16.232     //============================= MEASUREMENT STUFF ========================
  16.233     #ifdef MEAS__TIME_MALLOC
  16.234     int32 startStamp, endStamp;
  16.235 @@ -58,312 +212,101 @@
  16.236     #endif
  16.237     //========================================================================
  16.238     
  16.239 -      //step up the size to be aligned at 16-byte boundary, prob better ways
  16.240 -   sizeRequested = (sizeRequested + 16) & ~15;
  16.241 -   currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList;
  16.242 -
  16.243 -   while( currElem != NULL )
  16.244 -    {    //check if size of currElem is big enough
  16.245 -      sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem);
  16.246 -      amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog);
  16.247 -      if( amountExtra > 0 )
  16.248 -       {    //found it, get out of loop
  16.249 -         foundElem = currElem;
  16.250 -         currElem = NULL;
  16.251 -       }
  16.252 -      else
  16.253 -         currElem = currElem->nextChunkInFreeList;
  16.254 -    }
  16.255 +   MallocArrays* freeLists = _VMSMasterEnv->freeLists;
  16.256 +   MallocProlog* foundChunk;
  16.257     
  16.258 -   if( foundElem == NULL )
  16.259 -    { ERROR("\nmalloc failed\n")
  16.260 -      return (void *)NULL;  //indicates malloc failed
  16.261 -    }
  16.262 -      //Using a kludge to identify the element that is the top chunk in the
  16.263 -      // heap -- saving top-of-heap addr in head's nextHigherInMem -- and
  16.264 -      // save addr of start of heap in head's nextLowerInMem
  16.265 -      //Will handle top of Heap specially
  16.266 -   foundElemIsTopOfHeap = foundElem->nextHigherInMem ==
  16.267 -                          _VMSMasterEnv->freeListHead->nextHigherInMem;
  16.268 +   //Return a small chunk if the requested size is smaller than 128B
  16.269 +   if(sizeRequested <= LOWER_BOUND)
  16.270 +   {
  16.271 +       uint32 freeListIdx = (sizeRequested-1)/SMALL_CHUNK_SIZE;
  16.272 +       if(freeLists->smallChunks[freeListIdx] == NULL)
  16.273 +           foundChunk = searchChunk(freeLists, SMALL_CHUNK_SIZE*(freeListIdx+1), 0);
  16.274 +       else
  16.275 +           foundChunk = removeSmallChunk(freeLists, freeListIdx);
  16.276 +       
  16.277 +       //Mark as allocated
  16.278 +       foundChunk->prevChunkInFreeList = NULL;      
  16.279 +       return foundChunk + 1;
  16.280 +   }
  16.281     
  16.282 -      //before shave off and try to insert new elem, remove found elem
  16.283 -      //note, foundElem will never be the head, so always has valid prevChunk
  16.284 -   foundElem->prevChunkInFreeList->nextChunkInFreeList =
  16.285 -                                              foundElem->nextChunkInFreeList;
  16.286 -   if( foundElem->nextChunkInFreeList != NULL )
  16.287 -    { foundElem->nextChunkInFreeList->prevChunkInFreeList =
  16.288 -                                              foundElem->prevChunkInFreeList;
  16.289 -    }
  16.290 -   foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated
  16.291 +   //Calculate the expected container. Start one higher to have a Chunk that's
  16.292 +   //always big enough.
  16.293 +   uint32 containerIdx = getContainer(sizeRequested);
  16.294     
  16.295 -      //if enough, turn extra into new elem & insert it
  16.296 -   if( amountExtra > 64 )
  16.297 -    {   //make new elem by adding to addr of curr elem then casting
  16.298 -        sizeConsumed = sizeof(MallocProlog) + sizeRequested; 
  16.299 -        newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed );
  16.300 -        newElem->nextLowerInMem    = foundElem; //This is evil (but why?) 
  16.301 -        newElem->nextHigherInMem   = foundElem->nextHigherInMem; //This is evil (but why?)
  16.302 -        foundElem->nextHigherInMem = newElem;
  16.303 -        if( ! foundElemIsTopOfHeap )
  16.304 -        {  //there is no next higher for top of heap, so can't write to it
  16.305 -           newElem->nextHigherInMem->nextLowerInMem = newElem;
  16.306 -        }
  16.307 -        add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead );
  16.308 -    }
  16.309 +   if(freeLists->bigChunks[containerIdx] == NULL)
  16.310 +       foundChunk = searchChunk(freeLists, sizeRequested, containerIdx); 
  16.311     else
  16.312 -    {
  16.313 -      sizeConsumed = sizeOfFound;
  16.314 -    }
  16.315 -  _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed;
  16.316 -
  16.317 +       foundChunk = removeChunk(freeLists, containerIdx); 
  16.318 +   
  16.319 +   //Mark as allocated
  16.320 +   foundChunk->prevChunkInFreeList = NULL;      
  16.321 +   
  16.322     //============================= MEASUREMENT STUFF ========================
  16.323     #ifdef MEAS__TIME_MALLOC
  16.324     saveLowTimeStampCountInto( endStamp );
  16.325     addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist );
  16.326     #endif
  16.327     //========================================================================
  16.328 -
  16.329 -      //skip over the prolog by adding its size to the pointer return
  16.330 -   return (void*)((uintptr_t)foundElem + sizeof(MallocProlog));
  16.331 +   
  16.332 +   //skip over the prolog by adding its size to the pointer return
  16.333 +   return foundChunk + 1;
  16.334   }
  16.335  
  16.336 -/*This is sequential code, meant to only be called from the Master, not from
  16.337 - * any slave VPs.
  16.338 - *Search down list, checking size by the nextHigherInMem pointer, to find
  16.339 - * first chunk bigger than size needed.
  16.340 - *Shave off the extra and make it into a new free-list element, hook it in
  16.341 - * then return the address of the found element plus size of prolog.
  16.342 - *
  16.343 - * The difference to the regular malloc is, that all the allocated chunks are
  16.344 - * aligned and padded to the size of a CACHE_LINE_SZ. Thus creating a new chunk
  16.345 - * before the aligned chunk.
  16.346 - */
  16.347 -void *VMS_int__malloc_aligned( size_t sizeRequested )
  16.348 - { MallocProlog *foundElem = NULL, *currElem, *newElem;
  16.349 -   ssize_t        amountExtra, sizeConsumed,sizeOfFound,prevAmount;
  16.350 -   uint32        foundElemIsTopOfHeap;
  16.351 -
  16.352 -   //============================= MEASUREMENT STUFF ========================
  16.353 -   #ifdef MEAS__TIME_MALLOC
  16.354 -   uint32 startStamp, endStamp;
  16.355 -   saveLowTimeStampCountInto( startStamp );
  16.356 -   #endif
  16.357 -   //========================================================================
  16.358 -   
  16.359 -      //step up the size to be multiple of the cache line size
  16.360 -   sizeRequested = (sizeRequested + CACHE_LINE_SZ) & ~(CACHE_LINE_SZ-1);
  16.361 -   currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList;
  16.362 -
  16.363 -   while( currElem != NULL )
  16.364 -    {    //check if size of currElem is big enough
  16.365 -      sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem);
  16.366 -      amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog);
  16.367 -      if( amountExtra > 0 )
  16.368 -       {    
  16.369 -         //look if the found element is already aligned
  16.370 -         if((((uintptr_t)currElem+sizeof(MallocProlog)) & (uintptr_t)(CACHE_LINE_SZ-1)) == 0){
  16.371 -             //found it, get out of loop
  16.372 -             foundElem = currElem;
  16.373 -             break;
  16.374 -         }else{
  16.375 -             //find first aligned address and check if it's still big enough
  16.376 -             //check also if the space before the aligned address is big enough
  16.377 -             //for a new element
  16.378 -             void *firstAlignedAddr = (void*)(((uintptr_t)currElem + 2*CACHE_LINE_SZ) & ~((uintptr_t)(CACHE_LINE_SZ-1)));
  16.379 -             prevAmount = (uintptr_t)firstAlignedAddr - (uintptr_t)currElem;
  16.380 -             sizeOfFound=(uintptr_t)currElem->nextHigherInMem -(uintptr_t)firstAlignedAddr + sizeof(MallocProlog);
  16.381 -             amountExtra= sizeOfFound - sizeRequested - sizeof(MallocProlog);
  16.382 -             if(prevAmount > 2*sizeof(MallocProlog) && amountExtra > 0 ){
  16.383 -                 //found suitable element
  16.384 -                 //create new previous element and exit loop
  16.385 -                 MallocProlog *newAlignedElem = (MallocProlog*)firstAlignedAddr - 1;
  16.386 -                 
  16.387 -                 //insert new element into free list
  16.388 -                 if(currElem->nextChunkInFreeList != NULL)
  16.389 -                     currElem->nextChunkInFreeList->prevChunkInFreeList = newAlignedElem;                     
  16.390 -                 newAlignedElem->prevChunkInFreeList = currElem;
  16.391 -                 newAlignedElem->nextChunkInFreeList = currElem->nextChunkInFreeList;
  16.392 -                 currElem->nextChunkInFreeList = newAlignedElem;
  16.393 -                 
  16.394 -                 //set higherInMem and lowerInMem
  16.395 -                 newAlignedElem->nextHigherInMem = currElem->nextHigherInMem;
  16.396 -                 foundElemIsTopOfHeap = currElem->nextHigherInMem ==
  16.397 -                          _VMSMasterEnv->freeListHead->nextHigherInMem;
  16.398 -                 if(!foundElemIsTopOfHeap)
  16.399 -                     currElem->nextHigherInMem->nextLowerInMem = newAlignedElem;
  16.400 -                 currElem->nextHigherInMem = newAlignedElem;
  16.401 -                 newAlignedElem->nextLowerInMem = currElem;
  16.402 -                 
  16.403 -                 //Found new element leaving loop
  16.404 -                 foundElem = newAlignedElem;
  16.405 -                 break;
  16.406 -             }
  16.407 -         }
  16.408 -         
  16.409 -       }
  16.410 -       currElem = currElem->nextChunkInFreeList;
  16.411 -    }
  16.412 -
  16.413 -   if( foundElem == NULL )
  16.414 -    { ERROR("\nmalloc failed\n")
  16.415 -      return (void *)NULL;  //indicates malloc failed
  16.416 -    }
  16.417 -      //Using a kludge to identify the element that is the top chunk in the
  16.418 -      // heap -- saving top-of-heap addr in head's nextHigherInMem -- and
  16.419 -      // save addr of start of heap in head's nextLowerInMem
  16.420 -      //Will handle top of Heap specially
  16.421 -   foundElemIsTopOfHeap = foundElem->nextHigherInMem ==
  16.422 -                          _VMSMasterEnv->freeListHead->nextHigherInMem;
  16.423 -
  16.424 -      //before shave off and try to insert new elem, remove found elem
  16.425 -      //note, foundElem will never be the head, so always has valid prevChunk
  16.426 -   foundElem->prevChunkInFreeList->nextChunkInFreeList =
  16.427 -                                              foundElem->nextChunkInFreeList;
  16.428 -   if( foundElem->nextChunkInFreeList != NULL )
  16.429 -    { foundElem->nextChunkInFreeList->prevChunkInFreeList =
  16.430 -                                              foundElem->prevChunkInFreeList;
  16.431 -    }
  16.432 -   foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated
  16.433 -   
  16.434 -      //if enough, turn extra into new elem & insert it
  16.435 -   if( amountExtra > 64 )
  16.436 -    {    //make new elem by adding to addr of curr elem then casting
  16.437 -      sizeConsumed = sizeof(MallocProlog) + sizeRequested;
  16.438 -      newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed );
  16.439 -      newElem->nextHigherInMem   = foundElem->nextHigherInMem;
  16.440 -      newElem->nextLowerInMem    = foundElem;
  16.441 -      foundElem->nextHigherInMem = newElem;
  16.442 -      
  16.443 -      if( ! foundElemIsTopOfHeap )
  16.444 -       {    //there is no next higher for top of heap, so can't write to it
  16.445 -         newElem->nextHigherInMem->nextLowerInMem = newElem;
  16.446 -       }
  16.447 -      add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead );
  16.448 -    }
  16.449 -   else
  16.450 -    {
  16.451 -      sizeConsumed = sizeOfFound;
  16.452 -    }
  16.453 -  _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed;
  16.454 -
  16.455 -   //============================= MEASUREMENT STUFF ========================
  16.456 -   #ifdef MEAS__TIME_MALLOC
  16.457 -   saveLowTimeStampCountInto( endStamp );
  16.458 -   addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist );
  16.459 -   #endif
  16.460 -   //========================================================================
  16.461 -
  16.462 -      //skip over the prolog by adding its size to the pointer return
  16.463 -   return (void*)((uintptr_t)foundElem + sizeof(MallocProlog));
  16.464 - }
  16.465 -
  16.466 -
  16.467 -/*This is sequential code -- only to be called from the Master
  16.468 - * When free, subtract the size of prolog from pointer, then cast it to a
  16.469 - * MallocProlog.  Then check the nextLower and nextHigher chunks to see if
  16.470 - * one or both are also free, and coalesce if so, and if neither free, then
  16.471 - * add this one to free-list.
  16.472 +/*
  16.473 + * This is sequential code, meant to only be called from the Master, not from
  16.474 + * any slave Slvs.
  16.475   */
  16.476  void
  16.477  VMS_int__free( void *ptrToFree )
  16.478 - { MallocProlog *elemToFree, *nextLowerElem, *nextHigherElem;
  16.479 -   size_t         sizeOfElem;
  16.480 -   uint32         lowerExistsAndIsFree, higherExistsAndIsFree;
  16.481 -
  16.482 + {
  16.483 +    
  16.484     //============================= MEASUREMENT STUFF ========================
  16.485     #ifdef MEAS__TIME_MALLOC
  16.486     int32 startStamp, endStamp;
  16.487     saveLowTimeStampCountInto( startStamp );
  16.488     #endif
  16.489     //========================================================================
  16.490 -
  16.491 -   if( ptrToFree < (void*)_VMSMasterEnv->freeListHead->nextLowerInMem ||
  16.492 -       ptrToFree > (void*)_VMSMasterEnv->freeListHead->nextHigherInMem )
  16.493 -    {    //outside the range of data owned by VMS's malloc, so do nothing
  16.494 -      return;
  16.495 -    }
  16.496 -      //subtract size of prolog to get pointer to prolog, then cast
  16.497 -   elemToFree = (MallocProlog *)((uintptr_t)ptrToFree - sizeof(MallocProlog));
  16.498 -   sizeOfElem =(size_t)((uintptr_t)elemToFree->nextHigherInMem-(uintptr_t)elemToFree);
  16.499 -
  16.500 -   if( elemToFree->prevChunkInFreeList != NULL )
  16.501 -    { printf( "error: freeing same element twice!" ); exit(1);
  16.502 -    }
  16.503 -
  16.504 -   _VMSMasterEnv->amtOfOutstandingMem -= sizeOfElem;
  16.505 -
  16.506 -   nextLowerElem  = elemToFree->nextLowerInMem;
  16.507 -   nextHigherElem = elemToFree->nextHigherInMem;
  16.508 -
  16.509 -   if( nextHigherElem == NULL )
  16.510 -      higherExistsAndIsFree = FALSE;
  16.511 -   else //okay exists, now check if in the free-list by checking back ptr
  16.512 -      higherExistsAndIsFree = (nextHigherElem->prevChunkInFreeList != NULL);
  16.513 -    
  16.514 -   if( nextLowerElem == NULL )
  16.515 -      lowerExistsAndIsFree = FALSE;
  16.516 -   else //okay, it exists, now check if it's free
  16.517 -      lowerExistsAndIsFree = (nextLowerElem->prevChunkInFreeList != NULL);
  16.518 -    
  16.519 -
  16.520 -      //now, know what exists and what's free
  16.521 -   if( lowerExistsAndIsFree )
  16.522 -    { if( higherExistsAndIsFree )
  16.523 -       {    //both exist and are free, so coalesce all three
  16.524 -            //First, remove higher from free-list
  16.525 -         nextHigherElem->prevChunkInFreeList->nextChunkInFreeList =
  16.526 -                                         nextHigherElem->nextChunkInFreeList;
  16.527 -         if( nextHigherElem->nextChunkInFreeList != NULL ) //end-of-list?
  16.528 -            nextHigherElem->nextChunkInFreeList->prevChunkInFreeList =
  16.529 -                                         nextHigherElem->prevChunkInFreeList;
  16.530 -            //Now, fix-up sequence-in-mem list -- by side-effect, this also
  16.531 -            // changes size of the lower elem, which is still in free-list
  16.532 -         nextLowerElem->nextHigherInMem = nextHigherElem->nextHigherInMem;
  16.533 -         if( nextHigherElem->nextHigherInMem !=
  16.534 -             _VMSMasterEnv->freeListHead->nextHigherInMem )
  16.535 -            nextHigherElem->nextHigherInMem->nextLowerInMem = nextLowerElem;
  16.536 -            //notice didn't do anything to elemToFree -- it simply is no
  16.537 -            // longer reachable from any of the lists.  Wonder if could be a
  16.538 -            // security leak because left valid addresses in it,
  16.539 -            // but don't care for now.
  16.540 +   
  16.541 +   MallocArrays* freeLists = _VMSMasterEnv->freeLists;
  16.542 +   MallocProlog *chunkToFree = (MallocProlog*)ptrToFree - 1;
  16.543 +   uint32 containerIdx;
  16.544 +   
  16.545 +   //Check for free neighbors
  16.546 +   if(chunkToFree->nextLowerInMem)
  16.547 +   {
  16.548 +       if(chunkToFree->nextLowerInMem->prevChunkInFreeList != NULL)
  16.549 +       {//Chunk is not allocated
  16.550 +           extractChunk(chunkToFree->nextLowerInMem, freeLists);
  16.551 +           chunkToFree = mergeChunks(chunkToFree->nextLowerInMem, chunkToFree);
  16.552         }
  16.553 -      else
  16.554 -       {    //lower is the only of the two that exists and is free,
  16.555 -            //In this case, no adjustment to free-list, just change mem-list.
  16.556 -            // By side-effect, changes size of the lower elem
  16.557 -         nextLowerElem->nextHigherInMem = elemToFree->nextHigherInMem;
  16.558 -         if( elemToFree->nextHigherInMem !=
  16.559 -             _VMSMasterEnv->freeListHead->nextHigherInMem )
  16.560 -            elemToFree->nextHigherInMem->nextLowerInMem = nextLowerElem;
  16.561 +   }
  16.562 +   if(chunkToFree->nextHigherInMem)
  16.563 +   {
  16.564 +       if(chunkToFree->nextHigherInMem->prevChunkInFreeList != NULL)
  16.565 +       {//Chunk is not allocated
  16.566 +           extractChunk(chunkToFree->nextHigherInMem, freeLists);
  16.567 +           chunkToFree = mergeChunks(chunkToFree, chunkToFree->nextHigherInMem);
  16.568         }
  16.569 -    }
  16.570 +   }
  16.571 +   
  16.572 +   size_t chunkSize = getChunkSize(chunkToFree);
  16.573 +   if(chunkSize < BIG_LOWER_BOUND)
  16.574 +   {
  16.575 +       containerIdx =  (chunkSize/SMALL_CHUNK_SIZE)-1;
  16.576 +       if(containerIdx > SMALL_CHUNK_COUNT-1)
  16.577 +           containerIdx = SMALL_CHUNK_COUNT-1;
  16.578 +       insertChunk(chunkToFree, &freeLists->smallChunks[containerIdx]);
  16.579 +   }
  16.580     else
  16.581 -    {    //lower either doesn't exist or isn't free, so check higher
  16.582 -      if( higherExistsAndIsFree )
  16.583 -       {    //higher exists and is the only of the two free
  16.584 -            //First, in free-list, replace higher elem with the one to free
  16.585 -         elemToFree->nextChunkInFreeList=nextHigherElem->nextChunkInFreeList;
  16.586 -         elemToFree->prevChunkInFreeList=nextHigherElem->prevChunkInFreeList;
  16.587 -         elemToFree->prevChunkInFreeList->nextChunkInFreeList = elemToFree;
  16.588 -         if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list?
  16.589 -            elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree;
  16.590 -            //Now chg mem-list. By side-effect, changes size of elemToFree
  16.591 -         elemToFree->nextHigherInMem = nextHigherElem->nextHigherInMem;
  16.592 -         if( elemToFree->nextHigherInMem !=
  16.593 -             _VMSMasterEnv->freeListHead->nextHigherInMem )
  16.594 -            elemToFree->nextHigherInMem->nextLowerInMem = elemToFree;
  16.595 -       }
  16.596 -      else
  16.597 -       {    //neither lower nor higher is availabe to coalesce so add to list
  16.598 -            // this makes prev chunk ptr non-null, which indicates it's free
  16.599 -         elemToFree->nextChunkInFreeList =
  16.600 -                            _VMSMasterEnv->freeListHead->nextChunkInFreeList;
  16.601 -         _VMSMasterEnv->freeListHead->nextChunkInFreeList = elemToFree;
  16.602 -         if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list?
  16.603 -            elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree;
  16.604 -         elemToFree->prevChunkInFreeList = _VMSMasterEnv->freeListHead;
  16.605 -       }
  16.606 -    }
  16.607 +   {
  16.608 +       containerIdx = getContainer(getChunkSize(chunkToFree)) - 1;
  16.609 +       insertChunk(chunkToFree, &freeLists->bigChunks[containerIdx]);
  16.610 +       if(containerIdx < 64)
  16.611 +           freeLists->bigChunksSearchVector[0] |= (uint64)1 << containerIdx;
  16.612 +       else
  16.613 +           freeLists->bigChunksSearchVector[1] |= (uint64)1 << (containerIdx-64);
  16.614 +   }   
  16.615 +   
  16.616     //============================= MEASUREMENT STUFF ========================
  16.617     #ifdef MEAS__TIME_MALLOC
  16.618     saveLowTimeStampCountInto( endStamp );
  16.619 @@ -373,82 +316,31 @@
  16.620  
  16.621   }
  16.622  
  16.623 -
  16.624 -/*Allocates memory from the external system -- higher overhead
  16.625 - *
  16.626 - *Because of Linux's malloc throwing bizarre random faults when malloc is
  16.627 - * used inside a VMS virtual processor, have to pass this as a request and
  16.628 - * have the core loop do it when it gets around to it -- will look for these
  16.629 - * chores leftover from the previous animation of masterVP the next time it
  16.630 - * goes to animate the masterVP -- so it takes two separate masterVP
  16.631 - * animations, separated by work, to complete an external malloc or
  16.632 - * external free request.
  16.633 - *
  16.634 - *Thinking core loop accepts signals -- just looks if signal-location is
  16.635 - * empty or not --
  16.636 +/*
  16.637 + * Designed to be called from the main thread outside of VMS, during init
  16.638   */
  16.639 -void *
  16.640 -VMS__malloc_in_ext( size_t sizeRequested )
  16.641 - {
  16.642 - /*
  16.643 -      //This is running in the master, so no chance for multiple cores to be
  16.644 -      // competing for the core's flag.
  16.645 -   if(  *(_VMSMasterEnv->coreLoopSignalAddr[ 0 ]) != 0 )
  16.646 -    {    //something has already signalled to core loop, so save the signal
  16.647 -         // and look, next time master animated, to see if can send it.
  16.648 -         //Note, the addr to put a signal is in the coreloop's frame, so just
  16.649 -         // checks it each time through -- make it volatile to avoid GCC
  16.650 -         // optimizations -- it's a coreloop local var that only changes
  16.651 -         // after jumping away.  The signal includes the addr to send the
  16.652 -         //return to -- even if just empty return completion-signal
  16.653 -         //
  16.654 -         //save the signal in some queue that the master looks at each time
  16.655 -         // it starts up -- one loc says if empty for fast common case --
  16.656 -         //something like that -- want to hide this inside this call -- but
  16.657 -         // think this has to come as a request -- req handler gives procr
  16.658 -         // back to master loop, which gives it back to req handler at point
  16.659 -         // it sees that core loop has sent return signal.  Something like
  16.660 -         // that.
  16.661 -      saveTheSignal
  16.662 -
  16.663 -    }
  16.664 -  coreSigData->type = malloc;
  16.665 -  coreSigData->sizeToMalloc = sizeRequested;
  16.666 -  coreSigData->locToSignalCompletion = &figureOut;
  16.667 -   _VMSMasterEnv->coreLoopSignals[ 0 ] = coreSigData;
  16.668 -  */
  16.669 -      //just risk system-stack faults until get this figured out
  16.670 -   return malloc( sizeRequested );
  16.671 - }
  16.672 -
  16.673 -
  16.674 -/*Frees memory that was allocated in the external system -- higher overhead
  16.675 - *
  16.676 - *As noted in external malloc comment, this is clunky 'cause the free has
  16.677 - * to be called in the core loop.
  16.678 - */
  16.679 -void
  16.680 -VMS__free_in_ext( void *ptrToFree )
  16.681 - {
  16.682 -      //just risk system-stack faults until get this figured out
  16.683 -   free( ptrToFree );
  16.684 -
  16.685 -      //TODO: fix this -- so 
  16.686 - }
  16.687 -
  16.688 -
  16.689 -/*Designed to be called from the main thread outside of VMS, during init
  16.690 - */
  16.691 -MallocProlog *
  16.692 +MallocArrays *
  16.693  VMS_ext__create_free_list()
  16.694 - { MallocProlog *freeListHead, *firstChunk;
  16.695 -
  16.696 -      //Note, this is running in the main thread -- all increases in malloc
  16.697 -      // mem and all frees of it must be done in this thread, with the
  16.698 -      // thread's original stack available
  16.699 -   freeListHead = malloc( sizeof(MallocProlog) );
  16.700 -   firstChunk   = malloc( MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE );
  16.701 -   if( firstChunk == NULL ) {printf("malloc error\n"); exit(1);}
  16.702 +{     
  16.703 +   //Initialize containers for small chunks and fill with zeros
  16.704 +   _VMSMasterEnv->freeLists = (MallocArrays*)malloc( sizeof(MallocArrays) );
  16.705 +   MallocArrays *freeLists = _VMSMasterEnv->freeLists;
  16.706 +   
  16.707 +   freeLists->smallChunks = 
  16.708 +           (MallocProlog**)malloc(SMALL_CHUNK_COUNT*sizeof(MallocProlog*));
  16.709 +   memset((void*)freeLists->smallChunks,
  16.710 +           0,SMALL_CHUNK_COUNT*sizeof(MallocProlog*));
  16.711 +   
  16.712 +   //Calculate number of containers for big chunks
  16.713 +   uint32 container = getContainer(MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE)+1;
  16.714 +   freeLists->bigChunks = (MallocProlog**)malloc(container*sizeof(MallocProlog*));
  16.715 +   memset((void*)freeLists->bigChunks,0,container*sizeof(MallocProlog*));
  16.716 +   freeLists->containerCount = container;
  16.717 +   
  16.718 +   //Create first element in lastContainer 
  16.719 +   MallocProlog *firstChunk = malloc( MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE );
  16.720 +   if( firstChunk == NULL ) {printf("Can't allocate initial memory\n"); exit(1);}
  16.721 +   freeLists->memSpace = firstChunk;
  16.722     
  16.723     //Touch memory to avoid page faults
  16.724     void *ptr,*endPtr; 
  16.725 @@ -457,38 +349,47 @@
  16.726     {
  16.727         *(char*)ptr = 0;
  16.728     }
  16.729 -
  16.730 -   freeListHead->prevChunkInFreeList = NULL;
  16.731 -      //Use this addr to free the heap when cleanup
  16.732 -   freeListHead->nextLowerInMem      = firstChunk;
  16.733 -      //to identify top-of-heap elem, compare this addr to elem's next higher
  16.734 -   freeListHead->nextHigherInMem     = (void*)( (uintptr_t)firstChunk +
  16.735 -                                         MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE);
  16.736 -   freeListHead->nextChunkInFreeList = firstChunk;
  16.737 -
  16.738 -   firstChunk->nextChunkInFreeList   = NULL;
  16.739 -   firstChunk->prevChunkInFreeList   = freeListHead;
  16.740 -      //next Higher has to be set to top of chunk, so can calc size in malloc
  16.741 -   firstChunk->nextHigherInMem       = (void*)( (uintptr_t)firstChunk +
  16.742 -                                         MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE);
  16.743 -   firstChunk->nextLowerInMem        = NULL; //identifies as bott of heap
  16.744     
  16.745 -   _VMSMasterEnv->amtOfOutstandingMem = 0; //none allocated yet
  16.746 -
  16.747 -   return freeListHead;
  16.748 +   firstChunk->nextLowerInMem = NULL;
  16.749 +   firstChunk->nextHigherInMem = (MallocProlog*)((uintptr_t)firstChunk +
  16.750 +                        MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE - sizeof(MallocProlog));
  16.751 +   firstChunk->nextChunkInFreeList = NULL;
  16.752 +   //previous element in the queue is the container
  16.753 +   firstChunk->prevChunkInFreeList = &freeLists->bigChunks[container-2];
  16.754 +   
  16.755 +   freeLists->bigChunks[container-2] = firstChunk;
  16.756 +   //Insert into bit search list
  16.757 +   if(container <= 65)
  16.758 +   {
  16.759 +       freeLists->bigChunksSearchVector[0] = ((uint64)1 << (container-2));
  16.760 +       freeLists->bigChunksSearchVector[1] = 0;
  16.761 +   }   
  16.762 +   else
  16.763 +   {
  16.764 +       freeLists->bigChunksSearchVector[0] = 0;
  16.765 +       freeLists->bigChunksSearchVector[1] = ((uint64)1 << (container-66));
  16.766 +   }
  16.767 +   
  16.768 +   //Create dummy chunk to mark the top of stack; this is, of course,
  16.769 +   //never freed.
  16.770 +   MallocProlog *dummyChunk = firstChunk->nextHigherInMem;
  16.771 +   dummyChunk->nextHigherInMem = dummyChunk+1;
  16.772 +   dummyChunk->nextLowerInMem  = NULL;
  16.773 +   dummyChunk->nextChunkInFreeList = NULL;
  16.774 +   dummyChunk->prevChunkInFreeList = NULL;
  16.775 +   
  16.776 +   return freeLists;
  16.777   }
  16.778  
  16.779  
  16.780  /*Designed to be called from the main thread outside of VMS, during cleanup
  16.781   */
  16.782  void
  16.783 -VMS_ext__free_free_list( MallocProlog *freeListHead )
  16.784 +VMS_ext__free_free_list( MallocArrays *freeLists )
  16.785   {    
  16.786 -      //stashed a ptr to the one and only bug chunk malloc'd from OS in the
  16.787 -      // free list head's next lower in mem pointer
  16.788 -   free( freeListHead->nextLowerInMem );
  16.789 -
  16.790 -   //don't free the head -- it'll be in an array eventually -- free whole
  16.791 -   // array when all the free lists linked from it have already been freed
  16.792 +   free(freeLists->memSpace);
  16.793 +   free(freeLists->bigChunks);
  16.794 +   free(freeLists->smallChunks);
  16.795 +   
  16.796   }
  16.797  
    17.1 --- a/vmalloc.h	Wed Feb 22 11:39:12 2012 -0800
    17.2 +++ b/vmalloc.h	Sun Mar 04 14:26:35 2012 -0800
    17.3 @@ -14,6 +14,14 @@
    17.4  #include <inttypes.h>
    17.5  #include "VMS_primitive_data_types.h"
    17.6  
    17.7 +#define SMALL_CHUNK_SIZE 32
    17.8 +#define SMALL_CHUNK_COUNT 4
    17.9 +#define LOWER_BOUND     128  //Biggest chunk size that is created for the small chunks
   17.10 +#define BIG_LOWER_BOUND 160  //Smallest chunk size that is created for the big chunks
   17.11 +
   17.12 +#define LOG54 0.3219280948873623
   17.13 +#define LOG128 7
   17.14 +
   17.15  typedef struct _MallocProlog MallocProlog;
   17.16  
   17.17  struct _MallocProlog
   17.18 @@ -24,6 +32,18 @@
   17.19     MallocProlog *nextLowerInMem;
   17.20   };
   17.21  //MallocProlog
   17.22 + 
   17.23 + typedef struct MallocArrays MallocArrays;
   17.24 +
   17.25 + struct MallocArrays
   17.26 + {
   17.27 +     MallocProlog **smallChunks;
   17.28 +     MallocProlog **bigChunks;
   17.29 +     uint64       bigChunksSearchVector[2];
   17.30 +     void         *memSpace;
   17.31 +     uint32       containerCount;
   17.32 + };
   17.33 + //MallocArrays
   17.34  
   17.35  typedef struct
   17.36   {
   17.37 @@ -34,57 +54,38 @@
   17.38  
   17.39  void *
   17.40  VMS_int__malloc( size_t sizeRequested );
   17.41 +#define VMS_PI__malloc  VMS_int__malloc
   17.42 +#define VMS_WL__malloc  VMS_int__malloc /*TODO: Bug -- Not protected!! */
   17.43 +#define VMS_App__malloc VMS_int__malloc /*TODO: Bug -- Not protected!! */
   17.44  
   17.45  void *
   17.46  VMS_int__malloc_aligned( size_t sizeRequested );
   17.47 +#define VMS_PI__malloc_aligned VMS_int__malloc_aligned
   17.48 +#define VMS_WL__malloc_aligned VMS_int__malloc_aligned
   17.49  
   17.50  void
   17.51  VMS_int__free( void *ptrToFree );
   17.52 +#define VMS_PI__free  VMS_int__free
   17.53 +#define VMS_WL__free  VMS_int__free /*TODO: Bug -- Not protected!! */
   17.54 +#define VMS_App__free VMS_int__free /*TODO: Bug -- Not protected!! */
   17.55  
   17.56 -#define VMS_PI__malloc VMS_int__malloc
   17.57 -#define VMS_PI__malloc_aligned VMS_int__malloc_aligned
   17.58 -#define VMS_PI__free VMS_int__free
   17.59 -/* For now, the PI is protected by master lock, so int malloc fine
   17.60 -void *
   17.61 -VMS_PI__malloc( size_t sizeRequested );
   17.62  
   17.63 -void *
   17.64 -VMS_PI__malloc_aligned( size_t sizeRequested );
   17.65 -
   17.66 -void
   17.67 -VMS_PI__free( void *ptrToFree );
   17.68 -*/
   17.69 -
   17.70 -//TODO: protect WL malloc from concurrency!! shared freelist can be corrupted
   17.71 -#define VMS_WL__malloc VMS_int__malloc
   17.72 -#define VMS_WL__malloc_aligned VMS_int__malloc_aligned
   17.73 -#define VMS_WL__free VMS_int__free
   17.74 -/*
   17.75 -void *
   17.76 -VMS_WL__malloc( size_t sizeRequested );
   17.77 -
   17.78 -void *
   17.79 -VMS_WL__malloc_aligned( size_t sizeRequested );
   17.80 -
   17.81 -void
   17.82 -VMS_WL__free( void *ptrToFree );
   17.83 -*/
   17.84  
   17.85  /*Allocates memory from the external system -- higher overhead
   17.86   */
   17.87  void *
   17.88 -VMS__malloc_in_ext( size_t sizeRequested );
   17.89 +VMS_ext__malloc_in_ext( size_t sizeRequested );
   17.90  
   17.91  /*Frees memory that was allocated in the external system -- higher overhead
   17.92   */
   17.93  void
   17.94 -VMS__free_in_ext( void *ptrToFree );
   17.95 +VMS_ext__free_in_ext( void *ptrToFree );
   17.96  
   17.97  
   17.98 -MallocProlog *
   17.99 +MallocArrays *
  17.100  VMS_ext__create_free_list();
  17.101  
  17.102  void
  17.103 -VMS_ext__free_free_list( MallocProlog *freeListHead );
  17.104 +VMS_ext__free_free_list(MallocArrays *freeLists );
  17.105  
  17.106  #endif
  17.107 \ No newline at end of file
    18.1 --- a/vutilities.h	Wed Feb 22 11:39:12 2012 -0800
    18.2 +++ b/vutilities.h	Sun Mar 04 14:26:35 2012 -0800
    18.3 @@ -8,8 +8,8 @@
    18.4   */
    18.5  
    18.6  
    18.7 -#ifndef  _UTILITIES_H
    18.8 -#define	_UTILITIES_H
    18.9 +#ifndef  _VUTILITIES_H
   18.10 +#define	_VUTILITIES_H
   18.11  
   18.12  #include <string.h>
   18.13  #include "VMS_primitive_data_types.h"