# HG changeset patch
# User Some Random Person <seanhalle@yahoo.com>
# Date 1330899995 28800
# Node ID 0c83ea8adefce64c0b749801f9066ae4f837bc8c
# Parent  eaf7e4c58c9e0b9279d9cbf16e3a17c18b4a0bc1
Close to compilable version of common_ancestor -- still includes HW dep stuff

diff -r eaf7e4c58c9e -r 0c83ea8adefc CoreLoop.c
--- a/CoreLoop.c	Wed Feb 22 11:39:12 2012 -0800
+++ b/CoreLoop.c	Sun Mar 04 14:26:35 2012 -0800
@@ -6,7 +6,6 @@
 
 
 #include "VMS.h"
-#include "ProcrContext.h"
 
 #include <stdlib.h>
 #include <stdio.h>
@@ -15,14 +14,14 @@
 #include <pthread.h>
 #include <sched.h>
 
-void *terminateCoreLoop(SlaveVP *currPr);
+void *terminateCoreLoop(SlaveVP *currSlv);
 
 /*This is the loop that runs in the OS Thread pinned to each core
- *Get virt procr from queue,
- * save state of current animator, then load in state of virt procr, using
- * jmp instr to switch the program-counter state -- making the virt procr
+ *Get Slv from queue,
+ * save state of current animator, then load in state of Slv, using
+ * jmp instr to switch the program-counter state -- making the Slv
  * the new animator.
- *At some point, the virt procr will suspend itself by saving out its
+ *At some point, the Slv will suspend itself by saving out its
  * animator state (stack ptr, frame ptr, program counter) and switching
  * back to the OS Thread's animator state, which means restoring the
  * stack and frame and jumping to the core loop start point.
@@ -34,7 +33,7 @@
  { 
    ThdParams      *coreLoopThdParams;
    int             thisCoresIdx;
-   SlaveVP        *currPr;
+   SlaveVP        *currSlv;
    VMSQueueStruc  *readyToAnimateQ;
    cpu_set_t       coreMask;  //has 1 in bit positions of allowed cores
    int             errorCode;
@@ -78,7 +77,7 @@
    if(errorCode){ printf("\nset affinity failure\n"); exit(0); }
 
    
-   //Save the return address in the SwitchVP function
+   //Save the return address in the SwitchSlv function
    saveCoreLoopReturnAddr((void**)&(_VMSMasterEnv->coreLoopReturnPt));
 
    
@@ -100,68 +99,55 @@
       while( gate.gateClosed ) /*busy wait*/;
     }
 
-   currPr = (SlaveVP *) readVMSQ( readyToAnimateQ );
+   currSlv = (SlaveVP *) readVMSQ( readyToAnimateQ );
 
       //Set the coreloop's progress, so stealer can see it has made it out
       // of the protected area
    gate.exitProgress = gate.preGateProgress;
    #else
-   currPr = (SlaveVP *) readVMSQ( readyToAnimateQ );
+   currSlv = (SlaveVP *) readVMSQ( readyToAnimateQ );
    #endif
 
-   if( currPr != NULL ) _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0;
+   if( currSlv != NULL ) _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0;
    else
     {
-      //============================= MEASUREMENT STUFF =====================
-      #ifdef MEAS__TIME_MASTER_LOCK
-      int32 startStamp, endStamp;
-      saveLowTimeStampCountInto( startStamp );
-      #endif
-      //=====================================================================
+            MEAS__Capture_Pre_Master_Lock_Point;
+            
       int tries = 0; int gotLock = 0;
-      while( currPr == NULL ) //if queue was empty, enter get masterLock loop
+      while( currSlv == NULL ) //if queue was empty, enter get masterLock loop
        {    //queue was empty, so get master lock
 
          gotLock = __sync_bool_compare_and_swap(&(_VMSMasterEnv->masterLock),
                                                           UNLOCKED, LOCKED );
          if( gotLock )
           {    //run own MasterVP -- jmps to coreLoops startPt when done
-            currPr = _VMSMasterEnv->masterVPs[thisCoresIdx];
+            currSlv = _VMSMasterEnv->masterVPs[thisCoresIdx];
             if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 )
              {       DEBUG( dbgB2BMaster,"Many back to back MasterVPs\n");
                pthread_yield();
              }
             _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1;
-            break;  //end while -- have a VP to animate now
+            break;  //end while -- have a Slv to animate now
           }
 
          tries++;      //if too many, means master on other core taking too long
          if( tries > MASTERLOCK_RETRIES ) { tries = 0; pthread_yield(); }
        }
-      //============================= MEASUREMENT STUFF =====================
-      #ifdef MEAS__TIME_MASTER_LOCK
-      saveLowTimeStampCountInto( endStamp );
-      addIntervalToHist( startStamp, endStamp,
-                         _VMSMasterEnv->masterLockLowTimeHist );
-      addIntervalToHist( startStamp, endStamp,
-                         _VMSMasterEnv->masterLockHighTimeHist );
-      #endif
-      //=====================================================================
-
+            MEAS__Capture_Post_Master_Lock_Point;
     }
 
    
-   switchToVP(currPr); //The VPs return in here
+   switchToSlv(currSlv); //The Slvs return in here
    flushRegisters();
    }//CoreLoop      
  }
 
 
 void *
-terminateCoreLoop(SlaveVP *currPr){
-   //first free shutdown VP that jumped here -- it first restores the
-   // coreloop's stack, so addr of currPr in stack frame is still correct
-   VMS_int__dissipate_procr( currPr );
+terminateCoreLoop(SlaveVP *currSlv){
+   //first free shutdown Slv that jumped here -- it first restores the
+   // coreloop's stack, so addr of currSlv in stack frame is still correct
+   VMS_int__dissipate_SlaveVP( currSlv );
    pthread_exit( NULL );
 }
 
@@ -176,7 +162,7 @@
 void *
 coreLoop_Seq( void *paramsIn )
  {
-   SlaveVP      *currPr;
+   SlaveVP      *currSlv;
    VMSQueueStruc *readyToAnimateQ;
    
    ThdParams      *coreLoopThdParams;
@@ -186,7 +172,7 @@
 //   thisCoresIdx = coreLoopThdParams->coreNum;
    thisCoresIdx = 0;
 
-   //Save the return address in the SwitchVP function
+   //Save the return address in the SwitchSlv function
    saveCoreLoopReturnAddr(&(_VMSMasterEnv->coreLoopReturnPt));
 
    
@@ -195,19 +181,19 @@
       //_VMSWorkQ must be a global, static volatile var, so not kept in reg,
       // which forces reloading the pointer after each jmp to this point
    readyToAnimateQ  = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx];
-   currPr = (SlaveVP *) readVMSQ( readyToAnimateQ );
-   if( currPr == NULL )
+   currSlv = (SlaveVP *) readVMSQ( readyToAnimateQ );
+   if( currSlv == NULL )
     { if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 )
        { printf("too many back to back MasterVP\n"); exit(1); }
       _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1;
       
-      currPr = _VMSMasterEnv->masterVPs[thisCoresIdx];
+      currSlv = _VMSMasterEnv->masterVPs[thisCoresIdx];
     }
    else
       _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0;
 
 
-   switchToVP( currPr );
+   switchToSlv( currSlv );
    flushRegisters();
    }
  }
diff -r eaf7e4c58c9e -r 0c83ea8adefc MasterLoop.c
--- a/MasterLoop.c	Wed Feb 22 11:39:12 2012 -0800
+++ b/MasterLoop.c	Sun Mar 04 14:26:35 2012 -0800
@@ -10,13 +10,12 @@
 #include <stddef.h>
 
 #include "VMS.h"
-#include "ProcrContext.h"
 
 
 //===========================================================================
 void inline
 stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ,
-               SlaveVP *masterPr );
+               SlaveVP *masterVP );
 
 //===========================================================================
 
@@ -27,13 +26,13 @@
  *Polls each sched slot exactly once, hands any requests made by a newly
  * done slave to the "request handler" plug-in function
  *
- *Any slots that need a virt procr assigned are given to the "schedule"
- * plug-in function, which tries to assign a virt procr (slave) to it.
+ *Any slots that need a Slv assigned are given to the "schedule"
+ * plug-in function, which tries to assign a Slv (slave) to it.
  *
  *When all slots needing a processor have been given to the schedule plug-in,
- * a fraction of the procrs successfully scheduled are put into the
+ * a fraction of the slaves successfully scheduled are put into the
  * work queue, then a continuation of this function is put in, then the rest
- * of the virt procrs that were successfully scheduled.
+ * of the Slvs that were successfully scheduled.
  *
  *The first thing the continuation does is busy-wait until the previous
  * animation completes.  This is because an (unlikely) continuation may
@@ -46,7 +45,7 @@
  * start running gets it and does all the stuff for a newly born --
  * from then on, will be doing continuation, but do suspension self
  * directly at end of master loop
- *So VMS__init just births the master virtual processor same way it births
+ *So VMS_WL__init just births the master virtual processor same way it births
  * all the others -- then does any extra setup needed and puts it into the
  * work queue.
  *However means have to make masterEnv a global static volatile the same way
@@ -65,36 +64,36 @@
  *At this point, the masterLoop does not write itself into the queue anymore,
  * instead, the coreLoop acquires the masterLock when it has nothing to
  * animate, and then animates its own masterLoop.  However, still try to put
- * several AppVPs into the queue to amortize the startup cost of switching
+ * several AppSlvs into the queue to amortize the startup cost of switching
  * to the MasterVP.  Note, don't have to worry about latency of requests much
  * because most requests generate work for same core -- only latency issue
  * is case when other cores starved and one core's requests generate work
  * for them -- so keep max in queue to 3 or 4..
  */
-void masterLoop( void *initData, SlaveVP *animatingPr )
+void masterLoop( void *initData, SlaveVP *animatingSlv )
  { 
    int32           slotIdx, numSlotsFilled;
-   SlaveVP      *schedVirtPr;
+   SlaveVP        *schedSlaveVP;
    SchedSlot      *currSlot, **schedSlots;
    MasterEnv      *masterEnv;
    VMSQueueStruc  *readyToAnimateQ;
    
-   Sched_Assigner  slaveScheduler;
+   Sched_Assigner  slaveAssigner;
    RequestHandler  requestHandler;
    void           *semanticEnv;
 
    int32           thisCoresIdx;
-   SlaveVP      *masterPr;
-   volatile        SlaveVP *volatileMasterPr;
+   SlaveVP      *masterVP;
+   volatile        SlaveVP *volatileMasterVP;
    
-   volatileMasterPr = animatingPr;
-   masterPr         = (SlaveVP*)volatileMasterPr; //used to force re-define after jmp
+   volatileMasterVP = animatingSlv;
+   masterVP         = (SlaveVP*)volatileMasterVP; //used to force re-define after jmp
 
       //First animation of each MasterVP will in turn animate this part
-      // of setup code.. (VP creator sets up the stack as if this function
+      // of setup code.. (Slv creator sets up the stack as if this function
       // was called normally, but actually get here by jmp)
       //So, setup values about stack ptr, jmp pt and all that
-   //masterPr->resumeInstrPtr = &&masterLoopStartPt;
+   //masterVP->resumeInstrPtr = &&masterLoopStartPt;
 
 
       //Note, got rid of writing the stack and frame ptr up here, because
@@ -108,25 +107,18 @@
    //masterLoopStartPt:
    while(1){
        
-   //============================= MEASUREMENT STUFF ========================
-   #ifdef MEAS__TIME_MASTER
-      //Total Master time includes one coreloop time -- just assume the core
-      // loop time is same for Master as for AppVPs, even though it may be
-      // smaller due to higher predictability of the fixed jmp.
-   saveLowTimeStampCountInto( masterPr->startMasterTSCLow );
-   #endif
-   //========================================================================
+      MEAS__Capture_Pre_Master_Point
 
    masterEnv        = (MasterEnv*)_VMSMasterEnv;
    
       //GCC may optimize so doesn't always re-define from frame-storage
-   masterPr         = (SlaveVP*)volatileMasterPr;  //just to make sure after jmp
-   thisCoresIdx     = masterPr->coreAnimatedBy;
+   masterVP         = (SlaveVP*)volatileMasterVP;  //just to make sure after jmp
+   thisCoresIdx     = masterVP->coreAnimatedBy;
    readyToAnimateQ  = masterEnv->readyToAnimateQs[thisCoresIdx];
    schedSlots       = masterEnv->allSchedSlots[thisCoresIdx];
 
    requestHandler   = masterEnv->requestHandler;
-   slaveScheduler   = masterEnv->slaveSchedAssigner;
+   slaveAssigner   = masterEnv->slaveAssigner;
    semanticEnv      = masterEnv->semanticEnv;
 
 
@@ -139,18 +131,18 @@
       if( currSlot->workIsDone )
        {
          currSlot->workIsDone         = FALSE;
-         currSlot->needsProcrAssigned = TRUE;
+         currSlot->needsSlaveAssigned = TRUE;
 
             //process requests from slave to master
                //====================== MEASUREMENT STUFF ===================
-               #ifdef MEAS__TIME_PLUGIN
+               #ifdef MEAS__TURN_ON_PLUGIN_MEAS
                int32 startStamp1, endStamp1;
                saveLowTimeStampCountInto( startStamp1 );
                #endif
                //============================================================
-         (*requestHandler)( currSlot->procrAssignedToSlot, semanticEnv );
+         (*requestHandler)( currSlot->slaveAssignedToSlot, semanticEnv );
                //====================== MEASUREMENT STUFF ===================
-               #ifdef MEAS__TIME_PLUGIN
+               #ifdef MEAS__TURN_ON_PLUGIN_MEAS
                saveLowTimeStampCountInto( endStamp1 );
                addIntervalToHist( startStamp1, endStamp1,
                                         _VMSMasterEnv->reqHdlrLowTimeHist );
@@ -159,18 +151,18 @@
                #endif
                //============================================================
        }
-      if( currSlot->needsProcrAssigned )
-       {    //give slot a new virt procr
-         schedVirtPr =
-          (*slaveScheduler)( semanticEnv, thisCoresIdx );
+      if( currSlot->needsSlaveAssigned )
+       {    //give slot a new Slv
+         schedSlaveVP =
+          (*slaveAssigner)( semanticEnv, thisCoresIdx );
          
-         if( schedVirtPr != NULL )
-          { currSlot->procrAssignedToSlot = schedVirtPr;
-            schedVirtPr->schedSlot        = currSlot;
-            currSlot->needsProcrAssigned  = FALSE;
+         if( schedSlaveVP != NULL )
+          { currSlot->slaveAssignedToSlot = schedSlaveVP;
+            schedSlaveVP->schedSlot        = currSlot;
+            currSlot->needsSlaveAssigned  = FALSE;
             numSlotsFilled               += 1;
             
-            writeVMSQ( schedVirtPr, readyToAnimateQ );
+            writeVMSQ( schedSlaveVP, readyToAnimateQ );
           }
        }
     }
@@ -179,16 +171,13 @@
    #ifdef USE_WORK_STEALING
       //If no slots filled, means no more work, look for work to steal.
    if( numSlotsFilled == 0 )
-    { gateProtected_stealWorkInto( currSlot, readyToAnimateQ, masterPr );
+    { gateProtected_stealWorkInto( currSlot, readyToAnimateQ, masterVP );
     }
    #endif
 
+         MEAS__Capture_Post_Master_Point;
    
-   #ifdef MEAS__TIME_MASTER
-   saveLowTimeStampCountInto( masterPr->endMasterTSCLow );
-   #endif
-
-   masterSwitchToCoreLoop(animatingPr);
+   masterSwitchToCoreLoop(animatingSlv);
    flushRegisters();
    }//MasterLoop
 
@@ -202,14 +191,14 @@
  */
 void inline
 stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ,
-               SlaveVP *masterPr )
+               SlaveVP *masterVP )
  { 
-   SlaveVP   *stolenPr;
+   SlaveVP   *stolenSlv;
    int32        coreIdx, i;
    VMSQueueStruc *currQ;
 
-   stolenPr = NULL;
-   coreIdx = masterPr->coreAnimatedBy;
+   stolenSlv = NULL;
+   coreIdx = masterVP->coreAnimatedBy;
    for( i = 0; i < NUM_CORES -1; i++ )
     {
       if( coreIdx >= NUM_CORES -1 )
@@ -220,17 +209,17 @@
        }
       currQ = _VMSMasterEnv->readyToAnimateQs[coreIdx];
       if( numInVMSQ( currQ ) > 0 )
-       { stolenPr = readVMSQ (currQ );
+       { stolenSlv = readVMSQ (currQ );
          break;
        }
     }
 
-   if( stolenPr != NULL )
-    { currSlot->procrAssignedToSlot = stolenPr;
-      stolenPr->schedSlot           = currSlot;
-      currSlot->needsProcrAssigned  = FALSE;
+   if( stolenSlv != NULL )
+    { currSlot->slaveAssignedToSlot = stolenSlv;
+      stolenSlv->schedSlot           = currSlot;
+      currSlot->needsSlaveAssigned  = FALSE;
 
-      writeVMSQ( stolenPr, readyToAnimateQ );
+      writeVMSQ( stolenSlv, readyToAnimateQ );
     }
  }
 
@@ -306,9 +295,9 @@
 void inline
 gateProtected_stealWorkInto( SchedSlot *currSlot,
                              VMSQueueStruc *myReadyToAnimateQ,
-                             SlaveVP *masterPr )
+                             SlaveVP *masterVP )
  {
-   SlaveVP     *stolenPr;
+   SlaveVP     *stolenSlv;
    int32          coreIdx, i, haveAVictim, gotLock;
    VMSQueueStruc *victimsQ;
 
@@ -319,7 +308,7 @@
 
       //see if any other cores have work available to steal
    haveAVictim = FALSE;
-   coreIdx = masterPr->coreAnimatedBy;
+   coreIdx = masterVP->coreAnimatedBy;
    for( i = 0; i < NUM_CORES -1; i++ )
     {
       if( coreIdx >= NUM_CORES -1 )
@@ -354,18 +343,18 @@
          coreMightBeInProtected = FALSE;
     }
 
-   stolenPr = readVMSQ ( victimsQ );
+   stolenSlv = readVMSQ ( victimsQ );
 
    vicGate->gateClosed = FALSE;
    //======= End Gate-protection  =======
 
 
-   if( stolenPr != NULL )  //victim could have been in protected and taken
-    { currSlot->procrAssignedToSlot = stolenPr;
-      stolenPr->schedSlot           = currSlot;
-      currSlot->needsProcrAssigned  = FALSE;
+   if( stolenSlv != NULL )  //victim could have been in protected and taken
+    { currSlot->slaveAssignedToSlot = stolenSlv;
+      stolenSlv->schedSlot           = currSlot;
+      currSlot->needsSlaveAssigned  = FALSE;
 
-      writeVMSQ( stolenPr, myReadyToAnimateQ );
+      writeVMSQ( stolenSlv, myReadyToAnimateQ );
     }
 
       //unlock the work stealing lock
diff -r eaf7e4c58c9e -r 0c83ea8adefc VMS.h
--- a/VMS.h	Wed Feb 22 11:39:12 2012 -0800
+++ b/VMS.h	Sun Mar 04 14:26:35 2012 -0800
@@ -20,6 +20,10 @@
 #include <pthread.h>
 #include <sys/time.h>
 
+#ifndef _LANG_NAME_
+#define _LANG_NAME_ ""
+#endif
+
 //=================  Defines: included from separate files  =================
 //
 // Note: ALL defines are in other files, none are in here
@@ -44,11 +48,15 @@
 typedef struct _GateStruc     GateStruc;
 
 
-typedef SlaveVP * (*Sched_Assigner)  ( void *, int );   //semEnv, coreIdx
-typedef void  (*RequestHandler)  ( SlaveVP *, void * ); //prWReqst, semEnv
-typedef void  (*TopLevelFnPtr)  ( void *, SlaveVP * ); //initData, animPr
-typedef void    TopLevelFn      ( void *, SlaveVP * ); //initData, animPr
-typedef void  (*ResumeVPFnPtr)   ( SlaveVP *, void * );
+typedef SlaveVP * (*Sched_Assigner) ( void *, int       ); //semEnv, coreIdx
+typedef void      (*RequestHandler) ( SlaveVP *, void * ); //prWReqst, semEnv
+typedef void      (*TopLevelFnPtr)  ( void *, SlaveVP * ); //initData, animSlv
+typedef void        TopLevelFn      ( void *, SlaveVP * ); //initData, animSlv
+typedef void      (*ResumeSlvFnPtr) ( SlaveVP *, void * );
+
+//============================ HW Dependent Fns ================================
+
+#include "VMS__HW_dependent.h"
 
 //============================= Statistics ==================================
 
@@ -83,7 +91,7 @@
 
 typedef struct
  { enum VMSSemReqstType reqType;
-   SlaveVP           *requestingPr;
+   SlaveVP           *requestingSlv;
    char                *nameStr;  //for create probe
  }
  VMSSemReq;
@@ -94,12 +102,12 @@
 struct _SchedSlot
  {
    int         workIsDone;
-   int         needsProcrAssigned;
-   SlaveVP  *procrAssignedToSlot;
+   int         needsSlaveAssigned;
+   SlaveVP  *slaveAssignedToSlot;
  };
 //SchedSlot
 
-/*WARNING: re-arranging this data structure could cause VP switching
+/*WARNING: re-arranging this data structure could cause Slv switching
  *         assembly code to fail -- hard-codes offsets of fields
  */
 struct _SlaveVP
@@ -117,23 +125,11 @@
    SchedSlot  *schedSlot;
    VMSReqst   *requests;
 
-   void       *semanticData; //this livesUSE_GNU here for the life of VP
-   void       *dataRetFromReq;//values returned from plugin to VP go here
+   void       *semanticData; //this lives here for the life of Slv
+   void       *dataRetFromReq;//values returned from plugin to Slv go here
 
       //=========== MEASUREMENT STUFF ==========
-       #ifdef MEAS__TIME_STAMP_SUSP
-       uint32  preSuspTSCLow;
-       uint32  postSuspTSCLow;
-       #endif
-       #ifdef MEAS__TIME_MASTER /* in SlaveVP because multiple masterVPs*/
-       uint32  startMasterTSCLow;USE_GNU
-       uint32  endMasterTSCLow;
-       #endif
-       #ifdef MEAS__TIME_2011_SYS
-       TSCountLowHigh  startSusp;
-       uint64  totalSuspCycles;
-       uint32  numGoodSusp;
-       #endif
+       MEAS__Insert_Meas_Fields_into_Slave;
       //========================================
    
    float64      createPtInSecs;  //have space but don't use on some configs
@@ -141,18 +137,13 @@
 //SlaveVP
 
 
-/*WARNING: re-arranging this data structure could cause VP-switching
+/*WARNING: re-arranging this data structure could cause Slv-switching
  *         assembly code to fail -- hard-codes offsets of fields
  *         (because -O3 messes with things otherwise)
  */
 typedef struct
  {
-   union{ //adds padding to put masterLock on its own cache-line to elim
-          // false sharing (masterLock is most-accessed var in VMS)
-        volatile int32   masterLock;
-        char             padding[CACHE_LINE_SZ];    
-   } masterLockUnion;
-   Sched_Assigner   slaveSchedAssigner;
+   Sched_Assigner   slaveAssigner;
    RequestHandler   requestHandler;
    
    SchedSlot     ***allSchedSlots;
@@ -161,17 +152,19 @@
 
    void            *semanticEnv;
    void            *OSEventStruc;   //for future, when add I/O to BLIS
-   MallocArrays    *freeLists;
+   MallocArrays   *freeLists;
    int32            amtOfOutstandingMem; //total currently allocated
 
    void            *coreLoopReturnPt;//addr to jump to to re-enter coreLoop
 
    int32            setupComplete;
-   //int32            numMasterInARow[NUM_CORES];//detect back-to-back masterVP
+   int32            numMasterInARow[NUM_CORES];//detect back-to-back masterVP
+   int32            masterLock __align_to_cacheline__;
    GateStruc       *workStealingGates[ NUM_CORES ]; //concurrent work-steal
    int32            workStealingLock;
    
-   int32            numVPsCreated; //gives ordering to processor creation
+   int32            numSlavesCreated; //gives ordering to processor creation
+   int32            numSlavesAlive;   //used to detect when to shutdown
 
       //=========== MEASUREMENT STUFF =============
        IntervalProbe   **intervalProbes;
@@ -181,28 +174,12 @@
        float64           createPtInSecs;
        Histogram       **measHists;
        PrivDynArrayInfo *measHistsInfo;
-       #ifdef MEAS__TIME_PLUGIN
-       Histogram       *reqHdlrLowTimeHist;
-       Histogram       *reqHdlrHighTimeHist;
-       #endif
-       #ifdef MEAS__TIME_MALLOC
-       Histogram       *mallocTimeHist;
-       Histogram       *freeTimeHist;
-       #endif
-       #ifdef MEAS__TIME_MASTER_LOCK
-       Histogram       *masterLockLowTimeHist;
-       Histogram       *masterLockHighTimeHist;
-       #endif
-       #ifdef MEAS__TIME_2011_SYS
-       TSCountLowHigh   startMaster;
-       uint64           totalMasterCycles;
-       uint32           numMasterAnimations;
-       TSCountLowHigh   startReqHdlr;
-       uint64           totalPluginCycles;
-       uint32           numPluginAnimations;
-       uint64           cyclesTillStartMasterLoop;
-       TSCountLowHigh   endMasterLoop;
-       #endif
+       MEAS__Insert_Susp_Meas_Fields_into_MasterEnv;
+       MEAS__Insert_Master_Meas_Fields_into_MasterEnv;
+       MEAS__Insert_Master_Lock_Meas_Fields_into_MasterEnv;
+       MEAS__Insert_Malloc_Meas_Fields_into_MasterEnv;
+       MEAS__Insert_Plugin_Meas_Fields_into_MasterEnv;
+       MEAS__Insert_System_Meas_Fields_into_MasterEnv;
       //==========================================
  }
 MasterEnv;
@@ -237,28 +214,32 @@
  }
 ThdParams;
 
+//=============================  Global Vars ================================
+
 pthread_t       coreLoopThdHandles[ NUM_CORES ];  //pthread's virt-procr state
 ThdParams      *coreLoopThdParams [ NUM_CORES ];
 pthread_mutex_t suspendLock;
 pthread_cond_t  suspend_cond;
 
-
-
-//=============================  Global Vars ================================
-
 volatile MasterEnv      *_VMSMasterEnv __align_to_cacheline__;
 
 
-
-
 //=========================  Function Prototypes  ===========================
 
+/* MEANING OF   WL  PI  SS  int
+ * These indicate which places the function is safe to use.  They stand for:
+ * WL: Wrapper Library
+ * PI: Plugin 
+ * SS: Startup and Shutdown
+ * int: internal to the VMS implementation
+ */
 
 //========== Setup and shutdown ==========
 void
-VMS_int__init();
+VMS_SS__init();
 
-Fix seed-procr creation -- put box around language, have lang register stuff
+//Fix; 
+/*seed-procr creation -- put box around language, have lang register stuff
         with VMS.
         have main program explicitly INIT Lang! -- makes more sense to
         C programmers -- makes it clear that there's a transition.
@@ -289,77 +270,83 @@
         lang's sync constructs -- VMS uses message system to establish tie-pt,
         each lang defines what a tie-point means to it..  (work with the
         diff semantics?)
+*/
 void
-VMS_WL__start_the_work_then_wait_until_done();
+VMS_SS__start_the_work_then_wait_until_done();
 
 void
-VMS_int__shutdown();
+VMS_SS__shutdown();
 
 void
-VMS_int__cleanup_at_end_of_shutdown();
+VMS_SS__cleanup_at_end_of_shutdown();
 
 
 //==============    ===============
 
 inline SlaveVP *
-VMS_int__create_procr( TopLevelFnPtr fnPtr, void *dataParam );
+VMS_int__create_slaveVP( TopLevelFnPtr fnPtr, void *dataParam );
+#define VMS_PI__create_slaveVP VMS_int__create_slaveVP
+#define VMS_WL__create_slaveVP VMS_int__create_slaveVP
 
 inline void
-VMS_int__point_slave_to_Fn( SlaveVP *slaveVP, TopLevelFnPtr fnPtr,
+VMS_int__point_slaveVP_to_Fn( SlaveVP *slaveVP, TopLevelFnPtr fnPtr,
                             void    *dataParam);
+#define VMS_PI__point_slaveVP_to_Fn  VMS_int__point_slaveVP_to_Fn
+#define VMS_WL__point_slaveVP_to_Fn  VMS_int__point_slaveVP_to_Fn
 
 void
-VMS_int__save_return_addr_into_ptd_to_loc(void *ptrToReturnAddrHoldingLoc);
-
-void
-VMS_int__write_return_addr_from_ptd_to_loc(void *ptrToReturnAddrHoldingLoc);
-
-void
-VMS_int__dissipate_procr( SlaveVP *procrToDissipate );
+VMS_int__dissipate_SlaveVP( SlaveVP *slaveToDissipate );
+#define VMS_PI__dissipate_SlaveVP VMS_int__dissipate_SlaveVP
+//From WL, dissipate a SlaveVP by sending a request
 
    //Use this to create processor inside entry point & other places outside
    // the VMS system boundary (IE, not run in slave nor Master)
 SlaveVP *
-VMS_ext__create_procr( TopLevelFnPtr fnPtr, void *dataParam );
+VMS_ext__create_slaveVP( TopLevelFnPtr fnPtr, void *dataParam );
 
 void
-VMS_ext__dissipate_procr( SlaveVP *procrToDissipate );
+VMS_ext__dissipate_slaveVP( SlaveVP *slaveToDissipate );
 
 void
-VMS_PI__throw_exception( char *msgStr, SlaveVP *reqstPr, VMSExcp *excpData );
+VMS_int__throw_exception( char *msgStr, SlaveVP *reqstSlv, VMSExcp *excpData );
+#define VMS_PI__throw_exception VMS_int__throw_exception
+#define VMS_WL__throw_exception VMS_int__throw_exception
 
 void *
-VMS_WL__give_sem_env_for( SlaveVP *animPr );
+VMS_int__give_sem_env_for( SlaveVP *animSlv );
+#define VMS_PI__give_sem_env_for  VMS_int__give_sem_env_for
+#define VMS_SS__give_sem_env_for  VMS_int__give_sem_env_for
+//No WL version -- not safe!  if use in WL, be sure data rd & wr is stable
 
 //==============  Request Related  ===============
 
 void
-VMS_int__suspend_procr( SlaveVP *callingPr );
+VMS_int__suspend_slaveVP_and_send_req( SlaveVP *callingSlv );
 
 inline void
-VMS_WL__add_sem_request_in_mallocd_VMSReqst( void *semReqData, SlaveVP *callingPr );
+VMS_WL__add_sem_request_in_mallocd_VMSReqst( void *semReqData, SlaveVP *callingSlv );
 
 inline void
-VMS_WL__send_sem_request( void *semReqData, SlaveVP *callingPr );
+VMS_WL__send_sem_request( void *semReqData, SlaveVP *callingSlv );
 
 void
-VMS_WL__send_create_procr_req( void *semReqData, SlaveVP *reqstingPr );
+VMS_WL__send_create_slaveVP_req( void *semReqData, SlaveVP *reqstingSlv );
 
 void inline
 VMS_WL__send_dissipate_req( SlaveVP *prToDissipate );
 
 inline void
-VMS_WL__send_VMSSem_request( void *semReqData, SlaveVP *callingPr );
+VMS_WL__send_VMSSem_request( void *semReqData, SlaveVP *callingSlv );
 
 VMSReqst *
-VMS_PI__take_next_request_out_of( SlaveVP *procrWithReq );
+VMS_PI__take_next_request_out_of( SlaveVP *slaveWithReq );
 
 inline void *
 VMS_PI__take_sem_reqst_from( VMSReqst *req );
 
 void inline
-VMS_PI__handle_VMSSemReq( VMSReqst *req, SlaveVP *requestingPr, void *semEnv,
-                       ResumeVPFnPtr resumePrFnPtr );
+VMS_PI__handle_VMSSemReq( VMSReqst *req, SlaveVP *requestingSlv, void *semEnv,
+                       ResumeSlvFnPtr resumeSlvFnPtr );
 
 //======================== MEASUREMENT ======================
 uint64
@@ -368,8 +355,6 @@
 VMS_WL__give_num_plugin_animations();
 
 
-
-#include "VMS__HW_dependent.h"
 #include "probes.h"
 #include "vutilities.h"
 
diff -r eaf7e4c58c9e -r 0c83ea8adefc VMS__HW_dependent.c
--- a/VMS__HW_dependent.c	Wed Feb 22 11:39:12 2012 -0800
+++ b/VMS__HW_dependent.c	Sun Mar 04 14:26:35 2012 -0800
@@ -12,7 +12,8 @@
  *No need to save registers on old stack frame, because there's no old
  * animator state to return to
  */
-VMS_int__point_slave_to_Fn( SlaveVP *slaveVP, TopLevelFnPtr fnPtr,
+inline void
+VMS_int__point_slaveVP_to_Fn( SlaveVP *slaveVP, TopLevelFnPtr fnPtr,
                             void    *dataParam)
  { void  *stackPtr;
 
diff -r eaf7e4c58c9e -r 0c83ea8adefc VMS__HW_dependent.h
--- a/VMS__HW_dependent.h	Wed Feb 22 11:39:12 2012 -0800
+++ b/VMS__HW_dependent.h	Sun Mar 04 14:26:35 2012 -0800
@@ -6,28 +6,75 @@
  * 
  */
 
-#ifndef _ProcrContext_H
-#define	_ProcrContext_H
+#ifndef _VMS__HW_DEPENDENT_H
+#define	_VMS__HW_DEPENDENT_H
 #define _GNU_SOURCE
 
-void saveCoreLoopReturnAddr(void **returnAddress);
+void 
+saveCoreLoopReturnAddr(void **returnAddress);
 
-void switchToVP(SlaveVP *nextProcr);
+void 
+switchToSlv(SlaveVP *nextSlave);
 
-void switchToCoreLoop(SlaveVP *nextProcr);
+void 
+switchToCoreLoop(SlaveVP *nextSlave);
 
-void masterSwitchToCoreLoop(SlaveVP *nextProcr);
+void 
+masterSwitchToCoreLoop(SlaveVP *nextSlave);
 
-void startUpTopLevelFn();
+void 
+startUpTopLevelFn();
 
-void *asmTerminateCoreLoop(SlaveVP *currPr);
+void *
+asmTerminateCoreLoop(SlaveVP *currSlv);
 
 #define flushRegisters() \
         asm volatile ("":::"%rbx", "%r12", "%r13","%r14","%r15")
 
 inline SlaveVP *
-create_procr_helper( SlaveVP *newPr,       TopLevelFnPtr  fnPtr,
+create_slaveVP_helper( SlaveVP *newSlv,       TopLevelFnPtr  fnPtr,
                      void      *dataParam, void           *stackLocs );
 
-#endif	/* _ProcrContext_H */
+void
+VMS_int__save_return_into_ptd_to_loc_then_do_ret(void *ptdToLoc);
 
+void
+VMS_int__return_to_addr_in_ptd_to_loc(void *ptdToLoc);
+
+//===================  Macros to Capture Measurements  ======================
+//
+//===== RDTSC wrapper ===== 
+//Also runs with x86_64 code
+#define saveTSCLowHigh(lowHighIn) \
+   asm volatile("RDTSC;                   \
+                 movl %%eax, %0;          \
+                 movl %%edx, %1;"         \
+   /* outputs */ : "=m" (lowHighIn.lowHigh[0]), "=m" (lowHighIn.lowHigh[1])\
+   /* inputs  */ :                        \
+   /* clobber */ : "%eax", "%edx"         \
+                );
+
+#define saveTimeStampCountInto(low, high) \
+   asm volatile("RDTSC;                   \
+                 movl %%eax, %0;          \
+                 movl %%edx, %1;"         \
+   /* outputs */ : "=m" (low), "=m" (high)\
+   /* inputs  */ :                        \
+   /* clobber */ : "%eax", "%edx"         \
+                );
+
+#define saveLowTimeStampCountInto(low)    \
+   asm volatile("RDTSC;                   \
+                 movl %%eax, %0;"         \
+   /* outputs */ : "=m" (low)             \
+   /* inputs  */ :                        \
+   /* clobber */ : "%eax", "%edx"         \
+                );
+
+   //For code that calculates normalization-offset between TSC counts of
+   // different cores.
+//#define NUM_TSC_ROUND_TRIPS 10
+
+
+#endif	/* _VMS__HW_DEPENDENT_H */
+
diff -r eaf7e4c58c9e -r 0c83ea8adefc VMS__HW_dependent.s
--- a/VMS__HW_dependent.s	Wed Feb 22 11:39:12 2012 -0800
+++ b/VMS__HW_dependent.s	Sun Mar 04 14:26:35 2012 -0800
@@ -16,13 +16,13 @@
 // the top-level function, which was pointed to by the stack-ptr
 .globl startUpTopLevelFn
 startUpTopLevelFn:
-    movq    %rdi      , %rsi #get second argument from first argument of switchVP
+    movq    %rdi      , %rsi #get second argument from first argument of switchSlv
     movq    0x08(%rsp), %rdi #get first argument from stack
     movq    (%rsp)    , %rax #get top-level function's addr from stack
     jmp     *%rax            #jump to the top-level function
 
-//Switches form CoreLoop to VP ether a normal VP or the Master Loop
-//switch to virt procr's stack and frame ptr then jump to virt procr fn
+//Switches from CoreLoop to a Slv -- either a normal Slv or the Master Loop
+//switch to Slv's stack and frame ptr then jump to Slv fn
 /* SlaveVP  offsets:
  * 0x10  stackPtr
  * 0x18 framePtr
@@ -34,15 +34,15 @@
  * 0x48 coreLoopReturnPt
  * 0x54 masterLock
  */
-.globl switchToVP
-switchToVP:
+.globl switchToSlv
+switchToSlv:
     #SlaveVP in %rdi
     movq    %rsp      , 0x38(%rdi)   #save core loop stack pointer 
     movq    %rbp      , 0x30(%rdi)   #save core loop frame pointer
     movq    0x10(%rdi), %rsp         #restore stack pointer
     movq    0x18(%rdi), %rbp         #restore frame pointer
     movq    0x20(%rdi), %rax         #get jmp pointer
-    jmp     *%rax                    #jmp to VP
+    jmp     *%rax                    #jmp to Slv
 coreLoopReturn:
     ret
 
@@ -62,7 +62,7 @@
 .globl switchToCoreLoop
 switchToCoreLoop:
     #SlaveVP in %rdi
-    movq    $VPReturn , 0x20(%rdi)   #store return address
+    movq    $SlvReturn , 0x20(%rdi)   #store return address
     movq    %rsp      , 0x10(%rdi)   #save stack pointer 
     movq    %rbp      , 0x18(%rdi)   #save frame pointer
     movq    0x38(%rdi), %rsp         #restore stack pointer
@@ -71,7 +71,7 @@
     movq    (%rcx)    , %rcx
     movq    0x48(%rcx), %rax         #get CoreLoopStartPt
     jmp     *%rax                    #jmp to CoreLoop
-VPReturn:
+SlvReturn:
     ret
 
 
@@ -108,10 +108,10 @@
 
 //Switch to terminateCoreLoop
 //therefor switch to coreLoop context from master context
-// no need to call because the stack is already set up for switchVP
-// and virtPr is in %rdi
+// no need to call because the stack is already set up for switchSlv
+// and Slv is in %rdi
 // and both functions have the same argument.
-// do not save register of VP because this function will never return
+// do not save register of Slv because this function will never return
 /* SlaveVP  offsets:
  * 0x10  stackPtr
  * 0x18 framePtr
@@ -134,7 +134,7 @@
 
 /*
  * This one for the sequential version is special. It discards the current stack
- * and returns directly from the coreLoop after VMS__dissipate_procr was called
+ * and returns directly from the coreLoop after VMS_int__dissipate_slaveVP was called
  */
 .globl asmTerminateCoreLoopSeq
 asmTerminateCoreLoopSeq:
@@ -142,7 +142,7 @@
     movq    0x38(%rdi), %rsp         #restore stack pointer
     movq    0x30(%rdi), %rbp         #restore frame pointer
     #argument is in %rdi
-    call    VMS__dissipate_procr
+    call    VMS_int__dissipate_slaveVP
     movq    %rbp      , %rsp        #goto the coreLoops stack
     pop     %rbp        #restore the old framepointer
     ret                 #return from core loop
@@ -150,18 +150,18 @@
 
 //Assembly code takes the return addr off the stack and saves
 // into the loc pointed to by rdi.  The return addr is at 0x8(%rbp) for 64bit
-.globl asm_save_ret_to_singleton
-VMS_int__save_return_addr_into_ptd_to_loc:
+.globl VMS_int__save_return_into_ptd_to_loc_then_do_ret
+VMS_int__save_return_into_ptd_to_loc_then_do_ret:
     movq 0x8(%rbp),     %rax  #get ret address, rbp is the same as in the calling function
-    movq     %rax,     (%rdi) #write ret addr to endInstrAddr field
+    movq     %rax,     (%rdi) #write ret addr into addr passed as param field
     ret
 
 
 //Assembly code changes the return addr on the stack to the one
-// pointed to by the parameter. The stack's return addr is at 0x8(%rbp)
-.globl asm_write_ret_from_singleton
-VMS_int__write_return_addr_from_ptd_to_loc:
-    movq    (%rdi),    %rax      #get return addr
-    movq      %rax,    0x8(%rbp) #write return addr to the stack of the caller
+// pointed to by the parameter, then returns. Stack's return addr is at 0x8(%rbp)
+.globl VMS_int__return_to_addr_in_ptd_to_loc
+VMS_int__return_to_addr_in_ptd_to_loc:
+    movq    (%rdi),    %rax  #get return addr from addr passed as param
+    movq     %rax, 0x8(%rbp) #write return addr to the stack of the caller
     ret
 
diff -r eaf7e4c58c9e -r 0c83ea8adefc VMS__PI.c
--- a/VMS__PI.c	Wed Feb 22 11:39:12 2012 -0800
+++ b/VMS__PI.c	Sun Mar 04 14:26:35 2012 -0800
@@ -17,13 +17,13 @@
 /*
  */
 VMSReqst *
-VMS_PI__take_next_request_out_of( SlaveVP *procrWithReq )
+VMS_PI__take_next_request_out_of( SlaveVP *slaveWithReq )
  { VMSReqst *req;
 
-   req = procrWithReq->requests;
+   req = slaveWithReq->requests;
    if( req == NULL ) return NULL;
 
-   procrWithReq->requests = procrWithReq->requests->nextReqst;
+   slaveWithReq->requests = slaveWithReq->requests->nextReqst;
    return req;
  }
 
@@ -51,8 +51,8 @@
  * Do the same for OS calls -- look later at it..
  */
 void inline
-VMS_PI__handle_VMSSemReq( VMSReqst *req, SlaveVP *requestingPr, void *semEnv,
-                       ResumeVPFnPtr resumePrFnPtr )
+VMS_PI__handle_VMSSemReq( VMSReqst *req, SlaveVP *requestingSlv, void *semEnv,
+                       ResumeSlvFnPtr resumeSlvFnPtr )
  { VMSSemReq     *semReq;
    IntervalProbe *newProbe;
 
@@ -67,9 +67,9 @@
    newProbe->probeID =
              addToDynArray( newProbe, _VMSMasterEnv->dynIntervalProbesInfo );
 
-   requestingPr->dataRetFromReq = newProbe;
+   requestingSlv->dataRetFromReq = newProbe;
 
-   (*resumePrFnPtr)( requestingPr, semEnv );
+   (*resumeSlvFnPtr)( requestingSlv, semEnv );
  }
 
 
@@ -77,7 +77,7 @@
  * the error message.
  */
 void
-VMS_PI__throw_exception( char *msgStr, SlaveVP *reqstPr, VMSExcp *excpData )
+VMS_PI__throw_exception( char *msgStr, SlaveVP *reqstSlv, VMSExcp *excpData )
  {
    printf("%s",msgStr);
    fflush(stdin);
diff -r eaf7e4c58c9e -r 0c83ea8adefc VMS__WL.c
--- a/VMS__WL.c	Wed Feb 22 11:39:12 2012 -0800
+++ b/VMS__WL.c	Sun Mar 04 14:26:35 2012 -0800
@@ -14,38 +14,30 @@
 #include "VMS.h"
 
 
-/*Anticipating multi-tasking
- */
-void *
-VMS_WL__give_sem_env_for( SlaveVP *animPr )
- {
-   return _VMSMasterEnv->semanticEnv;
- }
-
 
 /*For this implementation of VMS, it may not make much sense to have the
  * system of requests for creating a new processor done this way.. but over
  * the scope of single-master, multi-master, mult-tasking, OS-implementing,
  * distributed-memory, and so on, this gives VMS implementation a chance to
- * do stuff before suspend, in the AppVP, and in the Master before the plugin
+ * do stuff before suspend, in the SlaveVP, and in the Master before the plugin
  * is called, as well as in the lang-lib before this is called, and in the
  * plugin.  So, this gives both VMS and language implementations a chance to
  * intercept at various points and do order-dependent stuff.
  *Having a standard VMSNewPrReqData struc allows the language to create and
- * free the struc, while VMS knows how to get the newPr if it wants it, and
+ * free the struc, while VMS knows how to get the newSlv if it wants it, and
  * it lets the lang have lang-specific data related to creation transported
  * to the plugin.
  */
 void
-VMS_WL__send_create_procr_req( void *semReqData, SlaveVP *reqstingPr )
+VMS_WL__send_create_slaveVP_req( void *semReqData, SlaveVP *reqstingSlv )
  { VMSReqst req;
 
    req.reqType          = createReq;
    req.semReqData       = semReqData;
-   req.nextReqst        = reqstingPr->requests;
-   reqstingPr->requests = &req;
+   req.nextReqst        = reqstingSlv->requests;
+   reqstingSlv->requests = &req;
 
-   VMS_int__suspend_procr( reqstingPr );
+   VMS_int__suspend_slaveVP_and_send_req( reqstingSlv );
  }
 
 
@@ -61,24 +53,24 @@
  *This form is a bit misleading to understand if one is trying to figure out
  * how VMS works -- it looks like a normal function call, but inside it
  * sends a request to the request handler and suspends the processor, which
- * jumps out of the VMS__dissipate_procr function, and out of all nestings
+ * jumps out of the VMS_WL__dissipate_slaveVP function, and out of all nestings
  * above it, transferring the work of dissipating to the request handler,
  * which then does the actual work -- causing the processor that animated
  * the call of this function to disappear and the "hanging" state of this
  * function to just poof into thin air -- the virtual processor's trace
  * never returns from this call, but instead the virtual processor's trace
  * gets suspended in this call and all the virt processor's state disap-
- * pears -- making that suspend the last thing in the virt procr's trace.
+ * pears -- making that suspend the last thing in the Slv's trace.
  */
 void
-VMS_WL__send_dissipate_req( SlaveVP *procrToDissipate )
+VMS_WL__send_dissipate_req( SlaveVP *slaveToDissipate )
  { VMSReqst req;
 
    req.reqType                = dissipate;
-   req.nextReqst              = procrToDissipate->requests;
-   procrToDissipate->requests = &req;
+   req.nextReqst              = slaveToDissipate->requests;
+   slaveToDissipate->requests = &req;
 
-   VMS_int__suspend_procr( procrToDissipate );
+   VMS_int__suspend_slaveVP_and_send_req( slaveToDissipate );
  }
 
 
@@ -95,14 +87,14 @@
  */
 inline void
 VMS_WL__add_sem_request_in_mallocd_VMSReqst( void *semReqData,
-                                          SlaveVP *callingPr )
+                                          SlaveVP *callingSlv )
  { VMSReqst *req;
 
    req = VMS_int__malloc( sizeof(VMSReqst) );
    req->reqType         = semantic;
    req->semReqData      = semReqData;
-   req->nextReqst       = callingPr->requests;
-   callingPr->requests = req;
+   req->nextReqst       = callingSlv->requests;
+   callingSlv->requests = req;
  }
 
 /*This inserts the semantic-layer's request data into standard VMS carrier
@@ -111,28 +103,28 @@
  *Then it does suspend, to cause request to be sent.
  */
 inline void
-VMS_WL__send_sem_request( void *semReqData, SlaveVP *callingPr )
+VMS_WL__send_sem_request( void *semReqData, SlaveVP *callingSlv )
  { VMSReqst req;
 
    req.reqType         = semantic;
    req.semReqData      = semReqData;
-   req.nextReqst       = callingPr->requests;
-   callingPr->requests = &req;
+   req.nextReqst       = callingSlv->requests;
+   callingSlv->requests = &req;
    
-   VMS_int__suspend_procr( callingPr );
+   VMS_int__suspend_slaveVP_and_send_req( callingSlv );
  }
 
 
 inline void
-VMS_WL__send_VMSSem_request( void *semReqData, SlaveVP *callingPr )
+VMS_WL__send_VMSSem_request( void *semReqData, SlaveVP *callingSlv )
  { VMSReqst req;
 
    req.reqType         = VMSSemantic;
    req.semReqData      = semReqData;
-   req.nextReqst       = callingPr->requests; //gab any other preceeding 
-   callingPr->requests = &req;
+   req.nextReqst       = callingSlv->requests; //chain any requests already queued on the Slv
+   callingSlv->requests = &req;
 
-   VMS_int__suspend_procr( callingPr );
+   VMS_int__suspend_slaveVP_and_send_req( callingSlv );
  }
 
 
diff -r eaf7e4c58c9e -r 0c83ea8adefc VMS__int.c
--- a/VMS__int.c	Wed Feb 22 11:39:12 2012 -0800
+++ b/VMS__int.c	Sun Mar 04 14:26:35 2012 -0800
@@ -15,18 +15,18 @@
 
 
 inline SlaveVP *
-VMS_int__create_procr( TopLevelFnPtr fnPtr, void *dataParam )
- { SlaveVP *newPr;
+VMS_int__create_slaveVP( TopLevelFnPtr fnPtr, void *dataParam )
+ { SlaveVP *newSlv;
    void      *stackLocs;
 
-   newPr      = VMS_int__malloc( sizeof(SlaveVP) );
+   newSlv      = VMS_int__malloc( sizeof(SlaveVP) );
    stackLocs  = VMS_int__malloc( VIRT_PROCR_STACK_SIZE );
    if( stackLocs == 0 )
     { perror("VMS__malloc stack"); exit(1); }
 
-   _VMSMasterEnv->numSlaves += 1;
+   _VMSMasterEnv->numSlavesAlive += 1;
 
-   return create_procr_helper( newPr, fnPtr, dataParam, stackLocs );
+   return create_slaveVP_helper( newSlv, fnPtr, dataParam, stackLocs );
  }
 
 /* "ext" designates that it's for use outside the VMS system -- should only
@@ -34,59 +34,49 @@
  * a VMS virtual processor.
  */
 inline SlaveVP *
-VMS_ext__create_procr( TopLevelFnPtr fnPtr, void *dataParam )
- { SlaveVP *newPr;
+VMS_ext__create_slaveVP( TopLevelFnPtr fnPtr, void *dataParam )
+ { SlaveVP *newSlv;
    char      *stackLocs;
 
-   newPr      = malloc( sizeof(SlaveVP) );
+   newSlv      = malloc( sizeof(SlaveVP) );
    stackLocs  = malloc( VIRT_PROCR_STACK_SIZE );
    if( stackLocs == 0 )
     { perror("malloc stack"); exit(1); }
 
-   return create_procr_helper( newPr, fnPtr, dataParam, stackLocs );
+   _VMSMasterEnv->numSlavesAlive += 1;
+
+   return create_slaveVP_helper(newSlv, fnPtr, dataParam, stackLocs);
  }
 
 
 //===========================================================================
 /*there is a label inside this function -- save the addr of this label in
- * the callingPr struc, as the pick-up point from which to start the next
- * work-unit for that procr.  If turns out have to save registers, then
- * save them in the procr struc too.  Then do assembly jump to the CoreLoop's
- * "done with work-unit" label.  The procr struc is in the request in the
+ * the callingSlv struc, as the pick-up point from which to start the next
+ * work-unit for that slave.  If turns out have to save registers, then
+ * save them in the slave struc too.  Then do assembly jump to the CoreLoop's
+ * "done with work-unit" label.  The slave struc is in the request in the
  * slave that animated the just-ended work-unit, so all the state is saved
  * there, and will get passed along, inside the request handler, to the
- * next work-unit for that procr.
+ * next work-unit for that slave.
  */
 void
-VMS_int__suspend_procr( SlaveVP *animatingPr )
+VMS_int__suspend_slaveVP_and_send_req( SlaveVP *animatingSlv )
  { 
 
-      //The request to master will cause this suspended virt procr to get
+      //The request to master will cause this suspended Slv to get
       // scheduled again at some future point -- to resume, core loop jumps
       // to the resume point (below), which causes restore of saved regs and
       // "return" from this call.
-   //animatingPr->resumeInstrPtr = &&ResumePt;
+   //animatingSlv->resumeInstrPtr = &&ResumePt;
 
-      //return ownership of the virt procr and sched slot to Master virt pr
-   animatingPr->schedSlot->workIsDone = TRUE;
+      //return ownership of the Slv and sched slot to Master virt pr
+   animatingSlv->schedSlot->workIsDone = TRUE;
 
-   //===========================  Measurement stuff ========================
-   #ifdef MEAS__TIME_STAMP_SUSP
-      //record time stamp: compare to time-stamp recorded below
-   saveLowTimeStampCountInto( animatingPr->preSuspTSCLow );
-   #endif
-   //=======================================================================
-
-   switchToCoreLoop(animatingPr);
+         MEAS__Capture_Pre_Susp_Point;
+   switchToCoreLoop(animatingSlv);
    flushRegisters();
-
-   //=======================================================================
-
-   #ifdef MEAS__TIME_STAMP_SUSP
-      //NOTE: only take low part of count -- do sanity check when take diff
-   saveLowTimeStampCountInto( animatingPr->postSuspTSCLow );
-   #endif
-
+         MEAS__Capture_Post_Susp_Point;
+		 
    return;
  }
 
@@ -95,19 +85,19 @@
  * be called from main thread or other thread -- never from code animated by
  * a SlaveVP, nor from a masterVP.
  *
- *Use this version to dissipate VPs created outside the VMS system.
+ *Use this version to dissipate Slvs created outside the VMS system.
  */
 void
-VMS_ext__dissipate_procr( SlaveVP *procrToDissipate )
+VMS_ext__dissipate_slaveVP( SlaveVP *slaveToDissipate )
  {
       //NOTE: dataParam was given to the processor, so should either have
-      // been alloc'd with VMS__malloc, or freed by the level above animPr.
+      // been alloc'd with VMS__malloc, or freed by the level above animSlv.
       //So, all that's left to free here is the stack and the SlaveVP struc
       // itself
       //Note, should not stack-allocate the data param -- no guarantee, in
       // general that creating processor will outlive ones it creates.
-   free( procrToDissipate->startOfStack );
-   free( procrToDissipate );
+   free( slaveToDissipate->startOfStack );
+   free( slaveToDissipate );
  }
 
 
@@ -130,26 +120,32 @@
  * of dis-owning it.
  */
 void
-VMS_int__dissipate_procr( SlaveVP *animatingPr )
+VMS_int__dissipate_slaveVP( SlaveVP *animatingSlv )
  {
       //dis-own all locations owned by this processor, causing to be freed
       // any locations that it is (was) sole owner of
-//TODO: implement VMS__malloc system, including "give up ownership"
 
-   _VMSMasterEnv->numSlaves -= 1;
-   if( _VMSMasterEnv->numSlaves == 0 )
+   _VMSMasterEnv->numSlavesAlive -= 1;
+   if( _VMSMasterEnv->numSlavesAlive == 0 )
     {    //no more work, so shutdown
-      VMS_int__shutdown();  //note, creates 4 shut-down processors
+      VMS_SS__shutdown();  //note, creates one shut-down Slv per core (NUM_CORES)
     }
 
       //NOTE: dataParam was given to the processor, so should either have
-      // been alloc'd with VMS__malloc, or freed by the level above animPr.
+      // been alloc'd with VMS__malloc, or freed by the level above animSlv.
       //So, all that's left to free here is the stack and the SlaveVP struc
       // itself
       //Note, should not stack-allocate initial data -- no guarantee, in
       // general that creating processor will outlive ones it creates.
-   VMS_int__free( animatingPr->startOfStack );
-   VMS_int__free( animatingPr );
+   VMS_int__free( animatingSlv->startOfStack );
+   VMS_int__free( animatingSlv );
  }
 
+/*Anticipating multi-tasking
+ */
+void *
+VMS_int__give_sem_env_for( SlaveVP *animSlv )
+ {
+   return _VMSMasterEnv->semanticEnv;
+ }
 
diff -r eaf7e4c58c9e -r 0c83ea8adefc VMS__startup_and_shutdown.c
--- a/VMS__startup_and_shutdown.c	Wed Feb 22 11:39:12 2012 -0800
+++ b/VMS__startup_and_shutdown.c	Sun Mar 04 14:26:35 2012 -0800
@@ -12,7 +12,7 @@
 #include <sys/time.h>
 
 #include "VMS.h"
-#include "VMS__HW_dependent.h"
+//#include "VMS__HW_dependent.h"
 
 
 #define thdAttrs NULL
@@ -34,7 +34,7 @@
 create_free_list();
 
 void
-endOSThreadFn( void *initData, SlaveVP *animatingPr );
+endOSThreadFn( void *initData, SlaveVP *animatingSlv );
 
 pthread_mutex_t suspendLock = PTHREAD_MUTEX_INITIALIZER;
 pthread_cond_t  suspend_cond  = PTHREAD_COND_INITIALIZER;
@@ -43,9 +43,9 @@
 
 /*Setup has two phases:
  * 1) Semantic layer first calls init_VMS, which creates masterEnv, and puts
- *    the master virt procr into the work-queue, ready for first "call"
+ *    the master Slv into the work-queue, ready for first "call"
  * 2) Semantic layer then does its own init, which creates the seed virt
- *    procr inside the semantic layer, ready to schedule it when
+ *    slave inside the semantic layer, ready to schedule it when
  *    asked by the first run of the masterLoop.
  *
  *This part is bit weird because VMS really wants to be "always there", and
@@ -54,15 +54,15 @@
  *
  *The semantic layer is isolated from the VMS internals by making the
  * semantic layer do setup to a state that it's ready with its
- * initial virt procrs, ready to schedule them to slots when the masterLoop
+ * initial Slvs, ready to schedule them to slots when the masterLoop
  * asks.  Without this pattern, the semantic layer's setup would
  * have to modify slots directly to assign the initial virt-procrs, and put
  * them into the readyToAnimateQ itself, breaking the isolation completely.
  *
  * 
- *The semantic layer creates the initial virt procr(s), and adds its
+ *The semantic layer creates the initial Slv(s), and adds its
  * own environment to masterEnv, and fills in the pointers to
- * the requestHandler and slaveScheduler plug-in functions
+ * the requestHandler and slaveAssigner plug-in functions
  */
 
 /*This allocates VMS data structures, populates the master VMSProc,
@@ -70,7 +70,7 @@
  * layer.
  */
 void
-VMS_int__init()
+VMS_SS__init()
  {
 
 #ifdef SEQUENTIAL
@@ -97,24 +97,12 @@
         //Very first thing put into the master env is the free-list, seeded
         // with a massive initial chunk of memory.
         //After this, all other mallocs are VMS__malloc.
-   _VMSMasterEnv->freeListHead        = VMS_ext__create_free_list();
+   _VMSMasterEnv->freeLists        = VMS_ext__create_free_list();
 
 
-   //============================= MEASUREMENT STUFF ========================
-   #ifdef MEAS__TIME_MALLOC
-   _VMSMasterEnv->mallocTimeHist  = makeFixedBinHistExt( 100, 0, 30,
-                                                       "malloc_time_hist");
-   _VMSMasterEnv->freeTimeHist  = makeFixedBinHistExt( 100, 0, 30,
-                                                       "free_time_hist");
-   #endif
-   #ifdef MEAS__TIME_PLUGIN
-   _VMSMasterEnv->reqHdlrLowTimeHist  = makeFixedBinHistExt( 100, 0, 200,
-                                                     "plugin_low_time_hist");
-   _VMSMasterEnv->reqHdlrHighTimeHist  = makeFixedBinHistExt( 100, 0, 200,
-                                                    "plugin_high_time_hist");
-   #endif
-   //========================================================================
-
+         MEAS__Make_Meas_Hists_for_Malloc_Meas;
+         MEAS__Make_Meas_Hists_for_Plugin_Meas;
+   
    //===================== Only VMS__malloc after this ====================
    masterEnv     = (MasterEnv*)_VMSMasterEnv;
    
@@ -125,15 +113,15 @@
       //One array for each core, 3 in array, core's masterVP scheds all
    allSchedSlots    = VMS_int__malloc( NUM_CORES * sizeof(SchedSlot *) );
 
-   _VMSMasterEnv->numSlaves = 0;  //used to detect shut-down condition
+   _VMSMasterEnv->numSlavesAlive = 0;  //used to detect shut-down condition
 
-   _VMSMasterEnv->numVPsCreated = 0;  //used by create procr to set ID
+   _VMSMasterEnv->numSlavesCreated = 0;  //used by create slave to set ID
    for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ )
     {    
       readyToAnimateQs[ coreIdx ] = makeVMSQ();
       
          //Q: should give masterVP core-specific info as its init data?
-      masterVPs[ coreIdx ] = VMS_int__create_procr( (TopLevelFnPtr)&masterLoop, (void*)masterEnv );
+      masterVPs[ coreIdx ] = VMS_int__create_slaveVP( (TopLevelFnPtr)&masterLoop, (void*)masterEnv );
       masterVPs[ coreIdx ]->coreAnimatedBy = coreIdx;
       allSchedSlots[ coreIdx ] = create_sched_slots(); //makes for one core
       _VMSMasterEnv->numMasterInARow[ coreIdx ] = 0;
@@ -146,10 +134,6 @@
    _VMSMasterEnv->workStealingLock = UNLOCKED;
 
 
-      //Aug 19, 2010:  no longer need to place initial masterVP into queue
-      // because coreLoop now controls -- animates its masterVP when no work
-
-
    //============================= MEASUREMENT STUFF ========================
    #ifdef STATS__TURN_ON_PROBES
    _VMSMasterEnv->dynIntervalProbesInfo =
@@ -163,14 +147,10 @@
    _VMSMasterEnv->createPtInSecs =
                            timeStamp.tv_sec +(timeStamp.tv_usec/1000000.0);
    #endif
-   #ifdef MEAS__TIME_MASTER_LOCK
-   _VMSMasterEnv->masterLockLowTimeHist  = makeFixedBinHist( 50, 0, 2,
-                                                "master lock low time hist");
-   _VMSMasterEnv->masterLockHighTimeHist  = makeFixedBinHist( 50, 0, 100,
-                                               "master lock high time hist");
-   #endif
    
-   MakeTheMeasHists();
+   MEAS__Make_Meas_Hists_for_Master_Lock_Meas;
+   
+   MEAS__Make_Meas_Hists_for_Language();
    //========================================================================
  }
 
@@ -187,7 +167,7 @@
 
          //Set state to mean "handling requests done, slot needs filling"
       schedSlots[i]->workIsDone         = FALSE;
-      schedSlots[i]->needsProcrAssigned = TRUE;
+      schedSlots[i]->needsSlaveAssigned = TRUE;
     }
    return schedSlots;
  }
@@ -233,17 +213,17 @@
 
 
 void
-VMS_WL__register_request_handler( RequestHandler requestHandler )
+VMS_SS__register_request_handler( RequestHandler requestHandler )
  { _VMSMasterEnv->requestHandler = requestHandler;
  }
 
 
 void
-VMS_WL__register_sched_assigner( Sched_Assigner schedAssigner )
- { _VMSMasterEnv->slaveSchedAssigner = schedAssigner;
+VMS_SS__register_sched_assigner( Sched_Assigner schedAssigner )
+ { _VMSMasterEnv->slaveAssigner = schedAssigner;
  }
 
-VMS_WL__register_semantic_env( void *semanticEnv )
+void VMS_SS__register_semantic_env( void *semanticEnv ) //explicit return type: implicit int is invalid since C99
  { _VMSMasterEnv->semanticEnv = semanticEnv;
  }
 
@@ -254,7 +234,7 @@
  *Wrapper lib layer calls this when it wants the system to start running..
  */
 void
-VMS_WL__start_the_work_then_wait_until_done()
+VMS_SS__start_the_work_then_wait_until_done()
  { 
 #ifdef SEQUENTIAL
    /*Only difference between version with an OS thread pinned to each core and
@@ -293,7 +273,7 @@
 
 //TODO: look at architecting cleanest separation between request handler
 // and master loop, for dissipate, create, shutdown, and other non-semantic
-// requests.  Issue is chain: one removes requests from AppVP, one dispatches
+// requests.  Issue is chain: one removes requests from AppSlv, one dispatches
 // on type of request, and one handles each type..  but some types require
 // action from both request handler and master loop -- maybe just give the
 // request handler calls like:  VMS__handle_X_request_type
@@ -308,7 +288,7 @@
  *The _VMSMasterEnv is needed by this shut down function, so the create-seed-
  * and-wait function has to free a bunch of stuff after it detects the
  * threads have all died: the masterEnv, the thread-related locations,
- * masterVP any AppVPs that might still be allocated and sitting in the
+ * masterVP any AppSlvs that might still be allocated and sitting in the
  * semantic environment, or have been orphaned in the _VMSWorkQ.
  * 
  *NOTE: the semantic plug-in is expected to use VMS__malloc to get all the
@@ -318,22 +298,22 @@
  *In here,create one core-loop shut-down processor for each core loop and put
  * them all directly into the readyToAnimateQ.
  *Note, this function can ONLY be called after the semantic environment no
- * longer cares if AppVPs get animated after the point this is called.  In
+ * longer cares if AppSlvs get animated after the point this is called.  In
  * other words, this can be used as an abort, or else it should only be
- * called when all AppVPs have finished dissipate requests -- only at that
+ * called when all AppSlvs have finished dissipate requests -- only at that
  * point is it sure that all results have completed.
  */
 void
-VMS_int__shutdown()
+VMS_SS__shutdown()
  { int coreIdx;
-   SlaveVP *shutDownPr;
+   SlaveVP *shutDownSlv;
 
       //create the shutdown processors, one for each core loop -- put them
       // directly into the Q -- each core will die when gets one
    for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ )
     {    //Note, this is running in the master
-      shutDownPr = VMS_int__create_procr( &endOSThreadFn, NULL );
-      writeVMSQ( shutDownPr, _VMSMasterEnv->readyToAnimateQs[coreIdx] );
+      shutDownSlv = VMS_int__create_slaveVP( &endOSThreadFn, NULL );
+      writeVMSQ( shutDownSlv, _VMSMasterEnv->readyToAnimateQs[coreIdx] );
     }
 
  }
@@ -348,78 +328,37 @@
  * up just as if it never jumped out, before calling pthread_exit.
  *The end-point of core loop will free the stack and so forth of the
  * processor that animates this function, (this fn is transfering the
- * animator of the AppVP that is in turn animating this function over
+ * animator of the AppSlv that is in turn animating this function over
  * to core loop function -- note that this slices out a level of virtual
  * processors).
  */
 void
-endOSThreadFn( void *initData, SlaveVP *animatingPr )
+endOSThreadFn( void *initData, SlaveVP *animatingSlv )
  { 
-#ifdef SEQUENTIAL
-    asmTerminateCoreLoopSeq(animatingPr);
-#else
-    asmTerminateCoreLoop(animatingPr);
-#endif
+   #ifdef SEQUENTIAL
+    asmTerminateCoreLoopSeq(animatingSlv);
+   #else
+    asmTerminateCoreLoop(animatingSlv);
+   #endif
  }
 
 
 /*This is called from the startup & shutdown
  */
 void
-VMS_int__cleanup_at_end_of_shutdown()
+VMS_SS__cleanup_at_end_of_shutdown()
  { 
-   //unused
-   //VMSQueueStruc **readyToAnimateQs;
-   //int              coreIdx;
-   //SlaveVP      **masterVPs;
-   //SchedSlot     ***allSchedSlots; //ptr to array of ptrs
-
       //Before getting rid of everything, print out any measurements made
    forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, (DynArrayFnPtr)&printHist );
    forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, (DynArrayFnPtr)&saveHistToFile);
    forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, &freeHist );
-   #ifdef MEAS__TIME_PLUGIN
-   printHist( _VMSMasterEnv->reqHdlrLowTimeHist );
-   saveHistToFile( _VMSMasterEnv->reqHdlrLowTimeHist );
-   printHist( _VMSMasterEnv->reqHdlrHighTimeHist );
-   saveHistToFile( _VMSMasterEnv->reqHdlrHighTimeHist );
-   freeHistExt( _VMSMasterEnv->reqHdlrLowTimeHist );
-   freeHistExt( _VMSMasterEnv->reqHdlrHighTimeHist );
-   #endif
-   #ifdef MEAS__TIME_MALLOC
-   printHist( _VMSMasterEnv->mallocTimeHist   );
-   saveHistToFile( _VMSMasterEnv->mallocTimeHist   );
-   printHist( _VMSMasterEnv->freeTimeHist     );
-   saveHistToFile( _VMSMasterEnv->freeTimeHist     );
-   freeHistExt( _VMSMasterEnv->mallocTimeHist );
-   freeHistExt( _VMSMasterEnv->freeTimeHist   );
-   #endif
-   #ifdef MEAS__TIME_MASTER_LOCK
-   printHist( _VMSMasterEnv->masterLockLowTimeHist );
-   printHist( _VMSMasterEnv->masterLockHighTimeHist );
-   #endif
-   #ifdef MEAS__TIME_MASTER
-   printHist( _VMSMasterEnv->pluginTimeHist );
-   for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ )
-    {
-      freeVMSQ( readyToAnimateQs[ coreIdx ] );
-         //master VPs were created external to VMS, so use external free
-      VMS_int__dissipate_procr( masterVPs[ coreIdx ] );
-
-      freeSchedSlots( allSchedSlots[ coreIdx ] );
-    }
-   #endif
-   #ifdef MEAS__TIME_STAMP_SUSP
-   printHist( _VMSMasterEnv->pluginTimeHist );
-   for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ )
-    {
-      freeVMSQ( readyToAnimateQs[ coreIdx ] );
-         //master VPs were created external to VMS, so use external free
-      VMS_int__dissipate_procr( masterVPs[ coreIdx ] );
-
-      freeSchedSlots( allSchedSlots[ coreIdx ] );
-    }
-   #endif
+   
+   MEAS__Print_Hists_for_Susp_Meas;
+   MEAS__Print_Hists_for_Master_Meas;
+   MEAS__Print_Hists_for_Master_Lock_Meas;
+   MEAS__Print_Hists_for_Malloc_Meas;
+   MEAS__Print_Hists_for_Plugin_Meas;
+   
 
       //All the environment data has been allocated with VMS__malloc, so just
       // free its internal big-chunk and all inside it disappear.
@@ -431,24 +370,24 @@
    for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ )
     {
       freeVMSQ( readyToAnimateQs[ coreIdx ] );
-         //master VPs were created external to VMS, so use external free
-      VMS__dissipate_procr( masterVPs[ coreIdx ] );
+         //master Slvs were created with VMS_int__create_slaveVP, so use the internal dissipate
+      VMS_int__dissipate_slaveVP( masterVPs[ coreIdx ] );
       
       freeSchedSlots( allSchedSlots[ coreIdx ] );
     }
    
-   VMS__free( _VMSMasterEnv->readyToAnimateQs );
-   VMS__free( _VMSMasterEnv->masterVPs );
-   VMS__free( _VMSMasterEnv->allSchedSlots );
+   VMS_int__free( _VMSMasterEnv->readyToAnimateQs );
+   VMS_int__free( _VMSMasterEnv->masterVPs );
+   VMS_int__free( _VMSMasterEnv->allSchedSlots );
    
    //============================= MEASUREMENT STUFF ========================
    #ifdef STATS__TURN_ON_PROBES
-   freeDynArrayDeep( _VMSMasterEnv->dynIntervalProbesInfo, &VMS__free_probe);
+   freeDynArrayDeep( _VMSMasterEnv->dynIntervalProbesInfo, &VMS_WL__free_probe);
    #endif
    //========================================================================
 */
       //These are the only two that use system free 
-   VMS_ext__free_free_list( _VMSMasterEnv->freeListHead );
+   VMS_ext__free_free_list( _VMSMasterEnv->freeLists );
    free( (void *)_VMSMasterEnv );
  }
 
diff -r eaf7e4c58c9e -r 0c83ea8adefc VMS_defs__HW_specific.h
--- a/VMS_defs__HW_specific.h	Wed Feb 22 11:39:12 2012 -0800
+++ b/VMS_defs__HW_specific.h	Sun Mar 04 14:26:35 2012 -0800
@@ -27,7 +27,7 @@
    // stack size in virtual processors created
 #define VIRT_PROCR_STACK_SIZE 0x8000 /* 32K */
 
-   // memory for VMS__malloc
+   // memory for VMS_WL__malloc
 #define MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE 0x40000000 /* 1G */
 
    //Frequency of TS counts -- have to do tests to verify
diff -r eaf7e4c58c9e -r 0c83ea8adefc VMS_defs__lang_specific.h
--- a/VMS_defs__lang_specific.h	Wed Feb 22 11:39:12 2012 -0800
+++ b/VMS_defs__lang_specific.h	Sun Mar 04 14:26:35 2012 -0800
@@ -13,80 +13,8 @@
 
 //===================  Language-specific Measurement Stuff ===================
 //
-//TODO:  Figure out way to move these into language dir..
-//   wrap them in #ifdef MEAS__...
+//TODO:  move these into the language implementation directories
 //
-#ifndef MAKE_HISTS_FOR_MEASUREMENTS
-#define MakeTheMeasHists() 
-#endif
-
-//===========================================================================
-//VPThread
-#ifdef VTHREAD
-
-#define createHistIdx      1  //note: starts at 1
-#define mutexLockHistIdx   2
-#define mutexUnlockHistIdx 3
-#define condWaitHistIdx    4
-#define condSignalHistIdx  5
-
-#define MakeTheMeasHists() \
-   _VMSMasterEnv->measHistsInfo = \
-              makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \
-   makeAMeasHist( createHistIdx,      "create",        250, 0, 100 ) \
-   makeAMeasHist( mutexLockHistIdx,   "mutex_lock",    50, 0, 100 ) \
-   makeAMeasHist( mutexUnlockHistIdx, "mutex_unlock",  50, 0, 100 ) \
-   makeAMeasHist( condWaitHistIdx,    "cond_wait",     50, 0, 100 ) \
-   makeAMeasHist( condSignalHistIdx,  "cond_signal",   50, 0, 100 )
-
-   
-#define Meas_startCreate \
-    int32 startStamp, endStamp; \
-    saveLowTimeStampCountInto( startStamp ); \
-
-#define Meas_endCreate \
-    saveLowTimeStampCountInto( endStamp ); \
-    addIntervalToHist( startStamp, endStamp, \
-                                 _VMSMasterEnv->measHists[ createHistIdx ] );
-
-#define Meas_startMutexLock \
-    int32 startStamp, endStamp; \
-    saveLowTimeStampCountInto( startStamp ); \
-
-#define Meas_endMutexLock \
-    saveLowTimeStampCountInto( endStamp ); \
-    addIntervalToHist( startStamp, endStamp, \
-                              _VMSMasterEnv->measHists[ mutexLockHistIdx ] );
-
-#define Meas_startMutexUnlock \
-    int32 startStamp, endStamp; \
-    saveLowTimeStampCountInto( startStamp ); \
-
-#define Meas_endMutexUnlock \
-    saveLowTimeStampCountInto( endStamp ); \
-    addIntervalToHist( startStamp, endStamp, \
-                            _VMSMasterEnv->measHists[ mutexUnlockHistIdx ] );
-
-#define Meas_startCondWait \
-    int32 startStamp, endStamp; \
-    saveLowTimeStampCountInto( startStamp ); \
-
-#define Meas_endCondWait \
-    saveLowTimeStampCountInto( endStamp ); \
-    addIntervalToHist( startStamp, endStamp, \
-                               _VMSMasterEnv->measHists[ condWaitHistIdx ] );
-
-#define Meas_startCondSignal \
-    int32 startStamp, endStamp; \
-    saveLowTimeStampCountInto( startStamp ); \
-
-#define Meas_endCondSignal \
-    saveLowTimeStampCountInto( endStamp ); \
-    addIntervalToHist( startStamp, endStamp, \
-                             _VMSMasterEnv->measHists[ condSignalHistIdx ] );
-
-#endif
-
 
 
 //===========================================================================
@@ -97,7 +25,7 @@
 #define spawnHistIdx      1 //note: starts at 1
 #define syncHistIdx       2
 
-#define MakeTheMeasHists() \
+#define MEAS__Make_Meas_Hists_for_Language() \
    _VMSMasterEnv->measHistsInfo = \
           makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \
     makeAMeasHist( spawnHistIdx,      "Spawn",        50, 0, 200 ) \
@@ -133,7 +61,7 @@
 #define ReceiveFromToHistIdx   3
 #define ReceiveOfTypeHistIdx   4
 
-#define MakeTheMeasHists() \
+#define MEAS__Make_Meas_Hists_for_Language() \
    _VMSMasterEnv->measHistsInfo = \
               makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \
     makeAMeasHist( SendFromToHistIdx,   "SendFromTo",    50, 0, 100 ) \
diff -r eaf7e4c58c9e -r 0c83ea8adefc VMS_defs__main.h
--- a/VMS_defs__main.h	Wed Feb 22 11:39:12 2012 -0800
+++ b/VMS_defs__main.h	Sun Mar 04 14:26:35 2012 -0800
@@ -6,8 +6,8 @@
  * 
  */
 
-#ifndef _VMS_DEFS_H
-#define	_VMS_DEFS_H
+#ifndef _VMS_DEFS_MAIN_H
+#define	_VMS_DEFS_MAIN_H
 #define _GNU_SOURCE
 
 //===========================  VMS-wide defs  ===============================
@@ -19,7 +19,7 @@
    // so these defs can be at the top, and writePrivQ defined later on..
 #define writeVMSQ     writePrivQ
 #define readVMSQ      readPrivQ
-#define makeVMSQ      makeVMSPrivQ
+#define makeVMSQ      makePrivQ
 #define numInVMSQ     numInPrivQ
 #define VMSQueueStruc PrivQueueStruc
 
@@ -31,21 +31,21 @@
 //
 //When SEQUENTIAL is defined, VMS does sequential exe in the main thread
 // It still does co-routines and all the mechanisms are the same, it just
-// has only a single thread and animates VPs one at a time
+// has only a single thread and animates Slvs one at a time
 //#define SEQUENTIAL
 
 //#define USE_WORK_STEALING
 
 //turns on the probe-instrumentation in the application -- when not
 // defined, the calls to the probe functions turn into comments
-#define STATS__ENABLE_PROBES
+//#define STATS__ENABLE_PROBES
 //#define TURN_ON_DEBUG_PROBES
 
 //These defines turn types of bug messages on and off
 // be sure debug messages are un-commented (next block of defines)
 #define dbgAppFlow   TRUE /* Top level flow of application code -- general*/
 #define dbgProbes    FALSE /* for issues inside probes themselves*/
-#define dbgB2BMaster FALSE /* in coreloop, back to back master VPs*/
+#define dbgB2BMaster FALSE /* in coreloop, back to back master Slvs*/
 #define dbgRqstHdlr  FALSE /* in request handler code*/
 
 //Comment or un- the substitute half to turn on/off types of debug message
@@ -74,112 +74,310 @@
 
 //==================  Turn Measurement Things on and off ====================
 
-//#define MEAS__TIME_2011_SYS
+//#define MEAS__TURN_ON_SYSTEM_MEAS
 
-//define this if any MEAS__... below are
-//#define MAKE_HISTS_FOR_MEASUREMENTS
-   //when MEAS__TIME_STAMP_SUSP is defined, causes code to be inserted and
-   // compiled-in that saves the low part of the time stamp count just before
-   // suspending a processor and just after resuming that processor.  It is
-   // saved into a field added to VirtProcr.  Have to sanity-check for
-   // rollover of low portion into high portion.
-//#define MEAS__TIME_STAMP_SUSP
-//#define MEAS__TIME_MASTER
-//#define MEAS__TIME_PLUGIN
-//#define MEAS__TIME_MALLOC
-//#define MEAS__TIME_MASTER_LOCK
+/*NOTE: define MEAS__TURN_ON_MAKE_HISTS if any other MEAS__... below are*/
+//#define MEAS__TURN_ON_MAKE_HISTS
 
-   //For code that calculates normalization-offset between TSC counts of
-   // different cores.
-//#define NUM_TSC_ROUND_TRIPS 10
+//#define MEAS__TURN_ON_SUSP_MEAS
+//#define MEAS__TURN_ON_MASTER_MEAS
+//#define MEAS__TURN_ON_PLUGIN_MEAS
+//#define MEAS__TURN_ON_MALLOC_MEAS
+//#define MEAS__TURN_ON_MASTER_LOCK_MEAS
 
+   /*turn on/off subtraction of create measurements from plugin meas*/
+//#define MEAS__TURN_ON_EXCLUDE_CREATION_TIME 
 
 
-//===================  Macros to Capture Measurements  ======================
-//
-//===== RDTSC wrapper ===== 
-//Also runs with x86_64 code
-#define saveTSCLowHigh(lowHighIn) \
-   asm volatile("RDTSC;                   \
-                 movl %%eax, %0;          \
-                 movl %%edx, %1;"         \
-   /* outputs */ : "=m" (lowHighIn.lowHigh[0]), "=m" (lowHighIn.lowHigh[1])\
-   /* inputs  */ :                        \
-   /* clobber */ : "%eax", "%edx"         \
-                );
-
-#define saveTimeStampCountInto(low, high) \
-   asm volatile("RDTSC;                   \
-                 movl %%eax, %0;          \
-                 movl %%edx, %1;"         \
-   /* outputs */ : "=m" (low), "=m" (high)\
-   /* inputs  */ :                        \
-   /* clobber */ : "%eax", "%edx"         \
-                );
-
-#define saveLowTimeStampCountInto(low)    \
-   asm volatile("RDTSC;                   \
-                 movl %%eax, %0;"         \
-   /* outputs */ : "=m" (low)             \
-   /* inputs  */ :                        \
-   /* clobber */ : "%eax", "%edx"         \
-                );
-
 
 //==================  Macros define types of meas want  =====================
-#ifdef MEAS__TIME_PLUGIN
 
-#define Meas_startReqHdlr \
-        int32 startStamp1, endStamp1; \
-        saveLowTimeStampCountInto( startStamp1 );
+#ifdef MEAS__TURN_ON_SUSP_MEAS
+   #define MEAS__Insert_Susp_Meas_Fields_into_Slave \
+       uint32  preSuspTSCLow; \
+       uint32  postSuspTSCLow;
 
-#define Meas_endReqHdlr \
-        saveLowTimeStampCountInto( endStamp1 ); \
-        addIntervalToHist( startStamp1, endStamp1, \
-                           _VMSMasterEnv->reqHdlrLowTimeHist ); \
-        addIntervalToHist( startStamp1, endStamp1, \
-                           _VMSMasterEnv->reqHdlrHighTimeHist );
-               
-#elif defined MEAS__TIME_2011_SYS
-#define Meas_startMasterLoop \
-        TSCountLowHigh startStamp1, endStamp1; \
-        saveTSCLowHigh( endStamp1 ); \
-        _VMSMasterEnv->cyclesTillStartMasterLoop = \
-        endStamp1.longVal - masterVP->startSusp.longVal;
+   #define MEAS__Insert_Susp_Meas_Fields_into_MasterEnv \
+       Histogram       *suspLowTimeHist; \
+       Histogram       *suspHighTimeHist;
 
-#define Meas_startReqHdlr \
-        saveTSCLowHigh( startStamp1 ); \
-        _VMSMasterEnv->startReqHdlr.longVal = startStamp1.longVal;
+   #define MEAS__Make_Meas_Hists_for_Susp_Meas /*makes the two suspend-time hists*/ \
+      _VMSMasterEnv->suspLowTimeHist  = makeFixedBinHistExt( 100, 0, 200,\
+                                                    "susp_low_time_hist");\
+      _VMSMasterEnv->suspHighTimeHist  = makeFixedBinHistExt( 100, 0, 200,\
+                                                    "susp_high_time_hist");
+      
+      //record time stamp: compare to time-stamp recorded below
+   #define MEAS__Capture_Pre_Susp_Point \
+      saveLowTimeStampCountInto( animatingSlv->preSuspTSCLow );
+   
+      //NOTE: only take low part of count -- do sanity check when take diff
+   #define MEAS__Capture_Post_Susp_Point /*adds pre->post susp interval to both hists*/ \
+      saveLowTimeStampCountInto( animatingSlv->postSuspTSCLow );\
+      addIntervalToHist( animatingSlv->preSuspTSCLow, animatingSlv->postSuspTSCLow,\
+                         _VMSMasterEnv->suspLowTimeHist ); \
+      addIntervalToHist( animatingSlv->preSuspTSCLow, animatingSlv->postSuspTSCLow,\
+                         _VMSMasterEnv->suspHighTimeHist );
 
-#define Meas_endReqHdlr 
+   #define MEAS__Print_Hists_for_Susp_Meas /*prints both suspend-time hists*/ \
+      printHist( _VMSMasterEnv->suspLowTimeHist ); \
+      printHist( _VMSMasterEnv->suspHighTimeHist );
+#else
+   #define MEAS__Insert_Susp_Meas_Fields_into_Slave     
+   #define MEAS__Insert_Susp_Meas_Fields_into_MasterEnv 
+   #define MEAS__Make_Meas_Hists_for_Susp_Meas 
+   #define MEAS__Capture_Pre_Susp_Point
+   #define MEAS__Capture_Post_Susp_Point   
+   #define MEAS__Print_Hists_for_Susp_Meas 
+#endif
 
-#define Meas_endMasterLoop \
-        saveTSCLowHigh( startStamp1 ); \
-        _VMSMasterEnv->endMasterLoop.longVal = startStamp1.longVal;
+#ifdef MEAS__TURN_ON_MASTER_MEAS
+   #define MEAS__Insert_Master_Meas_Fields_into_Slave \
+       uint32  startMasterTSCLow; \
+       uint32  endMasterTSCLow;
+
+   #define MEAS__Insert_Master_Meas_Fields_into_MasterEnv \
+       Histogram       *masterLowTimeHist; \
+       Histogram       *masterHighTimeHist;
+
+   #define MEAS__Make_Meas_Hists_for_Master_Meas \
+      _VMSMasterEnv->masterLowTimeHist  = makeFixedBinHistExt( 100, 0, 200,\
+                                                    "master_low_time_hist");\
+      _VMSMasterEnv->masterHighTimeHist  = makeFixedBinHistExt( 100, 0, 200,\
+                                                    "master_high_time_hist");
+
+      //Total Master time includes one coreloop time -- just assume the core
+      // loop time is same for Master as for AppSlvs, even though it may be
+      // smaller due to higher predictability of the fixed jmp.
+   #define MEAS__Capture_Pre_Master_Point\
+      saveLowTimeStampCountInto( masterVP->startMasterTSCLow );
+
+   #define MEAS__Capture_Post_Master_Point /*adds start->end master interval to both hists*/ \
+      saveLowTimeStampCountInto( masterVP->endMasterTSCLow );\
+      addIntervalToHist( masterVP->startMasterTSCLow, masterVP->endMasterTSCLow,\
+                         _VMSMasterEnv->masterLowTimeHist ); \
+      addIntervalToHist( masterVP->startMasterTSCLow, masterVP->endMasterTSCLow,\
+                         _VMSMasterEnv->masterHighTimeHist );
+
+   #define MEAS__Print_Hists_for_Master_Meas /*prints both master-time hists*/ \
+      printHist( _VMSMasterEnv->masterLowTimeHist ); \
+      printHist( _VMSMasterEnv->masterHighTimeHist );
 #else
-#define Meas_startMasterLoop 
-#define Meas_startReqHdlr 
-#define Meas_endReqHdlr 
-#define Meas_endMasterLoop
+   #define MEAS__Insert_Master_Meas_Fields_into_Slave
+   #define MEAS__Insert_Master_Meas_Fields_into_MasterEnv 
+   #define MEAS__Make_Meas_Hists_for_Master_Meas
+   #define MEAS__Capture_Pre_Master_Point 
+   #define MEAS__Capture_Post_Master_Point 
+   #define MEAS__Print_Hists_for_Master_Meas 
 #endif
 
+      
+#ifdef MEAS__TURN_ON_MASTER_LOCK_MEAS
+   #define MEAS__Insert_Master_Lock_Meas_Fields_into_MasterEnv \
+       Histogram       *masterLockLowTimeHist; \
+       Histogram       *masterLockHighTimeHist;
+
+   #define MEAS__Make_Meas_Hists_for_Master_Lock_Meas \
+      _VMSMasterEnv->masterLockLowTimeHist  = makeFixedBinHist( 50, 0, 2, \
+                                               "master lock low time hist");\
+      _VMSMasterEnv->masterLockHighTimeHist  = makeFixedBinHist( 50, 0, 100,\
+                                               "master lock high time hist");
+
+   #define MEAS__Capture_Pre_Master_Lock_Point \
+      int32 startStamp, endStamp; \
+      saveLowTimeStampCountInto( startStamp );
+
+   #define MEAS__Capture_Post_Master_Lock_Point \
+      saveLowTimeStampCountInto( endStamp ); \
+      addIntervalToHist( startStamp, endStamp,\
+                         _VMSMasterEnv->masterLockLowTimeHist ); \
+      addIntervalToHist( startStamp, endStamp,\
+                         _VMSMasterEnv->masterLockHighTimeHist );
+
+   #define MEAS__Print_Hists_for_Master_Lock_Meas \
+      printHist( _VMSMasterEnv->masterLockLowTimeHist ); \
+      printHist( _VMSMasterEnv->masterLockHighTimeHist );
+      
+#else
+   #define MEAS__Insert_Master_Lock_Meas_Fields_into_MasterEnv
+   #define MEAS__Make_Meas_Hists_for_Master_Lock_Meas
+   #define MEAS__Capture_Pre_Master_Lock_Point 
+   #define MEAS__Capture_Post_Master_Lock_Point 
+   #define MEAS__Print_Hists_for_Master_Lock_Meas
+#endif
+
+
+#ifdef MEAS__TURN_ON_MALLOC_MEAS
+   #define MEAS__Insert_Malloc_Meas_Fields_into_MasterEnv\
+       Histogram       *mallocTimeHist; \
+       Histogram       *freeTimeHist;
+
+   #define MEAS__Make_Meas_Hists_for_Malloc_Meas \
+      _VMSMasterEnv->mallocTimeHist  = makeFixedBinHistExt( 100, 0, 30,\
+                                                       "malloc_time_hist");\
+      _VMSMasterEnv->freeTimeHist  = makeFixedBinHistExt( 100, 0, 30,\
+                                                       "free_time_hist");
+
+   #define MEAS__Capture_Pre_Malloc_Point \
+      int32 startStamp, endStamp; \
+      saveLowTimeStampCountInto( startStamp );
+
+   #define MEAS__Capture_Post_Malloc_Point \
+      saveLowTimeStampCountInto( endStamp ); \
+      addIntervalToHist( startStamp, endStamp,\
+                         _VMSMasterEnv->mallocTimeHist ); 
+
+   #define MEAS__Capture_Pre_Free_Point \
+      int32 startStamp, endStamp; \
+      saveLowTimeStampCountInto( startStamp );
+
+   #define MEAS__Capture_Post_Free_Point \
+      saveLowTimeStampCountInto( endStamp ); \
+      addIntervalToHist( startStamp, endStamp,\
+                         _VMSMasterEnv->freeTimeHist ); 
+
+   #define MEAS__Print_Hists_for_Malloc_Meas \
+      printHist( _VMSMasterEnv->mallocTimeHist   ); \
+      saveHistToFile( _VMSMasterEnv->mallocTimeHist   ); \
+      printHist( _VMSMasterEnv->freeTimeHist     ); \
+      saveHistToFile( _VMSMasterEnv->freeTimeHist     ); \
+      freeHistExt( _VMSMasterEnv->mallocTimeHist ); \
+      freeHistExt( _VMSMasterEnv->freeTimeHist   );
+      
+#else
+   #define MEAS__Insert_Malloc_Meas_Fields_into_MasterEnv
+   #define MEAS__Make_Meas_Hists_for_Malloc_Meas 
+   #define MEAS__Capture_Pre_Malloc_Point
+   #define MEAS__Capture_Post_Malloc_Point
+   #define MEAS__Capture_Pre_Free_Point
+   #define MEAS__Capture_Post_Free_Point
+   #define MEAS__Print_Hists_for_Malloc_Meas 
+#endif
+
+      
+#ifdef MEAS__TURN_ON_SYSTEM_MEAS
+   #define MEAS__Insert_System_Meas_Fields_into_Slave \
+      TSCountLowHigh  startSusp; \
+      uint64  totalSuspCycles; \
+      uint32  numGoodSusp;
+
+   #define MEAS__Insert_System_Meas_Fields_into_MasterEnv \
+       TSCountLowHigh   startMaster; \
+       uint64           totalMasterCycles; \
+       uint32           numMasterAnimations; \
+       TSCountLowHigh   startReqHdlr; \
+       uint64           totalPluginCycles; \
+       uint32           numPluginAnimations; \
+       uint64           cyclesTillStartMasterLoop; \
+       TSCountLowHigh   endMasterLoop; 
+
+#else
+   #define MEAS__Insert_System_Meas_Fields_into_Slave 
+   #define MEAS__Insert_System_Meas_Fields_into_MasterEnv 
+#endif
+
+
+/*This macro's a bit weird -- the same macro is defined in three different
+ * ways, depending upon which defines are turned on
+ *That's because added the system meas, which interferes with plugin meas,
+ * but don't want to make plugin meas stop working..  this is compromise
+ */
+#ifdef MEAS__TURN_ON_PLUGIN_MEAS 
+   #define MEAS__Insert_Plugin_Meas_Fields_into_MasterEnv \
+      Histogram       *reqHdlrLowTimeHist; \
+      Histogram       *reqHdlrHighTimeHist;
+          
+   #define MEAS__Make_Meas_Hists_for_Plugin_Meas \
+      _VMSMasterEnv->reqHdlrLowTimeHist  = makeFixedBinHistExt( 100, 0, 200,\
+                                                    "plugin_low_time_hist");\
+      _VMSMasterEnv->reqHdlrHighTimeHist  = makeFixedBinHistExt( 100, 0, 200,\
+                                                    "plugin_high_time_hist");
+
+   #define Meas_startReqHdlr \
+      int32 startStamp1, endStamp1; \
+      saveLowTimeStampCountInto( startStamp1 );
+
+   #define Meas_endReqHdlr \
+      saveLowTimeStampCountInto( endStamp1 ); \
+      addIntervalToHist( startStamp1, endStamp1, \
+                           _VMSMasterEnv->reqHdlrLowTimeHist ); \
+      addIntervalToHist( startStamp1, endStamp1, \
+                           _VMSMasterEnv->reqHdlrHighTimeHist );
+
+   #define MEAS__Print_Hists_for_Plugin_Meas \
+      printHist( _VMSMasterEnv->reqHdlrLowTimeHist ); \
+      saveHistToFile( _VMSMasterEnv->reqHdlrLowTimeHist ); \
+      printHist( _VMSMasterEnv->reqHdlrHighTimeHist ); \
+      saveHistToFile( _VMSMasterEnv->reqHdlrHighTimeHist ); \
+      freeHistExt( _VMSMasterEnv->reqHdlrLowTimeHist ); \
+      freeHistExt( _VMSMasterEnv->reqHdlrHighTimeHist );
+               
+#elif defined MEAS__TURN_ON_SYSTEM_MEAS
+   #define MEAS__Insert_Plugin_Meas_Fields_into_MasterEnv 
+
+   #define MEAS__Make_Meas_Hists_for_Plugin_Meas 
+
+   #define Meas_startMasterLoop \
+      TSCountLowHigh startStamp1, endStamp1; \
+      saveTSCLowHigh( endStamp1 ); \
+      _VMSMasterEnv->cyclesTillStartMasterLoop = \
+      endStamp1.longVal - masterVP->startSusp.longVal;
+
+   #define Meas_endMasterLoop \
+      saveTSCLowHigh( startStamp1 ); \
+      _VMSMasterEnv->endMasterLoop.longVal = startStamp1.longVal;
+
+   #define Meas_startReqHdlr \
+      saveTSCLowHigh( startStamp1 ); \
+      _VMSMasterEnv->startReqHdlr.longVal = startStamp1.longVal;
+
+   #define Meas_endReqHdlr 
+
+   #define MEAS__Print_Hists_for_Plugin_Meas 
+
+#else
+   #define MEAS__Insert_Plugin_Meas_Fields_into_MasterEnv
+   #define MEAS__Make_Meas_Hists_for_Plugin_Meas
+   #define Meas_startMasterLoop 
+   #define Meas_endMasterLoop
+   #define Meas_startReqHdlr 
+   #define Meas_endReqHdlr 
+   #define MEAS__Print_Hists_for_Plugin_Meas 
+#endif
+
+
+//Experiment in two-step macros -- if doesn't work, insert each separately
+#define MEAS__Insert_Meas_Fields_into_Slave  \
+   MEAS__Insert_Susp_Meas_Fields_into_Slave \
+   MEAS__Insert_Master_Meas_Fields_into_Slave \
+   MEAS__Insert_System_Meas_Fields_into_Slave
+
+
 //======================  Histogram Macros -- Create ========================
 //
 //
-#ifdef MAKE_HISTS_FOR_MEASUREMENTS
-#define makeAMeasHist( idx, name, numBins, startVal, binWidth ) \
-   makeHighestDynArrayIndexBeAtLeast( _VMSMasterEnv->measHistsInfo, idx ); \
-   _VMSMasterEnv->measHists[idx] =  \
+
+//The language implementation should include a definition of this macro,
+// which creates all the histograms the language uses to collect measurements
+// of plugin operation -- so, if the language didn't define it, must
+// define it here (as empty), to avoid compile error
+#ifndef MEAS__Make_Meas_Hists_for_Language
+#define MEAS__Make_Meas_Hists_for_Language() /*consume parens!*/
+#endif
+
+              
+#ifdef MEAS__TURN_ON_MAKE_HISTS
+   #define makeAMeasHist( idx, name, numBins, startVal, binWidth ) \
+      makeHighestDynArrayIndexBeAtLeast( _VMSMasterEnv->measHistsInfo, idx ); \
+      _VMSMasterEnv->measHists[idx] =  \
                        makeFixedBinHist( numBins, startVal, binWidth, name );
 #else
-#define makeAMeasHist( idx, name, numBins, startVal, binWidth )
+   #define makeAMeasHist( idx, name, numBins, startVal, binWidth )
 #endif
 
+//==============================  Probes  ===================================
 
-#define MEAS__SUB_CREATE  /*turn on/off subtraction of create from plugin*/
 
-#include "VMS_defs__lang_specific.h"
-
+//===========================================================================
 #endif	/* _VMS_DEFS_H */
 
diff -r eaf7e4c58c9e -r 0c83ea8adefc probes.c
--- a/probes.c	Wed Feb 22 11:39:12 2012 -0800
+++ b/probes.c	Sun Mar 04 14:26:35 2012 -0800
@@ -13,100 +13,46 @@
 
 
 //====================  Probes =================
-#ifdef STATS__USE_TSC_PROBES
-
-int32
-VMS__create_histogram_probe( int32 numBins, float32 startValue,
-                             float32 binWidth, char *nameStr )
- { IntervalProbe *newProbe;
-   int32 idx;
-   FloatHist *hist;
-
-   idx = VMS__create_single_interval_probe( nameStr );
-   newProbe =  _VMSMasterEnv->intervalProbes[ idx ];
-
-   hist =  makeFloatHistogram( numBins, startValue, binWidth );
-   newProbe->hist = hist;
-   return idx;
- }
-
-void
-VMS_impl__record_interval_start_in_probe( int32 probeID )
- { IntervalProbe *probe;
-
-   probe = _VMSMasterEnv->intervalProbes[ probeID ];
-   probe->startStamp = getTSCount();
- }
-
-void
-VMS_impl__record_interval_end_in_probe( int32 probeID )
- { IntervalProbe *probe;
-   TSCount endStamp;
-
-   endStamp = getTSCount();
-
-   probe = _VMSMasterEnv->intervalProbes[ probeID ];
-   probe->endStamp = endStamp;
-
-   if( probe->hist != NULL )
-    { TSCount interval = probe->endStamp - probe->startStamp;
-         //if the interval is sane, then add to histogram
-      if( interval < probe->hist->endOfRange * 10 )
-         addToFloatHist( interval, probe->hist );
-    }
- }
-
-void
-VMS_impl__print_stats_of_probe( int32 probeID )
- { IntervalProbe *probe;
-
-   probe = _VMSMasterEnv->intervalProbes[ probeID ];
-
-   if( probe->hist == NULL )
-    {
-      printf("probe: %s, interval: %.6lf\n", probe->nameStr,probe->interval);
-    }
-
-   else
-    {
-      printf( "probe: %s\n", probe->nameStr );
-      printFloatHist( probe->hist );
-    }
- }
-#else
-
 /*
  * In practice, probe operations are called from the app, from inside slaves
- *  -- so have to be sure each probe is single-VP owned, and be sure that
+ *  -- so have to be sure each probe is single-Slv owned, and be sure that
  *  any place common structures are modified it's done inside the master.
  * So -- the only place common structures are modified is during creation.
  *  after that, all mods are to individual instances.
  *
  * Thniking perhaps should change the semantics to be that probes are
  *  attached to the virtual processor -- and then everything is guaranteed
- *  to be isolated -- except then can't take any intervals that span VPs,
- *  and would have to transfer the probes to Master env when VP dissipates..
+ *  to be isolated -- except then can't take any intervals that span Slvs,
+ *  and would have to transfer the probes to Master env when Slv dissipates..
  *  gets messy..
  *
  * For now, just making so that probe creation causes a suspend, so that
  *  the dynamic array in the master env is only modified from the master
  * 
  */
+
+//============================  Helpers ===========================
+inline void 
+doNothing()
+ {
+ }
+
+
 IntervalProbe *
-create_generic_probe( char *nameStr, SlaveVP *animPr )
-{
+create_generic_probe( char *nameStr, SlaveVP *animSlv )
+ {
    VMSSemReq reqData;
 
    reqData.reqType  = createProbe;
    reqData.nameStr  = nameStr;
 
-   VMS_WL__send_VMSSem_request( &reqData, animPr );
+   VMS_WL__send_VMSSem_request( &reqData, animSlv );
 
-   return animPr->dataRetFromReq;
+   return animSlv->dataRetFromReq;
  }
 
 /*Use this version from outside VMS -- it uses external malloc, and modifies
- * dynamic array, so can't be animated in a slave VP
+ * dynamic array, so can't be animated in a Slv
  */
 IntervalProbe *
 ext__create_generic_probe( char *nameStr )
@@ -125,24 +71,38 @@
    return newProbe;
  }
 
+//============================ Fns def in header =======================
 
-/*Only call from inside master or main startup/shutdown thread
- */
-void
-VMS_impl__free_probe( IntervalProbe *probe )
- { if( probe->hist != NULL )   freeDblHist( probe->hist );
-   if( probe->nameStr != NULL) VMS_int__free( probe->nameStr );
-   VMS_int__free( probe );
+int32
+VMS_impl__create_single_interval_probe( char *nameStr, SlaveVP *animSlv )
+ { IntervalProbe *newProbe;
+
+   newProbe = create_generic_probe( nameStr, animSlv );
+   
+   return newProbe->probeID;
  }
 
+int32
+VMS_impl__create_histogram_probe( int32   numBins, float64    startValue,
+               float64 binWidth, char   *nameStr, SlaveVP *animSlv )
+ { IntervalProbe *newProbe;
+   DblHist *hist;
+
+   newProbe = create_generic_probe( nameStr, animSlv );
+   
+   hist =  makeDblHistogram( numBins, startValue, binWidth );
+   newProbe->hist = hist;
+   return newProbe->probeID;
+ }
+
 
 int32
-VMS_impl__record_time_point_into_new_probe( char *nameStr, SlaveVP *animPr)
+VMS_impl__record_time_point_into_new_probe( char *nameStr, SlaveVP *animSlv)
  { IntervalProbe *newProbe;
    struct timeval *startStamp;
    float64 startSecs;
 
-   newProbe           = create_generic_probe( nameStr, animPr );
+   newProbe           = create_generic_probe( nameStr, animSlv );
    newProbe->endSecs  = 0;
 
    gettimeofday( &(newProbe->startStamp), NULL);
@@ -174,30 +134,19 @@
    return newProbe->probeID;
  }
 
-int32
-VMS_impl__create_single_interval_probe( char *nameStr, SlaveVP *animPr )
- { IntervalProbe *newProbe;
 
-   newProbe = create_generic_probe( nameStr, animPr );
-   
-   return newProbe->probeID;
+/*Only call from inside master or main startup/shutdown thread
+ */
+void
+VMS_impl__free_probe( IntervalProbe *probe )
+ { if( probe->hist != NULL )   freeDblHist( probe->hist );
+   if( probe->nameStr != NULL) VMS_int__free( probe->nameStr );
+   VMS_int__free( probe );
  }
 
-int32
-VMS_impl__create_histogram_probe( int32   numBins, float64    startValue,
-               float64 binWidth, char   *nameStr, SlaveVP *animPr )
- { IntervalProbe *newProbe;
-   DblHist *hist;
-
-   newProbe = create_generic_probe( nameStr, animPr );
-   
-   hist =  makeDblHistogram( numBins, startValue, binWidth );
-   newProbe->hist = hist;
-   return newProbe->probeID;
- }
 
 void
-VMS_impl__index_probe_by_its_name( int32 probeID, SlaveVP *animPr )
+VMS_impl__index_probe_by_its_name( int32 probeID, SlaveVP *animSlv )
  { IntervalProbe *probe;
 
    //TODO: fix this To be in Master -- race condition
@@ -206,8 +155,9 @@
    addValueIntoTable(probe->nameStr, probe, _VMSMasterEnv->probeNameHashTbl);
  }
 
+
 IntervalProbe *
-VMS_impl__get_probe_by_name( char *probeName, SlaveVP *animPr )
+VMS_impl__get_probe_by_name( char *probeName, SlaveVP *animSlv )
  {
    //TODO: fix this To be in Master -- race condition
    return getValueFromTable( probeName, _VMSMasterEnv->probeNameHashTbl );
@@ -215,21 +165,21 @@
 
 
 /*Everything is local to the animating procr, so no need for request, do
- * work locally, in the anim Pr
+ * work locally, in the anim Slv
  */
 void
-VMS_impl__record_sched_choice_into_probe( int32 probeID, SlaveVP *animatingPr )
+VMS_impl__record_sched_choice_into_probe( int32 probeID, SlaveVP *animatingSlv )
  { IntervalProbe *probe;
  
    probe = _VMSMasterEnv->intervalProbes[ probeID ];
    probe->schedChoiceWasRecorded = TRUE;
-   probe->coreNum = animatingPr->coreAnimatedBy;
-   probe->procrID = animatingPr->procrID;
-   probe->procrCreateSecs = animatingPr->createPtInSecs;
+   probe->coreNum = animatingSlv->coreAnimatedBy;
+   probe->slaveID = animatingSlv->procrID;
+   probe->slaveCreateSecs = animatingSlv->createPtInSecs;
  }
 
 /*Everything is local to the animating procr, so no need for request, do
- * work locally, in the anim Pr
+ * work locally, in the anim Slv
  */
 void
 VMS_impl__record_interval_start_in_probe( int32 probeID )
@@ -237,44 +187,37 @@
 
          DEBUG( dbgProbes, "record start of interval\n" )
    probe = _VMSMasterEnv->intervalProbes[ probeID ];
-   gettimeofday( &(probe->startStamp), NULL );
+   probe->startStamp = getTSCount();
  }
 
 
 /*Everything is local to the animating procr, so no need for request, do
- * work locally, in the anim Pr
+ * work locally, in the anim Slv
+ * 
+ *This should be safe to run inside SlaveVP -- weird behavior will be due
+ * to the logical error of having more than one interval open at once (overlapped).
  */
 void
 VMS_impl__record_interval_end_in_probe( int32 probeID )
  { IntervalProbe *probe;
-   struct timeval *endStamp, *startStamp;
-   float64 startSecs, endSecs;
+   TSCount endStamp;
 
+   endStamp = getTSCount();
+   
          DEBUG( dbgProbes, "record end of interval\n" )
-      //possible seg-fault if array resized by diff core right after this
-      // one gets probe..?  Something like that?  Might be safe.. don't care
+
    probe = _VMSMasterEnv->intervalProbes[ probeID ];
-   gettimeofday( &(probe->endStamp), NULL);
-
-      //now turn into an interval held in a double
-   startStamp = &(probe->startStamp);
-   endStamp   = &(probe->endStamp);
-
-   startSecs = startStamp->tv_sec + ( startStamp->tv_usec / 1000000.0 );
-   endSecs   = endStamp->tv_sec   + ( endStamp->tv_usec / 1000000.0 );
-
-   probe->interval  = endSecs - startSecs;
-   probe->startSecs = startSecs;
-   probe->endSecs   = endSecs;
+   probe->endStamp = endStamp;
 
    if( probe->hist != NULL )
-    {
+    { TSCount interval = probe->endStamp - probe->startStamp;
          //if the interval is sane, then add to histogram
-      if( probe->interval < probe->hist->endOfRange * 10 )
-         addToDblHist( probe->interval, probe->hist );
+      if( interval < probe->hist->endOfRange * 10 )
+         addToDblHist( interval, probe->hist ); /*hist is made by makeDblHistogram*/
     }
  }
 
+
 void
 print_probe_helper( IntervalProbe *probe )
  {
@@ -283,7 +226,7 @@
    
    if( probe->schedChoiceWasRecorded )
     { printf( "coreNum: %d, procrID: %d, procrCreated: %0.6f | ",
-              probe->coreNum, probe->procrID, probe->procrCreateSecs );
+              probe->coreNum, probe->slaveID, probe->slaveCreateSecs );
     }
 
    if( probe->endSecs == 0 ) //just a single point in time
@@ -318,22 +261,10 @@
  }
 
 
-inline void doNothing(){};
-
-void
-generic_print_probe( void *_probe )
- { 
-   IntervalProbe *probe = (IntervalProbe *)_probe;
-   
-   //TODO segfault in printf
-   //print_probe_helper( probe );
- }
-
 void
 VMS_impl__print_stats_of_all_probes()
  {
    forAllInDynArrayDo( _VMSMasterEnv->dynIntervalProbesInfo,
-                       &generic_print_probe );
+                       &VMS_impl__print_stats_of_probe );
    fflush( stdout );
  }
-#endif
diff -r eaf7e4c58c9e -r 0c83ea8adefc probes.h
--- a/probes.h	Wed Feb 22 11:39:12 2012 -0800
+++ b/probes.h	Sun Mar 04 14:26:35 2012 -0800
@@ -28,16 +28,16 @@
 
    int32           schedChoiceWasRecorded;
    int32           coreNum;
-   int32           procrID;
-   float64         procrCreateSecs;
+   int32           slaveID;
+   float64         slaveCreateSecs;
 
-   #ifdef STATS__USE_TSC_PROBES
+ //  #ifdef STATS__USE_TSC_PROBES
    TSCount    startStamp;
    TSCount    endStamp;
-   #else
-   struct timeval  startStamp;
-   struct timeval  endStamp;
-   #endif
+//   #else
+//   struct timeval  startStamp;
+//   struct timeval  endStamp;
+//   #endif
    float64         startSecs;
    float64         endSecs;
    float64         interval;
@@ -45,136 +45,136 @@
  };
 
 
+int32
+VMS_impl__create_single_interval_probe( char *nameStr, SlaveVP *animSlv );
+
+int32
+VMS_impl__create_histogram_probe( int32   numBins, float64    startValue,
+               float64 binWidth, char    *nameStr, SlaveVP *animSlv );
+
+int32
+VMS_impl__record_time_point_into_new_probe( char *nameStr, SlaveVP *animSlv);
+
+int32
+VMS_ext_impl__record_time_point_into_new_probe( char *nameStr );
+
+void
+VMS_impl__free_probe( IntervalProbe *probe );
+
+void
+VMS_impl__index_probe_by_its_name( int32 probeID, SlaveVP *animSlv );
+
+IntervalProbe *
+VMS_impl__get_probe_by_name( char *probeName, SlaveVP *animSlv );
+
+void
+VMS_impl__record_sched_choice_into_probe( int32 probeID, SlaveVP *animSlv );
+
+void
+VMS_impl__record_interval_start_in_probe( int32 probeID );
+
+void
+VMS_impl__record_interval_end_in_probe( int32 probeID );
+
+void
+VMS_impl__print_stats_of_probe( int32 probeID );
+
+void
+VMS_impl__print_stats_of_all_probes();
+
 
 //======================== Probes =============================
 //
 // Use macros to allow turning probes off with a #define switch
+// This means probes have zero impact on performance when off
+//=============================================================
+#define VMS_App__record_time_point_into_new_probe VMS_WL__record_time_point_into_new_probe
+//no alias line for VMS_ext__record_time_point_into_new_probe: it is #defined, with a parameter, in both branches below
+#define VMS_App__create_single_interval_probe   VMS_WL__create_single_interval_probe
+#define VMS_App__create_histogram_probe         VMS_WL__create_histogram_probe
+#define VMS_App__index_probe_by_its_name        VMS_WL__index_probe_by_its_name
+#define VMS_App__get_probe_by_name              VMS_WL__get_probe_by_name
+#define VMS_App__record_sched_choice_into_probe VMS_WL__record_sched_choice_into_probe
+#define VMS_App__record_interval_start_in_probe VMS_WL__record_interval_start_in_probe
+#define VMS_App__record_interval_end_in_probe   VMS_WL__record_interval_end_in_probe
+#define VMS_App__print_stats_of_probe           VMS_WL__print_stats_of_probe
+#define VMS_App__print_stats_of_all_probes      VMS_WL__print_stats_of_all_probes
+
 #ifdef STATS__ENABLE_PROBES
-int32
-VMS_impl__record_time_point_into_new_probe( char *nameStr,SlaveVP *animPr);
-#define VMS__record_time_point_into_new_probe( nameStr, animPr ) \
-        VMS_impl__record_time_point_in_new_probe( nameStr, animPr )
+#define VMS_WL__record_time_point_into_new_probe( nameStr, animSlv ) \
+        VMS_impl__record_time_point_into_new_probe( nameStr, animSlv ) /* name must match the prototype: ..._into_... */
 
-int32
-VMS_ext_impl__record_time_point_into_new_probe( char *nameStr );
 #define VMS_ext__record_time_point_into_new_probe( nameStr ) \
         VMS_ext_impl__record_time_point_into_new_probe( nameStr )
 
+#define VMS_WL__create_single_interval_probe( nameStr, animSlv ) \
+        VMS_impl__create_single_interval_probe( nameStr, animSlv )
 
-int32
-VMS_impl__create_single_interval_probe( char *nameStr, SlaveVP *animPr );
-#define VMS__create_single_interval_probe( nameStr, animPr ) \
-        VMS_impl__create_single_interval_probe( nameStr, animPr )
-
-
-int32
-VMS_impl__create_histogram_probe( int32   numBins, float64    startValue,
-               float64 binWidth, char    *nameStr, SlaveVP *animPr );
-#define VMS__create_histogram_probe(      numBins, startValue,              \
-                                          binWidth, nameStr, animPr )       \
+#define VMS_WL__create_histogram_probe(      numBins, startValue,              \
+                                          binWidth, nameStr, animSlv )       \
         VMS_impl__create_histogram_probe( numBins, startValue,              \
-                                          binWidth, nameStr, animPr )
-void
-VMS_impl__free_probe( IntervalProbe *probe );
-#define VMS__free_probe( probe ) \
+                                          binWidth, nameStr, animSlv )
+#define VMS_int__free_probe( probe ) \
         VMS_impl__free_probe( probe )
 
-void
-VMS_impl__index_probe_by_its_name( int32 probeID, SlaveVP *animPr );
-#define VMS__index_probe_by_its_name( probeID, animPr ) \
-        VMS_impl__index_probe_by_its_name( probeID, animPr )
+#define VMS_WL__index_probe_by_its_name( probeID, animSlv ) \
+        VMS_impl__index_probe_by_its_name( probeID, animSlv )
 
-IntervalProbe *
-VMS_impl__get_probe_by_name( char *probeName, SlaveVP *animPr );
-#define VMS__get_probe_by_name( probeID, animPr ) \
-        VMS_impl__get_probe_by_name( probeName, animPr )
+#define VMS_WL__get_probe_by_name( probeName, animSlv ) \
+        VMS_impl__get_probe_by_name( probeName, animSlv ) /* parameter renamed so the argument is actually forwarded */
 
-void
-VMS_impl__record_sched_choice_into_probe( int32 probeID, SlaveVP *animPr );
-#define VMS__record_sched_choice_into_probe( probeID, animPr ) \
-        VMS_impl__record_sched_choice_into_probe( probeID, animPr )
+#define VMS_WL__record_sched_choice_into_probe( probeID, animSlv ) \
+        VMS_impl__record_sched_choice_into_probe( probeID, animSlv )
 
-void
-VMS_impl__record_interval_start_in_probe( int32 probeID );
-#define VMS__record_interval_start_in_probe( probeID ) \
+#define VMS_WL__record_interval_start_in_probe( probeID ) \
         VMS_impl__record_interval_start_in_probe( probeID )
 
-void
-VMS_impl__record_interval_end_in_probe( int32 probeID );
-#define VMS__record_interval_end_in_probe( probeID ) \
+#define VMS_WL__record_interval_end_in_probe( probeID ) \
         VMS_impl__record_interval_end_in_probe( probeID )
 
-void
-VMS_impl__print_stats_of_probe( int32 probeID );
-#define VMS__print_stats_of_probe( probeID ) \
+#define VMS_WL__print_stats_of_probe( probeID ) \
         VMS_impl__print_stats_of_probe( probeID )
 
-void
-VMS_impl__print_stats_of_all_probes();
-#define VMS__print_stats_of_all_probes() \
+#define VMS_WL__print_stats_of_all_probes() \
         VMS_impl__print_stats_of_all_probes()
 
 
 #else
-int32
-VMS_impl__record_time_point_into_new_probe( char *nameStr,SlaveVP *animPr);
-#define VMS__record_time_point_into_new_probe( nameStr, animPr ) \
+#define VMS_WL__record_time_point_into_new_probe( nameStr, animSlv ) \
        0 /* do nothing */
 
-int32
-VMS_ext_impl__record_time_point_into_new_probe( char *nameStr );
 #define VMS_ext__record_time_point_into_new_probe( nameStr ) \
        0 /* do nothing */
 
 
-int32
-VMS_impl__create_single_interval_probe( char *nameStr, SlaveVP *animPr );
-#define VMS__create_single_interval_probe( nameStr, animPr ) \
+#define VMS_WL__create_single_interval_probe( nameStr, animSlv ) \
        0 /* do nothing */
 
 
-int32
-VMS_impl__create_histogram_probe( int32   numBins, float64    startValue,
-               float64 binWidth, char    *nameStr, SlaveVP *animPr );
-#define VMS__create_histogram_probe(      numBins, startValue,              \
-                                          binWidth, nameStr, animPr )       \
+#define VMS_WL__create_histogram_probe(      numBins, startValue,              \
+                                          binWidth, nameStr, animSlv )       \
        0 /* do nothing */
 
-void
-VMS_impl__index_probe_by_its_name( int32 probeID, SlaveVP *animPr );
-#define VMS__index_probe_by_its_name( probeID, animPr ) \
+#define VMS_WL__index_probe_by_its_name( probeID, animSlv ) \
         /* do nothing */
 
-IntervalProbe *
-VMS_impl__get_probe_by_name( char *probeName, SlaveVP *animPr );
-#define VMS__get_probe_by_name( probeID, animPr ) \
+#define VMS_WL__get_probe_by_name( probeID, animSlv ) \
        NULL /* do nothing */
 
-void
-VMS_impl__record_sched_choice_into_probe( int32 probeID, SlaveVP *animPr );
-#define VMS__record_sched_choice_into_probe( probeID, animPr ) \
+#define VMS_WL__record_sched_choice_into_probe( probeID, animSlv ) \
         /* do nothing */
 
-void
-VMS_impl__record_interval_start_in_probe( int32 probeID );
-#define VMS__record_interval_start_in_probe( probeID ) \
+#define VMS_WL__record_interval_start_in_probe( probeID ) \
         /* do nothing */
 
-void
-VMS_impl__record_interval_end_in_probe( int32 probeID );
-#define VMS__record_interval_end_in_probe( probeID ) \
+#define VMS_WL__record_interval_end_in_probe( probeID ) \
         /* do nothing */
 
-inline void doNothing();
-void
-VMS_impl__print_stats_of_probe( int32 probeID );
-#define VMS__print_stats_of_probe( probeID ) \
-        doNothing/* do nothing */
+#define VMS_WL__print_stats_of_probe( probeID ) \
+        /* do nothing -- expands to no tokens, like the other void stubs; the caller's own ';' ends the statement */
 
-void
-VMS_impl__print_stats_of_all_probes();
-#define VMS__print_stats_of_all_probes \
-        doNothing/* do nothing */
+#define VMS_WL__print_stats_of_all_probes() \
+        /* do nothing -- expands to no tokens, like the other void stubs; the caller's own ';' ends the statement */
 
 #endif   /* defined STATS__ENABLE_PROBES */
 
diff -r eaf7e4c58c9e -r 0c83ea8adefc vmalloc.c
--- a/vmalloc.c	Wed Feb 22 11:39:12 2012 -0800
+++ b/vmalloc.c	Sun Mar 04 14:26:35 2012 -0800
@@ -11,46 +11,200 @@
 #include <inttypes.h>
 #include <stdlib.h>
 #include <stdio.h>
+#include <string.h>
+#include <math.h>
 
 #include "VMS.h"
 #include "C_Libraries/Histogram/Histogram.h"
 
-/*Helper function
- *Insert a newly generated free chunk into the first spot on the free list.
- * The chunk is cast as a MallocProlog, so the various pointers in it are
- * accessed with C's help -- and the size of the prolog is easily added to
- * the pointer when a chunk is returned to the app -- so C handles changes
- * in pointer sizes among machines.
- *
- *The list head is a normal MallocProlog struct -- identified by its
- * prevChunkInFreeList being NULL -- the only one.
- *
- *The end of the list is identified by next chunk being NULL, as usual.
+#define MAX_UINT64 0xFFFFFFFFFFFFFFFF
+
+//A MallocProlog is a head element if the HigherInMem variable is NULL
+//A Chunk is allocated if its prevChunkInFreeList variable is NULL; a chunk on a free list always has it set
+
+/*
+ * This calculates the container which fits the given size.
  */
-void inline
-add_chunk_to_free_list( MallocProlog *chunk, MallocProlog *listHead )
- { 
-   chunk->nextChunkInFreeList     = listHead->nextChunkInFreeList;
-   if( chunk->nextChunkInFreeList != NULL ) //if not last in free list
-      chunk->nextChunkInFreeList->prevChunkInFreeList = chunk;
-   chunk->prevChunkInFreeList     = listHead;
-   listHead->nextChunkInFreeList  = chunk;
- }
+inline
+uint32 getContainer(size_t size) //maps a byte size to a container index
+{   //NOTE(review): log2(0) is -inf and a size under the LOG128 baseline gives a negative value; callers visible here only pass sizes above LOWER_BOUND / BIG_LOWER_BOUND
+    return (log2(size)-LOG128)/LOG54; //floating-point result is implicitly converted to uint32 on return
+}
 
+/*
+ * Pops the head chunk of the big-chunk free list bigChunks[containerIdx] and returns it.
+ * The returned chunk's own list pointers are not modified here. There is no check for an
+ * empty list: the head is dereferenced unconditionally, so the caller must ensure it exists.
+ */
+inline
+MallocProlog *removeChunk(MallocArrays* freeLists, uint32 containerIdx)
+{
+    MallocProlog** container = &freeLists->bigChunks[containerIdx];
+    MallocProlog*  removedChunk = *container;
+    *container = removedChunk->nextChunkInFreeList;
+    
+    if(removedChunk->nextChunkInFreeList)
+        removedChunk->nextChunkInFreeList->prevChunkInFreeList = 
+                (MallocProlog*)container; //new head's back pointer is the address of the list slot, cast
+    
+    if(*container == NULL)
+    {  //the list just became empty: clear its bit (containers 0-63 are in word 0, 64 and up in word 1)
+       if(containerIdx < 64)
+           freeLists->bigChunksSearchVector[0] &= ~((uint64)1 << containerIdx); 
+       else
+           freeLists->bigChunksSearchVector[1] &= ~((uint64)1 << (containerIdx-64));
+    }
+    
+    return removedChunk;
+}
 
-/*This is sequential code, meant to only be called from the Master, not from
- * any slave VPs.
- *Search down list, checking size by the nextHigherInMem pointer, to find
- * first chunk bigger than size needed.
- *Shave off the extra and make it into a new free-list element, hook it in
- * then return the address of the found element plus size of prolog.
- *
+/*
+ * Pops the head chunk of the small-chunk free list smallChunks[containerIdx] and returns it.
+ * The returned chunk's own list pointers are not modified here. There is no check for an
+ * empty list: the head is dereferenced unconditionally, so the caller must ensure it exists.
+ */
+inline
+MallocProlog *removeSmallChunk(MallocArrays* freeLists, uint32 containerIdx)
+{
+    MallocProlog** container = &freeLists->smallChunks[containerIdx];
+    MallocProlog*  removedChunk = *container;
+    *container = removedChunk->nextChunkInFreeList;
+    //unlike removeChunk, no search-vector bit is updated for the small lists
+    if(removedChunk->nextChunkInFreeList)
+        removedChunk->nextChunkInFreeList->prevChunkInFreeList = 
+                (MallocProlog*)container; //new head's back pointer is the address of the list slot, cast
+    
+    return removedChunk;
+}
+
+inline
+size_t getChunkSize(MallocProlog* chunk)
+{   //usable bytes: distance from this prolog up to the next prolog in memory, less the prolog itself
+    uintptr_t spanToNext = (uintptr_t)chunk->nextHigherInMem - (uintptr_t)chunk;
+    return spanToNext - sizeof(MallocProlog);
+}
+
+/*
+ * Unlinks chunk from the free list it is on; if that empties a big-chunk list, clears the list's search-vector bit.
+ */
+inline
+void extractChunk(MallocProlog* chunk, MallocArrays *freeLists)
+{  //NOTE(review): prev may be a list slot cast to MallocProlog*, so this assumes nextChunkInFreeList is the first member -- confirm
+   chunk->prevChunkInFreeList->nextChunkInFreeList = chunk->nextChunkInFreeList;
+   if(chunk->nextChunkInFreeList)
+       chunk->nextChunkInFreeList->prevChunkInFreeList = chunk->prevChunkInFreeList;
+   
+   //The first element of a list points back to its container slot. If that slot now
+   //holds NULL the container is empty
+   if(*((void**)(chunk->prevChunkInFreeList)) == NULL && getChunkSize(chunk) >= BIG_LOWER_BOUND)
+   {
+       //Find the appropriate container because we do not know it: byte offset of the slot / size of one slot
+       uint64 containerIdx = ((uintptr_t)chunk->prevChunkInFreeList - (uintptr_t)freeLists->bigChunks) / sizeof(freeLists->bigChunks[0]);
+       if(containerIdx < (uint32)64)
+           freeLists->bigChunksSearchVector[0] &= ~((uint64)1 << containerIdx); 
+       if(containerIdx < 128 && containerIdx >=64)
+           freeLists->bigChunksSearchVector[1] &= ~((uint64)1 << (containerIdx-64)); 
+       
+   }
+}
+
+/*
+ * Joins chunkB onto chunkA so that chunkA spans both, and returns chunkA.
+ * chunkA must sit directly below chunkB in memory; neither may still be linked
+ * into a free list.
+ */
+inline
+MallocProlog *mergeChunks(MallocProlog* chunkA, MallocProlog* chunkB)
+{   MallocProlog* aboveB = chunkB->nextHigherInMem;
+    chunkA->nextHigherInMem = aboveB;   //chunkA now ends where chunkB used to end
+    aboveB->nextLowerInMem = chunkA;    //the chunk above B now has chunkA as its lower neighbor
+    return chunkA;
+}
+/*
+ * Pushes chunk onto the front of the free list whose head slot is *container. No search-vector bit is set here.
+ */
+inline
+void insertChunk(MallocProlog* chunk, MallocProlog** container)
+{
+    chunk->nextChunkInFreeList = *container;
+    chunk->prevChunkInFreeList = (MallocProlog*)container; //the head's back pointer is the slot address, cast
+    if(*container)
+        (*container)->prevChunkInFreeList = chunk;         //the old head now follows chunk
+    *container = chunk;
+}
+
+/*
+ * Splits chunk in two: the top newSize bytes, plus a prolog in front of them, become a new chunk that is
+ * returned; chunk keeps the lower part. There is no size check, so make sure the size value is valid.
+ */
+inline
+MallocProlog *divideChunk(MallocProlog* chunk, size_t newSize)
+{
+    MallocProlog* newChunk = (MallocProlog*)((uintptr_t)chunk->nextHigherInMem -
+            newSize - sizeof(MallocProlog)); //new prolog sits newSize + one prolog below chunk's upper boundary
+    //only the in-memory neighbor links are written below; newChunk's free-list pointers are not set here
+    newChunk->nextLowerInMem  = chunk;
+    newChunk->nextHigherInMem = chunk->nextHigherInMem;
+    
+    chunk->nextHigherInMem->nextLowerInMem = newChunk; //the chunk above now has newChunk as its lower neighbor
+    chunk->nextHigherInMem = newChunk;                 //which also shrinks chunk
+    
+    return newChunk;
+}
+
+/* Removes and returns a chunk from the lowest non-empty big-chunk list whose index is >= containerIdx.
+ * If the chunk is much larger than sizeRequested it is split and the remainder re-filed. Exits the process if no list qualifies.
+ */
+inline
+MallocProlog *searchChunk(MallocArrays *freeLists, size_t sizeRequested, uint32 containerIdx)
+{
+    MallocProlog* foundChunk;
+    uint32 lowestIdx = containerIdx; //smallest container index the caller will accept
+    //hide containers below lowestIdx; a shift count of 64 or more is undefined, so that case yields 0 explicitly
+    uint64 searchVector = lowestIdx < 64 ? freeLists->bigChunksSearchVector[0] & (MAX_UINT64 << lowestIdx) : 0;
+    //ffsll takes a long long, so all 64 bits are scanned even where long is 32 bits; result is 1-based, 0 = none
+    containerIdx = __builtin_ffsll(searchVector);
+    if(containerIdx == 0)
+    {
+       searchVector = freeLists->bigChunksSearchVector[1];
+       if(lowestIdx > 64) searchVector = lowestIdx < 128 ? searchVector & (MAX_UINT64 << (lowestIdx-64)) : 0;
+       containerIdx = __builtin_ffsll(searchVector);
+       if(containerIdx == 0)
+       {
+           printf("VMS malloc failed: low memory");
+           exit(1);   
+       }
+       containerIdx += 64;
+    }
+    containerIdx--;
+    //containerIdx is now the 0-based index of the list to take from
+
+    foundChunk = removeChunk(freeLists, containerIdx);
+    size_t chunkSize     = getChunkSize(foundChunk);
+
+    //Split only when the remainder would still exceed BIG_LOWER_BOUND after paying for its own prolog
+    if(chunkSize > sizeRequested + 2 * sizeof(MallocProlog) + BIG_LOWER_BOUND)
+    {
+       MallocProlog *newChunk = divideChunk(foundChunk,sizeRequested);
+       containerIdx = getContainer(getChunkSize(foundChunk)) - 1; //re-file the shrunken lower part
+       insertChunk(foundChunk,&freeLists->bigChunks[containerIdx]);
+       if(containerIdx < 64)
+           freeLists->bigChunksSearchVector[0] |= ((uint64)1 << containerIdx);
+       else
+           freeLists->bigChunksSearchVector[1] |= ((uint64)1 << (containerIdx-64));
+       foundChunk = newChunk; //hand out the upper part, which holds sizeRequested bytes
+    } 
+    
+    return foundChunk;
+}
+
+
+/*
+ * This is sequential code, meant to only be called from the Master, not from
+ * any slave Slvs.
  */
 void *VMS_int__malloc( size_t sizeRequested )
- { MallocProlog *foundElem = NULL, *currElem, *newElem;
-   ssize_t        amountExtra, sizeConsumed,sizeOfFound;
-   uint32        foundElemIsTopOfHeap;
-
+ {     
    //============================= MEASUREMENT STUFF ========================
    #ifdef MEAS__TIME_MALLOC
    int32 startStamp, endStamp;
@@ -58,312 +212,101 @@
    #endif
    //========================================================================
    
-      //step up the size to be aligned at 16-byte boundary, prob better ways
-   sizeRequested = (sizeRequested + 16) & ~15;
-   currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList;
-
-   while( currElem != NULL )
-    {    //check if size of currElem is big enough
-      sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem);
-      amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog);
-      if( amountExtra > 0 )
-       {    //found it, get out of loop
-         foundElem = currElem;
-         currElem = NULL;
-       }
-      else
-         currElem = currElem->nextChunkInFreeList;
-    }
+   MallocArrays* freeLists = _VMSMasterEnv->freeLists;
+   MallocProlog* foundChunk;
    
-   if( foundElem == NULL )
-    { ERROR("\nmalloc failed\n")
-      return (void *)NULL;  //indicates malloc failed
-    }
-      //Using a kludge to identify the element that is the top chunk in the
-      // heap -- saving top-of-heap addr in head's nextHigherInMem -- and
-      // save addr of start of heap in head's nextLowerInMem
-      //Will handle top of Heap specially
-   foundElemIsTopOfHeap = foundElem->nextHigherInMem ==
-                          _VMSMasterEnv->freeListHead->nextHigherInMem;
+   //Return a small chunk if the requested size is smaller than 128B
+   if(sizeRequested <= LOWER_BOUND)
+   {
+       uint32 freeListIdx = (sizeRequested-1)/SMALL_CHUNK_SIZE;
+       if(freeLists->smallChunks[freeListIdx] == NULL)
+           foundChunk = searchChunk(freeLists, SMALL_CHUNK_SIZE*(freeListIdx+1), 0);
+       else
+           foundChunk = removeSmallChunk(freeLists, freeListIdx);
+       
+       //Mark as allocated
+       foundChunk->prevChunkInFreeList = NULL;      
+       return foundChunk + 1;
+   }
    
-      //before shave off and try to insert new elem, remove found elem
-      //note, foundElem will never be the head, so always has valid prevChunk
-   foundElem->prevChunkInFreeList->nextChunkInFreeList =
-                                              foundElem->nextChunkInFreeList;
-   if( foundElem->nextChunkInFreeList != NULL )
-    { foundElem->nextChunkInFreeList->prevChunkInFreeList =
-                                              foundElem->prevChunkInFreeList;
-    }
-   foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated
+   //Calculate the expected container. Start one higher to have a Chunk that's
+   //always big enough.
+   uint32 containerIdx = getContainer(sizeRequested);
    
-      //if enough, turn extra into new elem & insert it
-   if( amountExtra > 64 )
-    {   //make new elem by adding to addr of curr elem then casting
-        sizeConsumed = sizeof(MallocProlog) + sizeRequested; 
-        newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed );
-        newElem->nextLowerInMem    = foundElem; //This is evil (but why?) 
-        newElem->nextHigherInMem   = foundElem->nextHigherInMem; //This is evil (but why?)
-        foundElem->nextHigherInMem = newElem;
-        if( ! foundElemIsTopOfHeap )
-        {  //there is no next higher for top of heap, so can't write to it
-           newElem->nextHigherInMem->nextLowerInMem = newElem;
-        }
-        add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead );
-    }
+   if(freeLists->bigChunks[containerIdx] == NULL)
+       foundChunk = searchChunk(freeLists, sizeRequested, containerIdx); 
    else
-    {
-      sizeConsumed = sizeOfFound;
-    }
-  _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed;
-
+       foundChunk = removeChunk(freeLists, containerIdx); 
+   
+   //Mark as allocated
+   foundChunk->prevChunkInFreeList = NULL;      
+   
    //============================= MEASUREMENT STUFF ========================
    #ifdef MEAS__TIME_MALLOC
    saveLowTimeStampCountInto( endStamp );
    addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist );
    #endif
    //========================================================================
-
-      //skip over the prolog by adding its size to the pointer return
-   return (void*)((uintptr_t)foundElem + sizeof(MallocProlog));
+   
+   //skip over the prolog by adding its size to the pointer return
+   return foundChunk + 1;
  }
 
-/*This is sequential code, meant to only be called from the Master, not from
- * any slave VPs.
- *Search down list, checking size by the nextHigherInMem pointer, to find
- * first chunk bigger than size needed.
- *Shave off the extra and make it into a new free-list element, hook it in
- * then return the address of the found element plus size of prolog.
- *
- * The difference to the regular malloc is, that all the allocated chunks are
- * aligned and padded to the size of a CACHE_LINE_SZ. Thus creating a new chunk
- * before the aligned chunk.
- */
-void *VMS_int__malloc_aligned( size_t sizeRequested )
- { MallocProlog *foundElem = NULL, *currElem, *newElem;
-   ssize_t        amountExtra, sizeConsumed,sizeOfFound,prevAmount;
-   uint32        foundElemIsTopOfHeap;
-
-   //============================= MEASUREMENT STUFF ========================
-   #ifdef MEAS__TIME_MALLOC
-   uint32 startStamp, endStamp;
-   saveLowTimeStampCountInto( startStamp );
-   #endif
-   //========================================================================
-   
-      //step up the size to be multiple of the cache line size
-   sizeRequested = (sizeRequested + CACHE_LINE_SZ) & ~(CACHE_LINE_SZ-1);
-   currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList;
-
-   while( currElem != NULL )
-    {    //check if size of currElem is big enough
-      sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem);
-      amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog);
-      if( amountExtra > 0 )
-       {    
-         //look if the found element is already aligned
-         if((((uintptr_t)currElem+sizeof(MallocProlog)) & (uintptr_t)(CACHE_LINE_SZ-1)) == 0){
-             //found it, get out of loop
-             foundElem = currElem;
-             break;
-         }else{
-             //find first aligned address and check if it's still big enough
-             //check also if the space before the aligned address is big enough
-             //for a new element
-             void *firstAlignedAddr = (void*)(((uintptr_t)currElem + 2*CACHE_LINE_SZ) & ~((uintptr_t)(CACHE_LINE_SZ-1)));
-             prevAmount = (uintptr_t)firstAlignedAddr - (uintptr_t)currElem;
-             sizeOfFound=(uintptr_t)currElem->nextHigherInMem -(uintptr_t)firstAlignedAddr + sizeof(MallocProlog);
-             amountExtra= sizeOfFound - sizeRequested - sizeof(MallocProlog);
-             if(prevAmount > 2*sizeof(MallocProlog) && amountExtra > 0 ){
-                 //found suitable element
-                 //create new previous element and exit loop
-                 MallocProlog *newAlignedElem = (MallocProlog*)firstAlignedAddr - 1;
-                 
-                 //insert new element into free list
-                 if(currElem->nextChunkInFreeList != NULL)
-                     currElem->nextChunkInFreeList->prevChunkInFreeList = newAlignedElem;                     
-                 newAlignedElem->prevChunkInFreeList = currElem;
-                 newAlignedElem->nextChunkInFreeList = currElem->nextChunkInFreeList;
-                 currElem->nextChunkInFreeList = newAlignedElem;
-                 
-                 //set higherInMem and lowerInMem
-                 newAlignedElem->nextHigherInMem = currElem->nextHigherInMem;
-                 foundElemIsTopOfHeap = currElem->nextHigherInMem ==
-                          _VMSMasterEnv->freeListHead->nextHigherInMem;
-                 if(!foundElemIsTopOfHeap)
-                     currElem->nextHigherInMem->nextLowerInMem = newAlignedElem;
-                 currElem->nextHigherInMem = newAlignedElem;
-                 newAlignedElem->nextLowerInMem = currElem;
-                 
-                 //Found new element leaving loop
-                 foundElem = newAlignedElem;
-                 break;
-             }
-         }
-         
-       }
-       currElem = currElem->nextChunkInFreeList;
-    }
-
-   if( foundElem == NULL )
-    { ERROR("\nmalloc failed\n")
-      return (void *)NULL;  //indicates malloc failed
-    }
-      //Using a kludge to identify the element that is the top chunk in the
-      // heap -- saving top-of-heap addr in head's nextHigherInMem -- and
-      // save addr of start of heap in head's nextLowerInMem
-      //Will handle top of Heap specially
-   foundElemIsTopOfHeap = foundElem->nextHigherInMem ==
-                          _VMSMasterEnv->freeListHead->nextHigherInMem;
-
-      //before shave off and try to insert new elem, remove found elem
-      //note, foundElem will never be the head, so always has valid prevChunk
-   foundElem->prevChunkInFreeList->nextChunkInFreeList =
-                                              foundElem->nextChunkInFreeList;
-   if( foundElem->nextChunkInFreeList != NULL )
-    { foundElem->nextChunkInFreeList->prevChunkInFreeList =
-                                              foundElem->prevChunkInFreeList;
-    }
-   foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated
-   
-      //if enough, turn extra into new elem & insert it
-   if( amountExtra > 64 )
-    {    //make new elem by adding to addr of curr elem then casting
-      sizeConsumed = sizeof(MallocProlog) + sizeRequested;
-      newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed );
-      newElem->nextHigherInMem   = foundElem->nextHigherInMem;
-      newElem->nextLowerInMem    = foundElem;
-      foundElem->nextHigherInMem = newElem;
-      
-      if( ! foundElemIsTopOfHeap )
-       {    //there is no next higher for top of heap, so can't write to it
-         newElem->nextHigherInMem->nextLowerInMem = newElem;
-       }
-      add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead );
-    }
-   else
-    {
-      sizeConsumed = sizeOfFound;
-    }
-  _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed;
-
-   //============================= MEASUREMENT STUFF ========================
-   #ifdef MEAS__TIME_MALLOC
-   saveLowTimeStampCountInto( endStamp );
-   addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist );
-   #endif
-   //========================================================================
-
-      //skip over the prolog by adding its size to the pointer return
-   return (void*)((uintptr_t)foundElem + sizeof(MallocProlog));
- }
-
-
-/*This is sequential code -- only to be called from the Master
- * When free, subtract the size of prolog from pointer, then cast it to a
- * MallocProlog.  Then check the nextLower and nextHigher chunks to see if
- * one or both are also free, and coalesce if so, and if neither free, then
- * add this one to free-list.
+/*
+ * This is sequential code, meant to only be called from the Master, not from
+ * any slave Slvs.
  */
 void
 VMS_int__free( void *ptrToFree )
- { MallocProlog *elemToFree, *nextLowerElem, *nextHigherElem;
-   size_t         sizeOfElem;
-   uint32         lowerExistsAndIsFree, higherExistsAndIsFree;
-
+ {
+    
    //============================= MEASUREMENT STUFF ========================
    #ifdef MEAS__TIME_MALLOC
    int32 startStamp, endStamp;
    saveLowTimeStampCountInto( startStamp );
    #endif
    //========================================================================
-
-   if( ptrToFree < (void*)_VMSMasterEnv->freeListHead->nextLowerInMem ||
-       ptrToFree > (void*)_VMSMasterEnv->freeListHead->nextHigherInMem )
-    {    //outside the range of data owned by VMS's malloc, so do nothing
-      return;
-    }
-      //subtract size of prolog to get pointer to prolog, then cast
-   elemToFree = (MallocProlog *)((uintptr_t)ptrToFree - sizeof(MallocProlog));
-   sizeOfElem =(size_t)((uintptr_t)elemToFree->nextHigherInMem-(uintptr_t)elemToFree);
-
-   if( elemToFree->prevChunkInFreeList != NULL )
-    { printf( "error: freeing same element twice!" ); exit(1);
-    }
-
-   _VMSMasterEnv->amtOfOutstandingMem -= sizeOfElem;
-
-   nextLowerElem  = elemToFree->nextLowerInMem;
-   nextHigherElem = elemToFree->nextHigherInMem;
-
-   if( nextHigherElem == NULL )
-      higherExistsAndIsFree = FALSE;
-   else //okay exists, now check if in the free-list by checking back ptr
-      higherExistsAndIsFree = (nextHigherElem->prevChunkInFreeList != NULL);
-    
-   if( nextLowerElem == NULL )
-      lowerExistsAndIsFree = FALSE;
-   else //okay, it exists, now check if it's free
-      lowerExistsAndIsFree = (nextLowerElem->prevChunkInFreeList != NULL);
-    
-
-      //now, know what exists and what's free
-   if( lowerExistsAndIsFree )
-    { if( higherExistsAndIsFree )
-       {    //both exist and are free, so coalesce all three
-            //First, remove higher from free-list
-         nextHigherElem->prevChunkInFreeList->nextChunkInFreeList =
-                                         nextHigherElem->nextChunkInFreeList;
-         if( nextHigherElem->nextChunkInFreeList != NULL ) //end-of-list?
-            nextHigherElem->nextChunkInFreeList->prevChunkInFreeList =
-                                         nextHigherElem->prevChunkInFreeList;
-            //Now, fix-up sequence-in-mem list -- by side-effect, this also
-            // changes size of the lower elem, which is still in free-list
-         nextLowerElem->nextHigherInMem = nextHigherElem->nextHigherInMem;
-         if( nextHigherElem->nextHigherInMem !=
-             _VMSMasterEnv->freeListHead->nextHigherInMem )
-            nextHigherElem->nextHigherInMem->nextLowerInMem = nextLowerElem;
-            //notice didn't do anything to elemToFree -- it simply is no
-            // longer reachable from any of the lists.  Wonder if could be a
-            // security leak because left valid addresses in it,
-            // but don't care for now.
+   
+   MallocArrays* freeLists = _VMSMasterEnv->freeLists;
+   MallocProlog *chunkToFree = (MallocProlog*)ptrToFree - 1;
+   uint32 containerIdx;
+   
+   //Check for free neighbors
+   if(chunkToFree->nextLowerInMem)
+   {
+       if(chunkToFree->nextLowerInMem->prevChunkInFreeList != NULL)
+       {//Chunk is not allocated
+           extractChunk(chunkToFree->nextLowerInMem, freeLists);
+           chunkToFree = mergeChunks(chunkToFree->nextLowerInMem, chunkToFree);
        }
-      else
-       {    //lower is the only of the two that exists and is free,
-            //In this case, no adjustment to free-list, just change mem-list.
-            // By side-effect, changes size of the lower elem
-         nextLowerElem->nextHigherInMem = elemToFree->nextHigherInMem;
-         if( elemToFree->nextHigherInMem !=
-             _VMSMasterEnv->freeListHead->nextHigherInMem )
-            elemToFree->nextHigherInMem->nextLowerInMem = nextLowerElem;
+   }
+   if(chunkToFree->nextHigherInMem)
+   {
+       if(chunkToFree->nextHigherInMem->prevChunkInFreeList != NULL)
+       {//Chunk is not allocated
+           extractChunk(chunkToFree->nextHigherInMem, freeLists);
+           chunkToFree = mergeChunks(chunkToFree, chunkToFree->nextHigherInMem);
        }
-    }
+   }
+   
+   size_t chunkSize = getChunkSize(chunkToFree);
+   if(chunkSize < BIG_LOWER_BOUND)
+   {
+       containerIdx =  (chunkSize/SMALL_CHUNK_SIZE)-1;
+       if(containerIdx > SMALL_CHUNK_COUNT-1)
+           containerIdx = SMALL_CHUNK_COUNT-1;
+       insertChunk(chunkToFree, &freeLists->smallChunks[containerIdx]);
+   }
    else
-    {    //lower either doesn't exist or isn't free, so check higher
-      if( higherExistsAndIsFree )
-       {    //higher exists and is the only of the two free
-            //First, in free-list, replace higher elem with the one to free
-         elemToFree->nextChunkInFreeList=nextHigherElem->nextChunkInFreeList;
-         elemToFree->prevChunkInFreeList=nextHigherElem->prevChunkInFreeList;
-         elemToFree->prevChunkInFreeList->nextChunkInFreeList = elemToFree;
-         if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list?
-            elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree;
-            //Now chg mem-list. By side-effect, changes size of elemToFree
-         elemToFree->nextHigherInMem = nextHigherElem->nextHigherInMem;
-         if( elemToFree->nextHigherInMem !=
-             _VMSMasterEnv->freeListHead->nextHigherInMem )
-            elemToFree->nextHigherInMem->nextLowerInMem = elemToFree;
-       }
-      else
-       {    //neither lower nor higher is availabe to coalesce so add to list
-            // this makes prev chunk ptr non-null, which indicates it's free
-         elemToFree->nextChunkInFreeList =
-                            _VMSMasterEnv->freeListHead->nextChunkInFreeList;
-         _VMSMasterEnv->freeListHead->nextChunkInFreeList = elemToFree;
-         if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list?
-            elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree;
-         elemToFree->prevChunkInFreeList = _VMSMasterEnv->freeListHead;
-       }
-    }
+   {
+       containerIdx = getContainer(getChunkSize(chunkToFree)) - 1;
+       insertChunk(chunkToFree, &freeLists->bigChunks[containerIdx]);
+       if(containerIdx < 64)
+           freeLists->bigChunksSearchVector[0] |= (uint64)1 << containerIdx;
+       else
+           freeLists->bigChunksSearchVector[1] |= (uint64)1 << (containerIdx-64);
+   }   
+   
    //============================= MEASUREMENT STUFF ========================
    #ifdef MEAS__TIME_MALLOC
    saveLowTimeStampCountInto( endStamp );
@@ -373,82 +316,31 @@
 
  }
 
-
-/*Allocates memory from the external system -- higher overhead
- *
- *Because of Linux's malloc throwing bizarre random faults when malloc is
- * used inside a VMS virtual processor, have to pass this as a request and
- * have the core loop do it when it gets around to it -- will look for these
- * chores leftover from the previous animation of masterVP the next time it
- * goes to animate the masterVP -- so it takes two separate masterVP
- * animations, separated by work, to complete an external malloc or
- * external free request.
- *
- *Thinking core loop accepts signals -- just looks if signal-location is
- * empty or not --
+/*
+ * Designed to be called from the main thread outside of VMS, during init
  */
-void *
-VMS__malloc_in_ext( size_t sizeRequested )
- {
- /*
-      //This is running in the master, so no chance for multiple cores to be
-      // competing for the core's flag.
-   if(  *(_VMSMasterEnv->coreLoopSignalAddr[ 0 ]) != 0 )
-    {    //something has already signalled to core loop, so save the signal
-         // and look, next time master animated, to see if can send it.
-         //Note, the addr to put a signal is in the coreloop's frame, so just
-         // checks it each time through -- make it volatile to avoid GCC
-         // optimizations -- it's a coreloop local var that only changes
-         // after jumping away.  The signal includes the addr to send the
-         //return to -- even if just empty return completion-signal
-         //
-         //save the signal in some queue that the master looks at each time
-         // it starts up -- one loc says if empty for fast common case --
-         //something like that -- want to hide this inside this call -- but
-         // think this has to come as a request -- req handler gives procr
-         // back to master loop, which gives it back to req handler at point
-         // it sees that core loop has sent return signal.  Something like
-         // that.
-      saveTheSignal
-
-    }
-  coreSigData->type = malloc;
-  coreSigData->sizeToMalloc = sizeRequested;
-  coreSigData->locToSignalCompletion = &figureOut;
-   _VMSMasterEnv->coreLoopSignals[ 0 ] = coreSigData;
-  */
-      //just risk system-stack faults until get this figured out
-   return malloc( sizeRequested );
- }
-
-
-/*Frees memory that was allocated in the external system -- higher overhead
- *
- *As noted in external malloc comment, this is clunky 'cause the free has
- * to be called in the core loop.
- */
-void
-VMS__free_in_ext( void *ptrToFree )
- {
-      //just risk system-stack faults until get this figured out
-   free( ptrToFree );
-
-      //TODO: fix this -- so 
- }
-
-
-/*Designed to be called from the main thread outside of VMS, during init
- */
-MallocProlog *
+MallocArrays *
 VMS_ext__create_free_list()
- { MallocProlog *freeListHead, *firstChunk;
-
-      //Note, this is running in the main thread -- all increases in malloc
-      // mem and all frees of it must be done in this thread, with the
-      // thread's original stack available
-   freeListHead = malloc( sizeof(MallocProlog) );
-   firstChunk   = malloc( MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE );
-   if( firstChunk == NULL ) {printf("malloc error\n"); exit(1);}
+{     
+   //Initialize containers for small chunks and fill with zeros
+   _VMSMasterEnv->freeLists = (MallocArrays*)malloc( sizeof(MallocArrays) );
+   MallocArrays *freeLists = _VMSMasterEnv->freeLists;
+   
+   freeLists->smallChunks = 
+           (MallocProlog**)malloc(SMALL_CHUNK_COUNT*sizeof(MallocProlog*));
+   memset((void*)freeLists->smallChunks,
+           0,SMALL_CHUNK_COUNT*sizeof(MallocProlog*));
+   
+   //Calculate number of containers for big chunks
+   uint32 container = getContainer(MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE)+1;
+   freeLists->bigChunks = (MallocProlog**)malloc(container*sizeof(MallocProlog*));
+   memset((void*)freeLists->bigChunks,0,container*sizeof(MallocProlog*));
+   freeLists->containerCount = container;
+   
+   //Create first element in lastContainer 
+   MallocProlog *firstChunk = malloc( MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE );
+   if( firstChunk == NULL ) {printf("Can't allocate initial memory\n"); exit(1);}
+   freeLists->memSpace = firstChunk;
    
    //Touch memory to avoid page faults
    void *ptr,*endPtr; 
@@ -457,38 +349,47 @@
    {
        *(char*)ptr = 0;
    }
-
-   freeListHead->prevChunkInFreeList = NULL;
-      //Use this addr to free the heap when cleanup
-   freeListHead->nextLowerInMem      = firstChunk;
-      //to identify top-of-heap elem, compare this addr to elem's next higher
-   freeListHead->nextHigherInMem     = (void*)( (uintptr_t)firstChunk +
-                                         MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE);
-   freeListHead->nextChunkInFreeList = firstChunk;
-
-   firstChunk->nextChunkInFreeList   = NULL;
-   firstChunk->prevChunkInFreeList   = freeListHead;
-      //next Higher has to be set to top of chunk, so can calc size in malloc
-   firstChunk->nextHigherInMem       = (void*)( (uintptr_t)firstChunk +
-                                         MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE);
-   firstChunk->nextLowerInMem        = NULL; //identifies as bott of heap
    
-   _VMSMasterEnv->amtOfOutstandingMem = 0; //none allocated yet
-
-   return freeListHead;
+   firstChunk->nextLowerInMem = NULL;
+   firstChunk->nextHigherInMem = (MallocProlog*)((uintptr_t)firstChunk +
+                        MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE - sizeof(MallocProlog));
+   firstChunk->nextChunkInFreeList = NULL;
+   //previous element in the queue is the container
+   firstChunk->prevChunkInFreeList = &freeLists->bigChunks[container-2];
+   
+   freeLists->bigChunks[container-2] = firstChunk;
+   //Insert into bit search list
+   if(container <= 65)
+   {
+       freeLists->bigChunksSearchVector[0] = ((uint64)1 << (container-2));
+       freeLists->bigChunksSearchVector[1] = 0;
+   }   
+   else
+   {
+       freeLists->bigChunksSearchVector[0] = 0;
+       freeLists->bigChunksSearchVector[1] = ((uint64)1 << (container-66));
+   }
+   
+   //Create dummy chunk to mark the top of the memory space; it is, of
+   //course, never freed
+   MallocProlog *dummyChunk = firstChunk->nextHigherInMem;
+   dummyChunk->nextHigherInMem = dummyChunk+1;
+   dummyChunk->nextLowerInMem  = NULL;
+   dummyChunk->nextChunkInFreeList = NULL;
+   dummyChunk->prevChunkInFreeList = NULL;
+   
+   return freeLists;
  }
 
 
 /*Designed to be called from the main thread outside of VMS, during cleanup
  */
 void
-VMS_ext__free_free_list( MallocProlog *freeListHead )
+VMS_ext__free_free_list( MallocArrays *freeLists )
  {    
-      //stashed a ptr to the one and only bug chunk malloc'd from OS in the
-      // free list head's next lower in mem pointer
-   free( freeListHead->nextLowerInMem );
-
-   //don't free the head -- it'll be in an array eventually -- free whole
-   // array when all the free lists linked from it have already been freed
+   free(freeLists->memSpace);    //the region malloc'd from the OS in VMS_ext__create_free_list
+   free(freeLists->bigChunks);   //array of list heads for the big-chunk containers
+   free(freeLists->smallChunks); //array of list heads for the small-chunk containers
+   //NOTE(review): the MallocArrays struct itself is not freed here -- confirm who releases it
  }
 
diff -r eaf7e4c58c9e -r 0c83ea8adefc vmalloc.h
--- a/vmalloc.h	Wed Feb 22 11:39:12 2012 -0800
+++ b/vmalloc.h	Sun Mar 04 14:26:35 2012 -0800
@@ -14,6 +14,14 @@
 #include <inttypes.h>
 #include "VMS_primitive_data_types.h"
 
+#define SMALL_CHUNK_SIZE 32   //presumably the size step between small-chunk containers -- TODO confirm in vmalloc.c
+#define SMALL_CHUNK_COUNT 4   //number of entries allocated for MallocArrays.smallChunks
+#define LOWER_BOUND     128  //Biggest chunk size that is created for the small chunks
+#define BIG_LOWER_BOUND 160  //Smallest chunk size that is created for the big chunks
+
+#define LOG54 0.3219280948873623 //numerically log2(5/4); NOTE(review): presumably used by getContainer -- confirm
+#define LOG128 7                 //numerically log2(128), i.e. log2(LOWER_BOUND); NOTE(review): confirm use site
+
 typedef struct _MallocProlog MallocProlog;
 
 struct _MallocProlog
@@ -24,6 +32,18 @@
    MallocProlog *nextLowerInMem;
  };
 //MallocProlog
+ 
+ typedef struct MallocArrays MallocArrays;
+
+ struct MallocArrays
+ {
+     MallocProlog **smallChunks;            //SMALL_CHUNK_COUNT list heads; zeroed in VMS_ext__create_free_list
+     MallocProlog **bigChunks;              //containerCount list heads; zeroed in VMS_ext__create_free_list
+     uint64       bigChunksSearchVector[2]; //bit i is set when a chunk is inserted into bigChunks[i]; [0] covers 0..63, [1] covers 64 and up
+     void         *memSpace;                //the region malloc'd from the OS at creation; freed in VMS_ext__free_free_list
+     uint32       containerCount;           //number of entries in bigChunks
+ };
+ //MallocArrays
 
 typedef struct
  {
@@ -34,57 +54,38 @@
 
 void *
 VMS_int__malloc( size_t sizeRequested );
+#define VMS_PI__malloc  VMS_int__malloc
+#define VMS_WL__malloc  VMS_int__malloc /*TODO: Bug -- Not protected!! */
+#define VMS_App__malloc VMS_int__malloc /*TODO: Bug -- Not protected!! */
 
 void *
 VMS_int__malloc_aligned( size_t sizeRequested );
+#define VMS_PI__malloc_aligned VMS_int__malloc_aligned
+#define VMS_WL__malloc_aligned VMS_int__malloc_aligned
 
 void
 VMS_int__free( void *ptrToFree );
+#define VMS_PI__free  VMS_int__free
+#define VMS_WL__free  VMS_int__free /*TODO: Bug -- Not protected!! */
+#define VMS_App__free VMS_int__free /*TODO: Bug -- Not protected!! */
 
-#define VMS_PI__malloc VMS_int__malloc
-#define VMS_PI__malloc_aligned VMS_int__malloc_aligned
-#define VMS_PI__free VMS_int__free
-/* For now, the PI is protected by master lock, so int malloc fine
-void *
-VMS_PI__malloc( size_t sizeRequested );
 
-void *
-VMS_PI__malloc_aligned( size_t sizeRequested );
-
-void
-VMS_PI__free( void *ptrToFree );
-*/
-
-//TODO: protect WL malloc from concurrency!! shared freelist can be corrupted
-#define VMS_WL__malloc VMS_int__malloc
-#define VMS_WL__malloc_aligned VMS_int__malloc_aligned
-#define VMS_WL__free VMS_int__free
-/*
-void *
-VMS_WL__malloc( size_t sizeRequested );
-
-void *
-VMS_WL__malloc_aligned( size_t sizeRequested );
-
-void
-VMS_WL__free( void *ptrToFree );
-*/
 
 /*Allocates memory from the external system -- higher overhead
  */
 void *
-VMS__malloc_in_ext( size_t sizeRequested );
+VMS_ext__malloc_in_ext( size_t sizeRequested );
 
 /*Frees memory that was allocated in the external system -- higher overhead
  */
 void
-VMS__free_in_ext( void *ptrToFree );
+VMS_ext__free_in_ext( void *ptrToFree );
 
 
-MallocProlog *
+MallocArrays *
 VMS_ext__create_free_list();
 
 void
-VMS_ext__free_free_list( MallocProlog *freeListHead );
+VMS_ext__free_free_list(MallocArrays *freeLists );
 
 #endif
\ No newline at end of file
diff -r eaf7e4c58c9e -r 0c83ea8adefc vutilities.h
--- a/vutilities.h	Wed Feb 22 11:39:12 2012 -0800
+++ b/vutilities.h	Sun Mar 04 14:26:35 2012 -0800
@@ -8,8 +8,8 @@
  */
 
 
-#ifndef  _UTILITIES_H
-#define	_UTILITIES_H
+#ifndef  _VUTILITIES_H
+#define	_VUTILITIES_H
 
 #include <string.h>
 #include "VMS_primitive_data_types.h"