# HG changeset patch
# User Me@portablequad
# Date 1329025645 28800
# Node ID 651ee45615ae3692f7b0e077bd81c8f6ab0eade9
# Parent  7cff4e13d5c423299ec9e8ea2d687a90f75f8a50
made default branch deprecated (see README)

diff -r 7cff4e13d5c4 -r 651ee45615ae .hgignore
--- a/.hgignore	Fri Feb 10 12:05:17 2012 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,3 +0,0 @@
-syntax: glob
-
-*.o
diff -r 7cff4e13d5c4 -r 651ee45615ae .hgtags
--- a/.hgtags	Fri Feb 10 12:05:17 2012 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-9c3107044f86c36fea3a8f72f64910b1363555be Dec27_2010_about_to_add_sched_record
diff -r 7cff4e13d5c4 -r 651ee45615ae CoreLoop.c
--- a/CoreLoop.c	Fri Feb 10 12:05:17 2012 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,215 +0,0 @@
-/*
- * Copyright 2010  OpenSourceStewardshipFoundation
- *
- * Licensed under BSD
- */
-
-
-#include "VMS.h"
-#include "Queue_impl/BlockingQueue.h"
-#include "ProcrContext.h"
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <time.h>
-
-#include <pthread.h>
-#include <sched.h>
-
-void *terminateCoreLoop(VirtProcr *currPr);
-
-/*This is the loop that runs in the OS Thread pinned to each core
- *Get virt procr from queue,
- * save state of current animator, then load in state of virt procr, using
- * jmp instr to switch the program-counter state -- making the virt procr
- * the new animator.
- *At some point, the virt procr will suspend itself by saving out its
- * animator state (stack ptr, frame ptr, program counter) and switching
- * back to the OS Thread's animator state, which means restoring the
- * stack and frame and jumping to the core loop start point.
- *This cycle then repeats, until a special shutdown virtual processor is
- * animated, which jumps to the end point at the bottom of core loop.
- */
-void *
-coreLoop( void *paramsIn )
- { 
-   ThdParams      *coreLoopThdParams;
-   int             thisCoresIdx;
-   VirtProcr      *currPr;
-   VMSQueueStruc *readyToAnimateQ;
-   cpu_set_t   coreMask;  //has 1 in bit positions of allowed cores
-   int             errorCode;
-
-      //work-stealing struc on stack to prevent false-sharing in cache-line
-   volatile GateStruc gate;
-   //preGateProgress, waitProgress, exitProgress, gateClosed;
-
-
-   coreLoopThdParams = (ThdParams *)paramsIn;
-   thisCoresIdx = coreLoopThdParams->coreNum;
-
-   gate.gateClosed      = FALSE;
-   gate.preGateProgress = 0;
-   gate.waitProgress    = 0;
-   gate.exitProgress    = 0;
-   _VMSMasterEnv->workStealingGates[ thisCoresIdx ] = (GateStruc*)&gate;//race @startup
-
-      //wait until signalled that setup is complete
-   pthread_mutex_lock(   &suspendLock );
-   while( !(_VMSMasterEnv->setupComplete) )
-    {
-      pthread_cond_wait( &suspend_cond,
-                         &suspendLock );
-    }
-   pthread_mutex_unlock( &suspendLock );
-
-      //printf( "\nCore unsuspended: %d\n", coreLoopThdParams->coreNum );
-
-      //set thread affinity
-      //Linux requires pinning thd to core inside thread-function
-      //Designate a core by a 1 in bit-position corresponding to the core
-   CPU_ZERO(&coreMask);
-   CPU_SET(coreLoopThdParams->coreNum,&coreMask);
-   //coreMask = 1L << coreLoopThdParams->coreNum;
-
-   pthread_t selfThd = pthread_self();
-   errorCode =
-   pthread_setaffinity_np( selfThd, sizeof(coreMask), &coreMask);
-   
-   if(errorCode){ printf("\nset affinity failure\n"); exit(0); }
-
-   
-   //Save the return address in the SwitchVP function
-   saveCoreLoopReturnAddr((void**)&(_VMSMasterEnv->coreLoopReturnPt));
-
-   
-   while(1){
-   
-      //Get virtual processor from queue
-      //The Q must be a global, static volatile var, so not kept in reg,
-      // which forces reloading the pointer after each jmp to this point
-   readyToAnimateQ  = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx];
-
-   #ifdef USE_WORK_STEALING
-      //Alg for work-stealing designed to make common case fast.  Comment
-      // in stealer code explains.
-   gate.preGateProgress++;
-   if( gate.gateClosed )
-    {    //now, set coreloop's progress, so stealer can see that core loop
-         // has made it into the waiting area.
-      gate.waitProgress = gate.preGateProgress;
-      while( gate.gateClosed ) /*busy wait*/;
-    }
-
-   currPr = (VirtProcr *) readVMSQ( readyToAnimateQ );
-
-      //Set the coreloop's progress, so stealer can see it has made it out
-      // of the protected area
-   gate.exitProgress = gate.preGateProgress;
-   #else
-   currPr = (VirtProcr *) readVMSQ( readyToAnimateQ );
-   #endif
-
-   if( currPr != NULL ) _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0;
-   else
-    {
-      //============================= MEASUREMENT STUFF =====================
-      #ifdef MEAS__TIME_MASTER_LOCK
-      int32 startStamp, endStamp;
-      saveLowTimeStampCountInto( startStamp );
-      #endif
-      //=====================================================================
-      int tries = 0; int gotLock = 0;
-      while( currPr == NULL ) //if queue was empty, enter get masterLock loop
-       {    //queue was empty, so get master lock
-
-         gotLock = __sync_bool_compare_and_swap(&(_VMSMasterEnv->masterLock),
-                                                          UNLOCKED, LOCKED );
-         if( gotLock )
-          {    //run own MasterVP -- jmps to coreLoops startPt when done
-            currPr = _VMSMasterEnv->masterVPs[thisCoresIdx];
-            if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 )
-             {       DEBUG( dbgB2BMaster,"Many back to back MasterVPs\n");
-               pthread_yield();
-             }
-            _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1;
-            break;  //end while -- have a VP to animate now
-          }
-
-         tries++;      //if too many, means master on other core taking too long
-         if( tries > MASTERLOCK_RETRIES ) { tries = 0; pthread_yield(); }
-       }
-      //============================= MEASUREMENT STUFF =====================
-      #ifdef MEAS__TIME_MASTER_LOCK
-      saveLowTimeStampCountInto( endStamp );
-      addIntervalToHist( startStamp, endStamp,
-                         _VMSMasterEnv->masterLockLowTimeHist );
-      addIntervalToHist( startStamp, endStamp,
-                         _VMSMasterEnv->masterLockHighTimeHist );
-      #endif
-      //=====================================================================
-
-    }
-
-   
-   switchToVP(currPr); //The VPs return in here
-   flushRegisters();
-   }//CoreLoop      
- }
-
-
-void *
-terminateCoreLoop(VirtProcr *currPr){
-   //first free shutdown VP that jumped here -- it first restores the
-   // coreloop's stack, so addr of currPr in stack frame is still correct
-   VMS__dissipate_procr( currPr );
-   pthread_exit( NULL );
-}
-
-
-
-#ifdef SEQUENTIAL
-
-//===========================================================================
-/*This sequential version is exact same as threaded, except doesn't do the
- * pin-threads part, nor the wait until setup complete part.
- */
-void *
-coreLoop_Seq( void *paramsIn )
- {
-   VirtProcr      *currPr;
-   VMSQueueStruc *readyToAnimateQ;
-   
-   ThdParams      *coreLoopThdParams;
-   int             thisCoresIdx;
-   
-   coreLoopThdParams = (ThdParams *)paramsIn;
-//   thisCoresIdx = coreLoopThdParams->coreNum;
-   thisCoresIdx = 0;
-
-   //Save the return address in the SwitchVP function
-   saveCoreLoopReturnAddr(&(_VMSMasterEnv->coreLoopReturnPt));
-
-   
-   while(1){
-      //Get virtual processor from queue
-      //_VMSWorkQ must be a global, static volatile var, so not kept in reg,
-      // which forces reloading the pointer after each jmp to this point
-   readyToAnimateQ  = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx];
-   currPr = (VirtProcr *) readVMSQ( readyToAnimateQ );
-   if( currPr == NULL )
-    { if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 )
-       { printf("too many back to back MasterVP\n"); exit(1); }
-      _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1;
-      
-      currPr = _VMSMasterEnv->masterVPs[thisCoresIdx];
-    }
-   else
-      _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0;
-
-
-   switchToVP( currPr );
-   flushRegisters();
-   }
- }
-#endif
diff -r 7cff4e13d5c4 -r 651ee45615ae MasterLoop.c
--- a/MasterLoop.c	Fri Feb 10 12:05:17 2012 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,373 +0,0 @@
-/*
- * Copyright 2010  OpenSourceStewardshipFoundation
- * 
- * Licensed under BSD
- */
-
-
-
-#include <stdio.h>
-#include <stddef.h>
-
-#include "VMS.h"
-#include "ProcrContext.h"
-
-
-//===========================================================================
-void inline
-stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ,
-               VirtProcr *masterPr );
-
-//===========================================================================
-
-
-
-/*This code is animated by the virtual Master processor.
- *
- *Polls each sched slot exactly once, hands any requests made by a newly
- * done slave to the "request handler" plug-in function
- *
- *Any slots that need a virt procr assigned are given to the "schedule"
- * plug-in function, which tries to assign a virt procr (slave) to it.
- *
- *When all slots needing a processor have been given to the schedule plug-in,
- * a fraction of the procrs successfully scheduled are put into the
- * work queue, then a continuation of this function is put in, then the rest
- * of the virt procrs that were successfully scheduled.
- *
- *The first thing the continuation does is busy-wait until the previous
- * animation completes.  This is because an (unlikely) continuation may
- * sneak through queue before previous continuation is done putting second
- * part of scheduled slaves in, which is the only race condition.
- *
- */
-
-/*May 29, 2010 -- birth a Master during init so that first core loop to
- * start running gets it and does all the stuff for a newly born --
- * from then on, will be doing continuation, but do suspension self
- * directly at end of master loop
- *So VMS__init just births the master virtual processor same way it births
- * all the others -- then does any extra setup needed and puts it into the
- * work queue.
- *However means have to make masterEnv a global static volatile the same way
- * did with readyToAnimateQ in core loop.  -- for performance, put the
- * jump to the core loop directly in here, and have it directly jump back.
- *
- *
- *Aug 18, 2010 -- Going to a separate MasterVP for each core, to see if this
- * avoids the suspected bug in the system stack that causes bizarre faults
- * at random places in the system code.
- *
- *So, this function is coupled to each of the MasterVPs, -- meaning this
- * function can't rely on a particular stack and frame -- each MasterVP that
- * animates this function has a different one.
- *
- *At this point, the masterLoop does not write itself into the queue anymore,
- * instead, the coreLoop acquires the masterLock when it has nothing to
- * animate, and then animates its own masterLoop.  However, still try to put
- * several AppVPs into the queue to amortize the startup cost of switching
- * to the MasterVP.  Note, don't have to worry about latency of requests much
- * because most requests generate work for same core -- only latency issue
- * is case when other cores starved and one core's requests generate work
- * for them -- so keep max in queue to 3 or 4..
- */
-void masterLoop( void *initData, VirtProcr *animatingPr )
- { 
-   int32           slotIdx, numSlotsFilled;
-   VirtProcr      *schedVirtPr;
-   SchedSlot      *currSlot, **schedSlots;
-   MasterEnv      *masterEnv;
-   VMSQueueStruc  *readyToAnimateQ;
-   
-   SlaveScheduler  slaveScheduler;
-   RequestHandler  requestHandler;
-   void           *semanticEnv;
-
-   int32           thisCoresIdx;
-   VirtProcr      *masterPr;
-   volatile        VirtProcr *volatileMasterPr;
-   
-   volatileMasterPr = animatingPr;
-   masterPr         = (VirtProcr*)volatileMasterPr; //used to force re-define after jmp
-
-      //First animation of each MasterVP will in turn animate this part
-      // of setup code.. (VP creator sets up the stack as if this function
-      // was called normally, but actually get here by jmp)
-      //So, setup values about stack ptr, jmp pt and all that
-   //masterPr->nextInstrPt = &&masterLoopStartPt;
-
-
-      //Note, got rid of writing the stack and frame ptr up here, because
-      // only one
-      // core can ever animate a given MasterVP, so don't need to communicate
-      // new frame and stack ptr to the MasterVP storage before a second
-      // version of that MasterVP can get animated on a different core.
-      //Also got rid of the busy-wait.
-
-   
-   //masterLoopStartPt:
-   while(1){
-       
-   //============================= MEASUREMENT STUFF ========================
-   #ifdef MEAS__TIME_MASTER
-      //Total Master time includes one coreloop time -- just assume the core
-      // loop time is same for Master as for AppVPs, even though it may be
-      // smaller due to higher predictability of the fixed jmp.
-   saveLowTimeStampCountInto( masterPr->startMasterTSCLow );
-   #endif
-   //========================================================================
-
-   masterEnv        = (MasterEnv*)_VMSMasterEnv;
-   
-      //GCC may optimize so doesn't always re-define from frame-storage
-   masterPr         = (VirtProcr*)volatileMasterPr;  //just to make sure after jmp
-   thisCoresIdx     = masterPr->coreAnimatedBy;
-   readyToAnimateQ  = masterEnv->readyToAnimateQs[thisCoresIdx];
-   schedSlots       = masterEnv->allSchedSlots[thisCoresIdx];
-
-   requestHandler   = masterEnv->requestHandler;
-   slaveScheduler   = masterEnv->slaveScheduler;
-   semanticEnv      = masterEnv->semanticEnv;
-
-
-      //Poll each slot's Done flag
-   numSlotsFilled = 0;
-   for( slotIdx = 0; slotIdx < NUM_SCHED_SLOTS; slotIdx++)
-    {
-      currSlot = schedSlots[ slotIdx ];
-
-      if( currSlot->workIsDone )
-       {
-         currSlot->workIsDone         = FALSE;
-         currSlot->needsProcrAssigned = TRUE;
-
-            //process requests from slave to master
-               //====================== MEASUREMENT STUFF ===================
-               #ifdef MEAS__TIME_PLUGIN
-               int32 startStamp1, endStamp1;
-               saveLowTimeStampCountInto( startStamp1 );
-               #endif
-               //============================================================
-         (*requestHandler)( currSlot->procrAssignedToSlot, semanticEnv );
-               //====================== MEASUREMENT STUFF ===================
-               #ifdef MEAS__TIME_PLUGIN
-               saveLowTimeStampCountInto( endStamp1 );
-               addIntervalToHist( startStamp1, endStamp1,
-                                        _VMSMasterEnv->reqHdlrLowTimeHist );
-               addIntervalToHist( startStamp1, endStamp1,
-                                        _VMSMasterEnv->reqHdlrHighTimeHist );
-               #endif
-               //============================================================
-       }
-      if( currSlot->needsProcrAssigned )
-       {    //give slot a new virt procr
-         schedVirtPr =
-          (*slaveScheduler)( semanticEnv, thisCoresIdx );
-         
-         if( schedVirtPr != NULL )
-          { currSlot->procrAssignedToSlot = schedVirtPr;
-            schedVirtPr->schedSlot        = currSlot;
-            currSlot->needsProcrAssigned  = FALSE;
-            numSlotsFilled               += 1;
-            
-            writeVMSQ( schedVirtPr, readyToAnimateQ );
-          }
-       }
-    }
-
-   
-   #ifdef USE_WORK_STEALING
-      //If no slots filled, means no more work, look for work to steal.
-   if( numSlotsFilled == 0 )
-    { gateProtected_stealWorkInto( currSlot, readyToAnimateQ, masterPr );
-    }
-   #endif
-
-   
-   #ifdef MEAS__TIME_MASTER
-   saveLowTimeStampCountInto( masterPr->endMasterTSCLow );
-   #endif
-
-   masterSwitchToCoreLoop(animatingPr);
-   flushRegisters();
-   }//MasterLoop
-
-
- }
-
-
-
-/*This has a race condition -- the coreloops are accessing their own queues
- * at the same time that this work-stealer on a different core is trying to
- */
-void inline
-stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ,
-               VirtProcr *masterPr )
- { 
-   VirtProcr   *stolenPr;
-   int32        coreIdx, i;
-   VMSQueueStruc *currQ;
-
-   stolenPr = NULL;
-   coreIdx = masterPr->coreAnimatedBy;
-   for( i = 0; i < NUM_CORES -1; i++ )
-    {
-      if( coreIdx >= NUM_CORES -1 )
-       { coreIdx = 0;
-       }
-      else
-       { coreIdx++;
-       }
-      currQ = _VMSMasterEnv->readyToAnimateQs[coreIdx];
-      if( numInVMSQ( currQ ) > 0 )
-       { stolenPr = readVMSQ (currQ );
-         break;
-       }
-    }
-
-   if( stolenPr != NULL )
-    { currSlot->procrAssignedToSlot = stolenPr;
-      stolenPr->schedSlot           = currSlot;
-      currSlot->needsProcrAssigned  = FALSE;
-
-      writeVMSQ( stolenPr, readyToAnimateQ );
-    }
- }
-
-/*This algorithm makes the common case fast.  Make the coreloop passive,
- * and show its progress.  Make the stealer control a gate that coreloop
- * has to pass.
- *To avoid interference, only one stealer at a time.  Use a global
- * stealer-lock.
- *
- *The pattern is based on a gate -- stealer shuts the gate, then monitors
- * to be sure any already past make it all the way out, before starting.
- *So, have a "progress" measure just before the gate, then have two after it,
- * one is in a "waiting room" outside the gate, the other is at the exit.
- *Then, the stealer first shuts the gate, then checks the progress measure
- * outside it, then looks to see if the progress measure at the exit is the
- * same.  If yes, it knows the protected area is empty 'cause no other way
- * to get in and the last to get in also exited.
- *If the progress measure at the exit is not the same, then the stealer goes
- * into a loop checking both the waiting-area and the exit progress-measures
- * until one of them shows the same as the measure outside the gate.  Might
- * as well re-read the measure outside the gate each go around, just to be
- * sure.  It is guaranteed that one of the two will eventually match the one
- * outside the gate.
- *
- *Here's an informal proof of correctness:
- *The gate can be closed at any point, and have only four cases:
- *  1) coreloop made it past the gate-closing but not yet past the exit
- *  2) coreloop made it past the pre-gate progress update but not yet past
- *     the gate,
- *  3) coreloop is right before the pre-gate update
- *  4) coreloop is past the exit and far from the pre-gate update.
- *
- * Covering the cases in reverse order,
- *  4) is not a problem -- stealer will read pre-gate progress, see that it
- *     matches exit progress, and the gate is closed, so stealer can proceed.
- *  3) stealer will read pre-gate progress just after coreloop updates it..
- *     so stealer goes into a loop until the coreloop causes wait-progress
- *     to match pre-gate progress, so then stealer can proceed
- *  2) same as 3..
- *  1) stealer reads pre-gate progress, sees that it's different than exit,
- *     so goes into loop until exit matches pre-gate, now it knows coreloop
- *     is not in protected and cannot get back in, so can proceed.
- *
- *Implementation for the stealer:
- *
- *First, acquire the stealer lock -- only cores with no work to do will
- * compete to steal, so not a big performance penalty having only one --
- * will rarely have multiple stealers in a system with plenty of work -- and
- * in a system with little work, it doesn't matter.
- *
- *Note, have single-reader, single-writer pattern for all variables used to
- * communicate between stealer and victims
- *
- *So, scan the queues of the core loops, until find non-empty.  Each core
- * has its own list that it scans.  The list goes in order from closest to
- * furthest core, so it steals first from close cores.  Later can add
- * taking info from the app about overlapping footprints, and scan all the
- * others then choose work with the most footprint overlap with the contents
- * of this core's cache.
- *
- *Now, have a victim want to take work from.  So, shut the gate in that
- * coreloop, by setting the "gate closed" var on its stack to TRUE.
- *Then, read the core's pre-gate progress and compare to the core's exit
- * progress.
- *If same, can proceed to take work from the coreloop's queue.  When done,
- * write FALSE to gate closed var.
- *If different, then enter a loop that reads the pre-gate progress, then
- * compares to exit progress then to wait progress.  When one of two
- * matches, proceed.  Take work from the coreloop's queue.  When done,
- * write FALSE to the gate closed var.
- * 
- */
-void inline
-gateProtected_stealWorkInto( SchedSlot *currSlot,
-                             VMSQueueStruc *myReadyToAnimateQ,
-                             VirtProcr *masterPr )
- {
-   VirtProcr     *stolenPr;
-   int32          coreIdx, i, haveAVictim, gotLock;
-   VMSQueueStruc *victimsQ;
-
-   volatile GateStruc *vicGate;
-   int32               coreMightBeInProtected;
-
-
-
-      //see if any other cores have work available to steal
-   haveAVictim = FALSE;
-   coreIdx = masterPr->coreAnimatedBy;
-   for( i = 0; i < NUM_CORES -1; i++ )
-    {
-      if( coreIdx >= NUM_CORES -1 )
-       { coreIdx = 0;
-       }
-      else
-       { coreIdx++;
-       }
-      victimsQ = _VMSMasterEnv->readyToAnimateQs[coreIdx];
-      if( numInVMSQ( victimsQ ) > 0 )
-       { haveAVictim = TRUE;
-         vicGate = _VMSMasterEnv->workStealingGates[ coreIdx ];
-         break;
-       }
-    }
-   if( !haveAVictim ) return;  //no work to steal, exit
-
-      //have a victim core, now get the stealer-lock
-   gotLock =__sync_bool_compare_and_swap( &(_VMSMasterEnv->workStealingLock),
-                                                          UNLOCKED, LOCKED );
-   if( !gotLock ) return; //go back to core loop, which will re-start master
-
-
-   //====== Start Gate-protection =======
-   vicGate->gateClosed = TRUE;
-   coreMightBeInProtected= vicGate->preGateProgress != vicGate->exitProgress;
-   while( coreMightBeInProtected )
-    {    //wait until sure
-      if( vicGate->preGateProgress == vicGate->waitProgress )
-         coreMightBeInProtected = FALSE;
-      if( vicGate->preGateProgress == vicGate->exitProgress )
-         coreMightBeInProtected = FALSE;
-    }
-
-   stolenPr = readVMSQ ( victimsQ );
-
-   vicGate->gateClosed = FALSE;
-   //======= End Gate-protection  =======
-
-
-   if( stolenPr != NULL )  //victim could have been in protected and taken
-    { currSlot->procrAssignedToSlot = stolenPr;
-      stolenPr->schedSlot           = currSlot;
-      currSlot->needsProcrAssigned  = FALSE;
-
-      writeVMSQ( stolenPr, myReadyToAnimateQ );
-    }
-
-      //unlock the work stealing lock
-   _VMSMasterEnv->workStealingLock = UNLOCKED;
- }
diff -r 7cff4e13d5c4 -r 651ee45615ae ProcrContext.c
--- a/ProcrContext.c	Fri Feb 10 12:05:17 2012 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,65 +0,0 @@
-/*
- * This File contains all hardware dependent C code.
- */
-
-
-#include "VMS.h"
-
-/*Create stack, then create __cdecl structure on it and put initialData and
- * pointer to the new structure instance into the parameter positions on
- * the stack
- *Then put function pointer into nextInstrPt -- the stack is setup in std
- * call structure, so jumping to function ptr is same as a GCC generated
- * function call
- *No need to save registers on old stack frame, because there's no old
- * animator state to return to --
- *
- */
-inline VirtProcr *
-create_procr_helper( VirtProcr *newPr,       VirtProcrFnPtr  fnPtr,
-                     void      *initialData, void           *stackLocs )
- {
-   void  *stackPtr;
-
-   newPr->startOfStack = stackLocs;
-   newPr->procrID      = _VMSMasterEnv->numProcrsCreated++;
-   newPr->initialData  = initialData;
-   newPr->requests     = NULL;
-   newPr->schedSlot    = NULL;
-
-   /*
-    * Hardware dependent part           
-    */
-   //instead of calling the function directly, call a wrapper function to fetch
-   //arguments from stack
-   newPr->nextInstrPt  = (VirtProcrFnPtr)&startVirtProcrFn;
-   
-    //fnPtr takes two params -- void *initData & void *animProcr
-    //alloc stack locations, make stackPtr be the highest addr minus room
-    // for 2 params + return addr.  Return addr (NULL) is in loc pointed to
-    // by stackPtr, initData at stackPtr + 8 bytes, animatingPr just above
-   stackPtr = ( (void *)stackLocs + VIRT_PROCR_STACK_SIZE - 4*sizeof(void*));
-   
-      //setup __cdecl on stack -- coreloop will switch to stackPtr before jmp
-   *((VirtProcr**)stackPtr + 2 ) = newPr; //rightmost param
-   *((void**)stackPtr + 1 ) = initialData;  //next  param to left
-   *((void**)stackPtr) = (void*)fnPtr;
-   
-   /*
-    * end of Hardware dependent part           
-    */
-   
-   newPr->stackPtr = stackPtr; //core loop will switch to this, then
-   newPr->framePtr = stackPtr; //suspend loop will save new stack & frame ptr
-
-   //============================= MEASUREMENT STUFF ========================
-   #ifdef STATS__TURN_ON_PROBES
-   //struct timeval timeStamp;
-   //gettimeofday( &(timeStamp), NULL);
-   //newPr->createPtInSecs = timeStamp.tv_sec +(timeStamp.tv_usec/1000000.0) -
-   //                                            _VMSMasterEnv->createPtInSecs;
-   #endif
-   //========================================================================
-
-   return newPr;
- }
\ No newline at end of file
diff -r 7cff4e13d5c4 -r 651ee45615ae ProcrContext.h
--- a/ProcrContext.h	Fri Feb 10 12:05:17 2012 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,33 +0,0 @@
-/*
- *  Copyright 2009 OpenSourceStewardshipFoundation.org
- *  Licensed under GNU General Public License version 2
- *
- * Author: seanhalle@yahoo.com
- * 
- */
-
-#ifndef _ProcrContext_H
-#define	_ProcrContext_H
-#define _GNU_SOURCE
-
-void saveCoreLoopReturnAddr(void **returnAddress);
-
-void switchToVP(VirtProcr *nextProcr);
-
-void switchToCoreLoop(VirtProcr *nextProcr);
-
-void masterSwitchToCoreLoop(VirtProcr *nextProcr);
-
-void startVirtProcrFn();
-
-void *asmTerminateCoreLoop(VirtProcr *currPr);
-
-#define flushRegisters() \
-        asm volatile ("":::"%rbx", "%r12", "%r13","%r14","%r15")
-
-inline VirtProcr *
-create_procr_helper( VirtProcr *newPr,       VirtProcrFnPtr  fnPtr,
-                     void      *initialData, void           *stackLocs );
-
-#endif	/* _ProcrContext_H */
-
diff -r 7cff4e13d5c4 -r 651ee45615ae VMS.c
--- a/VMS.c	Fri Feb 10 12:05:17 2012 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,772 +0,0 @@
-/*
- * Copyright 2010  OpenSourceStewardshipFoundation
- *
- * Licensed under BSD
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <malloc.h>
-#include <inttypes.h>
-#include <sys/time.h>
-
-#include "VMS.h"
-#include "ProcrContext.h"
-#include "Queue_impl/BlockingQueue.h"
-#include "Histogram/Histogram.h"
-
-
-#define thdAttrs NULL
-
-//===========================================================================
-void
-shutdownFn( void *dummy, VirtProcr *dummy2 );
-
-SchedSlot **
-create_sched_slots();
-
-void
-create_masterEnv();
-
-void
-create_the_coreLoop_OS_threads();
-
-MallocProlog *
-create_free_list();
-
-void
-endOSThreadFn( void *initData, VirtProcr *animatingPr );
-
-pthread_mutex_t suspendLock = PTHREAD_MUTEX_INITIALIZER;
-pthread_cond_t  suspend_cond  = PTHREAD_COND_INITIALIZER;
-
-//===========================================================================
-
-/*Setup has two phases:
- * 1) Semantic layer first calls init_VMS, which creates masterEnv, and puts
- *    the master virt procr into the work-queue, ready for first "call"
- * 2) Semantic layer then does its own init, which creates the seed virt
- *    procr inside the semantic layer, ready to schedule it when
- *    asked by the first run of the masterLoop.
- *
- *This part is bit weird because VMS really wants to be "always there", and
- * have applications attach and detach..  for now, this VMS is part of
- * the app, so the VMS system starts up as part of running the app.
- *
- *The semantic layer is isolated from the VMS internals by making the
- * semantic layer do setup to a state that it's ready with its
- * initial virt procrs, ready to schedule them to slots when the masterLoop
- * asks.  Without this pattern, the semantic layer's setup would
- * have to modify slots directly to assign the initial virt-procrs, and put
- * them into the readyToAnimateQ itself, breaking the isolation completely.
- *
- * 
- *The semantic layer creates the initial virt procr(s), and adds its
- * own environment to masterEnv, and fills in the pointers to
- * the requestHandler and slaveScheduler plug-in functions
- */
-
-/*This allocates VMS data structures, populates the master VMSProc,
- * and master environment, and returns the master environment to the semantic
- * layer.
- */
-void
-VMS__init()
- {
-   create_masterEnv();
-   create_the_coreLoop_OS_threads();
- }
-
-#ifdef SEQUENTIAL
-
-/*To initialize the sequential version, just don't create the threads
- */
-void
-VMS__init_Seq()
- {
-   create_masterEnv();
- }
-
-#endif
-
-void
-create_masterEnv()
- { MasterEnv       *masterEnv;
-   VMSQueueStruc **readyToAnimateQs;
-   int              coreIdx;
-   VirtProcr      **masterVPs;
-   SchedSlot     ***allSchedSlots; //ptr to array of ptrs
-
-
-      //Make the master env, which holds everything else
-   _VMSMasterEnv = malloc( sizeof(MasterEnv) );
-
-        //Very first thing put into the master env is the free-list, seeded
-        // with a massive initial chunk of memory.
-        //After this, all other mallocs are VMS__malloc.
-   _VMSMasterEnv->freeListHead        = VMS_ext__create_free_list();
-
-
-   //============================= MEASUREMENT STUFF ========================
-   #ifdef MEAS__TIME_MALLOC
-   _VMSMasterEnv->mallocTimeHist  = makeFixedBinHistExt( 100, 0, 30,
-                                                       "malloc_time_hist");
-   _VMSMasterEnv->freeTimeHist  = makeFixedBinHistExt( 100, 0, 30,
-                                                       "free_time_hist");
-   #endif
-   #ifdef MEAS__TIME_PLUGIN
-   _VMSMasterEnv->reqHdlrLowTimeHist  = makeFixedBinHistExt( 100, 0, 200,
-                                                     "plugin_low_time_hist");
-   _VMSMasterEnv->reqHdlrHighTimeHist  = makeFixedBinHistExt( 100, 0, 200,
-                                                    "plugin_high_time_hist");
-   #endif
-   //========================================================================
-
-   //===================== Only VMS__malloc after this ====================
-   masterEnv     = (MasterEnv*)_VMSMasterEnv;
-   
-      //Make a readyToAnimateQ for each core loop
-   readyToAnimateQs = VMS__malloc( NUM_CORES * sizeof(VMSQueueStruc *) );
-   masterVPs        = VMS__malloc( NUM_CORES * sizeof(VirtProcr *) );
-
-      //One array for each core, 3 in array, core's masterVP scheds all
-   allSchedSlots    = VMS__malloc( NUM_CORES * sizeof(SchedSlot *) );
-
-   _VMSMasterEnv->numProcrsCreated = 0;  //used by create procr
-   for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ )
-    {    
-      readyToAnimateQs[ coreIdx ] = makeVMSQ();
-      
-         //Q: should give masterVP core-specific info as its init data?
-      masterVPs[ coreIdx ] = VMS__create_procr( (VirtProcrFnPtr)&masterLoop, (void*)masterEnv );
-      masterVPs[ coreIdx ]->coreAnimatedBy = coreIdx;
-      allSchedSlots[ coreIdx ] = create_sched_slots(); //makes for one core
-      _VMSMasterEnv->numMasterInARow[ coreIdx ] = 0;
-      _VMSMasterEnv->workStealingGates[ coreIdx ] = NULL;
-    }
-   _VMSMasterEnv->readyToAnimateQs = readyToAnimateQs;
-   _VMSMasterEnv->masterVPs        = masterVPs;
-   _VMSMasterEnv->masterLock       = UNLOCKED;
-   _VMSMasterEnv->allSchedSlots    = allSchedSlots;
-   _VMSMasterEnv->workStealingLock = UNLOCKED;
-
-
-      //Aug 19, 2010:  no longer need to place initial masterVP into queue
-      // because coreLoop now controls -- animates its masterVP when no work
-
-
-   //============================= MEASUREMENT STUFF ========================
-   #ifdef STATS__TURN_ON_PROBES
-   _VMSMasterEnv->dynIntervalProbesInfo =
-              makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->intervalProbes), 200);
-
-   _VMSMasterEnv->probeNameHashTbl = makeHashTable( 1000, &VMS__free );
-   
-      //put creation time directly into master env, for fast retrieval
-   struct timeval timeStamp;
-   gettimeofday( &(timeStamp), NULL);
-   _VMSMasterEnv->createPtInSecs =
-                           timeStamp.tv_sec +(timeStamp.tv_usec/1000000.0);
-   #endif
-   #ifdef MEAS__TIME_MASTER_LOCK
-   _VMSMasterEnv->masterLockLowTimeHist  = makeFixedBinHist( 50, 0, 2,
-                                                "master lock low time hist");
-   _VMSMasterEnv->masterLockHighTimeHist  = makeFixedBinHist( 50, 0, 100,
-                                               "master lock high time hist");
-   #endif
-   
-   MakeTheMeasHists();
-   //========================================================================
-
- }
-
-SchedSlot **
-create_sched_slots()
- { SchedSlot  **schedSlots;
-   int i;
-
-   schedSlots  = VMS__malloc( NUM_SCHED_SLOTS * sizeof(SchedSlot *) );
-
-   for( i = 0; i < NUM_SCHED_SLOTS; i++ )
-    {
-      schedSlots[i] = VMS__malloc( sizeof(SchedSlot) );
-
-         //Set state to mean "handling requests done, slot needs filling"
-      schedSlots[i]->workIsDone         = FALSE;
-      schedSlots[i]->needsProcrAssigned = TRUE;
-    }
-   return schedSlots;
- }
-
-
-void
-freeSchedSlots( SchedSlot **schedSlots )
- { int i;
-   for( i = 0; i < NUM_SCHED_SLOTS; i++ )
-    {
-      VMS__free( schedSlots[i] );
-    }
-   VMS__free( schedSlots );
- }
-
-
-void
-create_the_coreLoop_OS_threads()
- {
-   //========================================================================
-   //                      Create the Threads
-   int coreIdx, retCode;
-
-      //Need the threads to be created suspended, and wait for a signal
-      // before proceeding -- gives time after creating to initialize other
-      // stuff before the coreLoops set off.
-   _VMSMasterEnv->setupComplete = 0;
-
-      //Make the threads that animate the core loops
-   for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ )
-    { coreLoopThdParams[coreIdx]          = VMS__malloc( sizeof(ThdParams) );
-      coreLoopThdParams[coreIdx]->coreNum = coreIdx;
-
-      retCode =
-      pthread_create( &(coreLoopThdHandles[coreIdx]),
-                        thdAttrs,
-                       &coreLoop,
-               (void *)(coreLoopThdParams[coreIdx]) );
-      if(retCode){printf("ERROR creating thread: %d\n", retCode); exit(1);}
-    }
- }
-
-/*Semantic layer calls this when it want the system to start running..
- *
- *This starts the core loops running then waits for them to exit.
- */
-void
-VMS__start_the_work_then_wait_until_done()
- { int coreIdx;
-      //Start the core loops running
-   
-      //tell the core loop threads that setup is complete
-      //get lock, to lock out any threads still starting up -- they'll see
-      // that setupComplete is true before entering while loop, and so never
-      // wait on the condition
-   pthread_mutex_lock(     &suspendLock );
-   _VMSMasterEnv->setupComplete = 1;
-   pthread_mutex_unlock(   &suspendLock );
-   pthread_cond_broadcast( &suspend_cond );
-   
-   
-      //wait for all to complete
-   for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ )
-    {
-      pthread_join( coreLoopThdHandles[coreIdx], NULL );
-    }
-   
-      //NOTE: do not clean up VMS env here -- semantic layer has to have
-      // a chance to clean up its environment first, then do a call to free
-      // the Master env and rest of VMS locations
- }
-
-#ifdef SEQUENTIAL
-/*Only difference between version with an OS thread pinned to each core and
- * the sequential version of VMS is VMS__init_Seq, this, and coreLoop_Seq.
- */
-void
-VMS__start_the_work_then_wait_until_done_Seq()
- {
-         //Instead of un-suspending threads, just call the one and only
-         // core loop (sequential version), in the main thread.
-      coreLoop_Seq( NULL );
-      flushRegisters();
-
- }
-#endif
-
-inline VirtProcr *
-VMS__create_procr( VirtProcrFnPtr fnPtr, void *initialData )
- { VirtProcr *newPr;
-   void      *stackLocs;
-
-   newPr      = VMS__malloc( sizeof(VirtProcr) );
-   stackLocs  = VMS__malloc( VIRT_PROCR_STACK_SIZE );
-   if( stackLocs == 0 )
-    { perror("VMS__malloc stack"); exit(1); }
-
-   return create_procr_helper( newPr, fnPtr, initialData, stackLocs );
- }
-
-/* "ext" designates that it's for use outside the VMS system -- should only
- * be called from main thread or other thread -- never from code animated by
- * a VMS virtual processor.
- */
-inline VirtProcr *
-VMS_ext__create_procr( VirtProcrFnPtr fnPtr, void *initialData )
- { VirtProcr *newPr;
-   char      *stackLocs;
-
-   newPr      = malloc( sizeof(VirtProcr) );
-   stackLocs  = malloc( VIRT_PROCR_STACK_SIZE );
-   if( stackLocs == 0 )
-    { perror("malloc stack"); exit(1); }
-
-   return create_procr_helper( newPr, fnPtr, initialData, stackLocs );
- }
-
-
-/*Anticipating multi-tasking
- */
-void *
-VMS__give_sem_env_for( VirtProcr *animPr )
- {
-   return _VMSMasterEnv->semanticEnv;
- }
-//===========================================================================
-/*there is a label inside this function -- save the addr of this label in
- * the callingPr struc, as the pick-up point from which to start the next
- * work-unit for that procr.  If turns out have to save registers, then
- * save them in the procr struc too.  Then do assembly jump to the CoreLoop's
- * "done with work-unit" label.  The procr struc is in the request in the
- * slave that animated the just-ended work-unit, so all the state is saved
- * there, and will get passed along, inside the request handler, to the
- * next work-unit for that procr.
- */
-void
-VMS__suspend_procr( VirtProcr *animatingPr )
- { 
-
-      //The request to master will cause this suspended virt procr to get
-      // scheduled again at some future point -- to resume, core loop jumps
-      // to the resume point (below), which causes restore of saved regs and
-      // "return" from this call.
-   //animatingPr->nextInstrPt = &&ResumePt;
-
-      //return ownership of the virt procr and sched slot to Master virt pr
-   animatingPr->schedSlot->workIsDone = TRUE;
-
-   //===========================  Measurement stuff ========================
-   #ifdef MEAS__TIME_STAMP_SUSP
-      //record time stamp: compare to time-stamp recorded below
-   saveLowTimeStampCountInto( animatingPr->preSuspTSCLow );
-   #endif
-   //=======================================================================
-
-   switchToCoreLoop(animatingPr);
-   flushRegisters();
-
-   //=======================================================================
-
-   #ifdef MEAS__TIME_STAMP_SUSP
-      //NOTE: only take low part of count -- do sanity check when take diff
-   saveLowTimeStampCountInto( animatingPr->postSuspTSCLow );
-   #endif
-
-   return;
- }
-
-
-
-/*For this implementation of VMS, it may not make much sense to have the
- * system of requests for creating a new processor done this way.. but over
- * the scope of single-master, multi-master, mult-tasking, OS-implementing,
- * distributed-memory, and so on, this gives VMS implementation a chance to
- * do stuff before suspend, in the AppVP, and in the Master before the plugin
- * is called, as well as in the lang-lib before this is called, and in the
- * plugin.  So, this gives both VMS and language implementations a chance to
- * intercept at various points and do order-dependent stuff.
- *Having a standard VMSNewPrReqData struc allows the language to create and
- * free the struc, while VMS knows how to get the newPr if it wants it, and
- * it lets the lang have lang-specific data related to creation transported
- * to the plugin.
- */
-void
-VMS__send_create_procr_req( void *semReqData, VirtProcr *reqstingPr )
- { VMSReqst req;
-
-   req.reqType          = createReq;
-   req.semReqData       = semReqData;
-   req.nextReqst        = reqstingPr->requests;
-   reqstingPr->requests = &req;
-
-   VMS__suspend_procr( reqstingPr );
- }
-
-
-/*
- *This adds a request to dissipate, then suspends the processor so that the
- * request handler will receive the request.  The request handler is what
- * does the work of freeing memory and removing the processor from the
- * semantic environment's data structures.
- *The request handler also is what figures out when to shutdown the VMS
- * system -- which causes all the core loop threads to die, and returns from
- * the call that started up VMS to perform the work.
- *
- *This form is a bit misleading to understand if one is trying to figure out
- * how VMS works -- it looks like a normal function call, but inside it
- * sends a request to the request handler and suspends the processor, which
- * jumps out of the VMS__dissipate_procr function, and out of all nestings
- * above it, transferring the work of dissipating to the request handler,
- * which then does the actual work -- causing the processor that animated
- * the call of this function to disappear and the "hanging" state of this
- * function to just poof into thin air -- the virtual processor's trace
- * never returns from this call, but instead the virtual processor's trace
- * gets suspended in this call and all the virt processor's state disap-
- * pears -- making that suspend the last thing in the virt procr's trace.
- */
-void
-VMS__send_dissipate_req( VirtProcr *procrToDissipate )
- { VMSReqst req;
-
-   req.reqType                = dissipate;
-   req.nextReqst              = procrToDissipate->requests;
-   procrToDissipate->requests = &req;
-
-   VMS__suspend_procr( procrToDissipate );
- }
-
-
-/* "ext" designates that it's for use outside the VMS system -- should only
- * be called from main thread or other thread -- never from code animated by
- * a VMS virtual processor.
- *
- *Use this version to dissipate VPs created outside the VMS system.
- */
-void
-VMS_ext__dissipate_procr( VirtProcr *procrToDissipate )
- {
-      //NOTE: initialData was given to the processor, so should either have
-      // been alloc'd with VMS__malloc, or freed by the level above animPr.
-      //So, all that's left to free here is the stack and the VirtProcr struc
-      // itself
-      //Note, should not stack-allocate initial data -- no guarantee, in
-      // general that creating processor will outlive ones it creates.
-   free( procrToDissipate->startOfStack );
-   free( procrToDissipate );
- }
-
-
-
-/*This call's name indicates that request is malloc'd -- so req handler
- * has to free any extra requests tacked on before a send, using this.
- *
- * This inserts the semantic-layer's request data into standard VMS carrier
- * request data-struct that is mallocd.  The sem request doesn't need to
- * be malloc'd if this is called inside the same call chain before the
- * send of the last request is called.
- *
- *The request handler has to call VMS__free_VMSReq for any of these
- */
-inline void
-VMS__add_sem_request_in_mallocd_VMSReqst( void *semReqData,
-                                          VirtProcr *callingPr )
- { VMSReqst *req;
-
-   req = VMS__malloc( sizeof(VMSReqst) );
-   req->reqType         = semantic;
-   req->semReqData      = semReqData;
-   req->nextReqst       = callingPr->requests;
-   callingPr->requests = req;
- }
-
-/*This inserts the semantic-layer's request data into standard VMS carrier
- * request data-struct is allocated on stack of this call & ptr to it sent
- * to plugin
- *Then it does suspend, to cause request to be sent.
- */
-inline void
-VMS__send_sem_request( void *semReqData, VirtProcr *callingPr )
- { VMSReqst req;
-
-   req.reqType         = semantic;
-   req.semReqData      = semReqData;
-   req.nextReqst       = callingPr->requests;
-   callingPr->requests = &req;
-   
-   VMS__suspend_procr( callingPr );
- }
-
-
-inline void
-VMS__send_VMSSem_request( void *semReqData, VirtProcr *callingPr )
- { VMSReqst req;
-
-   req.reqType         = VMSSemantic;
-   req.semReqData      = semReqData;
-   req.nextReqst       = callingPr->requests; //gab any other preceeding 
-   callingPr->requests = &req;
-
-   VMS__suspend_procr( callingPr );
- }
-
-
-/*
- */
-VMSReqst *
-VMS__take_next_request_out_of( VirtProcr *procrWithReq )
- { VMSReqst *req;
-
-   req = procrWithReq->requests;
-   if( req == NULL ) return NULL;
-
-   procrWithReq->requests = procrWithReq->requests->nextReqst;
-   return req;
- }
-
-
-inline void *
-VMS__take_sem_reqst_from( VMSReqst *req )
- {
-   return req->semReqData;
- }
-
-
-
-/* This is for OS requests and VMS infrastructure requests, such as to create
- *  a probe -- a probe is inside the heart of VMS-core, it's not part of any
- *  language -- but it's also a semantic thing that's triggered from and used
- *  in the application.. so it crosses abstractions..  so, need some special
- *  pattern here for handling such requests.
- * Doing this just like it were a second language sharing VMS-core.
- * 
- * This is called from the language's request handler when it sees a request
- *  of type VMSSemReq
- *
- * TODO: Later change this, to give probes their own separate plugin & have
- *  VMS-core steer the request to appropriate plugin
- * Do the same for OS calls -- look later at it..
- */
-void inline
-VMS__handle_VMSSemReq( VMSReqst *req, VirtProcr *requestingPr, void *semEnv,
-                       ResumePrFnPtr resumePrFnPtr )
- { VMSSemReq     *semReq;
-   IntervalProbe *newProbe;
-
-   semReq = req->semReqData;
-
-   newProbe          = VMS__malloc( sizeof(IntervalProbe) );
-   newProbe->nameStr = VMS__strDup( semReq->nameStr );
-   newProbe->hist    = NULL;
-   newProbe->schedChoiceWasRecorded = FALSE;
-
-      //This runs in masterVP, so no race-condition worries
-   newProbe->probeID =
-             addToDynArray( newProbe, _VMSMasterEnv->dynIntervalProbesInfo );
-
-   requestingPr->dataRetFromReq = newProbe;
-
-   (*resumePrFnPtr)( requestingPr, semEnv );
- }
-
-
-
-/*This must be called by the request handler plugin -- it cannot be called
- * from the semantic library "dissipate processor" function -- instead, the
- * semantic layer has to generate a request, and the plug-in calls this
- * function.
- *The reason is that this frees the virtual processor's stack -- which is
- * still in use inside semantic library calls!
- *
- *This frees or recycles all the state owned by and comprising the VMS
- * portion of the animating virtual procr.  The request handler must first
- * free any semantic data created for the processor that didn't use the
- * VMS_malloc mechanism.  Then it calls this, which first asks the malloc
- * system to disown any state that did use VMS_malloc, and then frees the
- * statck and the processor-struct itself.
- *If the dissipated processor is the sole (remaining) owner of VMS__malloc'd
- * state, then that state gets freed (or sent to recycling) as a side-effect
- * of dis-owning it.
- */
-void
-VMS__dissipate_procr( VirtProcr *animatingPr )
- {
-      //dis-own all locations owned by this processor, causing to be freed
-      // any locations that it is (was) sole owner of
-//TODO: implement VMS__malloc system, including "give up ownership"
-
-
-      //NOTE: initialData was given to the processor, so should either have
-      // been alloc'd with VMS__malloc, or freed by the level above animPr.
-      //So, all that's left to free here is the stack and the VirtProcr struc
-      // itself
-      //Note, should not stack-allocate initial data -- no guarantee, in
-      // general that creating processor will outlive ones it creates.
-   VMS__free( animatingPr->startOfStack );
-   VMS__free( animatingPr );
- }
-
-
-//TODO: look at architecting cleanest separation between request handler
-// and master loop, for dissipate, create, shutdown, and other non-semantic
-// requests.  Issue is chain: one removes requests from AppVP, one dispatches
-// on type of request, and one handles each type..  but some types require
-// action from both request handler and master loop -- maybe just give the
-// request handler calls like:  VMS__handle_X_request_type
-
-
-/*This is called by the semantic layer's request handler when it decides its
- * time to shut down the VMS system.  Calling this causes the core loop OS
- * threads to exit, which unblocks the entry-point function that started up
- * VMS, and allows it to grab the result and return to the original single-
- * threaded application.
- * 
- *The _VMSMasterEnv is needed by this shut down function, so the create-seed-
- * and-wait function has to free a bunch of stuff after it detects the
- * threads have all died: the masterEnv, the thread-related locations,
- * masterVP any AppVPs that might still be allocated and sitting in the
- * semantic environment, or have been orphaned in the _VMSWorkQ.
- * 
- *NOTE: the semantic plug-in is expected to use VMS__malloc to get all the
- * locations it needs, and give ownership to masterVP.  Then, they will be
- * automatically freed.
- *
- *In here,create one core-loop shut-down processor for each core loop and put
- * them all directly into the readyToAnimateQ.
- *Note, this function can ONLY be called after the semantic environment no
- * longer cares if AppVPs get animated after the point this is called.  In
- * other words, this can be used as an abort, or else it should only be
- * called when all AppVPs have finished dissipate requests -- only at that
- * point is it sure that all results have completed.
- */
-void
-VMS__shutdown()
- { int coreIdx;
-   VirtProcr *shutDownPr;
-
-      //create the shutdown processors, one for each core loop -- put them
-      // directly into the Q -- each core will die when gets one
-   for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ )
-    {    //Note, this is running in the master
-      shutDownPr = VMS__create_procr( &endOSThreadFn, NULL );
-      writeVMSQ( shutDownPr, _VMSMasterEnv->readyToAnimateQs[coreIdx] );
-    }
-
- }
-
-
-/*Am trying to be cute, avoiding IF statement in coreLoop that checks for
- * a special shutdown procr.  Ended up with extra-complex shutdown sequence.
- *This function has the sole purpose of setting the stack and framePtr
- * to the coreLoop's stack and framePtr.. it does that then jumps to the
- * core loop's shutdown point -- might be able to just call Pthread_exit
- * from here, but am going back to the pthread's stack and setting everything
- * up just as if it never jumped out, before calling pthread_exit.
- *The end-point of core loop will free the stack and so forth of the
- * processor that animates this function, (this fn is transfering the
- * animator of the AppVP that is in turn animating this function over
- * to core loop function -- note that this slices out a level of virtual
- * processors).
- */
-void
-endOSThreadFn( void *initData, VirtProcr *animatingPr )
- { 
-#ifdef SEQUENTIAL
-    asmTerminateCoreLoopSeq(animatingPr);
-#else
-    asmTerminateCoreLoop(animatingPr);
-#endif
- }
-
-
-/*This is called from the startup & shutdown
- */
-void
-VMS__cleanup_at_end_of_shutdown()
- { 
-   //unused
-   //VMSQueueStruc **readyToAnimateQs;
-   //int              coreIdx;
-   //VirtProcr      **masterVPs;
-   //SchedSlot     ***allSchedSlots; //ptr to array of ptrs
-
-      //Before getting rid of everything, print out any measurements made
-   forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, (DynArrayFnPtr)&printHist );
-   forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, (DynArrayFnPtr)&saveHistToFile);
-   forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, &freeHist );
-   #ifdef MEAS__TIME_PLUGIN
-   printHist( _VMSMasterEnv->reqHdlrLowTimeHist );
-   saveHistToFile( _VMSMasterEnv->reqHdlrLowTimeHist );
-   printHist( _VMSMasterEnv->reqHdlrHighTimeHist );
-   saveHistToFile( _VMSMasterEnv->reqHdlrHighTimeHist );
-   freeHistExt( _VMSMasterEnv->reqHdlrLowTimeHist );
-   freeHistExt( _VMSMasterEnv->reqHdlrHighTimeHist );
-   #endif
-   #ifdef MEAS__TIME_MALLOC
-   printHist( _VMSMasterEnv->mallocTimeHist   );
-   saveHistToFile( _VMSMasterEnv->mallocTimeHist   );
-   printHist( _VMSMasterEnv->freeTimeHist     );
-   saveHistToFile( _VMSMasterEnv->freeTimeHist     );
-   freeHistExt( _VMSMasterEnv->mallocTimeHist );
-   freeHistExt( _VMSMasterEnv->freeTimeHist   );
-   #endif
-   #ifdef MEAS__TIME_MASTER_LOCK
-   printHist( _VMSMasterEnv->masterLockLowTimeHist );
-   printHist( _VMSMasterEnv->masterLockHighTimeHist );
-   #endif
-   #ifdef MEAS__TIME_MASTER
-   printHist( _VMSMasterEnv->pluginTimeHist );
-   for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ )
-    {
-      freeVMSQ( readyToAnimateQs[ coreIdx ] );
-         //master VPs were created external to VMS, so use external free
-      VMS__dissipate_procr( masterVPs[ coreIdx ] );
-
-      freeSchedSlots( allSchedSlots[ coreIdx ] );
-    }
-   #endif
-   #ifdef MEAS__TIME_STAMP_SUSP
-   printHist( _VMSMasterEnv->pluginTimeHist );
-   for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ )
-    {
-      freeVMSQ( readyToAnimateQs[ coreIdx ] );
-         //master VPs were created external to VMS, so use external free
-      VMS__dissipate_procr( masterVPs[ coreIdx ] );
-
-      freeSchedSlots( allSchedSlots[ coreIdx ] );
-    }
-   #endif
-
-      //All the environment data has been allocated with VMS__malloc, so just
-      // free its internal big-chunk and all inside it disappear.
-/*
-   readyToAnimateQs = _VMSMasterEnv->readyToAnimateQs;
-   masterVPs        = _VMSMasterEnv->masterVPs;
-   allSchedSlots    = _VMSMasterEnv->allSchedSlots;
-   
-   for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ )
-    {
-      freeVMSQ( readyToAnimateQs[ coreIdx ] );
-         //master VPs were created external to VMS, so use external free
-      VMS__dissipate_procr( masterVPs[ coreIdx ] );
-      
-      freeSchedSlots( allSchedSlots[ coreIdx ] );
-    }
-   
-   VMS__free( _VMSMasterEnv->readyToAnimateQs );
-   VMS__free( _VMSMasterEnv->masterVPs );
-   VMS__free( _VMSMasterEnv->allSchedSlots );
-   
-   //============================= MEASUREMENT STUFF ========================
-   #ifdef STATS__TURN_ON_PROBES
-   freeDynArrayDeep( _VMSMasterEnv->dynIntervalProbesInfo, &VMS__free_probe);
-   #endif
-   //========================================================================
-*/
-      //These are the only two that use system free 
-   VMS_ext__free_free_list( _VMSMasterEnv->freeListHead );
-   free( (void *)_VMSMasterEnv );
- }
-
-
-//================================
-
-
-/*Later, improve this -- for now, just exits the application after printing
- * the error message.
- */
-void
-VMS__throw_exception( char *msgStr, VirtProcr *reqstPr, VMSExcp *excpData )
- {
-   printf("%s",msgStr);
-   fflush(stdin);
-   exit(1);
- }
-
diff -r 7cff4e13d5c4 -r 651ee45615ae VMS.h
--- a/VMS.h	Fri Feb 10 12:05:17 2012 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,579 +0,0 @@
-/*
- *  Copyright 2009 OpenSourceStewardshipFoundation.org
- *  Licensed under GNU General Public License version 2
- *
- * Author: seanhalle@yahoo.com
- * 
- */
-
-#ifndef _VMS_H
-#define	_VMS_H
-#define _GNU_SOURCE
-
-#include "VMS_primitive_data_types.h"
-#include "../../C_Libraries/Queue_impl/PrivateQueue.h"
-#include "../../C_Libraries/Histogram/Histogram.h"
-#include "../../C_Libraries/DynArray/DynArray.h"
-#include "../../C_Libraries/Hash_impl/PrivateHash.h"
-#include "vmalloc.h"
-
-#include <pthread.h>
-#include <sys/time.h>
-
-
-//===============================  Debug  ===================================
-//
-//When SEQUENTIAL is defined, VMS does sequential exe in the main thread
-// It still does co-routines and all the mechanisms are the same, it just
-// has only a single thread and animates VPs one at a time
-//#define SEQUENTIAL
-
-//#define USE_WORK_STEALING
-
-//turns on the probe-instrumentation in the application -- when not
-// defined, the calls to the probe functions turn into comments
-#define STATS__ENABLE_PROBES
-//#define TURN_ON_DEBUG_PROBES
-
-//These defines turn types of bug messages on and off
-// be sure debug messages are un-commented (next block of defines)
-#define dbgAppFlow   TRUE /* Top level flow of application code -- general*/
-#define dbgProbes    FALSE /* for issues inside probes themselves*/
-#define dbgB2BMaster FALSE /* in coreloop, back to back master VPs*/
-#define dbgRqstHdlr  FALSE /* in request handler code*/
-
-//Comment or un- the substitute half to turn on/off types of debug message
-#define DEBUG(  bool, msg)         \
-//  if( bool){ printf(msg); fflush(stdin);}
-#define DEBUG1( bool, msg, param)  \
-//   if(bool){printf(msg, param); fflush(stdin);}
-#define DEBUG2( bool, msg, p1, p2) \
-//   if(bool) {printf(msg, p1, p2); fflush(stdin);}
-
-#define ERROR(msg) printf(msg);
-#define ERROR1(msg, param) printf(msg, param); 
-#define ERROR2(msg, p1, p2) printf(msg, p1, p2);
-
-//===========================  STATS =======================
-
-   //when MEAS__TIME_STAMP_SUSP is defined, causes code to be inserted and
-   // compiled-in that saves the low part of the time stamp count just before
-   // suspending a processor and just after resuming that processorsrc/VPThread_lib/VMS/VMS.h:322: warning: previous declaration of ‘VMS__create_procr’ was here.  It is
-   // saved into a field added to VirtProcr.  Have to sanity-check for
-   // rollover of low portion into high portion.
-//#define MEAS__TIME_STAMP_SUSP
-//#define MEAS__TIME_MASTER
-#define MEAS__TIME_PLUGIN
-#define MEAS__TIME_MALLOC
-//#define MEAS__TIME_MASTER_LOCK
-#define MEAS__NUM_TIMES_TO_RUN 100000
-
-   //For code that calculates normalization-offset between TSC counts of
-   // different cores.
-#define NUM_TSC_ROUND_TRIPS 10
-
-
-//=========================  Hardware related Constants =====================
-   //This value is the number of hardware threads in the shared memory
-   // machine
-//#define NUM_CORES        8
-
-   // tradeoff amortizing master fixed overhead vs imbalance potential
-   // when work-stealing, can make bigger, at risk of losing cache affinity
-#define NUM_SCHED_SLOTS  5
-
-#define MIN_WORK_UNIT_CYCLES 20000
-
-#define MASTERLOCK_RETRIES 10000
-
-   // stack size in virtual processors created
-#define VIRT_PROCR_STACK_SIZE 0x8000 /* 32K */
-
-   // memory for VMS__malloc
-#define MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE 0x10000000 /* 256M */
-
-#define CACHE_LINE 64
-#define PAGE_SIZE 4096
-
-
-//==============================
-
-#define SUCCESS 0
-
-#define writeVMSQ     writePrivQ
-#define readVMSQ      readPrivQ
-#define makeVMSQ      makeVMSPrivQ
-#define numInVMSQ     numInPrivQ
-#define VMSQueueStruc PrivQueueStruc
-
-
-
-//===========================================================================
-typedef unsigned long long TSCount;
-
-typedef struct _SchedSlot     SchedSlot;
-typedef struct _VMSReqst      VMSReqst;
-typedef struct _VirtProcr     VirtProcr;
-typedef struct _IntervalProbe IntervalProbe;
-typedef struct _GateStruc     GateStruc;
-
-
-typedef VirtProcr * (*SlaveScheduler)  ( void *, int );   //semEnv, coreIdx
-typedef void  (*RequestHandler)  ( VirtProcr *, void * ); //prWReqst, semEnv
-typedef void  (*VirtProcrFnPtr)  ( void *, VirtProcr * ); //initData, animPr
-typedef void    VirtProcrFn      ( void *, VirtProcr * ); //initData, animPr
-typedef void  (*ResumePrFnPtr)   ( VirtProcr *, void * );
-
-
-//============= Requests ===========
-//
-
-enum VMSReqstType   //avoid starting enums at 0, for debug reasons
- {
-   semantic = 1,
-   createReq,
-   dissipate,
-   VMSSemantic      //goes with VMSSemReqst below
- };
-
-struct _VMSReqst
- {
-   enum VMSReqstType  reqType;//used for dissipate and in future for IO requests
-   void              *semReqData;
-
-   VMSReqst *nextReqst;
- };
-//VMSReqst
-
-enum VMSSemReqstType   //These are equivalent to semantic requests, but for
- {                     // VMS's services available directly to app, like OS
-   createProbe = 1,    // and probe services -- like a VMS-wide built-in lang
-   openFile,
-   otherIO
- };
-
-typedef struct
- { enum VMSSemReqstType reqType;
-   VirtProcr           *requestingPr;
-   char                *nameStr;  //for create probe
- }
- VMSSemReq;
-
-
-//====================  Core data structures  ===================
-
-struct _SchedSlot
- {
-   int         workIsDone;
-   int         needsProcrAssigned;
-   VirtProcr  *procrAssignedToSlot;
- };
-//SchedSlot
-
-/*WARNING: re-arranging this data structure could cause VP switching
- *         assembly code to fail -- hard-codes offsets of fields
- */
-struct _VirtProcr
- { int         procrID;  //for debugging -- count up each time create
-   int         coreAnimatedBy;
-   void       *startOfStack;
-   void       *stackPtr;
-   void       *framePtr;
-   void       *nextInstrPt;
-   
-   void       *coreLoopStartPt;  //allows proto-runtime to be linked later
-   void       *coreLoopFramePtr; //restore before jmp back to core loop
-   void       *coreLoopStackPtr; //restore before jmp back to core loop
-
-   void       *initialData;
-
-   SchedSlot  *schedSlot;
-   VMSReqst   *requests;
-
-   void       *semanticData; //this livesUSE_GNU here for the life of VP
-   void       *dataRetFromReq;//values returned from plugin to VP go here
-
-      //=========== MEASUREMENT STUFF ==========
-   #ifdef MEAS__TIME_STAMP_SUSP
-   unsigned int preSuspTSCLow;
-   unsigned int postSuspTSCLow;
-   #endif
-   #ifdef MEAS__TIME_MASTER /* in VirtProcr because multiple masterVPs*/
-   unsigned int startMasterTSCLow;USE_GNU
-   unsigned int endMasterTSCLow;
-   #endif
-      //========================================
-   
-   float64      createPtInSecs;  //have space but don't use on some configs
- };
-//VirtProcr
-
-
-/*WARNING: re-arranging this data structure could cause VP-switching
- *         assembly code to fail -- hard-codes offsets of fields
- *         (because -O3 messes with things otherwise)
- */
-typedef struct
- {
-   SlaveScheduler   slaveScheduler;
-   RequestHandler   requestHandler;
-   
-   SchedSlot     ***allSchedSlots;
-   VMSQueueStruc **readyToAnimateQs;
-   VirtProcr      **masterVPs;
-
-   void            *semanticEnv;
-   void            *OSEventStruc;   //for future, when add I/O to BLIS
-   MallocProlog    *freeListHead;
-   int32            amtOfOutstandingMem; //total currently allocated
-
-   void            *coreLoopReturnPt;//addr to jump to to re-enter coreLoop
-
-   int32            setupComplete;
-   volatile int32   masterLock;
-
-   int32            numMasterInARow[NUM_CORES];//detect back-to-back masterVP
-   GateStruc       *workStealingGates[ NUM_CORES ]; //concurrent work-steal
-   int32            workStealingLock;
-   
-   int32            numProcrsCreated; //gives ordering to processor creation
-
-      //=========== MEASUREMENT STUFF =============
-   IntervalProbe  **intervalProbes;
-   PrivDynArrayInfo    *dynIntervalProbesInfo;
-   HashTable       *probeNameHashTbl;
-   int32            masterCreateProbeID;
-   float64          createPtInSecs;
-   Histogram      **measHists;
-   PrivDynArrayInfo *measHistsInfo;
-   #ifdef MEAS__TIME_PLUGIN
-   Histogram       *reqHdlrLowTimeHist;
-   Histogram       *reqHdlrHighTimeHist;
-   #endif
-   #ifdef MEAS__TIME_MALLOC
-   Histogram       *mallocTimeHist;
-   Histogram       *freeTimeHist;
-   #endif
-   #ifdef MEAS__TIME_MASTER_LOCK
-   Histogram       *masterLockLowTimeHist;
-   Histogram       *masterLockHighTimeHist;
-   #endif
- }
-MasterEnv;
-
-//=========================  Extra Stuff Data Strucs  =======================
-typedef struct
- {
-
- }
-VMSExcp;
-
-struct _GateStruc
- {
-   int32 gateClosed;
-   int32 preGateProgress;
-   int32 waitProgress;
-   int32 exitProgress;
- };
-//GateStruc
-
-//=======================  OS Thread related  ===============================
-
-void * coreLoop( void *paramsIn );  //standard PThreads fn prototype
-void * coreLoop_Seq( void *paramsIn );  //standard PThreads fn prototype
-void masterLoop( void *initData, VirtProcr *masterPr );
-
-
-typedef struct
- {
-   void           *endThdPt;
-   unsigned int    coreNum;
- }
-ThdParams;
-
-pthread_t       coreLoopThdHandles[ NUM_CORES ];  //pthread's virt-procr state
-ThdParams      *coreLoopThdParams [ NUM_CORES ];
-pthread_mutex_t suspendLock;
-pthread_cond_t  suspend_cond;
-
-
-
-//=====================  Global Vars ===================
-
-volatile MasterEnv      *_VMSMasterEnv;
-
-
-
-
-//===========================  Function Prototypes  =========================
-
-
-//========== Setup and shutdown ==========
-void
-VMS__init();
-
-void
-VMS__init_Seq();
-
-void
-VMS__start_the_work_then_wait_until_done();
-
-void
-VMS__start_the_work_then_wait_until_done_Seq();
-
-inline VirtProcr *
-VMS__create_procr( VirtProcrFnPtr fnPtr, void *initialData );
-
-void
-VMS__dissipate_procr( VirtProcr *procrToDissipate );
-
-   //Use this to create processor inside entry point & other places outside
-   // the VMS system boundary (IE, not run in slave nor Master)
-VirtProcr *
-VMS_ext__create_procr( VirtProcrFnPtr fnPtr, void *initialData );
-
-void
-VMS_ext__dissipate_procr( VirtProcr *procrToDissipate );
-
-void
-VMS__throw_exception( char *msgStr, VirtProcr *reqstPr, VMSExcp *excpData );
-
-void
-VMS__shutdown();
-
-void
-VMS__cleanup_at_end_of_shutdown();
-
-void *
-VMS__give_sem_env_for( VirtProcr *animPr );
-
-
-//==============  Request Related  ===============
-
-void
-VMS__suspend_procr( VirtProcr *callingPr );
-
-inline void
-VMS__add_sem_request_in_mallocd_VMSReqst( void *semReqData, VirtProcr *callingPr );
-
-inline void
-VMS__send_sem_request( void *semReqData, VirtProcr *callingPr );
-
-void
-VMS__send_create_procr_req( void *semReqData, VirtProcr *reqstingPr );
-
-void inline
-VMS__send_dissipate_req( VirtProcr *prToDissipate );
-
-inline void
-VMS__send_VMSSem_request( void *semReqData, VirtProcr *callingPr );
-
-VMSReqst *
-VMS__take_next_request_out_of( VirtProcr *procrWithReq );
-
-inline void *
-VMS__take_sem_reqst_from( VMSReqst *req );
-
-void inline
-VMS__handle_VMSSemReq( VMSReqst *req, VirtProcr *requestingPr, void *semEnv,
-                       ResumePrFnPtr resumePrFnPtr );
-
-//======================== STATS ======================
-
-//===== RDTSC wrapper ===== //Also runs with x86_64 code
-
-#define saveTimeStampCountInto(low, high) \
-   asm volatile("RDTSC;                   \
-                 movl %%eax, %0;          \
-                 movl %%edx, %1;"         \
-   /* outputs */ : "=m" (low), "=m" (high)\
-   /* inputs  */ :                        \
-   /* clobber */ : "%eax", "%edx"         \
-                );
-
-#define saveLowTimeStampCountInto(low)    \
-   asm volatile("RDTSC;                   \
-                 movl %%eax, %0;"         \
-   /* outputs */ : "=m" (low)             \
-   /* inputs  */ :                        \
-   /* clobber */ : "%eax", "%edx"         \
-                );
-
-//====================
-#define makeAMeasHist( idx, name, numBins, startVal, binWidth ) \
-   makeHighestDynArrayIndexBeAtLeast( _VMSMasterEnv->measHistsInfo, idx ); \
-   _VMSMasterEnv->measHists[idx] =  \
-                       makeFixedBinHist( numBins, startVal, binWidth, name );
-
-
-#define MEAS__SUB_CREATE  /*turn on/off subtraction of create from plugin*/
-
-#ifdef VPTHREAD
-
-//VPThread
-#define createHistIdx      0
-#define mutexLockHistIdx   1
-#define mutexUnlockHistIdx 2
-#define condWaitHistIdx    3
-#define condSignalHistIdx  4
-
-#define MakeTheMeasHists() \
-   _VMSMasterEnv->measHistsInfo = \
-              makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \
-   makeAMeasHist( createHistIdx,      "create",        250, 0, 100 ) \
-   makeAMeasHist( mutexLockHistIdx,   "mutex_lock",    50, 0, 100 ) \
-   makeAMeasHist( mutexUnlockHistIdx, "mutex_unlock",  50, 0, 100 ) \
-   makeAMeasHist( condWaitHistIdx,    "cond_wait",     50, 0, 100 ) \
-   makeAMeasHist( condSignalHistIdx,  "cond_signal",   50, 0, 100 )
-
-#endif
-
-
-#ifdef VCILK
-
-//VCilk
-#define spawnHistIdx      0
-#define syncHistIdx       1
-
-#define MakeTheMeasHists() \
-   _VMSMasterEnv->measHistsInfo = \
-              makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \
-    makeAMeasHist( spawnHistIdx,      "Spawn",        50, 0, 200 ) \
-    makeAMeasHist( syncHistIdx,       "Sync",         50, 0, 200 )
-
-
-#endif
-
-#ifdef SSR
-
-//SSR
-#define SendFromToHistIdx      0
-#define SendOfTypeHistIdx      1
-#define ReceiveFromToHistIdx   2
-#define ReceiveOfTypeHistIdx   3
-
-#define MakeTheMeasHists() \
-   _VMSMasterEnv->measHistsInfo = \
-              makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \
-    makeAMeasHist( SendFromToHistIdx,   "SendFromTo",    50, 0, 100 ) \
-    makeAMeasHist( SendOfTypeHistIdx,   "SendOfType",    50, 0, 100 ) \
-    makeAMeasHist( ReceiveFromToHistIdx,"ReceiveFromTo", 50, 0, 100 ) \
-    makeAMeasHist( ReceiveOfTypeHistIdx,"ReceiveOfType", 50, 0, 100 )
-
-#endif
-
-//===========================================================================
-//VPThread
-
-
-#define Meas_startCreate \
-    int32 startStamp, endStamp; \
-    saveLowTimeStampCountInto( startStamp ); \
-
-#define Meas_endCreate \
-    saveLowTimeStampCountInto( endStamp ); \
-    addIntervalToHist( startStamp, endStamp, \
-                                 _VMSMasterEnv->measHists[ createHistIdx ] );
-
-#define Meas_startMutexLock \
-    int32 startStamp, endStamp; \
-    saveLowTimeStampCountInto( startStamp ); \
-
-#define Meas_endMutexLock \
-    saveLowTimeStampCountInto( endStamp ); \
-    addIntervalToHist( startStamp, endStamp, \
-                              _VMSMasterEnv->measHists[ mutexLockHistIdx ] );
-
-#define Meas_startMutexUnlock \
-    int32 startStamp, endStamp; \
-    saveLowTimeStampCountInto( startStamp ); \
-
-#define Meas_endMutexUnlock \
-    saveLowTimeStampCountInto( endStamp ); \
-    addIntervalToHist( startStamp, endStamp, \
-                            _VMSMasterEnv->measHists[ mutexUnlockHistIdx ] );
-
-#define Meas_startCondWait \
-    int32 startStamp, endStamp; \
-    saveLowTimeStampCountInto( startStamp ); \
-
-#define Meas_endCondWait \
-    saveLowTimeStampCountInto( endStamp ); \
-    addIntervalToHist( startStamp, endStamp, \
-                               _VMSMasterEnv->measHists[ condWaitHistIdx ] );
-
-#define Meas_startCondSignal \
-    int32 startStamp, endStamp; \
-    saveLowTimeStampCountInto( startStamp ); \
-
-#define Meas_endCondSignal \
-    saveLowTimeStampCountInto( endStamp ); \
-    addIntervalToHist( startStamp, endStamp, \
-                             _VMSMasterEnv->measHists[ condSignalHistIdx ] );
-
-//===========================================================================
-// VCilk
-#define Meas_startSpawn \
-    int32 startStamp, endStamp; \
-    saveLowTimeStampCountInto( startStamp ); \
-
-#define Meas_endSpawn \
-    saveLowTimeStampCountInto( endStamp ); \
-    addIntervalToHist( startStamp, endStamp, \
-                             _VMSMasterEnv->measHists[ spawnHistIdx ] );
-
-#define Meas_startSync \
-    int32 startStamp, endStamp; \
-    saveLowTimeStampCountInto( startStamp ); \
-
-#define Meas_endSync \
-    saveLowTimeStampCountInto( endStamp ); \
-    addIntervalToHist( startStamp, endStamp, \
-                             _VMSMasterEnv->measHists[ syncHistIdx ] );
-
-//===========================================================================
-// SSR
-#define Meas_startSendFromTo \
-    int32 startStamp, endStamp; \
-    saveLowTimeStampCountInto( startStamp ); \
-
-#define Meas_endSendFromTo \
-    saveLowTimeStampCountInto( endStamp ); \
-    addIntervalToHist( startStamp, endStamp, \
-                             _VMSMasterEnv->measHists[ SendFromToHistIdx ] );
-
-#define Meas_startSendOfType \
-    int32 startStamp, endStamp; \
-    saveLowTimeStampCountInto( startStamp ); \
-
-#define Meas_endSendOfType \
-    saveLowTimeStampCountInto( endStamp ); \
-    addIntervalToHist( startStamp, endStamp, \
-                             _VMSMasterEnv->measHists[ SendOfTypeHistIdx ] );
-
-#define Meas_startReceiveFromTo \
-    int32 startStamp, endStamp; \
-    saveLowTimeStampCountInto( startStamp ); \
-
-#define Meas_endReceiveFromTo \
-    saveLowTimeStampCountInto( endStamp ); \
-    addIntervalToHist( startStamp, endStamp, \
-                             _VMSMasterEnv->measHists[ ReceiveFromToHistIdx ] );
-
-#define Meas_startReceiveOfType \
-    int32 startStamp, endStamp; \
-    saveLowTimeStampCountInto( startStamp ); \
-
-#define Meas_endReceiveOfType \
-    saveLowTimeStampCountInto( endStamp ); \
-    addIntervalToHist( startStamp, endStamp, \
-                             _VMSMasterEnv->measHists[ReceiveOfTypeHistIdx ] );
-
-//=====
-
-#include "ProcrContext.h"
-#include "probes.h"
-#include "vutilities.h"
-
-#endif	/* _VMS_H */
-
diff -r 7cff4e13d5c4 -r 651ee45615ae VMS_primitive_data_types.h
--- a/VMS_primitive_data_types.h	Fri Feb 10 12:05:17 2012 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,53 +0,0 @@
-/*
- *  Copyright 2009 OpenSourceStewardshipFoundation.org
- *  Licensed under GNU General Public License version 2
- *  
- * Author: seanhalle@yahoo.com
- *  
-
- */
-
-#ifndef _BLIS_PRIMITIVE_DATA_TYPES_H
-#define	_BLIS_PRIMITIVE_DATA_TYPES_H
-
-
-/*For portability, need primitive data types that have a well defined
- * size, and well-defined layout into bytes
- *To do this, provide BLIS standard aliases for all primitive data types
- *These aliases must be used in all BLIS functions instead of the ANSI types
- *
- *These definitions will be replaced inside each specialization module
- * according to the compiler used in that module and the hardware being
- * specialized to.
- */
-/*
-#define    int8  char
-#define   uint8  char
-#define    int16 short
-#define   uint16 unsigned short
-#define    int32 int
-#define   uint32 unsigned int
-#define    int64 long long
-#define   uint64 unsigned long long
-#define  float32 float
-#define  float64 double
-*/
-typedef char               bool8;
-typedef char               int8;
-typedef char               uint8;
-typedef short              int16;
-typedef unsigned short     uint16;
-typedef int                int32;
-typedef unsigned int       uint32;
-typedef long long          int64;
-typedef unsigned long long uint64;
-typedef float              float32;
-typedef double             float64;
-//typedef double double      float128;
-#define float128 double double
-
-#define TRUE  1
-#define FALSE 0
-
-#endif	/* _BLIS_PRIMITIVE_DATA_TYPES_H */
-
diff -r 7cff4e13d5c4 -r 651ee45615ae __brch__DEPRECATED_README
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/__brch__DEPRECATED_README	Sat Feb 11 21:47:25 2012 -0800
@@ -0,0 +1,29 @@
+*DEPRECATED*  as of Feb 2012, this branch should not be used.  Too many variations of VMS for MC_shared exist.
+
+Instead, choose a branch that has the best implementation for the machine being run on.  For example, single-socket with 2 cores, or with 4 cores, or with 8 cores all have their own branches with code tuned to that number of cores.  AMD processors require different low-level tweaking than Intel, and so on.
+
+============== Background on Branch Naming ============
+
+There are two kinds of branches: ones used to develop features, and ones tuned to particular hardware.  A given HW branch may combine features from several feature-branches, picking and choosing among them.
+
+Legacy branches, from before Feb 2012, have random names.  After Feb 2012, they're named by the scheme:
+
+feat__<feat_descr>__<HW_feat_dev_on>
+
+HW__<desc_of_HW_brch_tuned_for>
+
+where <HW_feat_dev_on> and <desc_of_HW_brch_tuned_for> follow the pattern:
+
+<num_socket>x<num_cores>_<Manuf>_<special_features>
+
+Examples:
+
+feat__exp_array_malloc
+
+feat__rand_backoff__4x10_Intel_WestmereEx
+
+HW__1x4_Intel_SandyBridge
+
+HW__4x10_Intel_WestmereEx
+
+HW__1x4_AMD_mobile
diff -r 7cff4e13d5c4 -r 651ee45615ae contextSwitch.s
--- a/contextSwitch.s	Fri Feb 10 12:05:17 2012 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,149 +0,0 @@
-.data
-
-
-.text
-
-//Save return label address for the coreLoop to pointer
-//Arguments: Pointer to variable holding address
-.globl saveCoreLoopReturnAddr
-saveCoreLoopReturnAddr:
-    movq    $coreLoopReturn, %rcx   #load label address
-    movq    %rcx, (%rdi)           #save address to pointer
-    ret
-
-
-//Initializes VirtProcrFn at first run for 64 bit mode
-//Puts argument from stack into registers
-.globl startVirtProcrFn
-startVirtProcrFn:
-    movq    %rdi      , %rsi #get second argument from first argument of switchVP
-    movq    0x08(%rsp), %rdi #get first argument
-    movq    (%rsp)    , %rax #get function addr
-    jmp     *%rax
-
-//Switches form CoreLoop to VP ether a normal VP or the Master Loop
-//switch to virt procr's stack and frame ptr then jump to virt procr fn
-/* VirtProcr  offsets:
- * 0x10  stackPtr
- * 0x18 framePtr
- * 0x20 nextInstrPt
- * 0x30 coreLoopFramePtr
- * 0x38 coreLoopStackPtr
- *
- * _VMSMasterEnv  offsets:
- * 0x48 coreLoopReturnPt
- * 0x54 masterLock
- */
-.globl switchToVP
-switchToVP:
-    #VirtProcr in %rdi
-    movq    %rsp      , 0x38(%rdi)   #save core loop stack pointer 
-    movq    %rbp      , 0x30(%rdi)   #save core loop frame pointer
-    movq    0x10(%rdi), %rsp         #restore stack pointer
-    movq    0x18(%rdi), %rbp         #restore frame pointer
-    movq    0x20(%rdi), %rax         #get jmp pointer
-    jmp     *%rax                    #jmp to VP
-coreLoopReturn:
-    ret
-
-    
-//switches to core loop. saves return address
-/* VirtProcr  offsets:
- * 0x10  stackPtr
- * 0x18 framePtr
- * 0x20 nextInstrPt
- * 0x30 coreLoopFramePtr
- * 0x38 coreLoopStackPtr
- *
- * _VMSMasterEnv  offsets:
- * 0x48 coreLoopReturnPt
- * 0x54 masterLock
- */
-.globl switchToCoreLoop
-switchToCoreLoop:
-    #VirtProcr in %rdi
-    movq    $VPReturn , 0x20(%rdi)   #store return address
-    movq    %rsp      , 0x10(%rdi)   #save stack pointer 
-    movq    %rbp      , 0x18(%rdi)   #save frame pointer
-    movq    0x38(%rdi), %rsp         #restore stack pointer
-    movq    0x30(%rdi), %rbp         #restore frame pointer
-    movq    $_VMSMasterEnv, %rcx
-    movq    (%rcx)    , %rcx
-    movq    0x48(%rcx), %rax         #get CoreLoopStartPt
-    jmp     *%rax                    #jmp to CoreLoop
-VPReturn:
-    ret
-
-
-
-//switches to core loop from master. saves return address
-//Releases masterLock so the next MasterLoop can be executed
-/* VirtProcr  offsets:
- * 0x10  stackPtr
- * 0x18 framePtr
- * 0x20 nextInstrPt
- * 0x30 coreLoopFramePtr
- * 0x38 coreLoopStackPtr
- *
- * _VMSMasterEnv  offsets:
- * 0x48 coreLoopReturnPt
- * 0x54 masterLock
- */
-.globl masterSwitchToCoreLoop
-masterSwitchToCoreLoop:
-    #VirtProcr in %rdi
-    movq    $MasterReturn, 0x20(%rdi)   #store return address
-    movq    %rsp      , 0x10(%rdi)   #save stack pointer 
-    movq    %rbp      , 0x18(%rdi)   #save frame pointer
-    movq    0x38(%rdi), %rsp         #restore stack pointer
-    movq    0x30(%rdi), %rbp         #restore frame pointer
-    movq    $_VMSMasterEnv, %rcx
-    movq    (%rcx)    , %rcx
-    movq    0x48(%rcx), %rax         #get CoreLoopStartPt
-    movl    $0x0      , 0x54(%rcx)   #release lock
-    jmp     *%rax                    #jmp to CoreLoop
-MasterReturn:
-    ret
-
-
-//Switch to terminateCoreLoop
-//therefor switch to coreLoop context from master context
-// no need to call because the stack is already set up for switchVP
-// and virtPr is in %rdi
-// and both functions have the same argument.
-// do not save register of VP because this function will never return
-/* VirtProcr  offsets:
- * 0x10  stackPtr
- * 0x18 framePtr
- * 0x20 nextInstrPt
- * 0x30 coreLoopFramePtr
- * 0x38 coreLoopStackPtr
- *
- * _VMSMasterEnv  offsets:
- * 0x48 coreLoopReturnPt
- * 0x58 masterLock
- */
-.globl asmTerminateCoreLoop
-asmTerminateCoreLoop:
-    #VirtProcr in %rdi
-    movq    0x38(%rdi), %rsp         #restore stack pointer
-    movq    0x30(%rdi), %rbp         #restore frame pointer
-    movq    $terminateCoreLoop, %rax
-    jmp     *%rax                    #jmp to CoreLoop
-
-
-/*
- * This one for the sequential version is special. It discards the current stack
- * and returns directly from the coreLoop after VMS__dissipate_procr was called
- */
-.globl asmTerminateCoreLoopSeq
-asmTerminateCoreLoopSeq:
-    #VirtProcr in %rdi
-    movq    0x38(%rdi), %rsp         #restore stack pointer
-    movq    0x30(%rdi), %rbp         #restore frame pointer
-    #argument is in %rdi
-    call    VMS__dissipate_procr
-    movq    %rbp      , %rsp        #goto the coreLoops stack
-    pop     %rbp        #restore the old framepointer
-    ret                 #return from core loop
-    
diff -r 7cff4e13d5c4 -r 651ee45615ae probes.c
--- a/probes.c	Fri Feb 10 12:05:17 2012 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,354 +0,0 @@
-/*
- * Copyright 2010  OpenSourceStewardshipFoundation
- *
- * Licensed under BSD
- */
-
-#include <stdio.h>
-#include <malloc.h>
-#include <sys/time.h>
-
-#include "VMS.h"
-#include "Queue_impl/BlockingQueue.h"
-#include "Histogram/Histogram.h"
-
-
-//================================ STATS ====================================
-
-inline TSCount getTSCount()
- { unsigned int low, high;
-   TSCount  out;
-
-   saveTimeStampCountInto( low, high );
-   out = high;
-   out = (out << 32) + low;
-   return out;
- }
-
-
-
-//====================  Probes =================
-#ifdef STATS__USE_TSC_PROBES
-
-int32
-VMS__create_histogram_probe( int32 numBins, float32 startValue,
-                             float32 binWidth, char *nameStr )
- { IntervalProbe *newProbe;
-   int32 idx;
-   FloatHist *hist;
-
-   idx = VMS__create_single_interval_probe( nameStr );
-   newProbe =  _VMSMasterEnv->intervalProbes[ idx ];
-
-   hist =  makeFloatHistogram( numBins, startValue, binWidth );
-   newProbe->hist = hist;
-   return idx;
- }
-
-void
-VMS_impl__record_interval_start_in_probe( int32 probeID )
- { IntervalProbe *probe;
-
-   probe = _VMSMasterEnv->intervalProbes[ probeID ];
-   probe->startStamp = getTSCount();
- }
-
-void
-VMS_impl__record_interval_end_in_probe( int32 probeID )
- { IntervalProbe *probe;
-   TSCount endStamp;
-
-   endStamp = getTSCount();
-
-   probe = _VMSMasterEnv->intervalProbes[ probeID ];
-   probe->endStamp = endStamp;
-
-   if( probe->hist != NULL )
-    { TSCount interval = probe->endStamp - probe->startStamp;
-         //if the interval is sane, then add to histogram
-      if( interval < probe->hist->endOfRange * 10 )
-         addToFloatHist( interval, probe->hist );
-    }
- }
-
-void
-VMS_impl__print_stats_of_probe( int32 probeID )
- { IntervalProbe *probe;
-
-   probe = _VMSMasterEnv->intervalProbes[ probeID ];
-
-   if( probe->hist == NULL )
-    {
-      printf("probe: %s, interval: %.6lf\n", probe->nameStr,probe->interval);
-    }
-
-   else
-    {
-      printf( "probe: %s\n", probe->nameStr );
-      printFloatHist( probe->hist );
-    }
- }
-#else
-
-/*
- * In practice, probe operations are called from the app, from inside slaves
- *  -- so have to be sure each probe is single-VP owned, and be sure that
- *  any place common structures are modified it's done inside the master.
- * So -- the only place common structures are modified is during creation.
- *  after that, all mods are to individual instances.
- *
- * Thniking perhaps should change the semantics to be that probes are
- *  attached to the virtual processor -- and then everything is guaranteed
- *  to be isolated -- except then can't take any intervals that span VPs,
- *  and would have to transfer the probes to Master env when VP dissipates..
- *  gets messy..
- *
- * For now, just making so that probe creation causes a suspend, so that
- *  the dynamic array in the master env is only modified from the master
- * 
- */
-IntervalProbe *
-create_generic_probe( char *nameStr, VirtProcr *animPr )
-{
-   VMSSemReq reqData;
-
-   reqData.reqType  = createProbe;
-   reqData.nameStr  = nameStr;
-
-   VMS__send_VMSSem_request( &reqData, animPr );
-
-   return animPr->dataRetFromReq;
- }
-
-/*Use this version from outside VMS -- it uses external malloc, and modifies
- * dynamic array, so can't be animated in a slave VP
- */
-IntervalProbe *
-ext__create_generic_probe( char *nameStr )
- { IntervalProbe *newProbe;
-   int32          nameLen;
-
-   newProbe          = malloc( sizeof(IntervalProbe) );
-   nameLen = strlen( nameStr );
-   newProbe->nameStr = malloc( nameLen );
-   memcpy( newProbe->nameStr, nameStr, nameLen );
-   newProbe->hist    = NULL;
-   newProbe->schedChoiceWasRecorded = FALSE;
-   newProbe->probeID =
-             addToDynArray( newProbe, _VMSMasterEnv->dynIntervalProbesInfo );
-
-   return newProbe;
- }
-
-
-/*Only call from inside master or main startup/shutdown thread
- */
-void
-VMS_impl__free_probe( IntervalProbe *probe )
- { if( probe->hist != NULL )   freeDblHist( probe->hist );
-   if( probe->nameStr != NULL) VMS__free( probe->nameStr );
-   VMS__free( probe );
- }
-
-
-int32
-VMS_impl__record_time_point_into_new_probe( char *nameStr, VirtProcr *animPr)
- { IntervalProbe *newProbe;
-   struct timeval *startStamp;
-   float64 startSecs;
-
-   newProbe           = create_generic_probe( nameStr, animPr );
-   newProbe->endSecs  = 0;
-
-   gettimeofday( &(newProbe->startStamp), NULL);
-
-      //turn into a double
-   startStamp = &(newProbe->startStamp);
-   startSecs = startStamp->tv_sec + ( startStamp->tv_usec / 1000000.0 );
-   newProbe->startSecs = startSecs;
-
-   return newProbe->probeID;
- }
-
-int32
-VMS_ext_impl__record_time_point_into_new_probe( char *nameStr )
- { IntervalProbe *newProbe;
-   struct timeval *startStamp;
-   float64 startSecs;
-
-   newProbe           = ext__create_generic_probe( nameStr );
-   newProbe->endSecs  = 0;
-
-   gettimeofday( &(newProbe->startStamp), NULL);
-
-      //turn into a double
-   startStamp = &(newProbe->startStamp);
-   startSecs = startStamp->tv_sec + ( startStamp->tv_usec / 1000000.0 );
-   newProbe->startSecs = startSecs;
-
-   return newProbe->probeID;
- }
-
-int32
-VMS_impl__create_single_interval_probe( char *nameStr, VirtProcr *animPr )
- { IntervalProbe *newProbe;
-
-   newProbe = create_generic_probe( nameStr, animPr );
-   
-   return newProbe->probeID;
- }
-
-int32
-VMS_impl__create_histogram_probe( int32   numBins, float64    startValue,
-               float64 binWidth, char   *nameStr, VirtProcr *animPr )
- { IntervalProbe *newProbe;
-   DblHist *hist;
-
-   newProbe = create_generic_probe( nameStr, animPr );
-   
-   hist =  makeDblHistogram( numBins, startValue, binWidth );
-   newProbe->hist = hist;
-   return newProbe->probeID;
- }
-
-void
-VMS_impl__index_probe_by_its_name( int32 probeID, VirtProcr *animPr )
- { IntervalProbe *probe;
-
-   //TODO: fix this To be in Master -- race condition
-   probe = _VMSMasterEnv->intervalProbes[ probeID ];
-
-   addValueIntoTable(probe->nameStr, probe, _VMSMasterEnv->probeNameHashTbl);
- }
-
-IntervalProbe *
-VMS_impl__get_probe_by_name( char *probeName, VirtProcr *animPr )
- {
-   //TODO: fix this To be in Master -- race condition
-   return getValueFromTable( probeName, _VMSMasterEnv->probeNameHashTbl );
- }
-
-
-/*Everything is local to the animating procr, so no need for request, do
- * work locally, in the anim Pr
- */
-void
-VMS_impl__record_sched_choice_into_probe( int32 probeID, VirtProcr *animatingPr )
- { IntervalProbe *probe;
- 
-   probe = _VMSMasterEnv->intervalProbes[ probeID ];
-   probe->schedChoiceWasRecorded = TRUE;
-   probe->coreNum = animatingPr->coreAnimatedBy;
-   probe->procrID = animatingPr->procrID;
-   probe->procrCreateSecs = animatingPr->createPtInSecs;
- }
-
-/*Everything is local to the animating procr, so no need for request, do
- * work locally, in the anim Pr
- */
-void
-VMS_impl__record_interval_start_in_probe( int32 probeID )
- { IntervalProbe *probe;
-
-         DEBUG( dbgProbes, "record start of interval\n" )
-   probe = _VMSMasterEnv->intervalProbes[ probeID ];
-   gettimeofday( &(probe->startStamp), NULL );
- }
-
-
-/*Everything is local to the animating procr, so no need for request, do
- * work locally, in the anim Pr
- */
-void
-VMS_impl__record_interval_end_in_probe( int32 probeID )
- { IntervalProbe *probe;
-   struct timeval *endStamp, *startStamp;
-   float64 startSecs, endSecs;
-
-         DEBUG( dbgProbes, "record end of interval\n" )
-      //possible seg-fault if array resized by diff core right after this
-      // one gets probe..?  Something like that?  Might be safe.. don't care
-   probe = _VMSMasterEnv->intervalProbes[ probeID ];
-   gettimeofday( &(probe->endStamp), NULL);
-
-      //now turn into an interval held in a double
-   startStamp = &(probe->startStamp);
-   endStamp   = &(probe->endStamp);
-
-   startSecs = startStamp->tv_sec + ( startStamp->tv_usec / 1000000.0 );
-   endSecs   = endStamp->tv_sec   + ( endStamp->tv_usec / 1000000.0 );
-
-   probe->interval  = endSecs - startSecs;
-   probe->startSecs = startSecs;
-   probe->endSecs   = endSecs;
-
-   if( probe->hist != NULL )
-    {
-         //if the interval is sane, then add to histogram
-      if( probe->interval < probe->hist->endOfRange * 10 )
-         addToDblHist( probe->interval, probe->hist );
-    }
- }
-
-void
-print_probe_helper( IntervalProbe *probe )
- {
-   printf( "\nprobe: %s, ",  probe->nameStr );
-   
-   
-   if( probe->schedChoiceWasRecorded )
-    { printf( "coreNum: %d, procrID: %d, procrCreated: %0.6f | ",
-              probe->coreNum, probe->procrID, probe->procrCreateSecs );
-    }
-
-   if( probe->endSecs == 0 ) //just a single point in time
-    {
-      printf( " time point: %.6f\n",
-              probe->startSecs - _VMSMasterEnv->createPtInSecs );
-    }
-   else if( probe->hist == NULL ) //just an interval
-    {
-      printf( " startSecs: %.6f interval: %.6f\n", 
-         (probe->startSecs - _VMSMasterEnv->createPtInSecs), probe->interval);
-    }
-   else  //a full histogram of intervals
-    {
-      printDblHist( probe->hist );
-    }
- }
-
-//TODO: change so pass around pointer to probe instead of its array-index..
-// will eliminate chance for timing of resize to cause problems with the
-// lookup -- even though don't think it actually can cause problems..
-// there's no need to pass index around -- have hash table for names, and
-// only need it once, then have ptr to probe..  the thing about enum the
-// index and use that as name is clunky in practice -- just hash.
-void
-VMS_impl__print_stats_of_probe( int32 probeID )
- { IntervalProbe *probe;
-
-   probe = _VMSMasterEnv->intervalProbes[ probeID ];
-
-   print_probe_helper( probe );
- }
-
-
-inline void doNothing(){};
-
-void
-generic_print_probe( void *_probe )
- { 
-   IntervalProbe *probe = (IntervalProbe *)_probe;
-   
-   //TODO segfault in printf
-   //print_probe_helper( probe );
- }
-
-void
-VMS_impl__print_stats_of_all_probes()
- {
-   forAllInDynArrayDo( _VMSMasterEnv->dynIntervalProbesInfo,
-                       &generic_print_probe );
-   fflush( stdout );
- }
-#endif
diff -r 7cff4e13d5c4 -r 651ee45615ae probes.h
--- a/probes.h	Fri Feb 10 12:05:17 2012 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,195 +0,0 @@
-/*
- *  Copyright 2009 OpenSourceStewardshipFoundation.org
- *  Licensed under GNU General Public License version 2
- *
- * Author: seanhalle@yahoo.com
- * 
- */
-
-#ifndef _PROBES_H
-#define	_PROBES_H
-#define _GNU_SOURCE
-
-#include "VMS_primitive_data_types.h"
-
-#include <sys/time.h>
-
-
-   //when STATS__TURN_ON_PROBES is defined allows using probes to measure
-   // time intervals.  The probes are macros that only compile to something
-   // when STATS__TURN_ON_PROBES is defined.  The probes are saved in the
-   // master env -- but only when this is defined.
-   //The TSC probes use RDTSC instr, can be unreliable, Dbl uses gettimeofday
-#define STATS__TURN_ON_PROBES
-//#define STATS__USE_TSC_PROBES
-#define STATS__USE_DBL_PROBES
-
-//typedef struct _IntervalProbe IntervalProbe; //in VMS.h
-
-struct _IntervalProbe
- {
-   char           *nameStr;
-   int32           probeID;
-
-   int32           schedChoiceWasRecorded;
-   int32           coreNum;
-   int32           procrID;
-   float64         procrCreateSecs;
-
-   #ifdef STATS__USE_TSC_PROBES
-   TSCount    startStamp;
-   TSCount    endStamp;
-   #else
-   struct timeval  startStamp;
-   struct timeval  endStamp;
-   #endif
-   float64         startSecs;
-   float64         endSecs;
-   float64         interval;
-   DblHist        *hist;//if NULL, then is single interval probe
- };
-
-
-//============================= Statistics ==================================
-
-   //Frequency of TS counts
-   //TODO: change freq for each machine
-#define TSCOUNT_FREQ 3180000000
-
-inline TSCount getTSCount();
-
-
-//======================== Probes =============================
-//
-// Use macros to allow turning probes off with a #define switch
-#ifdef STATS__ENABLE_PROBES
-int32
-VMS_impl__record_time_point_into_new_probe( char *nameStr,VirtProcr *animPr);
-#define VMS__record_time_point_into_new_probe( nameStr, animPr ) \
-        VMS_impl__record_time_point_in_new_probe( nameStr, animPr )
-
-int32
-VMS_ext_impl__record_time_point_into_new_probe( char *nameStr );
-#define VMS_ext__record_time_point_into_new_probe( nameStr ) \
-        VMS_ext_impl__record_time_point_into_new_probe( nameStr )
-
-
-int32
-VMS_impl__create_single_interval_probe( char *nameStr, VirtProcr *animPr );
-#define VMS__create_single_interval_probe( nameStr, animPr ) \
-        VMS_impl__create_single_interval_probe( nameStr, animPr )
-
-
-int32
-VMS_impl__create_histogram_probe( int32   numBins, float64    startValue,
-               float64 binWidth, char    *nameStr, VirtProcr *animPr );
-#define VMS__create_histogram_probe(      numBins, startValue,              \
-                                          binWidth, nameStr, animPr )       \
-        VMS_impl__create_histogram_probe( numBins, startValue,              \
-                                          binWidth, nameStr, animPr )
-void
-VMS_impl__free_probe( IntervalProbe *probe );
-#define VMS__free_probe( probe ) \
-        VMS_impl__free_probe( probe )
-
-void
-VMS_impl__index_probe_by_its_name( int32 probeID, VirtProcr *animPr );
-#define VMS__index_probe_by_its_name( probeID, animPr ) \
-        VMS_impl__index_probe_by_its_name( probeID, animPr )
-
-IntervalProbe *
-VMS_impl__get_probe_by_name( char *probeName, VirtProcr *animPr );
-#define VMS__get_probe_by_name( probeID, animPr ) \
-        VMS_impl__get_probe_by_name( probeName, animPr )
-
-void
-VMS_impl__record_sched_choice_into_probe( int32 probeID, VirtProcr *animPr );
-#define VMS__record_sched_choice_into_probe( probeID, animPr ) \
-        VMS_impl__record_sched_choice_into_probe( probeID, animPr )
-
-void
-VMS_impl__record_interval_start_in_probe( int32 probeID );
-#define VMS__record_interval_start_in_probe( probeID ) \
-        VMS_impl__record_interval_start_in_probe( probeID )
-
-void
-VMS_impl__record_interval_end_in_probe( int32 probeID );
-#define VMS__record_interval_end_in_probe( probeID ) \
-        VMS_impl__record_interval_end_in_probe( probeID )
-
-void
-VMS_impl__print_stats_of_probe( int32 probeID );
-#define VMS__print_stats_of_probe( probeID ) \
-        VMS_impl__print_stats_of_probe( probeID )
-
-void
-VMS_impl__print_stats_of_all_probes();
-#define VMS__print_stats_of_all_probes() \
-        VMS_impl__print_stats_of_all_probes()
-
-
-#else
-int32
-VMS_impl__record_time_point_into_new_probe( char *nameStr,VirtProcr *animPr);
-#define VMS__record_time_point_into_new_probe( nameStr, animPr ) \
-       0 /* do nothing */
-
-int32
-VMS_ext_impl__record_time_point_into_new_probe( char *nameStr );
-#define VMS_ext__record_time_point_into_new_probe( nameStr ) \
-       0 /* do nothing */
-
-
-int32
-VMS_impl__create_single_interval_probe( char *nameStr, VirtProcr *animPr );
-#define VMS__create_single_interval_probe( nameStr, animPr ) \
-       0 /* do nothing */
-
-
-int32
-VMS_impl__create_histogram_probe( int32   numBins, float64    startValue,
-               float64 binWidth, char    *nameStr, VirtProcr *animPr );
-#define VMS__create_histogram_probe(      numBins, startValue,              \
-                                          binWidth, nameStr, animPr )       \
-       0 /* do nothing */
-
-void
-VMS_impl__index_probe_by_its_name( int32 probeID, VirtProcr *animPr );
-#define VMS__index_probe_by_its_name( probeID, animPr ) \
-        /* do nothing */
-
-IntervalProbe *
-VMS_impl__get_probe_by_name( char *probeName, VirtProcr *animPr );
-#define VMS__get_probe_by_name( probeID, animPr ) \
-       NULL /* do nothing */
-
-void
-VMS_impl__record_sched_choice_into_probe( int32 probeID, VirtProcr *animPr );
-#define VMS__record_sched_choice_into_probe( probeID, animPr ) \
-        /* do nothing */
-
-void
-VMS_impl__record_interval_start_in_probe( int32 probeID );
-#define VMS__record_interval_start_in_probe( probeID ) \
-        /* do nothing */
-
-void
-VMS_impl__record_interval_end_in_probe( int32 probeID );
-#define VMS__record_interval_end_in_probe( probeID ) \
-        /* do nothing */
-
-inline void doNothing();
-void
-VMS_impl__print_stats_of_probe( int32 probeID );
-#define VMS__print_stats_of_probe( probeID ) \
-        doNothing/* do nothing */
-
-void
-VMS_impl__print_stats_of_all_probes();
-#define VMS__print_stats_of_all_probes \
-        doNothing/* do nothing */
-
-#endif   /* defined STATS__ENABLE_PROBES */
-
-#endif	/* _PROBES_H */
-
diff -r 7cff4e13d5c4 -r 651ee45615ae vmalloc.c
--- a/vmalloc.c	Fri Feb 10 12:05:17 2012 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,495 +0,0 @@
-/*
- *  Copyright 2009 OpenSourceCodeStewardshipFoundation.org
- *  Licensed under GNU General Public License version 2
- *
- * Author: seanhalle@yahoo.com
- *
- * Created on November 14, 2009, 9:07 PM
- */
-
-#include <malloc.h>
-#include <inttypes.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-#include "VMS.h"
-#include "Histogram/Histogram.h"
-
-/*Helper function
- *Insert a newly generated free chunk into the first spot on the free list.
- * The chunk is cast as a MallocProlog, so the various pointers in it are
- * accessed with C's help -- and the size of the prolog is easily added to
- * the pointer when a chunk is returned to the app -- so C handles changes
- * in pointer sizes among machines.
- *
- *The list head is a normal MallocProlog struct -- identified by its
- * prevChunkInFreeList being NULL -- the only one.
- *
- *The end of the list is identified by next chunk being NULL, as usual.
- */
-void inline
-add_chunk_to_free_list( MallocProlog *chunk, MallocProlog *listHead )
- { 
-   chunk->nextChunkInFreeList     = listHead->nextChunkInFreeList;
-   if( chunk->nextChunkInFreeList != NULL ) //if not last in free list
-      chunk->nextChunkInFreeList->prevChunkInFreeList = chunk;
-   chunk->prevChunkInFreeList     = listHead;
-   listHead->nextChunkInFreeList  = chunk;
- }
-
-
-/*This is sequential code, meant to only be called from the Master, not from
- * any slave VPs.
- *Search down list, checking size by the nextHigherInMem pointer, to find
- * first chunk bigger than size needed.
- *Shave off the extra and make it into a new free-list element, hook it in
- * then return the address of the found element plus size of prolog.
- *
- *Will find a
- */
-void *VMS__malloc( size_t sizeRequested )
- { MallocProlog *foundElem = NULL, *currElem, *newElem;
-   ssize_t        amountExtra, sizeConsumed,sizeOfFound;
-   uint32        foundElemIsTopOfHeap;
-
-   //============================= MEASUREMENT STUFF ========================
-   #ifdef MEAS__TIME_MALLOC
-   int32 startStamp, endStamp;
-   saveLowTimeStampCountInto( startStamp );
-   #endif
-   //========================================================================
-   
-      //step up the size to be aligned at 16-byte boundary, prob better ways
-   sizeRequested = (sizeRequested + 16) & ~15;
-   currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList;
-
-   while( currElem != NULL )
-    {    //check if size of currElem is big enough
-      sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem);
-      amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog);
-      if( amountExtra > 0 )
-       {    //found it, get out of loop
-         foundElem = currElem;
-         currElem = NULL;
-       }
-      else
-         currElem = currElem->nextChunkInFreeList;
-    }
-   
-   if( foundElem == NULL )
-    { ERROR("\nmalloc failed\n")
-      return (void *)NULL;  //indicates malloc failed
-    }
-      //Using a kludge to identify the element that is the top chunk in the
-      // heap -- saving top-of-heap addr in head's nextHigherInMem -- and
-      // save addr of start of heap in head's nextLowerInMem
-      //Will handle top of Heap specially
-   foundElemIsTopOfHeap = foundElem->nextHigherInMem ==
-                          _VMSMasterEnv->freeListHead->nextHigherInMem;
-   
-      //before shave off and try to insert new elem, remove found elem
-      //note, foundElem will never be the head, so always has valid prevChunk
-   foundElem->prevChunkInFreeList->nextChunkInFreeList =
-                                              foundElem->nextChunkInFreeList;
-   if( foundElem->nextChunkInFreeList != NULL )
-    { foundElem->nextChunkInFreeList->prevChunkInFreeList =
-                                              foundElem->prevChunkInFreeList;
-    }
-   foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated
-   
-      //if enough, turn extra into new elem & insert it
-   if( amountExtra > 64 )
-    {   //make new elem by adding to addr of curr elem then casting
-        sizeConsumed = sizeof(MallocProlog) + sizeRequested; 
-        newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed );
-        newElem->nextLowerInMem    = foundElem; //This is evil (but why?) 
-        newElem->nextHigherInMem   = foundElem->nextHigherInMem; //This is evil (but why?)
-        foundElem->nextHigherInMem = newElem;
-        if( ! foundElemIsTopOfHeap )
-        {  //there is no next higher for top of heap, so can't write to it
-           newElem->nextHigherInMem->nextLowerInMem = newElem;
-        }
-        add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead );
-    }
-   else
-    {
-      sizeConsumed = sizeOfFound;
-    }
-  _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed;
-
-   //============================= MEASUREMENT STUFF ========================
-   #ifdef MEAS__TIME_MALLOC
-   saveLowTimeStampCountInto( endStamp );
-   addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist );
-   #endif
-   //========================================================================
-
-      //skip over the prolog by adding its size to the pointer return
-   return (void*)((uintptr_t)foundElem + sizeof(MallocProlog));
- }
-
-/*This is sequential code, meant to only be called from the Master, not from
- * any slave VPs.
- *Search down list, checking size by the nextHigherInMem pointer, to find
- * first chunk bigger than size needed.
- *Shave off the extra and make it into a new free-list element, hook it in
- * then return the address of the found element plus size of prolog.
- *
- * The difference to the regular malloc is, that all the allocated chunks are
- * aligned and padded to the size of a CACHE_LINE. Thus creating a new chunk
- * before the aligned chunk.
- */
-void *VMS__malloc_aligned( size_t sizeRequested )
- { MallocProlog *foundElem = NULL, *currElem, *newElem;
-   ssize_t        amountExtra, sizeConsumed,sizeOfFound,prevAmount;
-   uint32        foundElemIsTopOfHeap;
-
-   //============================= MEASUREMENT STUFF ========================
-   #ifdef MEAS__TIME_MALLOC
-   uint32 startStamp, endStamp;
-   saveLowTimeStampCountInto( startStamp );
-   #endif
-   //========================================================================
-   
-      //step up the size to be multiple of the cache line size
-   sizeRequested = (sizeRequested + CACHE_LINE) & ~(CACHE_LINE-1);
-   currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList;
-
-   while( currElem != NULL )
-    {    //check if size of currElem is big enough
-      sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem);
-      amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog);
-      if( amountExtra > 0 )
-       {    
-         //look if the found element is already aligned
-         if((((uintptr_t)currElem+sizeof(MallocProlog)) & (uintptr_t)(CACHE_LINE-1)) == 0){
-             //found it, get out of loop
-             foundElem = currElem;
-             break;
-         }else{
-             //find first aligned address and check if it's still big enough
-             //check also if the space before the aligned address is big enough
-             //for a new element
-             void *firstAlignedAddr = (void*)(((uintptr_t)currElem + 2*CACHE_LINE) & ~((uintptr_t)(CACHE_LINE-1)));
-             prevAmount = (uintptr_t)firstAlignedAddr - (uintptr_t)currElem;
-             sizeOfFound=(uintptr_t)currElem->nextHigherInMem -(uintptr_t)firstAlignedAddr + sizeof(MallocProlog);
-             amountExtra= sizeOfFound - sizeRequested - sizeof(MallocProlog);
-             if(prevAmount > 2*sizeof(MallocProlog) && amountExtra > 0 ){
-                 //found suitable element
-                 //create new previous element and exit loop
-                 MallocProlog *newAlignedElem = (MallocProlog*)firstAlignedAddr - 1;
-                 
-                 //insert new element into free list
-                 if(currElem->nextChunkInFreeList != NULL)
-                     currElem->nextChunkInFreeList->prevChunkInFreeList = newAlignedElem;                     
-                 newAlignedElem->prevChunkInFreeList = currElem;
-                 newAlignedElem->nextChunkInFreeList = currElem->nextChunkInFreeList;
-                 currElem->nextChunkInFreeList = newAlignedElem;
-                 
-                 //set higherInMem and lowerInMem
-                 newAlignedElem->nextHigherInMem = currElem->nextHigherInMem;
-                 foundElemIsTopOfHeap = currElem->nextHigherInMem ==
-                          _VMSMasterEnv->freeListHead->nextHigherInMem;
-                 if(!foundElemIsTopOfHeap)
-                     currElem->nextHigherInMem->nextLowerInMem = newAlignedElem;
-                 currElem->nextHigherInMem = newAlignedElem;
-                 newAlignedElem->nextLowerInMem = currElem;
-                 
-                 //Found new element leaving loop
-                 foundElem = newAlignedElem;
-                 break;
-             }
-         }
-         
-       }
-       currElem = currElem->nextChunkInFreeList;
-    }
-
-   if( foundElem == NULL )
-    { ERROR("\nmalloc failed\n")
-      return (void *)NULL;  //indicates malloc failed
-    }
-      //Using a kludge to identify the element that is the top chunk in the
-      // heap -- saving top-of-heap addr in head's nextHigherInMem -- and
-      // save addr of start of heap in head's nextLowerInMem
-      //Will handle top of Heap specially
-   foundElemIsTopOfHeap = foundElem->nextHigherInMem ==
-                          _VMSMasterEnv->freeListHead->nextHigherInMem;
-
-      //before shave off and try to insert new elem, remove found elem
-      //note, foundElem will never be the head, so always has valid prevChunk
-   foundElem->prevChunkInFreeList->nextChunkInFreeList =
-                                              foundElem->nextChunkInFreeList;
-   if( foundElem->nextChunkInFreeList != NULL )
-    { foundElem->nextChunkInFreeList->prevChunkInFreeList =
-                                              foundElem->prevChunkInFreeList;
-    }
-   foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated
-   
-      //if enough, turn extra into new elem & insert it
-   if( amountExtra > 64 )
-    {    //make new elem by adding to addr of curr elem then casting
-      sizeConsumed = sizeof(MallocProlog) + sizeRequested;
-      newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed );
-      newElem->nextHigherInMem   = foundElem->nextHigherInMem;
-      newElem->nextLowerInMem    = foundElem;
-      foundElem->nextHigherInMem = newElem;
-      
-      if( ! foundElemIsTopOfHeap )
-       {    //there is no next higher for top of heap, so can't write to it
-         newElem->nextHigherInMem->nextLowerInMem = newElem;
-       }
-      add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead );
-    }
-   else
-    {
-      sizeConsumed = sizeOfFound;
-    }
-  _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed;
-
-   //============================= MEASUREMENT STUFF ========================
-   #ifdef MEAS__TIME_MALLOC
-   saveLowTimeStampCountInto( endStamp );
-   addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist );
-   #endif
-   //========================================================================
-
-      //skip over the prolog by adding its size to the pointer return
-   return (void*)((uintptr_t)foundElem + sizeof(MallocProlog));
- }
-
-
-/*This is sequential code -- only to be called from the Master
- * When free, subtract the size of prolog from pointer, then cast it to a
- * MallocProlog.  Then check the nextLower and nextHigher chunks to see if
- * one or both are also free, and coalesce if so, and if neither free, then
- * add this one to free-list.
- */
-void
-VMS__free( void *ptrToFree )
- { MallocProlog *elemToFree, *nextLowerElem, *nextHigherElem;
-   size_t         sizeOfElem;
-   uint32         lowerExistsAndIsFree, higherExistsAndIsFree;
-
-   //============================= MEASUREMENT STUFF ========================
-   #ifdef MEAS__TIME_MALLOC
-   int32 startStamp, endStamp;
-   saveLowTimeStampCountInto( startStamp );
-   #endif
-   //========================================================================
-
-   if( ptrToFree < (void*)_VMSMasterEnv->freeListHead->nextLowerInMem ||
-       ptrToFree > (void*)_VMSMasterEnv->freeListHead->nextHigherInMem )
-    {    //outside the range of data owned by VMS's malloc, so do nothing
-      return;
-    }
-      //subtract size of prolog to get pointer to prolog, then cast
-   elemToFree = (MallocProlog *)((uintptr_t)ptrToFree - sizeof(MallocProlog));
-   sizeOfElem =(size_t)((uintptr_t)elemToFree->nextHigherInMem-(uintptr_t)elemToFree);
-
-   if( elemToFree->prevChunkInFreeList != NULL )
-    { printf( "error: freeing same element twice!" ); exit(1);
-    }
-
-   _VMSMasterEnv->amtOfOutstandingMem -= sizeOfElem;
-
-   nextLowerElem  = elemToFree->nextLowerInMem;
-   nextHigherElem = elemToFree->nextHigherInMem;
-
-   if( nextHigherElem == NULL )
-      higherExistsAndIsFree = FALSE;
-   else //okay exists, now check if in the free-list by checking back ptr
-      higherExistsAndIsFree = (nextHigherElem->prevChunkInFreeList != NULL);
-    
-   if( nextLowerElem == NULL )
-      lowerExistsAndIsFree = FALSE;
-   else //okay, it exists, now check if it's free
-      lowerExistsAndIsFree = (nextLowerElem->prevChunkInFreeList != NULL);
-    
-
-      //now, know what exists and what's free
-   if( lowerExistsAndIsFree )
-    { if( higherExistsAndIsFree )
-       {    //both exist and are free, so coalesce all three
-            //First, remove higher from free-list
-         nextHigherElem->prevChunkInFreeList->nextChunkInFreeList =
-                                         nextHigherElem->nextChunkInFreeList;
-         if( nextHigherElem->nextChunkInFreeList != NULL ) //end-of-list?
-            nextHigherElem->nextChunkInFreeList->prevChunkInFreeList =
-                                         nextHigherElem->prevChunkInFreeList;
-            //Now, fix-up sequence-in-mem list -- by side-effect, this also
-            // changes size of the lower elem, which is still in free-list
-         nextLowerElem->nextHigherInMem = nextHigherElem->nextHigherInMem;
-         if( nextHigherElem->nextHigherInMem !=
-             _VMSMasterEnv->freeListHead->nextHigherInMem )
-            nextHigherElem->nextHigherInMem->nextLowerInMem = nextLowerElem;
-            //notice didn't do anything to elemToFree -- it simply is no
-            // longer reachable from any of the lists.  Wonder if could be a
-            // security leak because left valid addresses in it,
-            // but don't care for now.
-       }
-      else
-       {    //lower is the only of the two that exists and is free,
-            //In this case, no adjustment to free-list, just change mem-list.
-            // By side-effect, changes size of the lower elem
-         nextLowerElem->nextHigherInMem = elemToFree->nextHigherInMem;
-         if( elemToFree->nextHigherInMem !=
-             _VMSMasterEnv->freeListHead->nextHigherInMem )
-            elemToFree->nextHigherInMem->nextLowerInMem = nextLowerElem;
-       }
-    }
-   else
-    {    //lower either doesn't exist or isn't free, so check higher
-      if( higherExistsAndIsFree )
-       {    //higher exists and is the only of the two free
-            //First, in free-list, replace higher elem with the one to free
-         elemToFree->nextChunkInFreeList=nextHigherElem->nextChunkInFreeList;
-         elemToFree->prevChunkInFreeList=nextHigherElem->prevChunkInFreeList;
-         elemToFree->prevChunkInFreeList->nextChunkInFreeList = elemToFree;
-         if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list?
-            elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree;
-            //Now chg mem-list. By side-effect, changes size of elemToFree
-         elemToFree->nextHigherInMem = nextHigherElem->nextHigherInMem;
-         if( elemToFree->nextHigherInMem !=
-             _VMSMasterEnv->freeListHead->nextHigherInMem )
-            elemToFree->nextHigherInMem->nextLowerInMem = elemToFree;
-       }
-      else
-       {    //neither lower nor higher is availabe to coalesce so add to list
-            // this makes prev chunk ptr non-null, which indicates it's free
-         elemToFree->nextChunkInFreeList =
-                            _VMSMasterEnv->freeListHead->nextChunkInFreeList;
-         _VMSMasterEnv->freeListHead->nextChunkInFreeList = elemToFree;
-         if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list?
-            elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree;
-         elemToFree->prevChunkInFreeList = _VMSMasterEnv->freeListHead;
-       }
-    }
-   //============================= MEASUREMENT STUFF ========================
-   #ifdef MEAS__TIME_MALLOC
-   saveLowTimeStampCountInto( endStamp );
-   addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->freeTimeHist );
-   #endif
-   //========================================================================
-
- }
-
-
-/*Allocates memory from the external system -- higher overhead
- *
- *Because of Linux's malloc throwing bizarre random faults when malloc is
- * used inside a VMS virtual processor, have to pass this as a request and
- * have the core loop do it when it gets around to it -- will look for these
- * chores leftover from the previous animation of masterVP the next time it
- * goes to animate the masterVP -- so it takes two separate masterVP
- * animations, separated by work, to complete an external malloc or
- * external free request.
- *
- *Thinking core loop accepts signals -- just looks if signal-location is
- * empty or not --
- */
-void *
-VMS__malloc_in_ext( size_t sizeRequested )
- {
- /*
-      //This is running in the master, so no chance for multiple cores to be
-      // competing for the core's flag.
-   if(  *(_VMSMasterEnv->coreLoopSignalAddr[ 0 ]) != 0 )
-    {    //something has already signalled to core loop, so save the signal
-         // and look, next time master animated, to see if can send it.
-         //Note, the addr to put a signal is in the coreloop's frame, so just
-         // checks it each time through -- make it volatile to avoid GCC
-         // optimizations -- it's a coreloop local var that only changes
-         // after jumping away.  The signal includes the addr to send the
-         //return to -- even if just empty return completion-signal
-         //
-         //save the signal in some queue that the master looks at each time
-         // it starts up -- one loc says if empty for fast common case --
-         //something like that -- want to hide this inside this call -- but
-         // think this has to come as a request -- req handler gives procr
-         // back to master loop, which gives it back to req handler at point
-         // it sees that core loop has sent return signal.  Something like
-         // that.
-      saveTheSignal
-
-    }
-  coreSigData->type = malloc;
-  coreSigData->sizeToMalloc = sizeRequested;
-  coreSigData->locToSignalCompletion = &figureOut;
-   _VMSMasterEnv->coreLoopSignals[ 0 ] = coreSigData;
-  */
-      //just risk system-stack faults until get this figured out
-   return malloc( sizeRequested );
- }
-
-
-/*Frees memory that was allocated in the external system -- higher overhead
- *
- *As noted in external malloc comment, this is clunky 'cause the free has
- * to be called in the core loop.
- */
-void
-VMS__free_in_ext( void *ptrToFree )
- {
-      //just risk system-stack faults until get this figured out
-   free( ptrToFree );
-
-      //TODO: fix this -- so 
- }
-
-
-/*Designed to be called from the main thread outside of VMS, during init
- */
-MallocProlog *
-VMS_ext__create_free_list()
- { MallocProlog *freeListHead, *firstChunk;
-
-      //Note, this is running in the main thread -- all increases in malloc
-      // mem and all frees of it must be done in this thread, with the
-      // thread's original stack available
-   freeListHead = malloc( sizeof(MallocProlog) );
-   firstChunk   = malloc( MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE );
-   if( firstChunk == NULL ) {printf("malloc error\n"); exit(1);}
-   
-   //Touch memory to avoid page faults
-   void *ptr,*endPtr; 
-   endPtr = (void*)firstChunk+MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE;
-   for(ptr = firstChunk; ptr < endPtr; ptr+=PAGE_SIZE)
-   {
-       *(char*)ptr = 0;
-   }
-
-   freeListHead->prevChunkInFreeList = NULL;
-      //Use this addr to free the heap when cleanup
-   freeListHead->nextLowerInMem      = firstChunk;
-      //to identify top-of-heap elem, compare this addr to elem's next higher
-   freeListHead->nextHigherInMem     = (void*)( (uintptr_t)firstChunk +
-                                         MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE);
-   freeListHead->nextChunkInFreeList = firstChunk;
-
-   firstChunk->nextChunkInFreeList   = NULL;
-   firstChunk->prevChunkInFreeList   = freeListHead;
-      //next Higher has to be set to top of chunk, so can calc size in malloc
-   firstChunk->nextHigherInMem       = (void*)( (uintptr_t)firstChunk +
-                                         MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE);
-   firstChunk->nextLowerInMem        = NULL; //identifies as bott of heap
-   
-   _VMSMasterEnv->amtOfOutstandingMem = 0; //none allocated yet
-
-   return freeListHead;
- }
-
-
-/*Designed to be called from the main thread outside of VMS, during cleanup
- */
-void
-VMS_ext__free_free_list( MallocProlog *freeListHead )
- {    
-      //stashed a ptr to the one and only bug chunk malloc'd from OS in the
-      // free list head's next lower in mem pointer
-   free( freeListHead->nextLowerInMem );
-
-   //don't free the head -- it'll be in an array eventually -- free whole
-   // array when all the free lists linked from it have already been freed
- }
-
diff -r 7cff4e13d5c4 -r 651ee45615ae vmalloc.h
--- a/vmalloc.h	Fri Feb 10 12:05:17 2012 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,61 +0,0 @@
-/*
- *  Copyright 2009 OpenSourceCodeStewardshipFoundation.org
- *  Licensed under GNU General Public License version 2
- *
- * Author: seanhalle@yahoo.com
- *
- * Created on November 14, 2009, 9:07 PM
- */
-
-#ifndef _VMALLOC_H
-#define	_VMALLOC_H
-
-#include <malloc.h>
-#include <inttypes.h>
-#include "VMS_primitive_data_types.h"
-
-typedef struct _MallocProlog MallocProlog;
-
-struct _MallocProlog
- {
-   MallocProlog *nextChunkInFreeList;
-   MallocProlog *prevChunkInFreeList;
-   MallocProlog *nextHigherInMem;
-   MallocProlog *nextLowerInMem;
- };
-//MallocProlog
-
-typedef struct
- {
-   MallocProlog *firstChunkInFreeList;
-   int32         numInList; //TODO not used
- }
-FreeListHead;
-
-void *
-VMS__malloc( size_t sizeRequested );
-
-void *
-VMS__malloc_aligned( size_t sizeRequested );
-
-void
-VMS__free( void *ptrToFree );
-
-/*Allocates memory from the external system -- higher overhead
- */
-void *
-VMS__malloc_in_ext( size_t sizeRequested );
-
-/*Frees memory that was allocated in the external system -- higher overhead
- */
-void
-VMS__free_in_ext( void *ptrToFree );
-
-
-MallocProlog *
-VMS_ext__create_free_list();
-
-void
-VMS_ext__free_free_list( MallocProlog *freeListHead );
-
-#endif
\ No newline at end of file
diff -r 7cff4e13d5c4 -r 651ee45615ae vutilities.c
--- a/vutilities.c	Fri Feb 10 12:05:17 2012 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,25 +0,0 @@
-/*
- *  Copyright 2009 OpenSourceCodeStewardshipFoundation.org
- *  Licensed under GNU General Public License version 2
- *
- * Author: seanhalle@yahoo.com
- *
- * Created on November 14, 2009, 9:07 PM
- */
-
-#include <malloc.h>
-#include <stdlib.h>
-
-#include "VMS.h"
-
-
-inline char *
-VMS__strDup( char *str )
- { char *retStr;
-
-   retStr = VMS__malloc( strlen(str) + 1 );
-   if( str == NULL ) return str;
-   strcpy( retStr, str );
-
-   return retStr;
- }
diff -r 7cff4e13d5c4 -r 651ee45615ae vutilities.h
--- a/vutilities.h	Fri Feb 10 12:05:17 2012 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,20 +0,0 @@
-/*
- *  Copyright 2009 OpenSourceCodeStewardshipFoundation.org
- *  Licensed under GNU General Public License version 2
- *
- * Author: seanhalle@yahoo.com
- *
- * Created on November 14, 2009, 9:07 PM
- */
-
-
-#ifndef  _UTILITIES_H
-#define	_UTILITIES_H
-
-#include <string.h>
-#include "VMS_primitive_data_types.h"
-
-inline char *
-VMS__strDup( char *str );
- 
-#endif