# HG changeset patch
# User Me
# Date 1288497276 25200
# Node ID 8f7141a9272e10a0e7cd3c9d23d7db394510e410
# Parent  5388f1c2da6f17bfa3ba1bbbac67ea2062f7891a
Added VMS__malloc and probes, and major re-factoring to separate mallocs

diff -r 5388f1c2da6f -r 8f7141a9272e CoreLoop.c
--- a/CoreLoop.c	Thu Oct 14 17:07:23 2010 -0700
+++ b/CoreLoop.c	Sat Oct 30 20:54:36 2010 -0700
@@ -83,11 +83,14 @@
 CoreLoopStartPt:
    
       //Get virtual processor from queue
-      //_VMSWorkQ must be a global, static volatile var, so not kept in reg,
+      //The Q must be a global, static volatile var, so not kept in reg,
       // which forces reloading the pointer after each jmp to this point
    readyToAnimateQ  = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx];
 
    currPr = (VirtProcr *) readSRSWQ_NonBlocking( readyToAnimateQ );
+   
+   if( currPr != NULL ) _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0;
+
    int tries = 0; int gotLock = 0;
    while( currPr == NULL )
     {    //no VPs ready to animate, so run MasterVP --later make "try Master"
@@ -101,24 +104,20 @@
          //check if get the MasterLock
       gotLock = __sync_bool_compare_and_swap( &(_VMSMasterEnv->masterLock), \
                                                  UNLOCKED, LOCKED );
-
       if( gotLock )
-       {
-            //run own MasterVP -- when its done, unlocks MasterLock and
+       {    //run own MasterVP -- when its done, unlocks MasterLock and
             // jumps back to coreLoops's startPt
          currPr = _VMSMasterEnv->masterVPs[thisCoresIdx];
+         if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 10000 )
+            printf("10000 back to back MasterVP\n");
+         _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1;
          break;  //end while -- have a VP to animate now
        }
-         //Aug 24, 2010 -- changed so each core loop only gets work scheduled
-         // by its own master, so now stay in loop until get lock
-//      currPr = (VirtProcr *) readSRSWQ_NonBlocking( readyToAnimateQ );
       
       tries++;
-//      if( tries % 10000 == 0 ) printf("empty tries: %d\n", tries/10000 );
-      if( tries % READYTOANIMATE_RETRIES == 0 ) pthread_yield();
+      if( tries > READYTOANIMATE_RETRIES ) { tries = 0; pthread_yield(); }
     }
    
-//   currPr->coreAnimatedBy  = coreLoopThdParams->coreNum;
 
       //switch to virt procr's stack and frame ptr then jump to virt procr fn
    void *stackPtr, *framePtr, *jmpPt, *coreLoopFramePtrAddr, \
@@ -205,11 +204,17 @@
    readyToAnimateQ  = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx];
    currPr = (VirtProcr *) readSRSWQ_NonBlocking( readyToAnimateQ );
    if( currPr == NULL )
+    { if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] )
+         printf("back to back MasterVP\n");
+      _VMSMasterEnv->numMasterInARow[thisCoresIdx] = TRUE;
       currPr = _VMSMasterEnv->masterVPs[thisCoresIdx];
-   
+    }
+   else
+    _VMSMasterEnv->numMasterInARow[thisCoresIdx] = FALSE;
 
-//   printf("core %d loop procr addr: %d\n", coreLoopThdParams->coreNum, \
-//       (int)currPr ); fflush(stdin);
+         PRINT2_DEBUG("core %d loop procr addr: %d\n",\
+                       coreLoopThdParams->coreNum,    \
+                       (int)currPr )
 
       //switch to virt procr's stack and frame ptr then jump to virt procr
    void *stackPtr, *framePtr, *jmpPt, *coreLoopFramePtrAddr, \
diff -r 5388f1c2da6f -r 8f7141a9272e MasterLoop.c
--- a/MasterLoop.c	Thu Oct 14 17:07:23 2010 -0700
+++ b/MasterLoop.c	Sat Oct 30 20:54:36 2010 -0700
@@ -185,4 +185,3 @@
                 );//can probably make clobber list empty -- but safe for now
  }
 
-
diff -r 5388f1c2da6f -r 8f7141a9272e VMS.c
--- a/VMS.c	Thu Oct 14 17:07:23 2010 -0700
+++ b/VMS.c	Sat Oct 30 20:54:36 2010 -0700
@@ -6,7 +6,9 @@
 
 #include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
 #include <malloc.h>
+#include <sys/time.h>
 
 #include "VMS.h"
 #include "Queue_impl/BlockingQueue.h"
@@ -28,6 +30,10 @@
 void
 create_the_coreLoop_OS_threads();
 
+MallocProlog *
+create_free_list();
+
+
 pthread_mutex_t suspendLock = PTHREAD_MUTEX_INITIALIZER;
 pthread_cond_t  suspend_cond  = PTHREAD_COND_INITIALIZER;
 
@@ -100,67 +106,47 @@
    allSchedSlots    = malloc( NUM_CORES * sizeof(SchedSlot *) );
 
    for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ )
-    {
+    {    //running in main thread -- normal malloc inside makeSRSWQ
       readyToAnimateQs[ coreIdx ] = makeSRSWQ();
       
-         //Q: should give masterVP core-specific into as its init data?
-      masterVPs[ coreIdx ] = VMS__create_procr( &masterLoop, masterEnv );
+         //Q: should give masterVP core-specific info as its init data?
+      masterVPs[ coreIdx ] = VMS_ext__create_procr( &masterLoop, masterEnv );
       masterVPs[ coreIdx ]->coreAnimatedBy = coreIdx;
       allSchedSlots[ coreIdx ] = create_sched_slots(); //makes for one core
+      _VMSMasterEnv->numMasterInARow[ coreIdx ] = FALSE;
     }
    _VMSMasterEnv->readyToAnimateQs = readyToAnimateQs;
    _VMSMasterEnv->masterVPs        = masterVPs;
+   _VMSMasterEnv->masterLock       = UNLOCKED;
    _VMSMasterEnv->allSchedSlots    = allSchedSlots;
-
+   _VMSMasterEnv->numProcrsCreated = 0;
 
 
       //Aug 19, 2010:  no longer need to place initial masterVP into queue
       // because coreLoop now controls -- animates its masterVP when no work
 
+   _VMSMasterEnv->freeListHead        = VMS__create_free_list();
+   _VMSMasterEnv->amtOfOutstandingMem = 0; //none allocated yet
 
-   //==================== malloc substitute ========================
-   //
-   //Testing whether malloc is using thread-local storage and therefore
-   // causing unreliable behavior.
-   //Just allocate a massive chunk of memory and roll own malloc/free and
-   // make app use VMS__malloc_to, which will suspend and perform malloc
-   // in the master, taking from this massive chunk.
+   //============================= MEASUREMENT STUFF ========================
+   #ifdef STATS__TURN_ON_PROBES
+      //creates intervalProbes array and sets pointer to it in masterEnv too
+   _VMSMasterEnv->dynIntervalProbesInfo =
+                  makeDynArrayOfSize( &(_VMSMasterEnv->intervalProbes), 20 );
 
-//   initFreeList();
+   _VMSMasterEnv->probeNameHashTbl = makeHashTable( 1000, NULL );
+   _VMSMasterEnv->masterCreateProbeID =
+       VMS_ext__record_time_point_into_new_probe( "masterCreateProbe" );
+      //Also put creation time directly into master env, for fast retrieval
+   struct timeval timeStamp;
+   gettimeofday( &(timeStamp), NULL);
+   _VMSMasterEnv->createPtInSecs =
+                           timeStamp.tv_sec +(timeStamp.tv_usec/1000000.0);
+   #endif
+   //========================================================================
 
  }
 
-/*
-void
-initMasterMalloc()
- {
-   _VMSMasterEnv->mallocChunk = malloc( MASSIVE_MALLOC_SIZE );
-
-      //The free-list element is the first several locations of an
-      // allocated chunk -- the address given to the application is pre-
-      // pended with both the ownership structure and the free-list struc.
-      //So, write the values of these into the first locations of
-      // mallocChunk -- which marks it as free & puts in its size.
-   listElem = (FreeListElem *)_VMSMasterEnv->mallocChunk;
-   listElem->size = MASSIVE_MALLOC_SIZE - NUM_PREPEND_BYTES
-   listElem->next = NULL;
- }
-
-void
-dissipateMasterMalloc()
- {
-      //Just foo code -- to get going -- doing as if free list were link-list
-   currElem = _VMSMasterEnv->freeList;
-   while( currElem != NULL )
-    {
-      nextElem = currElem->next;
-      masterFree( currElem );
-      currElem = nextElem;
-    }
-   free( _VMSMasterEnv->freeList );
- }
- */
-
 SchedSlot **
 create_sched_slots()
  { SchedSlot  **schedSlots;
@@ -213,7 +199,7 @@
                         thdAttrs,
                        &coreLoop,
                (void *)(coreLoopThdParams[coreIdx]) );
-      if(retCode){printf("ERROR creating thread: %d\n", retCode); exit(0);}
+      if(retCode){printf("ERROR creating thread: %d\n", retCode); exit(1);}
     }
  }
 
@@ -225,12 +211,6 @@
 VMS__start_the_work_then_wait_until_done()
  { int coreIdx;
       //Start the core loops running
-//===========================================================================
-   TSCount  startCount, endCount;
-   unsigned long long count = 0, freq = 0;
-   double   runTime;
-
-      startCount = getTSCount();
    
       //tell the core loop threads that setup is complete
       //get lock, to lock out any threads still starting up -- they'll see
@@ -251,14 +231,6 @@
       //NOTE: do not clean up VMS env here -- semantic layer has to have
       // a chance to clean up its environment first, then do a call to free
       // the Master env and rest of VMS locations
-
-
-      endCount = getTSCount();
-      count = endCount - startCount;
-
-      runTime = (double)count / (double)TSCOUNT_FREQ;
-
-      printf("\n Time startup to shutdown: %f\n", runTime); fflush( stdin );
  }
 
 /*Only difference between version with an OS thread pinned to each core and
@@ -285,37 +257,73 @@
  * animator state to return to --
  *
  */
-VirtProcr *
-VMS__create_procr( VirtProcrFnPtr fnPtr, void *initialData )
- { VirtProcr *newPr;
-   char      *stackLocs, *stackPtr;
+static inline VirtProcr *
+create_procr_helper( VirtProcr *newPr,       VirtProcrFnPtr  fnPtr,
+                     void      *initialData, char           *stackLocs )
+ {     //caller supplies newPr and stackLocs; this only initializes them
+   char  *stackPtr;
 
-   newPr              = malloc( sizeof(VirtProcr) );
-   newPr->procrID     = numProcrsCreated++;
+   newPr->procrID     = _VMSMasterEnv->numProcrsCreated++;
    newPr->nextInstrPt = fnPtr;
    newPr->initialData = initialData;
    newPr->requests    = NULL;
    newPr->schedSlot   = NULL;
-//   newPr->coreLoopStartPt = _VMSMasterEnv->coreLoopStartPt;
 
       //fnPtr takes two params -- void *initData & void *animProcr
       //alloc stack locations, make stackPtr be the highest addr minus room
       // for 2 params + return addr.  Return addr (NULL) is in loc pointed to
       // by stackPtr, initData at stackPtr + 4 bytes, animatingPr just above
-   stackLocs = malloc( VIRT_PROCR_STACK_SIZE );
-   if(stackLocs == 0)
-   {perror("malloc stack"); exit(1);}
-   newPr->startOfStack = stackLocs;
    stackPtr = ( (char *)stackLocs + VIRT_PROCR_STACK_SIZE - 0x10 );
+   
       //setup __cdecl on stack -- coreloop will switch to stackPtr before jmp
    *( (int *)stackPtr + 2 ) = (int) newPr; //rightmost param -- 32bit pointer
    *( (int *)stackPtr + 1 ) = (int) initialData;  //next  param to left
    newPr->stackPtr = stackPtr; //core loop will switch to this, then
    newPr->framePtr = stackPtr; //suspend loop will save new stack & frame ptr
 
+   //============================= MEASUREMENT STUFF ========================
+   #ifdef STATS__TURN_ON_PROBES
+   struct timeval timeStamp;
+   gettimeofday( &(timeStamp), NULL);
+   newPr->createPtInSecs = timeStamp.tv_sec +(timeStamp.tv_usec/1000000.0);
+   #endif
+   //========================================================================
+
    return newPr;
  }
 
+inline VirtProcr *
+VMS__create_procr( VirtProcrFnPtr fnPtr, void *initialData )
+ { VirtProcr *newPr;
+   char      *stackLocs;
+      //Both the VirtProcr struc and its stack come from VMS__malloc
+   newPr      = VMS__malloc( sizeof(VirtProcr) );
+   stackLocs  = VMS__malloc( VIRT_PROCR_STACK_SIZE );
+   if( newPr == NULL || stackLocs == NULL )  //newPr is dereferenced below
+    { perror("VMS__malloc stack"); exit(1); }
+   newPr->startOfStack = stackLocs;
+
+   return create_procr_helper( newPr, fnPtr, initialData, stackLocs );
+ }
+
+/* "ext" = for use outside the VMS system: call only from the main thread or
+ * another OS thread, never from code animated by a VMS virtual processor.
+ *Uses plain malloc for struc and stack; pair with VMS_ext__dissipate_procr.
+ */
+inline VirtProcr *
+VMS_ext__create_procr( VirtProcrFnPtr fnPtr, void *initialData )
+ { VirtProcr *newPr;
+   char      *stackLocs;
+
+   newPr      = malloc( sizeof(VirtProcr) );
+   stackLocs  = malloc( VIRT_PROCR_STACK_SIZE );
+   if( newPr == NULL || stackLocs == NULL )  //newPr is dereferenced below
+    { perror("malloc stack"); exit(1); }
+   newPr->startOfStack = stackLocs;
+
+   return create_procr_helper( newPr, fnPtr, initialData, stackLocs );
+ }
+
 
 /*there is a label inside this function -- save the addr of this label in
  * the callingPr struc, as the pick-up point from which to start the next
@@ -339,7 +347,6 @@
 
       //return ownership of the virt procr and sched slot to Master virt pr
    animatingPr->schedSlot->workIsDone = TRUE;
-//   coreIdx = callingPr->coreAnimatedBy;
 
    stackPtrAddr      = &(animatingPr->stackPtr);
    framePtrAddr      = &(animatingPr->framePtr);
@@ -390,6 +397,31 @@
 
 
 
+/*For this implementation of VMS, it may not make much sense to have the
+ * system of requests for creating a new processor done this way.. but over
+ * the scope of single-master, multi-master, multi-tasking, OS-implementing,
+ * distributed-memory, and so on, this gives VMS implementation a chance to
+ * do stuff before suspend, in the AppVP, and in the Master before the plugin
+ * is called, as well as in the lang-lib before this is called, and in the
+ * plugin.  So, this gives both VMS and language implementations a chance to
+ * intercept at various points and do order-dependent stuff.
+ *Having a standard VMSNewPrReqData struc allows the language to create and
+ * free the struc, while VMS knows how to get the newPr if it wants it, and
+ * it lets the lang have lang-specific data related to creation transported
+ * to the plugin.
+ */
+void
+VMS__send_create_procr_req( void *semReqData, VirtProcr *reqstingPr )
+ { VMSReqst createRequest; //in this frame, which lasts until suspend returns
+
+   createRequest.nextReqst  = reqstingPr->requests; //chain onto existing list
+   createRequest.semReqData = semReqData;
+   createRequest.reqType    = createReq;
+   reqstingPr->requests     = &createRequest;
+
+   VMS__suspend_procr( reqstingPr );
+ }
+
 
 /*
  *This adds a request to dissipate, then suspends the processor so that the
@@ -414,80 +446,93 @@
  */
 void
 VMS__dissipate_procr( VirtProcr *procrToDissipate )
- { VMSReqst *req;
+ { VMSReqst req;
 
-   req = malloc( sizeof(VMSReqst) );
-//   req->virtProcrFrom      = callingPr;
-   req->reqType               = dissipate;
-   req->nextReqst             = procrToDissipate->requests;
-   procrToDissipate->requests = req;
-   
+   req.reqType                = dissipate;
+   req.nextReqst              = procrToDissipate->requests;
+   procrToDissipate->requests = &req;
+
    VMS__suspend_procr( procrToDissipate );
-}
+ }
+
+
+/* "ext" version -- meant for code outside the VMS system: call it from the
+ * main thread or another OS thread, never from code that a VMS virtual
+ * processor is animating.
+ *
+ *This is the one to use on VPs that were created outside the VMS system.
+ */
+void
+VMS_ext__dissipate_procr( VirtProcr *procrToDissipate )
+ { void *stackLocs = procrToDissipate->startOfStack;
+      //initialData is not freed here: it was handed to the processor, so it
+      // is either VMS__malloc'd or is freed by the level above the VP.
+      //That leaves just the stack and the VirtProcr struc itself, both of
+      // which are released with plain free.
+      //Note, should not stack-allocate initial data -- no guarantee, in
+      // general that creating processor will outlive ones it creates.
+   free( stackLocs );
+   free( procrToDissipate );
+ }
+
 
 
 /*This inserts the semantic-layer's request data into standard VMS carrier
+ * request data-struct is allocated on stack of this call & ptr to it sent
+ * to plugin. FIXME: this call returns without suspending, so that ptr dangles
  */
 inline void
 VMS__add_sem_request( void *semReqData, VirtProcr *callingPr )
- { VMSReqst *req;
+ { VMSReqst req;
 
-   req = malloc( sizeof(VMSReqst) );
-//   req->virtProcrFrom      = callingPr;
-   req->reqType        = semantic;
-   req->semReqData     = semReqData;
-   req->nextReqst      = callingPr->requests;
-   callingPr->requests = req;
+   req.reqType         = semantic;
+   req.semReqData      = semReqData;
+   req.nextReqst       = callingPr->requests;
+   callingPr->requests = &req;
  }
 
+/*This inserts the semantic-layer's request data into standard VMS carrier
+ * request data-struct is allocated on stack of this call & ptr to it sent
+ * to plugin
+ *Then it does suspend, to cause request to be sent.
+ */
+inline void
+VMS__send_sem_request( void *semReqData, VirtProcr *callingPr )
+ { VMSReqst req;
 
-/*Use this to get first request before starting request handler's loop
+   req.reqType         = semantic;
+   req.semReqData      = semReqData;
+   req.nextReqst       = callingPr->requests;
+   callingPr->requests = &req;
+   
+   VMS__suspend_procr( callingPr );
+ }
+
+
+inline void
+VMS__send_VMSSem_request( void *semReqData, VirtProcr *callingPr )
+ { VMSReqst vmsReq;  //in this frame, which is not popped until suspend returns
+
+   vmsReq.nextReqst    = callingPr->requests; //grab any other preceding
+   vmsReq.semReqData   = semReqData;
+   vmsReq.reqType      = VMSSemantic;
+   callingPr->requests = &vmsReq;
+
+   VMS__suspend_procr( callingPr );
+ }
+
+
+/*Unlinks the head of the VP's request list and returns it (NULL if empty)
  */
 VMSReqst *
-VMS__take_top_request_from( VirtProcr *procrWithReq )
- { VMSReqst *req;
-
-   req = procrWithReq->requests;
-   if( req == NULL ) return req;
-
-   procrWithReq->requests = procrWithReq->requests->nextReqst;
-   return req;
- }
-
-/*A subtle bug due to freeing then accessing "next" after freed caused this
- * form of call to be put in -- so call this at end of request handler loop
- * that iterates through the requests.
- */
-VMSReqst *
-VMS__free_top_and_give_next_request_from( VirtProcr *procrWithReq )
+VMS__take_next_request_out_of( VirtProcr *procrWithReq )
  { VMSReqst *req;
 
    req = procrWithReq->requests;
    if( req == NULL ) return NULL;
 
    procrWithReq->requests = procrWithReq->requests->nextReqst;
-   VMS__free_request( req );
-   return procrWithReq->requests;
- }
-
-
-//TODO: add a semantic-layer supplied "freer" for the semantic-data portion
-// of a request -- IE call with both a virt procr and a fn-ptr to request
-// freer (also maybe put sem request freer as a field in virt procr?)
-//MeasVMS relies right now on this only freeing VMS layer of request -- the
-// semantic portion of request is alloc'd and freed by request handler
-void
-VMS__free_request( VMSReqst *req )
- {
-   free( req );
- }
-
-
-
-inline int
-VMS__isSemanticReqst( VMSReqst *req )
- {
-   return ( req->reqType == semantic );
+   return req;
  }
 
 
@@ -497,36 +542,44 @@
    return req->semReqData;
  }
 
-inline int
-VMS__isDissipateReqst( VMSReqst *req )
- {
-   return ( req->reqType == dissipate );
- }
 
-inline int
-VMS__isCreateReqst( VMSReqst *req )
- {
-   return ( req->reqType == regCreated );
- }
 
-void
-VMS__send_req_to_register_new_procr(VirtProcr *newPr, VirtProcr *reqstingPr)
- { VMSReqst *req;
+/* This is for OS requests and VMS infrastructure requests, such as to create
+ *  a probe -- a probe is inside the heart of VMS-core, it's not part of any
+ *  language -- but it's also a semantic thing that's triggered from and used
+ *  in the application.. so it crosses abstractions..  so, need some special
+ *  pattern here for handling such requests.
+ * Called from the language's request handler when it sees a request of type
+ *  VMSSemantic; the new probe is returned via dataReturnedFromReq.
+ */
+void inline
+VMS__handle_VMSSemReq( VMSReqst *req, VirtProcr *requestingPr, void *semEnv,
+                       ResumePrFnPtr resumePrFnPtr )
+ { VMSSemReq     *semReq;
+   IntervalProbe *newProbe;
+   int32          nameLen;
 
-   req                  = malloc( sizeof(VMSReqst) );
-   req->reqType         = regCreated;
-   req->semReqData      = newPr;
-   req->nextReqst       = reqstingPr->requests;
-   reqstingPr->requests = req;
+   semReq = req->semReqData;
 
-   VMS__suspend_procr( reqstingPr );
+   newProbe          = VMS__malloc( sizeof(IntervalProbe) );
+   nameLen = strlen( semReq->nameStr ) + 1;  //+1 so the NUL is copied too
+   newProbe->nameStr = VMS__malloc( nameLen );
+   memcpy( newProbe->nameStr, semReq->nameStr, nameLen );
+   newProbe->hist    = NULL;
+   newProbe->schedChoiceWasRecorded = FALSE;
+   newProbe->probeID =
+             addToDynArray( newProbe, _VMSMasterEnv->dynIntervalProbesInfo );
+
+   requestingPr->dataReturnedFromReq = newProbe;
+
+   (*resumePrFnPtr)( requestingPr, semEnv );
  }
 
 
 
 /*This must be called by the request handler plugin -- it cannot be called
  * from the semantic library "dissipate processor" function -- instead, the
- * semantic layer has to generate a request for the plug-in to call this
+ * semantic layer has to generate a request, and the plug-in calls this
  * function.
  *The reason is that this frees the virtual processor's stack -- which is
  * still in use inside semantic library calls!
@@ -548,15 +601,15 @@
       // any locations that it is (was) sole owner of
 //TODO: implement VMS__malloc system, including "give up ownership"
 
-      //The dissipate request might still be attached, so remove and free it
-   VMS__free_top_and_give_next_request_from( animatingPr );
 
       //NOTE: initialData was given to the processor, so should either have
       // been alloc'd with VMS__malloc, or freed by the level above animPr.
       //So, all that's left to free here is the stack and the VirtProcr struc
       // itself
-   free( animatingPr->startOfStack );
-   free( animatingPr );
+      //Note, should not stack-allocate initial data -- no guarantee, in
+      // general that creating processor will outlive ones it creates.
+   VMS__free( animatingPr->startOfStack );
+   VMS__free( animatingPr );
  }
 
 
@@ -603,7 +656,7 @@
       //create the shutdown processors, one for each core loop -- put them
       // directly into the Q -- each core will die when gets one
    for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ )
-    {
+    {    //Note, this is running in the master
       shutDownPr = VMS__create_procr( &endOSThreadFn, NULL );
       writeSRSWQ( shutDownPr, _VMSMasterEnv->readyToAnimateQs[coreIdx] );
     }
@@ -664,8 +717,8 @@
    for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ )
     {
       freeSRSWQ( readyToAnimateQs[ coreIdx ] );
-
-      VMS__handle_dissipate_reqst( masterVPs[ coreIdx ] );
+         //master VPs were created external to VMS, so use external free
+      VMS_ext__dissipate_procr( masterVPs[ coreIdx ] );
       
       freeSchedSlots( allSchedSlots[ coreIdx ] );
     }
@@ -673,20 +726,15 @@
    free( _VMSMasterEnv->readyToAnimateQs );
    free( _VMSMasterEnv->masterVPs );
    free( _VMSMasterEnv->allSchedSlots );
+   
+   VMS_ext__free_free_list( _VMSMasterEnv->freeListHead );
+
+   //============================= MEASUREMENT STUFF ========================
+   #ifdef STATS__TURN_ON_PROBES
+   freeDynArrayDeep( _VMSMasterEnv->dynIntervalProbesInfo, &free );
+   #endif
+   //========================================================================
 
    free( _VMSMasterEnv );
  }
 
-
-//===========================================================================
-
-inline TSCount getTSCount()
- { unsigned int low, high;
-   TSCount  out;
-
-   saveTimeStampCountInto( low, high );
-   out = high;
-   out = (out << 32) + low;
-   return out;
- }
-
diff -r 5388f1c2da6f -r 8f7141a9272e VMS.h
--- a/VMS.h	Thu Oct 14 17:07:23 2010 -0700
+++ b/VMS.h	Sat Oct 30 20:54:36 2010 -0700
@@ -13,17 +13,31 @@
 #include "VMS_primitive_data_types.h"
 #include "Queue_impl/BlockingQueue.h"
 #include "Histogram/Histogram.h"
+#include "DynArray/DynArray.h"
+#include "Hash_impl/PrivateHash.h"
+#include "vmalloc.h"
+
 #include <pthread.h>
+#include <sys/time.h>
 
+
+//===============================  Debug  ===================================
    //When SEQUENTIAL is defined, VMS does sequential exe in the main thread
    // It still does co-routines and all the mechanisms are the same, it just
    // has only a single thread and animates VPs one at a time
 //#define SEQUENTIAL
 
-#define PRINT_DEBUG(msg) //printf(msg); fflush(stdin);
+#define PRINT_DEBUG(msg)// printf(msg); fflush(stdin);
 #define PRINT1_DEBUG(msg, param) //printf(msg, param); fflush(stdin);
 #define PRINT2_DEBUG(msg, p1, p2) //printf(msg, p1, p2); fflush(stdin);
 
+#define PRINT_ERROR(msg) { printf(msg); fflush(stdout); } //flush what printf wrote
+#define PRINT1_ERROR(msg, param) { printf(msg, param); fflush(stdout); }
+#define PRINT2_ERROR(msg, p1, p2) { printf(msg, p1, p2); fflush(stdout); }
+
+
+//===========================  STATS =======================
+
    //when MEAS__TIME_STAMP_SUSP is defined, causes code to be inserted and
    // compiled-in that saves the low part of the time stamp count just before
    // suspending a processor and just after resuming that processor.  It is
@@ -35,6 +49,8 @@
 
 #define NUM_TSC_ROUND_TRIPS 10
 
+
+//=========================  Hardware related Constants =====================
    //This value is the number of hardware threads in the shared memory
    // machine
 #define NUM_CORES        4
@@ -47,39 +63,75 @@
 #define READYTOANIMATE_RETRIES 10000
 
    // stack
-#define VIRT_PROCR_STACK_SIZE 0x10000
+#define VIRT_PROCR_STACK_SIZE 0x4000
 
-   //256M of total memory for VMS__malloc
-#define MASSIVE_MALLOC_SIZE 0x10000000
+   // memory for VMS__malloc -- 256M
+#define MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE 0x10000000
 
-#define NUM_PREPEND_BYTES sizeof(FreeListElem) + sizeof(ownerElem);
+
+//==============================
 
 #define SUCCESS 0
 
-#define writeVMSQ     writeCASQ
-#define readVMSQ      readCASQ
-#define makeVMSQ      makeCASQ
-#define VMSQueueStruc CASQueueStruc
+#define writeVMSQ     writeSRSWQ
+#define readVMSQ      readSRSWQ
+#define makeVMSQ      makeSRSWQ
+#define VMSQueueStruc SRSWQueueStruc
 
-//#define thdAttrs NULL  //For PThreads
 
-typedef struct _SchedSlot  SchedSlot;
-typedef struct _VMSReqst   VMSReqst;
-typedef struct _VirtProcr  VirtProcr;
+
+//===========================================================================
+typedef unsigned long long TSCount;
+
+typedef struct _SchedSlot     SchedSlot;
+typedef struct _VMSReqst      VMSReqst;
+typedef struct _VirtProcr     VirtProcr;
+typedef struct _IntervalProbe IntervalProbe;
 
 typedef VirtProcr * (*SlaveScheduler)  ( void *, int );   //semEnv, coreIdx
 typedef void  (*RequestHandler)  ( VirtProcr *, void * ); //prWReqst, semEnv
 typedef void  (*VirtProcrFnPtr)  ( void *, VirtProcr * ); //initData, animPr
 typedef void    VirtProcrFn      ( void *, VirtProcr * ); //initData, animPr
+typedef void  (*ResumePrFnPtr)   ( VirtProcr *, void * );
+
+
+//============= Requests ===========
+//
+
+enum VMSReqstType   //avoid starting enums at 0, for debug reasons
+ {
+   semantic = 1,    //set by VMS__add_sem_request and VMS__send_sem_request
+   createReq,       //set by VMS__send_create_procr_req
+   dissipate,       //set by VMS__dissipate_procr
+   VMSSemantic      //set by VMS__send_VMSSem_request; goes with VMSSemReq below
+ };
+
+struct _VMSReqst
+ {
+   enum VMSReqstType  reqType;//used for dissipate and in future for IO requests
+   void              *semReqData;
+
+   VMSReqst *nextReqst;
+ };
+//VMSReqst
+
+enum VMSSemReqstType   //These are equivalent to semantic requests, but for
+ {                     // VMS's services available directly to app, like OS
+   createProbe = 1,    // and probe services -- like a VMS-wide built-in lang
+   openFile,
+   otherIO
+ };
 
 typedef struct
- {
-   void           *endThdPt;
-   unsigned int    coreNum;
+ { enum VMSSemReqstType reqType;
+   VirtProcr           *requestingPr;
+   char                *nameStr;  //for create probe
  }
-ThdParams;
+ VMSSemReq;
 
 
+//====================  Core data structures  ===================
+
 struct _SchedSlot
  {
    int         workIsDone;
@@ -87,24 +139,6 @@
    VirtProcr  *procrAssignedToSlot;
  };
 //SchedSlot
- 
-enum ReqstType
- {
-   semantic = 1,
-   dissipate,
-   regCreated,
-   IO
- };
-
-struct _VMSReqst
- {
-//   VirtProcr   *virtProcrFrom;
-   enum ReqstType  reqType;//used for dissipate and in future for IO requests
-   void           *semReqData;
-
-   VMSReqst *nextReqst;
- };
-//VMSReqst
 
 struct _VirtProcr
  { int         procrID;  //for debugging -- count up each time create
@@ -123,9 +157,10 @@
    SchedSlot  *schedSlot;
    VMSReqst   *requests;
 
-   void       *semanticData;
+   void       *semanticData; //this lives here for the life of VP
+   void       *dataReturnedFromReq;//values returned from plugin to VP go here
 
-   //============================= MEASUREMENT STUFF ========================
+      //=========== MEASUREMENT STUFF ==========
    #ifdef MEAS__TIME_STAMP_SUSP
    unsigned int preSuspTSCLow;
    unsigned int postSuspTSCLow;
@@ -134,12 +169,12 @@
    unsigned int startMasterTSCLow;
    unsigned int endMasterTSCLow;
    #endif
-   //========================================================================
+   
+   float64      createPtInSecs;  //have space but don't use on some configs
  };
 //VirtProcr
 
 
-
 typedef struct
  {
    SlaveScheduler   slaveScheduler;
@@ -151,35 +186,61 @@
 
    void            *semanticEnv;
    void            *OSEventStruc;   //for future, when add I/O to BLIS
+   MallocProlog    *freeListHead;
+   int32            amtOfOutstandingMem; //total currently allocated
 
    void            *coreLoopStartPt;//addr to jump to to re-enter coreLoop
    void            *coreLoopEndPt;  //addr to jump to to shut down a coreLoop
 
-   int              setupComplete;
-   int              masterLock;
+   int32            setupComplete;
+   int32            masterLock;
 
+   int32            numMasterInARow[NUM_CORES];//detect back-to-back masterVP
+   int32            numProcrsCreated; //gives ordering to processor creation
+
+      //=========== MEASUREMENT STUFF =============
+   IntervalProbe  **intervalProbes;
+   DynArrayInfo    *dynIntervalProbesInfo;
+   HashTable       *probeNameHashTbl;
+   int32            masterCreateProbeID;
+   float64          createPtInSecs;
  }
 MasterEnv;
 
 
-//==========================================================
+
+
+//=======================  OS Thread related  ===============================
 
 void * coreLoop( void *paramsIn );  //standard PThreads fn prototype
 void * coreLoop_Seq( void *paramsIn );  //standard PThreads fn prototype
 void masterLoop( void *initData, VirtProcr *masterPr );
 
 
-//=====================  Global Vars ===================
-
+typedef struct
+ {
+   void           *endThdPt;
+   unsigned int    coreNum;
+ }
+ThdParams;
 
 pthread_t       coreLoopThdHandles[ NUM_CORES ];  //pthread's virt-procr state
 ThdParams      *coreLoopThdParams [ NUM_CORES ];
 pthread_mutex_t suspendLock;
 pthread_cond_t  suspend_cond;
 
+
+
+//=====================  Global Vars ===================
+
 volatile MasterEnv      *_VMSMasterEnv;
 
-//==========================
+
+
+
+//===========================  Function Prototypes  =========================
+
+//============== Setup and shutdown =============
 void
 VMS__init();
 
@@ -195,16 +256,28 @@
 VirtProcr *
 VMS__create_procr( VirtProcrFnPtr fnPtr, void *initialData );
 
+   //Use this to create processor inside entry point & other places outside
+   // the VMS system boundary (IE, not run in slave nor Master)
+VirtProcr *
+VMS_ext__create_procr( VirtProcrFnPtr fnPtr, void *initialData );
+
 VirtProcr *
 VMS__create_the_shutdown_procr();
 
-//==========================
+void
+VMS__cleanup_after_shutdown();
+
+
+//==============  Request Related  ===============
+
+void
+VMS__suspend_procr( VirtProcr *callingPr );
+
 inline void
 VMS__add_sem_request( void *semReqData, VirtProcr *callingPr );
 
 void
-VMS__send_req_to_register_new_procr( VirtProcr *newPrToRegister,
-                                      VirtProcr *reqstingPr );
+VMS__send_create_procr_req( void *semReqData, VirtProcr *reqstingPr );
 
 void
 VMS__free_request( VMSReqst *req );
@@ -216,7 +289,7 @@
 VMS__take_top_request_from( VirtProcr *reqstingPr );
 
 VMSReqst *
-VMS__free_top_and_give_next_request_from( VirtProcr *procrWithReq );
+VMS__take_next_request_out_of( VirtProcr *procrWithReq );
 
 inline void *
 VMS__take_sem_reqst_from( VMSReqst *req );
@@ -232,25 +305,15 @@
 
 //==========================
 
-void
-VMS__suspend_procr( VirtProcr *callingPr );
-
-void
+void inline
 VMS__dissipate_procr( VirtProcr *prToDissipate );
 
 void
 VMS__handle_dissipate_reqst( VirtProcr *procrToDissipate );
 
-void
-VMS__cleanup_after_shutdown();
 
-//============================= Statistics ==================================
 
-typedef unsigned long long TSCount;
-
-   //Frequency of TS counts
-   //TODO: change freq for each machine
-#define TSCOUNT_FREQ 3180000000
+//===================== RDTSC wrapper ==================
 
 #define saveTimeStampCountInto(low, high) \
    asm volatile("RDTSC;                   \
@@ -269,11 +332,9 @@
    /* clobber */ : "%eax", "%edx"         \
                 );
 
-inline TSCount getTSCount();
+//======================== STATS ======================
 
-//===================== Debug ==========================
-int numProcrsCreated;
-
+#include "probes.h"
 
 #endif	/* _VMS_H */
 
diff -r 5388f1c2da6f -r 8f7141a9272e probes.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/probes.c	Sat Oct 30 20:54:36 2010 -0700
@@ -0,0 +1,342 @@
+/*
+ * Copyright 2010  OpenSourceStewardshipFoundation
+ *
+ * Licensed under BSD
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <malloc.h>
+#include <sys/time.h>
+
+#include "VMS.h"
+#include "Queue_impl/BlockingQueue.h"
+#include "Histogram/Histogram.h"
+
+
+//================================ STATS ====================================
+
+/*Reads the time-stamp counter and returns it as one TSCount value */
+inline TSCount getTSCount()
+ { unsigned int loHalf, hiHalf;   //filled in by the RDTSC wrapper macro
+
+   saveTimeStampCountInto( loHalf, hiHalf );
+      //widen the high half first, so the shift by 32 is done in a TSCount,
+      // then add in the low half
+   return ( ((TSCount)hiHalf) << 32 ) + loHalf;
+ }
+
+
+
+//====================  Probes =================
+#ifdef STATS__USE_TSC_PROBES
+int32   //NOTE(review): probes.h also #defines this name as a 2-arg macro
+VMS__create_single_interval_probe( char *nameStr )
+ { IntervalProbe *newProbe;
+
+   newProbe = malloc( sizeof(IntervalProbe) );
+   if( newProbe == NULL ) { printf("malloc error\n"); exit(1); }
+   newProbe->nameStr = nameStr;  //caller frees if not constant on stack
+   newProbe->hist = NULL;        //NULL hist means single interval probe
+      //what addToDynArray gives back is returned as the probe's index
+   return addToDynArray( newProbe, _VMSMasterEnv->dynIntervalProbesInfo );
+ }
+
+int32
+VMS__create_histogram_probe( int32 numBins, float32 startValue,
+                             float32 binWidth, char *nameStr )
+ { int32          probeIdx;
+   IntervalProbe *histProbe;
+
+      //start from a plain interval probe, then fetch it by returned index
+   probeIdx  = VMS__create_single_interval_probe( nameStr );
+   histProbe = _VMSMasterEnv->intervalProbes[ probeIdx ];
+
+      //a non-NULL hist is what makes it a histogram probe
+   histProbe->hist = makeFloatHistogram( numBins, startValue, binWidth );
+   return probeIdx;
+ }
+
+void   //TSC flavor: stores the current count as the interval's start
+VMS_impl__record_interval_start_in_probe( int32 probeID )
+ {
+      //probeID is used directly as the index into the probes array
+   IntervalProbe *target = _VMSMasterEnv->intervalProbes[ probeID ];
+   target->startStamp = getTSCount();
+ }
+
+void
+VMS_impl__record_interval_end_in_probe( int32 probeID )
+ { TSCount        stampNow, elapsed;
+   IntervalProbe *probe;
+
+      //the stamp is taken first, before the probe is looked up
+   stampNow = getTSCount();
+   probe    = _VMSMasterEnv->intervalProbes[ probeID ];
+   probe->endStamp = stampNow;
+
+   if( probe->hist == NULL ) return;  //no histogram, nothing more to do
+
+      //only intervals under 10x the histogram's endOfRange are added
+   elapsed = probe->endStamp - probe->startStamp;
+   if( elapsed < probe->hist->endOfRange * 10 )
+      addToFloatHist( elapsed, probe->hist );
+ }
+
+void   //TSC flavor: prints the probe found at index probeID to stdout
+VMS_impl__print_stats_of_probe( int32 probeID )
+ { IntervalProbe *probe;
+
+   probe = _VMSMasterEnv->intervalProbes[ probeID ];
+      //a NULL hist marks a single-interval probe; else print the histogram
+   if( probe->hist == NULL )
+    {  //NOTE(review): the TSC record-end code above never sets interval
+      printf("probe: %s, interval: %.6lf\n", probe->nameStr,probe->interval);
+    }
+
+   else
+    {
+      printf( "probe: %s\n", probe->nameStr );
+      printFloatHist( probe->hist );
+    }
+ }
+#else
+#ifdef STATS__USE_DBL_PROBES
+
+/*
+ * In practice, probe operations are called from the app, from inside slaves
+ *  -- so have to be sure each probe is single-VP owned, and be sure that
+ *  any place common structures are modified it's done inside the master.
+ * So -- the only place common structures are modified is during creation.
+ *  after that, all mods are to individual instances.
+ *
+ * Thinking perhaps should change the semantics to be that probes are
+ *  attached to the virtual processor -- and then everything is guaranteed
+ *  to be isolated -- except then can't take any intervals that span VPs,
+ *  and would have to transfer the probes to Master env when VP dissipates..
+ *  gets messy..
+ *
+ * For now, just making so that probe creation causes a suspend, so that
+ *  the dynamic array in the master env is only modified from the master
+ * 
+ */
+IntervalProbe *
+create_generic_probe( char *nameStr, VirtProcr *animPr )
+ { VMSSemReq reqData;
+
+      //describe the request: create a probe that carries this name
+   reqData.reqType  = createProbe;
+   reqData.nameStr  = nameStr;
+      //NOTE(review): reqData is passed by value here, while the sibling
+      // request senders declared in VMS.h take a void *semReqData -- confirm
+   VMS__send_VMSSem_request( reqData, animPr );
+      //the request's result is read back out of the requesting procr
+   return animPr->dataReturnedFromReq;
+ }
+
+int32
+VMS_impl__record_time_point_into_new_probe( char *nameStr, VirtProcr *animPr )
+ { IntervalProbe  *pointProbe;
+   struct timeval *stamp;
+
+      //a fresh probe per time point; endSecs of 0 is what the print helper
+      // checks to recognize "just a single point in time"
+   pointProbe          = create_generic_probe( nameStr, animPr );
+   pointProbe->endSecs = 0;
+
+      //capture the time of day straight into the probe's own startStamp
+   stamp = &(pointProbe->startStamp);
+   gettimeofday( stamp, NULL );
+
+      //whole seconds plus microseconds/1e6, stored as startSecs
+   pointProbe->startSecs = stamp->tv_sec + ( stamp->tv_usec / 1000000.0 );
+   return pointProbe->probeID;
+ }
+
+int32   //creates a plain interval probe and gives back its probeID
+VMS_impl__create_single_interval_probe( char *nameStr, VirtProcr *animPr )
+ {
+      //all the work is done by the shared creation helper; nothing is
+      // changed in the probe here before its ID is returned
+   IntervalProbe *made = create_generic_probe( nameStr, animPr );
+   return made->probeID;
+ }
+
+int32
+VMS_impl__create_histogram_probe( int32   numBins, float64    startValue,
+               float64 binWidth, char   *nameStr, VirtProcr *animPr )
+ { IntervalProbe *histProbe;
+
+      //probe first, via the shared helper, then the histogram to hang on it
+   histProbe = create_generic_probe( nameStr, animPr );
+
+      //attaching a histogram is the only step beyond the plain creation
+   histProbe->hist = makeDblHistogram( numBins, startValue, binWidth );
+   return histProbe->probeID;
+ }
+
+void   //adds an existing probe to the name table; animPr is not used here
+VMS_impl__index_probe_by_its_name( int32 probeID, VirtProcr *animPr )
+ { IntervalProbe *probe;
+
+   //TODO: fix this to be in Master -- race condition
+   probe = _VMSMasterEnv->intervalProbes[ probeID ];
+      //presumably nameStr is the key and the probe the value -- confirm
+   addValueIntoTable(probe->nameStr, probe, _VMSMasterEnv->probeNameHashTbl);
+ }
+
+IntervalProbe *   //looks up probeName in the name table; animPr is unused
+VMS_impl__get_probe_by_name( char *probeName, VirtProcr *animPr )
+ {
+   //TODO: fix this to be in Master -- race condition
+   return getValueFromTable( probeName, _VMSMasterEnv->probeNameHashTbl );
+ }
+
+
+/*Copies which core is animating the procr, and the procr's ID, into the
+ * probe.  No request is sent -- the writes are done right here, by the
+ * animating procr. */
+void
+VMS_impl__record_sched_choice_into_probe( int32 probeID, VirtProcr *animatingPr )
+ { IntervalProbe *target = _VMSMasterEnv->intervalProbes[ probeID ];
+
+      //the print helper only shows the fields below when this flag is set
+   target->schedChoiceWasRecorded = TRUE;
+   target->coreNum         = animatingPr->coreAnimatedBy;
+   target->procrID         = animatingPr->procrID;
+   target->procrCreateSecs = 0;
+ }
+
+/*Stamps the start of an interval: stores the current gettimeofday value
+ * in the probe's startStamp.  Done locally, with no request sent.
+ */
+void
+VMS_impl__record_interval_start_in_probe( int32 probeID )
+ {
+   IntervalProbe *target = _VMSMasterEnv->intervalProbes[ probeID ];
+
+   gettimeofday( &(target->startStamp), NULL );
+ }
+
+
+/*Stamps the end of an interval with gettimeofday, converts both stamps to
+ * seconds, and stores start, end and their difference in the probe.  When
+ * the probe has a histogram, the interval is also offered to it.
+ */
+void
+VMS_impl__record_interval_end_in_probe( int32 probeID )
+ { IntervalProbe  *probe;
+   struct timeval *begTv, *endTv;
+   double          begSecs, endSecs;
+
+      //original author's worry, kept: if a different core resizes the
+      // probe array right after this lookup, this might seg-fault
+   probe = _VMSMasterEnv->intervalProbes[ probeID ];
+   endTv = &(probe->endStamp);
+   begTv = &(probe->startStamp);
+   gettimeofday( endTv, NULL );
+
+      //each stamp becomes seconds: whole secs + usecs/1e6
+   begSecs = begTv->tv_sec + ( begTv->tv_usec / 1000000.0 );
+   endSecs = endTv->tv_sec + ( endTv->tv_usec / 1000000.0 );
+
+   probe->startSecs = begSecs;
+   probe->endSecs   = endSecs;
+   probe->interval  = endSecs - begSecs;
+
+      //feed the histogram only when there is one, and only with intervals
+      // under 10x its endOfRange (the "sane" cutoff)
+   if( probe->hist != NULL
+       &&  probe->interval < probe->hist->endOfRange * 10 )
+    { addToDblHist( probe->interval, probe->hist );
+    }
+ }
+
+void   //prints one probe to stdout; what is shown depends on its fields
+print_probe_helper( IntervalProbe *probe )
+ {
+   printf( "\nprobe: %s, ",  probe->nameStr );
+      //scheduling details appear only when a sched choice was recorded
+   if( probe->schedChoiceWasRecorded )
+    { printf( "coreNum: %d, procrID: %d, procrCreated: %.6lf | ",
+              probe->coreNum, probe->procrID, probe->procrCreateSecs );
+    }
+      //start times are printed relative to the master env's createPtInSecs
+   if( probe->endSecs == 0 ) //just a single point in time
+    {
+      printf( " time point: %.6lf\n",
+              probe->startSecs - _VMSMasterEnv->createPtInSecs );
+    }
+   else if( probe->hist == NULL ) //just an interval
+    {
+      printf( " startSecs: %.6lf, interval: %.6lf\n", 
+         probe->startSecs - _VMSMasterEnv->createPtInSecs, probe->interval);
+    }
+   else  //a full histogram of intervals
+    {
+      printDblHist( probe->hist );
+    }
+ }
+
+//TODO: change so pass around pointer to probe instead of its array-index..
+// will eliminate chance for timing of resize to cause problems with the
+// lookup -- even though don't think it actually can cause problems..
+// there's no need to pass index around -- have hash table for names, and
+// only need it once, then have ptr to probe..  the thing about enum the
+// index and use that as name is clunky in practice -- just hash.
+void
+VMS_impl__print_stats_of_probe( int32 probeID )
+ {
+      //probeID is used as the index into the master env's probe array;
+      // the formatting itself is left to the shared print helper
+   IntervalProbe *chosen = _VMSMasterEnv->intervalProbes[ probeID ];
+   print_probe_helper( chosen );
+ }
+
+
+
+void   //void* wrapper around print_probe_helper
+generic_print_probe( void *_probe )
+ {
+      //the pointer handed in is taken to be an IntervalProbe; this is the
+      // fn whose address is given to forAllInDynArrayDo when printing all
+   print_probe_helper( (IntervalProbe *)_probe );
+ }
+
+void   //prints the probes held in the master env's dyn array of probes
+VMS_impl__print_stats_of_all_probes()
+ {
+      //presumably runs generic_print_probe on each array element -- confirm
+   forAllInDynArrayDo( _VMSMasterEnv->dynIntervalProbesInfo,
+                       &generic_print_probe );
+   fflush( stdout );   //flush whatever the printing left buffered
+ }
+#endif
+#endif
+
+/* Junk left over from when trying the different ways to get time stamps..
+         struct timeval tim;
+         gettimeofday(&tim, NULL);
+         double t1=tim.tv_sec+(tim.tv_usec/1000000.0);
+
+         clock_t startClockStamp = clock();
+
+         TSCount startMultStamp = getTSCount();
+*/
+
+/*
+         TSCount endMultStamp = getTSCount();
+
+         dividerParams->numTSCsToExe = endMultStamp - startMultStamp;
+         printf("\ntime to execute: %d\n", endMultStamp - startMultStamp);
+
+         //==================================================================
+         clock_t endClockStamp = clock();
+         printf("%.4lf seconds of processing\n",
+                (endClockStamp - startClockStamp)/(double)CLOCKS_PER_SEC);
+
+         //==================================================================
+         gettimeofday(&tim, NULL);
+         double t2=tim.tv_sec+(tim.tv_usec/1000000.0);
+         printf("%.6lf seconds elapsed\n", t2-t1);
+*/
diff -r 5388f1c2da6f -r 8f7141a9272e probes.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/probes.h	Sat Oct 30 20:54:36 2010 -0700
@@ -0,0 +1,193 @@
+/*
+ *  Copyright 2009 OpenSourceStewardshipFoundation.org
+ *  Licensed under GNU General Public License version 2
+ *
+ * Author: seanhalle@yahoo.com
+ * 
+ */
+
+#ifndef _PROBES_H
+#define	_PROBES_H
+#define __USE_GNU
+
+#include "VMS_primitive_data_types.h"
+
+#include <sys/time.h>
+
+   //turns on the probe-instrumentation in the application -- when not
+   // defined, the calls to the probe functions turn into comments
+//#define STATS__ENABLE_PROBES
+
+   //when STATS__TURN_ON_PROBES is defined allows using probes to measure
+   // time intervals.  The probes are macros that only compile to something
+   // when STATS__TURN_ON_PROBES is defined.  The probes are saved in the
+   // master env -- but only when this is defined.
+   //The TSC probes use RDTSC instr, can be unreliable, Dbl uses gettimeofday
+#define STATS__TURN_ON_PROBES
+//#define STATS__USE_TSC_PROBES
+#define STATS__USE_DBL_PROBES
+
+//typedef struct _IntervalProbe IntervalProbe; //in VMS.h
+
+struct _IntervalProbe   //one probe: time point, interval, or histogram
+ {
+   char           *nameStr;  //label printed along with the probe
+   int32           probeID;   //the ID the probe-creating calls return
+      //next four are written by VMS_impl__record_sched_choice_into_probe
+   int32           schedChoiceWasRecorded;
+   int32           coreNum;
+   int32           procrID;
+   float64         procrCreateSecs;
+      //raw stamps -- type depends on which probe flavor is compiled in
+   #ifdef STATS__USE_TSC_PROBES
+   TSCount    startStamp;
+   TSCount    endStamp;
+   #else
+   struct timeval  startStamp;
+   struct timeval  endStamp;
+   #endif
+   float64         startSecs; //start stamp converted to seconds
+   float64         endSecs;   //0 is read as "single time point" on print
+   float64         interval;  //endSecs - startSecs
+   DblHist        *hist;//if NULL, then is single interval probe
+ };
+
+
+//============================= Statistics ==================================
+
+   //Frequency of TS counts
+   //TODO: change freq for each machine
+#define TSCOUNT_FREQ 3180000000
+
+inline TSCount getTSCount();
+
+
+//======================== Probes =============================
+//
+// Use macros to allow turning probes off with a #define switch
+#ifdef STATS__ENABLE_PROBES
+int32   //makes a new probe, stamps it with the current time, returns its ID
+VMS_impl__record_time_point_into_new_probe( char *nameStr,VirtProcr *animPr);
+#define VMS__record_time_point_into_new_probe( nameStr, animPr ) \
+        VMS_impl__record_time_point_into_new_probe( nameStr, animPr )
+
+int32   //NOTE(review): macro below names a ..._probe_impl fn and passes 1 arg
+VMS_ext_impl__record_time_point_into_new_probe( char *nameStr, VirtProcr *animPr);
+#define VMS_ext__record_time_point_into_new_probe( nameStr ) \
+        VMS_ext_impl__record_time_point_into_new_probe_impl( nameStr )
+
+
+int32   //returns the probeID of the newly created probe
+VMS_impl__create_single_interval_probe( char *nameStr, VirtProcr *animPr );
+#define VMS__create_single_interval_probe( nameStr, animPr ) \
+        VMS_impl__create_single_interval_probe( nameStr, animPr )
+
+
+int32   //same, but a histogram made from the first three args is attached
+VMS_impl__create_histogram_probe( int32   numBins, float64    startValue,
+               float64 binWidth, char    *nameStr, VirtProcr *animPr );
+#define VMS__create_histogram_probe(      numBins, startValue,              \
+                                          binWidth, nameStr, animPr )       \
+        VMS_impl__create_histogram_probe( numBins, startValue,              \
+                                          binWidth, nameStr, animPr )
+
+void    //puts the probe into the master env's name hash table
+VMS_impl__index_probe_by_its_name( int32 probeID, VirtProcr *animPr );
+#define VMS__index_probe_by_its_name( probeID, animPr ) \
+        VMS_impl__index_probe_by_its_name( probeID, animPr )
+
+IntervalProbe *   //looks the probe up in the master env's name hash table
+VMS_impl__get_probe_by_name( char *probeName, VirtProcr *animPr );
+#define VMS__get_probe_by_name( probeName, animPr ) \
+        VMS_impl__get_probe_by_name( probeName, animPr )
+
+void   //copies the procr's core and ID into the probe
+VMS_impl__record_sched_choice_into_probe( int32 probeID, VirtProcr *animPr );
+#define VMS__record_sched_choice_into_probe( probeID, animPr ) \
+        VMS_impl__record_sched_choice_into_probe( probeID, animPr )
+
+void   //stamps the start of an interval
+VMS_impl__record_interval_start_in_probe( int32 probeID );
+#define VMS__record_interval_start_in_probe( probeID ) \
+        VMS_impl__record_interval_start_in_probe( probeID )
+
+void   //stamps the end of an interval
+VMS_impl__record_interval_end_in_probe( int32 probeID );
+#define VMS__record_interval_end_in_probe( probeID ) \
+        VMS_impl__record_interval_end_in_probe( probeID )
+
+void   //prints the one probe chosen by probeID
+VMS_impl__print_stats_of_probe( int32 probeID );
+#define VMS__print_stats_of_probe( probeID ) \
+        VMS_impl__print_stats_of_probe( probeID )
+
+void   //object-like macro below: the call's parens come from the use site
+VMS_impl__print_stats_of_all_probes();
+#define VMS__print_stats_of_all_probes \
+        VMS_impl__print_stats_of_all_probes
+
+
+#else
+int32   //probes off: macros that give an ID yield 0, void ones vanish
+VMS_impl__record_time_point_into_new_probe( char *nameStr,VirtProcr *animPr);
+#define VMS__record_time_point_into_new_probe( nameStr, animPr ) \
+       0 /* do nothing */
+
+int32
+VMS_ext_impl__record_time_point_into_new_probe( char *nameStr, VirtProcr *animPr);
+#define VMS_ext__record_time_point_into_new_probe( nameStr ) \
+       0 /* do nothing */
+
+
+int32
+VMS_impl__create_single_interval_probe( char *nameStr, VirtProcr *animPr );
+#define VMS__create_single_interval_probe( nameStr, animPr ) \
+       0 /* do nothing */
+
+
+int32
+VMS_impl__create_histogram_probe( int32   numBins, float64    startValue,
+               float64 binWidth, char    *nameStr, VirtProcr *animPr );
+#define VMS__create_histogram_probe(      numBins, startValue,              \
+                                          binWidth, nameStr, animPr )       \
+       0 /* do nothing */
+
+void
+VMS_impl__index_probe_by_its_name( int32 probeID, VirtProcr *animPr );
+#define VMS__index_probe_by_its_name( probeID, animPr ) \
+        /* do nothing */
+
+IntervalProbe *   //probes off: the macro yields NULL in place of a probe
+VMS_impl__get_probe_by_name( char *probeName, VirtProcr *animPr );
+#define VMS__get_probe_by_name( probeID, animPr ) \
+       NULL /* do nothing */
+
+void
+VMS_impl__record_sched_choice_into_probe( int32 probeID, VirtProcr *animPr );
+#define VMS__record_sched_choice_into_probe( probeID, animPr ) \
+        /* do nothing */
+
+void
+VMS_impl__record_interval_start_in_probe( int32 probeID );
+#define VMS__record_interval_start_in_probe( probeID ) \
+        /* do nothing */
+
+void
+VMS_impl__record_interval_end_in_probe( int32 probeID );
+#define VMS__record_interval_end_in_probe( probeID ) \
+        /* do nothing */
+
+void
+VMS_impl__print_stats_of_probe( int32 probeID );
+#define VMS__print_stats_of_probe( probeID ) \
+        /* do nothing */
+
+void   //probes off: a call written through the macro compiles to nothing
+VMS_impl__print_stats_of_all_probes();
+#define VMS__print_stats_of_all_probes() \
+        /* do nothing */
+
+#endif   /* defined STATS__ENABLE_PROBES */
+
+#endif	/* _PROBES_H */
+
diff -r 5388f1c2da6f -r 8f7141a9272e vmalloc.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/vmalloc.c	Sat Oct 30 20:54:36 2010 -0700
@@ -0,0 +1,256 @@
+/*
+ *  Copyright 2009 OpenSourceCodeStewardshipFoundation.org
+ *  Licensed under GNU General Public License version 2
+ *
+ * Author: seanhalle@yahoo.com
+ *
+ * Created on November 14, 2009, 9:07 PM
+ */
+
+#include <malloc.h>
+
+#include "VMS.h"
+
+/*Helper function
+ *Insert a newly generated free chunk into the first spot on the free list.
+ * The chunk is cast as a MallocProlog, so the various pointers in it are
+ * accessed with C's help -- and the size of the prolog is easily added to
+ * the pointer when a chunk is returned to the app -- so C handles changes
+ * in pointer sizes among machines.
+ *
+ *The list head is a normal MallocProlog struct -- identified by its
+ * prevChunkInFreeList being NULL -- the only one.
+ *
+ *The end of the list is identified by next chunk being NULL, as usual.
+ */
+void inline
+add_chunk_to_free_list( MallocProlog *chunk, MallocProlog *listHead )
+ { MallocProlog *oldFirst = listHead->nextChunkInFreeList;
+      //splice chunk in between the head and whatever used to be first
+   chunk->nextChunkInFreeList    = oldFirst;
+   if( oldFirst != NULL ) oldFirst->prevChunkInFreeList = chunk; //not last
+   chunk->prevChunkInFreeList    = listHead;
+   listHead->nextChunkInFreeList = chunk;
+ }
+
+
+/*This is sequential code, meant to only be called from the Master, not from
+ * any slave VPs.
+ *Search down list, checking size by the nextHigherInMem pointer, to find
+ * first chunk bigger than size needed.
+ *Shave off the extra and make it into a new free-list element, hook it in
+ * then return the address of the found element plus size of prolog.
+ *Returns NULL, after reporting the error, when no free chunk is big
+ * enough.  A leftover of 64 bytes or less is not shaved off -- the whole
+ * chunk is handed out instead. */
+void *
+VMS__malloc( int32 sizeRequested )
+ { MallocProlog *foundElem = NULL, *currElem, *newElem;
+   int32         amountExtra, foundElemIsTopOfHeap, sizeConsumed,sizeOfFound;
+
+      //step size up to a multiple of 16 (adds 1..16 bytes), prob better ways
+   sizeRequested = ((sizeRequested + 16) >> 4) << 4;
+   currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList;
+
+   while( currElem != NULL )
+    {    //size of a chunk is the distance up to its next-higher-in-mem addr
+      sizeOfFound=(int32)((char*)currElem->nextHigherInMem -(char*)currElem);
+      amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog);
+      if( amountExtra > 0 )
+       {    //found it, get out of loop
+         foundElem = currElem;
+         currElem = NULL;
+       }
+      else
+         currElem = currElem->nextChunkInFreeList;
+    }
+
+   if( foundElem == NULL )
+    { PRINT_ERROR("\nmalloc failed\n")
+      return NULL;  //indicates malloc failed
+    }
+      //Using a kludge to identify the element that is the top chunk in the
+      // heap -- saving top-of-heap addr in head's nextHigherInMem -- and
+      // save addr of start of heap in head's nextLowerInMem
+      //Will handle top of Heap specially
+   foundElemIsTopOfHeap = foundElem->nextHigherInMem ==
+                          _VMSMasterEnv->freeListHead->nextHigherInMem;
+
+      //before shave off and try to insert new elem, remove found elem
+      //note, foundElem will never be the head, so always has valid prevChunk
+   foundElem->prevChunkInFreeList->nextChunkInFreeList =
+                                              foundElem->nextChunkInFreeList;
+   if( foundElem->nextChunkInFreeList != NULL )
+    { foundElem->nextChunkInFreeList->prevChunkInFreeList =
+                                              foundElem->prevChunkInFreeList;
+    }
+   foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated
+   
+      //if enough, turn extra into new elem & insert it
+   if( amountExtra > 64 )
+    {    //make new elem by adding to addr of curr elem then casting
+      sizeConsumed = sizeof(MallocProlog) + sizeRequested;
+      newElem = (MallocProlog *)( (char *)foundElem + sizeConsumed );
+      newElem->nextHigherInMem   = foundElem->nextHigherInMem;
+      newElem->nextLowerInMem    = foundElem;
+      foundElem->nextHigherInMem = newElem;
+      
+      if( ! foundElemIsTopOfHeap )
+       {    //there is no next higher for top of heap, so can't write to it
+         newElem->nextHigherInMem->nextLowerInMem = newElem;
+       }
+      add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead );
+    }
+   else
+    {    //too little left over to split, so the whole chunk is consumed
+      sizeConsumed = sizeOfFound;
+    }
+  _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed;
+
+      //skip over the prolog by adding its size to the pointer return
+   return (void *)((char *)foundElem + sizeof(MallocProlog));
+ }
+
+
+/*This is sequential code -- only to be called from the Master
+ * When free, subtract the size of prolog from pointer, then cast it to a
+ * MallocProlog.  Then check the nextLower and nextHigher chunks to see if
+ * one or both are also free, and coalesce if so, and if neither free, then
+ * add this one to free-list.  Pointers outside the VMS heap are ignored.
+ */
+void
+VMS__free( void *ptrToFree )
+ { MallocProlog *elemToFree, *nextLowerElem, *nextHigherElem;
+   int32         lowerExistsAndIsFree, higherExistsAndIsFree, sizeOfElem;
+
+   if( ptrToFree < _VMSMasterEnv->freeListHead->nextLowerInMem ||
+       ptrToFree > _VMSMasterEnv->freeListHead->nextHigherInMem )
+    {    //outside the range of data owned by VMS's malloc, so do nothing
+      return;
+    }
+      //subtract size of prolog to get pointer to prolog, then cast
+   elemToFree = (MallocProlog *)((char *)ptrToFree - sizeof(MallocProlog));
+   sizeOfElem =(int32)((char*)elemToFree->nextHigherInMem-(char*)elemToFree);
+   _VMSMasterEnv->amtOfOutstandingMem -= sizeOfElem;
+
+   nextLowerElem  = elemToFree->nextLowerInMem;
+   nextHigherElem = elemToFree->nextHigherInMem;
+      //top chunk's next-higher is the top-of-heap addr, which is no chunk
+   if( nextHigherElem == NULL ||
+       nextHigherElem == _VMSMasterEnv->freeListHead->nextHigherInMem )
+      higherExistsAndIsFree = FALSE;
+   else //okay exists, now check if in the free-list by checking back ptr
+      higherExistsAndIsFree = (nextHigherElem->prevChunkInFreeList != NULL);
+
+   if( nextLowerElem == NULL )
+      lowerExistsAndIsFree = FALSE;
+   else //okay, it exists, now check if it's free
+      lowerExistsAndIsFree = (nextLowerElem->prevChunkInFreeList != NULL);
+
+      //now, know what exists and what's free
+   if( lowerExistsAndIsFree )
+    { if( higherExistsAndIsFree )
+       {    //both exist and are free, so coalesce all three
+            //First, remove higher from free-list
+         nextHigherElem->prevChunkInFreeList->nextChunkInFreeList =
+                                         nextHigherElem->nextChunkInFreeList;
+         if( nextHigherElem->nextChunkInFreeList != NULL ) //end-of-list?
+            nextHigherElem->nextChunkInFreeList->prevChunkInFreeList =
+                                         nextHigherElem->prevChunkInFreeList;
+            //Now, fix-up sequence-in-mem list -- by side-effect, this also
+            // changes size of the lower elem, which is still in free-list
+         nextLowerElem->nextHigherInMem = nextHigherElem->nextHigherInMem;
+         if( nextHigherElem->nextHigherInMem !=
+             _VMSMasterEnv->freeListHead->nextHigherInMem )
+            nextHigherElem->nextHigherInMem->nextLowerInMem = nextLowerElem;
+            //notice didn't do anything to elemToFree -- it simply is no
+            // longer reachable from any of the lists.  Wonder if could be a
+            // security leak because left valid addresses in it,
+            // but don't care for now.
+       }
+      else
+       {    //lower is the only of the two that exists and is free,
+            //In this case, no adjustment to free-list, just change mem-list.
+            // By side-effect, changes size of the lower elem
+         nextLowerElem->nextHigherInMem = elemToFree->nextHigherInMem;
+         if( elemToFree->nextHigherInMem !=
+             _VMSMasterEnv->freeListHead->nextHigherInMem )
+            elemToFree->nextHigherInMem->nextLowerInMem = nextLowerElem;
+       }
+    }
+   else
+    {    //lower either doesn't exist or isn't free, so check higher
+      if( higherExistsAndIsFree )
+       {    //higher exists and is the only of the two free
+            //First, in free-list, replace higher elem with the one to free
+         elemToFree->nextChunkInFreeList=nextHigherElem->nextChunkInFreeList;
+         elemToFree->prevChunkInFreeList=nextHigherElem->prevChunkInFreeList;
+         elemToFree->prevChunkInFreeList->nextChunkInFreeList = elemToFree;
+         if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list?
+            elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree;
+            //Now chg mem-list. By side-effect, changes size of elemToFree
+         elemToFree->nextHigherInMem = nextHigherElem->nextHigherInMem;
+         if( elemToFree->nextHigherInMem !=
+             _VMSMasterEnv->freeListHead->nextHigherInMem )
+            elemToFree->nextHigherInMem->nextLowerInMem = elemToFree;
+       }
+      else
+       {    //neither lower nor higher is available to coalesce so add to list
+            // this makes prev chunk ptr non-null, which indicates it's free
+         elemToFree->nextChunkInFreeList =
+                            _VMSMasterEnv->freeListHead->nextChunkInFreeList;
+         _VMSMasterEnv->freeListHead->nextChunkInFreeList = elemToFree;
+         if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list?
+            elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree;
+         elemToFree->prevChunkInFreeList = _VMSMasterEnv->freeListHead;
+       }
+    }
+
+ }
+
+
+/*Designed to be called from the main thread outside of VMS, during init.
+ *Makes the list head plus one free chunk that spans the whole heap */
+MallocProlog *
+VMS__create_free_list()
+ { MallocProlog *freeListHead, *firstChunk, *topOfHeap;
+
+      //Note, this is running in the main thread -- all increases in malloc
+      // mem and all frees of it must be done in this thread, with the
+      // thread's original stack available
+   freeListHead = malloc( sizeof(MallocProlog) );
+   firstChunk   = malloc( MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE );
+   if( freeListHead == NULL || firstChunk == NULL )
+    { printf("malloc error\n"); exit(1); }
+      //addr one past the end of the heap -- used to mark top-of-heap
+   topOfHeap = (MallocProlog *)( (char *)firstChunk +
+                                 MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE );
+   freeListHead->prevChunkInFreeList = NULL;
+      //Use this addr to free the heap when cleanup
+   freeListHead->nextLowerInMem      = firstChunk;
+      //to identify top-of-heap elem, compare this addr to elem's next higher
+   freeListHead->nextHigherInMem     = topOfHeap;
+   freeListHead->nextChunkInFreeList = firstChunk;
+
+   firstChunk->nextChunkInFreeList   = NULL;
+   firstChunk->prevChunkInFreeList   = freeListHead;
+      //next Higher has to be set to top of chunk, so can calc size in malloc
+   firstChunk->nextHigherInMem       = topOfHeap;
+   firstChunk->nextLowerInMem        = NULL; //identifies as bott of heap
+   return freeListHead;
+ }
+
+
+/*Designed to be called from the main thread outside of VMS, during cleanup.
+ *Frees the heap chunk only; the list head itself is left allocated. */
+void
+VMS_ext__free_free_list( MallocProlog *freeListHead )
+ {    
+      //stashed a ptr to the one and only big chunk malloc'd from OS in the
+      // free list head's next lower in mem pointer
+   free( freeListHead->nextLowerInMem );
+
+   //don't free the head -- it'll be in an array eventually -- free whole
+   // array when all the free lists linked from it have already been freed
+ }
+
diff -r 5388f1c2da6f -r 8f7141a9272e vmalloc.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/vmalloc.h	Sat Oct 30 20:54:36 2010 -0700
@@ -0,0 +1,46 @@
+/*
+ *  Copyright 2009 OpenSourceCodeStewardshipFoundation.org
+ *  Licensed under GNU General Public License version 2
+ *
+ * Author: seanhalle@yahoo.com
+ *
+ * Created on November 14, 2009, 9:07 PM
+ */
+
+#ifndef _VMALLOC_H
+#define _VMALLOC_H
+
+#include <malloc.h>
+#include "VMS_primitive_data_types.h"
+
+typedef struct _MallocProlog MallocProlog;
+
+struct _MallocProlog   //sits at the start of each chunk; also the list head
+ {
+   MallocProlog *nextChunkInFreeList; //NULL at the end of the free list
+   MallocProlog *prevChunkInFreeList; //NULL in the head and when allocated
+   MallocProlog *nextHigherInMem;     //chunk size = this addr - chunk addr
+   MallocProlog *nextLowerInMem;      //NULL in the bottom chunk of the heap
+ };
+//MallocProlog
+
+typedef struct
+ {
+   MallocProlog *firstChunkInFreeList;
+   int32         numInList;
+ }
+FreeListHead;
+
+void *   //NULL when no free chunk is big enough
+VMS__malloc( int32 sizeRequested );
+
+void     //pointers outside the VMS heap are ignored
+VMS__free( void *ptrToFree );
+
+MallocProlog *   //returns the head of a new free list over a fresh heap
+VMS__create_free_list();
+
+void     //frees the heap chunk, not the head itself
+VMS_ext__free_free_list( MallocProlog *freeListHead );
+
+#endif	/* _VMALLOC_H */