# HG changeset patch
# User Me
# Date 1288920447 25200
# Node ID 85b731b290f8921a9fd160bdfe98a681889133b0
# Parent  420a09d3f32a85cd56c048f390e0542c32fe83d3
# Parent  f8508572f3de9080da6148d30219271c7ecff065
Merge between VCilk and SSR intermediate Nov 4

diff -r 420a09d3f32a -r 85b731b290f8 CoreLoop.c
--- a/CoreLoop.c	Thu Nov 04 17:57:39 2010 -0700
+++ b/CoreLoop.c	Thu Nov 04 18:27:27 2010 -0700
@@ -108,16 +108,19 @@
        {    //run own MasterVP -- when its done, unlocks MasterLock and
             // jumps back to coreLoops's startPt
          currPr = _VMSMasterEnv->masterVPs[thisCoresIdx];
-         if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 10000 )
-          { //printf("10000 back to back MasterVP\n");
+         if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 100 )
+          { //printf("100 back to back MasterVP\n");
+            //TODO: turn this into work-stealing from another core
+            //only yield if no work to steal -- and count consecutive yields
+            // if too many of those, then sleep for 10ms or whatever
             pthread_yield();
           }
          _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1;
          break;  //end while -- have a VP to animate now
        }
       
-      tries++;
-      if( tries > READYTOANIMATE_RETRIES ) { tries = 0; pthread_yield(); }
+      tries++;      //if too many, means master on other core taking too long
+      if( tries > MASTERLOCK_RETRIES ) { tries = 0; pthread_yield(); }
     }
    
 
@@ -206,17 +209,15 @@
    readyToAnimateQ  = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx];
    currPr = (VirtProcr *) readSRSWQ_NonBlocking( readyToAnimateQ );
    if( currPr == NULL )
-    { if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] )
-         printf("back to back MasterVP\n");
-      _VMSMasterEnv->numMasterInARow[thisCoresIdx] = TRUE;
+    { if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 )
+       { printf("too many back to back MasterVP\n"); exit(1); }
+      _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1;
+      
       currPr = _VMSMasterEnv->masterVPs[thisCoresIdx];
     }
    else
-    _VMSMasterEnv->numMasterInARow[thisCoresIdx] = FALSE;
+      _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0;
 
-         PRINT2_DEBUG("core %d loop procr addr: %d\n",\
-                       coreLoopThdParams->coreNum,    \
-                       (int)currPr )
 
       //switch to virt procr's stack and frame ptr then jump to virt procr
    void *stackPtr, *framePtr, *jmpPt, *coreLoopFramePtrAddr, \
diff -r 420a09d3f32a -r 85b731b290f8 MasterLoop.c
--- a/MasterLoop.c	Thu Nov 04 17:57:39 2010 -0700
+++ b/MasterLoop.c	Thu Nov 04 18:27:27 2010 -0700
@@ -7,7 +7,6 @@
 
 
 #include <stdio.h>
-#include <malloc.h>
 #include <stddef.h>
 
 #include "VMS.h"
diff -r 420a09d3f32a -r 85b731b290f8 VMS.c
--- a/VMS.c	Thu Nov 04 17:57:39 2010 -0700
+++ b/VMS.c	Thu Nov 04 18:27:27 2010 -0700
@@ -33,6 +33,8 @@
 MallocProlog *
 create_free_list();
 
+void
+endOSThreadFn( void *initData, VirtProcr *animatingPr );
 
 pthread_mutex_t suspendLock = PTHREAD_MUTEX_INITIALIZER;
 pthread_cond_t  suspend_cond  = PTHREAD_COND_INITIALIZER;
@@ -89,55 +91,55 @@
    int              coreIdx;
    VirtProcr      **masterVPs;
    SchedSlot     ***allSchedSlots; //ptr to array of ptrs
-   
+
+
       //Make the master env, which holds everything else
    _VMSMasterEnv = malloc( sizeof(MasterEnv) );
+
+        //Very first thing put into the master env is the free-list, seeded
+        // with a massive initial chunk of memory.
+        //After this, all other mallocs are VMS__malloc.
+   _VMSMasterEnv->freeListHead        = VMS_ext__create_free_list();
+
+   //===================== Only VMS__malloc after this ====================
    masterEnv     = _VMSMasterEnv;
-      //Need to set start pt here 'cause used by seed procr, which is created
-      // before the first core loop starts up. -- not sure how yet..
-//   masterEnv->coreLoopStartPt = ;
-//   masterEnv->coreLoopEndPt   = ;
    
       //Make a readyToAnimateQ for each core loop
-   readyToAnimateQs = malloc( NUM_CORES * sizeof(SRSWQueueStruc *) );
-   masterVPs        = malloc( NUM_CORES * sizeof(VirtProcr *) );
+   readyToAnimateQs = VMS__malloc( NUM_CORES * sizeof(SRSWQueueStruc *) );
+   masterVPs        = VMS__malloc( NUM_CORES * sizeof(VirtProcr *) );
 
       //One array for each core, 3 in array, core's masterVP scheds all
-   allSchedSlots    = malloc( NUM_CORES * sizeof(SchedSlot *) );
+   allSchedSlots    = VMS__malloc( NUM_CORES * sizeof(SchedSlot *) );
 
+   _VMSMasterEnv->numProcrsCreated = 0;  //used by create procr
    for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ )
-    {    //running in main thread -- normal malloc inside makeSRSWQ
+    {    
       readyToAnimateQs[ coreIdx ] = makeSRSWQ();
       
          //Q: should give masterVP core-specific info as its init data?
-      masterVPs[ coreIdx ] = VMS_ext__create_procr( &masterLoop, masterEnv );
+      masterVPs[ coreIdx ] = VMS__create_procr( &masterLoop, masterEnv );
       masterVPs[ coreIdx ]->coreAnimatedBy = coreIdx;
       allSchedSlots[ coreIdx ] = create_sched_slots(); //makes for one core
-      _VMSMasterEnv->numMasterInARow[ coreIdx ] = FALSE;
+      _VMSMasterEnv->numMasterInARow[ coreIdx ] = 0;
     }
    _VMSMasterEnv->readyToAnimateQs = readyToAnimateQs;
    _VMSMasterEnv->masterVPs        = masterVPs;
    _VMSMasterEnv->masterLock       = UNLOCKED;
    _VMSMasterEnv->allSchedSlots    = allSchedSlots;
-   _VMSMasterEnv->numProcrsCreated = 0;
 
 
       //Aug 19, 2010:  no longer need to place initial masterVP into queue
       // because coreLoop now controls -- animates its masterVP when no work
 
-   _VMSMasterEnv->freeListHead        = VMS__create_free_list();
-   _VMSMasterEnv->amtOfOutstandingMem = 0; //none allocated yet
 
    //============================= MEASUREMENT STUFF ========================
    #ifdef STATS__TURN_ON_PROBES
-      //creates intervalProbes array and sets pointer to it in masterEnv too
    _VMSMasterEnv->dynIntervalProbesInfo =
-                  makeDynArrayOfSize( &(_VMSMasterEnv->intervalProbes), 20 );
+              makePrivDynArrayOfSize( &(_VMSMasterEnv->intervalProbes), 200);
 
-   _VMSMasterEnv->probeNameHashTbl = makeHashTable( 1000, NULL );
-   _VMSMasterEnv->masterCreateProbeID =
-       VMS_ext__record_time_point_into_new_probe( "masterCreateProbe" );
-      //Also put creation time directly into master env, for fast retrieval
+   _VMSMasterEnv->probeNameHashTbl = makeHashTable( 1000, &VMS__free );
+   
+      //put creation time directly into master env, for fast retrieval
    struct timeval timeStamp;
    gettimeofday( &(timeStamp), NULL);
    _VMSMasterEnv->createPtInSecs =
@@ -152,11 +154,11 @@
  { SchedSlot  **schedSlots;
    int i;
 
-   schedSlots  = malloc( NUM_SCHED_SLOTS * sizeof(SchedSlot *) );
+   schedSlots  = VMS__malloc( NUM_SCHED_SLOTS * sizeof(SchedSlot *) );
 
    for( i = 0; i < NUM_SCHED_SLOTS; i++ )
     {
-      schedSlots[i] = malloc( sizeof(SchedSlot) );
+      schedSlots[i] = VMS__malloc( sizeof(SchedSlot) );
 
          //Set state to mean "handling requests done, slot needs filling"
       schedSlots[i]->workIsDone         = FALSE;
@@ -171,9 +173,9 @@
  { int i;
    for( i = 0; i < NUM_SCHED_SLOTS; i++ )
     {
-      free( schedSlots[i] );
+      VMS__free( schedSlots[i] );
     }
-   free( schedSlots );
+   VMS__free( schedSlots );
  }
 
 
@@ -191,7 +193,7 @@
 
       //Make the threads that animate the core loops
    for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ )
-    { coreLoopThdParams[coreIdx]          = malloc( sizeof(ThdParams) );
+    { coreLoopThdParams[coreIdx]          = VMS__malloc( sizeof(ThdParams) );
       coreLoopThdParams[coreIdx]->coreNum = coreIdx;
 
       retCode =
@@ -263,11 +265,12 @@
  {
    char  *stackPtr;
 
-   newPr->procrID     = _VMSMasterEnv->numProcrsCreated++;
-   newPr->nextInstrPt = fnPtr;
-   newPr->initialData = initialData;
-   newPr->requests    = NULL;
-   newPr->schedSlot   = NULL;
+   newPr->startOfStack = stackLocs;
+   newPr->procrID      = _VMSMasterEnv->numProcrsCreated++;
+   newPr->nextInstrPt  = fnPtr;
+   newPr->initialData  = initialData;
+   newPr->requests     = NULL;
+   newPr->schedSlot    = NULL;
 
       //fnPtr takes two params -- void *initData & void *animProcr
       //alloc stack locations, make stackPtr be the highest addr minus room
@@ -285,7 +288,8 @@
    #ifdef STATS__TURN_ON_PROBES
    struct timeval timeStamp;
    gettimeofday( &(timeStamp), NULL);
-   newPr->createPtInSecs = timeStamp.tv_sec +(timeStamp.tv_usec/1000000.0);
+   newPr->createPtInSecs = timeStamp.tv_sec +(timeStamp.tv_usec/1000000.0) -
+                                               _VMSMasterEnv->createPtInSecs;
    #endif
    //========================================================================
 
@@ -301,7 +305,6 @@
    stackLocs  = VMS__malloc( VIRT_PROCR_STACK_SIZE );
    if( stackLocs == 0 )
     { perror("VMS__malloc stack"); exit(1); }
-   newPr->startOfStack = stackLocs;
 
    return create_procr_helper( newPr, fnPtr, initialData, stackLocs );
  }
@@ -319,7 +322,6 @@
    stackLocs  = malloc( VIRT_PROCR_STACK_SIZE );
    if( stackLocs == 0 )
     { perror("malloc stack"); exit(1); }
-   newPr->startOfStack = stackLocs;
 
    return create_procr_helper( newPr, fnPtr, initialData, stackLocs );
  }
@@ -477,18 +479,26 @@
 
 
 
-/*This inserts the semantic-layer's request data into standard VMS carrier
- * request data-struct is allocated on stack of this call & ptr to it sent
- * to plugin
+/*This call's name indicates that request is malloc'd -- so req handler
+ * has to free any extra requests tacked on before a send, using this.
+ *
+ * This inserts the semantic-layer's request data into standard VMS carrier
+ * request data-struct that is mallocd.  The sem request doesn't need to
+ * be malloc'd if this is called inside the same call chain before the
+ * send of the last request is called.
+ *
+ *The request handler has to call VMS__free_VMSReq for any of these
  */
 inline void
-VMS__add_sem_request( void *semReqData, VirtProcr *callingPr )
- { VMSReqst req;
+VMS__add_sem_request_in_mallocd_VMSReqst( void *semReqData,
+                                          VirtProcr *callingPr )
+ { VMSReqst *req;
 
-   req.reqType         = semantic;
-   req.semReqData      = semReqData;
-   req.nextReqst       = callingPr->requests;
-   callingPr->requests = &req;
+   req = VMS__malloc( sizeof(VMSReqst) );
+   req->reqType         = semantic;
+   req->semReqData      = semReqData;
+   req->nextReqst       = callingPr->requests;
+   callingPr->requests = req;
  }
 
 /*This inserts the semantic-layer's request data into standard VMS carrier
@@ -573,10 +583,12 @@
    memcpy( newProbe->nameStr, semReq->nameStr, nameLen );
    newProbe->hist    = NULL;
    newProbe->schedChoiceWasRecorded = FALSE;
+
+      //This runs in masterVP, so no race-condition worries
    newProbe->probeID =
              addToDynArray( newProbe, _VMSMasterEnv->dynIntervalProbesInfo );
 
-   requestingPr->dataReturnedFromReq = newProbe;
+   requestingPr->dataRetFromReq = newProbe;
 
    (*resumePrFnPtr)( requestingPr, semEnv );
  }
@@ -619,15 +631,13 @@
  }
 
 
-//TODO: re-architect so that have clean separation between request handler
+//TODO: look at architecting cleanest separation between request handler
 // and master loop, for dissipate, create, shutdown, and other non-semantic
 // requests.  Issue is chain: one removes requests from AppVP, one dispatches
 // on type of request, and one handles each type..  but some types require
 // action from both request handler and master loop -- maybe just give the
 // request handler calls like:  VMS__handle_X_request_type
 
-void
-endOSThreadFn( void *initData, VirtProcr *animatingPr );
 
 /*This is called by the semantic layer's request handler when it decides its
  * time to shut down the VMS system.  Calling this causes the core loop OS
@@ -641,10 +651,9 @@
  * masterVP any AppVPs that might still be allocated and sitting in the
  * semantic environment, or have been orphaned in the _VMSWorkQ.
  * 
- *NOTE: the semantic plug-in is expected to use VMS__malloc_to to get all the
+ *NOTE: the semantic plug-in is expected to use VMS__malloc to get all the
  * locations it needs, and give ownership to masterVP.  Then, they will be
- * automatically freed when the masterVP is dissipated.  (This happens after
- * the core loop threads have all exited)
+ * automatically freed.
  *
  *In here,create one core-loop shut-down processor for each core loop and put
  * them all directly into the readyToAnimateQ.
@@ -655,7 +664,7 @@
  * point is it sure that all results have completed.
  */
 void
-VMS__handle_shutdown_reqst( void *dummy, VirtProcr *animatingPr )
+VMS__shutdown()
  { int coreIdx;
    VirtProcr *shutDownPr;
 
@@ -703,19 +712,19 @@
  }
 
 
-/*This is called after the threads have shut down and control has returned
- * to the semantic layer, in the entry point function in the main thread.
- * It has to free anything allocated during VMS_init, and any other alloc'd
- * locations that might be left over.
+/*Called from startup & shutdown; frees the free-list, then the master env
  */
 void
-VMS__cleanup_after_shutdown()
+VMS__cleanup_at_end_of_shutdown()
  { 
    SRSWQueueStruc **readyToAnimateQs;
    int              coreIdx;
    VirtProcr      **masterVPs;
    SchedSlot     ***allSchedSlots; //ptr to array of ptrs
 
+      //All the environment data has been allocated with VMS__malloc, so just
+      // free its internal big-chunk and all inside it disappear.
+/*
    readyToAnimateQs = _VMSMasterEnv->readyToAnimateQs;
    masterVPs        = _VMSMasterEnv->masterVPs;
    allSchedSlots    = _VMSMasterEnv->allSchedSlots;
@@ -724,23 +733,40 @@
     {
       freeSRSWQ( readyToAnimateQs[ coreIdx ] );
          //master VPs were created external to VMS, so use external free
-      VMS_ext__dissipate_procr( masterVPs[ coreIdx ] );
+      VMS__dissipate_procr( masterVPs[ coreIdx ] );
       
       freeSchedSlots( allSchedSlots[ coreIdx ] );
     }
    
-   free( _VMSMasterEnv->readyToAnimateQs );
-   free( _VMSMasterEnv->masterVPs );
-   free( _VMSMasterEnv->allSchedSlots );
+   VMS__free( _VMSMasterEnv->readyToAnimateQs );
+   VMS__free( _VMSMasterEnv->masterVPs );
+   VMS__free( _VMSMasterEnv->allSchedSlots );
    
-   VMS_ext__free_free_list( _VMSMasterEnv->freeListHead );
-
    //============================= MEASUREMENT STUFF ========================
    #ifdef STATS__TURN_ON_PROBES
-   freeDynArrayDeep( _VMSMasterEnv->dynIntervalProbesInfo, &free );
+   freeDynArrayDeep( _VMSMasterEnv->dynIntervalProbesInfo, &VMS__free_probe);
    #endif
    //========================================================================
-
-   free( _VMSMasterEnv );
+*/
+      //These are the only two that use system free 
+   VMS_ext__free_free_list( _VMSMasterEnv->freeListHead );
+   free( (void *)_VMSMasterEnv );
  }
 
+
+//================================
+
+
+/*Later, improve this -- for now, prints msgStr verbatim to stdout, flushes
+ * stdout, then exits the application with status 1.  reqstPr and excpData
+ * are accepted but not used yet.
+ */
+void
+VMS__throw_exception( char *msgStr, VirtProcr *reqstPr, VMSExcp *excpData )
+ {
+   printf("%s", msgStr);  //msgStr is data, never the format string
+   fflush(stdout);        //flush the output; fflush on stdin is undefined
+   exit(1);
+ }
+
+
+
diff -r 420a09d3f32a -r 85b731b290f8 VMS.h
--- a/VMS.h	Thu Nov 04 17:57:39 2010 -0700
+++ b/VMS.h	Thu Nov 04 18:27:27 2010 -0700
@@ -22,6 +22,10 @@
 
 
 //===============================  Debug  ===================================
+   //These defines turn types of debug messages on and off
+#define dbgProbes FALSE
+#define dbgAppFlow FALSE
+
    //When SEQUENTIAL is defined, VMS does sequential exe in the main thread
    // It still does co-routines and all the mechanisms are the same, it just
    // has only a single thread and animates VPs one at a time
@@ -32,7 +36,8 @@
 #define STATS__ENABLE_PROBES
 
 
-#define PRINT_DEBUG(msg)// printf(msg); fflush(stdin);
+#define DEBUG(msg)// printf(msg); fflush(stdin);
+#define DEBUG_MSG( bool, msg) //if( bool){ printf(msg); fflush(stdin);}
 #define PRINT1_DEBUG(msg, param) //printf(msg, param); fflush(stdin);
 #define PRINT2_DEBUG(msg, p1, p2) //printf(msg, p1, p2); fflush(stdin);
 
@@ -40,7 +45,6 @@
 #define PRINT1_ERROR(msg, param) printf(msg, param); fflush(stdin);
 #define PRINT2_ERROR(msg, p1, p2) printf(msg, p1, p2); fflush(stdin);
 
-
 //===========================  STATS =======================
 
    //when MEAS__TIME_STAMP_SUSP is defined, causes code to be inserted and
@@ -65,13 +69,13 @@
 
 #define MIN_WORK_UNIT_CYCLES 20000
 
-#define READYTOANIMATE_RETRIES 10000
+#define MASTERLOCK_RETRIES 10000
 
-   // stack
-#define VIRT_PROCR_STACK_SIZE 0x4000
+   // stack size in virtual processors created
+#define VIRT_PROCR_STACK_SIZE 0x4000 /* 16K */
 
-   // memory for VMS__malloc -- 256M
-#define MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE 0x10000000
+   // memory for VMS__malloc
+#define MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE 0x10000000 /* 256M */
 
 
 //==============================
@@ -163,7 +167,7 @@
    VMSReqst   *requests;
 
    void       *semanticData; //this lives here for the life of VP
-   void       *dataReturnedFromReq;//values returned from plugin to VP go here
+   void       *dataRetFromReq;//values returned from plugin to VP go here
 
       //=========== MEASUREMENT STUFF ==========
    #ifdef MEAS__TIME_STAMP_SUSP
@@ -205,14 +209,19 @@
 
       //=========== MEASUREMENT STUFF =============
    IntervalProbe  **intervalProbes;
-   DynArrayInfo    *dynIntervalProbesInfo;
+   PrivDynArrayInfo    *dynIntervalProbesInfo;
    HashTable       *probeNameHashTbl;
    int32            masterCreateProbeID;
    float64          createPtInSecs;
  }
 MasterEnv;
 
+//=============================
+typedef struct
+ {
 
+ }
+VMSExcp;
 
 
 //=======================  OS Thread related  ===============================
@@ -245,7 +254,8 @@
 
 //===========================  Function Prototypes  =========================
 
-//============== Setup and shutdown =============
+
+//========== Setup and shutdown ==========
 void
 VMS__init();
 
@@ -261,16 +271,25 @@
 VirtProcr *
 VMS__create_procr( VirtProcrFnPtr fnPtr, void *initialData );
 
+void
+VMS__dissipate_procr( VirtProcr *procrToDissipate );
+
    //Use this to create processor inside entry point & other places outside
    // the VMS system boundary (IE, not run in slave nor Master)
 VirtProcr *
 VMS_ext__create_procr( VirtProcrFnPtr fnPtr, void *initialData );
 
-VirtProcr *
-VMS__create_the_shutdown_procr();
+void
+VMS_ext__dissipate_procr( VirtProcr *procrToDissipate );
 
 void
-VMS__cleanup_after_shutdown();
+VMS__throw_exception( char *msgStr, VirtProcr *reqstPr, VMSExcp *excpData );
+
+void
+VMS__shutdown();
+
+void
+VMS__cleanup_at_end_of_shutdown();
 
 
 //==============  Request Related  ===============
@@ -279,49 +298,29 @@
 VMS__suspend_procr( VirtProcr *callingPr );
 
 inline void
-VMS__add_sem_request( void *semReqData, VirtProcr *callingPr );
+VMS__add_sem_request_in_mallocd_VMSReqst( void *semReqData, VirtProcr *callingPr );
+
+inline void
+VMS__send_sem_request( void *semReqData, VirtProcr *callingPr );
 
 void
 VMS__send_create_procr_req( void *semReqData, VirtProcr *reqstingPr );
 
+void inline
+VMS__send_dissipate_req( VirtProcr *prToDissipate );
+
 inline void
 VMS__send_VMSSem_request( void *semReqData, VirtProcr *callingPr );
 
-void
-VMS__free_request( VMSReqst *req );
-
-void
-VMS__remove_and_free_top_request( VirtProcr *reqstingPr );
-
 VMSReqst *
 VMS__take_next_request_out_of( VirtProcr *procrWithReq );
 
 inline void *
 VMS__take_sem_reqst_from( VMSReqst *req );
-//
-//VMSReqst *
-//VMS__take_top_request_from( VirtProcr *reqstingPr );
-//
-//inline int
-//VMS__isSemanticReqst( VMSReqst *req );
-//
-//inline int
-//VMS__isDissipateReqst( VMSReqst *req );
-//
-//inline int
-//VMS__isCreateReqst( VMSReqst *req );
 
-//==========================
+//======================== STATS ======================
 
-void inline
-VMS__send_dissipate_req( VirtProcr *prToDissipate );
-
-void
-VMS__dissipate_procr( VirtProcr *procrToDissipate );
-
-
-
-//===================== RDTSC wrapper ==================
+//===== RDTSC wrapper =====
 
 #define saveTimeStampCountInto(low, high) \
    asm volatile("RDTSC;                   \
@@ -339,8 +338,7 @@
    /* inputs  */ :                        \
    /* clobber */ : "%eax", "%edx"         \
                 );
-
-//======================== STATS ======================
+//=====
 
 #include "probes.h"
 
diff -r 420a09d3f32a -r 85b731b290f8 probes.c
--- a/probes.c	Thu Nov 04 17:57:39 2010 -0700
+++ b/probes.c	Thu Nov 04 18:27:27 2010 -0700
@@ -31,17 +31,6 @@
 
 //====================  Probes =================
 #ifdef STATS__USE_TSC_PROBES
-int32
-VMS__create_single_interval_probe( char *nameStr )
- { IntervalProbe *newProbe;
-   int32 idx;
-
-   newProbe = malloc( sizeof(IntervalProbe) );
-   newProbe->nameStr = nameStr;  //caller frees if not constant on stack
-   newProbe->hist = NULL;
-   idx = addToDynArray( newProbe, _VMSMasterEnv->dynIntervalProbesInfo );
-   return idx;
- }
 
 int32
 VMS__create_histogram_probe( int32 numBins, float32 startValue,
@@ -131,7 +120,7 @@
 
    VMS__send_VMSSem_request( &reqData, animPr );
 
-   return animPr->dataReturnedFromReq;
+   return animPr->dataRetFromReq;
  }
 
 /*Use this version from outside VMS -- it uses external malloc, and modifies
@@ -154,8 +143,19 @@
    return newProbe;
  }
 
+
+/*Frees the probe's hist and nameStr when non-NULL, then the probe itself.
+ *Only call from inside master or main startup/shutdown thread */
+void
+VMS_impl__free_probe( IntervalProbe *probe )
+ { if( probe->hist != NULL )   freeDblHist( probe->hist );
+   if( probe->nameStr != NULL) VMS__free( probe->nameStr );
+   VMS__free( probe );
+ }
+
+
 int32
-VMS_impl__record_time_point_into_new_probe( char *nameStr, VirtProcr *animPr )
+VMS_impl__record_time_point_into_new_probe( char *nameStr, VirtProcr *animPr)
  { IntervalProbe *newProbe;
    struct timeval *startStamp;
    float64 startSecs;
@@ -243,7 +243,7 @@
    probe->schedChoiceWasRecorded = TRUE;
    probe->coreNum = animatingPr->coreAnimatedBy;
    probe->procrID = animatingPr->procrID;
-   probe->procrCreateSecs = 0;
+   probe->procrCreateSecs = animatingPr->createPtInSecs;
  }
 
 /*Everything is local to the animating procr, so no need for request, do
@@ -253,6 +253,7 @@
 VMS_impl__record_interval_start_in_probe( int32 probeID )
  { IntervalProbe *probe;
 
+         DEBUG_MSG( dbgProbes, "record start of interval\n" )
    probe = _VMSMasterEnv->intervalProbes[ probeID ];
    gettimeofday( &(probe->startStamp), NULL );
  }
@@ -265,8 +266,9 @@
 VMS_impl__record_interval_end_in_probe( int32 probeID )
  { IntervalProbe *probe;
    struct timeval *endStamp, *startStamp;
-   double startSecs, endSecs;
+   float64 startSecs, endSecs;
 
+         DEBUG_MSG( dbgProbes, "record end of interval\n" )
       //possible seg-fault if array resized by diff core right after this
       // one gets probe..?  Something like that?  Might be safe.. don't care
    probe = _VMSMasterEnv->intervalProbes[ probeID ];
diff -r 420a09d3f32a -r 85b731b290f8 probes.h
--- a/probes.h	Thu Nov 04 17:57:39 2010 -0700
+++ b/probes.h	Thu Nov 04 18:27:27 2010 -0700
@@ -87,6 +87,10 @@
                                           binWidth, nameStr, animPr )       \
         VMS_impl__create_histogram_probe( numBins, startValue,              \
                                           binWidth, nameStr, animPr )
+void
+VMS_impl__free_probe( IntervalProbe *probe );
+#define VMS__free_probe( probe ) \
+        VMS_impl__free_probe( probe )
 
 void
 VMS_impl__index_probe_by_its_name( int32 probeID, VirtProcr *animPr );
diff -r 420a09d3f32a -r 85b731b290f8 vmalloc.c
--- a/vmalloc.c	Thu Nov 04 17:57:39 2010 -0700
+++ b/vmalloc.c	Thu Nov 04 18:27:27 2010 -0700
@@ -8,6 +8,7 @@
  */
 
 #include <malloc.h>
+#include <stdlib.h>
 
 #include "VMS.h"
 
@@ -123,14 +124,19 @@
  { MallocProlog *elemToFree, *nextLowerElem, *nextHigherElem;
    int32         lowerExistsAndIsFree, higherExistsAndIsFree, sizeOfElem;
 
-   if( ptrToFree < _VMSMasterEnv->freeListHead->nextLowerInMem ||
-       ptrToFree > _VMSMasterEnv->freeListHead->nextHigherInMem )
+   if( ptrToFree < (void*)_VMSMasterEnv->freeListHead->nextLowerInMem ||
+       ptrToFree > (void*)_VMSMasterEnv->freeListHead->nextHigherInMem )
     {    //outside the range of data owned by VMS's malloc, so do nothing
       return;
     }
       //subtract size of prolog to get pointer to prolog, then cast
    elemToFree = (MallocProlog *)((char *)ptrToFree - sizeof(MallocProlog));
    sizeOfElem =(int32)((char*)elemToFree->nextHigherInMem-(char*)elemToFree);
+
+   if( elemToFree->prevChunkInFreeList != NULL )
+    { printf( "error: freeing same element twice!" ); exit(1);
+    }
+
    _VMSMasterEnv->amtOfOutstandingMem -= sizeOfElem;
 
    nextLowerElem  = elemToFree->nextLowerInMem;
@@ -209,10 +215,73 @@
  }
 
 
+/*Allocates memory from the external system -- higher overhead
+ *
+ *Because of Linux's malloc throwing bizarre random faults when malloc is
+ * used inside a VMS virtual processor, have to pass this as a request and
+ * have the core loop do it when it gets around to it -- will look for these
+ * chores leftover from the previous animation of masterVP the next time it
+ * goes to animate the masterVP -- so it takes two separate masterVP
+ * animations, separated by work, to complete an external malloc or
+ * external free request.
+ *
+ *Thinking core loop accepts signals -- just looks if signal-location is
+ * empty or not --
+ */
+void *
+VMS__malloc_in_ext( int32 sizeRequested )
+ {
+ /*
+      //This is running in the master, so no chance for multiple cores to be
+      // competing for the core's flag.
+   if(  *(_VMSMasterEnv->coreLoopSignalAddr[ 0 ]) != 0 )
+    {    //something has already signalled to core loop, so save the signal
+         // and look, next time master animated, to see if can send it.
+         //Note, the addr to put a signal is in the coreloop's frame, so just
+         // checks it each time through -- make it volatile to avoid GCC
+         // optimizations -- it's a coreloop local var that only changes
+         // after jumping away.  The signal includes the addr to send the
+         //return to -- even if just empty return completion-signal
+         //
+         //save the signal in some queue that the master looks at each time
+         // it starts up -- one loc says if empty for fast common case --
+         //something like that -- want to hide this inside this call -- but
+         // think this has to come as a request -- req handler gives procr
+         // back to master loop, which gives it back to req handler at point
+         // it sees that core loop has sent return signal.  Something like
+         // that.
+      saveTheSignal
+
+    }
+  coreSigData->type = malloc;
+  coreSigData->sizeToMalloc = sizeRequested;
+  coreSigData->locToSignalCompletion = &figureOut;
+   _VMSMasterEnv->coreLoopSignals[ 0 ] = coreSigData;
+  */
+      //for now, just risk system-stack faults until this gets figured out
+   return malloc( sizeRequested );
+ }
+
+
+/*Frees memory that was allocated in the external system -- higher overhead
+ *
+ *As noted in external malloc comment, this is clunky 'cause the free has
+ * to be called in the core loop.
+ */
+void
+VMS__free_in_ext( void *ptrToFree )
+ {
+      //for now, just risk system-stack faults until this gets figured out
+   free( ptrToFree );
+
+      //TODO: fix this -- so the free is handed to the core loop (see above)
+ }
+
+
 /*Designed to be called from the main thread outside of VMS, during init
  */
 MallocProlog *
-VMS__create_free_list()
+VMS_ext__create_free_list()
  { MallocProlog *freeListHead, *firstChunk;
 
       //Note, this is running in the main thread -- all increases in malloc
@@ -226,16 +295,18 @@
       //Use this addr to free the heap when cleanup
    freeListHead->nextLowerInMem      = firstChunk;
       //to identify top-of-heap elem, compare this addr to elem's next higher
-   freeListHead->nextHigherInMem     = (char *)firstChunk +
-                                          MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE;
+   freeListHead->nextHigherInMem     = (void*)( (char*)firstChunk +
+                                         MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE);
    freeListHead->nextChunkInFreeList = firstChunk;
 
    firstChunk->nextChunkInFreeList   = NULL;
    firstChunk->prevChunkInFreeList   = freeListHead;
       //next Higher has to be set to top of chunk, so can calc size in malloc
-   firstChunk->nextHigherInMem       = (char *)firstChunk +
-                                          MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE;
+   firstChunk->nextHigherInMem       = (void*)( (char*)firstChunk +
+                                         MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE);
    firstChunk->nextLowerInMem        = NULL; //identifies as bott of heap
+   
+   _VMSMasterEnv->amtOfOutstandingMem = 0; //none allocated yet
 
    return freeListHead;
  }
diff -r 420a09d3f32a -r 85b731b290f8 vmalloc.h
--- a/vmalloc.h	Thu Nov 04 17:57:39 2010 -0700
+++ b/vmalloc.h	Thu Nov 04 18:27:27 2010 -0700
@@ -34,8 +34,19 @@
 void
 VMS__free( void *ptrToFree );
 
+/*Allocates memory from the external system -- higher overhead
+ */
+void *
+VMS__malloc_in_ext( int32 sizeRequested );
+
+/*Frees memory that was allocated in the external system -- higher overhead
+ */
+void
+VMS__free_in_ext( void *ptrToFree );
+
+
 MallocProlog *
-VMS__create_free_list();
+VMS_ext__create_free_list();
 
 void
 VMS_ext__free_free_list( MallocProlog *freeListHead );