# HG changeset patch
# User Merten Sach <msach@mailbox.tu-berlin.de>
# Date 1305123066 -7200
# Node ID 58d0c2b1d6a4ce96e6f57eceb9ea22464444a548
# Parent  5494943ed3a41879f66701373072eafab23d596a
removed warnings

diff -r 5494943ed3a4 -r 58d0c2b1d6a4 VCilk.h
--- a/VCilk.h	Wed May 11 15:29:58 2011 +0200
+++ b/VCilk.h	Wed May 11 16:11:06 2011 +0200
@@ -1,200 +1,200 @@
-/*
- *  Copyright 2009 OpenSourceStewardshipFoundation.org
- *  Licensed under GNU General Public License version 2
- *
- * Author: seanhalle@yahoo.com
- *
- */
-
-#ifndef _VCilk_H
-#define	_VCilk_H
-
-#include "VMS/Queue_impl/PrivateQueue.h"
-#include "VMS/Hash_impl/PrivateHash.h"
-#include "VMS/VMS.h"
-
-
-
-/*This header defines everything specific to the VCilk semantic plug-in
- */
-
-//===========================================================================
-#define NUM_STRUCS_IN_SEM_ENV 1000
-
-//===========================================================================
-typedef struct _VCilkSemReq   VCilkSemReq;
-typedef void  (*PtrToAtomicFn )   ( void * ); //executed atomically in master
-
-//===========================================================================
-
-
-/*WARNING: assembly hard-codes position of endInstrAddr as first field
- */
-typedef struct
- {
-   void           *endInstrAddr;
-   int32           hasBeenStarted;
-   int32           hasFinished;
-   PrivQueueStruc *waitQ;
- }
-VCilkSingleton;
-
-/*Semantic layer-specific data sent inside a request from lib called in app
- * to request handler called in MasterLoop
- */
-enum VCilkReqType
- {
-   syncReq = 1,
-   mallocReq,
-   freeReq,
-   singleton_fn_start,
-   singleton_fn_end,
-   singleton_data_start,
-   singleton_data_end,
-   atomic,
-   trans_start,
-   trans_end
- };
-
-struct _VCilkSemReq
- { enum VCilkReqType    reqType;
-   VirtProcr           *requestingPr;
-   
-   int32                sizeToMalloc;
-   void                *ptrToFree;
-   
-   VirtProcrFnPtr       fnPtr;
-   void                *initData;
-   int32                coreToSpawnOnto;
-
-   int32              singletonID;
-   VCilkSingleton     **singletonPtrAddr;
-
-   PtrToAtomicFn      fnToExecInMaster;
-   void              *dataForFn;
-
-   int32              transID;
- }
-/* VCilkSemReq */;
-
-typedef struct
- {
-   VirtProcr      *VPCurrentlyExecuting;
-   PrivQueueStruc *waitingVPQ;
- }
-VCilkTrans;
-
-typedef struct
- {
-   PrivQueueStruc **readyVPQs;
-   HashTable       *commHashTbl;
-   int32            numVirtPr;
-   int32            nextCoreToGetNewPr;
-   int32            primitiveStartTime;
-
-                       //fix limit on num with dynArray
-   VCilkSingleton     fnSingletons[NUM_STRUCS_IN_SEM_ENV];
-   VCilkTrans       transactionStrucs[NUM_STRUCS_IN_SEM_ENV];
- }
-VCilkSemEnv;
-
-typedef struct _TransListElem TransListElem;
-struct _TransListElem
- {
-   int32          transID;
-   TransListElem *nextTrans;
- };
-//TransListElem
-
-typedef struct
- {
-   int32          syncPending;
-   int32          numLiveChildren;
-   VirtProcr     *parentPr;
-   
-   int32          highestTransEntered;
-   TransListElem *lastTransEntered;
- }
-VCilkSemData;
-
-//===========================================================================
-
-void
-VCilk__create_seed_procr_and_do_work( VirtProcrFnPtr fn, void *initData );
-
-int32
-VCilk__giveMinWorkUnitCycles( float32 percentOverhead );
-
-void inline
-VCilk__start_primitive();
-
-int32 inline
-VCilk__end_primitive_and_give_cycles();
-
-int32
-VCilk__giveIdealNumWorkUnits();
-
-//=======================
-
-void
-VCilk__init();
-
-void
-VCilk__cleanup_at_end_of_shutdown();
-
-//=======================
-
-void inline
-VCilk__spawn( int32  coreToSpawnOnto, VirtProcrFnPtr  fnPtr,
-              void  *initData,        VirtProcr      *creatingPr );
-
-int32
-VCilk__give_number_of_cores_to_spawn_onto();
-
-void
-VCilk__sync( VirtProcr *animatingPr );
-
-void *
-VCilk__malloc( int32 sizeToMalloc, VirtProcr *animPr );
-
-void
-VCilk__free( void *ptrToFree, VirtProcr *animPr );
-
-void
-VCilk__dissipate_procr( VirtProcr *procrToDissipate );
-
-
-//======================= Concurrency Stuff ======================
-void
-VCilk__start_fn_singleton( int32 singletonID, VirtProcr *animPr );
-
-void
-VCilk__end_fn_singleton( int32 singletonID, VirtProcr *animPr );
-
-void
-VCilk__start_data_singleton( VCilkSingleton **singeltonAddr, VirtProcr *animPr );
-
-void
-VCilk__end_data_singleton( VCilkSingleton **singletonAddr, VirtProcr *animPr );
-
-void
-VCilk__animate_short_fn_in_isolation( PtrToAtomicFn ptrToFnToExecInMaster,
-                                      void *data, VirtProcr *animPr );
-
-void
-VCilk__start_transaction( int32 transactionID, VirtProcr *animPr );
-
-void
-VCilk__end_transaction( int32 transactionID, VirtProcr *animPr );
-
-
-//=========================  Internal use only  =============================
-void
-VCilk__Request_Handler( VirtProcr *requestingPr, void *_semEnv );
-
-VirtProcr *
-VCilk__schedule_virt_procr( void *_semEnv, int coreNum );
-
-
-#endif	/* _VCilk_H */
-
+/*
+ *  Copyright 2009 OpenSourceStewardshipFoundation.org
+ *  Licensed under GNU General Public License version 2
+ *
+ * Author: seanhalle@yahoo.com
+ *
+ */
+
+#ifndef _VCilk_H
+#define	_VCilk_H
+
+#include "VMS/Queue_impl/PrivateQueue.h"
+#include "VMS/Hash_impl/PrivateHash.h"
+#include "VMS/VMS.h"
+
+
+
+/*This header defines everything specific to the VCilk semantic plug-in
+ */
+
+//===========================================================================
+#define NUM_STRUCS_IN_SEM_ENV 1000
+
+//===========================================================================
+typedef struct _VCilkSemReq   VCilkSemReq;
+typedef void  (*PtrToAtomicFn )   ( void * ); //executed atomically in master
+
+//===========================================================================
+
+
+/*State of one singleton.  WARNING: assembly hard-codes position of
+ * endInstrAddr as first field, so the field order must stay as it is. */
+typedef struct
+ {
+   void           *endInstrAddr;   //handed back in dataRetFromReq once the singleton has finished
+   int32           hasBeenStarted; //set TRUE when the first VP is let into the singleton
+   int32           hasFinished;    //set TRUE by the end-singleton handler
+   PrivQueueStruc *waitQ;          //VPs that asked to start while the singleton was in progress
+ }
+VCilkSingleton;
+
+/*Semantic layer-specific data sent inside a request from lib called in app
+ * to request handler called in MasterLoop.  This enum is the request kind;
+ * dispatchSemReq switches on it to pick the handler named beside each value. */
+enum VCilkReqType
+ {
+   syncReq = 1,          //-> handleSync
+   mallocReq,            //-> handleMalloc
+   freeReq,              //-> handleFree
+   singleton_fn_start,   //-> handleStartFnSingleton
+   singleton_fn_end,     //-> handleEndFnSingleton
+   singleton_data_start, //-> handleStartDataSingleton
+   singleton_data_end,   //-> handleEndDataSingleton
+   atomic,               //-> handleAtomic
+   trans_start,          //-> handleTransStart
+   trans_end             //-> handleTransEnd
+ };
+
+struct _VCilkSemReq   //one semantic request; reqType decides which fields are read
+ { enum VCilkReqType    reqType;          //switched on by dispatchSemReq
+   VirtProcr           *requestingPr;     //NOTE(review): handlers seen here take the VP as an argument instead
+   
+   int32                sizeToMalloc;     //mallocReq: size handed to VMS__malloc
+   void                *ptrToFree;        //freeReq: pointer handed to VMS__free
+   
+   VirtProcrFnPtr       fnPtr;            //spawn: function given to VMS__create_procr
+   void                *initData;         //spawn: initial data given to VMS__create_procr
+   int32                coreToSpawnOnto;  //spawn: core for the new VP; out-of-range picks round-robin
+
+   int32              singletonID;        //fn singleton: index into semEnv->fnSingletons
+   VCilkSingleton     **singletonPtrAddr; //data singleton: where the singleton pointer is kept
+
+   PtrToAtomicFn      fnToExecInMaster;   //atomic: function called by handleAtomic
+   void              *dataForFn;          //atomic: argument passed to fnToExecInMaster
+
+   int32              transID;            //transaction: index into semEnv->transactionStrucs
+ }
+/* VCilkSemReq */;
+
+typedef struct   //state of one transaction ID
+ {
+   VirtProcr      *VPCurrentlyExecuting; //VP inside the transaction; NULL when nobody holds it
+   PrivQueueStruc *waitingVPQ;           //VPs that requested start while another VP held it
+ }
+VCilkTrans;
+
+typedef struct   //plug-in wide state; the handlers receive it as semEnv
+ {
+   PrivQueueStruc **readyVPQs;          //ready-VP queues, indexed by core number
+   HashTable       *commHashTbl;        //NOTE(review): not touched by the handlers visible here
+   int32            numVirtPr;          //live VP count; VMS__shutdown() is called when it drops to 0
+   int32            nextCoreToGetNewPr; //round-robin cursor used when a spawn names no valid core
+   int32            primitiveStartTime; //NOTE(review): presumably for VCilk__start_primitive -- confirm
+
+                       //TODO: fix limit on num with dynArray; IDs index these arrays unchecked
+   VCilkSingleton     fnSingletons[NUM_STRUCS_IN_SEM_ENV];    //indexed by singletonID
+   VCilkTrans       transactionStrucs[NUM_STRUCS_IN_SEM_ENV]; //indexed by transID
+ }
+VCilkSemEnv;
+
+typedef struct _TransListElem TransListElem;
+struct _TransListElem   //node in a VP's list of transactions it has entered
+ {
+   int32          transID;   //ID of the transaction that was entered
+   TransListElem *nextTrans; //element for the transaction entered just before, or NULL
+ };
+//TransListElem
+
+typedef struct   //per-VP semantic data, reached through VirtProcr->semanticData
+ {
+   int32          syncPending;     //TRUE while the VP waits in sync for live children
+   int32          numLiveChildren; //children spawned by this VP that have not dissipated yet
+   VirtProcr     *parentPr;        //VP that spawned this one; NULL for the seed VP
+   
+   int32          highestTransEntered; //starts at -1; set to the transID on each trans_start
+   TransListElem *lastTransEntered;    //head of the entered-transactions list; NULL when empty
+ }
+VCilkSemData;
+
+//===========================================================================
+
+void
+VCilk__create_seed_procr_and_do_work( VirtProcrFnPtr fn, void *initData );
+
+int32
+VCilk__giveMinWorkUnitCycles( float32 percentOverhead );
+
+void inline
+VCilk__start_primitive();
+
+int32 inline
+VCilk__end_primitive_and_give_cycles();
+
+int32
+VCilk__giveIdealNumWorkUnits();
+
+//=======================
+
+void
+VCilk__init();
+
+void
+VCilk__cleanup_at_end_of_shutdown();
+
+//=======================
+
+void inline
+VCilk__spawn( int32  coreToSpawnOnto, VirtProcrFnPtr  fnPtr,
+              void  *initData,        VirtProcr      *creatingPr );
+
+int32
+VCilk__give_number_of_cores_to_spawn_onto();
+
+void
+VCilk__sync( VirtProcr *animatingPr );
+
+void *
+VCilk__malloc( int32 sizeToMalloc, VirtProcr *animPr );
+
+void
+VCilk__free( void *ptrToFree, VirtProcr *animPr );
+
+void
+VCilk__dissipate_procr( VirtProcr *procrToDissipate );
+
+
+//======================= Concurrency Stuff ======================
+void
+VCilk__start_fn_singleton( int32 singletonID, VirtProcr *animPr );
+
+void
+VCilk__end_fn_singleton( int32 singletonID, VirtProcr *animPr );
+
+void   //NOTE(review): presumably issues singleton_data_start for *singletonAddr -- confirm in VCilk.c
+VCilk__start_data_singleton( VCilkSingleton **singletonAddr, VirtProcr *animPr );
+
+void
+VCilk__end_data_singleton( VCilkSingleton **singletonAddr, VirtProcr *animPr );
+
+void
+VCilk__animate_short_fn_in_isolation( PtrToAtomicFn ptrToFnToExecInMaster,
+                                      void *data, VirtProcr *animPr );
+
+void
+VCilk__start_transaction( int32 transactionID, VirtProcr *animPr );
+
+void
+VCilk__end_transaction( int32 transactionID, VirtProcr *animPr );
+
+
+//=========================  Internal use only  =============================
+void
+VCilk__Request_Handler( VirtProcr *requestingPr, void *_semEnv );
+
+VirtProcr *
+VCilk__schedule_virt_procr( void *_semEnv, int coreNum );
+
+
+#endif	/* _VCilk_H */
+
diff -r 5494943ed3a4 -r 58d0c2b1d6a4 VCilk_PluginFns.c
--- a/VCilk_PluginFns.c	Wed May 11 15:29:58 2011 +0200
+++ b/VCilk_PluginFns.c	Wed May 11 16:11:06 2011 +0200
@@ -1,555 +1,555 @@
-/*
- * Copyright 2010  OpenSourceCodeStewardshipFoundation
- *
- * Licensed under BSD
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "VMS/Queue_impl/PrivateQueue.h"
-#include "VCilk.h"
-
-
-
-//===========================================================================
-void inline
-handleSync( VirtProcr *requestingPr, VCilkSemEnv *semEnv );
-
-void inline
-handleMalloc( VCilkSemReq *semReq, VirtProcr *requestingPr,
-              VCilkSemEnv *semEnv );
-void inline
-handleFree( VCilkSemReq *semReq, VirtProcr *requestingPr,
-            VCilkSemEnv *semEnv );
-void inline
-handleDissipate( VirtProcr *requestingPr, VCilkSemEnv *semEnv );
-
-void inline
-handleSpawn( VMSReqst *req, VirtProcr *requestingPr, VCilkSemEnv *semEnv );
-
-void inline
-dispatchSemReq( VMSReqst *req, VirtProcr *requestingPr, VCilkSemEnv *semEnv);
-
-void inline
-handleTransEnd( VCilkSemReq *semReq, VirtProcr *requestingPr,
-                VCilkSemEnv*semEnv);
-void inline
-handleTransStart( VCilkSemReq *semReq, VirtProcr *requestingPr,
-                  VCilkSemEnv *semEnv );
-void inline
-handleAtomic( VCilkSemReq *semReq, VirtProcr *requestingPr,
-              VCilkSemEnv *semEnv);
-inline void
-handleStartFnSingleton( VCilkSemReq *semReq, VirtProcr *reqstingPr,
-                      VCilkSemEnv *semEnv );
-inline void
-handleEndFnSingleton( VCilkSemReq *semReq, VirtProcr *requestingPr,
-                    VCilkSemEnv *semEnv );
-inline void
-handleStartDataSingleton( VCilkSemReq *semReq, VirtProcr *reqstingPr,
-                      VCilkSemEnv *semEnv );
-inline void
-handleEndDataSingleton( VCilkSemReq *semReq, VirtProcr *requestingPr,
-                    VCilkSemEnv *semEnv );
-
-void inline
-resume_procr( VirtProcr *procr, VCilkSemEnv *semEnv );
-
-//===========================================================================
-
-
-//============================== Scheduler ==================================
-//
-/*For VCilk, scheduling a slave simply takes the next work-unit off the
- * ready-to-go work-unit queue and assigns it to the slaveToSched.
- *If the ready-to-go work-unit queue is empty, then nothing to schedule
- * to the slave -- return FALSE to let Master loop know scheduling that
- * slave failed.
- */
-VirtProcr *
-VCilk__schedule_virt_procr( void *_semEnv, int coreNum )
- { VirtProcr   *schedPr;
-   VCilkSemEnv *semEnv;
-
-   semEnv = (VCilkSemEnv *)_semEnv;
-
-   schedPr = readPrivQ( semEnv->readyVPQs[coreNum] );
-      //Note, using a non-blocking queue -- it returns NULL if queue empty
-
-   return( schedPr );
- }
-
-
-//===========================  Request Handler  =============================
-//
-/*Will get requests to send, to receive, and to create new processors.
- * Upon send, check the hash to see if a receive is waiting.
- * Upon receive, check hash to see if a send has already happened.
- * When other is not there, put in.  When other is there, the comm.
- *  completes, which means the receiver P gets scheduled and
- *  picks up right after the receive request.  So make the work-unit
- *  and put it into the queue of work-units ready to go.
- * Other request is create a new Processor, with the function to run in the
- *  Processor, and initial data.
- */
-void
-VCilk__Request_Handler( VirtProcr *requestingPr, void *_semEnv )
- { VCilkSemEnv *semEnv;
-   VMSReqst    *req;
-   VCilkSemReq *semReq;
- 
-
-   semEnv = (VCilkSemEnv *)_semEnv;
-
-   req = VMS__take_next_request_out_of( requestingPr );
-   
-   while( req != NULL )
-    {
-      switch( req->reqType )
-       { case semantic:     dispatchSemReq( req, requestingPr, semEnv );
-            break;
-         case createReq:    //create request has to come as a VMS request,
-                            // to allow MasterLoop to do stuff before gets
-                            // here, and maybe also stuff after all requests
-                            // done -- however, can still attach semantic
-                            // req data to req.
-                            handleSpawn(          req, requestingPr, semEnv);
-            break;
-         case dissipate:    handleDissipate(           requestingPr, semEnv);
-            break;
-         case VMSSemantic:  VMS__handle_VMSSemReq(req, requestingPr, semEnv,
-                                                              &resume_procr);
-            break;
-         default:
-            break;
-       }
-      
-      DoneHandlingReqst:
-
-      req = VMS__take_next_request_out_of( requestingPr );
-    } //while( req != NULL )
- }
-
-void inline
-dispatchSemReq( VMSReqst *req, VirtProcr *reqPr, VCilkSemEnv *semEnv )
- { VCilkSemReq *semReq;
-
-   semReq = VMS__take_sem_reqst_from(req);
-
-   if( semReq == NULL ) return;
-   switch( semReq->reqType )
-    {
-      case syncReq:         handleSync(                 reqPr, semEnv );
-         break;
-      case mallocReq:       handleMalloc(       semReq, reqPr, semEnv );
-         break;
-      case freeReq:         handleFree(         semReq, reqPr, semEnv );
-         break;
-      case singleton_fn_start:  handleStartFnSingleton(semReq, reqPr, semEnv);
-         break;
-      case singleton_fn_end:    handleEndFnSingleton(  semReq, reqPr, semEnv);
-         break;
-      case singleton_data_start:handleStartDataSingleton(semReq,reqPr,semEnv);
-         break;
-      case singleton_data_end:  handleEndDataSingleton(semReq, reqPr, semEnv);
-         break;
-      case atomic:          handleAtomic(       semReq, reqPr, semEnv );
-         break;
-      case trans_start:     handleTransStart(   semReq, reqPr, semEnv );
-         break;
-      case trans_end:       handleTransEnd(     semReq, reqPr, semEnv );
-         break;
-    }
-   //NOTE: semantic request data strucs allocated on stack in VCilk Lib calls
- }
-
-
-
-//=========================== Request Handlers ==============================
-void inline
-resume_procr( VirtProcr *procr, VCilkSemEnv *semEnv )
- {
-   writePrivQ( procr, semEnv->readyVPQs[ procr->coreAnimatedBy] );
- }
-
-
-
-
-/* check if list of live children is empty.
- * If yes, then resume.
- * If no, then set sync-pending flag.
- */
-inline void
-handleSync( VirtProcr *requestingPr, VCilkSemEnv *semEnv )
- {
-         Meas_startSync
-   if(((VCilkSemData *)(requestingPr->semanticData))->numLiveChildren  == 0 )
-    { //no live children to wait for
-      resume_procr( requestingPr, semEnv );
-    }
-   else
-    {
-      ((VCilkSemData *)(requestingPr->semanticData))->syncPending = TRUE;
-    }
-         Meas_endSync
- }
-
-/*
- */
-inline void
-handleMalloc( VCilkSemReq *semReq, VirtProcr *requestingPr,
-              VCilkSemEnv *semEnv )
- { void *ptr;
-   
-   ptr = VMS__malloc( semReq->sizeToMalloc );
-   requestingPr->dataRetFromReq = ptr;
-   resume_procr( requestingPr, semEnv );
- }
-
-/*
- */
-void inline
-handleFree( VCilkSemReq *semReq, VirtProcr *requestingPr,
-            VCilkSemEnv *semEnv )
- {
-   VMS__free( semReq->ptrToFree );
-   resume_procr( requestingPr, semEnv );
- }
-
-
-//============================== VMS requests ===============================
-/*Re-use this in the entry-point fn
- */
-inline VirtProcr *
-VCilk__create_procr_helper( VirtProcrFnPtr fnPtr, void  *initData,
-   VirtProcr *requestingPr, VCilkSemEnv *semEnv,  int32  coreToScheduleOnto )
- { VirtProcr    *newPr;
-   VCilkSemData *semData;
-
-      //This is running in master, so use internal version
-   newPr = VMS__create_procr( fnPtr, initData );
-
-   semData = VMS__malloc( sizeof(VCilkSemData) );
-
-   semData->numLiveChildren = 0;
-   semData->parentPr        = requestingPr;
-   semData->syncPending     = FALSE;
-   
-   semData->highestTransEntered = -1;
-   semData->lastTransEntered    = NULL;
-
-   newPr->semanticData = semData;
-
-   /* increase the number of live children of requester.
-    */
-   if( requestingPr != NULL ) //NULL when creating seed procr
-     ((VCilkSemData *)(requestingPr->semanticData))->numLiveChildren +=1;
-
-   semEnv->numVirtPr += 1;
-
-   //=================== Assign new processor to a core =====================
-   #ifdef SEQUENTIAL
-   newPr->coreAnimatedBy = 0;
-
-   #else
-
-   if(coreToScheduleOnto < 0 || coreToScheduleOnto >= NUM_CORES )
-    {    //out-of-range, so round-robin assignment
-      newPr->coreAnimatedBy = semEnv->nextCoreToGetNewPr;
-
-      if( semEnv->nextCoreToGetNewPr >= NUM_CORES - 1 )
-          semEnv->nextCoreToGetNewPr  = 0;
-      else
-          semEnv->nextCoreToGetNewPr += 1;
-    }
-   else //core num in-range, so use it
-    { newPr->coreAnimatedBy = coreToScheduleOnto;
-    }
-   #endif
-   //========================================================================
-
-   return newPr;
- }
-
-
-void inline
-handleSpawn( VMSReqst *req, VirtProcr *requestingPr, VCilkSemEnv *semEnv )
- { VCilkSemReq *semReq;
-   VirtProcr    *newPr;
-
-         Meas_startSpawn
-   semReq = VMS__take_sem_reqst_from( req );
-
-   newPr = VCilk__create_procr_helper( semReq->fnPtr, semReq->initData,
-                             requestingPr, semEnv, semReq->coreToSpawnOnto );
-
-      //For VPThread, caller needs ptr to created processor returned to it
-   requestingPr->dataRetFromReq = newPr;
-
-   resume_procr( newPr,        semEnv );
-   resume_procr( requestingPr, semEnv );
-         Meas_endSpawn
- }
-
-
-
-/*get parentVP & remove dissipator from parent's live children.
- *If this was last live child, check "sync pending" flag
- *-- if set, then resume the parentVP.
- */
-void inline
-handleDissipate( VirtProcr *requestingPr, VCilkSemEnv *semEnv )
- {
-   VirtProcr *
-   parentPr = ((VCilkSemData *)
-               (requestingPr->semanticData))->parentPr;
-   if( parentPr == NULL ) //means this is seed processor being dissipated
-    { //Just act normally, except don't deal with parent
-      // VMS__Free is implemented to ignore requests to free data from
-      // outside VMS, so all this processor's non-VMS allocated data will
-      // remain and be cleaned up outside
-    }
-   else
-    {
-      ((VCilkSemData *)(parentPr->semanticData))->numLiveChildren -= 1;
-      if( ((VCilkSemData *)
-            (parentPr->semanticData))->numLiveChildren <= 0 )
-       { //this was last live child of parent
-         if( ((VCilkSemData *)
-               (parentPr->semanticData))->syncPending == TRUE )
-          { //was waiting for last child to dissipate, so resume it
-            ((VCilkSemData *)
-              (parentPr->semanticData))->syncPending = FALSE;
-            resume_procr( parentPr, semEnv );
-          }
-       }
-    }
-
-   VMS__free( requestingPr->semanticData );
-   
-       //Now do normal dissipate
-   
-       //call VMS to free_all AppVP state -- stack and so on
-   VMS__dissipate_procr( requestingPr );
-
-   semEnv->numVirtPr -= 1;
-   if( semEnv->numVirtPr == 0 )
-    {    //no more work, so shutdown
-      VMS__shutdown();
-    }
- }
-
-
-//=============================== Atomic ====================================
-//
-/*Uses ID as index into array of flags.  If flag already set, resumes from
- * end-label.  Else, sets flag and resumes normally.
- */
-void inline
-handleStartSingleton_helper( VCilkSingleton *singleton, VirtProcr *reqstingPr,
-                             VCilkSemEnv    *semEnv )
- {
-   if( singleton->hasFinished )
-    {    //the code that sets the flag to true first sets the end instr addr
-      reqstingPr->dataRetFromReq = singleton->endInstrAddr;
-      resume_procr( reqstingPr, semEnv );
-      return;
-    }
-   else if( singleton->hasBeenStarted )
-    {    //singleton is in-progress in a diff slave, so wait for it to finish
-      writePrivQ(reqstingPr, singleton->waitQ );
-      return;
-    }
-   else
-    {    //hasn't been started, so this is the first attempt at the singleton
-      singleton->hasBeenStarted = TRUE;
-      reqstingPr->dataRetFromReq = 0x0;
-      resume_procr( reqstingPr, semEnv );
-      return;
-    }
- }
-void inline
-handleStartFnSingleton( VCilkSemReq *semReq, VirtProcr *requestingPr,
-                      VCilkSemEnv *semEnv )
- { VCilkSingleton *singleton;
-
-   singleton = &(semEnv->fnSingletons[ semReq->singletonID ]);
-   handleStartSingleton_helper( singleton, requestingPr, semEnv );
- }
-void inline
-handleStartDataSingleton( VCilkSemReq *semReq, VirtProcr *requestingPr,
-                      VCilkSemEnv *semEnv )
- { VCilkSingleton *singleton;
-
-   if( *(semReq->singletonPtrAddr) == NULL )
-    { singleton                 = VMS__malloc( sizeof(VCilkSingleton) );
-      singleton->waitQ          = makeVMSPrivQ();
-      singleton->endInstrAddr   = 0x0;
-      singleton->hasBeenStarted = FALSE;
-      singleton->hasFinished    = FALSE;
-      *(semReq->singletonPtrAddr)  = singleton;
-    }
-   else
-      singleton = *(semReq->singletonPtrAddr);
-   handleStartSingleton_helper( singleton, requestingPr, semEnv );
- }
-
-
-void inline
-handleEndSingleton_helper( VCilkSingleton *singleton, VirtProcr *requestingPr,
-                           VCilkSemEnv    *semEnv )
- { PrivQueueStruc *waitQ;
-   int32           numWaiting, i;
-   VirtProcr      *resumingPr;
-
-   if( singleton->hasFinished )
-    { //by definition, only one slave should ever be able to run end singleton
-      // so if this is true, is an error
-      //VMS__throw_exception( "singleton code ran twice", requestingPr, NULL);
-    }
-
-   singleton->hasFinished = TRUE;
-   waitQ = singleton->waitQ;
-   numWaiting = numInPrivQ( waitQ );
-   for( i = 0; i < numWaiting; i++ )
-    {    //they will resume inside start singleton, then jmp to end singleton
-      resumingPr = readPrivQ( waitQ );
-      resumingPr->dataRetFromReq = singleton->endInstrAddr;
-      resume_procr( resumingPr, semEnv );
-    }
-
-   resume_procr( requestingPr, semEnv );
-
-}
-void inline
-handleEndFnSingleton( VCilkSemReq *semReq, VirtProcr *requestingPr,
-                        VCilkSemEnv *semEnv )
- {
-   VCilkSingleton   *singleton;
-
-   singleton = &(semEnv->fnSingletons[ semReq->singletonID ]);
-   handleEndSingleton_helper( singleton, requestingPr, semEnv );
-  }
-void inline
-handleEndDataSingleton( VCilkSemReq *semReq, VirtProcr *requestingPr,
-                        VCilkSemEnv *semEnv )
- {
-   VCilkSingleton   *singleton;
-
-   singleton = *(semReq->singletonPtrAddr);
-   handleEndSingleton_helper( singleton, requestingPr, semEnv );
-  }
-
-
-/*This executes the function in the masterVP, take the function
- * pointer out of the request and call it, then resume the VP.
- */
-void inline
-handleAtomic( VCilkSemReq *semReq, VirtProcr *requestingPr,
-              VCilkSemEnv *semEnv )
- {
-   semReq->fnToExecInMaster( semReq->dataForFn );
-   resume_procr( requestingPr, semEnv );
- }
-
-/*First, it looks at the VP's semantic data, to see the highest transactionID
- * that VP
- * already has entered.  If the current ID is not larger, it throws an
- * exception stating a bug in the code.
- *Otherwise it puts the current ID
- * there, and adds the ID to a linked list of IDs entered -- the list is
- * used to check that exits are properly ordered.
- *Next it is uses transactionID as index into an array of transaction
- * structures.
- *If the "VP_currently_executing" field is non-null, then put requesting VP
- * into queue in the struct.  (At some point a holder will request
- * end-transaction, which will take this VP from the queue and resume it.)
- *If NULL, then write requesting into the field and resume.
- */
-void inline
-handleTransStart( VCilkSemReq *semReq, VirtProcr *requestingPr,
-                  VCilkSemEnv *semEnv )
- { VCilkSemData *semData;
-   TransListElem *nextTransElem;
-
-      //check ordering of entering transactions is correct
-   semData = requestingPr->semanticData;
-   if( semData->highestTransEntered > semReq->transID )
-    {    //throw VMS exception, which shuts down VMS.
-      VMS__throw_exception( "transID smaller than prev", requestingPr, NULL);
-    }
-      //add this trans ID to the list of transactions entered -- check when
-      // end a transaction
-   semData->highestTransEntered = semReq->transID;
-   nextTransElem = VMS__malloc( sizeof(TransListElem) );
-   nextTransElem->transID = semReq->transID;
-   nextTransElem->nextTrans = semData->lastTransEntered;
-   semData->lastTransEntered = nextTransElem;
-
-      //get the structure for this transaction ID
-   VCilkTrans *
-   transStruc = &(semEnv->transactionStrucs[ semReq->transID ]);
-
-   if( transStruc->VPCurrentlyExecuting == NULL )
-    {
-      transStruc->VPCurrentlyExecuting = requestingPr;
-      resume_procr( requestingPr, semEnv );
-    }
-   else
-    {    //note, might make future things cleaner if save request with VP and
-         // add this trans ID to the linked list when gets out of queue.
-         // but don't need for now, and lazy..
-      writePrivQ( requestingPr, transStruc->waitingVPQ );
-    }
- }
-
-
-/*Use the trans ID to get the transaction structure from the array.
- *Look at VP_currently_executing to be sure it's same as requesting VP.
- * If different, throw an exception, stating there's a bug in the code.
- *Next, take the first element off the list of entered transactions.
- * Check to be sure the ending transaction is the same ID as the next on
- * the list.  If not, incorrectly nested so throw an exception.
- *
- *Next, get from the queue in the structure.
- *If it's empty, set VP_currently_executing field to NULL and resume
- * requesting VP.
- *If get somethine, set VP_currently_executing to the VP from the queue, then
- * resume both.
- */
-void inline
-handleTransEnd( VCilkSemReq *semReq, VirtProcr *requestingPr,
-                VCilkSemEnv *semEnv )
- { VCilkSemData    *semData;
-   VirtProcr     *waitingPr;
-   VCilkTrans      *transStruc;
-   TransListElem *lastTrans;
-
-   transStruc = &(semEnv->transactionStrucs[ semReq->transID ]);
-
-      //make sure transaction ended in same VP as started it.
-   if( transStruc->VPCurrentlyExecuting != requestingPr )
-    {
-      VMS__throw_exception( "trans ended in diff VP", requestingPr, NULL );
-    }
-
-      //make sure nesting is correct -- last ID entered should == this ID
-   semData = requestingPr->semanticData;
-   lastTrans = semData->lastTransEntered;
-   if( lastTrans->transID != semReq->transID )
-    {
-      VMS__throw_exception( "trans incorrectly nested", requestingPr, NULL );
-    }
-
-   semData->lastTransEntered = semData->lastTransEntered->nextTrans;
-
-
-   waitingPr = readPrivQ( transStruc->waitingVPQ );
-   transStruc->VPCurrentlyExecuting = waitingPr;
-
-   if( waitingPr != NULL )
-      resume_procr( waitingPr, semEnv );
-
-   resume_procr( requestingPr, semEnv );
- }
+/*
+ * Copyright 2010  OpenSourceCodeStewardshipFoundation
+ *
+ * Licensed under BSD
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "VMS/Queue_impl/PrivateQueue.h"
+#include "VCilk.h"
+
+
+
+//===========================================================================
+void inline
+handleSync( VirtProcr *requestingPr, VCilkSemEnv *semEnv );
+
+void inline
+handleMalloc( VCilkSemReq *semReq, VirtProcr *requestingPr,
+              VCilkSemEnv *semEnv );
+void inline
+handleFree( VCilkSemReq *semReq, VirtProcr *requestingPr,
+            VCilkSemEnv *semEnv );
+void inline
+handleDissipate( VirtProcr *requestingPr, VCilkSemEnv *semEnv );
+
+void inline
+handleSpawn( VMSReqst *req, VirtProcr *requestingPr, VCilkSemEnv *semEnv );
+
+void inline
+dispatchSemReq( VMSReqst *req, VirtProcr *requestingPr, VCilkSemEnv *semEnv);
+
+void inline
+handleTransEnd( VCilkSemReq *semReq, VirtProcr *requestingPr,
+                VCilkSemEnv*semEnv);
+void inline
+handleTransStart( VCilkSemReq *semReq, VirtProcr *requestingPr,
+                  VCilkSemEnv *semEnv );
+void inline
+handleAtomic( VCilkSemReq *semReq, VirtProcr *requestingPr,
+              VCilkSemEnv *semEnv);
+inline void
+handleStartFnSingleton( VCilkSemReq *semReq, VirtProcr *reqstingPr,
+                      VCilkSemEnv *semEnv );
+inline void
+handleEndFnSingleton( VCilkSemReq *semReq, VirtProcr *requestingPr,
+                    VCilkSemEnv *semEnv );
+inline void
+handleStartDataSingleton( VCilkSemReq *semReq, VirtProcr *reqstingPr,
+                      VCilkSemEnv *semEnv );
+inline void
+handleEndDataSingleton( VCilkSemReq *semReq, VirtProcr *requestingPr,
+                    VCilkSemEnv *semEnv );
+
+void inline
+resume_procr( VirtProcr *procr, VCilkSemEnv *semEnv );
+
+//===========================================================================
+
+
+//============================== Scheduler ==================================
+//
+/*Scheduler plug-in for VCilk: gives the Master loop the next ready VP for
+ * the given core, taken from that core's entry in semEnv->readyVPQs.
+ *The queue read is non-blocking, so when nothing is ready the result is
+ * NULL, and that NULL is what tells the caller nothing was scheduled.
+ * _semEnv: the VCilkSemEnv, untyped;  coreNum: index into readyVPQs.
+ */
+VirtProcr *
+VCilk__schedule_virt_procr( void *_semEnv, int coreNum )
+ { VCilkSemEnv    *env    = (VCilkSemEnv *)_semEnv;
+   PrivQueueStruc *readyQ = env->readyVPQs[coreNum];
+   VirtProcr      *nextPr;
+
+      //readPrivQ yields NULL on an empty queue (per the original note),
+      // so no separate emptiness test is made here
+   nextPr = readPrivQ( readyQ );
+
+   return nextPr;
+ }
+
+
+//===========================  Request Handler  =============================
+//
+/*Plug-in entry point: drains the requests queued on requestingPr, one at
+ * a time until none are left, routing each by its VMS-level type:
+ *   semantic    -- VCilk-specific request, decoded by dispatchSemReq
+ *   createReq   -- spawn of a new virtual processor, see handleSpawn
+ *   dissipate   -- requestingPr is ending, see handleDissipate
+ *   VMSSemantic -- handed to VMS, together with resume_procr
+ * Any other type is silently skipped.
+ *NOTE(review): the loop reads requestingPr again after a dissipate request
+ * was handled -- confirm VMS__dissipate_procr leaves that safe.
+ */
+void
+VCilk__Request_Handler( VirtProcr *requestingPr, void *_semEnv )
+ { VCilkSemEnv *semEnv;
+   VMSReqst    *req;
+
+      //the semantic env is handed over untyped, so give it its real type
+      // before passing it on to the individual handlers
+   semEnv = (VCilkSemEnv *)_semEnv;
+
+   req = VMS__take_next_request_out_of( requestingPr );
+   
+   while( req != NULL )
+    {
+      switch( req->reqType )
+       { case semantic:     dispatchSemReq( req, requestingPr, semEnv );
+            break;
+         case createReq:    //create request has to come as a VMS request,
+                            // to allow MasterLoop to do stuff before gets
+                            // here, and maybe also stuff after all requests
+                            // done -- however, can still attach semantic
+                            // req data to req.
+                            handleSpawn(          req, requestingPr, semEnv);
+            break;
+         case dissipate:    handleDissipate(           requestingPr, semEnv);
+            break;
+         case VMSSemantic:  VMS__handle_VMSSemReq(req, requestingPr, semEnv,
+                                                              &resume_procr);
+            break;
+         default:
+            break;
+       }
+      
+         //done with this request -- fetch the next one still queued on
+         // the processor; the loop ends when that comes back NULL
+      req = VMS__take_next_request_out_of( requestingPr );
+    } //while( req != NULL )
+ }
+
+void inline
+dispatchSemReq( VMSReqst *req, VirtProcr *reqPr, VCilkSemEnv *semEnv )
+ { VCilkSemReq *semReq;
+      //route one semantic request: unwrap VCilk's payload, switch on type
+   semReq = VMS__take_sem_reqst_from(req);
+
+   if( semReq == NULL ) return;   //nothing attached, so nothing to handle
+   switch( semReq->reqType )
+    {    //one handler per VCilkReqType; there is no default case
+      case syncReq:         handleSync(                 reqPr, semEnv );
+         break;
+      case mallocReq:       handleMalloc(       semReq, reqPr, semEnv );
+         break;
+      case freeReq:         handleFree(         semReq, reqPr, semEnv );
+         break;
+      case singleton_fn_start:  handleStartFnSingleton(semReq, reqPr, semEnv);
+         break;
+      case singleton_fn_end:    handleEndFnSingleton(  semReq, reqPr, semEnv);
+         break;
+      case singleton_data_start:handleStartDataSingleton(semReq,reqPr,semEnv);
+         break;
+      case singleton_data_end:  handleEndDataSingleton(semReq, reqPr, semEnv);
+         break;
+      case atomic:          handleAtomic(       semReq, reqPr, semEnv );
+         break;
+      case trans_start:     handleTransStart(   semReq, reqPr, semEnv );
+         break;
+      case trans_end:       handleTransEnd(     semReq, reqPr, semEnv );
+         break;
+    }
+   //NOTE: semantic request data strucs allocated on stack in VCilk Lib calls
+ }
+
+
+
+//=========================== Request Handlers ==============================
+void inline
+resume_procr( VirtProcr *procr, VCilkSemEnv *semEnv )
+ {    //makes procr schedulable: puts it into the ready queue of its core
+   writePrivQ( procr, semEnv->readyVPQs[ procr->coreAnimatedBy] );
+ }
+
+
+
+
+/*Sync: the requester may only continue once all its children are done.
+ * If its count of live children is already zero, resume it right away.
+ * If not, set its sync-pending flag and leave it suspended.
+ */
+inline void
+handleSync( VirtProcr *requestingPr, VCilkSemEnv *semEnv )
+ {
+         Meas_startSync
+   if(((VCilkSemData *)(requestingPr->semanticData))->numLiveChildren  == 0 )
+    { //no live children to wait for
+      resume_procr( requestingPr, semEnv );
+    }
+   else
+    {    //not resumed here -- handleDissipate of the last child does that
+      ((VCilkSemData *)(requestingPr->semanticData))->syncPending = TRUE;
+    }
+         Meas_endSync
+ }
+
+/*Allocates sizeToMalloc bytes via VMS__malloc on behalf of the requester,
+ * leaves the pointer in its dataRetFromReq, then resumes it.*/
+inline void
+handleMalloc( VCilkSemReq *semReq, VirtProcr *requestingPr,
+              VCilkSemEnv *semEnv )
+ { void *ptr;
+   
+   ptr = VMS__malloc( semReq->sizeToMalloc );
+   requestingPr->dataRetFromReq = ptr;   //the result travels back this way
+   resume_procr( requestingPr, semEnv );
+ }
+
+/*Releases the requester's ptrToFree via VMS__free, then resumes it.
+ */
+void inline
+handleFree( VCilkSemReq *semReq, VirtProcr *requestingPr,
+            VCilkSemEnv *semEnv )
+ {
+   VMS__free( semReq->ptrToFree );
+   resume_procr( requestingPr, semEnv );
+ }
+
+
+//============================== VMS requests ===============================
+/*Creates a virtual processor plus its VCilk semantic data, counts it, and
+ * picks its core; returns it.  Re-use this in the entry-point fn.*/
+inline VirtProcr *
+VCilk__create_procr_helper( VirtProcrFnPtr fnPtr, void  *initData,
+   VirtProcr *requestingPr, VCilkSemEnv *semEnv,  int32  coreToScheduleOnto )
+ { VirtProcr    *newPr;
+   VCilkSemData *semData;
+
+      //This is running in master, so use internal version
+   newPr = VMS__create_procr( fnPtr, initData );
+      //fresh semantic data: no children yet, no sync pending
+   semData = VMS__malloc( sizeof(VCilkSemData) );
+
+   semData->numLiveChildren = 0;
+   semData->parentPr        = requestingPr;
+   semData->syncPending     = FALSE;
+      //and no transaction entered so far
+   semData->highestTransEntered = -1;
+   semData->lastTransEntered    = NULL;
+
+   newPr->semanticData = semData;
+
+   /* increase the number of live children of requester.
+    */
+   if( requestingPr != NULL ) //NULL when creating seed procr
+     ((VCilkSemData *)(requestingPr->semanticData))->numLiveChildren +=1;
+      //count it among the live procrs; handleDissipate counts it back out
+   semEnv->numVirtPr += 1;
+
+   //=================== Assign new processor to a core =====================
+   #ifdef SEQUENTIAL
+   newPr->coreAnimatedBy = 0;
+
+   #else
+
+   if(coreToScheduleOnto < 0 || coreToScheduleOnto >= NUM_CORES )
+    {    //out-of-range, so round-robin assignment
+      newPr->coreAnimatedBy = semEnv->nextCoreToGetNewPr;
+         //advance the round-robin cursor, wrapping after the last core
+      if( semEnv->nextCoreToGetNewPr >= NUM_CORES - 1 )
+          semEnv->nextCoreToGetNewPr  = 0;
+      else
+          semEnv->nextCoreToGetNewPr += 1;
+    }
+   else //core num in-range, so use it
+    { newPr->coreAnimatedBy = coreToScheduleOnto;
+    }
+   #endif
+   //========================================================================
+      //not put into any ready queue here -- resume_procr is left to caller
+   return newPr;
+ }
+
+
+void inline
+handleSpawn( VMSReqst *req, VirtProcr *requestingPr, VCilkSemEnv *semEnv )
+ { VCilkSemReq *semReq;
+   VirtProcr    *newPr;
+      //Spawn: create the child from the request's data, then ready both VPs
+         Meas_startSpawn
+   semReq = VMS__take_sem_reqst_from( req );
+      //the spawn parameters ride along as semantic data on the create req
+   newPr = VCilk__create_procr_helper( semReq->fnPtr, semReq->initData,
+                             requestingPr, semEnv, semReq->coreToSpawnOnto );
+
+      //leave the ptr to the created processor where the requester can read it
+   requestingPr->dataRetFromReq = newPr;
+      //child goes into its ready queue first, then the requester into its own
+   resume_procr( newPr,        semEnv );
+   resume_procr( requestingPr, semEnv );
+         Meas_endSpawn
+ }
+
+
+
+/*Ends requestingPr: counts it out of its parent's live children and, if
+ * that was the last one while the parent has "sync pending" set, resumes
+ * the parent.  Then frees the VP and, once no VP is left, shuts VMS down.
+ */
+void inline
+handleDissipate( VirtProcr *requestingPr, VCilkSemEnv *semEnv )
+ {
+   VirtProcr *
+   parentPr = ((VCilkSemData *)
+               (requestingPr->semanticData))->parentPr;
+   if( parentPr == NULL ) //means this is seed processor being dissipated
+    { //Just act normally, except don't deal with parent
+      // VMS__Free is implemented to ignore requests to free data from
+      // outside VMS, so all this processor's non-VMS allocated data will
+      // remain and be cleaned up outside
+    }
+   else
+    {    //NOTE(review): assumes the parent VP still exists here -- confirm
+      ((VCilkSemData *)(parentPr->semanticData))->numLiveChildren -= 1;
+      if( ((VCilkSemData *)
+            (parentPr->semanticData))->numLiveChildren <= 0 )
+       { //this was last live child of parent
+         if( ((VCilkSemData *)
+               (parentPr->semanticData))->syncPending == TRUE )
+          { //was waiting for last child to dissipate, so resume it
+            ((VCilkSemData *)
+              (parentPr->semanticData))->syncPending = FALSE;
+            resume_procr( parentPr, semEnv );
+          }
+       }
+    }
+      //semantic data was made in VCilk__create_procr_helper, so free it here
+   VMS__free( requestingPr->semanticData );
+   
+       //Now do normal dissipate
+   
+       //call VMS to free_all AppVP state -- stack and so on
+   VMS__dissipate_procr( requestingPr );
+
+   semEnv->numVirtPr -= 1;
+   if( semEnv->numVirtPr == 0 )
+    {    //no more work, so shutdown
+      VMS__shutdown();
+    }
+ }
+
+
+//=============================== Atomic ====================================
+//
+/*Start of a singleton: if it already finished, the requester is resumed with
+ * the end instr addr to jump to; if in progress, the requester is parked on
+ * the waitQ; else it is marked started and the requester resumed with 0.*/
+void inline
+handleStartSingleton_helper( VCilkSingleton *singleton, VirtProcr *reqstingPr,
+                             VCilkSemEnv    *semEnv )
+ {
+   if( singleton->hasFinished )
+    {    //the code that sets the flag to true first sets the end instr addr
+      reqstingPr->dataRetFromReq = singleton->endInstrAddr;
+      resume_procr( reqstingPr, semEnv );
+      return;
+    }
+   else if( singleton->hasBeenStarted )
+    {    //singleton is in-progress in a diff slave, so wait for it to finish
+      writePrivQ(reqstingPr, singleton->waitQ ); //drained by the end handler
+      return;
+    }
+   else
+    {    //hasn't been started, so this is the first attempt at the singleton
+      singleton->hasBeenStarted = TRUE;
+      reqstingPr->dataRetFromReq = 0x0;   //0 means: no jump, run the body
+      resume_procr( reqstingPr, semEnv );
+      return;
+    }
+ }
+void inline
+handleStartFnSingleton( VCilkSemReq *semReq, VirtProcr *requestingPr,
+                      VCilkSemEnv *semEnv )
+ { VCilkSingleton *singleton;
+      //fn singletons sit in an array in the env; ID is used unchecked as index
+   singleton = &(semEnv->fnSingletons[ semReq->singletonID ]);
+   handleStartSingleton_helper( singleton, requestingPr, semEnv );
+ }
+void inline
+handleStartDataSingleton( VCilkSemReq *semReq, VirtProcr *requestingPr,
+                      VCilkSemEnv *semEnv )
+ { VCilkSingleton *singleton;
+      //a NULL in the given location means no singleton was made for it yet
+   if( *(semReq->singletonPtrAddr) == NULL )
+    { singleton                 = VMS__malloc( sizeof(VCilkSingleton) );
+      singleton->waitQ          = makeVMSPrivQ();
+      singleton->endInstrAddr   = 0x0;
+      singleton->hasBeenStarted = FALSE;
+      singleton->hasFinished    = FALSE;
+      *(semReq->singletonPtrAddr)  = singleton;   //store it at that location
+    }
+   else
+      singleton = *(semReq->singletonPtrAddr);   //reuse the one stored there
+   handleStartSingleton_helper( singleton, requestingPr, semEnv );
+ }
+
+
+void inline
+handleEndSingleton_helper( VCilkSingleton *singleton, VirtProcr *requestingPr,
+                           VCilkSemEnv    *semEnv )
+ { PrivQueueStruc *waitQ;
+   int32           numWaiting, i;
+   VirtProcr      *resumingPr;
+      //End of a singleton: mark it finished, then release everyone waiting
+   if( singleton->hasFinished )
+    { //by definition, only one slave should ever be able to run end singleton
+      // so if this is true, is an error
+      //VMS__throw_exception( "singleton code ran twice", requestingPr, NULL);
+    }    //the check is currently a no-op, because the throw is commented out
+
+   singleton->hasFinished = TRUE;
+   waitQ = singleton->waitQ;
+   numWaiting = numInPrivQ( waitQ );
+   for( i = 0; i < numWaiting; i++ )
+    {    //they will resume inside start singleton, then jmp to end singleton
+      resumingPr = readPrivQ( waitQ );
+      resumingPr->dataRetFromReq = singleton->endInstrAddr;
+      resume_procr( resumingPr, semEnv );
+    }
+      //finally let the VP that sent the end-singleton request continue
+   resume_procr( requestingPr, semEnv );
+
+}
+void inline
+handleEndFnSingleton( VCilkSemReq *semReq, VirtProcr *requestingPr,
+                        VCilkSemEnv *semEnv )
+ {    //end of a fn singleton: look its struct up by ID, then shared helper
+   VCilkSingleton   *singleton;
+
+   singleton = &(semEnv->fnSingletons[ semReq->singletonID ]);
+   handleEndSingleton_helper( singleton, requestingPr, semEnv );
+  }
+void inline
+handleEndDataSingleton( VCilkSemReq *semReq, VirtProcr *requestingPr,
+                        VCilkSemEnv *semEnv )
+ {    //end of a data singleton: struct ptr is read from the given location
+   VCilkSingleton   *singleton;
+
+   singleton = *(semReq->singletonPtrAddr);   //not checked for NULL here
+   handleEndSingleton_helper( singleton, requestingPr, semEnv );
+  }
+
+
+/*Takes the function pointer and its data out of the request, calls the
+ * function here in the handler (in the masterVP), then resumes the VP.
+ */
+void inline
+handleAtomic( VCilkSemReq *semReq, VirtProcr *requestingPr,
+              VCilkSemEnv *semEnv )
+ {
+   semReq->fnToExecInMaster( semReq->dataForFn );
+   resume_procr( requestingPr, semEnv );
+ }
+
+/*First, it looks at the VP's semantic data, to see the highest transactionID
+ * that VP already has entered.  If the current ID is smaller than that, it
+ * throws an exception stating a bug in the code (an equal ID is let
+ * through by the check).
+ *Otherwise it puts the current ID
+ * there, and adds the ID to a linked list of IDs entered -- the list is
+ * used to check that exits are properly ordered.
+ *Next it uses transactionID as index into an array of transaction
+ * structures.
+ *If the "VP_currently_executing" field is non-null, then put requesting VP
+ * into queue in the struct.  (At some point a holder will request
+ * end-transaction, which will take this VP from the queue and resume it.)
+ *If NULL, then write requesting VP into the field and resume it.
+ */
+void inline
+handleTransStart( VCilkSemReq *semReq, VirtProcr *requestingPr,
+                  VCilkSemEnv *semEnv )
+ { VCilkSemData *semData;
+   TransListElem *nextTransElem;
+
+      //check ordering of entering transactions is correct
+   semData = requestingPr->semanticData;
+   if( semData->highestTransEntered > semReq->transID )
+    {    //throw VMS exception, which shuts down VMS.
+      VMS__throw_exception( "transID smaller than prev", requestingPr, NULL);
+    }
+      //push this trans ID onto the front of the list of transactions
+      // entered -- the list is checked when a transaction ends
+   semData->highestTransEntered = semReq->transID;
+   nextTransElem = VMS__malloc( sizeof(TransListElem) );
+   nextTransElem->transID = semReq->transID;
+   nextTransElem->nextTrans = semData->lastTransEntered;
+   semData->lastTransEntered = nextTransElem;
+
+      //get the structure for this transaction ID
+   VCilkTrans *
+   transStruc = &(semEnv->transactionStrucs[ semReq->transID ]);
+      //NOTE(review): transID is not range-checked before indexing -- confirm
+   if( transStruc->VPCurrentlyExecuting == NULL )
+    {    //nobody is inside the transaction, so the requester becomes holder
+      transStruc->VPCurrentlyExecuting = requestingPr;
+      resume_procr( requestingPr, semEnv );
+    }
+   else
+    {    //note, might make future things cleaner if save request with VP and
+         // add this trans ID to the linked list when gets out of queue.
+         // but don't need for now, and lazy..
+      writePrivQ( requestingPr, transStruc->waitingVPQ );
+    }
+ }
+
+
+/*Use the trans ID to get the transaction structure from the array.
+ *Look at VP_currently_executing to be sure it's same as requesting VP.
+ * If different, throw an exception, stating there's a bug in the code.
+ *Next, take the first element off the list of entered transactions.
+ * Check to be sure the ending transaction is the same ID as that element
+ * (and that there is one).  If not, incorrectly nested so throw an
+ * exception.  Otherwise unlink the element and free it.
+ *Next, get from the queue in the structure.
+ *If it's empty, set VP_currently_executing field to NULL and resume
+ * requesting VP.
+ *If get something, set VP_currently_executing to the VP from the queue, then
+ * resume both.
+ */
+void inline
+handleTransEnd( VCilkSemReq *semReq, VirtProcr *requestingPr,
+                VCilkSemEnv *semEnv )
+ { VCilkSemData    *semData;
+   VirtProcr     *waitingPr;
+   VCilkTrans      *transStruc;
+   TransListElem *lastTrans;
+
+   transStruc = &(semEnv->transactionStrucs[ semReq->transID ]);
+
+      //make sure transaction ended in same VP as started it.
+   if( transStruc->VPCurrentlyExecuting != requestingPr )
+    {
+      VMS__throw_exception( "trans ended in diff VP", requestingPr, NULL );
+    }
+
+      //make sure nesting is correct -- last ID entered should == this ID
+   semData = requestingPr->semanticData;
+   lastTrans = semData->lastTransEntered;
+   if( lastTrans == NULL || lastTrans->transID != semReq->transID )
+    {    //NULL: no transaction open in this VP (assumes throw never returns)
+      VMS__throw_exception( "trans incorrectly nested", requestingPr, NULL );
+    }
+      //unlink the finished transaction's list element and release its memory
+   semData->lastTransEntered = lastTrans->nextTrans;
+   VMS__free( lastTrans );
+
+   waitingPr = readPrivQ( transStruc->waitingVPQ );
+   transStruc->VPCurrentlyExecuting = waitingPr;
+
+   if( waitingPr != NULL )
+      resume_procr( waitingPr, semEnv );
+
+   resume_procr( requestingPr, semEnv );
+ }
diff -r 5494943ed3a4 -r 58d0c2b1d6a4 VCilk_lib.c
--- a/VCilk_lib.c	Wed May 11 15:29:58 2011 +0200
+++ b/VCilk_lib.c	Wed May 11 16:11:06 2011 +0200
@@ -1,539 +1,539 @@
-/*
- * Copyright 2010  OpenSourceCodeStewardshipFoundation
- *
- * Licensed under BSD
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "VMS/VMS.h"
-#include "VCilk.h"
-#include "VMS/Queue_impl/PrivateQueue.h"
-#include "VMS/Hash_impl/PrivateHash.h"
-
-
-//==========================================================================
-
-void
-VCilk__init();
-
-void
-VCilk__init_Seq();
-
-void
-VCilk__init_Helper();
-//==========================================================================
-
-
-/*TODO: Q: dealing with library f()s and DKU vs WT vs FoR
- * (still want to do FoR, with time-lines as syntax, could be super cool)
- * A: thinking pin the coreLoops for all of BLIS -- let Master arbitrate
- * among library, DKU, WT, FoR -- all the patterns in terms of virtual
- * processors (or equivalently work-units), so Master picks which virt procr
- * from which portions of app (DKU, WT, FoR) onto which sched slots
- *Might even do hierarchy of masters -- group of sched slots for each core
- * has its own master, that keeps generated work local
- * single-reader-single-writer sync everywhere -- no atomic primitives (but
- * memory fences on architectures that need them)
- * Might have the different schedulers talk to each other, to negotiate
- * larger-grain sharing of resources, according to predicted critical
- * path, and expansion of work
- */
-
-
-
-//===========================================================================
-
-
-/*These are the library functions *called in the application*
- * 
- *There's a pattern for the outside sequential code to interact with the
- * VMS_HW code.
- *The VMS_HW system is inside a boundary..  every VCilk system is in its
- * own directory that contains the functions for each of the processor types.
- * One of the processor types is the "seed" processor that starts the
- * cascade of creating all the processors that do the work.
- *So, in the directory is a file called "EntryPoint.c" that contains the
- * function, named appropriately to the work performed, that the outside
- * sequential code calls.  This function follows a pattern:
- *1) it calls VCilk__init()
- *2) it creates the initial data for the seed processor, which is passed
- *    in to the function
- *3) it creates the seed VCilk processor, with the data to start it with.
- *4) it calls startVCilkThenWaitUntilWorkDone
- *5) it gets the returnValue from the transfer struc and returns that
- *    from the function
- *
- *For now, a new VCilk system has to be created via VCilk__init every
- * time an entry point function is called -- later, might add letting the
- * VCilk system be created once, and let all the entry points just reuse
- * it -- want to be as simple as possible now, and see by using what makes
- * sense for later..
- */
-
-
-
-//===========================================================================
-
-/*This is the "border crossing" function -- the thing that crosses from the
- * outside world, into the VMS_HW world.  It initializes and starts up the
- * VMS system, then creates one processor from the specified function and
- * puts it into the readyQ.  From that point, that one function is resp.
- * for creating all the other processors, that then create others, and so
- * forth.
- *When all the processors, including the seed, have dissipated, then this
- * function returns.  The results will have been written by side-effect via
- * pointers read from, or written into initData.
- *
- *NOTE: no Threads should exist in the outside program that might touch
- * any of the data reachable from initData passed in to here
- */
-void
-VCilk__create_seed_procr_and_do_work( VirtProcrFnPtr fnPtr, void *initData )
- { VCilkSemEnv *semEnv;
-   VirtProcr *seedPr;
-
-   #ifdef SEQUENTIAL
-   VCilk__init_Seq();  //debug sequential exe
-   #else
-   VCilk__init();      //normal multi-thd
-   #endif
-   semEnv = _VMSMasterEnv->semanticEnv;
-
-      //VCilk starts with one processor, which is put into initial environ,
-      // and which then calls create() to create more, thereby expanding work
-   seedPr = VCilk__create_procr_helper( fnPtr, initData, NULL, semEnv, -1 );
-   resume_procr( seedPr, semEnv );
-
-   #ifdef SEQUENTIAL
-   VMS__start_the_work_then_wait_until_done_Seq();  //debug sequential exe
-   #else
-   VMS__start_the_work_then_wait_until_done();      //normal multi-thd
-   #endif
-
-   VCilk__cleanup_at_end_of_shutdown();
- }
-
-
-int32 inline
-VCilk__giveMinWorkUnitCycles( float32 percentOverhead )
- {
-   return MIN_WORK_UNIT_CYCLES;
- }
-
-int32
-VCilk__giveIdealNumWorkUnits()
- {
-   return NUM_SCHED_SLOTS * NUM_CORES;
- }
-
-/*To measure how long a primitive operation takes, when calculating number of
- * sub-tasks to divide into.
- * For now, use TSC -- later, make these two macros with assembly that first
- * saves jump point, and second jumps back several times to get reliable time
- */
-void inline
-VCilk__start_primitive()
- { //int32 *saveAddr;
-   //saveAddr = &(((VCilkSemEnv *)(_VMSMasterEnv->semanticEnv))->primitiveStartTime);
-   saveLowTimeStampCountInto( (((VCilkSemEnv *)
-                        (_VMSMasterEnv->semanticEnv))->primitiveStartTime) );
- }
-
-/*Just quick and dirty for now -- make reliable later
- * will want this to jump back several times -- to be sure cache is warm
- * because don't want comm time included in calc-time measurement -- and
- * also to throw out any "weird" values due to OS interrupt or TSC rollover
- */
-int32 inline
-VCilk__end_primitive_and_give_cycles()
- { int32 endTime, startTime;
-   //TODO: fix by repeating time-measurement
-   saveLowTimeStampCountInto( endTime );
-   startTime = ( (VCilkSemEnv *)
-                 (_VMSMasterEnv->semanticEnv))->primitiveStartTime;
-   return (endTime - startTime);
- }
-
-//===========================================================================
-//
-/*Initializes all the data-structures for a VCilk system -- but doesn't
- * start it running yet!
- *
- *This and its callees run in main thread outside VMS
- * 
- *This sets up the semantic layer over the VMS system
- *
- *First, calls VMS_Setup, then creates own environment, making it ready
- * for creating the seed processor and then starting the work.
- */
-void
-VCilk__init()
- {
-   VMS__init();
-      //masterEnv, a global var, now is partially set up by init_VMS
-
-   VCilk__init_Helper();
- }
-
-void
-VCilk__init_Seq()
- {
-   VMS__init_Seq();
-      //masterEnv, a global var, now is partially set up by init_VMS
-
-   VCilk__init_Helper();
- }
-
-/*Runs in main thread before VMS system starts
- */
-void
-VCilk__init_Helper()
- { VCilkSemEnv     *semanticEnv;
-   PrivQueueStruc **readyVPQs;
-   int              coreIdx;
- 
-      //Hook up the semantic layer's plug-ins to the Master virt procr
-   _VMSMasterEnv->requestHandler = &VCilk__Request_Handler;
-   _VMSMasterEnv->slaveScheduler = &VCilk__schedule_virt_procr;
-
-      //create the semantic layer's environment (all its data) and add to
-      // the master environment
-   semanticEnv = VMS__malloc( sizeof( VCilkSemEnv ) );
-   _VMSMasterEnv->semanticEnv = semanticEnv;
-
-      //create the ready queue, hash tables used for pairing send to receive
-      // and so forth
-      //TODO: add hash tables for pairing sends with receives, and
-      // initialize the data ownership system
-   readyVPQs = VMS__malloc( NUM_CORES * sizeof(PrivQueueStruc *) );
-
-   for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ )
-    {
-      readyVPQs[ coreIdx ] = makeVMSPrivQ();
-    }
-   
-   semanticEnv->readyVPQs = readyVPQs;
-   
-   semanticEnv->nextCoreToGetNewPr = 0;
-
-   //TODO: bug -- turn these arrays into dyn arrays to eliminate limit
-   //semanticEnv->singletonHasBeenExecutedFlags = makeDynArrayInfo( );
-   //semanticEnv->transactionStrucs = makeDynArrayInfo( );
-   //something like: setHighestIdx( dynArrayInfo, NUM_STRUCS_IN_SEM_ENV )
-   int32 i;
-   for( i = 0; i < NUM_STRUCS_IN_SEM_ENV; i++ )
-    {
-      semanticEnv->fnSingletons[i].endInstrAddr      = NULL;
-      semanticEnv->fnSingletons[i].hasBeenStarted    = FALSE;
-      semanticEnv->fnSingletons[i].hasFinished       = FALSE;
-      semanticEnv->fnSingletons[i].waitQ             = makeVMSPrivQ();
-      semanticEnv->transactionStrucs[i].waitingVPQ   = makeVMSPrivQ();
-    }
-
- }
-
-
-/*Runs in main thread, outside VMS
- *Frees any memory allocated by VCilk__init() then calls VMS's cleanup
- */
-void
-VCilk__cleanup_at_end_of_shutdown()
- { VCilkSemEnv *semanticEnv;
- 
-   semanticEnv = _VMSMasterEnv->semanticEnv;
-
-   /*
-   int32 coreIdx;
-   for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ )
-    {
-      VMS__free( semanticEnv->readyVPQs[coreIdx]->startOfData );
-      VMS__free( semanticEnv->readyVPQs[coreIdx] );
-    }
-   VMS__free( semanticEnv->readyVPQs );
-   
-   VMS__free( _VMSMasterEnv->semanticEnv );
-    */
-   VMS__cleanup_at_end_of_shutdown();
- }
-
-
-//===========================================================================
-
-
-/*Spawn involves allocating mem as well as creating processor which itself
- * allocates, so has to be done inside master
- */
-void inline
-VCilk__spawn( int32  coreToSpawnOnto, VirtProcrFnPtr  fnPtr,
-              void  *initData,        VirtProcr      *requestingPr )
- { VCilkSemReq reqData;
-
-      //the semantic request data is on the stack and disappears when this
-      // call returns -- it's guaranteed to remain in the VP's stack for as
-      // long as the VP is suspended.
-   reqData.reqType         = 0; //know it's type because in a VMS create req
-   reqData.coreToSpawnOnto = coreToSpawnOnto;
-   reqData.fnPtr           = fnPtr;
-   reqData.initData        = initData;
-   reqData.requestingPr    = requestingPr;
-
-   VMS__send_create_procr_req( &reqData, requestingPr );
- } 
-
-
-int32
-VCilk__give_number_of_cores_to_spawn_onto()
- {
-   return NUM_CORES;
- }
-
-
-
-/*This runs inside slave VP, so can't do any freeing -- have to do in plugin
- */
-void inline
-VCilk__dissipate_procr( VirtProcr *procrToDissipate )
- { 
-
-   VMS__send_dissipate_req( procrToDissipate );
- }
-
-//===========================================================================
-
-void
-VCilk__sync( VirtProcr *animPr )
- { VCilkSemReq reqData;
- 
-   reqData.reqType      = syncReq;
-   reqData.requestingPr = animPr;
-
-   VMS__send_sem_request( &reqData, animPr );
- }
-
-
-
-void *
-VCilk__malloc( int32 sizeToMalloc, VirtProcr *animPr )
- { VCilkSemReq reqData;
-
-   reqData.reqType      = mallocReq;
-   reqData.requestingPr = animPr;
-   reqData.sizeToMalloc = sizeToMalloc;
-
-   VMS__send_sem_request( &reqData, animPr );
-
-   return animPr->dataRetFromReq;
- }
-
-
-/*Sends request to Master, which does the work of freeing
- */
-void
-VCilk__free( void *ptrToFree, VirtProcr *animPr )
- { VCilkSemReq reqData;
-
-   reqData.reqType      = freeReq;
-   reqData.requestingPr = animPr;
-   reqData.ptrToFree    = ptrToFree;
-
-   VMS__send_sem_request( &reqData, animPr );
- }
-
-//===========================================================================
-//
-/*A function singleton is a function whose body executes exactly once, on a
- * single core, no matter how many times the fuction is called and no
- * matter how many cores or the timing of cores calling it.
- *
- *A data singleton is a ticket attached to data.  That ticket can be used
- * to get the data through the function exactly once, no matter how many
- * times the data is given to the function, and no matter the timing of
- * trying to get the data through from different cores.
- */
-
-/*Fn singleton uses ID as index into array of singleton structs held in the
- * semantic environment.
- */
-void
-VCilk__start_fn_singleton( int32 singletonID,   VirtProcr *animPr )
- {
-   VCilkSemReq  reqData;
-
-      //
-   reqData.reqType     = singleton_fn_start;
-   reqData.singletonID = singletonID;
-
-   VMS__send_sem_request( &reqData, animPr );
-   if( animPr->dataRetFromReq ) //will be 0 or addr of label in end singleton
-    {
-      asm volatile("movl         %0,      %%eax;  \
-                    jmp                  *%%eax"  \
-      /* outputs */ :                             \
-      /* inputs  */ : "g"(animPr->dataRetFromReq) \
-      /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx","%edi","%esi"\
-                   );
-    }
- }
-
-/*Data singleton hands addr of loc holding a pointer to a singleton struct.
- * The start_data_singleton makes the structure and puts its addr into the
- * location.
- */
-void
-VCilk__start_data_singleton( VCilkSingleton **singletonAddr,  VirtProcr *animPr )
- {
-   VCilkSemReq  reqData;
-
-   if( *singletonAddr && (*singletonAddr)->hasFinished )
-      goto JmpToEndSingleton;
-      //
-   reqData.reqType       = singleton_data_start;
-   reqData.singletonPtrAddr = singletonAddr;
-
-   VMS__send_sem_request( &reqData, animPr );
-   if( animPr->dataRetFromReq ) //either 0 or end singleton's return addr
-    {    //Assembly code changes the return addr on the stack to the one
-         // saved into the singleton by the end-singleton-fn
-         //The return addr is at 0x4(%%ebp)
-      JmpToEndSingleton:
-      asm volatile("movl        %0,      %%eax;   \
-                    movl    (%%eax),     %%ebx;   \
-                    movl    (%%ebx),     %%eax;   \
-                    movl     %%eax,  0x4(%%ebp);" \
-      /* outputs */ :                             \
-      /* inputs  */ : "m"(singletonAddr) \
-      /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx","%edi","%esi"\
-                   );
-    }
-   //now, simply return
-   //will exit either from the start singleton call or the end-singleton call
- }
-
-/*Uses ID as index into array of flags.  If flag already set, resumes from
- * end-label.  Else, sets flag and resumes normally.
- *
- *Note, this call cannot be inlined because the instr addr at the label
- * inside is shared by all invocations of a given singleton ID.
- */
-void
-VCilk__end_fn_singleton( int32 singletonID, VirtProcr *animPr )
- {
-   VCilkSemReq  reqData;
-
-      //don't need this addr until after at least one singleton has reached
-      // this function
-   VCilkSemEnv *semEnv = VMS__give_sem_env_for( animPr );
-   semEnv->fnSingletons[ singletonID].endInstrAddr = &&EndSingletonInstrAddr;
-
-   reqData.reqType     = singleton_fn_end;
-   reqData.singletonID = singletonID;
-
-   VMS__send_sem_request( &reqData, animPr );
-
-EndSingletonInstrAddr:
-   return;
- }
-
-void
-VCilk__end_data_singleton(  VCilkSingleton **singletonPtrAddr, VirtProcr *animPr )
- {
-   VCilkSemReq  reqData;
-
-      //don't need this addr until after singleton struct has reached
-      // this function for first time
-      //do assembly that saves the return addr of this fn call into the
-      // data singleton -- that data-singleton can only be given to exactly
-      // one instance in the code of this function.  However, can use this
-      // function in different places for different data-singletons.
-//   (*(singletonAddr))->endInstrAddr =  &&EndDataSingletonInstrAddr;
-
-         //Assembly code takes the return addr off the stack and saves
-         // into the singleton.  The first field in the singleton is the
-         // "endInstrAddr" field, and the return addr is at 0x4(%%ebp)
-      asm volatile("movl 0x4(%%ebp),     %%eax;   \
-                    movl        %0,      %%ebx;   \
-                    movl    (%%ebx),     %%ecx;   \
-                    movl     %%eax,     (%%ecx);" \
-      /* outputs */ :                             \
-      /* inputs  */ : "m"(singletonPtrAddr) \
-      /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx","%edi","%esi"\
-                   );
-
-   reqData.reqType          = singleton_data_end;
-   reqData.singletonPtrAddr = singletonPtrAddr;
-
-   VMS__send_sem_request( &reqData, animPr );
- }
-
-/*This executes the function in the masterVP, so it executes in isolation
- * from any other copies -- only one copy of the function can ever execute
- * at a time.
- *
- *It suspends to the master, and the request handler takes the function
- * pointer out of the request and calls it, then resumes the VP.
- *Only very short functions should be called this way -- for longer-running
- * isolation, use transaction-start and transaction-end, which run the code
- * between as work-code.
- */
-void
-VCilk__animate_short_fn_in_isolation( PtrToAtomicFn ptrToFnToExecInMaster,
-                                    void *data, VirtProcr *animPr )
- {
-   VCilkSemReq  reqData;
-
-      //
-   reqData.reqType          = atomic;
-   reqData.fnToExecInMaster = ptrToFnToExecInMaster;
-   reqData.dataForFn        = data;
-
-   VMS__send_sem_request( &reqData, animPr );
- }
-
-
-/*This suspends to the master.
- *First, it looks at the VP's data, to see the highest transactionID that VP
- * already has entered.  If the current ID is not larger, it throws an
- * exception stating a bug in the code.  Otherwise it puts the current ID
- * there, and adds the ID to a linked list of IDs entered -- the list is
- * used to check that exits are properly ordered.
- *Next it is uses transactionID as index into an array of transaction
- * structures.
- *If the "VP_currently_executing" field is non-null, then put requesting VP
- * into queue in the struct.  (At some point a holder will request
- * end-transaction, which will take this VP from the queue and resume it.)
- *If NULL, then write requesting into the field and resume.
- */
-void
-VCilk__start_transaction( int32 transactionID, VirtProcr *animPr )
- {
-   VCilkSemReq  reqData;
-
-      //
-   reqData.reqType     = trans_start;
-   reqData.transID     = transactionID;
-
-   VMS__send_sem_request( &reqData, animPr );
- }
-
-/*This suspends to the master, then uses transactionID as index into an
- * array of transaction structures.
- *It looks at VP_currently_executing to be sure it's same as requesting VP.
- * If different, throws an exception, stating there's a bug in the code.
- *Next it looks at the queue in the structure.
- *If it's empty, it sets VP_currently_executing field to NULL and resumes.
- *If something in, gets it, sets VP_currently_executing to that VP, then
- * resumes both.
- */
-void
-VCilk__end_transaction( int32 transactionID, VirtProcr *animPr )
- {
-   VCilkSemReq  reqData;
-
-      //
-   reqData.reqType     = trans_end;
-   reqData.transID     = transactionID;
-
-   VMS__send_sem_request( &reqData, animPr );
- }
+/*
+ * Copyright 2010  OpenSourceCodeStewardshipFoundation
+ *
+ * Licensed under BSD
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "VMS/VMS.h"
+#include "VCilk.h"
+#include "VMS/Queue_impl/PrivateQueue.h"
+#include "VMS/Hash_impl/PrivateHash.h"
+
+
+//==========================================================================
+
+void
+VCilk__init(void);        //multi-threaded set-up
+
+void
+VCilk__init_Seq(void);    //sequential set-up, used for debugging
+
+void
+VCilk__init_Helper(void); //set-up steps shared by both of the above
+//==========================================================================
+
+
+/*TODO: Q: dealing with library f()s and DKU vs WT vs FoR
+ * (still want to do FoR, with time-lines as syntax, could be super cool)
+ * A: thinking pin the coreLoops for all of BLIS -- let Master arbitrate
+ * among library, DKU, WT, FoR -- all the patterns in terms of virtual
+ * processors (or equivalently work-units), so Master picks which virt procr
+ * from which portions of app (DKU, WT, FoR) onto which sched slots
+ *Might even do hierarchy of masters -- group of sched slots for each core
+ * has its own master, that keeps generated work local
+ * single-reader-single-writer sync everywhere -- no atomic primitives (but
+ * memory fences on architectures that need them)
+ * Might have the different schedulers talk to each other, to negotiate
+ * larger-grain sharing of resources, according to predicted critical
+ * path, and expansion of work
+ */
+
+
+
+//===========================================================================
+
+
+/*These are the library functions *called in the application*
+ * 
+ *There's a pattern for the outside sequential code to interact with the
+ * VMS_HW code.
+ *The VMS_HW system is inside a boundary..  every VCilk system is in its
+ * own directory that contains the functions for each of the processor types.
+ * One of the processor types is the "seed" processor that starts the
+ * cascade of creating all the processors that do the work.
+ *So, in the directory is a file called "EntryPoint.c" that contains the
+ * function, named appropriately to the work performed, that the outside
+ * sequential code calls.  This function follows a pattern:
+ *1) it calls VCilk__init()
+ *2) it creates the initial data for the seed processor, which is passed
+ *    in to the function
+ *3) it creates the seed VCilk processor, with the data to start it with.
+ *4) it calls startVCilkThenWaitUntilWorkDone
+ *5) it gets the returnValue from the transfer struc and returns that
+ *    from the function
+ *
+ *For now, a new VCilk system has to be created via VCilk__init every
+ * time an entry point function is called -- later, might add letting the
+ * VCilk system be created once, and let all the entry points just reuse
+ * it -- want to be as simple as possible now, and see by using what makes
+ * sense for later..
+ */
+
+
+
+//===========================================================================
+
+/*This is the "border crossing" function -- the thing that crosses from the
+ * outside world, into the VMS_HW world.  It initializes and starts up the
+ * VMS system, then creates one processor from the specified function and
+ * puts it into the readyQ.  From that point, that one function is resp.
+ * for creating all the other processors, that then create others, and so
+ * forth.
+ *When all the processors, including the seed, have dissipated, then this
+ * function returns.  The results will have been written by side-effect via
+ * pointers read from, or written into initData.
+ *
+ *NOTE: no Threads should exist in the outside program that might touch
+ * any of the data reachable from initData passed in to here
+ */
+void
+VCilk__create_seed_procr_and_do_work( VirtProcrFnPtr fnPtr, void *initData )
+ { VCilkSemEnv *env;
+   VirtProcr   *seed;
+
+   #ifndef SEQUENTIAL
+   VCilk__init();      //the usual multi-threaded bring-up
+   #else
+   VCilk__init_Seq();  //sequential variant, for debugging
+   #endif
+   env = _VMSMasterEnv->semanticEnv;
+
+      //Everything grows from this one seed processor: it is created here,
+      // handed to resume_procr, and its function creates all the others
+   seed = (VirtProcr*)VCilk__create_procr_helper( fnPtr, initData, NULL, env, -1 );
+   resume_procr( seed, env );
+
+   #ifndef SEQUENTIAL
+   VMS__start_the_work_then_wait_until_done();      //multi-threaded run
+   #else
+   VMS__start_the_work_then_wait_until_done_Seq();  //debug sequential run
+   #endif
+
+   VCilk__cleanup_at_end_of_shutdown();
+ }
+
+
+inline int32
+VCilk__giveMinWorkUnitCycles( float32 percentOverhead )
+ {    //percentOverhead is not consulted; the answer is a build-time constant
+   return MIN_WORK_UNIT_CYCLES;
+ }
+
+int32
+VCilk__giveIdealNumWorkUnits()
+ { int32 idealUnits = NUM_SCHED_SLOTS * NUM_CORES; //slots times cores
+   return idealUnits;
+ }
+
+/*To measure how long a primitive operation takes, when calculating number of
+ * sub-tasks to divide into.
+ * For now, use TSC -- later, make these two macros with assembly that first
+ * saves jump point, and second jumps back several times to get reliable time
+ */
+void inline
+VCilk__start_primitive()
+ { VCilkSemEnv *env;
+      //the start stamp is kept in the semantic env, where the end call reads it
+   env = (VCilkSemEnv *)(_VMSMasterEnv->semanticEnv);
+   saveLowTimeStampCountInto( env->primitiveStartTime );
+ }
+
+/*Just quick and dirty for now -- make reliable later
+ * will want this to jump back several times -- to be sure cache is warm
+ * because don't want comm time included in calc-time measurement -- and
+ * also to throw out any "weird" values due to OS interrupt or TSC rollover
+ */
+int32 inline
+VCilk__end_primitive_and_give_cycles()
+ { int32        stampAtEnd, stampAtStart;
+   VCilkSemEnv *env;
+   saveLowTimeStampCountInto( stampAtEnd ); //TODO: repeat the measurement
+   env          = (VCilkSemEnv *)(_VMSMasterEnv->semanticEnv);
+   stampAtStart = env->primitiveStartTime;
+   return stampAtEnd - stampAtStart;
+ }
+
+//===========================================================================
+//
+/*Initializes all the data-structures for a VCilk system -- but doesn't
+ * start it running yet!
+ *
+ *This and its callees run in main thread outside VMS
+ * 
+ *This sets up the semantic layer over the VMS system
+ *
+ *First, calls VMS_Setup, then creates own environment, making it ready
+ * for creating the seed processor and then starting the work.
+ */
+void
+VCilk__init()
+ {    //bring up VMS first -- that partially sets up the global master env,
+      // which the helper then hooks this semantic layer into
+   VMS__init();
+
+   VCilk__init_Helper();
+ }
+
+void
+VCilk__init_Seq()
+ {    //sequential counterpart of VCilk__init: VMS__init_Seq partially sets up
+      // the global master env, then the same helper is run
+   VMS__init_Seq();
+
+   VCilk__init_Helper();
+ }
+
+/*Runs in main thread before VMS system starts
+ */
+void
+VCilk__init_Helper()
+ { VCilkSemEnv     *env;
+   PrivQueueStruc **perCoreReadyQs;
+   int              core;
+   int32            slot;
+
+      //Point the master environment's plug-in hooks at this layer's
+      // request handler and scheduler
+   _VMSMasterEnv->requestHandler = &VCilk__Request_Handler;
+   _VMSMasterEnv->slaveScheduler = &VCilk__schedule_virt_procr;
+
+      //Allocate the semantic environment, which holds all of this layer's
+      // data, and attach it to the master environment
+   env = VMS__malloc( sizeof( VCilkSemEnv ) );
+   _VMSMasterEnv->semanticEnv = env;
+
+      //One ready-queue per core
+      //TODO: add hash tables for pairing sends with receives, and
+      // initialize the data ownership system
+   perCoreReadyQs = VMS__malloc( NUM_CORES * sizeof(PrivQueueStruc *) );
+
+   for( core = 0; core < NUM_CORES; core++ )
+    {
+      perCoreReadyQs[ core ] = makeVMSPrivQ();
+    }
+
+   env->readyVPQs          = perCoreReadyQs;
+   env->nextCoreToGetNewPr = 0;
+
+      //Each function singleton starts with no end address recorded and
+      // both flags cleared; every singleton and every transaction struct
+      // gets its own queue.
+      //TODO: bug -- turn these arrays into dyn arrays to eliminate limit
+      // something like: setHighestIdx( dynArrayInfo, NUM_STRUCS_IN_SEM_ENV )
+   for( slot = 0; slot < NUM_STRUCS_IN_SEM_ENV; slot++ )
+    {
+      env->fnSingletons[slot].endInstrAddr    = NULL;
+      env->fnSingletons[slot].hasBeenStarted  = FALSE;
+      env->fnSingletons[slot].hasFinished     = FALSE;
+      env->fnSingletons[slot].waitQ           = makeVMSPrivQ();
+      env->transactionStrucs[slot].waitingVPQ = makeVMSPrivQ();
+    }
+
+ }
+
+
+/*Runs in main thread, outside VMS.  The code that would free what the init
+ * helper allocated is commented out below, so this only calls VMS's cleanup
+ */
+void
+VCilk__cleanup_at_end_of_shutdown()
+ { VCilkSemEnv *semanticEnv;
+ 
+   semanticEnv = _VMSMasterEnv->semanticEnv;
+      //NOTE(review): semanticEnv is only referenced by the disabled code below
+   /*
+   int32 coreIdx;
+   for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ )
+    {
+      VMS__free( semanticEnv->readyVPQs[coreIdx]->startOfData );
+      VMS__free( semanticEnv->readyVPQs[coreIdx] );
+    }
+   VMS__free( semanticEnv->readyVPQs );
+   
+   VMS__free( _VMSMasterEnv->semanticEnv );
+    */
+   VMS__cleanup_at_end_of_shutdown();
+ }
+
+
+//===========================================================================
+
+
+/*Spawn involves allocating mem as well as creating processor which itself
+ * allocates, so has to be done inside master
+ */
+void inline
+VCilk__spawn( int32  coreToSpawnOnto, VirtProcrFnPtr  fnPtr,
+              void  *initData,        VirtProcr      *requestingPr )
+ { VCilkSemReq spawnReq;
+
+      //spawnReq lives on this VP's stack, which is guaranteed to stay in
+      // place for as long as the VP is suspended, so sending its addr is ok.
+      //reqType is 0 -- the type is known from this being a VMS create req
+   spawnReq.requestingPr    = requestingPr;
+   spawnReq.fnPtr           = fnPtr;
+   spawnReq.initData        = initData;
+   spawnReq.coreToSpawnOnto = coreToSpawnOnto;
+   spawnReq.reqType         = 0;
+
+   VMS__send_create_procr_req( &spawnReq, requestingPr );
+ }
+
+
+int32
+VCilk__give_number_of_cores_to_spawn_onto()
+ { int32 numCores = NUM_CORES;  //simply the build-time core count
+   return numCores;
+ }
+
+
+
+/*This runs inside slave VP, so can't do any freeing -- have to do in plugin
+ */
+void inline
+VCilk__dissipate_procr( VirtProcr *procrToDissipate )
+ {    //only forwards a dissipate request for the given VP; nothing is
+      // freed from in here
+   VMS__send_dissipate_req( procrToDissipate );
+ }
+
+//===========================================================================
+
+void
+VCilk__sync( VirtProcr *animPr )
+ { VCilkSemReq syncRequest;
+      //a sync request names only its type and the requesting VP
+   syncRequest.requestingPr = animPr;
+   syncRequest.reqType      = syncReq;
+
+   VMS__send_sem_request( &syncRequest, animPr );
+ }
+
+
+
+void *
+VCilk__malloc( int32 sizeToMalloc, VirtProcr *animPr )
+ { VCilkSemReq allocRequest;
+   void       *allocated;
+   allocRequest.sizeToMalloc = sizeToMalloc;
+   allocRequest.requestingPr = animPr;
+   allocRequest.reqType      = mallocReq;
+   VMS__send_sem_request( &allocRequest, animPr );
+      //the answer is read back out of the VP's dataRetFromReq field
+   allocated = animPr->dataRetFromReq;
+   return allocated;
+ }
+
+
+/*Sends request to Master, which does the work of freeing
+ */
+void
+VCilk__free( void *ptrToFree, VirtProcr *animPr )
+ { VCilkSemReq freeRequest;
+      //a free request carries the pointer to free and the requesting VP
+   freeRequest.ptrToFree    = ptrToFree;
+   freeRequest.requestingPr = animPr;
+   freeRequest.reqType      = freeReq;
+
+   VMS__send_sem_request( &freeRequest, animPr );
+ }
+
+//===========================================================================
+//
+/*A function singleton is a function whose body executes exactly once, on a
+ * single core, no matter how many times the function is called and no
+ * matter how many cores or the timing of cores calling it.
+ *
+ *A data singleton is a ticket attached to data.  That ticket can be used
+ * to get the data through the function exactly once, no matter how many
+ * times the data is given to the function, and no matter the timing of
+ * trying to get the data through from different cores.
+ */
+
+/*Fn singleton uses ID as index into array of singleton structs held in the
+ * semantic environment.
+ */
+void
+VCilk__start_fn_singleton( int32 singletonID,   VirtProcr *animPr )
+ {
+   VCilkSemReq  reqData;
+      //NOTE(review): the inline asm below is 32-bit x86 (movl, %eax)
+      //only the request type and the singleton ID are filled in before sending
+   reqData.reqType     = singleton_fn_start;
+   reqData.singletonID = singletonID;
+
+   VMS__send_sem_request( &reqData, animPr );
+   if( animPr->dataRetFromReq ) //will be 0 or addr of label in end singleton
+    {    //non-zero reply: load it into eax and jump straight to that address
+      asm volatile("movl         %0,      %%eax;  \
+                    jmp                  *%%eax"  \
+      /* outputs */ :                             \
+      /* inputs  */ : "g"(animPr->dataRetFromReq) \
+      /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx","%edi","%esi"\
+                   );
+    }
+ }
+
+/*Data singleton hands addr of loc holding a pointer to a singleton struct.
+ * The start_data_singleton makes the structure and puts its addr into the
+ * location.
+ */
+void
+VCilk__start_data_singleton( VCilkSingleton **singletonAddr,  VirtProcr *animPr )
+ {
+   VCilkSemReq  reqData;
+      //fast path: a singleton that exists and has finished skips the request
+   if( *singletonAddr && (*singletonAddr)->hasFinished )
+      goto JmpToEndSingleton;
+      //otherwise send a start request carrying the addr of the singleton ptr
+   reqData.reqType       = singleton_data_start;
+   reqData.singletonPtrAddr = singletonAddr;
+
+   VMS__send_sem_request( &reqData, animPr );
+   if( animPr->dataRetFromReq ) //either 0 or end singleton's return addr
+    {    //Assembly code changes the return addr on the stack to the one
+         // saved into the singleton by the end-singleton-fn
+         //The return addr is at 0x4(%%ebp)
+      JmpToEndSingleton:
+      asm volatile("movl        %0,      %%eax;   \
+                    movl    (%%eax),     %%ebx;   \
+                    movl    (%%ebx),     %%eax;   \
+                    movl     %%eax,  0x4(%%ebp);" \
+      /* outputs */ :                             \
+      /* inputs  */ : "m"(singletonAddr) \
+      /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx","%edi","%esi"\
+                   );
+    }
+   //now, simply return -- control leaves either right after the start-singleton
+   // call or, if the return addr was overwritten above, after the end-singleton call
+ }
+
+/*Uses ID as index into array of flags.  If flag already set, resumes from
+ * end-label.  Else, sets flag and resumes normally.
+ *
+ *Note, this call cannot be inlined because the instr addr at the label
+ * inside is shared by all invocations of a given singleton ID.
+ */
+void
+VCilk__end_fn_singleton( int32 singletonID, VirtProcr *animPr )
+ {
+   VCilkSemReq  reqData;
+      //store the addr of the label below into this ID's singleton struct
+      //don't need this addr until after at least one singleton has reached
+      // this function
+   VCilkSemEnv *semEnv = VMS__give_sem_env_for( animPr );
+   semEnv->fnSingletons[ singletonID].endInstrAddr = &&EndSingletonInstrAddr;
+      //then send the end request for this singleton ID
+   reqData.reqType     = singleton_fn_end;
+   reqData.singletonID = singletonID;
+
+   VMS__send_sem_request( &reqData, animPr );
+      //the label whose address was stored above (&& is GCC's label-address extension)
+EndSingletonInstrAddr:
+   return;
+ }
+
+void
+VCilk__end_data_singleton(  VCilkSingleton **singletonPtrAddr, VirtProcr *animPr )
+ {
+   VCilkSemReq  reqData;
+      //NOTE(review): asm below is 32-bit x86 and reads the return addr via ebp
+      //don't need this addr until after singleton struct has reached
+      // this function for first time
+      //do assembly that saves the return addr of this fn call into the
+      // data singleton -- that data-singleton can only be given to exactly
+      // one instance in the code of this function.  However, can use this
+      // function in different places for different data-singletons.
+//   (*(singletonAddr))->endInstrAddr =  &&EndDataSingletonInstrAddr;
+
+         //Assembly code takes the return addr off the stack and saves
+         // into the singleton.  The first field in the singleton is the
+         // "endInstrAddr" field, and the return addr is at 0x4(%%ebp)
+      asm volatile("movl 0x4(%%ebp),     %%eax;   \
+                    movl        %0,      %%ebx;   \
+                    movl    (%%ebx),     %%ecx;   \
+                    movl     %%eax,     (%%ecx);" \
+      /* outputs */ :                             \
+      /* inputs  */ : "m"(singletonPtrAddr) \
+      /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx","%edi","%esi"\
+                   );
+      //now send the end request, carrying the addr of the singleton pointer
+   reqData.reqType          = singleton_data_end;
+   reqData.singletonPtrAddr = singletonPtrAddr;
+
+   VMS__send_sem_request( &reqData, animPr );
+ }
+
+/*This executes the function in the masterVP, so it executes in isolation
+ * from any other copies -- only one copy of the function can ever execute
+ * at a time.
+ *
+ *It suspends to the master, and the request handler takes the function
+ * pointer out of the request and calls it, then resumes the VP.
+ *Only very short functions should be called this way -- for longer-running
+ * isolation, use transaction-start and transaction-end, which run the code
+ * between as work-code.
+ */
+void
+VCilk__animate_short_fn_in_isolation( PtrToAtomicFn ptrToFnToExecInMaster,
+                                    void *data, VirtProcr *animPr )
+ { VCilkSemReq isolatedCall;
+
+      //package the function pointer and its data into a request of type
+      // "atomic", then send it
+   isolatedCall.dataForFn        = data;
+   isolatedCall.fnToExecInMaster = ptrToFnToExecInMaster;
+   isolatedCall.reqType          = atomic;
+
+   VMS__send_sem_request( &isolatedCall, animPr );
+ }
+
+
+/*This suspends to the master.
+ *First, it looks at the VP's data, to see the highest transactionID that VP
+ * already has entered.  If the current ID is not larger, it throws an
+ * exception stating a bug in the code.  Otherwise it puts the current ID
+ * there, and adds the ID to a linked list of IDs entered -- the list is
+ * used to check that exits are properly ordered.
+ *Next it uses transactionID as index into an array of transaction
+ * structures.
+ *If the "VP_currently_executing" field is non-null, then put requesting VP
+ * into queue in the struct.  (At some point a holder will request
+ * end-transaction, which will take this VP from the queue and resume it.)
+ *If NULL, then write requesting into the field and resume.
+ */
+void
+VCilk__start_transaction( int32 transactionID, VirtProcr *animPr )
+ { VCilkSemReq startRequest;
+
+      //only the transaction ID and the request type are filled in
+      //NOTE(review): requestingPr stays unset here, unlike in VCilk__sync
+   startRequest.transID = transactionID;
+   startRequest.reqType = trans_start;
+
+   VMS__send_sem_request( &startRequest, animPr );
+ }
+
+/*This suspends to the master, then uses transactionID as index into an
+ * array of transaction structures.
+ *It looks at VP_currently_executing to be sure it's same as requesting VP.
+ * If different, throws an exception, stating there's a bug in the code.
+ *Next it looks at the queue in the structure.
+ *If it's empty, it sets VP_currently_executing field to NULL and resumes.
+ *If something in, gets it, sets VP_currently_executing to that VP, then
+ * resumes both.
+ */
+void
+VCilk__end_transaction( int32 transactionID, VirtProcr *animPr )
+ { VCilkSemReq endRequest;
+
+      //only the transaction ID and the request type are filled in
+      //NOTE(review): requestingPr stays unset here, unlike in VCilk__sync
+   endRequest.transID = transactionID;
+   endRequest.reqType = trans_end;
+
+   VMS__send_sem_request( &endRequest, animPr );
+ }