# HG changeset patch # User Me # Date 1284748442 25200 # Node ID 4aca264971b5335b11a2635639863cdb6d6ab020 Initial add -- works w/matrix mult on 9x9 but dies on larger diff -r 000000000000 -r 4aca264971b5 DESIGN_NOTES__VPThread__lib.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/DESIGN_NOTES__VPThread__lib.txt Fri Sep 17 11:34:02 2010 -0700 @@ -0,0 +1,82 @@ + +Implement VPThread this way: + +We implemented a subset of PThreads functionality, called VMSPThd, that +includes: mutex_lock, mutex_unlock, cond_wait, and cond_signal, which we name +as VMSPThd__mutex_lock and so forth. \ All VMSPThd functions take a reference +to the AppVP that is animating the function call, in addition to any other +parameters. + +A mutex variable is an integer, returned by VMSPThd__mutex_create(), which is +used inside the request handler as a key to look up an entry in a hash table, +that lives in the SemanticEnv. \ Such an entry has a field holding a +reference to the AppVP that currently owns the lock, and a queue of AppVPs +waiting to acquire the lock. \ + +Acquiring a lock is done with VMSPThd__mutex_lock(), which generates a +request. \ Recall that all request sends cause the suspension of the AppVP +that is animating the library call that generates the request, in this case +the AppVP animating VMSPThd__mutex_lock() is suspended. \ The request +includes a reference to that animating AppVP, and the mutex integer value. +\ When the request reaches the request handler, the mutex integer is used as +key to look up the hash entry, then if the owner field is null (or the same +as the AppVP in the request), the AppVP in the request is placed into the +owner field, and that AppVP is queued to be scheduled for re-animation. +\ However, if a different AppVP is listed in the owner field, then the AppVP +in the request is added to the queue of those trying to acquire. \ Notice +that this is a purely sequential algorithm that systematic reasoning can be +used on. 
+ +VMSPThd__mutex_unlock(), meanwhile, generates a request that causes the +request handler to queue for re-animation the AppVP that animated the call. +\ It also pops the queue of AppVPs waiting to acquire the lock, and writes +the AppVP that comes out as the current owner of the lock and queues that +AppVP for re-animation (unless the popped value is null, in which case the +current owner is just set to null). + +Implementing condition variables takes a similar approach, in that +VMSPThd__init_cond() returns an integer that is then used to look up an entry +in a hash table, where the entry contains a queue of AppVPs waiting on the +condition variable. \ VMSPThd__cond_wait() generates a request that pushes +the AppVP into the queue, while VMSPThd__cond_signal() takes a wait request +from the queue. + +Notice that this is again a purely sequential algorithm, and sidesteps issues +such as ``simultaneous'' wait and signal requests -- the wait and signal get +serialized automatically, even though they take place at the same instant of +program virtual time. \ + +It is the fact of having a program virtual time that allows ``virtual +simultaneous'' actions to be handled outside of the virtual time. \ That +ability to escape outside of the virtual time is what enables a +sequential algorithm to handle the simultaneity that is at the heart of +making implementing locks in physical time so intricately tricky +to get right. +\ + +What's nice about this approach is that the design and implementation are +simple and straightforward. \ It took just X days to design, implement, and +debug, and is in a form that should be amenable to proof of freedom from race +conditions, given a correct implementation of VMS. \ The hash-table based +approach also makes it reasonably high performance, with (essentially) no +slowdown when the number of locks or number of AppVPs grows large. + +=========================== +Behavior: +Cond variables are half of a two-piece mechanism. The other half is a mutex. 
+ Every cond var owns a mutex -- the two intrinsically work + together, as a pair. The mutex must only be used with the condition var + and not used on its own in other ways. + +cond_wait is called with a cond-var and its mutex. +The animating processor must have acquired the mutex before calling cond_wait +The call adds the animating processor to the queue associated with the cond +variable and then calls mutex_unlock on the mutex. + +cond_signal can only be called after acquiring the cond var's mutex. It is +called with the cond-var. + The call takes the next processor from the condition-var's wait queue and + transfers it to the waiting-for-lock queue of the cond-var's mutex. +The processor that called the cond_signal next has to perform a mutex_unlock + on the cond-var's mutex -- that, finally, lets the waiting processor acquire + the mutex and proceed. diff -r 000000000000 -r 4aca264971b5 VPThread.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VPThread.h Fri Sep 17 11:34:02 2010 -0700 @@ -0,0 +1,153 @@ +/* + * Copyright 2009 OpenSourceStewardshipFoundation.org + * Licensed under GNU General Public License version 2 + * + * Author: seanhalle@yahoo.com + * + */ + +#ifndef _VPThread_H +#define _VPThread_H + +#include "VMS/VMS.h" +#include "VMS/Queue_impl/PrivateQueue.h" +#include "VMS/DynArray/DynArray.h" + + +//=========================================================================== +#define INIT_NUM_MUTEX 10000 +#define INIT_NUM_COND 10000 +//=========================================================================== + +/*This header defines everything specific to the VPThread semantic plug-in + */ +typedef struct _VPThreadSemReq VPThreadSemReq; + + +/*Semantic layer-specific data sent inside a request from lib called in app + * to request handler called in MasterLoop + */ +enum VPThreadReqType + { + make_mutex = 1, + mutex_lock, + mutex_unlock, + make_cond, + cond_wait, + cond_signal, + make_procr + }; + +struct _VPThreadSemReq + { enum 
VPThreadReqType reqType; + VirtProcr *requestingPr; + int32 mutexIdx; + int32 condIdx; + void *initData; + VirtProcrFnPtr fnPtr; + } +/* VPThreadSemReq */; + +typedef struct + { + //Standard stuff will be in most every semantic env + PrivQueueStruc **readyVPQs; + int32 numVirtPr; + int32 nextCoreToGetNewPr; + + //Specific to this semantic layer + int32 currMutexIdx; + DynArray32 *mutexDynArray; + + int32 currCondIdx; + DynArray32 *condDynArray; + + void *applicationGlobals; + } +VPThreadSemEnv; + + +typedef struct + { + int32 mutexIdx; + VirtProcr *holderOfLock; + PrivQueueStruc *waitingQueue; + } +VPTMutex; + + +typedef struct + { + int32 condIdx; + PrivQueueStruc *waitingQueue; + VPTMutex *partnerMutex; + } +VPTCond; + + +//=========================================================================== + +void +VPThread__create_seed_procr_and_do_work( VirtProcrFnPtr fn, void *initData ); + +//======================= + +inline VirtProcr * +VPThread__create_thread( VirtProcrFnPtr fnPtr, void *initData, + VirtProcr *creatingPr ); + +void +VPThread__dissipate_thread( VirtProcr *procrToDissipate ); + +//======================= +void +VPThread__set_globals_to( void *globals ); + +void * +VPThread__give_globals(); + +//======================= +int32 +VPThread__make_mutex( VirtProcr *animPr ); + +void +VPThread__mutex_lock( int32 mutexIdx, VirtProcr *acquiringPr ); + +void +VPThread__mutex_unlock( int32 mutexIdx, VirtProcr *releasingPr ); + + +//======================= +int32 +VPThread__make_cond( int32 ownedMutexIdx, VirtProcr *animPr); + +void +VPThread__cond_wait( int32 condIdx, VirtProcr *waitingPr); + +void * +VPThread__cond_signal( int32 condIdx, VirtProcr *signallingPr ); + + + + +//========================= Internal use only ============================= +void +VPThread__Request_Handler( VirtProcr *requestingPr, void *_semEnv ); + +VirtProcr * +VPThread__schedule_virt_procr( void *_semEnv, int coreNum ); + +//======================= +void 
+VPThread__free_semantic_request( VPThreadSemReq *semReq ); + +//======================= + +void +VPThread__init(); + +void +VPThread__cleanup_after_shutdown(); + +#endif /* _VPThread_H */ + diff -r 000000000000 -r 4aca264971b5 VPThread__PluginFns.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VPThread__PluginFns.c Fri Sep 17 11:34:02 2010 -0700 @@ -0,0 +1,110 @@ +/* + * Copyright 2010 OpenSourceCodeStewardshipFoundation + * + * Licensed under BSD + */ + +#include +#include +#include + +#include "VMS/Queue_impl/PrivateQueue.h" +#include "VPThread.h" +#include "VPThread__Request_Handlers.h" + + +/*Will get requests to send, to receive, and to create new processors. + * Upon send, check the hash to see if a receive is waiting. + * Upon receive, check hash to see if a send has already happened. + * When other is not there, put in. When other is there, the comm. + * completes, which means the receiver P gets scheduled and + * picks up right after the receive request. So make the work-unit + * and put it into the queue of work-units ready to go. + * Other request is create a new Processor, with the function to run in the + * Processor, and initial data. + */ +void +VPThread__Request_Handler( VirtProcr *requestingPr, void *_semEnv ) + { VPThreadSemEnv *semEnv; + VMSReqst *req; + VPThreadSemReq *semReq; + + semEnv = (VPThreadSemEnv *)_semEnv; + + req = VMS__take_top_request_from( requestingPr ); + + while( req != NULL ) + { + if( VMS__isSemanticReqst( req ) ) + { + semReq = VMS__take_sem_reqst_from( req ); + if( semReq == NULL ) goto DoneHandlingReqst; + switch( semReq->reqType ) + { + case make_mutex: handleMakeMutex( semReq, semEnv); + break; + case mutex_lock: handleMutexLock( semReq, semEnv); + break; + case mutex_unlock: handleMutexUnlock(semReq, semEnv); + break; + case make_cond: handleMakeCond( semReq, semEnv); + break; + case cond_wait: handleCondWait( semReq, semEnv); + break; + case cond_signal: handleCondSignal( semReq, semEnv); + //need? 
VPThread__free_semantic_request( semReq ); + break; + case make_procr: handleMakeProcr( semReq, semEnv); + break; + //TODO: make sure free the semantic request! + } + //NOTE: freeing semantic request data strucs handled inside these + } + else if( VMS__isDissipateReqst( req ) ) //Standard VMS request + { //Another standard VMS request that the plugin has to handle + //This time, plugin has to free the semantic data it may have + // allocated into the virt procr -- and clear the AppVP out of + // any data structs the plug-in may have put it into, like hash + // tables. + + //Now, call VMS to free all AppVP state -- stack and so on + VMS__handle_dissipate_reqst( requestingPr ); + + //Keep count of num AppVPs, so know when to shutdown + semEnv->numVirtPr -= 1; + if( semEnv->numVirtPr == 0 ) + { //no more work, so shutdown + VMS__handle_shutdown_reqst( requestingPr ); + } + } + + DoneHandlingReqst: + //Here, free VMS's request structure, no matter what -- even though + // semantic request struc instances may still be around.. + //This call frees VMS's portion, then returns the next request + req = VMS__free_top_and_give_next_request_from( requestingPr ); + } //while( req != NULL ) + } + +//=========================================================================== + + +/*For VPThread, scheduling a slave simply takes the next work-unit off the + * ready-to-go work-unit queue and assigns it to the slaveToSched. + *If the ready-to-go work-unit queue is empty, then nothing to schedule + * to the slave -- return FALSE to let Master loop know scheduling that + * slave failed. 
+ */ +VirtProcr * +VPThread__schedule_virt_procr( void *_semEnv, int coreNum ) + { VirtProcr *schedPr; + VPThreadSemEnv *semEnv; + + semEnv = (VPThreadSemEnv *)_semEnv; + + schedPr = readPrivQ( semEnv->readyVPQs[coreNum] ); + //Note, using a non-blocking queue -- it returns NULL if queue empty + + return( schedPr ); + } + diff -r 000000000000 -r 4aca264971b5 VPThread__Request_Handlers.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VPThread__Request_Handlers.c Fri Sep 17 11:34:02 2010 -0700 @@ -0,0 +1,222 @@ +/* + * Copyright 2010 OpenSourceCodeStewardshipFoundation + * + * Licensed under BSD + */ + +#include +#include +#include + +#include "VMS/VMS.h" +#include "VMS/Queue_impl/PrivateQueue.h" +#include "VMS/Hash_impl/PrivateHash.h" +#include "VPThread.h" + + + +//=============================== Mutexes ================================= +/*The semantic request has a mutexIdx value, which acts as index into array. + */ +void +handleMakeMutex( VPThreadSemReq *semReq, VPThreadSemEnv *semEnv) + { VPTMutex *newMutex; + VirtProcr *pr; + int32 mutexIdx; + + newMutex = malloc( sizeof(VPTMutex) ); + newMutex->waitingQueue = makePrivQ(); + newMutex->holderOfLock = NULL; + newMutex->mutexIdx = semEnv->currMutexIdx++; + mutexIdx = newMutex->mutexIdx; + + //The mutex struc contains an int that identifies it -- use that as + // its index within the array of mutexes. Add the new mutex to array. 
+ makeArray32BigEnoughForIndex( semEnv->mutexDynArray, mutexIdx ); + semEnv->mutexDynArray->array[ mutexIdx ] = newMutex; + + //Now communicate the mutex's identifying int back to requesting procr + semReq->requestingPr->semanticData = newMutex->mutexIdx; + + //re-animate the requester + pr = semReq->requestingPr; + writePrivQ( pr, semEnv->readyVPQs[pr->coreAnimatedBy] ); + } + + +void +handleMutexLock( VPThreadSemReq *semReq, VPThreadSemEnv *semEnv) + { VPTMutex *mutex; + VirtProcr *pr; + + //=================== Deterministic Replay ====================== + #ifdef RECORD_DETERMINISTIC_REPLAY + + #endif + //================================================================= + //lookup mutex struc, using mutexIdx as index + mutex = semEnv->mutexDynArray->array[ semReq->mutexIdx ]; + + //see if mutex is free or not + if( mutex->holderOfLock == NULL ) //none holding, give lock to requester + { + mutex->holderOfLock = semReq->requestingPr; + + //re-animate requester, now that it has the lock + pr = semReq->requestingPr; + writePrivQ( pr, semEnv->readyVPQs[pr->coreAnimatedBy] ); + } + else //queue up requester to wait for release of lock + { + writePrivQ( semReq->requestingPr, mutex->waitingQueue ); + } + } + +/* + */ +void +handleMutexUnlock( VPThreadSemReq *semReq, VPThreadSemEnv *semEnv) + { VPTMutex *mutex; + VirtProcr *pr; + + //lookup mutex struc, using mutexIdx as index + mutex = semEnv->mutexDynArray->array[ semReq->mutexIdx ]; + + //set new holder of mutex-lock to be next in queue (NULL if empty) + mutex->holderOfLock = readPrivQ( mutex->waitingQueue ); + + //if have new non-NULL holder, re-animate it + if( mutex->holderOfLock != NULL ) + { + pr = mutex->holderOfLock; + writePrivQ( pr, semEnv->readyVPQs[pr->coreAnimatedBy] ); + } + + //re-animate the releaser of the lock + pr = semReq->requestingPr; + writePrivQ( pr, semEnv->readyVPQs[pr->coreAnimatedBy] ); + } + +//=========================== Condition Vars ============================== +/*The semantic request 
has the cond-var value and mutex value, which are the + * indexes into the array. Not worrying about having too many mutexes or + * cond vars created, so using array instead of hash table, for speed. + */ + + +/*Make cond has to be called with the mutex that the cond is paired to + * Don't have to implement this way, but was confusing learning cond vars + * until deduced that each cond var owns a mutex that is used only for + * interacting with that cond var. So, make this pairing explicit. + */ +void +handleMakeCond( VPThreadSemReq *semReq, VPThreadSemEnv *semEnv) + { VPTCond *newCond; + VirtProcr *pr; + int32 condIdx; + + newCond = malloc( sizeof(VPTCond) ); + newCond->partnerMutex = semEnv->mutexDynArray->array[ semReq->mutexIdx ]; + + newCond->waitingQueue = makePrivQ(); + newCond->condIdx = semEnv->currCondIdx++; + condIdx = newCond->condIdx; + + //The cond struc contains an int that identifies it -- use that as + // its index within the array of conds. Add the new cond to array. + makeArray32BigEnoughForIndex( semEnv->condDynArray, condIdx ); + semEnv->condDynArray->array[ condIdx ] = newCond; + + //Now communicate the cond's identifying int back to requesting procr + semReq->requestingPr->semanticData = newCond->condIdx; + + //re-animate the requester + pr = semReq->requestingPr; + writePrivQ( pr, semEnv->readyVPQs[pr->coreAnimatedBy] ); + } + + +/*Mutex has already been paired to the cond var, so don't need to send the + * mutex, just the cond var. 
Don't have to do this, but want to bitch-slap + * the designers of Posix standard ; ) + */ +void +handleCondWait( VPThreadSemReq *semReq, VPThreadSemEnv *semEnv) + { VPTCond *cond; + VPTMutex *mutex; + VirtProcr *pr; + + //get cond struc out of array of them that's in the sem env + cond = semEnv->condDynArray->array[ semReq->condIdx ]; + + //add requester to queue of wait-ers + writePrivQ( semReq->requestingPr, cond->waitingQueue ); + + //unlock mutex -- can't reuse above handler 'cause not queuing releaser + mutex = cond->partnerMutex; + mutex->holderOfLock = readPrivQ( mutex->waitingQueue ); + + if( mutex->holderOfLock != NULL ) + { + pr = mutex->holderOfLock; + writePrivQ( pr, semEnv->readyVPQs[pr->coreAnimatedBy] ); + } + } + + +/*Note that have to implement this such that guarantee the waiter is the one + * that gets the lock + */ +void +handleCondSignal( VPThreadSemReq *semReq, VPThreadSemEnv *semEnv) + { VPTCond *cond; + VPTMutex *mutex; + VirtProcr *waitingPr, *pr; + + //get cond struc out of array of them that's in the sem env + cond = semEnv->condDynArray->array[ semReq->condIdx ]; + + //take next waiting procr out of queue + waitingPr = readPrivQ( cond->waitingQueue ); + + //transfer waiting procr to wait queue of mutex + // mutex is guaranteed to be held by signalling procr, so no check + mutex = cond->partnerMutex; + pushPrivQ( waitingPr, mutex->waitingQueue ); //is first out when read + + //re-animate the signalling procr + pr = semReq->requestingPr; + writePrivQ( pr, semEnv->readyVPQs[pr->coreAnimatedBy] ); + } + + + +/*Make cond has to be called with the mutex that the cond is paired to + * Don't have to implement this way, but was confusing learning cond vars + * until deduced that each cond var owns a mutex that is used only for + * interacting with that cond var. So, make this pairing explicit. 
+ */ +void +handleMakeProcr( VPThreadSemReq *semReq, VPThreadSemEnv *semEnv) + { VirtProcr *newPr, *pr; + + newPr = VMS__create_procr( semReq->fnPtr, semReq->initData ); + + semEnv->numVirtPr += 1; + + //Assign new processor to next core in line & queue it up + #ifdef SEQUENTIAL + newPr->coreAnimatedBy = 0; + #else + newPr->coreAnimatedBy = semEnv->nextCoreToGetNewPr; + if( semEnv->nextCoreToGetNewPr >= NUM_CORES - 1 ) + semEnv->nextCoreToGetNewPr = 0; + else + semEnv->nextCoreToGetNewPr += 1; + #endif + writePrivQ( newPr, semEnv->readyVPQs[newPr->coreAnimatedBy] ); + + //re-animate the requester + pr = semReq->requestingPr; + writePrivQ( pr, semEnv->readyVPQs[pr->coreAnimatedBy] ); + } diff -r 000000000000 -r 4aca264971b5 VPThread__Request_Handlers.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VPThread__Request_Handlers.h Fri Sep 17 11:34:02 2010 -0700 @@ -0,0 +1,33 @@ +/* + * Copyright 2009 OpenSourceStewardshipFoundation.org + * Licensed under GNU General Public License version 2 + * + * Author: seanhalle@yahoo.com + * + */ + +#ifndef _VPThread_REQ_H +#define _VPThread_REQ_H + +#include "VPThread.h" + +/*This header defines everything specific to the VPThread semantic plug-in + */ + +void +handleMakeMutex( VPThreadSemReq *semReq, VPThreadSemEnv *semEnv); +void +handleMutexLock( VPThreadSemReq *semReq, VPThreadSemEnv *semEnv); +void +handleMutexUnlock(VPThreadSemReq *semReq, VPThreadSemEnv *semEnv); +void +handleMakeCond( VPThreadSemReq *semReq, VPThreadSemEnv *semEnv); +void +handleCondWait( VPThreadSemReq *semReq, VPThreadSemEnv *semEnv); +void +handleCondSignal( VPThreadSemReq *semReq, VPThreadSemEnv *semEnv); +void +handleMakeProcr( VPThreadSemReq *semReq, VPThreadSemEnv *semEnv); + +#endif /* _VPThread_REQ_H */ + diff -r 000000000000 -r 4aca264971b5 VPThread__lib.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VPThread__lib.c Fri Sep 17 11:34:02 2010 -0700 @@ -0,0 +1,379 @@ +/* + * Copyright 2010 OpenSourceCodeStewardshipFoundation + * + * 
Licensed under BSD + */ + +#include +#include +#include + +#include "VMS/VMS.h" +#include "VPThread.h" +#include "VMS/Queue_impl/PrivateQueue.h" +#include "VMS/Hash_impl/PrivateHash.h" + + +//========================================================================== + +void +VPThread__init(); + +void +VPThread__init_Seq(); + +void +VPThread__init_Helper(); +//========================================================================== + + +/*TODO: Q: dealing with library f()s and DKU vs WT vs FoR + * (still want to do FoR, with time-lines as syntax, could be super cool) + * A: thinking pin the coreLoops for all of BLIS -- let Master arbitrate + * among library, DKU, WT, FoR -- all the patterns in terms of virtual + * processors (or equivalently work-units), so Master picks which virt procr + * from which portions of app (DKU, WT, FoR) onto which sched slots + *Might even do hierarchy of masters -- group of sched slots for each core + * has its own master, that keeps generated work local + * single-reader-single-writer sync everywhere -- no atomic primitives + * Might have the different schedulers talk to each other, to negotiate + * larger-grain sharing of resources, according to predicted critical + * path, and expansion of work + */ + + + +//=========================================================================== + + +/*These are the library functions *called in the application* + * + *There's a pattern for the outside sequential code to interact with the + * VMS_HW code. + *The VMS_HW system is inside a boundary.. every VPThread system is in its + * own directory that contains the functions for each of the processor types. + * One of the processor types is the "seed" processor that starts the + * cascade of creating all the processors that do the work. + *So, in the directory is a file called "EntryPoint.c" that contains the + * function, named appropriately to the work performed, that the outside + * sequential code calls. 
This function follows a pattern: + *1) it calls VPThread__init() + *2) it creates the initial data for the seed processor, which is passed + * in to the function + *3) it creates the seed VPThread processor, with the data to start it with. + *4) it calls startVPThreadThenWaitUntilWorkDone + *5) it gets the returnValue from the transfer struc and returns that + * from the function + * + *For now, a new VPThread system has to be created via VPThread__init every + * time an entry point function is called -- later, might add letting the + * VPThread system be created once, and let all the entry points just reuse + * it -- want to be as simple as possible now, and see by using what makes + * sense for later.. + */ + + + +//=========================================================================== + +/*This is the "border crossing" function -- the thing that crosses from the + * outside world, into the VMS_HW world. It initializes and starts up the + * VMS system, then creates one processor from the specified function and + * puts it into the readyQ. From that point, that one function is resp. + * for creating all the other processors, that then create others, and so + * forth. + *When all the processors, including the seed, have dissipated, then this + * function returns. The results will have been written by side-effect via + * pointers read from, or written into initData. 
+ * + *NOTE: no Threads should exist in the outside program that might touch + * any of the data reachable from initData passed in to here + */ +void +VPThread__create_seed_procr_and_do_work( VirtProcrFnPtr fnPtr, void *initData ) + { VPThreadSemEnv *semEnv; + VirtProcr *seedPr; + + #ifdef SEQUENTIAL + VPThread__init_Seq(); //debug sequential exe + #else + VPThread__init(); //normal multi-thd + #endif + semEnv = _VMSMasterEnv->semanticEnv; + + //VPThread starts with one processor, which is put into initial environ, + // and which then calls create() to create more, thereby expanding work + seedPr = VMS__create_procr( fnPtr, initData ); + + seedPr->coreAnimatedBy = semEnv->nextCoreToGetNewPr++; + + writePrivQ( seedPr, semEnv->readyVPQs[seedPr->coreAnimatedBy] ); + semEnv->numVirtPr = 1; + + #ifdef SEQUENTIAL + VMS__start_the_work_then_wait_until_done_Seq(); //debug sequential exe + #else + VMS__start_the_work_then_wait_until_done(); //normal multi-thd + #endif + + VPThread__cleanup_after_shutdown(); + } + + +//=========================================================================== + +/*Initializes all the data-structures for a VPThread system -- but doesn't + * start it running yet! + * + * + *This sets up the semantic layer over the VMS system + * + *First, calls VMS_Setup, then creates own environment, making it ready + * for creating the seed processor and then starting the work. 
+ */ +void +VPThread__init() + { + VMS__init(); + //masterEnv, a global var, now is partially set up by init_VMS + + VPThread__init_Helper(); + } + +void +VPThread__init_Seq() + { + VMS__init_Seq(); + //masterEnv, a global var, now is partially set up by init_VMS + + VPThread__init_Helper(); + } + +void +VPThread__init_Helper() + { VPThreadSemEnv *semanticEnv; + PrivQueueStruc **readyVPQs; + int coreIdx; + + //Hook up the semantic layer's plug-ins to the Master virt procr + _VMSMasterEnv->requestHandler = &VPThread__Request_Handler; + _VMSMasterEnv->slaveScheduler = &VPThread__schedule_virt_procr; + + //create the semantic layer's environment (all its data) and add to + // the master environment + semanticEnv = malloc( sizeof( VPThreadSemEnv ) ); + _VMSMasterEnv->semanticEnv = semanticEnv; + + //create the ready queue + readyVPQs = malloc( NUM_CORES * sizeof(PrivQueueStruc *) ); + + for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ ) + { + readyVPQs[ coreIdx ] = makePrivQ(); + } + + semanticEnv->readyVPQs = readyVPQs; + + semanticEnv->numVirtPr = 0; + semanticEnv->nextCoreToGetNewPr = 0; + + semanticEnv->currMutexIdx = 0; + semanticEnv->mutexDynArray = createDynArray32( INIT_NUM_MUTEX ); + + semanticEnv->currCondIdx = 0; + semanticEnv->condDynArray = createDynArray32( INIT_NUM_COND ); + } + + +/*Frees any memory allocated by VPThread__init() then calls VMS__shutdown + */ +void +VPThread__cleanup_after_shutdown() + { VPThreadSemEnv *semEnv; + int32 coreIdx, idx, highestIdx; + VPTMutex **mutexArray, *mutex; + VPTCond **condArray, *cond; + + semEnv = _VMSMasterEnv->semanticEnv; + +//TODO: double check that all sem env locations freed + + for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ ) + { + free( semEnv->readyVPQs[coreIdx]->startOfData ); + free( semEnv->readyVPQs[coreIdx] ); + } + + free( semEnv->readyVPQs ); + + + //==== Free mutexes and mutex array ==== + mutexArray = semEnv->mutexDynArray->array; + highestIdx = semEnv->mutexDynArray->highestIdxInArray; + for( 
idx=0; idx < highestIdx; idx++ ) + { mutex = mutexArray[ idx ]; + if( mutex == NULL ) continue; + free( mutex ); + } + free( mutexArray ); + free( semEnv->mutexDynArray ); + //====================================== + + + //==== Free conds and cond array ==== + condArray = semEnv->condDynArray->array; + highestIdx = semEnv->condDynArray->highestIdxInArray; + for( idx=0; idx < highestIdx; idx++ ) + { cond = condArray[ idx ]; + if( cond == NULL ) continue; + free( cond ); + } + free( condArray ); + free( semEnv->condDynArray ); + //=================================== + + + free( _VMSMasterEnv->semanticEnv ); + VMS__cleanup_after_shutdown(); + } + + +//=========================================================================== + +/* + */ +VirtProcr * +VPThread__create_thread( VirtProcrFnPtr fnPtr, void *initData, + VirtProcr *animPr ) + { VPThreadSemReq *reqData; + + reqData = malloc( sizeof(VPThreadSemReq) ); + reqData->reqType = make_procr; + reqData->initData = initData; + reqData->fnPtr = fnPtr; + reqData->requestingPr = animPr; + + VMS__add_sem_request( reqData, animPr ); + VMS__suspend_procr( animPr ); //will suspend then resume and continue + return animPr->semanticData; //result communicated back via semData field + } + + +inline void +VPThread__dissipate_thread( VirtProcr *procrToDissipate ) + { + VMS__dissipate_procr( procrToDissipate ); + } + + +//=========================================================================== + +void +VPThread__set_globals_to( void *globals ) + { + ((VPThreadSemEnv *) + (_VMSMasterEnv->semanticEnv))->applicationGlobals = globals; + } + +void * +VPThread__give_globals() + { + return((VPThreadSemEnv *) + (_VMSMasterEnv->semanticEnv))->applicationGlobals; + } + + + +//=========================================================================== + +int32 +VPThread__make_mutex( VirtProcr *animPr ) + { VPThreadSemReq *reqData; + + reqData = malloc( sizeof(VPThreadSemReq) ); + reqData->reqType = make_mutex; + reqData->requestingPr = 
animPr; + + VMS__add_sem_request( reqData, animPr ); + VMS__suspend_procr( animPr ); //will suspend then resume and continue + return animPr->semanticData; //result communicated back via semData field + } + +void +VPThread__mutex_lock( int32 mutexIdx, VirtProcr *acquiringPr ) + { VPThreadSemReq *reqData; + + reqData = malloc( sizeof(VPThreadSemReq) ); + reqData->reqType = mutex_lock; + reqData->mutexIdx = mutexIdx; + reqData->requestingPr = acquiringPr; + + VMS__add_sem_request( reqData, acquiringPr ); + VMS__suspend_procr( acquiringPr ); //will resume when has the lock + } + +void +VPThread__mutex_unlock( int32 mutexIdx, VirtProcr *releasingPr ) + { VPThreadSemReq *reqData; + + reqData = malloc( sizeof(VPThreadSemReq) ); + reqData->reqType = mutex_unlock; + reqData->mutexIdx = mutexIdx; + reqData->requestingPr = releasingPr; + + VMS__add_sem_request( reqData, releasingPr ); + VMS__suspend_procr( releasingPr ); //lock released when resumes + } + + +//======================= +int32 +VPThread__make_cond( int32 ownedMutexIdx, VirtProcr *animPr) + { VPThreadSemReq *reqData; + + reqData = malloc( sizeof(VPThreadSemReq) ); + reqData->reqType = make_cond; + reqData->mutexIdx = ownedMutexIdx; + reqData->requestingPr = animPr; + + VMS__add_sem_request( reqData, animPr ); + VMS__suspend_procr( animPr ); //will suspend then resume and continue + return animPr->semanticData; //result communicated back via semData field + } + +void +VPThread__cond_wait( int32 condIdx, VirtProcr *waitingPr) + { VPThreadSemReq *reqData; + + reqData = malloc( sizeof(VPThreadSemReq) ); + reqData->reqType = cond_wait; + reqData->condIdx = condIdx; + reqData->requestingPr = waitingPr; + + VMS__add_sem_request( reqData, waitingPr ); + VMS__suspend_procr( waitingPr ); //resume when signalled & has lock + } + +void * +VPThread__cond_signal( int32 condIdx, VirtProcr *signallingPr ) + { VPThreadSemReq *reqData; + + reqData = malloc( sizeof(VPThreadSemReq) ); + reqData->reqType = cond_signal; + 
reqData->condIdx = condIdx; + reqData->requestingPr = signallingPr; + + VMS__add_sem_request( reqData, signallingPr ); + VMS__suspend_procr( signallingPr );//resumes right away, still having lock + } +//=========================================================================== + +/*Just thin wrapper for now -- semantic request is still a simple thing + * (July 3, 2010) + */ +inline void +VPThread__free_semantic_request( VPThreadSemReq *semReq ) + { + free( semReq ); + } +