Mercurial > cgi-bin > hgwebdir.cgi > VMS > VMS_Implementations > VCilk_impls > VCilk__MC_shared_impl

changeset 0:b456b67cddd0
Initial add -- works, with vmalloc + probes version of VMS
author: Me
date: Sat, 30 Oct 2010 20:51:40 -0700
children: b6ce47a0909b
files: VCilk.h VCilk_PluginFns.c VCilk__DESIGN_NOTES.txt VCilk_lib.c
diffstat: 4 files changed, 775 insertions(+), 0 deletions(-) [+]
[-]

VCilk.h 121

VCilk_PluginFns.c 285

VCilk__DESIGN_NOTES.txt 28

VCilk_lib.c 341 VCilk.h 121 VCilk_PluginFns.c 285 VCilk__DESIGN_NOTES.txt 28 VCilk_lib.c 341
VCilk.h 121
VCilk_PluginFns.c 285
VCilk__DESIGN_NOTES.txt 28
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/VCilk.h	Sat Oct 30 20:51:40 2010 -0700
     1.3 @@ -0,0 +1,121 @@
     1.4 +/*
     1.5 + *  Copyright 2009 OpenSourceStewardshipFoundation.org
     1.6 + *  Licensed under GNU General Public License version 2
     1.7 + *
     1.8 + * Author: seanhalle@yahoo.com
     1.9 + *
    1.10 + */
    1.11 +
    1.12 +#ifndef _VCilk_H
    1.13 +#define	_VCilk_H
    1.14 +
    1.15 +#include "VMS/Queue_impl/PrivateQueue.h"
    1.16 +#include "VMS/Hash_impl/PrivateHash.h"
    1.17 +#include "VMS/VMS.h"
    1.18 +
    1.19 +/*This header declares the types and functions of the VCilk semantic
    1.20 + * plug-in: request payloads, environment state, and the library API */
    1.21 +typedef struct _VCilkSemReq   VCilkSemReq; //struct body is defined below
    1.22 +
    1.23 +
    1.24 +/*Kinds of semantic request the VCilk library calls send to the request
    1.25 + * handler.  Numbering starts at 1; VCilk__spawn stores 0 in reqType because
    1.26 + * a spawn travels as a VMS create request rather than a semantic one */
    1.27 +enum VCilkReqType
    1.28 + {
    1.29 +   syncReq = 1,   //sent by VCilk__sync,   served by handleSync
    1.30 +   mallocReq,     //sent by VCilk__malloc, served by handleMalloc
    1.31 +   freeReq        //sent by VCilk__free,   served by handleFree
    1.32 + };
    1.33 +
    1.34 +struct _VCilkSemReq  //payload a VCilk lib call attaches to its request
    1.35 + { enum VCilkReqType    reqType;         //selects handler; 0 for spawn
    1.36 +   VirtProcr           *requestingPr;    //VP that issued the request
    1.37 +   int32                sizeToMalloc;    //filled in by VCilk__malloc only
    1.38 +   void                *ptrToFree;       //filled in by VCilk__free only
    1.39 +   VirtProcrFnPtr       fnPtr;           //spawn: function for the new VP
    1.40 +   void                *initData;        //spawn: data given to the new VP
    1.41 +   int32                coreToSpawnOnto; //spawn: out-of-range => round-robin
    1.42 + }
    1.43 +/* VCilkSemReq */;
    1.44 +
    1.45 +typedef struct  //state of the VCilk semantic layer, kept in the master env
    1.46 + {
    1.47 +   PrivQueueStruc **readyVPQs;          //NUM_CORES ready queues, by core
    1.48 +   HashTable       *commHashTbl;        //not read by VCilk code shown here
    1.49 +   int32            numVirtPr;          //live VPs; shutdown when it hits 0
    1.50 +   int32            nextCoreToGetNewPr; //round-robin cursor for new VPs
    1.51 +   int32            primitiveStartTime; //set by VCilk__start_primitive
    1.52 + }
    1.53 +VCilkSemEnv;
    1.54 +
    1.55 +typedef struct  //per-VP bookkeeping, reached through VirtProcr.semanticData
    1.56 + {
    1.57 +   int32 syncPending;      //TRUE while the VP waits in sync for children
    1.58 +   int32 numLiveChildren;  //+1 in handleSpawn, -1 in handleDissipate
    1.59 +   VirtProcr *parentPr;    //VP that spawned this one; NULL for the seed
    1.60 + }
    1.61 +VCilkSemData;
    1.62 +
    1.63 +//===========================================================================
    1.64 +
    1.65 +void
    1.66 +VCilk__create_seed_procr_and_do_work( VirtProcrFnPtr fn, void *initData );
    1.67 +   //^ sets up VCilk, runs fn as the seed VP, cleans up once work is done
    1.68 +int32
    1.69 +VCilk__giveMinWorkUnitCycles( float32 percentOverhead );
    1.70 +   //^ returns MIN_WORK_UNIT_CYCLES; percentOverhead is not consulted
    1.71 +void inline
    1.72 +VCilk__start_primitive();
    1.73 +   //^ saves the low time-stamp count into the semantic environment
    1.74 +int32 inline
    1.75 +VCilk__end_primitive_and_give_cycles();
    1.76 +   //^ returns time-stamp count now minus the value saved by start
    1.77 +int32
    1.78 +VCilk__giveIdealNumWorkUnits();
    1.79 +   //^ returns NUM_SCHED_SLOTS * NUM_CORES
    1.80 +//=======================
    1.81 +   //Set-up and tear-down of the semantic layer
    1.82 +void
    1.83 +VCilk__init();
    1.84 +   //^ calls VMS__init, then builds the VCilk semantic environment
    1.85 +void
    1.86 +VCilk__cleanup_after_shutdown();
    1.87 +   //^ frees the ready queues and environment, then calls VMS cleanup
    1.88 +//=======================
    1.89 +
    1.90 +void inline
    1.91 +VCilk__spawn( int32  coreToSpawnOnto, VirtProcrFnPtr  fnPtr,
    1.92 +              void  *initData,        VirtProcr      *creatingPr );
    1.93 +   //^ asks the master to create a VP running fnPtr( initData )
    1.94 +int32
    1.95 +VCilk__give_number_of_cores_to_spawn_onto();
    1.96 +   //^ returns NUM_CORES
    1.97 +void
    1.98 +VCilk__sync( VirtProcr *animatingPr );
    1.99 +   //^ sends a syncReq; resumed once the VP has no live children
   1.100 +void *
   1.101 +VCilk__malloc( int32 sizeToMalloc, VirtProcr *animPr );
   1.102 +   //^ allocation is done by the master; returns the pointer it hands back
   1.103 +void
   1.104 +VCilk__free( void *ptrToFree, VirtProcr *animPr );
   1.105 +   //^ the free is done by the master
   1.106 +void
   1.107 +VCilk__dissipate_procr( VirtProcr *procrToDissipate );
   1.108 +   //^ forwards to VMS__dissipate_procr
   1.109 +//=======================
   1.110 +
   1.111 +void
   1.112 +VCilk__free_semantic_request( VCilkSemReq *semReq );
   1.113 +   //^ NOTE(review): no definition in this changeset's .c files -- confirm
   1.114 +
   1.115 +//=========================  Internal use only  =============================
   1.116 +void
   1.117 +VCilk__Request_Handler( VirtProcr *requestingPr, void *_semEnv );
   1.118 +   //^ installed as _VMSMasterEnv->requestHandler by VCilk__init_Helper
   1.119 +VirtProcr *
   1.120 +VCilk__schedule_virt_procr( void *_semEnv, int coreNum );
   1.121 +   //^ installed as _VMSMasterEnv->slaveScheduler by VCilk__init_Helper
   1.122 +
   1.123 +#endif	/* _VCilk_H */
   1.124 +

     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/VCilk_PluginFns.c	Sat Oct 30 20:51:40 2010 -0700
     2.3 @@ -0,0 +1,285 @@
     2.4 +/*
     2.5 + * Copyright 2010  OpenSourceCodeStewardshipFoundation
     2.6 + *
     2.7 + * Licensed under BSD
     2.8 + */
     2.9 +
    2.10 +#include <stdio.h>
    2.11 +#include <stdlib.h>
    2.12 +#include <malloc.h>
    2.13 +
    2.14 +#include "VMS/Queue_impl/PrivateQueue.h"
    2.15 +#include "VCilk.h"
    2.16 +
    2.17 +
    2.18 +
    2.19 +//===========================================================================
    2.20 +void inline
    2.21 +handleSync( VirtProcr *requestingPr, VCilkSemEnv *semEnv );
    2.22 +   //^ serves a semantic syncReq
    2.23 +void inline
    2.24 +handleMalloc( VCilkSemReq *semReq, VirtProcr *requestingPr,
    2.25 +              VCilkSemEnv *semEnv );
    2.26 +void inline
    2.27 +handleFree( VCilkSemReq *semReq, VirtProcr *requestingPr,
    2.28 +            VCilkSemEnv *semEnv );
    2.29 +void inline
    2.30 +handleDissipate( VirtProcr *requestingPr, VCilkSemEnv *semEnv );
    2.31 +   //^ serves a VMS dissipate request
    2.32 +void inline
    2.33 +handleSpawn( VCilkSemReq *semReq, VirtProcr *requestingPr,
    2.34 +             VCilkSemEnv *semEnv  );
    2.35 +   //^ serves a VMS createReq that carries a VCilkSemReq
    2.36 +void inline
    2.37 +dispatchSemReq( VCilkSemReq *semReq, VirtProcr *requestingPr,
    2.38 +                VCilkSemEnv *semEnv );
    2.39 +   //^ routes a semantic request by its reqType
    2.40 +void inline
    2.41 +resumePr( VirtProcr *procr, VCilkSemEnv *semEnv ); //queues procr as ready
    2.42 +
    2.43 +//===========================================================================
    2.44 +
    2.45 +
    2.46 +/*Request handler plugged into VMS (installed by VCilk__init_Helper).
    2.47 + * Takes every request pending on requestingPr, one at a time, and routes
    2.48 + * each by its VMS-level type:
    2.49 + *   semantic    -- sync, malloc or free; routed again by dispatchSemReq
    2.50 + *   createReq   -- spawn; the VCilkSemReq attached to the VMS request
    2.51 + *                  names the function, initial data and target core
    2.52 + *   dissipate   -- the VP has ended; handleDissipate updates its parent
    2.53 + *   VMSSemantic -- passed to VMS__handle_VMSSemReq together with a
    2.54 + *                  pointer to resumePr.  Any other type is ignored.
    2.55 + */
    2.56 +void
    2.57 +VCilk__Request_Handler( VirtProcr *requestingPr, void *_semEnv )
    2.58 + { VCilkSemEnv *semEnv;
    2.59 +   VMSReqst    *req;
    2.60 +   VCilkSemReq *semReq;
    2.61 + 
    2.62 +   semEnv = (VCilkSemEnv *)_semEnv;
    2.63 +
    2.64 +   req = VMS__take_next_request_out_of( requestingPr );
    2.65 +   
    2.66 +   while( req != NULL )
    2.67 +    {
    2.68 +      switch( req->reqType )
    2.69 +       { case semantic:     dispatchSemReq( VMS__take_sem_reqst_from(req),
    2.70 +                                            requestingPr, semEnv );
    2.71 +            break;
    2.72 +         case createReq:    //create request has to come as a VMS request,
    2.73 +                            // to allow MasterLoop to do stuff before gets
    2.74 +                            // here, and maybe also stuff after all requests
    2.75 +                            // done -- however, can still attach semantic
    2.76 +                            // req data to req.
    2.77 +                            semReq = VMS__take_sem_reqst_from( req );
    2.78 +                            handleSpawn( semReq, requestingPr, semEnv );
    2.79 +            break;
    2.80 +         case dissipate:    handleDissipate( requestingPr, semEnv );
    2.81 +            break;
    2.82 +         case VMSSemantic:  VMS__handle_VMSSemReq(req, requestingPr, semEnv,
    2.83 +                                                  &resumePr );
    2.84 +            break;
    2.85 +         default:           //unrecognized request types are dropped
    2.86 +            break;
    2.87 +       }
    2.88 +      
    2.89 +
    2.90 +
    2.91 +      req = VMS__take_next_request_out_of( requestingPr );
    2.92 +    } //while( req != NULL )
    2.93 +
    2.94 + }
    2.95 +//Routes one semantic request to its handler, selected by semReq->reqType
    2.96 +void inline
    2.97 +dispatchSemReq( VCilkSemReq *semReq, VirtProcr *requestingPr,
    2.98 +                VCilkSemEnv *semEnv )
    2.99 + {
   2.100 +   if( semReq == NULL ) return;   //request carried no semantic data
   2.101 +   switch( semReq->reqType )      //no default: other values fall through
   2.102 +    {
   2.103 +      case syncReq:       handleSync(           requestingPr, semEnv );
   2.104 +         break;
   2.105 +      case mallocReq:     handleMalloc( semReq, requestingPr, semEnv );
   2.106 +         break;
   2.107 +      case freeReq:       handleFree(   semReq, requestingPr, semEnv );
   2.108 +         break;
   2.109 +    }
   2.110 +   //NOTE: semantic request data strucs allocated on stack in VCilk Lib calls
   2.111 + }
   2.112 +
   2.113 +
   2.114 +//============================== Scheduler ==================================
   2.115 +
   2.116 +
   2.117 +/*For VCilk, scheduling simply takes the next virtual processor off the
   2.118 + * ready queue that belongs to core coreNum and returns it.
   2.119 + *If that ready queue is empty there is nothing to schedule, and the
   2.120 + * result is NULL (what readPrivQ gives for an empty queue, per the note
   2.121 + * in the body).  _semEnv is the VCilkSemEnv passed as a void pointer.
   2.122 + */
   2.123 +VirtProcr *
   2.124 +VCilk__schedule_virt_procr( void *_semEnv, int coreNum )
   2.125 + { VirtProcr   *schedPr;
   2.126 +   VCilkSemEnv *semEnv;
   2.127 +
   2.128 +   semEnv = (VCilkSemEnv *)_semEnv;
   2.129 +
   2.130 +   schedPr = readPrivQ( semEnv->readyVPQs[coreNum] );
   2.131 +      //Note, using a non-blocking queue -- it returns NULL if queue empty
   2.132 +
   2.133 +   return( schedPr );
   2.134 + }
   2.135 +
   2.136 +
   2.137 +//=========================== Request Handlers ==============================
   2.138 +void inline
   2.139 +resumePr( VirtProcr *procr, VCilkSemEnv *semEnv )
   2.140 + { //make procr ready: append it to the queue of core procr->coreAnimatedBy
   2.141 +   writePrivQ( procr, semEnv->readyVPQs[ procr->coreAnimatedBy] );
   2.142 + }
   2.143 +
   2.144 +
   2.145 +
   2.146 +
   2.147 +/*Handles a sync request.  Live children are tracked as a count:
   2.148 + * if numLiveChildren is zero the requester is resumed right away;
   2.149 + * otherwise syncPending is set and handleDissipate resumes it later.
   2.150 + */
   2.151 +void
   2.152 +handleSync( VirtProcr *requestingPr, VCilkSemEnv *semEnv )
   2.153 + {
   2.154 +   if(((VCilkSemData *)(requestingPr->semanticData))->numLiveChildren  == 0 )
   2.155 +    { //no live children to wait for
   2.156 +      resumePr( requestingPr, semEnv );
   2.157 +    }
   2.158 +   else
   2.159 +    { //not queued as ready here, so the requester stays suspended
   2.160 +      ((VCilkSemData *)(requestingPr->semanticData))->syncPending = TRUE;
   2.161 +    }
   2.162 + }
   2.163 +
   2.164 +/*Calls VMS__malloc( semReq->sizeToMalloc ), hands the result back through
   2.165 + * requestingPr->dataReturnedFromReq, then makes the requester ready */
   2.166 +void
   2.167 +handleMalloc( VCilkSemReq *semReq, VirtProcr *requestingPr,
   2.168 +              VCilkSemEnv *semEnv )
   2.169 + { void *ptr;
   2.170 +      //the result is passed on unchecked, whatever VMS__malloc returned
   2.171 +   ptr = VMS__malloc( semReq->sizeToMalloc );
   2.172 +   requestingPr->dataReturnedFromReq = ptr;
   2.173 +   resumePr( requestingPr, semEnv );
   2.174 + }
   2.175 +
   2.176 +/*Passes semReq->ptrToFree to VMS__free, then makes the requester ready
   2.177 + */
   2.178 +void inline
   2.179 +handleFree( VCilkSemReq *semReq, VirtProcr *requestingPr,
   2.180 +            VCilkSemEnv *semEnv )
   2.181 + {
   2.182 +   VMS__free( semReq->ptrToFree );
   2.183 +   resumePr( requestingPr, semEnv );
   2.184 + }
   2.185 +
   2.186 +
   2.187 +
   2.188 +
   2.189 +/*Handles a spawn: creates the child VP, gives it fresh VCilkSemData,
   2.190 + * links it to its parent, picks its core, then readies child and parent */
   2.191 +void inline
   2.192 +handleSpawn( VCilkSemReq *semReq, VirtProcr *requestingPr,
   2.193 +             VCilkSemEnv *semEnv  )
   2.194 + {
   2.195 +   VirtProcr    *newPr;
   2.196 +   VCilkSemData *semanticData;
   2.197 +
   2.198 +      //This is running in master, so use internal version
   2.199 +   newPr = VMS__create_procr( semReq->fnPtr, semReq->initData );
   2.200 +      //result of VMS__malloc is used below without a NULL check
   2.201 +   semanticData = VMS__malloc( sizeof(VCilkSemData) );
   2.202 +
   2.203 +   semanticData->numLiveChildren = 0;
   2.204 +   semanticData->parentPr        = NULL;  //overwritten a few lines down
   2.205 +   semanticData->syncPending     = FALSE;
   2.206 +
   2.207 +   newPr->semanticData = semanticData;
   2.208 +
   2.209 +   /* count the newly created VP among the live children of requester.
   2.210 +    * In newly created, add pointer to VP requesting, as the parentVP
   2.211 +    */
   2.212 +   ((VCilkSemData *)(requestingPr->semanticData))->numLiveChildren +=1;
   2.213 +   ((VCilkSemData *)(newPr->semanticData))->parentPr = requestingPr;
   2.214 +
   2.215 +   semEnv->numVirtPr += 1;   //balanced by the decrement in handleDissipate
   2.216 +
   2.217 +      //Assign new processor to a core & transition it to ready
   2.218 +   #ifdef SEQUENTIAL
   2.219 +   newPr->coreAnimatedBy = 0;
   2.220 +
   2.221 +   #else
   2.222 +   int32
   2.223 +   coreToSpawnOnto = semReq->coreToSpawnOnto;
   2.224 +
   2.225 +   if(coreToSpawnOnto < 0 || coreToSpawnOnto >= NUM_CORES )
   2.226 +    {    //out-of-range, so round-robin assignment
   2.227 +      newPr->coreAnimatedBy = semEnv->nextCoreToGetNewPr; //used unchecked
   2.228 +      if( semEnv->nextCoreToGetNewPr >= NUM_CORES - 1 )   //wrap the cursor
   2.229 +          semEnv->nextCoreToGetNewPr  = 0;
   2.230 +      else
   2.231 +          semEnv->nextCoreToGetNewPr += 1;
   2.232 +    }
   2.233 +   else //core num in-range, so use it
   2.234 +    { newPr->coreAnimatedBy = coreToSpawnOnto;
   2.235 +    }
   2.236 +   #endif
   2.237 +      //both VPs become ready: the child starts, the parent continues
   2.238 +   resumePr( newPr,        semEnv );
   2.239 +   resumePr( requestingPr, semEnv );
   2.240 + }
   2.241 +
   2.242 +
   2.243 +/*Handles the end of a VP: decrements its parent's numLiveChildren and,
   2.244 + * if that was the last live child and the parent has syncPending set,
   2.245 + * resumes the parent.  Then frees the VP's semantic data, lets VMS
   2.246 + * dissipate it, and requests shutdown when no VPs remain. */
   2.247 +void inline
   2.248 +handleDissipate( VirtProcr *requestingPr, VCilkSemEnv *semEnv )
   2.249 + {
   2.250 +   VirtProcr *
   2.251 +   parentPr = ((VCilkSemData *)
   2.252 +               (requestingPr->semanticData))->parentPr;
   2.253 +   if( parentPr == NULL ) //means this is seed processor being dissipated
   2.254 +    { //Just act normally, except don't deal with parent
   2.255 +      // VMS__Free is implemented to ignore requests to free data from
   2.256 +      // outside VMS, so all this processor's non-VMS allocated data will
   2.257 +      // remain and be cleaned up outside
   2.258 +    }
   2.259 +   else
   2.260 +    { //NOTE(review): a parent that dissipated first had this data freed
   2.261 +      ((VCilkSemData *)(parentPr->semanticData))->numLiveChildren -= 1;
   2.262 +      if( ((VCilkSemData *)
   2.263 +           (parentPr->semanticData))->numLiveChildren <= 0 )
   2.264 +       { //this was last live child of parent
   2.265 +         if( ((VCilkSemData *)
   2.266 +              (parentPr->semanticData))->syncPending == TRUE )
   2.267 +          { //was waiting for last child to dissipate, so resume it
   2.268 +            ((VCilkSemData *)
   2.269 +             (parentPr->semanticData))->syncPending = FALSE;
   2.270 +            resumePr( parentPr, semEnv );
   2.271 +          }
   2.272 +       }
   2.273 +    }
   2.274 +      //for the seed this pointer came from plain malloc (see note above)
   2.275 +   VMS__free( requestingPr->semanticData );
   2.276 +   
   2.277 +       //Now do normal dissipate
   2.278 +   
   2.279 +       //call VMS to free_all AppVP state -- stack and so on
   2.280 +   VMS__handle_dissipate_reqst( requestingPr );
   2.281 +      //NOTE(review): requestingPr is still passed on below -- confirm safe
   2.282 +   semEnv->numVirtPr -= 1;
   2.283 +   if( semEnv->numVirtPr == 0 )
   2.284 +    {    //no more work, so shutdown
   2.285 +      VMS__handle_shutdown_reqst( requestingPr );
   2.286 +    }
   2.287 + }
   2.288 +

     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/VCilk__DESIGN_NOTES.txt	Sat Oct 30 20:51:40 2010 -0700
     3.3 @@ -0,0 +1,28 @@
     3.4 +
     3.5 +
     3.6 +
     3.7 + the design:
     3.8 +
     3.9 +Only has the spawn and sync calls, nothing else.
    3.10 +
    3.11 +For spawn, creates a new VP
    3.12 +
    3.13 +For sync, waits for all VPs created by itself to dissipate.
    3.14 +
    3.15 +To implement these, in request handler:
    3.16 +For spawn:
    3.17 + create new virtual processor
    3.18 + In requester, add newly created to the list of live children
    3.19 + In newly created, add pointer to requester, as the parentVP
    3.20 +
    3.21 +For Dissipate:
    3.22 + remove dissipator from its parent's list of live children.
    3.23 + If this was last in list, check "sync pending" flag 
    3.24 + -- if set, then resume the parentVP.
    3.25 +
    3.26 +For Sync:
    3.27 + check if list of live children is empty.
    3.28 + If yes, then resume.  
    3.29 + If no, then set sync-pending flag and remain suspended
    3.30 +
    3.31 +That's it.  Quick and simple.

     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/VCilk_lib.c	Sat Oct 30 20:51:40 2010 -0700
     4.3 @@ -0,0 +1,341 @@
     4.4 +/*
     4.5 + * Copyright 2010  OpenSourceCodeStewardshipFoundation
     4.6 + *
     4.7 + * Licensed under BSD
     4.8 + */
     4.9 +
    4.10 +#include <stdio.h>
    4.11 +#include <stdlib.h>
    4.12 +#include <malloc.h>
    4.13 +
    4.14 +#include "VMS/VMS.h"
    4.15 +#include "VCilk.h"
    4.16 +#include "VMS/Queue_impl/PrivateQueue.h"
    4.17 +#include "VMS/Hash_impl/PrivateHash.h"
    4.18 +
    4.19 +
    4.20 +//==========================================================================
    4.21 +
    4.22 +void
    4.23 +VCilk__init();
    4.24 +   //^ normal multi-threaded start-up; also declared in VCilk.h
    4.25 +void
    4.26 +VCilk__init_Seq();
    4.27 +   //^ start-up used when SEQUENTIAL is defined; not declared in VCilk.h
    4.28 +void
    4.29 +VCilk__init_Helper();  //common second half of both init variants
    4.30 +//==========================================================================
    4.31 +
    4.32 +
    4.33 +/*TODO: Q: dealing with library f()s and DKU vs WT vs FoR
    4.34 + * (still want to do FoR, with time-lines as syntax, could be super cool)
    4.35 + * A: thinking pin the coreLoops for all of BLIS -- let Master arbitrate
    4.36 + * among library, DKU, WT, FoR -- all the patterns in terms of virtual
    4.37 + * processors (or equivalently work-units), so Master picks which virt procr
    4.38 + * from which portions of app (DKU, WT, FoR) onto which sched slots
    4.39 + *Might even do hierarchy of masters -- group of sched slots for each core
    4.40 + * has its own master, that keeps generated work local
    4.41 + * single-reader-single-writer sync everywhere -- no atomic primitives (but
    4.42 + * memory fences on architectures that need them)
    4.43 + * Might have the different schedulers talk to each other, to negotiate
    4.44 + * larger-grain sharing of resources, according to predicted critical
    4.45 + * path, and expansion of work
    4.46 + */
    4.47 +
    4.48 +
    4.49 +
    4.50 +//===========================================================================
    4.51 +
    4.52 +
    4.53 +/*These are the library functions *called in the application*
    4.54 + * 
    4.55 + *There's a pattern for the outside sequential code to interact with the
    4.56 + * VMS_HW code.
    4.57 + *The VMS_HW system is inside a boundary..  every VCilk system is in its
    4.58 + * own directory that contains the functions for each of the processor types.
    4.59 + * One of the processor types is the "seed" processor that starts the
    4.60 + * cascade of creating all the processors that do the work.
    4.61 + *So, in the directory is a file called "EntryPoint.c" that contains the
    4.62 + * function, named appropriately to the work performed, that the outside
    4.63 + * sequential code calls.  This function follows a pattern:
    4.64 + *1) it calls VCilk__init()
    4.65 + *2) it creates the initial data for the seed processor, which is passed
    4.66 + *    in to the function
    4.67 + *3) it creates the seed VCilk processor, with the data to start it with.
    4.68 + *4) it calls startVCilkThenWaitUntilWorkDone
    4.69 + *5) it gets the returnValue from the transfer struc and returns that
    4.70 + *    from the function
    4.71 + *
    4.72 + *For now, a new VCilk system has to be created via VCilk__init every
    4.73 + * time an entry point function is called -- later, might add letting the
    4.74 + * VCilk system be created once, and let all the entry points just reuse
    4.75 + * it -- want to be as simple as possible now, and see by using what makes
    4.76 + * sense for later..
    4.77 + */
    4.78 +
    4.79 +
    4.80 +
    4.81 +//===========================================================================
    4.82 +
    4.83 +/*This is the "border crossing" function -- the thing that crosses from the
    4.84 + * outside world, into the VMS_HW world.  It initializes and starts up the
    4.85 + * VMS system, then creates one processor from the specified function and
    4.86 + * puts it into a ready queue.  From that point, that one function is
    4.87 + * responsible for creating all the other processors, that then create
    4.88 + * others, and so forth.
    4.89 + *When all the processors, including the seed, have dissipated, then this
    4.90 + * function returns.  The results will have been written by side-effect via
    4.91 + * pointers read from, or written into initData.
    4.92 + *
    4.93 + *NOTE: no Threads should exist in the outside program that might touch
    4.94 + * any of the data reachable from initData passed in to here
    4.95 + */
    4.96 +void
    4.97 +VCilk__create_seed_procr_and_do_work( VirtProcrFnPtr fnPtr, void *initData )
    4.98 + { VCilkSemEnv *semEnv;
    4.99 +   VirtProcr *seedPr;
   4.100 +
   4.101 +   #ifdef SEQUENTIAL
   4.102 +   VCilk__init_Seq();  //debug sequential exe
   4.103 +   #else
   4.104 +   VCilk__init();      //normal multi-thd
   4.105 +   #endif
   4.106 +   semEnv = _VMSMasterEnv->semanticEnv;
   4.107 +
   4.108 +      //VCilk starts with one processor, which is put into initial environ,
   4.109 +      // and which then calls create() to create more, thereby expanding work
   4.110 +      //Note, have to use external version of VMS__create_procr because
   4.111 +      // internal version uses VMS__malloc, which hasn't been set up by here
   4.112 +   seedPr = VMS_ext__create_procr( fnPtr, initData );
   4.113 +   VCilkSemData *
   4.114 +   semanticData = malloc( sizeof(VCilkSemData) );
   4.115 +
   4.116 +   semanticData->numLiveChildren = 0;
   4.117 +   semanticData->parentPr        = NULL;  //NULL marks the seed processor
   4.118 +   semanticData->syncPending     = FALSE;
   4.119 +   seedPr->semanticData = semanticData;
   4.120 +      //seed takes the cursor's core; cursor wraps so it stays < NUM_CORES
   4.121 +   seedPr->coreAnimatedBy = semEnv->nextCoreToGetNewPr;
   4.122 +   semEnv->nextCoreToGetNewPr = (seedPr->coreAnimatedBy + 1) % NUM_CORES;
   4.123 +   writePrivQ( seedPr, semEnv->readyVPQs[seedPr->coreAnimatedBy] );
   4.124 +   semEnv->numVirtPr = 1;   //the seed is the only live processor so far
   4.125 +
   4.126 +   #ifdef SEQUENTIAL
   4.127 +   VMS__start_the_work_then_wait_until_done_Seq();  //debug sequential exe
   4.128 +   #else
   4.129 +   VMS__start_the_work_then_wait_until_done();      //normal multi-thd
   4.130 +   #endif
   4.131 +
   4.132 +   VCilk__cleanup_after_shutdown();
   4.133 + }
   4.134 +
   4.135 +//Returns MIN_WORK_UNIT_CYCLES; percentOverhead is accepted but not used
   4.136 +int32 inline
   4.137 +VCilk__giveMinWorkUnitCycles( float32 percentOverhead )
   4.138 + {
   4.139 +   return MIN_WORK_UNIT_CYCLES;
   4.140 + }
   4.141 +//Returns the product of the build constants NUM_SCHED_SLOTS and NUM_CORES
   4.142 +int32
   4.143 +VCilk__giveIdealNumWorkUnits()
   4.144 + {
   4.145 +   return NUM_SCHED_SLOTS * NUM_CORES;
   4.146 + }
   4.147 +
   4.148 +/*Starts timing a primitive operation (for deciding how many sub-tasks to
   4.149 + * divide into): saves the low time-stamp count in primitiveStartTime.
   4.150 + * For now, use TSC -- later, make these two macros with assembly that first
   4.151 + * saves jump point, and second jumps back several times to get reliable time
   4.152 + */
   4.153 +void inline
   4.154 +VCilk__start_primitive()
   4.155 + { //the field itself (not its address) is handed over -- NOTE(review):
   4.156 +   // presumably saveLowTimeStampCountInto is a macro; confirm in VMS.h
   4.157 +   saveLowTimeStampCountInto( (((VCilkSemEnv *)
   4.158 +                        (_VMSMasterEnv->semanticEnv))->primitiveStartTime) );
   4.159 + }
   4.160 +
   4.161 +/*Returns the low time-stamp count now minus the value saved by
   4.162 + * VCilk__start_primitive.  Quick and dirty for now: will want this to
   4.163 + * repeat several times so the cache is warm (comm time excluded) and to
   4.164 + * throw out any "weird" values due to OS interrupt or TSC rollover
   4.165 + */
   4.166 +int32 inline
   4.167 +VCilk__end_primitive_and_give_cycles()
   4.168 + { int32 endTime, startTime;
   4.169 +   //TODO: fix by repeating time-measurement
   4.170 +   saveLowTimeStampCountInto( endTime );  //endTime is written by this call
   4.171 +   startTime = ((VCilkSemEnv *)(_VMSMasterEnv->semanticEnv))->primitiveStartTime;
   4.172 +   return (endTime - startTime);  //no rollover handling (see header note)
   4.173 + }
   4.174 +
   4.175 +//===========================================================================
   4.176 +//
   4.177 +/*Initializes all the data-structures for a VCilk system -- but doesn't
   4.178 + * start it running yet!
   4.179 + *
   4.180 + *This and its callees run in main thread outside VMS
   4.181 + * 
   4.182 + *This sets up the semantic layer over the VMS system
   4.183 + *
   4.184 + *First, calls VMS__init, then creates own environment, making it ready
   4.185 + * for creating the seed processor and then starting the work.
   4.186 + */
   4.187 +void
   4.188 +VCilk__init()
   4.189 + {
   4.190 +   VMS__init();
   4.191 +      //masterEnv, a global var, now is partially set up by VMS__init
   4.192 +
   4.193 +   VCilk__init_Helper();  //installs plug-ins and builds the VCilkSemEnv
   4.194 + }
   4.195 +//Same as VCilk__init but starts VMS with VMS__init_Seq (SEQUENTIAL builds)
   4.196 +void
   4.197 +VCilk__init_Seq()
   4.198 + {
   4.199 +   VMS__init_Seq();
   4.200 +      //masterEnv, a global var, now is partially set up by VMS__init_Seq
   4.201 +
   4.202 +   VCilk__init_Helper();
   4.203 + }
   4.204 +
   4.205 +/*Runs in main thread before VMS system starts.  Installs the VCilk request
   4.206 + * handler and scheduler in the master env and builds the VCilkSemEnv */
   4.207 +void
   4.208 +VCilk__init_Helper()
   4.209 + { VCilkSemEnv     *semanticEnv;
   4.210 +   PrivQueueStruc **readyVPQs;
   4.211 +   int              coreIdx;
   4.212 + 
   4.213 +      //Hook up the semantic layer's plug-ins to the Master virt procr
   4.214 +   _VMSMasterEnv->requestHandler = &VCilk__Request_Handler;
   4.215 +   _VMSMasterEnv->slaveScheduler = &VCilk__schedule_virt_procr;
   4.216 +
   4.217 +      //create the semantic layer's environment (all its data) and add to
   4.218 +      // the master environment
   4.219 +   semanticEnv = malloc( sizeof( VCilkSemEnv ) );
   4.220 +   _VMSMasterEnv->semanticEnv = semanticEnv;
   4.221 +
   4.222 +      //create one ready queue per core
   4.223 +      //TODO: add hash tables for pairing sends with receives, and
   4.224 +      // initialize the data ownership system
   4.225 +   readyVPQs = malloc( NUM_CORES * sizeof(PrivQueueStruc *) );
   4.226 +   for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ )
   4.227 +    {
   4.228 +      readyVPQs[ coreIdx ] = makePrivQ();
   4.229 +    }
   4.230 +      //malloc leaves the struct indeterminate, so give every field a value
   4.231 +   semanticEnv->readyVPQs          = readyVPQs;
   4.232 +   semanticEnv->commHashTbl        = NULL;  //no hash table is created yet
   4.233 +   semanticEnv->numVirtPr          = 0;     //seed is counted by the caller
   4.234 +   semanticEnv->nextCoreToGetNewPr = 0;
   4.235 +   semanticEnv->primitiveStartTime = 0;
   4.236 + }
   4.237 +
   4.238 +
   4.239 +/*Runs in main thread, outside VMS
   4.240 + *Frees the ready queues and the semantic env, then calls VMS's cleanup
   4.241 + */
   4.242 +void
   4.243 +VCilk__cleanup_after_shutdown()
   4.244 + { VCilkSemEnv *semanticEnv;
   4.245 +   int coreIdx;
   4.246 + 
   4.247 +   semanticEnv = _VMSMasterEnv->semanticEnv;
   4.248 +
   4.249 +//TODO: double check all sem env locations freed
   4.250 +      //each queue: first its startOfData buffer, then the queue struct
   4.251 +   for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ )
   4.252 +    {
   4.253 +      free( semanticEnv->readyVPQs[coreIdx]->startOfData );
   4.254 +      free( semanticEnv->readyVPQs[coreIdx] );
   4.255 +    }
   4.256 +   free( semanticEnv->readyVPQs );   //the array of queue pointers
   4.257 +   
   4.258 +   free( _VMSMasterEnv->semanticEnv );  //same pointer as semanticEnv
   4.259 +   VMS__cleanup_after_shutdown();
   4.260 + }
   4.261 +
   4.262 +
   4.263 +//===========================================================================
   4.264 +
   4.265 +
   4.266 +/*Spawn involves allocating mem as well as creating processor which itself
   4.267 + * allocates, so has to be done inside master -- this only packages the
   4.268 + * arguments and sends them as a VMS create request (see handleSpawn) */
   4.269 +void inline
   4.270 +VCilk__spawn( int32  coreToSpawnOnto, VirtProcrFnPtr  fnPtr,
   4.271 +              void  *initData,        VirtProcr      *requestingPr )
   4.272 + { VCilkSemReq reqData;
   4.273 +
   4.274 +      //the semantic request data is on the stack and disappears when this
   4.275 +      // call returns -- it's guaranteed to remain in the VP's stack for as
   4.276 +      // long as the VP is suspended.
   4.277 +   reqData.reqType         = 0; //know its type because in a VMS create req
   4.278 +   reqData.coreToSpawnOnto = coreToSpawnOnto; //out-of-range => round-robin
   4.279 +   reqData.fnPtr           = fnPtr;
   4.280 +   reqData.initData        = initData;
   4.281 +   reqData.requestingPr    = requestingPr;
   4.282 +      //sizeToMalloc and ptrToFree stay unset; handleSpawn does not read them
   4.283 +   VMS__send_create_procr_req( &reqData, requestingPr );
   4.284 + } 
   4.285 +
   4.286 +//Returns NUM_CORES; handleSpawn accepts core numbers 0 .. NUM_CORES-1
   4.287 +int32
   4.288 +VCilk__give_number_of_cores_to_spawn_onto()
   4.289 + {
   4.290 +   return NUM_CORES;
   4.291 + }
   4.292 +
   4.293 +
   4.294 +
   4.295 +/*This runs inside slave VP, so can't do any freeing -- have to do in plugin
   4.296 + */
   4.297 +void inline
   4.298 +VCilk__dissipate_procr( VirtProcr *procrToDissipate )
   4.299 + { 
   4.300 +      //plugin-side handling of the dissipate request is handleDissipate
   4.301 +   VMS__dissipate_procr( procrToDissipate );
   4.302 + }
   4.303 +
   4.304 +//===========================================================================
   4.305 +//Sends a syncReq for animPr; handleSync resumes it once no children live
   4.306 +void
   4.307 +VCilk__sync( VirtProcr *animPr )
   4.308 + { VCilkSemReq reqData;
   4.309 + 
   4.310 +   reqData.reqType      = syncReq;
   4.311 +   reqData.requestingPr = animPr;
   4.312 +      //remaining fields stay unset; handleSync is not given the request
   4.313 +   VMS__send_sem_request( &reqData, animPr );
   4.314 + }
   4.315 +
   4.316 +
   4.317 +//Sends a mallocReq; returns the pointer handleMalloc stored for animPr
   4.318 +void *
   4.319 +VCilk__malloc( int32 sizeToMalloc, VirtProcr *animPr )
   4.320 + { VCilkSemReq reqData;
   4.321 +
   4.322 +   reqData.reqType      = mallocReq;
   4.323 +   reqData.requestingPr = animPr;
   4.324 +   reqData.sizeToMalloc = sizeToMalloc;
   4.325 +
   4.326 +   VMS__send_sem_request( &reqData, animPr );
   4.327 +      //handleMalloc placed the result of VMS__malloc in this field
   4.328 +   return animPr->dataReturnedFromReq;
   4.329 + }
   4.330 +
   4.331 +
   4.332 +/*Sends a freeReq to Master, where handleFree passes ptrToFree to VMS__free
   4.333 + */
   4.334 +void
   4.335 +VCilk__free( void *ptrToFree, VirtProcr *animPr )
   4.336 + { VCilkSemReq reqData;
   4.337 +
   4.338 +   reqData.reqType      = freeReq;
   4.339 +   reqData.requestingPr = animPr;
   4.340 +   reqData.ptrToFree    = ptrToFree;
   4.341 +
   4.342 +   VMS__send_sem_request( &reqData, animPr );
   4.343 + }
   4.344 +