# HG changeset patch
# User Me
# Date 1288497100 25200
# Node ID b456b67cddd03b8344e74dabdea3d46b304ceba4
Initial add -- works, with vmalloc + probes version of VMS

diff -r 000000000000 -r b456b67cddd0 VCilk.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/VCilk.h	Sat Oct 30 20:51:40 2010 -0700
@@ -0,0 +1,121 @@
+/*
+ * Copyright 2009 OpenSourceStewardshipFoundation.org
+ * Licensed under GNU General Public License version 2
+ *
+ * Author: seanhalle@yahoo.com
+ *
+ */
+
+#ifndef _VCilk_H
+#define _VCilk_H
+
+#include "VMS/Queue_impl/PrivateQueue.h"
+#include "VMS/Hash_impl/PrivateHash.h"
+#include "VMS/VMS.h"
+
+/*This header defines everything specific to the VCilk semantic plug-in
+ */
+typedef struct _VCilkSemReq VCilkSemReq;
+
+
+/*Semantic layer-specific data sent inside a request from lib called in app
+ * to request handler called in MasterLoop
+ */
+enum VCilkReqType
+ {
+   syncReq = 1,
+   mallocReq,
+   freeReq
+ };
+
+struct _VCilkSemReq
+ { enum VCilkReqType reqType;
+   VirtProcr        *requestingPr;
+   int32             sizeToMalloc;
+   void             *ptrToFree;
+   VirtProcrFnPtr    fnPtr;
+   void             *initData;
+   int32             coreToSpawnOnto;
+ }
+/* VCilkSemReq */;
+
+typedef struct
+ {
+   PrivQueueStruc **readyVPQs;
+   HashTable       *commHashTbl;
+   int32            numVirtPr;
+   int32            nextCoreToGetNewPr;
+   int32            primitiveStartTime;
+ }
+VCilkSemEnv;
+
+typedef struct
+ {
+   int32      syncPending;
+   int32      numLiveChildren;
+   VirtProcr *parentPr;
+ }
+VCilkSemData;
+
+//===========================================================================
+
+void
+VCilk__create_seed_procr_and_do_work( VirtProcrFnPtr fn, void *initData );
+
+int32
+VCilk__giveMinWorkUnitCycles( float32 percentOverhead );
+
+void inline
+VCilk__start_primitive();
+
+int32 inline
+VCilk__end_primitive_and_give_cycles();
+
+int32
+VCilk__giveIdealNumWorkUnits();
+
+//=======================
+
+void
+VCilk__init();
+
+void
+VCilk__cleanup_after_shutdown();
+
+//=======================
+
+void inline
+VCilk__spawn( int32 coreToSpawnOnto, VirtProcrFnPtr fnPtr,
+              void *initData, VirtProcr *creatingPr );
+
+int32
+VCilk__give_number_of_cores_to_spawn_onto();
+
+void
+VCilk__sync( VirtProcr *animatingPr );
+
+void *
+VCilk__malloc( int32 sizeToMalloc, VirtProcr *animPr );
+
+void
+VCilk__free( void *ptrToFree, VirtProcr *animPr );
+
+void
+VCilk__dissipate_procr( VirtProcr *procrToDissipate );
+
+//=======================
+
+void
+VCilk__free_semantic_request( VCilkSemReq *semReq );
+
+
+//========================= Internal use only =============================
+void
+VCilk__Request_Handler( VirtProcr *requestingPr, void *_semEnv );
+
+VirtProcr *
+VCilk__schedule_virt_procr( void *_semEnv, int coreNum );
+
+
+#endif /* _VCilk_H */
+
diff -r 000000000000 -r b456b67cddd0 VCilk_PluginFns.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/VCilk_PluginFns.c	Sat Oct 30 20:51:40 2010 -0700
@@ -0,0 +1,285 @@
+/*
+ * Copyright 2010 OpenSourceCodeStewardshipFoundation
+ *
+ * Licensed under BSD
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "VMS/Queue_impl/PrivateQueue.h"
+#include "VCilk.h"
+
+
+
+//===========================================================================
+void inline
+handleSync( VirtProcr *requestingPr, VCilkSemEnv *semEnv );
+
+void inline
+handleMalloc( VCilkSemReq *semReq, VirtProcr *requestingPr,
+              VCilkSemEnv *semEnv );
+void inline
+handleFree( VCilkSemReq *semReq, VirtProcr *requestingPr,
+            VCilkSemEnv *semEnv );
+void inline
+handleDissipate( VirtProcr *requestingPr, VCilkSemEnv *semEnv );
+
+void inline
+handleSpawn( VCilkSemReq *semReq, VirtProcr *requestingPr,
+             VCilkSemEnv *semEnv );
+
+void inline
+dispatchSemReq( VCilkSemReq *semReq, VirtProcr *requestingPr,
+                VCilkSemEnv *semEnv );
+
+void inline
+resumePr( VirtProcr *procr, VCilkSemEnv *semEnv );
+
+//===========================================================================
+
+
+/*Will get requests to send, to receive, and to create new processors.
+ * Upon send, check the hash to see if a receive is waiting.
+ * Upon receive, check hash to see if a send has already happened.
+ * When other is not there, put in.  When other is there, the comm.
+ * completes, which means the receiver P gets scheduled and
+ * picks up right after the receive request.  So make the work-unit
+ * and put it into the queue of work-units ready to go.
+ * Other request is create a new Processor, with the function to run in the
+ * Processor, and initial data.
+ */
+void
+VCilk__Request_Handler( VirtProcr *requestingPr, void *_semEnv )
+ { VCilkSemEnv *semEnv;
+   VMSReqst    *req;
+   VCilkSemReq *semReq;
+
+   semEnv = (VCilkSemEnv *)_semEnv;
+
+   req = VMS__take_next_request_out_of( requestingPr );
+
+   while( req != NULL )
+    {
+      switch( req->reqType )
+       { case semantic:    dispatchSemReq( VMS__take_sem_reqst_from(req),
+                                           requestingPr, semEnv );
+            break;
+         case createReq:   //create request has to come as a VMS request,
+                           // to allow MasterLoop to do stuff before gets
+                           // here, and maybe also stuff after all requests
+                           // done -- however, can still attach semantic
+                           // req data to req.
+            semReq = VMS__take_sem_reqst_from( req );
+            handleSpawn( semReq, requestingPr, semEnv );
+            break;
+         case dissipate:   handleDissipate( requestingPr, semEnv );
+            break;
+         case VMSSemantic: VMS__handle_VMSSemReq(req, requestingPr, semEnv,
+                                                 &resumePr );
+            break;
+         default:
+            break;
+       }
+
+      DoneHandlingReqst:
+
+      req = VMS__take_next_request_out_of( requestingPr );
+    } //while( req != NULL )
+
+ }
+
+void inline
+dispatchSemReq( VCilkSemReq *semReq, VirtProcr *requestingPr,
+                VCilkSemEnv *semEnv )
+ {
+   if( semReq == NULL ) return;
+   switch( semReq->reqType )
+    {
+      case syncReq:   handleSync( requestingPr, semEnv );
+         break;
+      case mallocReq: handleMalloc( semReq, requestingPr, semEnv );
+         break;
+      case freeReq:   handleFree( semReq, requestingPr, semEnv );
+         break;
+    }
+   //NOTE: semantic request data strucs allocated on stack in VCilk Lib calls
+ }
+
+
+//============================== Scheduler ==================================
+
+
+/*For VCilk, scheduling a slave simply takes the next work-unit off the
+ * ready-to-go work-unit queue and assigns it to the slaveToSched.
+ *If the ready-to-go work-unit queue is empty, then nothing to schedule
+ * to the slave -- return FALSE to let Master loop know scheduling that
+ * slave failed.
+ */
+VirtProcr *
+VCilk__schedule_virt_procr( void *_semEnv, int coreNum )
+ { VirtProcr   *schedPr;
+   VCilkSemEnv *semEnv;
+
+   semEnv = (VCilkSemEnv *)_semEnv;
+
+   schedPr = readPrivQ( semEnv->readyVPQs[coreNum] );
+   //Note, using a non-blocking queue -- it returns NULL if queue empty
+
+   return( schedPr );
+ }
+
+
+//=========================== Request Handlers ==============================
+void inline
+resumePr( VirtProcr *procr, VCilkSemEnv *semEnv )
+ {
+   writePrivQ( procr, semEnv->readyVPQs[ procr->coreAnimatedBy] );
+ }
+
+
+
+
+/* check if list of live children is empty.
+ * If yes, then resume.
+ * If no, then set sync-pending flag.
+ */
+void
+handleSync( VirtProcr *requestingPr, VCilkSemEnv *semEnv )
+ {
+   if(((VCilkSemData *)(requestingPr->semanticData))->numLiveChildren == 0 )
+    { //no live children to wait for
+      resumePr( requestingPr, semEnv );
+    }
+   else
+    {
+      ((VCilkSemData *)(requestingPr->semanticData))->syncPending = TRUE;
+    }
+ }
+
+/*
+ */
+void
+handleMalloc( VCilkSemReq *semReq, VirtProcr *requestingPr,
+              VCilkSemEnv *semEnv )
+ { void *ptr;
+
+   ptr = VMS__malloc( semReq->sizeToMalloc );
+   requestingPr->dataReturnedFromReq = ptr;
+   resumePr( requestingPr, semEnv );
+ }
+
+/*
+ */
+void inline
+handleFree( VCilkSemReq *semReq, VirtProcr *requestingPr,
+            VCilkSemEnv *semEnv )
+ {
+   VMS__free( semReq->ptrToFree );
+   resumePr( requestingPr, semEnv );
+ }
+
+
+
+
+/*
+ */
+void inline
+handleSpawn( VCilkSemReq *semReq, VirtProcr *requestingPr,
+             VCilkSemEnv *semEnv )
+ {
+   VirtProcr    *newPr;
+   VCilkSemData *semanticData;
+
+   //This is running in master, so use internal version
+   newPr = VMS__create_procr( semReq->fnPtr, semReq->initData );
+
+   semanticData = VMS__malloc( sizeof(VCilkSemData) );
+
+   semanticData->numLiveChildren = 0;
+   semanticData->parentPr        = NULL;
+   semanticData->syncPending     = FALSE;
+
+   newPr->semanticData = semanticData;
+
+   /* add newly created to the list of live children of requester.
+    * In newly created, add pointer to VP requesting, as the parentVP
+    */
+   ((VCilkSemData *)(requestingPr->semanticData))->numLiveChildren +=1;
+   ((VCilkSemData *)(newPr->semanticData))->parentPr = requestingPr;
+
+   semEnv->numVirtPr += 1;
+
+   //Assign new processor to a core & transition it to ready
+   #ifdef SEQUENTIAL
+   newPr->coreAnimatedBy = 0;
+
+   #else
+   int32
+   coreToSpawnOnto = semReq->coreToSpawnOnto;
+
+   if(coreToSpawnOnto < 0 || coreToSpawnOnto >= NUM_CORES )
+    { //out-of-range, so round-robin assignment
+      newPr->coreAnimatedBy = semEnv->nextCoreToGetNewPr;
+      if( semEnv->nextCoreToGetNewPr >= NUM_CORES - 1 )
+         semEnv->nextCoreToGetNewPr = 0;
+      else
+         semEnv->nextCoreToGetNewPr += 1;
+    }
+   else //core num in-range, so use it
+    { newPr->coreAnimatedBy = coreToSpawnOnto;
+    }
+   #endif
+
+   resumePr( newPr, semEnv );
+   resumePr( requestingPr, semEnv );
+ }
+
+
+/*get parentVP & remove dissipator from parent's live children.
+ *If this was last live child, check "sync pending" flag
+ *-- if set, then resume the parentVP.
+ */
+void inline
+handleDissipate( VirtProcr *requestingPr, VCilkSemEnv *semEnv )
+ {
+   VirtProcr *
+   parentPr = ((VCilkSemData *)
+                              (requestingPr->semanticData))->parentPr;
+   if( parentPr == NULL ) //means this is seed processor being dissipated
+    { //Just act normally, except don't deal with parent
+      // VMS__Free is implemented to ignore requests to free data from
+      // outside VMS, so all this processor's non-VMS allocated data will
+      // remain and be cleaned up outside
+    }
+   else
+    {
+      ((VCilkSemData *)(parentPr->semanticData))->numLiveChildren -= 1;
+      if( ((VCilkSemData *)
+                          (parentPr->semanticData))->numLiveChildren <= 0 )
+       { //this was last live child of parent
+         if( ((VCilkSemData *)
+                             (parentPr->semanticData))->syncPending == TRUE )
+          { //was waiting for last child to dissipate, so resume it
+            ((VCilkSemData *)
+                            (parentPr->semanticData))->syncPending = FALSE;
+            resumePr( parentPr, semEnv );
+          }
+       }
+    }
+
+   VMS__free( requestingPr->semanticData );
+
+   //Now do normal dissipate
+
+   //call VMS to free_all AppVP state -- stack and so on
+   VMS__handle_dissipate_reqst( requestingPr );
+
+   semEnv->numVirtPr -= 1;
+   if( semEnv->numVirtPr == 0 )
+    { //no more work, so shutdown
+      VMS__handle_shutdown_reqst( requestingPr );
+    }
+ }
+
diff -r 000000000000 -r b456b67cddd0 VCilk__DESIGN_NOTES.txt
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/VCilk__DESIGN_NOTES.txt	Sat Oct 30 20:51:40 2010 -0700
@@ -0,0 +1,28 @@
+
+
+
+ the design:
+
+Only has the spawn and sync calls, nothing else.
+
+For spawn, creates a new VP
+
+For sync, waits for all VPs created by itself to dissipate.
+
+To implement these, in request handler:
+For spawn:
+   create new virtual processor
+   In requester, add newly created to the list of live children
+   In newly created, add pointer to requester, as the parentVP
+
+For Dissipate:
+   remove dissipator from its parent's list of live children.
+   If this was last in list, check "sync pending" flag
+   -- if set, then resume the parentVP.
+
+For Sync:
+   check if list of live children is empty.
+   If yes, then resume.
+   If no, then set sync-pending flag and remain suspended
+
+That's it.  Quick and simple,
diff -r 000000000000 -r b456b67cddd0 VCilk_lib.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/VCilk_lib.c	Sat Oct 30 20:51:40 2010 -0700
@@ -0,0 +1,341 @@
+/*
+ * Copyright 2010 OpenSourceCodeStewardshipFoundation
+ *
+ * Licensed under BSD
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "VMS/VMS.h"
+#include "VCilk.h"
+#include "VMS/Queue_impl/PrivateQueue.h"
+#include "VMS/Hash_impl/PrivateHash.h"
+
+
+//==========================================================================
+
+void
+VCilk__init();
+
+void
+VCilk__init_Seq();
+
+void
+VCilk__init_Helper();
+//==========================================================================
+
+
+/*TODO: Q: dealing with library f()s and DKU vs WT vs FoR
+ * (still want to do FoR, with time-lines as syntax, could be super cool)
+ * A: thinking pin the coreLoops for all of BLIS -- let Master arbitrate
+ * among library, DKU, WT, FoR -- all the patterns in terms of virtual
+ * processors (or equivalently work-units), so Master picks which virt procr
+ * from which portions of app (DKU, WT, FoR) onto which sched slots
+ *Might even do hierarchy of masters -- group of sched slots for each core
+ * has its own master, that keeps generated work local
+ * single-reader-single-writer sync everywhere -- no atomic primitives (but
+ * memory fences on architectures that need them)
+ * Might have the different schedulers talk to each other, to negotiate
+ * larger-grain sharing of resources, according to predicted critical
+ * path, and expansion of work
+ */
+
+
+
+//===========================================================================
+
+
+/*These are the library functions *called in the application*
+ *
+ *There's a pattern for the outside sequential code to interact with the
+ * VMS_HW code.
+ *The VMS_HW system is inside a boundary..  every VCilk system is in its
+ * own directory that contains the functions for each of the processor types.
+ * One of the processor types is the "seed" processor that starts the
+ * cascade of creating all the processors that do the work.
+ *So, in the directory is a file called "EntryPoint.c" that contains the
+ * function, named appropriately to the work performed, that the outside
+ * sequential code calls.  This function follows a pattern:
+ *1) it calls VCilk__init()
+ *2) it creates the initial data for the seed processor, which is passed
+ *   in to the function
+ *3) it creates the seed VCilk processor, with the data to start it with.
+ *4) it calls startVCilkThenWaitUntilWorkDone
+ *5) it gets the returnValue from the transfer struc and returns that
+ *   from the function
+ *
+ *For now, a new VCilk system has to be created via VCilk__init every
+ * time an entry point function is called -- later, might add letting the
+ * VCilk system be created once, and let all the entry points just reuse
+ * it -- want to be as simple as possible now, and see by using what makes
+ * sense for later..
+ */
+
+
+
+//===========================================================================
+
+/*This is the "border crossing" function -- the thing that crosses from the
+ * outside world, into the VMS_HW world.  It initializes and starts up the
+ * VMS system, then creates one processor from the specified function and
+ * puts it into the readyQ.  From that point, that one function is resp.
+ * for creating all the other processors, that then create others, and so
+ * forth.
+ *When all the processors, including the seed, have dissipated, then this
+ * function returns.  The results will have been written by side-effect via
+ * pointers read from, or written into initData.
+ *
+ *NOTE: no Threads should exist in the outside program that might touch
+ * any of the data reachable from initData passed in to here
+ */
+void
+VCilk__create_seed_procr_and_do_work( VirtProcrFnPtr fnPtr, void *initData )
+ { VCilkSemEnv *semEnv;
+   VirtProcr   *seedPr;
+
+   #ifdef SEQUENTIAL
+   VCilk__init_Seq();   //debug sequential exe
+   #else
+   VCilk__init();       //normal multi-thd
+   #endif
+   semEnv = _VMSMasterEnv->semanticEnv;
+
+   //VCilk starts with one processor, which is put into initial environ,
+   // and which then calls create() to create more, thereby expanding work
+   //Note, have to use external version of VMS__create_procr because
+   // internal version uses VMS__malloc, which hasn't been set up by here
+   seedPr = VMS_ext__create_procr( fnPtr, initData );
+   VCilkSemData *
+   semanticData = malloc( sizeof(VCilkSemData) );
+
+   semanticData->numLiveChildren = 0;
+   semanticData->parentPr        = NULL;
+   semanticData->syncPending     = FALSE;
+
+   seedPr->semanticData   = semanticData;
+   seedPr->coreAnimatedBy = semEnv->nextCoreToGetNewPr++;
+
+   writePrivQ( seedPr, semEnv->readyVPQs[seedPr->coreAnimatedBy] );
+   semEnv->numVirtPr = 1;
+
+   #ifdef SEQUENTIAL
+   VMS__start_the_work_then_wait_until_done_Seq();  //debug sequential exe
+   #else
+   VMS__start_the_work_then_wait_until_done();      //normal multi-thd
+   #endif
+
+   VCilk__cleanup_after_shutdown();
+ }
+
+
+int32 inline
+VCilk__giveMinWorkUnitCycles( float32 percentOverhead )
+ {
+   return MIN_WORK_UNIT_CYCLES;
+ }
+
+int32
+VCilk__giveIdealNumWorkUnits()
+ {
+   return NUM_SCHED_SLOTS * NUM_CORES;
+ }
+
+/*To measure how long a primitive operation takes, when calculating number of
+ * sub-tasks to divide into.
+ * For now, use TSC -- later, make these two macros with assembly that first
+ * saves jump point, and second jumps back several times to get reliable time
+ */
+void inline
+VCilk__start_primitive()
+ { //int32 *saveAddr;
+   //saveAddr = &(((VCilkSemEnv *)(_VMSMasterEnv->semanticEnv))->primitiveStartTime);
+   saveLowTimeStampCountInto( (((VCilkSemEnv *)
+                               (_VMSMasterEnv->semanticEnv))->primitiveStartTime) );
+ }
+
+/*Just quick and dirty for now -- make reliable later
+ * will want this to jump back several times -- to be sure cache is warm
+ * because don't want comm time included in calc-time measurement -- and
+ * also to throw out any "weird" values due to OS interrupt or TSC rollover
+ */
+int32 inline
+VCilk__end_primitive_and_give_cycles()
+ { int32 endTime, startTime;
+   //TODO: fix by repeating time-measurement
+   saveLowTimeStampCountInto( endTime );
+   startTime = ((VCilkSemEnv *)(_VMSMasterEnv->semanticEnv))->primitiveStartTime;
+   return (endTime - startTime);
+ }
+
+//===========================================================================
+//
+/*Initializes all the data-structures for a VCilk system -- but doesn't
+ * start it running yet!
+ *
+ *This and its callees run in main thread outside VMS
+ *
+ *This sets up the semantic layer over the VMS system
+ *
+ *First, calls VMS_Setup, then creates own environment, making it ready
+ * for creating the seed processor and then starting the work.
+ */
+void
+VCilk__init()
+ {
+   VMS__init();
+   //masterEnv, a global var, now is partially set up by init_VMS
+
+   VCilk__init_Helper();
+ }
+
+void
+VCilk__init_Seq()
+ {
+   VMS__init_Seq();
+   //masterEnv, a global var, now is partially set up by init_VMS
+
+   VCilk__init_Helper();
+ }
+
+/*Runs in main thread before VMS system starts
+ */
+void
+VCilk__init_Helper()
+ { VCilkSemEnv     *semanticEnv;
+   PrivQueueStruc **readyVPQs;
+   int              coreIdx;
+
+   //Hook up the semantic layer's plug-ins to the Master virt procr
+   _VMSMasterEnv->requestHandler = &VCilk__Request_Handler;
+   _VMSMasterEnv->slaveScheduler = &VCilk__schedule_virt_procr;
+
+   //create the semantic layer's environment (all its data) and add to
+   // the master environment
+   semanticEnv = malloc( sizeof( VCilkSemEnv ) );
+   _VMSMasterEnv->semanticEnv = semanticEnv;
+
+   //create the ready queue, hash tables used for pairing send to receive
+   // and so forth
+   //TODO: add hash tables for pairing sends with receives, and
+   // initialize the data ownership system
+   readyVPQs = malloc( NUM_CORES * sizeof(PrivQueueStruc *) );
+
+   for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ )
+    {
+      readyVPQs[ coreIdx ] = makePrivQ();
+    }
+
+   semanticEnv->readyVPQs = readyVPQs;
+
+   semanticEnv->nextCoreToGetNewPr = 0;
+ }
+
+
+/*Runs in main thread, outside VMS
+ *Frees any memory allocated by VCilk__init() then calls VMS's cleanup
+ */
+void
+VCilk__cleanup_after_shutdown()
+ { VCilkSemEnv *semanticEnv;
+   int          coreIdx;
+
+   semanticEnv = _VMSMasterEnv->semanticEnv;
+
+//TODO: double check all sem env locations freed
+
+   for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ )
+    {
+      free( semanticEnv->readyVPQs[coreIdx]->startOfData );
+      free( semanticEnv->readyVPQs[coreIdx] );
+    }
+   free( semanticEnv->readyVPQs );
+
+   free( _VMSMasterEnv->semanticEnv );
+   VMS__cleanup_after_shutdown();
+ }
+
+
+//===========================================================================
+
+
+/*Spawn involves allocating mem as well as creating processor which itself
+ * allocates, so has to be done inside master
+ */
+void inline
+VCilk__spawn( int32 coreToSpawnOnto, VirtProcrFnPtr fnPtr,
+              void *initData, VirtProcr *requestingPr )
+ { VCilkSemReq reqData;
+
+   //the semantic request data is on the stack and disappears when this
+   // call returns -- it's guaranteed to remain in the VP's stack for as
+   // long as the VP is suspended.
+   reqData.reqType         = 0; //know it's type because in a VMS create req
+   reqData.coreToSpawnOnto = coreToSpawnOnto;
+   reqData.fnPtr           = fnPtr;
+   reqData.initData        = initData;
+   reqData.requestingPr    = requestingPr;
+
+   VMS__send_create_procr_req( &reqData, requestingPr );
+ }
+
+
+int32
+VCilk__give_number_of_cores_to_spawn_onto()
+ {
+   return NUM_CORES;
+ }
+
+
+
+/*This runs inside slave VP, so can't do any freeing -- have to do in plugin
+ */
+void inline
+VCilk__dissipate_procr( VirtProcr *procrToDissipate )
+ {
+
+   VMS__dissipate_procr( procrToDissipate );
+ }
+
+//===========================================================================
+
+void
+VCilk__sync( VirtProcr *animPr )
+ { VCilkSemReq reqData;
+
+   reqData.reqType      = syncReq;
+   reqData.requestingPr = animPr;
+
+   VMS__send_sem_request( &reqData, animPr );
+ }
+
+
+
+void *
+VCilk__malloc( int32 sizeToMalloc, VirtProcr *animPr )
+ { VCilkSemReq reqData;
+
+   reqData.reqType      = mallocReq;
+   reqData.requestingPr = animPr;
+   reqData.sizeToMalloc = sizeToMalloc;
+
+   VMS__send_sem_request( &reqData, animPr );
+
+   return animPr->dataReturnedFromReq;
+ }
+
+
+/*Sends request to Master, which does the work of freeing
+ */
+void
+VCilk__free( void *ptrToFree, VirtProcr *animPr )
+ { VCilkSemReq reqData;
+
+   reqData.reqType      = freeReq;
+   reqData.requestingPr = animPr;
+   reqData.ptrToFree    = ptrToFree;
+
+   VMS__send_sem_request( &reqData, animPr );
+ }