# HG changeset patch # User Me # Date 1274582278 25200 # Node ID a5fe730dfc2ec53fc99b446f7ee59fa54e248cea Initial add -- for sourceforge repositories diff -r 000000000000 -r a5fe730dfc2e CoreLoop.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CoreLoop.c Sat May 22 19:37:58 2010 -0700 @@ -0,0 +1,62 @@ +/* + * Copyright 2010 OpenSourceCodeStewardshipFoundation + * + * Licensed under BSD + */ + + + + + +#include "VMS.h" +#include "Queue_impl/BlockingQueue.h" + +#include +#include + + +/*This is the loop that runs in the PThread pinned to each core + * get work-unit struc from queue, + * call function-ptr, passing it pointer to data + * transfer return value to slave's "requests" pointer + * write the slave's "Done" flag and repeat. + */ +//pthread_create requires ptr to func that takes void * and returns void * +void * coreLoop( void *paramsIn ) + { time_t startTime, endTime, timeToExecute; + WorkUnit *currWorkUnit; + foobar *workFn; + SlaveReqst *requestsFromSlave; + + ThdParams *thdParams; + QueueStruc *workQ; + + // Get the communication queues out of the param passed in + thdParams = (ThdParams *)paramsIn; + + workQ = thdParams -> workQ; + + // Get to work! + while( TRUE ) + { + // get work-unit struc from queue + currWorkUnit = (WorkUnit *) readQ( workQ ); + workFn = currWorkUnit->ptrToWorkFunc; + + time(&startTime); //put time at call into var + + // call function-ptr, passing it pointer to data + requestsFromSlave = + (*workFn)( currWorkUnit->workData ); + + time(&endTime); + timeToExecute = endTime - startTime; + + printf( "timeToComputePiece: %s", ctime(&timeToExecute) ); + + // transfer return value to slave's "requests" pointer + currWorkUnit->slaveAssignedTo->requestsToMaster = requestsFromSlave; + // write the slave's "Done" flag and repeat. 
+ currWorkUnit->slaveAssignedTo->doneFlag = TRUE; + } + } diff -r 000000000000 -r a5fe730dfc2e DESIGN_NOTES.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/DESIGN_NOTES.txt Sat May 22 19:37:58 2010 -0700 @@ -0,0 +1,40 @@ + +This project is for a paper -- the goal is to implement and measure the + performance of the Virtual Master-Slave system as an alternative to the + Thread model. + +Going to implement exactly one application -- a do-nothing benchmark kernel + that just measures scheduling overhead. + +Implement VMS this way: + +function to give to PThread that does the loop: + +get from queue, call function-ptr, passing it work-unit ptr (which contains a + pointer to data declared as void * in the application), then write the + "Done" flag and repeat. + + +In an application: + +divide work up into pieces self, just make it a busy-wait loop that writes + data, then reads it back, declared volatile. At end of work, call fn that + makes a continuation and notifies the Master its there, then return. + + +Master: + +A loop that polls each virtual slave exactly once, processing each whose + Done flag is set, it forces work-units to be one-to-one with slaves, so + as soon as done polling slaves, it makes a continuation of itself, puts + half the scheduled slaves in, clears its own Done flag, puts its + continuation in, the other half of the scheduled slaves, then writes its + own Done flag. + +When continuation first runs, checks if own Done flag set -- if not, busy- + waits until set, then proceeds. This ensures it doesn't overlap with + tail-end of previous -- IE, continuation may sneak through queue before + previous done putting second half of scheduled slaves in. This is the only + race condition. 
+ + diff -r 000000000000 -r a5fe730dfc2e MasterLoop.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/MasterLoop.c Sat May 22 19:37:58 2010 -0700 @@ -0,0 +1,113 @@ +/* + * Copyright 2010 OpenSourceCodeStewardshipFoundation + * + * Licensed under BSD + */ + + + +#include +#include + +#include "VMS.h" + + + +/*This code is animated by the virtual Master processor. + *Note, it is animated on a different level in virtual processor hierarchy + * than the CoreLoop -- this is the code pointed to in a work-unit that the + * coreLoop jumps to + * + *Polls each virtual slave exactly once, hands any requests made by the slave + * to the "request handler" plug-in function + * + *Any slaves that have no work-unit assigned are given to the "schedule" + * plug-in function, which tries to assign a work-unit to it. + * + *When all slaves that need work-units have been given to the schedule plug-in, + * half of the ones that were successfully scheduled are put into the work + * queue, then a continuation of this function is put in, then the rest of the + * slaves that were successfully scheduled. + * + *The first thing this function does is busy-wait until the previous work-unit + * running this function is done. This ensures it doesn't overlap with + * tail-end of previous -- IE, continuation may sneak through queue before + * previous done putting second half of scheduled slaves in. This is the only + * race condition. 
+ * + */ + +void masterLoop( void *data ) + { bool8 success; + int slaveIdx, numScheduled, numInFirstHalf, schedSlaveIdx; + VMSProcr currSlave, *virtSlaves; + MasterEnv *masterEnv; + SlaveScheduler slaveScheduler; + RequestHandler requestHandler; + + + masterEnv = (MasterEnv *)data; + + requestHandler = masterEnv->requestHandler; + slaveScheduler = masterEnv->slaveScheduler; + virtSlaves = masterEnv->virtSlaves; + + //if another continuation of Master still running, busy-wait + while( masterEnv->stillRunning ) /*busy wait*/ ; + + //this is the only master running now, set flag again + masterEnv->stillRunning = 1; + + //prepare for scheduling + masterEnv->numScheduled = 0; + + //Poll each slave structure's Done flag + for( slaveIdx = 0; slaveIdx < NUM_SLAVES; slaveIdx++) + { + currSlave = virtSlaves[ slaveIdx ]; + + if( currSlave->workIsDone ) + { + currSlave->workIsDone = FALSE; + currSlave->needsWorkAssigned = TRUE; + + //process requests from slave to master + (*requestHandler)( currSlave ); + } + if( currSlave->needsWorkAssigned ) + { //give slave a new work-unit + success = + (*slaveScheduler)( currSlave, masterEnv ); + + if( success ) + { addToVect( currSlave, &(masterEnv->scheduledSlaves), + &(masterEnv->numScheduled) ); + currSlave->needsWorkAssigned = FALSE; + } + } + } + + //put half scheduled slaves in, then continuation, then other half + VMSProcr **scheduledSlaves; + numInFirstHalf = masterEnv->numScheduled / 2; + scheduledSlaves = masterEnv->scheduledSlaves; + for( schedSlaveIdx = 0; schedSlaveIdx < numInFirstHalf; schedSlaveIdx++) + { + writeQ( scheduledSlaves[ schedSlaveIdx ], workQ ); + } + + //enqueue continuation of this loop + // note that After this enqueue, continuation might sneak through + writeQ( masterEnv->masterWorkUnit, workQ ); + for( schedSlaveIdx = numInFirstHalf; + schedSlaveIdx < numScheduled; + schedSlaveIdx++) + { + writeQ( scheduledSlaves[ schedSlaveIdx ]->workUnitToDo, workQ ); + } + + //all done, so okay for continuation to 
proceed + masterEnv->stillRunning = 0; + } + + diff -r 000000000000 -r a5fe730dfc2e VMS.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VMS.c Sat May 22 19:37:58 2010 -0700 @@ -0,0 +1,176 @@ +/* + * Copyright 2010 OpenSourceCodeStewardshipFoundation + * + * Licensed under BSD + */ + +#include +#include +#include + +#include "VMS.h" +#include "Queue_impl/BlockingQueue.h" + + +/*Setup has two phases: + * 1) Semantic layer first calls init_VMS, which creates masterEnv, and puts + * the master work-unit into the work-queue + * 2) Semantic layer then does its own init, which creates the initial + * work-units inside the semantic layer, ready to schedule them when + * asked by the first run of the masterLoop. + * + *This part is bit weird because VMS really wants to be "always there", and + * have applications attach and detach.. for now, this VMS is part of + * the app, so the VMS system starts up as part of running the app. + * + *The semantic layer is fully isolated from the VMS internasl by + * making the semantic layer setup into a state that it's ready with its + * initial work-units, ready to schedule them to slaves when the masterLoop + * asks. Without this pattern, the semantic layer's setup would + * have to modify slaves directly to assign the initial work-units, and put + * them into the workQ itself, breaking the isolation completely. + * + * + *The semantic layer creates the initial work-unit(s), and adds its + * own environment data to masterEnv, and fills in the pointers to + * the requestHandler and slaveScheduler plug-in functions + * + *This allocates VMS data structures, populates the master VMSProc, + * and master environment, and returns the master environment to the semantic + * layer. 
+ */ + //Global vars are all inside VMS.h +MasterEnv * +init_VMS( ) + { + //Make the central work-queue + workQ = makeQ(); + + masterEnv = malloc( sizeof(MasterEnv) ); + + create_master( masterEnv ); + + create_slaves( masterEnv ); + + //When coreLoops start up, the first thing + writeQ( masterEnv->masterWorkUnit, workQ ); + } + + + +/*Fill up the virtual master data structure, which is already alloc'd in the + * masterEnv. + *The virtual Master is the same structure as a virtual slave, but it + * isn't in the array of virtual slaves. + * The reason it's the same structure is so that the coreLoop doesn't + * have to differentiate -- all work units are assigned to a VMSProcr, and + * the core loop treats them all the same way, whether it's the virtual + * master continuation or a slave's work-unit. + *Note: masterLoop is jumped into an back out of, so have to be careful with + * register usage and saving all persistent-across-calls state to masterEnv + */ +void +create_master( MasterEnv *masterEnv ) + { VMSProcr virtMaster; + + virtMaster = &(masterEnv->virtMaster); + virtMaster->workUnitToDo = malloc( sizeof( WorkUnit ) ); + virtMaster->workUnitToDo->workData = masterEnv; + //TODO: figure out call structure: what GCC will do with regs + // will jump to the masterLoop from the coreLoop -- what regs need + // saving, from before jump to after -- and what reg to put masterEnv + // pointer in when jump to masterLoop + virtMaster->workUnitToDo->addrToJumpTo = &masterLoop; + virtMaster->workUnitToDo->slaveAssignedTo = virtMaster; + } + +void +create_slaves( MasterEnv *masterEnv ) + { VMSProcr *virtSlaves; + int i; + + virtSlaves = masterEnv->virtSlaves; //TODO: make sure this is right + for( i = 0; i < NUM_SLAVES; i++ ) + { + //Set state to mean "everything done, schedule work to slave" + virtSlaves[i].workIsDone = FALSE; + virtSlaves[i].needsWorkAssigned = TRUE; + } + } + +/*Semantic layer calls this when it want the system to start running.. 
+ *
+ *This creates the core loops, pins them to physical cores, gives them the
+ * pointer to the workQ, and starts them running.
+ */
+ void
+VMS__start()
+ { int retCode, coreIdx;
+
+//TODO: still just skeleton code -- figure out right way to do this
+
+   //Create the PThread loops that take from work-queue, and start them
+   for( coreIdx=0; coreIdx < NUM_WORKERS; coreIdx++ )
+    {
+      //BUG FIX: VMS.h declares thdParams as an array of ThdParams structs,
+      // not of pointers -- fill the slot in place instead of malloc'ing
+      // into a struct slot
+      thdParams[coreIdx].workQ = workQ;
+      thdParams[coreIdx].id = coreIdx;
+
+      //Now make and start thd.. the coreLoopThds entry
+      // has all the info needed to later stop the thread.
+      retCode =
+         pthread_create( &(coreLoopThds[coreIdx]), thdAttrs, &coreLoop,
+                         (void *)&(thdParams[coreIdx]) );
+      if( retCode != 0 )
+       { //error
+         printf("ERROR creating coreLoop %d, code: %d\n", coreIdx, retCode);
+         exit(-1);
+       }
+
+      pinThdToCore( ); //figure out how to specify this..
+                       // (pthread_setaffinity_np is the likely call -- TODO)
+
+      //NOTE(review): pthread_create starts the thread itself, so the old
+      // startThd() placeholder was removed -- no separate "start" call exists
+    }
+ }
+
+ /*there is a label inside this function -- save the addr of this label in
+ * the callingPr struc, as the pick-up point from which to start the next
+ * work-unit for that procr.  If turns out have to save registers, then
+ * save them in the procr struc too.  Then do assembly jump to the CoreLoop's
+ * "done with work-unit" label.  The procr struc is in the request in the
+ * slave that animated the just-ended work-unit, so all the state is saved
+ * there, and will get passed along, inside the request handler, to the
+ * next work-unit for that procr.
+ */
+void
+VMS__save_ret_and_jump_to_CoreLoop( VMSProcr *callingPr ) //BUG FIX: had
+                                    // implicit-int return and untyped param
+ {
+   //TODO: figure out how to save the addr of a label into a mem loc
+   //NOTE: because resume pt is inside the VMS fn, it's always the same, no
+   // matter what the semantic layer is, no matter what semantic libr called.
+ callingPr->resumePt = &resumeNextWorkUnitPt; + save_processor_state_in( callingPr ); //save x86 regs, if GCC needs it to + coreLoopRetPt = callingPr->coreLoopRetPt; + //TODO: figure out how to do jump correctly -- target addr is constant + asm( jmp coreLoopRetPt ); + +resumeNextWorkUnitPt: + return; + } + + +/*The semantic virt procr is available in the request sent from the slave + * + * The request handler has to add the work-unit created to the semantic + * virtual processor the work-unit is a section of its time-line -- does this when create the + * work-unit -- means the procr data struc is available in the request sent + * from the slave, from which the new work-unit is generated.. + */ +VMS__add_request_to_slave( SlaveReqst req, VMSProcr callingPr ) + { VMSProcr slave; + slave = callingPr->workUnit->currSlave + req->nextRequest = callingPr->workUnit->currSlave->requests = req; + } + + + diff -r 000000000000 -r a5fe730dfc2e VMS.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VMS.h Sat May 22 19:37:58 2010 -0700 @@ -0,0 +1,105 @@ +/* + * Copyright 2009 OpenSourceStewardshipFoundation.org + * Licensed under GNU General Public License version 2 + * + * Author: seanhalle@yahoo.com + * + */ + +#ifndef _VMS_H +#define _VMS_H + + +#include "VMS_primitive_data_types.h" +#include "Queue_impl/BlockingQueue.h" + +//This value is the number of hardware threads in the shared memory +// machine +#define NUM_WORKERS 4 +#define NUM_SLAVES 8 + +#define SUCCESS 0 + +#define thdAttrs NULL + +typedef struct WorkUnit WorkUnit; +typedef struct VMSProcr VMSProcr; +typedef struct SlaveReqst SlaveReqst; + +typedef bool8 (*SlaveScheduler) ( void * ); +typedef void (*RequestHandler) ( SlaveReqst * ); + +typedef struct + { + QueueStruc *workQ; + unsigned int id; + } +ThdParams; + +//This is application-level data of the scheduler that runs in the master +// virtual processor. 
+// This data is at a higher level than the slave data-
+// struc, which is part of the virtualization infrastructure.. this
+// MasterEnv sits on top of that level
typedef struct
 {
 VMSProcr virtSlaves[ NUM_SLAVES ];
 VMSProcr virtMaster;

 SlaveScheduler slaveScheduler;   //plug-in fns supplied by the semantic layer
 RequestHandler requestHandler;

 int stillRunning;                //busy-wait flag used by masterLoop to keep
                                  // two master continuations from overlapping
 WorkUnit *masterWorkUnit;        //the work-unit that re-enters masterLoop

 VMSProcr **scheduledSlaves;      //vector grown by addToVect in masterLoop
 int numScheduled;

 void *OSEventStruc;
 void *semanticEnv;               //opaque to VMS; owned by the semantic layer
 }
MasterEnv;


struct WorkUnit
 {
 VMSProcr *slaveAssignedTo;
 void *addrToJumpTo;              //coreLoop casts this to a fn ptr and calls it
 void *workData;                  //passed to the work fn; void * to the app

 void *pluginSpecific;
 };


struct VMSProcr
 {
 WorkUnit *workUnitToDo;
 SlaveReqst *requestsToMaster;    //head of singly-linked request list
 int workIsDone;                  //set by coreLoop, cleared by masterLoop
 int needsWorkAssigned;           //set by masterLoop when slave is idle
 //NOTE(review): VMS.c also references resumePt and coreLoopRetPt on this
 // struct -- they are not declared here; confirm and add them
 };

struct SlaveReqst
 {
 VMSProcr *slaveFrom;
 int reqType;
 void *reqData;

 SlaveReqst *nextRequest;
 };



void * coreLoop( void *paramsIn ); //standard PThreads fn prototype


//===================== Global Vars ===================
//NOTE(review): these are definitions in a header included by three .c files;
// they link only via the old "common symbol" behavior for tentative
// definitions.  Best practice is extern declarations here plus one
// definition in a .c file -- confirm before changing the build.

pthread_t coreLoopThds[ NUM_WORKERS ]; // std struc, holds thread info
QueueStruc *workQ;
ThdParams thdParams[ NUM_WORKERS ];

MasterEnv *masterEnv;


#endif /* _VMS_H */

diff -r 000000000000 -r a5fe730dfc2e VMS_primitive_data_types.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/VMS_primitive_data_types.h	Sat May 22 19:37:58 2010 -0700
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2009 OpenSourceStewardshipFoundation.org
+ * Licensed under GNU General Public License version 2
+ *
+ * Author: seanhalle@yahoo.com
+ *
+
+ */
+
+/*NOTE(review): guard name starts with underscore + capital, which is
+ * reserved for the implementation -- consider BLIS_PRIMITIVE_DATA_TYPES_H*/
+#ifndef _BLIS_PRIMITIVE_DATA_TYPES_H
+#define _BLIS_PRIMITIVE_DATA_TYPES_H
+
+
+/*For portability, need primitive data types that have a well defined
+ * size, and well-defined layout into bytes
+ *To do this, provide BLIS standard aliases for all primitive data types
+ *These aliases must be used in all BLIS functions instead of the ANSI types
+ *
+ *These definitions will be replaced inside each specialization module
+ * according to the compiler used in that module and the
hardware being + * specialized to. + */ +/* +#define int8 char +#define uint8 char +#define int16 short +#define uint16 unsigned short +#define int32 int +#define uint32 unsigned int +#define int64 long long +#define uint64 unsigned long long +#define float32 float +#define float64 double +*/ +typedef char bool8; +typedef char int8; +typedef char uint8; +typedef short int16; +typedef unsigned short uint16; +typedef int int32; +typedef unsigned int uint32; +typedef long long int64; +typedef unsigned long long uint64; +typedef float float32; +typedef double float64; +//typedef double double float128; +#define float128 double double + +#define TRUE 1 +#define FALSE 0 + +#endif /* _BLIS_PRIMITIVE_DATA_TYPES_H */ +