Me@42: /* Me@42: * Copyright 2009 OpenSourceStewardshipFoundation.org Me@42: * Licensed under GNU General Public License version 2 Me@42: * Me@42: * Author: seanhalle@yahoo.com Me@42: * Me@42: */ Me@42: Me@42: #ifndef _VMS_H Me@42: #define _VMS_H Me@42: #define __USE_GNU Me@42: Me@42: #include "VMS_primitive_data_types.h" Me@55: #include "Queue_impl/PrivateQueue.h" Me@42: #include "Histogram/Histogram.h" Me@50: #include "DynArray/DynArray.h" Me@50: #include "Hash_impl/PrivateHash.h" Me@50: #include "vmalloc.h" Me@50: Me@42: #include Me@50: #include Me@42: Me@50: Me@50: //=============================== Debug =================================== Me@55: // Me@45: //When SEQUENTIAL is defined, VMS does sequential exe in the main thread Me@42: // It still does co-routines and all the mechanisms are the same, it just Me@42: // has only a single thread and animates VPs one at a time Me@45: //#define SEQUENTIAL Me@42: Me@55: //#define USE_WORK_STEALING Me@55: Me@52: //turns on the probe-instrumentation in the application -- when not Me@52: // defined, the calls to the probe functions turn into comments Me@52: #define STATS__ENABLE_PROBES Me@60: //#define TURN_ON_DEBUG_PROBES Me@52: Me@55: //These defines turn types of bug messages on and off Me@55: // be sure debug messages are un-commented (next block of defines) Me@60: #define dbgProbes FALSE /* for issues inside probes themselves*/ Me@60: #define dbgAppFlow FALSE /* Top level flow of application code -- general*/ Me@60: #define dbgB2BMaster FALSE /* in coreloop, back to back master VPs*/ Me@60: #define dbgRqstHdlr FALSE /* in request handler code*/ Me@52: Me@55: //Comment or un- the substitute half to turn on/off types of debug message Me@55: #define DEBUG( bool, msg) \ Me@60: // if( bool){ printf(msg); fflush(stdin);} Me@55: #define DEBUG1( bool, msg, param) \ Me@60: // if(bool){printf(msg, param); fflush(stdin);} Me@55: #define DEBUG2( bool, msg, p1, p2) \ Me@60: // if(bool) {printf(msg, p1, p2); fflush(stdin);} 

//Unconditional error reporting: print the message with printf, then flush
// stdout so the text is visible right away.
// - msg is handed to printf as the format string, so pass only trusted
//   (literal) format strings
// - the two calls are wrapped in braces so each macro is one statement
//   (a bare  if( cond ) ERROR( .. )  now guards the flush as well)
// - stdout is flushed; fflush on an input stream (stdin) is undefined
#define ERROR(msg)           { printf(msg);         fflush(stdout); }
#define ERROR1(msg, param)   { printf(msg, param);  fflush(stdout); }
#define ERROR2(msg, p1, p2)  { printf(msg, p1, p2); fflush(stdout); }

//===========================  STATS  =======================

//when MEAS__TIME_STAMP_SUSP is defined, causes code to be inserted and
// compiled-in that saves the low part of the time stamp count just before
// suspending a processor and just after resuming that processor.  It is
// saved into a field added to VirtProcr.  Have to sanity-check for
// rollover of low portion into high portion.
#define MEAS__TIME_STAMP_SUSP
#define MEAS__TIME_MASTER
#define MEAS__NUM_TIMES_TO_RUN 100000

//For code that calculates normalization-offset between TSC counts of
// different cores.
#define NUM_TSC_ROUND_TRIPS 10


//=========================  Hardware related Constants =====================
//This value is the number of hardware threads in the shared memory
// machine
#define NUM_CORES        4

// tradeoff amortizing master fixed overhead vs imbalance potential
// when work-stealing, can make bigger, at risk of losing cache affinity
#define NUM_SCHED_SLOTS  5

#define MIN_WORK_UNIT_CYCLES 20000

#define MASTERLOCK_RETRIES 10000

// stack size in virtual processors created
#define VIRT_PROCR_STACK_SIZE 0x4000 /* 16K */

// memory for VMS__malloc
#define MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE 0x10000000 /* 256M */


//==============================

#define SUCCESS 0

//VMS's queue names are aliases for the private-queue implementation
#define writeVMSQ     writePrivQ
#define readVMSQ      readPrivQ
#define makeVMSQ      makePrivQ
#define numInVMSQ     numInPrivQ
#define VMSQueueStruc PrivQueueStruc



//=========================================================================== Me@50: typedef unsigned long long TSCount; Me@50: Me@50: typedef struct _SchedSlot SchedSlot; Me@50: typedef struct _VMSReqst VMSReqst; Me@50: typedef struct _VirtProcr VirtProcr; Me@50: typedef struct _IntervalProbe IntervalProbe; Me@55: typedef struct _GateStruc GateStruc; Me@55: Me@42: Me@42: typedef VirtProcr * (*SlaveScheduler) ( void *, int ); //semEnv, coreIdx Me@42: typedef void (*RequestHandler) ( VirtProcr *, void * ); //prWReqst, semEnv Me@42: typedef void (*VirtProcrFnPtr) ( void *, VirtProcr * ); //initData, animPr Me@42: typedef void VirtProcrFn ( void *, VirtProcr * ); //initData, animPr Me@50: typedef void (*ResumePrFnPtr) ( VirtProcr *, void * ); Me@50: Me@50: Me@50: //============= Requests =========== Me@50: // Me@50: Me@50: enum VMSReqstType //avoid starting enums at 0, for debug reasons Me@50: { Me@50: semantic = 1, Me@50: createReq, Me@50: dissipate, Me@50: VMSSemantic //goes with VMSSemReqst below Me@50: }; Me@50: Me@50: struct _VMSReqst Me@50: { Me@50: enum VMSReqstType reqType;//used for dissipate and in future for IO requests Me@50: void *semReqData; Me@50: Me@50: VMSReqst *nextReqst; Me@50: }; Me@50: //VMSReqst Me@50: Me@50: enum VMSSemReqstType //These are equivalent to semantic requests, but for Me@50: { // VMS's services available directly to app, like OS Me@50: createProbe = 1, // and probe services -- like a VMS-wide built-in lang Me@50: openFile, Me@50: otherIO Me@50: }; Me@42: Me@42: typedef struct Me@50: { enum VMSSemReqstType reqType; Me@50: VirtProcr *requestingPr; Me@50: char *nameStr; //for create probe Me@42: } Me@50: VMSSemReq; Me@42: Me@42: Me@50: //==================== Core data structures =================== Me@50: Me@42: struct _SchedSlot Me@42: { Me@42: int workIsDone; Me@42: int needsProcrAssigned; Me@42: VirtProcr *procrAssignedToSlot; Me@42: }; Me@42: //SchedSlot Me@42: Me@62: /*WARNING: re-arranging this data structure could cause VP 
switching Me@62: * assembly code to fail -- hard-codes offsets of fields Me@62: */ Me@42: struct _VirtProcr Me@42: { int procrID; //for debugging -- count up each time create Me@42: int coreAnimatedBy; Me@42: void *startOfStack; Me@42: void *stackPtr; Me@42: void *framePtr; Me@42: void *nextInstrPt; Me@42: Me@42: void *coreLoopStartPt; //allows proto-runtime to be linked later Me@42: void *coreLoopFramePtr; //restore before jmp back to core loop Me@42: void *coreLoopStackPtr; //restore before jmp back to core loop Me@42: Me@42: void *initialData; Me@42: Me@42: SchedSlot *schedSlot; Me@42: VMSReqst *requests; Me@42: Me@50: void *semanticData; //this lives here for the life of VP Me@53: void *dataRetFromReq;//values returned from plugin to VP go here Me@42: Me@50: //=========== MEASUREMENT STUFF ========== Me@42: #ifdef MEAS__TIME_STAMP_SUSP Me@42: unsigned int preSuspTSCLow; Me@42: unsigned int postSuspTSCLow; Me@42: #endif Me@42: #ifdef MEAS__TIME_MASTER Me@42: unsigned int startMasterTSCLow; Me@42: unsigned int endMasterTSCLow; Me@42: #endif Me@50: Me@50: float64 createPtInSecs; //have space but don't use on some configs Me@42: }; Me@42: //VirtProcr Me@42: Me@42: Me@62: /*WARNING: re-arranging this data structure could cause VP-switching Me@62: * assembly code to fail -- hard-codes offsets of fields Me@62: * (because -O3 messes with things otherwise) Me@62: */ Me@42: typedef struct Me@42: { Me@42: SlaveScheduler slaveScheduler; Me@42: RequestHandler requestHandler; Me@42: Me@42: SchedSlot ***allSchedSlots; Me@55: VMSQueueStruc **readyToAnimateQs; Me@42: VirtProcr **masterVPs; Me@42: Me@42: void *semanticEnv; Me@42: void *OSEventStruc; //for future, when add I/O to BLIS Me@50: MallocProlog *freeListHead; Me@50: int32 amtOfOutstandingMem; //total currently allocated Me@42: Me@42: void *coreLoopStartPt;//addr to jump to to re-enter coreLoop Me@42: void *coreLoopEndPt; //addr to jump to to shut down a coreLoop Me@42: Me@50: int32 setupComplete; Me@50: int32 
masterLock; Me@42: Me@50: int32 numMasterInARow[NUM_CORES];//detect back-to-back masterVP Me@59: GateStruc *workStealingGates[ NUM_CORES ]; //concurrent work-steal Me@55: int32 workStealingLock; Me@55: Me@50: int32 numProcrsCreated; //gives ordering to processor creation Me@50: Me@50: //=========== MEASUREMENT STUFF ============= Me@50: IntervalProbe **intervalProbes; Me@53: PrivDynArrayInfo *dynIntervalProbesInfo; Me@50: HashTable *probeNameHashTbl; Me@50: int32 masterCreateProbeID; Me@50: float64 createPtInSecs; Me@42: } Me@42: MasterEnv; Me@42: Me@55: //========================= Extra Stuff Data Strucs ======================= Me@54: typedef struct Me@54: { Me@42: Me@54: } Me@54: VMSExcp; Me@50: Me@55: struct _GateStruc Me@55: { Me@55: int32 gateClosed; Me@55: int32 preGateProgress; Me@55: int32 waitProgress; Me@55: int32 exitProgress; Me@55: }; Me@55: //GateStruc Me@50: Me@50: //======================= OS Thread related =============================== Me@42: Me@42: void * coreLoop( void *paramsIn ); //standard PThreads fn prototype Me@42: void * coreLoop_Seq( void *paramsIn ); //standard PThreads fn prototype Me@42: void masterLoop( void *initData, VirtProcr *masterPr ); Me@42: Me@42: Me@50: typedef struct Me@50: { Me@50: void *endThdPt; Me@50: unsigned int coreNum; Me@50: } Me@50: ThdParams; Me@42: Me@42: pthread_t coreLoopThdHandles[ NUM_CORES ]; //pthread's virt-procr state Me@42: ThdParams *coreLoopThdParams [ NUM_CORES ]; Me@42: pthread_mutex_t suspendLock; Me@42: pthread_cond_t suspend_cond; Me@42: Me@50: Me@50: Me@50: //===================== Global Vars =================== Me@50: Me@42: volatile MasterEnv *_VMSMasterEnv; Me@42: Me@50: Me@50: Me@50: Me@50: //=========================== Function Prototypes ========================= Me@50: Me@53: Me@53: //========== Setup and shutdown ========== Me@42: void Me@42: VMS__init(); Me@42: Me@42: void Me@42: VMS__init_Seq(); Me@42: Me@42: void Me@42: VMS__start_the_work_then_wait_until_done(); Me@42: Me@42: void 
Me@42: VMS__start_the_work_then_wait_until_done_Seq(); Me@42: Me@42: VirtProcr * Me@42: VMS__create_procr( VirtProcrFnPtr fnPtr, void *initialData ); Me@42: Me@53: void Me@53: VMS__dissipate_procr( VirtProcr *procrToDissipate ); Me@53: Me@50: //Use this to create processor inside entry point & other places outside Me@50: // the VMS system boundary (IE, not run in slave nor Master) Me@50: VirtProcr * Me@50: VMS_ext__create_procr( VirtProcrFnPtr fnPtr, void *initialData ); Me@50: Me@53: void Me@53: VMS_ext__dissipate_procr( VirtProcr *procrToDissipate ); Me@42: Me@50: void Me@54: VMS__throw_exception( char *msgStr, VirtProcr *reqstPr, VMSExcp *excpData ); Me@54: Me@54: void Me@53: VMS__shutdown(); Me@53: Me@53: void Me@53: VMS__cleanup_at_end_of_shutdown(); Me@50: Me@50: Me@50: //============== Request Related =============== Me@50: Me@50: void Me@50: VMS__suspend_procr( VirtProcr *callingPr ); Me@50: Me@42: inline void Me@53: VMS__add_sem_request_in_mallocd_VMSReqst( void *semReqData, VirtProcr *callingPr ); Me@53: Me@53: inline void Me@53: VMS__send_sem_request( void *semReqData, VirtProcr *callingPr ); Me@42: Me@42: void Me@50: VMS__send_create_procr_req( void *semReqData, VirtProcr *reqstingPr ); Me@42: Me@53: void inline Me@53: VMS__send_dissipate_req( VirtProcr *prToDissipate ); Me@53: Me@52: inline void Me@52: VMS__send_VMSSem_request( void *semReqData, VirtProcr *callingPr ); Me@52: Me@42: VMSReqst * Me@50: VMS__take_next_request_out_of( VirtProcr *procrWithReq ); Me@42: Me@42: inline void * Me@42: VMS__take_sem_reqst_from( VMSReqst *req ); Me@42: Me@53: //======================== STATS ====================== Me@42: Me@53: //===== RDTSC wrapper ===== Me@42: Me@42: #define saveTimeStampCountInto(low, high) \ Me@42: asm volatile("RDTSC; \ Me@42: movl %%eax, %0; \ Me@42: movl %%edx, %1;" \ Me@42: /* outputs */ : "=m" (low), "=m" (high)\ Me@42: /* inputs */ : \ Me@42: /* clobber */ : "%eax", "%edx" \ Me@42: ); Me@42: Me@42: #define saveLowTimeStampCountInto(low) 
\ Me@42: asm volatile("RDTSC; \ Me@42: movl %%eax, %0;" \ Me@42: /* outputs */ : "=m" (low) \ Me@42: /* inputs */ : \ Me@42: /* clobber */ : "%eax", "%edx" \ Me@42: ); Me@53: //===== Me@42: Me@55: #include "SwitchAnimators.h" Me@50: #include "probes.h" Me@42: Me@42: #endif /* _VMS_H */ Me@42: