diff VMS.h @ 61:984f7d78bfdf

Merge See what happens -- merged test stuff into Nov 8 VMS version
author SeanHalle
date Thu, 11 Nov 2010 06:19:51 -0800
parents 054006c26b92 7b799a46cc87
children
line diff
     1.1 --- a/VMS.h	Tue Oct 26 18:31:34 2010 -0700
     1.2 +++ b/VMS.h	Thu Nov 11 06:19:51 2010 -0800
     1.3 @@ -7,22 +7,54 @@
     1.4   */
     1.5  
     1.6  #ifndef _VMS_H
     1.7 -#define _VMS_H
     1.8 +#define	_VMS_H
     1.9  #define __USE_GNU
    1.10  
    1.11  #include "VMS_primitive_data_types.h"
    1.12 -#include "Queue_impl/BlockingQueue.h"
    1.13 +#include "Queue_impl/PrivateQueue.h"
    1.14  #include "Histogram/Histogram.h"
    1.15 +#include "DynArray/DynArray.h"
    1.16 +#include "Hash_impl/PrivateHash.h"
    1.17 +#include "vmalloc.h"
    1.18 +
    1.19  #include <pthread.h>
    1.20 +#include <sys/time.h>
    1.21  
    1.22 +
    1.23 +//===============================  Debug  ===================================
    1.24 +//
    1.25     //When SEQUENTIAL is defined, VMS does sequential exe in the main thread
    1.26     // It still does co-routines and all the mechanisms are the same, it just
    1.27     // has only a single thread and animates VPs one at a time
    1.28  //#define SEQUENTIAL
    1.29  
    1.30 -#define PRINT_DEBUG(msg) //printf(msg); fflush(stdin);
    1.31 -#define PRINT1_DEBUG(msg, param) //printf(msg, param); fflush(stdin);
    1.32 -#define PRINT2_DEBUG(msg, p1, p2) //printf(msg, p1, p2); fflush(stdin);
    1.33 +//#define USE_WORK_STEALING
    1.34 +
    1.35 +   //turns on the probe-instrumentation in the application -- when not
    1.36 +   // defined, the calls to the probe functions turn into comments
    1.37 +#define STATS__ENABLE_PROBES
    1.38 +//#define TURN_ON_DEBUG_PROBES
    1.39 +
    1.40 +   //These defines turn individual types of debug messages on and off
    1.41 +   // be sure the debug message macros are un-commented (next block of defines)
    1.42 +#define dbgProbes    FALSE /* for issues inside probes themselves*/
    1.43 +#define dbgAppFlow   FALSE /* Top level flow of application code -- general*/
    1.44 +#define dbgB2BMaster FALSE /* in coreloop, back to back master VPs*/
    1.45 +#define dbgRqstHdlr  FALSE /* in request handler code*/
    1.46 +
    1.47 +   //Comment out or un-comment the substitution half (2nd line of each macro) to turn debug messages off/on
    1.48 +#define DEBUG(  bool, msg)         \
    1.49 +//   if( bool){ printf(msg); fflush(stdin);}
    1.50 +#define DEBUG1( bool, msg, param)  \
    1.51 +//   if(bool){printf(msg, param); fflush(stdin);}
    1.52 +#define DEBUG2( bool, msg, p1, p2) \
    1.53 +//   if(bool) {printf(msg, p1, p2); fflush(stdin);}
    1.54 +
    1.55 +#define ERROR(msg) printf(msg); fflush(stdin);
    1.56 +#define ERROR1(msg, param) printf(msg, param); fflush(stdin);
    1.57 +#define ERROR2(msg, p1, p2) printf(msg, p1, p2); fflush(stdin);
    1.58 +
    1.59 +//===========================  STATS =======================
    1.60  
    1.61     //when MEAS__TIME_STAMP_SUSP is defined, causes code to be inserted and
    1.62     // compiled-in that saves the low part of the time stamp count just before
    1.63 @@ -33,53 +65,97 @@
    1.64  #define MEAS__TIME_MASTER
    1.65  #define MEAS__NUM_TIMES_TO_RUN 100000
    1.66  
    1.67 +   //For code that calculates normalization-offset between TSC counts of
    1.68 +   // different cores.
    1.69  #define NUM_TSC_ROUND_TRIPS 10
    1.70  
    1.71 +
    1.72 +//=========================  Hardware related Constants =====================
    1.73     //This value is the number of hardware threads in the shared memory
    1.74     // machine
    1.75  #define NUM_CORES        4
    1.76  
    1.77 -   // balance amortizing master fixed overhead vs imbalance potential
    1.78 -#define NUM_SCHED_SLOTS  3
    1.79 +   // tradeoff: amortizing the master's fixed overhead vs potential for imbalance
    1.80 +   // when work-stealing, can make this bigger, at risk of losing cache affinity
    1.81 +#define NUM_SCHED_SLOTS  5
    1.82  
    1.83  #define MIN_WORK_UNIT_CYCLES 20000
    1.84  
    1.85 -#define READYTOANIMATE_RETRIES 10000
    1.86 +#define MASTERLOCK_RETRIES 10000
    1.87  
    1.88 -   // stack
    1.89 -#define VIRT_PROCR_STACK_SIZE 0x10000
    1.90 +   // stack size of each virtual processor created
    1.91 +#define VIRT_PROCR_STACK_SIZE 0x4000 /* 16K */
    1.92  
    1.93 -   //256M of total memory for VMS__malloc
    1.94 -#define MASSIVE_MALLOC_SIZE 0x10000000
    1.95 +   // memory for VMS__malloc
    1.96 +#define MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE 0x10000000 /* 256M */
    1.97  
    1.98 -#define NUM_PREPEND_BYTES sizeof(FreeListElem) + sizeof(ownerElem);
    1.99 +
   1.100 +//==============================
   1.101  
   1.102  #define SUCCESS 0
   1.103  
   1.104 -#define writeVMSQ     writeSRSWQ
   1.105 -#define readVMSQ      readSRSWQ
   1.106 -#define makeVMSQ      makeSRSWQ
   1.107 -#define VMSQueueStruc SRSWQueueStruc
   1.108 +#define writeVMSQ     writePrivQ
   1.109 +#define readVMSQ      readPrivQ
   1.110 +#define makeVMSQ      makePrivQ
   1.111 +#define numInVMSQ     numInPrivQ
   1.112 +#define VMSQueueStruc PrivQueueStruc
   1.113  
   1.114 -//#define thdAttrs NULL  //For PThreads
   1.115  
   1.116 -typedef struct _SchedSlot  SchedSlot;
   1.117 -typedef struct _VMSReqst   VMSReqst;
   1.118 -typedef struct _VirtProcr  VirtProcr;
   1.119 +
   1.120 +//===========================================================================
   1.121 +typedef unsigned long long TSCount;
   1.122 +
   1.123 +typedef struct _SchedSlot     SchedSlot;
   1.124 +typedef struct _VMSReqst      VMSReqst;
   1.125 +typedef struct _VirtProcr     VirtProcr;
   1.126 +typedef struct _IntervalProbe IntervalProbe;
   1.127 +typedef struct _GateStruc     GateStruc;
   1.128 +
   1.129  
   1.130  typedef VirtProcr * (*SlaveScheduler)  ( void *, int );   //semEnv, coreIdx
   1.131  typedef void  (*RequestHandler)  ( VirtProcr *, void * ); //prWReqst, semEnv
   1.132  typedef void  (*VirtProcrFnPtr)  ( void *, VirtProcr * ); //initData, animPr
   1.133  typedef void    VirtProcrFn      ( void *, VirtProcr * ); //initData, animPr
   1.134 +typedef void  (*ResumePrFnPtr)   ( VirtProcr *, void * );
   1.135 +
   1.136 +
   1.137 +//============= Requests ===========
   1.138 +//
   1.139 +
   1.140 +enum VMSReqstType   //avoid starting enums at 0, for debug reasons
   1.141 + {
   1.142 +   semantic = 1,
   1.143 +   createReq,
   1.144 +   dissipate,
   1.145 +   VMSSemantic      //goes with VMSSemReqst below
   1.146 + };
   1.147 +
   1.148 +struct _VMSReqst
   1.149 + {
   1.150 +   enum VMSReqstType  reqType;//used for dissipate and in future for IO requests
   1.151 +   void              *semReqData;
   1.152 +
   1.153 +   VMSReqst *nextReqst;
   1.154 + };
   1.155 +//VMSReqst
   1.156 +
   1.157 +enum VMSSemReqstType   //These are equivalent to semantic requests, but for
   1.158 + {                     // VMS's services available directly to app, like OS
   1.159 +   createProbe = 1,    // and probe services -- like a VMS-wide built-in lang
   1.160 +   openFile,
   1.161 +   otherIO
   1.162 + };
   1.163  
   1.164  typedef struct
   1.165 - {
   1.166 -   void           *endThdPt;
   1.167 -   unsigned int    coreNum;
   1.168 + { enum VMSSemReqstType reqType;
   1.169 +   VirtProcr           *requestingPr;
   1.170 +   char                *nameStr;  //for create probe
   1.171   }
   1.172 -ThdParams;
   1.173 + VMSSemReq;
   1.174  
   1.175  
   1.176 +//====================  Core data structures  ===================
   1.177 +
   1.178  struct _SchedSlot
   1.179   {
   1.180     int         workIsDone;
   1.181 @@ -87,24 +163,6 @@
   1.182     VirtProcr  *procrAssignedToSlot;
   1.183   };
   1.184  //SchedSlot
   1.185 - 
   1.186 -enum ReqstType
   1.187 - {
   1.188 -   semantic = 1,
   1.189 -   dissipate,
   1.190 -   regCreated,
   1.191 -   IO
   1.192 - };
   1.193 -
   1.194 -struct _VMSReqst
   1.195 - {
   1.196 -//   VirtProcr   *virtProcrFrom;
   1.197 -   enum ReqstType  reqType;//used for dissipate and in future for IO requests
   1.198 -   void           *semReqData;
   1.199 -
   1.200 -   VMSReqst *nextReqst;
   1.201 - };
   1.202 -//VMSReqst
   1.203  
   1.204  struct _VirtProcr
   1.205   { int         procrID;  //for debugging -- count up each time create
   1.206 @@ -123,9 +181,10 @@
   1.207     SchedSlot  *schedSlot;
   1.208     VMSReqst   *requests;
   1.209  
   1.210 -   void       *semanticData;
   1.211 +   void       *semanticData; //this lives here for the life of VP
   1.212 +   void       *dataRetFromReq;//values returned from plugin to VP go here
   1.213  
   1.214 -   //============================= MEASUREMENT STUFF ========================
   1.215 +      //=========== MEASUREMENT STUFF ==========
   1.216     #ifdef MEAS__TIME_STAMP_SUSP
   1.217     unsigned int preSuspTSCLow;
   1.218     unsigned int postSuspTSCLow;
   1.219 @@ -134,7 +193,8 @@
   1.220     unsigned int startMasterTSCLow;
   1.221     unsigned int endMasterTSCLow;
   1.222     #endif
   1.223 -   //========================================================================
   1.224 +   
   1.225 +   float64      createPtInSecs;  //have space but don't use on some configs
   1.226   };
   1.227  //VirtProcr
   1.228  
   1.229 @@ -158,37 +218,79 @@
   1.230  
   1.231     void            *semanticEnv;
   1.232     void            *OSEventStruc;   //for future, when add I/O to BLIS
   1.233 +   MallocProlog    *freeListHead;
   1.234 +   int32            amtOfOutstandingMem; //total currently allocated
   1.235  
   1.236     void            *coreLoopStartPt;//addr to jump to to re-enter coreLoop
   1.237     void            *coreLoopEndPt;  //addr to jump to to shut down a coreLoop
   1.238  
   1.239 -   int              setupComplete;
   1.240 -   int              masterLock;
   1.241 +   int32            setupComplete;
   1.242 +   int32            masterLock;
   1.243  
   1.244     VMSStats        *stats;
   1.245 +   int32            numMasterInARow[NUM_CORES];//detect back-to-back masterVP
   1.246 +   GateStruc       *workStealingGates[ NUM_CORES ]; //concurrent work-steal
   1.247 +   int32            workStealingLock;
   1.248 +   
   1.249 +   int32            numProcrsCreated; //gives ordering to processor creation
   1.250 +
   1.251 +      //=========== MEASUREMENT STUFF =============
   1.252 +   IntervalProbe  **intervalProbes;
   1.253 +   PrivDynArrayInfo    *dynIntervalProbesInfo;
   1.254 +   HashTable       *probeNameHashTbl;
   1.255 +   int32            masterCreateProbeID;
   1.256 +   float64          createPtInSecs;
   1.257   }
   1.258  MasterEnv;
   1.259  
   1.260 +//=========================  Extra Stuff Data Strucs  =======================
   1.261 +typedef struct
   1.262 + {
   1.263  
   1.264 -//==========================================================
   1.265 + }
   1.266 +VMSExcp;
   1.267 +
   1.268 +struct _GateStruc
   1.269 + {
   1.270 +   int32 gateClosed;
   1.271 +   int32 preGateProgress;
   1.272 +   int32 waitProgress;
   1.273 +   int32 exitProgress;
   1.274 + };
   1.275 +//GateStruc
   1.276 +
   1.277 +//=======================  OS Thread related  ===============================
   1.278  
   1.279  void * coreLoop( void *paramsIn );  //standard PThreads fn prototype
   1.280  void * coreLoop_Seq( void *paramsIn );  //standard PThreads fn prototype
   1.281  void masterLoop( void *initData, VirtProcr *masterPr );
   1.282  
   1.283  
   1.284 -//=====================  Global Vars ===================
   1.285 -
   1.286 +typedef struct
   1.287 + {
   1.288 +   void           *endThdPt;
   1.289 +   unsigned int    coreNum;
   1.290 + }
   1.291 +ThdParams;
   1.292  
   1.293  pthread_t       coreLoopThdHandles[ NUM_CORES ];  //pthread's virt-procr state
   1.294  ThdParams      *coreLoopThdParams [ NUM_CORES ];
   1.295  pthread_mutex_t suspendLock;
   1.296  pthread_cond_t  suspend_cond;
   1.297  
   1.298 +
   1.299 +
   1.300 +//=====================  Global Vars ===================
   1.301 +
   1.302  volatile MasterEnv      *_VMSMasterEnv;
   1.303  
   1.304  
   1.305 -//==========================
   1.306 +
   1.307 +
   1.308 +//===========================  Function Prototypes  =========================
   1.309 +
   1.310 +
   1.311 +//========== Setup and shutdown ==========
   1.312  void
   1.313  VMS__init();
   1.314  
   1.315 @@ -204,69 +306,59 @@
   1.316  VirtProcr *
   1.317  VMS__create_procr( VirtProcrFnPtr fnPtr, void *initialData );
   1.318  
   1.319 +void
   1.320 +VMS__dissipate_procr( VirtProcr *procrToDissipate );
   1.321 +
   1.322 +   //Use this to create a processor inside the entry point & other places outside
   1.323 +   // the VMS system boundary (i.e., not run in a slave nor in the Master)
   1.324  VirtProcr *
   1.325 -VMS__create_the_shutdown_procr();
   1.326 -
   1.327 -//==========================
   1.328 -inline void
   1.329 -VMS__add_sem_request( void *semReqData, VirtProcr *callingPr );
   1.330 +VMS_ext__create_procr( VirtProcrFnPtr fnPtr, void *initialData );
   1.331  
   1.332  void
   1.333 -VMS__send_req_to_register_new_procr( VirtProcr *newPrToRegister,
   1.334 -                                      VirtProcr *reqstingPr );
   1.335 +VMS_ext__dissipate_procr( VirtProcr *procrToDissipate );
   1.336  
   1.337  void
   1.338 -VMS__free_request( VMSReqst *req );
   1.339 +VMS__throw_exception( char *msgStr, VirtProcr *reqstPr, VMSExcp *excpData );
   1.340  
   1.341  void
   1.342 -VMS__remove_and_free_top_request( VirtProcr *reqstingPr );
   1.343 +VMS__shutdown();
   1.344 +
   1.345 +void
   1.346 +VMS__cleanup_at_end_of_shutdown();
   1.347 +
   1.348 +
   1.349 +//==============  Request Related  ===============
   1.350 +
   1.351 +void
   1.352 +VMS__suspend_procr( VirtProcr *callingPr );
   1.353 +
   1.354 +inline void
   1.355 +VMS__add_sem_request_in_mallocd_VMSReqst( void *semReqData, VirtProcr *callingPr );
   1.356 +
   1.357 +inline void
   1.358 +VMS__send_sem_request( void *semReqData, VirtProcr *callingPr );
   1.359 +
   1.360 +void
   1.361 +VMS__send_create_procr_req( void *semReqData, VirtProcr *reqstingPr );
   1.362 +
   1.363 +void inline
   1.364 +VMS__send_dissipate_req( VirtProcr *prToDissipate );
   1.365 +
   1.366 +inline void
   1.367 +VMS__send_VMSSem_request( void *semReqData, VirtProcr *callingPr );
   1.368  
   1.369  VMSReqst *
   1.370 -VMS__take_top_request_from( VirtProcr *reqstingPr );
   1.371 -
   1.372 -VMSReqst *
   1.373 -VMS__free_top_and_give_next_request_from( VirtProcr *procrWithReq );
   1.374 +VMS__take_next_request_out_of( VirtProcr *procrWithReq );
   1.375  
   1.376  inline void *
   1.377  VMS__take_sem_reqst_from( VMSReqst *req );
   1.378  
   1.379 -inline int
   1.380 -VMS__isSemanticReqst( VMSReqst *req );
   1.381 -
   1.382 -inline int
   1.383 -VMS__isDissipateReqst( VMSReqst *req );
   1.384 -
   1.385 -inline int
   1.386 -VMS__isCreateReqst( VMSReqst *req );
   1.387 -
   1.388 -//==========================
   1.389 -
   1.390 -void
   1.391 -VMS__suspend_procr( VirtProcr *callingPr );
   1.392 -
   1.393 -void
   1.394 -VMS__dissipate_procr( VirtProcr *prToDissipate );
   1.395 -
   1.396 -void
   1.397 -VMS__handle_dissipate_reqst( VirtProcr *procrToDissipate );
   1.398 -
   1.399 -void
   1.400 -VMS__cleanup_after_shutdown();
   1.401 -
   1.402 -//==========================
   1.403 -void
   1.404 -measureTSCOffsetsAsCore0();
   1.405 -
   1.406 -void
   1.407 -measureTSCOffsetsAsRemoteCore( int coreIdx );
   1.408 -
   1.409 -//============================= Statistics ==================================
   1.410 -
   1.411 -typedef unsigned long long TSCount;
   1.412 -
   1.413     //Frequency of TS counts
   1.414     //TODO: change freq for each machine
   1.415  #define TSCOUNT_FREQ 3180000000
   1.416 +//======================== STATS ======================
   1.417 +
   1.418 +//===== RDTSC wrapper =====
   1.419  
   1.420  #define saveTimeStampCountInto(low, high) \
   1.421     asm volatile("RDTSC;                   \
   1.422 @@ -284,10 +376,12 @@
   1.423     /* inputs  */ :                        \
   1.424     /* clobber */ : "%eax", "%edx"         \
   1.425                  );
   1.426 +//=====
   1.427  
   1.428 -inline TSCount getTSC();
   1.429 +#include "SwitchAnimators.h"
   1.430 +#include "probes.h"
   1.431  
   1.432 -inline TSCount getTSC();
   1.433 +
   1.434  
   1.435  //===================== Debug ==========================
   1.436  int numProcrsCreated;
   1.437 @@ -298,4 +392,3 @@
   1.438  TSCount  *pingTimes;
   1.439  
   1.440  #endif	/* _VMS_H */
   1.441 -