diff VMS.h @ 168:d7c0c0a8187a

Merged default branch
author Merten Sach <msach@mailbox.tu-berlin.de>
date Wed, 02 Nov 2011 14:59:29 +0100
parents 395f58384a5c 9661b8cc8318
children d83f59e6e2db
line diff
     1.1 --- a/VMS.h	Thu Sep 15 17:31:33 2011 +0200
     1.2 +++ b/VMS.h	Wed Nov 02 14:59:29 2011 +0100
     1.3 @@ -1,627 +1,627 @@
     1.4 -/*
     1.5 - *  Copyright 2009 OpenSourceStewardshipFoundation.org
     1.6 - *  Licensed under GNU General Public License version 2
     1.7 - *
     1.8 - * Author: seanhalle@yahoo.com
     1.9 - * 
    1.10 - */
    1.11 -
    1.12 -#ifndef _VMS_H
    1.13 -#define	_VMS_H
    1.14 -#define _GNU_SOURCE
    1.15 -
    1.16 -#include "VMS_primitive_data_types.h"
    1.17 -#include "Queue_impl/PrivateQueue.h"
    1.18 -#include "Histogram/Histogram.h"
    1.19 -#include "DynArray/DynArray.h"
    1.20 -#include "Hash_impl/PrivateHash.h"
    1.21 -#include "vmalloc.h"
    1.22 -#include "Counters/Counters.h"
    1.23 -#include "dependency.h"
    1.24 -
    1.25 -#include <pthread.h>
    1.26 -#include <sys/time.h>
    1.27 -
    1.28 -
    1.29 -//===============================  Debug  ===================================
    1.30 -//
    1.31 -//When SEQUENTIAL is defined, VMS does sequential exe in the main thread
    1.32 -// It still does co-routines and all the mechanisms are the same, it just
    1.33 -// has only a single thread and animates VPs one at a time
    1.34 -//#define SEQUENTIAL
    1.35 -
    1.36 -//#define USE_WORK_STEALING
    1.37 -
    1.38 -//turns on the probe-instrumentation in the application -- when not
    1.39 -// defined, the calls to the probe functions turn into comments
    1.40 -//#define STATS__ENABLE_PROBES
    1.41 -//#define TURN_ON_DEBUG_PROBES
    1.42 -
    1.43 -//These defines turn types of bug messages on and off
    1.44 -// be sure debug messages are un-commented (next block of defines)
    1.45 -#define dbgAppFlow   FALSE /* Top level flow of application code -- general*/
    1.46 -#define dbgProbes    FALSE /* for issues inside probes themselves*/
    1.47 -#define dbgB2BMaster FALSE /* in coreloop, back to back master VPs*/
    1.48 -#define dbgRqstHdlr  FALSE /* in request handler code*/
    1.49 -#define dbgDependency TRUE /* in request handler code, print dependencies */
    1.50 -
    1.51 -//Comment or un- the substitute half to turn on/off types of debug message
    1.52 -#define DEBUG(  bool, msg)         \
    1.53 -  if( bool){ printf(msg); fflush(stdin);}
    1.54 -#define DEBUG1( bool, msg, param)  \
    1.55 -   if(bool){printf(msg, param); fflush(stdin);}
    1.56 -#define DEBUG2( bool, msg, p1, p2) \
    1.57 -   if(bool) {printf(msg, p1, p2); fflush(stdin);}
    1.58 -
    1.59 -#define ERROR(msg) printf(msg);
    1.60 -#define ERROR1(msg, param) printf(msg, param); 
    1.61 -#define ERROR2(msg, p1, p2) printf(msg, p1, p2);
    1.62 -
    1.63 -//===========================  STATS =======================
    1.64 -
    1.65 -   //when MEAS__TIME_STAMP_SUSP is defined, causes code to be inserted and
    1.66 -   // compiled-in that saves the low part of the time stamp count just before
    1.67 -   // suspending a processor and just after resuming that processor.  It is
    1.68 -   // saved into a field added to VirtProcr.  Have to sanity-check for
    1.69 -   // rollover of low portion into high portion.
    1.70 -//#define MEAS__TIME_STAMP_SUSP
    1.71 -//#define MEAS__TIME_MASTER
    1.72 -//#define MEAS__TIME_PLUGIN
    1.73 -//#define MEAS__TIME_MALLOC
    1.74 -//#define MEAS__TIME_MASTER_LOCK
    1.75 -//#define MEAS__NUM_TIMES_TO_RUN 100000
    1.76 -
    1.77 -   //For code that calculates normalization-offset between TSC counts of
    1.78 -   // different cores.
    1.79 -//#define NUM_TSC_ROUND_TRIPS 10
    1.80 -
    1.81 -#define MEAS__PERF_COUNTERS
    1.82 -#define DETECT_DEPENDENCIES
    1.83 -
    1.84 -//=========================  Hardware related Constants =====================
    1.85 -   //This value is the number of hardware threads in the shared memory
    1.86 -   // machine
    1.87 -#define NUM_CORES        2
    1.88 -
    1.89 -   // tradeoff amortizing master fixed overhead vs imbalance potential
    1.90 -   // when work-stealing, can make bigger, at risk of losing cache affinity
    1.91 -#define NUM_SCHED_SLOTS  5
    1.92 -
    1.93 -#define MIN_WORK_UNIT_CYCLES 20000
    1.94 -
    1.95 -#define MASTERLOCK_RETRIES 10000
    1.96 -
    1.97 -   // stack size in virtual processors created
    1.98 -#define VIRT_PROCR_STACK_SIZE 0x8000 /* 32K */
    1.99 -
   1.100 -   // memory for VMS__malloc
   1.101 -#define MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE 0x10000000 /* 256M */
   1.102 -
   1.103 -#define CACHE_LINE 64
   1.104 -#define PAGE_SIZE 4096
   1.105 -
   1.106 -
   1.107 -//==============================
   1.108 -
   1.109 -#define SUCCESS 0
   1.110 -
   1.111 -#define writeVMSQ     writePrivQ
   1.112 -#define readVMSQ      readPrivQ
   1.113 -#define makeVMSQ      makeVMSPrivQ
   1.114 -#define numInVMSQ     numInPrivQ
   1.115 -#define VMSQueueStruc PrivQueueStruc
   1.116 -
   1.117 -
   1.118 -
   1.119 -//===========================================================================
   1.120 -typedef unsigned long long TSCount;
   1.121 -
   1.122 -typedef struct _SchedSlot     SchedSlot;
   1.123 -typedef struct _VMSReqst      VMSReqst;
   1.124 -typedef struct _VirtProcr     VirtProcr;
   1.125 -typedef struct _IntervalProbe IntervalProbe;
   1.126 -typedef struct _GateStruc     GateStruc;
   1.127 -
   1.128 -
   1.129 -typedef VirtProcr * (*SlaveScheduler)  ( void *, int );   //semEnv, coreIdx
   1.130 -typedef void  (*RequestHandler)  ( VirtProcr *, void * ); //prWReqst, semEnv
   1.131 -typedef void  (*VirtProcrFnPtr)  ( void *, VirtProcr * ); //initData, animPr
   1.132 -typedef void    VirtProcrFn      ( void *, VirtProcr * ); //initData, animPr
   1.133 -typedef void  (*ResumePrFnPtr)   ( VirtProcr *, void * );
   1.134 -
   1.135 -
   1.136 -//============= Requests ===========
   1.137 -//
   1.138 -
   1.139 -enum VMSReqstType   //avoid starting enums at 0, for debug reasons
   1.140 - {
   1.141 -   semantic = 1,
   1.142 -   createReq,
   1.143 -   dissipate,
   1.144 -   VMSSemantic      //goes with VMSSemReqst below
   1.145 - };
   1.146 -
   1.147 -struct _VMSReqst
   1.148 - {
   1.149 -   enum VMSReqstType  reqType;//used for dissipate and in future for IO requests
   1.150 -   void              *semReqData;
   1.151 -
   1.152 -   VMSReqst *nextReqst;
   1.153 - };
   1.154 -//VMSReqst
   1.155 -
   1.156 -enum VMSSemReqstType   //These are equivalent to semantic requests, but for
   1.157 - {                     // VMS's services available directly to app, like OS
   1.158 -   createProbe = 1,    // and probe services -- like a VMS-wide built-in lang
   1.159 -   openFile,
   1.160 -   otherIO
   1.161 - };
   1.162 -
   1.163 -typedef struct
   1.164 - { enum VMSSemReqstType reqType;
   1.165 -   VirtProcr           *requestingPr;
   1.166 -   char                *nameStr;  //for create probe
   1.167 - }
   1.168 - VMSSemReq;
   1.169 -
   1.170 -
   1.171 -//====================  Core data structures  ===================
   1.172 -
   1.173 -struct _SchedSlot
   1.174 - {
   1.175 -   int         workIsDone;
   1.176 -   int         needsProcrAssigned;
   1.177 -   VirtProcr  *procrAssignedToSlot;
   1.178 - };
   1.179 -//SchedSlot
   1.180 -
   1.181 -/*WARNING: re-arranging this data structure could cause VP switching
   1.182 - *         assembly code to fail -- hard-codes offsets of fields
   1.183 - */
   1.184 -struct _VirtProcr
   1.185 - { int         procrID;  //for debugging -- count up each time create
   1.186 -   int         coreAnimatedBy;
   1.187 -   void       *startOfStack;
   1.188 -   void       *stackPtr;
   1.189 -   void       *framePtr;
   1.190 -   void       *nextInstrPt;
   1.191 -   
   1.192 -   void       *coreLoopStartPt;  //allows proto-runtime to be linked later
   1.193 -   void       *coreLoopFramePtr; //restore before jmp back to core loop
   1.194 -   void       *coreLoopStackPtr; //restore before jmp back to core loop
   1.195 -
   1.196 -   void       *initialData;
   1.197 -
   1.198 -   SchedSlot  *schedSlot;
   1.199 -   VMSReqst   *requests;
   1.200 -
   1.201 -   void       *semanticData; //this livesUSE_GNU here for the life of VP
   1.202 -   void       *dataRetFromReq;//values returned from plugin to VP go here
   1.203 -
   1.204 -      //=========== MEASUREMENT STUFF ==========
   1.205 -   #ifdef MEAS__TIME_STAMP_SUSP
   1.206 -   unsigned int preSuspTSCLow;
   1.207 -   unsigned int postSuspTSCLow;
   1.208 -   #endif
   1.209 -   #ifdef MEAS__TIME_MASTER /* in VirtProcr because multiple masterVPs*/
   1.210 -   unsigned int startMasterTSCLow;USE_GNU
   1.211 -   unsigned int endMasterTSCLow;
   1.212 -   #endif
   1.213 -   #ifdef MEAS__PERF_COUNTERS //
   1.214 -   CounterRecord** counter_history;
   1.215 -   PrivDynArrayInfo* counter_history_array_info;
   1.216 -   #endif
   1.217 -      //========================================
   1.218 -   
   1.219 -   float64      createPtInSecs;  //have space but don't use on some configs
   1.220 - };
   1.221 -//VirtProcr
   1.222 -
   1.223 -
   1.224 -/*WARNING: re-arranging this data structure could cause VP-switching
   1.225 - *         assembly code to fail -- hard-codes offsets of fields
   1.226 - *         (because -O3 messes with things otherwise)
   1.227 - */
   1.228 -typedef struct
   1.229 - {
   1.230 -   SlaveScheduler   slaveScheduler;
   1.231 -   RequestHandler   requestHandler;
   1.232 -   
   1.233 -   SchedSlot     ***allSchedSlots;
   1.234 -   VMSQueueStruc **readyToAnimateQs;
   1.235 -   VirtProcr      **masterVPs;
   1.236 -
   1.237 -   void            *semanticEnv;
   1.238 -   void            *OSEventStruc;   //for future, when add I/O to BLIS
   1.239 -   MallocProlog    *freeListHead;
   1.240 -   int32            amtOfOutstandingMem; //total currently allocated
   1.241 -
   1.242 -   void            *coreLoopReturnPt;//addr to jump to to re-enter coreLoop
   1.243 -
   1.244 -   int32            setupComplete;
   1.245 -   volatile int32   masterLock;
   1.246 -
   1.247 -   int32            numMasterInARow[NUM_CORES];//detect back-to-back masterVP
   1.248 -   GateStruc       *workStealingGates[ NUM_CORES ]; //concurrent work-steal
   1.249 -   int32            workStealingLock;
   1.250 -   
   1.251 -   int32            numProcrsCreated; //gives ordering to processor creation
   1.252 -
   1.253 -      //=========== MEASUREMENT STUFF =============
   1.254 -   IntervalProbe  **intervalProbes;
   1.255 -   PrivDynArrayInfo    *dynIntervalProbesInfo;
   1.256 -   HashTable       *probeNameHashTbl;
   1.257 -   int32            masterCreateProbeID;
   1.258 -   float64          createPtInSecs;
   1.259 -   Histogram      **measHists;
   1.260 -   PrivDynArrayInfo *measHistsInfo;
   1.261 -   #ifdef MEAS__TIME_PLUGIN
   1.262 -   Histogram       *reqHdlrLowTimeHist;
   1.263 -   Histogram       *reqHdlrHighTimeHist;
   1.264 -   #endif
   1.265 -   #ifdef MEAS__TIME_MALLOC
   1.266 -   Histogram       *mallocTimeHist;
   1.267 -   Histogram       *freeTimeHist;
   1.268 -   #endif
   1.269 -   #ifdef MEAS__TIME_MASTER_LOCK
   1.270 -   Histogram       *masterLockLowTimeHist;
   1.271 -   Histogram       *masterLockHighTimeHist;
   1.272 -   #endif
   1.273 -   #ifdef MEAS__PERF_COUNTERS
   1.274 -   int cycles_counter_fd[NUM_CORES];
   1.275 -   int instrs_counter_fd[NUM_CORES];
   1.276 -   FILE* counteroutput;
   1.277 -   #endif
   1.278 -   #ifdef DETECT_DEPENDENCIES
   1.279 -   Dependency** dependencies;
   1.280 -   PrivDynArrayInfo* dependenciesInfo;
   1.281 -   #endif
   1.282 -   #ifdef MEAS__PERF_COUNTERS //
   1.283 -   CounterRecord** counter_history;
   1.284 -   PrivDynArrayInfo* counter_history_array_info;
   1.285 -   #endif
   1.286 - }
   1.287 -MasterEnv;
   1.288 -
   1.289 -//=========================  Extra Stuff Data Strucs  =======================
   1.290 -typedef struct
   1.291 - {
   1.292 -
   1.293 - }
   1.294 -VMSExcp;
   1.295 -
   1.296 -struct _GateStruc
   1.297 - {
   1.298 -   int32 gateClosed;
   1.299 -   int32 preGateProgress;
   1.300 -   int32 waitProgress;
   1.301 -   int32 exitProgress;
   1.302 - };
   1.303 -//GateStruc
   1.304 -
   1.305 -//=======================  OS Thread related  ===============================
   1.306 -
   1.307 -void * coreLoop( void *paramsIn );  //standard PThreads fn prototype
   1.308 -void * coreLoop_Seq( void *paramsIn );  //standard PThreads fn prototype
   1.309 -void masterLoop( void *initData, VirtProcr *masterPr );
   1.310 -
   1.311 -
   1.312 -typedef struct
   1.313 - {
   1.314 -   void           *endThdPt;
   1.315 -   unsigned int    coreNum;
   1.316 - }
   1.317 -ThdParams;
   1.318 -
   1.319 -pthread_t       coreLoopThdHandles[ NUM_CORES ];  //pthread's virt-procr state
   1.320 -ThdParams      *coreLoopThdParams [ NUM_CORES ];
   1.321 -pthread_mutex_t suspendLock;
   1.322 -pthread_cond_t  suspend_cond;
   1.323 -
   1.324 -
   1.325 -
   1.326 -//=====================  Global Vars ===================
   1.327 -
   1.328 -volatile MasterEnv      *_VMSMasterEnv;
   1.329 -
   1.330 -
   1.331 -
   1.332 -
   1.333 -//===========================  Function Prototypes  =========================
   1.334 -
   1.335 -
   1.336 -//========== Setup and shutdown ==========
   1.337 -void
   1.338 -VMS__init();
   1.339 -
   1.340 -void
   1.341 -VMS__init_Seq();
   1.342 -
   1.343 -void
   1.344 -VMS__start_the_work_then_wait_until_done();
   1.345 -
   1.346 -void
   1.347 -VMS__start_the_work_then_wait_until_done_Seq();
   1.348 -
   1.349 -inline VirtProcr *
   1.350 -VMS__create_procr( VirtProcrFnPtr fnPtr, void *initialData );
   1.351 -
   1.352 -void
   1.353 -VMS__dissipate_procr( VirtProcr *procrToDissipate );
   1.354 -
   1.355 -   //Use this to create processor inside entry point & other places outside
   1.356 -   // the VMS system boundary (IE, not run in slave nor Master)
   1.357 -VirtProcr *
   1.358 -VMS_ext__create_procr( VirtProcrFnPtr fnPtr, void *initialData );
   1.359 -
   1.360 -void
   1.361 -VMS_ext__dissipate_procr( VirtProcr *procrToDissipate );
   1.362 -
   1.363 -void
   1.364 -VMS__throw_exception( char *msgStr, VirtProcr *reqstPr, VMSExcp *excpData );
   1.365 -
   1.366 -void
   1.367 -VMS__shutdown();
   1.368 -
   1.369 -void
   1.370 -VMS__cleanup_at_end_of_shutdown();
   1.371 -
   1.372 -void *
   1.373 -VMS__give_sem_env_for( VirtProcr *animPr );
   1.374 -
   1.375 -
   1.376 -//==============  Request Related  ===============
   1.377 -
   1.378 -void
   1.379 -VMS__suspend_procr( VirtProcr *callingPr );
   1.380 -
   1.381 -inline void
   1.382 -VMS__add_sem_request_in_mallocd_VMSReqst( void *semReqData, VirtProcr *callingPr );
   1.383 -
   1.384 -/*inline*/ __attribute__ ((noinline)) void
   1.385 -VMS__send_sem_request( void *semReqData, VirtProcr *callingPr );
   1.386 -
   1.387 -void
   1.388 -VMS__send_create_procr_req( void *semReqData, VirtProcr *reqstingPr );
   1.389 -
   1.390 -void /*inline**/ __attribute__ ((noinline))
   1.391 -VMS__send_dissipate_req( VirtProcr *prToDissipate );
   1.392 -
   1.393 -/*inline**/ __attribute__ ((noinline)) void
   1.394 -VMS__send_VMSSem_request( void *semReqData, VirtProcr *callingPr );
   1.395 -
   1.396 -VMSReqst *
   1.397 -VMS__take_next_request_out_of( VirtProcr *procrWithReq );
   1.398 -
   1.399 -inline void *
   1.400 -VMS__take_sem_reqst_from( VMSReqst *req );
   1.401 -
   1.402 -void inline
   1.403 -VMS__handle_VMSSemReq( VMSReqst *req, VirtProcr *requestingPr, void *semEnv,
   1.404 -                       ResumePrFnPtr resumePrFnPtr );
   1.405 -
   1.406 -//======================== STATS ======================
   1.407 -
   1.408 -//===== RDTSC wrapper ===== //Also runs with x86_64 code
   1.409 -
   1.410 -#define saveTimeStampCountInto(low, high) \
   1.411 -   asm volatile("RDTSC;                   \
   1.412 -                 movl %%eax, %0;          \
   1.413 -                 movl %%edx, %1;"         \
   1.414 -   /* outputs */ : "=m" (low), "=m" (high)\
   1.415 -   /* inputs  */ :                        \
   1.416 -   /* clobber */ : "%eax", "%edx"         \
   1.417 -                );
   1.418 -
   1.419 -#define saveLowTimeStampCountInto(low)    \
   1.420 -   asm volatile("RDTSC;                   \
   1.421 -                 movl %%eax, %0;"         \
   1.422 -   /* outputs */ : "=m" (low)             \
   1.423 -   /* inputs  */ :                        \
   1.424 -   /* clobber */ : "%eax", "%edx"         \
   1.425 -                );
   1.426 -
   1.427 -//====================
   1.428 -#define makeAMeasHist( idx, name, numBins, startVal, binWidth ) \
   1.429 -   makeHighestDynArrayIndexBeAtLeast( _VMSMasterEnv->measHistsInfo, idx ); \
   1.430 -   _VMSMasterEnv->measHists[idx] =  \
   1.431 -                       makeFixedBinHist( numBins, startVal, binWidth, name );
   1.432 -
   1.433 -#define saveCyclesAndInstrs(core,cycles,instrs) do{ \
   1.434 -   int cycles_fd = _VMSMasterEnv->cycles_counter_fd[core]; \
   1.435 -   int instrs_fd = _VMSMasterEnv->instrs_counter_fd[core]; \
   1.436 -   int nread;                                           \
   1.437 -                                                        \
   1.438 -   nread = read(cycles_fd,&(cycles),sizeof(cycles));    \
   1.439 -   if(nread<0){                                         \
   1.440 -       perror("Error reading cycles counter");          \
   1.441 -       cycles = 0;                                      \
   1.442 -   }                                                    \
   1.443 -                                                        \
   1.444 -   nread = read(instrs_fd,&(instrs),sizeof(instrs));    \
   1.445 -   if(nread<0){                                         \
   1.446 -       perror("Error reading cycles counter");          \
   1.447 -       instrs = 0;                                      \
   1.448 -   }                                                    \
   1.449 -} while (0) 
   1.450 -
   1.451 -#define getReturnAddressBeforeLibraryCall(vp_ptr, res_ptr) do{     \
   1.452 -void* frame_ptr0 = vp_ptr->framePtr;                               \
   1.453 -void* frame_ptr1 = *((void**)frame_ptr0);                          \
   1.454 -void* frame_ptr2 = *((void**)frame_ptr1);                          \
   1.455 -void* frame_ptr3 = *((void**)frame_ptr2);                          \
   1.456 -void* ret_addr = *((void**)frame_ptr3 + 1);                        \
   1.457 -*res_ptr = ret_addr;                                               \
   1.458 -} while (0)
   1.459 -
   1.460 -#define MEAS__SUB_CREATE  /*turn on/off subtraction of create from plugin*/
   1.461 -
   1.462 -#ifdef VPTHREAD
   1.463 -
   1.464 -//VPThread
   1.465 -#define createHistIdx      1
   1.466 -#define mutexLockHistIdx   2
   1.467 -#define mutexUnlockHistIdx 3
   1.468 -#define condWaitHistIdx    4
   1.469 -#define condSignalHistIdx  5
   1.470 -
   1.471 -#define MakeTheMeasHists() \
   1.472 -   _VMSMasterEnv->measHistsInfo = \
   1.473 -              makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \
   1.474 -   makeAMeasHist( createHistIdx,      "create",        250, 0, 100 ) \
   1.475 -   makeAMeasHist( mutexLockHistIdx,   "mutex_lock",    50, 0, 100 ) \
   1.476 -   makeAMeasHist( mutexUnlockHistIdx, "mutex_unlock",  50, 0, 100 ) \
   1.477 -   makeAMeasHist( condWaitHistIdx,    "cond_wait",     50, 0, 100 ) \
   1.478 -   makeAMeasHist( condSignalHistIdx,  "cond_signal",   50, 0, 100 )
   1.479 -
   1.480 -#endif
   1.481 -
   1.482 -
   1.483 -#ifdef VCILK
   1.484 -
   1.485 -//VCilk
   1.486 -#define spawnHistIdx      1
   1.487 -#define syncHistIdx       2
   1.488 -
   1.489 -#define MakeTheMeasHists() \
   1.490 -   _VMSMasterEnv->measHistsInfo = \
   1.491 -              makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \
   1.492 -    makeAMeasHist( spawnHistIdx,      "Spawn",        50, 0, 200 ) \
   1.493 -    makeAMeasHist( syncHistIdx,       "Sync",         50, 0, 200 )
   1.494 -
   1.495 -
   1.496 -#endif
   1.497 -
   1.498 -#ifdef SSR
   1.499 -
   1.500 -//SSR
   1.501 -#define SendFromToHistIdx      1
   1.502 -#define SendOfTypeHistIdx      2
   1.503 -#define ReceiveFromToHistIdx   3
   1.504 -#define ReceiveOfTypeHistIdx   4
   1.505 -
   1.506 -#define MakeTheMeasHists() \
   1.507 -   _VMSMasterEnv->measHistsInfo = \
   1.508 -              makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \
   1.509 -    makeAMeasHist( SendFromToHistIdx,   "SendFromTo",    50, 0, 100 ) \
   1.510 -    makeAMeasHist( SendOfTypeHistIdx,   "SendOfType",    50, 0, 100 ) \
   1.511 -    makeAMeasHist( ReceiveFromToHistIdx,"ReceiveFromTo", 50, 0, 100 ) \
   1.512 -    makeAMeasHist( ReceiveOfTypeHistIdx,"ReceiveOfType", 50, 0, 100 )
   1.513 -
   1.514 -#endif
   1.515 -
   1.516 -//===========================================================================
   1.517 -//VPThread
   1.518 -
   1.519 -
   1.520 -#define Meas_startCreate \
   1.521 -    int32 startStamp, endStamp; \
   1.522 -    saveLowTimeStampCountInto( startStamp ); \
   1.523 -
   1.524 -#define Meas_endCreate \
   1.525 -    saveLowTimeStampCountInto( endStamp ); \
   1.526 -    addIntervalToHist( startStamp, endStamp, \
   1.527 -                                 _VMSMasterEnv->measHists[ createHistIdx ] );
   1.528 -
   1.529 -#define Meas_startMutexLock \
   1.530 -    int32 startStamp, endStamp; \
   1.531 -    saveLowTimeStampCountInto( startStamp ); \
   1.532 -
   1.533 -#define Meas_endMutexLock \
   1.534 -    saveLowTimeStampCountInto( endStamp ); \
   1.535 -    addIntervalToHist( startStamp, endStamp, \
   1.536 -                              _VMSMasterEnv->measHists[ mutexLockHistIdx ] );
   1.537 -
   1.538 -#define Meas_startMutexUnlock \
   1.539 -    int32 startStamp, endStamp; \
   1.540 -    saveLowTimeStampCountInto( startStamp ); \
   1.541 -
   1.542 -#define Meas_endMutexUnlock \
   1.543 -    saveLowTimeStampCountInto( endStamp ); \
   1.544 -    addIntervalToHist( startStamp, endStamp, \
   1.545 -                            _VMSMasterEnv->measHists[ mutexUnlockHistIdx ] );
   1.546 -
   1.547 -#define Meas_startCondWait \
   1.548 -    int32 startStamp, endStamp; \
   1.549 -    saveLowTimeStampCountInto( startStamp ); \
   1.550 -
   1.551 -#define Meas_endCondWait \
   1.552 -    saveLowTimeStampCountInto( endStamp ); \
   1.553 -    addIntervalToHist( startStamp, endStamp, \
   1.554 -                               _VMSMasterEnv->measHists[ condWaitHistIdx ] );
   1.555 -
   1.556 -#define Meas_startCondSignal \
   1.557 -    int32 startStamp, endStamp; \
   1.558 -    saveLowTimeStampCountInto( startStamp ); \
   1.559 -
   1.560 -#define Meas_endCondSignal \
   1.561 -    saveLowTimeStampCountInto( endStamp ); \
   1.562 -    addIntervalToHist( startStamp, endStamp, \
   1.563 -                             _VMSMasterEnv->measHists[ condSignalHistIdx ] );
   1.564 -
   1.565 -//===========================================================================
   1.566 -// VCilk
   1.567 -#define Meas_startSpawn \
   1.568 -    int32 startStamp, endStamp; \
   1.569 -    saveLowTimeStampCountInto( startStamp ); \
   1.570 -
   1.571 -#define Meas_endSpawn \
   1.572 -    saveLowTimeStampCountInto( endStamp ); \
   1.573 -    addIntervalToHist( startStamp, endStamp, \
   1.574 -                             _VMSMasterEnv->measHists[ spawnHistIdx ] );
   1.575 -
   1.576 -#define Meas_startSync \
   1.577 -    int32 startStamp, endStamp; \
   1.578 -    saveLowTimeStampCountInto( startStamp ); \
   1.579 -
   1.580 -#define Meas_endSync \
   1.581 -    saveLowTimeStampCountInto( endStamp ); \
   1.582 -    addIntervalToHist( startStamp, endStamp, \
   1.583 -                             _VMSMasterEnv->measHists[ syncHistIdx ] );
   1.584 -
   1.585 -//===========================================================================
   1.586 -// SSR
   1.587 -#define Meas_startSendFromTo \
   1.588 -    int32 startStamp, endStamp; \
   1.589 -    saveLowTimeStampCountInto( startStamp ); \
   1.590 -
   1.591 -#define Meas_endSendFromTo \
   1.592 -    saveLowTimeStampCountInto( endStamp ); \
   1.593 -    addIntervalToHist( startStamp, endStamp, \
   1.594 -                             _VMSMasterEnv->measHists[ SendFromToHistIdx ] );
   1.595 -
   1.596 -#define Meas_startSendOfType \
   1.597 -    int32 startStamp, endStamp; \
   1.598 -    saveLowTimeStampCountInto( startStamp ); \
   1.599 -
   1.600 -#define Meas_endSendOfType \
   1.601 -    saveLowTimeStampCountInto( endStamp ); \
   1.602 -    addIntervalToHist( startStamp, endStamp, \
   1.603 -                             _VMSMasterEnv->measHists[ SendOfTypeHistIdx ] );
   1.604 -
   1.605 -#define Meas_startReceiveFromTo \
   1.606 -    int32 startStamp, endStamp; \
   1.607 -    saveLowTimeStampCountInto( startStamp ); \
   1.608 -
   1.609 -#define Meas_endReceiveFromTo \
   1.610 -    saveLowTimeStampCountInto( endStamp ); \
   1.611 -    addIntervalToHist( startStamp, endStamp, \
   1.612 -                             _VMSMasterEnv->measHists[ ReceiveFromToHistIdx ] );
   1.613 -
   1.614 -#define Meas_startReceiveOfType \
   1.615 -    int32 startStamp, endStamp; \
   1.616 -    saveLowTimeStampCountInto( startStamp ); \
   1.617 -
   1.618 -#define Meas_endReceiveOfType \
   1.619 -    saveLowTimeStampCountInto( endStamp ); \
   1.620 -    addIntervalToHist( startStamp, endStamp, \
   1.621 -                             _VMSMasterEnv->measHists[ReceiveOfTypeHistIdx ] );
   1.622 -
   1.623 -//=====
   1.624 -
   1.625 -#include "ProcrContext.h"
   1.626 -#include "probes.h"
   1.627 -#include "vutilities.h"
   1.628 -
   1.629 -#endif	/* _VMS_H */
   1.630 -
   1.631 +/*
   1.632 + *  Copyright 2009 OpenSourceStewardshipFoundation.org
   1.633 + *  Licensed under GNU General Public License version 2
   1.634 + *
   1.635 + * Author: seanhalle@yahoo.com
   1.636 + * 
   1.637 + */
   1.638 +
   1.639 +#ifndef _VMS_H
   1.640 +#define	_VMS_H
   1.641 +#define _GNU_SOURCE
   1.642 +
   1.643 +#include "VMS_primitive_data_types.h"
   1.644 +#include "Queue_impl/PrivateQueue.h"
   1.645 +#include "Histogram/Histogram.h"
   1.646 +#include "DynArray/DynArray.h"
   1.647 +#include "Hash_impl/PrivateHash.h"
   1.648 +#include "vmalloc.h"
   1.649 +#include "Counters/Counters.h"
   1.650 +#include "dependency.h"
   1.651 +
   1.652 +#include <pthread.h>
   1.653 +#include <sys/time.h>
   1.654 +
   1.655 +
   1.656 +//===============================  Debug  ===================================
   1.657 +//
   1.658 +//When SEQUENTIAL is defined, VMS does sequential exe in the main thread
   1.659 +// It still does co-routines and all the mechanisms are the same, it just
   1.660 +// has only a single thread and animates VPs one at a time
   1.661 +//#define SEQUENTIAL
   1.662 +
   1.663 +//#define USE_WORK_STEALING
   1.664 +
   1.665 +//turns on the probe-instrumentation in the application -- when not
   1.666 +// defined, the calls to the probe functions turn into comments
   1.667 +//#define STATS__ENABLE_PROBES
   1.668 +//#define TURN_ON_DEBUG_PROBES
   1.669 +
   1.670 +//These defines turn types of debug messages on and off --
   1.671 +// be sure the debug macros are un-commented (next block of defines)
   1.672 +#define dbgAppFlow   FALSE /* Top level flow of application code -- general*/
   1.673 +#define dbgProbes    FALSE /* for issues inside probes themselves*/
   1.674 +#define dbgB2BMaster FALSE /* in coreloop, back to back master VPs*/
   1.675 +#define dbgRqstHdlr  FALSE /* in request handler code*/
   1.676 +#define dbgDependency TRUE /* in request handler code, print dependencies */
   1.677 +
   1.678 +//Comment or un-comment the substitute half to turn types of debug message on/off; each flushes stdout after printing
   1.679 +#define DEBUG(  bool, msg)         \
   1.680 +  if( bool){ printf(msg); fflush(stdout);}
   1.681 +#define DEBUG1( bool, msg, param)  \
   1.682 +   if(bool){printf(msg, param); fflush(stdout);}
   1.683 +#define DEBUG2( bool, msg, p1, p2) \
   1.684 +   if(bool) {printf(msg, p1, p2); fflush(stdout);}
   1.685 +
   1.686 +#define ERROR(msg) printf(msg);
   1.687 +#define ERROR1(msg, param) printf(msg, param); 
   1.688 +#define ERROR2(msg, p1, p2) printf(msg, p1, p2);
   1.689 +
   1.690 +//===========================  STATS =======================
   1.691 +
   1.692 +   //when MEAS__TIME_STAMP_SUSP is defined, causes code to be inserted and
   1.693 +   // compiled-in that saves the low part of the time stamp count just before
   1.694 +   // suspending a processor and just after resuming that processor.  It is
   1.695 +   // saved into a field added to VirtProcr.  Have to sanity-check for
   1.696 +   // rollover of low portion into high portion.
   1.697 +//#define MEAS__TIME_STAMP_SUSP
   1.698 +//#define MEAS__TIME_MASTER
   1.699 +//#define MEAS__TIME_PLUGIN
   1.700 +//#define MEAS__TIME_MALLOC
   1.701 +//#define MEAS__TIME_MASTER_LOCK
   1.702 +//#define MEAS__NUM_TIMES_TO_RUN 100000
   1.703 +
   1.704 +   //For code that calculates normalization-offset between TSC counts of
   1.705 +   // different cores.
   1.706 +//#define NUM_TSC_ROUND_TRIPS 10
   1.707 +
   1.708 +#define MEAS__PERF_COUNTERS
   1.709 +#define DETECT_DEPENDENCIES
   1.710 +
   1.711 +//=========================  Hardware related Constants =====================
   1.712 +   //NUM_CORES is the number of hardware threads in the shared memory
   1.713 +   // machine -- NOTE(review): define is commented out below; presumably set by the build, confirm
   1.714 +//#define NUM_CORES        8
   1.715 +
   1.716 +   // tradeoff amortizing master fixed overhead vs imbalance potential
   1.717 +   // when work-stealing, can make bigger, at risk of losing cache affinity
   1.718 +#define NUM_SCHED_SLOTS  5
   1.719 +
   1.720 +#define MIN_WORK_UNIT_CYCLES 20000
   1.721 +
   1.722 +#define MASTERLOCK_RETRIES 10000
   1.723 +
   1.724 +   // stack size in virtual processors created
   1.725 +#define VIRT_PROCR_STACK_SIZE 0x8000 /* 32K */
   1.726 +
   1.727 +   // memory for VMS__malloc
   1.728 +#define MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE 0x10000000 /* 256M */
   1.729 +
   1.730 +#define CACHE_LINE 64
   1.731 +#define PAGE_SIZE 4096
   1.732 +
   1.733 +
   1.734 +//==============================
   1.735 +
   1.736 +#define SUCCESS 0
   1.737 +
   1.738 +#define writeVMSQ     writePrivQ
   1.739 +#define readVMSQ      readPrivQ
   1.740 +#define makeVMSQ      makeVMSPrivQ
   1.741 +#define numInVMSQ     numInPrivQ
   1.742 +#define VMSQueueStruc PrivQueueStruc
   1.743 +
   1.744 +
   1.745 +
   1.746 +//===========================================================================
   1.747 +typedef unsigned long long TSCount;
   1.748 +
   1.749 +typedef struct _SchedSlot     SchedSlot;
   1.750 +typedef struct _VMSReqst      VMSReqst;
   1.751 +typedef struct _VirtProcr     VirtProcr;
   1.752 +typedef struct _IntervalProbe IntervalProbe;
   1.753 +typedef struct _GateStruc     GateStruc;
   1.754 +
   1.755 +
   1.756 +typedef VirtProcr * (*SlaveScheduler)  ( void *, int );   //semEnv, coreIdx
   1.757 +typedef void  (*RequestHandler)  ( VirtProcr *, void * ); //prWReqst, semEnv
   1.758 +typedef void  (*VirtProcrFnPtr)  ( void *, VirtProcr * ); //initData, animPr
   1.759 +typedef void    VirtProcrFn      ( void *, VirtProcr * ); //initData, animPr
   1.760 +typedef void  (*ResumePrFnPtr)   ( VirtProcr *, void * );
   1.761 +
   1.762 +
   1.763 +//============= Requests ===========
   1.764 +//
   1.765 +
   1.766 +enum VMSReqstType   //avoid starting enums at 0, for debug reasons
   1.767 + {
   1.768 +   semantic = 1,
   1.769 +   createReq,
   1.770 +   dissipate,
   1.771 +   VMSSemantic      //goes with VMSSemReqst below
   1.772 + };
   1.773 +//One request record: a type tag, an opaque payload, and a link to another record
   1.774 +struct _VMSReqst
   1.775 + {
   1.776 +   enum VMSReqstType  reqType;//used for dissipate and in future for IO requests
   1.777 +   void              *semReqData; //opaque payload; presumably interpreted per reqType -- confirm
   1.778 +
   1.779 +   VMSReqst *nextReqst; //pointer to another VMSReqst, so records can be chained
   1.780 + };
   1.781 +//VMSReqst
   1.782 +
   1.783 +enum VMSSemReqstType   //These are equivalent to semantic requests, but for
   1.784 + {                     // VMS's services available directly to app, like OS
   1.785 +   createProbe = 1,    // and probe services -- like a VMS-wide built-in lang
   1.786 +   openFile,
   1.787 +   otherIO
   1.788 + };
   1.789 +
   1.790 +typedef struct
   1.791 + { enum VMSSemReqstType reqType;
   1.792 +   VirtProcr           *requestingPr;
   1.793 +   char                *nameStr;  //for create probe
   1.794 + }
   1.795 + VMSSemReq;
   1.796 +
   1.797 +
   1.798 +//====================  Core data structures  ===================
   1.799 +
   1.800 +struct _SchedSlot
   1.801 + {
   1.802 +   int         workIsDone;
   1.803 +   int         needsProcrAssigned;
   1.804 +   VirtProcr  *procrAssignedToSlot;
   1.805 + };
   1.806 +//SchedSlot
   1.807 +
   1.808 +/*Per-virtual-processor (VP) record.
   1.809 + *WARNING: re-arranging this data structure could cause VP switching
   1.810 + *         assembly code to fail -- hard-codes offsets of fields */
   1.811 +struct _VirtProcr
   1.812 + { int         procrID;  //for debugging -- count up each time create
   1.813 +   int         coreAnimatedBy;
   1.814 +   void       *startOfStack;
   1.815 +   void       *stackPtr;
   1.816 +   void       *framePtr;
   1.817 +   void       *nextInstrPt;
   1.818 +   
   1.819 +   void       *coreLoopStartPt;  //allows proto-runtime to be linked later
   1.820 +   void       *coreLoopFramePtr; //restore before jmp back to core loop
   1.821 +   void       *coreLoopStackPtr; //restore before jmp back to core loop
   1.822 +
   1.823 +   void       *initialData;
   1.824 +
   1.825 +   SchedSlot  *schedSlot;
   1.826 +   VMSReqst   *requests;
   1.827 +
   1.828 +   void       *semanticData; //this lives here for the life of VP
   1.829 +   void       *dataRetFromReq;//values returned from plugin to VP go here
   1.830 +
   1.831 +      //=== MEASUREMENT STUFF: each member exists only if its guard macro is defined ===
   1.832 +   #ifdef MEAS__TIME_STAMP_SUSP
   1.833 +   unsigned int preSuspTSCLow;
   1.834 +   unsigned int postSuspTSCLow;
   1.835 +   #endif
   1.836 +   #ifdef MEAS__TIME_MASTER /* in VirtProcr because multiple masterVPs*/
   1.837 +   unsigned int startMasterTSCLow;
   1.838 +   unsigned int endMasterTSCLow;
   1.839 +   #endif
   1.840 +   #ifdef MEAS__PERF_COUNTERS //
   1.841 +   CounterRecord** counter_history;
   1.842 +   PrivDynArrayInfo* counter_history_array_info;
   1.843 +   #endif
   1.844 +      //========================================
   1.845 +   
   1.846 +   float64      createPtInSecs;  //have space but don't use on some configs; offset shifts with the guards above
   1.847 + };
   1.848 +//VirtProcr
   1.849 +
   1.850 +//MasterEnv: scheduler/request callbacks, per-core tables, allocator bookkeeping and measurement state
   1.851 +/*WARNING: re-arranging this data structure could cause VP-switching
   1.852 + *         assembly code to fail -- hard-codes offsets of fields
   1.853 + *         (because -O3 messes with things otherwise)
   1.854 + */
   1.855 +typedef struct
   1.856 + {
   1.857 +   SlaveScheduler   slaveScheduler;  //callback taking ( semEnv, coreIdx ), returns VirtProcr *
   1.858 +   RequestHandler   requestHandler;  //callback taking ( prWReqst, semEnv )
   1.859 +   
   1.860 +   SchedSlot     ***allSchedSlots;
   1.861 +   VMSQueueStruc **readyToAnimateQs;
   1.862 +   VirtProcr      **masterVPs;
   1.863 +
   1.864 +   void            *semanticEnv;
   1.865 +   void            *OSEventStruc;   //for future, when add I/O to BLIS
   1.866 +   MallocProlog    *freeListHead;
   1.867 +   int32            amtOfOutstandingMem; //total currently allocated
   1.868 +
   1.869 +   void            *coreLoopReturnPt;//addr to jump to to re-enter coreLoop
   1.870 +
   1.871 +   int32            setupComplete;
   1.872 +   volatile int32   masterLock;
   1.873 +
   1.874 +   int32            numMasterInARow[NUM_CORES];//detect back-to-back masterVP
   1.875 +   GateStruc       *workStealingGates[ NUM_CORES ]; //concurrent work-steal
   1.876 +   int32            workStealingLock;
   1.877 +   
   1.878 +   int32            numProcrsCreated; //gives ordering to processor creation
   1.879 +
   1.880 +      //=== MEASUREMENT STUFF: members inside an #ifdef exist only if that macro is defined ===
   1.881 +   IntervalProbe  **intervalProbes;
   1.882 +   PrivDynArrayInfo    *dynIntervalProbesInfo;
   1.883 +   HashTable       *probeNameHashTbl;
   1.884 +   int32            masterCreateProbeID;
   1.885 +   float64          createPtInSecs;
   1.886 +   Histogram      **measHists;
   1.887 +   PrivDynArrayInfo *measHistsInfo;
   1.888 +   #ifdef MEAS__TIME_PLUGIN
   1.889 +   Histogram       *reqHdlrLowTimeHist;
   1.890 +   Histogram       *reqHdlrHighTimeHist;
   1.891 +   #endif
   1.892 +   #ifdef MEAS__TIME_MALLOC
   1.893 +   Histogram       *mallocTimeHist;
   1.894 +   Histogram       *freeTimeHist;
   1.895 +   #endif
   1.896 +   #ifdef MEAS__TIME_MASTER_LOCK
   1.897 +   Histogram       *masterLockLowTimeHist;
   1.898 +   Histogram       *masterLockHighTimeHist;
   1.899 +   #endif
   1.900 +   #ifdef MEAS__PERF_COUNTERS
   1.901 +   int cycles_counter_fd[NUM_CORES]; //indexed by core in saveCyclesAndInstrs
   1.902 +   int instrs_counter_fd[NUM_CORES]; //indexed by core in saveCyclesAndInstrs
   1.903 +   FILE* counteroutput; //FILE needs <stdio.h> visible before this point
   1.904 +   #endif
   1.905 +   #ifdef DETECT_DEPENDENCIES
   1.906 +   Dependency** dependencies;
   1.907 +   PrivDynArrayInfo* dependenciesInfo;
   1.908 +   #endif
   1.909 +   #ifdef MEAS__PERF_COUNTERS // second section under the same guard as the fd arrays above
   1.910 +   CounterRecord** counter_history;
   1.911 +   PrivDynArrayInfo* counter_history_array_info;
   1.912 +   #endif
   1.913 + }
   1.914 +MasterEnv;
   1.915 +
   1.916 +//=========================  Extra Stuff Data Strucs  =======================
   1.917 +typedef struct
   1.918 + {
   1.919 +   //NOTE(review): no members yet; a member-less struct is a GNU C extension, not ISO C
   1.920 + }
   1.921 +VMSExcp;
   1.922 +
   1.923 +struct _GateStruc
   1.924 + {
   1.925 +   int32 gateClosed;
   1.926 +   int32 preGateProgress;
   1.927 +   int32 waitProgress;
   1.928 +   int32 exitProgress;
   1.929 + };
   1.930 +//GateStruc
   1.931 +
   1.932 +//=======================  OS Thread related  ===============================
   1.933 +
   1.934 +void * coreLoop( void *paramsIn );  //standard PThreads fn prototype
   1.935 +void * coreLoop_Seq( void *paramsIn );  //standard PThreads fn prototype
   1.936 +void masterLoop( void *initData, VirtProcr *masterPr );
   1.937 +
   1.938 +
   1.939 +typedef struct
   1.940 + {
   1.941 +   void           *endThdPt;
   1.942 +   unsigned int    coreNum;
   1.943 + }
   1.944 +ThdParams;
   1.945 +//NOTE(review): no 'extern' -- every .c file including this header tentatively defines these; confirm intended
   1.946 +pthread_t       coreLoopThdHandles[ NUM_CORES ];  //pthread's virt-procr state
   1.947 +ThdParams      *coreLoopThdParams [ NUM_CORES ];
   1.948 +pthread_mutex_t suspendLock;
   1.949 +pthread_cond_t  suspend_cond;
   1.950 +
   1.951 +
   1.952 +
   1.953 +//=====================  Global Vars ===================
   1.954 +//Global MasterEnv pointer; 'volatile' qualifies the pointed-to struct, not the pointer. Declared without 'extern'
   1.955 +volatile MasterEnv      *_VMSMasterEnv;
   1.956 +
   1.957 +
   1.958 +
   1.959 +
   1.960 +//===========================  Function Prototypes  =========================
   1.961 +
   1.962 +
   1.963 +//========== Setup and shutdown ==========
   1.964 +void
   1.965 +VMS__init();
   1.966 +
   1.967 +void
   1.968 +VMS__init_Seq();
   1.969 +
   1.970 +void
   1.971 +VMS__start_the_work_then_wait_until_done();
   1.972 +
   1.973 +void
   1.974 +VMS__start_the_work_then_wait_until_done_Seq();
   1.975 +
   1.976 +inline VirtProcr *
   1.977 +VMS__create_procr( VirtProcrFnPtr fnPtr, void *initialData );
   1.978 +
   1.979 +void
   1.980 +VMS__dissipate_procr( VirtProcr *procrToDissipate );
   1.981 +
   1.982 +   //Use this to create processor inside entry point & other places outside
   1.983 +   // the VMS system boundary (IE, not run in slave nor Master)
   1.984 +VirtProcr *
   1.985 +VMS_ext__create_procr( VirtProcrFnPtr fnPtr, void *initialData );
   1.986 +
   1.987 +void
   1.988 +VMS_ext__dissipate_procr( VirtProcr *procrToDissipate );
   1.989 +
   1.990 +void
   1.991 +VMS__throw_exception( char *msgStr, VirtProcr *reqstPr, VMSExcp *excpData );
   1.992 +
   1.993 +void
   1.994 +VMS__shutdown();
   1.995 +
   1.996 +void
   1.997 +VMS__cleanup_at_end_of_shutdown();
   1.998 +
   1.999 +void *
  1.1000 +VMS__give_sem_env_for( VirtProcr *animPr );
  1.1001 +
  1.1002 +
  1.1003 +//==============  Request Related  ===============
  1.1004 +
  1.1005 +void
  1.1006 +VMS__suspend_procr( VirtProcr *callingPr );
  1.1007 +
  1.1008 +inline void
  1.1009 +VMS__add_sem_request_in_mallocd_VMSReqst( void *semReqData, VirtProcr *callingPr );
  1.1010 +
  1.1011 +/*inline*/ __attribute__ ((noinline)) void
  1.1012 +VMS__send_sem_request( void *semReqData, VirtProcr *callingPr );
  1.1013 +
  1.1014 +void
  1.1015 +VMS__send_create_procr_req( void *semReqData, VirtProcr *reqstingPr );
  1.1016 +
  1.1017 +void /*inline**/ __attribute__ ((noinline))
  1.1018 +VMS__send_dissipate_req( VirtProcr *prToDissipate );
  1.1019 +
  1.1020 +/*inline**/ __attribute__ ((noinline)) void
  1.1021 +VMS__send_VMSSem_request( void *semReqData, VirtProcr *callingPr );
  1.1022 +
  1.1023 +VMSReqst *
  1.1024 +VMS__take_next_request_out_of( VirtProcr *procrWithReq );
  1.1025 +
  1.1026 +inline void *
  1.1027 +VMS__take_sem_reqst_from( VMSReqst *req );
  1.1028 +
  1.1029 +void inline
  1.1030 +VMS__handle_VMSSemReq( VMSReqst *req, VirtProcr *requestingPr, void *semEnv,
  1.1031 +                       ResumePrFnPtr resumePrFnPtr );
  1.1032 +
  1.1033 +//======================== STATS ======================
  1.1034 +
  1.1035 +//===== RDTSC wrapper ===== //Also runs with x86_64 code
  1.1036 +//Runs RDTSC, then stores EAX into low and EDX into high (one 32-bit movl each); expansion ends in its own ';'
  1.1037 +#define saveTimeStampCountInto(low, high) \
  1.1038 +   asm volatile("RDTSC;                   \
  1.1039 +                 movl %%eax, %0;          \
  1.1040 +                 movl %%edx, %1;"         \
  1.1041 +   /* outputs */ : "=m" (low), "=m" (high)\
  1.1042 +   /* inputs  */ :                        \
  1.1043 +   /* clobber */ : "%eax", "%edx"         \
  1.1044 +                );
  1.1045 +//Runs RDTSC and stores only EAX (32-bit movl) into low; EDX is not stored; expansion ends in its own ';'
  1.1046 +#define saveLowTimeStampCountInto(low)    \
  1.1047 +   asm volatile("RDTSC;                   \
  1.1048 +                 movl %%eax, %0;"         \
  1.1049 +   /* outputs */ : "=m" (low)             \
  1.1050 +   /* inputs  */ :                        \
  1.1051 +   /* clobber */ : "%eax", "%edx"         \
  1.1052 +                );
  1.1053 +//makeAMeasHist expands to TWO statements plus its own ';' -- never use it as an unbraced if/else body
  1.1054 +//====================
  1.1055 +#define makeAMeasHist( idx, name, numBins, startVal, binWidth ) \
  1.1056 +   makeHighestDynArrayIndexBeAtLeast( _VMSMasterEnv->measHistsInfo, idx ); \
  1.1057 +   _VMSMasterEnv->measHists[idx] =  \
  1.1058 +                       makeFixedBinHist( numBins, startVal, binWidth, name );
  1.1059 +//Reads core's cycle and instruction counter fds into the lvalues cycles/instrs; needs MEAS__PERF_COUNTERS members
  1.1060 +#define saveCyclesAndInstrs(core,cycles,instrs) do{ \
  1.1061 +   int cycles_fd = _VMSMasterEnv->cycles_counter_fd[(core)]; \
  1.1062 +   int instrs_fd = _VMSMasterEnv->instrs_counter_fd[(core)]; \
  1.1063 +   int nread;                                           \
  1.1064 +   /* only nread<0 is treated as failure; short reads are not detected */ \
  1.1065 +   nread = read(cycles_fd,&(cycles),sizeof(cycles));    \
  1.1066 +   if(nread<0){                                         \
  1.1067 +       perror("Error reading cycles counter");          \
  1.1068 +       (cycles) = 0;                                    \
  1.1069 +   }                                                    \
  1.1070 +                                                        \
  1.1071 +   nread = read(instrs_fd,&(instrs),sizeof(instrs));    \
  1.1072 +   if(nread<0){                                         \
  1.1073 +       perror("Error reading instructions counter");    \
  1.1074 +       (instrs) = 0;                                    \
  1.1075 +   }                                                    \
  1.1076 +} while (0)
  1.1077 +//Follows three saved-frame-pointer links up from vp_ptr->framePtr, then stores the word after it into *res_ptr
  1.1078 +#define getReturnAddressBeforeLibraryCall(vp_ptr, res_ptr) do{     \
  1.1079 +void* frame_ptr0 = (vp_ptr)->framePtr;                             \
  1.1080 +void* frame_ptr1 = *((void**)frame_ptr0);                          \
  1.1081 +void* frame_ptr2 = *((void**)frame_ptr1);                          \
  1.1082 +void* frame_ptr3 = *((void**)frame_ptr2);                          \
  1.1083 +void* ret_addr = *((void**)frame_ptr3 + 1); /* presumably the return-address slot; assumes frame-pointer frames */ \
  1.1084 +*(res_ptr) = ret_addr;                                             \
  1.1085 +} while (0)
  1.1086 +
  1.1087 +#define MEAS__SUB_CREATE  /*turn on/off subtraction of create from plugin*/
  1.1088 +
  1.1089 +#ifdef VPTHREAD
  1.1090 +
  1.1091 +//VPThread
  1.1092 +#define createHistIdx      0
  1.1093 +#define mutexLockHistIdx   1
  1.1094 +#define mutexUnlockHistIdx 2
  1.1095 +#define condWaitHistIdx    3
  1.1096 +#define condSignalHistIdx  4
  1.1097 +
  1.1098 +#define MakeTheMeasHists() \
  1.1099 +   _VMSMasterEnv->measHistsInfo = \
  1.1100 +              makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \
  1.1101 +   makeAMeasHist( createHistIdx,      "create",        250, 0, 100 ) \
  1.1102 +   makeAMeasHist( mutexLockHistIdx,   "mutex_lock",    50, 0, 100 ) \
  1.1103 +   makeAMeasHist( mutexUnlockHistIdx, "mutex_unlock",  50, 0, 100 ) \
  1.1104 +   makeAMeasHist( condWaitHistIdx,    "cond_wait",     50, 0, 100 ) \
  1.1105 +   makeAMeasHist( condSignalHistIdx,  "cond_signal",   50, 0, 100 )
  1.1106 +
  1.1107 +#endif
  1.1108 +
  1.1109 +
  1.1110 +#ifdef VCILK
  1.1111 +
  1.1112 +//VCilk
  1.1113 +#define spawnHistIdx      0
  1.1114 +#define syncHistIdx       1
  1.1115 +
  1.1116 +#define MakeTheMeasHists() \
  1.1117 +   _VMSMasterEnv->measHistsInfo = \
  1.1118 +              makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \
  1.1119 +    makeAMeasHist( spawnHistIdx,      "Spawn",        50, 0, 200 ) \
  1.1120 +    makeAMeasHist( syncHistIdx,       "Sync",         50, 0, 200 )
  1.1121 +
  1.1122 +
  1.1123 +#endif
  1.1124 +
  1.1125 +#ifdef SSR
  1.1126 +
  1.1127 +//SSR
  1.1128 +#define SendFromToHistIdx      0
  1.1129 +#define SendOfTypeHistIdx      1
  1.1130 +#define ReceiveFromToHistIdx   2
  1.1131 +#define ReceiveOfTypeHistIdx   3
  1.1132 +
  1.1133 +#define MakeTheMeasHists() \
  1.1134 +   _VMSMasterEnv->measHistsInfo = \
  1.1135 +              makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \
  1.1136 +    makeAMeasHist( SendFromToHistIdx,   "SendFromTo",    50, 0, 100 ) \
  1.1137 +    makeAMeasHist( SendOfTypeHistIdx,   "SendOfType",    50, 0, 100 ) \
  1.1138 +    makeAMeasHist( ReceiveFromToHistIdx,"ReceiveFromTo", 50, 0, 100 ) \
  1.1139 +    makeAMeasHist( ReceiveOfTypeHistIdx,"ReceiveOfType", 50, 0, 100 )
  1.1140 +
  1.1141 +#endif
  1.1142 +
  1.1143 +//===========================================================================
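  1.1144 +//VPThread -- each Meas_startX declares locals startStamp/endStamp; use the matching Meas_endX in the same scope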
  1.1145 +
  1.1146 +
  1.1147 +#define Meas_startCreate \
  1.1148 +    int32 startStamp, endStamp; \
  1.1149 +    saveLowTimeStampCountInto( startStamp ); \
  1.1150 +
  1.1151 +#define Meas_endCreate \
  1.1152 +    saveLowTimeStampCountInto( endStamp ); \
  1.1153 +    addIntervalToHist( startStamp, endStamp, \
  1.1154 +                                 _VMSMasterEnv->measHists[ createHistIdx ] );
  1.1155 +
  1.1156 +#define Meas_startMutexLock \
  1.1157 +    int32 startStamp, endStamp; \
  1.1158 +    saveLowTimeStampCountInto( startStamp ); \
  1.1159 +
  1.1160 +#define Meas_endMutexLock \
  1.1161 +    saveLowTimeStampCountInto( endStamp ); \
  1.1162 +    addIntervalToHist( startStamp, endStamp, \
  1.1163 +                              _VMSMasterEnv->measHists[ mutexLockHistIdx ] );
  1.1164 +
  1.1165 +#define Meas_startMutexUnlock \
  1.1166 +    int32 startStamp, endStamp; \
  1.1167 +    saveLowTimeStampCountInto( startStamp ); \
  1.1168 +
  1.1169 +#define Meas_endMutexUnlock \
  1.1170 +    saveLowTimeStampCountInto( endStamp ); \
  1.1171 +    addIntervalToHist( startStamp, endStamp, \
  1.1172 +                            _VMSMasterEnv->measHists[ mutexUnlockHistIdx ] );
  1.1173 +
  1.1174 +#define Meas_startCondWait \
  1.1175 +    int32 startStamp, endStamp; \
  1.1176 +    saveLowTimeStampCountInto( startStamp ); \
  1.1177 +
  1.1178 +#define Meas_endCondWait \
  1.1179 +    saveLowTimeStampCountInto( endStamp ); \
  1.1180 +    addIntervalToHist( startStamp, endStamp, \
  1.1181 +                               _VMSMasterEnv->measHists[ condWaitHistIdx ] );
  1.1182 +
  1.1183 +#define Meas_startCondSignal \
  1.1184 +    int32 startStamp, endStamp; \
  1.1185 +    saveLowTimeStampCountInto( startStamp ); \
  1.1186 +
  1.1187 +#define Meas_endCondSignal \
  1.1188 +    saveLowTimeStampCountInto( endStamp ); \
  1.1189 +    addIntervalToHist( startStamp, endStamp, \
  1.1190 +                             _VMSMasterEnv->measHists[ condSignalHistIdx ] );
  1.1191 +
  1.1192 +//===========================================================================
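  1.1193 +// VCilk -- each Meas_startX declares locals startStamp/endStamp; use the matching Meas_endX in the same scope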
  1.1194 +#define Meas_startSpawn \
  1.1195 +    int32 startStamp, endStamp; \
  1.1196 +    saveLowTimeStampCountInto( startStamp ); \
  1.1197 +
  1.1198 +#define Meas_endSpawn \
  1.1199 +    saveLowTimeStampCountInto( endStamp ); \
  1.1200 +    addIntervalToHist( startStamp, endStamp, \
  1.1201 +                             _VMSMasterEnv->measHists[ spawnHistIdx ] );
  1.1202 +
  1.1203 +#define Meas_startSync \
  1.1204 +    int32 startStamp, endStamp; \
  1.1205 +    saveLowTimeStampCountInto( startStamp ); \
  1.1206 +
  1.1207 +#define Meas_endSync \
  1.1208 +    saveLowTimeStampCountInto( endStamp ); \
  1.1209 +    addIntervalToHist( startStamp, endStamp, \
  1.1210 +                             _VMSMasterEnv->measHists[ syncHistIdx ] );
  1.1211 +
  1.1212 +//===========================================================================
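  1.1213 +// SSR -- each Meas_startX declares locals startStamp/endStamp; use the matching Meas_endX in the same scope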
  1.1214 +#define Meas_startSendFromTo \
  1.1215 +    int32 startStamp, endStamp; \
  1.1216 +    saveLowTimeStampCountInto( startStamp ); \
  1.1217 +
  1.1218 +#define Meas_endSendFromTo \
  1.1219 +    saveLowTimeStampCountInto( endStamp ); \
  1.1220 +    addIntervalToHist( startStamp, endStamp, \
  1.1221 +                             _VMSMasterEnv->measHists[ SendFromToHistIdx ] );
  1.1222 +
  1.1223 +#define Meas_startSendOfType \
  1.1224 +    int32 startStamp, endStamp; \
  1.1225 +    saveLowTimeStampCountInto( startStamp ); \
  1.1226 +
  1.1227 +#define Meas_endSendOfType \
  1.1228 +    saveLowTimeStampCountInto( endStamp ); \
  1.1229 +    addIntervalToHist( startStamp, endStamp, \
  1.1230 +                             _VMSMasterEnv->measHists[ SendOfTypeHistIdx ] );
  1.1231 +
  1.1232 +#define Meas_startReceiveFromTo \
  1.1233 +    int32 startStamp, endStamp; \
  1.1234 +    saveLowTimeStampCountInto( startStamp ); \
  1.1235 +
  1.1236 +#define Meas_endReceiveFromTo \
  1.1237 +    saveLowTimeStampCountInto( endStamp ); \
  1.1238 +    addIntervalToHist( startStamp, endStamp, \
  1.1239 +                             _VMSMasterEnv->measHists[ ReceiveFromToHistIdx ] );
  1.1240 +
  1.1241 +#define Meas_startReceiveOfType \
  1.1242 +    int32 startStamp, endStamp; \
  1.1243 +    saveLowTimeStampCountInto( startStamp ); \
  1.1244 +
  1.1245 +#define Meas_endReceiveOfType \
  1.1246 +    saveLowTimeStampCountInto( endStamp ); \
  1.1247 +    addIntervalToHist( startStamp, endStamp, \
  1.1248 +                             _VMSMasterEnv->measHists[ReceiveOfTypeHistIdx ] );
  1.1249 +
  1.1250 +//=====
  1.1251 +
  1.1252 +#include "ProcrContext.h"
  1.1253 +#include "probes.h"
  1.1254 +#include "vutilities.h"
  1.1255 +
  1.1256 +#endif	/* _VMS_H */
  1.1257 +