view VMS.h @ 108:3bc3b89630c7

perf counters
author engelhardt@cray1
date Tue, 26 Jul 2011 15:36:24 +0200
parents 97e26095c01f
children 659299627e70
line source
1 /*
2 * Copyright 2009 OpenSourceStewardshipFoundation.org
3 * Licensed under GNU General Public License version 2
4 *
5 * Author: seanhalle@yahoo.com
6 *
7 */
9 #ifndef _VMS_H
10 #define _VMS_H
11 #define _GNU_SOURCE
13 #include "VMS_primitive_data_types.h"
14 #include "Queue_impl/PrivateQueue.h"
15 #include "Histogram/Histogram.h"
16 #include "DynArray/DynArray.h"
17 #include "Hash_impl/PrivateHash.h"
18 #include "vmalloc.h"
19 #include "Counters/Counters.h"
21 #include <pthread.h>
22 #include <sys/time.h>
25 //=============================== Debug ===================================
26 //
27 //When SEQUENTIAL is defined, VMS executes sequentially in the main thread.
28 // It still does co-routines and all the mechanisms are the same; it just
29 // has only a single thread and animates VPs one at a time.
30 //#define SEQUENTIAL
32 //#define USE_WORK_STEALING
34 //Turns on the probe-instrumentation in the application -- when not
35 // defined, the calls to the probe functions turn into comments.
36 #define STATS__ENABLE_PROBES
37 //#define TURN_ON_DEBUG_PROBES
39 //These defines turn individual categories of debug messages on and off;
40 // be sure the DEBUG macro bodies are un-commented (next block of defines)
41 #define dbgAppFlow TRUE /* Top level flow of application code -- general*/
42 #define dbgProbes FALSE /* for issues inside probes themselves*/
43 #define dbgB2BMaster FALSE /* in coreloop, back to back master VPs*/
44 #define dbgRqstHdlr FALSE /* in request handler code*/
46 //Un-comment the second line (the substitute half) of a macro to turn it on; while commented out, the macro expands to nothing
47 #define DEBUG( bool, msg) \
48 // if( bool){ printf(msg); fflush(stdout);}
49 #define DEBUG1( bool, msg, param) \
50 // if(bool){printf(msg, param); fflush(stdout);}
51 #define DEBUG2( bool, msg, p1, p2) \
52 // if(bool) {printf(msg, p1, p2); fflush(stdout);}
54 #define ERROR(msg) printf(msg); /* msg is used as the printf format string; the expansion already ends in ';' */
55 #define ERROR1(msg, param) printf(msg, param); /* as ERROR, with one format argument */
56 #define ERROR2(msg, p1, p2) printf(msg, p1, p2); /* as ERROR, with two format arguments */
58 //=========================== STATS =======================
60 //When MEAS__TIME_STAMP_SUSP is defined, code is inserted and compiled in
61 // that saves the low part of the time stamp count just before
62 // suspending a processor and just after resuming that processor. It is
63 // saved into fields added to VirtProcr. Have to sanity-check for
64 // rollover of low portion into high portion.
65 //#define MEAS__TIME_STAMP_SUSP
66 //#define MEAS__TIME_MASTER
67 #define MEAS__TIME_PLUGIN /* adds reqHdlrLowTimeHist and reqHdlrHighTimeHist to MasterEnv */
68 #define MEAS__TIME_MALLOC /* adds mallocTimeHist and freeTimeHist to MasterEnv */
69 //#define MEAS__TIME_MASTER_LOCK
70 #define MEAS__NUM_TIMES_TO_RUN 100000
72 //For code that calculates normalization-offset between TSC counts of
73 // different cores.
74 #define NUM_TSC_ROUND_TRIPS 10
76 #define MEAS__PERF_COUNTERS /* adds counter history to VirtProcr and per-core counter fds to MasterEnv */
78 //========================= Hardware related Constants =====================
79 //This value is the number of hardware threads in the shared memory
80 // machine; it sizes the per-core arrays declared in this header
81 #define NUM_CORES 2
83 // tradeoff amortizing master fixed overhead vs imbalance potential
84 // when work-stealing, can make bigger, at risk of losing cache affinity
85 #define NUM_SCHED_SLOTS 5
87 #define MIN_WORK_UNIT_CYCLES 20000
89 #define MASTERLOCK_RETRIES 10000
91 // stack size in virtual processors created (presumably bytes -- confirm)
92 #define VIRT_PROCR_STACK_SIZE 0x8000 /* 32K */
94 // memory for VMS__malloc (presumably bytes requested from the OS -- confirm)
95 #define MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE 0x10000000 /* 256M */
97 #define CACHE_LINE 64 /* presumably bytes -- confirm for the target CPU */
98 #define PAGE_SIZE 4096 /* presumably bytes; NOTE(review): name may clash with a system PAGE_SIZE macro */
101 //============================== return code and VMS-queue aliases
103 #define SUCCESS 0
105 #define writeVMSQ writePrivQ /* the VMSQ names are plain aliases for PrivQ names */
106 #define readVMSQ readPrivQ
107 #define makeVMSQ makeVMSPrivQ /* note: maps to makeVMSPrivQ, not makePrivQ */
108 #define numInVMSQ numInPrivQ
109 #define VMSQueueStruc PrivQueueStruc
113 //===========================================================================
114 typedef unsigned long long TSCount; //presumably a full time-stamp-counter value -- confirm
116 typedef struct _SchedSlot SchedSlot; //struct body is defined further down
117 typedef struct _VMSReqst VMSReqst; //struct body is defined further down
118 typedef struct _VirtProcr VirtProcr; //struct body is defined further down
119 typedef struct _IntervalProbe IntervalProbe; //struct body is not in this header
120 typedef struct _GateStruc GateStruc; //struct body is defined further down
123 typedef VirtProcr * (*SlaveScheduler) ( void *, int ); //semEnv, coreIdx
124 typedef void (*RequestHandler) ( VirtProcr *, void * ); //prWReqst, semEnv
125 typedef void (*VirtProcrFnPtr) ( void *, VirtProcr * ); //initData, animPr
126 typedef void VirtProcrFn ( void *, VirtProcr * ); //initData, animPr -- function type, not a pointer
127 typedef void (*ResumePrFnPtr) ( VirtProcr *, void * ); //type of the last parameter of VMS__handle_VMSSemReq
130 //============= Requests ===========
131 // Kinds of request; stored in VMSReqst.reqType
133 enum VMSReqstType //avoid starting enums at 0, for debug reasons
134 {
135 semantic = 1,
136 createReq, //= 2
137 dissipate, //= 3
138 VMSSemantic //= 4; goes with VMSSemReq / VMSSemReqstType below
139 };
141 struct _VMSReqst //one request record; VirtProcr.requests points at these
142 {
143 enum VMSReqstType reqType;//used for dissipate and in future for IO requests
144 void *semReqData; //untyped payload; its layout is not defined in this header
146 VMSReqst *nextReqst; //link to another request (presumably a singly linked list -- confirm)
147 };
148 //VMSReqst
150 enum VMSSemReqstType //These are equivalent to semantic requests, but for
151 { // VMS's services available directly to app, like OS
152 createProbe = 1, // and probe services -- like a VMS-wide built-in lang
153 openFile, //= 2
154 otherIO //= 3
155 };
157 typedef struct //request record for the VMS-provided services (see enum VMSSemReqstType)
158 { enum VMSSemReqstType reqType; //which service is being requested
159 VirtProcr *requestingPr; //presumably the VP that issued the request -- confirm
160 char *nameStr; //for create probe
161 }
162 VMSSemReq;
165 //==================== Core data structures ===================
167 struct _SchedSlot //one scheduling slot; MasterEnv.allSchedSlots and VirtProcr.schedSlot point at these
168 {
169 int workIsDone; //presumably a boolean flag -- confirm against the master/core loops
170 int needsProcrAssigned; //presumably a boolean flag -- confirm against the master/core loops
171 VirtProcr *procrAssignedToSlot; //the VP currently assigned to this slot
172 };
173 //SchedSlot
175 /*WARNING: re-arranging this data structure could cause VP switching
176 * assembly code to fail -- hard-codes offsets of fields
177 */
178 struct _VirtProcr
179 { int procrID; //for debugging -- count up each time create
180 int coreAnimatedBy; //presumably index of the core animating this VP -- confirm
181 void *startOfStack;
182 void *stackPtr; //saved stack pointer of this VP
183 void *framePtr; //saved frame pointer of this VP
184 void *nextInstrPt; //saved resume address of this VP
186 void *coreLoopStartPt; //allows proto-runtime to be linked later
187 void *coreLoopFramePtr; //restore before jmp back to core loop
188 void *coreLoopStackPtr; //restore before jmp back to core loop
190 void *initialData; //presumably the initData handed to the VP's function -- confirm
192 SchedSlot *schedSlot;
193 VMSReqst *requests; //request records attached to this VP (linked via nextReqst)
195 void *semanticData; //this lives here for the life of VP
196 void *dataRetFromReq;//values returned from plugin to VP go here
198 //=========== MEASUREMENT STUFF ========== (fields below exist only when the switch is defined, which changes struct size)
199 #ifdef MEAS__TIME_STAMP_SUSP
200 unsigned int preSuspTSCLow;
201 unsigned int postSuspTSCLow;
202 #endif
203 #ifdef MEAS__TIME_MASTER /* in VirtProcr because multiple masterVPs*/
204 unsigned int startMasterTSCLow;
205 unsigned int endMasterTSCLow;
206 #endif
207 #ifdef MEAS__PERF_COUNTERS // per-VP performance-counter records
208 CounterRecord** counter_history;
209 PrivDynArrayInfo* counter_history_array_info;
210 #endif
211 //========================================
213 float64 createPtInSecs; //have space but don't use on some configs
214 };
215 //VirtProcr
218 /*WARNING: re-arranging this data structure could cause VP-switching
219 * assembly code to fail -- hard-codes offsets of fields
220 * (because -O3 messes with things otherwise)
221 */
222 typedef struct
223 {
224 SlaveScheduler slaveScheduler; //callback, see SlaveScheduler typedef (semEnv, coreIdx)
225 RequestHandler requestHandler; //callback, see RequestHandler typedef (prWReqst, semEnv)
227 SchedSlot ***allSchedSlots; //presumably indexed [core][slot] -- confirm
228 VMSQueueStruc **readyToAnimateQs; //presumably one queue per core -- confirm
229 VirtProcr **masterVPs; //presumably one master VP per core -- confirm
231 void *semanticEnv; //presumably the semEnv handed to the two callbacks -- confirm
232 void *OSEventStruc; //for future, when add I/O to BLIS
233 MallocProlog *freeListHead;
234 int32 amtOfOutstandingMem; //total currently allocated
236 void *coreLoopReturnPt;//addr to jump to to re-enter coreLoop
238 int32 setupComplete;
239 volatile int32 masterLock;
241 int32 numMasterInARow[NUM_CORES];//detect back-to-back masterVP
242 GateStruc *workStealingGates[ NUM_CORES ]; //concurrent work-steal
243 int32 workStealingLock;
245 int32 numProcrsCreated; //gives ordering to processor creation
247 //=========== MEASUREMENT STUFF ============= (the #ifdef'd members change the struct's size per configuration)
248 IntervalProbe **intervalProbes;
249 PrivDynArrayInfo *dynIntervalProbesInfo;
250 HashTable *probeNameHashTbl;
251 int32 masterCreateProbeID;
252 float64 createPtInSecs;
253 Histogram **measHists; //indexed by the *HistIdx constants, filled by makeAMeasHist
254 PrivDynArrayInfo *measHistsInfo; //dyn-array info for measHists, set by MakeTheMeasHists
255 #ifdef MEAS__TIME_PLUGIN
256 Histogram *reqHdlrLowTimeHist;
257 Histogram *reqHdlrHighTimeHist;
258 #endif
259 #ifdef MEAS__TIME_MALLOC
260 Histogram *mallocTimeHist;
261 Histogram *freeTimeHist;
262 #endif
263 #ifdef MEAS__TIME_MASTER_LOCK
264 Histogram *masterLockLowTimeHist;
265 Histogram *masterLockHighTimeHist;
266 #endif
267 #ifdef MEAS__PERF_COUNTERS
268 int cycles_counter_fd[NUM_CORES]; //one per core; presumably perf-counter file descriptors -- confirm
269 int instrs_counter_fd[NUM_CORES]; //one per core; presumably perf-counter file descriptors -- confirm
270 #endif
271 }
272 MasterEnv;
274 //========================= Extra Stuff Data Strucs =======================
275 typedef struct //exception data handed to VMS__throw_exception
276 { //no members yet; NOTE(review): a struct with no members is a GNU C extension, not ISO C
278 }
279 VMSExcp;
281 struct _GateStruc //per-core gate; MasterEnv.workStealingGates holds NUM_CORES pointers to these
282 {
283 int32 gateClosed; //presumably non-zero while the gate is closed -- confirm
284 int32 preGateProgress; //progress counter; exact protocol is not visible in this header
285 int32 waitProgress; //progress counter; exact protocol is not visible in this header
286 int32 exitProgress; //progress counter; exact protocol is not visible in this header
287 };
288 //GateStruc
290 //======================= OS Thread related ===============================
292 void * coreLoop( void *paramsIn ); //standard PThreads fn prototype; paramsIn is presumably a ThdParams* -- confirm
293 void * coreLoop_Seq( void *paramsIn ); //standard PThreads fn prototype
294 void masterLoop( void *initData, VirtProcr *masterPr ); //same signature as VirtProcrFn (initData, animPr)
297 typedef struct //per-thread parameter record; coreLoopThdParams holds one pointer per core
298 {
299 void *endThdPt;
300 unsigned int coreNum; //presumably the index of the core this thread serves -- confirm
301 }
302 ThdParams;
304 pthread_t coreLoopThdHandles[ NUM_CORES ]; //pthread's virt-procr state; NOTE(review): defined (not extern) in a header -- every including .c gets a definition, which fails to link under -fno-common
305 ThdParams *coreLoopThdParams [ NUM_CORES ]; //one ThdParams pointer per core; same header-definition caveat
306 pthread_mutex_t suspendLock; //same header-definition caveat
307 pthread_cond_t suspend_cond; //presumably used together with suspendLock -- confirm; same header-definition caveat
311 //===================== Global Vars ===================
313 volatile MasterEnv *_VMSMasterEnv; //pointer to a volatile MasterEnv (the pointee is volatile, not the pointer); NOTE(review): defined, not extern, in a header
318 //=========================== Function Prototypes =========================
321 //========== Setup and shutdown ==========
322 void
323 VMS__init(); //NOTE(review): empty parens leave the parameters unspecified before C23; (void) would be stricter
325 void
326 VMS__init_Seq(); //presumably the SEQUENTIAL-mode counterpart of VMS__init -- confirm
328 void
329 VMS__start_the_work_then_wait_until_done();
331 void
332 VMS__start_the_work_then_wait_until_done_Seq(); //presumably the SEQUENTIAL-mode counterpart -- confirm
334 inline VirtProcr * //NOTE(review): inline declaration with no body in this header; gnu89 and C99 inline rules differ
335 VMS__create_procr( VirtProcrFnPtr fnPtr, void *initialData );
337 void
338 VMS__dissipate_procr( VirtProcr *procrToDissipate );
340 //Use this to create processor inside entry point & other places outside
341 // the VMS system boundary (IE, not run in slave nor Master)
342 VirtProcr *
343 VMS_ext__create_procr( VirtProcrFnPtr fnPtr, void *initialData );
345 void
346 VMS_ext__dissipate_procr( VirtProcr *procrToDissipate ); //presumably the outside-the-boundary counterpart of VMS__dissipate_procr -- confirm
348 void
349 VMS__throw_exception( char *msgStr, VirtProcr *reqstPr, VMSExcp *excpData );
351 void
352 VMS__shutdown();
354 void
355 VMS__cleanup_at_end_of_shutdown();
357 void *
358 VMS__give_sem_env_for( VirtProcr *animPr ); //returns an untyped pointer; per the name, the semantic environment
361 //============== Request Related ===============
363 void
364 VMS__suspend_procr( VirtProcr *callingPr );
366 inline void //NOTE(review): inline declarations here have no body in this header
367 VMS__add_sem_request_in_mallocd_VMSReqst( void *semReqData, VirtProcr *callingPr );
369 inline void
370 VMS__send_sem_request( void *semReqData, VirtProcr *callingPr );
372 void
373 VMS__send_create_procr_req( void *semReqData, VirtProcr *reqstingPr );
375 void inline //same meaning as "inline void"; specifier order differs from the neighbours
376 VMS__send_dissipate_req( VirtProcr *prToDissipate );
378 inline void
379 VMS__send_VMSSem_request( void *semReqData, VirtProcr *callingPr );
381 VMSReqst *
382 VMS__take_next_request_out_of( VirtProcr *procrWithReq );
384 inline void *
385 VMS__take_sem_reqst_from( VMSReqst *req ); //returns an untyped pointer; presumably req->semReqData -- confirm
387 void inline //same meaning as "inline void"
388 VMS__handle_VMSSemReq( VMSReqst *req, VirtProcr *requestingPr, void *semEnv,
389 ResumePrFnPtr resumePrFnPtr );
391 //======================== STATS ======================
393 //===== RDTSC wrapper ===== //Also runs with x86_64 code; both macros store 32-bit halves (movl) and already end in ';'
395 #define saveTimeStampCountInto(low, high) \
396 asm volatile("RDTSC; \
397 movl %%eax, %0; \
398 movl %%edx, %1;" \
399 /* outputs: low <- EAX, high <- EDX, both memory operands */ : "=m" (low), "=m" (high)\
400 /* inputs: none */ : \
401 /* clobber */ : "%eax", "%edx" \
402 );
404 #define saveLowTimeStampCountInto(low) \
405 asm volatile("RDTSC; \
406 movl %%eax, %0;" \
407 /* outputs: low <- EAX, memory operand */ : "=m" (low) \
408 /* inputs: none */ : \
409 /* clobber: EDX too, RDTSC writes it even though it is unused here */ : "%eax", "%edx" \
410 );
412 //==================== Grows measHists to hold idx, then stores a new fixed-bin histogram there; expands to TWO statements, each ending in ';'
413 #define makeAMeasHist( idx, name, numBins, startVal, binWidth ) \
414 makeHighestDynArrayIndexBeAtLeast( _VMSMasterEnv->measHistsInfo, idx ); \
415 _VMSMasterEnv->measHists[idx] = \
416 makeFixedBinHist( numBins, startVal, binWidth, name );
419 #define MEAS__SUB_CREATE /*turn on/off subtraction of create from plugin*/
421 #ifdef VPTHREAD
423 //VPThread: indices into _VMSMasterEnv->measHists, plus the macro that creates those histograms
424 #define createHistIdx 1
425 #define mutexLockHistIdx 2
426 #define mutexUnlockHistIdx 3
427 #define condWaitHistIdx 4
428 #define condSignalHistIdx 5
430 #define MakeTheMeasHists() \
431 _VMSMasterEnv->measHistsInfo = \
432 makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \
433 makeAMeasHist( createHistIdx, "create", 50, 0, 100 ) /* numBins 50, startVal 0, binWidth 100 */ \
434 makeAMeasHist( mutexLockHistIdx, "mutex_lock", 50, 0, 100 ) \
435 makeAMeasHist( mutexUnlockHistIdx, "mutex_unlock", 50, 0, 100 ) \
436 makeAMeasHist( condWaitHistIdx, "cond_wait", 50, 0, 100 ) \
437 makeAMeasHist( condSignalHistIdx, "cond_signal", 50, 0, 100 )
439 #endif //VPTHREAD
442 #ifdef VCILK
444 //VCilk: indices into _VMSMasterEnv->measHists, plus the macro that creates those histograms
445 #define spawnHistIdx 1
446 #define syncHistIdx 2
448 #define MakeTheMeasHists() \
449 _VMSMasterEnv->measHistsInfo = \
450 makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \
451 makeAMeasHist( spawnHistIdx, "Spawn", 50, 0, 200 ) /* numBins 50, startVal 0, binWidth 200 */ \
452 makeAMeasHist( syncHistIdx, "Sync", 50, 0, 200 )
455 #endif //VCILK
457 #ifdef SSR
459 //SSR: indices into _VMSMasterEnv->measHists, plus the macro that creates those histograms
460 #define SendFromToHistIdx 1
461 #define SendOfTypeHistIdx 2
462 #define ReceiveFromToHistIdx 3
463 #define ReceiveOfTypeHistIdx 4
465 #define MakeTheMeasHists() \
466 _VMSMasterEnv->measHistsInfo = \
467 makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \
468 makeAMeasHist( SendFromToHistIdx, "SendFromTo", 50, 0, 100 ) /* numBins 50, startVal 0, binWidth 100 */ \
469 makeAMeasHist( SendOfTypeHistIdx, "SendOfType", 50, 0, 100 ) \
470 makeAMeasHist( ReceiveFromToHistIdx,"ReceiveFromTo", 50, 0, 100 ) \
471 makeAMeasHist( ReceiveOfTypeHistIdx,"ReceiveOfType", 50, 0, 100 )
473 #endif //SSR
475 //===========================================================================
476 //VPThread: Meas_startX declares startStamp/endStamp and samples the low TSC word; Meas_endX samples again and adds the interval to measHists[ XHistIdx ]. Use each pair within one scope.
479 #define Meas_startCreate \
480 int32 startStamp, endStamp; \
481 saveLowTimeStampCountInto( startStamp );
483 #define Meas_endCreate \
484 saveLowTimeStampCountInto( endStamp ); \
485 addIntervalToHist( startStamp, endStamp, \
486 _VMSMasterEnv->measHists[ createHistIdx ] );
488 #define Meas_startMutexLock \
489 int32 startStamp, endStamp; \
490 saveLowTimeStampCountInto( startStamp );
492 #define Meas_endMutexLock \
493 saveLowTimeStampCountInto( endStamp ); \
494 addIntervalToHist( startStamp, endStamp, \
495 _VMSMasterEnv->measHists[ mutexLockHistIdx ] );
497 #define Meas_startMutexUnlock \
498 int32 startStamp, endStamp; \
499 saveLowTimeStampCountInto( startStamp );
501 #define Meas_endMutexUnlock \
502 saveLowTimeStampCountInto( endStamp ); \
503 addIntervalToHist( startStamp, endStamp, \
504 _VMSMasterEnv->measHists[ mutexUnlockHistIdx ] );
506 #define Meas_startCondWait \
507 int32 startStamp, endStamp; \
508 saveLowTimeStampCountInto( startStamp );
510 #define Meas_endCondWait \
511 saveLowTimeStampCountInto( endStamp ); \
512 addIntervalToHist( startStamp, endStamp, \
513 _VMSMasterEnv->measHists[ condWaitHistIdx ] );
515 #define Meas_startCondSignal \
516 int32 startStamp, endStamp; \
517 saveLowTimeStampCountInto( startStamp );
519 #define Meas_endCondSignal \
520 saveLowTimeStampCountInto( endStamp ); \
521 addIntervalToHist( startStamp, endStamp, \
522 _VMSMasterEnv->measHists[ condSignalHistIdx ] );
524 //===========================================================================
525 // VCilk: Meas_startX declares startStamp/endStamp and samples the low TSC word; Meas_endX samples again and adds the interval to measHists[ XHistIdx ]. Use each pair within one scope.
526 #define Meas_startSpawn \
527 int32 startStamp, endStamp; \
528 saveLowTimeStampCountInto( startStamp );
530 #define Meas_endSpawn \
531 saveLowTimeStampCountInto( endStamp ); \
532 addIntervalToHist( startStamp, endStamp, \
533 _VMSMasterEnv->measHists[ spawnHistIdx ] );
535 #define Meas_startSync \
536 int32 startStamp, endStamp; \
537 saveLowTimeStampCountInto( startStamp );
539 #define Meas_endSync \
540 saveLowTimeStampCountInto( endStamp ); \
541 addIntervalToHist( startStamp, endStamp, \
542 _VMSMasterEnv->measHists[ syncHistIdx ] );
544 //===========================================================================
545 // SSR: Meas_startX declares startStamp/endStamp and samples the low TSC word; Meas_endX samples again and adds the interval to measHists[ XHistIdx ]. Use each pair within one scope.
546 #define Meas_startSendFromTo \
547 int32 startStamp, endStamp; \
548 saveLowTimeStampCountInto( startStamp );
550 #define Meas_endSendFromTo \
551 saveLowTimeStampCountInto( endStamp ); \
552 addIntervalToHist( startStamp, endStamp, \
553 _VMSMasterEnv->measHists[ SendFromToHistIdx ] );
555 #define Meas_startSendOfType \
556 int32 startStamp, endStamp; \
557 saveLowTimeStampCountInto( startStamp );
559 #define Meas_endSendOfType \
560 saveLowTimeStampCountInto( endStamp ); \
561 addIntervalToHist( startStamp, endStamp, \
562 _VMSMasterEnv->measHists[ SendOfTypeHistIdx ] );
564 #define Meas_startReceiveFromTo \
565 int32 startStamp, endStamp; \
566 saveLowTimeStampCountInto( startStamp );
568 #define Meas_endReceiveFromTo \
569 saveLowTimeStampCountInto( endStamp ); \
570 addIntervalToHist( startStamp, endStamp, \
571 _VMSMasterEnv->measHists[ ReceiveFromToHistIdx ] );
573 #define Meas_startReceiveOfType \
574 int32 startStamp, endStamp; \
575 saveLowTimeStampCountInto( startStamp );
577 #define Meas_endReceiveOfType \
578 saveLowTimeStampCountInto( endStamp ); \
579 addIntervalToHist( startStamp, endStamp, \
580 _VMSMasterEnv->measHists[ReceiveOfTypeHistIdx ] );
582 //=====
584 #include "ProcrContext.h"
585 #include "probes.h"
586 #include "vutilities.h"
588 #endif /* _VMS_H */