Mercurial > cgi-bin > hgwebdir.cgi > VMS > VMS_Implementations > VMS_impls > VMS__MC_shared_impl
changeset 200:6db9e4898978 HW__generic_x86_64_MC
VMS name changes -- added "WL", "PI", and "int" prefixes to function names and split VMS.h up
| author | Me@portablequad |
|---|---|
| date | Sun, 12 Feb 2012 01:49:33 -0800 |
| parents | 37b061180119 |
| children | caa8512f7bdc cb888346c3e0 |
| files | CoreLoop.c MasterLoop.c ProcrContext.c ProcrContext.h VMS.c VMS.h VMS_HW_specific_defs.h VMS_defs.h VMS_lang_specific_defs.h __brch__HW__generic_x86_64_MC probes.c probes.h vmalloc.c vmalloc.h vutilities.c vutilities.h |
| diffstat | 16 files changed, 687 insertions(+), 508 deletions(-) [+] |
line diff
1.1 --- a/CoreLoop.c Sat Feb 11 21:43:43 2012 -0800 1.2 +++ b/CoreLoop.c Sun Feb 12 01:49:33 2012 -0800 1.3 @@ -6,7 +6,6 @@ 1.4 1.5 1.6 #include "VMS.h" 1.7 -#include "Queue_impl/BlockingQueue.h" 1.8 #include "ProcrContext.h" 1.9 1.10 #include <stdlib.h> 1.11 @@ -16,7 +15,7 @@ 1.12 #include <pthread.h> 1.13 #include <sched.h> 1.14 1.15 -void *terminateCoreLoop(VirtProcr *currPr); 1.16 +void *terminateCoreLoop(SlaveVP *currPr); 1.17 1.18 /*This is the loop that runs in the OS Thread pinned to each core 1.19 *Get virt procr from queue, 1.20 @@ -35,9 +34,9 @@ 1.21 { 1.22 ThdParams *coreLoopThdParams; 1.23 int thisCoresIdx; 1.24 - VirtProcr *currPr; 1.25 - VMSQueueStruc *readyToAnimateQ; 1.26 - cpu_set_t coreMask; //has 1 in bit positions of allowed cores 1.27 + SlaveVP *currPr; 1.28 + VMSQueueStruc *readyToAnimateQ; 1.29 + cpu_set_t coreMask; //has 1 in bit positions of allowed cores 1.30 int errorCode; 1.31 1.32 //work-stealing struc on stack to prevent false-sharing in cache-line 1.33 @@ -101,13 +100,13 @@ 1.34 while( gate.gateClosed ) /*busy wait*/; 1.35 } 1.36 1.37 - currPr = (VirtProcr *) readVMSQ( readyToAnimateQ ); 1.38 + currPr = (SlaveVP *) readVMSQ( readyToAnimateQ ); 1.39 1.40 //Set the coreloop's progress, so stealer can see it has made it out 1.41 // of the protected area 1.42 gate.exitProgress = gate.preGateProgress; 1.43 #else 1.44 - currPr = (VirtProcr *) readVMSQ( readyToAnimateQ ); 1.45 + currPr = (SlaveVP *) readVMSQ( readyToAnimateQ ); 1.46 #endif 1.47 1.48 if( currPr != NULL ) _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0; 1.49 @@ -159,10 +158,10 @@ 1.50 1.51 1.52 void * 1.53 -terminateCoreLoop(VirtProcr *currPr){ 1.54 +terminateCoreLoop(SlaveVP *currPr){ 1.55 //first free shutdown VP that jumped here -- it first restores the 1.56 // coreloop's stack, so addr of currPr in stack frame is still correct 1.57 - VMS__dissipate_procr( currPr ); 1.58 + VMS_int__dissipate_procr( currPr ); 1.59 pthread_exit( NULL ); 1.60 } 1.61 1.62 @@ -177,7 +176,7 @@ 
1.63 void * 1.64 coreLoop_Seq( void *paramsIn ) 1.65 { 1.66 - VirtProcr *currPr; 1.67 + SlaveVP *currPr; 1.68 VMSQueueStruc *readyToAnimateQ; 1.69 1.70 ThdParams *coreLoopThdParams; 1.71 @@ -196,7 +195,7 @@ 1.72 //_VMSWorkQ must be a global, static volatile var, so not kept in reg, 1.73 // which forces reloading the pointer after each jmp to this point 1.74 readyToAnimateQ = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx]; 1.75 - currPr = (VirtProcr *) readVMSQ( readyToAnimateQ ); 1.76 + currPr = (SlaveVP *) readVMSQ( readyToAnimateQ ); 1.77 if( currPr == NULL ) 1.78 { if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 ) 1.79 { printf("too many back to back MasterVP\n"); exit(1); }
2.1 --- a/MasterLoop.c Sat Feb 11 21:43:43 2012 -0800 2.2 +++ b/MasterLoop.c Sun Feb 12 01:49:33 2012 -0800 2.3 @@ -16,7 +16,7 @@ 2.4 //=========================================================================== 2.5 void inline 2.6 stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ, 2.7 - VirtProcr *masterPr ); 2.8 + SlaveVP *masterPr ); 2.9 2.10 //=========================================================================== 2.11 2.12 @@ -71,10 +71,10 @@ 2.13 * is case when other cores starved and one core's requests generate work 2.14 * for them -- so keep max in queue to 3 or 4.. 2.15 */ 2.16 -void masterLoop( void *initData, VirtProcr *animatingPr ) 2.17 +void masterLoop( void *initData, SlaveVP *animatingPr ) 2.18 { 2.19 int32 slotIdx, numSlotsFilled; 2.20 - VirtProcr *schedVirtPr; 2.21 + SlaveVP *schedVirtPr; 2.22 SchedSlot *currSlot, **schedSlots; 2.23 MasterEnv *masterEnv; 2.24 VMSQueueStruc *readyToAnimateQ; 2.25 @@ -84,11 +84,11 @@ 2.26 void *semanticEnv; 2.27 2.28 int32 thisCoresIdx; 2.29 - VirtProcr *masterPr; 2.30 - volatile VirtProcr *volatileMasterPr; 2.31 + SlaveVP *masterPr; 2.32 + volatile SlaveVP *volatileMasterPr; 2.33 2.34 volatileMasterPr = animatingPr; 2.35 - masterPr = (VirtProcr*)volatileMasterPr; //used to force re-define after jmp 2.36 + masterPr = (SlaveVP*)volatileMasterPr; //used to force re-define after jmp 2.37 2.38 //First animation of each MasterVP will in turn animate this part 2.39 // of setup code.. 
(VP creator sets up the stack as if this function 2.40 @@ -120,7 +120,7 @@ 2.41 masterEnv = (MasterEnv*)_VMSMasterEnv; 2.42 2.43 //GCC may optimize so doesn't always re-define from frame-storage 2.44 - masterPr = (VirtProcr*)volatileMasterPr; //just to make sure after jmp 2.45 + masterPr = (SlaveVP*)volatileMasterPr; //just to make sure after jmp 2.46 thisCoresIdx = masterPr->coreAnimatedBy; 2.47 readyToAnimateQ = masterEnv->readyToAnimateQs[thisCoresIdx]; 2.48 schedSlots = masterEnv->allSchedSlots[thisCoresIdx]; 2.49 @@ -202,9 +202,9 @@ 2.50 */ 2.51 void inline 2.52 stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ, 2.53 - VirtProcr *masterPr ) 2.54 + SlaveVP *masterPr ) 2.55 { 2.56 - VirtProcr *stolenPr; 2.57 + SlaveVP *stolenPr; 2.58 int32 coreIdx, i; 2.59 VMSQueueStruc *currQ; 2.60 2.61 @@ -306,9 +306,9 @@ 2.62 void inline 2.63 gateProtected_stealWorkInto( SchedSlot *currSlot, 2.64 VMSQueueStruc *myReadyToAnimateQ, 2.65 - VirtProcr *masterPr ) 2.66 + SlaveVP *masterPr ) 2.67 { 2.68 - VirtProcr *stolenPr; 2.69 + SlaveVP *stolenPr; 2.70 int32 coreIdx, i, haveAVictim, gotLock; 2.71 VMSQueueStruc *victimsQ; 2.72
3.1 --- a/ProcrContext.c Sat Feb 11 21:43:43 2012 -0800 3.2 +++ b/ProcrContext.c Sun Feb 12 01:49:33 2012 -0800 3.3 @@ -15,14 +15,14 @@ 3.4 * animator state to return to -- 3.5 * 3.6 */ 3.7 -inline VirtProcr * 3.8 -create_procr_helper( VirtProcr *newPr, VirtProcrFnPtr fnPtr, 3.9 +inline SlaveVP * 3.10 +create_procr_helper( SlaveVP *newPr, VirtProcrFnPtr fnPtr, 3.11 void *initialData, void *stackLocs ) 3.12 { 3.13 void *stackPtr; 3.14 3.15 newPr->startOfStack = stackLocs; 3.16 - newPr->procrID = _VMSMasterEnv->numProcrsCreated++; 3.17 + newPr->procrID = _VMSMasterEnv->numVPsCreated++; 3.18 newPr->initialData = initialData; 3.19 newPr->requests = NULL; 3.20 newPr->schedSlot = NULL; 3.21 @@ -32,7 +32,7 @@ 3.22 */ 3.23 //instead of calling the function directly, call a wrapper function to fetch 3.24 //arguments from stack 3.25 - newPr->nextInstrPt = (VirtProcrFnPtr)&startVirtProcrFn; 3.26 + newPr->nextInstrPt = (VirtProcrFnPtr)&startVPFn; 3.27 3.28 //fnPtr takes two params -- void *initData & void *animProcr 3.29 //alloc stack locations, make stackPtr be the highest addr minus room 3.30 @@ -41,7 +41,7 @@ 3.31 stackPtr = ( (void *)stackLocs + VIRT_PROCR_STACK_SIZE - 4*sizeof(void*)); 3.32 3.33 //setup __cdecl on stack -- coreloop will switch to stackPtr before jmp 3.34 - *((VirtProcr**)stackPtr + 2 ) = newPr; //rightmost param 3.35 + *((SlaveVP**)stackPtr + 2 ) = newPr; //rightmost param 3.36 *((void**)stackPtr + 1 ) = initialData; //next param to left 3.37 *((void**)stackPtr) = (void*)fnPtr; 3.38
4.1 --- a/ProcrContext.h Sat Feb 11 21:43:43 2012 -0800 4.2 +++ b/ProcrContext.h Sun Feb 12 01:49:33 2012 -0800 4.3 @@ -12,21 +12,21 @@ 4.4 4.5 void saveCoreLoopReturnAddr(void **returnAddress); 4.6 4.7 -void switchToVP(VirtProcr *nextProcr); 4.8 +void switchToVP(SlaveVP *nextProcr); 4.9 4.10 -void switchToCoreLoop(VirtProcr *nextProcr); 4.11 +void switchToCoreLoop(SlaveVP *nextProcr); 4.12 4.13 -void masterSwitchToCoreLoop(VirtProcr *nextProcr); 4.14 +void masterSwitchToCoreLoop(SlaveVP *nextProcr); 4.15 4.16 -void startVirtProcrFn(); 4.17 +void startVPFn(); 4.18 4.19 -void *asmTerminateCoreLoop(VirtProcr *currPr); 4.20 +void *asmTerminateCoreLoop(SlaveVP *currPr); 4.21 4.22 #define flushRegisters() \ 4.23 asm volatile ("":::"%rbx", "%r12", "%r13","%r14","%r15") 4.24 4.25 -inline VirtProcr * 4.26 -create_procr_helper( VirtProcr *newPr, VirtProcrFnPtr fnPtr, 4.27 +inline SlaveVP * 4.28 +create_procr_helper( SlaveVP *newPr, VirtProcrFnPtr fnPtr, 4.29 void *initialData, void *stackLocs ); 4.30 4.31 #endif /* _ProcrContext_H */
5.1 --- a/VMS.c Sat Feb 11 21:43:43 2012 -0800 5.2 +++ b/VMS.c Sun Feb 12 01:49:33 2012 -0800 5.3 @@ -13,15 +13,13 @@ 5.4 5.5 #include "VMS.h" 5.6 #include "ProcrContext.h" 5.7 -#include "Queue_impl/BlockingQueue.h" 5.8 -#include "Histogram/Histogram.h" 5.9 5.10 5.11 #define thdAttrs NULL 5.12 5.13 //=========================================================================== 5.14 void 5.15 -shutdownFn( void *dummy, VirtProcr *dummy2 ); 5.16 +shutdownFn( void *dummy, SlaveVP *dummy2 ); 5.17 5.18 SchedSlot ** 5.19 create_sched_slots(); 5.20 @@ -36,7 +34,7 @@ 5.21 create_free_list(); 5.22 5.23 void 5.24 -endOSThreadFn( void *initData, VirtProcr *animatingPr ); 5.25 +endOSThreadFn( void *initData, SlaveVP *animatingPr ); 5.26 5.27 pthread_mutex_t suspendLock = PTHREAD_MUTEX_INITIALIZER; 5.28 pthread_cond_t suspend_cond = PTHREAD_COND_INITIALIZER; 5.29 @@ -72,7 +70,7 @@ 5.30 * layer. 5.31 */ 5.32 void 5.33 -VMS__init() 5.34 +VMS_int__init() 5.35 { 5.36 create_masterEnv(); 5.37 create_the_coreLoop_OS_threads(); 5.38 @@ -83,7 +81,7 @@ 5.39 /*To initialize the sequential version, just don't create the threads 5.40 */ 5.41 void 5.42 -VMS__init_Seq() 5.43 +VMS_int__init_Seq() 5.44 { 5.45 create_masterEnv(); 5.46 } 5.47 @@ -95,7 +93,7 @@ 5.48 { MasterEnv *masterEnv; 5.49 VMSQueueStruc **readyToAnimateQs; 5.50 int coreIdx; 5.51 - VirtProcr **masterVPs; 5.52 + SlaveVP **masterVPs; 5.53 SchedSlot ***allSchedSlots; //ptr to array of ptrs 5.54 5.55 5.56 @@ -127,19 +125,19 @@ 5.57 masterEnv = (MasterEnv*)_VMSMasterEnv; 5.58 5.59 //Make a readyToAnimateQ for each core loop 5.60 - readyToAnimateQs = VMS__malloc( NUM_CORES * sizeof(VMSQueueStruc *) ); 5.61 - masterVPs = VMS__malloc( NUM_CORES * sizeof(VirtProcr *) ); 5.62 + readyToAnimateQs = VMS_int__malloc( NUM_CORES * sizeof(VMSQueueStruc *) ); 5.63 + masterVPs = VMS_int__malloc( NUM_CORES * sizeof(SlaveVP *) ); 5.64 5.65 //One array for each core, 3 in array, core's masterVP scheds all 5.66 - allSchedSlots = VMS__malloc( NUM_CORES 
* sizeof(SchedSlot *) ); 5.67 + allSchedSlots = VMS_int__malloc( NUM_CORES * sizeof(SchedSlot *) ); 5.68 5.69 - _VMSMasterEnv->numProcrsCreated = 0; //used by create procr 5.70 + _VMSMasterEnv->numVPsCreated = 0; //used by create procr 5.71 for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ ) 5.72 { 5.73 readyToAnimateQs[ coreIdx ] = makeVMSQ(); 5.74 5.75 //Q: should give masterVP core-specific info as its init data? 5.76 - masterVPs[ coreIdx ] = VMS__create_procr( (VirtProcrFnPtr)&masterLoop, (void*)masterEnv ); 5.77 + masterVPs[ coreIdx ] = VMS_int__create_procr( (VirtProcrFnPtr)&masterLoop, (void*)masterEnv ); 5.78 masterVPs[ coreIdx ]->coreAnimatedBy = coreIdx; 5.79 allSchedSlots[ coreIdx ] = create_sched_slots(); //makes for one core 5.80 _VMSMasterEnv->numMasterInARow[ coreIdx ] = 0; 5.81 @@ -161,7 +159,7 @@ 5.82 _VMSMasterEnv->dynIntervalProbesInfo = 5.83 makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->intervalProbes), 200); 5.84 5.85 - _VMSMasterEnv->probeNameHashTbl = makeHashTable( 1000, &VMS__free ); 5.86 + _VMSMasterEnv->probeNameHashTbl = makeHashTable( 1000, &VMS_int__free ); 5.87 5.88 //put creation time directly into master env, for fast retrieval 5.89 struct timeval timeStamp; 5.90 @@ -186,11 +184,11 @@ 5.91 { SchedSlot **schedSlots; 5.92 int i; 5.93 5.94 - schedSlots = VMS__malloc( NUM_SCHED_SLOTS * sizeof(SchedSlot *) ); 5.95 + schedSlots = VMS_int__malloc( NUM_SCHED_SLOTS * sizeof(SchedSlot *) ); 5.96 5.97 for( i = 0; i < NUM_SCHED_SLOTS; i++ ) 5.98 { 5.99 - schedSlots[i] = VMS__malloc( sizeof(SchedSlot) ); 5.100 + schedSlots[i] = VMS_int__malloc( sizeof(SchedSlot) ); 5.101 5.102 //Set state to mean "handling requests done, slot needs filling" 5.103 schedSlots[i]->workIsDone = FALSE; 5.104 @@ -205,9 +203,9 @@ 5.105 { int i; 5.106 for( i = 0; i < NUM_SCHED_SLOTS; i++ ) 5.107 { 5.108 - VMS__free( schedSlots[i] ); 5.109 + VMS_int__free( schedSlots[i] ); 5.110 } 5.111 - VMS__free( schedSlots ); 5.112 + VMS_int__free( schedSlots ); 5.113 } 5.114 
5.115 5.116 @@ -225,7 +223,7 @@ 5.117 5.118 //Make the threads that animate the core loops 5.119 for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ ) 5.120 - { coreLoopThdParams[coreIdx] = VMS__malloc( sizeof(ThdParams) ); 5.121 + { coreLoopThdParams[coreIdx] = VMS_int__malloc( sizeof(ThdParams) ); 5.122 coreLoopThdParams[coreIdx]->coreNum = coreIdx; 5.123 5.124 retCode = 5.125 @@ -242,7 +240,7 @@ 5.126 *This starts the core loops running then waits for them to exit. 5.127 */ 5.128 void 5.129 -VMS__start_the_work_then_wait_until_done() 5.130 +VMS_WL__start_the_work_then_wait_until_done() 5.131 { int coreIdx; 5.132 //Start the core loops running 5.133 5.134 @@ -272,7 +270,7 @@ 5.135 * the sequential version of VMS is VMS__init_Seq, this, and coreLoop_Seq. 5.136 */ 5.137 void 5.138 -VMS__start_the_work_then_wait_until_done_Seq() 5.139 +VMS_WL__start_the_work_then_wait_until_done_Seq() 5.140 { 5.141 //Instead of un-suspending threads, just call the one and only 5.142 // core loop (sequential version), in the main thread. 5.143 @@ -282,13 +280,13 @@ 5.144 } 5.145 #endif 5.146 5.147 -inline VirtProcr * 5.148 -VMS__create_procr( VirtProcrFnPtr fnPtr, void *initialData ) 5.149 - { VirtProcr *newPr; 5.150 +inline SlaveVP * 5.151 +VMS_int__create_procr( VirtProcrFnPtr fnPtr, void *initialData ) 5.152 + { SlaveVP *newPr; 5.153 void *stackLocs; 5.154 5.155 - newPr = VMS__malloc( sizeof(VirtProcr) ); 5.156 - stackLocs = VMS__malloc( VIRT_PROCR_STACK_SIZE ); 5.157 + newPr = VMS_int__malloc( sizeof(SlaveVP) ); 5.158 + stackLocs = VMS_int__malloc( VIRT_PROCR_STACK_SIZE ); 5.159 if( stackLocs == 0 ) 5.160 { perror("VMS__malloc stack"); exit(1); } 5.161 5.162 @@ -299,12 +297,12 @@ 5.163 * be called from main thread or other thread -- never from code animated by 5.164 * a VMS virtual processor. 
5.165 */ 5.166 -inline VirtProcr * 5.167 +inline SlaveVP * 5.168 VMS_ext__create_procr( VirtProcrFnPtr fnPtr, void *initialData ) 5.169 - { VirtProcr *newPr; 5.170 + { SlaveVP *newPr; 5.171 char *stackLocs; 5.172 5.173 - newPr = malloc( sizeof(VirtProcr) ); 5.174 + newPr = malloc( sizeof(SlaveVP) ); 5.175 stackLocs = malloc( VIRT_PROCR_STACK_SIZE ); 5.176 if( stackLocs == 0 ) 5.177 { perror("malloc stack"); exit(1); } 5.178 @@ -316,7 +314,7 @@ 5.179 /*Anticipating multi-tasking 5.180 */ 5.181 void * 5.182 -VMS__give_sem_env_for( VirtProcr *animPr ) 5.183 +VMS_WL__give_sem_env_for( SlaveVP *animPr ) 5.184 { 5.185 return _VMSMasterEnv->semanticEnv; 5.186 } 5.187 @@ -331,7 +329,7 @@ 5.188 * next work-unit for that procr. 5.189 */ 5.190 void 5.191 -VMS__suspend_procr( VirtProcr *animatingPr ) 5.192 +VMS_int__suspend_procr( SlaveVP *animatingPr ) 5.193 { 5.194 5.195 //The request to master will cause this suspended virt procr to get 5.196 @@ -379,7 +377,7 @@ 5.197 * to the plugin. 5.198 */ 5.199 void 5.200 -VMS__send_create_procr_req( void *semReqData, VirtProcr *reqstingPr ) 5.201 +VMS_WL__send_create_procr_req( void *semReqData, SlaveVP *reqstingPr ) 5.202 { VMSReqst req; 5.203 5.204 req.reqType = createReq; 5.205 @@ -387,7 +385,7 @@ 5.206 req.nextReqst = reqstingPr->requests; 5.207 reqstingPr->requests = &req; 5.208 5.209 - VMS__suspend_procr( reqstingPr ); 5.210 + VMS_int__suspend_procr( reqstingPr ); 5.211 } 5.212 5.213 5.214 @@ -413,14 +411,14 @@ 5.215 * pears -- making that suspend the last thing in the virt procr's trace. 
5.216 */ 5.217 void 5.218 -VMS__send_dissipate_req( VirtProcr *procrToDissipate ) 5.219 +VMS_WL__send_dissipate_req( SlaveVP *procrToDissipate ) 5.220 { VMSReqst req; 5.221 5.222 req.reqType = dissipate; 5.223 req.nextReqst = procrToDissipate->requests; 5.224 procrToDissipate->requests = &req; 5.225 5.226 - VMS__suspend_procr( procrToDissipate ); 5.227 + VMS_int__suspend_procr( procrToDissipate ); 5.228 } 5.229 5.230 5.231 @@ -431,7 +429,7 @@ 5.232 *Use this version to dissipate VPs created outside the VMS system. 5.233 */ 5.234 void 5.235 -VMS_ext__dissipate_procr( VirtProcr *procrToDissipate ) 5.236 +VMS_ext__dissipate_procr( SlaveVP *procrToDissipate ) 5.237 { 5.238 //NOTE: initialData was given to the processor, so should either have 5.239 // been alloc'd with VMS__malloc, or freed by the level above animPr. 5.240 @@ -456,11 +454,11 @@ 5.241 *The request handler has to call VMS__free_VMSReq for any of these 5.242 */ 5.243 inline void 5.244 -VMS__add_sem_request_in_mallocd_VMSReqst( void *semReqData, 5.245 - VirtProcr *callingPr ) 5.246 +VMS_WL__add_sem_request_in_mallocd_VMSReqst( void *semReqData, 5.247 + SlaveVP *callingPr ) 5.248 { VMSReqst *req; 5.249 5.250 - req = VMS__malloc( sizeof(VMSReqst) ); 5.251 + req = VMS_int__malloc( sizeof(VMSReqst) ); 5.252 req->reqType = semantic; 5.253 req->semReqData = semReqData; 5.254 req->nextReqst = callingPr->requests; 5.255 @@ -473,7 +471,7 @@ 5.256 *Then it does suspend, to cause request to be sent. 
5.257 */ 5.258 inline void 5.259 -VMS__send_sem_request( void *semReqData, VirtProcr *callingPr ) 5.260 +VMS_WL__send_sem_request( void *semReqData, SlaveVP *callingPr ) 5.261 { VMSReqst req; 5.262 5.263 req.reqType = semantic; 5.264 @@ -481,12 +479,12 @@ 5.265 req.nextReqst = callingPr->requests; 5.266 callingPr->requests = &req; 5.267 5.268 - VMS__suspend_procr( callingPr ); 5.269 + VMS_int__suspend_procr( callingPr ); 5.270 } 5.271 5.272 5.273 inline void 5.274 -VMS__send_VMSSem_request( void *semReqData, VirtProcr *callingPr ) 5.275 +VMS_WL__send_VMSSem_request( void *semReqData, SlaveVP *callingPr ) 5.276 { VMSReqst req; 5.277 5.278 req.reqType = VMSSemantic; 5.279 @@ -494,14 +492,14 @@ 5.280 req.nextReqst = callingPr->requests; //gab any other preceeding 5.281 callingPr->requests = &req; 5.282 5.283 - VMS__suspend_procr( callingPr ); 5.284 + VMS_int__suspend_procr( callingPr ); 5.285 } 5.286 5.287 5.288 /* 5.289 */ 5.290 VMSReqst * 5.291 -VMS__take_next_request_out_of( VirtProcr *procrWithReq ) 5.292 +VMS_PI__take_next_request_out_of( SlaveVP *procrWithReq ) 5.293 { VMSReqst *req; 5.294 5.295 req = procrWithReq->requests; 5.296 @@ -513,7 +511,7 @@ 5.297 5.298 5.299 inline void * 5.300 -VMS__take_sem_reqst_from( VMSReqst *req ) 5.301 +VMS_PI__take_sem_reqst_from( VMSReqst *req ) 5.302 { 5.303 return req->semReqData; 5.304 } 5.305 @@ -535,15 +533,15 @@ 5.306 * Do the same for OS calls -- look later at it.. 
5.307 */ 5.308 void inline 5.309 -VMS__handle_VMSSemReq( VMSReqst *req, VirtProcr *requestingPr, void *semEnv, 5.310 - ResumePrFnPtr resumePrFnPtr ) 5.311 +VMS_PI__handle_VMSSemReq( VMSReqst *req, SlaveVP *requestingPr, void *semEnv, 5.312 + ResumeVPFnPtr resumePrFnPtr ) 5.313 { VMSSemReq *semReq; 5.314 IntervalProbe *newProbe; 5.315 5.316 semReq = req->semReqData; 5.317 5.318 - newProbe = VMS__malloc( sizeof(IntervalProbe) ); 5.319 - newProbe->nameStr = VMS__strDup( semReq->nameStr ); 5.320 + newProbe = VMS_int__malloc( sizeof(IntervalProbe) ); 5.321 + newProbe->nameStr = VMS_int__strDup( semReq->nameStr ); 5.322 newProbe->hist = NULL; 5.323 newProbe->schedChoiceWasRecorded = FALSE; 5.324 5.325 @@ -576,7 +574,7 @@ 5.326 * of dis-owning it. 5.327 */ 5.328 void 5.329 -VMS__dissipate_procr( VirtProcr *animatingPr ) 5.330 +VMS_int__dissipate_procr( SlaveVP *animatingPr ) 5.331 { 5.332 //dis-own all locations owned by this processor, causing to be freed 5.333 // any locations that it is (was) sole owner of 5.334 @@ -589,8 +587,8 @@ 5.335 // itself 5.336 //Note, should not stack-allocate initial data -- no guarantee, in 5.337 // general that creating processor will outlive ones it creates. 5.338 - VMS__free( animatingPr->startOfStack ); 5.339 - VMS__free( animatingPr ); 5.340 + VMS_int__free( animatingPr->startOfStack ); 5.341 + VMS_int__free( animatingPr ); 5.342 } 5.343 5.344 5.345 @@ -627,15 +625,15 @@ 5.346 * point is it sure that all results have completed. 
5.347 */ 5.348 void 5.349 -VMS__shutdown() 5.350 +VMS_int__shutdown() 5.351 { int coreIdx; 5.352 - VirtProcr *shutDownPr; 5.353 + SlaveVP *shutDownPr; 5.354 5.355 //create the shutdown processors, one for each core loop -- put them 5.356 // directly into the Q -- each core will die when gets one 5.357 for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ ) 5.358 { //Note, this is running in the master 5.359 - shutDownPr = VMS__create_procr( &endOSThreadFn, NULL ); 5.360 + shutDownPr = VMS_int__create_procr( &endOSThreadFn, NULL ); 5.361 writeVMSQ( shutDownPr, _VMSMasterEnv->readyToAnimateQs[coreIdx] ); 5.362 } 5.363 5.364 @@ -656,7 +654,7 @@ 5.365 * processors). 5.366 */ 5.367 void 5.368 -endOSThreadFn( void *initData, VirtProcr *animatingPr ) 5.369 +endOSThreadFn( void *initData, SlaveVP *animatingPr ) 5.370 { 5.371 #ifdef SEQUENTIAL 5.372 asmTerminateCoreLoopSeq(animatingPr); 5.373 @@ -669,7 +667,7 @@ 5.374 /*This is called from the startup & shutdown 5.375 */ 5.376 void 5.377 -VMS__cleanup_at_end_of_shutdown() 5.378 +VMS_int__cleanup_at_end_of_shutdown() 5.379 { 5.380 //unused 5.381 //VMSQueueStruc **readyToAnimateQs; 5.382 @@ -707,7 +705,7 @@ 5.383 { 5.384 freeVMSQ( readyToAnimateQs[ coreIdx ] ); 5.385 //master VPs were created external to VMS, so use external free 5.386 - VMS__dissipate_procr( masterVPs[ coreIdx ] ); 5.387 + VMS_int__dissipate_procr( masterVPs[ coreIdx ] ); 5.388 5.389 freeSchedSlots( allSchedSlots[ coreIdx ] ); 5.390 } 5.391 @@ -718,7 +716,7 @@ 5.392 { 5.393 freeVMSQ( readyToAnimateQs[ coreIdx ] ); 5.394 //master VPs were created external to VMS, so use external free 5.395 - VMS__dissipate_procr( masterVPs[ coreIdx ] ); 5.396 + VMS_int__dissipate_procr( masterVPs[ coreIdx ] ); 5.397 5.398 freeSchedSlots( allSchedSlots[ coreIdx ] ); 5.399 } 5.400 @@ -763,7 +761,7 @@ 5.401 * the error message. 
5.402 */ 5.403 void 5.404 -VMS__throw_exception( char *msgStr, VirtProcr *reqstPr, VMSExcp *excpData ) 5.405 +VMS_PI__throw_exception( char *msgStr, SlaveVP *reqstPr, VMSExcp *excpData ) 5.406 { 5.407 printf("%s",msgStr); 5.408 fflush(stdin);
6.1 --- a/VMS.h Sat Feb 11 21:43:43 2012 -0800 6.2 +++ b/VMS.h Sun Feb 12 01:49:33 2012 -0800 6.3 @@ -11,121 +11,47 @@ 6.4 #define _GNU_SOURCE 6.5 6.6 #include "VMS_primitive_data_types.h" 6.7 -#include "../../C_Libraries/Queue_impl/PrivateQueue.h" 6.8 -#include "../../C_Libraries/Histogram/Histogram.h" 6.9 #include "../../C_Libraries/DynArray/DynArray.h" 6.10 #include "../../C_Libraries/Hash_impl/PrivateHash.h" 6.11 +#include "../../C_Libraries/Histogram/Histogram.h" 6.12 +#include "../../C_Libraries/Queue_impl/PrivateQueue.h" 6.13 #include "vmalloc.h" 6.14 6.15 #include <pthread.h> 6.16 #include <sys/time.h> 6.17 6.18 +//================= Defines: included from separate files ================= 6.19 +// 6.20 +// Note: ALL defines are in other files, none are in here 6.21 +// 6.22 +#include "VMS_defs.h" 6.23 6.24 -//=============================== Debug =================================== 6.25 + 6.26 + 6.27 +//================================ Typedefs ================================= 6.28 // 6.29 -//When SEQUENTIAL is defined, VMS does sequential exe in the main thread 6.30 -// It still does co-routines and all the mechanisms are the same, it just 6.31 -// has only a single thread and animates VPs one at a time 6.32 -//#define SEQUENTIAL 6.33 - 6.34 -//#define USE_WORK_STEALING 6.35 - 6.36 -//turns on the probe-instrumentation in the application -- when not 6.37 -// defined, the calls to the probe functions turn into comments 6.38 -#define STATS__ENABLE_PROBES 6.39 -//#define TURN_ON_DEBUG_PROBES 6.40 - 6.41 -//These defines turn types of bug messages on and off 6.42 -// be sure debug messages are un-commented (next block of defines) 6.43 -#define dbgAppFlow TRUE /* Top level flow of application code -- general*/ 6.44 -#define dbgProbes FALSE /* for issues inside probes themselves*/ 6.45 -#define dbgB2BMaster FALSE /* in coreloop, back to back master VPs*/ 6.46 -#define dbgRqstHdlr FALSE /* in request handler code*/ 6.47 - 6.48 -//Comment or un- the substitute 
half to turn on/off types of debug message 6.49 -#define DEBUG( bool, msg) \ 6.50 -// if( bool){ printf(msg); fflush(stdin);} 6.51 -#define DEBUG1( bool, msg, param) \ 6.52 -// if(bool){printf(msg, param); fflush(stdin);} 6.53 -#define DEBUG2( bool, msg, p1, p2) \ 6.54 -// if(bool) {printf(msg, p1, p2); fflush(stdin);} 6.55 - 6.56 -#define ERROR(msg) printf(msg); 6.57 -#define ERROR1(msg, param) printf(msg, param); 6.58 -#define ERROR2(msg, p1, p2) printf(msg, p1, p2); 6.59 - 6.60 -//=========================== STATS ======================= 6.61 - 6.62 - //when MEAS__TIME_STAMP_SUSP is defined, causes code to be inserted and 6.63 - // compiled-in that saves the low part of the time stamp count just before 6.64 - // suspending a processor and just after resuming that processorsrc/VPThread_lib/VMS/VMS.h:322: warning: previous declaration of ‘VMS__create_procr’ was here. It is 6.65 - // saved into a field added to VirtProcr. Have to sanity-check for 6.66 - // rollover of low portion into high portion. 6.67 -//#define MEAS__TIME_STAMP_SUSP 6.68 -//#define MEAS__TIME_MASTER 6.69 -#define MEAS__TIME_PLUGIN 6.70 -#define MEAS__TIME_MALLOC 6.71 -//#define MEAS__TIME_MASTER_LOCK 6.72 -#define MEAS__NUM_TIMES_TO_RUN 100000 6.73 - 6.74 - //For code that calculates normalization-offset between TSC counts of 6.75 - // different cores. 
6.76 -#define NUM_TSC_ROUND_TRIPS 10 6.77 - 6.78 - 6.79 -//========================= Hardware related Constants ===================== 6.80 - //This value is the number of hardware threads in the shared memory 6.81 - // machine 6.82 -//#define NUM_CORES 8 6.83 - 6.84 - // tradeoff amortizing master fixed overhead vs imbalance potential 6.85 - // when work-stealing, can make bigger, at risk of losing cache affinity 6.86 -#define NUM_SCHED_SLOTS 5 6.87 - 6.88 -#define MIN_WORK_UNIT_CYCLES 20000 6.89 - 6.90 -#define MASTERLOCK_RETRIES 10000 6.91 - 6.92 - // stack size in virtual processors created 6.93 -#define VIRT_PROCR_STACK_SIZE 0x8000 /* 32K */ 6.94 - 6.95 - // memory for VMS__malloc 6.96 -#define MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE 0x10000000 /* 256M */ 6.97 - 6.98 -#define CACHE_LINE 64 6.99 -#define PAGE_SIZE 4096 6.100 - 6.101 - 6.102 -//============================== 6.103 - 6.104 -#define SUCCESS 0 6.105 - 6.106 -#define writeVMSQ writePrivQ 6.107 -#define readVMSQ readPrivQ 6.108 -#define makeVMSQ makeVMSPrivQ 6.109 -#define numInVMSQ numInPrivQ 6.110 -#define VMSQueueStruc PrivQueueStruc 6.111 - 6.112 - 6.113 - 6.114 -//=========================================================================== 6.115 typedef unsigned long long TSCount; 6.116 +typedef union 6.117 + { uint32 lowHigh[2]; 6.118 + uint64 longVal; 6.119 + } 6.120 +TSCountLowHigh; 6.121 6.122 typedef struct _SchedSlot SchedSlot; 6.123 typedef struct _VMSReqst VMSReqst; 6.124 -typedef struct _VirtProcr VirtProcr; 6.125 +typedef struct _SlaveVP SlaveVP; 6.126 typedef struct _IntervalProbe IntervalProbe; 6.127 typedef struct _GateStruc GateStruc; 6.128 6.129 6.130 -typedef VirtProcr * (*SlaveScheduler) ( void *, int ); //semEnv, coreIdx 6.131 -typedef void (*RequestHandler) ( VirtProcr *, void * ); //prWReqst, semEnv 6.132 -typedef void (*VirtProcrFnPtr) ( void *, VirtProcr * ); //initData, animPr 6.133 -typedef void VirtProcrFn ( void *, VirtProcr * ); //initData, animPr 6.134 -typedef void 
(*ResumePrFnPtr) ( VirtProcr *, void * ); 6.135 +typedef SlaveVP * (*SlaveScheduler) ( void *, int ); //semEnv, coreIdx 6.136 +typedef void (*RequestHandler) ( SlaveVP *, void * ); //prWReqst, semEnv 6.137 +typedef void (*VirtProcrFnPtr) ( void *, SlaveVP * ); //initData, animPr 6.138 +typedef void VirtProcrFn ( void *, SlaveVP * ); //initData, animPr 6.139 +typedef void (*ResumeVPFnPtr) ( SlaveVP *, void * ); 6.140 6.141 6.142 -//============= Requests =========== 6.143 +//============= Request Related =========== 6.144 // 6.145 6.146 enum VMSReqstType //avoid starting enums at 0, for debug reasons 6.147 @@ -154,7 +80,7 @@ 6.148 6.149 typedef struct 6.150 { enum VMSSemReqstType reqType; 6.151 - VirtProcr *requestingPr; 6.152 + SlaveVP *requestingPr; 6.153 char *nameStr; //for create probe 6.154 } 6.155 VMSSemReq; 6.156 @@ -166,14 +92,14 @@ 6.157 { 6.158 int workIsDone; 6.159 int needsProcrAssigned; 6.160 - VirtProcr *procrAssignedToSlot; 6.161 + SlaveVP *procrAssignedToSlot; 6.162 }; 6.163 //SchedSlot 6.164 6.165 /*WARNING: re-arranging this data structure could cause VP switching 6.166 * assembly code to fail -- hard-codes offsets of fields 6.167 */ 6.168 -struct _VirtProcr 6.169 +struct _SlaveVP 6.170 { int procrID; //for debugging -- count up each time create 6.171 int coreAnimatedBy; 6.172 void *startOfStack; 6.173 @@ -194,14 +120,19 @@ 6.174 void *dataRetFromReq;//values returned from plugin to VP go here 6.175 6.176 //=========== MEASUREMENT STUFF ========== 6.177 - #ifdef MEAS__TIME_STAMP_SUSP 6.178 - unsigned int preSuspTSCLow; 6.179 - unsigned int postSuspTSCLow; 6.180 - #endif 6.181 - #ifdef MEAS__TIME_MASTER /* in VirtProcr because multiple masterVPs*/ 6.182 - unsigned int startMasterTSCLow;USE_GNU 6.183 - unsigned int endMasterTSCLow; 6.184 - #endif 6.185 + #ifdef MEAS__TIME_STAMP_SUSP 6.186 + uint32 preSuspTSCLow; 6.187 + uint32 postSuspTSCLow; 6.188 + #endif 6.189 + #ifdef MEAS__TIME_MASTER /* in VirtProcr because multiple masterVPs*/ 6.190 + uint32 
startMasterTSCLow;USE_GNU 6.191 + uint32 endMasterTSCLow; 6.192 + #endif 6.193 + #ifdef MEAS__TIME_2011_SYS 6.194 + TSCountLowHigh startSusp; 6.195 + uint64 totalSuspCycles; 6.196 + uint32 numGoodSusp; 6.197 + #endif 6.198 //======================================== 6.199 6.200 float64 createPtInSecs; //have space but don't use on some configs 6.201 @@ -215,49 +146,63 @@ 6.202 */ 6.203 typedef struct 6.204 { 6.205 + union{ //adds padding to put masterLock on its own cache-line to elim 6.206 + // false sharing (masterLock is most-accessed var in VMS) 6.207 + volatile int32 masterLock; 6.208 + char padding[CACHELINE_SIZE]; 6.209 + } masterLockUnion; 6.210 SlaveScheduler slaveScheduler; 6.211 RequestHandler requestHandler; 6.212 6.213 SchedSlot ***allSchedSlots; 6.214 VMSQueueStruc **readyToAnimateQs; 6.215 - VirtProcr **masterVPs; 6.216 + SlaveVP **masterVPs; 6.217 6.218 void *semanticEnv; 6.219 void *OSEventStruc; //for future, when add I/O to BLIS 6.220 - MallocProlog *freeListHead; 6.221 + MallocArrays *freeLists; 6.222 int32 amtOfOutstandingMem; //total currently allocated 6.223 6.224 void *coreLoopReturnPt;//addr to jump to to re-enter coreLoop 6.225 6.226 int32 setupComplete; 6.227 - volatile int32 masterLock; 6.228 - 6.229 - int32 numMasterInARow[NUM_CORES];//detect back-to-back masterVP 6.230 + //int32 numMasterInARow[NUM_CORES];//detect back-to-back masterVP 6.231 GateStruc *workStealingGates[ NUM_CORES ]; //concurrent work-steal 6.232 int32 workStealingLock; 6.233 6.234 - int32 numProcrsCreated; //gives ordering to processor creation 6.235 + int32 numVPsCreated; //gives ordering to processor creation 6.236 6.237 //=========== MEASUREMENT STUFF ============= 6.238 - IntervalProbe **intervalProbes; 6.239 - PrivDynArrayInfo *dynIntervalProbesInfo; 6.240 - HashTable *probeNameHashTbl; 6.241 - int32 masterCreateProbeID; 6.242 - float64 createPtInSecs; 6.243 - Histogram **measHists; 6.244 - PrivDynArrayInfo *measHistsInfo; 6.245 - #ifdef MEAS__TIME_PLUGIN 6.246 - 
Histogram *reqHdlrLowTimeHist; 6.247 - Histogram *reqHdlrHighTimeHist; 6.248 - #endif 6.249 - #ifdef MEAS__TIME_MALLOC 6.250 - Histogram *mallocTimeHist; 6.251 - Histogram *freeTimeHist; 6.252 - #endif 6.253 - #ifdef MEAS__TIME_MASTER_LOCK 6.254 - Histogram *masterLockLowTimeHist; 6.255 - Histogram *masterLockHighTimeHist; 6.256 - #endif 6.257 + IntervalProbe **intervalProbes; 6.258 + PrivDynArrayInfo *dynIntervalProbesInfo; 6.259 + HashTable *probeNameHashTbl; 6.260 + int32 masterCreateProbeID; 6.261 + float64 createPtInSecs; 6.262 + Histogram **measHists; 6.263 + PrivDynArrayInfo *measHistsInfo; 6.264 + #ifdef MEAS__TIME_PLUGIN 6.265 + Histogram *reqHdlrLowTimeHist; 6.266 + Histogram *reqHdlrHighTimeHist; 6.267 + #endif 6.268 + #ifdef MEAS__TIME_MALLOC 6.269 + Histogram *mallocTimeHist; 6.270 + Histogram *freeTimeHist; 6.271 + #endif 6.272 + #ifdef MEAS__TIME_MASTER_LOCK 6.273 + Histogram *masterLockLowTimeHist; 6.274 + Histogram *masterLockHighTimeHist; 6.275 + #endif 6.276 + #ifdef MEAS__TIME_2011_SYS 6.277 + TSCountLowHigh startMaster; 6.278 + uint64 totalMasterCycles; 6.279 + uint32 numMasterAnimations; 6.280 + TSCountLowHigh startReqHdlr; 6.281 + uint64 totalPluginCycles; 6.282 + uint32 numPluginAnimations; 6.283 + uint64 cyclesTillStartMasterLoop; 6.284 + TSCountLowHigh endMasterLoop; 6.285 + #endif 6.286 + //========================================== 6.287 } 6.288 MasterEnv; 6.289 6.290 @@ -281,7 +226,7 @@ 6.291 6.292 void * coreLoop( void *paramsIn ); //standard PThreads fn prototype 6.293 void * coreLoop_Seq( void *paramsIn ); //standard PThreads fn prototype 6.294 -void masterLoop( void *initData, VirtProcr *masterPr ); 6.295 +void masterLoop( void *initData, SlaveVP *masterVP ); 6.296 6.297 6.298 typedef struct 6.299 @@ -298,278 +243,93 @@ 6.300 6.301 6.302 6.303 -//===================== Global Vars =================== 6.304 +//============================= Global Vars ================================ 6.305 6.306 -volatile MasterEnv *_VMSMasterEnv; 
6.307 +volatile MasterEnv *_VMSMasterEnv __align_to_cacheline__; 6.308 6.309 6.310 6.311 6.312 -//=========================== Function Prototypes ========================= 6.313 +//========================= Function Prototypes =========================== 6.314 6.315 6.316 //========== Setup and shutdown ========== 6.317 void 6.318 -VMS__init(); 6.319 +VMS_int__init(); 6.320 6.321 void 6.322 -VMS__init_Seq(); 6.323 +VMS_int__init_Seq(); 6.324 6.325 void 6.326 -VMS__start_the_work_then_wait_until_done(); 6.327 +VMS_WL__start_the_work_then_wait_until_done(); 6.328 6.329 void 6.330 -VMS__start_the_work_then_wait_until_done_Seq(); 6.331 +VMS_WL__start_the_work_then_wait_until_done_Seq(); 6.332 6.333 -inline VirtProcr * 6.334 -VMS__create_procr( VirtProcrFnPtr fnPtr, void *initialData ); 6.335 +inline SlaveVP * 6.336 +VMS_int__create_procr( VirtProcrFnPtr fnPtr, void *initialData ); 6.337 6.338 void 6.339 -VMS__dissipate_procr( VirtProcr *procrToDissipate ); 6.340 +VMS_int__dissipate_procr( SlaveVP *procrToDissipate ); 6.341 6.342 //Use this to create processor inside entry point & other places outside 6.343 // the VMS system boundary (IE, not run in slave nor Master) 6.344 -VirtProcr * 6.345 +SlaveVP * 6.346 VMS_ext__create_procr( VirtProcrFnPtr fnPtr, void *initialData ); 6.347 6.348 void 6.349 -VMS_ext__dissipate_procr( VirtProcr *procrToDissipate ); 6.350 +VMS_ext__dissipate_procr( SlaveVP *procrToDissipate ); 6.351 6.352 void 6.353 -VMS__throw_exception( char *msgStr, VirtProcr *reqstPr, VMSExcp *excpData ); 6.354 +VMS_PI__throw_exception( char *msgStr, SlaveVP *reqstPr, VMSExcp *excpData ); 6.355 6.356 void 6.357 -VMS__shutdown(); 6.358 +VMS_int__shutdown(); 6.359 6.360 void 6.361 -VMS__cleanup_at_end_of_shutdown(); 6.362 +VMS_int__cleanup_at_end_of_shutdown(); 6.363 6.364 void * 6.365 -VMS__give_sem_env_for( VirtProcr *animPr ); 6.366 +VMS_WL__give_sem_env_for( SlaveVP *animPr ); 6.367 6.368 6.369 //============== Request Related =============== 6.370 6.371 void 
6.372 -VMS__suspend_procr( VirtProcr *callingPr ); 6.373 +VMS_int__suspend_procr( SlaveVP *callingPr ); 6.374 6.375 inline void 6.376 -VMS__add_sem_request_in_mallocd_VMSReqst( void *semReqData, VirtProcr *callingPr ); 6.377 +VMS_WL__add_sem_request_in_mallocd_VMSReqst( void *semReqData, SlaveVP *callingPr ); 6.378 6.379 inline void 6.380 -VMS__send_sem_request( void *semReqData, VirtProcr *callingPr ); 6.381 +VMS_WL__send_sem_request( void *semReqData, SlaveVP *callingPr ); 6.382 6.383 void 6.384 -VMS__send_create_procr_req( void *semReqData, VirtProcr *reqstingPr ); 6.385 +VMS_WL__send_create_procr_req( void *semReqData, SlaveVP *reqstingPr ); 6.386 6.387 void inline 6.388 -VMS__send_dissipate_req( VirtProcr *prToDissipate ); 6.389 +VMS_WL__send_dissipate_req( SlaveVP *prToDissipate ); 6.390 6.391 inline void 6.392 -VMS__send_VMSSem_request( void *semReqData, VirtProcr *callingPr ); 6.393 +VMS_WL__send_VMSSem_request( void *semReqData, SlaveVP *callingPr ); 6.394 6.395 VMSReqst * 6.396 -VMS__take_next_request_out_of( VirtProcr *procrWithReq ); 6.397 +VMS_PI__take_next_request_out_of( SlaveVP *procrWithReq ); 6.398 6.399 inline void * 6.400 -VMS__take_sem_reqst_from( VMSReqst *req ); 6.401 +VMS_PI__take_sem_reqst_from( VMSReqst *req ); 6.402 6.403 void inline 6.404 -VMS__handle_VMSSemReq( VMSReqst *req, VirtProcr *requestingPr, void *semEnv, 6.405 - ResumePrFnPtr resumePrFnPtr ); 6.406 +VMS_PI__handle_VMSSemReq( VMSReqst *req, SlaveVP *requestingPr, void *semEnv, 6.407 + ResumeVPFnPtr resumePrFnPtr ); 6.408 6.409 -//======================== STATS ====================== 6.410 +//======================== MEASUREMENT ====================== 6.411 +uint64 6.412 +VMS_WL__give_num_plugin_cycles(); 6.413 +uint32 6.414 +VMS_WL__give_num_plugin_animations(); 6.415 6.416 -//===== RDTSC wrapper ===== //Also runs with x86_64 code 6.417 6.418 -#define saveTimeStampCountInto(low, high) \ 6.419 - asm volatile("RDTSC; \ 6.420 - movl %%eax, %0; \ 6.421 - movl %%edx, %1;" \ 6.422 - 
/* outputs */ : "=m" (low), "=m" (high)\ 6.423 - /* inputs */ : \ 6.424 - /* clobber */ : "%eax", "%edx" \ 6.425 - ); 6.426 - 6.427 -#define saveLowTimeStampCountInto(low) \ 6.428 - asm volatile("RDTSC; \ 6.429 - movl %%eax, %0;" \ 6.430 - /* outputs */ : "=m" (low) \ 6.431 - /* inputs */ : \ 6.432 - /* clobber */ : "%eax", "%edx" \ 6.433 - ); 6.434 - 6.435 -//==================== 6.436 -#define makeAMeasHist( idx, name, numBins, startVal, binWidth ) \ 6.437 - makeHighestDynArrayIndexBeAtLeast( _VMSMasterEnv->measHistsInfo, idx ); \ 6.438 - _VMSMasterEnv->measHists[idx] = \ 6.439 - makeFixedBinHist( numBins, startVal, binWidth, name ); 6.440 - 6.441 - 6.442 -#define MEAS__SUB_CREATE /*turn on/off subtraction of create from plugin*/ 6.443 - 6.444 -#ifdef VPTHREAD 6.445 - 6.446 -//VPThread 6.447 -#define createHistIdx 0 6.448 -#define mutexLockHistIdx 1 6.449 -#define mutexUnlockHistIdx 2 6.450 -#define condWaitHistIdx 3 6.451 -#define condSignalHistIdx 4 6.452 - 6.453 -#define MakeTheMeasHists() \ 6.454 - _VMSMasterEnv->measHistsInfo = \ 6.455 - makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \ 6.456 - makeAMeasHist( createHistIdx, "create", 250, 0, 100 ) \ 6.457 - makeAMeasHist( mutexLockHistIdx, "mutex_lock", 50, 0, 100 ) \ 6.458 - makeAMeasHist( mutexUnlockHistIdx, "mutex_unlock", 50, 0, 100 ) \ 6.459 - makeAMeasHist( condWaitHistIdx, "cond_wait", 50, 0, 100 ) \ 6.460 - makeAMeasHist( condSignalHistIdx, "cond_signal", 50, 0, 100 ) 6.461 - 6.462 -#endif 6.463 - 6.464 - 6.465 -#ifdef VCILK 6.466 - 6.467 -//VCilk 6.468 -#define spawnHistIdx 0 6.469 -#define syncHistIdx 1 6.470 - 6.471 -#define MakeTheMeasHists() \ 6.472 - _VMSMasterEnv->measHistsInfo = \ 6.473 - makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \ 6.474 - makeAMeasHist( spawnHistIdx, "Spawn", 50, 0, 200 ) \ 6.475 - makeAMeasHist( syncHistIdx, "Sync", 50, 0, 200 ) 6.476 - 6.477 - 6.478 -#endif 6.479 - 6.480 -#ifdef SSR 6.481 - 6.482 -//SSR 6.483 -#define 
SendFromToHistIdx 0 6.484 -#define SendOfTypeHistIdx 1 6.485 -#define ReceiveFromToHistIdx 2 6.486 -#define ReceiveOfTypeHistIdx 3 6.487 - 6.488 -#define MakeTheMeasHists() \ 6.489 - _VMSMasterEnv->measHistsInfo = \ 6.490 - makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \ 6.491 - makeAMeasHist( SendFromToHistIdx, "SendFromTo", 50, 0, 100 ) \ 6.492 - makeAMeasHist( SendOfTypeHistIdx, "SendOfType", 50, 0, 100 ) \ 6.493 - makeAMeasHist( ReceiveFromToHistIdx,"ReceiveFromTo", 50, 0, 100 ) \ 6.494 - makeAMeasHist( ReceiveOfTypeHistIdx,"ReceiveOfType", 50, 0, 100 ) 6.495 - 6.496 -#endif 6.497 - 6.498 -//=========================================================================== 6.499 -//VPThread 6.500 - 6.501 - 6.502 -#define Meas_startCreate \ 6.503 - int32 startStamp, endStamp; \ 6.504 - saveLowTimeStampCountInto( startStamp ); \ 6.505 - 6.506 -#define Meas_endCreate \ 6.507 - saveLowTimeStampCountInto( endStamp ); \ 6.508 - addIntervalToHist( startStamp, endStamp, \ 6.509 - _VMSMasterEnv->measHists[ createHistIdx ] ); 6.510 - 6.511 -#define Meas_startMutexLock \ 6.512 - int32 startStamp, endStamp; \ 6.513 - saveLowTimeStampCountInto( startStamp ); \ 6.514 - 6.515 -#define Meas_endMutexLock \ 6.516 - saveLowTimeStampCountInto( endStamp ); \ 6.517 - addIntervalToHist( startStamp, endStamp, \ 6.518 - _VMSMasterEnv->measHists[ mutexLockHistIdx ] ); 6.519 - 6.520 -#define Meas_startMutexUnlock \ 6.521 - int32 startStamp, endStamp; \ 6.522 - saveLowTimeStampCountInto( startStamp ); \ 6.523 - 6.524 -#define Meas_endMutexUnlock \ 6.525 - saveLowTimeStampCountInto( endStamp ); \ 6.526 - addIntervalToHist( startStamp, endStamp, \ 6.527 - _VMSMasterEnv->measHists[ mutexUnlockHistIdx ] ); 6.528 - 6.529 -#define Meas_startCondWait \ 6.530 - int32 startStamp, endStamp; \ 6.531 - saveLowTimeStampCountInto( startStamp ); \ 6.532 - 6.533 -#define Meas_endCondWait \ 6.534 - saveLowTimeStampCountInto( endStamp ); \ 6.535 - addIntervalToHist( startStamp, endStamp, \ 
6.536 - _VMSMasterEnv->measHists[ condWaitHistIdx ] ); 6.537 - 6.538 -#define Meas_startCondSignal \ 6.539 - int32 startStamp, endStamp; \ 6.540 - saveLowTimeStampCountInto( startStamp ); \ 6.541 - 6.542 -#define Meas_endCondSignal \ 6.543 - saveLowTimeStampCountInto( endStamp ); \ 6.544 - addIntervalToHist( startStamp, endStamp, \ 6.545 - _VMSMasterEnv->measHists[ condSignalHistIdx ] ); 6.546 - 6.547 -//=========================================================================== 6.548 -// VCilk 6.549 -#define Meas_startSpawn \ 6.550 - int32 startStamp, endStamp; \ 6.551 - saveLowTimeStampCountInto( startStamp ); \ 6.552 - 6.553 -#define Meas_endSpawn \ 6.554 - saveLowTimeStampCountInto( endStamp ); \ 6.555 - addIntervalToHist( startStamp, endStamp, \ 6.556 - _VMSMasterEnv->measHists[ spawnHistIdx ] ); 6.557 - 6.558 -#define Meas_startSync \ 6.559 - int32 startStamp, endStamp; \ 6.560 - saveLowTimeStampCountInto( startStamp ); \ 6.561 - 6.562 -#define Meas_endSync \ 6.563 - saveLowTimeStampCountInto( endStamp ); \ 6.564 - addIntervalToHist( startStamp, endStamp, \ 6.565 - _VMSMasterEnv->measHists[ syncHistIdx ] ); 6.566 - 6.567 -//=========================================================================== 6.568 -// SSR 6.569 -#define Meas_startSendFromTo \ 6.570 - int32 startStamp, endStamp; \ 6.571 - saveLowTimeStampCountInto( startStamp ); \ 6.572 - 6.573 -#define Meas_endSendFromTo \ 6.574 - saveLowTimeStampCountInto( endStamp ); \ 6.575 - addIntervalToHist( startStamp, endStamp, \ 6.576 - _VMSMasterEnv->measHists[ SendFromToHistIdx ] ); 6.577 - 6.578 -#define Meas_startSendOfType \ 6.579 - int32 startStamp, endStamp; \ 6.580 - saveLowTimeStampCountInto( startStamp ); \ 6.581 - 6.582 -#define Meas_endSendOfType \ 6.583 - saveLowTimeStampCountInto( endStamp ); \ 6.584 - addIntervalToHist( startStamp, endStamp, \ 6.585 - _VMSMasterEnv->measHists[ SendOfTypeHistIdx ] ); 6.586 - 6.587 -#define Meas_startReceiveFromTo \ 6.588 - int32 startStamp, endStamp; \ 6.589 - 
saveLowTimeStampCountInto( startStamp ); \ 6.590 - 6.591 -#define Meas_endReceiveFromTo \ 6.592 - saveLowTimeStampCountInto( endStamp ); \ 6.593 - addIntervalToHist( startStamp, endStamp, \ 6.594 - _VMSMasterEnv->measHists[ ReceiveFromToHistIdx ] ); 6.595 - 6.596 -#define Meas_startReceiveOfType \ 6.597 - int32 startStamp, endStamp; \ 6.598 - saveLowTimeStampCountInto( startStamp ); \ 6.599 - 6.600 -#define Meas_endReceiveOfType \ 6.601 - saveLowTimeStampCountInto( endStamp ); \ 6.602 - addIntervalToHist( startStamp, endStamp, \ 6.603 - _VMSMasterEnv->measHists[ReceiveOfTypeHistIdx ] ); 6.604 - 6.605 -//===== 6.606 6.607 #include "ProcrContext.h" 6.608 #include "probes.h"
7.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 7.2 +++ b/VMS_HW_specific_defs.h Sun Feb 12 01:49:33 2012 -0800 7.3 @@ -0,0 +1,53 @@ 7.4 +/* 7.5 + * Copyright 2009 OpenSourceStewardshipFoundation.org 7.6 + * Licensed under GNU General Public License version 2 7.7 + * 7.8 + * Author: seanhalle@yahoo.com 7.9 + * 7.10 + */ 7.11 + 7.12 +#ifndef _VMS_HW_SPEC_DEFS_H 7.13 +#define _VMS_HW_SPEC_DEFS_H 7.14 +#define _GNU_SOURCE 7.15 + 7.16 +//=============================== Hardware ================================== 7.17 +// This aligns the variable to a 256 byte boundary, therefore padding the gaps. 7.18 +// This is apparently not true for data on the stack. Which is I guess not an 7.19 +// issue because they are only used in the core's own coreloop 7.20 +#define CACHELINE_SIZE 256 7.21 +#define __align_to_cacheline__ __attribute__ ((aligned(CACHELINE_SIZE))) 7.22 +//aligns a pointer to cacheline. The memory area has to contain at least 7.23 +//CACHELINE_SIZE bytes more than needed 7.24 +#define __align_adress(ptr) ((void*)(((uintptr_t)(ptr))&((uintptr_t)(~0x0FF)))) 7.25 + 7.26 + //Frequency of TS counts -- have to do tests to verify 7.27 + //NOTE: TURN OFF TURBO-BOOST and SPEED-STEP else this won't be constant 7.28 +#define TSCOUNT_FREQ 3180000000 7.29 + 7.30 + 7.31 +//========================= Hardware related Constants ===================== 7.32 + //This value is the number of hardware threads in the shared memory 7.33 + // machine 7.34 +//#define NUM_CORES 8 7.35 + 7.36 + // tradeoff amortizing master fixed overhead vs imbalance potential 7.37 + // when work-stealing, can make bigger, at risk of losing cache affinity 7.38 +#define NUM_SCHED_SLOTS 3 7.39 + 7.40 +#define MIN_WORK_UNIT_CYCLES 20000 7.41 + 7.42 +#define MASTERLOCK_RETRIES 10000 7.43 + 7.44 + // stack size in virtual processors created 7.45 +#define VIRT_PROCR_STACK_SIZE 0x8000 /* 32K */ 7.46 + 7.47 + // memory for VMS__malloc 7.48 +#define MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE 0x40000000 /* 1G */ 7.49 + 7.50
+#define CACHE_LINE 64 7.51 +#define PAGE_SIZE 4096 7.52 + 7.53 +//=========================================================================== 7.54 + 7.55 +#endif /* _VMS_HW_SPEC_DEFS_H */ 7.56 +
8.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 8.2 +++ b/VMS_defs.h Sun Feb 12 01:49:33 2012 -0800 8.3 @@ -0,0 +1,189 @@ 8.4 +/* 8.5 + * Copyright 2009 OpenSourceStewardshipFoundation.org 8.6 + * Licensed under GNU General Public License version 2 8.7 + * 8.8 + * Author: seanhalle@yahoo.com 8.9 + * 8.10 + */ 8.11 + 8.12 +#ifndef _VMS_DEFS_H 8.13 +#define _VMS_DEFS_H 8.14 +#define _GNU_SOURCE 8.15 + 8.16 +//=========================== VMS-wide defs =============================== 8.17 +#include "VMS_primitive_data_types.h" 8.18 + 8.19 +#define SUCCESS 0 8.20 + 8.21 + //only after macro-expansion are the defs of writePrivQ, aso looked up 8.22 + // so these defs can be at the top, and writePrivQ defined later on.. 8.23 +#define writeVMSQ writePrivQ 8.24 +#define readVMSQ readPrivQ 8.25 +#define makeVMSQ makeVMSPrivQ 8.26 +#define numInVMSQ numInPrivQ 8.27 +#define VMSQueueStruc PrivQueueStruc 8.28 + 8.29 + 8.30 +//====================== Hardware Specific Defs ============================ 8.31 +#include "VMS_HW_specific_defs.h" 8.32 + 8.33 +//========================= Debug Related Defs ============================= 8.34 +// 8.35 +//When SEQUENTIAL is defined, VMS does sequential exe in the main thread 8.36 +// It still does co-routines and all the mechanisms are the same, it just 8.37 +// has only a single thread and animates VPs one at a time 8.38 +//#define SEQUENTIAL 8.39 + 8.40 +//#define USE_WORK_STEALING 8.41 + 8.42 +//turns on the probe-instrumentation in the application -- when not 8.43 +// defined, the calls to the probe functions turn into comments 8.44 +#define STATS__ENABLE_PROBES 8.45 +//#define TURN_ON_DEBUG_PROBES 8.46 + 8.47 +//These defines turn types of bug messages on and off 8.48 +// be sure debug messages are un-commented (next block of defines) 8.49 +#define dbgAppFlow TRUE /* Top level flow of application code -- general*/ 8.50 +#define dbgProbes FALSE /* for issues inside probes themselves*/ 8.51 +#define dbgB2BMaster FALSE /* in coreloop, 
back to back master VPs*/ 8.52 +#define dbgRqstHdlr FALSE /* in request handler code*/ 8.53 + 8.54 +//Comment or un- the substitute half to turn on/off types of debug message 8.55 +#define DEBUG( bool, msg) \ 8.56 +// if( bool){ printf(msg); fflush(stdin);} 8.57 +#define DEBUG1( bool, msg, param) \ 8.58 +// if(bool){printf(msg, param); fflush(stdin);} 8.59 +#define DEBUG2( bool, msg, p1, p2) \ 8.60 +// if(bool) {printf(msg, p1, p2); fflush(stdin);} 8.61 + 8.62 +#define ERROR(msg) printf(msg); 8.63 +#define ERROR1(msg, param) printf(msg, param); 8.64 +#define ERROR2(msg, p1, p2) printf(msg, p1, p2); 8.65 + 8.66 +//====================== Measurement Related Defs ========================== 8.67 +// 8.68 +// 8.69 + //when STATS__TURN_ON_PROBES is defined allows using probes to measure 8.70 + // time intervals. The probes are macros that only compile to something 8.71 + // when STATS__TURN_ON_PROBES is defined. The probes are saved in the 8.72 + // master env -- but only when this is defined. 8.73 + //The TSC probes use RDTSC instr, can be unreliable, Dbl uses gettimeofday 8.74 +#define STATS__TURN_ON_PROBES 8.75 +//#define STATS__USE_TSC_PROBES 8.76 +#define STATS__USE_DBL_PROBES 8.77 + 8.78 +//============================= Statistics ================================== 8.79 + 8.80 + 8.81 +inline TSCount getTSCount(); 8.82 + 8.83 +//================== Turn Measurement Things on and off ==================== 8.84 + 8.85 +//#define MEAS__TIME_2011_SYS 8.86 +//define this if any MEAS__... below are 8.87 +//#define MAKE_HISTS_FOR_MEASUREMENTS 8.88 + //when MEAS__TIME_STAMP_SUSP is defined, causes code to be inserted and 8.89 + // compiled-in that saves the low part of the time stamp count just before 8.90 + // suspending a processor and just after resuming that processor. It is 8.91 + // saved into a field added to VirtProcr. Have to sanity-check for 8.92 + // rollover of low portion into high portion. 
8.93 +//#define MEAS__TIME_STAMP_SUSP 8.94 +//#define MEAS__TIME_MASTER 8.95 +//#define MEAS__TIME_PLUGIN 8.96 +//#define MEAS__TIME_MALLOC 8.97 +//#define MEAS__TIME_MASTER_LOCK 8.98 + 8.99 + //For code that calculates normalization-offset between TSC counts of 8.100 + // different cores. 8.101 +//#define NUM_TSC_ROUND_TRIPS 10 8.102 + 8.103 + 8.104 + 8.105 +//=================== Macros to Capture Measurements ====================== 8.106 +// 8.107 +//===== RDTSC wrapper ===== 8.108 +//Also runs with x86_64 code 8.109 +#define saveTSCLowHigh(lowHighIn) \ 8.110 + asm volatile("RDTSC; \ 8.111 + movl %%eax, %0; \ 8.112 + movl %%edx, %1;" \ 8.113 + /* outputs */ : "=m" (lowHighIn.lowHigh[0]), "=m" (lowHighIn.lowHigh[1])\ 8.114 + /* inputs */ : \ 8.115 + /* clobber */ : "%eax", "%edx" \ 8.116 + ); 8.117 + 8.118 +#define saveTimeStampCountInto(low, high) \ 8.119 + asm volatile("RDTSC; \ 8.120 + movl %%eax, %0; \ 8.121 + movl %%edx, %1;" \ 8.122 + /* outputs */ : "=m" (low), "=m" (high)\ 8.123 + /* inputs */ : \ 8.124 + /* clobber */ : "%eax", "%edx" \ 8.125 + ); 8.126 + 8.127 +#define saveLowTimeStampCountInto(low) \ 8.128 + asm volatile("RDTSC; \ 8.129 + movl %%eax, %0;" \ 8.130 + /* outputs */ : "=m" (low) \ 8.131 + /* inputs */ : \ 8.132 + /* clobber */ : "%eax", "%edx" \ 8.133 + ); 8.134 + 8.135 + 8.136 +//================== Macros define types of meas want ===================== 8.137 +#ifdef MEAS__TIME_PLUGIN 8.138 + 8.139 +#define Meas_startReqHdlr \ 8.140 + int32 startStamp1, endStamp1; \ 8.141 + saveLowTimeStampCountInto( startStamp1 ); 8.142 + 8.143 +#define Meas_endReqHdlr \ 8.144 + saveLowTimeStampCountInto( endStamp1 ); \ 8.145 + addIntervalToHist( startStamp1, endStamp1, \ 8.146 + _VMSMasterEnv->reqHdlrLowTimeHist ); \ 8.147 + addIntervalToHist( startStamp1, endStamp1, \ 8.148 + _VMSMasterEnv->reqHdlrHighTimeHist ); 8.149 + 8.150 +#elif defined MEAS__TIME_2011_SYS 8.151 +#define Meas_startMasterLoop \ 8.152 + TSCountLowHigh startStamp1, endStamp1; \ 8.153 + 
saveTSCLowHigh( endStamp1 ); \ 8.154 + _VMSMasterEnv->cyclesTillStartMasterLoop = \ 8.155 + endStamp1.longVal - masterVP->startSusp.longVal; 8.156 + 8.157 +#define Meas_startReqHdlr \ 8.158 + saveTSCLowHigh( startStamp1 ); \ 8.159 + _VMSMasterEnv->startReqHdlr.longVal = startStamp1.longVal; 8.160 + 8.161 +#define Meas_endReqHdlr 8.162 + 8.163 +#define Meas_endMasterLoop \ 8.164 + saveTSCLowHigh( startStamp1 ); \ 8.165 + _VMSMasterEnv->endMasterLoop.longVal = startStamp1.longVal; 8.166 + 8.167 +#else 8.168 +#define Meas_startMasterLoop 8.169 +#define Meas_startReqHdlr 8.170 +#define Meas_endReqHdlr 8.171 +#define Meas_endMasterLoop 8.172 +#endif 8.173 + 8.174 +//====================== Histogram Macros -- Create ======================== 8.175 +// 8.176 +// 8.177 +#ifdef MAKE_HISTS_FOR_MEASUREMENTS 8.178 +#define makeAMeasHist( idx, name, numBins, startVal, binWidth ) \ 8.179 + makeHighestDynArrayIndexBeAtLeast( _VMSMasterEnv->measHistsInfo, idx ); \ 8.180 + _VMSMasterEnv->measHists[idx] = \ 8.181 + makeFixedBinHist( numBins, startVal, binWidth, name ); 8.182 +#else 8.183 +#define makeAMeasHist( idx, name, numBins, startVal, binWidth ) 8.184 +#endif 8.185 + 8.186 + 8.187 +#define MEAS__SUB_CREATE /*turn on/off subtraction of create from plugin*/ 8.188 + 8.189 +#include "VMS_lang_specific_defs.h" 8.190 + 8.191 +#endif /* _VMS_DEFS_H */ 8.192 +
9.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 9.2 +++ b/VMS_lang_specific_defs.h Sun Feb 12 01:49:33 2012 -0800 9.3 @@ -0,0 +1,180 @@ 9.4 +/* 9.5 + * Copyright 2009 OpenSourceStewardshipFoundation.org 9.6 + * Licensed under GNU General Public License version 2 9.7 + * 9.8 + * Author: seanhalle@yahoo.com 9.9 + * 9.10 + */ 9.11 + 9.12 +#ifndef _VMS_LANG_SPEC_DEFS_H 9.13 +#define _VMS_LANG_SPEC_DEFS_H 9.14 + 9.15 + 9.16 + 9.17 +//=================== Language-specific Measurement Stuff =================== 9.18 +// 9.19 +//TODO: Figure out way to move these into language dir.. 9.20 +// wrap them in #ifdef MEAS__... 9.21 +// 9.22 + 9.23 + 9.24 +//=========================================================================== 9.25 +//VPThread 9.26 +#ifdef VTHREAD 9.27 + 9.28 +#define createHistIdx 1 //note: starts at 1 9.29 +#define mutexLockHistIdx 2 9.30 +#define mutexUnlockHistIdx 3 9.31 +#define condWaitHistIdx 4 9.32 +#define condSignalHistIdx 5 9.33 + 9.34 +#define MakeTheMeasHists() \ 9.35 + _VMSMasterEnv->measHistsInfo = \ 9.36 + makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \ 9.37 + makeAMeasHist( createHistIdx, "create", 250, 0, 100 ) \ 9.38 + makeAMeasHist( mutexLockHistIdx, "mutex_lock", 50, 0, 100 ) \ 9.39 + makeAMeasHist( mutexUnlockHistIdx, "mutex_unlock", 50, 0, 100 ) \ 9.40 + makeAMeasHist( condWaitHistIdx, "cond_wait", 50, 0, 100 ) \ 9.41 + makeAMeasHist( condSignalHistIdx, "cond_signal", 50, 0, 100 ) 9.42 + 9.43 + 9.44 +#define Meas_startCreate \ 9.45 + int32 startStamp, endStamp; \ 9.46 + saveLowTimeStampCountInto( startStamp ); \ 9.47 + 9.48 +#define Meas_endCreate \ 9.49 + saveLowTimeStampCountInto( endStamp ); \ 9.50 + addIntervalToHist( startStamp, endStamp, \ 9.51 + _VMSMasterEnv->measHists[ createHistIdx ] ); 9.52 + 9.53 +#define Meas_startMutexLock \ 9.54 + int32 startStamp, endStamp; \ 9.55 + saveLowTimeStampCountInto( startStamp ); \ 9.56 + 9.57 +#define Meas_endMutexLock \ 9.58 + saveLowTimeStampCountInto( endStamp ); \ 
9.59 + addIntervalToHist( startStamp, endStamp, \ 9.60 + _VMSMasterEnv->measHists[ mutexLockHistIdx ] ); 9.61 + 9.62 +#define Meas_startMutexUnlock \ 9.63 + int32 startStamp, endStamp; \ 9.64 + saveLowTimeStampCountInto( startStamp ); \ 9.65 + 9.66 +#define Meas_endMutexUnlock \ 9.67 + saveLowTimeStampCountInto( endStamp ); \ 9.68 + addIntervalToHist( startStamp, endStamp, \ 9.69 + _VMSMasterEnv->measHists[ mutexUnlockHistIdx ] ); 9.70 + 9.71 +#define Meas_startCondWait \ 9.72 + int32 startStamp, endStamp; \ 9.73 + saveLowTimeStampCountInto( startStamp ); \ 9.74 + 9.75 +#define Meas_endCondWait \ 9.76 + saveLowTimeStampCountInto( endStamp ); \ 9.77 + addIntervalToHist( startStamp, endStamp, \ 9.78 + _VMSMasterEnv->measHists[ condWaitHistIdx ] ); 9.79 + 9.80 +#define Meas_startCondSignal \ 9.81 + int32 startStamp, endStamp; \ 9.82 + saveLowTimeStampCountInto( startStamp ); \ 9.83 + 9.84 +#define Meas_endCondSignal \ 9.85 + saveLowTimeStampCountInto( endStamp ); \ 9.86 + addIntervalToHist( startStamp, endStamp, \ 9.87 + _VMSMasterEnv->measHists[ condSignalHistIdx ] ); 9.88 + 9.89 +#endif 9.90 + 9.91 + 9.92 + 9.93 +//=========================================================================== 9.94 +//VCilk 9.95 + 9.96 +#ifdef VCILK 9.97 + 9.98 +#define spawnHistIdx 1 //note: starts at 1 9.99 +#define syncHistIdx 2 9.100 + 9.101 +#define MakeTheMeasHists() \ 9.102 + _VMSMasterEnv->measHistsInfo = \ 9.103 + makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \ 9.104 + makeAMeasHist( spawnHistIdx, "Spawn", 50, 0, 200 ) \ 9.105 + makeAMeasHist( syncHistIdx, "Sync", 50, 0, 200 ) 9.106 + 9.107 + 9.108 +#define Meas_startSpawn \ 9.109 + int32 startStamp, endStamp; \ 9.110 + saveLowTimeStampCountInto( startStamp ); \ 9.111 + 9.112 +#define Meas_endSpawn \ 9.113 + saveLowTimeStampCountInto( endStamp ); \ 9.114 + addIntervalToHist( startStamp, endStamp, \ 9.115 + _VMSMasterEnv->measHists[ spawnHistIdx ] ); 9.116 + 9.117 +#define Meas_startSync \ 9.118 + int32 
startStamp, endStamp; \ 9.119 + saveLowTimeStampCountInto( startStamp ); \ 9.120 + 9.121 +#define Meas_endSync \ 9.122 + saveLowTimeStampCountInto( endStamp ); \ 9.123 + addIntervalToHist( startStamp, endStamp, \ 9.124 + _VMSMasterEnv->measHists[ syncHistIdx ] ); 9.125 +#endif 9.126 + 9.127 +//=========================================================================== 9.128 +// SSR 9.129 + 9.130 +#ifdef SSR 9.131 + 9.132 +#define SendFromToHistIdx 1 //note: starts at 1 9.133 +#define SendOfTypeHistIdx 2 9.134 +#define ReceiveFromToHistIdx 3 9.135 +#define ReceiveOfTypeHistIdx 4 9.136 + 9.137 +#define MakeTheMeasHists() \ 9.138 + _VMSMasterEnv->measHistsInfo = \ 9.139 + makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \ 9.140 + makeAMeasHist( SendFromToHistIdx, "SendFromTo", 50, 0, 100 ) \ 9.141 + makeAMeasHist( SendOfTypeHistIdx, "SendOfType", 50, 0, 100 ) \ 9.142 + makeAMeasHist( ReceiveFromToHistIdx,"ReceiveFromTo", 50, 0, 100 ) \ 9.143 + makeAMeasHist( ReceiveOfTypeHistIdx,"ReceiveOfType", 50, 0, 100 ) 9.144 + 9.145 +#define Meas_startSendFromTo \ 9.146 + int32 startStamp, endStamp; \ 9.147 + saveLowTimeStampCountInto( startStamp ); \ 9.148 + 9.149 +#define Meas_endSendFromTo \ 9.150 + saveLowTimeStampCountInto( endStamp ); \ 9.151 + addIntervalToHist( startStamp, endStamp, \ 9.152 + _VMSMasterEnv->measHists[ SendFromToHistIdx ] ); 9.153 + 9.154 +#define Meas_startSendOfType \ 9.155 + int32 startStamp, endStamp; \ 9.156 + saveLowTimeStampCountInto( startStamp ); \ 9.157 + 9.158 +#define Meas_endSendOfType \ 9.159 + saveLowTimeStampCountInto( endStamp ); \ 9.160 + addIntervalToHist( startStamp, endStamp, \ 9.161 + _VMSMasterEnv->measHists[ SendOfTypeHistIdx ] ); 9.162 + 9.163 +#define Meas_startReceiveFromTo \ 9.164 + int32 startStamp, endStamp; \ 9.165 + saveLowTimeStampCountInto( startStamp ); \ 9.166 + 9.167 +#define Meas_endReceiveFromTo \ 9.168 + saveLowTimeStampCountInto( endStamp ); \ 9.169 + addIntervalToHist( startStamp, endStamp, \ 
9.170 + _VMSMasterEnv->measHists[ ReceiveFromToHistIdx ] ); 9.171 + 9.172 +#define Meas_startReceiveOfType \ 9.173 + int32 startStamp, endStamp; \ 9.174 + saveLowTimeStampCountInto( startStamp ); \ 9.175 + 9.176 +#define Meas_endReceiveOfType \ 9.177 + saveLowTimeStampCountInto( endStamp ); \ 9.178 + addIntervalToHist( startStamp, endStamp, \ 9.179 + _VMSMasterEnv->measHists[ReceiveOfTypeHistIdx ] ); 9.180 +#endif /* SSR */ 9.181 + 9.182 +#endif /* _VMS_LANG_SPEC_DEFS_H */ 9.183 +
10.1 --- a/__brch__HW__generic_x86_64_MC Sat Feb 11 21:43:43 2012 -0800 10.2 +++ b/__brch__HW__generic_x86_64_MC Sun Feb 12 01:49:33 2012 -0800 10.3 @@ -1,10 +1,10 @@ 10.4 A HW branch for: 10.5 10.6 -generic MultiCore x86 64bit instructions set 10.7 +generic MultiCore machines with x86 64bit instruction set 10.8 10.9 This branch shouldn't be used, except as a lazy fall-back. Instead, try out other branches tuned to specific hardware platforms to find the one that performs best on your machine. Use the "exe_time_vs_task_size" project to generate curves of overhead, and compare the results from the various branches. 10.10 10.11 -Note, if this branch is used, then vms_defs.h file has to be updated with the number of cores in your machine 10.12 +Note: if this branch is used, then NUM_CORES in the VMS_HW_specific_defs.h file has to be set to the number of cores in your machine 10.13 10.14 ======== Background on branch naming ========= 10.15
11.1 --- a/probes.c Sat Feb 11 21:43:43 2012 -0800 11.2 +++ b/probes.c Sun Feb 12 01:49:33 2012 -0800 11.3 @@ -9,21 +9,6 @@ 11.4 #include <sys/time.h> 11.5 11.6 #include "VMS.h" 11.7 -#include "Queue_impl/BlockingQueue.h" 11.8 -#include "Histogram/Histogram.h" 11.9 - 11.10 - 11.11 -//================================ STATS ==================================== 11.12 - 11.13 -inline TSCount getTSCount() 11.14 - { unsigned int low, high; 11.15 - TSCount out; 11.16 - 11.17 - saveTimeStampCountInto( low, high ); 11.18 - out = high; 11.19 - out = (out << 32) + low; 11.20 - return out; 11.21 - } 11.22 11.23 11.24 11.25 @@ -108,14 +93,14 @@ 11.26 * 11.27 */ 11.28 IntervalProbe * 11.29 -create_generic_probe( char *nameStr, VirtProcr *animPr ) 11.30 +create_generic_probe( char *nameStr, SlaveVP *animPr ) 11.31 { 11.32 VMSSemReq reqData; 11.33 11.34 reqData.reqType = createProbe; 11.35 reqData.nameStr = nameStr; 11.36 11.37 - VMS__send_VMSSem_request( &reqData, animPr ); 11.38 + VMS_WL__send_VMSSem_request( &reqData, animPr ); 11.39 11.40 return animPr->dataRetFromReq; 11.41 } 11.42 @@ -146,13 +131,13 @@ 11.43 void 11.44 VMS_impl__free_probe( IntervalProbe *probe ) 11.45 { if( probe->hist != NULL ) freeDblHist( probe->hist ); 11.46 - if( probe->nameStr != NULL) VMS__free( probe->nameStr ); 11.47 - VMS__free( probe ); 11.48 + if( probe->nameStr != NULL) VMS_int__free( probe->nameStr ); 11.49 + VMS_int__free( probe ); 11.50 } 11.51 11.52 11.53 int32 11.54 -VMS_impl__record_time_point_into_new_probe( char *nameStr, VirtProcr *animPr) 11.55 +VMS_impl__record_time_point_into_new_probe( char *nameStr, SlaveVP *animPr) 11.56 { IntervalProbe *newProbe; 11.57 struct timeval *startStamp; 11.58 float64 startSecs; 11.59 @@ -190,7 +175,7 @@ 11.60 } 11.61 11.62 int32 11.63 -VMS_impl__create_single_interval_probe( char *nameStr, VirtProcr *animPr ) 11.64 +VMS_impl__create_single_interval_probe( char *nameStr, SlaveVP *animPr ) 11.65 { IntervalProbe *newProbe; 11.66 11.67 newProbe = 
create_generic_probe( nameStr, animPr ); 11.68 @@ -200,7 +185,7 @@ 11.69 11.70 int32 11.71 VMS_impl__create_histogram_probe( int32 numBins, float64 startValue, 11.72 - float64 binWidth, char *nameStr, VirtProcr *animPr ) 11.73 + float64 binWidth, char *nameStr, SlaveVP *animPr ) 11.74 { IntervalProbe *newProbe; 11.75 DblHist *hist; 11.76 11.77 @@ -212,7 +197,7 @@ 11.78 } 11.79 11.80 void 11.81 -VMS_impl__index_probe_by_its_name( int32 probeID, VirtProcr *animPr ) 11.82 +VMS_impl__index_probe_by_its_name( int32 probeID, SlaveVP *animPr ) 11.83 { IntervalProbe *probe; 11.84 11.85 //TODO: fix this To be in Master -- race condition 11.86 @@ -222,7 +207,7 @@ 11.87 } 11.88 11.89 IntervalProbe * 11.90 -VMS_impl__get_probe_by_name( char *probeName, VirtProcr *animPr ) 11.91 +VMS_impl__get_probe_by_name( char *probeName, SlaveVP *animPr ) 11.92 { 11.93 //TODO: fix this To be in Master -- race condition 11.94 return getValueFromTable( probeName, _VMSMasterEnv->probeNameHashTbl ); 11.95 @@ -233,7 +218,7 @@ 11.96 * work locally, in the anim Pr 11.97 */ 11.98 void 11.99 -VMS_impl__record_sched_choice_into_probe( int32 probeID, VirtProcr *animatingPr ) 11.100 +VMS_impl__record_sched_choice_into_probe( int32 probeID, SlaveVP *animatingPr ) 11.101 { IntervalProbe *probe; 11.102 11.103 probe = _VMSMasterEnv->intervalProbes[ probeID ];
12.1 --- a/probes.h Sat Feb 11 21:43:43 2012 -0800 12.2 +++ b/probes.h Sun Feb 12 01:49:33 2012 -0800 12.3 @@ -14,15 +14,10 @@ 12.4 12.5 #include <sys/time.h> 12.6 12.7 +/*Note on order of include files: 12.8 + * This file relies on #defines that appear in other files.. 12.9 + */ 12.10 12.11 - //when STATS__TURN_ON_PROBES is defined allows using probes to measure 12.12 - // time intervals. The probes are macros that only compile to something 12.13 - // when STATS__TURN_ON_PROBES is defined. The probes are saved in the 12.14 - // master env -- but only when this is defined. 12.15 - //The TSC probes use RDTSC instr, can be unreliable, Dbl uses gettimeofday 12.16 -#define STATS__TURN_ON_PROBES 12.17 -//#define STATS__USE_TSC_PROBES 12.18 -#define STATS__USE_DBL_PROBES 12.19 12.20 //typedef struct _IntervalProbe IntervalProbe; //in VMS.h 12.21 12.22 @@ -50,21 +45,13 @@ 12.23 }; 12.24 12.25 12.26 -//============================= Statistics ================================== 12.27 - 12.28 - //Frequency of TS counts 12.29 - //TODO: change freq for each machine 12.30 -#define TSCOUNT_FREQ 3180000000 12.31 - 12.32 -inline TSCount getTSCount(); 12.33 - 12.34 12.35 //======================== Probes ============================= 12.36 // 12.37 // Use macros to allow turning probes off with a #define switch 12.38 #ifdef STATS__ENABLE_PROBES 12.39 int32 12.40 -VMS_impl__record_time_point_into_new_probe( char *nameStr,VirtProcr *animPr); 12.41 +VMS_impl__record_time_point_into_new_probe( char *nameStr,SlaveVP *animPr); 12.42 #define VMS__record_time_point_into_new_probe( nameStr, animPr ) \ 12.43 VMS_impl__record_time_point_in_new_probe( nameStr, animPr ) 12.44 12.45 @@ -75,14 +62,14 @@ 12.46 12.47 12.48 int32 12.49 -VMS_impl__create_single_interval_probe( char *nameStr, VirtProcr *animPr ); 12.50 +VMS_impl__create_single_interval_probe( char *nameStr, SlaveVP *animPr ); 12.51 #define VMS__create_single_interval_probe( nameStr, animPr ) \ 12.52 
VMS_impl__create_single_interval_probe( nameStr, animPr ) 12.53 12.54 12.55 int32 12.56 VMS_impl__create_histogram_probe( int32 numBins, float64 startValue, 12.57 - float64 binWidth, char *nameStr, VirtProcr *animPr ); 12.58 + float64 binWidth, char *nameStr, SlaveVP *animPr ); 12.59 #define VMS__create_histogram_probe( numBins, startValue, \ 12.60 binWidth, nameStr, animPr ) \ 12.61 VMS_impl__create_histogram_probe( numBins, startValue, \ 12.62 @@ -93,17 +80,17 @@ 12.63 VMS_impl__free_probe( probe ) 12.64 12.65 void 12.66 -VMS_impl__index_probe_by_its_name( int32 probeID, VirtProcr *animPr ); 12.67 +VMS_impl__index_probe_by_its_name( int32 probeID, SlaveVP *animPr ); 12.68 #define VMS__index_probe_by_its_name( probeID, animPr ) \ 12.69 VMS_impl__index_probe_by_its_name( probeID, animPr ) 12.70 12.71 IntervalProbe * 12.72 -VMS_impl__get_probe_by_name( char *probeName, VirtProcr *animPr ); 12.73 +VMS_impl__get_probe_by_name( char *probeName, SlaveVP *animPr ); 12.74 #define VMS__get_probe_by_name( probeID, animPr ) \ 12.75 VMS_impl__get_probe_by_name( probeName, animPr ) 12.76 12.77 void 12.78 -VMS_impl__record_sched_choice_into_probe( int32 probeID, VirtProcr *animPr ); 12.79 +VMS_impl__record_sched_choice_into_probe( int32 probeID, SlaveVP *animPr ); 12.80 #define VMS__record_sched_choice_into_probe( probeID, animPr ) \ 12.81 VMS_impl__record_sched_choice_into_probe( probeID, animPr ) 12.82 12.83 @@ -130,7 +117,7 @@ 12.84 12.85 #else 12.86 int32 12.87 -VMS_impl__record_time_point_into_new_probe( char *nameStr,VirtProcr *animPr); 12.88 +VMS_impl__record_time_point_into_new_probe( char *nameStr,SlaveVP *animPr); 12.89 #define VMS__record_time_point_into_new_probe( nameStr, animPr ) \ 12.90 0 /* do nothing */ 12.91 12.92 @@ -141,30 +128,30 @@ 12.93 12.94 12.95 int32 12.96 -VMS_impl__create_single_interval_probe( char *nameStr, VirtProcr *animPr ); 12.97 +VMS_impl__create_single_interval_probe( char *nameStr, SlaveVP *animPr ); 12.98 #define 
VMS__create_single_interval_probe( nameStr, animPr ) \ 12.99 0 /* do nothing */ 12.100 12.101 12.102 int32 12.103 VMS_impl__create_histogram_probe( int32 numBins, float64 startValue, 12.104 - float64 binWidth, char *nameStr, VirtProcr *animPr ); 12.105 + float64 binWidth, char *nameStr, SlaveVP *animPr ); 12.106 #define VMS__create_histogram_probe( numBins, startValue, \ 12.107 binWidth, nameStr, animPr ) \ 12.108 0 /* do nothing */ 12.109 12.110 void 12.111 -VMS_impl__index_probe_by_its_name( int32 probeID, VirtProcr *animPr ); 12.112 +VMS_impl__index_probe_by_its_name( int32 probeID, SlaveVP *animPr ); 12.113 #define VMS__index_probe_by_its_name( probeID, animPr ) \ 12.114 /* do nothing */ 12.115 12.116 IntervalProbe * 12.117 -VMS_impl__get_probe_by_name( char *probeName, VirtProcr *animPr ); 12.118 +VMS_impl__get_probe_by_name( char *probeName, SlaveVP *animPr ); 12.119 #define VMS__get_probe_by_name( probeID, animPr ) \ 12.120 NULL /* do nothing */ 12.121 12.122 void 12.123 -VMS_impl__record_sched_choice_into_probe( int32 probeID, VirtProcr *animPr ); 12.124 +VMS_impl__record_sched_choice_into_probe( int32 probeID, SlaveVP *animPr ); 12.125 #define VMS__record_sched_choice_into_probe( probeID, animPr ) \ 12.126 /* do nothing */ 12.127
13.1 --- a/vmalloc.c Sat Feb 11 21:43:43 2012 -0800 13.2 +++ b/vmalloc.c Sun Feb 12 01:49:33 2012 -0800 13.3 @@ -13,7 +13,7 @@ 13.4 #include <stdio.h> 13.5 13.6 #include "VMS.h" 13.7 -#include "Histogram/Histogram.h" 13.8 +#include "../../C_Libraries/Histogram/Histogram.h" 13.9 13.10 /*Helper function 13.11 *Insert a newly generated free chunk into the first spot on the free list. 13.12 @@ -45,9 +45,8 @@ 13.13 *Shave off the extra and make it into a new free-list element, hook it in 13.14 * then return the address of the found element plus size of prolog. 13.15 * 13.16 - *Will find a 13.17 */ 13.18 -void *VMS__malloc( size_t sizeRequested ) 13.19 +void *VMS_int__malloc( size_t sizeRequested ) 13.20 { MallocProlog *foundElem = NULL, *currElem, *newElem; 13.21 ssize_t amountExtra, sizeConsumed,sizeOfFound; 13.22 uint32 foundElemIsTopOfHeap; 13.23 @@ -139,7 +138,7 @@ 13.24 * aligned and padded to the size of a CACHE_LINE. Thus creating a new chunk 13.25 * before the aligned chunk. 13.26 */ 13.27 -void *VMS__malloc_aligned( size_t sizeRequested ) 13.28 +void *VMS_int__malloc_aligned( size_t sizeRequested ) 13.29 { MallocProlog *foundElem = NULL, *currElem, *newElem; 13.30 ssize_t amountExtra, sizeConsumed,sizeOfFound,prevAmount; 13.31 uint32 foundElemIsTopOfHeap; 13.32 @@ -266,7 +265,7 @@ 13.33 * add this one to free-list. 13.34 */ 13.35 void 13.36 -VMS__free( void *ptrToFree ) 13.37 +VMS_int__free( void *ptrToFree ) 13.38 { MallocProlog *elemToFree, *nextLowerElem, *nextHigherElem; 13.39 size_t sizeOfElem; 13.40 uint32 lowerExistsAndIsFree, higherExistsAndIsFree;
14.1 --- a/vmalloc.h Sat Feb 11 21:43:43 2012 -0800 14.2 +++ b/vmalloc.h Sun Feb 12 01:49:33 2012 -0800 14.3 @@ -33,13 +33,42 @@ 14.4 FreeListHead; 14.5 14.6 void * 14.7 -VMS__malloc( size_t sizeRequested ); 14.8 +VMS_int__malloc( size_t sizeRequested ); 14.9 14.10 void * 14.11 -VMS__malloc_aligned( size_t sizeRequested ); 14.12 +VMS_int__malloc_aligned( size_t sizeRequested ); 14.13 14.14 void 14.15 -VMS__free( void *ptrToFree ); 14.16 +VMS_int__free( void *ptrToFree ); 14.17 + 14.18 +#define VMS_PI__malloc VMS_int__malloc 14.19 +#define VMS_PI__malloc_aligned VMS_int__malloc_aligned 14.20 +#define VMS_PI__free VMS_int__free 14.21 +/* For now, the PI is protected by master lock, so int malloc fine 14.22 +void * 14.23 +VMS_PI__malloc( size_t sizeRequested ); 14.24 + 14.25 +void * 14.26 +VMS_PI__malloc_aligned( size_t sizeRequested ); 14.27 + 14.28 +void 14.29 +VMS_PI__free( void *ptrToFree ); 14.30 +*/ 14.31 + 14.32 +//TODO: protect WL malloc from concurrency!! shared freelist can be corrupted 14.33 +#define VMS_WL__malloc VMS_int__malloc 14.34 +#define VMS_WL__malloc_aligned VMS_int__malloc_aligned 14.35 +#define VMS_WL__free VMS_int__free 14.36 +/* 14.37 +void * 14.38 +VMS_WL__malloc( size_t sizeRequested ); 14.39 + 14.40 +void * 14.41 +VMS_WL__malloc_aligned( size_t sizeRequested ); 14.42 + 14.43 +void 14.44 +VMS_WL__free( void *ptrToFree ); 14.45 +*/ 14.46 14.47 /*Allocates memory from the external system -- higher overhead 14.48 */
15.1 --- a/vutilities.c Sat Feb 11 21:43:43 2012 -0800 15.2 +++ b/vutilities.c Sun Feb 12 01:49:33 2012 -0800 15.3 @@ -14,10 +14,10 @@ 15.4 15.5 15.6 inline char * 15.7 -VMS__strDup( char *str ) 15.8 +VMS_int__strDup( char *str ) 15.9 { char *retStr; 15.10 15.11 - retStr = VMS__malloc( strlen(str) + 1 ); 15.12 + retStr = VMS_int__malloc( strlen(str) + 1 ); 15.13 if( str == NULL ) return str; 15.14 strcpy( retStr, str ); 15.15
