Mercurial > cgi-bin > hgwebdir.cgi > VMS > VMS_Implementations > VMS_impls > VMS__MC_shared_impl
changeset 178:c1784868dcea
testing hgeol -- see if it fixes line-ending issues -- commit line endings
| author | Me@portablequad |
|---|---|
| date | Wed, 04 Jan 2012 16:10:11 -0800 |
| parents | ad8213a8e916 |
| children | 0cadabf64cfa |
| files | .hgeol CoreLoop.c MasterLoop.c ProcrContext.h VMS.h VMS_primitive_data_types.h probes.h vmalloc.c vmalloc.h vutilities.c vutilities.h |
| diffstat | 11 files changed, 2060 insertions(+), 2048 deletions(-) [+] |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/.hgeol Wed Jan 04 16:10:11 2012 -0800 1.3 @@ -0,0 +1,12 @@ 1.4 + 1.5 +[patterns] 1.6 +**.py = native 1.7 +**.txt = native 1.8 +**.c = native 1.9 +**.h = native 1.10 +**.cpp = native 1.11 +**.java = native 1.12 +**.sh = native 1.13 +**.pl = native 1.14 +**.jpg = bin 1.15 +**.gif = bin
2.1 --- a/CoreLoop.c Thu Oct 06 16:24:17 2011 +0200 2.2 +++ b/CoreLoop.c Wed Jan 04 16:10:11 2012 -0800 2.3 @@ -1,215 +1,215 @@ 2.4 -/* 2.5 - * Copyright 2010 OpenSourceStewardshipFoundation 2.6 - * 2.7 - * Licensed under BSD 2.8 - */ 2.9 - 2.10 - 2.11 -#include "VMS.h" 2.12 -#include "Queue_impl/BlockingQueue.h" 2.13 -#include "ProcrContext.h" 2.14 - 2.15 -#include <stdlib.h> 2.16 -#include <stdio.h> 2.17 -#include <time.h> 2.18 - 2.19 -#include <pthread.h> 2.20 -#include <sched.h> 2.21 - 2.22 -void *terminateCoreLoop(VirtProcr *currPr); 2.23 - 2.24 -/*This is the loop that runs in the OS Thread pinned to each core 2.25 - *Get virt procr from queue, 2.26 - * save state of current animator, then load in state of virt procr, using 2.27 - * jmp instr to switch the program-counter state -- making the virt procr 2.28 - * the new animator. 2.29 - *At some point, the virt procr will suspend itself by saving out its 2.30 - * animator state (stack ptr, frame ptr, program counter) and switching 2.31 - * back to the OS Thread's animator state, which means restoring the 2.32 - * stack and frame and jumping to the core loop start point. 2.33 - *This cycle then repeats, until a special shutdown virtual processor is 2.34 - * animated, which jumps to the end point at the bottom of core loop. 
2.35 - */ 2.36 -void * 2.37 -coreLoop( void *paramsIn ) 2.38 - { 2.39 - ThdParams *coreLoopThdParams; 2.40 - int thisCoresIdx; 2.41 - VirtProcr *currPr; 2.42 - VMSQueueStruc *readyToAnimateQ; 2.43 - cpu_set_t coreMask; //has 1 in bit positions of allowed cores 2.44 - int errorCode; 2.45 - 2.46 - //work-stealing struc on stack to prevent false-sharing in cache-line 2.47 - volatile GateStruc gate; 2.48 - //preGateProgress, waitProgress, exitProgress, gateClosed; 2.49 - 2.50 - 2.51 - coreLoopThdParams = (ThdParams *)paramsIn; 2.52 - thisCoresIdx = coreLoopThdParams->coreNum; 2.53 - 2.54 - gate.gateClosed = FALSE; 2.55 - gate.preGateProgress = 0; 2.56 - gate.waitProgress = 0; 2.57 - gate.exitProgress = 0; 2.58 - _VMSMasterEnv->workStealingGates[ thisCoresIdx ] = (GateStruc*)&gate;//race @startup 2.59 - 2.60 - //wait until signalled that setup is complete 2.61 - pthread_mutex_lock( &suspendLock ); 2.62 - while( !(_VMSMasterEnv->setupComplete) ) 2.63 - { 2.64 - pthread_cond_wait( &suspend_cond, 2.65 - &suspendLock ); 2.66 - } 2.67 - pthread_mutex_unlock( &suspendLock ); 2.68 - 2.69 - //printf( "\nCore unsuspended: %d\n", coreLoopThdParams->coreNum ); 2.70 - 2.71 - //set thread affinity 2.72 - //Linux requires pinning thd to core inside thread-function 2.73 - //Designate a core by a 1 in bit-position corresponding to the core 2.74 - CPU_ZERO(&coreMask); 2.75 - CPU_SET(coreLoopThdParams->coreNum,&coreMask); 2.76 - //coreMask = 1L << coreLoopThdParams->coreNum; 2.77 - 2.78 - pthread_t selfThd = pthread_self(); 2.79 - errorCode = 2.80 - pthread_setaffinity_np( selfThd, sizeof(coreMask), &coreMask); 2.81 - 2.82 - if(errorCode){ printf("\nset affinity failure\n"); exit(0); } 2.83 - 2.84 - 2.85 - //Save the return address in the SwitchVP function 2.86 - saveCoreLoopReturnAddr((void**)&(_VMSMasterEnv->coreLoopReturnPt)); 2.87 - 2.88 - 2.89 - while(1){ 2.90 - 2.91 - //Get virtual processor from queue 2.92 - //The Q must be a global, static volatile var, so not kept in reg, 2.93 - 
// which forces reloading the pointer after each jmp to this point 2.94 - readyToAnimateQ = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx]; 2.95 - 2.96 - #ifdef USE_WORK_STEALING 2.97 - //Alg for work-stealing designed to make common case fast. Comment 2.98 - // in stealer code explains. 2.99 - gate.preGateProgress++; 2.100 - if( gate.gateClosed ) 2.101 - { //now, set coreloop's progress, so stealer can see that core loop 2.102 - // has made it into the waiting area. 2.103 - gate.waitProgress = gate.preGateProgress; 2.104 - while( gate.gateClosed ) /*busy wait*/; 2.105 - } 2.106 - 2.107 - currPr = (VirtProcr *) readVMSQ( readyToAnimateQ ); 2.108 - 2.109 - //Set the coreloop's progress, so stealer can see it has made it out 2.110 - // of the protected area 2.111 - gate.exitProgress = gate.preGateProgress; 2.112 - #else 2.113 - currPr = (VirtProcr *) readVMSQ( readyToAnimateQ ); 2.114 - #endif 2.115 - 2.116 - if( currPr != NULL ) _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0; 2.117 - else 2.118 - { 2.119 - //============================= MEASUREMENT STUFF ===================== 2.120 - #ifdef MEAS__TIME_MASTER_LOCK 2.121 - int32 startStamp, endStamp; 2.122 - saveLowTimeStampCountInto( startStamp ); 2.123 - #endif 2.124 - //===================================================================== 2.125 - int tries = 0; int gotLock = 0; 2.126 - while( currPr == NULL ) //if queue was empty, enter get masterLock loop 2.127 - { //queue was empty, so get master lock 2.128 - 2.129 - gotLock = __sync_bool_compare_and_swap(&(_VMSMasterEnv->masterLock), 2.130 - UNLOCKED, LOCKED ); 2.131 - if( gotLock ) 2.132 - { //run own MasterVP -- jmps to coreLoops startPt when done 2.133 - currPr = _VMSMasterEnv->masterVPs[thisCoresIdx]; 2.134 - if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 ) 2.135 - { DEBUG( dbgB2BMaster,"Many back to back MasterVPs\n"); 2.136 - pthread_yield(); 2.137 - } 2.138 - _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1; 2.139 - break; //end while -- have 
a VP to animate now 2.140 - } 2.141 - 2.142 - tries++; //if too many, means master on other core taking too long 2.143 - if( tries > MASTERLOCK_RETRIES ) { tries = 0; pthread_yield(); } 2.144 - } 2.145 - //============================= MEASUREMENT STUFF ===================== 2.146 - #ifdef MEAS__TIME_MASTER_LOCK 2.147 - saveLowTimeStampCountInto( endStamp ); 2.148 - addIntervalToHist( startStamp, endStamp, 2.149 - _VMSMasterEnv->masterLockLowTimeHist ); 2.150 - addIntervalToHist( startStamp, endStamp, 2.151 - _VMSMasterEnv->masterLockHighTimeHist ); 2.152 - #endif 2.153 - //===================================================================== 2.154 - 2.155 - } 2.156 - 2.157 - 2.158 - switchToVP(currPr); //The VPs return in here 2.159 - flushRegisters(); 2.160 - }//CoreLoop 2.161 - } 2.162 - 2.163 - 2.164 -void * 2.165 -terminateCoreLoop(VirtProcr *currPr){ 2.166 - //first free shutdown VP that jumped here -- it first restores the 2.167 - // coreloop's stack, so addr of currPr in stack frame is still correct 2.168 - VMS__dissipate_procr( currPr ); 2.169 - pthread_exit( NULL ); 2.170 -} 2.171 - 2.172 - 2.173 - 2.174 -#ifdef SEQUENTIAL 2.175 - 2.176 -//=========================================================================== 2.177 -/*This sequential version is exact same as threaded, except doesn't do the 2.178 - * pin-threads part, nor the wait until setup complete part. 
2.179 - */ 2.180 -void * 2.181 -coreLoop_Seq( void *paramsIn ) 2.182 - { 2.183 - VirtProcr *currPr; 2.184 - VMSQueueStruc *readyToAnimateQ; 2.185 - 2.186 - ThdParams *coreLoopThdParams; 2.187 - int thisCoresIdx; 2.188 - 2.189 - coreLoopThdParams = (ThdParams *)paramsIn; 2.190 -// thisCoresIdx = coreLoopThdParams->coreNum; 2.191 - thisCoresIdx = 0; 2.192 - 2.193 - //Save the return address in the SwitchVP function 2.194 - saveCoreLoopReturnAddr(&(_VMSMasterEnv->coreLoopReturnPt)); 2.195 - 2.196 - 2.197 - while(1){ 2.198 - //Get virtual processor from queue 2.199 - //_VMSWorkQ must be a global, static volatile var, so not kept in reg, 2.200 - // which forces reloading the pointer after each jmp to this point 2.201 - readyToAnimateQ = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx]; 2.202 - currPr = (VirtProcr *) readVMSQ( readyToAnimateQ ); 2.203 - if( currPr == NULL ) 2.204 - { if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 ) 2.205 - { printf("too many back to back MasterVP\n"); exit(1); } 2.206 - _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1; 2.207 - 2.208 - currPr = _VMSMasterEnv->masterVPs[thisCoresIdx]; 2.209 - } 2.210 - else 2.211 - _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0; 2.212 - 2.213 - 2.214 - switchToVP( currPr ); 2.215 - flushRegisters(); 2.216 - } 2.217 - } 2.218 -#endif 2.219 +/* 2.220 + * Copyright 2010 OpenSourceStewardshipFoundation 2.221 + * 2.222 + * Licensed under BSD 2.223 + */ 2.224 + 2.225 + 2.226 +#include "VMS.h" 2.227 +#include "Queue_impl/BlockingQueue.h" 2.228 +#include "ProcrContext.h" 2.229 + 2.230 +#include <stdlib.h> 2.231 +#include <stdio.h> 2.232 +#include <time.h> 2.233 + 2.234 +#include <pthread.h> 2.235 +#include <sched.h> 2.236 + 2.237 +void *terminateCoreLoop(VirtProcr *currPr); 2.238 + 2.239 +/*This is the loop that runs in the OS Thread pinned to each core 2.240 + *Get virt procr from queue, 2.241 + * save state of current animator, then load in state of virt procr, using 2.242 + * jmp instr to switch the 
program-counter state -- making the virt procr 2.243 + * the new animator. 2.244 + *At some point, the virt procr will suspend itself by saving out its 2.245 + * animator state (stack ptr, frame ptr, program counter) and switching 2.246 + * back to the OS Thread's animator state, which means restoring the 2.247 + * stack and frame and jumping to the core loop start point. 2.248 + *This cycle then repeats, until a special shutdown virtual processor is 2.249 + * animated, which jumps to the end point at the bottom of core loop. 2.250 + */ 2.251 +void * 2.252 +coreLoop( void *paramsIn ) 2.253 + { 2.254 + ThdParams *coreLoopThdParams; 2.255 + int thisCoresIdx; 2.256 + VirtProcr *currPr; 2.257 + VMSQueueStruc *readyToAnimateQ; 2.258 + cpu_set_t coreMask; //has 1 in bit positions of allowed cores 2.259 + int errorCode; 2.260 + 2.261 + //work-stealing struc on stack to prevent false-sharing in cache-line 2.262 + volatile GateStruc gate; 2.263 + //preGateProgress, waitProgress, exitProgress, gateClosed; 2.264 + 2.265 + 2.266 + coreLoopThdParams = (ThdParams *)paramsIn; 2.267 + thisCoresIdx = coreLoopThdParams->coreNum; 2.268 + 2.269 + gate.gateClosed = FALSE; 2.270 + gate.preGateProgress = 0; 2.271 + gate.waitProgress = 0; 2.272 + gate.exitProgress = 0; 2.273 + _VMSMasterEnv->workStealingGates[ thisCoresIdx ] = (GateStruc*)&gate;//race @startup 2.274 + 2.275 + //wait until signalled that setup is complete 2.276 + pthread_mutex_lock( &suspendLock ); 2.277 + while( !(_VMSMasterEnv->setupComplete) ) 2.278 + { 2.279 + pthread_cond_wait( &suspend_cond, 2.280 + &suspendLock ); 2.281 + } 2.282 + pthread_mutex_unlock( &suspendLock ); 2.283 + 2.284 + //printf( "\nCore unsuspended: %d\n", coreLoopThdParams->coreNum ); 2.285 + 2.286 + //set thread affinity 2.287 + //Linux requires pinning thd to core inside thread-function 2.288 + //Designate a core by a 1 in bit-position corresponding to the core 2.289 + CPU_ZERO(&coreMask); 2.290 + CPU_SET(coreLoopThdParams->coreNum,&coreMask); 
2.291 + //coreMask = 1L << coreLoopThdParams->coreNum; 2.292 + 2.293 + pthread_t selfThd = pthread_self(); 2.294 + errorCode = 2.295 + pthread_setaffinity_np( selfThd, sizeof(coreMask), &coreMask); 2.296 + 2.297 + if(errorCode){ printf("\nset affinity failure\n"); exit(0); } 2.298 + 2.299 + 2.300 + //Save the return address in the SwitchVP function 2.301 + saveCoreLoopReturnAddr((void**)&(_VMSMasterEnv->coreLoopReturnPt)); 2.302 + 2.303 + 2.304 + while(1){ 2.305 + 2.306 + //Get virtual processor from queue 2.307 + //The Q must be a global, static volatile var, so not kept in reg, 2.308 + // which forces reloading the pointer after each jmp to this point 2.309 + readyToAnimateQ = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx]; 2.310 + 2.311 + #ifdef USE_WORK_STEALING 2.312 + //Alg for work-stealing designed to make common case fast. Comment 2.313 + // in stealer code explains. 2.314 + gate.preGateProgress++; 2.315 + if( gate.gateClosed ) 2.316 + { //now, set coreloop's progress, so stealer can see that core loop 2.317 + // has made it into the waiting area. 
2.318 + gate.waitProgress = gate.preGateProgress; 2.319 + while( gate.gateClosed ) /*busy wait*/; 2.320 + } 2.321 + 2.322 + currPr = (VirtProcr *) readVMSQ( readyToAnimateQ ); 2.323 + 2.324 + //Set the coreloop's progress, so stealer can see it has made it out 2.325 + // of the protected area 2.326 + gate.exitProgress = gate.preGateProgress; 2.327 + #else 2.328 + currPr = (VirtProcr *) readVMSQ( readyToAnimateQ ); 2.329 + #endif 2.330 + 2.331 + if( currPr != NULL ) _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0; 2.332 + else 2.333 + { 2.334 + //============================= MEASUREMENT STUFF ===================== 2.335 + #ifdef MEAS__TIME_MASTER_LOCK 2.336 + int32 startStamp, endStamp; 2.337 + saveLowTimeStampCountInto( startStamp ); 2.338 + #endif 2.339 + //===================================================================== 2.340 + int tries = 0; int gotLock = 0; 2.341 + while( currPr == NULL ) //if queue was empty, enter get masterLock loop 2.342 + { //queue was empty, so get master lock 2.343 + 2.344 + gotLock = __sync_bool_compare_and_swap(&(_VMSMasterEnv->masterLock), 2.345 + UNLOCKED, LOCKED ); 2.346 + if( gotLock ) 2.347 + { //run own MasterVP -- jmps to coreLoops startPt when done 2.348 + currPr = _VMSMasterEnv->masterVPs[thisCoresIdx]; 2.349 + if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 ) 2.350 + { DEBUG( dbgB2BMaster,"Many back to back MasterVPs\n"); 2.351 + pthread_yield(); 2.352 + } 2.353 + _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1; 2.354 + break; //end while -- have a VP to animate now 2.355 + } 2.356 + 2.357 + tries++; //if too many, means master on other core taking too long 2.358 + if( tries > MASTERLOCK_RETRIES ) { tries = 0; pthread_yield(); } 2.359 + } 2.360 + //============================= MEASUREMENT STUFF ===================== 2.361 + #ifdef MEAS__TIME_MASTER_LOCK 2.362 + saveLowTimeStampCountInto( endStamp ); 2.363 + addIntervalToHist( startStamp, endStamp, 2.364 + _VMSMasterEnv->masterLockLowTimeHist ); 2.365 + 
addIntervalToHist( startStamp, endStamp, 2.366 + _VMSMasterEnv->masterLockHighTimeHist ); 2.367 + #endif 2.368 + //===================================================================== 2.369 + 2.370 + } 2.371 + 2.372 + 2.373 + switchToVP(currPr); //The VPs return in here 2.374 + flushRegisters(); 2.375 + }//CoreLoop 2.376 + } 2.377 + 2.378 + 2.379 +void * 2.380 +terminateCoreLoop(VirtProcr *currPr){ 2.381 + //first free shutdown VP that jumped here -- it first restores the 2.382 + // coreloop's stack, so addr of currPr in stack frame is still correct 2.383 + VMS__dissipate_procr( currPr ); 2.384 + pthread_exit( NULL ); 2.385 +} 2.386 + 2.387 + 2.388 + 2.389 +#ifdef SEQUENTIAL 2.390 + 2.391 +//=========================================================================== 2.392 +/*This sequential version is exact same as threaded, except doesn't do the 2.393 + * pin-threads part, nor the wait until setup complete part. 2.394 + */ 2.395 +void * 2.396 +coreLoop_Seq( void *paramsIn ) 2.397 + { 2.398 + VirtProcr *currPr; 2.399 + VMSQueueStruc *readyToAnimateQ; 2.400 + 2.401 + ThdParams *coreLoopThdParams; 2.402 + int thisCoresIdx; 2.403 + 2.404 + coreLoopThdParams = (ThdParams *)paramsIn; 2.405 +// thisCoresIdx = coreLoopThdParams->coreNum; 2.406 + thisCoresIdx = 0; 2.407 + 2.408 + //Save the return address in the SwitchVP function 2.409 + saveCoreLoopReturnAddr(&(_VMSMasterEnv->coreLoopReturnPt)); 2.410 + 2.411 + 2.412 + while(1){ 2.413 + //Get virtual processor from queue 2.414 + //_VMSWorkQ must be a global, static volatile var, so not kept in reg, 2.415 + // which forces reloading the pointer after each jmp to this point 2.416 + readyToAnimateQ = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx]; 2.417 + currPr = (VirtProcr *) readVMSQ( readyToAnimateQ ); 2.418 + if( currPr == NULL ) 2.419 + { if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 ) 2.420 + { printf("too many back to back MasterVP\n"); exit(1); } 2.421 + _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1; 
2.422 + 2.423 + currPr = _VMSMasterEnv->masterVPs[thisCoresIdx]; 2.424 + } 2.425 + else 2.426 + _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0; 2.427 + 2.428 + 2.429 + switchToVP( currPr ); 2.430 + flushRegisters(); 2.431 + } 2.432 + } 2.433 +#endif
3.1 --- a/MasterLoop.c Thu Oct 06 16:24:17 2011 +0200 3.2 +++ b/MasterLoop.c Wed Jan 04 16:10:11 2012 -0800 3.3 @@ -1,373 +1,373 @@ 3.4 -/* 3.5 - * Copyright 2010 OpenSourceStewardshipFoundation 3.6 - * 3.7 - * Licensed under BSD 3.8 - */ 3.9 - 3.10 - 3.11 - 3.12 -#include <stdio.h> 3.13 -#include <stddef.h> 3.14 - 3.15 -#include "VMS.h" 3.16 -#include "ProcrContext.h" 3.17 - 3.18 - 3.19 -//=========================================================================== 3.20 -void inline 3.21 -stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ, 3.22 - VirtProcr *masterPr ); 3.23 - 3.24 -//=========================================================================== 3.25 - 3.26 - 3.27 - 3.28 -/*This code is animated by the virtual Master processor. 3.29 - * 3.30 - *Polls each sched slot exactly once, hands any requests made by a newly 3.31 - * done slave to the "request handler" plug-in function 3.32 - * 3.33 - *Any slots that need a virt procr assigned are given to the "schedule" 3.34 - * plug-in function, which tries to assign a virt procr (slave) to it. 3.35 - * 3.36 - *When all slots needing a processor have been given to the schedule plug-in, 3.37 - * a fraction of the procrs successfully scheduled are put into the 3.38 - * work queue, then a continuation of this function is put in, then the rest 3.39 - * of the virt procrs that were successfully scheduled. 3.40 - * 3.41 - *The first thing the continuation does is busy-wait until the previous 3.42 - * animation completes. This is because an (unlikely) continuation may 3.43 - * sneak through queue before previous continuation is done putting second 3.44 - * part of scheduled slaves in, which is the only race condition. 
3.45 - * 3.46 - */ 3.47 - 3.48 -/*May 29, 2010 -- birth a Master during init so that first core loop to 3.49 - * start running gets it and does all the stuff for a newly born -- 3.50 - * from then on, will be doing continuation, but do suspension self 3.51 - * directly at end of master loop 3.52 - *So VMS__init just births the master virtual processor same way it births 3.53 - * all the others -- then does any extra setup needed and puts it into the 3.54 - * work queue. 3.55 - *However means have to make masterEnv a global static volatile the same way 3.56 - * did with readyToAnimateQ in core loop. -- for performance, put the 3.57 - * jump to the core loop directly in here, and have it directly jump back. 3.58 - * 3.59 - * 3.60 - *Aug 18, 2010 -- Going to a separate MasterVP for each core, to see if this 3.61 - * avoids the suspected bug in the system stack that causes bizarre faults 3.62 - * at random places in the system code. 3.63 - * 3.64 - *So, this function is coupled to each of the MasterVPs, -- meaning this 3.65 - * function can't rely on a particular stack and frame -- each MasterVP that 3.66 - * animates this function has a different one. 3.67 - * 3.68 - *At this point, the masterLoop does not write itself into the queue anymore, 3.69 - * instead, the coreLoop acquires the masterLock when it has nothing to 3.70 - * animate, and then animates its own masterLoop. However, still try to put 3.71 - * several AppVPs into the queue to amortize the startup cost of switching 3.72 - * to the MasterVP. Note, don't have to worry about latency of requests much 3.73 - * because most requests generate work for same core -- only latency issue 3.74 - * is case when other cores starved and one core's requests generate work 3.75 - * for them -- so keep max in queue to 3 or 4.. 
3.76 - */ 3.77 -void masterLoop( void *initData, VirtProcr *animatingPr ) 3.78 - { 3.79 - int32 slotIdx, numSlotsFilled; 3.80 - VirtProcr *schedVirtPr; 3.81 - SchedSlot *currSlot, **schedSlots; 3.82 - MasterEnv *masterEnv; 3.83 - VMSQueueStruc *readyToAnimateQ; 3.84 - 3.85 - SlaveScheduler slaveScheduler; 3.86 - RequestHandler requestHandler; 3.87 - void *semanticEnv; 3.88 - 3.89 - int32 thisCoresIdx; 3.90 - VirtProcr *masterPr; 3.91 - volatile VirtProcr *volatileMasterPr; 3.92 - 3.93 - volatileMasterPr = animatingPr; 3.94 - masterPr = (VirtProcr*)volatileMasterPr; //used to force re-define after jmp 3.95 - 3.96 - //First animation of each MasterVP will in turn animate this part 3.97 - // of setup code.. (VP creator sets up the stack as if this function 3.98 - // was called normally, but actually get here by jmp) 3.99 - //So, setup values about stack ptr, jmp pt and all that 3.100 - //masterPr->nextInstrPt = &&masterLoopStartPt; 3.101 - 3.102 - 3.103 - //Note, got rid of writing the stack and frame ptr up here, because 3.104 - // only one 3.105 - // core can ever animate a given MasterVP, so don't need to communicate 3.106 - // new frame and stack ptr to the MasterVP storage before a second 3.107 - // version of that MasterVP can get animated on a different core. 3.108 - //Also got rid of the busy-wait. 3.109 - 3.110 - 3.111 - //masterLoopStartPt: 3.112 - while(1){ 3.113 - 3.114 - //============================= MEASUREMENT STUFF ======================== 3.115 - #ifdef MEAS__TIME_MASTER 3.116 - //Total Master time includes one coreloop time -- just assume the core 3.117 - // loop time is same for Master as for AppVPs, even though it may be 3.118 - // smaller due to higher predictability of the fixed jmp. 
3.119 - saveLowTimeStampCountInto( masterPr->startMasterTSCLow ); 3.120 - #endif 3.121 - //======================================================================== 3.122 - 3.123 - masterEnv = (MasterEnv*)_VMSMasterEnv; 3.124 - 3.125 - //GCC may optimize so doesn't always re-define from frame-storage 3.126 - masterPr = (VirtProcr*)volatileMasterPr; //just to make sure after jmp 3.127 - thisCoresIdx = masterPr->coreAnimatedBy; 3.128 - readyToAnimateQ = masterEnv->readyToAnimateQs[thisCoresIdx]; 3.129 - schedSlots = masterEnv->allSchedSlots[thisCoresIdx]; 3.130 - 3.131 - requestHandler = masterEnv->requestHandler; 3.132 - slaveScheduler = masterEnv->slaveScheduler; 3.133 - semanticEnv = masterEnv->semanticEnv; 3.134 - 3.135 - 3.136 - //Poll each slot's Done flag 3.137 - numSlotsFilled = 0; 3.138 - for( slotIdx = 0; slotIdx < NUM_SCHED_SLOTS; slotIdx++) 3.139 - { 3.140 - currSlot = schedSlots[ slotIdx ]; 3.141 - 3.142 - if( currSlot->workIsDone ) 3.143 - { 3.144 - currSlot->workIsDone = FALSE; 3.145 - currSlot->needsProcrAssigned = TRUE; 3.146 - 3.147 - //process requests from slave to master 3.148 - //====================== MEASUREMENT STUFF =================== 3.149 - #ifdef MEAS__TIME_PLUGIN 3.150 - int32 startStamp1, endStamp1; 3.151 - saveLowTimeStampCountInto( startStamp1 ); 3.152 - #endif 3.153 - //============================================================ 3.154 - (*requestHandler)( currSlot->procrAssignedToSlot, semanticEnv ); 3.155 - //====================== MEASUREMENT STUFF =================== 3.156 - #ifdef MEAS__TIME_PLUGIN 3.157 - saveLowTimeStampCountInto( endStamp1 ); 3.158 - addIntervalToHist( startStamp1, endStamp1, 3.159 - _VMSMasterEnv->reqHdlrLowTimeHist ); 3.160 - addIntervalToHist( startStamp1, endStamp1, 3.161 - _VMSMasterEnv->reqHdlrHighTimeHist ); 3.162 - #endif 3.163 - //============================================================ 3.164 - } 3.165 - if( currSlot->needsProcrAssigned ) 3.166 - { //give slot a new virt procr 3.167 - schedVirtPr 
= 3.168 - (*slaveScheduler)( semanticEnv, thisCoresIdx ); 3.169 - 3.170 - if( schedVirtPr != NULL ) 3.171 - { currSlot->procrAssignedToSlot = schedVirtPr; 3.172 - schedVirtPr->schedSlot = currSlot; 3.173 - currSlot->needsProcrAssigned = FALSE; 3.174 - numSlotsFilled += 1; 3.175 - 3.176 - writeVMSQ( schedVirtPr, readyToAnimateQ ); 3.177 - } 3.178 - } 3.179 - } 3.180 - 3.181 - 3.182 - #ifdef USE_WORK_STEALING 3.183 - //If no slots filled, means no more work, look for work to steal. 3.184 - if( numSlotsFilled == 0 ) 3.185 - { gateProtected_stealWorkInto( currSlot, readyToAnimateQ, masterPr ); 3.186 - } 3.187 - #endif 3.188 - 3.189 - 3.190 - #ifdef MEAS__TIME_MASTER 3.191 - saveLowTimeStampCountInto( masterPr->endMasterTSCLow ); 3.192 - #endif 3.193 - 3.194 - masterSwitchToCoreLoop(animatingPr); 3.195 - flushRegisters(); 3.196 - }//MasterLoop 3.197 - 3.198 - 3.199 - } 3.200 - 3.201 - 3.202 - 3.203 -/*This has a race condition -- the coreloops are accessing their own queues 3.204 - * at the same time that this work-stealer on a different core is trying to 3.205 - */ 3.206 -void inline 3.207 -stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ, 3.208 - VirtProcr *masterPr ) 3.209 - { 3.210 - VirtProcr *stolenPr; 3.211 - int32 coreIdx, i; 3.212 - VMSQueueStruc *currQ; 3.213 - 3.214 - stolenPr = NULL; 3.215 - coreIdx = masterPr->coreAnimatedBy; 3.216 - for( i = 0; i < NUM_CORES -1; i++ ) 3.217 - { 3.218 - if( coreIdx >= NUM_CORES -1 ) 3.219 - { coreIdx = 0; 3.220 - } 3.221 - else 3.222 - { coreIdx++; 3.223 - } 3.224 - currQ = _VMSMasterEnv->readyToAnimateQs[coreIdx]; 3.225 - if( numInVMSQ( currQ ) > 0 ) 3.226 - { stolenPr = readVMSQ (currQ ); 3.227 - break; 3.228 - } 3.229 - } 3.230 - 3.231 - if( stolenPr != NULL ) 3.232 - { currSlot->procrAssignedToSlot = stolenPr; 3.233 - stolenPr->schedSlot = currSlot; 3.234 - currSlot->needsProcrAssigned = FALSE; 3.235 - 3.236 - writeVMSQ( stolenPr, readyToAnimateQ ); 3.237 - } 3.238 - } 3.239 - 3.240 -/*This algorithm 
makes the common case fast. Make the coreloop passive, 3.241 - * and show its progress. Make the stealer control a gate that coreloop 3.242 - * has to pass. 3.243 - *To avoid interference, only one stealer at a time. Use a global 3.244 - * stealer-lock. 3.245 - * 3.246 - *The pattern is based on a gate -- stealer shuts the gate, then monitors 3.247 - * to be sure any already past make it all the way out, before starting. 3.248 - *So, have a "progress" measure just before the gate, then have two after it, 3.249 - * one is in a "waiting room" outside the gate, the other is at the exit. 3.250 - *Then, the stealer first shuts the gate, then checks the progress measure 3.251 - * outside it, then looks to see if the progress measure at the exit is the 3.252 - * same. If yes, it knows the protected area is empty 'cause no other way 3.253 - * to get in and the last to get in also exited. 3.254 - *If the progress measure at the exit is not the same, then the stealer goes 3.255 - * into a loop checking both the waiting-area and the exit progress-measures 3.256 - * until one of them shows the same as the measure outside the gate. Might 3.257 - * as well re-read the measure outside the gate each go around, just to be 3.258 - * sure. It is guaranteed that one of the two will eventually match the one 3.259 - * outside the gate. 3.260 - * 3.261 - *Here's an informal proof of correctness: 3.262 - *The gate can be closed at any point, and have only four cases: 3.263 - * 1) coreloop made it past the gate-closing but not yet past the exit 3.264 - * 2) coreloop made it past the pre-gate progress update but not yet past 3.265 - * the gate, 3.266 - * 3) coreloop is right before the pre-gate update 3.267 - * 4) coreloop is past the exit and far from the pre-gate update. 3.268 - * 3.269 - * Covering the cases in reverse order, 3.270 - * 4) is not a problem -- stealer will read pre-gate progress, see that it 3.271 - * matches exit progress, and the gate is closed, so stealer can proceed. 
3.272 - * 3) stealer will read pre-gate progress just after coreloop updates it.. 3.273 - * so stealer goes into a loop until the coreloop causes wait-progress 3.274 - * to match pre-gate progress, so then stealer can proceed 3.275 - * 2) same as 3.. 3.276 - * 1) stealer reads pre-gate progress, sees that it's different than exit, 3.277 - * so goes into loop until exit matches pre-gate, now it knows coreloop 3.278 - * is not in protected and cannot get back in, so can proceed. 3.279 - * 3.280 - *Implementation for the stealer: 3.281 - * 3.282 - *First, acquire the stealer lock -- only cores with no work to do will 3.283 - * compete to steal, so not a big performance penalty having only one -- 3.284 - * will rarely have multiple stealers in a system with plenty of work -- and 3.285 - * in a system with little work, it doesn't matter. 3.286 - * 3.287 - *Note, have single-reader, single-writer pattern for all variables used to 3.288 - * communicate between stealer and victims 3.289 - * 3.290 - *So, scan the queues of the core loops, until find non-empty. Each core 3.291 - * has its own list that it scans. The list goes in order from closest to 3.292 - * furthest core, so it steals first from close cores. Later can add 3.293 - * taking info from the app about overlapping footprints, and scan all the 3.294 - * others then choose work with the most footprint overlap with the contents 3.295 - * of this core's cache. 3.296 - * 3.297 - *Now, have a victim want to take work from. So, shut the gate in that 3.298 - * coreloop, by setting the "gate closed" var on its stack to TRUE. 3.299 - *Then, read the core's pre-gate progress and compare to the core's exit 3.300 - * progress. 3.301 - *If same, can proceed to take work from the coreloop's queue. When done, 3.302 - * write FALSE to gate closed var. 3.303 - *If different, then enter a loop that reads the pre-gate progress, then 3.304 - * compares to exit progress then to wait progress. 
When one of two 3.305 - * matches, proceed. Take work from the coreloop's queue. When done, 3.306 - * write FALSE to the gate closed var. 3.307 - * 3.308 - */ 3.309 -void inline 3.310 -gateProtected_stealWorkInto( SchedSlot *currSlot, 3.311 - VMSQueueStruc *myReadyToAnimateQ, 3.312 - VirtProcr *masterPr ) 3.313 - { 3.314 - VirtProcr *stolenPr; 3.315 - int32 coreIdx, i, haveAVictim, gotLock; 3.316 - VMSQueueStruc *victimsQ; 3.317 - 3.318 - volatile GateStruc *vicGate; 3.319 - int32 coreMightBeInProtected; 3.320 - 3.321 - 3.322 - 3.323 - //see if any other cores have work available to steal 3.324 - haveAVictim = FALSE; 3.325 - coreIdx = masterPr->coreAnimatedBy; 3.326 - for( i = 0; i < NUM_CORES -1; i++ ) 3.327 - { 3.328 - if( coreIdx >= NUM_CORES -1 ) 3.329 - { coreIdx = 0; 3.330 - } 3.331 - else 3.332 - { coreIdx++; 3.333 - } 3.334 - victimsQ = _VMSMasterEnv->readyToAnimateQs[coreIdx]; 3.335 - if( numInVMSQ( victimsQ ) > 0 ) 3.336 - { haveAVictim = TRUE; 3.337 - vicGate = _VMSMasterEnv->workStealingGates[ coreIdx ]; 3.338 - break; 3.339 - } 3.340 - } 3.341 - if( !haveAVictim ) return; //no work to steal, exit 3.342 - 3.343 - //have a victim core, now get the stealer-lock 3.344 - gotLock =__sync_bool_compare_and_swap( &(_VMSMasterEnv->workStealingLock), 3.345 - UNLOCKED, LOCKED ); 3.346 - if( !gotLock ) return; //go back to core loop, which will re-start master 3.347 - 3.348 - 3.349 - //====== Start Gate-protection ======= 3.350 - vicGate->gateClosed = TRUE; 3.351 - coreMightBeInProtected= vicGate->preGateProgress != vicGate->exitProgress; 3.352 - while( coreMightBeInProtected ) 3.353 - { //wait until sure 3.354 - if( vicGate->preGateProgress == vicGate->waitProgress ) 3.355 - coreMightBeInProtected = FALSE; 3.356 - if( vicGate->preGateProgress == vicGate->exitProgress ) 3.357 - coreMightBeInProtected = FALSE; 3.358 - } 3.359 - 3.360 - stolenPr = readVMSQ ( victimsQ ); 3.361 - 3.362 - vicGate->gateClosed = FALSE; 3.363 - //======= End Gate-protection ======= 3.364 - 
3.365 - 3.366 - if( stolenPr != NULL ) //victim could have been in protected and taken 3.367 - { currSlot->procrAssignedToSlot = stolenPr; 3.368 - stolenPr->schedSlot = currSlot; 3.369 - currSlot->needsProcrAssigned = FALSE; 3.370 - 3.371 - writeVMSQ( stolenPr, myReadyToAnimateQ ); 3.372 - } 3.373 - 3.374 - //unlock the work stealing lock 3.375 - _VMSMasterEnv->workStealingLock = UNLOCKED; 3.376 - } 3.377 +/* 3.378 + * Copyright 2010 OpenSourceStewardshipFoundation 3.379 + * 3.380 + * Licensed under BSD 3.381 + */ 3.382 + 3.383 + 3.384 + 3.385 +#include <stdio.h> 3.386 +#include <stddef.h> 3.387 + 3.388 +#include "VMS.h" 3.389 +#include "ProcrContext.h" 3.390 + 3.391 + 3.392 +//=========================================================================== 3.393 +void inline 3.394 +stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ, 3.395 + VirtProcr *masterPr ); 3.396 + 3.397 +//=========================================================================== 3.398 + 3.399 + 3.400 + 3.401 +/*This code is animated by the virtual Master processor. 3.402 + * 3.403 + *Polls each sched slot exactly once, hands any requests made by a newly 3.404 + * done slave to the "request handler" plug-in function 3.405 + * 3.406 + *Any slots that need a virt procr assigned are given to the "schedule" 3.407 + * plug-in function, which tries to assign a virt procr (slave) to it. 3.408 + * 3.409 + *When all slots needing a processor have been given to the schedule plug-in, 3.410 + * a fraction of the procrs successfully scheduled are put into the 3.411 + * work queue, then a continuation of this function is put in, then the rest 3.412 + * of the virt procrs that were successfully scheduled. 3.413 + * 3.414 + *The first thing the continuation does is busy-wait until the previous 3.415 + * animation completes. 
This is because an (unlikely) continuation may 3.416 + * sneak through queue before previous continuation is done putting second 3.417 + * part of scheduled slaves in, which is the only race condition. 3.418 + * 3.419 + */ 3.420 + 3.421 +/*May 29, 2010 -- birth a Master during init so that first core loop to 3.422 + * start running gets it and does all the stuff for a newly born -- 3.423 + * from then on, will be doing continuation, but do suspension self 3.424 + * directly at end of master loop 3.425 + *So VMS__init just births the master virtual processor same way it births 3.426 + * all the others -- then does any extra setup needed and puts it into the 3.427 + * work queue. 3.428 + *However means have to make masterEnv a global static volatile the same way 3.429 + * did with readyToAnimateQ in core loop. -- for performance, put the 3.430 + * jump to the core loop directly in here, and have it directly jump back. 3.431 + * 3.432 + * 3.433 + *Aug 18, 2010 -- Going to a separate MasterVP for each core, to see if this 3.434 + * avoids the suspected bug in the system stack that causes bizarre faults 3.435 + * at random places in the system code. 3.436 + * 3.437 + *So, this function is coupled to each of the MasterVPs, -- meaning this 3.438 + * function can't rely on a particular stack and frame -- each MasterVP that 3.439 + * animates this function has a different one. 3.440 + * 3.441 + *At this point, the masterLoop does not write itself into the queue anymore, 3.442 + * instead, the coreLoop acquires the masterLock when it has nothing to 3.443 + * animate, and then animates its own masterLoop. However, still try to put 3.444 + * several AppVPs into the queue to amortize the startup cost of switching 3.445 + * to the MasterVP. 
Note, don't have to worry about latency of requests much 3.446 + * because most requests generate work for same core -- only latency issue 3.447 + * is case when other cores starved and one core's requests generate work 3.448 + * for them -- so keep max in queue to 3 or 4.. 3.449 + */ 3.450 +void masterLoop( void *initData, VirtProcr *animatingPr ) 3.451 + { 3.452 + int32 slotIdx, numSlotsFilled; 3.453 + VirtProcr *schedVirtPr; 3.454 + SchedSlot *currSlot, **schedSlots; 3.455 + MasterEnv *masterEnv; 3.456 + VMSQueueStruc *readyToAnimateQ; 3.457 + 3.458 + SlaveScheduler slaveScheduler; 3.459 + RequestHandler requestHandler; 3.460 + void *semanticEnv; 3.461 + 3.462 + int32 thisCoresIdx; 3.463 + VirtProcr *masterPr; 3.464 + volatile VirtProcr *volatileMasterPr; 3.465 + 3.466 + volatileMasterPr = animatingPr; 3.467 + masterPr = (VirtProcr*)volatileMasterPr; //used to force re-define after jmp 3.468 + 3.469 + //First animation of each MasterVP will in turn animate this part 3.470 + // of setup code.. (VP creator sets up the stack as if this function 3.471 + // was called normally, but actually get here by jmp) 3.472 + //So, setup values about stack ptr, jmp pt and all that 3.473 + //masterPr->nextInstrPt = &&masterLoopStartPt; 3.474 + 3.475 + 3.476 + //Note, got rid of writing the stack and frame ptr up here, because 3.477 + // only one 3.478 + // core can ever animate a given MasterVP, so don't need to communicate 3.479 + // new frame and stack ptr to the MasterVP storage before a second 3.480 + // version of that MasterVP can get animated on a different core. 3.481 + //Also got rid of the busy-wait. 
3.482 + 3.483 + 3.484 + //masterLoopStartPt: 3.485 + while(1){ 3.486 + 3.487 + //============================= MEASUREMENT STUFF ======================== 3.488 + #ifdef MEAS__TIME_MASTER 3.489 + //Total Master time includes one coreloop time -- just assume the core 3.490 + // loop time is same for Master as for AppVPs, even though it may be 3.491 + // smaller due to higher predictability of the fixed jmp. 3.492 + saveLowTimeStampCountInto( masterPr->startMasterTSCLow ); 3.493 + #endif 3.494 + //======================================================================== 3.495 + 3.496 + masterEnv = (MasterEnv*)_VMSMasterEnv; 3.497 + 3.498 + //GCC may optimize so doesn't always re-define from frame-storage 3.499 + masterPr = (VirtProcr*)volatileMasterPr; //just to make sure after jmp 3.500 + thisCoresIdx = masterPr->coreAnimatedBy; 3.501 + readyToAnimateQ = masterEnv->readyToAnimateQs[thisCoresIdx]; 3.502 + schedSlots = masterEnv->allSchedSlots[thisCoresIdx]; 3.503 + 3.504 + requestHandler = masterEnv->requestHandler; 3.505 + slaveScheduler = masterEnv->slaveScheduler; 3.506 + semanticEnv = masterEnv->semanticEnv; 3.507 + 3.508 + 3.509 + //Poll each slot's Done flag 3.510 + numSlotsFilled = 0; 3.511 + for( slotIdx = 0; slotIdx < NUM_SCHED_SLOTS; slotIdx++) 3.512 + { 3.513 + currSlot = schedSlots[ slotIdx ]; 3.514 + 3.515 + if( currSlot->workIsDone ) 3.516 + { 3.517 + currSlot->workIsDone = FALSE; 3.518 + currSlot->needsProcrAssigned = TRUE; 3.519 + 3.520 + //process requests from slave to master 3.521 + //====================== MEASUREMENT STUFF =================== 3.522 + #ifdef MEAS__TIME_PLUGIN 3.523 + int32 startStamp1, endStamp1; 3.524 + saveLowTimeStampCountInto( startStamp1 ); 3.525 + #endif 3.526 + //============================================================ 3.527 + (*requestHandler)( currSlot->procrAssignedToSlot, semanticEnv ); 3.528 + //====================== MEASUREMENT STUFF =================== 3.529 + #ifdef MEAS__TIME_PLUGIN 3.530 + 
saveLowTimeStampCountInto( endStamp1 ); 3.531 + addIntervalToHist( startStamp1, endStamp1, 3.532 + _VMSMasterEnv->reqHdlrLowTimeHist ); 3.533 + addIntervalToHist( startStamp1, endStamp1, 3.534 + _VMSMasterEnv->reqHdlrHighTimeHist ); 3.535 + #endif 3.536 + //============================================================ 3.537 + } 3.538 + if( currSlot->needsProcrAssigned ) 3.539 + { //give slot a new virt procr 3.540 + schedVirtPr = 3.541 + (*slaveScheduler)( semanticEnv, thisCoresIdx ); 3.542 + 3.543 + if( schedVirtPr != NULL ) 3.544 + { currSlot->procrAssignedToSlot = schedVirtPr; 3.545 + schedVirtPr->schedSlot = currSlot; 3.546 + currSlot->needsProcrAssigned = FALSE; 3.547 + numSlotsFilled += 1; 3.548 + 3.549 + writeVMSQ( schedVirtPr, readyToAnimateQ ); 3.550 + } 3.551 + } 3.552 + } 3.553 + 3.554 + 3.555 + #ifdef USE_WORK_STEALING 3.556 + //If no slots filled, means no more work, look for work to steal. 3.557 + if( numSlotsFilled == 0 ) 3.558 + { gateProtected_stealWorkInto( currSlot, readyToAnimateQ, masterPr ); 3.559 + } 3.560 + #endif 3.561 + 3.562 + 3.563 + #ifdef MEAS__TIME_MASTER 3.564 + saveLowTimeStampCountInto( masterPr->endMasterTSCLow ); 3.565 + #endif 3.566 + 3.567 + masterSwitchToCoreLoop(animatingPr); 3.568 + flushRegisters(); 3.569 + }//MasterLoop 3.570 + 3.571 + 3.572 + } 3.573 + 3.574 + 3.575 + 3.576 +/*This has a race condition -- the coreloops are accessing their own queues 3.577 + * at the same time that this work-stealer on a different core is trying to 3.578 + */ 3.579 +void inline 3.580 +stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ, 3.581 + VirtProcr *masterPr ) 3.582 + { 3.583 + VirtProcr *stolenPr; 3.584 + int32 coreIdx, i; 3.585 + VMSQueueStruc *currQ; 3.586 + 3.587 + stolenPr = NULL; 3.588 + coreIdx = masterPr->coreAnimatedBy; 3.589 + for( i = 0; i < NUM_CORES -1; i++ ) 3.590 + { 3.591 + if( coreIdx >= NUM_CORES -1 ) 3.592 + { coreIdx = 0; 3.593 + } 3.594 + else 3.595 + { coreIdx++; 3.596 + } 3.597 + currQ = 
_VMSMasterEnv->readyToAnimateQs[coreIdx]; 3.598 + if( numInVMSQ( currQ ) > 0 ) 3.599 + { stolenPr = readVMSQ (currQ ); 3.600 + break; 3.601 + } 3.602 + } 3.603 + 3.604 + if( stolenPr != NULL ) 3.605 + { currSlot->procrAssignedToSlot = stolenPr; 3.606 + stolenPr->schedSlot = currSlot; 3.607 + currSlot->needsProcrAssigned = FALSE; 3.608 + 3.609 + writeVMSQ( stolenPr, readyToAnimateQ ); 3.610 + } 3.611 + } 3.612 + 3.613 +/*This algorithm makes the common case fast. Make the coreloop passive, 3.614 + * and show its progress. Make the stealer control a gate that coreloop 3.615 + * has to pass. 3.616 + *To avoid interference, only one stealer at a time. Use a global 3.617 + * stealer-lock. 3.618 + * 3.619 + *The pattern is based on a gate -- stealer shuts the gate, then monitors 3.620 + * to be sure any already past make it all the way out, before starting. 3.621 + *So, have a "progress" measure just before the gate, then have two after it, 3.622 + * one is in a "waiting room" outside the gate, the other is at the exit. 3.623 + *Then, the stealer first shuts the gate, then checks the progress measure 3.624 + * outside it, then looks to see if the progress measure at the exit is the 3.625 + * same. If yes, it knows the protected area is empty 'cause no other way 3.626 + * to get in and the last to get in also exited. 3.627 + *If the progress measure at the exit is not the same, then the stealer goes 3.628 + * into a loop checking both the waiting-area and the exit progress-measures 3.629 + * until one of them shows the same as the measure outside the gate. Might 3.630 + * as well re-read the measure outside the gate each go around, just to be 3.631 + * sure. It is guaranteed that one of the two will eventually match the one 3.632 + * outside the gate. 
3.633 + * 3.634 + *Here's an informal proof of correctness: 3.635 + *The gate can be closed at any point, and have only four cases: 3.636 + * 1) coreloop made it past the gate-closing but not yet past the exit 3.637 + * 2) coreloop made it past the pre-gate progress update but not yet past 3.638 + * the gate, 3.639 + * 3) coreloop is right before the pre-gate update 3.640 + * 4) coreloop is past the exit and far from the pre-gate update. 3.641 + * 3.642 + * Covering the cases in reverse order, 3.643 + * 4) is not a problem -- stealer will read pre-gate progress, see that it 3.644 + * matches exit progress, and the gate is closed, so stealer can proceed. 3.645 + * 3) stealer will read pre-gate progress just after coreloop updates it.. 3.646 + * so stealer goes into a loop until the coreloop causes wait-progress 3.647 + * to match pre-gate progress, so then stealer can proceed 3.648 + * 2) same as 3.. 3.649 + * 1) stealer reads pre-gate progress, sees that it's different than exit, 3.650 + * so goes into loop until exit matches pre-gate, now it knows coreloop 3.651 + * is not in protected and cannot get back in, so can proceed. 3.652 + * 3.653 + *Implementation for the stealer: 3.654 + * 3.655 + *First, acquire the stealer lock -- only cores with no work to do will 3.656 + * compete to steal, so not a big performance penalty having only one -- 3.657 + * will rarely have multiple stealers in a system with plenty of work -- and 3.658 + * in a system with little work, it doesn't matter. 3.659 + * 3.660 + *Note, have single-reader, single-writer pattern for all variables used to 3.661 + * communicate between stealer and victims 3.662 + * 3.663 + *So, scan the queues of the core loops, until find non-empty. Each core 3.664 + * has its own list that it scans. The list goes in order from closest to 3.665 + * furthest core, so it steals first from close cores. 
Later can add 3.666 + * taking info from the app about overlapping footprints, and scan all the 3.667 + * others then choose work with the most footprint overlap with the contents 3.668 + * of this core's cache. 3.669 + * 3.670 + *Now, have a victim want to take work from. So, shut the gate in that 3.671 + * coreloop, by setting the "gate closed" var on its stack to TRUE. 3.672 + *Then, read the core's pre-gate progress and compare to the core's exit 3.673 + * progress. 3.674 + *If same, can proceed to take work from the coreloop's queue. When done, 3.675 + * write FALSE to gate closed var. 3.676 + *If different, then enter a loop that reads the pre-gate progress, then 3.677 + * compares to exit progress then to wait progress. When one of two 3.678 + * matches, proceed. Take work from the coreloop's queue. When done, 3.679 + * write FALSE to the gate closed var. 3.680 + * 3.681 + */ 3.682 +void inline 3.683 +gateProtected_stealWorkInto( SchedSlot *currSlot, 3.684 + VMSQueueStruc *myReadyToAnimateQ, 3.685 + VirtProcr *masterPr ) 3.686 + { 3.687 + VirtProcr *stolenPr; 3.688 + int32 coreIdx, i, haveAVictim, gotLock; 3.689 + VMSQueueStruc *victimsQ; 3.690 + 3.691 + volatile GateStruc *vicGate; 3.692 + int32 coreMightBeInProtected; 3.693 + 3.694 + 3.695 + 3.696 + //see if any other cores have work available to steal 3.697 + haveAVictim = FALSE; 3.698 + coreIdx = masterPr->coreAnimatedBy; 3.699 + for( i = 0; i < NUM_CORES -1; i++ ) 3.700 + { 3.701 + if( coreIdx >= NUM_CORES -1 ) 3.702 + { coreIdx = 0; 3.703 + } 3.704 + else 3.705 + { coreIdx++; 3.706 + } 3.707 + victimsQ = _VMSMasterEnv->readyToAnimateQs[coreIdx]; 3.708 + if( numInVMSQ( victimsQ ) > 0 ) 3.709 + { haveAVictim = TRUE; 3.710 + vicGate = _VMSMasterEnv->workStealingGates[ coreIdx ]; 3.711 + break; 3.712 + } 3.713 + } 3.714 + if( !haveAVictim ) return; //no work to steal, exit 3.715 + 3.716 + //have a victim core, now get the stealer-lock 3.717 + gotLock =__sync_bool_compare_and_swap( 
&(_VMSMasterEnv->workStealingLock), 3.718 + UNLOCKED, LOCKED ); 3.719 + if( !gotLock ) return; //go back to core loop, which will re-start master 3.720 + 3.721 + 3.722 + //====== Start Gate-protection ======= 3.723 + vicGate->gateClosed = TRUE; 3.724 + coreMightBeInProtected= vicGate->preGateProgress != vicGate->exitProgress; 3.725 + while( coreMightBeInProtected ) 3.726 + { //wait until sure 3.727 + if( vicGate->preGateProgress == vicGate->waitProgress ) 3.728 + coreMightBeInProtected = FALSE; 3.729 + if( vicGate->preGateProgress == vicGate->exitProgress ) 3.730 + coreMightBeInProtected = FALSE; 3.731 + } 3.732 + 3.733 + stolenPr = readVMSQ ( victimsQ ); 3.734 + 3.735 + vicGate->gateClosed = FALSE; 3.736 + //======= End Gate-protection ======= 3.737 + 3.738 + 3.739 + if( stolenPr != NULL ) //victim could have been in protected and taken 3.740 + { currSlot->procrAssignedToSlot = stolenPr; 3.741 + stolenPr->schedSlot = currSlot; 3.742 + currSlot->needsProcrAssigned = FALSE; 3.743 + 3.744 + writeVMSQ( stolenPr, myReadyToAnimateQ ); 3.745 + } 3.746 + 3.747 + //unlock the work stealing lock 3.748 + _VMSMasterEnv->workStealingLock = UNLOCKED; 3.749 + }
4.1 --- a/ProcrContext.h Thu Oct 06 16:24:17 2011 +0200 4.2 +++ b/ProcrContext.h Wed Jan 04 16:10:11 2012 -0800 4.3 @@ -1,33 +1,33 @@ 4.4 -/* 4.5 - * Copyright 2009 OpenSourceStewardshipFoundation.org 4.6 - * Licensed under GNU General Public License version 2 4.7 - * 4.8 - * Author: seanhalle@yahoo.com 4.9 - * 4.10 - */ 4.11 - 4.12 -#ifndef _ProcrContext_H 4.13 -#define _ProcrContext_H 4.14 -#define _GNU_SOURCE 4.15 - 4.16 -void saveCoreLoopReturnAddr(void **returnAddress); 4.17 - 4.18 -void switchToVP(VirtProcr *nextProcr); 4.19 - 4.20 -void switchToCoreLoop(VirtProcr *nextProcr); 4.21 - 4.22 -void masterSwitchToCoreLoop(VirtProcr *nextProcr); 4.23 - 4.24 -void startVirtProcrFn(); 4.25 - 4.26 -void *asmTerminateCoreLoop(VirtProcr *currPr); 4.27 - 4.28 -#define flushRegisters() \ 4.29 - asm volatile ("":::"%rbx", "%r12", "%r13","%r14","%r15") 4.30 - 4.31 -inline VirtProcr * 4.32 -create_procr_helper( VirtProcr *newPr, VirtProcrFnPtr fnPtr, 4.33 - void *initialData, void *stackLocs ); 4.34 - 4.35 -#endif /* _ProcrContext_H */ 4.36 - 4.37 +/* 4.38 + * Copyright 2009 OpenSourceStewardshipFoundation.org 4.39 + * Licensed under GNU General Public License version 2 4.40 + * 4.41 + * Author: seanhalle@yahoo.com 4.42 + * 4.43 + */ 4.44 + 4.45 +#ifndef _ProcrContext_H 4.46 +#define _ProcrContext_H 4.47 +#define _GNU_SOURCE 4.48 + 4.49 +void saveCoreLoopReturnAddr(void **returnAddress); 4.50 + 4.51 +void switchToVP(VirtProcr *nextProcr); 4.52 + 4.53 +void switchToCoreLoop(VirtProcr *nextProcr); 4.54 + 4.55 +void masterSwitchToCoreLoop(VirtProcr *nextProcr); 4.56 + 4.57 +void startVirtProcrFn(); 4.58 + 4.59 +void *asmTerminateCoreLoop(VirtProcr *currPr); 4.60 + 4.61 +#define flushRegisters() \ 4.62 + asm volatile ("":::"%rbx", "%r12", "%r13","%r14","%r15") 4.63 + 4.64 +inline VirtProcr * 4.65 +create_procr_helper( VirtProcr *newPr, VirtProcrFnPtr fnPtr, 4.66 + void *initialData, void *stackLocs ); 4.67 + 4.68 +#endif /* _ProcrContext_H */ 4.69 +
5.1 --- a/VMS.h Thu Oct 06 16:24:17 2011 +0200 5.2 +++ b/VMS.h Wed Jan 04 16:10:11 2012 -0800 5.3 @@ -1,579 +1,579 @@ 5.4 -/* 5.5 - * Copyright 2009 OpenSourceStewardshipFoundation.org 5.6 - * Licensed under GNU General Public License version 2 5.7 - * 5.8 - * Author: seanhalle@yahoo.com 5.9 - * 5.10 - */ 5.11 - 5.12 -#ifndef _VMS_H 5.13 -#define _VMS_H 5.14 -#define _GNU_SOURCE 5.15 - 5.16 -#include "VMS_primitive_data_types.h" 5.17 -#include "Queue_impl/PrivateQueue.h" 5.18 -#include "Histogram/Histogram.h" 5.19 -#include "DynArray/DynArray.h" 5.20 -#include "Hash_impl/PrivateHash.h" 5.21 -#include "vmalloc.h" 5.22 - 5.23 -#include <pthread.h> 5.24 -#include <sys/time.h> 5.25 - 5.26 - 5.27 -//=============================== Debug =================================== 5.28 -// 5.29 -//When SEQUENTIAL is defined, VMS does sequential exe in the main thread 5.30 -// It still does co-routines and all the mechanisms are the same, it just 5.31 -// has only a single thread and animates VPs one at a time 5.32 -//#define SEQUENTIAL 5.33 - 5.34 -//#define USE_WORK_STEALING 5.35 - 5.36 -//turns on the probe-instrumentation in the application -- when not 5.37 -// defined, the calls to the probe functions turn into comments 5.38 -#define STATS__ENABLE_PROBES 5.39 -//#define TURN_ON_DEBUG_PROBES 5.40 - 5.41 -//These defines turn types of bug messages on and off 5.42 -// be sure debug messages are un-commented (next block of defines) 5.43 -#define dbgAppFlow TRUE /* Top level flow of application code -- general*/ 5.44 -#define dbgProbes FALSE /* for issues inside probes themselves*/ 5.45 -#define dbgB2BMaster FALSE /* in coreloop, back to back master VPs*/ 5.46 -#define dbgRqstHdlr FALSE /* in request handler code*/ 5.47 - 5.48 -//Comment or un- the substitute half to turn on/off types of debug message 5.49 -#define DEBUG( bool, msg) \ 5.50 -// if( bool){ printf(msg); fflush(stdin);} 5.51 -#define DEBUG1( bool, msg, param) \ 5.52 -// if(bool){printf(msg, param); fflush(stdin);} 
5.53 -#define DEBUG2( bool, msg, p1, p2) \ 5.54 -// if(bool) {printf(msg, p1, p2); fflush(stdin);} 5.55 - 5.56 -#define ERROR(msg) printf(msg); 5.57 -#define ERROR1(msg, param) printf(msg, param); 5.58 -#define ERROR2(msg, p1, p2) printf(msg, p1, p2); 5.59 - 5.60 -//=========================== STATS ======================= 5.61 - 5.62 - //when MEAS__TIME_STAMP_SUSP is defined, causes code to be inserted and 5.63 - // compiled-in that saves the low part of the time stamp count just before 5.64 - // suspending a processor and just after resuming that processor. It is 5.65 - // saved into a field added to VirtProcr. Have to sanity-check for 5.66 - // rollover of low portion into high portion. 5.67 -//#define MEAS__TIME_STAMP_SUSP 5.68 -//#define MEAS__TIME_MASTER 5.69 -#define MEAS__TIME_PLUGIN 5.70 -#define MEAS__TIME_MALLOC 5.71 -//#define MEAS__TIME_MASTER_LOCK 5.72 -#define MEAS__NUM_TIMES_TO_RUN 100000 5.73 - 5.74 - //For code that calculates normalization-offset between TSC counts of 5.75 - // different cores.
5.76 -#define NUM_TSC_ROUND_TRIPS 10 5.77 - 5.78 - 5.79 -//========================= Hardware related Constants ===================== 5.80 - //This value is the number of hardware threads in the shared memory 5.81 - // machine 5.82 -//#define NUM_CORES 8 5.83 - 5.84 - // tradeoff amortizing master fixed overhead vs imbalance potential 5.85 - // when work-stealing, can make bigger, at risk of losing cache affinity 5.86 -#define NUM_SCHED_SLOTS 5 5.87 - 5.88 -#define MIN_WORK_UNIT_CYCLES 20000 5.89 - 5.90 -#define MASTERLOCK_RETRIES 10000 5.91 - 5.92 - // stack size in virtual processors created 5.93 -#define VIRT_PROCR_STACK_SIZE 0x8000 /* 32K */ 5.94 - 5.95 - // memory for VMS__malloc 5.96 -#define MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE 0x10000000 /* 256M */ 5.97 - 5.98 -#define CACHE_LINE 64 5.99 -#define PAGE_SIZE 4096 5.100 - 5.101 - 5.102 -//============================== 5.103 - 5.104 -#define SUCCESS 0 5.105 - 5.106 -#define writeVMSQ writePrivQ 5.107 -#define readVMSQ readPrivQ 5.108 -#define makeVMSQ makeVMSPrivQ 5.109 -#define numInVMSQ numInPrivQ 5.110 -#define VMSQueueStruc PrivQueueStruc 5.111 - 5.112 - 5.113 - 5.114 -//=========================================================================== 5.115 -typedef unsigned long long TSCount; 5.116 - 5.117 -typedef struct _SchedSlot SchedSlot; 5.118 -typedef struct _VMSReqst VMSReqst; 5.119 -typedef struct _VirtProcr VirtProcr; 5.120 -typedef struct _IntervalProbe IntervalProbe; 5.121 -typedef struct _GateStruc GateStruc; 5.122 - 5.123 - 5.124 -typedef VirtProcr * (*SlaveScheduler) ( void *, int ); //semEnv, coreIdx 5.125 -typedef void (*RequestHandler) ( VirtProcr *, void * ); //prWReqst, semEnv 5.126 -typedef void (*VirtProcrFnPtr) ( void *, VirtProcr * ); //initData, animPr 5.127 -typedef void VirtProcrFn ( void *, VirtProcr * ); //initData, animPr 5.128 -typedef void (*ResumePrFnPtr) ( VirtProcr *, void * ); 5.129 - 5.130 - 5.131 -//============= Requests =========== 5.132 -// 5.133 - 5.134 -enum VMSReqstType 
//avoid starting enums at 0, for debug reasons 5.135 - { 5.136 - semantic = 1, 5.137 - createReq, 5.138 - dissipate, 5.139 - VMSSemantic //goes with VMSSemReqst below 5.140 - }; 5.141 - 5.142 -struct _VMSReqst 5.143 - { 5.144 - enum VMSReqstType reqType;//used for dissipate and in future for IO requests 5.145 - void *semReqData; 5.146 - 5.147 - VMSReqst *nextReqst; 5.148 - }; 5.149 -//VMSReqst 5.150 - 5.151 -enum VMSSemReqstType //These are equivalent to semantic requests, but for 5.152 - { // VMS's services available directly to app, like OS 5.153 - createProbe = 1, // and probe services -- like a VMS-wide built-in lang 5.154 - openFile, 5.155 - otherIO 5.156 - }; 5.157 - 5.158 -typedef struct 5.159 - { enum VMSSemReqstType reqType; 5.160 - VirtProcr *requestingPr; 5.161 - char *nameStr; //for create probe 5.162 - } 5.163 - VMSSemReq; 5.164 - 5.165 - 5.166 -//==================== Core data structures =================== 5.167 - 5.168 -struct _SchedSlot 5.169 - { 5.170 - int workIsDone; 5.171 - int needsProcrAssigned; 5.172 - VirtProcr *procrAssignedToSlot; 5.173 - }; 5.174 -//SchedSlot 5.175 - 5.176 -/*WARNING: re-arranging this data structure could cause VP switching 5.177 - * assembly code to fail -- hard-codes offsets of fields 5.178 - */ 5.179 -struct _VirtProcr 5.180 - { int procrID; //for debugging -- count up each time create 5.181 - int coreAnimatedBy; 5.182 - void *startOfStack; 5.183 - void *stackPtr; 5.184 - void *framePtr; 5.185 - void *nextInstrPt; 5.186 - 5.187 - void *coreLoopStartPt; //allows proto-runtime to be linked later 5.188 - void *coreLoopFramePtr; //restore before jmp back to core loop 5.189 - void *coreLoopStackPtr; //restore before jmp back to core loop 5.190 - 5.191 - void *initialData; 5.192 - 5.193 - SchedSlot *schedSlot; 5.194 - VMSReqst *requests; 5.195 - 5.196 - void *semanticData; //this lives here for the life of VP 5.197 - void *dataRetFromReq;//values returned from plugin to VP go here 5.198 - 5.199 - //===========
MEASUREMENT STUFF ========== 5.200 - #ifdef MEAS__TIME_STAMP_SUSP 5.201 - unsigned int preSuspTSCLow; 5.202 - unsigned int postSuspTSCLow; 5.203 - #endif 5.204 - #ifdef MEAS__TIME_MASTER /* in VirtProcr because multiple masterVPs*/ 5.205 - unsigned int startMasterTSCLow;USE_GNU 5.206 - unsigned int endMasterTSCLow; 5.207 - #endif 5.208 - //======================================== 5.209 - 5.210 - float64 createPtInSecs; //have space but don't use on some configs 5.211 - }; 5.212 -//VirtProcr 5.213 - 5.214 - 5.215 -/*WARNING: re-arranging this data structure could cause VP-switching 5.216 - * assembly code to fail -- hard-codes offsets of fields 5.217 - * (because -O3 messes with things otherwise) 5.218 - */ 5.219 -typedef struct 5.220 - { 5.221 - SlaveScheduler slaveScheduler; 5.222 - RequestHandler requestHandler; 5.223 - 5.224 - SchedSlot ***allSchedSlots; 5.225 - VMSQueueStruc **readyToAnimateQs; 5.226 - VirtProcr **masterVPs; 5.227 - 5.228 - void *semanticEnv; 5.229 - void *OSEventStruc; //for future, when add I/O to BLIS 5.230 - MallocProlog *freeListHead; 5.231 - int32 amtOfOutstandingMem; //total currently allocated 5.232 - 5.233 - void *coreLoopReturnPt;//addr to jump to to re-enter coreLoop 5.234 - 5.235 - int32 setupComplete; 5.236 - volatile int32 masterLock; 5.237 - 5.238 - int32 numMasterInARow[NUM_CORES];//detect back-to-back masterVP 5.239 - GateStruc *workStealingGates[ NUM_CORES ]; //concurrent work-steal 5.240 - int32 workStealingLock; 5.241 - 5.242 - int32 numProcrsCreated; //gives ordering to processor creation 5.243 - 5.244 - //=========== MEASUREMENT STUFF ============= 5.245 - IntervalProbe **intervalProbes; 5.246 - PrivDynArrayInfo *dynIntervalProbesInfo; 5.247 - HashTable *probeNameHashTbl; 5.248 - int32 masterCreateProbeID; 5.249 - float64 createPtInSecs; 5.250 - Histogram **measHists; 5.251 - PrivDynArrayInfo *measHistsInfo; 5.252 - #ifdef MEAS__TIME_PLUGIN 5.253 - Histogram *reqHdlrLowTimeHist; 5.254 - Histogram *reqHdlrHighTimeHist; 
5.255 - #endif 5.256 - #ifdef MEAS__TIME_MALLOC 5.257 - Histogram *mallocTimeHist; 5.258 - Histogram *freeTimeHist; 5.259 - #endif 5.260 - #ifdef MEAS__TIME_MASTER_LOCK 5.261 - Histogram *masterLockLowTimeHist; 5.262 - Histogram *masterLockHighTimeHist; 5.263 - #endif 5.264 - } 5.265 -MasterEnv; 5.266 - 5.267 -//========================= Extra Stuff Data Strucs ======================= 5.268 -typedef struct 5.269 - { 5.270 - 5.271 - } 5.272 -VMSExcp; 5.273 - 5.274 -struct _GateStruc 5.275 - { 5.276 - int32 gateClosed; 5.277 - int32 preGateProgress; 5.278 - int32 waitProgress; 5.279 - int32 exitProgress; 5.280 - }; 5.281 -//GateStruc 5.282 - 5.283 -//======================= OS Thread related =============================== 5.284 - 5.285 -void * coreLoop( void *paramsIn ); //standard PThreads fn prototype 5.286 -void * coreLoop_Seq( void *paramsIn ); //standard PThreads fn prototype 5.287 -void masterLoop( void *initData, VirtProcr *masterPr ); 5.288 - 5.289 - 5.290 -typedef struct 5.291 - { 5.292 - void *endThdPt; 5.293 - unsigned int coreNum; 5.294 - } 5.295 -ThdParams; 5.296 - 5.297 -pthread_t coreLoopThdHandles[ NUM_CORES ]; //pthread's virt-procr state 5.298 -ThdParams *coreLoopThdParams [ NUM_CORES ]; 5.299 -pthread_mutex_t suspendLock; 5.300 -pthread_cond_t suspend_cond; 5.301 - 5.302 - 5.303 - 5.304 -//===================== Global Vars =================== 5.305 - 5.306 -volatile MasterEnv *_VMSMasterEnv; 5.307 - 5.308 - 5.309 - 5.310 - 5.311 -//=========================== Function Prototypes ========================= 5.312 - 5.313 - 5.314 -//========== Setup and shutdown ========== 5.315 -void 5.316 -VMS__init(); 5.317 - 5.318 -void 5.319 -VMS__init_Seq(); 5.320 - 5.321 -void 5.322 -VMS__start_the_work_then_wait_until_done(); 5.323 - 5.324 -void 5.325 -VMS__start_the_work_then_wait_until_done_Seq(); 5.326 - 5.327 -inline VirtProcr * 5.328 -VMS__create_procr( VirtProcrFnPtr fnPtr, void *initialData ); 5.329 - 5.330 -void 5.331 -VMS__dissipate_procr( VirtProcr 
*procrToDissipate ); 5.332 - 5.333 - //Use this to create processor inside entry point & other places outside 5.334 - // the VMS system boundary (IE, not run in slave nor Master) 5.335 -VirtProcr * 5.336 -VMS_ext__create_procr( VirtProcrFnPtr fnPtr, void *initialData ); 5.337 - 5.338 -void 5.339 -VMS_ext__dissipate_procr( VirtProcr *procrToDissipate ); 5.340 - 5.341 -void 5.342 -VMS__throw_exception( char *msgStr, VirtProcr *reqstPr, VMSExcp *excpData ); 5.343 - 5.344 -void 5.345 -VMS__shutdown(); 5.346 - 5.347 -void 5.348 -VMS__cleanup_at_end_of_shutdown(); 5.349 - 5.350 -void * 5.351 -VMS__give_sem_env_for( VirtProcr *animPr ); 5.352 - 5.353 - 5.354 -//============== Request Related =============== 5.355 - 5.356 -void 5.357 -VMS__suspend_procr( VirtProcr *callingPr ); 5.358 - 5.359 -inline void 5.360 -VMS__add_sem_request_in_mallocd_VMSReqst( void *semReqData, VirtProcr *callingPr ); 5.361 - 5.362 -inline void 5.363 -VMS__send_sem_request( void *semReqData, VirtProcr *callingPr ); 5.364 - 5.365 -void 5.366 -VMS__send_create_procr_req( void *semReqData, VirtProcr *reqstingPr ); 5.367 - 5.368 -void inline 5.369 -VMS__send_dissipate_req( VirtProcr *prToDissipate ); 5.370 - 5.371 -inline void 5.372 -VMS__send_VMSSem_request( void *semReqData, VirtProcr *callingPr ); 5.373 - 5.374 -VMSReqst * 5.375 -VMS__take_next_request_out_of( VirtProcr *procrWithReq ); 5.376 - 5.377 -inline void * 5.378 -VMS__take_sem_reqst_from( VMSReqst *req ); 5.379 - 5.380 -void inline 5.381 -VMS__handle_VMSSemReq( VMSReqst *req, VirtProcr *requestingPr, void *semEnv, 5.382 - ResumePrFnPtr resumePrFnPtr ); 5.383 - 5.384 -//======================== STATS ====================== 5.385 - 5.386 -//===== RDTSC wrapper ===== //Also runs with x86_64 code 5.387 - 5.388 -#define saveTimeStampCountInto(low, high) \ 5.389 - asm volatile("RDTSC; \ 5.390 - movl %%eax, %0; \ 5.391 - movl %%edx, %1;" \ 5.392 - /* outputs */ : "=m" (low), "=m" (high)\ 5.393 - /* inputs */ : \ 5.394 - /* clobber */ : "%eax", 
"%edx" \ 5.395 - ); 5.396 - 5.397 -#define saveLowTimeStampCountInto(low) \ 5.398 - asm volatile("RDTSC; \ 5.399 - movl %%eax, %0;" \ 5.400 - /* outputs */ : "=m" (low) \ 5.401 - /* inputs */ : \ 5.402 - /* clobber */ : "%eax", "%edx" \ 5.403 - ); 5.404 - 5.405 -//==================== 5.406 -#define makeAMeasHist( idx, name, numBins, startVal, binWidth ) \ 5.407 - makeHighestDynArrayIndexBeAtLeast( _VMSMasterEnv->measHistsInfo, idx ); \ 5.408 - _VMSMasterEnv->measHists[idx] = \ 5.409 - makeFixedBinHist( numBins, startVal, binWidth, name ); 5.410 - 5.411 - 5.412 -#define MEAS__SUB_CREATE /*turn on/off subtraction of create from plugin*/ 5.413 - 5.414 -#ifdef VPTHREAD 5.415 - 5.416 -//VPThread 5.417 -#define createHistIdx 0 5.418 -#define mutexLockHistIdx 1 5.419 -#define mutexUnlockHistIdx 2 5.420 -#define condWaitHistIdx 3 5.421 -#define condSignalHistIdx 4 5.422 - 5.423 -#define MakeTheMeasHists() \ 5.424 - _VMSMasterEnv->measHistsInfo = \ 5.425 - makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \ 5.426 - makeAMeasHist( createHistIdx, "create", 250, 0, 100 ) \ 5.427 - makeAMeasHist( mutexLockHistIdx, "mutex_lock", 50, 0, 100 ) \ 5.428 - makeAMeasHist( mutexUnlockHistIdx, "mutex_unlock", 50, 0, 100 ) \ 5.429 - makeAMeasHist( condWaitHistIdx, "cond_wait", 50, 0, 100 ) \ 5.430 - makeAMeasHist( condSignalHistIdx, "cond_signal", 50, 0, 100 ) 5.431 - 5.432 -#endif 5.433 - 5.434 - 5.435 -#ifdef VCILK 5.436 - 5.437 -//VCilk 5.438 -#define spawnHistIdx 0 5.439 -#define syncHistIdx 1 5.440 - 5.441 -#define MakeTheMeasHists() \ 5.442 - _VMSMasterEnv->measHistsInfo = \ 5.443 - makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \ 5.444 - makeAMeasHist( spawnHistIdx, "Spawn", 50, 0, 200 ) \ 5.445 - makeAMeasHist( syncHistIdx, "Sync", 50, 0, 200 ) 5.446 - 5.447 - 5.448 -#endif 5.449 - 5.450 -#ifdef SSR 5.451 - 5.452 -//SSR 5.453 -#define SendFromToHistIdx 0 5.454 -#define SendOfTypeHistIdx 1 5.455 -#define ReceiveFromToHistIdx 2 5.456 -#define 
ReceiveOfTypeHistIdx 3 5.457 - 5.458 -#define MakeTheMeasHists() \ 5.459 - _VMSMasterEnv->measHistsInfo = \ 5.460 - makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \ 5.461 - makeAMeasHist( SendFromToHistIdx, "SendFromTo", 50, 0, 100 ) \ 5.462 - makeAMeasHist( SendOfTypeHistIdx, "SendOfType", 50, 0, 100 ) \ 5.463 - makeAMeasHist( ReceiveFromToHistIdx,"ReceiveFromTo", 50, 0, 100 ) \ 5.464 - makeAMeasHist( ReceiveOfTypeHistIdx,"ReceiveOfType", 50, 0, 100 ) 5.465 - 5.466 -#endif 5.467 - 5.468 -//=========================================================================== 5.469 -//VPThread 5.470 - 5.471 - 5.472 -#define Meas_startCreate \ 5.473 - int32 startStamp, endStamp; \ 5.474 - saveLowTimeStampCountInto( startStamp ); \ 5.475 - 5.476 -#define Meas_endCreate \ 5.477 - saveLowTimeStampCountInto( endStamp ); \ 5.478 - addIntervalToHist( startStamp, endStamp, \ 5.479 - _VMSMasterEnv->measHists[ createHistIdx ] ); 5.480 - 5.481 -#define Meas_startMutexLock \ 5.482 - int32 startStamp, endStamp; \ 5.483 - saveLowTimeStampCountInto( startStamp ); \ 5.484 - 5.485 -#define Meas_endMutexLock \ 5.486 - saveLowTimeStampCountInto( endStamp ); \ 5.487 - addIntervalToHist( startStamp, endStamp, \ 5.488 - _VMSMasterEnv->measHists[ mutexLockHistIdx ] ); 5.489 - 5.490 -#define Meas_startMutexUnlock \ 5.491 - int32 startStamp, endStamp; \ 5.492 - saveLowTimeStampCountInto( startStamp ); \ 5.493 - 5.494 -#define Meas_endMutexUnlock \ 5.495 - saveLowTimeStampCountInto( endStamp ); \ 5.496 - addIntervalToHist( startStamp, endStamp, \ 5.497 - _VMSMasterEnv->measHists[ mutexUnlockHistIdx ] ); 5.498 - 5.499 -#define Meas_startCondWait \ 5.500 - int32 startStamp, endStamp; \ 5.501 - saveLowTimeStampCountInto( startStamp ); \ 5.502 - 5.503 -#define Meas_endCondWait \ 5.504 - saveLowTimeStampCountInto( endStamp ); \ 5.505 - addIntervalToHist( startStamp, endStamp, \ 5.506 - _VMSMasterEnv->measHists[ condWaitHistIdx ] ); 5.507 - 5.508 -#define Meas_startCondSignal \ 5.509 - 
int32 startStamp, endStamp; \ 5.510 - saveLowTimeStampCountInto( startStamp ); \ 5.511 - 5.512 -#define Meas_endCondSignal \ 5.513 - saveLowTimeStampCountInto( endStamp ); \ 5.514 - addIntervalToHist( startStamp, endStamp, \ 5.515 - _VMSMasterEnv->measHists[ condSignalHistIdx ] ); 5.516 - 5.517 -//=========================================================================== 5.518 -// VCilk 5.519 -#define Meas_startSpawn \ 5.520 - int32 startStamp, endStamp; \ 5.521 - saveLowTimeStampCountInto( startStamp ); \ 5.522 - 5.523 -#define Meas_endSpawn \ 5.524 - saveLowTimeStampCountInto( endStamp ); \ 5.525 - addIntervalToHist( startStamp, endStamp, \ 5.526 - _VMSMasterEnv->measHists[ spawnHistIdx ] ); 5.527 - 5.528 -#define Meas_startSync \ 5.529 - int32 startStamp, endStamp; \ 5.530 - saveLowTimeStampCountInto( startStamp ); \ 5.531 - 5.532 -#define Meas_endSync \ 5.533 - saveLowTimeStampCountInto( endStamp ); \ 5.534 - addIntervalToHist( startStamp, endStamp, \ 5.535 - _VMSMasterEnv->measHists[ syncHistIdx ] ); 5.536 - 5.537 -//=========================================================================== 5.538 -// SSR 5.539 -#define Meas_startSendFromTo \ 5.540 - int32 startStamp, endStamp; \ 5.541 - saveLowTimeStampCountInto( startStamp ); \ 5.542 - 5.543 -#define Meas_endSendFromTo \ 5.544 - saveLowTimeStampCountInto( endStamp ); \ 5.545 - addIntervalToHist( startStamp, endStamp, \ 5.546 - _VMSMasterEnv->measHists[ SendFromToHistIdx ] ); 5.547 - 5.548 -#define Meas_startSendOfType \ 5.549 - int32 startStamp, endStamp; \ 5.550 - saveLowTimeStampCountInto( startStamp ); \ 5.551 - 5.552 -#define Meas_endSendOfType \ 5.553 - saveLowTimeStampCountInto( endStamp ); \ 5.554 - addIntervalToHist( startStamp, endStamp, \ 5.555 - _VMSMasterEnv->measHists[ SendOfTypeHistIdx ] ); 5.556 - 5.557 -#define Meas_startReceiveFromTo \ 5.558 - int32 startStamp, endStamp; \ 5.559 - saveLowTimeStampCountInto( startStamp ); \ 5.560 - 5.561 -#define Meas_endReceiveFromTo \ 5.562 - 
saveLowTimeStampCountInto( endStamp ); \ 5.563 - addIntervalToHist( startStamp, endStamp, \ 5.564 - _VMSMasterEnv->measHists[ ReceiveFromToHistIdx ] ); 5.565 - 5.566 -#define Meas_startReceiveOfType \ 5.567 - int32 startStamp, endStamp; \ 5.568 - saveLowTimeStampCountInto( startStamp ); \ 5.569 - 5.570 -#define Meas_endReceiveOfType \ 5.571 - saveLowTimeStampCountInto( endStamp ); \ 5.572 - addIntervalToHist( startStamp, endStamp, \ 5.573 - _VMSMasterEnv->measHists[ReceiveOfTypeHistIdx ] ); 5.574 - 5.575 -//===== 5.576 - 5.577 -#include "ProcrContext.h" 5.578 -#include "probes.h" 5.579 -#include "vutilities.h" 5.580 - 5.581 -#endif /* _VMS_H */ 5.582 - 5.583 +/* 5.584 + * Copyright 2009 OpenSourceStewardshipFoundation.org 5.585 + * Licensed under GNU General Public License version 2 5.586 + * 5.587 + * Author: seanhalle@yahoo.com 5.588 + * 5.589 + */ 5.590 + 5.591 +#ifndef _VMS_H 5.592 +#define _VMS_H 5.593 +#define _GNU_SOURCE 5.594 + 5.595 +#include "VMS_primitive_data_types.h" 5.596 +#include "Queue_impl/PrivateQueue.h" 5.597 +#include "Histogram/Histogram.h" 5.598 +#include "DynArray/DynArray.h" 5.599 +#include "Hash_impl/PrivateHash.h" 5.600 +#include "vmalloc.h" 5.601 + 5.602 +#include <pthread.h> 5.603 +#include <sys/time.h> 5.604 + 5.605 + 5.606 +//=============================== Debug =================================== 5.607 +// 5.608 +//When SEQUENTIAL is defined, VMS does sequential exe in the main thread 5.609 +// It still does co-routines and all the mechanisms are the same, it just 5.610 +// has only a single thread and animates VPs one at a time 5.611 +//#define SEQUENTIAL 5.612 + 5.613 +//#define USE_WORK_STEALING 5.614 + 5.615 +//turns on the probe-instrumentation in the application -- when not 5.616 +// defined, the calls to the probe functions turn into comments 5.617 +#define STATS__ENABLE_PROBES 5.618 +//#define TURN_ON_DEBUG_PROBES 5.619 + 5.620 +//These defines turn types of bug messages on and off 5.621 +// be sure debug messages are 
un-commented (next block of defines) 5.622 +#define dbgAppFlow TRUE /* Top level flow of application code -- general*/ 5.623 +#define dbgProbes FALSE /* for issues inside probes themselves*/ 5.624 +#define dbgB2BMaster FALSE /* in coreloop, back to back master VPs*/ 5.625 +#define dbgRqstHdlr FALSE /* in request handler code*/ 5.626 + 5.627 +//Comment or un-comment the substitute half to turn on/off types of debug message 5.628 +#define DEBUG( bool, msg) \ 5.629 +// if( bool){ printf(msg); fflush(stdout);} 5.630 +#define DEBUG1( bool, msg, param) \ 5.631 +// if(bool){printf(msg, param); fflush(stdout);} 5.632 +#define DEBUG2( bool, msg, p1, p2) \ 5.633 +// if(bool) {printf(msg, p1, p2); fflush(stdout);} 5.634 + 5.635 +#define ERROR(msg) printf(msg); 5.636 +#define ERROR1(msg, param) printf(msg, param); 5.637 +#define ERROR2(msg, p1, p2) printf(msg, p1, p2); 5.638 + 5.639 +//=========================== STATS ======================= 5.640 + 5.641 + //when MEAS__TIME_STAMP_SUSP is defined, causes code to be inserted and 5.642 + // compiled-in that saves the low part of the time stamp count just before 5.643 + // suspending a processor and just after resuming that processor. It is 5.644 + // saved into a field added to VirtProcr. Have to sanity-check for 5.645 + // rollover of low portion into high portion. 5.646 +//#define MEAS__TIME_STAMP_SUSP 5.647 +//#define MEAS__TIME_MASTER 5.648 +#define MEAS__TIME_PLUGIN 5.649 +#define MEAS__TIME_MALLOC 5.650 +//#define MEAS__TIME_MASTER_LOCK 5.651 +#define MEAS__NUM_TIMES_TO_RUN 100000 5.652 + 5.653 + //For code that calculates normalization-offset between TSC counts of 5.654 + // different cores.
5.655 +#define NUM_TSC_ROUND_TRIPS 10 5.656 + 5.657 + 5.658 +//========================= Hardware related Constants ===================== 5.659 + //This value is the number of hardware threads in the shared memory 5.660 + // machine 5.661 +//#define NUM_CORES 8 5.662 + 5.663 + // tradeoff amortizing master fixed overhead vs imbalance potential 5.664 + // when work-stealing, can make bigger, at risk of losing cache affinity 5.665 +#define NUM_SCHED_SLOTS 5 5.666 + 5.667 +#define MIN_WORK_UNIT_CYCLES 20000 5.668 + 5.669 +#define MASTERLOCK_RETRIES 10000 5.670 + 5.671 + // stack size in virtual processors created 5.672 +#define VIRT_PROCR_STACK_SIZE 0x8000 /* 32K */ 5.673 + 5.674 + // memory for VMS__malloc 5.675 +#define MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE 0x10000000 /* 256M */ 5.676 + 5.677 +#define CACHE_LINE 64 5.678 +#define PAGE_SIZE 4096 5.679 + 5.680 + 5.681 +//============================== 5.682 + 5.683 +#define SUCCESS 0 5.684 + 5.685 +#define writeVMSQ writePrivQ 5.686 +#define readVMSQ readPrivQ 5.687 +#define makeVMSQ makeVMSPrivQ 5.688 +#define numInVMSQ numInPrivQ 5.689 +#define VMSQueueStruc PrivQueueStruc 5.690 + 5.691 + 5.692 + 5.693 +//=========================================================================== 5.694 +typedef unsigned long long TSCount; 5.695 + 5.696 +typedef struct _SchedSlot SchedSlot; 5.697 +typedef struct _VMSReqst VMSReqst; 5.698 +typedef struct _VirtProcr VirtProcr; 5.699 +typedef struct _IntervalProbe IntervalProbe; 5.700 +typedef struct _GateStruc GateStruc; 5.701 + 5.702 + 5.703 +typedef VirtProcr * (*SlaveScheduler) ( void *, int ); //semEnv, coreIdx 5.704 +typedef void (*RequestHandler) ( VirtProcr *, void * ); //prWReqst, semEnv 5.705 +typedef void (*VirtProcrFnPtr) ( void *, VirtProcr * ); //initData, animPr 5.706 +typedef void VirtProcrFn ( void *, VirtProcr * ); //initData, animPr 5.707 +typedef void (*ResumePrFnPtr) ( VirtProcr *, void * ); 5.708 + 5.709 + 5.710 +//============= Requests =========== 5.711 +// 5.712 + 
5.713 +enum VMSReqstType //avoid starting enums at 0, for debug reasons 5.714 + { 5.715 + semantic = 1, 5.716 + createReq, 5.717 + dissipate, 5.718 + VMSSemantic //goes with VMSSemReq below 5.719 + }; 5.720 + 5.721 +struct _VMSReqst 5.722 + { 5.723 + enum VMSReqstType reqType;//used for dissipate and in future for IO requests 5.724 + void *semReqData; 5.725 + 5.726 + VMSReqst *nextReqst; 5.727 + }; 5.728 +//VMSReqst 5.729 + 5.730 +enum VMSSemReqstType //These are equivalent to semantic requests, but for 5.731 + { // VMS's services available directly to app, like OS 5.732 + createProbe = 1, // and probe services -- like a VMS-wide built-in lang 5.733 + openFile, 5.734 + otherIO 5.735 + }; 5.736 + 5.737 +typedef struct 5.738 + { enum VMSSemReqstType reqType; 5.739 + VirtProcr *requestingPr; 5.740 + char *nameStr; //for create probe 5.741 + } 5.742 + VMSSemReq; 5.743 + 5.744 + 5.745 +//==================== Core data structures =================== 5.746 + 5.747 +struct _SchedSlot 5.748 + { 5.749 + int workIsDone; 5.750 + int needsProcrAssigned; 5.751 + VirtProcr *procrAssignedToSlot; 5.752 + }; 5.753 +//SchedSlot 5.754 + 5.755 +/*WARNING: re-arranging this data structure could cause VP switching 5.756 + * assembly code to fail -- hard-codes offsets of fields 5.757 + */ 5.758 +struct _VirtProcr 5.759 + { int procrID; //for debugging -- count up each time create 5.760 + int coreAnimatedBy; 5.761 + void *startOfStack; 5.762 + void *stackPtr; 5.763 + void *framePtr; 5.764 + void *nextInstrPt; 5.765 + 5.766 + void *coreLoopStartPt; //allows proto-runtime to be linked later 5.767 + void *coreLoopFramePtr; //restore before jmp back to core loop 5.768 + void *coreLoopStackPtr; //restore before jmp back to core loop 5.769 + 5.770 + void *initialData; 5.771 + 5.772 + SchedSlot *schedSlot; 5.773 + VMSReqst *requests; 5.774 + 5.775 + void *semanticData; //this lives here for the life of VP 5.776 + void *dataRetFromReq;//values returned from plugin to VP go here 5.777 +
5.778 + //=========== MEASUREMENT STUFF ========== 5.779 + #ifdef MEAS__TIME_STAMP_SUSP 5.780 + unsigned int preSuspTSCLow; 5.781 + unsigned int postSuspTSCLow; 5.782 + #endif 5.783 + #ifdef MEAS__TIME_MASTER /* in VirtProcr because multiple masterVPs*/ 5.784 + unsigned int startMasterTSCLow;USE_GNU 5.785 + unsigned int endMasterTSCLow; 5.786 + #endif 5.787 + //======================================== 5.788 + 5.789 + float64 createPtInSecs; //have space but don't use on some configs 5.790 + }; 5.791 +//VirtProcr 5.792 + 5.793 + 5.794 +/*WARNING: re-arranging this data structure could cause VP-switching 5.795 + * assembly code to fail -- hard-codes offsets of fields 5.796 + * (because -O3 messes with things otherwise) 5.797 + */ 5.798 +typedef struct 5.799 + { 5.800 + SlaveScheduler slaveScheduler; 5.801 + RequestHandler requestHandler; 5.802 + 5.803 + SchedSlot ***allSchedSlots; 5.804 + VMSQueueStruc **readyToAnimateQs; 5.805 + VirtProcr **masterVPs; 5.806 + 5.807 + void *semanticEnv; 5.808 + void *OSEventStruc; //for future, when add I/O to BLIS 5.809 + MallocProlog *freeListHead; 5.810 + int32 amtOfOutstandingMem; //total currently allocated 5.811 + 5.812 + void *coreLoopReturnPt;//addr to jump to to re-enter coreLoop 5.813 + 5.814 + int32 setupComplete; 5.815 + volatile int32 masterLock; 5.816 + 5.817 + int32 numMasterInARow[NUM_CORES];//detect back-to-back masterVP 5.818 + GateStruc *workStealingGates[ NUM_CORES ]; //concurrent work-steal 5.819 + int32 workStealingLock; 5.820 + 5.821 + int32 numProcrsCreated; //gives ordering to processor creation 5.822 + 5.823 + //=========== MEASUREMENT STUFF ============= 5.824 + IntervalProbe **intervalProbes; 5.825 + PrivDynArrayInfo *dynIntervalProbesInfo; 5.826 + HashTable *probeNameHashTbl; 5.827 + int32 masterCreateProbeID; 5.828 + float64 createPtInSecs; 5.829 + Histogram **measHists; 5.830 + PrivDynArrayInfo *measHistsInfo; 5.831 + #ifdef MEAS__TIME_PLUGIN 5.832 + Histogram *reqHdlrLowTimeHist; 5.833 + Histogram 
*reqHdlrHighTimeHist; 5.834 + #endif 5.835 + #ifdef MEAS__TIME_MALLOC 5.836 + Histogram *mallocTimeHist; 5.837 + Histogram *freeTimeHist; 5.838 + #endif 5.839 + #ifdef MEAS__TIME_MASTER_LOCK 5.840 + Histogram *masterLockLowTimeHist; 5.841 + Histogram *masterLockHighTimeHist; 5.842 + #endif 5.843 + } 5.844 +MasterEnv; 5.845 + 5.846 +//========================= Extra Stuff Data Strucs ======================= 5.847 +typedef struct 5.848 + { 5.849 + 5.850 + } 5.851 +VMSExcp; 5.852 + 5.853 +struct _GateStruc 5.854 + { 5.855 + int32 gateClosed; 5.856 + int32 preGateProgress; 5.857 + int32 waitProgress; 5.858 + int32 exitProgress; 5.859 + }; 5.860 +//GateStruc 5.861 + 5.862 +//======================= OS Thread related =============================== 5.863 + 5.864 +void * coreLoop( void *paramsIn ); //standard PThreads fn prototype 5.865 +void * coreLoop_Seq( void *paramsIn ); //standard PThreads fn prototype 5.866 +void masterLoop( void *initData, VirtProcr *masterPr ); 5.867 + 5.868 + 5.869 +typedef struct 5.870 + { 5.871 + void *endThdPt; 5.872 + unsigned int coreNum; 5.873 + } 5.874 +ThdParams; 5.875 + 5.876 +pthread_t coreLoopThdHandles[ NUM_CORES ]; //pthread's virt-procr state 5.877 +ThdParams *coreLoopThdParams [ NUM_CORES ]; 5.878 +pthread_mutex_t suspendLock; 5.879 +pthread_cond_t suspend_cond; 5.880 + 5.881 + 5.882 + 5.883 +//===================== Global Vars =================== 5.884 + 5.885 +volatile MasterEnv *_VMSMasterEnv; 5.886 + 5.887 + 5.888 + 5.889 + 5.890 +//=========================== Function Prototypes ========================= 5.891 + 5.892 + 5.893 +//========== Setup and shutdown ========== 5.894 +void 5.895 +VMS__init(); 5.896 + 5.897 +void 5.898 +VMS__init_Seq(); 5.899 + 5.900 +void 5.901 +VMS__start_the_work_then_wait_until_done(); 5.902 + 5.903 +void 5.904 +VMS__start_the_work_then_wait_until_done_Seq(); 5.905 + 5.906 +inline VirtProcr * 5.907 +VMS__create_procr( VirtProcrFnPtr fnPtr, void *initialData ); 5.908 + 5.909 +void 5.910 
+VMS__dissipate_procr( VirtProcr *procrToDissipate ); 5.911 + 5.912 + //Use this to create processor inside entry point & other places outside 5.913 + // the VMS system boundary (IE, not run in slave nor Master) 5.914 +VirtProcr * 5.915 +VMS_ext__create_procr( VirtProcrFnPtr fnPtr, void *initialData ); 5.916 + 5.917 +void 5.918 +VMS_ext__dissipate_procr( VirtProcr *procrToDissipate ); 5.919 + 5.920 +void 5.921 +VMS__throw_exception( char *msgStr, VirtProcr *reqstPr, VMSExcp *excpData ); 5.922 + 5.923 +void 5.924 +VMS__shutdown(); 5.925 + 5.926 +void 5.927 +VMS__cleanup_at_end_of_shutdown(); 5.928 + 5.929 +void * 5.930 +VMS__give_sem_env_for( VirtProcr *animPr ); 5.931 + 5.932 + 5.933 +//============== Request Related =============== 5.934 + 5.935 +void 5.936 +VMS__suspend_procr( VirtProcr *callingPr ); 5.937 + 5.938 +inline void 5.939 +VMS__add_sem_request_in_mallocd_VMSReqst( void *semReqData, VirtProcr *callingPr ); 5.940 + 5.941 +inline void 5.942 +VMS__send_sem_request( void *semReqData, VirtProcr *callingPr ); 5.943 + 5.944 +void 5.945 +VMS__send_create_procr_req( void *semReqData, VirtProcr *reqstingPr ); 5.946 + 5.947 +void inline 5.948 +VMS__send_dissipate_req( VirtProcr *prToDissipate ); 5.949 + 5.950 +inline void 5.951 +VMS__send_VMSSem_request( void *semReqData, VirtProcr *callingPr ); 5.952 + 5.953 +VMSReqst * 5.954 +VMS__take_next_request_out_of( VirtProcr *procrWithReq ); 5.955 + 5.956 +inline void * 5.957 +VMS__take_sem_reqst_from( VMSReqst *req ); 5.958 + 5.959 +void inline 5.960 +VMS__handle_VMSSemReq( VMSReqst *req, VirtProcr *requestingPr, void *semEnv, 5.961 + ResumePrFnPtr resumePrFnPtr ); 5.962 + 5.963 +//======================== STATS ====================== 5.964 + 5.965 +//===== RDTSC wrapper ===== //Also runs with x86_64 code 5.966 + 5.967 +#define saveTimeStampCountInto(low, high) \ 5.968 + asm volatile("RDTSC; \ 5.969 + movl %%eax, %0; \ 5.970 + movl %%edx, %1;" \ 5.971 + /* outputs */ : "=m" (low), "=m" (high)\ 5.972 + /* inputs */ : \ 
5.973 + /* clobber */ : "%eax", "%edx" \ 5.974 + ); 5.975 + 5.976 +#define saveLowTimeStampCountInto(low) \ 5.977 + asm volatile("RDTSC; \ 5.978 + movl %%eax, %0;" \ 5.979 + /* outputs */ : "=m" (low) \ 5.980 + /* inputs */ : \ 5.981 + /* clobber */ : "%eax", "%edx" \ 5.982 + ); 5.983 + 5.984 +//==================== 5.985 +#define makeAMeasHist( idx, name, numBins, startVal, binWidth ) \ 5.986 + makeHighestDynArrayIndexBeAtLeast( _VMSMasterEnv->measHistsInfo, idx ); \ 5.987 + _VMSMasterEnv->measHists[idx] = \ 5.988 + makeFixedBinHist( numBins, startVal, binWidth, name ); 5.989 + 5.990 + 5.991 +#define MEAS__SUB_CREATE /*turn on/off subtraction of create from plugin*/ 5.992 + 5.993 +#ifdef VPTHREAD 5.994 + 5.995 +//VPThread 5.996 +#define createHistIdx 0 5.997 +#define mutexLockHistIdx 1 5.998 +#define mutexUnlockHistIdx 2 5.999 +#define condWaitHistIdx 3 5.1000 +#define condSignalHistIdx 4 5.1001 + 5.1002 +#define MakeTheMeasHists() \ 5.1003 + _VMSMasterEnv->measHistsInfo = \ 5.1004 + makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \ 5.1005 + makeAMeasHist( createHistIdx, "create", 250, 0, 100 ) \ 5.1006 + makeAMeasHist( mutexLockHistIdx, "mutex_lock", 50, 0, 100 ) \ 5.1007 + makeAMeasHist( mutexUnlockHistIdx, "mutex_unlock", 50, 0, 100 ) \ 5.1008 + makeAMeasHist( condWaitHistIdx, "cond_wait", 50, 0, 100 ) \ 5.1009 + makeAMeasHist( condSignalHistIdx, "cond_signal", 50, 0, 100 ) 5.1010 + 5.1011 +#endif 5.1012 + 5.1013 + 5.1014 +#ifdef VCILK 5.1015 + 5.1016 +//VCilk 5.1017 +#define spawnHistIdx 0 5.1018 +#define syncHistIdx 1 5.1019 + 5.1020 +#define MakeTheMeasHists() \ 5.1021 + _VMSMasterEnv->measHistsInfo = \ 5.1022 + makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \ 5.1023 + makeAMeasHist( spawnHistIdx, "Spawn", 50, 0, 200 ) \ 5.1024 + makeAMeasHist( syncHistIdx, "Sync", 50, 0, 200 ) 5.1025 + 5.1026 + 5.1027 +#endif 5.1028 + 5.1029 +#ifdef SSR 5.1030 + 5.1031 +//SSR 5.1032 +#define SendFromToHistIdx 0 5.1033 +#define 
SendOfTypeHistIdx 1 5.1034 +#define ReceiveFromToHistIdx 2 5.1035 +#define ReceiveOfTypeHistIdx 3 5.1036 + 5.1037 +#define MakeTheMeasHists() \ 5.1038 + _VMSMasterEnv->measHistsInfo = \ 5.1039 + makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \ 5.1040 + makeAMeasHist( SendFromToHistIdx, "SendFromTo", 50, 0, 100 ) \ 5.1041 + makeAMeasHist( SendOfTypeHistIdx, "SendOfType", 50, 0, 100 ) \ 5.1042 + makeAMeasHist( ReceiveFromToHistIdx,"ReceiveFromTo", 50, 0, 100 ) \ 5.1043 + makeAMeasHist( ReceiveOfTypeHistIdx,"ReceiveOfType", 50, 0, 100 ) 5.1044 + 5.1045 +#endif 5.1046 + 5.1047 +//=========================================================================== 5.1048 +//VPThread 5.1049 + 5.1050 + 5.1051 +#define Meas_startCreate \ 5.1052 + int32 startStamp, endStamp; \ 5.1053 + saveLowTimeStampCountInto( startStamp ); \ 5.1054 + 5.1055 +#define Meas_endCreate \ 5.1056 + saveLowTimeStampCountInto( endStamp ); \ 5.1057 + addIntervalToHist( startStamp, endStamp, \ 5.1058 + _VMSMasterEnv->measHists[ createHistIdx ] ); 5.1059 + 5.1060 +#define Meas_startMutexLock \ 5.1061 + int32 startStamp, endStamp; \ 5.1062 + saveLowTimeStampCountInto( startStamp ); \ 5.1063 + 5.1064 +#define Meas_endMutexLock \ 5.1065 + saveLowTimeStampCountInto( endStamp ); \ 5.1066 + addIntervalToHist( startStamp, endStamp, \ 5.1067 + _VMSMasterEnv->measHists[ mutexLockHistIdx ] ); 5.1068 + 5.1069 +#define Meas_startMutexUnlock \ 5.1070 + int32 startStamp, endStamp; \ 5.1071 + saveLowTimeStampCountInto( startStamp ); \ 5.1072 + 5.1073 +#define Meas_endMutexUnlock \ 5.1074 + saveLowTimeStampCountInto( endStamp ); \ 5.1075 + addIntervalToHist( startStamp, endStamp, \ 5.1076 + _VMSMasterEnv->measHists[ mutexUnlockHistIdx ] ); 5.1077 + 5.1078 +#define Meas_startCondWait \ 5.1079 + int32 startStamp, endStamp; \ 5.1080 + saveLowTimeStampCountInto( startStamp ); \ 5.1081 + 5.1082 +#define Meas_endCondWait \ 5.1083 + saveLowTimeStampCountInto( endStamp ); \ 5.1084 + addIntervalToHist( 
startStamp, endStamp, \ 5.1085 + _VMSMasterEnv->measHists[ condWaitHistIdx ] ); 5.1086 + 5.1087 +#define Meas_startCondSignal \ 5.1088 + int32 startStamp, endStamp; \ 5.1089 + saveLowTimeStampCountInto( startStamp ); \ 5.1090 + 5.1091 +#define Meas_endCondSignal \ 5.1092 + saveLowTimeStampCountInto( endStamp ); \ 5.1093 + addIntervalToHist( startStamp, endStamp, \ 5.1094 + _VMSMasterEnv->measHists[ condSignalHistIdx ] ); 5.1095 + 5.1096 +//=========================================================================== 5.1097 +// VCilk 5.1098 +#define Meas_startSpawn \ 5.1099 + int32 startStamp, endStamp; \ 5.1100 + saveLowTimeStampCountInto( startStamp ); \ 5.1101 + 5.1102 +#define Meas_endSpawn \ 5.1103 + saveLowTimeStampCountInto( endStamp ); \ 5.1104 + addIntervalToHist( startStamp, endStamp, \ 5.1105 + _VMSMasterEnv->measHists[ spawnHistIdx ] ); 5.1106 + 5.1107 +#define Meas_startSync \ 5.1108 + int32 startStamp, endStamp; \ 5.1109 + saveLowTimeStampCountInto( startStamp ); \ 5.1110 + 5.1111 +#define Meas_endSync \ 5.1112 + saveLowTimeStampCountInto( endStamp ); \ 5.1113 + addIntervalToHist( startStamp, endStamp, \ 5.1114 + _VMSMasterEnv->measHists[ syncHistIdx ] ); 5.1115 + 5.1116 +//=========================================================================== 5.1117 +// SSR 5.1118 +#define Meas_startSendFromTo \ 5.1119 + int32 startStamp, endStamp; \ 5.1120 + saveLowTimeStampCountInto( startStamp ); \ 5.1121 + 5.1122 +#define Meas_endSendFromTo \ 5.1123 + saveLowTimeStampCountInto( endStamp ); \ 5.1124 + addIntervalToHist( startStamp, endStamp, \ 5.1125 + _VMSMasterEnv->measHists[ SendFromToHistIdx ] ); 5.1126 + 5.1127 +#define Meas_startSendOfType \ 5.1128 + int32 startStamp, endStamp; \ 5.1129 + saveLowTimeStampCountInto( startStamp ); \ 5.1130 + 5.1131 +#define Meas_endSendOfType \ 5.1132 + saveLowTimeStampCountInto( endStamp ); \ 5.1133 + addIntervalToHist( startStamp, endStamp, \ 5.1134 + _VMSMasterEnv->measHists[ SendOfTypeHistIdx ] ); 5.1135 + 5.1136 
+#define Meas_startReceiveFromTo \ 5.1137 + int32 startStamp, endStamp; \ 5.1138 + saveLowTimeStampCountInto( startStamp ); \ 5.1139 + 5.1140 +#define Meas_endReceiveFromTo \ 5.1141 + saveLowTimeStampCountInto( endStamp ); \ 5.1142 + addIntervalToHist( startStamp, endStamp, \ 5.1143 + _VMSMasterEnv->measHists[ ReceiveFromToHistIdx ] ); 5.1144 + 5.1145 +#define Meas_startReceiveOfType \ 5.1146 + int32 startStamp, endStamp; \ 5.1147 + saveLowTimeStampCountInto( startStamp ); \ 5.1148 + 5.1149 +#define Meas_endReceiveOfType \ 5.1150 + saveLowTimeStampCountInto( endStamp ); \ 5.1151 + addIntervalToHist( startStamp, endStamp, \ 5.1152 + _VMSMasterEnv->measHists[ReceiveOfTypeHistIdx ] ); 5.1153 + 5.1154 +//===== 5.1155 + 5.1156 +#include "ProcrContext.h" 5.1157 +#include "probes.h" 5.1158 +#include "vutilities.h" 5.1159 + 5.1160 +#endif /* _VMS_H */ 5.1161 +
6.1 --- a/VMS_primitive_data_types.h Thu Oct 06 16:24:17 2011 +0200 6.2 +++ b/VMS_primitive_data_types.h Wed Jan 04 16:10:11 2012 -0800 6.3 @@ -1,53 +1,53 @@ 6.4 -/* 6.5 - * Copyright 2009 OpenSourceStewardshipFoundation.org 6.6 - * Licensed under GNU General Public License version 2 6.7 - * 6.8 - * Author: seanhalle@yahoo.com 6.9 - * 6.10 - 6.11 - */ 6.12 - 6.13 -#ifndef _BLIS_PRIMITIVE_DATA_TYPES_H 6.14 -#define _BLIS_PRIMITIVE_DATA_TYPES_H 6.15 - 6.16 - 6.17 -/*For portability, need primitive data types that have a well defined 6.18 - * size, and well-defined layout into bytes 6.19 - *To do this, provide BLIS standard aliases for all primitive data types 6.20 - *These aliases must be used in all BLIS functions instead of the ANSI types 6.21 - * 6.22 - *These definitions will be replaced inside each specialization module 6.23 - * according to the compiler used in that module and the hardware being 6.24 - * specialized to. 6.25 - */ 6.26 -/* 6.27 -#define int8 char 6.28 -#define uint8 char 6.29 -#define int16 short 6.30 -#define uint16 unsigned short 6.31 -#define int32 int 6.32 -#define uint32 unsigned int 6.33 -#define int64 long long 6.34 -#define uint64 unsigned long long 6.35 -#define float32 float 6.36 -#define float64 double 6.37 -*/ 6.38 -typedef char bool8; 6.39 -typedef char int8; 6.40 -typedef char uint8; 6.41 -typedef short int16; 6.42 -typedef unsigned short uint16; 6.43 -typedef int int32; 6.44 -typedef unsigned int uint32; 6.45 -typedef long long int64; 6.46 -typedef unsigned long long uint64; 6.47 -typedef float float32; 6.48 -typedef double float64; 6.49 -//typedef double double float128; 6.50 -#define float128 double double 6.51 - 6.52 -#define TRUE 1 6.53 -#define FALSE 0 6.54 - 6.55 -#endif /* _BLIS_PRIMITIVE_DATA_TYPES_H */ 6.56 - 6.57 +/* 6.58 + * Copyright 2009 OpenSourceStewardshipFoundation.org 6.59 + * Licensed under GNU General Public License version 2 6.60 + * 6.61 + * Author: seanhalle@yahoo.com 6.62 + * 6.63 + 6.64 + */ 6.65 + 6.66 
+#ifndef _BLIS_PRIMITIVE_DATA_TYPES_H 6.67 +#define _BLIS_PRIMITIVE_DATA_TYPES_H 6.68 + 6.69 + 6.70 +/*For portability, need primitive data types that have a well defined 6.71 + * size, and well-defined layout into bytes 6.72 + *To do this, provide BLIS standard aliases for all primitive data types 6.73 + *These aliases must be used in all BLIS functions instead of the ANSI types 6.74 + * 6.75 + *These definitions will be replaced inside each specialization module 6.76 + * according to the compiler used in that module and the hardware being 6.77 + * specialized to. 6.78 + */ 6.79 +/* 6.80 +#define int8 char 6.81 +#define uint8 char 6.82 +#define int16 short 6.83 +#define uint16 unsigned short 6.84 +#define int32 int 6.85 +#define uint32 unsigned int 6.86 +#define int64 long long 6.87 +#define uint64 unsigned long long 6.88 +#define float32 float 6.89 +#define float64 double 6.90 +*/ 6.91 +typedef char bool8; 6.92 +typedef char int8; 6.93 +typedef char uint8; 6.94 +typedef short int16; 6.95 +typedef unsigned short uint16; 6.96 +typedef int int32; 6.97 +typedef unsigned int uint32; 6.98 +typedef long long int64; 6.99 +typedef unsigned long long uint64; 6.100 +typedef float float32; 6.101 +typedef double float64; 6.102 +//typedef double double float128; 6.103 +#define float128 double double 6.104 + 6.105 +#define TRUE 1 6.106 +#define FALSE 0 6.107 + 6.108 +#endif /* _BLIS_PRIMITIVE_DATA_TYPES_H */ 6.109 +
7.1 --- a/probes.h Thu Oct 06 16:24:17 2011 +0200 7.2 +++ b/probes.h Wed Jan 04 16:10:11 2012 -0800 7.3 @@ -1,195 +1,195 @@ 7.4 -/* 7.5 - * Copyright 2009 OpenSourceStewardshipFoundation.org 7.6 - * Licensed under GNU General Public License version 2 7.7 - * 7.8 - * Author: seanhalle@yahoo.com 7.9 - * 7.10 - */ 7.11 - 7.12 -#ifndef _PROBES_H 7.13 -#define _PROBES_H 7.14 -#define _GNU_SOURCE 7.15 - 7.16 -#include "VMS_primitive_data_types.h" 7.17 - 7.18 -#include <sys/time.h> 7.19 - 7.20 - 7.21 - //when STATS__TURN_ON_PROBES is defined allows using probes to measure 7.22 - // time intervals. The probes are macros that only compile to something 7.23 - // when STATS__TURN_ON_PROBES is defined. The probes are saved in the 7.24 - // master env -- but only when this is defined. 7.25 - //The TSC probes use RDTSC instr, can be unreliable, Dbl uses gettimeofday 7.26 -#define STATS__TURN_ON_PROBES 7.27 -//#define STATS__USE_TSC_PROBES 7.28 -#define STATS__USE_DBL_PROBES 7.29 - 7.30 -//typedef struct _IntervalProbe IntervalProbe; //in VMS.h 7.31 - 7.32 -struct _IntervalProbe 7.33 - { 7.34 - char *nameStr; 7.35 - int32 probeID; 7.36 - 7.37 - int32 schedChoiceWasRecorded; 7.38 - int32 coreNum; 7.39 - int32 procrID; 7.40 - float64 procrCreateSecs; 7.41 - 7.42 - #ifdef STATS__USE_TSC_PROBES 7.43 - TSCount startStamp; 7.44 - TSCount endStamp; 7.45 - #else 7.46 - struct timeval startStamp; 7.47 - struct timeval endStamp; 7.48 - #endif 7.49 - float64 startSecs; 7.50 - float64 endSecs; 7.51 - float64 interval; 7.52 - DblHist *hist;//if NULL, then is single interval probe 7.53 - }; 7.54 - 7.55 - 7.56 -//============================= Statistics ================================== 7.57 - 7.58 - //Frequency of TS counts 7.59 - //TODO: change freq for each machine 7.60 -#define TSCOUNT_FREQ 3180000000 7.61 - 7.62 -inline TSCount getTSCount(); 7.63 - 7.64 - 7.65 -//======================== Probes ============================= 7.66 -// 7.67 -// Use macros to allow turning probes off with a 
#define switch 7.68 -#ifdef STATS__ENABLE_PROBES 7.69 -int32 7.70 -VMS_impl__record_time_point_into_new_probe( char *nameStr,VirtProcr *animPr); 7.71 -#define VMS__record_time_point_into_new_probe( nameStr, animPr ) \ 7.72 - VMS_impl__record_time_point_in_new_probe( nameStr, animPr ) 7.73 - 7.74 -int32 7.75 -VMS_ext_impl__record_time_point_into_new_probe( char *nameStr ); 7.76 -#define VMS_ext__record_time_point_into_new_probe( nameStr ) \ 7.77 - VMS_ext_impl__record_time_point_into_new_probe( nameStr ) 7.78 - 7.79 - 7.80 -int32 7.81 -VMS_impl__create_single_interval_probe( char *nameStr, VirtProcr *animPr ); 7.82 -#define VMS__create_single_interval_probe( nameStr, animPr ) \ 7.83 - VMS_impl__create_single_interval_probe( nameStr, animPr ) 7.84 - 7.85 - 7.86 -int32 7.87 -VMS_impl__create_histogram_probe( int32 numBins, float64 startValue, 7.88 - float64 binWidth, char *nameStr, VirtProcr *animPr ); 7.89 -#define VMS__create_histogram_probe( numBins, startValue, \ 7.90 - binWidth, nameStr, animPr ) \ 7.91 - VMS_impl__create_histogram_probe( numBins, startValue, \ 7.92 - binWidth, nameStr, animPr ) 7.93 -void 7.94 -VMS_impl__free_probe( IntervalProbe *probe ); 7.95 -#define VMS__free_probe( probe ) \ 7.96 - VMS_impl__free_probe( probe ) 7.97 - 7.98 -void 7.99 -VMS_impl__index_probe_by_its_name( int32 probeID, VirtProcr *animPr ); 7.100 -#define VMS__index_probe_by_its_name( probeID, animPr ) \ 7.101 - VMS_impl__index_probe_by_its_name( probeID, animPr ) 7.102 - 7.103 -IntervalProbe * 7.104 -VMS_impl__get_probe_by_name( char *probeName, VirtProcr *animPr ); 7.105 -#define VMS__get_probe_by_name( probeID, animPr ) \ 7.106 - VMS_impl__get_probe_by_name( probeName, animPr ) 7.107 - 7.108 -void 7.109 -VMS_impl__record_sched_choice_into_probe( int32 probeID, VirtProcr *animPr ); 7.110 -#define VMS__record_sched_choice_into_probe( probeID, animPr ) \ 7.111 - VMS_impl__record_sched_choice_into_probe( probeID, animPr ) 7.112 - 7.113 -void 7.114 
-VMS_impl__record_interval_start_in_probe( int32 probeID ); 7.115 -#define VMS__record_interval_start_in_probe( probeID ) \ 7.116 - VMS_impl__record_interval_start_in_probe( probeID ) 7.117 - 7.118 -void 7.119 -VMS_impl__record_interval_end_in_probe( int32 probeID ); 7.120 -#define VMS__record_interval_end_in_probe( probeID ) \ 7.121 - VMS_impl__record_interval_end_in_probe( probeID ) 7.122 - 7.123 -void 7.124 -VMS_impl__print_stats_of_probe( int32 probeID ); 7.125 -#define VMS__print_stats_of_probe( probeID ) \ 7.126 - VMS_impl__print_stats_of_probe( probeID ) 7.127 - 7.128 -void 7.129 -VMS_impl__print_stats_of_all_probes(); 7.130 -#define VMS__print_stats_of_all_probes() \ 7.131 - VMS_impl__print_stats_of_all_probes() 7.132 - 7.133 - 7.134 -#else 7.135 -int32 7.136 -VMS_impl__record_time_point_into_new_probe( char *nameStr,VirtProcr *animPr); 7.137 -#define VMS__record_time_point_into_new_probe( nameStr, animPr ) \ 7.138 - 0 /* do nothing */ 7.139 - 7.140 -int32 7.141 -VMS_ext_impl__record_time_point_into_new_probe( char *nameStr ); 7.142 -#define VMS_ext__record_time_point_into_new_probe( nameStr ) \ 7.143 - 0 /* do nothing */ 7.144 - 7.145 - 7.146 -int32 7.147 -VMS_impl__create_single_interval_probe( char *nameStr, VirtProcr *animPr ); 7.148 -#define VMS__create_single_interval_probe( nameStr, animPr ) \ 7.149 - 0 /* do nothing */ 7.150 - 7.151 - 7.152 -int32 7.153 -VMS_impl__create_histogram_probe( int32 numBins, float64 startValue, 7.154 - float64 binWidth, char *nameStr, VirtProcr *animPr ); 7.155 -#define VMS__create_histogram_probe( numBins, startValue, \ 7.156 - binWidth, nameStr, animPr ) \ 7.157 - 0 /* do nothing */ 7.158 - 7.159 -void 7.160 -VMS_impl__index_probe_by_its_name( int32 probeID, VirtProcr *animPr ); 7.161 -#define VMS__index_probe_by_its_name( probeID, animPr ) \ 7.162 - /* do nothing */ 7.163 - 7.164 -IntervalProbe * 7.165 -VMS_impl__get_probe_by_name( char *probeName, VirtProcr *animPr ); 7.166 -#define VMS__get_probe_by_name( probeID, 
animPr ) \ 7.167 - NULL /* do nothing */ 7.168 - 7.169 -void 7.170 -VMS_impl__record_sched_choice_into_probe( int32 probeID, VirtProcr *animPr ); 7.171 -#define VMS__record_sched_choice_into_probe( probeID, animPr ) \ 7.172 - /* do nothing */ 7.173 - 7.174 -void 7.175 -VMS_impl__record_interval_start_in_probe( int32 probeID ); 7.176 -#define VMS__record_interval_start_in_probe( probeID ) \ 7.177 - /* do nothing */ 7.178 - 7.179 -void 7.180 -VMS_impl__record_interval_end_in_probe( int32 probeID ); 7.181 -#define VMS__record_interval_end_in_probe( probeID ) \ 7.182 - /* do nothing */ 7.183 - 7.184 -inline void doNothing(); 7.185 -void 7.186 -VMS_impl__print_stats_of_probe( int32 probeID ); 7.187 -#define VMS__print_stats_of_probe( probeID ) \ 7.188 - doNothing/* do nothing */ 7.189 - 7.190 -void 7.191 -VMS_impl__print_stats_of_all_probes(); 7.192 -#define VMS__print_stats_of_all_probes \ 7.193 - doNothing/* do nothing */ 7.194 - 7.195 -#endif /* defined STATS__ENABLE_PROBES */ 7.196 - 7.197 -#endif /* _PROBES_H */ 7.198 - 7.199 +/* 7.200 + * Copyright 2009 OpenSourceStewardshipFoundation.org 7.201 + * Licensed under GNU General Public License version 2 7.202 + * 7.203 + * Author: seanhalle@yahoo.com 7.204 + * 7.205 + */ 7.206 + 7.207 +#ifndef _PROBES_H 7.208 +#define _PROBES_H 7.209 +#define _GNU_SOURCE 7.210 + 7.211 +#include "VMS_primitive_data_types.h" 7.212 + 7.213 +#include <sys/time.h> 7.214 + 7.215 + 7.216 + //when STATS__TURN_ON_PROBES is defined allows using probes to measure 7.217 + // time intervals. The probes are macros that only compile to something 7.218 + // when STATS__TURN_ON_PROBES is defined. The probes are saved in the 7.219 + // master env -- but only when this is defined. 
7.220 + //The TSC probes use RDTSC instr, can be unreliable, Dbl uses gettimeofday 7.221 +#define STATS__TURN_ON_PROBES 7.222 +//#define STATS__USE_TSC_PROBES 7.223 +#define STATS__USE_DBL_PROBES 7.224 + 7.225 +//typedef struct _IntervalProbe IntervalProbe; //in VMS.h 7.226 + 7.227 +struct _IntervalProbe 7.228 + { 7.229 + char *nameStr; 7.230 + int32 probeID; 7.231 + 7.232 + int32 schedChoiceWasRecorded; 7.233 + int32 coreNum; 7.234 + int32 procrID; 7.235 + float64 procrCreateSecs; 7.236 + 7.237 + #ifdef STATS__USE_TSC_PROBES 7.238 + TSCount startStamp; 7.239 + TSCount endStamp; 7.240 + #else 7.241 + struct timeval startStamp; 7.242 + struct timeval endStamp; 7.243 + #endif 7.244 + float64 startSecs; 7.245 + float64 endSecs; 7.246 + float64 interval; 7.247 + DblHist *hist;//if NULL, then is single interval probe 7.248 + }; 7.249 + 7.250 + 7.251 +//============================= Statistics ================================== 7.252 + 7.253 + //Frequency of TS counts 7.254 + //TODO: change freq for each machine 7.255 +#define TSCOUNT_FREQ 3180000000 7.256 + 7.257 +inline TSCount getTSCount(); 7.258 + 7.259 + 7.260 +//======================== Probes ============================= 7.261 +// 7.262 +// Use macros to allow turning probes off with a #define switch 7.263 +#ifdef STATS__ENABLE_PROBES 7.264 +int32 7.265 +VMS_impl__record_time_point_into_new_probe( char *nameStr,VirtProcr *animPr); 7.266 +#define VMS__record_time_point_into_new_probe( nameStr, animPr ) \ 7.267 + VMS_impl__record_time_point_in_new_probe( nameStr, animPr ) 7.268 + 7.269 +int32 7.270 +VMS_ext_impl__record_time_point_into_new_probe( char *nameStr ); 7.271 +#define VMS_ext__record_time_point_into_new_probe( nameStr ) \ 7.272 + VMS_ext_impl__record_time_point_into_new_probe( nameStr ) 7.273 + 7.274 + 7.275 +int32 7.276 +VMS_impl__create_single_interval_probe( char *nameStr, VirtProcr *animPr ); 7.277 +#define VMS__create_single_interval_probe( nameStr, animPr ) \ 7.278 + 
VMS_impl__create_single_interval_probe( nameStr, animPr ) 7.279 + 7.280 + 7.281 +int32 7.282 +VMS_impl__create_histogram_probe( int32 numBins, float64 startValue, 7.283 + float64 binWidth, char *nameStr, VirtProcr *animPr ); 7.284 +#define VMS__create_histogram_probe( numBins, startValue, \ 7.285 + binWidth, nameStr, animPr ) \ 7.286 + VMS_impl__create_histogram_probe( numBins, startValue, \ 7.287 + binWidth, nameStr, animPr ) 7.288 +void 7.289 +VMS_impl__free_probe( IntervalProbe *probe ); 7.290 +#define VMS__free_probe( probe ) \ 7.291 + VMS_impl__free_probe( probe ) 7.292 + 7.293 +void 7.294 +VMS_impl__index_probe_by_its_name( int32 probeID, VirtProcr *animPr ); 7.295 +#define VMS__index_probe_by_its_name( probeID, animPr ) \ 7.296 + VMS_impl__index_probe_by_its_name( probeID, animPr ) 7.297 + 7.298 +IntervalProbe * 7.299 +VMS_impl__get_probe_by_name( char *probeName, VirtProcr *animPr ); 7.300 +#define VMS__get_probe_by_name( probeID, animPr ) \ 7.301 + VMS_impl__get_probe_by_name( probeName, animPr ) 7.302 + 7.303 +void 7.304 +VMS_impl__record_sched_choice_into_probe( int32 probeID, VirtProcr *animPr ); 7.305 +#define VMS__record_sched_choice_into_probe( probeID, animPr ) \ 7.306 + VMS_impl__record_sched_choice_into_probe( probeID, animPr ) 7.307 + 7.308 +void 7.309 +VMS_impl__record_interval_start_in_probe( int32 probeID ); 7.310 +#define VMS__record_interval_start_in_probe( probeID ) \ 7.311 + VMS_impl__record_interval_start_in_probe( probeID ) 7.312 + 7.313 +void 7.314 +VMS_impl__record_interval_end_in_probe( int32 probeID ); 7.315 +#define VMS__record_interval_end_in_probe( probeID ) \ 7.316 + VMS_impl__record_interval_end_in_probe( probeID ) 7.317 + 7.318 +void 7.319 +VMS_impl__print_stats_of_probe( int32 probeID ); 7.320 +#define VMS__print_stats_of_probe( probeID ) \ 7.321 + VMS_impl__print_stats_of_probe( probeID ) 7.322 + 7.323 +void 7.324 +VMS_impl__print_stats_of_all_probes(); 7.325 +#define VMS__print_stats_of_all_probes() \ 7.326 + 
VMS_impl__print_stats_of_all_probes() 7.327 + 7.328 + 7.329 +#else 7.330 +int32 7.331 +VMS_impl__record_time_point_into_new_probe( char *nameStr,VirtProcr *animPr); 7.332 +#define VMS__record_time_point_into_new_probe( nameStr, animPr ) \ 7.333 + 0 /* do nothing */ 7.334 + 7.335 +int32 7.336 +VMS_ext_impl__record_time_point_into_new_probe( char *nameStr ); 7.337 +#define VMS_ext__record_time_point_into_new_probe( nameStr ) \ 7.338 + 0 /* do nothing */ 7.339 + 7.340 + 7.341 +int32 7.342 +VMS_impl__create_single_interval_probe( char *nameStr, VirtProcr *animPr ); 7.343 +#define VMS__create_single_interval_probe( nameStr, animPr ) \ 7.344 + 0 /* do nothing */ 7.345 + 7.346 + 7.347 +int32 7.348 +VMS_impl__create_histogram_probe( int32 numBins, float64 startValue, 7.349 + float64 binWidth, char *nameStr, VirtProcr *animPr ); 7.350 +#define VMS__create_histogram_probe( numBins, startValue, \ 7.351 + binWidth, nameStr, animPr ) \ 7.352 + 0 /* do nothing */ 7.353 + 7.354 +void 7.355 +VMS_impl__index_probe_by_its_name( int32 probeID, VirtProcr *animPr ); 7.356 +#define VMS__index_probe_by_its_name( probeID, animPr ) \ 7.357 + /* do nothing */ 7.358 + 7.359 +IntervalProbe * 7.360 +VMS_impl__get_probe_by_name( char *probeName, VirtProcr *animPr ); 7.361 +#define VMS__get_probe_by_name( probeID, animPr ) \ 7.362 + NULL /* do nothing */ 7.363 + 7.364 +void 7.365 +VMS_impl__record_sched_choice_into_probe( int32 probeID, VirtProcr *animPr ); 7.366 +#define VMS__record_sched_choice_into_probe( probeID, animPr ) \ 7.367 + /* do nothing */ 7.368 + 7.369 +void 7.370 +VMS_impl__record_interval_start_in_probe( int32 probeID ); 7.371 +#define VMS__record_interval_start_in_probe( probeID ) \ 7.372 + /* do nothing */ 7.373 + 7.374 +void 7.375 +VMS_impl__record_interval_end_in_probe( int32 probeID ); 7.376 +#define VMS__record_interval_end_in_probe( probeID ) \ 7.377 + /* do nothing */ 7.378 + 7.379 +inline void doNothing(); 7.380 +void 7.381 +VMS_impl__print_stats_of_probe( int32 probeID 
); 7.382 +#define VMS__print_stats_of_probe( probeID ) \ 7.383 + doNothing/* do nothing */ 7.384 + 7.385 +void 7.386 +VMS_impl__print_stats_of_all_probes(); 7.387 +#define VMS__print_stats_of_all_probes \ 7.388 + doNothing/* do nothing */ 7.389 + 7.390 +#endif /* defined STATS__ENABLE_PROBES */ 7.391 + 7.392 +#endif /* _PROBES_H */ 7.393 +
8.1 --- a/vmalloc.c Thu Oct 06 16:24:17 2011 +0200 8.2 +++ b/vmalloc.c Wed Jan 04 16:10:11 2012 -0800 8.3 @@ -1,495 +1,495 @@ 8.4 -/* 8.5 - * Copyright 2009 OpenSourceCodeStewardshipFoundation.org 8.6 - * Licensed under GNU General Public License version 2 8.7 - * 8.8 - * Author: seanhalle@yahoo.com 8.9 - * 8.10 - * Created on November 14, 2009, 9:07 PM 8.11 - */ 8.12 - 8.13 -#include <malloc.h> 8.14 -#include <inttypes.h> 8.15 -#include <stdlib.h> 8.16 -#include <stdio.h> 8.17 - 8.18 -#include "VMS.h" 8.19 -#include "Histogram/Histogram.h" 8.20 - 8.21 -/*Helper function 8.22 - *Insert a newly generated free chunk into the first spot on the free list. 8.23 - * The chunk is cast as a MallocProlog, so the various pointers in it are 8.24 - * accessed with C's help -- and the size of the prolog is easily added to 8.25 - * the pointer when a chunk is returned to the app -- so C handles changes 8.26 - * in pointer sizes among machines. 8.27 - * 8.28 - *The list head is a normal MallocProlog struct -- identified by its 8.29 - * prevChunkInFreeList being NULL -- the only one. 8.30 - * 8.31 - *The end of the list is identified by next chunk being NULL, as usual. 8.32 - */ 8.33 -void inline 8.34 -add_chunk_to_free_list( MallocProlog *chunk, MallocProlog *listHead ) 8.35 - { 8.36 - chunk->nextChunkInFreeList = listHead->nextChunkInFreeList; 8.37 - if( chunk->nextChunkInFreeList != NULL ) //if not last in free list 8.38 - chunk->nextChunkInFreeList->prevChunkInFreeList = chunk; 8.39 - chunk->prevChunkInFreeList = listHead; 8.40 - listHead->nextChunkInFreeList = chunk; 8.41 - } 8.42 - 8.43 - 8.44 -/*This is sequential code, meant to only be called from the Master, not from 8.45 - * any slave VPs. 8.46 - *Search down list, checking size by the nextHigherInMem pointer, to find 8.47 - * first chunk bigger than size needed. 8.48 - *Shave off the extra and make it into a new free-list element, hook it in 8.49 - * then return the address of the found element plus size of prolog. 
8.50 - * 8.51 - *Will find a 8.52 - */ 8.53 -void *VMS__malloc( size_t sizeRequested ) 8.54 - { MallocProlog *foundElem = NULL, *currElem, *newElem; 8.55 - ssize_t amountExtra, sizeConsumed,sizeOfFound; 8.56 - uint32 foundElemIsTopOfHeap; 8.57 - 8.58 - //============================= MEASUREMENT STUFF ======================== 8.59 - #ifdef MEAS__TIME_MALLOC 8.60 - int32 startStamp, endStamp; 8.61 - saveLowTimeStampCountInto( startStamp ); 8.62 - #endif 8.63 - //======================================================================== 8.64 - 8.65 - //step up the size to be aligned at 16-byte boundary, prob better ways 8.66 - sizeRequested = (sizeRequested + 16) & ~15; 8.67 - currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList; 8.68 - 8.69 - while( currElem != NULL ) 8.70 - { //check if size of currElem is big enough 8.71 - sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem); 8.72 - amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog); 8.73 - if( amountExtra > 0 ) 8.74 - { //found it, get out of loop 8.75 - foundElem = currElem; 8.76 - currElem = NULL; 8.77 - } 8.78 - else 8.79 - currElem = currElem->nextChunkInFreeList; 8.80 - } 8.81 - 8.82 - if( foundElem == NULL ) 8.83 - { ERROR("\nmalloc failed\n") 8.84 - return (void *)NULL; //indicates malloc failed 8.85 - } 8.86 - //Using a kludge to identify the element that is the top chunk in the 8.87 - // heap -- saving top-of-heap addr in head's nextHigherInMem -- and 8.88 - // save addr of start of heap in head's nextLowerInMem 8.89 - //Will handle top of Heap specially 8.90 - foundElemIsTopOfHeap = foundElem->nextHigherInMem == 8.91 - _VMSMasterEnv->freeListHead->nextHigherInMem; 8.92 - 8.93 - //before shave off and try to insert new elem, remove found elem 8.94 - //note, foundElem will never be the head, so always has valid prevChunk 8.95 - foundElem->prevChunkInFreeList->nextChunkInFreeList = 8.96 - foundElem->nextChunkInFreeList; 8.97 - if( 
foundElem->nextChunkInFreeList != NULL ) 8.98 - { foundElem->nextChunkInFreeList->prevChunkInFreeList = 8.99 - foundElem->prevChunkInFreeList; 8.100 - } 8.101 - foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated 8.102 - 8.103 - //if enough, turn extra into new elem & insert it 8.104 - if( amountExtra > 64 ) 8.105 - { //make new elem by adding to addr of curr elem then casting 8.106 - sizeConsumed = sizeof(MallocProlog) + sizeRequested; 8.107 - newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed ); 8.108 - newElem->nextLowerInMem = foundElem; //This is evil (but why?) 8.109 - newElem->nextHigherInMem = foundElem->nextHigherInMem; //This is evil (but why?) 8.110 - foundElem->nextHigherInMem = newElem; 8.111 - if( ! foundElemIsTopOfHeap ) 8.112 - { //there is no next higher for top of heap, so can't write to it 8.113 - newElem->nextHigherInMem->nextLowerInMem = newElem; 8.114 - } 8.115 - add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead ); 8.116 - } 8.117 - else 8.118 - { 8.119 - sizeConsumed = sizeOfFound; 8.120 - } 8.121 - _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed; 8.122 - 8.123 - //============================= MEASUREMENT STUFF ======================== 8.124 - #ifdef MEAS__TIME_MALLOC 8.125 - saveLowTimeStampCountInto( endStamp ); 8.126 - addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist ); 8.127 - #endif 8.128 - //======================================================================== 8.129 - 8.130 - //skip over the prolog by adding its size to the pointer return 8.131 - return (void*)((uintptr_t)foundElem + sizeof(MallocProlog)); 8.132 - } 8.133 - 8.134 -/*This is sequential code, meant to only be called from the Master, not from 8.135 - * any slave VPs. 8.136 - *Search down list, checking size by the nextHigherInMem pointer, to find 8.137 - * first chunk bigger than size needed. 
8.138 - *Shave off the extra and make it into a new free-list element, hook it in 8.139 - * then return the address of the found element plus size of prolog. 8.140 - * 8.141 - * The difference to the regular malloc is, that all the allocated chunks are 8.142 - * aligned and padded to the size of a CACHE_LINE. Thus creating a new chunk 8.143 - * before the aligned chunk. 8.144 - */ 8.145 -void *VMS__malloc_aligned( size_t sizeRequested ) 8.146 - { MallocProlog *foundElem = NULL, *currElem, *newElem; 8.147 - ssize_t amountExtra, sizeConsumed,sizeOfFound,prevAmount; 8.148 - uint32 foundElemIsTopOfHeap; 8.149 - 8.150 - //============================= MEASUREMENT STUFF ======================== 8.151 - #ifdef MEAS__TIME_MALLOC 8.152 - uint32 startStamp, endStamp; 8.153 - saveLowTimeStampCountInto( startStamp ); 8.154 - #endif 8.155 - //======================================================================== 8.156 - 8.157 - //step up the size to be multiple of the cache line size 8.158 - sizeRequested = (sizeRequested + CACHE_LINE) & ~(CACHE_LINE-1); 8.159 - currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList; 8.160 - 8.161 - while( currElem != NULL ) 8.162 - { //check if size of currElem is big enough 8.163 - sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem); 8.164 - amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog); 8.165 - if( amountExtra > 0 ) 8.166 - { 8.167 - //look if the found element is already aligned 8.168 - if((((uintptr_t)currElem+sizeof(MallocProlog)) & (uintptr_t)(CACHE_LINE-1)) == 0){ 8.169 - //found it, get out of loop 8.170 - foundElem = currElem; 8.171 - break; 8.172 - }else{ 8.173 - //find first aligned address and check if it's still big enough 8.174 - //check also if the space before the aligned address is big enough 8.175 - //for a new element 8.176 - void *firstAlignedAddr = (void*)(((uintptr_t)currElem + 2*CACHE_LINE) & ~((uintptr_t)(CACHE_LINE-1))); 8.177 - prevAmount = 
(uintptr_t)firstAlignedAddr - (uintptr_t)currElem; 8.178 - sizeOfFound=(uintptr_t)currElem->nextHigherInMem -(uintptr_t)firstAlignedAddr + sizeof(MallocProlog); 8.179 - amountExtra= sizeOfFound - sizeRequested - sizeof(MallocProlog); 8.180 - if(prevAmount > 2*sizeof(MallocProlog) && amountExtra > 0 ){ 8.181 - //found suitable element 8.182 - //create new previous element and exit loop 8.183 - MallocProlog *newAlignedElem = (MallocProlog*)firstAlignedAddr - 1; 8.184 - 8.185 - //insert new element into free list 8.186 - if(currElem->nextChunkInFreeList != NULL) 8.187 - currElem->nextChunkInFreeList->prevChunkInFreeList = newAlignedElem; 8.188 - newAlignedElem->prevChunkInFreeList = currElem; 8.189 - newAlignedElem->nextChunkInFreeList = currElem->nextChunkInFreeList; 8.190 - currElem->nextChunkInFreeList = newAlignedElem; 8.191 - 8.192 - //set higherInMem and lowerInMem 8.193 - newAlignedElem->nextHigherInMem = currElem->nextHigherInMem; 8.194 - foundElemIsTopOfHeap = currElem->nextHigherInMem == 8.195 - _VMSMasterEnv->freeListHead->nextHigherInMem; 8.196 - if(!foundElemIsTopOfHeap) 8.197 - currElem->nextHigherInMem->nextLowerInMem = newAlignedElem; 8.198 - currElem->nextHigherInMem = newAlignedElem; 8.199 - newAlignedElem->nextLowerInMem = currElem; 8.200 - 8.201 - //Found new element leaving loop 8.202 - foundElem = newAlignedElem; 8.203 - break; 8.204 - } 8.205 - } 8.206 - 8.207 - } 8.208 - currElem = currElem->nextChunkInFreeList; 8.209 - } 8.210 - 8.211 - if( foundElem == NULL ) 8.212 - { ERROR("\nmalloc failed\n") 8.213 - return (void *)NULL; //indicates malloc failed 8.214 - } 8.215 - //Using a kludge to identify the element that is the top chunk in the 8.216 - // heap -- saving top-of-heap addr in head's nextHigherInMem -- and 8.217 - // save addr of start of heap in head's nextLowerInMem 8.218 - //Will handle top of Heap specially 8.219 - foundElemIsTopOfHeap = foundElem->nextHigherInMem == 8.220 - _VMSMasterEnv->freeListHead->nextHigherInMem; 8.221 - 8.222 
- //before shave off and try to insert new elem, remove found elem 8.223 - //note, foundElem will never be the head, so always has valid prevChunk 8.224 - foundElem->prevChunkInFreeList->nextChunkInFreeList = 8.225 - foundElem->nextChunkInFreeList; 8.226 - if( foundElem->nextChunkInFreeList != NULL ) 8.227 - { foundElem->nextChunkInFreeList->prevChunkInFreeList = 8.228 - foundElem->prevChunkInFreeList; 8.229 - } 8.230 - foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated 8.231 - 8.232 - //if enough, turn extra into new elem & insert it 8.233 - if( amountExtra > 64 ) 8.234 - { //make new elem by adding to addr of curr elem then casting 8.235 - sizeConsumed = sizeof(MallocProlog) + sizeRequested; 8.236 - newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed ); 8.237 - newElem->nextHigherInMem = foundElem->nextHigherInMem; 8.238 - newElem->nextLowerInMem = foundElem; 8.239 - foundElem->nextHigherInMem = newElem; 8.240 - 8.241 - if( ! foundElemIsTopOfHeap ) 8.242 - { //there is no next higher for top of heap, so can't write to it 8.243 - newElem->nextHigherInMem->nextLowerInMem = newElem; 8.244 - } 8.245 - add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead ); 8.246 - } 8.247 - else 8.248 - { 8.249 - sizeConsumed = sizeOfFound; 8.250 - } 8.251 - _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed; 8.252 - 8.253 - //============================= MEASUREMENT STUFF ======================== 8.254 - #ifdef MEAS__TIME_MALLOC 8.255 - saveLowTimeStampCountInto( endStamp ); 8.256 - addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist ); 8.257 - #endif 8.258 - //======================================================================== 8.259 - 8.260 - //skip over the prolog by adding its size to the pointer return 8.261 - return (void*)((uintptr_t)foundElem + sizeof(MallocProlog)); 8.262 - } 8.263 - 8.264 - 8.265 -/*This is sequential code -- only to be called from the Master 8.266 - * When free, subtract the size of 
prolog from pointer, then cast it to a 8.267 - * MallocProlog. Then check the nextLower and nextHigher chunks to see if 8.268 - * one or both are also free, and coalesce if so, and if neither free, then 8.269 - * add this one to free-list. 8.270 - */ 8.271 -void 8.272 -VMS__free( void *ptrToFree ) 8.273 - { MallocProlog *elemToFree, *nextLowerElem, *nextHigherElem; 8.274 - size_t sizeOfElem; 8.275 - uint32 lowerExistsAndIsFree, higherExistsAndIsFree; 8.276 - 8.277 - //============================= MEASUREMENT STUFF ======================== 8.278 - #ifdef MEAS__TIME_MALLOC 8.279 - int32 startStamp, endStamp; 8.280 - saveLowTimeStampCountInto( startStamp ); 8.281 - #endif 8.282 - //======================================================================== 8.283 - 8.284 - if( ptrToFree < (void*)_VMSMasterEnv->freeListHead->nextLowerInMem || 8.285 - ptrToFree > (void*)_VMSMasterEnv->freeListHead->nextHigherInMem ) 8.286 - { //outside the range of data owned by VMS's malloc, so do nothing 8.287 - return; 8.288 - } 8.289 - //subtract size of prolog to get pointer to prolog, then cast 8.290 - elemToFree = (MallocProlog *)((uintptr_t)ptrToFree - sizeof(MallocProlog)); 8.291 - sizeOfElem =(size_t)((uintptr_t)elemToFree->nextHigherInMem-(uintptr_t)elemToFree); 8.292 - 8.293 - if( elemToFree->prevChunkInFreeList != NULL ) 8.294 - { printf( "error: freeing same element twice!" 
); exit(1); 8.295 - } 8.296 - 8.297 - _VMSMasterEnv->amtOfOutstandingMem -= sizeOfElem; 8.298 - 8.299 - nextLowerElem = elemToFree->nextLowerInMem; 8.300 - nextHigherElem = elemToFree->nextHigherInMem; 8.301 - 8.302 - if( nextHigherElem == NULL ) 8.303 - higherExistsAndIsFree = FALSE; 8.304 - else //okay exists, now check if in the free-list by checking back ptr 8.305 - higherExistsAndIsFree = (nextHigherElem->prevChunkInFreeList != NULL); 8.306 - 8.307 - if( nextLowerElem == NULL ) 8.308 - lowerExistsAndIsFree = FALSE; 8.309 - else //okay, it exists, now check if it's free 8.310 - lowerExistsAndIsFree = (nextLowerElem->prevChunkInFreeList != NULL); 8.311 - 8.312 - 8.313 - //now, know what exists and what's free 8.314 - if( lowerExistsAndIsFree ) 8.315 - { if( higherExistsAndIsFree ) 8.316 - { //both exist and are free, so coalesce all three 8.317 - //First, remove higher from free-list 8.318 - nextHigherElem->prevChunkInFreeList->nextChunkInFreeList = 8.319 - nextHigherElem->nextChunkInFreeList; 8.320 - if( nextHigherElem->nextChunkInFreeList != NULL ) //end-of-list? 8.321 - nextHigherElem->nextChunkInFreeList->prevChunkInFreeList = 8.322 - nextHigherElem->prevChunkInFreeList; 8.323 - //Now, fix-up sequence-in-mem list -- by side-effect, this also 8.324 - // changes size of the lower elem, which is still in free-list 8.325 - nextLowerElem->nextHigherInMem = nextHigherElem->nextHigherInMem; 8.326 - if( nextHigherElem->nextHigherInMem != 8.327 - _VMSMasterEnv->freeListHead->nextHigherInMem ) 8.328 - nextHigherElem->nextHigherInMem->nextLowerInMem = nextLowerElem; 8.329 - //notice didn't do anything to elemToFree -- it simply is no 8.330 - // longer reachable from any of the lists. Wonder if could be a 8.331 - // security leak because left valid addresses in it, 8.332 - // but don't care for now. 8.333 - } 8.334 - else 8.335 - { //lower is the only of the two that exists and is free, 8.336 - //In this case, no adjustment to free-list, just change mem-list. 
8.337 - // By side-effect, changes size of the lower elem 8.338 - nextLowerElem->nextHigherInMem = elemToFree->nextHigherInMem; 8.339 - if( elemToFree->nextHigherInMem != 8.340 - _VMSMasterEnv->freeListHead->nextHigherInMem ) 8.341 - elemToFree->nextHigherInMem->nextLowerInMem = nextLowerElem; 8.342 - } 8.343 - } 8.344 - else 8.345 - { //lower either doesn't exist or isn't free, so check higher 8.346 - if( higherExistsAndIsFree ) 8.347 - { //higher exists and is the only of the two free 8.348 - //First, in free-list, replace higher elem with the one to free 8.349 - elemToFree->nextChunkInFreeList=nextHigherElem->nextChunkInFreeList; 8.350 - elemToFree->prevChunkInFreeList=nextHigherElem->prevChunkInFreeList; 8.351 - elemToFree->prevChunkInFreeList->nextChunkInFreeList = elemToFree; 8.352 - if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list? 8.353 - elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree; 8.354 - //Now chg mem-list. By side-effect, changes size of elemToFree 8.355 - elemToFree->nextHigherInMem = nextHigherElem->nextHigherInMem; 8.356 - if( elemToFree->nextHigherInMem != 8.357 - _VMSMasterEnv->freeListHead->nextHigherInMem ) 8.358 - elemToFree->nextHigherInMem->nextLowerInMem = elemToFree; 8.359 - } 8.360 - else 8.361 - { //neither lower nor higher is availabe to coalesce so add to list 8.362 - // this makes prev chunk ptr non-null, which indicates it's free 8.363 - elemToFree->nextChunkInFreeList = 8.364 - _VMSMasterEnv->freeListHead->nextChunkInFreeList; 8.365 - _VMSMasterEnv->freeListHead->nextChunkInFreeList = elemToFree; 8.366 - if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list? 
8.367 - elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree; 8.368 - elemToFree->prevChunkInFreeList = _VMSMasterEnv->freeListHead; 8.369 - } 8.370 - } 8.371 - //============================= MEASUREMENT STUFF ======================== 8.372 - #ifdef MEAS__TIME_MALLOC 8.373 - saveLowTimeStampCountInto( endStamp ); 8.374 - addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->freeTimeHist ); 8.375 - #endif 8.376 - //======================================================================== 8.377 - 8.378 - } 8.379 - 8.380 - 8.381 -/*Allocates memory from the external system -- higher overhead 8.382 - * 8.383 - *Because of Linux's malloc throwing bizarre random faults when malloc is 8.384 - * used inside a VMS virtual processor, have to pass this as a request and 8.385 - * have the core loop do it when it gets around to it -- will look for these 8.386 - * chores leftover from the previous animation of masterVP the next time it 8.387 - * goes to animate the masterVP -- so it takes two separate masterVP 8.388 - * animations, separated by work, to complete an external malloc or 8.389 - * external free request. 8.390 - * 8.391 - *Thinking core loop accepts signals -- just looks if signal-location is 8.392 - * empty or not -- 8.393 - */ 8.394 -void * 8.395 -VMS__malloc_in_ext( size_t sizeRequested ) 8.396 - { 8.397 - /* 8.398 - //This is running in the master, so no chance for multiple cores to be 8.399 - // competing for the core's flag. 8.400 - if( *(_VMSMasterEnv->coreLoopSignalAddr[ 0 ]) != 0 ) 8.401 - { //something has already signalled to core loop, so save the signal 8.402 - // and look, next time master animated, to see if can send it. 8.403 - //Note, the addr to put a signal is in the coreloop's frame, so just 8.404 - // checks it each time through -- make it volatile to avoid GCC 8.405 - // optimizations -- it's a coreloop local var that only changes 8.406 - // after jumping away. 
The signal includes the addr to send the 8.407 - //return to -- even if just empty return completion-signal 8.408 - // 8.409 - //save the signal in some queue that the master looks at each time 8.410 - // it starts up -- one loc says if empty for fast common case -- 8.411 - //something like that -- want to hide this inside this call -- but 8.412 - // think this has to come as a request -- req handler gives procr 8.413 - // back to master loop, which gives it back to req handler at point 8.414 - // it sees that core loop has sent return signal. Something like 8.415 - // that. 8.416 - saveTheSignal 8.417 - 8.418 - } 8.419 - coreSigData->type = malloc; 8.420 - coreSigData->sizeToMalloc = sizeRequested; 8.421 - coreSigData->locToSignalCompletion = &figureOut; 8.422 - _VMSMasterEnv->coreLoopSignals[ 0 ] = coreSigData; 8.423 - */ 8.424 - //just risk system-stack faults until get this figured out 8.425 - return malloc( sizeRequested ); 8.426 - } 8.427 - 8.428 - 8.429 -/*Frees memory that was allocated in the external system -- higher overhead 8.430 - * 8.431 - *As noted in external malloc comment, this is clunky 'cause the free has 8.432 - * to be called in the core loop. 
8.433 - */ 8.434 -void 8.435 -VMS__free_in_ext( void *ptrToFree ) 8.436 - { 8.437 - //just risk system-stack faults until get this figured out 8.438 - free( ptrToFree ); 8.439 - 8.440 - //TODO: fix this -- so 8.441 - } 8.442 - 8.443 - 8.444 -/*Designed to be called from the main thread outside of VMS, during init 8.445 - */ 8.446 -MallocProlog * 8.447 -VMS_ext__create_free_list() 8.448 - { MallocProlog *freeListHead, *firstChunk; 8.449 - 8.450 - //Note, this is running in the main thread -- all increases in malloc 8.451 - // mem and all frees of it must be done in this thread, with the 8.452 - // thread's original stack available 8.453 - freeListHead = malloc( sizeof(MallocProlog) ); 8.454 - firstChunk = malloc( MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE ); 8.455 - if( firstChunk == NULL ) {printf("malloc error\n"); exit(1);} 8.456 - 8.457 - //Touch memory to avoid page faults 8.458 - void *ptr,*endPtr; 8.459 - endPtr = (void*)firstChunk+MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE; 8.460 - for(ptr = firstChunk; ptr < endPtr; ptr+=PAGE_SIZE) 8.461 - { 8.462 - *(char*)ptr = 0; 8.463 - } 8.464 - 8.465 - freeListHead->prevChunkInFreeList = NULL; 8.466 - //Use this addr to free the heap when cleanup 8.467 - freeListHead->nextLowerInMem = firstChunk; 8.468 - //to identify top-of-heap elem, compare this addr to elem's next higher 8.469 - freeListHead->nextHigherInMem = (void*)( (uintptr_t)firstChunk + 8.470 - MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE); 8.471 - freeListHead->nextChunkInFreeList = firstChunk; 8.472 - 8.473 - firstChunk->nextChunkInFreeList = NULL; 8.474 - firstChunk->prevChunkInFreeList = freeListHead; 8.475 - //next Higher has to be set to top of chunk, so can calc size in malloc 8.476 - firstChunk->nextHigherInMem = (void*)( (uintptr_t)firstChunk + 8.477 - MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE); 8.478 - firstChunk->nextLowerInMem = NULL; //identifies as bott of heap 8.479 - 8.480 - _VMSMasterEnv->amtOfOutstandingMem = 0; //none allocated yet 8.481 - 8.482 - return freeListHead; 
8.483 - } 8.484 - 8.485 - 8.486 -/*Designed to be called from the main thread outside of VMS, during cleanup 8.487 - */ 8.488 -void 8.489 -VMS_ext__free_free_list( MallocProlog *freeListHead ) 8.490 - { 8.491 - //stashed a ptr to the one and only bug chunk malloc'd from OS in the 8.492 - // free list head's next lower in mem pointer 8.493 - free( freeListHead->nextLowerInMem ); 8.494 - 8.495 - //don't free the head -- it'll be in an array eventually -- free whole 8.496 - // array when all the free lists linked from it have already been freed 8.497 - } 8.498 - 8.499 +/* 8.500 + * Copyright 2009 OpenSourceCodeStewardshipFoundation.org 8.501 + * Licensed under GNU General Public License version 2 8.502 + * 8.503 + * Author: seanhalle@yahoo.com 8.504 + * 8.505 + * Created on November 14, 2009, 9:07 PM 8.506 + */ 8.507 + 8.508 +#include <malloc.h> 8.509 +#include <inttypes.h> 8.510 +#include <stdlib.h> 8.511 +#include <stdio.h> 8.512 + 8.513 +#include "VMS.h" 8.514 +#include "Histogram/Histogram.h" 8.515 + 8.516 +/*Helper function 8.517 + *Insert a newly generated free chunk into the first spot on the free list. 8.518 + * The chunk is cast as a MallocProlog, so the various pointers in it are 8.519 + * accessed with C's help -- and the size of the prolog is easily added to 8.520 + * the pointer when a chunk is returned to the app -- so C handles changes 8.521 + * in pointer sizes among machines. 8.522 + * 8.523 + *The list head is a normal MallocProlog struct -- identified by its 8.524 + * prevChunkInFreeList being NULL -- the only one. 8.525 + * 8.526 + *The end of the list is identified by next chunk being NULL, as usual. 
8.527 + */ 8.528 +void inline 8.529 +add_chunk_to_free_list( MallocProlog *chunk, MallocProlog *listHead ) 8.530 + { 8.531 + chunk->nextChunkInFreeList = listHead->nextChunkInFreeList; 8.532 + if( chunk->nextChunkInFreeList != NULL ) //if not last in free list 8.533 + chunk->nextChunkInFreeList->prevChunkInFreeList = chunk; 8.534 + chunk->prevChunkInFreeList = listHead; 8.535 + listHead->nextChunkInFreeList = chunk; 8.536 + } 8.537 + 8.538 + 8.539 +/*This is sequential code, meant to only be called from the Master, not from 8.540 + * any slave VPs. 8.541 + *Search down list, checking size by the nextHigherInMem pointer, to find 8.542 + * first chunk bigger than size needed. 8.543 + *Shave off the extra and make it into a new free-list element, hook it in 8.544 + * then return the address of the found element plus size of prolog. 8.545 + * 8.546 + *Will find a 8.547 + */ 8.548 +void *VMS__malloc( size_t sizeRequested ) 8.549 + { MallocProlog *foundElem = NULL, *currElem, *newElem; 8.550 + ssize_t amountExtra, sizeConsumed,sizeOfFound; 8.551 + uint32 foundElemIsTopOfHeap; 8.552 + 8.553 + //============================= MEASUREMENT STUFF ======================== 8.554 + #ifdef MEAS__TIME_MALLOC 8.555 + int32 startStamp, endStamp; 8.556 + saveLowTimeStampCountInto( startStamp ); 8.557 + #endif 8.558 + //======================================================================== 8.559 + 8.560 + //step up the size to be aligned at 16-byte boundary, prob better ways 8.561 + sizeRequested = (sizeRequested + 16) & ~15; 8.562 + currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList; 8.563 + 8.564 + while( currElem != NULL ) 8.565 + { //check if size of currElem is big enough 8.566 + sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem); 8.567 + amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog); 8.568 + if( amountExtra > 0 ) 8.569 + { //found it, get out of loop 8.570 + foundElem = currElem; 8.571 + currElem = NULL; 8.572 + } 8.573 
+ else 8.574 + currElem = currElem->nextChunkInFreeList; 8.575 + } 8.576 + 8.577 + if( foundElem == NULL ) 8.578 + { ERROR("\nmalloc failed\n") 8.579 + return (void *)NULL; //indicates malloc failed 8.580 + } 8.581 + //Using a kludge to identify the element that is the top chunk in the 8.582 + // heap -- saving top-of-heap addr in head's nextHigherInMem -- and 8.583 + // save addr of start of heap in head's nextLowerInMem 8.584 + //Will handle top of Heap specially 8.585 + foundElemIsTopOfHeap = foundElem->nextHigherInMem == 8.586 + _VMSMasterEnv->freeListHead->nextHigherInMem; 8.587 + 8.588 + //before shave off and try to insert new elem, remove found elem 8.589 + //note, foundElem will never be the head, so always has valid prevChunk 8.590 + foundElem->prevChunkInFreeList->nextChunkInFreeList = 8.591 + foundElem->nextChunkInFreeList; 8.592 + if( foundElem->nextChunkInFreeList != NULL ) 8.593 + { foundElem->nextChunkInFreeList->prevChunkInFreeList = 8.594 + foundElem->prevChunkInFreeList; 8.595 + } 8.596 + foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated 8.597 + 8.598 + //if enough, turn extra into new elem & insert it 8.599 + if( amountExtra > 64 ) 8.600 + { //make new elem by adding to addr of curr elem then casting 8.601 + sizeConsumed = sizeof(MallocProlog) + sizeRequested; 8.602 + newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed ); 8.603 + newElem->nextLowerInMem = foundElem; //This is evil (but why?) 8.604 + newElem->nextHigherInMem = foundElem->nextHigherInMem; //This is evil (but why?) 8.605 + foundElem->nextHigherInMem = newElem; 8.606 + if( ! 
foundElemIsTopOfHeap ) 8.607 + { //there is no next higher for top of heap, so can't write to it 8.608 + newElem->nextHigherInMem->nextLowerInMem = newElem; 8.609 + } 8.610 + add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead ); 8.611 + } 8.612 + else 8.613 + { 8.614 + sizeConsumed = sizeOfFound; 8.615 + } 8.616 + _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed; 8.617 + 8.618 + //============================= MEASUREMENT STUFF ======================== 8.619 + #ifdef MEAS__TIME_MALLOC 8.620 + saveLowTimeStampCountInto( endStamp ); 8.621 + addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist ); 8.622 + #endif 8.623 + //======================================================================== 8.624 + 8.625 + //skip over the prolog by adding its size to the pointer return 8.626 + return (void*)((uintptr_t)foundElem + sizeof(MallocProlog)); 8.627 + } 8.628 + 8.629 +/*This is sequential code, meant to only be called from the Master, not from 8.630 + * any slave VPs. 8.631 + *Search down list, checking size by the nextHigherInMem pointer, to find 8.632 + * first chunk bigger than size needed. 8.633 + *Shave off the extra and make it into a new free-list element, hook it in 8.634 + * then return the address of the found element plus size of prolog. 8.635 + * 8.636 + * The difference to the regular malloc is, that all the allocated chunks are 8.637 + * aligned and padded to the size of a CACHE_LINE. Thus creating a new chunk 8.638 + * before the aligned chunk. 
8.639 + */ 8.640 +void *VMS__malloc_aligned( size_t sizeRequested ) 8.641 + { MallocProlog *foundElem = NULL, *currElem, *newElem; 8.642 + ssize_t amountExtra, sizeConsumed,sizeOfFound,prevAmount; 8.643 + uint32 foundElemIsTopOfHeap; 8.644 + 8.645 + //============================= MEASUREMENT STUFF ======================== 8.646 + #ifdef MEAS__TIME_MALLOC 8.647 + uint32 startStamp, endStamp; 8.648 + saveLowTimeStampCountInto( startStamp ); 8.649 + #endif 8.650 + //======================================================================== 8.651 + 8.652 + //step up the size to be multiple of the cache line size 8.653 + sizeRequested = (sizeRequested + CACHE_LINE) & ~(CACHE_LINE-1); 8.654 + currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList; 8.655 + 8.656 + while( currElem != NULL ) 8.657 + { //check if size of currElem is big enough 8.658 + sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem); 8.659 + amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog); 8.660 + if( amountExtra > 0 ) 8.661 + { 8.662 + //look if the found element is already aligned 8.663 + if((((uintptr_t)currElem+sizeof(MallocProlog)) & (uintptr_t)(CACHE_LINE-1)) == 0){ 8.664 + //found it, get out of loop 8.665 + foundElem = currElem; 8.666 + break; 8.667 + }else{ 8.668 + //find first aligned address and check if it's still big enough 8.669 + //check also if the space before the aligned address is big enough 8.670 + //for a new element 8.671 + void *firstAlignedAddr = (void*)(((uintptr_t)currElem + 2*CACHE_LINE) & ~((uintptr_t)(CACHE_LINE-1))); 8.672 + prevAmount = (uintptr_t)firstAlignedAddr - (uintptr_t)currElem; 8.673 + sizeOfFound=(uintptr_t)currElem->nextHigherInMem -(uintptr_t)firstAlignedAddr + sizeof(MallocProlog); 8.674 + amountExtra= sizeOfFound - sizeRequested - sizeof(MallocProlog); 8.675 + if(prevAmount > 2*sizeof(MallocProlog) && amountExtra > 0 ){ 8.676 + //found suitable element 8.677 + //create new previous element and exit loop 8.678 + 
MallocProlog *newAlignedElem = (MallocProlog*)firstAlignedAddr - 1; 8.679 + 8.680 + //insert new element into free list 8.681 + if(currElem->nextChunkInFreeList != NULL) 8.682 + currElem->nextChunkInFreeList->prevChunkInFreeList = newAlignedElem; 8.683 + newAlignedElem->prevChunkInFreeList = currElem; 8.684 + newAlignedElem->nextChunkInFreeList = currElem->nextChunkInFreeList; 8.685 + currElem->nextChunkInFreeList = newAlignedElem; 8.686 + 8.687 + //set higherInMem and lowerInMem 8.688 + newAlignedElem->nextHigherInMem = currElem->nextHigherInMem; 8.689 + foundElemIsTopOfHeap = currElem->nextHigherInMem == 8.690 + _VMSMasterEnv->freeListHead->nextHigherInMem; 8.691 + if(!foundElemIsTopOfHeap) 8.692 + currElem->nextHigherInMem->nextLowerInMem = newAlignedElem; 8.693 + currElem->nextHigherInMem = newAlignedElem; 8.694 + newAlignedElem->nextLowerInMem = currElem; 8.695 + 8.696 + //Found new element leaving loop 8.697 + foundElem = newAlignedElem; 8.698 + break; 8.699 + } 8.700 + } 8.701 + 8.702 + } 8.703 + currElem = currElem->nextChunkInFreeList; 8.704 + } 8.705 + 8.706 + if( foundElem == NULL ) 8.707 + { ERROR("\nmalloc failed\n") 8.708 + return (void *)NULL; //indicates malloc failed 8.709 + } 8.710 + //Using a kludge to identify the element that is the top chunk in the 8.711 + // heap -- saving top-of-heap addr in head's nextHigherInMem -- and 8.712 + // save addr of start of heap in head's nextLowerInMem 8.713 + //Will handle top of Heap specially 8.714 + foundElemIsTopOfHeap = foundElem->nextHigherInMem == 8.715 + _VMSMasterEnv->freeListHead->nextHigherInMem; 8.716 + 8.717 + //before shave off and try to insert new elem, remove found elem 8.718 + //note, foundElem will never be the head, so always has valid prevChunk 8.719 + foundElem->prevChunkInFreeList->nextChunkInFreeList = 8.720 + foundElem->nextChunkInFreeList; 8.721 + if( foundElem->nextChunkInFreeList != NULL ) 8.722 + { foundElem->nextChunkInFreeList->prevChunkInFreeList = 8.723 + 
foundElem->prevChunkInFreeList; 8.724 + } 8.725 + foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated 8.726 + 8.727 + //if enough, turn extra into new elem & insert it 8.728 + if( amountExtra > 64 ) 8.729 + { //make new elem by adding to addr of curr elem then casting 8.730 + sizeConsumed = sizeof(MallocProlog) + sizeRequested; 8.731 + newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed ); 8.732 + newElem->nextHigherInMem = foundElem->nextHigherInMem; 8.733 + newElem->nextLowerInMem = foundElem; 8.734 + foundElem->nextHigherInMem = newElem; 8.735 + 8.736 + if( ! foundElemIsTopOfHeap ) 8.737 + { //there is no next higher for top of heap, so can't write to it 8.738 + newElem->nextHigherInMem->nextLowerInMem = newElem; 8.739 + } 8.740 + add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead ); 8.741 + } 8.742 + else 8.743 + { 8.744 + sizeConsumed = sizeOfFound; 8.745 + } 8.746 + _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed; 8.747 + 8.748 + //============================= MEASUREMENT STUFF ======================== 8.749 + #ifdef MEAS__TIME_MALLOC 8.750 + saveLowTimeStampCountInto( endStamp ); 8.751 + addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist ); 8.752 + #endif 8.753 + //======================================================================== 8.754 + 8.755 + //skip over the prolog by adding its size to the pointer return 8.756 + return (void*)((uintptr_t)foundElem + sizeof(MallocProlog)); 8.757 + } 8.758 + 8.759 + 8.760 +/*This is sequential code -- only to be called from the Master 8.761 + * When free, subtract the size of prolog from pointer, then cast it to a 8.762 + * MallocProlog. Then check the nextLower and nextHigher chunks to see if 8.763 + * one or both are also free, and coalesce if so, and if neither free, then 8.764 + * add this one to free-list. 
8.765 + */ 8.766 +void 8.767 +VMS__free( void *ptrToFree ) 8.768 + { MallocProlog *elemToFree, *nextLowerElem, *nextHigherElem; 8.769 + size_t sizeOfElem; 8.770 + uint32 lowerExistsAndIsFree, higherExistsAndIsFree; 8.771 + 8.772 + //============================= MEASUREMENT STUFF ======================== 8.773 + #ifdef MEAS__TIME_MALLOC 8.774 + int32 startStamp, endStamp; 8.775 + saveLowTimeStampCountInto( startStamp ); 8.776 + #endif 8.777 + //======================================================================== 8.778 + 8.779 + if( ptrToFree < (void*)_VMSMasterEnv->freeListHead->nextLowerInMem || 8.780 + ptrToFree > (void*)_VMSMasterEnv->freeListHead->nextHigherInMem ) 8.781 + { //outside the range of data owned by VMS's malloc, so do nothing 8.782 + return; 8.783 + } 8.784 + //subtract size of prolog to get pointer to prolog, then cast 8.785 + elemToFree = (MallocProlog *)((uintptr_t)ptrToFree - sizeof(MallocProlog)); 8.786 + sizeOfElem =(size_t)((uintptr_t)elemToFree->nextHigherInMem-(uintptr_t)elemToFree); 8.787 + 8.788 + if( elemToFree->prevChunkInFreeList != NULL ) 8.789 + { printf( "error: freeing same element twice!" 
); exit(1); 8.790 + } 8.791 + 8.792 + _VMSMasterEnv->amtOfOutstandingMem -= sizeOfElem; 8.793 + 8.794 + nextLowerElem = elemToFree->nextLowerInMem; 8.795 + nextHigherElem = elemToFree->nextHigherInMem; 8.796 + 8.797 + if( nextHigherElem == NULL ) 8.798 + higherExistsAndIsFree = FALSE; 8.799 + else //okay exists, now check if in the free-list by checking back ptr 8.800 + higherExistsAndIsFree = (nextHigherElem->prevChunkInFreeList != NULL); 8.801 + 8.802 + if( nextLowerElem == NULL ) 8.803 + lowerExistsAndIsFree = FALSE; 8.804 + else //okay, it exists, now check if it's free 8.805 + lowerExistsAndIsFree = (nextLowerElem->prevChunkInFreeList != NULL); 8.806 + 8.807 + 8.808 + //now, know what exists and what's free 8.809 + if( lowerExistsAndIsFree ) 8.810 + { if( higherExistsAndIsFree ) 8.811 + { //both exist and are free, so coalesce all three 8.812 + //First, remove higher from free-list 8.813 + nextHigherElem->prevChunkInFreeList->nextChunkInFreeList = 8.814 + nextHigherElem->nextChunkInFreeList; 8.815 + if( nextHigherElem->nextChunkInFreeList != NULL ) //end-of-list? 8.816 + nextHigherElem->nextChunkInFreeList->prevChunkInFreeList = 8.817 + nextHigherElem->prevChunkInFreeList; 8.818 + //Now, fix-up sequence-in-mem list -- by side-effect, this also 8.819 + // changes size of the lower elem, which is still in free-list 8.820 + nextLowerElem->nextHigherInMem = nextHigherElem->nextHigherInMem; 8.821 + if( nextHigherElem->nextHigherInMem != 8.822 + _VMSMasterEnv->freeListHead->nextHigherInMem ) 8.823 + nextHigherElem->nextHigherInMem->nextLowerInMem = nextLowerElem; 8.824 + //notice didn't do anything to elemToFree -- it simply is no 8.825 + // longer reachable from any of the lists. Wonder if could be a 8.826 + // security leak because left valid addresses in it, 8.827 + // but don't care for now. 8.828 + } 8.829 + else 8.830 + { //lower is the only of the two that exists and is free, 8.831 + //In this case, no adjustment to free-list, just change mem-list. 
8.832 + // By side-effect, changes size of the lower elem 8.833 + nextLowerElem->nextHigherInMem = elemToFree->nextHigherInMem; 8.834 + if( elemToFree->nextHigherInMem != 8.835 + _VMSMasterEnv->freeListHead->nextHigherInMem ) 8.836 + elemToFree->nextHigherInMem->nextLowerInMem = nextLowerElem; 8.837 + } 8.838 + } 8.839 + else 8.840 + { //lower either doesn't exist or isn't free, so check higher 8.841 + if( higherExistsAndIsFree ) 8.842 + { //higher exists and is the only of the two free 8.843 + //First, in free-list, replace higher elem with the one to free 8.844 + elemToFree->nextChunkInFreeList=nextHigherElem->nextChunkInFreeList; 8.845 + elemToFree->prevChunkInFreeList=nextHigherElem->prevChunkInFreeList; 8.846 + elemToFree->prevChunkInFreeList->nextChunkInFreeList = elemToFree; 8.847 + if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list? 8.848 + elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree; 8.849 + //Now chg mem-list. By side-effect, changes size of elemToFree 8.850 + elemToFree->nextHigherInMem = nextHigherElem->nextHigherInMem; 8.851 + if( elemToFree->nextHigherInMem != 8.852 + _VMSMasterEnv->freeListHead->nextHigherInMem ) 8.853 + elemToFree->nextHigherInMem->nextLowerInMem = elemToFree; 8.854 + } 8.855 + else 8.856 + { //neither lower nor higher is availabe to coalesce so add to list 8.857 + // this makes prev chunk ptr non-null, which indicates it's free 8.858 + elemToFree->nextChunkInFreeList = 8.859 + _VMSMasterEnv->freeListHead->nextChunkInFreeList; 8.860 + _VMSMasterEnv->freeListHead->nextChunkInFreeList = elemToFree; 8.861 + if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list? 
8.862 + elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree; 8.863 + elemToFree->prevChunkInFreeList = _VMSMasterEnv->freeListHead; 8.864 + } 8.865 + } 8.866 + //============================= MEASUREMENT STUFF ======================== 8.867 + #ifdef MEAS__TIME_MALLOC 8.868 + saveLowTimeStampCountInto( endStamp ); 8.869 + addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->freeTimeHist ); 8.870 + #endif 8.871 + //======================================================================== 8.872 + 8.873 + } 8.874 + 8.875 + 8.876 +/*Allocates memory from the external system -- higher overhead 8.877 + * 8.878 + *Because of Linux's malloc throwing bizarre random faults when malloc is 8.879 + * used inside a VMS virtual processor, have to pass this as a request and 8.880 + * have the core loop do it when it gets around to it -- will look for these 8.881 + * chores leftover from the previous animation of masterVP the next time it 8.882 + * goes to animate the masterVP -- so it takes two separate masterVP 8.883 + * animations, separated by work, to complete an external malloc or 8.884 + * external free request. 8.885 + * 8.886 + *Thinking core loop accepts signals -- just looks if signal-location is 8.887 + * empty or not -- 8.888 + */ 8.889 +void * 8.890 +VMS__malloc_in_ext( size_t sizeRequested ) 8.891 + { 8.892 + /* 8.893 + //This is running in the master, so no chance for multiple cores to be 8.894 + // competing for the core's flag. 8.895 + if( *(_VMSMasterEnv->coreLoopSignalAddr[ 0 ]) != 0 ) 8.896 + { //something has already signalled to core loop, so save the signal 8.897 + // and look, next time master animated, to see if can send it. 8.898 + //Note, the addr to put a signal is in the coreloop's frame, so just 8.899 + // checks it each time through -- make it volatile to avoid GCC 8.900 + // optimizations -- it's a coreloop local var that only changes 8.901 + // after jumping away. 
The signal includes the addr to send the 8.902 + //return to -- even if just empty return completion-signal 8.903 + // 8.904 + //save the signal in some queue that the master looks at each time 8.905 + // it starts up -- one loc says if empty for fast common case -- 8.906 + //something like that -- want to hide this inside this call -- but 8.907 + // think this has to come as a request -- req handler gives procr 8.908 + // back to master loop, which gives it back to req handler at point 8.909 + // it sees that core loop has sent return signal. Something like 8.910 + // that. 8.911 + saveTheSignal 8.912 + 8.913 + } 8.914 + coreSigData->type = malloc; 8.915 + coreSigData->sizeToMalloc = sizeRequested; 8.916 + coreSigData->locToSignalCompletion = &figureOut; 8.917 + _VMSMasterEnv->coreLoopSignals[ 0 ] = coreSigData; 8.918 + */ 8.919 + //just risk system-stack faults until get this figured out 8.920 + return malloc( sizeRequested ); 8.921 + } 8.922 + 8.923 + 8.924 +/*Frees memory that was allocated in the external system -- higher overhead 8.925 + * 8.926 + *As noted in external malloc comment, this is clunky 'cause the free has 8.927 + * to be called in the core loop. 
8.928 + */ 8.929 +void 8.930 +VMS__free_in_ext( void *ptrToFree ) 8.931 + { 8.932 + //just risk system-stack faults until get this figured out 8.933 + free( ptrToFree ); 8.934 + 8.935 + //TODO: fix this -- so 8.936 + } 8.937 + 8.938 + 8.939 +/*Designed to be called from the main thread outside of VMS, during init 8.940 + */ 8.941 +MallocProlog * 8.942 +VMS_ext__create_free_list() 8.943 + { MallocProlog *freeListHead, *firstChunk; 8.944 + 8.945 + //Note, this is running in the main thread -- all increases in malloc 8.946 + // mem and all frees of it must be done in this thread, with the 8.947 + // thread's original stack available 8.948 + freeListHead = malloc( sizeof(MallocProlog) ); 8.949 + firstChunk = malloc( MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE ); 8.950 + if( firstChunk == NULL ) {printf("malloc error\n"); exit(1);} 8.951 + 8.952 + //Touch memory to avoid page faults 8.953 + void *ptr,*endPtr; 8.954 + endPtr = (void*)firstChunk+MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE; 8.955 + for(ptr = firstChunk; ptr < endPtr; ptr+=PAGE_SIZE) 8.956 + { 8.957 + *(char*)ptr = 0; 8.958 + } 8.959 + 8.960 + freeListHead->prevChunkInFreeList = NULL; 8.961 + //Use this addr to free the heap when cleanup 8.962 + freeListHead->nextLowerInMem = firstChunk; 8.963 + //to identify top-of-heap elem, compare this addr to elem's next higher 8.964 + freeListHead->nextHigherInMem = (void*)( (uintptr_t)firstChunk + 8.965 + MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE); 8.966 + freeListHead->nextChunkInFreeList = firstChunk; 8.967 + 8.968 + firstChunk->nextChunkInFreeList = NULL; 8.969 + firstChunk->prevChunkInFreeList = freeListHead; 8.970 + //next Higher has to be set to top of chunk, so can calc size in malloc 8.971 + firstChunk->nextHigherInMem = (void*)( (uintptr_t)firstChunk + 8.972 + MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE); 8.973 + firstChunk->nextLowerInMem = NULL; //identifies as bott of heap 8.974 + 8.975 + _VMSMasterEnv->amtOfOutstandingMem = 0; //none allocated yet 8.976 + 8.977 + return freeListHead; 
8.978 + } 8.979 + 8.980 + 8.981 +/*Designed to be called from the main thread outside of VMS, during cleanup 8.982 + */ 8.983 +void 8.984 +VMS_ext__free_free_list( MallocProlog *freeListHead ) 8.985 + { 8.986 + //stashed a ptr to the one and only bug chunk malloc'd from OS in the 8.987 + // free list head's next lower in mem pointer 8.988 + free( freeListHead->nextLowerInMem ); 8.989 + 8.990 + //don't free the head -- it'll be in an array eventually -- free whole 8.991 + // array when all the free lists linked from it have already been freed 8.992 + } 8.993 +
9.1 --- a/vmalloc.h Thu Oct 06 16:24:17 2011 +0200 9.2 +++ b/vmalloc.h Wed Jan 04 16:10:11 2012 -0800 9.3 @@ -1,61 +1,61 @@ 9.4 -/* 9.5 - * Copyright 2009 OpenSourceCodeStewardshipFoundation.org 9.6 - * Licensed under GNU General Public License version 2 9.7 - * 9.8 - * Author: seanhalle@yahoo.com 9.9 - * 9.10 - * Created on November 14, 2009, 9:07 PM 9.11 - */ 9.12 - 9.13 -#ifndef _VMALLOC_H 9.14 -#define _VMALLOC_H 9.15 - 9.16 -#include <malloc.h> 9.17 -#include <inttypes.h> 9.18 -#include "VMS_primitive_data_types.h" 9.19 - 9.20 -typedef struct _MallocProlog MallocProlog; 9.21 - 9.22 -struct _MallocProlog 9.23 - { 9.24 - MallocProlog *nextChunkInFreeList; 9.25 - MallocProlog *prevChunkInFreeList; 9.26 - MallocProlog *nextHigherInMem; 9.27 - MallocProlog *nextLowerInMem; 9.28 - }; 9.29 -//MallocProlog 9.30 - 9.31 -typedef struct 9.32 - { 9.33 - MallocProlog *firstChunkInFreeList; 9.34 - int32 numInList; //TODO not used 9.35 - } 9.36 -FreeListHead; 9.37 - 9.38 -void * 9.39 -VMS__malloc( size_t sizeRequested ); 9.40 - 9.41 -void * 9.42 -VMS__malloc_aligned( size_t sizeRequested ); 9.43 - 9.44 -void 9.45 -VMS__free( void *ptrToFree ); 9.46 - 9.47 -/*Allocates memory from the external system -- higher overhead 9.48 - */ 9.49 -void * 9.50 -VMS__malloc_in_ext( size_t sizeRequested ); 9.51 - 9.52 -/*Frees memory that was allocated in the external system -- higher overhead 9.53 - */ 9.54 -void 9.55 -VMS__free_in_ext( void *ptrToFree ); 9.56 - 9.57 - 9.58 -MallocProlog * 9.59 -VMS_ext__create_free_list(); 9.60 - 9.61 -void 9.62 -VMS_ext__free_free_list( MallocProlog *freeListHead ); 9.63 - 9.64 +/* 9.65 + * Copyright 2009 OpenSourceCodeStewardshipFoundation.org 9.66 + * Licensed under GNU General Public License version 2 9.67 + * 9.68 + * Author: seanhalle@yahoo.com 9.69 + * 9.70 + * Created on November 14, 2009, 9:07 PM 9.71 + */ 9.72 + 9.73 +#ifndef _VMALLOC_H 9.74 +#define _VMALLOC_H 9.75 + 9.76 +#include <malloc.h> 9.77 +#include <inttypes.h> 9.78 +#include 
"VMS_primitive_data_types.h" 9.79 + 9.80 +typedef struct _MallocProlog MallocProlog; 9.81 + 9.82 +struct _MallocProlog 9.83 + { 9.84 + MallocProlog *nextChunkInFreeList; 9.85 + MallocProlog *prevChunkInFreeList; 9.86 + MallocProlog *nextHigherInMem; 9.87 + MallocProlog *nextLowerInMem; 9.88 + }; 9.89 +//MallocProlog 9.90 + 9.91 +typedef struct 9.92 + { 9.93 + MallocProlog *firstChunkInFreeList; 9.94 + int32 numInList; //TODO not used 9.95 + } 9.96 +FreeListHead; 9.97 + 9.98 +void * 9.99 +VMS__malloc( size_t sizeRequested ); 9.100 + 9.101 +void * 9.102 +VMS__malloc_aligned( size_t sizeRequested ); 9.103 + 9.104 +void 9.105 +VMS__free( void *ptrToFree ); 9.106 + 9.107 +/*Allocates memory from the external system -- higher overhead 9.108 + */ 9.109 +void * 9.110 +VMS__malloc_in_ext( size_t sizeRequested ); 9.111 + 9.112 +/*Frees memory that was allocated in the external system -- higher overhead 9.113 + */ 9.114 +void 9.115 +VMS__free_in_ext( void *ptrToFree ); 9.116 + 9.117 + 9.118 +MallocProlog * 9.119 +VMS_ext__create_free_list(); 9.120 + 9.121 +void 9.122 +VMS_ext__free_free_list( MallocProlog *freeListHead ); 9.123 + 9.124 #endif 9.125 \ No newline at end of file
10.1 --- a/vutilities.c Thu Oct 06 16:24:17 2011 +0200 10.2 +++ b/vutilities.c Wed Jan 04 16:10:11 2012 -0800 10.3 @@ -1,25 +1,25 @@ 10.4 -/* 10.5 - * Copyright 2009 OpenSourceCodeStewardshipFoundation.org 10.6 - * Licensed under GNU General Public License version 2 10.7 - * 10.8 - * Author: seanhalle@yahoo.com 10.9 - * 10.10 - * Created on November 14, 2009, 9:07 PM 10.11 - */ 10.12 - 10.13 -#include <malloc.h> 10.14 -#include <stdlib.h> 10.15 - 10.16 -#include "VMS.h" 10.17 - 10.18 - 10.19 -inline char * 10.20 -VMS__strDup( char *str ) 10.21 - { char *retStr; 10.22 - 10.23 - retStr = VMS__malloc( strlen(str) + 1 ); 10.24 - if( str == NULL ) return str; 10.25 - strcpy( retStr, str ); 10.26 - 10.27 - return retStr; 10.28 - } 10.29 +/* 10.30 + * Copyright 2009 OpenSourceCodeStewardshipFoundation.org 10.31 + * Licensed under GNU General Public License version 2 10.32 + * 10.33 + * Author: seanhalle@yahoo.com 10.34 + * 10.35 + * Created on November 14, 2009, 9:07 PM 10.36 + */ 10.37 + 10.38 +#include <malloc.h> 10.39 +#include <stdlib.h> 10.40 + 10.41 +#include "VMS.h" 10.42 + 10.43 + 10.44 +inline char * 10.45 +VMS__strDup( char *str ) 10.46 + { char *retStr; 10.47 + 10.48 + retStr = VMS__malloc( strlen(str) + 1 ); 10.49 + if( str == NULL ) return str; 10.50 + strcpy( retStr, str ); 10.51 + 10.52 + return retStr; 10.53 + }
11.1 --- a/vutilities.h Thu Oct 06 16:24:17 2011 +0200 11.2 +++ b/vutilities.h Wed Jan 04 16:10:11 2012 -0800 11.3 @@ -1,20 +1,20 @@ 11.4 -/* 11.5 - * Copyright 2009 OpenSourceCodeStewardshipFoundation.org 11.6 - * Licensed under GNU General Public License version 2 11.7 - * 11.8 - * Author: seanhalle@yahoo.com 11.9 - * 11.10 - * Created on November 14, 2009, 9:07 PM 11.11 - */ 11.12 - 11.13 - 11.14 -#ifndef _UTILITIES_H 11.15 -#define _UTILITIES_H 11.16 - 11.17 -#include <string.h> 11.18 -#include "VMS_primitive_data_types.h" 11.19 - 11.20 -inline char * 11.21 -VMS__strDup( char *str ); 11.22 - 11.23 -#endif 11.24 +/* 11.25 + * Copyright 2009 OpenSourceCodeStewardshipFoundation.org 11.26 + * Licensed under GNU General Public License version 2 11.27 + * 11.28 + * Author: seanhalle@yahoo.com 11.29 + * 11.30 + * Created on November 14, 2009, 9:07 PM 11.31 + */ 11.32 + 11.33 + 11.34 +#ifndef _UTILITIES_H 11.35 +#define _UTILITIES_H 11.36 + 11.37 +#include <string.h> 11.38 +#include "VMS_primitive_data_types.h" 11.39 + 11.40 +inline char * 11.41 +VMS__strDup( char *str ); 11.42 + 11.43 +#endif
