Mercurial > cgi-bin > hgwebdir.cgi > VMS > VMS_Implementations > VMS_impls > VMS__MC_shared_impl
changeset 182:7523ee70d66c perf_counters
merge changes from default
| author | Nina Engelhardt <nengel@mailbox.tu-berlin.de> |
|---|---|
| date | Fri, 06 Jan 2012 18:55:05 +0100 |
| parents | 3bd35fc83c61 dd0aa7f62780 |
| children | 50b29548d4f0 |
| files | .hgsubstate CoreLoop.c MasterLoop.c VMS.c VMS.h VMS__DESIGN_NOTES.txt |
| diffstat | 16 files changed, 1161 insertions(+), 1159 deletions(-) [+] |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/.hgeol Fri Jan 06 18:55:05 2012 +0100 1.3 @@ -0,0 +1,12 @@ 1.4 + 1.5 +[patterns] 1.6 +**.py = native 1.7 +**.txt = native 1.8 +**.c = native 1.9 +**.h = native 1.10 +**.cpp = native 1.11 +**.java = native 1.12 +**.sh = native 1.13 +**.pl = native 1.14 +**.jpg = bin 1.15 +**.gif = bin
2.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 2.2 +++ b/.hgsub Fri Jan 06 18:55:05 2012 +0100 2.3 @@ -0,0 +1,5 @@ 2.4 +DynArray = ../C/DynArray 2.5 +Hash_impl = ../C/Hash_impl 2.6 +Histogram = ../C/Histogram 2.7 +Queue_impl = ../C/Queue_impl 2.8 +
3.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 3.2 +++ b/.hgsubstate Fri Jan 06 18:55:05 2012 +0100 3.3 @@ -0,0 +1,4 @@ 3.4 +20f0996a84002efdfb2e260488b70ad415f2b4cf DynArray 3.5 +8bafd14e9fde67082fb08186463a4803bc25b428 Hash_impl 3.6 +20410d90dabbfee1fff8a3b48c6cf6090603e8ea Histogram 3.7 +7c9e00ff1bf452be98dce61cfa42c01939964d7d Queue_impl
4.1 --- a/.hgtags Wed Jan 04 16:40:10 2012 +0100 4.2 +++ b/.hgtags Fri Jan 06 18:55:05 2012 +0100 4.3 @@ -1,6 +1,1 @@ 4.4 9c3107044f86c36fea3a8f72f64910b1363555be Dec27_2010_about_to_add_sched_record 4.5 -97e26095c01fef53783d2d89e11575856ce243c5 V0 4.6 -21c95d402fe66570067000e484bfec9a5d92c9d0 malloc_touch 4.7 -42d015d48eebc7ba49b24fa5f24cb1e66244e5e7 malloc_touch 4.8 -42d015d48eebc7ba49b24fa5f24cb1e66244e5e7 malloc_touch 4.9 -0000000000000000000000000000000000000000 malloc_touch
5.1 --- a/CoreLoop.c Wed Jan 04 16:40:10 2012 +0100 5.2 +++ b/CoreLoop.c Fri Jan 06 18:55:05 2012 +0100 5.3 @@ -1,232 +1,232 @@ 5.4 -/* 5.5 - * Copyright 2010 OpenSourceStewardshipFoundation 5.6 - * 5.7 - * Licensed under BSD 5.8 - */ 5.9 - 5.10 - 5.11 -#include "VMS.h" 5.12 -#include "Queue_impl/BlockingQueue.h" 5.13 -#include "ProcrContext.h" 5.14 - 5.15 -#include <stdlib.h> 5.16 -#include <stdio.h> 5.17 -#include <time.h> 5.18 - 5.19 -#include <pthread.h> 5.20 -#include <sched.h> 5.21 - 5.22 -void *terminateCoreLoop(VirtProcr *currPr); 5.23 - 5.24 -/*This is the loop that runs in the OS Thread pinned to each core 5.25 - *Get virt procr from queue, 5.26 - * save state of current animator, then load in state of virt procr, using 5.27 - * jmp instr to switch the program-counter state -- making the virt procr 5.28 - * the new animator. 5.29 - *At some point, the virt procr will suspend itself by saving out its 5.30 - * animator state (stack ptr, frame ptr, program counter) and switching 5.31 - * back to the OS Thread's animator state, which means restoring the 5.32 - * stack and frame and jumping to the core loop start point. 5.33 - *This cycle then repeats, until a special shutdown virtual processor is 5.34 - * animated, which jumps to the end point at the bottom of core loop. 5.35 - */ 5.36 -void * 5.37 -coreLoop( void *paramsIn ) 5.38 - { 5.39 - ThdParams *coreLoopThdParams; 5.40 - int thisCoresIdx; 5.41 - VirtProcr *currPr = NULL; 5.42 - VMSQueueStruc *readyToAnimateQ; 5.43 - cpu_set_t coreMask; //has 1 in bit positions of allowed cores 5.44 - int errorCode; 5.45 - 5.46 - //work-stealing struc on stack to prevent false-sharing in cache-line 5.47 - volatile GateStruc gate; 5.48 - //preGateProgress, waitProgress, exitProgress, gateClosed; 5.49 - 5.50 - 5.51 - coreLoopThdParams = (ThdParams *)paramsIn; 5.52 - thisCoresIdx = coreLoopThdParams->coreNum; 5.53 - 5.54 - gate.gateClosed = FALSE; 5.55 - gate.preGateProgress = 0; 5.56 - gate.waitProgress = 0; 5.57 - gate.exitProgress = 0; 5.58 - _VMSMasterEnv->workStealingGates[ thisCoresIdx ] = (GateStruc*)&gate;//race @startup 5.59 - 5.60 - //wait until signalled that setup is complete 5.61 - pthread_mutex_lock( &suspendLock ); 5.62 - while( !(_VMSMasterEnv->setupComplete) ) 5.63 - { 5.64 - pthread_cond_wait( &suspend_cond, 5.65 - &suspendLock ); 5.66 - } 5.67 - pthread_mutex_unlock( &suspendLock ); 5.68 - 5.69 - //printf( "\nCore unsuspended: %d\n", coreLoopThdParams->coreNum ); 5.70 - 5.71 - //set thread affinity 5.72 - //Linux requires pinning thd to core inside thread-function 5.73 - //Designate a core by a 1 in bit-position corresponding to the core 5.74 - CPU_ZERO(&coreMask); 5.75 - CPU_SET(coreLoopThdParams->coreNum,&coreMask); 5.76 - //coreMask = 1L << coreLoopThdParams->coreNum; 5.77 - 5.78 - pthread_t selfThd = pthread_self(); 5.79 - errorCode = 5.80 - pthread_setaffinity_np( selfThd, sizeof(coreMask), &coreMask); 5.81 - 5.82 - if(errorCode){ printf("\nset affinity failure\n"); exit(0); } 5.83 - 5.84 - 5.85 - //Save the return address in the SwitchVP function 5.86 - saveCoreLoopReturnAddr((void**)&(_VMSMasterEnv->coreLoopReturnPt)); 5.87 - 5.88 - 5.89 - while(1){ 5.90 - #ifdef MEAS__PERF_COUNTERS 5.91 - //end work 5.92 - 5.93 - if(currPr!=NULL){ 5.94 - int lastRecordIdx = currPr->counter_history_array_info->numInArray -1; 5.95 - CounterRecord* lastRecord = currPr->counter_history[lastRecordIdx]; 5.96 - //lastRecord->work_core = thisCoresIdx; 5.97 - saveCyclesAndInstrs(thisCoresIdx,lastRecord->suspend_cycles,lastRecord->suspend_instrs); 5.98 - } 5.99 - 5.100 - #endif 5.101 - //Get virtual processor from queue 5.102 - //The Q must be a global, static volatile var, so not kept in reg, 5.103 - // which forces reloading the pointer after each jmp to this point 5.104 - readyToAnimateQ = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx]; 5.105 - 5.106 - #ifdef USE_WORK_STEALING 5.107 - //Alg for work-stealing designed to make common case fast. Comment 5.108 - // in stealer code explains. 5.109 - gate.preGateProgress++; 5.110 - if( gate.gateClosed ) 5.111 - { //now, set coreloop's progress, so stealer can see that core loop 5.112 - // has made it into the waiting area. 5.113 - gate.waitProgress = gate.preGateProgress; 5.114 - while( gate.gateClosed ) /*busy wait*/; 5.115 - } 5.116 - 5.117 - currPr = (VirtProcr *) readVMSQ( readyToAnimateQ ); 5.118 - 5.119 - //Set the coreloop's progress, so stealer can see it has made it out 5.120 - // of the protected area 5.121 - gate.exitProgress = gate.preGateProgress; 5.122 - #else 5.123 - currPr = (VirtProcr *) readVMSQ( readyToAnimateQ ); 5.124 - #endif 5.125 - 5.126 - if( currPr != NULL ) _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0; 5.127 - else 5.128 - { 5.129 - //============================= MEASUREMENT STUFF ===================== 5.130 - #ifdef MEAS__TIME_MASTER_LOCK 5.131 - int32 startStamp, endStamp; 5.132 - saveLowTimeStampCountInto( startStamp ); 5.133 - #endif 5.134 - //===================================================================== 5.135 - int tries = 0; int gotLock = 0; 5.136 - while( currPr == NULL ) //if queue was empty, enter get masterLock loop 5.137 - { //queue was empty, so get master lock 5.138 - 5.139 - gotLock = __sync_bool_compare_and_swap(&(_VMSMasterEnv->masterLock), 5.140 - UNLOCKED, LOCKED ); 5.141 - if( gotLock ) 5.142 - { //run own MasterVP -- jmps to coreLoops startPt when done 5.143 - currPr = _VMSMasterEnv->masterVPs[thisCoresIdx]; 5.144 - if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 ) 5.145 - { DEBUG( dbgB2BMaster,"Many back to back MasterVPs\n"); 5.146 - pthread_yield(); 5.147 - } 5.148 - _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1; 5.149 - break; //end while -- have a VP to animate now 5.150 - } 5.151 - 5.152 - tries++; //if too many, means master on other core taking too long 5.153 - if( tries > MASTERLOCK_RETRIES ) { tries = 0; pthread_yield(); } 5.154 - } 5.155 - //============================= MEASUREMENT STUFF ===================== 5.156 - #ifdef MEAS__TIME_MASTER_LOCK 5.157 - saveLowTimeStampCountInto( endStamp ); 5.158 - addIntervalToHist( startStamp, endStamp, 5.159 - _VMSMasterEnv->masterLockLowTimeHist ); 5.160 - addIntervalToHist( startStamp, endStamp, 5.161 - _VMSMasterEnv->masterLockHighTimeHist ); 5.162 - #endif 5.163 - //===================================================================== 5.164 - 5.165 - } 5.166 - 5.167 - #ifdef MEAS__PERF_COUNTERS 5.168 - //start work 5.169 - int lastRecordIdx = currPr->counter_history_array_info->numInArray -1; 5.170 - CounterRecord* lastRecord = currPr->counter_history[lastRecordIdx]; 5.171 - lastRecord->work_core = thisCoresIdx; 5.172 - saveCyclesAndInstrs(thisCoresIdx,lastRecord->start_work_cycles,lastRecord->start_work_instrs); 5.173 - #endif 5.174 - switchToVP(currPr); //The VPs return in here 5.175 - 5.176 - flushRegisters(); 5.177 - }//CoreLoop 5.178 - } 5.179 - 5.180 - 5.181 -void * 5.182 -terminateCoreLoop(VirtProcr *currPr){ 5.183 - //first free shutdown VP that jumped here -- it first restores the 5.184 - // coreloop's stack, so addr of currPr in stack frame is still correct 5.185 - VMS__dissipate_procr( currPr ); 5.186 - pthread_exit( NULL ); 5.187 -} 5.188 - 5.189 - 5.190 - 5.191 -#ifdef SEQUENTIAL 5.192 - 5.193 -//=========================================================================== 5.194 -/*This sequential version is exact same as threaded, except doesn't do the 5.195 - * pin-threads part, nor the wait until setup complete part. 5.196 - */ 5.197 -void * 5.198 -coreLoop_Seq( void *paramsIn ) 5.199 - { 5.200 - VirtProcr *currPr; 5.201 - VMSQueueStruc *readyToAnimateQ; 5.202 - 5.203 - ThdParams *coreLoopThdParams; 5.204 - int thisCoresIdx; 5.205 - 5.206 - coreLoopThdParams = (ThdParams *)paramsIn; 5.207 -// thisCoresIdx = coreLoopThdParams->coreNum; 5.208 - thisCoresIdx = 0; 5.209 - 5.210 - //Save the return address in the SwitchVP function 5.211 - saveCoreLoopReturnAddr(&(_VMSMasterEnv->coreLoopReturnPt)); 5.212 - 5.213 - 5.214 - while(1){ 5.215 - //Get virtual processor from queue 5.216 - //_VMSWorkQ must be a global, static volatile var, so not kept in reg, 5.217 - // which forces reloading the pointer after each jmp to this point 5.218 - readyToAnimateQ = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx]; 5.219 - currPr = (VirtProcr *) readVMSQ( readyToAnimateQ ); 5.220 - if( currPr == NULL ) 5.221 - { if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 ) 5.222 - { printf("too many back to back MasterVP\n"); exit(1); } 5.223 - _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1; 5.224 - 5.225 - currPr = _VMSMasterEnv->masterVPs[thisCoresIdx]; 5.226 - } 5.227 - else 5.228 - _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0; 5.229 - 5.230 - 5.231 - switchToVP( currPr ); 5.232 - flushRegisters(); 5.233 - } 5.234 - } 5.235 -#endif 5.236 +/* 5.237 + * Copyright 2010 OpenSourceStewardshipFoundation 5.238 + * 5.239 + * Licensed under BSD 5.240 + */ 5.241 + 5.242 + 5.243 +#include "VMS.h" 5.244 +#include "Queue_impl/BlockingQueue.h" 5.245 +#include "ProcrContext.h" 5.246 + 5.247 +#include <stdlib.h> 5.248 +#include <stdio.h> 5.249 +#include <time.h> 5.250 + 5.251 +#include <pthread.h> 5.252 +#include <sched.h> 5.253 + 5.254 +void *terminateCoreLoop(VirtProcr *currPr); 5.255 + 5.256 +/*This is the loop that runs in the OS Thread pinned to each core 5.257 + *Get virt procr from queue, 5.258 + * save state of current animator, then load in state of virt procr, using 5.259 + * jmp instr to switch the program-counter state -- making the virt procr 5.260 + * the new animator. 5.261 + *At some point, the virt procr will suspend itself by saving out its 5.262 + * animator state (stack ptr, frame ptr, program counter) and switching 5.263 + * back to the OS Thread's animator state, which means restoring the 5.264 + * stack and frame and jumping to the core loop start point. 5.265 + *This cycle then repeats, until a special shutdown virtual processor is 5.266 + * animated, which jumps to the end point at the bottom of core loop. 5.267 + */ 5.268 +void * 5.269 +coreLoop( void *paramsIn ) 5.270 + { 5.271 + ThdParams *coreLoopThdParams; 5.272 + int thisCoresIdx; 5.273 + VirtProcr *currPr = NULL; 5.274 + VMSQueueStruc *readyToAnimateQ; 5.275 + cpu_set_t coreMask; //has 1 in bit positions of allowed cores 5.276 + int errorCode; 5.277 + 5.278 + //work-stealing struc on stack to prevent false-sharing in cache-line 5.279 + volatile GateStruc gate; 5.280 + //preGateProgress, waitProgress, exitProgress, gateClosed; 5.281 + 5.282 + 5.283 + coreLoopThdParams = (ThdParams *)paramsIn; 5.284 + thisCoresIdx = coreLoopThdParams->coreNum; 5.285 + 5.286 + gate.gateClosed = FALSE; 5.287 + gate.preGateProgress = 0; 5.288 + gate.waitProgress = 0; 5.289 + gate.exitProgress = 0; 5.290 + _VMSMasterEnv->workStealingGates[ thisCoresIdx ] = (GateStruc*)&gate;//race @startup 5.291 + 5.292 + //wait until signalled that setup is complete 5.293 + pthread_mutex_lock( &suspendLock ); 5.294 + while( !(_VMSMasterEnv->setupComplete) ) 5.295 + { 5.296 + pthread_cond_wait( &suspend_cond, 5.297 + &suspendLock ); 5.298 + } 5.299 + pthread_mutex_unlock( &suspendLock ); 5.300 + 5.301 + //printf( "\nCore unsuspended: %d\n", coreLoopThdParams->coreNum ); 5.302 + 5.303 + //set thread affinity 5.304 + //Linux requires pinning thd to core inside thread-function 5.305 + //Designate a core by a 1 in bit-position corresponding to the core 5.306 + CPU_ZERO(&coreMask); 5.307 + CPU_SET(coreLoopThdParams->coreNum,&coreMask); 5.308 + //coreMask = 1L << coreLoopThdParams->coreNum; 5.309 + 5.310 + pthread_t selfThd = pthread_self(); 5.311 + errorCode = 5.312 + pthread_setaffinity_np( selfThd, sizeof(coreMask), &coreMask); 5.313 + 5.314 + if(errorCode){ printf("\nset affinity failure\n"); exit(0); } 5.315 + 5.316 + 5.317 + //Save the return address in the SwitchVP function 5.318 + saveCoreLoopReturnAddr((void**)&(_VMSMasterEnv->coreLoopReturnPt)); 5.319 + 5.320 + 5.321 + while(1){ 5.322 + #ifdef MEAS__PERF_COUNTERS 5.323 + //end work 5.324 + 5.325 + if(currPr!=NULL){ 5.326 + int lastRecordIdx = currPr->counter_history_array_info->numInArray -1; 5.327 + CounterRecord* lastRecord = currPr->counter_history[lastRecordIdx]; 5.328 + //lastRecord->work_core = thisCoresIdx; 5.329 + saveCyclesAndInstrs(thisCoresIdx,lastRecord->suspend_cycles,lastRecord->suspend_instrs); 5.330 + } 5.331 + 5.332 + #endif 5.333 + //Get virtual processor from queue 5.334 + //The Q must be a global, static volatile var, so not kept in reg, 5.335 + // which forces reloading the pointer after each jmp to this point 5.336 + readyToAnimateQ = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx]; 5.337 + 5.338 + #ifdef USE_WORK_STEALING 5.339 + //Alg for work-stealing designed to make common case fast. Comment 5.340 + // in stealer code explains. 5.341 + gate.preGateProgress++; 5.342 + if( gate.gateClosed ) 5.343 + { //now, set coreloop's progress, so stealer can see that core loop 5.344 + // has made it into the waiting area. 5.345 + gate.waitProgress = gate.preGateProgress; 5.346 + while( gate.gateClosed ) /*busy wait*/; 5.347 + } 5.348 + 5.349 + currPr = (VirtProcr *) readVMSQ( readyToAnimateQ ); 5.350 + 5.351 + //Set the coreloop's progress, so stealer can see it has made it out 5.352 + // of the protected area 5.353 + gate.exitProgress = gate.preGateProgress; 5.354 + #else 5.355 + currPr = (VirtProcr *) readVMSQ( readyToAnimateQ ); 5.356 + #endif 5.357 + 5.358 + if( currPr != NULL ) _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0; 5.359 + else 5.360 + { 5.361 + //============================= MEASUREMENT STUFF ===================== 5.362 + #ifdef MEAS__TIME_MASTER_LOCK 5.363 + int32 startStamp, endStamp; 5.364 + saveLowTimeStampCountInto( startStamp ); 5.365 + #endif 5.366 + //===================================================================== 5.367 + int tries = 0; int gotLock = 0; 5.368 + while( currPr == NULL ) //if queue was empty, enter get masterLock loop 5.369 + { //queue was empty, so get master lock 5.370 + 5.371 + gotLock = __sync_bool_compare_and_swap(&(_VMSMasterEnv->masterLock), 5.372 + UNLOCKED, LOCKED ); 5.373 + if( gotLock ) 5.374 + { //run own MasterVP -- jmps to coreLoops startPt when done 5.375 + currPr = _VMSMasterEnv->masterVPs[thisCoresIdx]; 5.376 + if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 ) 5.377 + { DEBUG( dbgB2BMaster,"Many back to back MasterVPs\n"); 5.378 + pthread_yield(); 5.379 + } 5.380 + _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1; 5.381 + break; //end while -- have a VP to animate now 5.382 + } 5.383 + 5.384 + tries++; //if too many, means master on other core taking too long 5.385 + if( tries > MASTERLOCK_RETRIES ) { tries = 0; pthread_yield(); } 5.386 + } 5.387 + //============================= MEASUREMENT STUFF ===================== 5.388 + #ifdef MEAS__TIME_MASTER_LOCK 5.389 + saveLowTimeStampCountInto( endStamp ); 5.390 + addIntervalToHist( startStamp, endStamp, 5.391 + _VMSMasterEnv->masterLockLowTimeHist ); 5.392 + addIntervalToHist( startStamp, endStamp, 5.393 + _VMSMasterEnv->masterLockHighTimeHist ); 5.394 + #endif 5.395 + //===================================================================== 5.396 + 5.397 + } 5.398 + 5.399 + #ifdef MEAS__PERF_COUNTERS 5.400 + //start work 5.401 + int lastRecordIdx = currPr->counter_history_array_info->numInArray -1; 5.402 + CounterRecord* lastRecord = currPr->counter_history[lastRecordIdx]; 5.403 + lastRecord->work_core = thisCoresIdx; 5.404 + saveCyclesAndInstrs(thisCoresIdx,lastRecord->start_work_cycles,lastRecord->start_work_instrs); 5.405 + #endif 5.406 + switchToVP(currPr); //The VPs return in here 5.407 + 5.408 + flushRegisters(); 5.409 + }//CoreLoop 5.410 + } 5.411 + 5.412 + 5.413 +void * 5.414 +terminateCoreLoop(VirtProcr *currPr){ 5.415 + //first free shutdown VP that jumped here -- it first restores the 5.416 + // coreloop's stack, so addr of currPr in stack frame is still correct 5.417 + VMS__dissipate_procr( currPr ); 5.418 + pthread_exit( NULL ); 5.419 +} 5.420 + 5.421 + 5.422 + 5.423 +#ifdef SEQUENTIAL 5.424 + 5.425 +//=========================================================================== 5.426 +/*This sequential version is exact same as threaded, except doesn't do the 5.427 + * pin-threads part, nor the wait until setup complete part. 5.428 + */ 5.429 +void * 5.430 +coreLoop_Seq( void *paramsIn ) 5.431 + { 5.432 + VirtProcr *currPr; 5.433 + VMSQueueStruc *readyToAnimateQ; 5.434 + 5.435 + ThdParams *coreLoopThdParams; 5.436 + int thisCoresIdx; 5.437 + 5.438 + coreLoopThdParams = (ThdParams *)paramsIn; 5.439 +// thisCoresIdx = coreLoopThdParams->coreNum; 5.440 + thisCoresIdx = 0; 5.441 + 5.442 + //Save the return address in the SwitchVP function 5.443 + saveCoreLoopReturnAddr(&(_VMSMasterEnv->coreLoopReturnPt)); 5.444 + 5.445 + 5.446 + while(1){ 5.447 + //Get virtual processor from queue 5.448 + //_VMSWorkQ must be a global, static volatile var, so not kept in reg, 5.449 + // which forces reloading the pointer after each jmp to this point 5.450 + readyToAnimateQ = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx]; 5.451 + currPr = (VirtProcr *) readVMSQ( readyToAnimateQ ); 5.452 + if( currPr == NULL ) 5.453 + { if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 ) 5.454 + { printf("too many back to back MasterVP\n"); exit(1); } 5.455 + _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1; 5.456 + 5.457 + currPr = _VMSMasterEnv->masterVPs[thisCoresIdx]; 5.458 + } 5.459 + else 5.460 + _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0; 5.461 + 5.462 + 5.463 + switchToVP( currPr ); 5.464 + flushRegisters(); 5.465 + } 5.466 + } 5.467 +#endif
6.1 --- a/MasterLoop.c Wed Jan 04 16:40:10 2012 +0100 6.2 +++ b/MasterLoop.c Fri Jan 06 18:55:05 2012 +0100 6.3 @@ -90,8 +90,6 @@ 6.4 volatileMasterPr = animatingPr; 6.5 masterPr = (VirtProcr*)volatileMasterPr; //used to force re-define after jmp 6.6 6.7 - bulb b = new_bulb(); 6.8 - numSlotsFilled=1; 6.9 //First animation of each MasterVP will in turn animate this part 6.10 // of setup code.. (VP creator sets up the stack as if this function 6.11 // was called normally, but actually get here by jmp) 6.12 @@ -131,9 +129,6 @@ 6.13 slaveScheduler = masterEnv->slaveScheduler; 6.14 semanticEnv = masterEnv->semanticEnv; 6.15 6.16 - #ifdef DETECT_LOOP_GRAPH 6.17 - 6.18 - #endif 6.19 6.20 //Poll each slot's Done flag 6.21 numSlotsFilled = 0; 6.22 @@ -221,9 +216,7 @@ 6.23 lastRecord->start_assign_instrs = tmp_instrs; 6.24 saveCyclesAndInstrs(thisCoresIdx,lastRecord->end_assign_cycles,lastRecord->end_assign_instrs); 6.25 #endif 6.26 - #ifdef DETECT_LOOP_GRAPH 6.27 6.28 - #endif 6.29 writeVMSQ( schedVirtPr, readyToAnimateQ ); 6.30 } 6.31 }
7.1 --- a/ProcrContext.h Wed Jan 04 16:40:10 2012 +0100 7.2 +++ b/ProcrContext.h Fri Jan 06 18:55:05 2012 +0100 7.3 @@ -1,33 +1,33 @@ 7.4 -/* 7.5 - * Copyright 2009 OpenSourceStewardshipFoundation.org 7.6 - * Licensed under GNU General Public License version 2 7.7 - * 7.8 - * Author: seanhalle@yahoo.com 7.9 - * 7.10 - */ 7.11 - 7.12 -#ifndef _ProcrContext_H 7.13 -#define _ProcrContext_H 7.14 -#define _GNU_SOURCE 7.15 - 7.16 -void saveCoreLoopReturnAddr(void **returnAddress); 7.17 - 7.18 -void switchToVP(VirtProcr *nextProcr); 7.19 - 7.20 -void switchToCoreLoop(VirtProcr *nextProcr); 7.21 - 7.22 -void masterSwitchToCoreLoop(VirtProcr *nextProcr); 7.23 - 7.24 -void startVirtProcrFn(); 7.25 - 7.26 -void *asmTerminateCoreLoop(VirtProcr *currPr); 7.27 - 7.28 -#define flushRegisters() \ 7.29 - asm volatile ("":::"%rbx", "%r12", "%r13","%r14","%r15") 7.30 - 7.31 -inline VirtProcr * 7.32 -create_procr_helper( VirtProcr *newPr, VirtProcrFnPtr fnPtr, 7.33 - void *initialData, void *stackLocs ); 7.34 - 7.35 -#endif /* _ProcrContext_H */ 7.36 - 7.37 +/* 7.38 + * Copyright 2009 OpenSourceStewardshipFoundation.org 7.39 + * Licensed under GNU General Public License version 2 7.40 + * 7.41 + * Author: seanhalle@yahoo.com 7.42 + * 7.43 + */ 7.44 + 7.45 +#ifndef _ProcrContext_H 7.46 +#define _ProcrContext_H 7.47 +#define _GNU_SOURCE 7.48 + 7.49 +void saveCoreLoopReturnAddr(void **returnAddress); 7.50 + 7.51 +void switchToVP(VirtProcr *nextProcr); 7.52 + 7.53 +void switchToCoreLoop(VirtProcr *nextProcr); 7.54 + 7.55 +void masterSwitchToCoreLoop(VirtProcr *nextProcr); 7.56 + 7.57 +void startVirtProcrFn(); 7.58 + 7.59 +void *asmTerminateCoreLoop(VirtProcr *currPr); 7.60 + 7.61 +#define flushRegisters() \ 7.62 + asm volatile ("":::"%rbx", "%r12", "%r13","%r14","%r15") 7.63 + 7.64 +inline VirtProcr * 7.65 +create_procr_helper( VirtProcr *newPr, VirtProcrFnPtr fnPtr, 7.66 + void *initialData, void *stackLocs ); 7.67 + 7.68 +#endif /* _ProcrContext_H */ 7.69 +
8.1 --- a/VMS.c Wed Jan 04 16:40:10 2012 +0100 8.2 +++ b/VMS.c Fri Jan 06 18:55:05 2012 +0100 8.3 @@ -118,15 +118,15 @@ 8.4 8.5 //============================= MEASUREMENT STUFF ======================== 8.6 #ifdef MEAS__TIME_MALLOC 8.7 - _VMSMasterEnv->mallocTimeHist = makeFixedBinHistExt( 100, 0, 100, 8.8 + _VMSMasterEnv->mallocTimeHist = makeFixedBinHistExt( 100, 0, 30, 8.9 "malloc_time_hist"); 8.10 - _VMSMasterEnv->freeTimeHist = makeFixedBinHistExt( 80, 0, 100, 8.11 + _VMSMasterEnv->freeTimeHist = makeFixedBinHistExt( 100, 0, 30, 8.12 "free_time_hist"); 8.13 #endif 8.14 #ifdef MEAS__TIME_PLUGIN 8.15 - _VMSMasterEnv->reqHdlrLowTimeHist = makeFixedBinHistExt( 1000, 0, 100, 8.16 + _VMSMasterEnv->reqHdlrLowTimeHist = makeFixedBinHistExt( 100, 0, 200, 8.17 "plugin_low_time_hist"); 8.18 - _VMSMasterEnv->reqHdlrHighTimeHist = makeFixedBinHistExt( 1000, 0, 100, 8.19 + _VMSMasterEnv->reqHdlrHighTimeHist = makeFixedBinHistExt( 100, 0, 200, 8.20 "plugin_high_time_hist"); 8.21 #endif 8.22 //======================================================================== 8.23 @@ -462,9 +462,8 @@ 8.24 * it lets the lang have lang-specific data related to creation transported 8.25 * to the plugin. 8.26 */ 8.27 -__attribute__ ((noinline)) void 8.28 -VMS__send_create_procr_req( void *semReqData, VirtProcr *reqstingPr ) 8.29 - 8.30 +void 8.31 +VMS__send_create_procr_req( void *semReqData, VirtProcr *reqstingPr ) 8.32 { VMSReqst req; 8.33 8.34 req.reqType = createReq; 8.35 @@ -497,8 +496,8 @@ 8.36 * gets suspended in this call and all the virt processor's state disap- 8.37 * pears -- making that suspend the last thing in the virt procr's trace. 8.38 */ 8.39 -__attribute__ ((noinline)) void 8.40 -VMS__send_dissipate_req( VirtProcr *procrToDissipate ) 8.41 +void 8.42 +VMS__send_dissipate_req( VirtProcr *procrToDissipate ) 8.43 { VMSReqst req; 8.44 8.45 req.reqType = dissipate; 8.46 @@ -557,8 +556,8 @@ 8.47 * to plugin 8.48 *Then it does suspend, to cause request to be sent. 8.49 */ 8.50 -/*inline*/__attribute__ ((noinline)) void 8.51 -VMS__send_sem_request( void *semReqData, VirtProcr *callingPr ) 8.52 +inline void 8.53 +VMS__send_sem_request( void *semReqData, VirtProcr *callingPr ) 8.54 { VMSReqst req; 8.55 8.56 req.reqType = semantic; 8.57 @@ -570,9 +569,8 @@ 8.58 } 8.59 8.60 8.61 -/*inline*/ __attribute__ ((noinline)) void 8.62 -VMS__send_VMSSem_request( void *semReqData, VirtProcr *callingPr ) 8.63 - 8.64 +inline void 8.65 +VMS__send_VMSSem_request( void *semReqData, VirtProcr *callingPr ) 8.66 { VMSReqst req; 8.67 8.68 req.reqType = VMSSemantic; 8.69 @@ -777,7 +775,7 @@ 8.70 //Before getting rid of everything, print out any measurements made 8.71 //forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, (DynArrayFnPtr)&printHist ); 8.72 //forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, (DynArrayFnPtr)&saveHistToFile); 8.73 - //forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, &freeHistExt ); 8.74 + //forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, &freeHist ); 8.75 8.76 8.77 #ifdef MEAS__TIME_PLUGIN
9.1 --- a/VMS.h Wed Jan 04 16:40:10 2012 +0100 9.2 +++ b/VMS.h Fri Jan 06 18:55:05 2012 +0100 9.3 @@ -279,9 +279,6 @@ 9.4 CounterRecord** counter_history; 9.5 PrivDynArrayInfo* counter_history_array_info; 9.6 #endif 9.7 - #ifdef DETECT_LOOP_GRAPH 9.8 - 9.9 - #endif 9.10 } 9.11 MasterEnv; 9.12 9.13 @@ -380,16 +377,16 @@ 9.14 inline void 9.15 VMS__add_sem_request_in_mallocd_VMSReqst( void *semReqData, VirtProcr *callingPr ); 9.16 9.17 -/*inline*/ __attribute__ ((noinline)) void 9.18 +inline void 9.19 VMS__send_sem_request( void *semReqData, VirtProcr *callingPr ); 9.20 9.21 void 9.22 VMS__send_create_procr_req( void *semReqData, VirtProcr *reqstingPr ); 9.23 9.24 -void /*inline**/ __attribute__ ((noinline)) 9.25 +void inline 9.26 VMS__send_dissipate_req( VirtProcr *prToDissipate ); 9.27 9.28 -/*inline**/ __attribute__ ((noinline)) void 9.29 +inline void 9.30 VMS__send_VMSSem_request( void *semReqData, VirtProcr *callingPr ); 9.31 9.32 VMSReqst * 9.33 @@ -461,11 +458,11 @@ 9.34 #ifdef VPTHREAD 9.35 9.36 //VPThread 9.37 -#define createHistIdx 1 9.38 -#define mutexLockHistIdx 2 9.39 -#define mutexUnlockHistIdx 3 9.40 -#define condWaitHistIdx 4 9.41 -#define condSignalHistIdx 5 9.42 +#define createHistIdx 0 9.43 +#define mutexLockHistIdx 1 9.44 +#define mutexUnlockHistIdx 2 9.45 +#define condWaitHistIdx 3 9.46 +#define condSignalHistIdx 4 9.47 9.48 #define MakeTheMeasHists() \ 9.49 _VMSMasterEnv->measHistsInfo = \ 9.50 @@ -482,8 +479,8 @@ 9.51 #ifdef VCILK 9.52 9.53 //VCilk 9.54 -#define spawnHistIdx 1 9.55 -#define syncHistIdx 2 9.56 +#define spawnHistIdx 0 9.57 +#define syncHistIdx 1 9.58 9.59 #define MakeTheMeasHists() \ 9.60 _VMSMasterEnv->measHistsInfo = \ 9.61 @@ -497,10 +494,10 @@ 9.62 #ifdef SSR 9.63 9.64 //SSR 9.65 -#define SendFromToHistIdx 1 9.66 -#define SendOfTypeHistIdx 2 9.67 -#define ReceiveFromToHistIdx 3 9.68 -#define ReceiveOfTypeHistIdx 4 9.69 +#define SendFromToHistIdx 0 9.70 +#define SendOfTypeHistIdx 1 9.71 +#define ReceiveFromToHistIdx 2 9.72 +#define ReceiveOfTypeHistIdx 3 9.73 9.74 #define MakeTheMeasHists() \ 9.75 _VMSMasterEnv->measHistsInfo = \
10.1 --- a/VMS__DESIGN_NOTES.txt Wed Jan 04 16:40:10 2012 +0100 10.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 10.3 @@ -1,2 +0,0 @@ 10.4 - 10.5 10.6 -Implement VMS this way: 10.7
11.1 --- a/VMS_primitive_data_types.h Wed Jan 04 16:40:10 2012 +0100 11.2 +++ b/VMS_primitive_data_types.h Fri Jan 06 18:55:05 2012 +0100 11.3 @@ -1,53 +1,53 @@ 11.4 -/* 11.5 - * Copyright 2009 OpenSourceStewardshipFoundation.org 11.6 - * Licensed under GNU General Public License version 2 11.7 - * 11.8 - * Author: seanhalle@yahoo.com 11.9 - * 11.10 - 11.11 - */ 11.12 - 11.13 -#ifndef _BLIS_PRIMITIVE_DATA_TYPES_H 11.14 -#define _BLIS_PRIMITIVE_DATA_TYPES_H 11.15 - 11.16 - 11.17 -/*For portability, need primitive data types that have a well defined 11.18 - * size, and well-defined layout into bytes 11.19 - *To do this, provide BLIS standard aliases for all primitive data types 11.20 - *These aliases must be used in all BLIS functions instead of the ANSI types 11.21 - * 11.22 - *These definitions will be replaced inside each specialization module 11.23 - * according to the compiler used in that module and the hardware being 11.24 - * specialized to. 11.25 - */ 11.26 -/* 11.27 -#define int8 char 11.28 -#define uint8 char 11.29 -#define int16 short 11.30 -#define uint16 unsigned short 11.31 -#define int32 int 11.32 -#define uint32 unsigned int 11.33 -#define int64 long long 11.34 -#define uint64 unsigned long long 11.35 -#define float32 float 11.36 -#define float64 double 11.37 -*/ 11.38 -typedef char bool8; 11.39 -typedef char int8; 11.40 -typedef char uint8; 11.41 -typedef short int16; 11.42 -typedef unsigned short uint16; 11.43 -typedef int int32; 11.44 -typedef unsigned int uint32; 11.45 -typedef long long int64; 11.46 -typedef unsigned long long uint64; 11.47 -typedef float float32; 11.48 -typedef double float64; 11.49 -//typedef double double float128; 11.50 -#define float128 double double 11.51 - 11.52 -#define TRUE 1 11.53 -#define FALSE 0 11.54 - 11.55 -#endif /* _BLIS_PRIMITIVE_DATA_TYPES_H */ 11.56 - 11.57 +/* 11.58 + * Copyright 2009 OpenSourceStewardshipFoundation.org 11.59 + * Licensed under GNU General Public License version 2 11.60 + * 11.61 + * Author: seanhalle@yahoo.com 11.62 + * 11.63 + 11.64 + */ 11.65 + 11.66 +#ifndef _BLIS_PRIMITIVE_DATA_TYPES_H 11.67 +#define _BLIS_PRIMITIVE_DATA_TYPES_H 11.68 + 11.69 + 11.70 +/*For portability, need primitive data types that have a well defined 11.71 + * size, and well-defined layout into bytes 11.72 + *To do this, provide BLIS standard aliases for all primitive data types 11.73 + *These aliases must be used in all BLIS functions instead of the ANSI types 11.74 + * 11.75 + *These definitions will be replaced inside each specialization module 11.76 + * according to the compiler used in that module and the hardware being 11.77 + * specialized to. 11.78 + */ 11.79 +/* 11.80 +#define int8 char 11.81 +#define uint8 char 11.82 +#define int16 short 11.83 +#define uint16 unsigned short 11.84 +#define int32 int 11.85 +#define uint32 unsigned int 11.86 +#define int64 long long 11.87 +#define uint64 unsigned long long 11.88 +#define float32 float 11.89 +#define float64 double 11.90 +*/ 11.91 +typedef char bool8; 11.92 +typedef char int8; 11.93 +typedef char uint8; 11.94 +typedef short int16; 11.95 +typedef unsigned short uint16; 11.96 +typedef int int32; 11.97 +typedef unsigned int uint32; 11.98 +typedef long long int64; 11.99 +typedef unsigned long long uint64; 11.100 +typedef float float32; 11.101 +typedef double float64; 11.102 +//typedef double double float128; 11.103 +#define float128 double double 11.104 + 11.105 +#define TRUE 1 11.106 +#define FALSE 0 11.107 + 11.108 +#endif /* _BLIS_PRIMITIVE_DATA_TYPES_H */ 11.109 +
12.1 --- a/probes.h Wed Jan 04 16:40:10 2012 +0100 12.2 +++ b/probes.h Fri Jan 06 18:55:05 2012 +0100 12.3 @@ -1,195 +1,195 @@ 12.4 -/* 12.5 - * Copyright 2009 OpenSourceStewardshipFoundation.org 12.6 - * Licensed under GNU General Public License version 2 12.7 - * 12.8 - * Author: seanhalle@yahoo.com 12.9 - * 12.10 - */ 12.11 - 12.12 -#ifndef _PROBES_H 12.13 -#define _PROBES_H 12.14 -#define _GNU_SOURCE 12.15 - 12.16 -#include "VMS_primitive_data_types.h" 12.17 - 12.18 -#include <sys/time.h> 12.19 - 12.20 - 12.21 - //when STATS__TURN_ON_PROBES is defined allows using probes to measure 12.22 - // time intervals. The probes are macros that only compile to something 12.23 - // when STATS__TURN_ON_PROBES is defined. The probes are saved in the 12.24 - // master env -- but only when this is defined. 12.25 - //The TSC probes use RDTSC instr, can be unreliable, Dbl uses gettimeofday 12.26 -#define STATS__TURN_ON_PROBES 12.27 -//#define STATS__USE_TSC_PROBES 12.28 -#define STATS__USE_DBL_PROBES 12.29 - 12.30 -//typedef struct _IntervalProbe IntervalProbe; //in VMS.h 12.31 - 12.32 -struct _IntervalProbe 12.33 - { 12.34 - char *nameStr; 12.35 - int32 probeID; 12.36 - 12.37 - int32 schedChoiceWasRecorded; 12.38 - int32 coreNum; 12.39 - int32 procrID; 12.40 - float64 procrCreateSecs; 12.41 - 12.42 - #ifdef STATS__USE_TSC_PROBES 12.43 - TSCount startStamp; 12.44 - TSCount endStamp; 12.45 - #else 12.46 - struct timeval startStamp; 12.47 - struct timeval endStamp; 12.48 - #endif 12.49 - float64 startSecs; 12.50 - float64 endSecs; 12.51 - float64 interval; 12.52 - DblHist *hist;//if NULL, then is single interval probe 12.53 - }; 12.54 - 12.55 - 12.56 -//============================= Statistics ================================== 12.57 - 12.58 - //Frequency of TS counts 12.59 - //TODO: change freq for each machine 12.60 -#define TSCOUNT_FREQ 3180000000 12.61 - 12.62 -inline TSCount getTSCount(); 12.63 - 12.64 - 12.65 -//======================== Probes ============================= 12.66 -// 12.67 -// Use macros to allow turning probes off with a #define switch 12.68 -#ifdef STATS__ENABLE_PROBES 12.69 -int32 12.70 -VMS_impl__record_time_point_into_new_probe( char *nameStr,VirtProcr *animPr); 12.71 -#define VMS__record_time_point_into_new_probe( nameStr, animPr ) \ 12.72 - VMS_impl__record_time_point_in_new_probe( nameStr, animPr ) 12.73 - 12.74 -int32 12.75 -VMS_ext_impl__record_time_point_into_new_probe( char *nameStr ); 12.76 -#define VMS_ext__record_time_point_into_new_probe( nameStr ) \ 12.77 - VMS_ext_impl__record_time_point_into_new_probe( nameStr ) 12.78 - 12.79 - 12.80 -int32 12.81 -VMS_impl__create_single_interval_probe( char *nameStr, VirtProcr *animPr ); 12.82 -#define VMS__create_single_interval_probe( nameStr, animPr ) \ 12.83 - VMS_impl__create_single_interval_probe( nameStr, animPr ) 12.84 - 12.85 - 12.86 -int32 12.87 -VMS_impl__create_histogram_probe( int32 numBins, float64 startValue, 12.88 - float64 binWidth, char *nameStr, VirtProcr *animPr ); 12.89 -#define VMS__create_histogram_probe( numBins, startValue, \ 12.90 - binWidth, nameStr, animPr ) \ 12.91 - VMS_impl__create_histogram_probe( numBins, startValue, \ 12.92 - binWidth, nameStr, animPr ) 12.93 -void 12.94 -VMS_impl__free_probe( IntervalProbe *probe ); 12.95 -#define VMS__free_probe( probe ) \ 12.96 - VMS_impl__free_probe( probe ) 12.97 - 12.98 -void 12.99 -VMS_impl__index_probe_by_its_name( int32 probeID, VirtProcr *animPr ); 12.100 -#define VMS__index_probe_by_its_name( probeID, animPr ) \ 12.101 - VMS_impl__index_probe_by_its_name( probeID, animPr ) 12.102 - 12.103 -IntervalProbe * 12.104 -VMS_impl__get_probe_by_name( char *probeName, VirtProcr *animPr ); 12.105 -#define VMS__get_probe_by_name( probeID, animPr ) \ 12.106 - VMS_impl__get_probe_by_name( probeName, animPr ) 12.107 - 12.108 -void 12.109 -VMS_impl__record_sched_choice_into_probe( int32 probeID, VirtProcr *animPr ); 12.110 -#define VMS__record_sched_choice_into_probe( probeID, animPr ) \ 12.111 - VMS_impl__record_sched_choice_into_probe( probeID, animPr ) 12.112 - 12.113 -void 12.114 -VMS_impl__record_interval_start_in_probe( int32 probeID ); 12.115 -#define VMS__record_interval_start_in_probe( probeID ) \ 12.116 - VMS_impl__record_interval_start_in_probe( probeID ) 12.117 - 12.118 -void 12.119 -VMS_impl__record_interval_end_in_probe( int32 probeID ); 12.120 -#define VMS__record_interval_end_in_probe( probeID ) \ 12.121 - VMS_impl__record_interval_end_in_probe( probeID ) 12.122 - 12.123 -void 12.124 -VMS_impl__print_stats_of_probe( int32 probeID ); 12.125 -#define VMS__print_stats_of_probe( probeID ) \ 12.126 - VMS_impl__print_stats_of_probe( probeID ) 12.127 - 12.128 -void 12.129 -VMS_impl__print_stats_of_all_probes(); 12.130 -#define VMS__print_stats_of_all_probes() \ 12.131 - VMS_impl__print_stats_of_all_probes() 12.132 - 12.133 - 12.134 -#else 12.135 -int32 12.136 -VMS_impl__record_time_point_into_new_probe( char *nameStr,VirtProcr *animPr); 12.137 -#define VMS__record_time_point_into_new_probe( nameStr, animPr ) \ 12.138 - 0 /* do nothing */ 12.139 - 12.140 -int32 12.141 -VMS_ext_impl__record_time_point_into_new_probe( char *nameStr ); 12.142 -#define VMS_ext__record_time_point_into_new_probe( nameStr ) \ 12.143 - 0 /* do nothing */ 12.144 - 12.145 - 12.146 -int32 12.147 -VMS_impl__create_single_interval_probe( char *nameStr, VirtProcr *animPr ); 12.148 -#define VMS__create_single_interval_probe( nameStr, animPr ) \ 12.149 - 0 /* do nothing */ 12.150 - 12.151 - 12.152 -int32 12.153 -VMS_impl__create_histogram_probe( int32 numBins, float64 startValue, 12.154 - float64 binWidth, char *nameStr, VirtProcr *animPr ); 12.155 -#define VMS__create_histogram_probe( numBins, startValue, \ 12.156 - binWidth, nameStr, animPr ) \ 12.157 - 0 /* do nothing */ 12.158 - 12.159 -void 12.160 -VMS_impl__index_probe_by_its_name( int32 probeID, VirtProcr *animPr ); 12.161 -#define VMS__index_probe_by_its_name( probeID, animPr ) \ 12.162 - /* do nothing */ 12.163 - 12.164 -IntervalProbe * 12.165 -VMS_impl__get_probe_by_name( char *probeName, VirtProcr *animPr ); 12.166 -#define VMS__get_probe_by_name( probeID, animPr ) \ 12.167 - NULL /* do nothing */ 12.168 - 12.169 -void 12.170 -VMS_impl__record_sched_choice_into_probe( int32 probeID, VirtProcr *animPr ); 12.171 -#define VMS__record_sched_choice_into_probe( probeID, animPr ) \ 12.172 - /* do nothing */ 12.173 - 12.174 -void 12.175 -VMS_impl__record_interval_start_in_probe( int32 probeID ); 12.176 -#define VMS__record_interval_start_in_probe( probeID ) \ 12.177 - /* do nothing */ 12.178 - 12.179 -void 12.180 -VMS_impl__record_interval_end_in_probe( int32 probeID ); 12.181 -#define VMS__record_interval_end_in_probe( probeID ) \ 12.182 - /* do nothing */ 12.183 - 12.184 -inline void doNothing(); 12.185 -void 12.186 -VMS_impl__print_stats_of_probe( int32 probeID ); 12.187 -#define VMS__print_stats_of_probe( probeID ) \ 12.188 - doNothing/* do nothing */ 12.189 - 12.190 -void 12.191 -VMS_impl__print_stats_of_all_probes(); 12.192 -#define VMS__print_stats_of_all_probes \ 12.193 - doNothing/* do nothing */ 12.194 - 12.195 -#endif /* defined STATS__ENABLE_PROBES */ 12.196 - 12.197 -#endif /* _PROBES_H */ 12.198 - 12.199 +/* 12.200 + * Copyright 2009 OpenSourceStewardshipFoundation.org 12.201 + * Licensed under GNU General Public License version 2 12.202 + * 12.203 + * Author: seanhalle@yahoo.com 12.204 + * 12.205 + */ 12.206 + 12.207 +#ifndef _PROBES_H 12.208 +#define _PROBES_H 12.209 +#define _GNU_SOURCE 12.210 + 12.211 +#include "VMS_primitive_data_types.h" 12.212 + 12.213 +#include <sys/time.h> 12.214 + 12.215 + 12.216 + //when STATS__TURN_ON_PROBES is defined allows using probes to measure 12.217 + // time intervals. The probes are macros that only compile to something 12.218 + // when STATS__TURN_ON_PROBES is defined. The probes are saved in the 12.219 + // master env -- but only when this is defined. 12.220 + //The TSC probes use RDTSC instr, can be unreliable, Dbl uses gettimeofday 12.221 +#define STATS__TURN_ON_PROBES 12.222 +//#define STATS__USE_TSC_PROBES 12.223 +#define STATS__USE_DBL_PROBES 12.224 + 12.225 +//typedef struct _IntervalProbe IntervalProbe; //in VMS.h 12.226 + 12.227 +struct _IntervalProbe 12.228 + { 12.229 + char *nameStr; 12.230 + int32 probeID; 12.231 + 12.232 + int32 schedChoiceWasRecorded; 12.233 + int32 coreNum; 12.234 + int32 procrID; 12.235 + float64 procrCreateSecs; 12.236 + 12.237 + #ifdef STATS__USE_TSC_PROBES 12.238 + TSCount startStamp; 12.239 + TSCount endStamp; 12.240 + #else 12.241 + struct timeval startStamp; 12.242 + struct timeval endStamp; 12.243 + #endif 12.244 + float64 startSecs; 12.245 + float64 endSecs; 12.246 + float64 interval; 12.247 + DblHist *hist;//if NULL, then is single interval probe 12.248 + }; 12.249 + 12.250 + 12.251 +//============================= Statistics ================================== 12.252 + 12.253 + //Frequency of TS counts 12.254 + //TODO: change freq for each machine 12.255 +#define TSCOUNT_FREQ 3180000000 12.256 + 12.257 +inline TSCount getTSCount(); 12.258 + 12.259 + 12.260 +//======================== Probes ============================= 12.261 +// 12.262 +// Use macros to allow turning probes off with a #define switch 12.263 +#ifdef STATS__ENABLE_PROBES 12.264 +int32 12.265 +VMS_impl__record_time_point_into_new_probe( char *nameStr,VirtProcr *animPr); 12.266 +#define VMS__record_time_point_into_new_probe( nameStr, animPr ) \ 12.267 + VMS_impl__record_time_point_in_new_probe( nameStr, animPr ) 12.268 + 12.269 +int32 12.270 +VMS_ext_impl__record_time_point_into_new_probe( char *nameStr ); 12.271 +#define VMS_ext__record_time_point_into_new_probe( nameStr ) \ 12.272 + VMS_ext_impl__record_time_point_into_new_probe( nameStr ) 12.273 + 12.274 + 12.275 +int32 12.276 +VMS_impl__create_single_interval_probe( char *nameStr, VirtProcr *animPr ); 12.277 +#define VMS__create_single_interval_probe( nameStr, animPr ) \ 12.278 + VMS_impl__create_single_interval_probe( nameStr, animPr ) 12.279 + 12.280 + 12.281 +int32 12.282 +VMS_impl__create_histogram_probe( int32 numBins, float64 startValue, 12.283 + float64 binWidth, char *nameStr, VirtProcr *animPr ); 12.284 +#define VMS__create_histogram_probe( numBins, startValue, \ 12.285 + binWidth, nameStr, animPr ) \ 12.286 + VMS_impl__create_histogram_probe( numBins, startValue, \ 12.287 + binWidth, nameStr, animPr ) 12.288 +void 12.289 +VMS_impl__free_probe( IntervalProbe *probe ); 12.290 +#define VMS__free_probe( probe ) \ 12.291 + VMS_impl__free_probe( probe ) 12.292 + 12.293 +void 12.294 +VMS_impl__index_probe_by_its_name( int32 probeID, VirtProcr *animPr ); 12.295 +#define VMS__index_probe_by_its_name( probeID, animPr ) \ 12.296 + VMS_impl__index_probe_by_its_name( probeID, animPr ) 12.297 + 12.298 +IntervalProbe * 12.299 +VMS_impl__get_probe_by_name( char *probeName, VirtProcr *animPr ); 12.300 +#define VMS__get_probe_by_name( probeID, animPr ) \ 12.301 + VMS_impl__get_probe_by_name( probeName, animPr ) 12.302 + 12.303 +void 12.304 +VMS_impl__record_sched_choice_into_probe( int32 probeID, VirtProcr *animPr ); 12.305 +#define VMS__record_sched_choice_into_probe( probeID, animPr ) \ 12.306 + VMS_impl__record_sched_choice_into_probe( probeID, animPr ) 12.307 + 12.308 +void 12.309 +VMS_impl__record_interval_start_in_probe( int32 probeID ); 12.310 +#define VMS__record_interval_start_in_probe( probeID ) \ 12.311 + VMS_impl__record_interval_start_in_probe( probeID ) 12.312 + 12.313 +void 12.314 +VMS_impl__record_interval_end_in_probe( int32 probeID ); 12.315 +#define VMS__record_interval_end_in_probe( probeID ) \ 12.316 + VMS_impl__record_interval_end_in_probe( probeID ) 12.317 + 12.318 +void 12.319 +VMS_impl__print_stats_of_probe( int32 probeID ); 12.320 +#define VMS__print_stats_of_probe( probeID ) \ 12.321 + VMS_impl__print_stats_of_probe( probeID ) 12.322 + 12.323 +void 12.324 +VMS_impl__print_stats_of_all_probes(); 12.325 +#define VMS__print_stats_of_all_probes() \ 12.326 + VMS_impl__print_stats_of_all_probes() 12.327 + 12.328 + 12.329 +#else 12.330 +int32 12.331 +VMS_impl__record_time_point_into_new_probe( char *nameStr,VirtProcr *animPr); 12.332 +#define VMS__record_time_point_into_new_probe( nameStr, animPr ) \ 12.333 + 0 /* do nothing */ 12.334 + 12.335 +int32 12.336 +VMS_ext_impl__record_time_point_into_new_probe( char *nameStr ); 12.337 +#define VMS_ext__record_time_point_into_new_probe( nameStr ) \ 12.338 + 0 /* do nothing */ 12.339 + 12.340 + 12.341 +int32 12.342 +VMS_impl__create_single_interval_probe( char *nameStr, VirtProcr *animPr ); 12.343 +#define VMS__create_single_interval_probe( nameStr, animPr ) \ 12.344 + 0 /* do nothing */ 12.345 + 12.346 + 12.347 +int32 12.348 +VMS_impl__create_histogram_probe( int32 numBins, float64 startValue, 12.349 + float64 binWidth, char *nameStr, VirtProcr *animPr ); 12.350 +#define VMS__create_histogram_probe( numBins, startValue, \ 12.351 + binWidth, nameStr, animPr ) \ 12.352 + 0 /* do nothing */ 12.353 + 12.354 +void 12.355 +VMS_impl__index_probe_by_its_name( int32 probeID, VirtProcr *animPr ); 12.356 +#define VMS__index_probe_by_its_name( probeID, animPr ) \ 12.357 + /* do nothing */ 12.358 + 12.359 +IntervalProbe * 12.360 +VMS_impl__get_probe_by_name( char *probeName, VirtProcr *animPr ); 12.361 +#define VMS__get_probe_by_name( probeID, animPr ) \ 12.362 + NULL /* do nothing */ 12.363 + 12.364 +void 12.365 +VMS_impl__record_sched_choice_into_probe( int32 probeID, VirtProcr *animPr ); 12.366 +#define VMS__record_sched_choice_into_probe( probeID, animPr ) \ 12.367 + /* do nothing */ 12.368 + 12.369 +void 12.370 +VMS_impl__record_interval_start_in_probe( int32 probeID ); 12.371 +#define VMS__record_interval_start_in_probe( probeID ) \ 12.372 + /* do nothing */ 12.373 + 12.374 +void 12.375 +VMS_impl__record_interval_end_in_probe( int32 probeID ); 12.376 +#define VMS__record_interval_end_in_probe( probeID ) \ 12.377 + /* do nothing */ 12.378 + 12.379 +inline void doNothing(); 12.380 +void 12.381 +VMS_impl__print_stats_of_probe( int32 probeID ); 12.382 +#define VMS__print_stats_of_probe( probeID ) \ 12.383 + doNothing/* do nothing */ 12.384 + 12.385 +void 12.386 +VMS_impl__print_stats_of_all_probes(); 12.387 +#define VMS__print_stats_of_all_probes \ 12.388 + doNothing/* do nothing */ 12.389 + 12.390 +#endif /* defined STATS__ENABLE_PROBES */ 12.391 + 12.392 +#endif /* _PROBES_H */ 12.393 +
13.1 --- a/vmalloc.c Wed Jan 04 16:40:10 2012 +0100 13.2 +++ b/vmalloc.c Fri Jan 06 18:55:05 2012 +0100 13.3 @@ -1,495 +1,495 @@ 13.4 -/* 13.5 - * Copyright 2009 OpenSourceCodeStewardshipFoundation.org 13.6 - * Licensed under GNU General Public License version 2 13.7 - * 13.8 - * Author: seanhalle@yahoo.com 13.9 - * 13.10 - * Created on November 14, 2009, 9:07 PM 13.11 - */ 13.12 - 13.13 -#include <malloc.h> 13.14 -#include <inttypes.h> 13.15 -#include <stdlib.h> 13.16 -#include <stdio.h> 13.17 - 13.18 -#include "VMS.h" 13.19 -#include "Histogram/Histogram.h" 13.20 - 13.21 -/*Helper function 13.22 - *Insert a newly generated free chunk into the first spot on the free list. 13.23 - * The chunk is cast as a MallocProlog, so the various pointers in it are 13.24 - * accessed with C's help -- and the size of the prolog is easily added to 13.25 - * the pointer when a chunk is returned to the app -- so C handles changes 13.26 - * in pointer sizes among machines. 13.27 - * 13.28 - *The list head is a normal MallocProlog struct -- identified by its 13.29 - * prevChunkInFreeList being NULL -- the only one. 13.30 - * 13.31 - *The end of the list is identified by next chunk being NULL, as usual. 13.32 - */ 13.33 -void inline 13.34 -add_chunk_to_free_list( MallocProlog *chunk, MallocProlog *listHead ) 13.35 - { 13.36 - chunk->nextChunkInFreeList = listHead->nextChunkInFreeList; 13.37 - if( chunk->nextChunkInFreeList != NULL ) //if not last in free list 13.38 - chunk->nextChunkInFreeList->prevChunkInFreeList = chunk; 13.39 - chunk->prevChunkInFreeList = listHead; 13.40 - listHead->nextChunkInFreeList = chunk; 13.41 - } 13.42 - 13.43 - 13.44 -/*This is sequential code, meant to only be called from the Master, not from 13.45 - * any slave VPs. 13.46 - *Search down list, checking size by the nextHigherInMem pointer, to find 13.47 - * first chunk bigger than size needed. 13.48 - *Shave off the extra and make it into a new free-list element, hook it in 13.49 - * then return the address of the found element plus size of prolog. 13.50 - * 13.51 - *Will find a 13.52 - */ 13.53 -void *VMS__malloc( size_t sizeRequested ) 13.54 - { MallocProlog *foundElem = NULL, *currElem, *newElem; 13.55 - ssize_t amountExtra, sizeConsumed,sizeOfFound; 13.56 - uint32 foundElemIsTopOfHeap; 13.57 - 13.58 - //============================= MEASUREMENT STUFF ======================== 13.59 - #ifdef MEAS__TIME_MALLOC 13.60 - int32 startStamp, endStamp; 13.61 - saveLowTimeStampCountInto( startStamp ); 13.62 - #endif 13.63 - //======================================================================== 13.64 - 13.65 - //step up the size to be aligned at 16-byte boundary, prob better ways 13.66 - sizeRequested = (sizeRequested + 16) & ~15; 13.67 - currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList; 13.68 - 13.69 - while( currElem != NULL ) 13.70 - { //check if size of currElem is big enough 13.71 - sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem); 13.72 - amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog); 13.73 - if( amountExtra > 0 ) 13.74 - { //found it, get out of loop 13.75 - foundElem = currElem; 13.76 - currElem = NULL; 13.77 - } 13.78 - else 13.79 - currElem = currElem->nextChunkInFreeList; 13.80 - } 13.81 - 13.82 - if( foundElem == NULL ) 13.83 - { ERROR("\nmalloc failed\n") 13.84 - return (void *)NULL; //indicates malloc failed 13.85 - } 13.86 - //Using a kludge to identify the element that is the top chunk in the 13.87 - // heap -- saving top-of-heap addr in head's nextHigherInMem -- and 13.88 - // save addr of start of heap in head's nextLowerInMem 13.89 - //Will handle top of Heap specially 13.90 - foundElemIsTopOfHeap = foundElem->nextHigherInMem == 13.91 - _VMSMasterEnv->freeListHead->nextHigherInMem; 13.92 - 13.93 - //before shave off and try to insert new elem, remove found elem 13.94 - //note, foundElem will never be the head, so always has valid prevChunk 13.95 - foundElem->prevChunkInFreeList->nextChunkInFreeList = 13.96 - foundElem->nextChunkInFreeList; 13.97 - if( foundElem->nextChunkInFreeList != NULL ) 13.98 - { foundElem->nextChunkInFreeList->prevChunkInFreeList = 13.99 - foundElem->prevChunkInFreeList; 13.100 - } 13.101 - foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated 13.102 - 13.103 - //if enough, turn extra into new elem & insert it 13.104 - if( amountExtra > 64 ) 13.105 - { //make new elem by adding to addr of curr elem then casting 13.106 - sizeConsumed = sizeof(MallocProlog) + sizeRequested; 13.107 - newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed ); 13.108 - newElem->nextLowerInMem = foundElem; //This is evil (but why?) 13.109 - newElem->nextHigherInMem = foundElem->nextHigherInMem; //This is evil (but why?) 13.110 - foundElem->nextHigherInMem = newElem; 13.111 - if( ! foundElemIsTopOfHeap ) 13.112 - { //there is no next higher for top of heap, so can't write to it 13.113 - newElem->nextHigherInMem->nextLowerInMem = newElem; 13.114 - } 13.115 - add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead ); 13.116 - } 13.117 - else 13.118 - { 13.119 - sizeConsumed = sizeOfFound; 13.120 - } 13.121 - _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed; 13.122 - 13.123 - //============================= MEASUREMENT STUFF ======================== 13.124 - #ifdef MEAS__TIME_MALLOC 13.125 - saveLowTimeStampCountInto( endStamp ); 13.126 - addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist ); 13.127 - #endif 13.128 - //======================================================================== 13.129 - 13.130 - //skip over the prolog by adding its size to the pointer return 13.131 - return (void*)((uintptr_t)foundElem + sizeof(MallocProlog)); 13.132 - } 13.133 - 13.134 -/*This is sequential code, meant to only be called from the Master, not from 13.135 - * any slave VPs. 13.136 - *Search down list, checking size by the nextHigherInMem pointer, to find 13.137 - * first chunk bigger than size needed. 13.138 - *Shave off the extra and make it into a new free-list element, hook it in 13.139 - * then return the address of the found element plus size of prolog. 13.140 - * 13.141 - * The difference to the regular malloc is, that all the allocated chunks are 13.142 - * aligned and padded to the size of a CACHE_LINE. Thus creating a new chunk 13.143 - * before the aligned chunk. 13.144 - */ 13.145 -void *VMS__malloc_aligned( size_t sizeRequested ) 13.146 - { MallocProlog *foundElem = NULL, *currElem, *newElem; 13.147 - ssize_t amountExtra, sizeConsumed,sizeOfFound,prevAmount; 13.148 - uint32 foundElemIsTopOfHeap; 13.149 - 13.150 - //============================= MEASUREMENT STUFF ======================== 13.151 - #ifdef MEAS__TIME_MALLOC 13.152 - uint32 startStamp, endStamp; 13.153 - saveLowTimeStampCountInto( startStamp ); 13.154 - #endif 13.155 - //======================================================================== 13.156 - 13.157 - //step up the size to be multiple of the cache line size 13.158 - sizeRequested = (sizeRequested + CACHE_LINE) & ~(CACHE_LINE-1); 13.159 - currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList; 13.160 - 13.161 - while( currElem != NULL ) 13.162 - { //check if size of currElem is big enough 13.163 - sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem); 13.164 - amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog); 13.165 - if( amountExtra > 0 ) 13.166 - { 13.167 - //look if the found element is already aligned 13.168 - if((((uintptr_t)currElem+sizeof(MallocProlog)) & (uintptr_t)(CACHE_LINE-1)) == 0){ 13.169 - //found it, get out of loop 13.170 - foundElem = currElem; 13.171 - break; 13.172 - }else{ 13.173 - //find first aligned address and check if it's still big enough 13.174 - //check also if the space before the aligned address is big enough 13.175 - //for a new element 13.176 - void *firstAlignedAddr = (void*)(((uintptr_t)currElem + 2*CACHE_LINE) & ~((uintptr_t)(CACHE_LINE-1))); 13.177 - prevAmount = (uintptr_t)firstAlignedAddr - (uintptr_t)currElem; 13.178 - sizeOfFound=(uintptr_t)currElem->nextHigherInMem -(uintptr_t)firstAlignedAddr + sizeof(MallocProlog); 13.179 - amountExtra= sizeOfFound - sizeRequested - sizeof(MallocProlog); 13.180 - if(prevAmount > 2*sizeof(MallocProlog) && amountExtra > 0 ){ 13.181 - //found suitable element 13.182 - //create new previous element and exit loop 13.183 - MallocProlog *newAlignedElem = (MallocProlog*)firstAlignedAddr - 1; 13.184 - 13.185 - //insert new element into free list 13.186 - if(currElem->nextChunkInFreeList != NULL) 13.187 - currElem->nextChunkInFreeList->prevChunkInFreeList = newAlignedElem; 13.188 - newAlignedElem->prevChunkInFreeList = currElem; 13.189 - newAlignedElem->nextChunkInFreeList = currElem->nextChunkInFreeList; 13.190 - currElem->nextChunkInFreeList = newAlignedElem; 13.191 - 13.192 - //set higherInMem and lowerInMem 13.193 - newAlignedElem->nextHigherInMem = currElem->nextHigherInMem; 13.194 - foundElemIsTopOfHeap = currElem->nextHigherInMem == 13.195 - _VMSMasterEnv->freeListHead->nextHigherInMem; 13.196 - if(!foundElemIsTopOfHeap) 13.197 - currElem->nextHigherInMem->nextLowerInMem = newAlignedElem; 13.198 - currElem->nextHigherInMem = newAlignedElem; 13.199 - newAlignedElem->nextLowerInMem = currElem; 13.200 - 13.201 - //Found new element leaving loop 13.202 - foundElem = newAlignedElem; 13.203 - break; 13.204 - } 13.205 - } 13.206 - 13.207 - } 13.208 - currElem = currElem->nextChunkInFreeList; 13.209 - } 13.210 - 13.211 - if( foundElem == NULL ) 13.212 - { ERROR("\nmalloc failed\n") 13.213 - return (void *)NULL; //indicates malloc failed 13.214 - } 13.215 - //Using a kludge to identify the element that is the top chunk in the 13.216 - // heap -- saving top-of-heap addr in head's nextHigherInMem -- and 13.217 - // save addr of start of heap in head's nextLowerInMem 13.218 - //Will handle top of Heap specially 13.219 - foundElemIsTopOfHeap = foundElem->nextHigherInMem == 13.220 - _VMSMasterEnv->freeListHead->nextHigherInMem; 13.221 - 13.222 - //before shave off and try to insert new elem, remove found elem 13.223 - //note, foundElem will never be the head, so always has valid prevChunk 13.224 - foundElem->prevChunkInFreeList->nextChunkInFreeList = 13.225 - foundElem->nextChunkInFreeList; 13.226 - if( foundElem->nextChunkInFreeList != NULL ) 13.227 - { foundElem->nextChunkInFreeList->prevChunkInFreeList = 13.228 - foundElem->prevChunkInFreeList; 13.229 - } 13.230 - foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated 13.231 - 13.232 - //if enough, turn extra into new elem & insert it 13.233 - if( amountExtra > 64 ) 13.234 - { //make new elem by adding to addr of curr elem then casting 13.235 - sizeConsumed = sizeof(MallocProlog) + sizeRequested; 13.236 - newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed ); 13.237 - newElem->nextHigherInMem = foundElem->nextHigherInMem; 13.238 - newElem->nextLowerInMem = foundElem; 13.239 - foundElem->nextHigherInMem = newElem; 13.240 - 13.241 - if( ! foundElemIsTopOfHeap ) 13.242 - { //there is no next higher for top of heap, so can't write to it 13.243 - newElem->nextHigherInMem->nextLowerInMem = newElem; 13.244 - } 13.245 - add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead ); 13.246 - } 13.247 - else 13.248 - { 13.249 - sizeConsumed = sizeOfFound; 13.250 - } 13.251 - _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed; 13.252 - 13.253 - //============================= MEASUREMENT STUFF ======================== 13.254 - #ifdef MEAS__TIME_MALLOC 13.255 - saveLowTimeStampCountInto( endStamp ); 13.256 - addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist ); 13.257 - #endif 13.258 - //======================================================================== 13.259 - 13.260 - //skip over the prolog by adding its size to the pointer return 13.261 - return (void*)((uintptr_t)foundElem + sizeof(MallocProlog)); 13.262 - } 13.263 - 13.264 - 13.265 -/*This is sequential code -- only to be called from the Master 13.266 - * When free, subtract the size of prolog from pointer, then cast it to a 13.267 - * MallocProlog. Then check the nextLower and nextHigher chunks to see if 13.268 - * one or both are also free, and coalesce if so, and if neither free, then 13.269 - * add this one to free-list. 13.270 - */ 13.271 -void 13.272 -VMS__free( void *ptrToFree ) 13.273 - { MallocProlog *elemToFree, *nextLowerElem, *nextHigherElem; 13.274 - size_t sizeOfElem; 13.275 - uint32 lowerExistsAndIsFree, higherExistsAndIsFree; 13.276 - 13.277 - //============================= MEASUREMENT STUFF ======================== 13.278 - #ifdef MEAS__TIME_MALLOC 13.279 - int32 startStamp, endStamp; 13.280 - saveLowTimeStampCountInto( startStamp ); 13.281 - #endif 13.282 - //======================================================================== 13.283 - 13.284 - if( ptrToFree < (void*)_VMSMasterEnv->freeListHead->nextLowerInMem || 13.285 - ptrToFree > (void*)_VMSMasterEnv->freeListHead->nextHigherInMem ) 13.286 - { //outside the range of data owned by VMS's malloc, so do nothing 13.287 - return; 13.288 - } 13.289 - //subtract size of prolog to get pointer to prolog, then cast 13.290 - elemToFree = (MallocProlog *)((uintptr_t)ptrToFree - sizeof(MallocProlog)); 13.291 - sizeOfElem =(size_t)((uintptr_t)elemToFree->nextHigherInMem-(uintptr_t)elemToFree); 13.292 - 13.293 - if( elemToFree->prevChunkInFreeList != NULL ) 13.294 - { printf( "error: freeing same element twice!" ); exit(1); 13.295 - } 13.296 - 13.297 - _VMSMasterEnv->amtOfOutstandingMem -= sizeOfElem; 13.298 - 13.299 - nextLowerElem = elemToFree->nextLowerInMem; 13.300 - nextHigherElem = elemToFree->nextHigherInMem; 13.301 - 13.302 - if( nextHigherElem == NULL ) 13.303 - higherExistsAndIsFree = FALSE; 13.304 - else //okay exists, now check if in the free-list by checking back ptr 13.305 - higherExistsAndIsFree = (nextHigherElem->prevChunkInFreeList != NULL); 13.306 - 13.307 - if( nextLowerElem == NULL ) 13.308 - lowerExistsAndIsFree = FALSE; 13.309 - else //okay, it exists, now check if it's free 13.310 - lowerExistsAndIsFree = (nextLowerElem->prevChunkInFreeList != NULL); 13.311 - 13.312 - 13.313 - //now, know what exists and what's free 13.314 - if( lowerExistsAndIsFree ) 13.315 - { if( higherExistsAndIsFree ) 13.316 - { //both exist and are free, so coalesce all three 13.317 - //First, remove higher from free-list 13.318 - nextHigherElem->prevChunkInFreeList->nextChunkInFreeList = 13.319 - nextHigherElem->nextChunkInFreeList; 13.320 - if( nextHigherElem->nextChunkInFreeList != NULL ) //end-of-list? 13.321 - nextHigherElem->nextChunkInFreeList->prevChunkInFreeList = 13.322 - nextHigherElem->prevChunkInFreeList; 13.323 - //Now, fix-up sequence-in-mem list -- by side-effect, this also 13.324 - // changes size of the lower elem, which is still in free-list 13.325 - nextLowerElem->nextHigherInMem = nextHigherElem->nextHigherInMem; 13.326 - if( nextHigherElem->nextHigherInMem != 13.327 - _VMSMasterEnv->freeListHead->nextHigherInMem ) 13.328 - nextHigherElem->nextHigherInMem->nextLowerInMem = nextLowerElem; 13.329 - //notice didn't do anything to elemToFree -- it simply is no 13.330 - // longer reachable from any of the lists. Wonder if could be a 13.331 - // security leak because left valid addresses in it, 13.332 - // but don't care for now. 13.333 - } 13.334 - else 13.335 - { //lower is the only of the two that exists and is free, 13.336 - //In this case, no adjustment to free-list, just change mem-list. 13.337 - // By side-effect, changes size of the lower elem 13.338 - nextLowerElem->nextHigherInMem = elemToFree->nextHigherInMem; 13.339 - if( elemToFree->nextHigherInMem != 13.340 - _VMSMasterEnv->freeListHead->nextHigherInMem ) 13.341 - elemToFree->nextHigherInMem->nextLowerInMem = nextLowerElem; 13.342 - } 13.343 - } 13.344 - else 13.345 - { //lower either doesn't exist or isn't free, so check higher 13.346 - if( higherExistsAndIsFree ) 13.347 - { //higher exists and is the only of the two free 13.348 - //First, in free-list, replace higher elem with the one to free 13.349 - elemToFree->nextChunkInFreeList=nextHigherElem->nextChunkInFreeList; 13.350 - elemToFree->prevChunkInFreeList=nextHigherElem->prevChunkInFreeList; 13.351 - elemToFree->prevChunkInFreeList->nextChunkInFreeList = elemToFree; 13.352 - if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list? 13.353 - elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree; 13.354 - //Now chg mem-list. By side-effect, changes size of elemToFree 13.355 - elemToFree->nextHigherInMem = nextHigherElem->nextHigherInMem; 13.356 - if( elemToFree->nextHigherInMem != 13.357 - _VMSMasterEnv->freeListHead->nextHigherInMem ) 13.358 - elemToFree->nextHigherInMem->nextLowerInMem = elemToFree; 13.359 - } 13.360 - else 13.361 - { //neither lower nor higher is availabe to coalesce so add to list 13.362 - // this makes prev chunk ptr non-null, which indicates it's free 13.363 - elemToFree->nextChunkInFreeList = 13.364 - _VMSMasterEnv->freeListHead->nextChunkInFreeList; 13.365 - _VMSMasterEnv->freeListHead->nextChunkInFreeList = elemToFree; 13.366 - if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list? 13.367 - elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree; 13.368 - elemToFree->prevChunkInFreeList = _VMSMasterEnv->freeListHead; 13.369 - } 13.370 - } 13.371 - //============================= MEASUREMENT STUFF ======================== 13.372 - #ifdef MEAS__TIME_MALLOC 13.373 - saveLowTimeStampCountInto( endStamp ); 13.374 - addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->freeTimeHist ); 13.375 - #endif 13.376 - //======================================================================== 13.377 - 13.378 - } 13.379 - 13.380 - 13.381 -/*Allocates memory from the external system -- higher overhead 13.382 - * 13.383 - *Because of Linux's malloc throwing bizarre random faults when malloc is 13.384 - * used inside a VMS virtual processor, have to pass this as a request and 13.385 - * have the core loop do it when it gets around to it -- will look for these 13.386 - * chores leftover from the previous animation of masterVP the next time it 13.387 - * goes to animate the masterVP -- so it takes two separate masterVP 13.388 - * animations, separated by work, to complete an external malloc or 13.389 - * external free request. 13.390 - * 13.391 - *Thinking core loop accepts signals -- just looks if signal-location is 13.392 - * empty or not -- 13.393 - */ 13.394 -void * 13.395 -VMS__malloc_in_ext( size_t sizeRequested ) 13.396 - { 13.397 - /* 13.398 - //This is running in the master, so no chance for multiple cores to be 13.399 - // competing for the core's flag. 13.400 - if( *(_VMSMasterEnv->coreLoopSignalAddr[ 0 ]) != 0 ) 13.401 - { //something has already signalled to core loop, so save the signal 13.402 - // and look, next time master animated, to see if can send it. 13.403 - //Note, the addr to put a signal is in the coreloop's frame, so just 13.404 - // checks it each time through -- make it volatile to avoid GCC 13.405 - // optimizations -- it's a coreloop local var that only changes 13.406 - // after jumping away. The signal includes the addr to send the 13.407 - //return to -- even if just empty return completion-signal 13.408 - // 13.409 - //save the signal in some queue that the master looks at each time 13.410 - // it starts up -- one loc says if empty for fast common case -- 13.411 - //something like that -- want to hide this inside this call -- but 13.412 - // think this has to come as a request -- req handler gives procr 13.413 - // back to master loop, which gives it back to req handler at point 13.414 - // it sees that core loop has sent return signal. Something like 13.415 - // that. 13.416 - saveTheSignal 13.417 - 13.418 - } 13.419 - coreSigData->type = malloc; 13.420 - coreSigData->sizeToMalloc = sizeRequested; 13.421 - coreSigData->locToSignalCompletion = &figureOut; 13.422 - _VMSMasterEnv->coreLoopSignals[ 0 ] = coreSigData; 13.423 - */ 13.424 - //just risk system-stack faults until get this figured out 13.425 - return malloc( sizeRequested ); 13.426 - } 13.427 - 13.428 - 13.429 -/*Frees memory that was allocated in the external system -- higher overhead 13.430 - * 13.431 - *As noted in external malloc comment, this is clunky 'cause the free has 13.432 - * to be called in the core loop. 13.433 - */ 13.434 -void 13.435 -VMS__free_in_ext( void *ptrToFree ) 13.436 - { 13.437 - //just risk system-stack faults until get this figured out 13.438 - free( ptrToFree ); 13.439 - 13.440 - //TODO: fix this -- so 13.441 - } 13.442 - 13.443 - 13.444 -/*Designed to be called from the main thread outside of VMS, during init 13.445 - */ 13.446 -MallocProlog * 13.447 -VMS_ext__create_free_list() 13.448 - { MallocProlog *freeListHead, *firstChunk; 13.449 - 13.450 - //Note, this is running in the main thread -- all increases in malloc 13.451 - // mem and all frees of it must be done in this thread, with the 13.452 - // thread's original stack available 13.453 - freeListHead = malloc( sizeof(MallocProlog) ); 13.454 - firstChunk = malloc( MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE ); 13.455 - if( firstChunk == NULL ) {printf("malloc error\n"); exit(1);} 13.456 - 13.457 - //Touch memory to avoid page faults 13.458 - void *ptr,*endPtr; 13.459 - endPtr = (void*)firstChunk+MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE; 13.460 - for(ptr = firstChunk; ptr < endPtr; ptr+=PAGE_SIZE) 13.461 - { 13.462 - *(char*)ptr = 0; 13.463 - } 13.464 - 13.465 - freeListHead->prevChunkInFreeList = NULL; 13.466 - //Use this addr to free the heap when cleanup 13.467 - freeListHead->nextLowerInMem = firstChunk; 13.468 - //to identify top-of-heap elem, compare this addr to elem's next higher 13.469 - freeListHead->nextHigherInMem = (void*)( (uintptr_t)firstChunk + 13.470 - MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE); 13.471 - freeListHead->nextChunkInFreeList = firstChunk; 13.472 - 13.473 - firstChunk->nextChunkInFreeList = NULL; 13.474 - firstChunk->prevChunkInFreeList = freeListHead; 13.475 - //next Higher has to be set to top of chunk, so can calc size in malloc 13.476 - firstChunk->nextHigherInMem = (void*)( (uintptr_t)firstChunk + 13.477 - MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE); 13.478 - firstChunk->nextLowerInMem = NULL; //identifies as bott of heap 13.479 - 13.480 - _VMSMasterEnv->amtOfOutstandingMem = 0; //none allocated yet 13.481 - 13.482 - return freeListHead; 13.483 - } 13.484 - 13.485 - 13.486 -/*Designed to be called from the main thread outside of VMS, during cleanup 13.487 - */ 13.488 -void 13.489 -VMS_ext__free_free_list( MallocProlog *freeListHead ) 13.490 - { 13.491 - //stashed a ptr to the one and only bug chunk malloc'd from OS in the 13.492 - // free list head's next lower in mem pointer 13.493 - free( freeListHead->nextLowerInMem ); 13.494 - 13.495 - //don't free the head -- it'll be in an array eventually -- free whole 13.496 - // array when all the free lists linked from it have already been freed 13.497 - } 13.498 - 13.499 +/* 13.500 + * Copyright 2009 OpenSourceCodeStewardshipFoundation.org 13.501 + * Licensed under GNU General Public License version 2 13.502 + * 13.503 + * Author: seanhalle@yahoo.com 13.504 + * 13.505 + * Created on November 14, 2009, 9:07 PM 13.506 + */ 13.507 + 13.508 +#include <malloc.h> 13.509 +#include <inttypes.h> 13.510 +#include <stdlib.h> 13.511 +#include <stdio.h> 13.512 + 13.513 +#include "VMS.h" 13.514 +#include "Histogram/Histogram.h" 13.515 + 13.516 +/*Helper function 13.517 + *Insert a newly generated free chunk into the first spot on the free list. 13.518 + * The chunk is cast as a MallocProlog, so the various pointers in it are 13.519 + * accessed with C's help -- and the size of the prolog is easily added to 13.520 + * the pointer when a chunk is returned to the app -- so C handles changes 13.521 + * in pointer sizes among machines. 13.522 + * 13.523 + *The list head is a normal MallocProlog struct -- identified by its 13.524 + * prevChunkInFreeList being NULL -- the only one. 13.525 + * 13.526 + *The end of the list is identified by next chunk being NULL, as usual. 13.527 + */ 13.528 +void inline 13.529 +add_chunk_to_free_list( MallocProlog *chunk, MallocProlog *listHead ) 13.530 + { 13.531 + chunk->nextChunkInFreeList = listHead->nextChunkInFreeList; 13.532 + if( chunk->nextChunkInFreeList != NULL ) //if not last in free list 13.533 + chunk->nextChunkInFreeList->prevChunkInFreeList = chunk; 13.534 + chunk->prevChunkInFreeList = listHead; 13.535 + listHead->nextChunkInFreeList = chunk; 13.536 + } 13.537 + 13.538 + 13.539 +/*This is sequential code, meant to only be called from the Master, not from 13.540 + * any slave VPs. 13.541 + *Search down list, checking size by the nextHigherInMem pointer, to find 13.542 + * first chunk bigger than size needed. 13.543 + *Shave off the extra and make it into a new free-list element, hook it in 13.544 + * then return the address of the found element plus size of prolog. 13.545 + * 13.546 + *Will find a 13.547 + */ 13.548 +void *VMS__malloc( size_t sizeRequested ) 13.549 + { MallocProlog *foundElem = NULL, *currElem, *newElem; 13.550 + ssize_t amountExtra, sizeConsumed,sizeOfFound; 13.551 + uint32 foundElemIsTopOfHeap; 13.552 + 13.553 + //============================= MEASUREMENT STUFF ======================== 13.554 + #ifdef MEAS__TIME_MALLOC 13.555 + int32 startStamp, endStamp; 13.556 + saveLowTimeStampCountInto( startStamp ); 13.557 + #endif 13.558 + //======================================================================== 13.559 + 13.560 + //step up the size to be aligned at 16-byte boundary, prob better ways 13.561 + sizeRequested = (sizeRequested + 16) & ~15; 13.562 + currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList; 13.563 + 13.564 + while( currElem != NULL ) 13.565 + { //check if size of currElem is big enough 13.566 + sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem); 13.567 + amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog); 13.568 + if( amountExtra > 0 ) 13.569 + { //found it, get out of loop 13.570 + foundElem = currElem; 13.571 + currElem = NULL; 13.572 + } 13.573 + else 13.574 + currElem = currElem->nextChunkInFreeList; 13.575 + } 13.576 + 13.577 + if( foundElem == NULL ) 13.578 + { ERROR("\nmalloc failed\n") 13.579 + return (void *)NULL; //indicates malloc failed 13.580 + } 13.581 + //Using a kludge to identify the element that is the top chunk in the 13.582 + // heap -- saving top-of-heap addr in head's nextHigherInMem -- and 13.583 + // save addr of start of heap in head's nextLowerInMem 13.584 + //Will handle top of Heap specially 13.585 + foundElemIsTopOfHeap = foundElem->nextHigherInMem == 13.586 + _VMSMasterEnv->freeListHead->nextHigherInMem; 13.587 + 13.588 + //before shave off and try to insert new elem, remove found elem 13.589 + //note, foundElem will never be the head, so always has valid prevChunk 13.590 + foundElem->prevChunkInFreeList->nextChunkInFreeList = 13.591 + foundElem->nextChunkInFreeList; 13.592 + if( foundElem->nextChunkInFreeList != NULL ) 13.593 + { foundElem->nextChunkInFreeList->prevChunkInFreeList = 13.594 + foundElem->prevChunkInFreeList; 13.595 + } 13.596 + foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated 13.597 + 13.598 + //if enough, turn extra into new elem & insert it 13.599 + if( amountExtra > 64 ) 13.600 + { //make new elem by adding to addr of curr elem then casting 13.601 + sizeConsumed = sizeof(MallocProlog) + sizeRequested; 13.602 + newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed ); 13.603 + newElem->nextLowerInMem = foundElem; //This is evil (but why?) 13.604 + newElem->nextHigherInMem = foundElem->nextHigherInMem; //This is evil (but why?) 13.605 + foundElem->nextHigherInMem = newElem; 13.606 + if( ! foundElemIsTopOfHeap ) 13.607 + { //there is no next higher for top of heap, so can't write to it 13.608 + newElem->nextHigherInMem->nextLowerInMem = newElem; 13.609 + } 13.610 + add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead ); 13.611 + } 13.612 + else 13.613 + { 13.614 + sizeConsumed = sizeOfFound; 13.615 + } 13.616 + _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed; 13.617 + 13.618 + //============================= MEASUREMENT STUFF ======================== 13.619 + #ifdef MEAS__TIME_MALLOC 13.620 + saveLowTimeStampCountInto( endStamp ); 13.621 + addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist ); 13.622 + #endif 13.623 + //======================================================================== 13.624 + 13.625 + //skip over the prolog by adding its size to the pointer return 13.626 + return (void*)((uintptr_t)foundElem + sizeof(MallocProlog)); 13.627 + } 13.628 + 13.629 +/*This is sequential code, meant to only be called from the Master, not from 13.630 + * any slave VPs. 13.631 + *Search down list, checking size by the nextHigherInMem pointer, to find 13.632 + * first chunk bigger than size needed. 13.633 + *Shave off the extra and make it into a new free-list element, hook it in 13.634 + * then return the address of the found element plus size of prolog. 13.635 + * 13.636 + * The difference to the regular malloc is, that all the allocated chunks are 13.637 + * aligned and padded to the size of a CACHE_LINE. Thus creating a new chunk 13.638 + * before the aligned chunk. 13.639 + */ 13.640 +void *VMS__malloc_aligned( size_t sizeRequested ) 13.641 + { MallocProlog *foundElem = NULL, *currElem, *newElem; 13.642 + ssize_t amountExtra, sizeConsumed,sizeOfFound,prevAmount; 13.643 + uint32 foundElemIsTopOfHeap; 13.644 + 13.645 + //============================= MEASUREMENT STUFF ======================== 13.646 + #ifdef MEAS__TIME_MALLOC 13.647 + uint32 startStamp, endStamp; 13.648 + saveLowTimeStampCountInto( startStamp ); 13.649 + #endif 13.650 + //======================================================================== 13.651 + 13.652 + //step up the size to be multiple of the cache line size 13.653 + sizeRequested = (sizeRequested + CACHE_LINE) & ~(CACHE_LINE-1); 13.654 + currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList; 13.655 + 13.656 + while( currElem != NULL ) 13.657 + { //check if size of currElem is big enough 13.658 + sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem); 13.659 + amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog); 13.660 + if( amountExtra > 0 ) 13.661 + { 13.662 + //look if the found element is already aligned 13.663 + if((((uintptr_t)currElem+sizeof(MallocProlog)) & (uintptr_t)(CACHE_LINE-1)) == 0){ 13.664 + //found it, get out of loop 13.665 + foundElem = currElem; 13.666 + break; 13.667 + }else{ 13.668 + //find first aligned address and check if it's still big enough 13.669 + //check also if the space before the aligned address is big enough 13.670 + //for a new element 13.671 + void *firstAlignedAddr = (void*)(((uintptr_t)currElem + 2*CACHE_LINE) & ~((uintptr_t)(CACHE_LINE-1))); 13.672 + prevAmount = (uintptr_t)firstAlignedAddr - (uintptr_t)currElem; 13.673 + sizeOfFound=(uintptr_t)currElem->nextHigherInMem -(uintptr_t)firstAlignedAddr + sizeof(MallocProlog); 13.674 + amountExtra= sizeOfFound - sizeRequested - sizeof(MallocProlog); 13.675 + if(prevAmount > 2*sizeof(MallocProlog) && amountExtra > 0 ){ 13.676 + //found suitable element 13.677 + //create new previous element and exit loop 13.678 + MallocProlog *newAlignedElem = (MallocProlog*)firstAlignedAddr - 1; 13.679 + 13.680 + //insert new element into free list 13.681 + if(currElem->nextChunkInFreeList != NULL) 13.682 + currElem->nextChunkInFreeList->prevChunkInFreeList = newAlignedElem; 13.683 + newAlignedElem->prevChunkInFreeList = currElem; 13.684 + newAlignedElem->nextChunkInFreeList = currElem->nextChunkInFreeList; 13.685 + currElem->nextChunkInFreeList = newAlignedElem; 13.686 + 13.687 + //set higherInMem and lowerInMem 13.688 + newAlignedElem->nextHigherInMem = currElem->nextHigherInMem; 13.689 + foundElemIsTopOfHeap = currElem->nextHigherInMem == 13.690 + _VMSMasterEnv->freeListHead->nextHigherInMem; 13.691 + if(!foundElemIsTopOfHeap) 13.692 + currElem->nextHigherInMem->nextLowerInMem = newAlignedElem; 13.693 + currElem->nextHigherInMem = newAlignedElem; 13.694 + newAlignedElem->nextLowerInMem = currElem; 13.695 + 13.696 + //Found new element leaving loop 13.697 + foundElem = newAlignedElem; 13.698 + break; 13.699 + } 13.700 + } 13.701 + 13.702 + } 13.703 + currElem = currElem->nextChunkInFreeList; 13.704 + } 13.705 + 13.706 + if( foundElem == NULL ) 13.707 + { ERROR("\nmalloc failed\n") 13.708 + return (void *)NULL; //indicates malloc failed 13.709 + } 13.710 + //Using a kludge to identify the element that is the top chunk in the 13.711 + // heap -- saving top-of-heap addr in head's nextHigherInMem -- and 13.712 + // save addr of start of heap in head's nextLowerInMem 13.713 + //Will handle top of Heap specially 13.714 + foundElemIsTopOfHeap = foundElem->nextHigherInMem == 13.715 + _VMSMasterEnv->freeListHead->nextHigherInMem; 13.716 + 13.717 + //before shave off and try to insert new elem, remove found elem 13.718 + //note, foundElem will never be the head, so always has valid prevChunk 13.719 + foundElem->prevChunkInFreeList->nextChunkInFreeList = 13.720 + foundElem->nextChunkInFreeList; 13.721 + if( foundElem->nextChunkInFreeList != NULL ) 13.722 + { foundElem->nextChunkInFreeList->prevChunkInFreeList = 13.723 + foundElem->prevChunkInFreeList; 13.724 + } 13.725 + foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated 13.726 + 13.727 + //if enough, turn extra into new elem & insert it 13.728 + if( amountExtra > 64 ) 13.729 + { //make new elem by adding to addr of curr elem then casting 13.730 + sizeConsumed = sizeof(MallocProlog) + sizeRequested; 13.731 + newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed ); 13.732 + newElem->nextHigherInMem = foundElem->nextHigherInMem; 13.733 + newElem->nextLowerInMem = foundElem; 13.734 + foundElem->nextHigherInMem = newElem; 13.735 + 13.736 + if( ! foundElemIsTopOfHeap ) 13.737 + { //there is no next higher for top of heap, so can't write to it 13.738 + newElem->nextHigherInMem->nextLowerInMem = newElem; 13.739 + } 13.740 + add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead ); 13.741 + } 13.742 + else 13.743 + { 13.744 + sizeConsumed = sizeOfFound; 13.745 + } 13.746 + _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed; 13.747 + 13.748 + //============================= MEASUREMENT STUFF ======================== 13.749 + #ifdef MEAS__TIME_MALLOC 13.750 + saveLowTimeStampCountInto( endStamp ); 13.751 + addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist ); 13.752 + #endif 13.753 + //======================================================================== 13.754 + 13.755 + //skip over the prolog by adding its size to the pointer return 13.756 + return (void*)((uintptr_t)foundElem + sizeof(MallocProlog)); 13.757 + } 13.758 + 13.759 + 13.760 +/*This is sequential code -- only to be called from the Master 13.761 + * When free, subtract the size of prolog from pointer, then cast it to a 13.762 + * MallocProlog. Then check the nextLower and nextHigher chunks to see if 13.763 + * one or both are also free, and coalesce if so, and if neither free, then 13.764 + * add this one to free-list. 13.765 + */ 13.766 +void 13.767 +VMS__free( void *ptrToFree ) 13.768 + { MallocProlog *elemToFree, *nextLowerElem, *nextHigherElem; 13.769 + size_t sizeOfElem; 13.770 + uint32 lowerExistsAndIsFree, higherExistsAndIsFree; 13.771 + 13.772 + //============================= MEASUREMENT STUFF ======================== 13.773 + #ifdef MEAS__TIME_MALLOC 13.774 + int32 startStamp, endStamp; 13.775 + saveLowTimeStampCountInto( startStamp ); 13.776 + #endif 13.777 + //======================================================================== 13.778 + 13.779 + if( ptrToFree < (void*)_VMSMasterEnv->freeListHead->nextLowerInMem || 13.780 + ptrToFree > (void*)_VMSMasterEnv->freeListHead->nextHigherInMem ) 13.781 + { //outside the range of data owned by VMS's malloc, so do nothing 13.782 + return; 13.783 + } 13.784 + //subtract size of prolog to get pointer to prolog, then cast 13.785 + elemToFree = (MallocProlog *)((uintptr_t)ptrToFree - sizeof(MallocProlog)); 13.786 + sizeOfElem =(size_t)((uintptr_t)elemToFree->nextHigherInMem-(uintptr_t)elemToFree); 13.787 + 13.788 + if( elemToFree->prevChunkInFreeList != NULL ) 13.789 + { printf( "error: freeing same element twice!" ); exit(1); 13.790 + } 13.791 + 13.792 + _VMSMasterEnv->amtOfOutstandingMem -= sizeOfElem; 13.793 + 13.794 + nextLowerElem = elemToFree->nextLowerInMem; 13.795 + nextHigherElem = elemToFree->nextHigherInMem; 13.796 + 13.797 + if( nextHigherElem == NULL ) 13.798 + higherExistsAndIsFree = FALSE; 13.799 + else //okay exists, now check if in the free-list by checking back ptr 13.800 + higherExistsAndIsFree = (nextHigherElem->prevChunkInFreeList != NULL); 13.801 + 13.802 + if( nextLowerElem == NULL ) 13.803 + lowerExistsAndIsFree = FALSE; 13.804 + else //okay, it exists, now check if it's free 13.805 + lowerExistsAndIsFree = (nextLowerElem->prevChunkInFreeList != NULL); 13.806 + 13.807 + 13.808 + //now, know what exists and what's free 13.809 + if( lowerExistsAndIsFree ) 13.810 + { if( higherExistsAndIsFree ) 13.811 + { //both exist and are free, so coalesce all three 13.812 + //First, remove higher from free-list 13.813 + nextHigherElem->prevChunkInFreeList->nextChunkInFreeList = 13.814 + nextHigherElem->nextChunkInFreeList; 13.815 + if( nextHigherElem->nextChunkInFreeList != NULL ) //end-of-list? 13.816 + nextHigherElem->nextChunkInFreeList->prevChunkInFreeList = 13.817 + nextHigherElem->prevChunkInFreeList; 13.818 + //Now, fix-up sequence-in-mem list -- by side-effect, this also 13.819 + // changes size of the lower elem, which is still in free-list 13.820 + nextLowerElem->nextHigherInMem = nextHigherElem->nextHigherInMem; 13.821 + if( nextHigherElem->nextHigherInMem != 13.822 + _VMSMasterEnv->freeListHead->nextHigherInMem ) 13.823 + nextHigherElem->nextHigherInMem->nextLowerInMem = nextLowerElem; 13.824 + //notice didn't do anything to elemToFree -- it simply is no 13.825 + // longer reachable from any of the lists. Wonder if could be a 13.826 + // security leak because left valid addresses in it, 13.827 + // but don't care for now. 13.828 + } 13.829 + else 13.830 + { //lower is the only of the two that exists and is free, 13.831 + //In this case, no adjustment to free-list, just change mem-list. 13.832 + // By side-effect, changes size of the lower elem 13.833 + nextLowerElem->nextHigherInMem = elemToFree->nextHigherInMem; 13.834 + if( elemToFree->nextHigherInMem != 13.835 + _VMSMasterEnv->freeListHead->nextHigherInMem ) 13.836 + elemToFree->nextHigherInMem->nextLowerInMem = nextLowerElem; 13.837 + } 13.838 + } 13.839 + else 13.840 + { //lower either doesn't exist or isn't free, so check higher 13.841 + if( higherExistsAndIsFree ) 13.842 + { //higher exists and is the only of the two free 13.843 + //First, in free-list, replace higher elem with the one to free 13.844 + elemToFree->nextChunkInFreeList=nextHigherElem->nextChunkInFreeList; 13.845 + elemToFree->prevChunkInFreeList=nextHigherElem->prevChunkInFreeList; 13.846 + elemToFree->prevChunkInFreeList->nextChunkInFreeList = elemToFree; 13.847 + if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list? 13.848 + elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree; 13.849 + //Now chg mem-list. By side-effect, changes size of elemToFree 13.850 + elemToFree->nextHigherInMem = nextHigherElem->nextHigherInMem; 13.851 + if( elemToFree->nextHigherInMem != 13.852 + _VMSMasterEnv->freeListHead->nextHigherInMem ) 13.853 + elemToFree->nextHigherInMem->nextLowerInMem = elemToFree; 13.854 + } 13.855 + else 13.856 + { //neither lower nor higher is availabe to coalesce so add to list 13.857 + // this makes prev chunk ptr non-null, which indicates it's free 13.858 + elemToFree->nextChunkInFreeList = 13.859 + _VMSMasterEnv->freeListHead->nextChunkInFreeList; 13.860 + _VMSMasterEnv->freeListHead->nextChunkInFreeList = elemToFree; 13.861 + if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list? 13.862 + elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree; 13.863 + elemToFree->prevChunkInFreeList = _VMSMasterEnv->freeListHead; 13.864 + } 13.865 + } 13.866 + //============================= MEASUREMENT STUFF ======================== 13.867 + #ifdef MEAS__TIME_MALLOC 13.868 + saveLowTimeStampCountInto( endStamp ); 13.869 + addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->freeTimeHist ); 13.870 + #endif 13.871 + //======================================================================== 13.872 + 13.873 + } 13.874 + 13.875 + 13.876 +/*Allocates memory from the external system -- higher overhead 13.877 + * 13.878 + *Because of Linux's malloc throwing bizarre random faults when malloc is 13.879 + * used inside a VMS virtual processor, have to pass this as a request and 13.880 + * have the core loop do it when it gets around to it -- will look for these 13.881 + * chores leftover from the previous animation of masterVP the next time it 13.882 + * goes to animate the masterVP -- so it takes two separate masterVP 13.883 + * animations, separated by work, to complete an external malloc or 13.884 + * external free request. 13.885 + * 13.886 + *Thinking core loop accepts signals -- just looks if signal-location is 13.887 + * empty or not -- 13.888 + */ 13.889 +void * 13.890 +VMS__malloc_in_ext( size_t sizeRequested ) 13.891 + { 13.892 + /* 13.893 + //This is running in the master, so no chance for multiple cores to be 13.894 + // competing for the core's flag. 13.895 + if( *(_VMSMasterEnv->coreLoopSignalAddr[ 0 ]) != 0 ) 13.896 + { //something has already signalled to core loop, so save the signal 13.897 + // and look, next time master animated, to see if can send it. 13.898 + //Note, the addr to put a signal is in the coreloop's frame, so just 13.899 + // checks it each time through -- make it volatile to avoid GCC 13.900 + // optimizations -- it's a coreloop local var that only changes 13.901 + // after jumping away. The signal includes the addr to send the 13.902 + //return to -- even if just empty return completion-signal 13.903 + // 13.904 + //save the signal in some queue that the master looks at each time 13.905 + // it starts up -- one loc says if empty for fast common case -- 13.906 + //something like that -- want to hide this inside this call -- but 13.907 + // think this has to come as a request -- req handler gives procr 13.908 + // back to master loop, which gives it back to req handler at point 13.909 + // it sees that core loop has sent return signal. Something like 13.910 + // that. 13.911 + saveTheSignal 13.912 + 13.913 + } 13.914 + coreSigData->type = malloc; 13.915 + coreSigData->sizeToMalloc = sizeRequested; 13.916 + coreSigData->locToSignalCompletion = &figureOut; 13.917 + _VMSMasterEnv->coreLoopSignals[ 0 ] = coreSigData; 13.918 + */ 13.919 + //just risk system-stack faults until get this figured out 13.920 + return malloc( sizeRequested ); 13.921 + } 13.922 + 13.923 + 13.924 +/*Frees memory that was allocated in the external system -- higher overhead 13.925 + * 13.926 + *As noted in external malloc comment, this is clunky 'cause the free has 13.927 + * to be called in the core loop. 13.928 + */ 13.929 +void 13.930 +VMS__free_in_ext( void *ptrToFree ) 13.931 + { 13.932 + //just risk system-stack faults until get this figured out 13.933 + free( ptrToFree ); 13.934 + 13.935 + //TODO: fix this -- so 13.936 + } 13.937 + 13.938 + 13.939 +/*Designed to be called from the main thread outside of VMS, during init 13.940 + */ 13.941 +MallocProlog * 13.942 +VMS_ext__create_free_list() 13.943 + { MallocProlog *freeListHead, *firstChunk; 13.944 + 13.945 + //Note, this is running in the main thread -- all increases in malloc 13.946 + // mem and all frees of it must be done in this thread, with the 13.947 + // thread's original stack available 13.948 + freeListHead = malloc( sizeof(MallocProlog) ); 13.949 + firstChunk = malloc( MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE ); 13.950 + if( firstChunk == NULL ) {printf("malloc error\n"); exit(1);} 13.951 + 13.952 + //Touch memory to avoid page faults 13.953 + void *ptr,*endPtr; 13.954 + endPtr = (void*)firstChunk+MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE; 13.955 + for(ptr = firstChunk; ptr < endPtr; ptr+=PAGE_SIZE) 13.956 + { 13.957 + *(char*)ptr = 0; 13.958 + } 13.959 + 13.960 + freeListHead->prevChunkInFreeList = NULL; 13.961 + //Use this addr to free the heap when cleanup 13.962 + freeListHead->nextLowerInMem = firstChunk; 13.963 + //to identify top-of-heap elem, compare this addr to elem's next higher 13.964 + freeListHead->nextHigherInMem = (void*)( (uintptr_t)firstChunk + 13.965 + MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE); 13.966 + freeListHead->nextChunkInFreeList = firstChunk; 13.967 + 13.968 + firstChunk->nextChunkInFreeList = NULL; 13.969 + firstChunk->prevChunkInFreeList = freeListHead; 13.970 + //next Higher has to be set to top of chunk, so can calc size in malloc 13.971 + firstChunk->nextHigherInMem = (void*)( (uintptr_t)firstChunk + 13.972 + MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE); 13.973 + firstChunk->nextLowerInMem = NULL; //identifies as bott of heap 13.974 + 13.975 + _VMSMasterEnv->amtOfOutstandingMem = 0; //none allocated yet 13.976 + 13.977 + return freeListHead; 13.978 + } 13.979 + 13.980 + 13.981 +/*Designed to be called from the main thread outside of VMS, during cleanup 13.982 + */ 13.983 +void 13.984 +VMS_ext__free_free_list( MallocProlog *freeListHead ) 13.985 + { 13.986 + //stashed a ptr to the one and only bug chunk malloc'd from OS in the 13.987 + // free list head's next lower in mem pointer 13.988 + free( freeListHead->nextLowerInMem ); 13.989 + 13.990 + //don't free the head -- it'll be in an array eventually -- free whole 13.991 + // array when all the free lists linked from it have already been freed 13.992 + } 13.993 +
14.1 --- a/vmalloc.h Wed Jan 04 16:40:10 2012 +0100 14.2 +++ b/vmalloc.h Fri Jan 06 18:55:05 2012 +0100 14.3 @@ -1,61 +1,61 @@ 14.4 -/* 14.5 - * Copyright 2009 OpenSourceCodeStewardshipFoundation.org 14.6 - * Licensed under GNU General Public License version 2 14.7 - * 14.8 - * Author: seanhalle@yahoo.com 14.9 - * 14.10 - * Created on November 14, 2009, 9:07 PM 14.11 - */ 14.12 - 14.13 -#ifndef _VMALLOC_H 14.14 -#define _VMALLOC_H 14.15 - 14.16 -#include <malloc.h> 14.17 -#include <inttypes.h> 14.18 -#include "VMS_primitive_data_types.h" 14.19 - 14.20 -typedef struct _MallocProlog MallocProlog; 14.21 - 14.22 -struct _MallocProlog 14.23 - { 14.24 - MallocProlog *nextChunkInFreeList; 14.25 - MallocProlog *prevChunkInFreeList; 14.26 - MallocProlog *nextHigherInMem; 14.27 - MallocProlog *nextLowerInMem; 14.28 - }; 14.29 -//MallocProlog 14.30 - 14.31 -typedef struct 14.32 - { 14.33 - MallocProlog *firstChunkInFreeList; 14.34 - int32 numInList; //TODO not used 14.35 - } 14.36 -FreeListHead; 14.37 - 14.38 -void * 14.39 -VMS__malloc( size_t sizeRequested ); 14.40 - 14.41 -void * 14.42 -VMS__malloc_aligned( size_t sizeRequested ); 14.43 - 14.44 -void 14.45 -VMS__free( void *ptrToFree ); 14.46 - 14.47 -/*Allocates memory from the external system -- higher overhead 14.48 - */ 14.49 -void * 14.50 -VMS__malloc_in_ext( size_t sizeRequested ); 14.51 - 14.52 -/*Frees memory that was allocated in the external system -- higher overhead 14.53 - */ 14.54 -void 14.55 -VMS__free_in_ext( void *ptrToFree ); 14.56 - 14.57 - 14.58 -MallocProlog * 14.59 -VMS_ext__create_free_list(); 14.60 - 14.61 -void 14.62 -VMS_ext__free_free_list( MallocProlog *freeListHead ); 14.63 - 14.64 +/* 14.65 + * Copyright 2009 OpenSourceCodeStewardshipFoundation.org 14.66 + * Licensed under GNU General Public License version 2 14.67 + * 14.68 + * Author: seanhalle@yahoo.com 14.69 + * 14.70 + * Created on November 14, 2009, 9:07 PM 14.71 + */ 14.72 + 14.73 +#ifndef _VMALLOC_H 14.74 +#define _VMALLOC_H 14.75 + 14.76 +#include <malloc.h> 14.77 +#include <inttypes.h> 14.78 +#include "VMS_primitive_data_types.h" 14.79 + 14.80 +typedef struct _MallocProlog MallocProlog; 14.81 + 14.82 +struct _MallocProlog 14.83 + { 14.84 + MallocProlog *nextChunkInFreeList; 14.85 + MallocProlog *prevChunkInFreeList; 14.86 + MallocProlog *nextHigherInMem; 14.87 + MallocProlog *nextLowerInMem; 14.88 + }; 14.89 +//MallocProlog 14.90 + 14.91 +typedef struct 14.92 + { 14.93 + MallocProlog *firstChunkInFreeList; 14.94 + int32 numInList; //TODO not used 14.95 + } 14.96 +FreeListHead; 14.97 + 14.98 +void * 14.99 +VMS__malloc( size_t sizeRequested ); 14.100 + 14.101 +void * 14.102 +VMS__malloc_aligned( size_t sizeRequested ); 14.103 + 14.104 +void 14.105 +VMS__free( void *ptrToFree ); 14.106 + 14.107 +/*Allocates memory from the external system -- higher overhead 14.108 + */ 14.109 +void * 14.110 +VMS__malloc_in_ext( size_t sizeRequested ); 14.111 + 14.112 +/*Frees memory that was allocated in the external system -- higher overhead 14.113 + */ 14.114 +void 14.115 +VMS__free_in_ext( void *ptrToFree ); 14.116 + 14.117 + 14.118 +MallocProlog * 14.119 +VMS_ext__create_free_list(); 14.120 + 14.121 +void 14.122 +VMS_ext__free_free_list( MallocProlog *freeListHead ); 14.123 + 14.124 #endif 14.125 \ No newline at end of file
15.1 --- a/vutilities.c Wed Jan 04 16:40:10 2012 +0100 15.2 +++ b/vutilities.c Fri Jan 06 18:55:05 2012 +0100 15.3 @@ -1,25 +1,25 @@ 15.4 -/* 15.5 - * Copyright 2009 OpenSourceCodeStewardshipFoundation.org 15.6 - * Licensed under GNU General Public License version 2 15.7 - * 15.8 - * Author: seanhalle@yahoo.com 15.9 - * 15.10 - * Created on November 14, 2009, 9:07 PM 15.11 - */ 15.12 - 15.13 -#include <malloc.h> 15.14 -#include <stdlib.h> 15.15 - 15.16 -#include "VMS.h" 15.17 - 15.18 - 15.19 -inline char * 15.20 -VMS__strDup( char *str ) 15.21 - { char *retStr; 15.22 - 15.23 - retStr = VMS__malloc( strlen(str) + 1 ); 15.24 - if( str == NULL ) return str; 15.25 - strcpy( retStr, str ); 15.26 - 15.27 - return retStr; 15.28 - } 15.29 +/* 15.30 + * Copyright 2009 OpenSourceCodeStewardshipFoundation.org 15.31 + * Licensed under GNU General Public License version 2 15.32 + * 15.33 + * Author: seanhalle@yahoo.com 15.34 + * 15.35 + * Created on November 14, 2009, 9:07 PM 15.36 + */ 15.37 + 15.38 +#include <malloc.h> 15.39 +#include <stdlib.h> 15.40 + 15.41 +#include "VMS.h" 15.42 + 15.43 + 15.44 +inline char * 15.45 +VMS__strDup( char *str ) 15.46 + { char *retStr; 15.47 + 15.48 + retStr = VMS__malloc( strlen(str) + 1 ); 15.49 + if( str == NULL ) return str; 15.50 + strcpy( retStr, str ); 15.51 + 15.52 + return retStr; 15.53 + }
16.1 --- a/vutilities.h Wed Jan 04 16:40:10 2012 +0100 16.2 +++ b/vutilities.h Fri Jan 06 18:55:05 2012 +0100 16.3 @@ -1,20 +1,20 @@ 16.4 -/* 16.5 - * Copyright 2009 OpenSourceCodeStewardshipFoundation.org 16.6 - * Licensed under GNU General Public License version 2 16.7 - * 16.8 - * Author: seanhalle@yahoo.com 16.9 - * 16.10 - * Created on November 14, 2009, 9:07 PM 16.11 - */ 16.12 - 16.13 - 16.14 -#ifndef _UTILITIES_H 16.15 -#define _UTILITIES_H 16.16 - 16.17 -#include <string.h> 16.18 -#include "VMS_primitive_data_types.h" 16.19 - 16.20 -inline char * 16.21 -VMS__strDup( char *str ); 16.22 - 16.23 -#endif 16.24 +/* 16.25 + * Copyright 2009 OpenSourceCodeStewardshipFoundation.org 16.26 + * Licensed under GNU General Public License version 2 16.27 + * 16.28 + * Author: seanhalle@yahoo.com 16.29 + * 16.30 + * Created on November 14, 2009, 9:07 PM 16.31 + */ 16.32 + 16.33 + 16.34 +#ifndef _UTILITIES_H 16.35 +#define _UTILITIES_H 16.36 + 16.37 +#include <string.h> 16.38 +#include "VMS_primitive_data_types.h" 16.39 + 16.40 +inline char * 16.41 +VMS__strDup( char *str ); 16.42 + 16.43 +#endif
