Mercurial > cgi-bin > hgwebdir.cgi > VMS > VMS_Implementations > VMS_impls > VMS__MC_shared_impl
changeset 209:0c83ea8adefc Common_Ancestor
Close to a compilable version of Common_Ancestor -- still includes hardware-dependent (HW dep) code
| author | Some Random Person <seanhalle@yahoo.com> |
|---|---|
| date | Sun, 04 Mar 2012 14:26:35 -0800 |
| parents | eaf7e4c58c9e |
| children | a18539c0bc37 |
| files | CoreLoop.c MasterLoop.c VMS.h VMS__HW_dependent.c VMS__HW_dependent.h VMS__HW_dependent.s VMS__PI.c VMS__WL.c VMS__int.c VMS__startup_and_shutdown.c VMS_defs__HW_specific.h VMS_defs__lang_specific.h VMS_defs__main.h probes.c probes.h vmalloc.c vmalloc.h vutilities.h |
| diffstat | 18 files changed, 1163 insertions(+), 1269 deletions(-) [+] |
line diff
1.1 --- a/CoreLoop.c Wed Feb 22 11:39:12 2012 -0800 1.2 +++ b/CoreLoop.c Sun Mar 04 14:26:35 2012 -0800 1.3 @@ -6,7 +6,6 @@ 1.4 1.5 1.6 #include "VMS.h" 1.7 -#include "ProcrContext.h" 1.8 1.9 #include <stdlib.h> 1.10 #include <stdio.h> 1.11 @@ -15,14 +14,14 @@ 1.12 #include <pthread.h> 1.13 #include <sched.h> 1.14 1.15 -void *terminateCoreLoop(SlaveVP *currPr); 1.16 +void *terminateCoreLoop(SlaveVP *currSlv); 1.17 1.18 /*This is the loop that runs in the OS Thread pinned to each core 1.19 - *Get virt procr from queue, 1.20 - * save state of current animator, then load in state of virt procr, using 1.21 - * jmp instr to switch the program-counter state -- making the virt procr 1.22 + *Get Slv from queue, 1.23 + * save state of current animator, then load in state of Slv, using 1.24 + * jmp instr to switch the program-counter state -- making the Slv 1.25 * the new animator. 1.26 - *At some point, the virt procr will suspend itself by saving out its 1.27 + *At some point, the Slv will suspend itself by saving out its 1.28 * animator state (stack ptr, frame ptr, program counter) and switching 1.29 * back to the OS Thread's animator state, which means restoring the 1.30 * stack and frame and jumping to the core loop start point. 
1.31 @@ -34,7 +33,7 @@ 1.32 { 1.33 ThdParams *coreLoopThdParams; 1.34 int thisCoresIdx; 1.35 - SlaveVP *currPr; 1.36 + SlaveVP *currSlv; 1.37 VMSQueueStruc *readyToAnimateQ; 1.38 cpu_set_t coreMask; //has 1 in bit positions of allowed cores 1.39 int errorCode; 1.40 @@ -78,7 +77,7 @@ 1.41 if(errorCode){ printf("\nset affinity failure\n"); exit(0); } 1.42 1.43 1.44 - //Save the return address in the SwitchVP function 1.45 + //Save the return address in the SwitchSlv function 1.46 saveCoreLoopReturnAddr((void**)&(_VMSMasterEnv->coreLoopReturnPt)); 1.47 1.48 1.49 @@ -100,68 +99,55 @@ 1.50 while( gate.gateClosed ) /*busy wait*/; 1.51 } 1.52 1.53 - currPr = (SlaveVP *) readVMSQ( readyToAnimateQ ); 1.54 + currSlv = (SlaveVP *) readVMSQ( readyToAnimateQ ); 1.55 1.56 //Set the coreloop's progress, so stealer can see it has made it out 1.57 // of the protected area 1.58 gate.exitProgress = gate.preGateProgress; 1.59 #else 1.60 - currPr = (SlaveVP *) readVMSQ( readyToAnimateQ ); 1.61 + currSlv = (SlaveVP *) readVMSQ( readyToAnimateQ ); 1.62 #endif 1.63 1.64 - if( currPr != NULL ) _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0; 1.65 + if( currSlv != NULL ) _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0; 1.66 else 1.67 { 1.68 - //============================= MEASUREMENT STUFF ===================== 1.69 - #ifdef MEAS__TIME_MASTER_LOCK 1.70 - int32 startStamp, endStamp; 1.71 - saveLowTimeStampCountInto( startStamp ); 1.72 - #endif 1.73 - //===================================================================== 1.74 + MEAS__Capture_Pre_Master_Lock_Point; 1.75 + 1.76 int tries = 0; int gotLock = 0; 1.77 - while( currPr == NULL ) //if queue was empty, enter get masterLock loop 1.78 + while( currSlv == NULL ) //if queue was empty, enter get masterLock loop 1.79 { //queue was empty, so get master lock 1.80 1.81 gotLock = __sync_bool_compare_and_swap(&(_VMSMasterEnv->masterLock), 1.82 UNLOCKED, LOCKED ); 1.83 if( gotLock ) 1.84 { //run own MasterVP -- jmps to coreLoops startPt when 
done 1.85 - currPr = _VMSMasterEnv->masterVPs[thisCoresIdx]; 1.86 + currSlv = _VMSMasterEnv->masterVPs[thisCoresIdx]; 1.87 if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 ) 1.88 { DEBUG( dbgB2BMaster,"Many back to back MasterVPs\n"); 1.89 pthread_yield(); 1.90 } 1.91 _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1; 1.92 - break; //end while -- have a VP to animate now 1.93 + break; //end while -- have a Slv to animate now 1.94 } 1.95 1.96 tries++; //if too many, means master on other core taking too long 1.97 if( tries > MASTERLOCK_RETRIES ) { tries = 0; pthread_yield(); } 1.98 } 1.99 - //============================= MEASUREMENT STUFF ===================== 1.100 - #ifdef MEAS__TIME_MASTER_LOCK 1.101 - saveLowTimeStampCountInto( endStamp ); 1.102 - addIntervalToHist( startStamp, endStamp, 1.103 - _VMSMasterEnv->masterLockLowTimeHist ); 1.104 - addIntervalToHist( startStamp, endStamp, 1.105 - _VMSMasterEnv->masterLockHighTimeHist ); 1.106 - #endif 1.107 - //===================================================================== 1.108 - 1.109 + MEAS__Capture_Post_Master_Lock_Point; 1.110 } 1.111 1.112 1.113 - switchToVP(currPr); //The VPs return in here 1.114 + switchToSlv(currSlv); //The Slvs return in here 1.115 flushRegisters(); 1.116 }//CoreLoop 1.117 } 1.118 1.119 1.120 void * 1.121 -terminateCoreLoop(SlaveVP *currPr){ 1.122 - //first free shutdown VP that jumped here -- it first restores the 1.123 - // coreloop's stack, so addr of currPr in stack frame is still correct 1.124 - VMS_int__dissipate_procr( currPr ); 1.125 +terminateCoreLoop(SlaveVP *currSlv){ 1.126 + //first free shutdown Slv that jumped here -- it first restores the 1.127 + // coreloop's stack, so addr of currSlv in stack frame is still correct 1.128 + VMS_int__dissipate_SlaveVP( currSlv ); 1.129 pthread_exit( NULL ); 1.130 } 1.131 1.132 @@ -176,7 +162,7 @@ 1.133 void * 1.134 coreLoop_Seq( void *paramsIn ) 1.135 { 1.136 - SlaveVP *currPr; 1.137 + SlaveVP *currSlv; 1.138 VMSQueueStruc 
*readyToAnimateQ; 1.139 1.140 ThdParams *coreLoopThdParams; 1.141 @@ -186,7 +172,7 @@ 1.142 // thisCoresIdx = coreLoopThdParams->coreNum; 1.143 thisCoresIdx = 0; 1.144 1.145 - //Save the return address in the SwitchVP function 1.146 + //Save the return address in the SwitchSlv function 1.147 saveCoreLoopReturnAddr(&(_VMSMasterEnv->coreLoopReturnPt)); 1.148 1.149 1.150 @@ -195,19 +181,19 @@ 1.151 //_VMSWorkQ must be a global, static volatile var, so not kept in reg, 1.152 // which forces reloading the pointer after each jmp to this point 1.153 readyToAnimateQ = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx]; 1.154 - currPr = (SlaveVP *) readVMSQ( readyToAnimateQ ); 1.155 - if( currPr == NULL ) 1.156 + currSlv = (SlaveVP *) readVMSQ( readyToAnimateQ ); 1.157 + if( currSlv == NULL ) 1.158 { if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 ) 1.159 { printf("too many back to back MasterVP\n"); exit(1); } 1.160 _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1; 1.161 1.162 - currPr = _VMSMasterEnv->masterVPs[thisCoresIdx]; 1.163 + currSlv = _VMSMasterEnv->masterVPs[thisCoresIdx]; 1.164 } 1.165 else 1.166 _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0; 1.167 1.168 1.169 - switchToVP( currPr ); 1.170 + switchToSlv( currSlv ); 1.171 flushRegisters(); 1.172 } 1.173 }
2.1 --- a/MasterLoop.c Wed Feb 22 11:39:12 2012 -0800 2.2 +++ b/MasterLoop.c Sun Mar 04 14:26:35 2012 -0800 2.3 @@ -10,13 +10,12 @@ 2.4 #include <stddef.h> 2.5 2.6 #include "VMS.h" 2.7 -#include "ProcrContext.h" 2.8 2.9 2.10 //=========================================================================== 2.11 void inline 2.12 stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ, 2.13 - SlaveVP *masterPr ); 2.14 + SlaveVP *masterVP ); 2.15 2.16 //=========================================================================== 2.17 2.18 @@ -27,13 +26,13 @@ 2.19 *Polls each sched slot exactly once, hands any requests made by a newly 2.20 * done slave to the "request handler" plug-in function 2.21 * 2.22 - *Any slots that need a virt procr assigned are given to the "schedule" 2.23 - * plug-in function, which tries to assign a virt procr (slave) to it. 2.24 + *Any slots that need a Slv assigned are given to the "schedule" 2.25 + * plug-in function, which tries to assign a Slv (slave) to it. 2.26 * 2.27 *When all slots needing a processor have been given to the schedule plug-in, 2.28 - * a fraction of the procrs successfully scheduled are put into the 2.29 + * a fraction of the slaves successfully scheduled are put into the 2.30 * work queue, then a continuation of this function is put in, then the rest 2.31 - * of the virt procrs that were successfully scheduled. 2.32 + * of the Slvs that were successfully scheduled. 2.33 * 2.34 *The first thing the continuation does is busy-wait until the previous 2.35 * animation completes. 
This is because an (unlikely) continuation may 2.36 @@ -46,7 +45,7 @@ 2.37 * start running gets it and does all the stuff for a newly born -- 2.38 * from then on, will be doing continuation, but do suspension self 2.39 * directly at end of master loop 2.40 - *So VMS__init just births the master virtual processor same way it births 2.41 + *So VMS_WL__init just births the master virtual processor same way it births 2.42 * all the others -- then does any extra setup needed and puts it into the 2.43 * work queue. 2.44 *However means have to make masterEnv a global static volatile the same way 2.45 @@ -65,36 +64,36 @@ 2.46 *At this point, the masterLoop does not write itself into the queue anymore, 2.47 * instead, the coreLoop acquires the masterLock when it has nothing to 2.48 * animate, and then animates its own masterLoop. However, still try to put 2.49 - * several AppVPs into the queue to amortize the startup cost of switching 2.50 + * several AppSlvs into the queue to amortize the startup cost of switching 2.51 * to the MasterVP. Note, don't have to worry about latency of requests much 2.52 * because most requests generate work for same core -- only latency issue 2.53 * is case when other cores starved and one core's requests generate work 2.54 * for them -- so keep max in queue to 3 or 4.. 
2.55 */ 2.56 -void masterLoop( void *initData, SlaveVP *animatingPr ) 2.57 +void masterLoop( void *initData, SlaveVP *animatingSlv ) 2.58 { 2.59 int32 slotIdx, numSlotsFilled; 2.60 - SlaveVP *schedVirtPr; 2.61 + SlaveVP *schedSlaveVP; 2.62 SchedSlot *currSlot, **schedSlots; 2.63 MasterEnv *masterEnv; 2.64 VMSQueueStruc *readyToAnimateQ; 2.65 2.66 - Sched_Assigner slaveScheduler; 2.67 + Sched_Assigner slaveAssigner; 2.68 RequestHandler requestHandler; 2.69 void *semanticEnv; 2.70 2.71 int32 thisCoresIdx; 2.72 - SlaveVP *masterPr; 2.73 - volatile SlaveVP *volatileMasterPr; 2.74 + SlaveVP *masterVP; 2.75 + volatile SlaveVP *volatileMasterVP; 2.76 2.77 - volatileMasterPr = animatingPr; 2.78 - masterPr = (SlaveVP*)volatileMasterPr; //used to force re-define after jmp 2.79 + volatileMasterVP = animatingSlv; 2.80 + masterVP = (SlaveVP*)volatileMasterVP; //used to force re-define after jmp 2.81 2.82 //First animation of each MasterVP will in turn animate this part 2.83 - // of setup code.. (VP creator sets up the stack as if this function 2.84 + // of setup code.. (Slv creator sets up the stack as if this function 2.85 // was called normally, but actually get here by jmp) 2.86 //So, setup values about stack ptr, jmp pt and all that 2.87 - //masterPr->resumeInstrPtr = &&masterLoopStartPt; 2.88 + //masterVP->resumeInstrPtr = &&masterLoopStartPt; 2.89 2.90 2.91 //Note, got rid of writing the stack and frame ptr up here, because 2.92 @@ -108,25 +107,18 @@ 2.93 //masterLoopStartPt: 2.94 while(1){ 2.95 2.96 - //============================= MEASUREMENT STUFF ======================== 2.97 - #ifdef MEAS__TIME_MASTER 2.98 - //Total Master time includes one coreloop time -- just assume the core 2.99 - // loop time is same for Master as for AppVPs, even though it may be 2.100 - // smaller due to higher predictability of the fixed jmp. 
2.101 - saveLowTimeStampCountInto( masterPr->startMasterTSCLow ); 2.102 - #endif 2.103 - //======================================================================== 2.104 + MEAS__Capture_Pre_Master_Point 2.105 2.106 masterEnv = (MasterEnv*)_VMSMasterEnv; 2.107 2.108 //GCC may optimize so doesn't always re-define from frame-storage 2.109 - masterPr = (SlaveVP*)volatileMasterPr; //just to make sure after jmp 2.110 - thisCoresIdx = masterPr->coreAnimatedBy; 2.111 + masterVP = (SlaveVP*)volatileMasterVP; //just to make sure after jmp 2.112 + thisCoresIdx = masterVP->coreAnimatedBy; 2.113 readyToAnimateQ = masterEnv->readyToAnimateQs[thisCoresIdx]; 2.114 schedSlots = masterEnv->allSchedSlots[thisCoresIdx]; 2.115 2.116 requestHandler = masterEnv->requestHandler; 2.117 - slaveScheduler = masterEnv->slaveSchedAssigner; 2.118 + slaveAssigner = masterEnv->slaveAssigner; 2.119 semanticEnv = masterEnv->semanticEnv; 2.120 2.121 2.122 @@ -139,18 +131,18 @@ 2.123 if( currSlot->workIsDone ) 2.124 { 2.125 currSlot->workIsDone = FALSE; 2.126 - currSlot->needsProcrAssigned = TRUE; 2.127 + currSlot->needsSlaveAssigned = TRUE; 2.128 2.129 //process requests from slave to master 2.130 //====================== MEASUREMENT STUFF =================== 2.131 - #ifdef MEAS__TIME_PLUGIN 2.132 + #ifdef MEAS__TURN_ON_PLUGIN_MEAS 2.133 int32 startStamp1, endStamp1; 2.134 saveLowTimeStampCountInto( startStamp1 ); 2.135 #endif 2.136 //============================================================ 2.137 - (*requestHandler)( currSlot->procrAssignedToSlot, semanticEnv ); 2.138 + (*requestHandler)( currSlot->slaveAssignedToSlot, semanticEnv ); 2.139 //====================== MEASUREMENT STUFF =================== 2.140 - #ifdef MEAS__TIME_PLUGIN 2.141 + #ifdef MEAS__TURN_ON_PLUGIN_MEAS 2.142 saveLowTimeStampCountInto( endStamp1 ); 2.143 addIntervalToHist( startStamp1, endStamp1, 2.144 _VMSMasterEnv->reqHdlrLowTimeHist ); 2.145 @@ -159,18 +151,18 @@ 2.146 #endif 2.147 
//============================================================ 2.148 } 2.149 - if( currSlot->needsProcrAssigned ) 2.150 - { //give slot a new virt procr 2.151 - schedVirtPr = 2.152 - (*slaveScheduler)( semanticEnv, thisCoresIdx ); 2.153 + if( currSlot->needsSlaveAssigned ) 2.154 + { //give slot a new Slv 2.155 + schedSlaveVP = 2.156 + (*slaveAssigner)( semanticEnv, thisCoresIdx ); 2.157 2.158 - if( schedVirtPr != NULL ) 2.159 - { currSlot->procrAssignedToSlot = schedVirtPr; 2.160 - schedVirtPr->schedSlot = currSlot; 2.161 - currSlot->needsProcrAssigned = FALSE; 2.162 + if( schedSlaveVP != NULL ) 2.163 + { currSlot->slaveAssignedToSlot = schedSlaveVP; 2.164 + schedSlaveVP->schedSlot = currSlot; 2.165 + currSlot->needsSlaveAssigned = FALSE; 2.166 numSlotsFilled += 1; 2.167 2.168 - writeVMSQ( schedVirtPr, readyToAnimateQ ); 2.169 + writeVMSQ( schedSlaveVP, readyToAnimateQ ); 2.170 } 2.171 } 2.172 } 2.173 @@ -179,16 +171,13 @@ 2.174 #ifdef USE_WORK_STEALING 2.175 //If no slots filled, means no more work, look for work to steal. 
2.176 if( numSlotsFilled == 0 ) 2.177 - { gateProtected_stealWorkInto( currSlot, readyToAnimateQ, masterPr ); 2.178 + { gateProtected_stealWorkInto( currSlot, readyToAnimateQ, masterVP ); 2.179 } 2.180 #endif 2.181 2.182 + MEAS__Capture_Post_Master_Point; 2.183 2.184 - #ifdef MEAS__TIME_MASTER 2.185 - saveLowTimeStampCountInto( masterPr->endMasterTSCLow ); 2.186 - #endif 2.187 - 2.188 - masterSwitchToCoreLoop(animatingPr); 2.189 + masterSwitchToCoreLoop(animatingSlv); 2.190 flushRegisters(); 2.191 }//MasterLoop 2.192 2.193 @@ -202,14 +191,14 @@ 2.194 */ 2.195 void inline 2.196 stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ, 2.197 - SlaveVP *masterPr ) 2.198 + SlaveVP *masterVP ) 2.199 { 2.200 - SlaveVP *stolenPr; 2.201 + SlaveVP *stolenSlv; 2.202 int32 coreIdx, i; 2.203 VMSQueueStruc *currQ; 2.204 2.205 - stolenPr = NULL; 2.206 - coreIdx = masterPr->coreAnimatedBy; 2.207 + stolenSlv = NULL; 2.208 + coreIdx = masterVP->coreAnimatedBy; 2.209 for( i = 0; i < NUM_CORES -1; i++ ) 2.210 { 2.211 if( coreIdx >= NUM_CORES -1 ) 2.212 @@ -220,17 +209,17 @@ 2.213 } 2.214 currQ = _VMSMasterEnv->readyToAnimateQs[coreIdx]; 2.215 if( numInVMSQ( currQ ) > 0 ) 2.216 - { stolenPr = readVMSQ (currQ ); 2.217 + { stolenSlv = readVMSQ (currQ ); 2.218 break; 2.219 } 2.220 } 2.221 2.222 - if( stolenPr != NULL ) 2.223 - { currSlot->procrAssignedToSlot = stolenPr; 2.224 - stolenPr->schedSlot = currSlot; 2.225 - currSlot->needsProcrAssigned = FALSE; 2.226 + if( stolenSlv != NULL ) 2.227 + { currSlot->slaveAssignedToSlot = stolenSlv; 2.228 + stolenSlv->schedSlot = currSlot; 2.229 + currSlot->needsSlaveAssigned = FALSE; 2.230 2.231 - writeVMSQ( stolenPr, readyToAnimateQ ); 2.232 + writeVMSQ( stolenSlv, readyToAnimateQ ); 2.233 } 2.234 } 2.235 2.236 @@ -306,9 +295,9 @@ 2.237 void inline 2.238 gateProtected_stealWorkInto( SchedSlot *currSlot, 2.239 VMSQueueStruc *myReadyToAnimateQ, 2.240 - SlaveVP *masterPr ) 2.241 + SlaveVP *masterVP ) 2.242 { 2.243 - SlaveVP *stolenPr; 2.244 
+ SlaveVP *stolenSlv; 2.245 int32 coreIdx, i, haveAVictim, gotLock; 2.246 VMSQueueStruc *victimsQ; 2.247 2.248 @@ -319,7 +308,7 @@ 2.249 2.250 //see if any other cores have work available to steal 2.251 haveAVictim = FALSE; 2.252 - coreIdx = masterPr->coreAnimatedBy; 2.253 + coreIdx = masterVP->coreAnimatedBy; 2.254 for( i = 0; i < NUM_CORES -1; i++ ) 2.255 { 2.256 if( coreIdx >= NUM_CORES -1 ) 2.257 @@ -354,18 +343,18 @@ 2.258 coreMightBeInProtected = FALSE; 2.259 } 2.260 2.261 - stolenPr = readVMSQ ( victimsQ ); 2.262 + stolenSlv = readVMSQ ( victimsQ ); 2.263 2.264 vicGate->gateClosed = FALSE; 2.265 //======= End Gate-protection ======= 2.266 2.267 2.268 - if( stolenPr != NULL ) //victim could have been in protected and taken 2.269 - { currSlot->procrAssignedToSlot = stolenPr; 2.270 - stolenPr->schedSlot = currSlot; 2.271 - currSlot->needsProcrAssigned = FALSE; 2.272 + if( stolenSlv != NULL ) //victim could have been in protected and taken 2.273 + { currSlot->slaveAssignedToSlot = stolenSlv; 2.274 + stolenSlv->schedSlot = currSlot; 2.275 + currSlot->needsSlaveAssigned = FALSE; 2.276 2.277 - writeVMSQ( stolenPr, myReadyToAnimateQ ); 2.278 + writeVMSQ( stolenSlv, myReadyToAnimateQ ); 2.279 } 2.280 2.281 //unlock the work stealing lock
3.1 --- a/VMS.h Wed Feb 22 11:39:12 2012 -0800 3.2 +++ b/VMS.h Sun Mar 04 14:26:35 2012 -0800 3.3 @@ -20,6 +20,10 @@ 3.4 #include <pthread.h> 3.5 #include <sys/time.h> 3.6 3.7 +#ifndef _LANG_NAME_ 3.8 +#define _LANG_NAME_ "" 3.9 +#endif 3.10 + 3.11 //================= Defines: included from separate files ================= 3.12 // 3.13 // Note: ALL defines are in other files, none are in here 3.14 @@ -44,11 +48,15 @@ 3.15 typedef struct _GateStruc GateStruc; 3.16 3.17 3.18 -typedef SlaveVP * (*Sched_Assigner) ( void *, int ); //semEnv, coreIdx 3.19 -typedef void (*RequestHandler) ( SlaveVP *, void * ); //prWReqst, semEnv 3.20 -typedef void (*TopLevelFnPtr) ( void *, SlaveVP * ); //initData, animPr 3.21 -typedef void TopLevelFn ( void *, SlaveVP * ); //initData, animPr 3.22 -typedef void (*ResumeVPFnPtr) ( SlaveVP *, void * ); 3.23 +typedef SlaveVP * (*Sched_Assigner) ( void *, int ); //semEnv, coreIdx 3.24 +typedef void (*RequestHandler) ( SlaveVP *, void * ); //prWReqst, semEnv 3.25 +typedef void (*TopLevelFnPtr) ( void *, SlaveVP * ); //initData, animSlv 3.26 +typedef void TopLevelFn ( void *, SlaveVP * ); //initData, animSlv 3.27 +typedef void (*ResumeSlvFnPtr) ( SlaveVP *, void * ); 3.28 + 3.29 +//============================ HW Dependent Fns ================================ 3.30 + 3.31 +#include "VMS__HW_dependent.h" 3.32 3.33 //============================= Statistics ================================== 3.34 3.35 @@ -83,7 +91,7 @@ 3.36 3.37 typedef struct 3.38 { enum VMSSemReqstType reqType; 3.39 - SlaveVP *requestingPr; 3.40 + SlaveVP *requestingSlv; 3.41 char *nameStr; //for create probe 3.42 } 3.43 VMSSemReq; 3.44 @@ -94,12 +102,12 @@ 3.45 struct _SchedSlot 3.46 { 3.47 int workIsDone; 3.48 - int needsProcrAssigned; 3.49 - SlaveVP *procrAssignedToSlot; 3.50 + int needsSlaveAssigned; 3.51 + SlaveVP *slaveAssignedToSlot; 3.52 }; 3.53 //SchedSlot 3.54 3.55 -/*WARNING: re-arranging this data structure could cause VP switching 3.56 +/*WARNING: re-arranging this 
data structure could cause Slv switching 3.57 * assembly code to fail -- hard-codes offsets of fields 3.58 */ 3.59 struct _SlaveVP 3.60 @@ -117,23 +125,11 @@ 3.61 SchedSlot *schedSlot; 3.62 VMSReqst *requests; 3.63 3.64 - void *semanticData; //this livesUSE_GNU here for the life of VP 3.65 - void *dataRetFromReq;//values returned from plugin to VP go here 3.66 + void *semanticData; //this livesUSE_GNU here for the life of Slv 3.67 + void *dataRetFromReq;//values returned from plugin to Slv go here 3.68 3.69 //=========== MEASUREMENT STUFF ========== 3.70 - #ifdef MEAS__TIME_STAMP_SUSP 3.71 - uint32 preSuspTSCLow; 3.72 - uint32 postSuspTSCLow; 3.73 - #endif 3.74 - #ifdef MEAS__TIME_MASTER /* in SlaveVP because multiple masterVPs*/ 3.75 - uint32 startMasterTSCLow;USE_GNU 3.76 - uint32 endMasterTSCLow; 3.77 - #endif 3.78 - #ifdef MEAS__TIME_2011_SYS 3.79 - TSCountLowHigh startSusp; 3.80 - uint64 totalSuspCycles; 3.81 - uint32 numGoodSusp; 3.82 - #endif 3.83 + MEAS__Insert_Meas_Fields_into_Slave; 3.84 //======================================== 3.85 3.86 float64 createPtInSecs; //have space but don't use on some configs 3.87 @@ -141,18 +137,13 @@ 3.88 //SlaveVP 3.89 3.90 3.91 -/*WARNING: re-arranging this data structure could cause VP-switching 3.92 +/*WARNING: re-arranging this data structure could cause Slv-switching 3.93 * assembly code to fail -- hard-codes offsets of fields 3.94 * (because -O3 messes with things otherwise) 3.95 */ 3.96 typedef struct 3.97 { 3.98 - union{ //adds padding to put masterLock on its own cache-line to elim 3.99 - // false sharing (masterLock is most-accessed var in VMS) 3.100 - volatile int32 masterLock; 3.101 - char padding[CACHE_LINE_SZ]; 3.102 - } masterLockUnion; 3.103 - Sched_Assigner slaveSchedAssigner; 3.104 + Sched_Assigner slaveAssigner; 3.105 RequestHandler requestHandler; 3.106 3.107 SchedSlot ***allSchedSlots; 3.108 @@ -161,17 +152,19 @@ 3.109 3.110 void *semanticEnv; 3.111 void *OSEventStruc; //for future, when add I/O to 
BLIS 3.112 - MallocArrays *freeLists; 3.113 + MallocArrays *freeLists; 3.114 int32 amtOfOutstandingMem; //total currently allocated 3.115 3.116 void *coreLoopReturnPt;//addr to jump to to re-enter coreLoop 3.117 3.118 int32 setupComplete; 3.119 - //int32 numMasterInARow[NUM_CORES];//detect back-to-back masterVP 3.120 + int32 numMasterInARow[NUM_CORES];//detect back-to-back masterVP 3.121 + int32 masterLock __align_to_cacheline__; 3.122 GateStruc *workStealingGates[ NUM_CORES ]; //concurrent work-steal 3.123 int32 workStealingLock; 3.124 3.125 - int32 numVPsCreated; //gives ordering to processor creation 3.126 + int32 numSlavesCreated; //gives ordering to processor creation 3.127 + int32 numSlavesAlive; //used to detect when to shutdown 3.128 3.129 //=========== MEASUREMENT STUFF ============= 3.130 IntervalProbe **intervalProbes; 3.131 @@ -181,28 +174,12 @@ 3.132 float64 createPtInSecs; 3.133 Histogram **measHists; 3.134 PrivDynArrayInfo *measHistsInfo; 3.135 - #ifdef MEAS__TIME_PLUGIN 3.136 - Histogram *reqHdlrLowTimeHist; 3.137 - Histogram *reqHdlrHighTimeHist; 3.138 - #endif 3.139 - #ifdef MEAS__TIME_MALLOC 3.140 - Histogram *mallocTimeHist; 3.141 - Histogram *freeTimeHist; 3.142 - #endif 3.143 - #ifdef MEAS__TIME_MASTER_LOCK 3.144 - Histogram *masterLockLowTimeHist; 3.145 - Histogram *masterLockHighTimeHist; 3.146 - #endif 3.147 - #ifdef MEAS__TIME_2011_SYS 3.148 - TSCountLowHigh startMaster; 3.149 - uint64 totalMasterCycles; 3.150 - uint32 numMasterAnimations; 3.151 - TSCountLowHigh startReqHdlr; 3.152 - uint64 totalPluginCycles; 3.153 - uint32 numPluginAnimations; 3.154 - uint64 cyclesTillStartMasterLoop; 3.155 - TSCountLowHigh endMasterLoop; 3.156 - #endif 3.157 + MEAS__Insert_Susp_Meas_Fields_into_MasterEnv; 3.158 + MEAS__Insert_Master_Meas_Fields_into_MasterEnv; 3.159 + MEAS__Insert_Master_Lock_Meas_Fields_into_MasterEnv; 3.160 + MEAS__Insert_Malloc_Meas_Fields_into_MasterEnv; 3.161 + MEAS__Insert_Plugin_Meas_Fields_into_MasterEnv; 3.162 + 
MEAS__Insert_System_Meas_Fields_into_MasterEnv; 3.163 //========================================== 3.164 } 3.165 MasterEnv; 3.166 @@ -237,28 +214,32 @@ 3.167 } 3.168 ThdParams; 3.169 3.170 +//============================= Global Vars ================================ 3.171 + 3.172 pthread_t coreLoopThdHandles[ NUM_CORES ]; //pthread's virt-procr state 3.173 ThdParams *coreLoopThdParams [ NUM_CORES ]; 3.174 pthread_mutex_t suspendLock; 3.175 pthread_cond_t suspend_cond; 3.176 3.177 - 3.178 - 3.179 -//============================= Global Vars ================================ 3.180 - 3.181 volatile MasterEnv *_VMSMasterEnv __align_to_cacheline__; 3.182 3.183 3.184 - 3.185 - 3.186 //========================= Function Prototypes =========================== 3.187 3.188 +/* MEANING OF WL PI SS int 3.189 + * These indicate which places the function is safe to use. They stand for: 3.190 + * WL: Wrapper Library 3.191 + * PI: Plugin 3.192 + * SS: Startup and Shutdown 3.193 + * int: internal to the VMS implementation 3.194 + */ 3.195 3.196 //========== Setup and shutdown ========== 3.197 void 3.198 -VMS_int__init(); 3.199 +VMS_SS__init(); 3.200 3.201 -Fix seed-procr creation -- put box around language, have lang register stuff 3.202 +//Fix; 3.203 +/*seed-procr creation -- put box around language, have lang register stuff 3.204 with VMS. 3.205 have main program explicitly INIT Lang! -- makes more sense to 3.206 C programmers -- makes it clear that there's a transition. 3.207 @@ -289,77 +270,83 @@ 3.208 lang's sync constructs -- VMS uses message system to establish tie-pt, 3.209 each lang defines what a tie-point means to it.. (work with the 3.210 diff semantics?) 
3.211 +*/ 3.212 void 3.213 -VMS_WL__start_the_work_then_wait_until_done(); 3.214 +VMS_SS__start_the_work_then_wait_until_done(); 3.215 3.216 void 3.217 -VMS_int__shutdown(); 3.218 +VMS_SS__shutdown(); 3.219 3.220 void 3.221 -VMS_int__cleanup_at_end_of_shutdown(); 3.222 +VMS_SS__cleanup_at_end_of_shutdown(); 3.223 3.224 3.225 //============== =============== 3.226 3.227 inline SlaveVP * 3.228 -VMS_int__create_procr( TopLevelFnPtr fnPtr, void *dataParam ); 3.229 +VMS_int__create_slaveVP( TopLevelFnPtr fnPtr, void *dataParam ); 3.230 +#define VMS_PI__create_slaveVP VMS_int__create_slaveVP 3.231 +#define VMS_WL__create_slaveVP VMS_int__create_slaveVP 3.232 3.233 inline void 3.234 -VMS_int__point_slave_to_Fn( SlaveVP *slaveVP, TopLevelFnPtr fnPtr, 3.235 +VMS_int__point_slaveVP_to_Fn( SlaveVP *slaveVP, TopLevelFnPtr fnPtr, 3.236 void *dataParam); 3.237 +#define VMS_PI__point_slaveVP_to_Fn VMS_int__point_slaveVP_to_Fn 3.238 +#define VMS_WL__point_slaveVP_to_Fn VMS_int__point_slaveVP_to_Fn 3.239 3.240 void 3.241 -VMS_int__save_return_addr_into_ptd_to_loc(void *ptrToReturnAddrHoldingLoc); 3.242 - 3.243 -void 3.244 -VMS_int__write_return_addr_from_ptd_to_loc(void *ptrToReturnAddrHoldingLoc); 3.245 - 3.246 -void 3.247 -VMS_int__dissipate_procr( SlaveVP *procrToDissipate ); 3.248 +VMS_int__dissipate_SlaveVP( SlaveVP *slaveToDissipate ); 3.249 +#define VMS_PI__dissipate_SlaveVP VMS_int__dissipateSlaveVP 3.250 +//From WL, dissipate a SlaveVP by sending a request 3.251 3.252 //Use this to create processor inside entry point & other places outside 3.253 // the VMS system boundary (IE, not run in slave nor Master) 3.254 SlaveVP * 3.255 -VMS_ext__create_procr( TopLevelFnPtr fnPtr, void *dataParam ); 3.256 +VMS_ext__create_slaveVP( TopLevelFnPtr fnPtr, void *dataParam ); 3.257 3.258 void 3.259 -VMS_ext__dissipate_procr( SlaveVP *procrToDissipate ); 3.260 +VMS_ext__dissipate_slaveVP( SlaveVP *slaveToDissipate ); 3.261 3.262 void 3.263 -VMS_PI__throw_exception( char *msgStr, SlaveVP 
*reqstPr, VMSExcp *excpData ); 3.264 +VMS_int__throw_exception( char *msgStr, SlaveVP *reqstSlv, VMSExcp *excpData ); 3.265 +#define VMS_PI__throw_exception VMS_int__throw_exception 3.266 +#define VMS_WL__throw_exception VMS_int__throw_exception 3.267 3.268 void * 3.269 -VMS_WL__give_sem_env_for( SlaveVP *animPr ); 3.270 +VMS_int__give_sem_env_for( SlaveVP *animSlv ); 3.271 +#define VMS_PI__give_sem_env_for VMS_int__give_sem_env_for 3.272 +#define VMS_SS__give_sem_env_for VMS_int__give_sem_env_for 3.273 +//No WL version -- not safe! if use in WL, be sure data rd & wr is stable 3.274 3.275 //============== Request Related =============== 3.276 3.277 void 3.278 -VMS_int__suspend_procr( SlaveVP *callingPr ); 3.279 +VMS_int__suspend_slaveVP_and_send_req( SlaveVP *callingSlv ); 3.280 3.281 inline void 3.282 -VMS_WL__add_sem_request_in_mallocd_VMSReqst( void *semReqData, SlaveVP *callingPr ); 3.283 +VMS_WL__add_sem_request_in_mallocd_VMSReqst( void *semReqData, SlaveVP *callingSlv ); 3.284 3.285 inline void 3.286 -VMS_WL__send_sem_request( void *semReqData, SlaveVP *callingPr ); 3.287 +VMS_WL__send_sem_request( void *semReqData, SlaveVP *callingSlv ); 3.288 3.289 void 3.290 -VMS_WL__send_create_procr_req( void *semReqData, SlaveVP *reqstingPr ); 3.291 +VMS_WL__send_create_slaveVP_req( void *semReqData, SlaveVP *reqstingSlv ); 3.292 3.293 void inline 3.294 VMS_WL__send_dissipate_req( SlaveVP *prToDissipate ); 3.295 3.296 inline void 3.297 -VMS_WL__send_VMSSem_request( void *semReqData, SlaveVP *callingPr ); 3.298 +VMS_WL__send_VMSSem_request( void *semReqData, SlaveVP *callingSlv ); 3.299 3.300 VMSReqst * 3.301 -VMS_PI__take_next_request_out_of( SlaveVP *procrWithReq ); 3.302 +VMS_PI__take_next_request_out_of( SlaveVP *slaveWithReq ); 3.303 3.304 inline void * 3.305 VMS_PI__take_sem_reqst_from( VMSReqst *req ); 3.306 3.307 void inline 3.308 -VMS_PI__handle_VMSSemReq( VMSReqst *req, SlaveVP *requestingPr, void *semEnv, 3.309 - ResumeVPFnPtr resumePrFnPtr ); 3.310 
+VMS_PI__handle_VMSSemReq( VMSReqst *req, SlaveVP *requestingSlv, void *semEnv, 3.311 + ResumeSlvFnPtr resumeSlvFnPtr ); 3.312 3.313 //======================== MEASUREMENT ====================== 3.314 uint64 3.315 @@ -368,8 +355,6 @@ 3.316 VMS_WL__give_num_plugin_animations(); 3.317 3.318 3.319 - 3.320 -#include "VMS__HW_dependent.h" 3.321 #include "probes.h" 3.322 #include "vutilities.h" 3.323
4.1 --- a/VMS__HW_dependent.c Wed Feb 22 11:39:12 2012 -0800 4.2 +++ b/VMS__HW_dependent.c Sun Mar 04 14:26:35 2012 -0800 4.3 @@ -12,7 +12,8 @@ 4.4 *No need to save registers on old stack frame, because there's no old 4.5 * animator state to return to 4.6 */ 4.7 -VMS_int__point_slave_to_Fn( SlaveVP *slaveVP, TopLevelFnPtr fnPtr, 4.8 +inline void 4.9 +VMS_int__point_slaveVP_to_Fn( SlaveVP *slaveVP, TopLevelFnPtr fnPtr, 4.10 void *dataParam) 4.11 { void *stackPtr; 4.12
5.1 --- a/VMS__HW_dependent.h Wed Feb 22 11:39:12 2012 -0800 5.2 +++ b/VMS__HW_dependent.h Sun Mar 04 14:26:35 2012 -0800 5.3 @@ -6,28 +6,75 @@ 5.4 * 5.5 */ 5.6 5.7 -#ifndef _ProcrContext_H 5.8 -#define _ProcrContext_H 5.9 +#ifndef _VMS__HW_DEPENDENT_H 5.10 +#define _VMS__HW_DEPENDENT_H 5.11 #define _GNU_SOURCE 5.12 5.13 -void saveCoreLoopReturnAddr(void **returnAddress); 5.14 +void 5.15 +saveCoreLoopReturnAddr(void **returnAddress); 5.16 5.17 -void switchToVP(SlaveVP *nextProcr); 5.18 +void 5.19 +switchToSlv(SlaveVP *nextSlave); 5.20 5.21 -void switchToCoreLoop(SlaveVP *nextProcr); 5.22 +void 5.23 +switchToCoreLoop(SlaveVP *nextSlave); 5.24 5.25 -void masterSwitchToCoreLoop(SlaveVP *nextProcr); 5.26 +void 5.27 +masterSwitchToCoreLoop(SlaveVP *nextSlave); 5.28 5.29 -void startUpTopLevelFn(); 5.30 +void 5.31 +startUpTopLevelFn(); 5.32 5.33 -void *asmTerminateCoreLoop(SlaveVP *currPr); 5.34 +void * 5.35 +asmTerminateCoreLoop(SlaveVP *currSlv); 5.36 5.37 #define flushRegisters() \ 5.38 asm volatile ("":::"%rbx", "%r12", "%r13","%r14","%r15") 5.39 5.40 inline SlaveVP * 5.41 -create_procr_helper( SlaveVP *newPr, TopLevelFnPtr fnPtr, 5.42 +create_slaveVP_helper( SlaveVP *newSlv, TopLevelFnPtr fnPtr, 5.43 void *dataParam, void *stackLocs ); 5.44 5.45 -#endif /* _ProcrContext_H */ 5.46 +void 5.47 +VMS_int__save_return_into_ptd_to_loc_then_do_ret(void *ptdToLoc); 5.48 5.49 +void 5.50 +VMS_int__return_to_addr_in_ptd_to_loc(void *ptdToLoc); 5.51 + 5.52 +//=================== Macros to Capture Measurements ====================== 5.53 +// 5.54 +//===== RDTSC wrapper ===== 5.55 +//Also runs with x86_64 code 5.56 +#define saveTSCLowHigh(lowHighIn) \ 5.57 + asm volatile("RDTSC; \ 5.58 + movl %%eax, %0; \ 5.59 + movl %%edx, %1;" \ 5.60 + /* outputs */ : "=m" (lowHighIn.lowHigh[0]), "=m" (lowHighIn.lowHigh[1])\ 5.61 + /* inputs */ : \ 5.62 + /* clobber */ : "%eax", "%edx" \ 5.63 + ); 5.64 + 5.65 +#define saveTimeStampCountInto(low, high) \ 5.66 + asm volatile("RDTSC; \ 5.67 + movl 
%%eax, %0; \ 5.68 + movl %%edx, %1;" \ 5.69 + /* outputs */ : "=m" (low), "=m" (high)\ 5.70 + /* inputs */ : \ 5.71 + /* clobber */ : "%eax", "%edx" \ 5.72 + ); 5.73 + 5.74 +#define saveLowTimeStampCountInto(low) \ 5.75 + asm volatile("RDTSC; \ 5.76 + movl %%eax, %0;" \ 5.77 + /* outputs */ : "=m" (low) \ 5.78 + /* inputs */ : \ 5.79 + /* clobber */ : "%eax", "%edx" \ 5.80 + ); 5.81 + 5.82 + //For code that calculates normalization-offset between TSC counts of 5.83 + // different cores. 5.84 +//#define NUM_TSC_ROUND_TRIPS 10 5.85 + 5.86 + 5.87 +#endif /* _VMS__HW_DEPENDENT_H */ 5.88 +
6.1 --- a/VMS__HW_dependent.s Wed Feb 22 11:39:12 2012 -0800 6.2 +++ b/VMS__HW_dependent.s Sun Mar 04 14:26:35 2012 -0800 6.3 @@ -16,13 +16,13 @@ 6.4 // the top-level function, which was pointed to by the stack-ptr 6.5 .globl startUpTopLevelFn 6.6 startUpTopLevelFn: 6.7 - movq %rdi , %rsi #get second argument from first argument of switchVP 6.8 + movq %rdi , %rsi #get second argument from first argument of switchSlv 6.9 movq 0x08(%rsp), %rdi #get first argument from stack 6.10 movq (%rsp) , %rax #get top-level function's addr from stack 6.11 jmp *%rax #jump to the top-level function 6.12 6.13 -//Switches form CoreLoop to VP ether a normal VP or the Master Loop 6.14 -//switch to virt procr's stack and frame ptr then jump to virt procr fn 6.15 +//Switches form CoreLoop to Slv ether a normal Slv or the Master Loop 6.16 +//switch to Slv's stack and frame ptr then jump to Slv fn 6.17 /* SlaveVP offsets: 6.18 * 0x10 stackPtr 6.19 * 0x18 framePtr 6.20 @@ -34,15 +34,15 @@ 6.21 * 0x48 coreLoopReturnPt 6.22 * 0x54 masterLock 6.23 */ 6.24 -.globl switchToVP 6.25 -switchToVP: 6.26 +.globl switchToSlv 6.27 +switchToSlv: 6.28 #SlaveVP in %rdi 6.29 movq %rsp , 0x38(%rdi) #save core loop stack pointer 6.30 movq %rbp , 0x30(%rdi) #save core loop frame pointer 6.31 movq 0x10(%rdi), %rsp #restore stack pointer 6.32 movq 0x18(%rdi), %rbp #restore frame pointer 6.33 movq 0x20(%rdi), %rax #get jmp pointer 6.34 - jmp *%rax #jmp to VP 6.35 + jmp *%rax #jmp to Slv 6.36 coreLoopReturn: 6.37 ret 6.38 6.39 @@ -62,7 +62,7 @@ 6.40 .globl switchToCoreLoop 6.41 switchToCoreLoop: 6.42 #SlaveVP in %rdi 6.43 - movq $VPReturn , 0x20(%rdi) #store return address 6.44 + movq $SlvReturn , 0x20(%rdi) #store return address 6.45 movq %rsp , 0x10(%rdi) #save stack pointer 6.46 movq %rbp , 0x18(%rdi) #save frame pointer 6.47 movq 0x38(%rdi), %rsp #restore stack pointer 6.48 @@ -71,7 +71,7 @@ 6.49 movq (%rcx) , %rcx 6.50 movq 0x48(%rcx), %rax #get CoreLoopStartPt 6.51 jmp *%rax #jmp to CoreLoop 6.52 -VPReturn: 
6.53 +SlvReturn: 6.54 ret 6.55 6.56 6.57 @@ -108,10 +108,10 @@ 6.58 6.59 //Switch to terminateCoreLoop 6.60 //therefor switch to coreLoop context from master context 6.61 -// no need to call because the stack is already set up for switchVP 6.62 -// and virtPr is in %rdi 6.63 +// no need to call because the stack is already set up for switchSlv 6.64 +// and Slv is in %rdi 6.65 // and both functions have the same argument. 6.66 -// do not save register of VP because this function will never return 6.67 +// do not save register of Slv because this function will never return 6.68 /* SlaveVP offsets: 6.69 * 0x10 stackPtr 6.70 * 0x18 framePtr 6.71 @@ -134,7 +134,7 @@ 6.72 6.73 /* 6.74 * This one for the sequential version is special. It discards the current stack 6.75 - * and returns directly from the coreLoop after VMS__dissipate_procr was called 6.76 + * and returns directly from the coreLoop after VMS_WL__dissipate_slaveVP was called 6.77 */ 6.78 .globl asmTerminateCoreLoopSeq 6.79 asmTerminateCoreLoopSeq: 6.80 @@ -142,7 +142,7 @@ 6.81 movq 0x38(%rdi), %rsp #restore stack pointer 6.82 movq 0x30(%rdi), %rbp #restore frame pointer 6.83 #argument is in %rdi 6.84 - call VMS__dissipate_procr 6.85 + call VMS_int__dissipate_slaveVP 6.86 movq %rbp , %rsp #goto the coreLoops stack 6.87 pop %rbp #restore the old framepointer 6.88 ret #return from core loop 6.89 @@ -150,18 +150,18 @@ 6.90 6.91 //Assembly code takes the return addr off the stack and saves 6.92 // into the loc pointed to by rdi. 
The return addr is at 0x8(%rbp) for 64bit 6.93 -.globl asm_save_ret_to_singleton 6.94 -VMS_int__save_return_addr_into_ptd_to_loc: 6.95 +.globl VMS_int__save_return_into_ptd_to_loc_then_do_ret 6.96 +VMS_int__save_return_into_ptd_to_loc_then_do_ret: 6.97 movq 0x8(%rbp), %rax #get ret address, rbp is the same as in the calling function 6.98 - movq %rax, (%rdi) #write ret addr to endInstrAddr field 6.99 + movq %rax, (%rdi) #write ret addr into addr passed as param field 6.100 ret 6.101 6.102 6.103 //Assembly code changes the return addr on the stack to the one 6.104 -// pointed to by the parameter. The stack's return addr is at 0x8(%rbp) 6.105 -.globl asm_write_ret_from_singleton 6.106 -VMS_int__write_return_addr_from_ptd_to_loc: 6.107 - movq (%rdi), %rax #get return addr 6.108 - movq %rax, 0x8(%rbp) #write return addr to the stack of the caller 6.109 +// pointed to by the parameter, then returns. Stack's return addr is at 0x8(%rbp) 6.110 +.globl VMS_int__return_to_addr_in_ptd_to_loc 6.111 +VMS_int__return_to_addr_in_ptd_to_loc: 6.112 + movq (%rdi), %rax #get return addr from addr passed as param 6.113 + movq %rax, 0x8(%rbp) #write return addr to the stack of the caller 6.114 ret 6.115
7.1 --- a/VMS__PI.c Wed Feb 22 11:39:12 2012 -0800 7.2 +++ b/VMS__PI.c Sun Mar 04 14:26:35 2012 -0800 7.3 @@ -17,13 +17,13 @@ 7.4 /* 7.5 */ 7.6 VMSReqst * 7.7 -VMS_PI__take_next_request_out_of( SlaveVP *procrWithReq ) 7.8 +VMS_PI__take_next_request_out_of( SlaveVP *slaveWithReq ) 7.9 { VMSReqst *req; 7.10 7.11 - req = procrWithReq->requests; 7.12 + req = slaveWithReq->requests; 7.13 if( req == NULL ) return NULL; 7.14 7.15 - procrWithReq->requests = procrWithReq->requests->nextReqst; 7.16 + slaveWithReq->requests = slaveWithReq->requests->nextReqst; 7.17 return req; 7.18 } 7.19 7.20 @@ -51,8 +51,8 @@ 7.21 * Do the same for OS calls -- look later at it.. 7.22 */ 7.23 void inline 7.24 -VMS_PI__handle_VMSSemReq( VMSReqst *req, SlaveVP *requestingPr, void *semEnv, 7.25 - ResumeVPFnPtr resumePrFnPtr ) 7.26 +VMS_PI__handle_VMSSemReq( VMSReqst *req, SlaveVP *requestingSlv, void *semEnv, 7.27 + ResumeSlvFnPtr resumeSlvFnPtr ) 7.28 { VMSSemReq *semReq; 7.29 IntervalProbe *newProbe; 7.30 7.31 @@ -67,9 +67,9 @@ 7.32 newProbe->probeID = 7.33 addToDynArray( newProbe, _VMSMasterEnv->dynIntervalProbesInfo ); 7.34 7.35 - requestingPr->dataRetFromReq = newProbe; 7.36 + requestingSlv->dataRetFromReq = newProbe; 7.37 7.38 - (*resumePrFnPtr)( requestingPr, semEnv ); 7.39 + (*resumeSlvFnPtr)( requestingSlv, semEnv ); 7.40 } 7.41 7.42 7.43 @@ -77,7 +77,7 @@ 7.44 * the error message. 7.45 */ 7.46 void 7.47 -VMS_PI__throw_exception( char *msgStr, SlaveVP *reqstPr, VMSExcp *excpData ) 7.48 +VMS_PI__throw_exception( char *msgStr, SlaveVP *reqstSlv, VMSExcp *excpData ) 7.49 { 7.50 printf("%s",msgStr); 7.51 fflush(stdin);
8.1 --- a/VMS__WL.c Wed Feb 22 11:39:12 2012 -0800 8.2 +++ b/VMS__WL.c Sun Mar 04 14:26:35 2012 -0800 8.3 @@ -14,38 +14,30 @@ 8.4 #include "VMS.h" 8.5 8.6 8.7 -/*Anticipating multi-tasking 8.8 - */ 8.9 -void * 8.10 -VMS_WL__give_sem_env_for( SlaveVP *animPr ) 8.11 - { 8.12 - return _VMSMasterEnv->semanticEnv; 8.13 - } 8.14 - 8.15 8.16 /*For this implementation of VMS, it may not make much sense to have the 8.17 * system of requests for creating a new processor done this way.. but over 8.18 * the scope of single-master, multi-master, mult-tasking, OS-implementing, 8.19 * distributed-memory, and so on, this gives VMS implementation a chance to 8.20 - * do stuff before suspend, in the AppVP, and in the Master before the plugin 8.21 + * do stuff before suspend, in the SlaveVP, and in the Master before the plugin 8.22 * is called, as well as in the lang-lib before this is called, and in the 8.23 * plugin. So, this gives both VMS and language implementations a chance to 8.24 * intercept at various points and do order-dependent stuff. 8.25 *Having a standard VMSNewPrReqData struc allows the language to create and 8.26 - * free the struc, while VMS knows how to get the newPr if it wants it, and 8.27 + * free the struc, while VMS knows how to get the newSlv if it wants it, and 8.28 * it lets the lang have lang-specific data related to creation transported 8.29 * to the plugin. 
8.30 */ 8.31 void 8.32 -VMS_WL__send_create_procr_req( void *semReqData, SlaveVP *reqstingPr ) 8.33 +VMS_WL__send_create_slaveVP_req( void *semReqData, SlaveVP *reqstingSlv ) 8.34 { VMSReqst req; 8.35 8.36 req.reqType = createReq; 8.37 req.semReqData = semReqData; 8.38 - req.nextReqst = reqstingPr->requests; 8.39 - reqstingPr->requests = &req; 8.40 + req.nextReqst = reqstingSlv->requests; 8.41 + reqstingSlv->requests = &req; 8.42 8.43 - VMS_int__suspend_procr( reqstingPr ); 8.44 + VMS_int__suspend_slaveVP_and_send_req( reqstingSlv ); 8.45 } 8.46 8.47 8.48 @@ -61,24 +53,24 @@ 8.49 *This form is a bit misleading to understand if one is trying to figure out 8.50 * how VMS works -- it looks like a normal function call, but inside it 8.51 * sends a request to the request handler and suspends the processor, which 8.52 - * jumps out of the VMS__dissipate_procr function, and out of all nestings 8.53 + * jumps out of the VMS_WL__dissipate_slaveVP function, and out of all nestings 8.54 * above it, transferring the work of dissipating to the request handler, 8.55 * which then does the actual work -- causing the processor that animated 8.56 * the call of this function to disappear and the "hanging" state of this 8.57 * function to just poof into thin air -- the virtual processor's trace 8.58 * never returns from this call, but instead the virtual processor's trace 8.59 * gets suspended in this call and all the virt processor's state disap- 8.60 - * pears -- making that suspend the last thing in the virt procr's trace. 8.61 + * pears -- making that suspend the last thing in the Slv's trace. 
8.62 */ 8.63 void 8.64 -VMS_WL__send_dissipate_req( SlaveVP *procrToDissipate ) 8.65 +VMS_WL__send_dissipate_req( SlaveVP *slaveToDissipate ) 8.66 { VMSReqst req; 8.67 8.68 req.reqType = dissipate; 8.69 - req.nextReqst = procrToDissipate->requests; 8.70 - procrToDissipate->requests = &req; 8.71 + req.nextReqst = slaveToDissipate->requests; 8.72 + slaveToDissipate->requests = &req; 8.73 8.74 - VMS_int__suspend_procr( procrToDissipate ); 8.75 + VMS_int__suspend_slaveVP_and_send_req( slaveToDissipate ); 8.76 } 8.77 8.78 8.79 @@ -95,14 +87,14 @@ 8.80 */ 8.81 inline void 8.82 VMS_WL__add_sem_request_in_mallocd_VMSReqst( void *semReqData, 8.83 - SlaveVP *callingPr ) 8.84 + SlaveVP *callingSlv ) 8.85 { VMSReqst *req; 8.86 8.87 req = VMS_int__malloc( sizeof(VMSReqst) ); 8.88 req->reqType = semantic; 8.89 req->semReqData = semReqData; 8.90 - req->nextReqst = callingPr->requests; 8.91 - callingPr->requests = req; 8.92 + req->nextReqst = callingSlv->requests; 8.93 + callingSlv->requests = req; 8.94 } 8.95 8.96 /*This inserts the semantic-layer's request data into standard VMS carrier 8.97 @@ -111,28 +103,28 @@ 8.98 *Then it does suspend, to cause request to be sent. 
8.99 */ 8.100 inline void 8.101 -VMS_WL__send_sem_request( void *semReqData, SlaveVP *callingPr ) 8.102 +VMS_WL__send_sem_request( void *semReqData, SlaveVP *callingSlv ) 8.103 { VMSReqst req; 8.104 8.105 req.reqType = semantic; 8.106 req.semReqData = semReqData; 8.107 - req.nextReqst = callingPr->requests; 8.108 - callingPr->requests = &req; 8.109 + req.nextReqst = callingSlv->requests; 8.110 + callingSlv->requests = &req; 8.111 8.112 - VMS_int__suspend_procr( callingPr ); 8.113 + VMS_int__suspend_slaveVP_and_send_req( callingSlv ); 8.114 } 8.115 8.116 8.117 inline void 8.118 -VMS_WL__send_VMSSem_request( void *semReqData, SlaveVP *callingPr ) 8.119 +VMS_WL__send_VMSSem_request( void *semReqData, SlaveVP *callingSlv ) 8.120 { VMSReqst req; 8.121 8.122 req.reqType = VMSSemantic; 8.123 req.semReqData = semReqData; 8.124 - req.nextReqst = callingPr->requests; //gab any other preceeding 8.125 - callingPr->requests = &req; 8.126 + req.nextReqst = callingSlv->requests; //gab any other preceeding 8.127 + callingSlv->requests = &req; 8.128 8.129 - VMS_int__suspend_procr( callingPr ); 8.130 + VMS_int__suspend_slaveVP_and_send_req( callingSlv ); 8.131 } 8.132 8.133
9.1 --- a/VMS__int.c Wed Feb 22 11:39:12 2012 -0800 9.2 +++ b/VMS__int.c Sun Mar 04 14:26:35 2012 -0800 9.3 @@ -15,18 +15,18 @@ 9.4 9.5 9.6 inline SlaveVP * 9.7 -VMS_int__create_procr( TopLevelFnPtr fnPtr, void *dataParam ) 9.8 - { SlaveVP *newPr; 9.9 +VMS_int__create_slaveVP( TopLevelFnPtr fnPtr, void *dataParam ) 9.10 + { SlaveVP *newSlv; 9.11 void *stackLocs; 9.12 9.13 - newPr = VMS_int__malloc( sizeof(SlaveVP) ); 9.14 + newSlv = VMS_int__malloc( sizeof(SlaveVP) ); 9.15 stackLocs = VMS_int__malloc( VIRT_PROCR_STACK_SIZE ); 9.16 if( stackLocs == 0 ) 9.17 { perror("VMS__malloc stack"); exit(1); } 9.18 9.19 - _VMSMasterEnv->numSlaves += 1; 9.20 + _VMSMasterEnv->numSlavesAlive += 1; 9.21 9.22 - return create_procr_helper( newPr, fnPtr, dataParam, stackLocs ); 9.23 + return create_slaveVP_helper( newSlv, fnPtr, dataParam, stackLocs ); 9.24 } 9.25 9.26 /* "ext" designates that it's for use outside the VMS system -- should only 9.27 @@ -34,59 +34,49 @@ 9.28 * a VMS virtual processor. 9.29 */ 9.30 inline SlaveVP * 9.31 -VMS_ext__create_procr( TopLevelFnPtr fnPtr, void *dataParam ) 9.32 - { SlaveVP *newPr; 9.33 +VMS_ext__create_slaveVP( TopLevelFnPtr fnPtr, void *dataParam ) 9.34 + { SlaveVP *newSlv; 9.35 char *stackLocs; 9.36 9.37 - newPr = malloc( sizeof(SlaveVP) ); 9.38 + newSlv = malloc( sizeof(SlaveVP) ); 9.39 stackLocs = malloc( VIRT_PROCR_STACK_SIZE ); 9.40 if( stackLocs == 0 ) 9.41 { perror("malloc stack"); exit(1); } 9.42 9.43 - return create_procr_helper( newPr, fnPtr, dataParam, stackLocs ); 9.44 + _VMSMasterEnv->numSlavesAlive += 1; 9.45 + 9.46 + return create_slaveVP_helper(newSlv, fnPtr, dataParam, stackLocs); 9.47 } 9.48 9.49 9.50 //=========================================================================== 9.51 /*there is a label inside this function -- save the addr of this label in 9.52 - * the callingPr struc, as the pick-up point from which to start the next 9.53 - * work-unit for that procr. 
If turns out have to save registers, then 9.54 - * save them in the procr struc too. Then do assembly jump to the CoreLoop's 9.55 - * "done with work-unit" label. The procr struc is in the request in the 9.56 + * the callingSlv struc, as the pick-up point from which to start the next 9.57 + * work-unit for that slave. If turns out have to save registers, then 9.58 + * save them in the slave struc too. Then do assembly jump to the CoreLoop's 9.59 + * "done with work-unit" label. The slave struc is in the request in the 9.60 * slave that animated the just-ended work-unit, so all the state is saved 9.61 * there, and will get passed along, inside the request handler, to the 9.62 - * next work-unit for that procr. 9.63 + * next work-unit for that slave. 9.64 */ 9.65 void 9.66 -VMS_int__suspend_procr( SlaveVP *animatingPr ) 9.67 +VMS_int__suspend_slaveVP_and_send_req( SlaveVP *animatingSlv ) 9.68 { 9.69 9.70 - //The request to master will cause this suspended virt procr to get 9.71 + //The request to master will cause this suspended Slv to get 9.72 // scheduled again at some future point -- to resume, core loop jumps 9.73 // to the resume point (below), which causes restore of saved regs and 9.74 // "return" from this call. 
9.75 - //animatingPr->resumeInstrPtr = &&ResumePt; 9.76 + //animatingSlv->resumeInstrPtr = &&ResumePt; 9.77 9.78 - //return ownership of the virt procr and sched slot to Master virt pr 9.79 - animatingPr->schedSlot->workIsDone = TRUE; 9.80 + //return ownership of the Slv and sched slot to Master virt pr 9.81 + animatingSlv->schedSlot->workIsDone = TRUE; 9.82 9.83 - //=========================== Measurement stuff ======================== 9.84 - #ifdef MEAS__TIME_STAMP_SUSP 9.85 - //record time stamp: compare to time-stamp recorded below 9.86 - saveLowTimeStampCountInto( animatingPr->preSuspTSCLow ); 9.87 - #endif 9.88 - //======================================================================= 9.89 - 9.90 - switchToCoreLoop(animatingPr); 9.91 + MEAS__Capture_Pre_Susp_Point; 9.92 + switchToCoreLoop(animatingSlv); 9.93 flushRegisters(); 9.94 - 9.95 - //======================================================================= 9.96 - 9.97 - #ifdef MEAS__TIME_STAMP_SUSP 9.98 - //NOTE: only take low part of count -- do sanity check when take diff 9.99 - saveLowTimeStampCountInto( animatingPr->postSuspTSCLow ); 9.100 - #endif 9.101 - 9.102 + MEAS__Capture_Post_Susp_Point; 9.103 + 9.104 return; 9.105 } 9.106 9.107 @@ -95,19 +85,19 @@ 9.108 * be called from main thread or other thread -- never from code animated by 9.109 * a SlaveVP, nor from a masterVP. 9.110 * 9.111 - *Use this version to dissipate VPs created outside the VMS system. 9.112 + *Use this version to dissipate Slvs created outside the VMS system. 9.113 */ 9.114 void 9.115 -VMS_ext__dissipate_procr( SlaveVP *procrToDissipate ) 9.116 +VMS_ext__dissipate_slaveVP( SlaveVP *slaveToDissipate ) 9.117 { 9.118 //NOTE: dataParam was given to the processor, so should either have 9.119 - // been alloc'd with VMS__malloc, or freed by the level above animPr. 9.120 + // been alloc'd with VMS__malloc, or freed by the level above animSlv. 
9.121 //So, all that's left to free here is the stack and the SlaveVP struc 9.122 // itself 9.123 //Note, should not stack-allocate the data param -- no guarantee, in 9.124 // general that creating processor will outlive ones it creates. 9.125 - free( procrToDissipate->startOfStack ); 9.126 - free( procrToDissipate ); 9.127 + free( slaveToDissipate->startOfStack ); 9.128 + free( slaveToDissipate ); 9.129 } 9.130 9.131 9.132 @@ -130,26 +120,32 @@ 9.133 * of dis-owning it. 9.134 */ 9.135 void 9.136 -VMS_int__dissipate_procr( SlaveVP *animatingPr ) 9.137 +VMS_int__dissipate_SlaveVP( SlaveVP *animatingSlv ) 9.138 { 9.139 //dis-own all locations owned by this processor, causing to be freed 9.140 // any locations that it is (was) sole owner of 9.141 -//TODO: implement VMS__malloc system, including "give up ownership" 9.142 9.143 - _VMSMasterEnv->numSlaves -= 1; 9.144 - if( _VMSMasterEnv->numSlaves == 0 ) 9.145 + _VMSMasterEnv->numSlavesAlive -= 1; 9.146 + if( _VMSMasterEnv->numSlavesAlive == 0 ) 9.147 { //no more work, so shutdown 9.148 - VMS_int__shutdown(); //note, creates 4 shut-down processors 9.149 + VMS_SS__shutdown(); //note, creates 4 shut-down processors 9.150 } 9.151 9.152 //NOTE: dataParam was given to the processor, so should either have 9.153 - // been alloc'd with VMS__malloc, or freed by the level above animPr. 9.154 + // been alloc'd with VMS__malloc, or freed by the level above animSlv. 9.155 //So, all that's left to free here is the stack and the SlaveVP struc 9.156 // itself 9.157 //Note, should not stack-allocate initial data -- no guarantee, in 9.158 // general that creating processor will outlive ones it creates. 
9.159 - VMS_int__free( animatingPr->startOfStack ); 9.160 - VMS_int__free( animatingPr ); 9.161 + VMS_int__free( animatingSlv->startOfStack ); 9.162 + VMS_int__free( animatingSlv ); 9.163 } 9.164 9.165 +/*Anticipating multi-tasking 9.166 + */ 9.167 +void * 9.168 +VMS_int__give_sem_env_for( SlaveVP *animSlv ) 9.169 + { 9.170 + return _VMSMasterEnv->semanticEnv; 9.171 + } 9.172
10.1 --- a/VMS__startup_and_shutdown.c Wed Feb 22 11:39:12 2012 -0800 10.2 +++ b/VMS__startup_and_shutdown.c Sun Mar 04 14:26:35 2012 -0800 10.3 @@ -12,7 +12,7 @@ 10.4 #include <sys/time.h> 10.5 10.6 #include "VMS.h" 10.7 -#include "VMS__HW_dependent.h" 10.8 +//#include "VMS__HW_dependent.h" 10.9 10.10 10.11 #define thdAttrs NULL 10.12 @@ -34,7 +34,7 @@ 10.13 create_free_list(); 10.14 10.15 void 10.16 -endOSThreadFn( void *initData, SlaveVP *animatingPr ); 10.17 +endOSThreadFn( void *initData, SlaveVP *animatingSlv ); 10.18 10.19 pthread_mutex_t suspendLock = PTHREAD_MUTEX_INITIALIZER; 10.20 pthread_cond_t suspend_cond = PTHREAD_COND_INITIALIZER; 10.21 @@ -43,9 +43,9 @@ 10.22 10.23 /*Setup has two phases: 10.24 * 1) Semantic layer first calls init_VMS, which creates masterEnv, and puts 10.25 - * the master virt procr into the work-queue, ready for first "call" 10.26 + * the master Slv into the work-queue, ready for first "call" 10.27 * 2) Semantic layer then does its own init, which creates the seed virt 10.28 - * procr inside the semantic layer, ready to schedule it when 10.29 + * slave inside the semantic layer, ready to schedule it when 10.30 * asked by the first run of the masterLoop. 10.31 * 10.32 *This part is bit weird because VMS really wants to be "always there", and 10.33 @@ -54,15 +54,15 @@ 10.34 * 10.35 *The semantic layer is isolated from the VMS internals by making the 10.36 * semantic layer do setup to a state that it's ready with its 10.37 - * initial virt procrs, ready to schedule them to slots when the masterLoop 10.38 + * initial Slvs, ready to schedule them to slots when the masterLoop 10.39 * asks. Without this pattern, the semantic layer's setup would 10.40 * have to modify slots directly to assign the initial virt-procrs, and put 10.41 * them into the readyToAnimateQ itself, breaking the isolation completely. 
10.42 * 10.43 * 10.44 - *The semantic layer creates the initial virt procr(s), and adds its 10.45 + *The semantic layer creates the initial Slv(s), and adds its 10.46 * own environment to masterEnv, and fills in the pointers to 10.47 - * the requestHandler and slaveScheduler plug-in functions 10.48 + * the requestHandler and slaveAssigner plug-in functions 10.49 */ 10.50 10.51 /*This allocates VMS data structures, populates the master VMSProc, 10.52 @@ -70,7 +70,7 @@ 10.53 * layer. 10.54 */ 10.55 void 10.56 -VMS_int__init() 10.57 +VMS_SS__init() 10.58 { 10.59 10.60 #ifdef SEQUENTIAL 10.61 @@ -97,24 +97,12 @@ 10.62 //Very first thing put into the master env is the free-list, seeded 10.63 // with a massive initial chunk of memory. 10.64 //After this, all other mallocs are VMS__malloc. 10.65 - _VMSMasterEnv->freeListHead = VMS_ext__create_free_list(); 10.66 + _VMSMasterEnv->freeLists = VMS_ext__create_free_list(); 10.67 10.68 10.69 - //============================= MEASUREMENT STUFF ======================== 10.70 - #ifdef MEAS__TIME_MALLOC 10.71 - _VMSMasterEnv->mallocTimeHist = makeFixedBinHistExt( 100, 0, 30, 10.72 - "malloc_time_hist"); 10.73 - _VMSMasterEnv->freeTimeHist = makeFixedBinHistExt( 100, 0, 30, 10.74 - "free_time_hist"); 10.75 - #endif 10.76 - #ifdef MEAS__TIME_PLUGIN 10.77 - _VMSMasterEnv->reqHdlrLowTimeHist = makeFixedBinHistExt( 100, 0, 200, 10.78 - "plugin_low_time_hist"); 10.79 - _VMSMasterEnv->reqHdlrHighTimeHist = makeFixedBinHistExt( 100, 0, 200, 10.80 - "plugin_high_time_hist"); 10.81 - #endif 10.82 - //======================================================================== 10.83 - 10.84 + MEAS__Make_Meas_Hists_for_Malloc_Meas; 10.85 + MEAS__Make_Meas_Hists_for_Plugin_Meas; 10.86 + 10.87 //===================== Only VMS__malloc after this ==================== 10.88 masterEnv = (MasterEnv*)_VMSMasterEnv; 10.89 10.90 @@ -125,15 +113,15 @@ 10.91 //One array for each core, 3 in array, core's masterVP scheds all 10.92 allSchedSlots = 
VMS_int__malloc( NUM_CORES * sizeof(SchedSlot *) ); 10.93 10.94 - _VMSMasterEnv->numSlaves = 0; //used to detect shut-down condition 10.95 + _VMSMasterEnv->numSlavesAlive = 0; //used to detect shut-down condition 10.96 10.97 - _VMSMasterEnv->numVPsCreated = 0; //used by create procr to set ID 10.98 + _VMSMasterEnv->numSlavesCreated = 0; //used by create slave to set ID 10.99 for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ ) 10.100 { 10.101 readyToAnimateQs[ coreIdx ] = makeVMSQ(); 10.102 10.103 //Q: should give masterVP core-specific info as its init data? 10.104 - masterVPs[ coreIdx ] = VMS_int__create_procr( (TopLevelFnPtr)&masterLoop, (void*)masterEnv ); 10.105 + masterVPs[ coreIdx ] = VMS_int__create_slaveVP( (TopLevelFnPtr)&masterLoop, (void*)masterEnv ); 10.106 masterVPs[ coreIdx ]->coreAnimatedBy = coreIdx; 10.107 allSchedSlots[ coreIdx ] = create_sched_slots(); //makes for one core 10.108 _VMSMasterEnv->numMasterInARow[ coreIdx ] = 0; 10.109 @@ -146,10 +134,6 @@ 10.110 _VMSMasterEnv->workStealingLock = UNLOCKED; 10.111 10.112 10.113 - //Aug 19, 2010: no longer need to place initial masterVP into queue 10.114 - // because coreLoop now controls -- animates its masterVP when no work 10.115 - 10.116 - 10.117 //============================= MEASUREMENT STUFF ======================== 10.118 #ifdef STATS__TURN_ON_PROBES 10.119 _VMSMasterEnv->dynIntervalProbesInfo = 10.120 @@ -163,14 +147,10 @@ 10.121 _VMSMasterEnv->createPtInSecs = 10.122 timeStamp.tv_sec +(timeStamp.tv_usec/1000000.0); 10.123 #endif 10.124 - #ifdef MEAS__TIME_MASTER_LOCK 10.125 - _VMSMasterEnv->masterLockLowTimeHist = makeFixedBinHist( 50, 0, 2, 10.126 - "master lock low time hist"); 10.127 - _VMSMasterEnv->masterLockHighTimeHist = makeFixedBinHist( 50, 0, 100, 10.128 - "master lock high time hist"); 10.129 - #endif 10.130 10.131 - MakeTheMeasHists(); 10.132 + MEAS__Make_Meas_Hists_for_Master_Lock_Meas 10.133 + 10.134 + MEAS__Make_Meas_Hists_for_Language(); 10.135 
//======================================================================== 10.136 } 10.137 10.138 @@ -187,7 +167,7 @@ 10.139 10.140 //Set state to mean "handling requests done, slot needs filling" 10.141 schedSlots[i]->workIsDone = FALSE; 10.142 - schedSlots[i]->needsProcrAssigned = TRUE; 10.143 + schedSlots[i]->needsSlaveAssigned = TRUE; 10.144 } 10.145 return schedSlots; 10.146 } 10.147 @@ -233,17 +213,17 @@ 10.148 10.149 10.150 void 10.151 -VMS_WL__register_request_handler( RequestHandler requestHandler ) 10.152 +VMS_SS__register_request_handler( RequestHandler requestHandler ) 10.153 { _VMSMasterEnv->requestHandler = requestHandler; 10.154 } 10.155 10.156 10.157 void 10.158 -VMS_WL__register_sched_assigner( Sched_Assigner schedAssigner ) 10.159 - { _VMSMasterEnv->slaveSchedAssigner = schedAssigner; 10.160 +VMS_SS__register_sched_assigner( Sched_Assigner schedAssigner ) 10.161 + { _VMSMasterEnv->slaveAssigner = schedAssigner; 10.162 } 10.163 10.164 -VMS_WL__register_semantic_env( void *semanticEnv ) 10.165 +VMS_SS__register_semantic_env( void *semanticEnv ) 10.166 { _VMSMasterEnv->semanticEnv = semanticEnv; 10.167 } 10.168 10.169 @@ -254,7 +234,7 @@ 10.170 *Wrapper lib layer calls this when it wants the system to start running.. 10.171 */ 10.172 void 10.173 -VMS_WL__start_the_work_then_wait_until_done() 10.174 +VMS_SS__start_the_work_then_wait_until_done() 10.175 { 10.176 #ifdef SEQUENTIAL 10.177 /*Only difference between version with an OS thread pinned to each core and 10.178 @@ -293,7 +273,7 @@ 10.179 10.180 //TODO: look at architecting cleanest separation between request handler 10.181 // and master loop, for dissipate, create, shutdown, and other non-semantic 10.182 -// requests. Issue is chain: one removes requests from AppVP, one dispatches 10.183 +// requests. Issue is chain: one removes requests from AppSlv, one dispatches 10.184 // on type of request, and one handles each type.. 
but some types require 10.185 // action from both request handler and master loop -- maybe just give the 10.186 // request handler calls like: VMS__handle_X_request_type 10.187 @@ -308,7 +288,7 @@ 10.188 *The _VMSMasterEnv is needed by this shut down function, so the create-seed- 10.189 * and-wait function has to free a bunch of stuff after it detects the 10.190 * threads have all died: the masterEnv, the thread-related locations, 10.191 - * masterVP any AppVPs that might still be allocated and sitting in the 10.192 + * masterVP any AppSlvs that might still be allocated and sitting in the 10.193 * semantic environment, or have been orphaned in the _VMSWorkQ. 10.194 * 10.195 *NOTE: the semantic plug-in is expected to use VMS__malloc to get all the 10.196 @@ -318,22 +298,22 @@ 10.197 *In here,create one core-loop shut-down processor for each core loop and put 10.198 * them all directly into the readyToAnimateQ. 10.199 *Note, this function can ONLY be called after the semantic environment no 10.200 - * longer cares if AppVPs get animated after the point this is called. In 10.201 + * longer cares if AppSlvs get animated after the point this is called. In 10.202 * other words, this can be used as an abort, or else it should only be 10.203 - * called when all AppVPs have finished dissipate requests -- only at that 10.204 + * called when all AppSlvs have finished dissipate requests -- only at that 10.205 * point is it sure that all results have completed. 
10.206 */ 10.207 void 10.208 -VMS_int__shutdown() 10.209 +VMS_SS__shutdown() 10.210 { int coreIdx; 10.211 - SlaveVP *shutDownPr; 10.212 + SlaveVP *shutDownSlv; 10.213 10.214 //create the shutdown processors, one for each core loop -- put them 10.215 // directly into the Q -- each core will die when gets one 10.216 for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ ) 10.217 { //Note, this is running in the master 10.218 - shutDownPr = VMS_int__create_procr( &endOSThreadFn, NULL ); 10.219 - writeVMSQ( shutDownPr, _VMSMasterEnv->readyToAnimateQs[coreIdx] ); 10.220 + shutDownSlv = VMS_int__create_slaveVP( &endOSThreadFn, NULL ); 10.221 + writeVMSQ( shutDownSlv, _VMSMasterEnv->readyToAnimateQs[coreIdx] ); 10.222 } 10.223 10.224 } 10.225 @@ -348,78 +328,37 @@ 10.226 * up just as if it never jumped out, before calling pthread_exit. 10.227 *The end-point of core loop will free the stack and so forth of the 10.228 * processor that animates this function, (this fn is transfering the 10.229 - * animator of the AppVP that is in turn animating this function over 10.230 + * animator of the AppSlv that is in turn animating this function over 10.231 * to core loop function -- note that this slices out a level of virtual 10.232 * processors). 
10.233 */ 10.234 void 10.235 -endOSThreadFn( void *initData, SlaveVP *animatingPr ) 10.236 +endOSThreadFn( void *initData, SlaveVP *animatingSlv ) 10.237 { 10.238 -#ifdef SEQUENTIAL 10.239 - asmTerminateCoreLoopSeq(animatingPr); 10.240 -#else 10.241 - asmTerminateCoreLoop(animatingPr); 10.242 -#endif 10.243 + #ifdef SEQUENTIAL 10.244 + asmTerminateCoreLoopSeq(animatingSlv); 10.245 + #else 10.246 + asmTerminateCoreLoop(animatingSlv); 10.247 + #endif 10.248 } 10.249 10.250 10.251 /*This is called from the startup & shutdown 10.252 */ 10.253 void 10.254 -VMS_int__cleanup_at_end_of_shutdown() 10.255 +VMS_SS__cleanup_at_end_of_shutdown() 10.256 { 10.257 - //unused 10.258 - //VMSQueueStruc **readyToAnimateQs; 10.259 - //int coreIdx; 10.260 - //SlaveVP **masterVPs; 10.261 - //SchedSlot ***allSchedSlots; //ptr to array of ptrs 10.262 - 10.263 //Before getting rid of everything, print out any measurements made 10.264 forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, (DynArrayFnPtr)&printHist ); 10.265 forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, (DynArrayFnPtr)&saveHistToFile); 10.266 forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, &freeHist ); 10.267 - #ifdef MEAS__TIME_PLUGIN 10.268 - printHist( _VMSMasterEnv->reqHdlrLowTimeHist ); 10.269 - saveHistToFile( _VMSMasterEnv->reqHdlrLowTimeHist ); 10.270 - printHist( _VMSMasterEnv->reqHdlrHighTimeHist ); 10.271 - saveHistToFile( _VMSMasterEnv->reqHdlrHighTimeHist ); 10.272 - freeHistExt( _VMSMasterEnv->reqHdlrLowTimeHist ); 10.273 - freeHistExt( _VMSMasterEnv->reqHdlrHighTimeHist ); 10.274 - #endif 10.275 - #ifdef MEAS__TIME_MALLOC 10.276 - printHist( _VMSMasterEnv->mallocTimeHist ); 10.277 - saveHistToFile( _VMSMasterEnv->mallocTimeHist ); 10.278 - printHist( _VMSMasterEnv->freeTimeHist ); 10.279 - saveHistToFile( _VMSMasterEnv->freeTimeHist ); 10.280 - freeHistExt( _VMSMasterEnv->mallocTimeHist ); 10.281 - freeHistExt( _VMSMasterEnv->freeTimeHist ); 10.282 - #endif 10.283 - #ifdef MEAS__TIME_MASTER_LOCK 10.284 - 
printHist( _VMSMasterEnv->masterLockLowTimeHist ); 10.285 - printHist( _VMSMasterEnv->masterLockHighTimeHist ); 10.286 - #endif 10.287 - #ifdef MEAS__TIME_MASTER 10.288 - printHist( _VMSMasterEnv->pluginTimeHist ); 10.289 - for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ ) 10.290 - { 10.291 - freeVMSQ( readyToAnimateQs[ coreIdx ] ); 10.292 - //master VPs were created external to VMS, so use external free 10.293 - VMS_int__dissipate_procr( masterVPs[ coreIdx ] ); 10.294 - 10.295 - freeSchedSlots( allSchedSlots[ coreIdx ] ); 10.296 - } 10.297 - #endif 10.298 - #ifdef MEAS__TIME_STAMP_SUSP 10.299 - printHist( _VMSMasterEnv->pluginTimeHist ); 10.300 - for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ ) 10.301 - { 10.302 - freeVMSQ( readyToAnimateQs[ coreIdx ] ); 10.303 - //master VPs were created external to VMS, so use external free 10.304 - VMS_int__dissipate_procr( masterVPs[ coreIdx ] ); 10.305 - 10.306 - freeSchedSlots( allSchedSlots[ coreIdx ] ); 10.307 - } 10.308 - #endif 10.309 + 10.310 + MEAS__Print_Hists_for_Susp_Meas; 10.311 + MEAS__Print_Hists_for_Master_Meas; 10.312 + MEAS__Print_Hists_for_Master_Lock_Meas; 10.313 + MEAS__Print_Hists_for_Malloc_Meas; 10.314 + MEAS__Print_Hists_for_Plugin_Meas; 10.315 + 10.316 10.317 //All the environment data has been allocated with VMS__malloc, so just 10.318 // free its internal big-chunk and all inside it disappear. 
10.319 @@ -431,24 +370,24 @@ 10.320 for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ ) 10.321 { 10.322 freeVMSQ( readyToAnimateQs[ coreIdx ] ); 10.323 - //master VPs were created external to VMS, so use external free 10.324 - VMS__dissipate_procr( masterVPs[ coreIdx ] ); 10.325 + //master Slvs were created external to VMS, so use external free 10.326 + VMS_int__dissipate_slaveVP( masterVPs[ coreIdx ] ); 10.327 10.328 freeSchedSlots( allSchedSlots[ coreIdx ] ); 10.329 } 10.330 10.331 - VMS__free( _VMSMasterEnv->readyToAnimateQs ); 10.332 - VMS__free( _VMSMasterEnv->masterVPs ); 10.333 - VMS__free( _VMSMasterEnv->allSchedSlots ); 10.334 + VMS_int__free( _VMSMasterEnv->readyToAnimateQs ); 10.335 + VMS_int__free( _VMSMasterEnv->masterVPs ); 10.336 + VMS_int__free( _VMSMasterEnv->allSchedSlots ); 10.337 10.338 //============================= MEASUREMENT STUFF ======================== 10.339 #ifdef STATS__TURN_ON_PROBES 10.340 - freeDynArrayDeep( _VMSMasterEnv->dynIntervalProbesInfo, &VMS__free_probe); 10.341 + freeDynArrayDeep( _VMSMasterEnv->dynIntervalProbesInfo, &VMS_WL__free_probe); 10.342 #endif 10.343 //======================================================================== 10.344 */ 10.345 //These are the only two that use system free 10.346 - VMS_ext__free_free_list( _VMSMasterEnv->freeListHead ); 10.347 + VMS_ext__free_free_list( _VMSMasterEnv->freeLists ); 10.348 free( (void *)_VMSMasterEnv ); 10.349 } 10.350
11.1 --- a/VMS_defs__HW_specific.h Wed Feb 22 11:39:12 2012 -0800 11.2 +++ b/VMS_defs__HW_specific.h Sun Mar 04 14:26:35 2012 -0800 11.3 @@ -27,7 +27,7 @@ 11.4 // stack size in virtual processors created 11.5 #define VIRT_PROCR_STACK_SIZE 0x8000 /* 32K */ 11.6 11.7 - // memory for VMS__malloc 11.8 + // memory for VMS_WL__malloc 11.9 #define MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE 0x40000000 /* 1G */ 11.10 11.11 //Frequency of TS counts -- have to do tests to verify
12.1 --- a/VMS_defs__lang_specific.h Wed Feb 22 11:39:12 2012 -0800 12.2 +++ b/VMS_defs__lang_specific.h Sun Mar 04 14:26:35 2012 -0800 12.3 @@ -13,80 +13,8 @@ 12.4 12.5 //=================== Language-specific Measurement Stuff =================== 12.6 // 12.7 -//TODO: Figure out way to move these into language dir.. 12.8 -// wrap them in #ifdef MEAS__... 12.9 +//TODO: move these into the language implementation directories 12.10 // 12.11 -#ifndef MAKE_HISTS_FOR_MEASUREMENTS 12.12 -#define MakeTheMeasHists() 12.13 -#endif 12.14 - 12.15 -//=========================================================================== 12.16 -//VPThread 12.17 -#ifdef VTHREAD 12.18 - 12.19 -#define createHistIdx 1 //note: starts at 1 12.20 -#define mutexLockHistIdx 2 12.21 -#define mutexUnlockHistIdx 3 12.22 -#define condWaitHistIdx 4 12.23 -#define condSignalHistIdx 5 12.24 - 12.25 -#define MakeTheMeasHists() \ 12.26 - _VMSMasterEnv->measHistsInfo = \ 12.27 - makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \ 12.28 - makeAMeasHist( createHistIdx, "create", 250, 0, 100 ) \ 12.29 - makeAMeasHist( mutexLockHistIdx, "mutex_lock", 50, 0, 100 ) \ 12.30 - makeAMeasHist( mutexUnlockHistIdx, "mutex_unlock", 50, 0, 100 ) \ 12.31 - makeAMeasHist( condWaitHistIdx, "cond_wait", 50, 0, 100 ) \ 12.32 - makeAMeasHist( condSignalHistIdx, "cond_signal", 50, 0, 100 ) 12.33 - 12.34 - 12.35 -#define Meas_startCreate \ 12.36 - int32 startStamp, endStamp; \ 12.37 - saveLowTimeStampCountInto( startStamp ); \ 12.38 - 12.39 -#define Meas_endCreate \ 12.40 - saveLowTimeStampCountInto( endStamp ); \ 12.41 - addIntervalToHist( startStamp, endStamp, \ 12.42 - _VMSMasterEnv->measHists[ createHistIdx ] ); 12.43 - 12.44 -#define Meas_startMutexLock \ 12.45 - int32 startStamp, endStamp; \ 12.46 - saveLowTimeStampCountInto( startStamp ); \ 12.47 - 12.48 -#define Meas_endMutexLock \ 12.49 - saveLowTimeStampCountInto( endStamp ); \ 12.50 - addIntervalToHist( startStamp, endStamp, \ 12.51 - 
_VMSMasterEnv->measHists[ mutexLockHistIdx ] ); 12.52 - 12.53 -#define Meas_startMutexUnlock \ 12.54 - int32 startStamp, endStamp; \ 12.55 - saveLowTimeStampCountInto( startStamp ); \ 12.56 - 12.57 -#define Meas_endMutexUnlock \ 12.58 - saveLowTimeStampCountInto( endStamp ); \ 12.59 - addIntervalToHist( startStamp, endStamp, \ 12.60 - _VMSMasterEnv->measHists[ mutexUnlockHistIdx ] ); 12.61 - 12.62 -#define Meas_startCondWait \ 12.63 - int32 startStamp, endStamp; \ 12.64 - saveLowTimeStampCountInto( startStamp ); \ 12.65 - 12.66 -#define Meas_endCondWait \ 12.67 - saveLowTimeStampCountInto( endStamp ); \ 12.68 - addIntervalToHist( startStamp, endStamp, \ 12.69 - _VMSMasterEnv->measHists[ condWaitHistIdx ] ); 12.70 - 12.71 -#define Meas_startCondSignal \ 12.72 - int32 startStamp, endStamp; \ 12.73 - saveLowTimeStampCountInto( startStamp ); \ 12.74 - 12.75 -#define Meas_endCondSignal \ 12.76 - saveLowTimeStampCountInto( endStamp ); \ 12.77 - addIntervalToHist( startStamp, endStamp, \ 12.78 - _VMSMasterEnv->measHists[ condSignalHistIdx ] ); 12.79 - 12.80 -#endif 12.81 - 12.82 12.83 12.84 //=========================================================================== 12.85 @@ -97,7 +25,7 @@ 12.86 #define spawnHistIdx 1 //note: starts at 1 12.87 #define syncHistIdx 2 12.88 12.89 -#define MakeTheMeasHists() \ 12.90 +#define MEAS__Make_Meas_Hists_for_Language() \ 12.91 _VMSMasterEnv->measHistsInfo = \ 12.92 makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \ 12.93 makeAMeasHist( spawnHistIdx, "Spawn", 50, 0, 200 ) \ 12.94 @@ -133,7 +61,7 @@ 12.95 #define ReceiveFromToHistIdx 3 12.96 #define ReceiveOfTypeHistIdx 4 12.97 12.98 -#define MakeTheMeasHists() \ 12.99 +#define MEAS__Make_Meas_Hists_for_Language() \ 12.100 _VMSMasterEnv->measHistsInfo = \ 12.101 makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \ 12.102 makeAMeasHist( SendFromToHistIdx, "SendFromTo", 50, 0, 100 ) \
13.1 --- a/VMS_defs__main.h Wed Feb 22 11:39:12 2012 -0800 13.2 +++ b/VMS_defs__main.h Sun Mar 04 14:26:35 2012 -0800 13.3 @@ -6,8 +6,8 @@ 13.4 * 13.5 */ 13.6 13.7 -#ifndef _VMS_DEFS_H 13.8 -#define _VMS_DEFS_H 13.9 +#ifndef _VMS_DEFS_MAIN_H 13.10 +#define _VMS_DEFS_MAIN_H 13.11 #define _GNU_SOURCE 13.12 13.13 //=========================== VMS-wide defs =============================== 13.14 @@ -19,7 +19,7 @@ 13.15 // so these defs can be at the top, and writePrivQ defined later on.. 13.16 #define writeVMSQ writePrivQ 13.17 #define readVMSQ readPrivQ 13.18 -#define makeVMSQ makeVMSPrivQ 13.19 +#define makeVMSQ makePrivQ 13.20 #define numInVMSQ numInPrivQ 13.21 #define VMSQueueStruc PrivQueueStruc 13.22 13.23 @@ -31,21 +31,21 @@ 13.24 // 13.25 //When SEQUENTIAL is defined, VMS does sequential exe in the main thread 13.26 // It still does co-routines and all the mechanisms are the same, it just 13.27 -// has only a single thread and animates VPs one at a time 13.28 +// has only a single thread and animates Slvs one at a time 13.29 //#define SEQUENTIAL 13.30 13.31 //#define USE_WORK_STEALING 13.32 13.33 //turns on the probe-instrumentation in the application -- when not 13.34 // defined, the calls to the probe functions turn into comments 13.35 -#define STATS__ENABLE_PROBES 13.36 +//#define STATS__ENABLE_PROBES 13.37 //#define TURN_ON_DEBUG_PROBES 13.38 13.39 //These defines turn types of bug messages on and off 13.40 // be sure debug messages are un-commented (next block of defines) 13.41 #define dbgAppFlow TRUE /* Top level flow of application code -- general*/ 13.42 #define dbgProbes FALSE /* for issues inside probes themselves*/ 13.43 -#define dbgB2BMaster FALSE /* in coreloop, back to back master VPs*/ 13.44 +#define dbgB2BMaster FALSE /* in coreloop, back to back master Slvs*/ 13.45 #define dbgRqstHdlr FALSE /* in request handler code*/ 13.46 13.47 //Comment or un- the substitute half to turn on/off types of debug message 13.48 @@ -74,112 +74,310 @@ 13.49 13.50 
//================== Turn Measurement Things on and off ==================== 13.51 13.52 -//#define MEAS__TIME_2011_SYS 13.53 +//#define MEAS__TURN_ON_SYSTEM_MEAS 13.54 13.55 -//define this if any MEAS__... below are 13.56 -//#define MAKE_HISTS_FOR_MEASUREMENTS 13.57 - //when MEAS__TIME_STAMP_SUSP is defined, causes code to be inserted and 13.58 - // compiled-in that saves the low part of the time stamp count just before 13.59 - // suspending a processor and just after resuming that processor. It is 13.60 - // saved into a field added to VirtProcr. Have to sanity-check for 13.61 - // rollover of low portion into high portion. 13.62 -//#define MEAS__TIME_STAMP_SUSP 13.63 -//#define MEAS__TIME_MASTER 13.64 -//#define MEAS__TIME_PLUGIN 13.65 -//#define MEAS__TIME_MALLOC 13.66 -//#define MEAS__TIME_MASTER_LOCK 13.67 +/*NOTE: define MEAS__TURN_ON_MAKE_HISTS if any other MEAS__... below are*/ 13.68 +//#define MEAS__TURN_ON_MAKE_HISTS 13.69 13.70 - //For code that calculates normalization-offset between TSC counts of 13.71 - // different cores. 
13.72 -//#define NUM_TSC_ROUND_TRIPS 10 13.73 +//#define MEAS__TURN_ON_SUSP_MEAS 13.74 +//#define MEAS__TURN_ON_MASTER_MEAS 13.75 +//#define MEAS__TURN_ON_PLUGIN_MEAS 13.76 +//#define MEAS__TURN_ON_MALLOC_MEAS 13.77 +//#define MEAS__TURN_ON_MASTER_LOCK_MEAS 13.78 13.79 + /*turn on/off subtraction of create measurements from plugin meas*/ 13.80 +//#define MEAS__TURN_ON_EXCLUDE_CREATION_TIME 13.81 13.82 13.83 -//=================== Macros to Capture Measurements ====================== 13.84 -// 13.85 -//===== RDTSC wrapper ===== 13.86 -//Also runs with x86_64 code 13.87 -#define saveTSCLowHigh(lowHighIn) \ 13.88 - asm volatile("RDTSC; \ 13.89 - movl %%eax, %0; \ 13.90 - movl %%edx, %1;" \ 13.91 - /* outputs */ : "=m" (lowHighIn.lowHigh[0]), "=m" (lowHighIn.lowHigh[1])\ 13.92 - /* inputs */ : \ 13.93 - /* clobber */ : "%eax", "%edx" \ 13.94 - ); 13.95 - 13.96 -#define saveTimeStampCountInto(low, high) \ 13.97 - asm volatile("RDTSC; \ 13.98 - movl %%eax, %0; \ 13.99 - movl %%edx, %1;" \ 13.100 - /* outputs */ : "=m" (low), "=m" (high)\ 13.101 - /* inputs */ : \ 13.102 - /* clobber */ : "%eax", "%edx" \ 13.103 - ); 13.104 - 13.105 -#define saveLowTimeStampCountInto(low) \ 13.106 - asm volatile("RDTSC; \ 13.107 - movl %%eax, %0;" \ 13.108 - /* outputs */ : "=m" (low) \ 13.109 - /* inputs */ : \ 13.110 - /* clobber */ : "%eax", "%edx" \ 13.111 - ); 13.112 - 13.113 13.114 //================== Macros define types of meas want ===================== 13.115 -#ifdef MEAS__TIME_PLUGIN 13.116 13.117 -#define Meas_startReqHdlr \ 13.118 - int32 startStamp1, endStamp1; \ 13.119 - saveLowTimeStampCountInto( startStamp1 ); 13.120 +#ifdef MEAS__TURN_ON_SUSP_MEAS 13.121 + #define MEAS__Insert_Susp_Meas_Fields_into_Slave \ 13.122 + uint32 preSuspTSCLow; \ 13.123 + uint32 postSuspTSCLow; 13.124 13.125 -#define Meas_endReqHdlr \ 13.126 - saveLowTimeStampCountInto( endStamp1 ); \ 13.127 - addIntervalToHist( startStamp1, endStamp1, \ 13.128 - _VMSMasterEnv->reqHdlrLowTimeHist ); \ 13.129 - 
addIntervalToHist( startStamp1, endStamp1, \ 13.130 - _VMSMasterEnv->reqHdlrHighTimeHist ); 13.131 - 13.132 -#elif defined MEAS__TIME_2011_SYS 13.133 -#define Meas_startMasterLoop \ 13.134 - TSCountLowHigh startStamp1, endStamp1; \ 13.135 - saveTSCLowHigh( endStamp1 ); \ 13.136 - _VMSMasterEnv->cyclesTillStartMasterLoop = \ 13.137 - endStamp1.longVal - masterVP->startSusp.longVal; 13.138 + #define MEAS__Insert_Susp_Meas_Fields_into_MasterEnv \ 13.139 + Histogram *suspLowTimeHist; \ 13.140 + Histogram *suspHighTimeHist; 13.141 13.142 -#define Meas_startReqHdlr \ 13.143 - saveTSCLowHigh( startStamp1 ); \ 13.144 - _VMSMasterEnv->startReqHdlr.longVal = startStamp1.longVal; 13.145 + #define MEAS__Make_Meas_Hists_for_Susp_Meas \ 13.146 + _VMSMasterEnv->suspLowTimeHist = makeFixedBinHistExt( 100, 0, 200,\ 13.147 + "master_low_time_hist");\ 13.148 + _VMSMasterEnv->suspHighTimeHist = makeFixedBinHistExt( 100, 0, 200,\ 13.149 + "master_high_time_hist"); 13.150 + 13.151 + //record time stamp: compare to time-stamp recorded below 13.152 + #define MEAS__Capture_Pre_Susp_Point \ 13.153 + saveLowTimeStampCountInto( animatingSlv->preSuspTSCLow ); 13.154 + 13.155 + //NOTE: only take low part of count -- do sanity check when take diff 13.156 + #define MEAS__Capture_Post_Susp_Point \ 13.157 + saveLowTimeStampCountInto( animatingSlv->postSuspTSCLow );\ 13.158 + addIntervalToHist( preSuspTSCLow, postSuspTSCLow,\ 13.159 + _VMSMasterEnv->suspLowTimeHist ); \ 13.160 + addIntervalToHist( preSuspTSCLow, postSuspTSCLow,\ 13.161 + _VMSMasterEnv->suspHighTimeHist ); 13.162 13.163 -#define Meas_endReqHdlr 13.164 + #define MEAS__Print_Hists_for_Susp_Meas \ 13.165 + printHist( _VMSMasterEnv->pluginTimeHist ); 13.166 + 13.167 +#else 13.168 + #define MEAS__Insert_Susp_Meas_Fields_into_Slave 13.169 + #define MEAS__Insert_Susp_Meas_Fields_into_MasterEnv 13.170 + #define MEAS__Make_Meas_Hists_for_Susp_Meas 13.171 + #define MEAS__Capture_Pre_Susp_Point 13.172 + #define MEAS__Capture_Post_Susp_Point 
13.173 + #define MEAS__Print_Hists_for_Susp_Meas 13.174 +#endif 13.175 13.176 -#define Meas_endMasterLoop \ 13.177 - saveTSCLowHigh( startStamp1 ); \ 13.178 - _VMSMasterEnv->endMasterLoop.longVal = startStamp1.longVal; 13.179 +#ifdef MEAS__TURN_ON_MASTER_MEAS 13.180 + #define MEAS__Insert_Master_Meas_Fields_into_Slave \ 13.181 + uint32 startMasterTSCLow; \ 13.182 + uint32 endMasterTSCLow; 13.183 + 13.184 + #define MEAS__Insert_Master_Meas_Fields_into_MasterEnv \ 13.185 + Histogram *masterLowTimeHist; \ 13.186 + Histogram *masterHighTimeHist; 13.187 + 13.188 + #define MEAS__Make_Meas_Hists_for_Master_Meas \ 13.189 + _VMSMasterEnv->masterLowTimeHist = makeFixedBinHistExt( 100, 0, 200,\ 13.190 + "master_low_time_hist");\ 13.191 + _VMSMasterEnv->masterHighTimeHist = makeFixedBinHistExt( 100, 0, 200,\ 13.192 + "master_high_time_hist"); 13.193 + 13.194 + //Total Master time includes one coreloop time -- just assume the core 13.195 + // loop time is same for Master as for AppSlvs, even though it may be 13.196 + // smaller due to higher predictability of the fixed jmp. 
13.197 + #define MEAS__Capture_Pre_Master_Point\ 13.198 + saveLowTimeStampCountInto( masterVP->startMasterTSCLow ); 13.199 + 13.200 + #define MEAS__Capture_Post_Master_Point \ 13.201 + saveLowTimeStampCountInto( masterVP->endMasterTSCLow );\ 13.202 + addIntervalToHist( startMasterTSCLow, endMasterTSCLow,\ 13.203 + _VMSMasterEnv->masterLowTimeHist ); \ 13.204 + addIntervalToHist( startMasterTSCLow, endMasterTSCLow,\ 13.205 + _VMSMasterEnv->masterHighTimeHist ); 13.206 + 13.207 + #define MEAS__Print_Hists_for_Master_Meas \ 13.208 + printHist( _VMSMasterEnv->pluginTimeHist ); 13.209 13.210 #else 13.211 -#define Meas_startMasterLoop 13.212 -#define Meas_startReqHdlr 13.213 -#define Meas_endReqHdlr 13.214 -#define Meas_endMasterLoop 13.215 + #define MEAS__Insert_Master_Meas_Fields_into_Slave 13.216 + #define MEAS__Insert_Master_Meas_Fields_into_MasterEnv 13.217 + #define MEAS__Make_Meas_Hists_for_Master_Meas 13.218 + #define MEAS__Capture_Pre_Master_Point 13.219 + #define MEAS__Capture_Post_Master_Point 13.220 + #define MEAS__Print_Hists_for_Master_Meas 13.221 #endif 13.222 13.223 + 13.224 +#ifdef MEAS__TURN_ON_MASTER_LOCK_MEAS 13.225 + #define MEAS__Insert_Master_Lock_Meas_Fields_into_MasterEnv \ 13.226 + Histogram *masterLockLowTimeHist; \ 13.227 + Histogram *masterLockHighTimeHist; 13.228 + 13.229 + #define MEAS__Make_Meas_Hists_for_Master_Lock_Meas \ 13.230 + _VMSMasterEnv->masterLockLowTimeHist = makeFixedBinHist( 50, 0, 2, \ 13.231 + "master lock low time hist");\ 13.232 + _VMSMasterEnv->masterLockHighTimeHist = makeFixedBinHist( 50, 0, 100,\ 13.233 + "master lock high time hist"); 13.234 + 13.235 + #define MEAS__Capture_Pre_Master_Lock_Point \ 13.236 + int32 startStamp, endStamp; \ 13.237 + saveLowTimeStampCountInto( startStamp ); 13.238 + 13.239 + #define MEAS__Capture_Post_Master_Lock_Point \ 13.240 + saveLowTimeStampCountInto( endStamp ); \ 13.241 + addIntervalToHist( startStamp, endStamp,\ 13.242 + _VMSMasterEnv->masterLockLowTimeHist ); \ 13.243 + 
addIntervalToHist( startStamp, endStamp,\ 13.244 + _VMSMasterEnv->masterLockHighTimeHist ); 13.245 + 13.246 + #define MEAS__Print_Hists_for_Master_Lock_Meas \ 13.247 + printHist( _VMSMasterEnv->masterLockLowTimeHist ); \ 13.248 + printHist( _VMSMasterEnv->masterLockHighTimeHist ); 13.249 + 13.250 +#else 13.251 + #define MEAS__Insert_Master_Lock_Meas_Fields_into_MasterEnv 13.252 + #define MEAS__Make_Meas_Hists_for_Master_Lock_Meas 13.253 + #define MEAS__Capture_Pre_Master_Lock_Point 13.254 + #define MEAS__Capture_Post_Master_Lock_Point 13.255 + #define MEAS__Print_Hists_for_Master_Lock_Meas 13.256 +#endif 13.257 + 13.258 + 13.259 +#ifdef MEAS__TURN_ON_MALLOC_MEAS 13.260 + #define MEAS__Insert_Malloc_Meas_Fields_into_MasterEnv\ 13.261 + Histogram *mallocTimeHist; \ 13.262 + Histogram *freeTimeHist; 13.263 + 13.264 + #define MEAS__Make_Meas_Hists_for_Malloc_Meas \ 13.265 + _VMSMasterEnv->mallocTimeHist = makeFixedBinHistExt( 100, 0, 30,\ 13.266 + "malloc_time_hist");\ 13.267 + _VMSMasterEnv->freeTimeHist = makeFixedBinHistExt( 100, 0, 30,\ 13.268 + "free_time_hist"); 13.269 + 13.270 + #define MEAS__Capture_Pre_Malloc_Point \ 13.271 + int32 startStamp, endStamp; \ 13.272 + saveLowTimeStampCountInto( startStamp ); 13.273 + 13.274 + #define MEAS__Capture_Post_Malloc_Point \ 13.275 + saveLowTimeStampCountInto( endStamp ); \ 13.276 + addIntervalToHist( startStamp, endStamp,\ 13.277 + _VMSMasterEnv->mallocTimeHist ); 13.278 + 13.279 + #define MEAS__Capture_Pre_Free_Point \ 13.280 + int32 startStamp, endStamp; \ 13.281 + saveLowTimeStampCountInto( startStamp ); 13.282 + 13.283 + #define MEAS__Capture_Post_Free_Point \ 13.284 + saveLowTimeStampCountInto( endStamp ); \ 13.285 + addIntervalToHist( startStamp, endStamp,\ 13.286 + _VMSMasterEnv->freeTimeHist ); 13.287 + 13.288 + #define MEAS__Print_Hists_for_Malloc_Meas \ 13.289 + printHist( _VMSMasterEnv->mallocTimeHist ); \ 13.290 + saveHistToFile( _VMSMasterEnv->mallocTimeHist ); \ 13.291 + printHist( 
_VMSMasterEnv->freeTimeHist ); \ 13.292 + saveHistToFile( _VMSMasterEnv->freeTimeHist ); \ 13.293 + freeHistExt( _VMSMasterEnv->mallocTimeHist ); \ 13.294 + freeHistExt( _VMSMasterEnv->freeTimeHist ); 13.295 + 13.296 +#else 13.297 + #define MEAS__Insert_Malloc_Meas_Fields_into_MasterEnv 13.298 + #define MEAS__Make_Meas_Hists_for_Malloc_Meas 13.299 + #define MEAS__Capture_Pre_Malloc_Point 13.300 + #define MEAS__Capture_Post_Malloc_Point 13.301 + #define MEAS__Capture_Pre_Free_Point 13.302 + #define MEAS__Capture_Post_Free_Point 13.303 + #define MEAS__Print_Hists_for_Malloc_Meas 13.304 +#endif 13.305 + 13.306 + 13.307 +#ifdef MEAS__TURN_ON_SYSTEM_MEAS 13.308 + #define MEAS__Insert_System_Meas_Fields_into_Slave \ 13.309 + TSCountLowHigh startSusp; \ 13.310 + uint64 totalSuspCycles; \ 13.311 + uint32 numGoodSusp; 13.312 + 13.313 + #define MEAS__Insert_System_Meas_Fields_into_MasterEnv \ 13.314 + TSCountLowHigh startMaster; \ 13.315 + uint64 totalMasterCycles; \ 13.316 + uint32 numMasterAnimations; \ 13.317 + TSCountLowHigh startReqHdlr; \ 13.318 + uint64 totalPluginCycles; \ 13.319 + uint32 numPluginAnimations; \ 13.320 + uint64 cyclesTillStartMasterLoop; \ 13.321 + TSCountLowHigh endMasterLoop; 13.322 + 13.323 +#else 13.324 + #define MEAS__Insert_System_Meas_Fields_into_Slave 13.325 + #define MEAS__Insert_System_Meas_Fields_into_MasterEnv 13.326 +#endif 13.327 + 13.328 + 13.329 +/*This macro's a bit weird -- the same macro is defined in three different 13.330 + * ways, depending upon which defines are turned on 13.331 + *That's because added the system meas, which interferes with plugin meas, 13.332 + * but don't want to make plugin meas stop working.. 
this is compromise 13.333 + */ 13.334 +#ifdef MEAS__TURN_ON_PLUGIN_MEAS 13.335 + #define MEAS__Insert_Plugin_Meas_Fields_into_MasterEnv \ 13.336 + Histogram *reqHdlrLowTimeHist; \ 13.337 + Histogram *reqHdlrHighTimeHist; 13.338 + 13.339 + #define MEAS__Make_Meas_Hists_for_Plugin_Meas \ 13.340 + _VMSMasterEnv->reqHdlrLowTimeHist = makeFixedBinHistExt( 100, 0, 200,\ 13.341 + "plugin_low_time_hist");\ 13.342 + _VMSMasterEnv->reqHdlrHighTimeHist = makeFixedBinHistExt( 100, 0, 200,\ 13.343 + "plugin_high_time_hist"); 13.344 + 13.345 + #define Meas_startReqHdlr \ 13.346 + int32 startStamp1, endStamp1; \ 13.347 + saveLowTimeStampCountInto( startStamp1 ); 13.348 + 13.349 + #define Meas_endReqHdlr \ 13.350 + saveLowTimeStampCountInto( endStamp1 ); \ 13.351 + addIntervalToHist( startStamp1, endStamp1, \ 13.352 + _VMSMasterEnv->reqHdlrLowTimeHist ); \ 13.353 + addIntervalToHist( startStamp1, endStamp1, \ 13.354 + _VMSMasterEnv->reqHdlrHighTimeHist ); 13.355 + 13.356 + #define MEAS__Print_Hists_for_Plugin_Meas \ 13.357 + printHist( _VMSMasterEnv->reqHdlrLowTimeHist ); \ 13.358 + saveHistToFile( _VMSMasterEnv->reqHdlrLowTimeHist ); \ 13.359 + printHist( _VMSMasterEnv->reqHdlrHighTimeHist ); \ 13.360 + saveHistToFile( _VMSMasterEnv->reqHdlrHighTimeHist ); \ 13.361 + freeHistExt( _VMSMasterEnv->reqHdlrLowTimeHist ); \ 13.362 + freeHistExt( _VMSMasterEnv->reqHdlrHighTimeHist ); 13.363 + 13.364 +#elif defined MEAS__TURN_ON_SYSTEM_MEAS 13.365 + #define MEAS__Insert_Plugin_Meas_Fields_into_MasterEnv 13.366 + 13.367 + #define MEAS__Make_Meas_Hists_for_Plugin_Meas 13.368 + 13.369 + #define Meas_startMasterLoop \ 13.370 + TSCountLowHigh startStamp1, endStamp1; \ 13.371 + saveTSCLowHigh( endStamp1 ); \ 13.372 + _VMSMasterEnv->cyclesTillStartMasterLoop = \ 13.373 + endStamp1.longVal - masterVP->startSusp.longVal; 13.374 + 13.375 + #define Meas_endMasterLoop \ 13.376 + saveTSCLowHigh( startStamp1 ); \ 13.377 + _VMSMasterEnv->endMasterLoop.longVal = startStamp1.longVal; 13.378 + 13.379 + 
#define Meas_startReqHdlr \ 13.380 + saveTSCLowHigh( startStamp1 ); \ 13.381 + _VMSMasterEnv->startReqHdlr.longVal = startStamp1.longVal; 13.382 + 13.383 + #define Meas_endReqHdlr 13.384 + 13.385 + #define MEAS__Print_Hists_for_Plugin_Meas 13.386 + 13.387 +#else 13.388 + #define MEAS__Insert_Plugin_Meas_Fields_into_MasterEnv 13.389 + #define MEAS__Make_Meas_Hists_for_Plugin_Meas 13.390 + #define Meas_startMasterLoop 13.391 + #define Meas_endMasterLoop 13.392 + #define Meas_startReqHdlr 13.393 + #define Meas_endReqHdlr 13.394 + #define MEAS__Print_Hists_for_Plugin_Meas 13.395 +#endif 13.396 + 13.397 + 13.398 +//Experiment in two-step macros -- if doesn't work, insert each separately 13.399 +#define MEAS__Insert_Meas_Fields_into_Slave \ 13.400 + MEAS__Insert_Susp_Meas_Fields_into_Slave \ 13.401 + MEAS__Insert_Master_Meas_Fields_into_Slave \ 13.402 + MEAS__Insert_System_Meas_Fields_into_Slave 13.403 + 13.404 + 13.405 //====================== Histogram Macros -- Create ======================== 13.406 // 13.407 // 13.408 -#ifdef MAKE_HISTS_FOR_MEASUREMENTS 13.409 -#define makeAMeasHist( idx, name, numBins, startVal, binWidth ) \ 13.410 - makeHighestDynArrayIndexBeAtLeast( _VMSMasterEnv->measHistsInfo, idx ); \ 13.411 - _VMSMasterEnv->measHists[idx] = \ 13.412 + 13.413 +//The language implementation should include a definition of this macro, 13.414 +// which creates all the histograms the language uses to collect measurements 13.415 +// of plugin operation -- so, if the language didn't define it, must 13.416 +// define it here (as empty), to avoid compile error 13.417 +#ifndef MEAS__Make_Meas_Hists_for_Language 13.418 +#define MEAS__Make_Meas_Hists_for_Language() /*consume parens!*/ 13.419 +#endif 13.420 + 13.421 + 13.422 +#ifdef MEAS__TURN_ON_MAKE_HISTS 13.423 + #define makeAMeasHist( idx, name, numBins, startVal, binWidth ) \ 13.424 + makeHighestDynArrayIndexBeAtLeast( _VMSMasterEnv->measHistsInfo, idx ); \ 13.425 + _VMSMasterEnv->measHists[idx] = \ 13.426 
makeFixedBinHist( numBins, startVal, binWidth, name ); 13.427 #else 13.428 -#define makeAMeasHist( idx, name, numBins, startVal, binWidth ) 13.429 + #define makeAMeasHist( idx, name, numBins, startVal, binWidth ) 13.430 #endif 13.431 13.432 +//============================== Probes =================================== 13.433 13.434 -#define MEAS__SUB_CREATE /*turn on/off subtraction of create from plugin*/ 13.435 13.436 -#include "VMS_defs__lang_specific.h" 13.437 - 13.438 +//=========================================================================== 13.439 #endif /* _VMS_DEFS_H */ 13.440
14.1 --- a/probes.c Wed Feb 22 11:39:12 2012 -0800 14.2 +++ b/probes.c Sun Mar 04 14:26:35 2012 -0800 14.3 @@ -13,100 +13,46 @@ 14.4 14.5 14.6 //==================== Probes ================= 14.7 -#ifdef STATS__USE_TSC_PROBES 14.8 - 14.9 -int32 14.10 -VMS__create_histogram_probe( int32 numBins, float32 startValue, 14.11 - float32 binWidth, char *nameStr ) 14.12 - { IntervalProbe *newProbe; 14.13 - int32 idx; 14.14 - FloatHist *hist; 14.15 - 14.16 - idx = VMS__create_single_interval_probe( nameStr ); 14.17 - newProbe = _VMSMasterEnv->intervalProbes[ idx ]; 14.18 - 14.19 - hist = makeFloatHistogram( numBins, startValue, binWidth ); 14.20 - newProbe->hist = hist; 14.21 - return idx; 14.22 - } 14.23 - 14.24 -void 14.25 -VMS_impl__record_interval_start_in_probe( int32 probeID ) 14.26 - { IntervalProbe *probe; 14.27 - 14.28 - probe = _VMSMasterEnv->intervalProbes[ probeID ]; 14.29 - probe->startStamp = getTSCount(); 14.30 - } 14.31 - 14.32 -void 14.33 -VMS_impl__record_interval_end_in_probe( int32 probeID ) 14.34 - { IntervalProbe *probe; 14.35 - TSCount endStamp; 14.36 - 14.37 - endStamp = getTSCount(); 14.38 - 14.39 - probe = _VMSMasterEnv->intervalProbes[ probeID ]; 14.40 - probe->endStamp = endStamp; 14.41 - 14.42 - if( probe->hist != NULL ) 14.43 - { TSCount interval = probe->endStamp - probe->startStamp; 14.44 - //if the interval is sane, then add to histogram 14.45 - if( interval < probe->hist->endOfRange * 10 ) 14.46 - addToFloatHist( interval, probe->hist ); 14.47 - } 14.48 - } 14.49 - 14.50 -void 14.51 -VMS_impl__print_stats_of_probe( int32 probeID ) 14.52 - { IntervalProbe *probe; 14.53 - 14.54 - probe = _VMSMasterEnv->intervalProbes[ probeID ]; 14.55 - 14.56 - if( probe->hist == NULL ) 14.57 - { 14.58 - printf("probe: %s, interval: %.6lf\n", probe->nameStr,probe->interval); 14.59 - } 14.60 - 14.61 - else 14.62 - { 14.63 - printf( "probe: %s\n", probe->nameStr ); 14.64 - printFloatHist( probe->hist ); 14.65 - } 14.66 - } 14.67 -#else 14.68 - 14.69 /* 14.70 * 
In practice, probe operations are called from the app, from inside slaves 14.71 - * -- so have to be sure each probe is single-VP owned, and be sure that 14.72 + * -- so have to be sure each probe is single-Slv owned, and be sure that 14.73 * any place common structures are modified it's done inside the master. 14.74 * So -- the only place common structures are modified is during creation. 14.75 * after that, all mods are to individual instances. 14.76 * 14.77 * Thniking perhaps should change the semantics to be that probes are 14.78 * attached to the virtual processor -- and then everything is guaranteed 14.79 - * to be isolated -- except then can't take any intervals that span VPs, 14.80 - * and would have to transfer the probes to Master env when VP dissipates.. 14.81 + * to be isolated -- except then can't take any intervals that span Slvs, 14.82 + * and would have to transfer the probes to Master env when Slv dissipates.. 14.83 * gets messy.. 14.84 * 14.85 * For now, just making so that probe creation causes a suspend, so that 14.86 * the dynamic array in the master env is only modified from the master 14.87 * 14.88 */ 14.89 + 14.90 +//============================ Helpers =========================== 14.91 +inline void 14.92 +doNothing() 14.93 + { 14.94 + } 14.95 + 14.96 + 14.97 IntervalProbe * 14.98 -create_generic_probe( char *nameStr, SlaveVP *animPr ) 14.99 -{ 14.100 +create_generic_probe( char *nameStr, SlaveVP *animSlv ) 14.101 + { 14.102 VMSSemReq reqData; 14.103 14.104 reqData.reqType = createProbe; 14.105 reqData.nameStr = nameStr; 14.106 14.107 - VMS_WL__send_VMSSem_request( &reqData, animPr ); 14.108 + VMS_WL__send_VMSSem_request( &reqData, animSlv ); 14.109 14.110 - return animPr->dataRetFromReq; 14.111 + return animSlv->dataRetFromReq; 14.112 } 14.113 14.114 /*Use this version from outside VMS -- it uses external malloc, and modifies 14.115 - * dynamic array, so can't be animated in a slave VP 14.116 + * dynamic array, so can't be animated in a 
slave Slv 14.117 */ 14.118 IntervalProbe * 14.119 ext__create_generic_probe( char *nameStr ) 14.120 @@ -125,24 +71,38 @@ 14.121 return newProbe; 14.122 } 14.123 14.124 +//============================ Fns def in header ======================= 14.125 14.126 -/*Only call from inside master or main startup/shutdown thread 14.127 - */ 14.128 -void 14.129 -VMS_impl__free_probe( IntervalProbe *probe ) 14.130 - { if( probe->hist != NULL ) freeDblHist( probe->hist ); 14.131 - if( probe->nameStr != NULL) VMS_int__free( probe->nameStr ); 14.132 - VMS_int__free( probe ); 14.133 +int32 14.134 +VMS_impl__create_single_interval_probe( char *nameStr, SlaveVP *animSlv ) 14.135 + { IntervalProbe *newProbe; 14.136 + 14.137 + newProbe = create_generic_probe( nameStr, animSlv ); 14.138 + 14.139 + return newProbe->probeID; 14.140 } 14.141 14.142 +int32 14.143 +VMS_impl__create_histogram_probe( int32 numBins, float64 startValue, 14.144 + float64 binWidth, char *nameStr, SlaveVP *animSlv ) 14.145 + { IntervalProbe *newProbe; 14.146 + DblHist *hist; 14.147 + 14.148 + newProbe = create_generic_probe( nameStr, animSlv ); 14.149 + 14.150 + hist = makeDblHistogram( numBins, startValue, binWidth ); 14.151 + newProbe->hist = hist; 14.152 + return newProbe->probeID; 14.153 + } 14.154 + 14.155 14.156 int32 14.157 -VMS_impl__record_time_point_into_new_probe( char *nameStr, SlaveVP *animPr) 14.158 +VMS_impl__record_time_point_into_new_probe( char *nameStr, SlaveVP *animSlv) 14.159 { IntervalProbe *newProbe; 14.160 struct timeval *startStamp; 14.161 float64 startSecs; 14.162 14.163 - newProbe = create_generic_probe( nameStr, animPr ); 14.164 + newProbe = create_generic_probe( nameStr, animSlv ); 14.165 newProbe->endSecs = 0; 14.166 14.167 gettimeofday( &(newProbe->startStamp), NULL); 14.168 @@ -174,30 +134,19 @@ 14.169 return newProbe->probeID; 14.170 } 14.171 14.172 -int32 14.173 -VMS_impl__create_single_interval_probe( char *nameStr, SlaveVP *animPr ) 14.174 - { IntervalProbe *newProbe; 14.175 
14.176 - newProbe = create_generic_probe( nameStr, animPr ); 14.177 - 14.178 - return newProbe->probeID; 14.179 +/*Only call from inside master or main startup/shutdown thread 14.180 + */ 14.181 +void 14.182 +VMS_impl__free_probe( IntervalProbe *probe ) 14.183 + { if( probe->hist != NULL ) freeDblHist( probe->hist ); 14.184 + if( probe->nameStr != NULL) VMS_int__free( probe->nameStr ); 14.185 + VMS_int__free( probe ); 14.186 } 14.187 14.188 -int32 14.189 -VMS_impl__create_histogram_probe( int32 numBins, float64 startValue, 14.190 - float64 binWidth, char *nameStr, SlaveVP *animPr ) 14.191 - { IntervalProbe *newProbe; 14.192 - DblHist *hist; 14.193 - 14.194 - newProbe = create_generic_probe( nameStr, animPr ); 14.195 - 14.196 - hist = makeDblHistogram( numBins, startValue, binWidth ); 14.197 - newProbe->hist = hist; 14.198 - return newProbe->probeID; 14.199 - } 14.200 14.201 void 14.202 -VMS_impl__index_probe_by_its_name( int32 probeID, SlaveVP *animPr ) 14.203 +VMS_impl__index_probe_by_its_name( int32 probeID, SlaveVP *animSlv ) 14.204 { IntervalProbe *probe; 14.205 14.206 //TODO: fix this To be in Master -- race condition 14.207 @@ -206,8 +155,9 @@ 14.208 addValueIntoTable(probe->nameStr, probe, _VMSMasterEnv->probeNameHashTbl); 14.209 } 14.210 14.211 + 14.212 IntervalProbe * 14.213 -VMS_impl__get_probe_by_name( char *probeName, SlaveVP *animPr ) 14.214 +VMS_impl__get_probe_by_name( char *probeName, SlaveVP *animSlv ) 14.215 { 14.216 //TODO: fix this To be in Master -- race condition 14.217 return getValueFromTable( probeName, _VMSMasterEnv->probeNameHashTbl ); 14.218 @@ -215,21 +165,21 @@ 14.219 14.220 14.221 /*Everything is local to the animating procr, so no need for request, do 14.222 - * work locally, in the anim Pr 14.223 + * work locally, in the anim Slv 14.224 */ 14.225 void 14.226 -VMS_impl__record_sched_choice_into_probe( int32 probeID, SlaveVP *animatingPr ) 14.227 +VMS_impl__record_sched_choice_into_probe( int32 probeID, SlaveVP *animatingSlv ) 14.228 
{ IntervalProbe *probe; 14.229 14.230 probe = _VMSMasterEnv->intervalProbes[ probeID ]; 14.231 probe->schedChoiceWasRecorded = TRUE; 14.232 - probe->coreNum = animatingPr->coreAnimatedBy; 14.233 - probe->procrID = animatingPr->procrID; 14.234 - probe->procrCreateSecs = animatingPr->createPtInSecs; 14.235 + probe->coreNum = animatingSlv->coreAnimatedBy; 14.236 + probe->slaveID = animatingSlv->procrID; 14.237 + probe->slaveCreateSecs = animatingSlv->createPtInSecs; 14.238 } 14.239 14.240 /*Everything is local to the animating procr, so no need for request, do 14.241 - * work locally, in the anim Pr 14.242 + * work locally, in the anim Slv 14.243 */ 14.244 void 14.245 VMS_impl__record_interval_start_in_probe( int32 probeID ) 14.246 @@ -237,44 +187,37 @@ 14.247 14.248 DEBUG( dbgProbes, "record start of interval\n" ) 14.249 probe = _VMSMasterEnv->intervalProbes[ probeID ]; 14.250 - gettimeofday( &(probe->startStamp), NULL ); 14.251 + probe->startStamp = getTSCount(); 14.252 } 14.253 14.254 14.255 /*Everything is local to the animating procr, so no need for request, do 14.256 - * work locally, in the anim Pr 14.257 + * work locally, in the anim Slv 14.258 + * 14.259 + *This should be safe to run inside SlaveVP -- weird behavior will be due 14.260 + * to the logical error of having more than one interval open in overlapped. 14.261 */ 14.262 void 14.263 VMS_impl__record_interval_end_in_probe( int32 probeID ) 14.264 { IntervalProbe *probe; 14.265 - struct timeval *endStamp, *startStamp; 14.266 - float64 startSecs, endSecs; 14.267 + TSCount endStamp; 14.268 14.269 + endStamp = getTSCount(); 14.270 + 14.271 DEBUG( dbgProbes, "record end of interval\n" ) 14.272 - //possible seg-fault if array resized by diff core right after this 14.273 - // one gets probe..? Something like that? Might be safe.. 
don't care 14.274 + 14.275 probe = _VMSMasterEnv->intervalProbes[ probeID ]; 14.276 - gettimeofday( &(probe->endStamp), NULL); 14.277 - 14.278 - //now turn into an interval held in a double 14.279 - startStamp = &(probe->startStamp); 14.280 - endStamp = &(probe->endStamp); 14.281 - 14.282 - startSecs = startStamp->tv_sec + ( startStamp->tv_usec / 1000000.0 ); 14.283 - endSecs = endStamp->tv_sec + ( endStamp->tv_usec / 1000000.0 ); 14.284 - 14.285 - probe->interval = endSecs - startSecs; 14.286 - probe->startSecs = startSecs; 14.287 - probe->endSecs = endSecs; 14.288 + probe->endStamp = endStamp; 14.289 14.290 if( probe->hist != NULL ) 14.291 - { 14.292 + { TSCount interval = probe->endStamp - probe->startStamp; 14.293 //if the interval is sane, then add to histogram 14.294 - if( probe->interval < probe->hist->endOfRange * 10 ) 14.295 - addToDblHist( probe->interval, probe->hist ); 14.296 + if( interval < probe->hist->endOfRange * 10 ) 14.297 + addToFloatHist( interval, probe->hist ); 14.298 } 14.299 } 14.300 14.301 + 14.302 void 14.303 print_probe_helper( IntervalProbe *probe ) 14.304 { 14.305 @@ -283,7 +226,7 @@ 14.306 14.307 if( probe->schedChoiceWasRecorded ) 14.308 { printf( "coreNum: %d, procrID: %d, procrCreated: %0.6f | ", 14.309 - probe->coreNum, probe->procrID, probe->procrCreateSecs ); 14.310 + probe->coreNum, probe->slaveID, probe->slaveCreateSecs ); 14.311 } 14.312 14.313 if( probe->endSecs == 0 ) //just a single point in time 14.314 @@ -318,22 +261,10 @@ 14.315 } 14.316 14.317 14.318 -inline void doNothing(){}; 14.319 - 14.320 -void 14.321 -generic_print_probe( void *_probe ) 14.322 - { 14.323 - IntervalProbe *probe = (IntervalProbe *)_probe; 14.324 - 14.325 - //TODO segfault in printf 14.326 - //print_probe_helper( probe ); 14.327 - } 14.328 - 14.329 void 14.330 VMS_impl__print_stats_of_all_probes() 14.331 { 14.332 forAllInDynArrayDo( _VMSMasterEnv->dynIntervalProbesInfo, 14.333 - &generic_print_probe ); 14.334 + &VMS_impl__print_stats_of_probe ); 
14.335 fflush( stdout ); 14.336 } 14.337 -#endif
15.1 --- a/probes.h Wed Feb 22 11:39:12 2012 -0800 15.2 +++ b/probes.h Sun Mar 04 14:26:35 2012 -0800 15.3 @@ -28,16 +28,16 @@ 15.4 15.5 int32 schedChoiceWasRecorded; 15.6 int32 coreNum; 15.7 - int32 procrID; 15.8 - float64 procrCreateSecs; 15.9 + int32 slaveID; 15.10 + float64 slaveCreateSecs; 15.11 15.12 - #ifdef STATS__USE_TSC_PROBES 15.13 + // #ifdef STATS__USE_TSC_PROBES 15.14 TSCount startStamp; 15.15 TSCount endStamp; 15.16 - #else 15.17 - struct timeval startStamp; 15.18 - struct timeval endStamp; 15.19 - #endif 15.20 +// #else 15.21 +// struct timeval startStamp; 15.22 +// struct timeval endStamp; 15.23 +// #endif 15.24 float64 startSecs; 15.25 float64 endSecs; 15.26 float64 interval; 15.27 @@ -45,136 +45,136 @@ 15.28 }; 15.29 15.30 15.31 +int32 15.32 +VMS_impl__create_single_interval_probe( char *nameStr, SlaveVP *animSlv ); 15.33 + 15.34 +int32 15.35 +VMS_impl__create_histogram_probe( int32 numBins, float64 startValue, 15.36 + float64 binWidth, char *nameStr, SlaveVP *animSlv ); 15.37 + 15.38 +int32 15.39 +VMS_impl__record_time_point_into_new_probe( char *nameStr, SlaveVP *animSlv); 15.40 + 15.41 +int32 15.42 +VMS_ext_impl__record_time_point_into_new_probe( char *nameStr ); 15.43 + 15.44 +void 15.45 +VMS_impl__free_probe( IntervalProbe *probe ); 15.46 + 15.47 +void 15.48 +VMS_impl__index_probe_by_its_name( int32 probeID, SlaveVP *animSlv ); 15.49 + 15.50 +IntervalProbe * 15.51 +VMS_impl__get_probe_by_name( char *probeName, SlaveVP *animSlv ); 15.52 + 15.53 +void 15.54 +VMS_impl__record_sched_choice_into_probe( int32 probeID, SlaveVP *animSlv ); 15.55 + 15.56 +void 15.57 +VMS_impl__record_interval_start_in_probe( int32 probeID ); 15.58 + 15.59 +void 15.60 +VMS_impl__record_interval_end_in_probe( int32 probeID ); 15.61 + 15.62 +void 15.63 +VMS_impl__print_stats_of_probe( int32 probeID ); 15.64 + 15.65 +void 15.66 +VMS_impl__print_stats_of_all_probes(); 15.67 + 15.68 15.69 //======================== Probes ============================= 15.70 // 15.71 // Use 
macros to allow turning probes off with a #define switch 15.72 +// This means probes have zero impact on performance when off 15.73 +//============================================================= 15.74 +#define VMS_App__record_time_point_into_new_probe VMS_WL__record_time_point_into_new_probe 15.75 +#define VMS_ext__record_time_point_into_new_probe 15.76 +#define VMS_App__create_single_interval_probe VMS_WL__create_single_interval_probe 15.77 +#define VMS_App__create_histogram_probe VMS_WL__create_histogram_probe 15.78 +#define VMS_App__index_probe_by_its_name VMS_WL__index_probe_by_its_name 15.79 +#define VMS_App__get_probe_by_name VMS_WL__get_probe_by_name 15.80 +#define VMS_App__record_sched_choice_into_probe VMS_WL__record_sched_choice_into_probe 15.81 +#define VMS_App__record_interval_start_in_probe VMS_WL__record_interval_start_in_probe 15.82 +#define VMS_App__record_interval_end_in_probe VMS_WL__record_interval_end_in_probe 15.83 +#define VMS_App__print_stats_of_probe VMS_WL__print_stats_of_probe 15.84 +#define VMS_App__print_stats_of_all_probes VMS_WL__print_stats_of_all_probes 15.85 + 15.86 #ifdef STATS__ENABLE_PROBES 15.87 -int32 15.88 -VMS_impl__record_time_point_into_new_probe( char *nameStr,SlaveVP *animPr); 15.89 -#define VMS__record_time_point_into_new_probe( nameStr, animPr ) \ 15.90 - VMS_impl__record_time_point_in_new_probe( nameStr, animPr ) 15.91 +#define VMS_WL__record_time_point_into_new_probe( nameStr, animSlv ) \ 15.92 + VMS_impl__record_time_point_in_new_probe( nameStr, animSlv ) 15.93 15.94 -int32 15.95 -VMS_ext_impl__record_time_point_into_new_probe( char *nameStr ); 15.96 #define VMS_ext__record_time_point_into_new_probe( nameStr ) \ 15.97 VMS_ext_impl__record_time_point_into_new_probe( nameStr ) 15.98 15.99 +#define VMS_WL__create_single_interval_probe( nameStr, animSlv ) \ 15.100 + VMS_impl__create_single_interval_probe( nameStr, animSlv ) 15.101 15.102 -int32 15.103 -VMS_impl__create_single_interval_probe( char *nameStr, SlaveVP 
*animPr ); 15.104 -#define VMS__create_single_interval_probe( nameStr, animPr ) \ 15.105 - VMS_impl__create_single_interval_probe( nameStr, animPr ) 15.106 - 15.107 - 15.108 -int32 15.109 -VMS_impl__create_histogram_probe( int32 numBins, float64 startValue, 15.110 - float64 binWidth, char *nameStr, SlaveVP *animPr ); 15.111 -#define VMS__create_histogram_probe( numBins, startValue, \ 15.112 - binWidth, nameStr, animPr ) \ 15.113 +#define VMS_WL__create_histogram_probe( numBins, startValue, \ 15.114 + binWidth, nameStr, animSlv ) \ 15.115 VMS_impl__create_histogram_probe( numBins, startValue, \ 15.116 - binWidth, nameStr, animPr ) 15.117 -void 15.118 -VMS_impl__free_probe( IntervalProbe *probe ); 15.119 -#define VMS__free_probe( probe ) \ 15.120 + binWidth, nameStr, animSlv ) 15.121 +#define VMS_int__free_probe( probe ) \ 15.122 VMS_impl__free_probe( probe ) 15.123 15.124 -void 15.125 -VMS_impl__index_probe_by_its_name( int32 probeID, SlaveVP *animPr ); 15.126 -#define VMS__index_probe_by_its_name( probeID, animPr ) \ 15.127 - VMS_impl__index_probe_by_its_name( probeID, animPr ) 15.128 +#define VMS_WL__index_probe_by_its_name( probeID, animSlv ) \ 15.129 + VMS_impl__index_probe_by_its_name( probeID, animSlv ) 15.130 15.131 -IntervalProbe * 15.132 -VMS_impl__get_probe_by_name( char *probeName, SlaveVP *animPr ); 15.133 -#define VMS__get_probe_by_name( probeID, animPr ) \ 15.134 - VMS_impl__get_probe_by_name( probeName, animPr ) 15.135 +#define VMS_WL__get_probe_by_name( probeID, animSlv ) \ 15.136 + VMS_impl__get_probe_by_name( probeName, animSlv ) 15.137 15.138 -void 15.139 -VMS_impl__record_sched_choice_into_probe( int32 probeID, SlaveVP *animPr ); 15.140 -#define VMS__record_sched_choice_into_probe( probeID, animPr ) \ 15.141 - VMS_impl__record_sched_choice_into_probe( probeID, animPr ) 15.142 +#define VMS_WL__record_sched_choice_into_probe( probeID, animSlv ) \ 15.143 + VMS_impl__record_sched_choice_into_probe( probeID, animSlv ) 15.144 15.145 -void 15.146 
-VMS_impl__record_interval_start_in_probe( int32 probeID ); 15.147 -#define VMS__record_interval_start_in_probe( probeID ) \ 15.148 +#define VMS_WL__record_interval_start_in_probe( probeID ) \ 15.149 VMS_impl__record_interval_start_in_probe( probeID ) 15.150 15.151 -void 15.152 -VMS_impl__record_interval_end_in_probe( int32 probeID ); 15.153 -#define VMS__record_interval_end_in_probe( probeID ) \ 15.154 +#define VMS_WL__record_interval_end_in_probe( probeID ) \ 15.155 VMS_impl__record_interval_end_in_probe( probeID ) 15.156 15.157 -void 15.158 -VMS_impl__print_stats_of_probe( int32 probeID ); 15.159 -#define VMS__print_stats_of_probe( probeID ) \ 15.160 +#define VMS_WL__print_stats_of_probe( probeID ) \ 15.161 VMS_impl__print_stats_of_probe( probeID ) 15.162 15.163 -void 15.164 -VMS_impl__print_stats_of_all_probes(); 15.165 -#define VMS__print_stats_of_all_probes() \ 15.166 +#define VMS_WL__print_stats_of_all_probes() \ 15.167 VMS_impl__print_stats_of_all_probes() 15.168 15.169 15.170 #else 15.171 -int32 15.172 -VMS_impl__record_time_point_into_new_probe( char *nameStr,SlaveVP *animPr); 15.173 -#define VMS__record_time_point_into_new_probe( nameStr, animPr ) \ 15.174 +#define VMS_WL__record_time_point_into_new_probe( nameStr, animSlv ) \ 15.175 0 /* do nothing */ 15.176 15.177 -int32 15.178 -VMS_ext_impl__record_time_point_into_new_probe( char *nameStr ); 15.179 #define VMS_ext__record_time_point_into_new_probe( nameStr ) \ 15.180 0 /* do nothing */ 15.181 15.182 15.183 -int32 15.184 -VMS_impl__create_single_interval_probe( char *nameStr, SlaveVP *animPr ); 15.185 -#define VMS__create_single_interval_probe( nameStr, animPr ) \ 15.186 +#define VMS_WL__create_single_interval_probe( nameStr, animSlv ) \ 15.187 0 /* do nothing */ 15.188 15.189 15.190 -int32 15.191 -VMS_impl__create_histogram_probe( int32 numBins, float64 startValue, 15.192 - float64 binWidth, char *nameStr, SlaveVP *animPr ); 15.193 -#define VMS__create_histogram_probe( numBins, startValue, \ 15.194 - 
binWidth, nameStr, animPr ) \ 15.195 +#define VMS_WL__create_histogram_probe( numBins, startValue, \ 15.196 + binWidth, nameStr, animSlv ) \ 15.197 0 /* do nothing */ 15.198 15.199 -void 15.200 -VMS_impl__index_probe_by_its_name( int32 probeID, SlaveVP *animPr ); 15.201 -#define VMS__index_probe_by_its_name( probeID, animPr ) \ 15.202 +#define VMS_WL__index_probe_by_its_name( probeID, animSlv ) \ 15.203 /* do nothing */ 15.204 15.205 -IntervalProbe * 15.206 -VMS_impl__get_probe_by_name( char *probeName, SlaveVP *animPr ); 15.207 -#define VMS__get_probe_by_name( probeID, animPr ) \ 15.208 +#define VMS_WL__get_probe_by_name( probeID, animSlv ) \ 15.209 NULL /* do nothing */ 15.210 15.211 -void 15.212 -VMS_impl__record_sched_choice_into_probe( int32 probeID, SlaveVP *animPr ); 15.213 -#define VMS__record_sched_choice_into_probe( probeID, animPr ) \ 15.214 +#define VMS_WL__record_sched_choice_into_probe( probeID, animSlv ) \ 15.215 /* do nothing */ 15.216 15.217 -void 15.218 -VMS_impl__record_interval_start_in_probe( int32 probeID ); 15.219 -#define VMS__record_interval_start_in_probe( probeID ) \ 15.220 +#define VMS_WL__record_interval_start_in_probe( probeID ) \ 15.221 /* do nothing */ 15.222 15.223 -void 15.224 -VMS_impl__record_interval_end_in_probe( int32 probeID ); 15.225 -#define VMS__record_interval_end_in_probe( probeID ) \ 15.226 +#define VMS_WL__record_interval_end_in_probe( probeID ) \ 15.227 /* do nothing */ 15.228 15.229 -inline void doNothing(); 15.230 -void 15.231 -VMS_impl__print_stats_of_probe( int32 probeID ); 15.232 -#define VMS__print_stats_of_probe( probeID ) \ 15.233 - doNothing/* do nothing */ 15.234 +#define VMS_WL__print_stats_of_probe( probeID ) \ 15.235 + ; /* do nothing */ 15.236 15.237 -void 15.238 -VMS_impl__print_stats_of_all_probes(); 15.239 -#define VMS__print_stats_of_all_probes \ 15.240 - doNothing/* do nothing */ 15.241 +#define VMS_WL__print_stats_of_all_probes() \ 15.242 + ;/* do nothing */ 15.243 15.244 #endif /* defined 
STATS__ENABLE_PROBES */ 15.245
16.1 --- a/vmalloc.c Wed Feb 22 11:39:12 2012 -0800 16.2 +++ b/vmalloc.c Sun Mar 04 14:26:35 2012 -0800 16.3 @@ -11,46 +11,200 @@ 16.4 #include <inttypes.h> 16.5 #include <stdlib.h> 16.6 #include <stdio.h> 16.7 +#include <string.h> 16.8 +#include <math.h> 16.9 16.10 #include "VMS.h" 16.11 #include "C_Libraries/Histogram/Histogram.h" 16.12 16.13 -/*Helper function 16.14 - *Insert a newly generated free chunk into the first spot on the free list. 16.15 - * The chunk is cast as a MallocProlog, so the various pointers in it are 16.16 - * accessed with C's help -- and the size of the prolog is easily added to 16.17 - * the pointer when a chunk is returned to the app -- so C handles changes 16.18 - * in pointer sizes among machines. 16.19 - * 16.20 - *The list head is a normal MallocProlog struct -- identified by its 16.21 - * prevChunkInFreeList being NULL -- the only one. 16.22 - * 16.23 - *The end of the list is identified by next chunk being NULL, as usual. 16.24 +#define MAX_UINT64 0xFFFFFFFFFFFFFFFF 16.25 + 16.26 +//A MallocProlog is a head element if the HigherInMem variable is NULL 16.27 +//A Chunk is free if the prevChunkInFreeList variable is NULL 16.28 + 16.29 +/* 16.30 + * This calculates the container which fits the given size. 16.31 */ 16.32 -void inline 16.33 -add_chunk_to_free_list( MallocProlog *chunk, MallocProlog *listHead ) 16.34 - { 16.35 - chunk->nextChunkInFreeList = listHead->nextChunkInFreeList; 16.36 - if( chunk->nextChunkInFreeList != NULL ) //if not last in free list 16.37 - chunk->nextChunkInFreeList->prevChunkInFreeList = chunk; 16.38 - chunk->prevChunkInFreeList = listHead; 16.39 - listHead->nextChunkInFreeList = chunk; 16.40 - } 16.41 +inline 16.42 +uint32 getContainer(size_t size) 16.43 +{ 16.44 + return (log2(size)-LOG128)/LOG54; 16.45 +} 16.46 16.47 +/* 16.48 + * Removes the first chunk of a freeList 16.49 + * The chunk is removed but not set as free. 
There is no check if 16.50 + * the free list is empty, so make sure this is not the case. 16.51 + */ 16.52 +inline 16.53 +MallocProlog *removeChunk(MallocArrays* freeLists, uint32 containerIdx) 16.54 +{ 16.55 + MallocProlog** container = &freeLists->bigChunks[containerIdx]; 16.56 + MallocProlog* removedChunk = *container; 16.57 + *container = removedChunk->nextChunkInFreeList; 16.58 + 16.59 + if(removedChunk->nextChunkInFreeList) 16.60 + removedChunk->nextChunkInFreeList->prevChunkInFreeList = 16.61 + (MallocProlog*)container; 16.62 + 16.63 + if(*container == NULL) 16.64 + { 16.65 + if(containerIdx < 64) 16.66 + freeLists->bigChunksSearchVector[0] &= ~((uint64)1 << containerIdx); 16.67 + else 16.68 + freeLists->bigChunksSearchVector[1] &= ~((uint64)1 << (containerIdx-64)); 16.69 + } 16.70 + 16.71 + return removedChunk; 16.72 +} 16.73 16.74 -/*This is sequential code, meant to only be called from the Master, not from 16.75 - * any slave VPs. 16.76 - *Search down list, checking size by the nextHigherInMem pointer, to find 16.77 - * first chunk bigger than size needed. 16.78 - *Shave off the extra and make it into a new free-list element, hook it in 16.79 - * then return the address of the found element plus size of prolog. 16.80 - * 16.81 +/* 16.82 + * Removes the first chunk of a freeList 16.83 + * The chunk is removed but not set as free. There is no check if 16.84 + * the free list is empty, so make sure this is not the case. 
16.85 + */ 16.86 +inline 16.87 +MallocProlog *removeSmallChunk(MallocArrays* freeLists, uint32 containerIdx) 16.88 +{ 16.89 + MallocProlog** container = &freeLists->smallChunks[containerIdx]; 16.90 + MallocProlog* removedChunk = *container; 16.91 + *container = removedChunk->nextChunkInFreeList; 16.92 + 16.93 + if(removedChunk->nextChunkInFreeList) 16.94 + removedChunk->nextChunkInFreeList->prevChunkInFreeList = 16.95 + (MallocProlog*)container; 16.96 + 16.97 + return removedChunk; 16.98 +} 16.99 + 16.100 +inline 16.101 +size_t getChunkSize(MallocProlog* chunk) 16.102 +{ 16.103 + return (uintptr_t)chunk->nextHigherInMem - 16.104 + (uintptr_t)chunk - sizeof(MallocProlog); 16.105 +} 16.106 + 16.107 +/* 16.108 + * Removes a chunk from a free list. 16.109 + */ 16.110 +inline 16.111 +void extractChunk(MallocProlog* chunk, MallocArrays *freeLists) 16.112 +{ 16.113 + chunk->prevChunkInFreeList->nextChunkInFreeList = chunk->nextChunkInFreeList; 16.114 + if(chunk->nextChunkInFreeList) 16.115 + chunk->nextChunkInFreeList->prevChunkInFreeList = chunk->prevChunkInFreeList; 16.116 + 16.117 + //The last element in the list points to the container. If the container points 16.118 + //to NULL the container is empty 16.119 + if(*((void**)(chunk->prevChunkInFreeList)) == NULL && getChunkSize(chunk) >= BIG_LOWER_BOUND) 16.120 + { 16.121 + //Find the approppiate container because we do not know it 16.122 + uint64 containerIdx = ((uintptr_t)chunk->prevChunkInFreeList - (uintptr_t)freeLists->bigChunks) >> 3; 16.123 + if(containerIdx < (uint32)64) 16.124 + freeLists->bigChunksSearchVector[0] &= ~((uint64)1 << containerIdx); 16.125 + if(containerIdx < 128 && containerIdx >=64) 16.126 + freeLists->bigChunksSearchVector[1] &= ~((uint64)1 << (containerIdx-64)); 16.127 + 16.128 + } 16.129 +} 16.130 + 16.131 +/* 16.132 + * Merges two chunks. 16.133 + * Chunk A has to be before chunk B in memory. 
Both have to be removed from 16.134 + * a free list 16.135 + */ 16.136 +inline 16.137 +MallocProlog *mergeChunks(MallocProlog* chunkA, MallocProlog* chunkB) 16.138 +{ 16.139 + chunkA->nextHigherInMem = chunkB->nextHigherInMem; 16.140 + chunkB->nextHigherInMem->nextLowerInMem = chunkA; 16.141 + return chunkA; 16.142 +} 16.143 +/* 16.144 + * Inserts a chunk into a free list. 16.145 + */ 16.146 +inline 16.147 +void insertChunk(MallocProlog* chunk, MallocProlog** container) 16.148 +{ 16.149 + chunk->nextChunkInFreeList = *container; 16.150 + chunk->prevChunkInFreeList = (MallocProlog*)container; 16.151 + if(*container) 16.152 + (*container)->prevChunkInFreeList = chunk; 16.153 + *container = chunk; 16.154 +} 16.155 + 16.156 +/* 16.157 + * Divides the chunk that a new chunk of newSize is created. 16.158 + * There is no size check, so make sure the size value is valid. 16.159 + */ 16.160 +inline 16.161 +MallocProlog *divideChunk(MallocProlog* chunk, size_t newSize) 16.162 +{ 16.163 + MallocProlog* newChunk = (MallocProlog*)((uintptr_t)chunk->nextHigherInMem - 16.164 + newSize - sizeof(MallocProlog)); 16.165 + 16.166 + newChunk->nextLowerInMem = chunk; 16.167 + newChunk->nextHigherInMem = chunk->nextHigherInMem; 16.168 + 16.169 + chunk->nextHigherInMem->nextLowerInMem = newChunk; 16.170 + chunk->nextHigherInMem = newChunk; 16.171 + 16.172 + return newChunk; 16.173 +} 16.174 + 16.175 +/* 16.176 + * Search for chunk in the list of big chunks. 
Split the block if it's too big 16.177 + */ 16.178 +inline 16.179 +MallocProlog *searchChunk(MallocArrays *freeLists, size_t sizeRequested, uint32 containerIdx) 16.180 +{ 16.181 + MallocProlog* foundChunk; 16.182 + 16.183 + uint64 searchVector = freeLists->bigChunksSearchVector[0]; 16.184 + //set small chunk bits to zero 16.185 + searchVector &= MAX_UINT64 << containerIdx; 16.186 + containerIdx = __builtin_ffsl(searchVector); 16.187 + 16.188 + if(containerIdx == 0) 16.189 + { 16.190 + searchVector = freeLists->bigChunksSearchVector[1]; 16.191 + containerIdx = __builtin_ffsl(searchVector); 16.192 + if(containerIdx == 0) 16.193 + { 16.194 + printf("VMS malloc failed: low memory"); 16.195 + exit(1); 16.196 + } 16.197 + containerIdx += 64; 16.198 + } 16.199 + containerIdx--; 16.200 + 16.201 + 16.202 + foundChunk = removeChunk(freeLists, containerIdx); 16.203 + size_t chunkSize = getChunkSize(foundChunk); 16.204 + 16.205 + //If the new chunk is larger than the requested size: split 16.206 + if(chunkSize > sizeRequested + 2 * sizeof(MallocProlog) + BIG_LOWER_BOUND) 16.207 + { 16.208 + MallocProlog *newChunk = divideChunk(foundChunk,sizeRequested); 16.209 + containerIdx = getContainer(getChunkSize(foundChunk)) - 1; 16.210 + insertChunk(foundChunk,&freeLists->bigChunks[containerIdx]); 16.211 + if(containerIdx < 64) 16.212 + freeLists->bigChunksSearchVector[0] |= ((uint64)1 << containerIdx); 16.213 + else 16.214 + freeLists->bigChunksSearchVector[1] |= ((uint64)1 << (containerIdx-64)); 16.215 + foundChunk = newChunk; 16.216 + } 16.217 + 16.218 + return foundChunk; 16.219 +} 16.220 + 16.221 + 16.222 +/* 16.223 + * This is sequential code, meant to only be called from the Master, not from 16.224 + * any slave Slvs. 
16.225 */ 16.226 void *VMS_int__malloc( size_t sizeRequested ) 16.227 - { MallocProlog *foundElem = NULL, *currElem, *newElem; 16.228 - ssize_t amountExtra, sizeConsumed,sizeOfFound; 16.229 - uint32 foundElemIsTopOfHeap; 16.230 - 16.231 + { 16.232 //============================= MEASUREMENT STUFF ======================== 16.233 #ifdef MEAS__TIME_MALLOC 16.234 int32 startStamp, endStamp; 16.235 @@ -58,312 +212,101 @@ 16.236 #endif 16.237 //======================================================================== 16.238 16.239 - //step up the size to be aligned at 16-byte boundary, prob better ways 16.240 - sizeRequested = (sizeRequested + 16) & ~15; 16.241 - currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList; 16.242 - 16.243 - while( currElem != NULL ) 16.244 - { //check if size of currElem is big enough 16.245 - sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem); 16.246 - amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog); 16.247 - if( amountExtra > 0 ) 16.248 - { //found it, get out of loop 16.249 - foundElem = currElem; 16.250 - currElem = NULL; 16.251 - } 16.252 - else 16.253 - currElem = currElem->nextChunkInFreeList; 16.254 - } 16.255 + MallocArrays* freeLists = _VMSMasterEnv->freeLists; 16.256 + MallocProlog* foundChunk; 16.257 16.258 - if( foundElem == NULL ) 16.259 - { ERROR("\nmalloc failed\n") 16.260 - return (void *)NULL; //indicates malloc failed 16.261 - } 16.262 - //Using a kludge to identify the element that is the top chunk in the 16.263 - // heap -- saving top-of-heap addr in head's nextHigherInMem -- and 16.264 - // save addr of start of heap in head's nextLowerInMem 16.265 - //Will handle top of Heap specially 16.266 - foundElemIsTopOfHeap = foundElem->nextHigherInMem == 16.267 - _VMSMasterEnv->freeListHead->nextHigherInMem; 16.268 + //Return a small chunk if the requested size is smaller than 128B 16.269 + if(sizeRequested <= LOWER_BOUND) 16.270 + { 16.271 + uint32 freeListIdx = 
(sizeRequested-1)/SMALL_CHUNK_SIZE; 16.272 + if(freeLists->smallChunks[freeListIdx] == NULL) 16.273 + foundChunk = searchChunk(freeLists, SMALL_CHUNK_SIZE*(freeListIdx+1), 0); 16.274 + else 16.275 + foundChunk = removeSmallChunk(freeLists, freeListIdx); 16.276 + 16.277 + //Mark as allocated 16.278 + foundChunk->prevChunkInFreeList = NULL; 16.279 + return foundChunk + 1; 16.280 + } 16.281 16.282 - //before shave off and try to insert new elem, remove found elem 16.283 - //note, foundElem will never be the head, so always has valid prevChunk 16.284 - foundElem->prevChunkInFreeList->nextChunkInFreeList = 16.285 - foundElem->nextChunkInFreeList; 16.286 - if( foundElem->nextChunkInFreeList != NULL ) 16.287 - { foundElem->nextChunkInFreeList->prevChunkInFreeList = 16.288 - foundElem->prevChunkInFreeList; 16.289 - } 16.290 - foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated 16.291 + //Calculate the expected container. Start one higher to have a Chunk that's 16.292 + //always big enough. 16.293 + uint32 containerIdx = getContainer(sizeRequested); 16.294 16.295 - //if enough, turn extra into new elem & insert it 16.296 - if( amountExtra > 64 ) 16.297 - { //make new elem by adding to addr of curr elem then casting 16.298 - sizeConsumed = sizeof(MallocProlog) + sizeRequested; 16.299 - newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed ); 16.300 - newElem->nextLowerInMem = foundElem; //This is evil (but why?) 16.301 - newElem->nextHigherInMem = foundElem->nextHigherInMem; //This is evil (but why?) 16.302 - foundElem->nextHigherInMem = newElem; 16.303 - if( ! 
foundElemIsTopOfHeap ) 16.304 - { //there is no next higher for top of heap, so can't write to it 16.305 - newElem->nextHigherInMem->nextLowerInMem = newElem; 16.306 - } 16.307 - add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead ); 16.308 - } 16.309 + if(freeLists->bigChunks[containerIdx] == NULL) 16.310 + foundChunk = searchChunk(freeLists, sizeRequested, containerIdx); 16.311 else 16.312 - { 16.313 - sizeConsumed = sizeOfFound; 16.314 - } 16.315 - _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed; 16.316 - 16.317 + foundChunk = removeChunk(freeLists, containerIdx); 16.318 + 16.319 + //Mark as allocated 16.320 + foundChunk->prevChunkInFreeList = NULL; 16.321 + 16.322 //============================= MEASUREMENT STUFF ======================== 16.323 #ifdef MEAS__TIME_MALLOC 16.324 saveLowTimeStampCountInto( endStamp ); 16.325 addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist ); 16.326 #endif 16.327 //======================================================================== 16.328 - 16.329 - //skip over the prolog by adding its size to the pointer return 16.330 - return (void*)((uintptr_t)foundElem + sizeof(MallocProlog)); 16.331 + 16.332 + //skip over the prolog by adding its size to the pointer return 16.333 + return foundChunk + 1; 16.334 } 16.335 16.336 -/*This is sequential code, meant to only be called from the Master, not from 16.337 - * any slave VPs. 16.338 - *Search down list, checking size by the nextHigherInMem pointer, to find 16.339 - * first chunk bigger than size needed. 16.340 - *Shave off the extra and make it into a new free-list element, hook it in 16.341 - * then return the address of the found element plus size of prolog. 16.342 - * 16.343 - * The difference to the regular malloc is, that all the allocated chunks are 16.344 - * aligned and padded to the size of a CACHE_LINE_SZ. Thus creating a new chunk 16.345 - * before the aligned chunk. 
16.346 - */ 16.347 -void *VMS_int__malloc_aligned( size_t sizeRequested ) 16.348 - { MallocProlog *foundElem = NULL, *currElem, *newElem; 16.349 - ssize_t amountExtra, sizeConsumed,sizeOfFound,prevAmount; 16.350 - uint32 foundElemIsTopOfHeap; 16.351 - 16.352 - //============================= MEASUREMENT STUFF ======================== 16.353 - #ifdef MEAS__TIME_MALLOC 16.354 - uint32 startStamp, endStamp; 16.355 - saveLowTimeStampCountInto( startStamp ); 16.356 - #endif 16.357 - //======================================================================== 16.358 - 16.359 - //step up the size to be multiple of the cache line size 16.360 - sizeRequested = (sizeRequested + CACHE_LINE_SZ) & ~(CACHE_LINE_SZ-1); 16.361 - currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList; 16.362 - 16.363 - while( currElem != NULL ) 16.364 - { //check if size of currElem is big enough 16.365 - sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem); 16.366 - amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog); 16.367 - if( amountExtra > 0 ) 16.368 - { 16.369 - //look if the found element is already aligned 16.370 - if((((uintptr_t)currElem+sizeof(MallocProlog)) & (uintptr_t)(CACHE_LINE_SZ-1)) == 0){ 16.371 - //found it, get out of loop 16.372 - foundElem = currElem; 16.373 - break; 16.374 - }else{ 16.375 - //find first aligned address and check if it's still big enough 16.376 - //check also if the space before the aligned address is big enough 16.377 - //for a new element 16.378 - void *firstAlignedAddr = (void*)(((uintptr_t)currElem + 2*CACHE_LINE_SZ) & ~((uintptr_t)(CACHE_LINE_SZ-1))); 16.379 - prevAmount = (uintptr_t)firstAlignedAddr - (uintptr_t)currElem; 16.380 - sizeOfFound=(uintptr_t)currElem->nextHigherInMem -(uintptr_t)firstAlignedAddr + sizeof(MallocProlog); 16.381 - amountExtra= sizeOfFound - sizeRequested - sizeof(MallocProlog); 16.382 - if(prevAmount > 2*sizeof(MallocProlog) && amountExtra > 0 ){ 16.383 - //found suitable element 
16.384 - //create new previous element and exit loop 16.385 - MallocProlog *newAlignedElem = (MallocProlog*)firstAlignedAddr - 1; 16.386 - 16.387 - //insert new element into free list 16.388 - if(currElem->nextChunkInFreeList != NULL) 16.389 - currElem->nextChunkInFreeList->prevChunkInFreeList = newAlignedElem; 16.390 - newAlignedElem->prevChunkInFreeList = currElem; 16.391 - newAlignedElem->nextChunkInFreeList = currElem->nextChunkInFreeList; 16.392 - currElem->nextChunkInFreeList = newAlignedElem; 16.393 - 16.394 - //set higherInMem and lowerInMem 16.395 - newAlignedElem->nextHigherInMem = currElem->nextHigherInMem; 16.396 - foundElemIsTopOfHeap = currElem->nextHigherInMem == 16.397 - _VMSMasterEnv->freeListHead->nextHigherInMem; 16.398 - if(!foundElemIsTopOfHeap) 16.399 - currElem->nextHigherInMem->nextLowerInMem = newAlignedElem; 16.400 - currElem->nextHigherInMem = newAlignedElem; 16.401 - newAlignedElem->nextLowerInMem = currElem; 16.402 - 16.403 - //Found new element leaving loop 16.404 - foundElem = newAlignedElem; 16.405 - break; 16.406 - } 16.407 - } 16.408 - 16.409 - } 16.410 - currElem = currElem->nextChunkInFreeList; 16.411 - } 16.412 - 16.413 - if( foundElem == NULL ) 16.414 - { ERROR("\nmalloc failed\n") 16.415 - return (void *)NULL; //indicates malloc failed 16.416 - } 16.417 - //Using a kludge to identify the element that is the top chunk in the 16.418 - // heap -- saving top-of-heap addr in head's nextHigherInMem -- and 16.419 - // save addr of start of heap in head's nextLowerInMem 16.420 - //Will handle top of Heap specially 16.421 - foundElemIsTopOfHeap = foundElem->nextHigherInMem == 16.422 - _VMSMasterEnv->freeListHead->nextHigherInMem; 16.423 - 16.424 - //before shave off and try to insert new elem, remove found elem 16.425 - //note, foundElem will never be the head, so always has valid prevChunk 16.426 - foundElem->prevChunkInFreeList->nextChunkInFreeList = 16.427 - foundElem->nextChunkInFreeList; 16.428 - if( foundElem->nextChunkInFreeList 
!= NULL ) 16.429 - { foundElem->nextChunkInFreeList->prevChunkInFreeList = 16.430 - foundElem->prevChunkInFreeList; 16.431 - } 16.432 - foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated 16.433 - 16.434 - //if enough, turn extra into new elem & insert it 16.435 - if( amountExtra > 64 ) 16.436 - { //make new elem by adding to addr of curr elem then casting 16.437 - sizeConsumed = sizeof(MallocProlog) + sizeRequested; 16.438 - newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed ); 16.439 - newElem->nextHigherInMem = foundElem->nextHigherInMem; 16.440 - newElem->nextLowerInMem = foundElem; 16.441 - foundElem->nextHigherInMem = newElem; 16.442 - 16.443 - if( ! foundElemIsTopOfHeap ) 16.444 - { //there is no next higher for top of heap, so can't write to it 16.445 - newElem->nextHigherInMem->nextLowerInMem = newElem; 16.446 - } 16.447 - add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead ); 16.448 - } 16.449 - else 16.450 - { 16.451 - sizeConsumed = sizeOfFound; 16.452 - } 16.453 - _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed; 16.454 - 16.455 - //============================= MEASUREMENT STUFF ======================== 16.456 - #ifdef MEAS__TIME_MALLOC 16.457 - saveLowTimeStampCountInto( endStamp ); 16.458 - addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist ); 16.459 - #endif 16.460 - //======================================================================== 16.461 - 16.462 - //skip over the prolog by adding its size to the pointer return 16.463 - return (void*)((uintptr_t)foundElem + sizeof(MallocProlog)); 16.464 - } 16.465 - 16.466 - 16.467 -/*This is sequential code -- only to be called from the Master 16.468 - * When free, subtract the size of prolog from pointer, then cast it to a 16.469 - * MallocProlog. Then check the nextLower and nextHigher chunks to see if 16.470 - * one or both are also free, and coalesce if so, and if neither free, then 16.471 - * add this one to free-list. 
16.472 +/* 16.473 + * This is sequential code, meant to only be called from the Master, not from 16.474 + * any slave Slvs. 16.475 */ 16.476 void 16.477 VMS_int__free( void *ptrToFree ) 16.478 - { MallocProlog *elemToFree, *nextLowerElem, *nextHigherElem; 16.479 - size_t sizeOfElem; 16.480 - uint32 lowerExistsAndIsFree, higherExistsAndIsFree; 16.481 - 16.482 + { 16.483 + 16.484 //============================= MEASUREMENT STUFF ======================== 16.485 #ifdef MEAS__TIME_MALLOC 16.486 int32 startStamp, endStamp; 16.487 saveLowTimeStampCountInto( startStamp ); 16.488 #endif 16.489 //======================================================================== 16.490 - 16.491 - if( ptrToFree < (void*)_VMSMasterEnv->freeListHead->nextLowerInMem || 16.492 - ptrToFree > (void*)_VMSMasterEnv->freeListHead->nextHigherInMem ) 16.493 - { //outside the range of data owned by VMS's malloc, so do nothing 16.494 - return; 16.495 - } 16.496 - //subtract size of prolog to get pointer to prolog, then cast 16.497 - elemToFree = (MallocProlog *)((uintptr_t)ptrToFree - sizeof(MallocProlog)); 16.498 - sizeOfElem =(size_t)((uintptr_t)elemToFree->nextHigherInMem-(uintptr_t)elemToFree); 16.499 - 16.500 - if( elemToFree->prevChunkInFreeList != NULL ) 16.501 - { printf( "error: freeing same element twice!" 
); exit(1); 16.502 - } 16.503 - 16.504 - _VMSMasterEnv->amtOfOutstandingMem -= sizeOfElem; 16.505 - 16.506 - nextLowerElem = elemToFree->nextLowerInMem; 16.507 - nextHigherElem = elemToFree->nextHigherInMem; 16.508 - 16.509 - if( nextHigherElem == NULL ) 16.510 - higherExistsAndIsFree = FALSE; 16.511 - else //okay exists, now check if in the free-list by checking back ptr 16.512 - higherExistsAndIsFree = (nextHigherElem->prevChunkInFreeList != NULL); 16.513 - 16.514 - if( nextLowerElem == NULL ) 16.515 - lowerExistsAndIsFree = FALSE; 16.516 - else //okay, it exists, now check if it's free 16.517 - lowerExistsAndIsFree = (nextLowerElem->prevChunkInFreeList != NULL); 16.518 - 16.519 - 16.520 - //now, know what exists and what's free 16.521 - if( lowerExistsAndIsFree ) 16.522 - { if( higherExistsAndIsFree ) 16.523 - { //both exist and are free, so coalesce all three 16.524 - //First, remove higher from free-list 16.525 - nextHigherElem->prevChunkInFreeList->nextChunkInFreeList = 16.526 - nextHigherElem->nextChunkInFreeList; 16.527 - if( nextHigherElem->nextChunkInFreeList != NULL ) //end-of-list? 16.528 - nextHigherElem->nextChunkInFreeList->prevChunkInFreeList = 16.529 - nextHigherElem->prevChunkInFreeList; 16.530 - //Now, fix-up sequence-in-mem list -- by side-effect, this also 16.531 - // changes size of the lower elem, which is still in free-list 16.532 - nextLowerElem->nextHigherInMem = nextHigherElem->nextHigherInMem; 16.533 - if( nextHigherElem->nextHigherInMem != 16.534 - _VMSMasterEnv->freeListHead->nextHigherInMem ) 16.535 - nextHigherElem->nextHigherInMem->nextLowerInMem = nextLowerElem; 16.536 - //notice didn't do anything to elemToFree -- it simply is no 16.537 - // longer reachable from any of the lists. Wonder if could be a 16.538 - // security leak because left valid addresses in it, 16.539 - // but don't care for now. 
16.540 + 16.541 + MallocArrays* freeLists = _VMSMasterEnv->freeLists; 16.542 + MallocProlog *chunkToFree = (MallocProlog*)ptrToFree - 1; 16.543 + uint32 containerIdx; 16.544 + 16.545 + //Check for free neighbors 16.546 + if(chunkToFree->nextLowerInMem) 16.547 + { 16.548 + if(chunkToFree->nextLowerInMem->prevChunkInFreeList != NULL) 16.549 + {//Chunk is not allocated 16.550 + extractChunk(chunkToFree->nextLowerInMem, freeLists); 16.551 + chunkToFree = mergeChunks(chunkToFree->nextLowerInMem, chunkToFree); 16.552 } 16.553 - else 16.554 - { //lower is the only of the two that exists and is free, 16.555 - //In this case, no adjustment to free-list, just change mem-list. 16.556 - // By side-effect, changes size of the lower elem 16.557 - nextLowerElem->nextHigherInMem = elemToFree->nextHigherInMem; 16.558 - if( elemToFree->nextHigherInMem != 16.559 - _VMSMasterEnv->freeListHead->nextHigherInMem ) 16.560 - elemToFree->nextHigherInMem->nextLowerInMem = nextLowerElem; 16.561 + } 16.562 + if(chunkToFree->nextHigherInMem) 16.563 + { 16.564 + if(chunkToFree->nextHigherInMem->prevChunkInFreeList != NULL) 16.565 + {//Chunk is not allocated 16.566 + extractChunk(chunkToFree->nextHigherInMem, freeLists); 16.567 + chunkToFree = mergeChunks(chunkToFree, chunkToFree->nextHigherInMem); 16.568 } 16.569 - } 16.570 + } 16.571 + 16.572 + size_t chunkSize = getChunkSize(chunkToFree); 16.573 + if(chunkSize < BIG_LOWER_BOUND) 16.574 + { 16.575 + containerIdx = (chunkSize/SMALL_CHUNK_SIZE)-1; 16.576 + if(containerIdx > SMALL_CHUNK_COUNT-1) 16.577 + containerIdx = SMALL_CHUNK_COUNT-1; 16.578 + insertChunk(chunkToFree, &freeLists->smallChunks[containerIdx]); 16.579 + } 16.580 else 16.581 - { //lower either doesn't exist or isn't free, so check higher 16.582 - if( higherExistsAndIsFree ) 16.583 - { //higher exists and is the only of the two free 16.584 - //First, in free-list, replace higher elem with the one to free 16.585 - elemToFree->nextChunkInFreeList=nextHigherElem->nextChunkInFreeList; 
16.586 - elemToFree->prevChunkInFreeList=nextHigherElem->prevChunkInFreeList; 16.587 - elemToFree->prevChunkInFreeList->nextChunkInFreeList = elemToFree; 16.588 - if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list? 16.589 - elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree; 16.590 - //Now chg mem-list. By side-effect, changes size of elemToFree 16.591 - elemToFree->nextHigherInMem = nextHigherElem->nextHigherInMem; 16.592 - if( elemToFree->nextHigherInMem != 16.593 - _VMSMasterEnv->freeListHead->nextHigherInMem ) 16.594 - elemToFree->nextHigherInMem->nextLowerInMem = elemToFree; 16.595 - } 16.596 - else 16.597 - { //neither lower nor higher is availabe to coalesce so add to list 16.598 - // this makes prev chunk ptr non-null, which indicates it's free 16.599 - elemToFree->nextChunkInFreeList = 16.600 - _VMSMasterEnv->freeListHead->nextChunkInFreeList; 16.601 - _VMSMasterEnv->freeListHead->nextChunkInFreeList = elemToFree; 16.602 - if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list? 
16.603 - elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree; 16.604 - elemToFree->prevChunkInFreeList = _VMSMasterEnv->freeListHead; 16.605 - } 16.606 - } 16.607 + { 16.608 + containerIdx = getContainer(getChunkSize(chunkToFree)) - 1; 16.609 + insertChunk(chunkToFree, &freeLists->bigChunks[containerIdx]); 16.610 + if(containerIdx < 64) 16.611 + freeLists->bigChunksSearchVector[0] |= (uint64)1 << containerIdx; 16.612 + else 16.613 + freeLists->bigChunksSearchVector[1] |= (uint64)1 << (containerIdx-64); 16.614 + } 16.615 + 16.616 //============================= MEASUREMENT STUFF ======================== 16.617 #ifdef MEAS__TIME_MALLOC 16.618 saveLowTimeStampCountInto( endStamp ); 16.619 @@ -373,82 +316,31 @@ 16.620 16.621 } 16.622 16.623 - 16.624 -/*Allocates memory from the external system -- higher overhead 16.625 - * 16.626 - *Because of Linux's malloc throwing bizarre random faults when malloc is 16.627 - * used inside a VMS virtual processor, have to pass this as a request and 16.628 - * have the core loop do it when it gets around to it -- will look for these 16.629 - * chores leftover from the previous animation of masterVP the next time it 16.630 - * goes to animate the masterVP -- so it takes two separate masterVP 16.631 - * animations, separated by work, to complete an external malloc or 16.632 - * external free request. 16.633 - * 16.634 - *Thinking core loop accepts signals -- just looks if signal-location is 16.635 - * empty or not -- 16.636 +/* 16.637 + * Designed to be called from the main thread outside of VMS, during init 16.638 */ 16.639 -void * 16.640 -VMS__malloc_in_ext( size_t sizeRequested ) 16.641 - { 16.642 - /* 16.643 - //This is running in the master, so no chance for multiple cores to be 16.644 - // competing for the core's flag. 
16.645 - if( *(_VMSMasterEnv->coreLoopSignalAddr[ 0 ]) != 0 ) 16.646 - { //something has already signalled to core loop, so save the signal 16.647 - // and look, next time master animated, to see if can send it. 16.648 - //Note, the addr to put a signal is in the coreloop's frame, so just 16.649 - // checks it each time through -- make it volatile to avoid GCC 16.650 - // optimizations -- it's a coreloop local var that only changes 16.651 - // after jumping away. The signal includes the addr to send the 16.652 - //return to -- even if just empty return completion-signal 16.653 - // 16.654 - //save the signal in some queue that the master looks at each time 16.655 - // it starts up -- one loc says if empty for fast common case -- 16.656 - //something like that -- want to hide this inside this call -- but 16.657 - // think this has to come as a request -- req handler gives procr 16.658 - // back to master loop, which gives it back to req handler at point 16.659 - // it sees that core loop has sent return signal. Something like 16.660 - // that. 16.661 - saveTheSignal 16.662 - 16.663 - } 16.664 - coreSigData->type = malloc; 16.665 - coreSigData->sizeToMalloc = sizeRequested; 16.666 - coreSigData->locToSignalCompletion = &figureOut; 16.667 - _VMSMasterEnv->coreLoopSignals[ 0 ] = coreSigData; 16.668 - */ 16.669 - //just risk system-stack faults until get this figured out 16.670 - return malloc( sizeRequested ); 16.671 - } 16.672 - 16.673 - 16.674 -/*Frees memory that was allocated in the external system -- higher overhead 16.675 - * 16.676 - *As noted in external malloc comment, this is clunky 'cause the free has 16.677 - * to be called in the core loop. 
16.678 - */ 16.679 -void 16.680 -VMS__free_in_ext( void *ptrToFree ) 16.681 - { 16.682 - //just risk system-stack faults until get this figured out 16.683 - free( ptrToFree ); 16.684 - 16.685 - //TODO: fix this -- so 16.686 - } 16.687 - 16.688 - 16.689 -/*Designed to be called from the main thread outside of VMS, during init 16.690 - */ 16.691 -MallocProlog * 16.692 +MallocArrays * 16.693 VMS_ext__create_free_list() 16.694 - { MallocProlog *freeListHead, *firstChunk; 16.695 - 16.696 - //Note, this is running in the main thread -- all increases in malloc 16.697 - // mem and all frees of it must be done in this thread, with the 16.698 - // thread's original stack available 16.699 - freeListHead = malloc( sizeof(MallocProlog) ); 16.700 - firstChunk = malloc( MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE ); 16.701 - if( firstChunk == NULL ) {printf("malloc error\n"); exit(1);} 16.702 +{ 16.703 + //Initialize containers for small chunks and fill with zeros 16.704 + _VMSMasterEnv->freeLists = (MallocArrays*)malloc( sizeof(MallocArrays) ); 16.705 + MallocArrays *freeLists = _VMSMasterEnv->freeLists; 16.706 + 16.707 + freeLists->smallChunks = 16.708 + (MallocProlog**)malloc(SMALL_CHUNK_COUNT*sizeof(MallocProlog*)); 16.709 + memset((void*)freeLists->smallChunks, 16.710 + 0,SMALL_CHUNK_COUNT*sizeof(MallocProlog*)); 16.711 + 16.712 + //Calculate number of containers for big chunks 16.713 + uint32 container = getContainer(MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE)+1; 16.714 + freeLists->bigChunks = (MallocProlog**)malloc(container*sizeof(MallocProlog*)); 16.715 + memset((void*)freeLists->bigChunks,0,container*sizeof(MallocProlog*)); 16.716 + freeLists->containerCount = container; 16.717 + 16.718 + //Create first element in lastContainer 16.719 + MallocProlog *firstChunk = malloc( MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE ); 16.720 + if( firstChunk == NULL ) {printf("Can't allocate initial memory\n"); exit(1);} 16.721 + freeLists->memSpace = firstChunk; 16.722 16.723 //Touch memory to avoid page faults 
16.724 void *ptr,*endPtr; 16.725 @@ -457,38 +349,47 @@ 16.726 { 16.727 *(char*)ptr = 0; 16.728 } 16.729 - 16.730 - freeListHead->prevChunkInFreeList = NULL; 16.731 - //Use this addr to free the heap when cleanup 16.732 - freeListHead->nextLowerInMem = firstChunk; 16.733 - //to identify top-of-heap elem, compare this addr to elem's next higher 16.734 - freeListHead->nextHigherInMem = (void*)( (uintptr_t)firstChunk + 16.735 - MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE); 16.736 - freeListHead->nextChunkInFreeList = firstChunk; 16.737 - 16.738 - firstChunk->nextChunkInFreeList = NULL; 16.739 - firstChunk->prevChunkInFreeList = freeListHead; 16.740 - //next Higher has to be set to top of chunk, so can calc size in malloc 16.741 - firstChunk->nextHigherInMem = (void*)( (uintptr_t)firstChunk + 16.742 - MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE); 16.743 - firstChunk->nextLowerInMem = NULL; //identifies as bott of heap 16.744 16.745 - _VMSMasterEnv->amtOfOutstandingMem = 0; //none allocated yet 16.746 - 16.747 - return freeListHead; 16.748 + firstChunk->nextLowerInMem = NULL; 16.749 + firstChunk->nextHigherInMem = (MallocProlog*)((uintptr_t)firstChunk + 16.750 + MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE - sizeof(MallocProlog)); 16.751 + firstChunk->nextChunkInFreeList = NULL; 16.752 + //previous element in the queue is the container 16.753 + firstChunk->prevChunkInFreeList = &freeLists->bigChunks[container-2]; 16.754 + 16.755 + freeLists->bigChunks[container-2] = firstChunk; 16.756 + //Insert into bit search list 16.757 + if(container <= 65) 16.758 + { 16.759 + freeLists->bigChunksSearchVector[0] = ((uint64)1 << (container-2)); 16.760 + freeLists->bigChunksSearchVector[1] = 0; 16.761 + } 16.762 + else 16.763 + { 16.764 + freeLists->bigChunksSearchVector[0] = 0; 16.765 + freeLists->bigChunksSearchVector[1] = ((uint64)1 << (container-66)); 16.766 + } 16.767 + 16.768 + //Create dummy chunk to mark the top of stack this is of course 16.769 + //never freed 16.770 + MallocProlog *dummyChunk = 
firstChunk->nextHigherInMem; 16.771 + dummyChunk->nextHigherInMem = dummyChunk+1; 16.772 + dummyChunk->nextLowerInMem = NULL; 16.773 + dummyChunk->nextChunkInFreeList = NULL; 16.774 + dummyChunk->prevChunkInFreeList = NULL; 16.775 + 16.776 + return freeLists; 16.777 } 16.778 16.779 16.780 /*Designed to be called from the main thread outside of VMS, during cleanup 16.781 */ 16.782 void 16.783 -VMS_ext__free_free_list( MallocProlog *freeListHead ) 16.784 +VMS_ext__free_free_list( MallocArrays *freeLists ) 16.785 { 16.786 - //stashed a ptr to the one and only bug chunk malloc'd from OS in the 16.787 - // free list head's next lower in mem pointer 16.788 - free( freeListHead->nextLowerInMem ); 16.789 - 16.790 - //don't free the head -- it'll be in an array eventually -- free whole 16.791 - // array when all the free lists linked from it have already been freed 16.792 + free(freeLists->memSpace); 16.793 + free(freeLists->bigChunks); 16.794 + free(freeLists->smallChunks); 16.795 + 16.796 } 16.797
17.1 --- a/vmalloc.h Wed Feb 22 11:39:12 2012 -0800 17.2 +++ b/vmalloc.h Sun Mar 04 14:26:35 2012 -0800 17.3 @@ -14,6 +14,14 @@ 17.4 #include <inttypes.h> 17.5 #include "VMS_primitive_data_types.h" 17.6 17.7 +#define SMALL_CHUNK_SIZE 32 17.8 +#define SMALL_CHUNK_COUNT 4 17.9 +#define LOWER_BOUND 128 //Biggest chunk size that is created for the small chunks 17.10 +#define BIG_LOWER_BOUND 160 //Smallest chunk size that is created for the big chunks 17.11 + 17.12 +#define LOG54 0.3219280948873623 17.13 +#define LOG128 7 17.14 + 17.15 typedef struct _MallocProlog MallocProlog; 17.16 17.17 struct _MallocProlog 17.18 @@ -24,6 +32,18 @@ 17.19 MallocProlog *nextLowerInMem; 17.20 }; 17.21 //MallocProlog 17.22 + 17.23 + typedef struct MallocArrays MallocArrays; 17.24 + 17.25 + struct MallocArrays 17.26 + { 17.27 + MallocProlog **smallChunks; 17.28 + MallocProlog **bigChunks; 17.29 + uint64 bigChunksSearchVector[2]; 17.30 + void *memSpace; 17.31 + uint32 containerCount; 17.32 + }; 17.33 + //MallocArrays 17.34 17.35 typedef struct 17.36 { 17.37 @@ -34,57 +54,38 @@ 17.38 17.39 void * 17.40 VMS_int__malloc( size_t sizeRequested ); 17.41 +#define VMS_PI__malloc VMS_int__malloc 17.42 +#define VMS_WL__malloc VMS_int__malloc /*TODO: Bug -- Not protected!! */ 17.43 +#define VMS_App__malloc VMS_int__malloc /*TODO: Bug -- Not protected!! */ 17.44 17.45 void * 17.46 VMS_int__malloc_aligned( size_t sizeRequested ); 17.47 +#define VMS_PI__malloc_aligned VMS_int__malloc_aligned 17.48 +#define VMS_WL__malloc_aligned VMS_int__malloc_aligned 17.49 17.50 void 17.51 VMS_int__free( void *ptrToFree ); 17.52 +#define VMS_PI__free VMS_int__free 17.53 +#define VMS_WL__free VMS_int__free /*TODO: Bug -- Not protected!! */ 17.54 +#define VMS_App__free VMS_int__free /*TODO: Bug -- Not protected!! 
*/ 17.55 17.56 -#define VMS_PI__malloc VMS_int__malloc 17.57 -#define VMS_PI__malloc_aligned VMS_int__malloc_aligned 17.58 -#define VMS_PI__free VMS_int__free 17.59 -/* For now, the PI is protected by master lock, so int malloc fine 17.60 -void * 17.61 -VMS_PI__malloc( size_t sizeRequested ); 17.62 17.63 -void * 17.64 -VMS_PI__malloc_aligned( size_t sizeRequested ); 17.65 - 17.66 -void 17.67 -VMS_PI__free( void *ptrToFree ); 17.68 -*/ 17.69 - 17.70 -//TODO: protect WL malloc from concurrency!! shared freelist can be corrupted 17.71 -#define VMS_WL__malloc VMS_int__malloc 17.72 -#define VMS_WL__malloc_aligned VMS_int__malloc_aligned 17.73 -#define VMS_WL__free VMS_int__free 17.74 -/* 17.75 -void * 17.76 -VMS_WL__malloc( size_t sizeRequested ); 17.77 - 17.78 -void * 17.79 -VMS_WL__malloc_aligned( size_t sizeRequested ); 17.80 - 17.81 -void 17.82 -VMS_WL__free( void *ptrToFree ); 17.83 -*/ 17.84 17.85 /*Allocates memory from the external system -- higher overhead 17.86 */ 17.87 void * 17.88 -VMS__malloc_in_ext( size_t sizeRequested ); 17.89 +VMS_ext__malloc_in_ext( size_t sizeRequested ); 17.90 17.91 /*Frees memory that was allocated in the external system -- higher overhead 17.92 */ 17.93 void 17.94 -VMS__free_in_ext( void *ptrToFree ); 17.95 +VMS_ext__free_in_ext( void *ptrToFree ); 17.96 17.97 17.98 -MallocProlog * 17.99 +MallocArrays * 17.100 VMS_ext__create_free_list(); 17.101 17.102 void 17.103 -VMS_ext__free_free_list( MallocProlog *freeListHead ); 17.104 +VMS_ext__free_free_list(MallocArrays *freeLists ); 17.105 17.106 #endif 17.107 \ No newline at end of file
18.1 --- a/vutilities.h Wed Feb 22 11:39:12 2012 -0800 18.2 +++ b/vutilities.h Sun Mar 04 14:26:35 2012 -0800 18.3 @@ -8,8 +8,8 @@ 18.4 */ 18.5 18.6 18.7 -#ifndef _UTILITIES_H 18.8 -#define _UTILITIES_H 18.9 +#ifndef _VUTILITIES_H 18.10 +#define _VUTILITIES_H 18.11 18.12 #include <string.h> 18.13 #include "VMS_primitive_data_types.h"
