Mercurial > cgi-bin > hgwebdir.cgi > VMS > VMS_Implementations > VMS_impls > VMS__MC_shared_impl
changeset 208:eaf7e4c58c9e Common_Ancestor
Create common_ancestor branch -- all branches will be closed, then new ones
created with this as the common ancestor of all branches. It is incomplete:
only code that is common to all HW, Feat, and FeatDev branches is in here.
| author | Some Random Person <seanhalle@yahoo.com> |
|---|---|
| date | Wed, 22 Feb 2012 11:39:12 -0800 |
| parents | bc4cb994f114 |
| children | 0c83ea8adefc |
| files | .hgignore .hgtags CoreLoop.c MasterLoop.c VMS.h VMS__HW_dependent.c VMS__HW_dependent.h VMS__HW_dependent.s VMS__PI.c VMS__WL.c VMS__int.c VMS__startup_and_shutdown.c VMS_defs__HW_specific.h VMS_defs__lang_specific.h VMS_defs__main.h VMS_primitive_data_types.h __brch__Common_ancestor __brch__DEPRECATED_README probes.c probes.h vmalloc.c vmalloc.h vutilities.c vutilities.h |
| diffstat | 24 files changed, 3707 insertions(+), 29 deletions(-) [+] |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/.hgignore Wed Feb 22 11:39:12 2012 -0800 1.3 @@ -0,0 +1,3 @@ 1.4 +syntax: glob 1.5 + 1.6 +*.o
2.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 2.2 +++ b/.hgtags Wed Feb 22 11:39:12 2012 -0800 2.3 @@ -0,0 +1,1 @@ 2.4 +9c3107044f86c36fea3a8f72f64910b1363555be Dec27_2010_about_to_add_sched_record
3.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 3.2 +++ b/CoreLoop.c Wed Feb 22 11:39:12 2012 -0800 3.3 @@ -0,0 +1,214 @@ 3.4 +/* 3.5 + * Copyright 2010 OpenSourceStewardshipFoundation 3.6 + * 3.7 + * Licensed under BSD 3.8 + */ 3.9 + 3.10 + 3.11 +#include "VMS.h" 3.12 +#include "ProcrContext.h" 3.13 + 3.14 +#include <stdlib.h> 3.15 +#include <stdio.h> 3.16 +#include <time.h> 3.17 + 3.18 +#include <pthread.h> 3.19 +#include <sched.h> 3.20 + 3.21 +void *terminateCoreLoop(SlaveVP *currPr); 3.22 + 3.23 +/*This is the loop that runs in the OS Thread pinned to each core 3.24 + *Get virt procr from queue, 3.25 + * save state of current animator, then load in state of virt procr, using 3.26 + * jmp instr to switch the program-counter state -- making the virt procr 3.27 + * the new animator. 3.28 + *At some point, the virt procr will suspend itself by saving out its 3.29 + * animator state (stack ptr, frame ptr, program counter) and switching 3.30 + * back to the OS Thread's animator state, which means restoring the 3.31 + * stack and frame and jumping to the core loop start point. 3.32 + *This cycle then repeats, until a special shutdown virtual processor is 3.33 + * animated, which jumps to the end point at the bottom of core loop. 
3.34 + */ 3.35 +void * 3.36 +coreLoop( void *paramsIn ) 3.37 + { 3.38 + ThdParams *coreLoopThdParams; 3.39 + int thisCoresIdx; 3.40 + SlaveVP *currPr; 3.41 + VMSQueueStruc *readyToAnimateQ; 3.42 + cpu_set_t coreMask; //has 1 in bit positions of allowed cores 3.43 + int errorCode; 3.44 + 3.45 + //work-stealing struc on stack to prevent false-sharing in cache-line 3.46 + volatile GateStruc gate; 3.47 + //preGateProgress, waitProgress, exitProgress, gateClosed; 3.48 + 3.49 + 3.50 + coreLoopThdParams = (ThdParams *)paramsIn; 3.51 + thisCoresIdx = coreLoopThdParams->coreNum; 3.52 + 3.53 + gate.gateClosed = FALSE; 3.54 + gate.preGateProgress = 0; 3.55 + gate.waitProgress = 0; 3.56 + gate.exitProgress = 0; 3.57 + _VMSMasterEnv->workStealingGates[ thisCoresIdx ] = (GateStruc*)&gate;//race @startup 3.58 + 3.59 + //wait until signalled that setup is complete 3.60 + pthread_mutex_lock( &suspendLock ); 3.61 + while( !(_VMSMasterEnv->setupComplete) ) 3.62 + { 3.63 + pthread_cond_wait( &suspend_cond, 3.64 + &suspendLock ); 3.65 + } 3.66 + pthread_mutex_unlock( &suspendLock ); 3.67 + 3.68 + //printf( "\nCore unsuspended: %d\n", coreLoopThdParams->coreNum ); 3.69 + 3.70 + //set thread affinity 3.71 + //Linux requires pinning thd to core inside thread-function 3.72 + //Designate a core by a 1 in bit-position corresponding to the core 3.73 + CPU_ZERO(&coreMask); 3.74 + CPU_SET(coreLoopThdParams->coreNum,&coreMask); 3.75 + //coreMask = 1L << coreLoopThdParams->coreNum; 3.76 + 3.77 + pthread_t selfThd = pthread_self(); 3.78 + errorCode = 3.79 + pthread_setaffinity_np( selfThd, sizeof(coreMask), &coreMask); 3.80 + 3.81 + if(errorCode){ printf("\nset affinity failure\n"); exit(0); } 3.82 + 3.83 + 3.84 + //Save the return address in the SwitchVP function 3.85 + saveCoreLoopReturnAddr((void**)&(_VMSMasterEnv->coreLoopReturnPt)); 3.86 + 3.87 + 3.88 + while(1){ 3.89 + 3.90 + //Get virtual processor from queue 3.91 + //The Q must be a global, static volatile var, so not kept in reg, 3.92 + 
// which forces reloading the pointer after each jmp to this point 3.93 + readyToAnimateQ = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx]; 3.94 + 3.95 + #ifdef USE_WORK_STEALING 3.96 + //Alg for work-stealing designed to make common case fast. Comment 3.97 + // in stealer code explains. 3.98 + gate.preGateProgress++; 3.99 + if( gate.gateClosed ) 3.100 + { //now, set coreloop's progress, so stealer can see that core loop 3.101 + // has made it into the waiting area. 3.102 + gate.waitProgress = gate.preGateProgress; 3.103 + while( gate.gateClosed ) /*busy wait*/; 3.104 + } 3.105 + 3.106 + currPr = (SlaveVP *) readVMSQ( readyToAnimateQ ); 3.107 + 3.108 + //Set the coreloop's progress, so stealer can see it has made it out 3.109 + // of the protected area 3.110 + gate.exitProgress = gate.preGateProgress; 3.111 + #else 3.112 + currPr = (SlaveVP *) readVMSQ( readyToAnimateQ ); 3.113 + #endif 3.114 + 3.115 + if( currPr != NULL ) _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0; 3.116 + else 3.117 + { 3.118 + //============================= MEASUREMENT STUFF ===================== 3.119 + #ifdef MEAS__TIME_MASTER_LOCK 3.120 + int32 startStamp, endStamp; 3.121 + saveLowTimeStampCountInto( startStamp ); 3.122 + #endif 3.123 + //===================================================================== 3.124 + int tries = 0; int gotLock = 0; 3.125 + while( currPr == NULL ) //if queue was empty, enter get masterLock loop 3.126 + { //queue was empty, so get master lock 3.127 + 3.128 + gotLock = __sync_bool_compare_and_swap(&(_VMSMasterEnv->masterLock), 3.129 + UNLOCKED, LOCKED ); 3.130 + if( gotLock ) 3.131 + { //run own MasterVP -- jmps to coreLoops startPt when done 3.132 + currPr = _VMSMasterEnv->masterVPs[thisCoresIdx]; 3.133 + if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 ) 3.134 + { DEBUG( dbgB2BMaster,"Many back to back MasterVPs\n"); 3.135 + pthread_yield(); 3.136 + } 3.137 + _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1; 3.138 + break; //end while -- have a VP 
to animate now 3.139 + } 3.140 + 3.141 + tries++; //if too many, means master on other core taking too long 3.142 + if( tries > MASTERLOCK_RETRIES ) { tries = 0; pthread_yield(); } 3.143 + } 3.144 + //============================= MEASUREMENT STUFF ===================== 3.145 + #ifdef MEAS__TIME_MASTER_LOCK 3.146 + saveLowTimeStampCountInto( endStamp ); 3.147 + addIntervalToHist( startStamp, endStamp, 3.148 + _VMSMasterEnv->masterLockLowTimeHist ); 3.149 + addIntervalToHist( startStamp, endStamp, 3.150 + _VMSMasterEnv->masterLockHighTimeHist ); 3.151 + #endif 3.152 + //===================================================================== 3.153 + 3.154 + } 3.155 + 3.156 + 3.157 + switchToVP(currPr); //The VPs return in here 3.158 + flushRegisters(); 3.159 + }//CoreLoop 3.160 + } 3.161 + 3.162 + 3.163 +void * 3.164 +terminateCoreLoop(SlaveVP *currPr){ 3.165 + //first free shutdown VP that jumped here -- it first restores the 3.166 + // coreloop's stack, so addr of currPr in stack frame is still correct 3.167 + VMS_int__dissipate_procr( currPr ); 3.168 + pthread_exit( NULL ); 3.169 +} 3.170 + 3.171 + 3.172 + 3.173 +#ifdef SEQUENTIAL 3.174 + 3.175 +//=========================================================================== 3.176 +/*This sequential version is exact same as threaded, except doesn't do the 3.177 + * pin-threads part, nor the wait until setup complete part. 
3.178 + */ 3.179 +void * 3.180 +coreLoop_Seq( void *paramsIn ) 3.181 + { 3.182 + SlaveVP *currPr; 3.183 + VMSQueueStruc *readyToAnimateQ; 3.184 + 3.185 + ThdParams *coreLoopThdParams; 3.186 + int thisCoresIdx; 3.187 + 3.188 + coreLoopThdParams = (ThdParams *)paramsIn; 3.189 +// thisCoresIdx = coreLoopThdParams->coreNum; 3.190 + thisCoresIdx = 0; 3.191 + 3.192 + //Save the return address in the SwitchVP function 3.193 + saveCoreLoopReturnAddr(&(_VMSMasterEnv->coreLoopReturnPt)); 3.194 + 3.195 + 3.196 + while(1){ 3.197 + //Get virtual processor from queue 3.198 + //_VMSWorkQ must be a global, static volatile var, so not kept in reg, 3.199 + // which forces reloading the pointer after each jmp to this point 3.200 + readyToAnimateQ = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx]; 3.201 + currPr = (SlaveVP *) readVMSQ( readyToAnimateQ ); 3.202 + if( currPr == NULL ) 3.203 + { if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 ) 3.204 + { printf("too many back to back MasterVP\n"); exit(1); } 3.205 + _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1; 3.206 + 3.207 + currPr = _VMSMasterEnv->masterVPs[thisCoresIdx]; 3.208 + } 3.209 + else 3.210 + _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0; 3.211 + 3.212 + 3.213 + switchToVP( currPr ); 3.214 + flushRegisters(); 3.215 + } 3.216 + } 3.217 +#endif
4.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 4.2 +++ b/MasterLoop.c Wed Feb 22 11:39:12 2012 -0800 4.3 @@ -0,0 +1,373 @@ 4.4 +/* 4.5 + * Copyright 2010 OpenSourceStewardshipFoundation 4.6 + * 4.7 + * Licensed under BSD 4.8 + */ 4.9 + 4.10 + 4.11 + 4.12 +#include <stdio.h> 4.13 +#include <stddef.h> 4.14 + 4.15 +#include "VMS.h" 4.16 +#include "ProcrContext.h" 4.17 + 4.18 + 4.19 +//=========================================================================== 4.20 +void inline 4.21 +stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ, 4.22 + SlaveVP *masterPr ); 4.23 + 4.24 +//=========================================================================== 4.25 + 4.26 + 4.27 + 4.28 +/*This code is animated by the virtual Master processor. 4.29 + * 4.30 + *Polls each sched slot exactly once, hands any requests made by a newly 4.31 + * done slave to the "request handler" plug-in function 4.32 + * 4.33 + *Any slots that need a virt procr assigned are given to the "schedule" 4.34 + * plug-in function, which tries to assign a virt procr (slave) to it. 4.35 + * 4.36 + *When all slots needing a processor have been given to the schedule plug-in, 4.37 + * a fraction of the procrs successfully scheduled are put into the 4.38 + * work queue, then a continuation of this function is put in, then the rest 4.39 + * of the virt procrs that were successfully scheduled. 4.40 + * 4.41 + *The first thing the continuation does is busy-wait until the previous 4.42 + * animation completes. This is because an (unlikely) continuation may 4.43 + * sneak through queue before previous continuation is done putting second 4.44 + * part of scheduled slaves in, which is the only race condition. 
4.45 + * 4.46 + */ 4.47 + 4.48 +/*May 29, 2010 -- birth a Master during init so that first core loop to 4.49 + * start running gets it and does all the stuff for a newly born -- 4.50 + * from then on, will be doing continuation, but do suspension self 4.51 + * directly at end of master loop 4.52 + *So VMS__init just births the master virtual processor same way it births 4.53 + * all the others -- then does any extra setup needed and puts it into the 4.54 + * work queue. 4.55 + *However means have to make masterEnv a global static volatile the same way 4.56 + * did with readyToAnimateQ in core loop. -- for performance, put the 4.57 + * jump to the core loop directly in here, and have it directly jump back. 4.58 + * 4.59 + * 4.60 + *Aug 18, 2010 -- Going to a separate MasterVP for each core, to see if this 4.61 + * avoids the suspected bug in the system stack that causes bizarre faults 4.62 + * at random places in the system code. 4.63 + * 4.64 + *So, this function is coupled to each of the MasterVPs, -- meaning this 4.65 + * function can't rely on a particular stack and frame -- each MasterVP that 4.66 + * animates this function has a different one. 4.67 + * 4.68 + *At this point, the masterLoop does not write itself into the queue anymore, 4.69 + * instead, the coreLoop acquires the masterLock when it has nothing to 4.70 + * animate, and then animates its own masterLoop. However, still try to put 4.71 + * several AppVPs into the queue to amortize the startup cost of switching 4.72 + * to the MasterVP. Note, don't have to worry about latency of requests much 4.73 + * because most requests generate work for same core -- only latency issue 4.74 + * is case when other cores starved and one core's requests generate work 4.75 + * for them -- so keep max in queue to 3 or 4.. 
4.76 + */ 4.77 +void masterLoop( void *initData, SlaveVP *animatingPr ) 4.78 + { 4.79 + int32 slotIdx, numSlotsFilled; 4.80 + SlaveVP *schedVirtPr; 4.81 + SchedSlot *currSlot, **schedSlots; 4.82 + MasterEnv *masterEnv; 4.83 + VMSQueueStruc *readyToAnimateQ; 4.84 + 4.85 + Sched_Assigner slaveScheduler; 4.86 + RequestHandler requestHandler; 4.87 + void *semanticEnv; 4.88 + 4.89 + int32 thisCoresIdx; 4.90 + SlaveVP *masterPr; 4.91 + volatile SlaveVP *volatileMasterPr; 4.92 + 4.93 + volatileMasterPr = animatingPr; 4.94 + masterPr = (SlaveVP*)volatileMasterPr; //used to force re-define after jmp 4.95 + 4.96 + //First animation of each MasterVP will in turn animate this part 4.97 + // of setup code.. (VP creator sets up the stack as if this function 4.98 + // was called normally, but actually get here by jmp) 4.99 + //So, setup values about stack ptr, jmp pt and all that 4.100 + //masterPr->resumeInstrPtr = &&masterLoopStartPt; 4.101 + 4.102 + 4.103 + //Note, got rid of writing the stack and frame ptr up here, because 4.104 + // only one 4.105 + // core can ever animate a given MasterVP, so don't need to communicate 4.106 + // new frame and stack ptr to the MasterVP storage before a second 4.107 + // version of that MasterVP can get animated on a different core. 4.108 + //Also got rid of the busy-wait. 4.109 + 4.110 + 4.111 + //masterLoopStartPt: 4.112 + while(1){ 4.113 + 4.114 + //============================= MEASUREMENT STUFF ======================== 4.115 + #ifdef MEAS__TIME_MASTER 4.116 + //Total Master time includes one coreloop time -- just assume the core 4.117 + // loop time is same for Master as for AppVPs, even though it may be 4.118 + // smaller due to higher predictability of the fixed jmp. 
4.119 + saveLowTimeStampCountInto( masterPr->startMasterTSCLow ); 4.120 + #endif 4.121 + //======================================================================== 4.122 + 4.123 + masterEnv = (MasterEnv*)_VMSMasterEnv; 4.124 + 4.125 + //GCC may optimize so doesn't always re-define from frame-storage 4.126 + masterPr = (SlaveVP*)volatileMasterPr; //just to make sure after jmp 4.127 + thisCoresIdx = masterPr->coreAnimatedBy; 4.128 + readyToAnimateQ = masterEnv->readyToAnimateQs[thisCoresIdx]; 4.129 + schedSlots = masterEnv->allSchedSlots[thisCoresIdx]; 4.130 + 4.131 + requestHandler = masterEnv->requestHandler; 4.132 + slaveScheduler = masterEnv->slaveSchedAssigner; 4.133 + semanticEnv = masterEnv->semanticEnv; 4.134 + 4.135 + 4.136 + //Poll each slot's Done flag 4.137 + numSlotsFilled = 0; 4.138 + for( slotIdx = 0; slotIdx < NUM_SCHED_SLOTS; slotIdx++) 4.139 + { 4.140 + currSlot = schedSlots[ slotIdx ]; 4.141 + 4.142 + if( currSlot->workIsDone ) 4.143 + { 4.144 + currSlot->workIsDone = FALSE; 4.145 + currSlot->needsProcrAssigned = TRUE; 4.146 + 4.147 + //process requests from slave to master 4.148 + //====================== MEASUREMENT STUFF =================== 4.149 + #ifdef MEAS__TIME_PLUGIN 4.150 + int32 startStamp1, endStamp1; 4.151 + saveLowTimeStampCountInto( startStamp1 ); 4.152 + #endif 4.153 + //============================================================ 4.154 + (*requestHandler)( currSlot->procrAssignedToSlot, semanticEnv ); 4.155 + //====================== MEASUREMENT STUFF =================== 4.156 + #ifdef MEAS__TIME_PLUGIN 4.157 + saveLowTimeStampCountInto( endStamp1 ); 4.158 + addIntervalToHist( startStamp1, endStamp1, 4.159 + _VMSMasterEnv->reqHdlrLowTimeHist ); 4.160 + addIntervalToHist( startStamp1, endStamp1, 4.161 + _VMSMasterEnv->reqHdlrHighTimeHist ); 4.162 + #endif 4.163 + //============================================================ 4.164 + } 4.165 + if( currSlot->needsProcrAssigned ) 4.166 + { //give slot a new virt procr 4.167 + 
schedVirtPr = 4.168 + (*slaveScheduler)( semanticEnv, thisCoresIdx ); 4.169 + 4.170 + if( schedVirtPr != NULL ) 4.171 + { currSlot->procrAssignedToSlot = schedVirtPr; 4.172 + schedVirtPr->schedSlot = currSlot; 4.173 + currSlot->needsProcrAssigned = FALSE; 4.174 + numSlotsFilled += 1; 4.175 + 4.176 + writeVMSQ( schedVirtPr, readyToAnimateQ ); 4.177 + } 4.178 + } 4.179 + } 4.180 + 4.181 + 4.182 + #ifdef USE_WORK_STEALING 4.183 + //If no slots filled, means no more work, look for work to steal. 4.184 + if( numSlotsFilled == 0 ) 4.185 + { gateProtected_stealWorkInto( currSlot, readyToAnimateQ, masterPr ); 4.186 + } 4.187 + #endif 4.188 + 4.189 + 4.190 + #ifdef MEAS__TIME_MASTER 4.191 + saveLowTimeStampCountInto( masterPr->endMasterTSCLow ); 4.192 + #endif 4.193 + 4.194 + masterSwitchToCoreLoop(animatingPr); 4.195 + flushRegisters(); 4.196 + }//MasterLoop 4.197 + 4.198 + 4.199 + } 4.200 + 4.201 + 4.202 + 4.203 +/*This has a race condition -- the coreloops are accessing their own queues 4.204 + * at the same time that this work-stealer on a different core is trying to 4.205 + */ 4.206 +void inline 4.207 +stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ, 4.208 + SlaveVP *masterPr ) 4.209 + { 4.210 + SlaveVP *stolenPr; 4.211 + int32 coreIdx, i; 4.212 + VMSQueueStruc *currQ; 4.213 + 4.214 + stolenPr = NULL; 4.215 + coreIdx = masterPr->coreAnimatedBy; 4.216 + for( i = 0; i < NUM_CORES -1; i++ ) 4.217 + { 4.218 + if( coreIdx >= NUM_CORES -1 ) 4.219 + { coreIdx = 0; 4.220 + } 4.221 + else 4.222 + { coreIdx++; 4.223 + } 4.224 + currQ = _VMSMasterEnv->readyToAnimateQs[coreIdx]; 4.225 + if( numInVMSQ( currQ ) > 0 ) 4.226 + { stolenPr = readVMSQ (currQ ); 4.227 + break; 4.228 + } 4.229 + } 4.230 + 4.231 + if( stolenPr != NULL ) 4.232 + { currSlot->procrAssignedToSlot = stolenPr; 4.233 + stolenPr->schedSlot = currSlot; 4.234 + currSlot->needsProcrAssigned = FALSE; 4.235 + 4.236 + writeVMSQ( stolenPr, readyToAnimateQ ); 4.237 + } 4.238 + } 4.239 + 4.240 +/*This 
algorithm makes the common case fast. Make the coreloop passive, 4.241 + * and show its progress. Make the stealer control a gate that coreloop 4.242 + * has to pass. 4.243 + *To avoid interference, only one stealer at a time. Use a global 4.244 + * stealer-lock. 4.245 + * 4.246 + *The pattern is based on a gate -- stealer shuts the gate, then monitors 4.247 + * to be sure any already past make it all the way out, before starting. 4.248 + *So, have a "progress" measure just before the gate, then have two after it, 4.249 + * one is in a "waiting room" outside the gate, the other is at the exit. 4.250 + *Then, the stealer first shuts the gate, then checks the progress measure 4.251 + * outside it, then looks to see if the progress measure at the exit is the 4.252 + * same. If yes, it knows the protected area is empty 'cause no other way 4.253 + * to get in and the last to get in also exited. 4.254 + *If the progress measure at the exit is not the same, then the stealer goes 4.255 + * into a loop checking both the waiting-area and the exit progress-measures 4.256 + * until one of them shows the same as the measure outside the gate. Might 4.257 + * as well re-read the measure outside the gate each go around, just to be 4.258 + * sure. It is guaranteed that one of the two will eventually match the one 4.259 + * outside the gate. 4.260 + * 4.261 + *Here's an informal proof of correctness: 4.262 + *The gate can be closed at any point, and have only four cases: 4.263 + * 1) coreloop made it past the gate-closing but not yet past the exit 4.264 + * 2) coreloop made it past the pre-gate progress update but not yet past 4.265 + * the gate, 4.266 + * 3) coreloop is right before the pre-gate update 4.267 + * 4) coreloop is past the exit and far from the pre-gate update. 
4.268 + * 4.269 + * Covering the cases in reverse order, 4.270 + * 4) is not a problem -- stealer will read pre-gate progress, see that it 4.271 + * matches exit progress, and the gate is closed, so stealer can proceed. 4.272 + * 3) stealer will read pre-gate progress just after coreloop updates it.. 4.273 + * so stealer goes into a loop until the coreloop causes wait-progress 4.274 + * to match pre-gate progress, so then stealer can proceed 4.275 + * 2) same as 3.. 4.276 + * 1) stealer reads pre-gate progress, sees that it's different than exit, 4.277 + * so goes into loop until exit matches pre-gate, now it knows coreloop 4.278 + * is not in protected and cannot get back in, so can proceed. 4.279 + * 4.280 + *Implementation for the stealer: 4.281 + * 4.282 + *First, acquire the stealer lock -- only cores with no work to do will 4.283 + * compete to steal, so not a big performance penalty having only one -- 4.284 + * will rarely have multiple stealers in a system with plenty of work -- and 4.285 + * in a system with little work, it doesn't matter. 4.286 + * 4.287 + *Note, have single-reader, single-writer pattern for all variables used to 4.288 + * communicate between stealer and victims 4.289 + * 4.290 + *So, scan the queues of the core loops, until find non-empty. Each core 4.291 + * has its own list that it scans. The list goes in order from closest to 4.292 + * furthest core, so it steals first from close cores. Later can add 4.293 + * taking info from the app about overlapping footprints, and scan all the 4.294 + * others then choose work with the most footprint overlap with the contents 4.295 + * of this core's cache. 4.296 + * 4.297 + *Now, have a victim want to take work from. So, shut the gate in that 4.298 + * coreloop, by setting the "gate closed" var on its stack to TRUE. 4.299 + *Then, read the core's pre-gate progress and compare to the core's exit 4.300 + * progress. 4.301 + *If same, can proceed to take work from the coreloop's queue. 
When done, 4.302 + * write FALSE to gate closed var. 4.303 + *If different, then enter a loop that reads the pre-gate progress, then 4.304 + * compares to exit progress then to wait progress. When one of two 4.305 + * matches, proceed. Take work from the coreloop's queue. When done, 4.306 + * write FALSE to the gate closed var. 4.307 + * 4.308 + */ 4.309 +void inline 4.310 +gateProtected_stealWorkInto( SchedSlot *currSlot, 4.311 + VMSQueueStruc *myReadyToAnimateQ, 4.312 + SlaveVP *masterPr ) 4.313 + { 4.314 + SlaveVP *stolenPr; 4.315 + int32 coreIdx, i, haveAVictim, gotLock; 4.316 + VMSQueueStruc *victimsQ; 4.317 + 4.318 + volatile GateStruc *vicGate; 4.319 + int32 coreMightBeInProtected; 4.320 + 4.321 + 4.322 + 4.323 + //see if any other cores have work available to steal 4.324 + haveAVictim = FALSE; 4.325 + coreIdx = masterPr->coreAnimatedBy; 4.326 + for( i = 0; i < NUM_CORES -1; i++ ) 4.327 + { 4.328 + if( coreIdx >= NUM_CORES -1 ) 4.329 + { coreIdx = 0; 4.330 + } 4.331 + else 4.332 + { coreIdx++; 4.333 + } 4.334 + victimsQ = _VMSMasterEnv->readyToAnimateQs[coreIdx]; 4.335 + if( numInVMSQ( victimsQ ) > 0 ) 4.336 + { haveAVictim = TRUE; 4.337 + vicGate = _VMSMasterEnv->workStealingGates[ coreIdx ]; 4.338 + break; 4.339 + } 4.340 + } 4.341 + if( !haveAVictim ) return; //no work to steal, exit 4.342 + 4.343 + //have a victim core, now get the stealer-lock 4.344 + gotLock =__sync_bool_compare_and_swap( &(_VMSMasterEnv->workStealingLock), 4.345 + UNLOCKED, LOCKED ); 4.346 + if( !gotLock ) return; //go back to core loop, which will re-start master 4.347 + 4.348 + 4.349 + //====== Start Gate-protection ======= 4.350 + vicGate->gateClosed = TRUE; 4.351 + coreMightBeInProtected= vicGate->preGateProgress != vicGate->exitProgress; 4.352 + while( coreMightBeInProtected ) 4.353 + { //wait until sure 4.354 + if( vicGate->preGateProgress == vicGate->waitProgress ) 4.355 + coreMightBeInProtected = FALSE; 4.356 + if( vicGate->preGateProgress == vicGate->exitProgress ) 4.357 + 
coreMightBeInProtected = FALSE; 4.358 + } 4.359 + 4.360 + stolenPr = readVMSQ ( victimsQ ); 4.361 + 4.362 + vicGate->gateClosed = FALSE; 4.363 + //======= End Gate-protection ======= 4.364 + 4.365 + 4.366 + if( stolenPr != NULL ) //victim could have been in protected and taken 4.367 + { currSlot->procrAssignedToSlot = stolenPr; 4.368 + stolenPr->schedSlot = currSlot; 4.369 + currSlot->needsProcrAssigned = FALSE; 4.370 + 4.371 + writeVMSQ( stolenPr, myReadyToAnimateQ ); 4.372 + } 4.373 + 4.374 + //unlock the work stealing lock 4.375 + _VMSMasterEnv->workStealingLock = UNLOCKED; 4.376 + }
5.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 5.2 +++ b/VMS.h Wed Feb 22 11:39:12 2012 -0800 5.3 @@ -0,0 +1,377 @@ 5.4 +/* 5.5 + * Copyright 2009 OpenSourceStewardshipFoundation.org 5.6 + * Licensed under GNU General Public License version 2 5.7 + * 5.8 + * Author: seanhalle@yahoo.com 5.9 + * 5.10 + */ 5.11 + 5.12 +#ifndef _VMS_H 5.13 +#define _VMS_H 5.14 +#define _GNU_SOURCE 5.15 + 5.16 +#include "VMS_primitive_data_types.h" 5.17 +#include "C_Libraries/DynArray/DynArray.h" 5.18 +#include "C_Libraries/Hash_impl/PrivateHash.h" 5.19 +#include "C_Libraries/Histogram/Histogram.h" 5.20 +#include "C_Libraries/Queue_impl/PrivateQueue.h" 5.21 +#include "vmalloc.h" 5.22 + 5.23 +#include <pthread.h> 5.24 +#include <sys/time.h> 5.25 + 5.26 +//================= Defines: included from separate files ================= 5.27 +// 5.28 +// Note: ALL defines are in other files, none are in here 5.29 +// 5.30 +#include "VMS_defs__main.h" 5.31 + 5.32 + 5.33 +//================================ Typedefs ================================= 5.34 +// 5.35 +typedef unsigned long long TSCount; 5.36 +typedef union 5.37 + { uint32 lowHigh[2]; 5.38 + uint64 longVal; 5.39 + } 5.40 +TSCountLowHigh; 5.41 + 5.42 +typedef struct _SchedSlot SchedSlot; 5.43 +typedef struct _VMSReqst VMSReqst; 5.44 +typedef struct _SlaveVP SlaveVP; 5.45 +typedef struct _MasterVP MasterVP; 5.46 +typedef struct _IntervalProbe IntervalProbe; 5.47 +typedef struct _GateStruc GateStruc; 5.48 + 5.49 + 5.50 +typedef SlaveVP * (*Sched_Assigner) ( void *, int ); //semEnv, coreIdx 5.51 +typedef void (*RequestHandler) ( SlaveVP *, void * ); //prWReqst, semEnv 5.52 +typedef void (*TopLevelFnPtr) ( void *, SlaveVP * ); //initData, animPr 5.53 +typedef void TopLevelFn ( void *, SlaveVP * ); //initData, animPr 5.54 +typedef void (*ResumeVPFnPtr) ( SlaveVP *, void * ); 5.55 + 5.56 +//============================= Statistics ================================== 5.57 + 5.58 +inline TSCount getTSCount(); 5.59 + 5.60 +//============= Request 
Related =========== 5.61 +// 5.62 + 5.63 +enum VMSReqstType //avoid starting enums at 0, for debug reasons 5.64 + { 5.65 + semantic = 1, 5.66 + createReq, 5.67 + dissipate, 5.68 + VMSSemantic //goes with VMSSemReqst below 5.69 + }; 5.70 + 5.71 +struct _VMSReqst 5.72 + { 5.73 + enum VMSReqstType reqType;//used for dissipate and in future for IO requests 5.74 + void *semReqData; 5.75 + 5.76 + VMSReqst *nextReqst; 5.77 + }; 5.78 +//VMSReqst 5.79 + 5.80 +enum VMSSemReqstType //These are equivalent to semantic requests, but for 5.81 + { // VMS's services available directly to app, like OS 5.82 + createProbe = 1, // and probe services -- like a VMS-wide built-in lang 5.83 + openFile, 5.84 + otherIO 5.85 + }; 5.86 + 5.87 +typedef struct 5.88 + { enum VMSSemReqstType reqType; 5.89 + SlaveVP *requestingPr; 5.90 + char *nameStr; //for create probe 5.91 + } 5.92 + VMSSemReq; 5.93 + 5.94 + 5.95 +//==================== Core data structures =================== 5.96 + 5.97 +struct _SchedSlot 5.98 + { 5.99 + int workIsDone; 5.100 + int needsProcrAssigned; 5.101 + SlaveVP *procrAssignedToSlot; 5.102 + }; 5.103 +//SchedSlot 5.104 + 5.105 +/*WARNING: re-arranging this data structure could cause VP switching 5.106 + * assembly code to fail -- hard-codes offsets of fields 5.107 + */ 5.108 +struct _SlaveVP 5.109 + { int procrID; //for debugging -- count up each time create 5.110 + int coreAnimatedBy; 5.111 + void *startOfStack; 5.112 + void *stackPtr; 5.113 + void *framePtr; 5.114 + void *resumeInstrPtr; 5.115 + 5.116 + void *coreLoopStartPt; //allows proto-runtime to be linked later 5.117 + void *coreLoopFramePtr; //restore before jmp back to core loop 5.118 + void *coreLoopStackPtr; //restore before jmp back to core loop 5.119 + 5.120 + SchedSlot *schedSlot; 5.121 + VMSReqst *requests; 5.122 + 5.123 + void *semanticData; //this livesUSE_GNU here for the life of VP 5.124 + void *dataRetFromReq;//values returned from plugin to VP go here 5.125 + 5.126 + //=========== MEASUREMENT STUFF 
========== 5.127 + #ifdef MEAS__TIME_STAMP_SUSP 5.128 + uint32 preSuspTSCLow; 5.129 + uint32 postSuspTSCLow; 5.130 + #endif 5.131 + #ifdef MEAS__TIME_MASTER /* in SlaveVP because multiple masterVPs*/ 5.132 + uint32 startMasterTSCLow;USE_GNU 5.133 + uint32 endMasterTSCLow; 5.134 + #endif 5.135 + #ifdef MEAS__TIME_2011_SYS 5.136 + TSCountLowHigh startSusp; 5.137 + uint64 totalSuspCycles; 5.138 + uint32 numGoodSusp; 5.139 + #endif 5.140 + //======================================== 5.141 + 5.142 + float64 createPtInSecs; //have space but don't use on some configs 5.143 + }; 5.144 +//SlaveVP 5.145 + 5.146 + 5.147 +/*WARNING: re-arranging this data structure could cause VP-switching 5.148 + * assembly code to fail -- hard-codes offsets of fields 5.149 + * (because -O3 messes with things otherwise) 5.150 + */ 5.151 +typedef struct 5.152 + { 5.153 + union{ //adds padding to put masterLock on its own cache-line to elim 5.154 + // false sharing (masterLock is most-accessed var in VMS) 5.155 + volatile int32 masterLock; 5.156 + char padding[CACHE_LINE_SZ]; 5.157 + } masterLockUnion; 5.158 + Sched_Assigner slaveSchedAssigner; 5.159 + RequestHandler requestHandler; 5.160 + 5.161 + SchedSlot ***allSchedSlots; 5.162 + VMSQueueStruc **readyToAnimateQs; 5.163 + SlaveVP **masterVPs; 5.164 + 5.165 + void *semanticEnv; 5.166 + void *OSEventStruc; //for future, when add I/O to BLIS 5.167 + MallocArrays *freeLists; 5.168 + int32 amtOfOutstandingMem; //total currently allocated 5.169 + 5.170 + void *coreLoopReturnPt;//addr to jump to to re-enter coreLoop 5.171 + 5.172 + int32 setupComplete; 5.173 + //int32 numMasterInARow[NUM_CORES];//detect back-to-back masterVP 5.174 + GateStruc *workStealingGates[ NUM_CORES ]; //concurrent work-steal 5.175 + int32 workStealingLock; 5.176 + 5.177 + int32 numVPsCreated; //gives ordering to processor creation 5.178 + 5.179 + //=========== MEASUREMENT STUFF ============= 5.180 + IntervalProbe **intervalProbes; 5.181 + PrivDynArrayInfo 
*dynIntervalProbesInfo; 5.182 + HashTable *probeNameHashTbl; 5.183 + int32 masterCreateProbeID; 5.184 + float64 createPtInSecs; 5.185 + Histogram **measHists; 5.186 + PrivDynArrayInfo *measHistsInfo; 5.187 + #ifdef MEAS__TIME_PLUGIN 5.188 + Histogram *reqHdlrLowTimeHist; 5.189 + Histogram *reqHdlrHighTimeHist; 5.190 + #endif 5.191 + #ifdef MEAS__TIME_MALLOC 5.192 + Histogram *mallocTimeHist; 5.193 + Histogram *freeTimeHist; 5.194 + #endif 5.195 + #ifdef MEAS__TIME_MASTER_LOCK 5.196 + Histogram *masterLockLowTimeHist; 5.197 + Histogram *masterLockHighTimeHist; 5.198 + #endif 5.199 + #ifdef MEAS__TIME_2011_SYS 5.200 + TSCountLowHigh startMaster; 5.201 + uint64 totalMasterCycles; 5.202 + uint32 numMasterAnimations; 5.203 + TSCountLowHigh startReqHdlr; 5.204 + uint64 totalPluginCycles; 5.205 + uint32 numPluginAnimations; 5.206 + uint64 cyclesTillStartMasterLoop; 5.207 + TSCountLowHigh endMasterLoop; 5.208 + #endif 5.209 + //========================================== 5.210 + } 5.211 +MasterEnv; 5.212 + 5.213 +//========================= Extra Stuff Data Strucs ======================= 5.214 +typedef struct 5.215 + { 5.216 + 5.217 + } 5.218 +VMSExcp; 5.219 + 5.220 +struct _GateStruc 5.221 + { 5.222 + int32 gateClosed; 5.223 + int32 preGateProgress; 5.224 + int32 waitProgress; 5.225 + int32 exitProgress; 5.226 + }; 5.227 +//GateStruc 5.228 + 5.229 +//======================= OS Thread related =============================== 5.230 + 5.231 +void * coreLoop( void *paramsIn ); //standard PThreads fn prototype 5.232 +void * coreLoop_Seq( void *paramsIn ); //standard PThreads fn prototype 5.233 +void masterLoop( void *initData, SlaveVP *masterVP ); 5.234 + 5.235 + 5.236 +typedef struct 5.237 + { 5.238 + void *endThdPt; 5.239 + unsigned int coreNum; 5.240 + } 5.241 +ThdParams; 5.242 + 5.243 +pthread_t coreLoopThdHandles[ NUM_CORES ]; //pthread's virt-procr state 5.244 +ThdParams *coreLoopThdParams [ NUM_CORES ]; 5.245 +pthread_mutex_t suspendLock; 5.246 +pthread_cond_t 
suspend_cond; 5.247 + 5.248 + 5.249 + 5.250 +//============================= Global Vars ================================ 5.251 + 5.252 +volatile MasterEnv *_VMSMasterEnv __align_to_cacheline__; 5.253 + 5.254 + 5.255 + 5.256 + 5.257 +//========================= Function Prototypes =========================== 5.258 + 5.259 + 5.260 +//========== Setup and shutdown ========== 5.261 +void 5.262 +VMS_int__init(); 5.263 + 5.264 +/*TODO: Fix seed-procr creation -- put box around language, have lang register stuff 5.265 + with VMS. 5.266 + Have main program explicitly INIT Lang! -- makes more sense to 5.267 + C programmers -- makes it clear that there's a transition. 5.268 +(might need to have the pthreads remain waiting for 5.269 + cond until work is scheduled) 5.270 +Have main do call to tell language to perform work -- as was done with DKU 5.271 + 5.272 +Ex: "HWSim__run_a_simulation(netlist, paramBag);" 5.273 + "processID = SSR__run_program(seed_fn, seedData); " 5.274 + "SSR__Wait_for_program_to_end(processID);" 5.275 + "SSR__run_program_and_wait_till_it_ends(seed_fn, seedData);" 5.276 + 5.277 + This allows multiple languages to be started, and programs to be run in several of them, 5.278 + overlapped, or one program to be run that uses multiple langs..? 
5.279 + So, each program is in a separate directory: 5.280 + "HWSim_ArchDef__PingPong" "SSR_Program__Blocked_Matrix_Mult" 5.281 + 5.282 + Those programs can talk to each other, via VMS, by handles they each 5.283 + return 5.284 + "processIDs[0] = SSR__run_program(seed_fn1, seedData1);" 5.285 + "processIDs[1] = SSR__run_program(seed_fn2, seedData2);" 5.286 + "SSR__link_programs(processIDs, 2);" 5.287 +or even 5.288 + "processIDs[0] = Vthread__run_program(seed_fn1, seedData1);" 5.289 + "processIDs[1] = SSR__run_program(seed_fn2, seedData2);" 5.290 + "VMS__link_programs(processIDs, 2);" 5.291 + Then, the programs just know they sync with the other prog, but use their own 5.292 + lang's sync constructs -- VMS uses message system to establish tie-pt, 5.293 + each lang defines what a tie-point means to it.. (work with the 5.294 + differing semantics?) */ 5.295 +void 5.296 +VMS_WL__start_the_work_then_wait_until_done(); 5.297 + 5.298 +void 5.299 +VMS_int__shutdown(); 5.300 + 5.301 +void 5.302 +VMS_int__cleanup_at_end_of_shutdown(); 5.303 + 5.304 + 5.305 +//============== =============== 5.306 + 5.307 +inline SlaveVP * 5.308 +VMS_int__create_procr( TopLevelFnPtr fnPtr, void *dataParam ); 5.309 + 5.310 +inline void 5.311 +VMS_int__point_slave_to_Fn( SlaveVP *slaveVP, TopLevelFnPtr fnPtr, 5.312 + void *dataParam); 5.313 + 5.314 +void 5.315 +VMS_int__save_return_addr_into_ptd_to_loc(void *ptrToReturnAddrHoldingLoc); 5.316 + 5.317 +void 5.318 +VMS_int__write_return_addr_from_ptd_to_loc(void *ptrToReturnAddrHoldingLoc); 5.319 + 5.320 +void 5.321 +VMS_int__dissipate_procr( SlaveVP *procrToDissipate ); 5.322 + 5.323 + //Use this to create processor inside entry point & other places outside 5.324 + // the VMS system boundary (IE, not run in slave nor Master) 5.325 +SlaveVP * 5.326 +VMS_ext__create_procr( TopLevelFnPtr fnPtr, void *dataParam ); 5.327 + 5.328 +void 5.329 +VMS_ext__dissipate_procr( SlaveVP *procrToDissipate ); 5.330 + 5.331 +void 5.332 +VMS_PI__throw_exception( char *msgStr, SlaveVP 
*reqstPr, VMSExcp *excpData ); 5.333 + 5.334 +void * 5.335 +VMS_WL__give_sem_env_for( SlaveVP *animPr ); 5.336 + 5.337 +//============== Request Related =============== 5.338 + 5.339 +void 5.340 +VMS_int__suspend_procr( SlaveVP *callingPr ); 5.341 + 5.342 +inline void 5.343 +VMS_WL__add_sem_request_in_mallocd_VMSReqst( void *semReqData, SlaveVP *callingPr ); 5.344 + 5.345 +inline void 5.346 +VMS_WL__send_sem_request( void *semReqData, SlaveVP *callingPr ); 5.347 + 5.348 +void 5.349 +VMS_WL__send_create_procr_req( void *semReqData, SlaveVP *reqstingPr ); 5.350 + 5.351 +void inline 5.352 +VMS_WL__send_dissipate_req( SlaveVP *prToDissipate ); 5.353 + 5.354 +inline void 5.355 +VMS_WL__send_VMSSem_request( void *semReqData, SlaveVP *callingPr ); 5.356 + 5.357 +VMSReqst * 5.358 +VMS_PI__take_next_request_out_of( SlaveVP *procrWithReq ); 5.359 + 5.360 +inline void * 5.361 +VMS_PI__take_sem_reqst_from( VMSReqst *req ); 5.362 + 5.363 +void inline 5.364 +VMS_PI__handle_VMSSemReq( VMSReqst *req, SlaveVP *requestingPr, void *semEnv, 5.365 + ResumeVPFnPtr resumePrFnPtr ); 5.366 + 5.367 +//======================== MEASUREMENT ====================== 5.368 +uint64 5.369 +VMS_WL__give_num_plugin_cycles(); 5.370 +uint32 5.371 +VMS_WL__give_num_plugin_animations(); 5.372 + 5.373 + 5.374 + 5.375 +#include "VMS__HW_dependent.h" 5.376 +#include "probes.h" 5.377 +#include "vutilities.h" 5.378 + 5.379 +#endif /* _VMS_H */ 5.380 +
6.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 6.2 +++ b/VMS__HW_dependent.c Wed Feb 22 11:39:12 2012 -0800 6.3 @@ -0,0 +1,47 @@ 6.4 +/* 6.5 + * This File contains all hardware dependent C code. 6.6 + */ 6.7 + 6.8 + 6.9 +#include "VMS.h" 6.10 + 6.11 +/*Set up the stack with __cdecl structure on it 6.12 + * Except doing a trick for 64 bits, where put top-level fn pointer on 6.13 + * stack, then call an assembly helper that copies it into a reg and 6.14 + * jumps to it. So, set the resumeInstrPtr to the helper-assembly. 6.15 + *No need to save registers on old stack frame, because there's no old 6.16 + * animator state to return to 6.17 + */ 6.18 +VMS_int__point_slave_to_Fn( SlaveVP *slaveVP, TopLevelFnPtr fnPtr, 6.19 + void *dataParam) 6.20 + { void *stackPtr; 6.21 + 6.22 +// Start of Hardware dependent part 6.23 + 6.24 + //Set slave's instr pointer to a helper Fn that copies params from stack 6.25 + slaveVP->resumeInstrPtr = (TopLevelFnPtr)&startUpTopLevelFn; 6.26 + 6.27 + //fnPtr takes two params -- void *dataParam & void *animProcr 6.28 + // Stack grows *down*, so start it at highest stack addr, minus room 6.29 + // for 2 params + return addr. 6.30 + stackPtr = 6.31 + (void *)slaveVP->startOfStack + VIRT_PROCR_STACK_SIZE - 4*sizeof(void*); 6.32 + 6.33 + //setup __cdecl on stack 6.34 + //Normally, return Addr is in loc pointed to by stackPtr, but doing a 6.35 + // trick for 64 bit arch, where put ptr to top-level fn there instead, 6.36 + // and set resumeInstrPtr to a helper-fn that copies the top-level 6.37 + // fn ptr and params into registers. 
6.38 + //Then, dataParam is at stackPtr + 8 bytes, & animating SlaveVP above 6.39 + *((SlaveVP**)stackPtr + 2 ) = slaveVP; //rightmost param 6.40 + *((void**)stackPtr + 1 ) = dataParam; //next param to left 6.41 + *((void**)stackPtr) = (void*)fnPtr; //copied to reg by helper Fn 6.42 + 6.43 + 6.44 +// end of Hardware dependent part 6.45 + 6.46 + //core loop will switch to stack & frame pointers stored in slave, 6.47 + // suspend will save processor's stack and frame into slave 6.48 + slaveVP->stackPtr = slaveVP->startOfStack; 6.49 + slaveVP->framePtr = slaveVP->startOfStack; 6.50 + } 6.51 \ No newline at end of file
7.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 7.2 +++ b/VMS__HW_dependent.h Wed Feb 22 11:39:12 2012 -0800 7.3 @@ -0,0 +1,33 @@ 7.4 +/* 7.5 + * Copyright 2009 OpenSourceStewardshipFoundation.org 7.6 + * Licensed under GNU General Public License version 2 7.7 + * 7.8 + * Author: seanhalle@yahoo.com 7.9 + * 7.10 + */ 7.11 + 7.12 +#ifndef _ProcrContext_H 7.13 +#define _ProcrContext_H 7.14 +#define _GNU_SOURCE 7.15 + 7.16 +void saveCoreLoopReturnAddr(void **returnAddress); 7.17 + 7.18 +void switchToVP(SlaveVP *nextProcr); 7.19 + 7.20 +void switchToCoreLoop(SlaveVP *nextProcr); 7.21 + 7.22 +void masterSwitchToCoreLoop(SlaveVP *nextProcr); 7.23 + 7.24 +void startUpTopLevelFn(); 7.25 + 7.26 +void *asmTerminateCoreLoop(SlaveVP *currPr); 7.27 + 7.28 +#define flushRegisters() \ 7.29 + asm volatile ("":::"%rbx", "%r12", "%r13","%r14","%r15") 7.30 + 7.31 +inline SlaveVP * 7.32 +create_procr_helper( SlaveVP *newPr, TopLevelFnPtr fnPtr, 7.33 + void *dataParam, void *stackLocs ); 7.34 + 7.35 +#endif /* _ProcrContext_H */ 7.36 +
/* File: VMS__HW_dependent.s  (new file in this changeset) */
.data


.text

/*Struct-field offsets used by the context-switch routines below.
 * They are hard-coded, so they must be kept in step by hand with the
 * layout of the C structs they index into.
 *NOTE(review): the original header comment of asmTerminateCoreLoop listed
 * masterLock at 0x58, while every other table -- and the only instruction
 * that touches the lock -- used 0x54.  0x54 is kept here; confirm all of
 * these against the current SlaveVP and master-env struct layouts.
 */
        /* SlaveVP offsets */
        .equ    VP_STACK_PTR,            0x10   # stackPtr
        .equ    VP_FRAME_PTR,            0x18   # framePtr
        .equ    VP_RESUME_INSTR_PTR,     0x20   # resumeInstrPtr
        .equ    VP_CORELOOP_FRAME_PTR,   0x30   # coreLoopFramePtr
        .equ    VP_CORELOOP_STACK_PTR,   0x38   # coreLoopStackPtr
        /* _VMSMasterEnv offsets */
        .equ    ENV_CORELOOP_RETURN_PT,  0x48   # coreLoopReturnPt
        .equ    ENV_MASTER_LOCK,         0x54   # masterLock


/*Save the address of the coreLoop return label into the location pointed to.
 *Arguments: %rdi = pointer to the variable that receives the address
 */
.globl saveCoreLoopReturnAddr
saveCoreLoopReturnAddr:
        movq    $coreLoopReturn, %rcx           #load label address
        movq    %rcx, (%rdi)                    #save address to pointer
        ret


/*Trick for 64 bit arch -- copies args from stack into regs, then does jmp
 * to the top-level function, whose address is at the top of the stack.
 */
.globl startUpTopLevelFn
startUpTopLevelFn:
        movq    %rdi, %rsi              #get second argument from first argument of switchVP
        movq    0x08(%rsp), %rdi        #get first argument from stack
        movq    (%rsp), %rax            #get top-level function's addr from stack
        jmp     *%rax                   #jump to the top-level function


/*Switches from CoreLoop to a VP -- either a normal VP or the Master Loop.
 *Switch to virt procr's stack and frame ptr, then jump to virt procr fn.
 *Arguments: %rdi = SlaveVP
 */
.globl switchToVP
switchToVP:
        movq    %rsp, VP_CORELOOP_STACK_PTR(%rdi)       #save core loop stack pointer
        movq    %rbp, VP_CORELOOP_FRAME_PTR(%rdi)       #save core loop frame pointer
        movq    VP_STACK_PTR(%rdi), %rsp                #restore stack pointer
        movq    VP_FRAME_PTR(%rdi), %rbp                #restore frame pointer
        movq    VP_RESUME_INSTR_PTR(%rdi), %rax         #get jmp pointer
        jmp     *%rax                                   #jmp to VP
coreLoopReturn:
        ret


/*Switches to core loop.  Saves return address.
 *Arguments: %rdi = SlaveVP
 */
.globl switchToCoreLoop
switchToCoreLoop:
        movq    $VPReturn, VP_RESUME_INSTR_PTR(%rdi)    #store return address
        movq    %rsp, VP_STACK_PTR(%rdi)                #save stack pointer
        movq    %rbp, VP_FRAME_PTR(%rdi)                #save frame pointer
        movq    VP_CORELOOP_STACK_PTR(%rdi), %rsp       #restore stack pointer
        movq    VP_CORELOOP_FRAME_PTR(%rdi), %rbp       #restore frame pointer
        movq    $_VMSMasterEnv, %rcx
        movq    (%rcx), %rcx
        movq    ENV_CORELOOP_RETURN_PT(%rcx), %rax      #get CoreLoopStartPt
        jmp     *%rax                                   #jmp to CoreLoop
VPReturn:
        ret



/*Switches to core loop from master.  Saves return address.
 *Releases masterLock so the next MasterLoop can be executed.
 *Arguments: %rdi = SlaveVP
 */
.globl masterSwitchToCoreLoop
masterSwitchToCoreLoop:
        movq    $MasterReturn, VP_RESUME_INSTR_PTR(%rdi) #store return address
        movq    %rsp, VP_STACK_PTR(%rdi)                #save stack pointer
        movq    %rbp, VP_FRAME_PTR(%rdi)                #save frame pointer
        movq    VP_CORELOOP_STACK_PTR(%rdi), %rsp       #restore stack pointer
        movq    VP_CORELOOP_FRAME_PTR(%rdi), %rbp       #restore frame pointer
        movq    $_VMSMasterEnv, %rcx
        movq    (%rcx), %rcx
        movq    ENV_CORELOOP_RETURN_PT(%rcx), %rax      #get CoreLoopStartPt
        movl    $0x0, ENV_MASTER_LOCK(%rcx)             #release lock
        jmp     *%rax                                   #jmp to CoreLoop
MasterReturn:
        ret


/*Switch to terminateCoreLoop:
 * therefore switch to coreLoop context from master context.
 * No need to call, because the stack is already set up for switchVP,
 * virtPr is in %rdi, and both functions have the same argument.
 * Do not save registers of the VP, because this function never returns.
 *Arguments: %rdi = SlaveVP
 */
.globl asmTerminateCoreLoop
asmTerminateCoreLoop:
        movq    VP_CORELOOP_STACK_PTR(%rdi), %rsp       #restore stack pointer
        movq    VP_CORELOOP_FRAME_PTR(%rdi), %rbp       #restore frame pointer
        movq    $terminateCoreLoop, %rax
        jmp     *%rax                                   #jmp to CoreLoop


/*
 * This one for the sequential version is special.  It discards the current
 * stack and returns directly from the coreLoop after VMS__dissipate_procr
 * was called.
 * NOTE(review): the C sources in this changeset define
 * VMS_int__dissipate_procr / VMS_ext__dissipate_procr; confirm that the
 * symbol VMS__dissipate_procr called below still exists somewhere.
 * Arguments: %rdi = SlaveVP
 */
.globl asmTerminateCoreLoopSeq
asmTerminateCoreLoopSeq:
        movq    VP_CORELOOP_STACK_PTR(%rdi), %rsp       #restore stack pointer
        movq    VP_CORELOOP_FRAME_PTR(%rdi), %rbp       #restore frame pointer
        #argument is in %rdi
        call    VMS__dissipate_procr
        movq    %rbp, %rsp              #goto the coreLoop's stack
        pop     %rbp                    #restore the old framepointer
        ret                             #return from core loop


/*Takes the return addr off the stack and saves it into the loc pointed to
 * by %rdi.  The return addr is at 0x8(%rbp) for 64bit.
 *The .globl now names the label that is actually defined (it used to name
 * asm_save_ret_to_singleton, which left this routine unexported and so
 * unreachable from C).
 */
.globl VMS_int__save_return_addr_into_ptd_to_loc
VMS_int__save_return_addr_into_ptd_to_loc:
        movq    0x8(%rbp), %rax         #get ret address, rbp is the same as in the calling function
        movq    %rax, (%rdi)            #write ret addr to endInstrAddr field
        ret


/*Changes the return addr on the stack to the one pointed to by the
 * parameter.  The stack's return addr is at 0x8(%rbp).
 *The .globl now names the label that is actually defined (it used to name
 * asm_write_ret_from_singleton).
 */
.globl VMS_int__write_return_addr_from_ptd_to_loc
VMS_int__write_return_addr_from_ptd_to_loc:
        movq    (%rdi), %rax            #get return addr
        movq    %rax, 0x8(%rbp)         #write return addr to the stack of the caller
        ret

9.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 9.2 +++ b/VMS__PI.c Wed Feb 22 11:39:12 2012 -0800 9.3 @@ -0,0 +1,87 @@ 9.4 +/* 9.5 + * Copyright 2010 OpenSourceStewardshipFoundation 9.6 + * 9.7 + * Licensed under BSD 9.8 + */ 9.9 + 9.10 +#include <stdio.h> 9.11 +#include <stdlib.h> 9.12 +#include <string.h> 9.13 +#include <malloc.h> 9.14 +#include <inttypes.h> 9.15 +#include <sys/time.h> 9.16 + 9.17 +#include "VMS.h" 9.18 + 9.19 + 9.20 +/* 9.21 + */ 9.22 +VMSReqst * 9.23 +VMS_PI__take_next_request_out_of( SlaveVP *procrWithReq ) 9.24 + { VMSReqst *req; 9.25 + 9.26 + req = procrWithReq->requests; 9.27 + if( req == NULL ) return NULL; 9.28 + 9.29 + procrWithReq->requests = procrWithReq->requests->nextReqst; 9.30 + return req; 9.31 + } 9.32 + 9.33 + 9.34 +inline void * 9.35 +VMS_PI__take_sem_reqst_from( VMSReqst *req ) 9.36 + { 9.37 + return req->semReqData; 9.38 + } 9.39 + 9.40 + 9.41 + 9.42 +/* This is for OS requests and VMS infrastructure requests, such as to create 9.43 + * a probe -- a probe is inside the heart of VMS-core, it's not part of any 9.44 + * language -- but it's also a semantic thing that's triggered from and used 9.45 + * in the application.. so it crosses abstractions.. so, need some special 9.46 + * pattern here for handling such requests. 9.47 + * Doing this just like it were a second language sharing VMS-core. 9.48 + * 9.49 + * This is called from the language's request handler when it sees a request 9.50 + * of type VMSSemReq 9.51 + * 9.52 + * TODO: Later change this, to give probes their own separate plugin & have 9.53 + * VMS-core steer the request to appropriate plugin 9.54 + * Do the same for OS calls -- look later at it.. 
9.55 + */ 9.56 +void inline 9.57 +VMS_PI__handle_VMSSemReq( VMSReqst *req, SlaveVP *requestingPr, void *semEnv, 9.58 + ResumeVPFnPtr resumePrFnPtr ) 9.59 + { VMSSemReq *semReq; 9.60 + IntervalProbe *newProbe; 9.61 + 9.62 + semReq = req->semReqData; 9.63 + 9.64 + newProbe = VMS_int__malloc( sizeof(IntervalProbe) ); 9.65 + newProbe->nameStr = VMS_int__strDup( semReq->nameStr ); 9.66 + newProbe->hist = NULL; 9.67 + newProbe->schedChoiceWasRecorded = FALSE; 9.68 + 9.69 + //This runs in masterVP, so no race-condition worries 9.70 + newProbe->probeID = 9.71 + addToDynArray( newProbe, _VMSMasterEnv->dynIntervalProbesInfo ); 9.72 + 9.73 + requestingPr->dataRetFromReq = newProbe; 9.74 + 9.75 + (*resumePrFnPtr)( requestingPr, semEnv ); 9.76 + } 9.77 + 9.78 + 9.79 +/*Later, improve this -- for now, just exits the application after printing 9.80 + * the error message. 9.81 + */ 9.82 +void 9.83 +VMS_PI__throw_exception( char *msgStr, SlaveVP *reqstPr, VMSExcp *excpData ) 9.84 + { 9.85 + printf("%s",msgStr); 9.86 + fflush(stdin); 9.87 + exit(1); 9.88 + } 9.89 + 9.90 +
10.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 10.2 +++ b/VMS__WL.c Wed Feb 22 11:39:12 2012 -0800 10.3 @@ -0,0 +1,138 @@ 10.4 +/* 10.5 + * Copyright 2010 OpenSourceStewardshipFoundation 10.6 + * 10.7 + * Licensed under BSD 10.8 + */ 10.9 + 10.10 +#include <stdio.h> 10.11 +#include <stdlib.h> 10.12 +#include <string.h> 10.13 +#include <malloc.h> 10.14 +#include <inttypes.h> 10.15 +#include <sys/time.h> 10.16 + 10.17 +#include "VMS.h" 10.18 + 10.19 + 10.20 +/*Anticipating multi-tasking 10.21 + */ 10.22 +void * 10.23 +VMS_WL__give_sem_env_for( SlaveVP *animPr ) 10.24 + { 10.25 + return _VMSMasterEnv->semanticEnv; 10.26 + } 10.27 + 10.28 + 10.29 +/*For this implementation of VMS, it may not make much sense to have the 10.30 + * system of requests for creating a new processor done this way.. but over 10.31 + * the scope of single-master, multi-master, mult-tasking, OS-implementing, 10.32 + * distributed-memory, and so on, this gives VMS implementation a chance to 10.33 + * do stuff before suspend, in the AppVP, and in the Master before the plugin 10.34 + * is called, as well as in the lang-lib before this is called, and in the 10.35 + * plugin. So, this gives both VMS and language implementations a chance to 10.36 + * intercept at various points and do order-dependent stuff. 10.37 + *Having a standard VMSNewPrReqData struc allows the language to create and 10.38 + * free the struc, while VMS knows how to get the newPr if it wants it, and 10.39 + * it lets the lang have lang-specific data related to creation transported 10.40 + * to the plugin. 
10.41 + */ 10.42 +void 10.43 +VMS_WL__send_create_procr_req( void *semReqData, SlaveVP *reqstingPr ) 10.44 + { VMSReqst req; 10.45 + 10.46 + req.reqType = createReq; 10.47 + req.semReqData = semReqData; 10.48 + req.nextReqst = reqstingPr->requests; 10.49 + reqstingPr->requests = &req; 10.50 + 10.51 + VMS_int__suspend_procr( reqstingPr ); 10.52 + } 10.53 + 10.54 + 10.55 +/* 10.56 + *This adds a request to dissipate, then suspends the processor so that the 10.57 + * request handler will receive the request. The request handler is what 10.58 + * does the work of freeing memory and removing the processor from the 10.59 + * semantic environment's data structures. 10.60 + *The request handler also is what figures out when to shutdown the VMS 10.61 + * system -- which causes all the core loop threads to die, and returns from 10.62 + * the call that started up VMS to perform the work. 10.63 + * 10.64 + *This form is a bit misleading to understand if one is trying to figure out 10.65 + * how VMS works -- it looks like a normal function call, but inside it 10.66 + * sends a request to the request handler and suspends the processor, which 10.67 + * jumps out of the VMS__dissipate_procr function, and out of all nestings 10.68 + * above it, transferring the work of dissipating to the request handler, 10.69 + * which then does the actual work -- causing the processor that animated 10.70 + * the call of this function to disappear and the "hanging" state of this 10.71 + * function to just poof into thin air -- the virtual processor's trace 10.72 + * never returns from this call, but instead the virtual processor's trace 10.73 + * gets suspended in this call and all the virt processor's state disap- 10.74 + * pears -- making that suspend the last thing in the virt procr's trace. 
10.75 + */ 10.76 +void 10.77 +VMS_WL__send_dissipate_req( SlaveVP *procrToDissipate ) 10.78 + { VMSReqst req; 10.79 + 10.80 + req.reqType = dissipate; 10.81 + req.nextReqst = procrToDissipate->requests; 10.82 + procrToDissipate->requests = &req; 10.83 + 10.84 + VMS_int__suspend_procr( procrToDissipate ); 10.85 + } 10.86 + 10.87 + 10.88 + 10.89 +/*This call's name indicates that request is malloc'd -- so req handler 10.90 + * has to free any extra requests tacked on before a send, using this. 10.91 + * 10.92 + * This inserts the semantic-layer's request data into standard VMS carrier 10.93 + * request data-struct that is mallocd. The sem request doesn't need to 10.94 + * be malloc'd if this is called inside the same call chain before the 10.95 + * send of the last request is called. 10.96 + * 10.97 + *The request handler has to call VMS__free_VMSReq for any of these 10.98 + */ 10.99 +inline void 10.100 +VMS_WL__add_sem_request_in_mallocd_VMSReqst( void *semReqData, 10.101 + SlaveVP *callingPr ) 10.102 + { VMSReqst *req; 10.103 + 10.104 + req = VMS_int__malloc( sizeof(VMSReqst) ); 10.105 + req->reqType = semantic; 10.106 + req->semReqData = semReqData; 10.107 + req->nextReqst = callingPr->requests; 10.108 + callingPr->requests = req; 10.109 + } 10.110 + 10.111 +/*This inserts the semantic-layer's request data into standard VMS carrier 10.112 + * request data-struct is allocated on stack of this call & ptr to it sent 10.113 + * to plugin 10.114 + *Then it does suspend, to cause request to be sent. 
10.115 + */ 10.116 +inline void 10.117 +VMS_WL__send_sem_request( void *semReqData, SlaveVP *callingPr ) 10.118 + { VMSReqst req; 10.119 + 10.120 + req.reqType = semantic; 10.121 + req.semReqData = semReqData; 10.122 + req.nextReqst = callingPr->requests; 10.123 + callingPr->requests = &req; 10.124 + 10.125 + VMS_int__suspend_procr( callingPr ); 10.126 + } 10.127 + 10.128 + 10.129 +inline void 10.130 +VMS_WL__send_VMSSem_request( void *semReqData, SlaveVP *callingPr ) 10.131 + { VMSReqst req; 10.132 + 10.133 + req.reqType = VMSSemantic; 10.134 + req.semReqData = semReqData; 10.135 + req.nextReqst = callingPr->requests; //gab any other preceeding 10.136 + callingPr->requests = &req; 10.137 + 10.138 + VMS_int__suspend_procr( callingPr ); 10.139 + } 10.140 + 10.141 +
11.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 11.2 +++ b/VMS__int.c Wed Feb 22 11:39:12 2012 -0800 11.3 @@ -0,0 +1,155 @@ 11.4 +/* 11.5 + * Copyright 2010 OpenSourceStewardshipFoundation 11.6 + * 11.7 + * Licensed under BSD 11.8 + */ 11.9 + 11.10 +#include <stdio.h> 11.11 +#include <stdlib.h> 11.12 +#include <string.h> 11.13 +#include <malloc.h> 11.14 +#include <inttypes.h> 11.15 +#include <sys/time.h> 11.16 + 11.17 +#include "VMS.h" 11.18 + 11.19 + 11.20 +inline SlaveVP * 11.21 +VMS_int__create_procr( TopLevelFnPtr fnPtr, void *dataParam ) 11.22 + { SlaveVP *newPr; 11.23 + void *stackLocs; 11.24 + 11.25 + newPr = VMS_int__malloc( sizeof(SlaveVP) ); 11.26 + stackLocs = VMS_int__malloc( VIRT_PROCR_STACK_SIZE ); 11.27 + if( stackLocs == 0 ) 11.28 + { perror("VMS__malloc stack"); exit(1); } 11.29 + 11.30 + _VMSMasterEnv->numSlaves += 1; 11.31 + 11.32 + return create_procr_helper( newPr, fnPtr, dataParam, stackLocs ); 11.33 + } 11.34 + 11.35 +/* "ext" designates that it's for use outside the VMS system -- should only 11.36 + * be called from main thread or other thread -- never from code animated by 11.37 + * a VMS virtual processor. 11.38 + */ 11.39 +inline SlaveVP * 11.40 +VMS_ext__create_procr( TopLevelFnPtr fnPtr, void *dataParam ) 11.41 + { SlaveVP *newPr; 11.42 + char *stackLocs; 11.43 + 11.44 + newPr = malloc( sizeof(SlaveVP) ); 11.45 + stackLocs = malloc( VIRT_PROCR_STACK_SIZE ); 11.46 + if( stackLocs == 0 ) 11.47 + { perror("malloc stack"); exit(1); } 11.48 + 11.49 + return create_procr_helper( newPr, fnPtr, dataParam, stackLocs ); 11.50 + } 11.51 + 11.52 + 11.53 +//=========================================================================== 11.54 +/*there is a label inside this function -- save the addr of this label in 11.55 + * the callingPr struc, as the pick-up point from which to start the next 11.56 + * work-unit for that procr. If turns out have to save registers, then 11.57 + * save them in the procr struc too. 
Then do assembly jump to the CoreLoop's 11.58 + * "done with work-unit" label. The procr struc is in the request in the 11.59 + * slave that animated the just-ended work-unit, so all the state is saved 11.60 + * there, and will get passed along, inside the request handler, to the 11.61 + * next work-unit for that procr. 11.62 + */ 11.63 +void 11.64 +VMS_int__suspend_procr( SlaveVP *animatingPr ) 11.65 + { 11.66 + 11.67 + //The request to master will cause this suspended virt procr to get 11.68 + // scheduled again at some future point -- to resume, core loop jumps 11.69 + // to the resume point (below), which causes restore of saved regs and 11.70 + // "return" from this call. 11.71 + //animatingPr->resumeInstrPtr = &&ResumePt; 11.72 + 11.73 + //return ownership of the virt procr and sched slot to Master virt pr 11.74 + animatingPr->schedSlot->workIsDone = TRUE; 11.75 + 11.76 + //=========================== Measurement stuff ======================== 11.77 + #ifdef MEAS__TIME_STAMP_SUSP 11.78 + //record time stamp: compare to time-stamp recorded below 11.79 + saveLowTimeStampCountInto( animatingPr->preSuspTSCLow ); 11.80 + #endif 11.81 + //======================================================================= 11.82 + 11.83 + switchToCoreLoop(animatingPr); 11.84 + flushRegisters(); 11.85 + 11.86 + //======================================================================= 11.87 + 11.88 + #ifdef MEAS__TIME_STAMP_SUSP 11.89 + //NOTE: only take low part of count -- do sanity check when take diff 11.90 + saveLowTimeStampCountInto( animatingPr->postSuspTSCLow ); 11.91 + #endif 11.92 + 11.93 + return; 11.94 + } 11.95 + 11.96 + 11.97 +/* "ext" designates that it's for use outside the VMS system -- should only 11.98 + * be called from main thread or other thread -- never from code animated by 11.99 + * a SlaveVP, nor from a masterVP. 11.100 + * 11.101 + *Use this version to dissipate VPs created outside the VMS system. 
11.102 + */ 11.103 +void 11.104 +VMS_ext__dissipate_procr( SlaveVP *procrToDissipate ) 11.105 + { 11.106 + //NOTE: dataParam was given to the processor, so should either have 11.107 + // been alloc'd with VMS__malloc, or freed by the level above animPr. 11.108 + //So, all that's left to free here is the stack and the SlaveVP struc 11.109 + // itself 11.110 + //Note, should not stack-allocate the data param -- no guarantee, in 11.111 + // general that creating processor will outlive ones it creates. 11.112 + free( procrToDissipate->startOfStack ); 11.113 + free( procrToDissipate ); 11.114 + } 11.115 + 11.116 + 11.117 + 11.118 +/*This must be called by the request handler plugin -- it cannot be called 11.119 + * from the semantic library "dissipate processor" function -- instead, the 11.120 + * semantic layer has to generate a request, and the plug-in calls this 11.121 + * function. 11.122 + *The reason is that this frees the virtual processor's stack -- which is 11.123 + * still in use inside semantic library calls! 11.124 + * 11.125 + *This frees or recycles all the state owned by and comprising the VMS 11.126 + * portion of the animating virtual procr. The request handler must first 11.127 + * free any semantic data created for the processor that didn't use the 11.128 + * VMS_malloc mechanism. Then it calls this, which first asks the malloc 11.129 + * system to disown any state that did use VMS_malloc, and then frees the 11.130 + * statck and the processor-struct itself. 11.131 + *If the dissipated processor is the sole (remaining) owner of VMS__malloc'd 11.132 + * state, then that state gets freed (or sent to recycling) as a side-effect 11.133 + * of dis-owning it. 
11.134 + */ 11.135 +void 11.136 +VMS_int__dissipate_procr( SlaveVP *animatingPr ) 11.137 + { 11.138 + //dis-own all locations owned by this processor, causing to be freed 11.139 + // any locations that it is (was) sole owner of 11.140 +//TODO: implement VMS__malloc system, including "give up ownership" 11.141 + 11.142 + _VMSMasterEnv->numSlaves -= 1; 11.143 + if( _VMSMasterEnv->numSlaves == 0 ) 11.144 + { //no more work, so shutdown 11.145 + VMS_int__shutdown(); //note, creates 4 shut-down processors 11.146 + } 11.147 + 11.148 + //NOTE: dataParam was given to the processor, so should either have 11.149 + // been alloc'd with VMS__malloc, or freed by the level above animPr. 11.150 + //So, all that's left to free here is the stack and the SlaveVP struc 11.151 + // itself 11.152 + //Note, should not stack-allocate initial data -- no guarantee, in 11.153 + // general that creating processor will outlive ones it creates. 11.154 + VMS_int__free( animatingPr->startOfStack ); 11.155 + VMS_int__free( animatingPr ); 11.156 + } 11.157 + 11.158 +
12.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 12.2 +++ b/VMS__startup_and_shutdown.c Wed Feb 22 11:39:12 2012 -0800 12.3 @@ -0,0 +1,458 @@ 12.4 +/* 12.5 + * Copyright 2010 OpenSourceStewardshipFoundation 12.6 + * 12.7 + * Licensed under BSD 12.8 + */ 12.9 + 12.10 +#include <stdio.h> 12.11 +#include <stdlib.h> 12.12 +#include <string.h> 12.13 +#include <malloc.h> 12.14 +#include <inttypes.h> 12.15 +#include <sys/time.h> 12.16 + 12.17 +#include "VMS.h" 12.18 +#include "VMS__HW_dependent.h" 12.19 + 12.20 + 12.21 +#define thdAttrs NULL 12.22 + 12.23 +//=========================================================================== 12.24 +void 12.25 +shutdownFn( void *dummy, SlaveVP *dummy2 ); 12.26 + 12.27 +SchedSlot ** 12.28 +create_sched_slots(); 12.29 + 12.30 +void 12.31 +create_masterEnv(); 12.32 + 12.33 +void 12.34 +create_the_coreLoop_OS_threads(); 12.35 + 12.36 +MallocProlog * 12.37 +create_free_list(); 12.38 + 12.39 +void 12.40 +endOSThreadFn( void *initData, SlaveVP *animatingPr ); 12.41 + 12.42 +pthread_mutex_t suspendLock = PTHREAD_MUTEX_INITIALIZER; 12.43 +pthread_cond_t suspend_cond = PTHREAD_COND_INITIALIZER; 12.44 + 12.45 +//=========================================================================== 12.46 + 12.47 +/*Setup has two phases: 12.48 + * 1) Semantic layer first calls init_VMS, which creates masterEnv, and puts 12.49 + * the master virt procr into the work-queue, ready for first "call" 12.50 + * 2) Semantic layer then does its own init, which creates the seed virt 12.51 + * procr inside the semantic layer, ready to schedule it when 12.52 + * asked by the first run of the masterLoop. 12.53 + * 12.54 + *This part is bit weird because VMS really wants to be "always there", and 12.55 + * have applications attach and detach.. for now, this VMS is part of 12.56 + * the app, so the VMS system starts up as part of running the app. 
12.57 + * 12.58 + *The semantic layer is isolated from the VMS internals by making the 12.59 + * semantic layer do setup to a state that it's ready with its 12.60 + * initial virt procrs, ready to schedule them to slots when the masterLoop 12.61 + * asks. Without this pattern, the semantic layer's setup would 12.62 + * have to modify slots directly to assign the initial virt-procrs, and put 12.63 + * them into the readyToAnimateQ itself, breaking the isolation completely. 12.64 + * 12.65 + * 12.66 + *The semantic layer creates the initial virt procr(s), and adds its 12.67 + * own environment to masterEnv, and fills in the pointers to 12.68 + * the requestHandler and slaveScheduler plug-in functions 12.69 + */ 12.70 + 12.71 +/*This allocates VMS data structures, populates the master VMSProc, 12.72 + * and master environment, and returns the master environment to the semantic 12.73 + * layer. 12.74 + */ 12.75 +void 12.76 +VMS_int__init() 12.77 + { 12.78 + 12.79 +#ifdef SEQUENTIAL 12.80 + create_masterEnv(); 12.81 + flushRegisters(); //? not sure why here -- merten added it..? 12.82 +#else 12.83 + create_masterEnv(); 12.84 + create_the_coreLoop_OS_threads(); 12.85 +#endif 12.86 + } 12.87 + 12.88 +void 12.89 +create_masterEnv() 12.90 + { MasterEnv *masterEnv; 12.91 + VMSQueueStruc **readyToAnimateQs; 12.92 + int coreIdx; 12.93 + SlaveVP **masterVPs; 12.94 + SchedSlot ***allSchedSlots; //ptr to array of ptrs 12.95 + 12.96 + 12.97 + //Make the master env, which holds everything else 12.98 + _VMSMasterEnv = malloc( sizeof(MasterEnv) ); 12.99 + 12.100 + //Very first thing put into the master env is the free-list, seeded 12.101 + // with a massive initial chunk of memory. 12.102 + //After this, all other mallocs are VMS__malloc. 
12.103 + _VMSMasterEnv->freeListHead = VMS_ext__create_free_list(); 12.104 + 12.105 + 12.106 + //============================= MEASUREMENT STUFF ======================== 12.107 + #ifdef MEAS__TIME_MALLOC 12.108 + _VMSMasterEnv->mallocTimeHist = makeFixedBinHistExt( 100, 0, 30, 12.109 + "malloc_time_hist"); 12.110 + _VMSMasterEnv->freeTimeHist = makeFixedBinHistExt( 100, 0, 30, 12.111 + "free_time_hist"); 12.112 + #endif 12.113 + #ifdef MEAS__TIME_PLUGIN 12.114 + _VMSMasterEnv->reqHdlrLowTimeHist = makeFixedBinHistExt( 100, 0, 200, 12.115 + "plugin_low_time_hist"); 12.116 + _VMSMasterEnv->reqHdlrHighTimeHist = makeFixedBinHistExt( 100, 0, 200, 12.117 + "plugin_high_time_hist"); 12.118 + #endif 12.119 + //======================================================================== 12.120 + 12.121 + //===================== Only VMS__malloc after this ==================== 12.122 + masterEnv = (MasterEnv*)_VMSMasterEnv; 12.123 + 12.124 + //Make a readyToAnimateQ for each core loop 12.125 + readyToAnimateQs = VMS_int__malloc( NUM_CORES * sizeof(VMSQueueStruc *) ); 12.126 + masterVPs = VMS_int__malloc( NUM_CORES * sizeof(SlaveVP *) ); 12.127 + 12.128 + //One array for each core, 3 in array, core's masterVP scheds all 12.129 + allSchedSlots = VMS_int__malloc( NUM_CORES * sizeof(SchedSlot *) ); 12.130 + 12.131 + _VMSMasterEnv->numSlaves = 0; //used to detect shut-down condition 12.132 + 12.133 + _VMSMasterEnv->numVPsCreated = 0; //used by create procr to set ID 12.134 + for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ ) 12.135 + { 12.136 + readyToAnimateQs[ coreIdx ] = makeVMSQ(); 12.137 + 12.138 + //Q: should give masterVP core-specific info as its init data? 
12.139 + masterVPs[ coreIdx ] = VMS_int__create_procr( (TopLevelFnPtr)&masterLoop, (void*)masterEnv ); 12.140 + masterVPs[ coreIdx ]->coreAnimatedBy = coreIdx; 12.141 + allSchedSlots[ coreIdx ] = create_sched_slots(); //makes for one core 12.142 + _VMSMasterEnv->numMasterInARow[ coreIdx ] = 0; 12.143 + _VMSMasterEnv->workStealingGates[ coreIdx ] = NULL; 12.144 + } 12.145 + _VMSMasterEnv->readyToAnimateQs = readyToAnimateQs; 12.146 + _VMSMasterEnv->masterVPs = masterVPs; 12.147 + _VMSMasterEnv->masterLock = UNLOCKED; 12.148 + _VMSMasterEnv->allSchedSlots = allSchedSlots; 12.149 + _VMSMasterEnv->workStealingLock = UNLOCKED; 12.150 + 12.151 + 12.152 + //Aug 19, 2010: no longer need to place initial masterVP into queue 12.153 + // because coreLoop now controls -- animates its masterVP when no work 12.154 + 12.155 + 12.156 + //============================= MEASUREMENT STUFF ======================== 12.157 + #ifdef STATS__TURN_ON_PROBES 12.158 + _VMSMasterEnv->dynIntervalProbesInfo = 12.159 + makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->intervalProbes), 200); 12.160 + 12.161 + _VMSMasterEnv->probeNameHashTbl = makeHashTable( 1000, &VMS_int__free ); 12.162 + 12.163 + //put creation time directly into master env, for fast retrieval 12.164 + struct timeval timeStamp; 12.165 + gettimeofday( &(timeStamp), NULL); 12.166 + _VMSMasterEnv->createPtInSecs = 12.167 + timeStamp.tv_sec +(timeStamp.tv_usec/1000000.0); 12.168 + #endif 12.169 + #ifdef MEAS__TIME_MASTER_LOCK 12.170 + _VMSMasterEnv->masterLockLowTimeHist = makeFixedBinHist( 50, 0, 2, 12.171 + "master lock low time hist"); 12.172 + _VMSMasterEnv->masterLockHighTimeHist = makeFixedBinHist( 50, 0, 100, 12.173 + "master lock high time hist"); 12.174 + #endif 12.175 + 12.176 + MakeTheMeasHists(); 12.177 + //======================================================================== 12.178 + } 12.179 + 12.180 +SchedSlot ** 12.181 +create_sched_slots() 12.182 + { SchedSlot **schedSlots; 12.183 + int i; 12.184 + 12.185 + 
schedSlots = VMS_int__malloc( NUM_SCHED_SLOTS * sizeof(SchedSlot *) ); 12.186 + 12.187 + for( i = 0; i < NUM_SCHED_SLOTS; i++ ) 12.188 + { 12.189 + schedSlots[i] = VMS_int__malloc( sizeof(SchedSlot) ); 12.190 + 12.191 + //Set state to mean "handling requests done, slot needs filling" 12.192 + schedSlots[i]->workIsDone = FALSE; 12.193 + schedSlots[i]->needsProcrAssigned = TRUE; 12.194 + } 12.195 + return schedSlots; 12.196 + } 12.197 + 12.198 + 12.199 +void 12.200 +freeSchedSlots( SchedSlot **schedSlots ) 12.201 + { int i; 12.202 + for( i = 0; i < NUM_SCHED_SLOTS; i++ ) 12.203 + { 12.204 + VMS_int__free( schedSlots[i] ); 12.205 + } 12.206 + VMS_int__free( schedSlots ); 12.207 + } 12.208 + 12.209 + 12.210 +void 12.211 +create_the_coreLoop_OS_threads() 12.212 + { 12.213 + //======================================================================== 12.214 + // Create the Threads 12.215 + int coreIdx, retCode; 12.216 + 12.217 + //Need the threads to be created suspended, and wait for a signal 12.218 + // before proceeding -- gives time after creating to initialize other 12.219 + // stuff before the coreLoops set off. 
12.220 + _VMSMasterEnv->setupComplete = 0; 12.221 + 12.222 + //Make the threads that animate the core loops 12.223 + for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ ) 12.224 + { coreLoopThdParams[coreIdx] = VMS_int__malloc( sizeof(ThdParams) ); 12.225 + coreLoopThdParams[coreIdx]->coreNum = coreIdx; 12.226 + 12.227 + retCode = 12.228 + pthread_create( &(coreLoopThdHandles[coreIdx]), 12.229 + thdAttrs, 12.230 + &coreLoop, 12.231 + (void *)(coreLoopThdParams[coreIdx]) ); 12.232 + if(retCode){printf("ERROR creating thread: %d\n", retCode); exit(1);} 12.233 + } 12.234 + } 12.235 + 12.236 + 12.237 + 12.238 +void 12.239 +VMS_WL__register_request_handler( RequestHandler requestHandler ) 12.240 + { _VMSMasterEnv->requestHandler = requestHandler; 12.241 + } 12.242 + 12.243 + 12.244 +void 12.245 +VMS_WL__register_sched_assigner( Sched_Assigner schedAssigner ) 12.246 + { _VMSMasterEnv->slaveSchedAssigner = schedAssigner; 12.247 + } 12.248 + 12.249 +VMS_WL__register_semantic_env( void *semanticEnv ) 12.250 + { _VMSMasterEnv->semanticEnv = semanticEnv; 12.251 + } 12.252 + 12.253 + 12.254 +/*This is what causes the VMS system to initialize.. then waits for it to 12.255 + * exit. 12.256 + * 12.257 + *Wrapper lib layer calls this when it wants the system to start running.. 12.258 + */ 12.259 +void 12.260 +VMS_WL__start_the_work_then_wait_until_done() 12.261 + { 12.262 +#ifdef SEQUENTIAL 12.263 + /*Only difference between version with an OS thread pinned to each core and 12.264 + * the sequential version of VMS is VMS__init_Seq, this, and coreLoop_Seq. 12.265 + */ 12.266 + //Instead of un-suspending threads, just call the one and only 12.267 + // core loop (sequential version), in the main thread. 
12.268 + coreLoop_Seq( NULL ); 12.269 + flushRegisters(); 12.270 +#else 12.271 + int coreIdx; 12.272 + //Start the core loops running 12.273 + 12.274 + //tell the core loop threads that setup is complete 12.275 + //get lock, to lock out any threads still starting up -- they'll see 12.276 + // that setupComplete is true before entering while loop, and so never 12.277 + // wait on the condition 12.278 + pthread_mutex_lock( &suspendLock ); 12.279 + _VMSMasterEnv->setupComplete = 1; 12.280 + pthread_mutex_unlock( &suspendLock ); 12.281 + pthread_cond_broadcast( &suspend_cond ); 12.282 + 12.283 + 12.284 + //wait for all to complete 12.285 + for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ ) 12.286 + { 12.287 + pthread_join( coreLoopThdHandles[coreIdx], NULL ); 12.288 + } 12.289 + 12.290 + //NOTE: do not clean up VMS env here -- semantic layer has to have 12.291 + // a chance to clean up its environment first, then do a call to free 12.292 + // the Master env and rest of VMS locations 12.293 +#endif 12.294 + } 12.295 + 12.296 + 12.297 +//TODO: look at architecting cleanest separation between request handler 12.298 +// and master loop, for dissipate, create, shutdown, and other non-semantic 12.299 +// requests. Issue is chain: one removes requests from AppVP, one dispatches 12.300 +// on type of request, and one handles each type.. but some types require 12.301 +// action from both request handler and master loop -- maybe just give the 12.302 +// request handler calls like: VMS__handle_X_request_type 12.303 + 12.304 + 12.305 +/*This is called by the semantic layer's request handler when it decides its 12.306 + * time to shut down the VMS system. Calling this causes the core loop OS 12.307 + * threads to exit, which unblocks the entry-point function that started up 12.308 + * VMS, and allows it to grab the result and return to the original single- 12.309 + * threaded application. 
12.310 + * 12.311 + *The _VMSMasterEnv is needed by this shut down function, so the create-seed- 12.312 + * and-wait function has to free a bunch of stuff after it detects the 12.313 + * threads have all died: the masterEnv, the thread-related locations, 12.314 + * masterVP any AppVPs that might still be allocated and sitting in the 12.315 + * semantic environment, or have been orphaned in the _VMSWorkQ. 12.316 + * 12.317 + *NOTE: the semantic plug-in is expected to use VMS__malloc to get all the 12.318 + * locations it needs, and give ownership to masterVP. Then, they will be 12.319 + * automatically freed. 12.320 + * 12.321 + *In here,create one core-loop shut-down processor for each core loop and put 12.322 + * them all directly into the readyToAnimateQ. 12.323 + *Note, this function can ONLY be called after the semantic environment no 12.324 + * longer cares if AppVPs get animated after the point this is called. In 12.325 + * other words, this can be used as an abort, or else it should only be 12.326 + * called when all AppVPs have finished dissipate requests -- only at that 12.327 + * point is it sure that all results have completed. 12.328 + */ 12.329 +void 12.330 +VMS_int__shutdown() 12.331 + { int coreIdx; 12.332 + SlaveVP *shutDownPr; 12.333 + 12.334 + //create the shutdown processors, one for each core loop -- put them 12.335 + // directly into the Q -- each core will die when gets one 12.336 + for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ ) 12.337 + { //Note, this is running in the master 12.338 + shutDownPr = VMS_int__create_procr( &endOSThreadFn, NULL ); 12.339 + writeVMSQ( shutDownPr, _VMSMasterEnv->readyToAnimateQs[coreIdx] ); 12.340 + } 12.341 + 12.342 + } 12.343 + 12.344 + 12.345 +/*Am trying to be cute, avoiding IF statement in coreLoop that checks for 12.346 + * a special shutdown procr. Ended up with extra-complex shutdown sequence. 
12.347 + *This function has the sole purpose of setting the stack and framePtr 12.348 + * to the coreLoop's stack and framePtr.. it does that then jumps to the 12.349 + * core loop's shutdown point -- might be able to just call Pthread_exit 12.350 + * from here, but am going back to the pthread's stack and setting everything 12.351 + * up just as if it never jumped out, before calling pthread_exit. 12.352 + *The end-point of core loop will free the stack and so forth of the 12.353 + * processor that animates this function, (this fn is transfering the 12.354 + * animator of the AppVP that is in turn animating this function over 12.355 + * to core loop function -- note that this slices out a level of virtual 12.356 + * processors). 12.357 + */ 12.358 +void 12.359 +endOSThreadFn( void *initData, SlaveVP *animatingPr ) 12.360 + { 12.361 +#ifdef SEQUENTIAL 12.362 + asmTerminateCoreLoopSeq(animatingPr); 12.363 +#else 12.364 + asmTerminateCoreLoop(animatingPr); 12.365 +#endif 12.366 + } 12.367 + 12.368 + 12.369 +/*This is called from the startup & shutdown 12.370 + */ 12.371 +void 12.372 +VMS_int__cleanup_at_end_of_shutdown() 12.373 + { 12.374 + //unused 12.375 + //VMSQueueStruc **readyToAnimateQs; 12.376 + //int coreIdx; 12.377 + //SlaveVP **masterVPs; 12.378 + //SchedSlot ***allSchedSlots; //ptr to array of ptrs 12.379 + 12.380 + //Before getting rid of everything, print out any measurements made 12.381 + forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, (DynArrayFnPtr)&printHist ); 12.382 + forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, (DynArrayFnPtr)&saveHistToFile); 12.383 + forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, &freeHist ); 12.384 + #ifdef MEAS__TIME_PLUGIN 12.385 + printHist( _VMSMasterEnv->reqHdlrLowTimeHist ); 12.386 + saveHistToFile( _VMSMasterEnv->reqHdlrLowTimeHist ); 12.387 + printHist( _VMSMasterEnv->reqHdlrHighTimeHist ); 12.388 + saveHistToFile( _VMSMasterEnv->reqHdlrHighTimeHist ); 12.389 + freeHistExt( _VMSMasterEnv->reqHdlrLowTimeHist ); 
12.390 + freeHistExt( _VMSMasterEnv->reqHdlrHighTimeHist ); 12.391 + #endif 12.392 + #ifdef MEAS__TIME_MALLOC 12.393 + printHist( _VMSMasterEnv->mallocTimeHist ); 12.394 + saveHistToFile( _VMSMasterEnv->mallocTimeHist ); 12.395 + printHist( _VMSMasterEnv->freeTimeHist ); 12.396 + saveHistToFile( _VMSMasterEnv->freeTimeHist ); 12.397 + freeHistExt( _VMSMasterEnv->mallocTimeHist ); 12.398 + freeHistExt( _VMSMasterEnv->freeTimeHist ); 12.399 + #endif 12.400 + #ifdef MEAS__TIME_MASTER_LOCK 12.401 + printHist( _VMSMasterEnv->masterLockLowTimeHist ); 12.402 + printHist( _VMSMasterEnv->masterLockHighTimeHist ); 12.403 + #endif 12.404 + #ifdef MEAS__TIME_MASTER 12.405 + printHist( _VMSMasterEnv->pluginTimeHist ); 12.406 + for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ ) 12.407 + { 12.408 + freeVMSQ( readyToAnimateQs[ coreIdx ] ); 12.409 + //master VPs were created external to VMS, so use external free 12.410 + VMS_int__dissipate_procr( masterVPs[ coreIdx ] ); 12.411 + 12.412 + freeSchedSlots( allSchedSlots[ coreIdx ] ); 12.413 + } 12.414 + #endif 12.415 + #ifdef MEAS__TIME_STAMP_SUSP 12.416 + printHist( _VMSMasterEnv->pluginTimeHist ); 12.417 + for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ ) 12.418 + { 12.419 + freeVMSQ( readyToAnimateQs[ coreIdx ] ); 12.420 + //master VPs were created external to VMS, so use external free 12.421 + VMS_int__dissipate_procr( masterVPs[ coreIdx ] ); 12.422 + 12.423 + freeSchedSlots( allSchedSlots[ coreIdx ] ); 12.424 + } 12.425 + #endif 12.426 + 12.427 + //All the environment data has been allocated with VMS__malloc, so just 12.428 + // free its internal big-chunk and all inside it disappear. 
12.429 +/* 12.430 + readyToAnimateQs = _VMSMasterEnv->readyToAnimateQs; 12.431 + masterVPs = _VMSMasterEnv->masterVPs; 12.432 + allSchedSlots = _VMSMasterEnv->allSchedSlots; 12.433 + 12.434 + for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ ) 12.435 + { 12.436 + freeVMSQ( readyToAnimateQs[ coreIdx ] ); 12.437 + //master VPs were created external to VMS, so use external free 12.438 + VMS__dissipate_procr( masterVPs[ coreIdx ] ); 12.439 + 12.440 + freeSchedSlots( allSchedSlots[ coreIdx ] ); 12.441 + } 12.442 + 12.443 + VMS__free( _VMSMasterEnv->readyToAnimateQs ); 12.444 + VMS__free( _VMSMasterEnv->masterVPs ); 12.445 + VMS__free( _VMSMasterEnv->allSchedSlots ); 12.446 + 12.447 + //============================= MEASUREMENT STUFF ======================== 12.448 + #ifdef STATS__TURN_ON_PROBES 12.449 + freeDynArrayDeep( _VMSMasterEnv->dynIntervalProbesInfo, &VMS__free_probe); 12.450 + #endif 12.451 + //======================================================================== 12.452 +*/ 12.453 + //These are the only two that use system free 12.454 + VMS_ext__free_free_list( _VMSMasterEnv->freeListHead ); 12.455 + free( (void *)_VMSMasterEnv ); 12.456 + } 12.457 + 12.458 + 12.459 +//================================ 12.460 + 12.461 +
/*
 *  Copyright 2012 OpenSourceStewardshipFoundation
 *  Licensed under BSD
 *
 * Author: seanhalle@yahoo.com
 *
 */

/* VMS_defs__HW_specific.h -- constants tied to the machine VMS runs on. */

#ifndef _VMS_HW_SPEC_DEFS_H
#define _VMS_HW_SPEC_DEFS_H

   //guarded, so a -D_GNU_SOURCE on the command line is not redefined
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif

#include <stdint.h>  /* uintptr_t, used by __align_address */


//========================= Hardware related Constants =====================
   //This value is the number of hardware threads in the shared memory
   // machine
#define NUM_CORES        4

   // tradeoff amortizing master fixed overhead vs imbalance potential
   // when work-stealing, can make bigger, at risk of losing cache affinity
#define NUM_SCHED_SLOTS  3

#define MIN_WORK_UNIT_CYCLES 20000

#define MASTERLOCK_RETRIES 10000

   // stack size in virtual processors created
#define VIRT_PROCR_STACK_SIZE 0x8000 /* 32K */

   // memory for VMS__malloc
#define MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE 0x40000000 /* 1G */

   //Frequency of TS counts -- have to do tests to verify
   //NOTE: turn off (in BIOS)  TURBO-BOOST and SPEED-STEP else won't be const
#define TSCOUNT_FREQ 3180000000

   //CACHE_LINE_SZ must stay a power of two: __align_address builds its mask
   // from it
#define CACHE_LINE_SZ 256
#define PAGE_SIZE     4096

//To prevent false-sharing, aligns a variable to a cache-line boundary.
//No need to use for local vars because those are never shared between cores
#define __align_to_cacheline__ __attribute__ ((aligned(CACHE_LINE_SZ)))

//Aligns a pointer to a cacheline by clearing its low bits, i.e. rounds the
// address DOWN to a multiple of CACHE_LINE_SZ.  The memory area has to
// contain at least CACHE_LINE_SZ bytes more than needed.
#define __align_address(ptr) \
   ((void*)(((uintptr_t)(ptr)) & ~((uintptr_t)CACHE_LINE_SZ - 1)))

//===========================================================================

#endif  /* _VMS_HW_SPEC_DEFS_H */
/*
 *  Copyright 2009 OpenSourceStewardshipFoundation.org
 *  Licensed under GNU General Public License version 2
 *
 * Author: seanhalle@yahoo.com
 *
 */

/* VMS_defs__lang_specific.h -- measurement macros belonging to the
 * individual languages built on VMS (VPThread, VCilk, SSR).
 */

#ifndef _VMS_LANG_SPEC_DEFS_H
#define _VMS_LANG_SPEC_DEFS_H



//=================== Language-specific Measurement Stuff ===================
//
//TODO: Figure out way to move these into language dir..
// wrap them in #ifdef MEAS__...
//
//Each language section below defines MakeTheMeasHists() for itself.  The
// empty fall-back sits at the END of this file, so a language's definition
// is never a redefinition of the fall-back.
//Each Meas_startX declares the locals startStamp and endStamp, so it can
// appear only once per scope, ahead of the matching Meas_endX.

//===========================================================================
//VPThread
#ifdef VTHREAD

#define createHistIdx      1  //note: starts at 1
#define mutexLockHistIdx   2
#define mutexUnlockHistIdx 3
#define condWaitHistIdx    4
#define condSignalHistIdx  5

#define MakeTheMeasHists() \
   _VMSMasterEnv->measHistsInfo = \
      makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \
   makeAMeasHist( createHistIdx,      "create",        250, 0, 100 ) \
   makeAMeasHist( mutexLockHistIdx,   "mutex_lock",    50, 0, 100 ) \
   makeAMeasHist( mutexUnlockHistIdx, "mutex_unlock",  50, 0, 100 ) \
   makeAMeasHist( condWaitHistIdx,    "cond_wait",     50, 0, 100 ) \
   makeAMeasHist( condSignalHistIdx,  "cond_signal",   50, 0, 100 )


#define Meas_startCreate \
    int32 startStamp, endStamp; \
    saveLowTimeStampCountInto( startStamp );

#define Meas_endCreate \
    saveLowTimeStampCountInto( endStamp ); \
    addIntervalToHist( startStamp, endStamp, \
                                 _VMSMasterEnv->measHists[ createHistIdx ] );

#define Meas_startMutexLock \
    int32 startStamp, endStamp; \
    saveLowTimeStampCountInto( startStamp );

#define Meas_endMutexLock \
    saveLowTimeStampCountInto( endStamp ); \
    addIntervalToHist( startStamp, endStamp, \
                              _VMSMasterEnv->measHists[ mutexLockHistIdx ] );

#define Meas_startMutexUnlock \
    int32 startStamp, endStamp; \
    saveLowTimeStampCountInto( startStamp );

#define Meas_endMutexUnlock \
    saveLowTimeStampCountInto( endStamp ); \
    addIntervalToHist( startStamp, endStamp, \
                            _VMSMasterEnv->measHists[ mutexUnlockHistIdx ] );

#define Meas_startCondWait \
    int32 startStamp, endStamp; \
    saveLowTimeStampCountInto( startStamp );

#define Meas_endCondWait \
    saveLowTimeStampCountInto( endStamp ); \
    addIntervalToHist( startStamp, endStamp, \
                               _VMSMasterEnv->measHists[ condWaitHistIdx ] );

#define Meas_startCondSignal \
    int32 startStamp, endStamp; \
    saveLowTimeStampCountInto( startStamp );

#define Meas_endCondSignal \
    saveLowTimeStampCountInto( endStamp ); \
    addIntervalToHist( startStamp, endStamp, \
                             _VMSMasterEnv->measHists[ condSignalHistIdx ] );

#endif /* VTHREAD */



//===========================================================================
//VCilk

#ifdef VCILK

#define spawnHistIdx      1  //note: starts at 1
#define syncHistIdx       2

#define MakeTheMeasHists() \
   _VMSMasterEnv->measHistsInfo = \
      makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \
   makeAMeasHist( spawnHistIdx,      "Spawn",        50, 0, 200 ) \
   makeAMeasHist( syncHistIdx,       "Sync",         50, 0, 200 )


#define Meas_startSpawn \
    int32 startStamp, endStamp; \
    saveLowTimeStampCountInto( startStamp );

#define Meas_endSpawn \
    saveLowTimeStampCountInto( endStamp ); \
    addIntervalToHist( startStamp, endStamp, \
                                  _VMSMasterEnv->measHists[ spawnHistIdx ] );

#define Meas_startSync \
    int32 startStamp, endStamp; \
    saveLowTimeStampCountInto( startStamp );

#define Meas_endSync \
    saveLowTimeStampCountInto( endStamp ); \
    addIntervalToHist( startStamp, endStamp, \
                                   _VMSMasterEnv->measHists[ syncHistIdx ] );
#endif /* VCILK */

//===========================================================================
// SSR

#ifdef SSR

#define SendFromToHistIdx      1 //note: starts at 1
#define SendOfTypeHistIdx      2
#define ReceiveFromToHistIdx   3
#define ReceiveOfTypeHistIdx   4

#define MakeTheMeasHists() \
   _VMSMasterEnv->measHistsInfo = \
      makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \
   makeAMeasHist( SendFromToHistIdx,   "SendFromTo",    50, 0, 100 ) \
   makeAMeasHist( SendOfTypeHistIdx,   "SendOfType",    50, 0, 100 ) \
   makeAMeasHist( ReceiveFromToHistIdx,"ReceiveFromTo", 50, 0, 100 ) \
   makeAMeasHist( ReceiveOfTypeHistIdx,"ReceiveOfType", 50, 0, 100 )

#define Meas_startSendFromTo \
    int32 startStamp, endStamp; \
    saveLowTimeStampCountInto( startStamp );

#define Meas_endSendFromTo \
    saveLowTimeStampCountInto( endStamp ); \
    addIntervalToHist( startStamp, endStamp, \
                             _VMSMasterEnv->measHists[ SendFromToHistIdx ] );

#define Meas_startSendOfType \
    int32 startStamp, endStamp; \
    saveLowTimeStampCountInto( startStamp );

#define Meas_endSendOfType \
    saveLowTimeStampCountInto( endStamp ); \
    addIntervalToHist( startStamp, endStamp, \
                             _VMSMasterEnv->measHists[ SendOfTypeHistIdx ] );

#define Meas_startReceiveFromTo \
    int32 startStamp, endStamp; \
    saveLowTimeStampCountInto( startStamp );

#define Meas_endReceiveFromTo \
    saveLowTimeStampCountInto( endStamp ); \
    addIntervalToHist( startStamp, endStamp, \
                          _VMSMasterEnv->measHists[ ReceiveFromToHistIdx ] );

#define Meas_startReceiveOfType \
    int32 startStamp, endStamp; \
    saveLowTimeStampCountInto( startStamp );

#define Meas_endReceiveOfType \
    saveLowTimeStampCountInto( endStamp ); \
    addIntervalToHist( startStamp, endStamp, \
                          _VMSMasterEnv->measHists[ReceiveOfTypeHistIdx ] );
#endif /* SSR */

//===========================================================================
//Fall-back: no language section above supplied MakeTheMeasHists(), so make
// it expand to nothing and the call in create_masterEnv still compiles
#ifndef MakeTheMeasHists
#define MakeTheMeasHists()
#endif

#endif  /* _VMS_LANG_SPEC_DEFS_H */
15.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 15.2 +++ b/VMS_defs__main.h Wed Feb 22 11:39:12 2012 -0800 15.3 @@ -0,0 +1,185 @@ 15.4 +/* 15.5 + * Copyright 2009 OpenSourceStewardshipFoundation.org 15.6 + * Licensed under GNU General Public License version 2 15.7 + * 15.8 + * Author: seanhalle@yahoo.com 15.9 + * 15.10 + */ 15.11 + 15.12 +#ifndef _VMS_DEFS_H 15.13 +#define _VMS_DEFS_H 15.14 +#define _GNU_SOURCE 15.15 + 15.16 +//=========================== VMS-wide defs =============================== 15.17 +#include "VMS_primitive_data_types.h" 15.18 + 15.19 +#define SUCCESS 0 15.20 + 15.21 + //only after macro-expansion are the defs of writePrivQ, aso looked up 15.22 + // so these defs can be at the top, and writePrivQ defined later on.. 15.23 +#define writeVMSQ writePrivQ 15.24 +#define readVMSQ readPrivQ 15.25 +#define makeVMSQ makeVMSPrivQ 15.26 +#define numInVMSQ numInPrivQ 15.27 +#define VMSQueueStruc PrivQueueStruc 15.28 + 15.29 + 15.30 +//====================== Hardware Specific Defs ============================ 15.31 +#include "VMS_defs__HW_specific.h" 15.32 + 15.33 +//========================= Debug Related Defs ============================= 15.34 +// 15.35 +//When SEQUENTIAL is defined, VMS does sequential exe in the main thread 15.36 +// It still does co-routines and all the mechanisms are the same, it just 15.37 +// has only a single thread and animates VPs one at a time 15.38 +//#define SEQUENTIAL 15.39 + 15.40 +//#define USE_WORK_STEALING 15.41 + 15.42 +//turns on the probe-instrumentation in the application -- when not 15.43 +// defined, the calls to the probe functions turn into comments 15.44 +#define STATS__ENABLE_PROBES 15.45 +//#define TURN_ON_DEBUG_PROBES 15.46 + 15.47 +//These defines turn types of bug messages on and off 15.48 +// be sure debug messages are un-commented (next block of defines) 15.49 +#define dbgAppFlow TRUE /* Top level flow of application code -- general*/ 15.50 +#define dbgProbes FALSE /* for issues inside probes 
themselves*/ 15.51 +#define dbgB2BMaster FALSE /* in coreloop, back to back master VPs*/ 15.52 +#define dbgRqstHdlr FALSE /* in request handler code*/ 15.53 + 15.54 +//Comment or un- the substitute half to turn on/off types of debug message 15.55 +#define DEBUG( bool, msg) \ 15.56 +// if( bool){ printf(msg); fflush(stdin);} 15.57 +#define DEBUG1( bool, msg, param) \ 15.58 +// if(bool){printf(msg, param); fflush(stdin);} 15.59 +#define DEBUG2( bool, msg, p1, p2) \ 15.60 +// if(bool) {printf(msg, p1, p2); fflush(stdin);} 15.61 + 15.62 +#define ERROR(msg) printf(msg); 15.63 +#define ERROR1(msg, param) printf(msg, param); 15.64 +#define ERROR2(msg, p1, p2) printf(msg, p1, p2); 15.65 + 15.66 +//====================== Measurement Related Defs ========================== 15.67 +// 15.68 +// 15.69 + //when STATS__TURN_ON_PROBES is defined allows using probes to measure 15.70 + // time intervals. The probes are macros that only compile to something 15.71 + // when STATS__TURN_ON_PROBES is defined. The probes are saved in the 15.72 + // master env -- but only when this is defined. 15.73 + //The TSC probes use RDTSC instr, can be unreliable, Dbl uses gettimeofday 15.74 +#define STATS__TURN_ON_PROBES 15.75 +//#define STATS__USE_TSC_PROBES 15.76 +#define STATS__USE_DBL_PROBES 15.77 + 15.78 +//================== Turn Measurement Things on and off ==================== 15.79 + 15.80 +//#define MEAS__TIME_2011_SYS 15.81 + 15.82 +//define this if any MEAS__... below are 15.83 +//#define MAKE_HISTS_FOR_MEASUREMENTS 15.84 + //when MEAS__TIME_STAMP_SUSP is defined, causes code to be inserted and 15.85 + // compiled-in that saves the low part of the time stamp count just before 15.86 + // suspending a processor and just after resuming that processor. It is 15.87 + // saved into a field added to VirtProcr. Have to sanity-check for 15.88 + // rollover of low portion into high portion. 
15.89 +//#define MEAS__TIME_STAMP_SUSP 15.90 +//#define MEAS__TIME_MASTER 15.91 +//#define MEAS__TIME_PLUGIN 15.92 +//#define MEAS__TIME_MALLOC 15.93 +//#define MEAS__TIME_MASTER_LOCK 15.94 + 15.95 + //For code that calculates normalization-offset between TSC counts of 15.96 + // different cores. 15.97 +//#define NUM_TSC_ROUND_TRIPS 10 15.98 + 15.99 + 15.100 + 15.101 +//=================== Macros to Capture Measurements ====================== 15.102 +// 15.103 +//===== RDTSC wrapper ===== 15.104 +//Also runs with x86_64 code 15.105 +#define saveTSCLowHigh(lowHighIn) \ 15.106 + asm volatile("RDTSC; \ 15.107 + movl %%eax, %0; \ 15.108 + movl %%edx, %1;" \ 15.109 + /* outputs */ : "=m" (lowHighIn.lowHigh[0]), "=m" (lowHighIn.lowHigh[1])\ 15.110 + /* inputs */ : \ 15.111 + /* clobber */ : "%eax", "%edx" \ 15.112 + ); 15.113 + 15.114 +#define saveTimeStampCountInto(low, high) \ 15.115 + asm volatile("RDTSC; \ 15.116 + movl %%eax, %0; \ 15.117 + movl %%edx, %1;" \ 15.118 + /* outputs */ : "=m" (low), "=m" (high)\ 15.119 + /* inputs */ : \ 15.120 + /* clobber */ : "%eax", "%edx" \ 15.121 + ); 15.122 + 15.123 +#define saveLowTimeStampCountInto(low) \ 15.124 + asm volatile("RDTSC; \ 15.125 + movl %%eax, %0;" \ 15.126 + /* outputs */ : "=m" (low) \ 15.127 + /* inputs */ : \ 15.128 + /* clobber */ : "%eax", "%edx" \ 15.129 + ); 15.130 + 15.131 + 15.132 +//================== Macros define types of meas want ===================== 15.133 +#ifdef MEAS__TIME_PLUGIN 15.134 + 15.135 +#define Meas_startReqHdlr \ 15.136 + int32 startStamp1, endStamp1; \ 15.137 + saveLowTimeStampCountInto( startStamp1 ); 15.138 + 15.139 +#define Meas_endReqHdlr \ 15.140 + saveLowTimeStampCountInto( endStamp1 ); \ 15.141 + addIntervalToHist( startStamp1, endStamp1, \ 15.142 + _VMSMasterEnv->reqHdlrLowTimeHist ); \ 15.143 + addIntervalToHist( startStamp1, endStamp1, \ 15.144 + _VMSMasterEnv->reqHdlrHighTimeHist ); 15.145 + 15.146 +#elif defined MEAS__TIME_2011_SYS 15.147 +#define Meas_startMasterLoop \ 
15.148 + TSCountLowHigh startStamp1, endStamp1; \ 15.149 + saveTSCLowHigh( endStamp1 ); \ 15.150 + _VMSMasterEnv->cyclesTillStartMasterLoop = \ 15.151 + endStamp1.longVal - masterVP->startSusp.longVal; 15.152 + 15.153 +#define Meas_startReqHdlr \ 15.154 + saveTSCLowHigh( startStamp1 ); \ 15.155 + _VMSMasterEnv->startReqHdlr.longVal = startStamp1.longVal; 15.156 + 15.157 +#define Meas_endReqHdlr 15.158 + 15.159 +#define Meas_endMasterLoop \ 15.160 + saveTSCLowHigh( startStamp1 ); \ 15.161 + _VMSMasterEnv->endMasterLoop.longVal = startStamp1.longVal; 15.162 + 15.163 +#else 15.164 +#define Meas_startMasterLoop 15.165 +#define Meas_startReqHdlr 15.166 +#define Meas_endReqHdlr 15.167 +#define Meas_endMasterLoop 15.168 +#endif 15.169 + 15.170 +//====================== Histogram Macros -- Create ======================== 15.171 +// 15.172 +// 15.173 +#ifdef MAKE_HISTS_FOR_MEASUREMENTS 15.174 +#define makeAMeasHist( idx, name, numBins, startVal, binWidth ) \ 15.175 + makeHighestDynArrayIndexBeAtLeast( _VMSMasterEnv->measHistsInfo, idx ); \ 15.176 + _VMSMasterEnv->measHists[idx] = \ 15.177 + makeFixedBinHist( numBins, startVal, binWidth, name ); 15.178 +#else 15.179 +#define makeAMeasHist( idx, name, numBins, startVal, binWidth ) 15.180 +#endif 15.181 + 15.182 + 15.183 +#define MEAS__SUB_CREATE /*turn on/off subtraction of create from plugin*/ 15.184 + 15.185 +#include "VMS_defs__lang_specific.h" 15.186 + 15.187 +#endif /* _VMS_DEFS_H */ 15.188 +
/*
 *  Copyright 2009 OpenSourceStewardshipFoundation.org
 *  Licensed under GNU General Public License version 2
 *
 * Author: seanhalle@yahoo.com
 *
 */

/* VMS_primitive_data_types.h -- aliases for the primitive data types. */

#ifndef _BLIS_PRIMITIVE_DATA_TYPES_H
#define _BLIS_PRIMITIVE_DATA_TYPES_H


/*For portability, need primitive data types that have a well defined
 * size, and well-defined layout into bytes
 *To do this, provide BLIS standard aliases for all primitive data types
 *These aliases must be used in all BLIS functions instead of the ANSI types
 *
 *These definitions will be replaced inside each specialization module
 * according to the compiler used in that module and the hardware being
 * specialized to.
 */

   //Whether plain char is signed is up to the compiler, so the 8-bit
   // integers state their signedness explicitly; bool8 only ever holds
   // TRUE or FALSE, so plain char is enough for it
typedef char               bool8;
typedef signed char        int8;
typedef unsigned char      uint8;
typedef short              int16;
typedef unsigned short     uint16;
typedef int                int32;
typedef unsigned int       uint32;
typedef long long          int64;
typedef unsigned long long uint64;
typedef float              float32;
typedef double             float64;

   //"double double" is not a C type; long double is the widest standard
   // floating type.  NOTE(review): its real width depends on compiler and
   // hardware, and need not be 128 bits -- confirm before relying on it
#define float128 long double

#define TRUE  1
#define FALSE 0

#endif  /* _BLIS_PRIMITIVE_DATA_TYPES_H */
17.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 17.2 +++ b/__brch__Common_ancestor Wed Feb 22 11:39:12 2012 -0800 17.3 @@ -0,0 +1,33 @@ 17.4 +A HW branch for: 17.5 + 17.6 +generic MultiCore machines with x86 64-bit instruction set 17.7 + 17.8 +This branch shouldn't be used, except as a lazy fall-back. Instead, try out other branches tuned to specific hardware platforms to find the one that performs best on your machine. Use the "exe_time_vs_task_size" project to generate curves of overhead, and compare the results from the various branches. 17.9 + 17.10 +Note: if this branch is used, then NUM_CORES in the VMS_defs__HW_specific.h file has to be updated with the number of cores in your machine. 17.11 + 17.12 +======== Background on branch naming ========= 17.13 + 17.14 +There are two kinds of branches: ones used to develop features, and ones tuned to particular hardware. A given HW branch may combine features from several feature-branches, picking and choosing among them. 17.15 + 17.16 +After Feb 2012, branches are named by the scheme: 17.17 + 17.18 +feat__<feat_descr>__<HW_feat_dev_on> 17.19 + 17.20 +HW__<desc_of_HW_brch_tuned_for> 17.21 + 17.22 +where <HW_feat_dev_on> and <desc_of_HW_brch_tuned_for> follow the pattern: 17.23 + 17.24 +<num_socket> x <num_cores>_<Manuf>_<special_features> 17.25 + 17.26 +Examples: 17.27 + 17.28 +feat__exp_array_malloc 17.29 + 17.30 +feat__rand_backoff__4x10_Intel_WestmereEx 17.31 + 17.32 +HW__1x4_Intel_SandyBridge 17.33 + 17.34 +HW__4x10_Intel_WestmereEx 17.35 + 17.36 +HW__1x4_AMD_mobile
18.1 --- a/__brch__DEPRECATED_README Mon Feb 13 13:34:13 2012 -0800 18.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 18.3 @@ -1,29 +0,0 @@ 18.4 -*DEPRECATED* as of Feb 2012, this branch should not be used. Too many variations of VMS for MC_shared exist. 18.5 - 18.6 -Instead, choose a branch that has the best implementation for the machine being run on. For example, single-socket with 2 cores, or with 4 cores, or with 8 cores all have their own branches with code tuned to that number of cores. AMD processors require different low-level tweaking than Intel, and so on. 18.7 - 18.8 -============== Background on Branch Naming ============ 18.9 - 18.10 -There are two kinds of branches: ones used to develop features, and ones tuned to particular hardware. A given HW branch may combine features from several feature-branches, picking and choosing among them. 18.11 - 18.12 -Legacy branches, from before Feb 2012 have random names. After Feb 2012, they're named by the scheme: 18.13 - 18.14 -feat__<feat_descr>__<HW_feat_dev_on> 18.15 - 18.16 -HW__<desc_of_HW_brch_tuned_for> 18.17 - 18.18 -where <HW_feat_dev_on> and <desc_of_HW_brch_tuned_for> follow the pattern: 18.19 - 18.20 -<num_socket> x <num_cores>_<ArchName>_<optional_special_features> 18.21 - 18.22 -Examples: 18.23 - 18.24 -feat__exp_array_malloc__generic_MC 18.25 - 18.26 -feat__rand_backoff__4x10_WestmereEx 18.27 - 18.28 -HW__1x4_SandyBridge 18.29 - 18.30 -HW__4x10_WestmereEx 18.31 - 18.32 -HW__1x4_AMD_mobile 18.33 \ No newline at end of file
19.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 19.2 +++ b/probes.c Wed Feb 22 11:39:12 2012 -0800 19.3 @@ -0,0 +1,339 @@ 19.4 +/* 19.5 + * Copyright 2010 OpenSourceStewardshipFoundation 19.6 + * 19.7 + * Licensed under BSD 19.8 + */ 19.9 + 19.10 +#include <stdio.h> 19.11 +#include <malloc.h> 19.12 +#include <sys/time.h> 19.13 + 19.14 +#include "VMS.h" 19.15 + 19.16 + 19.17 + 19.18 +//==================== Probes ================= 19.19 +#ifdef STATS__USE_TSC_PROBES 19.20 + 19.21 +int32 19.22 +VMS__create_histogram_probe( int32 numBins, float32 startValue, 19.23 + float32 binWidth, char *nameStr ) 19.24 + { IntervalProbe *newProbe; 19.25 + int32 idx; 19.26 + FloatHist *hist; 19.27 + 19.28 + idx = VMS__create_single_interval_probe( nameStr ); 19.29 + newProbe = _VMSMasterEnv->intervalProbes[ idx ]; 19.30 + 19.31 + hist = makeFloatHistogram( numBins, startValue, binWidth ); 19.32 + newProbe->hist = hist; 19.33 + return idx; 19.34 + } 19.35 + 19.36 +void 19.37 +VMS_impl__record_interval_start_in_probe( int32 probeID ) 19.38 + { IntervalProbe *probe; 19.39 + 19.40 + probe = _VMSMasterEnv->intervalProbes[ probeID ]; 19.41 + probe->startStamp = getTSCount(); 19.42 + } 19.43 + 19.44 +void 19.45 +VMS_impl__record_interval_end_in_probe( int32 probeID ) 19.46 + { IntervalProbe *probe; 19.47 + TSCount endStamp; 19.48 + 19.49 + endStamp = getTSCount(); 19.50 + 19.51 + probe = _VMSMasterEnv->intervalProbes[ probeID ]; 19.52 + probe->endStamp = endStamp; 19.53 + 19.54 + if( probe->hist != NULL ) 19.55 + { TSCount interval = probe->endStamp - probe->startStamp; 19.56 + //if the interval is sane, then add to histogram 19.57 + if( interval < probe->hist->endOfRange * 10 ) 19.58 + addToFloatHist( interval, probe->hist ); 19.59 + } 19.60 + } 19.61 + 19.62 +void 19.63 +VMS_impl__print_stats_of_probe( int32 probeID ) 19.64 + { IntervalProbe *probe; 19.65 + 19.66 + probe = _VMSMasterEnv->intervalProbes[ probeID ]; 19.67 + 19.68 + if( probe->hist == NULL ) 19.69 + { 19.70 + 
printf("probe: %s, interval: %.6lf\n", probe->nameStr,probe->interval); 19.71 + } 19.72 + 19.73 + else 19.74 + { 19.75 + printf( "probe: %s\n", probe->nameStr ); 19.76 + printFloatHist( probe->hist ); 19.77 + } 19.78 + } 19.79 +#else 19.80 + 19.81 +/* 19.82 + * In practice, probe operations are called from the app, from inside slaves 19.83 + * -- so have to be sure each probe is single-VP owned, and be sure that 19.84 + * any place common structures are modified it's done inside the master. 19.85 + * So -- the only place common structures are modified is during creation. 19.86 + * after that, all mods are to individual instances. 19.87 + * 19.88 + * Thniking perhaps should change the semantics to be that probes are 19.89 + * attached to the virtual processor -- and then everything is guaranteed 19.90 + * to be isolated -- except then can't take any intervals that span VPs, 19.91 + * and would have to transfer the probes to Master env when VP dissipates.. 19.92 + * gets messy.. 19.93 + * 19.94 + * For now, just making so that probe creation causes a suspend, so that 19.95 + * the dynamic array in the master env is only modified from the master 19.96 + * 19.97 + */ 19.98 +IntervalProbe * 19.99 +create_generic_probe( char *nameStr, SlaveVP *animPr ) 19.100 +{ 19.101 + VMSSemReq reqData; 19.102 + 19.103 + reqData.reqType = createProbe; 19.104 + reqData.nameStr = nameStr; 19.105 + 19.106 + VMS_WL__send_VMSSem_request( &reqData, animPr ); 19.107 + 19.108 + return animPr->dataRetFromReq; 19.109 + } 19.110 + 19.111 +/*Use this version from outside VMS -- it uses external malloc, and modifies 19.112 + * dynamic array, so can't be animated in a slave VP 19.113 + */ 19.114 +IntervalProbe * 19.115 +ext__create_generic_probe( char *nameStr ) 19.116 + { IntervalProbe *newProbe; 19.117 + int32 nameLen; 19.118 + 19.119 + newProbe = malloc( sizeof(IntervalProbe) ); 19.120 + nameLen = strlen( nameStr ); 19.121 + newProbe->nameStr = malloc( nameLen ); 19.122 + memcpy( 
newProbe->nameStr, nameStr, nameLen ); 19.123 + newProbe->hist = NULL; 19.124 + newProbe->schedChoiceWasRecorded = FALSE; 19.125 + newProbe->probeID = 19.126 + addToDynArray( newProbe, _VMSMasterEnv->dynIntervalProbesInfo ); 19.127 + 19.128 + return newProbe; 19.129 + } 19.130 + 19.131 + 19.132 +/*Only call from inside master or main startup/shutdown thread 19.133 + */ 19.134 +void 19.135 +VMS_impl__free_probe( IntervalProbe *probe ) 19.136 + { if( probe->hist != NULL ) freeDblHist( probe->hist ); 19.137 + if( probe->nameStr != NULL) VMS_int__free( probe->nameStr ); 19.138 + VMS_int__free( probe ); 19.139 + } 19.140 + 19.141 + 19.142 +int32 19.143 +VMS_impl__record_time_point_into_new_probe( char *nameStr, SlaveVP *animPr) 19.144 + { IntervalProbe *newProbe; 19.145 + struct timeval *startStamp; 19.146 + float64 startSecs; 19.147 + 19.148 + newProbe = create_generic_probe( nameStr, animPr ); 19.149 + newProbe->endSecs = 0; 19.150 + 19.151 + gettimeofday( &(newProbe->startStamp), NULL); 19.152 + 19.153 + //turn into a double 19.154 + startStamp = &(newProbe->startStamp); 19.155 + startSecs = startStamp->tv_sec + ( startStamp->tv_usec / 1000000.0 ); 19.156 + newProbe->startSecs = startSecs; 19.157 + 19.158 + return newProbe->probeID; 19.159 + } 19.160 + 19.161 +int32 19.162 +VMS_ext_impl__record_time_point_into_new_probe( char *nameStr ) 19.163 + { IntervalProbe *newProbe; 19.164 + struct timeval *startStamp; 19.165 + float64 startSecs; 19.166 + 19.167 + newProbe = ext__create_generic_probe( nameStr ); 19.168 + newProbe->endSecs = 0; 19.169 + 19.170 + gettimeofday( &(newProbe->startStamp), NULL); 19.171 + 19.172 + //turn into a double 19.173 + startStamp = &(newProbe->startStamp); 19.174 + startSecs = startStamp->tv_sec + ( startStamp->tv_usec / 1000000.0 ); 19.175 + newProbe->startSecs = startSecs; 19.176 + 19.177 + return newProbe->probeID; 19.178 + } 19.179 + 19.180 +int32 19.181 +VMS_impl__create_single_interval_probe( char *nameStr, SlaveVP *animPr ) 19.182 + { 
IntervalProbe *newProbe; 19.183 + 19.184 + newProbe = create_generic_probe( nameStr, animPr ); 19.185 + 19.186 + return newProbe->probeID; 19.187 + } 19.188 + 19.189 +int32 19.190 +VMS_impl__create_histogram_probe( int32 numBins, float64 startValue, 19.191 + float64 binWidth, char *nameStr, SlaveVP *animPr ) 19.192 + { IntervalProbe *newProbe; 19.193 + DblHist *hist; 19.194 + 19.195 + newProbe = create_generic_probe( nameStr, animPr ); 19.196 + 19.197 + hist = makeDblHistogram( numBins, startValue, binWidth ); 19.198 + newProbe->hist = hist; 19.199 + return newProbe->probeID; 19.200 + } 19.201 + 19.202 +void 19.203 +VMS_impl__index_probe_by_its_name( int32 probeID, SlaveVP *animPr ) 19.204 + { IntervalProbe *probe; 19.205 + 19.206 + //TODO: fix this To be in Master -- race condition 19.207 + probe = _VMSMasterEnv->intervalProbes[ probeID ]; 19.208 + 19.209 + addValueIntoTable(probe->nameStr, probe, _VMSMasterEnv->probeNameHashTbl); 19.210 + } 19.211 + 19.212 +IntervalProbe * 19.213 +VMS_impl__get_probe_by_name( char *probeName, SlaveVP *animPr ) 19.214 + { 19.215 + //TODO: fix this To be in Master -- race condition 19.216 + return getValueFromTable( probeName, _VMSMasterEnv->probeNameHashTbl ); 19.217 + } 19.218 + 19.219 + 19.220 +/*Everything is local to the animating procr, so no need for request, do 19.221 + * work locally, in the anim Pr 19.222 + */ 19.223 +void 19.224 +VMS_impl__record_sched_choice_into_probe( int32 probeID, SlaveVP *animatingPr ) 19.225 + { IntervalProbe *probe; 19.226 + 19.227 + probe = _VMSMasterEnv->intervalProbes[ probeID ]; 19.228 + probe->schedChoiceWasRecorded = TRUE; 19.229 + probe->coreNum = animatingPr->coreAnimatedBy; 19.230 + probe->procrID = animatingPr->procrID; 19.231 + probe->procrCreateSecs = animatingPr->createPtInSecs; 19.232 + } 19.233 + 19.234 +/*Everything is local to the animating procr, so no need for request, do 19.235 + * work locally, in the anim Pr 19.236 + */ 19.237 +void 19.238 
+VMS_impl__record_interval_start_in_probe( int32 probeID ) 19.239 + { IntervalProbe *probe; 19.240 + 19.241 + DEBUG( dbgProbes, "record start of interval\n" ) 19.242 + probe = _VMSMasterEnv->intervalProbes[ probeID ]; 19.243 + gettimeofday( &(probe->startStamp), NULL ); 19.244 + } 19.245 + 19.246 + 19.247 +/*Everything is local to the animating procr, so no need for request, do 19.248 + * work locally, in the anim Pr 19.249 + */ 19.250 +void 19.251 +VMS_impl__record_interval_end_in_probe( int32 probeID ) 19.252 + { IntervalProbe *probe; 19.253 + struct timeval *endStamp, *startStamp; 19.254 + float64 startSecs, endSecs; 19.255 + 19.256 + DEBUG( dbgProbes, "record end of interval\n" ) 19.257 + //possible seg-fault if array resized by diff core right after this 19.258 + // one gets probe..? Something like that? Might be safe.. don't care 19.259 + probe = _VMSMasterEnv->intervalProbes[ probeID ]; 19.260 + gettimeofday( &(probe->endStamp), NULL); 19.261 + 19.262 + //now turn into an interval held in a double 19.263 + startStamp = &(probe->startStamp); 19.264 + endStamp = &(probe->endStamp); 19.265 + 19.266 + startSecs = startStamp->tv_sec + ( startStamp->tv_usec / 1000000.0 ); 19.267 + endSecs = endStamp->tv_sec + ( endStamp->tv_usec / 1000000.0 ); 19.268 + 19.269 + probe->interval = endSecs - startSecs; 19.270 + probe->startSecs = startSecs; 19.271 + probe->endSecs = endSecs; 19.272 + 19.273 + if( probe->hist != NULL ) 19.274 + { 19.275 + //if the interval is sane, then add to histogram 19.276 + if( probe->interval < probe->hist->endOfRange * 10 ) 19.277 + addToDblHist( probe->interval, probe->hist ); 19.278 + } 19.279 + } 19.280 + 19.281 +void 19.282 +print_probe_helper( IntervalProbe *probe ) 19.283 + { 19.284 + printf( "\nprobe: %s, ", probe->nameStr ); 19.285 + 19.286 + 19.287 + if( probe->schedChoiceWasRecorded ) 19.288 + { printf( "coreNum: %d, procrID: %d, procrCreated: %0.6f | ", 19.289 + probe->coreNum, probe->procrID, probe->procrCreateSecs ); 19.290 + } 
19.291 + 19.292 + if( probe->endSecs == 0 ) //just a single point in time 19.293 + { 19.294 + printf( " time point: %.6f\n", 19.295 + probe->startSecs - _VMSMasterEnv->createPtInSecs ); 19.296 + } 19.297 + else if( probe->hist == NULL ) //just an interval 19.298 + { 19.299 + printf( " startSecs: %.6f interval: %.6f\n", 19.300 + (probe->startSecs - _VMSMasterEnv->createPtInSecs), probe->interval); 19.301 + } 19.302 + else //a full histogram of intervals 19.303 + { 19.304 + printDblHist( probe->hist ); 19.305 + } 19.306 + } 19.307 + 19.308 +//TODO: change so pass around pointer to probe instead of its array-index.. 19.309 +// will eliminate chance for timing of resize to cause problems with the 19.310 +// lookup -- even though don't think it actually can cause problems.. 19.311 +// there's no need to pass index around -- have hash table for names, and 19.312 +// only need it once, then have ptr to probe.. the thing about enum the 19.313 +// index and use that as name is clunky in practice -- just hash. 19.314 +void 19.315 +VMS_impl__print_stats_of_probe( int32 probeID ) 19.316 + { IntervalProbe *probe; 19.317 + 19.318 + probe = _VMSMasterEnv->intervalProbes[ probeID ]; 19.319 + 19.320 + print_probe_helper( probe ); 19.321 + } 19.322 + 19.323 + 19.324 +inline void doNothing(){}; 19.325 + 19.326 +void 19.327 +generic_print_probe( void *_probe ) 19.328 + { 19.329 + IntervalProbe *probe = (IntervalProbe *)_probe; 19.330 + 19.331 + //TODO segfault in printf 19.332 + //print_probe_helper( probe ); 19.333 + } 19.334 + 19.335 +void 19.336 +VMS_impl__print_stats_of_all_probes() 19.337 + { 19.338 + forAllInDynArrayDo( _VMSMasterEnv->dynIntervalProbesInfo, 19.339 + &generic_print_probe ); 19.340 + fflush( stdout ); 19.341 + } 19.342 +#endif
20.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 20.2 +++ b/probes.h Wed Feb 22 11:39:12 2012 -0800 20.3 @@ -0,0 +1,182 @@ 20.4 +/* 20.5 + * Copyright 2009 OpenSourceStewardshipFoundation.org 20.6 + * Licensed under GNU General Public License version 2 20.7 + * 20.8 + * Author: seanhalle@yahoo.com 20.9 + * 20.10 + */ 20.11 + 20.12 +#ifndef _PROBES_H 20.13 +#define _PROBES_H 20.14 +#define _GNU_SOURCE 20.15 + 20.16 +#include "VMS_primitive_data_types.h" 20.17 + 20.18 +#include <sys/time.h> 20.19 + 20.20 +/*Note on order of include files: 20.21 + * This file relies on #defines that appear in other files.. 20.22 + */ 20.23 + 20.24 + 20.25 +//typedef struct _IntervalProbe IntervalProbe; //in VMS.h 20.26 + 20.27 +struct _IntervalProbe 20.28 + { 20.29 + char *nameStr; 20.30 + int32 probeID; 20.31 + 20.32 + int32 schedChoiceWasRecorded; 20.33 + int32 coreNum; 20.34 + int32 procrID; 20.35 + float64 procrCreateSecs; 20.36 + 20.37 + #ifdef STATS__USE_TSC_PROBES 20.38 + TSCount startStamp; 20.39 + TSCount endStamp; 20.40 + #else 20.41 + struct timeval startStamp; 20.42 + struct timeval endStamp; 20.43 + #endif 20.44 + float64 startSecs; 20.45 + float64 endSecs; 20.46 + float64 interval; 20.47 + DblHist *hist;//if NULL, then is single interval probe 20.48 + }; 20.49 + 20.50 + 20.51 + 20.52 +//======================== Probes ============================= 20.53 +// 20.54 +// Use macros to allow turning probes off with a #define switch 20.55 +#ifdef STATS__ENABLE_PROBES 20.56 +int32 20.57 +VMS_impl__record_time_point_into_new_probe( char *nameStr,SlaveVP *animPr); 20.58 +#define VMS__record_time_point_into_new_probe( nameStr, animPr ) \ 20.59 + VMS_impl__record_time_point_in_new_probe( nameStr, animPr ) 20.60 + 20.61 +int32 20.62 +VMS_ext_impl__record_time_point_into_new_probe( char *nameStr ); 20.63 +#define VMS_ext__record_time_point_into_new_probe( nameStr ) \ 20.64 + VMS_ext_impl__record_time_point_into_new_probe( nameStr ) 20.65 + 20.66 + 20.67 +int32 20.68 
+VMS_impl__create_single_interval_probe( char *nameStr, SlaveVP *animPr ); 20.69 +#define VMS__create_single_interval_probe( nameStr, animPr ) \ 20.70 + VMS_impl__create_single_interval_probe( nameStr, animPr ) 20.71 + 20.72 + 20.73 +int32 20.74 +VMS_impl__create_histogram_probe( int32 numBins, float64 startValue, 20.75 + float64 binWidth, char *nameStr, SlaveVP *animPr ); 20.76 +#define VMS__create_histogram_probe( numBins, startValue, \ 20.77 + binWidth, nameStr, animPr ) \ 20.78 + VMS_impl__create_histogram_probe( numBins, startValue, \ 20.79 + binWidth, nameStr, animPr ) 20.80 +void 20.81 +VMS_impl__free_probe( IntervalProbe *probe ); 20.82 +#define VMS__free_probe( probe ) \ 20.83 + VMS_impl__free_probe( probe ) 20.84 + 20.85 +void 20.86 +VMS_impl__index_probe_by_its_name( int32 probeID, SlaveVP *animPr ); 20.87 +#define VMS__index_probe_by_its_name( probeID, animPr ) \ 20.88 + VMS_impl__index_probe_by_its_name( probeID, animPr ) 20.89 + 20.90 +IntervalProbe * 20.91 +VMS_impl__get_probe_by_name( char *probeName, SlaveVP *animPr ); 20.92 +#define VMS__get_probe_by_name( probeID, animPr ) \ 20.93 + VMS_impl__get_probe_by_name( probeName, animPr ) 20.94 + 20.95 +void 20.96 +VMS_impl__record_sched_choice_into_probe( int32 probeID, SlaveVP *animPr ); 20.97 +#define VMS__record_sched_choice_into_probe( probeID, animPr ) \ 20.98 + VMS_impl__record_sched_choice_into_probe( probeID, animPr ) 20.99 + 20.100 +void 20.101 +VMS_impl__record_interval_start_in_probe( int32 probeID ); 20.102 +#define VMS__record_interval_start_in_probe( probeID ) \ 20.103 + VMS_impl__record_interval_start_in_probe( probeID ) 20.104 + 20.105 +void 20.106 +VMS_impl__record_interval_end_in_probe( int32 probeID ); 20.107 +#define VMS__record_interval_end_in_probe( probeID ) \ 20.108 + VMS_impl__record_interval_end_in_probe( probeID ) 20.109 + 20.110 +void 20.111 +VMS_impl__print_stats_of_probe( int32 probeID ); 20.112 +#define VMS__print_stats_of_probe( probeID ) \ 20.113 + 
VMS_impl__print_stats_of_probe( probeID ) 20.114 + 20.115 +void 20.116 +VMS_impl__print_stats_of_all_probes(); 20.117 +#define VMS__print_stats_of_all_probes() \ 20.118 + VMS_impl__print_stats_of_all_probes() 20.119 + 20.120 + 20.121 +#else 20.122 +int32 20.123 +VMS_impl__record_time_point_into_new_probe( char *nameStr,SlaveVP *animPr); 20.124 +#define VMS__record_time_point_into_new_probe( nameStr, animPr ) \ 20.125 + 0 /* do nothing */ 20.126 + 20.127 +int32 20.128 +VMS_ext_impl__record_time_point_into_new_probe( char *nameStr ); 20.129 +#define VMS_ext__record_time_point_into_new_probe( nameStr ) \ 20.130 + 0 /* do nothing */ 20.131 + 20.132 + 20.133 +int32 20.134 +VMS_impl__create_single_interval_probe( char *nameStr, SlaveVP *animPr ); 20.135 +#define VMS__create_single_interval_probe( nameStr, animPr ) \ 20.136 + 0 /* do nothing */ 20.137 + 20.138 + 20.139 +int32 20.140 +VMS_impl__create_histogram_probe( int32 numBins, float64 startValue, 20.141 + float64 binWidth, char *nameStr, SlaveVP *animPr ); 20.142 +#define VMS__create_histogram_probe( numBins, startValue, \ 20.143 + binWidth, nameStr, animPr ) \ 20.144 + 0 /* do nothing */ 20.145 + 20.146 +void 20.147 +VMS_impl__index_probe_by_its_name( int32 probeID, SlaveVP *animPr ); 20.148 +#define VMS__index_probe_by_its_name( probeID, animPr ) \ 20.149 + /* do nothing */ 20.150 + 20.151 +IntervalProbe * 20.152 +VMS_impl__get_probe_by_name( char *probeName, SlaveVP *animPr ); 20.153 +#define VMS__get_probe_by_name( probeID, animPr ) \ 20.154 + NULL /* do nothing */ 20.155 + 20.156 +void 20.157 +VMS_impl__record_sched_choice_into_probe( int32 probeID, SlaveVP *animPr ); 20.158 +#define VMS__record_sched_choice_into_probe( probeID, animPr ) \ 20.159 + /* do nothing */ 20.160 + 20.161 +void 20.162 +VMS_impl__record_interval_start_in_probe( int32 probeID ); 20.163 +#define VMS__record_interval_start_in_probe( probeID ) \ 20.164 + /* do nothing */ 20.165 + 20.166 +void 20.167 +VMS_impl__record_interval_end_in_probe( 
int32 probeID ); 20.168 +#define VMS__record_interval_end_in_probe( probeID ) \ 20.169 + /* do nothing */ 20.170 + 20.171 +inline void doNothing(); 20.172 +void 20.173 +VMS_impl__print_stats_of_probe( int32 probeID ); 20.174 +#define VMS__print_stats_of_probe( probeID ) \ 20.175 + doNothing/* do nothing */ 20.176 + 20.177 +void 20.178 +VMS_impl__print_stats_of_all_probes(); 20.179 +#define VMS__print_stats_of_all_probes \ 20.180 + doNothing/* do nothing */ 20.181 + 20.182 +#endif /* defined STATS__ENABLE_PROBES */ 20.183 + 20.184 +#endif /* _PROBES_H */ 20.185 +
21.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 21.2 +++ b/vmalloc.c Wed Feb 22 11:39:12 2012 -0800 21.3 @@ -0,0 +1,494 @@ 21.4 +/* 21.5 + * Copyright 2009 OpenSourceCodeStewardshipFoundation.org 21.6 + * Licensed under GNU General Public License version 2 21.7 + * 21.8 + * Author: seanhalle@yahoo.com 21.9 + * 21.10 + * Created on November 14, 2009, 9:07 PM 21.11 + */ 21.12 + 21.13 +#include <malloc.h> 21.14 +#include <inttypes.h> 21.15 +#include <stdlib.h> 21.16 +#include <stdio.h> 21.17 + 21.18 +#include "VMS.h" 21.19 +#include "C_Libraries/Histogram/Histogram.h" 21.20 + 21.21 +/*Helper function 21.22 + *Insert a newly generated free chunk into the first spot on the free list. 21.23 + * The chunk is cast as a MallocProlog, so the various pointers in it are 21.24 + * accessed with C's help -- and the size of the prolog is easily added to 21.25 + * the pointer when a chunk is returned to the app -- so C handles changes 21.26 + * in pointer sizes among machines. 21.27 + * 21.28 + *The list head is a normal MallocProlog struct -- identified by its 21.29 + * prevChunkInFreeList being NULL -- the only one. 21.30 + * 21.31 + *The end of the list is identified by next chunk being NULL, as usual. 21.32 + */ 21.33 +void inline 21.34 +add_chunk_to_free_list( MallocProlog *chunk, MallocProlog *listHead ) 21.35 + { 21.36 + chunk->nextChunkInFreeList = listHead->nextChunkInFreeList; 21.37 + if( chunk->nextChunkInFreeList != NULL ) //if not last in free list 21.38 + chunk->nextChunkInFreeList->prevChunkInFreeList = chunk; 21.39 + chunk->prevChunkInFreeList = listHead; 21.40 + listHead->nextChunkInFreeList = chunk; 21.41 + } 21.42 + 21.43 + 21.44 +/*This is sequential code, meant to only be called from the Master, not from 21.45 + * any slave VPs. 21.46 + *Search down list, checking size by the nextHigherInMem pointer, to find 21.47 + * first chunk bigger than size needed. 
21.48 + *Shave off the extra and make it into a new free-list element, hook it in 21.49 + * then return the address of the found element plus size of prolog. 21.50 + * 21.51 + */ 21.52 +void *VMS_int__malloc( size_t sizeRequested ) 21.53 + { MallocProlog *foundElem = NULL, *currElem, *newElem; 21.54 + ssize_t amountExtra, sizeConsumed,sizeOfFound; 21.55 + uint32 foundElemIsTopOfHeap; 21.56 + 21.57 + //============================= MEASUREMENT STUFF ======================== 21.58 + #ifdef MEAS__TIME_MALLOC 21.59 + int32 startStamp, endStamp; 21.60 + saveLowTimeStampCountInto( startStamp ); 21.61 + #endif 21.62 + //======================================================================== 21.63 + 21.64 + //step up the size to be aligned at 16-byte boundary, prob better ways 21.65 + sizeRequested = (sizeRequested + 16) & ~15; 21.66 + currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList; 21.67 + 21.68 + while( currElem != NULL ) 21.69 + { //check if size of currElem is big enough 21.70 + sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem); 21.71 + amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog); 21.72 + if( amountExtra > 0 ) 21.73 + { //found it, get out of loop 21.74 + foundElem = currElem; 21.75 + currElem = NULL; 21.76 + } 21.77 + else 21.78 + currElem = currElem->nextChunkInFreeList; 21.79 + } 21.80 + 21.81 + if( foundElem == NULL ) 21.82 + { ERROR("\nmalloc failed\n") 21.83 + return (void *)NULL; //indicates malloc failed 21.84 + } 21.85 + //Using a kludge to identify the element that is the top chunk in the 21.86 + // heap -- saving top-of-heap addr in head's nextHigherInMem -- and 21.87 + // save addr of start of heap in head's nextLowerInMem 21.88 + //Will handle top of Heap specially 21.89 + foundElemIsTopOfHeap = foundElem->nextHigherInMem == 21.90 + _VMSMasterEnv->freeListHead->nextHigherInMem; 21.91 + 21.92 + //before shave off and try to insert new elem, remove found elem 21.93 + //note, foundElem will 
never be the head, so always has valid prevChunk 21.94 + foundElem->prevChunkInFreeList->nextChunkInFreeList = 21.95 + foundElem->nextChunkInFreeList; 21.96 + if( foundElem->nextChunkInFreeList != NULL ) 21.97 + { foundElem->nextChunkInFreeList->prevChunkInFreeList = 21.98 + foundElem->prevChunkInFreeList; 21.99 + } 21.100 + foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated 21.101 + 21.102 + //if enough, turn extra into new elem & insert it 21.103 + if( amountExtra > 64 ) 21.104 + { //make new elem by adding to addr of curr elem then casting 21.105 + sizeConsumed = sizeof(MallocProlog) + sizeRequested; 21.106 + newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed ); 21.107 + newElem->nextLowerInMem = foundElem; //This is evil (but why?) 21.108 + newElem->nextHigherInMem = foundElem->nextHigherInMem; //This is evil (but why?) 21.109 + foundElem->nextHigherInMem = newElem; 21.110 + if( ! foundElemIsTopOfHeap ) 21.111 + { //there is no next higher for top of heap, so can't write to it 21.112 + newElem->nextHigherInMem->nextLowerInMem = newElem; 21.113 + } 21.114 + add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead ); 21.115 + } 21.116 + else 21.117 + { 21.118 + sizeConsumed = sizeOfFound; 21.119 + } 21.120 + _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed; 21.121 + 21.122 + //============================= MEASUREMENT STUFF ======================== 21.123 + #ifdef MEAS__TIME_MALLOC 21.124 + saveLowTimeStampCountInto( endStamp ); 21.125 + addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist ); 21.126 + #endif 21.127 + //======================================================================== 21.128 + 21.129 + //skip over the prolog by adding its size to the pointer return 21.130 + return (void*)((uintptr_t)foundElem + sizeof(MallocProlog)); 21.131 + } 21.132 + 21.133 +/*This is sequential code, meant to only be called from the Master, not from 21.134 + * any slave VPs. 
21.135 + *Search down list, checking size by the nextHigherInMem pointer, to find 21.136 + * first chunk bigger than size needed. 21.137 + *Shave off the extra and make it into a new free-list element, hook it in 21.138 + * then return the address of the found element plus size of prolog. 21.139 + * 21.140 + * The difference to the regular malloc is, that all the allocated chunks are 21.141 + * aligned and padded to the size of a CACHE_LINE_SZ. Thus creating a new chunk 21.142 + * before the aligned chunk. 21.143 + */ 21.144 +void *VMS_int__malloc_aligned( size_t sizeRequested ) 21.145 + { MallocProlog *foundElem = NULL, *currElem, *newElem; 21.146 + ssize_t amountExtra, sizeConsumed,sizeOfFound,prevAmount; 21.147 + uint32 foundElemIsTopOfHeap; 21.148 + 21.149 + //============================= MEASUREMENT STUFF ======================== 21.150 + #ifdef MEAS__TIME_MALLOC 21.151 + uint32 startStamp, endStamp; 21.152 + saveLowTimeStampCountInto( startStamp ); 21.153 + #endif 21.154 + //======================================================================== 21.155 + 21.156 + //step up the size to be multiple of the cache line size 21.157 + sizeRequested = (sizeRequested + CACHE_LINE_SZ) & ~(CACHE_LINE_SZ-1); 21.158 + currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList; 21.159 + 21.160 + while( currElem != NULL ) 21.161 + { //check if size of currElem is big enough 21.162 + sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem); 21.163 + amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog); 21.164 + if( amountExtra > 0 ) 21.165 + { 21.166 + //look if the found element is already aligned 21.167 + if((((uintptr_t)currElem+sizeof(MallocProlog)) & (uintptr_t)(CACHE_LINE_SZ-1)) == 0){ 21.168 + //found it, get out of loop 21.169 + foundElem = currElem; 21.170 + break; 21.171 + }else{ 21.172 + //find first aligned address and check if it's still big enough 21.173 + //check also if the space before the aligned address is big enough 
21.174 + //for a new element 21.175 + void *firstAlignedAddr = (void*)(((uintptr_t)currElem + 2*CACHE_LINE_SZ) & ~((uintptr_t)(CACHE_LINE_SZ-1))); 21.176 + prevAmount = (uintptr_t)firstAlignedAddr - (uintptr_t)currElem; 21.177 + sizeOfFound=(uintptr_t)currElem->nextHigherInMem -(uintptr_t)firstAlignedAddr + sizeof(MallocProlog); 21.178 + amountExtra= sizeOfFound - sizeRequested - sizeof(MallocProlog); 21.179 + if(prevAmount > 2*sizeof(MallocProlog) && amountExtra > 0 ){ 21.180 + //found suitable element 21.181 + //create new previous element and exit loop 21.182 + MallocProlog *newAlignedElem = (MallocProlog*)firstAlignedAddr - 1; 21.183 + 21.184 + //insert new element into free list 21.185 + if(currElem->nextChunkInFreeList != NULL) 21.186 + currElem->nextChunkInFreeList->prevChunkInFreeList = newAlignedElem; 21.187 + newAlignedElem->prevChunkInFreeList = currElem; 21.188 + newAlignedElem->nextChunkInFreeList = currElem->nextChunkInFreeList; 21.189 + currElem->nextChunkInFreeList = newAlignedElem; 21.190 + 21.191 + //set higherInMem and lowerInMem 21.192 + newAlignedElem->nextHigherInMem = currElem->nextHigherInMem; 21.193 + foundElemIsTopOfHeap = currElem->nextHigherInMem == 21.194 + _VMSMasterEnv->freeListHead->nextHigherInMem; 21.195 + if(!foundElemIsTopOfHeap) 21.196 + currElem->nextHigherInMem->nextLowerInMem = newAlignedElem; 21.197 + currElem->nextHigherInMem = newAlignedElem; 21.198 + newAlignedElem->nextLowerInMem = currElem; 21.199 + 21.200 + //Found new element leaving loop 21.201 + foundElem = newAlignedElem; 21.202 + break; 21.203 + } 21.204 + } 21.205 + 21.206 + } 21.207 + currElem = currElem->nextChunkInFreeList; 21.208 + } 21.209 + 21.210 + if( foundElem == NULL ) 21.211 + { ERROR("\nmalloc failed\n") 21.212 + return (void *)NULL; //indicates malloc failed 21.213 + } 21.214 + //Using a kludge to identify the element that is the top chunk in the 21.215 + // heap -- saving top-of-heap addr in head's nextHigherInMem -- and 21.216 + // save addr of 
start of heap in head's nextLowerInMem 21.217 + //Will handle top of Heap specially 21.218 + foundElemIsTopOfHeap = foundElem->nextHigherInMem == 21.219 + _VMSMasterEnv->freeListHead->nextHigherInMem; 21.220 + 21.221 + //before shave off and try to insert new elem, remove found elem 21.222 + //note, foundElem will never be the head, so always has valid prevChunk 21.223 + foundElem->prevChunkInFreeList->nextChunkInFreeList = 21.224 + foundElem->nextChunkInFreeList; 21.225 + if( foundElem->nextChunkInFreeList != NULL ) 21.226 + { foundElem->nextChunkInFreeList->prevChunkInFreeList = 21.227 + foundElem->prevChunkInFreeList; 21.228 + } 21.229 + foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated 21.230 + 21.231 + //if enough, turn extra into new elem & insert it 21.232 + if( amountExtra > 64 ) 21.233 + { //make new elem by adding to addr of curr elem then casting 21.234 + sizeConsumed = sizeof(MallocProlog) + sizeRequested; 21.235 + newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed ); 21.236 + newElem->nextHigherInMem = foundElem->nextHigherInMem; 21.237 + newElem->nextLowerInMem = foundElem; 21.238 + foundElem->nextHigherInMem = newElem; 21.239 + 21.240 + if( ! 
foundElemIsTopOfHeap ) 21.241 + { //there is no next higher for top of heap, so can't write to it 21.242 + newElem->nextHigherInMem->nextLowerInMem = newElem; 21.243 + } 21.244 + add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead ); 21.245 + } 21.246 + else 21.247 + { 21.248 + sizeConsumed = sizeOfFound; 21.249 + } 21.250 + _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed; 21.251 + 21.252 + //============================= MEASUREMENT STUFF ======================== 21.253 + #ifdef MEAS__TIME_MALLOC 21.254 + saveLowTimeStampCountInto( endStamp ); 21.255 + addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist ); 21.256 + #endif 21.257 + //======================================================================== 21.258 + 21.259 + //skip over the prolog by adding its size to the pointer return 21.260 + return (void*)((uintptr_t)foundElem + sizeof(MallocProlog)); 21.261 + } 21.262 + 21.263 + 21.264 +/*This is sequential code -- only to be called from the Master 21.265 + * When free, subtract the size of prolog from pointer, then cast it to a 21.266 + * MallocProlog. Then check the nextLower and nextHigher chunks to see if 21.267 + * one or both are also free, and coalesce if so, and if neither free, then 21.268 + * add this one to free-list. 
21.269 + */ 21.270 +void 21.271 +VMS_int__free( void *ptrToFree ) 21.272 + { MallocProlog *elemToFree, *nextLowerElem, *nextHigherElem; 21.273 + size_t sizeOfElem; 21.274 + uint32 lowerExistsAndIsFree, higherExistsAndIsFree; 21.275 + 21.276 + //============================= MEASUREMENT STUFF ======================== 21.277 + #ifdef MEAS__TIME_MALLOC 21.278 + int32 startStamp, endStamp; 21.279 + saveLowTimeStampCountInto( startStamp ); 21.280 + #endif 21.281 + //======================================================================== 21.282 + 21.283 + if( ptrToFree < (void*)_VMSMasterEnv->freeListHead->nextLowerInMem || 21.284 + ptrToFree > (void*)_VMSMasterEnv->freeListHead->nextHigherInMem ) 21.285 + { //outside the range of data owned by VMS's malloc, so do nothing 21.286 + return; 21.287 + } 21.288 + //subtract size of prolog to get pointer to prolog, then cast 21.289 + elemToFree = (MallocProlog *)((uintptr_t)ptrToFree - sizeof(MallocProlog)); 21.290 + sizeOfElem =(size_t)((uintptr_t)elemToFree->nextHigherInMem-(uintptr_t)elemToFree); 21.291 + 21.292 + if( elemToFree->prevChunkInFreeList != NULL ) 21.293 + { printf( "error: freeing same element twice!" 
); exit(1); 21.294 + } 21.295 + 21.296 + _VMSMasterEnv->amtOfOutstandingMem -= sizeOfElem; 21.297 + 21.298 + nextLowerElem = elemToFree->nextLowerInMem; 21.299 + nextHigherElem = elemToFree->nextHigherInMem; 21.300 + 21.301 + if( nextHigherElem == NULL ) 21.302 + higherExistsAndIsFree = FALSE; 21.303 + else //okay exists, now check if in the free-list by checking back ptr 21.304 + higherExistsAndIsFree = (nextHigherElem->prevChunkInFreeList != NULL); 21.305 + 21.306 + if( nextLowerElem == NULL ) 21.307 + lowerExistsAndIsFree = FALSE; 21.308 + else //okay, it exists, now check if it's free 21.309 + lowerExistsAndIsFree = (nextLowerElem->prevChunkInFreeList != NULL); 21.310 + 21.311 + 21.312 + //now, know what exists and what's free 21.313 + if( lowerExistsAndIsFree ) 21.314 + { if( higherExistsAndIsFree ) 21.315 + { //both exist and are free, so coalesce all three 21.316 + //First, remove higher from free-list 21.317 + nextHigherElem->prevChunkInFreeList->nextChunkInFreeList = 21.318 + nextHigherElem->nextChunkInFreeList; 21.319 + if( nextHigherElem->nextChunkInFreeList != NULL ) //end-of-list? 21.320 + nextHigherElem->nextChunkInFreeList->prevChunkInFreeList = 21.321 + nextHigherElem->prevChunkInFreeList; 21.322 + //Now, fix-up sequence-in-mem list -- by side-effect, this also 21.323 + // changes size of the lower elem, which is still in free-list 21.324 + nextLowerElem->nextHigherInMem = nextHigherElem->nextHigherInMem; 21.325 + if( nextHigherElem->nextHigherInMem != 21.326 + _VMSMasterEnv->freeListHead->nextHigherInMem ) 21.327 + nextHigherElem->nextHigherInMem->nextLowerInMem = nextLowerElem; 21.328 + //notice didn't do anything to elemToFree -- it simply is no 21.329 + // longer reachable from any of the lists. Wonder if could be a 21.330 + // security leak because left valid addresses in it, 21.331 + // but don't care for now. 
21.332 + } 21.333 + else 21.334 + { //lower is the only of the two that exists and is free, 21.335 + //In this case, no adjustment to free-list, just change mem-list. 21.336 + // By side-effect, changes size of the lower elem 21.337 + nextLowerElem->nextHigherInMem = elemToFree->nextHigherInMem; 21.338 + if( elemToFree->nextHigherInMem != 21.339 + _VMSMasterEnv->freeListHead->nextHigherInMem ) 21.340 + elemToFree->nextHigherInMem->nextLowerInMem = nextLowerElem; 21.341 + } 21.342 + } 21.343 + else 21.344 + { //lower either doesn't exist or isn't free, so check higher 21.345 + if( higherExistsAndIsFree ) 21.346 + { //higher exists and is the only of the two free 21.347 + //First, in free-list, replace higher elem with the one to free 21.348 + elemToFree->nextChunkInFreeList=nextHigherElem->nextChunkInFreeList; 21.349 + elemToFree->prevChunkInFreeList=nextHigherElem->prevChunkInFreeList; 21.350 + elemToFree->prevChunkInFreeList->nextChunkInFreeList = elemToFree; 21.351 + if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list? 21.352 + elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree; 21.353 + //Now chg mem-list. By side-effect, changes size of elemToFree 21.354 + elemToFree->nextHigherInMem = nextHigherElem->nextHigherInMem; 21.355 + if( elemToFree->nextHigherInMem != 21.356 + _VMSMasterEnv->freeListHead->nextHigherInMem ) 21.357 + elemToFree->nextHigherInMem->nextLowerInMem = elemToFree; 21.358 + } 21.359 + else 21.360 + { //neither lower nor higher is availabe to coalesce so add to list 21.361 + // this makes prev chunk ptr non-null, which indicates it's free 21.362 + elemToFree->nextChunkInFreeList = 21.363 + _VMSMasterEnv->freeListHead->nextChunkInFreeList; 21.364 + _VMSMasterEnv->freeListHead->nextChunkInFreeList = elemToFree; 21.365 + if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list? 
21.366 + elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree; 21.367 + elemToFree->prevChunkInFreeList = _VMSMasterEnv->freeListHead; 21.368 + } 21.369 + } 21.370 + //============================= MEASUREMENT STUFF ======================== 21.371 + #ifdef MEAS__TIME_MALLOC 21.372 + saveLowTimeStampCountInto( endStamp ); 21.373 + addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->freeTimeHist ); 21.374 + #endif 21.375 + //======================================================================== 21.376 + 21.377 + } 21.378 + 21.379 + 21.380 +/*Allocates memory from the external system -- higher overhead 21.381 + * 21.382 + *Because of Linux's malloc throwing bizarre random faults when malloc is 21.383 + * used inside a VMS virtual processor, have to pass this as a request and 21.384 + * have the core loop do it when it gets around to it -- will look for these 21.385 + * chores leftover from the previous animation of masterVP the next time it 21.386 + * goes to animate the masterVP -- so it takes two separate masterVP 21.387 + * animations, separated by work, to complete an external malloc or 21.388 + * external free request. 21.389 + * 21.390 + *Thinking core loop accepts signals -- just looks if signal-location is 21.391 + * empty or not -- 21.392 + */ 21.393 +void * 21.394 +VMS__malloc_in_ext( size_t sizeRequested ) 21.395 + { 21.396 + /* 21.397 + //This is running in the master, so no chance for multiple cores to be 21.398 + // competing for the core's flag. 21.399 + if( *(_VMSMasterEnv->coreLoopSignalAddr[ 0 ]) != 0 ) 21.400 + { //something has already signalled to core loop, so save the signal 21.401 + // and look, next time master animated, to see if can send it. 21.402 + //Note, the addr to put a signal is in the coreloop's frame, so just 21.403 + // checks it each time through -- make it volatile to avoid GCC 21.404 + // optimizations -- it's a coreloop local var that only changes 21.405 + // after jumping away. 
The signal includes the addr to send the 21.406 + //return to -- even if just empty return completion-signal 21.407 + // 21.408 + //save the signal in some queue that the master looks at each time 21.409 + // it starts up -- one loc says if empty for fast common case -- 21.410 + //something like that -- want to hide this inside this call -- but 21.411 + // think this has to come as a request -- req handler gives procr 21.412 + // back to master loop, which gives it back to req handler at point 21.413 + // it sees that core loop has sent return signal. Something like 21.414 + // that. 21.415 + saveTheSignal 21.416 + 21.417 + } 21.418 + coreSigData->type = malloc; 21.419 + coreSigData->sizeToMalloc = sizeRequested; 21.420 + coreSigData->locToSignalCompletion = &figureOut; 21.421 + _VMSMasterEnv->coreLoopSignals[ 0 ] = coreSigData; 21.422 + */ 21.423 + //just risk system-stack faults until get this figured out 21.424 + return malloc( sizeRequested ); 21.425 + } 21.426 + 21.427 + 21.428 +/*Frees memory that was allocated in the external system -- higher overhead 21.429 + * 21.430 + *As noted in external malloc comment, this is clunky 'cause the free has 21.431 + * to be called in the core loop. 
21.432 + */ 21.433 +void 21.434 +VMS__free_in_ext( void *ptrToFree ) 21.435 + { 21.436 + //just risk system-stack faults until get this figured out 21.437 + free( ptrToFree ); 21.438 + 21.439 + //TODO: fix this -- so 21.440 + } 21.441 + 21.442 + 21.443 +/*Designed to be called from the main thread outside of VMS, during init 21.444 + */ 21.445 +MallocProlog * 21.446 +VMS_ext__create_free_list() 21.447 + { MallocProlog *freeListHead, *firstChunk; 21.448 + 21.449 + //Note, this is running in the main thread -- all increases in malloc 21.450 + // mem and all frees of it must be done in this thread, with the 21.451 + // thread's original stack available 21.452 + freeListHead = malloc( sizeof(MallocProlog) ); 21.453 + firstChunk = malloc( MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE ); 21.454 + if( firstChunk == NULL ) {printf("malloc error\n"); exit(1);} 21.455 + 21.456 + //Touch memory to avoid page faults 21.457 + void *ptr,*endPtr; 21.458 + endPtr = (void*)firstChunk+MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE; 21.459 + for(ptr = firstChunk; ptr < endPtr; ptr+=PAGE_SIZE) 21.460 + { 21.461 + *(char*)ptr = 0; 21.462 + } 21.463 + 21.464 + freeListHead->prevChunkInFreeList = NULL; 21.465 + //Use this addr to free the heap when cleanup 21.466 + freeListHead->nextLowerInMem = firstChunk; 21.467 + //to identify top-of-heap elem, compare this addr to elem's next higher 21.468 + freeListHead->nextHigherInMem = (void*)( (uintptr_t)firstChunk + 21.469 + MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE); 21.470 + freeListHead->nextChunkInFreeList = firstChunk; 21.471 + 21.472 + firstChunk->nextChunkInFreeList = NULL; 21.473 + firstChunk->prevChunkInFreeList = freeListHead; 21.474 + //next Higher has to be set to top of chunk, so can calc size in malloc 21.475 + firstChunk->nextHigherInMem = (void*)( (uintptr_t)firstChunk + 21.476 + MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE); 21.477 + firstChunk->nextLowerInMem = NULL; //identifies as bott of heap 21.478 + 21.479 + _VMSMasterEnv->amtOfOutstandingMem = 0; //none 
allocated yet 21.480 + 21.481 + return freeListHead; 21.482 + } 21.483 + 21.484 + 21.485 +/*Designed to be called from the main thread outside of VMS, during cleanup 21.486 + */ 21.487 +void 21.488 +VMS_ext__free_free_list( MallocProlog *freeListHead ) 21.489 + { 21.490 + //stashed a ptr to the one and only bug chunk malloc'd from OS in the 21.491 + // free list head's next lower in mem pointer 21.492 + free( freeListHead->nextLowerInMem ); 21.493 + 21.494 + //don't free the head -- it'll be in an array eventually -- free whole 21.495 + // array when all the free lists linked from it have already been freed 21.496 + } 21.497 +
22.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 22.2 +++ b/vmalloc.h Wed Feb 22 11:39:12 2012 -0800 22.3 @@ -0,0 +1,90 @@ 22.4 +/* 22.5 + * Copyright 2009 OpenSourceCodeStewardshipFoundation.org 22.6 + * Licensed under GNU General Public License version 2 22.7 + * 22.8 + * Author: seanhalle@yahoo.com 22.9 + * 22.10 + * Created on November 14, 2009, 9:07 PM 22.11 + */ 22.12 + 22.13 +#ifndef _VMALLOC_H 22.14 +#define _VMALLOC_H 22.15 + 22.16 +#include <malloc.h> 22.17 +#include <inttypes.h> 22.18 +#include "VMS_primitive_data_types.h" 22.19 + 22.20 +typedef struct _MallocProlog MallocProlog; 22.21 + 22.22 +struct _MallocProlog 22.23 + { 22.24 + MallocProlog *nextChunkInFreeList; 22.25 + MallocProlog *prevChunkInFreeList; 22.26 + MallocProlog *nextHigherInMem; 22.27 + MallocProlog *nextLowerInMem; 22.28 + }; 22.29 +//MallocProlog 22.30 + 22.31 +typedef struct 22.32 + { 22.33 + MallocProlog *firstChunkInFreeList; 22.34 + int32 numInList; //TODO not used 22.35 + } 22.36 +FreeListHead; 22.37 + 22.38 +void * 22.39 +VMS_int__malloc( size_t sizeRequested ); 22.40 + 22.41 +void * 22.42 +VMS_int__malloc_aligned( size_t sizeRequested ); 22.43 + 22.44 +void 22.45 +VMS_int__free( void *ptrToFree ); 22.46 + 22.47 +#define VMS_PI__malloc VMS_int__malloc 22.48 +#define VMS_PI__malloc_aligned VMS_int__malloc_aligned 22.49 +#define VMS_PI__free VMS_int__free 22.50 +/* For now, the PI is protected by master lock, so int malloc fine 22.51 +void * 22.52 +VMS_PI__malloc( size_t sizeRequested ); 22.53 + 22.54 +void * 22.55 +VMS_PI__malloc_aligned( size_t sizeRequested ); 22.56 + 22.57 +void 22.58 +VMS_PI__free( void *ptrToFree ); 22.59 +*/ 22.60 + 22.61 +//TODO: protect WL malloc from concurrency!! 
shared freelist can be corrupted 22.62 +#define VMS_WL__malloc VMS_int__malloc 22.63 +#define VMS_WL__malloc_aligned VMS_int__malloc_aligned 22.64 +#define VMS_WL__free VMS_int__free 22.65 +/* 22.66 +void * 22.67 +VMS_WL__malloc( size_t sizeRequested ); 22.68 + 22.69 +void * 22.70 +VMS_WL__malloc_aligned( size_t sizeRequested ); 22.71 + 22.72 +void 22.73 +VMS_WL__free( void *ptrToFree ); 22.74 +*/ 22.75 + 22.76 +/*Allocates memory from the external system -- higher overhead 22.77 + */ 22.78 +void * 22.79 +VMS__malloc_in_ext( size_t sizeRequested ); 22.80 + 22.81 +/*Frees memory that was allocated in the external system -- higher overhead 22.82 + */ 22.83 +void 22.84 +VMS__free_in_ext( void *ptrToFree ); 22.85 + 22.86 + 22.87 +MallocProlog * 22.88 +VMS_ext__create_free_list(); 22.89 + 22.90 +void 22.91 +VMS_ext__free_free_list( MallocProlog *freeListHead ); 22.92 + 22.93 +#endif 22.94 \ No newline at end of file
23.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 23.2 +++ b/vutilities.c Wed Feb 22 11:39:12 2012 -0800 23.3 @@ -0,0 +1,25 @@ 23.4 +/* 23.5 + * Copyright 2009 OpenSourceCodeStewardshipFoundation.org 23.6 + * Licensed under GNU General Public License version 2 23.7 + * 23.8 + * Author: seanhalle@yahoo.com 23.9 + * 23.10 + * Created on November 14, 2009, 9:07 PM 23.11 + */ 23.12 + 23.13 +#include <malloc.h> 23.14 +#include <stdlib.h> 23.15 + 23.16 +#include "VMS.h" 23.17 + 23.18 + 23.19 +inline char * 23.20 +VMS_int__strDup( char *str ) 23.21 + { char *retStr; 23.22 + 23.23 + retStr = VMS_int__malloc( strlen(str) + 1 ); 23.24 + if( str == NULL ) return str; 23.25 + strcpy( retStr, str ); 23.26 + 23.27 + return retStr; 23.28 + }
24.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 24.2 +++ b/vutilities.h Wed Feb 22 11:39:12 2012 -0800 24.3 @@ -0,0 +1,20 @@ 24.4 +/* 24.5 + * Copyright 2009 OpenSourceCodeStewardshipFoundation.org 24.6 + * Licensed under GNU General Public License version 2 24.7 + * 24.8 + * Author: seanhalle@yahoo.com 24.9 + * 24.10 + * Created on November 14, 2009, 9:07 PM 24.11 + */ 24.12 + 24.13 + 24.14 +#ifndef _UTILITIES_H 24.15 +#define _UTILITIES_H 24.16 + 24.17 +#include <string.h> 24.18 +#include "VMS_primitive_data_types.h" 24.19 + 24.20 +inline char * 24.21 +VMS_int__strDup( char *str ); 24.22 + 24.23 +#endif
