Mercurial > cgi-bin > hgwebdir.cgi > VMS > VMS_Implementations > VMS_impls > VMS__MC_shared_impl
changeset 199:651ee45615ae
made default brch deprecated (see README)
| author | Me@portablequad |
|---|---|
| date | Sat, 11 Feb 2012 21:47:25 -0800 |
| parents | 7cff4e13d5c4 |
| children | bc4cb994f114 |
| files | .hgignore .hgtags CoreLoop.c MasterLoop.c ProcrContext.c ProcrContext.h VMS.c VMS.h VMS_primitive_data_types.h __brch__DEPRECATED_README contextSwitch.s probes.c probes.h vmalloc.c vmalloc.h vutilities.c vutilities.h |
| diffstat | 17 files changed, 29 insertions(+), 3393 deletions(-) [+] |
line diff
1.1 --- a/.hgignore Fri Feb 10 12:05:17 2012 +0100 1.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 1.3 @@ -1,3 +0,0 @@ 1.4 -syntax: glob 1.5 - 1.6 -*.o
2.1 --- a/.hgtags Fri Feb 10 12:05:17 2012 +0100 2.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 2.3 @@ -1,1 +0,0 @@ 2.4 -9c3107044f86c36fea3a8f72f64910b1363555be Dec27_2010_about_to_add_sched_record
3.1 --- a/CoreLoop.c Fri Feb 10 12:05:17 2012 +0100 3.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 3.3 @@ -1,215 +0,0 @@ 3.4 -/* 3.5 - * Copyright 2010 OpenSourceStewardshipFoundation 3.6 - * 3.7 - * Licensed under BSD 3.8 - */ 3.9 - 3.10 - 3.11 -#include "VMS.h" 3.12 -#include "Queue_impl/BlockingQueue.h" 3.13 -#include "ProcrContext.h" 3.14 - 3.15 -#include <stdlib.h> 3.16 -#include <stdio.h> 3.17 -#include <time.h> 3.18 - 3.19 -#include <pthread.h> 3.20 -#include <sched.h> 3.21 - 3.22 -void *terminateCoreLoop(VirtProcr *currPr); 3.23 - 3.24 -/*This is the loop that runs in the OS Thread pinned to each core 3.25 - *Get virt procr from queue, 3.26 - * save state of current animator, then load in state of virt procr, using 3.27 - * jmp instr to switch the program-counter state -- making the virt procr 3.28 - * the new animator. 3.29 - *At some point, the virt procr will suspend itself by saving out its 3.30 - * animator state (stack ptr, frame ptr, program counter) and switching 3.31 - * back to the OS Thread's animator state, which means restoring the 3.32 - * stack and frame and jumping to the core loop start point. 3.33 - *This cycle then repeats, until a special shutdown virtual processor is 3.34 - * animated, which jumps to the end point at the bottom of core loop. 
3.35 - */ 3.36 -void * 3.37 -coreLoop( void *paramsIn ) 3.38 - { 3.39 - ThdParams *coreLoopThdParams; 3.40 - int thisCoresIdx; 3.41 - VirtProcr *currPr; 3.42 - VMSQueueStruc *readyToAnimateQ; 3.43 - cpu_set_t coreMask; //has 1 in bit positions of allowed cores 3.44 - int errorCode; 3.45 - 3.46 - //work-stealing struc on stack to prevent false-sharing in cache-line 3.47 - volatile GateStruc gate; 3.48 - //preGateProgress, waitProgress, exitProgress, gateClosed; 3.49 - 3.50 - 3.51 - coreLoopThdParams = (ThdParams *)paramsIn; 3.52 - thisCoresIdx = coreLoopThdParams->coreNum; 3.53 - 3.54 - gate.gateClosed = FALSE; 3.55 - gate.preGateProgress = 0; 3.56 - gate.waitProgress = 0; 3.57 - gate.exitProgress = 0; 3.58 - _VMSMasterEnv->workStealingGates[ thisCoresIdx ] = (GateStruc*)&gate;//race @startup 3.59 - 3.60 - //wait until signalled that setup is complete 3.61 - pthread_mutex_lock( &suspendLock ); 3.62 - while( !(_VMSMasterEnv->setupComplete) ) 3.63 - { 3.64 - pthread_cond_wait( &suspend_cond, 3.65 - &suspendLock ); 3.66 - } 3.67 - pthread_mutex_unlock( &suspendLock ); 3.68 - 3.69 - //printf( "\nCore unsuspended: %d\n", coreLoopThdParams->coreNum ); 3.70 - 3.71 - //set thread affinity 3.72 - //Linux requires pinning thd to core inside thread-function 3.73 - //Designate a core by a 1 in bit-position corresponding to the core 3.74 - CPU_ZERO(&coreMask); 3.75 - CPU_SET(coreLoopThdParams->coreNum,&coreMask); 3.76 - //coreMask = 1L << coreLoopThdParams->coreNum; 3.77 - 3.78 - pthread_t selfThd = pthread_self(); 3.79 - errorCode = 3.80 - pthread_setaffinity_np( selfThd, sizeof(coreMask), &coreMask); 3.81 - 3.82 - if(errorCode){ printf("\nset affinity failure\n"); exit(0); } 3.83 - 3.84 - 3.85 - //Save the return address in the SwitchVP function 3.86 - saveCoreLoopReturnAddr((void**)&(_VMSMasterEnv->coreLoopReturnPt)); 3.87 - 3.88 - 3.89 - while(1){ 3.90 - 3.91 - //Get virtual processor from queue 3.92 - //The Q must be a global, static volatile var, so not kept in reg, 3.93 - 
// which forces reloading the pointer after each jmp to this point 3.94 - readyToAnimateQ = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx]; 3.95 - 3.96 - #ifdef USE_WORK_STEALING 3.97 - //Alg for work-stealing designed to make common case fast. Comment 3.98 - // in stealer code explains. 3.99 - gate.preGateProgress++; 3.100 - if( gate.gateClosed ) 3.101 - { //now, set coreloop's progress, so stealer can see that core loop 3.102 - // has made it into the waiting area. 3.103 - gate.waitProgress = gate.preGateProgress; 3.104 - while( gate.gateClosed ) /*busy wait*/; 3.105 - } 3.106 - 3.107 - currPr = (VirtProcr *) readVMSQ( readyToAnimateQ ); 3.108 - 3.109 - //Set the coreloop's progress, so stealer can see it has made it out 3.110 - // of the protected area 3.111 - gate.exitProgress = gate.preGateProgress; 3.112 - #else 3.113 - currPr = (VirtProcr *) readVMSQ( readyToAnimateQ ); 3.114 - #endif 3.115 - 3.116 - if( currPr != NULL ) _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0; 3.117 - else 3.118 - { 3.119 - //============================= MEASUREMENT STUFF ===================== 3.120 - #ifdef MEAS__TIME_MASTER_LOCK 3.121 - int32 startStamp, endStamp; 3.122 - saveLowTimeStampCountInto( startStamp ); 3.123 - #endif 3.124 - //===================================================================== 3.125 - int tries = 0; int gotLock = 0; 3.126 - while( currPr == NULL ) //if queue was empty, enter get masterLock loop 3.127 - { //queue was empty, so get master lock 3.128 - 3.129 - gotLock = __sync_bool_compare_and_swap(&(_VMSMasterEnv->masterLock), 3.130 - UNLOCKED, LOCKED ); 3.131 - if( gotLock ) 3.132 - { //run own MasterVP -- jmps to coreLoops startPt when done 3.133 - currPr = _VMSMasterEnv->masterVPs[thisCoresIdx]; 3.134 - if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 ) 3.135 - { DEBUG( dbgB2BMaster,"Many back to back MasterVPs\n"); 3.136 - pthread_yield(); 3.137 - } 3.138 - _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1; 3.139 - break; //end while -- have 
a VP to animate now 3.140 - } 3.141 - 3.142 - tries++; //if too many, means master on other core taking too long 3.143 - if( tries > MASTERLOCK_RETRIES ) { tries = 0; pthread_yield(); } 3.144 - } 3.145 - //============================= MEASUREMENT STUFF ===================== 3.146 - #ifdef MEAS__TIME_MASTER_LOCK 3.147 - saveLowTimeStampCountInto( endStamp ); 3.148 - addIntervalToHist( startStamp, endStamp, 3.149 - _VMSMasterEnv->masterLockLowTimeHist ); 3.150 - addIntervalToHist( startStamp, endStamp, 3.151 - _VMSMasterEnv->masterLockHighTimeHist ); 3.152 - #endif 3.153 - //===================================================================== 3.154 - 3.155 - } 3.156 - 3.157 - 3.158 - switchToVP(currPr); //The VPs return in here 3.159 - flushRegisters(); 3.160 - }//CoreLoop 3.161 - } 3.162 - 3.163 - 3.164 -void * 3.165 -terminateCoreLoop(VirtProcr *currPr){ 3.166 - //first free shutdown VP that jumped here -- it first restores the 3.167 - // coreloop's stack, so addr of currPr in stack frame is still correct 3.168 - VMS__dissipate_procr( currPr ); 3.169 - pthread_exit( NULL ); 3.170 -} 3.171 - 3.172 - 3.173 - 3.174 -#ifdef SEQUENTIAL 3.175 - 3.176 -//=========================================================================== 3.177 -/*This sequential version is exact same as threaded, except doesn't do the 3.178 - * pin-threads part, nor the wait until setup complete part. 
3.179 - */ 3.180 -void * 3.181 -coreLoop_Seq( void *paramsIn ) 3.182 - { 3.183 - VirtProcr *currPr; 3.184 - VMSQueueStruc *readyToAnimateQ; 3.185 - 3.186 - ThdParams *coreLoopThdParams; 3.187 - int thisCoresIdx; 3.188 - 3.189 - coreLoopThdParams = (ThdParams *)paramsIn; 3.190 -// thisCoresIdx = coreLoopThdParams->coreNum; 3.191 - thisCoresIdx = 0; 3.192 - 3.193 - //Save the return address in the SwitchVP function 3.194 - saveCoreLoopReturnAddr(&(_VMSMasterEnv->coreLoopReturnPt)); 3.195 - 3.196 - 3.197 - while(1){ 3.198 - //Get virtual processor from queue 3.199 - //_VMSWorkQ must be a global, static volatile var, so not kept in reg, 3.200 - // which forces reloading the pointer after each jmp to this point 3.201 - readyToAnimateQ = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx]; 3.202 - currPr = (VirtProcr *) readVMSQ( readyToAnimateQ ); 3.203 - if( currPr == NULL ) 3.204 - { if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 ) 3.205 - { printf("too many back to back MasterVP\n"); exit(1); } 3.206 - _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1; 3.207 - 3.208 - currPr = _VMSMasterEnv->masterVPs[thisCoresIdx]; 3.209 - } 3.210 - else 3.211 - _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0; 3.212 - 3.213 - 3.214 - switchToVP( currPr ); 3.215 - flushRegisters(); 3.216 - } 3.217 - } 3.218 -#endif
4.1 --- a/MasterLoop.c Fri Feb 10 12:05:17 2012 +0100 4.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 4.3 @@ -1,373 +0,0 @@ 4.4 -/* 4.5 - * Copyright 2010 OpenSourceStewardshipFoundation 4.6 - * 4.7 - * Licensed under BSD 4.8 - */ 4.9 - 4.10 - 4.11 - 4.12 -#include <stdio.h> 4.13 -#include <stddef.h> 4.14 - 4.15 -#include "VMS.h" 4.16 -#include "ProcrContext.h" 4.17 - 4.18 - 4.19 -//=========================================================================== 4.20 -void inline 4.21 -stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ, 4.22 - VirtProcr *masterPr ); 4.23 - 4.24 -//=========================================================================== 4.25 - 4.26 - 4.27 - 4.28 -/*This code is animated by the virtual Master processor. 4.29 - * 4.30 - *Polls each sched slot exactly once, hands any requests made by a newly 4.31 - * done slave to the "request handler" plug-in function 4.32 - * 4.33 - *Any slots that need a virt procr assigned are given to the "schedule" 4.34 - * plug-in function, which tries to assign a virt procr (slave) to it. 4.35 - * 4.36 - *When all slots needing a processor have been given to the schedule plug-in, 4.37 - * a fraction of the procrs successfully scheduled are put into the 4.38 - * work queue, then a continuation of this function is put in, then the rest 4.39 - * of the virt procrs that were successfully scheduled. 4.40 - * 4.41 - *The first thing the continuation does is busy-wait until the previous 4.42 - * animation completes. This is because an (unlikely) continuation may 4.43 - * sneak through queue before previous continuation is done putting second 4.44 - * part of scheduled slaves in, which is the only race condition. 
4.45 - * 4.46 - */ 4.47 - 4.48 -/*May 29, 2010 -- birth a Master during init so that first core loop to 4.49 - * start running gets it and does all the stuff for a newly born -- 4.50 - * from then on, will be doing continuation, but do suspension self 4.51 - * directly at end of master loop 4.52 - *So VMS__init just births the master virtual processor same way it births 4.53 - * all the others -- then does any extra setup needed and puts it into the 4.54 - * work queue. 4.55 - *However means have to make masterEnv a global static volatile the same way 4.56 - * did with readyToAnimateQ in core loop. -- for performance, put the 4.57 - * jump to the core loop directly in here, and have it directly jump back. 4.58 - * 4.59 - * 4.60 - *Aug 18, 2010 -- Going to a separate MasterVP for each core, to see if this 4.61 - * avoids the suspected bug in the system stack that causes bizarre faults 4.62 - * at random places in the system code. 4.63 - * 4.64 - *So, this function is coupled to each of the MasterVPs, -- meaning this 4.65 - * function can't rely on a particular stack and frame -- each MasterVP that 4.66 - * animates this function has a different one. 4.67 - * 4.68 - *At this point, the masterLoop does not write itself into the queue anymore, 4.69 - * instead, the coreLoop acquires the masterLock when it has nothing to 4.70 - * animate, and then animates its own masterLoop. However, still try to put 4.71 - * several AppVPs into the queue to amortize the startup cost of switching 4.72 - * to the MasterVP. Note, don't have to worry about latency of requests much 4.73 - * because most requests generate work for same core -- only latency issue 4.74 - * is case when other cores starved and one core's requests generate work 4.75 - * for them -- so keep max in queue to 3 or 4.. 
4.76 - */ 4.77 -void masterLoop( void *initData, VirtProcr *animatingPr ) 4.78 - { 4.79 - int32 slotIdx, numSlotsFilled; 4.80 - VirtProcr *schedVirtPr; 4.81 - SchedSlot *currSlot, **schedSlots; 4.82 - MasterEnv *masterEnv; 4.83 - VMSQueueStruc *readyToAnimateQ; 4.84 - 4.85 - SlaveScheduler slaveScheduler; 4.86 - RequestHandler requestHandler; 4.87 - void *semanticEnv; 4.88 - 4.89 - int32 thisCoresIdx; 4.90 - VirtProcr *masterPr; 4.91 - volatile VirtProcr *volatileMasterPr; 4.92 - 4.93 - volatileMasterPr = animatingPr; 4.94 - masterPr = (VirtProcr*)volatileMasterPr; //used to force re-define after jmp 4.95 - 4.96 - //First animation of each MasterVP will in turn animate this part 4.97 - // of setup code.. (VP creator sets up the stack as if this function 4.98 - // was called normally, but actually get here by jmp) 4.99 - //So, setup values about stack ptr, jmp pt and all that 4.100 - //masterPr->nextInstrPt = &&masterLoopStartPt; 4.101 - 4.102 - 4.103 - //Note, got rid of writing the stack and frame ptr up here, because 4.104 - // only one 4.105 - // core can ever animate a given MasterVP, so don't need to communicate 4.106 - // new frame and stack ptr to the MasterVP storage before a second 4.107 - // version of that MasterVP can get animated on a different core. 4.108 - //Also got rid of the busy-wait. 4.109 - 4.110 - 4.111 - //masterLoopStartPt: 4.112 - while(1){ 4.113 - 4.114 - //============================= MEASUREMENT STUFF ======================== 4.115 - #ifdef MEAS__TIME_MASTER 4.116 - //Total Master time includes one coreloop time -- just assume the core 4.117 - // loop time is same for Master as for AppVPs, even though it may be 4.118 - // smaller due to higher predictability of the fixed jmp. 
4.119 - saveLowTimeStampCountInto( masterPr->startMasterTSCLow ); 4.120 - #endif 4.121 - //======================================================================== 4.122 - 4.123 - masterEnv = (MasterEnv*)_VMSMasterEnv; 4.124 - 4.125 - //GCC may optimize so doesn't always re-define from frame-storage 4.126 - masterPr = (VirtProcr*)volatileMasterPr; //just to make sure after jmp 4.127 - thisCoresIdx = masterPr->coreAnimatedBy; 4.128 - readyToAnimateQ = masterEnv->readyToAnimateQs[thisCoresIdx]; 4.129 - schedSlots = masterEnv->allSchedSlots[thisCoresIdx]; 4.130 - 4.131 - requestHandler = masterEnv->requestHandler; 4.132 - slaveScheduler = masterEnv->slaveScheduler; 4.133 - semanticEnv = masterEnv->semanticEnv; 4.134 - 4.135 - 4.136 - //Poll each slot's Done flag 4.137 - numSlotsFilled = 0; 4.138 - for( slotIdx = 0; slotIdx < NUM_SCHED_SLOTS; slotIdx++) 4.139 - { 4.140 - currSlot = schedSlots[ slotIdx ]; 4.141 - 4.142 - if( currSlot->workIsDone ) 4.143 - { 4.144 - currSlot->workIsDone = FALSE; 4.145 - currSlot->needsProcrAssigned = TRUE; 4.146 - 4.147 - //process requests from slave to master 4.148 - //====================== MEASUREMENT STUFF =================== 4.149 - #ifdef MEAS__TIME_PLUGIN 4.150 - int32 startStamp1, endStamp1; 4.151 - saveLowTimeStampCountInto( startStamp1 ); 4.152 - #endif 4.153 - //============================================================ 4.154 - (*requestHandler)( currSlot->procrAssignedToSlot, semanticEnv ); 4.155 - //====================== MEASUREMENT STUFF =================== 4.156 - #ifdef MEAS__TIME_PLUGIN 4.157 - saveLowTimeStampCountInto( endStamp1 ); 4.158 - addIntervalToHist( startStamp1, endStamp1, 4.159 - _VMSMasterEnv->reqHdlrLowTimeHist ); 4.160 - addIntervalToHist( startStamp1, endStamp1, 4.161 - _VMSMasterEnv->reqHdlrHighTimeHist ); 4.162 - #endif 4.163 - //============================================================ 4.164 - } 4.165 - if( currSlot->needsProcrAssigned ) 4.166 - { //give slot a new virt procr 4.167 - schedVirtPr 
= 4.168 - (*slaveScheduler)( semanticEnv, thisCoresIdx ); 4.169 - 4.170 - if( schedVirtPr != NULL ) 4.171 - { currSlot->procrAssignedToSlot = schedVirtPr; 4.172 - schedVirtPr->schedSlot = currSlot; 4.173 - currSlot->needsProcrAssigned = FALSE; 4.174 - numSlotsFilled += 1; 4.175 - 4.176 - writeVMSQ( schedVirtPr, readyToAnimateQ ); 4.177 - } 4.178 - } 4.179 - } 4.180 - 4.181 - 4.182 - #ifdef USE_WORK_STEALING 4.183 - //If no slots filled, means no more work, look for work to steal. 4.184 - if( numSlotsFilled == 0 ) 4.185 - { gateProtected_stealWorkInto( currSlot, readyToAnimateQ, masterPr ); 4.186 - } 4.187 - #endif 4.188 - 4.189 - 4.190 - #ifdef MEAS__TIME_MASTER 4.191 - saveLowTimeStampCountInto( masterPr->endMasterTSCLow ); 4.192 - #endif 4.193 - 4.194 - masterSwitchToCoreLoop(animatingPr); 4.195 - flushRegisters(); 4.196 - }//MasterLoop 4.197 - 4.198 - 4.199 - } 4.200 - 4.201 - 4.202 - 4.203 -/*This has a race condition -- the coreloops are accessing their own queues 4.204 - * at the same time that this work-stealer on a different core is trying to 4.205 - */ 4.206 -void inline 4.207 -stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ, 4.208 - VirtProcr *masterPr ) 4.209 - { 4.210 - VirtProcr *stolenPr; 4.211 - int32 coreIdx, i; 4.212 - VMSQueueStruc *currQ; 4.213 - 4.214 - stolenPr = NULL; 4.215 - coreIdx = masterPr->coreAnimatedBy; 4.216 - for( i = 0; i < NUM_CORES -1; i++ ) 4.217 - { 4.218 - if( coreIdx >= NUM_CORES -1 ) 4.219 - { coreIdx = 0; 4.220 - } 4.221 - else 4.222 - { coreIdx++; 4.223 - } 4.224 - currQ = _VMSMasterEnv->readyToAnimateQs[coreIdx]; 4.225 - if( numInVMSQ( currQ ) > 0 ) 4.226 - { stolenPr = readVMSQ (currQ ); 4.227 - break; 4.228 - } 4.229 - } 4.230 - 4.231 - if( stolenPr != NULL ) 4.232 - { currSlot->procrAssignedToSlot = stolenPr; 4.233 - stolenPr->schedSlot = currSlot; 4.234 - currSlot->needsProcrAssigned = FALSE; 4.235 - 4.236 - writeVMSQ( stolenPr, readyToAnimateQ ); 4.237 - } 4.238 - } 4.239 - 4.240 -/*This algorithm 
makes the common case fast. Make the coreloop passive, 4.241 - * and show its progress. Make the stealer control a gate that coreloop 4.242 - * has to pass. 4.243 - *To avoid interference, only one stealer at a time. Use a global 4.244 - * stealer-lock. 4.245 - * 4.246 - *The pattern is based on a gate -- stealer shuts the gate, then monitors 4.247 - * to be sure any already past make it all the way out, before starting. 4.248 - *So, have a "progress" measure just before the gate, then have two after it, 4.249 - * one is in a "waiting room" outside the gate, the other is at the exit. 4.250 - *Then, the stealer first shuts the gate, then checks the progress measure 4.251 - * outside it, then looks to see if the progress measure at the exit is the 4.252 - * same. If yes, it knows the protected area is empty 'cause no other way 4.253 - * to get in and the last to get in also exited. 4.254 - *If the progress measure at the exit is not the same, then the stealer goes 4.255 - * into a loop checking both the waiting-area and the exit progress-measures 4.256 - * until one of them shows the same as the measure outside the gate. Might 4.257 - * as well re-read the measure outside the gate each go around, just to be 4.258 - * sure. It is guaranteed that one of the two will eventually match the one 4.259 - * outside the gate. 4.260 - * 4.261 - *Here's an informal proof of correctness: 4.262 - *The gate can be closed at any point, and have only four cases: 4.263 - * 1) coreloop made it past the gate-closing but not yet past the exit 4.264 - * 2) coreloop made it past the pre-gate progress update but not yet past 4.265 - * the gate, 4.266 - * 3) coreloop is right before the pre-gate update 4.267 - * 4) coreloop is past the exit and far from the pre-gate update. 4.268 - * 4.269 - * Covering the cases in reverse order, 4.270 - * 4) is not a problem -- stealer will read pre-gate progress, see that it 4.271 - * matches exit progress, and the gate is closed, so stealer can proceed. 
4.272 - * 3) stealer will read pre-gate progress just after coreloop updates it.. 4.273 - * so stealer goes into a loop until the coreloop causes wait-progress 4.274 - * to match pre-gate progress, so then stealer can proceed 4.275 - * 2) same as 3.. 4.276 - * 1) stealer reads pre-gate progress, sees that it's different than exit, 4.277 - * so goes into loop until exit matches pre-gate, now it knows coreloop 4.278 - * is not in protected and cannot get back in, so can proceed. 4.279 - * 4.280 - *Implementation for the stealer: 4.281 - * 4.282 - *First, acquire the stealer lock -- only cores with no work to do will 4.283 - * compete to steal, so not a big performance penalty having only one -- 4.284 - * will rarely have multiple stealers in a system with plenty of work -- and 4.285 - * in a system with little work, it doesn't matter. 4.286 - * 4.287 - *Note, have single-reader, single-writer pattern for all variables used to 4.288 - * communicate between stealer and victims 4.289 - * 4.290 - *So, scan the queues of the core loops, until find non-empty. Each core 4.291 - * has its own list that it scans. The list goes in order from closest to 4.292 - * furthest core, so it steals first from close cores. Later can add 4.293 - * taking info from the app about overlapping footprints, and scan all the 4.294 - * others then choose work with the most footprint overlap with the contents 4.295 - * of this core's cache. 4.296 - * 4.297 - *Now, have a victim want to take work from. So, shut the gate in that 4.298 - * coreloop, by setting the "gate closed" var on its stack to TRUE. 4.299 - *Then, read the core's pre-gate progress and compare to the core's exit 4.300 - * progress. 4.301 - *If same, can proceed to take work from the coreloop's queue. When done, 4.302 - * write FALSE to gate closed var. 4.303 - *If different, then enter a loop that reads the pre-gate progress, then 4.304 - * compares to exit progress then to wait progress. 
When one of two 4.305 - * matches, proceed. Take work from the coreloop's queue. When done, 4.306 - * write FALSE to the gate closed var. 4.307 - * 4.308 - */ 4.309 -void inline 4.310 -gateProtected_stealWorkInto( SchedSlot *currSlot, 4.311 - VMSQueueStruc *myReadyToAnimateQ, 4.312 - VirtProcr *masterPr ) 4.313 - { 4.314 - VirtProcr *stolenPr; 4.315 - int32 coreIdx, i, haveAVictim, gotLock; 4.316 - VMSQueueStruc *victimsQ; 4.317 - 4.318 - volatile GateStruc *vicGate; 4.319 - int32 coreMightBeInProtected; 4.320 - 4.321 - 4.322 - 4.323 - //see if any other cores have work available to steal 4.324 - haveAVictim = FALSE; 4.325 - coreIdx = masterPr->coreAnimatedBy; 4.326 - for( i = 0; i < NUM_CORES -1; i++ ) 4.327 - { 4.328 - if( coreIdx >= NUM_CORES -1 ) 4.329 - { coreIdx = 0; 4.330 - } 4.331 - else 4.332 - { coreIdx++; 4.333 - } 4.334 - victimsQ = _VMSMasterEnv->readyToAnimateQs[coreIdx]; 4.335 - if( numInVMSQ( victimsQ ) > 0 ) 4.336 - { haveAVictim = TRUE; 4.337 - vicGate = _VMSMasterEnv->workStealingGates[ coreIdx ]; 4.338 - break; 4.339 - } 4.340 - } 4.341 - if( !haveAVictim ) return; //no work to steal, exit 4.342 - 4.343 - //have a victim core, now get the stealer-lock 4.344 - gotLock =__sync_bool_compare_and_swap( &(_VMSMasterEnv->workStealingLock), 4.345 - UNLOCKED, LOCKED ); 4.346 - if( !gotLock ) return; //go back to core loop, which will re-start master 4.347 - 4.348 - 4.349 - //====== Start Gate-protection ======= 4.350 - vicGate->gateClosed = TRUE; 4.351 - coreMightBeInProtected= vicGate->preGateProgress != vicGate->exitProgress; 4.352 - while( coreMightBeInProtected ) 4.353 - { //wait until sure 4.354 - if( vicGate->preGateProgress == vicGate->waitProgress ) 4.355 - coreMightBeInProtected = FALSE; 4.356 - if( vicGate->preGateProgress == vicGate->exitProgress ) 4.357 - coreMightBeInProtected = FALSE; 4.358 - } 4.359 - 4.360 - stolenPr = readVMSQ ( victimsQ ); 4.361 - 4.362 - vicGate->gateClosed = FALSE; 4.363 - //======= End Gate-protection ======= 4.364 - 
4.365 - 4.366 - if( stolenPr != NULL ) //victim could have been in protected and taken 4.367 - { currSlot->procrAssignedToSlot = stolenPr; 4.368 - stolenPr->schedSlot = currSlot; 4.369 - currSlot->needsProcrAssigned = FALSE; 4.370 - 4.371 - writeVMSQ( stolenPr, myReadyToAnimateQ ); 4.372 - } 4.373 - 4.374 - //unlock the work stealing lock 4.375 - _VMSMasterEnv->workStealingLock = UNLOCKED; 4.376 - }
5.1 --- a/ProcrContext.c Fri Feb 10 12:05:17 2012 +0100 5.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 5.3 @@ -1,65 +0,0 @@ 5.4 -/* 5.5 - * This File contains all hardware dependent C code. 5.6 - */ 5.7 - 5.8 - 5.9 -#include "VMS.h" 5.10 - 5.11 -/*Create stack, then create __cdecl structure on it and put initialData and 5.12 - * pointer to the new structure instance into the parameter positions on 5.13 - * the stack 5.14 - *Then put function pointer into nextInstrPt -- the stack is setup in std 5.15 - * call structure, so jumping to function ptr is same as a GCC generated 5.16 - * function call 5.17 - *No need to save registers on old stack frame, because there's no old 5.18 - * animator state to return to -- 5.19 - * 5.20 - */ 5.21 -inline VirtProcr * 5.22 -create_procr_helper( VirtProcr *newPr, VirtProcrFnPtr fnPtr, 5.23 - void *initialData, void *stackLocs ) 5.24 - { 5.25 - void *stackPtr; 5.26 - 5.27 - newPr->startOfStack = stackLocs; 5.28 - newPr->procrID = _VMSMasterEnv->numProcrsCreated++; 5.29 - newPr->initialData = initialData; 5.30 - newPr->requests = NULL; 5.31 - newPr->schedSlot = NULL; 5.32 - 5.33 - /* 5.34 - * Hardware dependent part 5.35 - */ 5.36 - //instead of calling the function directly, call a wrapper function to fetch 5.37 - //arguments from stack 5.38 - newPr->nextInstrPt = (VirtProcrFnPtr)&startVirtProcrFn; 5.39 - 5.40 - //fnPtr takes two params -- void *initData & void *animProcr 5.41 - //alloc stack locations, make stackPtr be the highest addr minus room 5.42 - // for 2 params + return addr. 
Return addr (NULL) is in loc pointed to 5.43 - // by stackPtr, initData at stackPtr + 8 bytes, animatingPr just above 5.44 - stackPtr = ( (void *)stackLocs + VIRT_PROCR_STACK_SIZE - 4*sizeof(void*)); 5.45 - 5.46 - //setup __cdecl on stack -- coreloop will switch to stackPtr before jmp 5.47 - *((VirtProcr**)stackPtr + 2 ) = newPr; //rightmost param 5.48 - *((void**)stackPtr + 1 ) = initialData; //next param to left 5.49 - *((void**)stackPtr) = (void*)fnPtr; 5.50 - 5.51 - /* 5.52 - * end of Hardware dependent part 5.53 - */ 5.54 - 5.55 - newPr->stackPtr = stackPtr; //core loop will switch to this, then 5.56 - newPr->framePtr = stackPtr; //suspend loop will save new stack & frame ptr 5.57 - 5.58 - //============================= MEASUREMENT STUFF ======================== 5.59 - #ifdef STATS__TURN_ON_PROBES 5.60 - //struct timeval timeStamp; 5.61 - //gettimeofday( &(timeStamp), NULL); 5.62 - //newPr->createPtInSecs = timeStamp.tv_sec +(timeStamp.tv_usec/1000000.0) - 5.63 - // _VMSMasterEnv->createPtInSecs; 5.64 - #endif 5.65 - //======================================================================== 5.66 - 5.67 - return newPr; 5.68 - } 5.69 \ No newline at end of file
6.1 --- a/ProcrContext.h Fri Feb 10 12:05:17 2012 +0100 6.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 6.3 @@ -1,33 +0,0 @@ 6.4 -/* 6.5 - * Copyright 2009 OpenSourceStewardshipFoundation.org 6.6 - * Licensed under GNU General Public License version 2 6.7 - * 6.8 - * Author: seanhalle@yahoo.com 6.9 - * 6.10 - */ 6.11 - 6.12 -#ifndef _ProcrContext_H 6.13 -#define _ProcrContext_H 6.14 -#define _GNU_SOURCE 6.15 - 6.16 -void saveCoreLoopReturnAddr(void **returnAddress); 6.17 - 6.18 -void switchToVP(VirtProcr *nextProcr); 6.19 - 6.20 -void switchToCoreLoop(VirtProcr *nextProcr); 6.21 - 6.22 -void masterSwitchToCoreLoop(VirtProcr *nextProcr); 6.23 - 6.24 -void startVirtProcrFn(); 6.25 - 6.26 -void *asmTerminateCoreLoop(VirtProcr *currPr); 6.27 - 6.28 -#define flushRegisters() \ 6.29 - asm volatile ("":::"%rbx", "%r12", "%r13","%r14","%r15") 6.30 - 6.31 -inline VirtProcr * 6.32 -create_procr_helper( VirtProcr *newPr, VirtProcrFnPtr fnPtr, 6.33 - void *initialData, void *stackLocs ); 6.34 - 6.35 -#endif /* _ProcrContext_H */ 6.36 -
7.1 --- a/VMS.c Fri Feb 10 12:05:17 2012 +0100 7.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 7.3 @@ -1,772 +0,0 @@ 7.4 -/* 7.5 - * Copyright 2010 OpenSourceStewardshipFoundation 7.6 - * 7.7 - * Licensed under BSD 7.8 - */ 7.9 - 7.10 -#include <stdio.h> 7.11 -#include <stdlib.h> 7.12 -#include <string.h> 7.13 -#include <malloc.h> 7.14 -#include <inttypes.h> 7.15 -#include <sys/time.h> 7.16 - 7.17 -#include "VMS.h" 7.18 -#include "ProcrContext.h" 7.19 -#include "Queue_impl/BlockingQueue.h" 7.20 -#include "Histogram/Histogram.h" 7.21 - 7.22 - 7.23 -#define thdAttrs NULL 7.24 - 7.25 -//=========================================================================== 7.26 -void 7.27 -shutdownFn( void *dummy, VirtProcr *dummy2 ); 7.28 - 7.29 -SchedSlot ** 7.30 -create_sched_slots(); 7.31 - 7.32 -void 7.33 -create_masterEnv(); 7.34 - 7.35 -void 7.36 -create_the_coreLoop_OS_threads(); 7.37 - 7.38 -MallocProlog * 7.39 -create_free_list(); 7.40 - 7.41 -void 7.42 -endOSThreadFn( void *initData, VirtProcr *animatingPr ); 7.43 - 7.44 -pthread_mutex_t suspendLock = PTHREAD_MUTEX_INITIALIZER; 7.45 -pthread_cond_t suspend_cond = PTHREAD_COND_INITIALIZER; 7.46 - 7.47 -//=========================================================================== 7.48 - 7.49 -/*Setup has two phases: 7.50 - * 1) Semantic layer first calls init_VMS, which creates masterEnv, and puts 7.51 - * the master virt procr into the work-queue, ready for first "call" 7.52 - * 2) Semantic layer then does its own init, which creates the seed virt 7.53 - * procr inside the semantic layer, ready to schedule it when 7.54 - * asked by the first run of the masterLoop. 7.55 - * 7.56 - *This part is bit weird because VMS really wants to be "always there", and 7.57 - * have applications attach and detach.. for now, this VMS is part of 7.58 - * the app, so the VMS system starts up as part of running the app. 
7.59 - * 7.60 - *The semantic layer is isolated from the VMS internals by making the 7.61 - * semantic layer do setup to a state that it's ready with its 7.62 - * initial virt procrs, ready to schedule them to slots when the masterLoop 7.63 - * asks. Without this pattern, the semantic layer's setup would 7.64 - * have to modify slots directly to assign the initial virt-procrs, and put 7.65 - * them into the readyToAnimateQ itself, breaking the isolation completely. 7.66 - * 7.67 - * 7.68 - *The semantic layer creates the initial virt procr(s), and adds its 7.69 - * own environment to masterEnv, and fills in the pointers to 7.70 - * the requestHandler and slaveScheduler plug-in functions 7.71 - */ 7.72 - 7.73 -/*This allocates VMS data structures, populates the master VMSProc, 7.74 - * and master environment, and returns the master environment to the semantic 7.75 - * layer. 7.76 - */ 7.77 -void 7.78 -VMS__init() 7.79 - { 7.80 - create_masterEnv(); 7.81 - create_the_coreLoop_OS_threads(); 7.82 - } 7.83 - 7.84 -#ifdef SEQUENTIAL 7.85 - 7.86 -/*To initialize the sequential version, just don't create the threads 7.87 - */ 7.88 -void 7.89 -VMS__init_Seq() 7.90 - { 7.91 - create_masterEnv(); 7.92 - } 7.93 - 7.94 -#endif 7.95 - 7.96 -void 7.97 -create_masterEnv() 7.98 - { MasterEnv *masterEnv; 7.99 - VMSQueueStruc **readyToAnimateQs; 7.100 - int coreIdx; 7.101 - VirtProcr **masterVPs; 7.102 - SchedSlot ***allSchedSlots; //ptr to array of ptrs 7.103 - 7.104 - 7.105 - //Make the master env, which holds everything else 7.106 - _VMSMasterEnv = malloc( sizeof(MasterEnv) ); 7.107 - 7.108 - //Very first thing put into the master env is the free-list, seeded 7.109 - // with a massive initial chunk of memory. 7.110 - //After this, all other mallocs are VMS__malloc. 
7.111 - _VMSMasterEnv->freeListHead = VMS_ext__create_free_list(); 7.112 - 7.113 - 7.114 - //============================= MEASUREMENT STUFF ======================== 7.115 - #ifdef MEAS__TIME_MALLOC 7.116 - _VMSMasterEnv->mallocTimeHist = makeFixedBinHistExt( 100, 0, 30, 7.117 - "malloc_time_hist"); 7.118 - _VMSMasterEnv->freeTimeHist = makeFixedBinHistExt( 100, 0, 30, 7.119 - "free_time_hist"); 7.120 - #endif 7.121 - #ifdef MEAS__TIME_PLUGIN 7.122 - _VMSMasterEnv->reqHdlrLowTimeHist = makeFixedBinHistExt( 100, 0, 200, 7.123 - "plugin_low_time_hist"); 7.124 - _VMSMasterEnv->reqHdlrHighTimeHist = makeFixedBinHistExt( 100, 0, 200, 7.125 - "plugin_high_time_hist"); 7.126 - #endif 7.127 - //======================================================================== 7.128 - 7.129 - //===================== Only VMS__malloc after this ==================== 7.130 - masterEnv = (MasterEnv*)_VMSMasterEnv; 7.131 - 7.132 - //Make a readyToAnimateQ for each core loop 7.133 - readyToAnimateQs = VMS__malloc( NUM_CORES * sizeof(VMSQueueStruc *) ); 7.134 - masterVPs = VMS__malloc( NUM_CORES * sizeof(VirtProcr *) ); 7.135 - 7.136 - //One array for each core, 3 in array, core's masterVP scheds all 7.137 - allSchedSlots = VMS__malloc( NUM_CORES * sizeof(SchedSlot *) ); 7.138 - 7.139 - _VMSMasterEnv->numProcrsCreated = 0; //used by create procr 7.140 - for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ ) 7.141 - { 7.142 - readyToAnimateQs[ coreIdx ] = makeVMSQ(); 7.143 - 7.144 - //Q: should give masterVP core-specific info as its init data? 
7.145 - masterVPs[ coreIdx ] = VMS__create_procr( (VirtProcrFnPtr)&masterLoop, (void*)masterEnv ); 7.146 - masterVPs[ coreIdx ]->coreAnimatedBy = coreIdx; 7.147 - allSchedSlots[ coreIdx ] = create_sched_slots(); //makes for one core 7.148 - _VMSMasterEnv->numMasterInARow[ coreIdx ] = 0; 7.149 - _VMSMasterEnv->workStealingGates[ coreIdx ] = NULL; 7.150 - } 7.151 - _VMSMasterEnv->readyToAnimateQs = readyToAnimateQs; 7.152 - _VMSMasterEnv->masterVPs = masterVPs; 7.153 - _VMSMasterEnv->masterLock = UNLOCKED; 7.154 - _VMSMasterEnv->allSchedSlots = allSchedSlots; 7.155 - _VMSMasterEnv->workStealingLock = UNLOCKED; 7.156 - 7.157 - 7.158 - //Aug 19, 2010: no longer need to place initial masterVP into queue 7.159 - // because coreLoop now controls -- animates its masterVP when no work 7.160 - 7.161 - 7.162 - //============================= MEASUREMENT STUFF ======================== 7.163 - #ifdef STATS__TURN_ON_PROBES 7.164 - _VMSMasterEnv->dynIntervalProbesInfo = 7.165 - makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->intervalProbes), 200); 7.166 - 7.167 - _VMSMasterEnv->probeNameHashTbl = makeHashTable( 1000, &VMS__free ); 7.168 - 7.169 - //put creation time directly into master env, for fast retrieval 7.170 - struct timeval timeStamp; 7.171 - gettimeofday( &(timeStamp), NULL); 7.172 - _VMSMasterEnv->createPtInSecs = 7.173 - timeStamp.tv_sec +(timeStamp.tv_usec/1000000.0); 7.174 - #endif 7.175 - #ifdef MEAS__TIME_MASTER_LOCK 7.176 - _VMSMasterEnv->masterLockLowTimeHist = makeFixedBinHist( 50, 0, 2, 7.177 - "master lock low time hist"); 7.178 - _VMSMasterEnv->masterLockHighTimeHist = makeFixedBinHist( 50, 0, 100, 7.179 - "master lock high time hist"); 7.180 - #endif 7.181 - 7.182 - MakeTheMeasHists(); 7.183 - //======================================================================== 7.184 - 7.185 - } 7.186 - 7.187 -SchedSlot ** 7.188 -create_sched_slots() 7.189 - { SchedSlot **schedSlots; 7.190 - int i; 7.191 - 7.192 - schedSlots = VMS__malloc( NUM_SCHED_SLOTS * 
sizeof(SchedSlot *) ); 7.193 - 7.194 - for( i = 0; i < NUM_SCHED_SLOTS; i++ ) 7.195 - { 7.196 - schedSlots[i] = VMS__malloc( sizeof(SchedSlot) ); 7.197 - 7.198 - //Set state to mean "handling requests done, slot needs filling" 7.199 - schedSlots[i]->workIsDone = FALSE; 7.200 - schedSlots[i]->needsProcrAssigned = TRUE; 7.201 - } 7.202 - return schedSlots; 7.203 - } 7.204 - 7.205 - 7.206 -void 7.207 -freeSchedSlots( SchedSlot **schedSlots ) 7.208 - { int i; 7.209 - for( i = 0; i < NUM_SCHED_SLOTS; i++ ) 7.210 - { 7.211 - VMS__free( schedSlots[i] ); 7.212 - } 7.213 - VMS__free( schedSlots ); 7.214 - } 7.215 - 7.216 - 7.217 -void 7.218 -create_the_coreLoop_OS_threads() 7.219 - { 7.220 - //======================================================================== 7.221 - // Create the Threads 7.222 - int coreIdx, retCode; 7.223 - 7.224 - //Need the threads to be created suspended, and wait for a signal 7.225 - // before proceeding -- gives time after creating to initialize other 7.226 - // stuff before the coreLoops set off. 7.227 - _VMSMasterEnv->setupComplete = 0; 7.228 - 7.229 - //Make the threads that animate the core loops 7.230 - for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ ) 7.231 - { coreLoopThdParams[coreIdx] = VMS__malloc( sizeof(ThdParams) ); 7.232 - coreLoopThdParams[coreIdx]->coreNum = coreIdx; 7.233 - 7.234 - retCode = 7.235 - pthread_create( &(coreLoopThdHandles[coreIdx]), 7.236 - thdAttrs, 7.237 - &coreLoop, 7.238 - (void *)(coreLoopThdParams[coreIdx]) ); 7.239 - if(retCode){printf("ERROR creating thread: %d\n", retCode); exit(1);} 7.240 - } 7.241 - } 7.242 - 7.243 -/*Semantic layer calls this when it want the system to start running.. 7.244 - * 7.245 - *This starts the core loops running then waits for them to exit. 
7.246 - */ 7.247 -void 7.248 -VMS__start_the_work_then_wait_until_done() 7.249 - { int coreIdx; 7.250 - //Start the core loops running 7.251 - 7.252 - //tell the core loop threads that setup is complete 7.253 - //get lock, to lock out any threads still starting up -- they'll see 7.254 - // that setupComplete is true before entering while loop, and so never 7.255 - // wait on the condition 7.256 - pthread_mutex_lock( &suspendLock ); 7.257 - _VMSMasterEnv->setupComplete = 1; 7.258 - pthread_mutex_unlock( &suspendLock ); 7.259 - pthread_cond_broadcast( &suspend_cond ); 7.260 - 7.261 - 7.262 - //wait for all to complete 7.263 - for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ ) 7.264 - { 7.265 - pthread_join( coreLoopThdHandles[coreIdx], NULL ); 7.266 - } 7.267 - 7.268 - //NOTE: do not clean up VMS env here -- semantic layer has to have 7.269 - // a chance to clean up its environment first, then do a call to free 7.270 - // the Master env and rest of VMS locations 7.271 - } 7.272 - 7.273 -#ifdef SEQUENTIAL 7.274 -/*Only difference between version with an OS thread pinned to each core and 7.275 - * the sequential version of VMS is VMS__init_Seq, this, and coreLoop_Seq. 7.276 - */ 7.277 -void 7.278 -VMS__start_the_work_then_wait_until_done_Seq() 7.279 - { 7.280 - //Instead of un-suspending threads, just call the one and only 7.281 - // core loop (sequential version), in the main thread. 
7.282 - coreLoop_Seq( NULL ); 7.283 - flushRegisters(); 7.284 - 7.285 - } 7.286 -#endif 7.287 - 7.288 -inline VirtProcr * 7.289 -VMS__create_procr( VirtProcrFnPtr fnPtr, void *initialData ) 7.290 - { VirtProcr *newPr; 7.291 - void *stackLocs; 7.292 - 7.293 - newPr = VMS__malloc( sizeof(VirtProcr) ); 7.294 - stackLocs = VMS__malloc( VIRT_PROCR_STACK_SIZE ); 7.295 - if( stackLocs == 0 ) 7.296 - { perror("VMS__malloc stack"); exit(1); } 7.297 - 7.298 - return create_procr_helper( newPr, fnPtr, initialData, stackLocs ); 7.299 - } 7.300 - 7.301 -/* "ext" designates that it's for use outside the VMS system -- should only 7.302 - * be called from main thread or other thread -- never from code animated by 7.303 - * a VMS virtual processor. 7.304 - */ 7.305 -inline VirtProcr * 7.306 -VMS_ext__create_procr( VirtProcrFnPtr fnPtr, void *initialData ) 7.307 - { VirtProcr *newPr; 7.308 - char *stackLocs; 7.309 - 7.310 - newPr = malloc( sizeof(VirtProcr) ); 7.311 - stackLocs = malloc( VIRT_PROCR_STACK_SIZE ); 7.312 - if( stackLocs == 0 ) 7.313 - { perror("malloc stack"); exit(1); } 7.314 - 7.315 - return create_procr_helper( newPr, fnPtr, initialData, stackLocs ); 7.316 - } 7.317 - 7.318 - 7.319 -/*Anticipating multi-tasking 7.320 - */ 7.321 -void * 7.322 -VMS__give_sem_env_for( VirtProcr *animPr ) 7.323 - { 7.324 - return _VMSMasterEnv->semanticEnv; 7.325 - } 7.326 -//=========================================================================== 7.327 -/*there is a label inside this function -- save the addr of this label in 7.328 - * the callingPr struc, as the pick-up point from which to start the next 7.329 - * work-unit for that procr. If turns out have to save registers, then 7.330 - * save them in the procr struc too. Then do assembly jump to the CoreLoop's 7.331 - * "done with work-unit" label. 
The procr struc is in the request in the 7.332 - * slave that animated the just-ended work-unit, so all the state is saved 7.333 - * there, and will get passed along, inside the request handler, to the 7.334 - * next work-unit for that procr. 7.335 - */ 7.336 -void 7.337 -VMS__suspend_procr( VirtProcr *animatingPr ) 7.338 - { 7.339 - 7.340 - //The request to master will cause this suspended virt procr to get 7.341 - // scheduled again at some future point -- to resume, core loop jumps 7.342 - // to the resume point (below), which causes restore of saved regs and 7.343 - // "return" from this call. 7.344 - //animatingPr->nextInstrPt = &&ResumePt; 7.345 - 7.346 - //return ownership of the virt procr and sched slot to Master virt pr 7.347 - animatingPr->schedSlot->workIsDone = TRUE; 7.348 - 7.349 - //=========================== Measurement stuff ======================== 7.350 - #ifdef MEAS__TIME_STAMP_SUSP 7.351 - //record time stamp: compare to time-stamp recorded below 7.352 - saveLowTimeStampCountInto( animatingPr->preSuspTSCLow ); 7.353 - #endif 7.354 - //======================================================================= 7.355 - 7.356 - switchToCoreLoop(animatingPr); 7.357 - flushRegisters(); 7.358 - 7.359 - //======================================================================= 7.360 - 7.361 - #ifdef MEAS__TIME_STAMP_SUSP 7.362 - //NOTE: only take low part of count -- do sanity check when take diff 7.363 - saveLowTimeStampCountInto( animatingPr->postSuspTSCLow ); 7.364 - #endif 7.365 - 7.366 - return; 7.367 - } 7.368 - 7.369 - 7.370 - 7.371 -/*For this implementation of VMS, it may not make much sense to have the 7.372 - * system of requests for creating a new processor done this way.. 
but over 7.373 - * the scope of single-master, multi-master, mult-tasking, OS-implementing, 7.374 - * distributed-memory, and so on, this gives VMS implementation a chance to 7.375 - * do stuff before suspend, in the AppVP, and in the Master before the plugin 7.376 - * is called, as well as in the lang-lib before this is called, and in the 7.377 - * plugin. So, this gives both VMS and language implementations a chance to 7.378 - * intercept at various points and do order-dependent stuff. 7.379 - *Having a standard VMSNewPrReqData struc allows the language to create and 7.380 - * free the struc, while VMS knows how to get the newPr if it wants it, and 7.381 - * it lets the lang have lang-specific data related to creation transported 7.382 - * to the plugin. 7.383 - */ 7.384 -void 7.385 -VMS__send_create_procr_req( void *semReqData, VirtProcr *reqstingPr ) 7.386 - { VMSReqst req; 7.387 - 7.388 - req.reqType = createReq; 7.389 - req.semReqData = semReqData; 7.390 - req.nextReqst = reqstingPr->requests; 7.391 - reqstingPr->requests = &req; 7.392 - 7.393 - VMS__suspend_procr( reqstingPr ); 7.394 - } 7.395 - 7.396 - 7.397 -/* 7.398 - *This adds a request to dissipate, then suspends the processor so that the 7.399 - * request handler will receive the request. The request handler is what 7.400 - * does the work of freeing memory and removing the processor from the 7.401 - * semantic environment's data structures. 7.402 - *The request handler also is what figures out when to shutdown the VMS 7.403 - * system -- which causes all the core loop threads to die, and returns from 7.404 - * the call that started up VMS to perform the work. 
7.405 - * 7.406 - *This form is a bit misleading to understand if one is trying to figure out 7.407 - * how VMS works -- it looks like a normal function call, but inside it 7.408 - * sends a request to the request handler and suspends the processor, which 7.409 - * jumps out of the VMS__dissipate_procr function, and out of all nestings 7.410 - * above it, transferring the work of dissipating to the request handler, 7.411 - * which then does the actual work -- causing the processor that animated 7.412 - * the call of this function to disappear and the "hanging" state of this 7.413 - * function to just poof into thin air -- the virtual processor's trace 7.414 - * never returns from this call, but instead the virtual processor's trace 7.415 - * gets suspended in this call and all the virt processor's state disap- 7.416 - * pears -- making that suspend the last thing in the virt procr's trace. 7.417 - */ 7.418 -void 7.419 -VMS__send_dissipate_req( VirtProcr *procrToDissipate ) 7.420 - { VMSReqst req; 7.421 - 7.422 - req.reqType = dissipate; 7.423 - req.nextReqst = procrToDissipate->requests; 7.424 - procrToDissipate->requests = &req; 7.425 - 7.426 - VMS__suspend_procr( procrToDissipate ); 7.427 - } 7.428 - 7.429 - 7.430 -/* "ext" designates that it's for use outside the VMS system -- should only 7.431 - * be called from main thread or other thread -- never from code animated by 7.432 - * a VMS virtual processor. 7.433 - * 7.434 - *Use this version to dissipate VPs created outside the VMS system. 7.435 - */ 7.436 -void 7.437 -VMS_ext__dissipate_procr( VirtProcr *procrToDissipate ) 7.438 - { 7.439 - //NOTE: initialData was given to the processor, so should either have 7.440 - // been alloc'd with VMS__malloc, or freed by the level above animPr. 
7.441 - //So, all that's left to free here is the stack and the VirtProcr struc 7.442 - // itself 7.443 - //Note, should not stack-allocate initial data -- no guarantee, in 7.444 - // general that creating processor will outlive ones it creates. 7.445 - free( procrToDissipate->startOfStack ); 7.446 - free( procrToDissipate ); 7.447 - } 7.448 - 7.449 - 7.450 - 7.451 -/*This call's name indicates that request is malloc'd -- so req handler 7.452 - * has to free any extra requests tacked on before a send, using this. 7.453 - * 7.454 - * This inserts the semantic-layer's request data into standard VMS carrier 7.455 - * request data-struct that is mallocd. The sem request doesn't need to 7.456 - * be malloc'd if this is called inside the same call chain before the 7.457 - * send of the last request is called. 7.458 - * 7.459 - *The request handler has to call VMS__free_VMSReq for any of these 7.460 - */ 7.461 -inline void 7.462 -VMS__add_sem_request_in_mallocd_VMSReqst( void *semReqData, 7.463 - VirtProcr *callingPr ) 7.464 - { VMSReqst *req; 7.465 - 7.466 - req = VMS__malloc( sizeof(VMSReqst) ); 7.467 - req->reqType = semantic; 7.468 - req->semReqData = semReqData; 7.469 - req->nextReqst = callingPr->requests; 7.470 - callingPr->requests = req; 7.471 - } 7.472 - 7.473 -/*This inserts the semantic-layer's request data into standard VMS carrier 7.474 - * request data-struct is allocated on stack of this call & ptr to it sent 7.475 - * to plugin 7.476 - *Then it does suspend, to cause request to be sent. 
7.477 - */ 7.478 -inline void 7.479 -VMS__send_sem_request( void *semReqData, VirtProcr *callingPr ) 7.480 - { VMSReqst req; 7.481 - 7.482 - req.reqType = semantic; 7.483 - req.semReqData = semReqData; 7.484 - req.nextReqst = callingPr->requests; 7.485 - callingPr->requests = &req; 7.486 - 7.487 - VMS__suspend_procr( callingPr ); 7.488 - } 7.489 - 7.490 - 7.491 -inline void 7.492 -VMS__send_VMSSem_request( void *semReqData, VirtProcr *callingPr ) 7.493 - { VMSReqst req; 7.494 - 7.495 - req.reqType = VMSSemantic; 7.496 - req.semReqData = semReqData; 7.497 - req.nextReqst = callingPr->requests; //gab any other preceeding 7.498 - callingPr->requests = &req; 7.499 - 7.500 - VMS__suspend_procr( callingPr ); 7.501 - } 7.502 - 7.503 - 7.504 -/* 7.505 - */ 7.506 -VMSReqst * 7.507 -VMS__take_next_request_out_of( VirtProcr *procrWithReq ) 7.508 - { VMSReqst *req; 7.509 - 7.510 - req = procrWithReq->requests; 7.511 - if( req == NULL ) return NULL; 7.512 - 7.513 - procrWithReq->requests = procrWithReq->requests->nextReqst; 7.514 - return req; 7.515 - } 7.516 - 7.517 - 7.518 -inline void * 7.519 -VMS__take_sem_reqst_from( VMSReqst *req ) 7.520 - { 7.521 - return req->semReqData; 7.522 - } 7.523 - 7.524 - 7.525 - 7.526 -/* This is for OS requests and VMS infrastructure requests, such as to create 7.527 - * a probe -- a probe is inside the heart of VMS-core, it's not part of any 7.528 - * language -- but it's also a semantic thing that's triggered from and used 7.529 - * in the application.. so it crosses abstractions.. so, need some special 7.530 - * pattern here for handling such requests. 7.531 - * Doing this just like it were a second language sharing VMS-core. 
7.532 - * 7.533 - * This is called from the language's request handler when it sees a request 7.534 - * of type VMSSemReq 7.535 - * 7.536 - * TODO: Later change this, to give probes their own separate plugin & have 7.537 - * VMS-core steer the request to appropriate plugin 7.538 - * Do the same for OS calls -- look later at it.. 7.539 - */ 7.540 -void inline 7.541 -VMS__handle_VMSSemReq( VMSReqst *req, VirtProcr *requestingPr, void *semEnv, 7.542 - ResumePrFnPtr resumePrFnPtr ) 7.543 - { VMSSemReq *semReq; 7.544 - IntervalProbe *newProbe; 7.545 - 7.546 - semReq = req->semReqData; 7.547 - 7.548 - newProbe = VMS__malloc( sizeof(IntervalProbe) ); 7.549 - newProbe->nameStr = VMS__strDup( semReq->nameStr ); 7.550 - newProbe->hist = NULL; 7.551 - newProbe->schedChoiceWasRecorded = FALSE; 7.552 - 7.553 - //This runs in masterVP, so no race-condition worries 7.554 - newProbe->probeID = 7.555 - addToDynArray( newProbe, _VMSMasterEnv->dynIntervalProbesInfo ); 7.556 - 7.557 - requestingPr->dataRetFromReq = newProbe; 7.558 - 7.559 - (*resumePrFnPtr)( requestingPr, semEnv ); 7.560 - } 7.561 - 7.562 - 7.563 - 7.564 -/*This must be called by the request handler plugin -- it cannot be called 7.565 - * from the semantic library "dissipate processor" function -- instead, the 7.566 - * semantic layer has to generate a request, and the plug-in calls this 7.567 - * function. 7.568 - *The reason is that this frees the virtual processor's stack -- which is 7.569 - * still in use inside semantic library calls! 7.570 - * 7.571 - *This frees or recycles all the state owned by and comprising the VMS 7.572 - * portion of the animating virtual procr. The request handler must first 7.573 - * free any semantic data created for the processor that didn't use the 7.574 - * VMS_malloc mechanism. Then it calls this, which first asks the malloc 7.575 - * system to disown any state that did use VMS_malloc, and then frees the 7.576 - * statck and the processor-struct itself. 
7.577 - *If the dissipated processor is the sole (remaining) owner of VMS__malloc'd 7.578 - * state, then that state gets freed (or sent to recycling) as a side-effect 7.579 - * of dis-owning it. 7.580 - */ 7.581 -void 7.582 -VMS__dissipate_procr( VirtProcr *animatingPr ) 7.583 - { 7.584 - //dis-own all locations owned by this processor, causing to be freed 7.585 - // any locations that it is (was) sole owner of 7.586 -//TODO: implement VMS__malloc system, including "give up ownership" 7.587 - 7.588 - 7.589 - //NOTE: initialData was given to the processor, so should either have 7.590 - // been alloc'd with VMS__malloc, or freed by the level above animPr. 7.591 - //So, all that's left to free here is the stack and the VirtProcr struc 7.592 - // itself 7.593 - //Note, should not stack-allocate initial data -- no guarantee, in 7.594 - // general that creating processor will outlive ones it creates. 7.595 - VMS__free( animatingPr->startOfStack ); 7.596 - VMS__free( animatingPr ); 7.597 - } 7.598 - 7.599 - 7.600 -//TODO: look at architecting cleanest separation between request handler 7.601 -// and master loop, for dissipate, create, shutdown, and other non-semantic 7.602 -// requests. Issue is chain: one removes requests from AppVP, one dispatches 7.603 -// on type of request, and one handles each type.. but some types require 7.604 -// action from both request handler and master loop -- maybe just give the 7.605 -// request handler calls like: VMS__handle_X_request_type 7.606 - 7.607 - 7.608 -/*This is called by the semantic layer's request handler when it decides its 7.609 - * time to shut down the VMS system. Calling this causes the core loop OS 7.610 - * threads to exit, which unblocks the entry-point function that started up 7.611 - * VMS, and allows it to grab the result and return to the original single- 7.612 - * threaded application. 
7.613 - * 7.614 - *The _VMSMasterEnv is needed by this shut down function, so the create-seed- 7.615 - * and-wait function has to free a bunch of stuff after it detects the 7.616 - * threads have all died: the masterEnv, the thread-related locations, 7.617 - * masterVP any AppVPs that might still be allocated and sitting in the 7.618 - * semantic environment, or have been orphaned in the _VMSWorkQ. 7.619 - * 7.620 - *NOTE: the semantic plug-in is expected to use VMS__malloc to get all the 7.621 - * locations it needs, and give ownership to masterVP. Then, they will be 7.622 - * automatically freed. 7.623 - * 7.624 - *In here,create one core-loop shut-down processor for each core loop and put 7.625 - * them all directly into the readyToAnimateQ. 7.626 - *Note, this function can ONLY be called after the semantic environment no 7.627 - * longer cares if AppVPs get animated after the point this is called. In 7.628 - * other words, this can be used as an abort, or else it should only be 7.629 - * called when all AppVPs have finished dissipate requests -- only at that 7.630 - * point is it sure that all results have completed. 7.631 - */ 7.632 -void 7.633 -VMS__shutdown() 7.634 - { int coreIdx; 7.635 - VirtProcr *shutDownPr; 7.636 - 7.637 - //create the shutdown processors, one for each core loop -- put them 7.638 - // directly into the Q -- each core will die when gets one 7.639 - for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ ) 7.640 - { //Note, this is running in the master 7.641 - shutDownPr = VMS__create_procr( &endOSThreadFn, NULL ); 7.642 - writeVMSQ( shutDownPr, _VMSMasterEnv->readyToAnimateQs[coreIdx] ); 7.643 - } 7.644 - 7.645 - } 7.646 - 7.647 - 7.648 -/*Am trying to be cute, avoiding IF statement in coreLoop that checks for 7.649 - * a special shutdown procr. Ended up with extra-complex shutdown sequence. 7.650 - *This function has the sole purpose of setting the stack and framePtr 7.651 - * to the coreLoop's stack and framePtr.. 
it does that then jumps to the 7.652 - * core loop's shutdown point -- might be able to just call Pthread_exit 7.653 - * from here, but am going back to the pthread's stack and setting everything 7.654 - * up just as if it never jumped out, before calling pthread_exit. 7.655 - *The end-point of core loop will free the stack and so forth of the 7.656 - * processor that animates this function, (this fn is transfering the 7.657 - * animator of the AppVP that is in turn animating this function over 7.658 - * to core loop function -- note that this slices out a level of virtual 7.659 - * processors). 7.660 - */ 7.661 -void 7.662 -endOSThreadFn( void *initData, VirtProcr *animatingPr ) 7.663 - { 7.664 -#ifdef SEQUENTIAL 7.665 - asmTerminateCoreLoopSeq(animatingPr); 7.666 -#else 7.667 - asmTerminateCoreLoop(animatingPr); 7.668 -#endif 7.669 - } 7.670 - 7.671 - 7.672 -/*This is called from the startup & shutdown 7.673 - */ 7.674 -void 7.675 -VMS__cleanup_at_end_of_shutdown() 7.676 - { 7.677 - //unused 7.678 - //VMSQueueStruc **readyToAnimateQs; 7.679 - //int coreIdx; 7.680 - //VirtProcr **masterVPs; 7.681 - //SchedSlot ***allSchedSlots; //ptr to array of ptrs 7.682 - 7.683 - //Before getting rid of everything, print out any measurements made 7.684 - forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, (DynArrayFnPtr)&printHist ); 7.685 - forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, (DynArrayFnPtr)&saveHistToFile); 7.686 - forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, &freeHist ); 7.687 - #ifdef MEAS__TIME_PLUGIN 7.688 - printHist( _VMSMasterEnv->reqHdlrLowTimeHist ); 7.689 - saveHistToFile( _VMSMasterEnv->reqHdlrLowTimeHist ); 7.690 - printHist( _VMSMasterEnv->reqHdlrHighTimeHist ); 7.691 - saveHistToFile( _VMSMasterEnv->reqHdlrHighTimeHist ); 7.692 - freeHistExt( _VMSMasterEnv->reqHdlrLowTimeHist ); 7.693 - freeHistExt( _VMSMasterEnv->reqHdlrHighTimeHist ); 7.694 - #endif 7.695 - #ifdef MEAS__TIME_MALLOC 7.696 - printHist( _VMSMasterEnv->mallocTimeHist ); 7.697 - 
saveHistToFile( _VMSMasterEnv->mallocTimeHist ); 7.698 - printHist( _VMSMasterEnv->freeTimeHist ); 7.699 - saveHistToFile( _VMSMasterEnv->freeTimeHist ); 7.700 - freeHistExt( _VMSMasterEnv->mallocTimeHist ); 7.701 - freeHistExt( _VMSMasterEnv->freeTimeHist ); 7.702 - #endif 7.703 - #ifdef MEAS__TIME_MASTER_LOCK 7.704 - printHist( _VMSMasterEnv->masterLockLowTimeHist ); 7.705 - printHist( _VMSMasterEnv->masterLockHighTimeHist ); 7.706 - #endif 7.707 - #ifdef MEAS__TIME_MASTER 7.708 - printHist( _VMSMasterEnv->pluginTimeHist ); 7.709 - for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ ) 7.710 - { 7.711 - freeVMSQ( readyToAnimateQs[ coreIdx ] ); 7.712 - //master VPs were created external to VMS, so use external free 7.713 - VMS__dissipate_procr( masterVPs[ coreIdx ] ); 7.714 - 7.715 - freeSchedSlots( allSchedSlots[ coreIdx ] ); 7.716 - } 7.717 - #endif 7.718 - #ifdef MEAS__TIME_STAMP_SUSP 7.719 - printHist( _VMSMasterEnv->pluginTimeHist ); 7.720 - for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ ) 7.721 - { 7.722 - freeVMSQ( readyToAnimateQs[ coreIdx ] ); 7.723 - //master VPs were created external to VMS, so use external free 7.724 - VMS__dissipate_procr( masterVPs[ coreIdx ] ); 7.725 - 7.726 - freeSchedSlots( allSchedSlots[ coreIdx ] ); 7.727 - } 7.728 - #endif 7.729 - 7.730 - //All the environment data has been allocated with VMS__malloc, so just 7.731 - // free its internal big-chunk and all inside it disappear. 
7.732 -/* 7.733 - readyToAnimateQs = _VMSMasterEnv->readyToAnimateQs; 7.734 - masterVPs = _VMSMasterEnv->masterVPs; 7.735 - allSchedSlots = _VMSMasterEnv->allSchedSlots; 7.736 - 7.737 - for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ ) 7.738 - { 7.739 - freeVMSQ( readyToAnimateQs[ coreIdx ] ); 7.740 - //master VPs were created external to VMS, so use external free 7.741 - VMS__dissipate_procr( masterVPs[ coreIdx ] ); 7.742 - 7.743 - freeSchedSlots( allSchedSlots[ coreIdx ] ); 7.744 - } 7.745 - 7.746 - VMS__free( _VMSMasterEnv->readyToAnimateQs ); 7.747 - VMS__free( _VMSMasterEnv->masterVPs ); 7.748 - VMS__free( _VMSMasterEnv->allSchedSlots ); 7.749 - 7.750 - //============================= MEASUREMENT STUFF ======================== 7.751 - #ifdef STATS__TURN_ON_PROBES 7.752 - freeDynArrayDeep( _VMSMasterEnv->dynIntervalProbesInfo, &VMS__free_probe); 7.753 - #endif 7.754 - //======================================================================== 7.755 -*/ 7.756 - //These are the only two that use system free 7.757 - VMS_ext__free_free_list( _VMSMasterEnv->freeListHead ); 7.758 - free( (void *)_VMSMasterEnv ); 7.759 - } 7.760 - 7.761 - 7.762 -//================================ 7.763 - 7.764 - 7.765 -/*Later, improve this -- for now, just exits the application after printing 7.766 - * the error message. 7.767 - */ 7.768 -void 7.769 -VMS__throw_exception( char *msgStr, VirtProcr *reqstPr, VMSExcp *excpData ) 7.770 - { 7.771 - printf("%s",msgStr); 7.772 - fflush(stdin); 7.773 - exit(1); 7.774 - } 7.775 -
8.1 --- a/VMS.h Fri Feb 10 12:05:17 2012 +0100 8.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 8.3 @@ -1,579 +0,0 @@ 8.4 -/* 8.5 - * Copyright 2009 OpenSourceStewardshipFoundation.org 8.6 - * Licensed under GNU General Public License version 2 8.7 - * 8.8 - * Author: seanhalle@yahoo.com 8.9 - * 8.10 - */ 8.11 - 8.12 -#ifndef _VMS_H 8.13 -#define _VMS_H 8.14 -#define _GNU_SOURCE 8.15 - 8.16 -#include "VMS_primitive_data_types.h" 8.17 -#include "../../C_Libraries/Queue_impl/PrivateQueue.h" 8.18 -#include "../../C_Libraries/Histogram/Histogram.h" 8.19 -#include "../../C_Libraries/DynArray/DynArray.h" 8.20 -#include "../../C_Libraries/Hash_impl/PrivateHash.h" 8.21 -#include "vmalloc.h" 8.22 - 8.23 -#include <pthread.h> 8.24 -#include <sys/time.h> 8.25 - 8.26 - 8.27 -//=============================== Debug =================================== 8.28 -// 8.29 -//When SEQUENTIAL is defined, VMS does sequential exe in the main thread 8.30 -// It still does co-routines and all the mechanisms are the same, it just 8.31 -// has only a single thread and animates VPs one at a time 8.32 -//#define SEQUENTIAL 8.33 - 8.34 -//#define USE_WORK_STEALING 8.35 - 8.36 -//turns on the probe-instrumentation in the application -- when not 8.37 -// defined, the calls to the probe functions turn into comments 8.38 -#define STATS__ENABLE_PROBES 8.39 -//#define TURN_ON_DEBUG_PROBES 8.40 - 8.41 -//These defines turn types of bug messages on and off 8.42 -// be sure debug messages are un-commented (next block of defines) 8.43 -#define dbgAppFlow TRUE /* Top level flow of application code -- general*/ 8.44 -#define dbgProbes FALSE /* for issues inside probes themselves*/ 8.45 -#define dbgB2BMaster FALSE /* in coreloop, back to back master VPs*/ 8.46 -#define dbgRqstHdlr FALSE /* in request handler code*/ 8.47 - 8.48 -//Comment or un- the substitute half to turn on/off types of debug message 8.49 -#define DEBUG( bool, msg) \ 8.50 -// if( bool){ printf(msg); fflush(stdin);} 8.51 -#define DEBUG1( 
bool, msg, param) \ 8.52 -// if(bool){printf(msg, param); fflush(stdin);} 8.53 -#define DEBUG2( bool, msg, p1, p2) \ 8.54 -// if(bool) {printf(msg, p1, p2); fflush(stdin);} 8.55 - 8.56 -#define ERROR(msg) printf(msg); 8.57 -#define ERROR1(msg, param) printf(msg, param); 8.58 -#define ERROR2(msg, p1, p2) printf(msg, p1, p2); 8.59 - 8.60 -//=========================== STATS ======================= 8.61 - 8.62 - //when MEAS__TIME_STAMP_SUSP is defined, causes code to be inserted and 8.63 - // compiled-in that saves the low part of the time stamp count just before 8.64 - // suspending a processor and just after resuming that processorsrc/VPThread_lib/VMS/VMS.h:322: warning: previous declaration of ‘VMS__create_procr’ was here. It is 8.65 - // saved into a field added to VirtProcr. Have to sanity-check for 8.66 - // rollover of low portion into high portion. 8.67 -//#define MEAS__TIME_STAMP_SUSP 8.68 -//#define MEAS__TIME_MASTER 8.69 -#define MEAS__TIME_PLUGIN 8.70 -#define MEAS__TIME_MALLOC 8.71 -//#define MEAS__TIME_MASTER_LOCK 8.72 -#define MEAS__NUM_TIMES_TO_RUN 100000 8.73 - 8.74 - //For code that calculates normalization-offset between TSC counts of 8.75 - // different cores. 
8.76 -#define NUM_TSC_ROUND_TRIPS 10 8.77 - 8.78 - 8.79 -//========================= Hardware related Constants ===================== 8.80 - //This value is the number of hardware threads in the shared memory 8.81 - // machine 8.82 -//#define NUM_CORES 8 8.83 - 8.84 - // tradeoff amortizing master fixed overhead vs imbalance potential 8.85 - // when work-stealing, can make bigger, at risk of losing cache affinity 8.86 -#define NUM_SCHED_SLOTS 5 8.87 - 8.88 -#define MIN_WORK_UNIT_CYCLES 20000 8.89 - 8.90 -#define MASTERLOCK_RETRIES 10000 8.91 - 8.92 - // stack size in virtual processors created 8.93 -#define VIRT_PROCR_STACK_SIZE 0x8000 /* 32K */ 8.94 - 8.95 - // memory for VMS__malloc 8.96 -#define MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE 0x10000000 /* 256M */ 8.97 - 8.98 -#define CACHE_LINE 64 8.99 -#define PAGE_SIZE 4096 8.100 - 8.101 - 8.102 -//============================== 8.103 - 8.104 -#define SUCCESS 0 8.105 - 8.106 -#define writeVMSQ writePrivQ 8.107 -#define readVMSQ readPrivQ 8.108 -#define makeVMSQ makeVMSPrivQ 8.109 -#define numInVMSQ numInPrivQ 8.110 -#define VMSQueueStruc PrivQueueStruc 8.111 - 8.112 - 8.113 - 8.114 -//=========================================================================== 8.115 -typedef unsigned long long TSCount; 8.116 - 8.117 -typedef struct _SchedSlot SchedSlot; 8.118 -typedef struct _VMSReqst VMSReqst; 8.119 -typedef struct _VirtProcr VirtProcr; 8.120 -typedef struct _IntervalProbe IntervalProbe; 8.121 -typedef struct _GateStruc GateStruc; 8.122 - 8.123 - 8.124 -typedef VirtProcr * (*SlaveScheduler) ( void *, int ); //semEnv, coreIdx 8.125 -typedef void (*RequestHandler) ( VirtProcr *, void * ); //prWReqst, semEnv 8.126 -typedef void (*VirtProcrFnPtr) ( void *, VirtProcr * ); //initData, animPr 8.127 -typedef void VirtProcrFn ( void *, VirtProcr * ); //initData, animPr 8.128 -typedef void (*ResumePrFnPtr) ( VirtProcr *, void * ); 8.129 - 8.130 - 8.131 -//============= Requests =========== 8.132 -// 8.133 - 8.134 -enum VMSReqstType 
//avoid starting enums at 0, for debug reasons 8.135 - { 8.136 - semantic = 1, 8.137 - createReq, 8.138 - dissipate, 8.139 - VMSSemantic //goes with VMSSemReqst below 8.140 - }; 8.141 - 8.142 -struct _VMSReqst 8.143 - { 8.144 - enum VMSReqstType reqType;//used for dissipate and in future for IO requests 8.145 - void *semReqData; 8.146 - 8.147 - VMSReqst *nextReqst; 8.148 - }; 8.149 -//VMSReqst 8.150 - 8.151 -enum VMSSemReqstType //These are equivalent to semantic requests, but for 8.152 - { // VMS's services available directly to app, like OS 8.153 - createProbe = 1, // and probe services -- like a VMS-wide built-in lang 8.154 - openFile, 8.155 - otherIO 8.156 - }; 8.157 - 8.158 -typedef struct 8.159 - { enum VMSSemReqstType reqType; 8.160 - VirtProcr *requestingPr; 8.161 - char *nameStr; //for create probe 8.162 - } 8.163 - VMSSemReq; 8.164 - 8.165 - 8.166 -//==================== Core data structures =================== 8.167 - 8.168 -struct _SchedSlot 8.169 - { 8.170 - int workIsDone; 8.171 - int needsProcrAssigned; 8.172 - VirtProcr *procrAssignedToSlot; 8.173 - }; 8.174 -//SchedSlot 8.175 - 8.176 -/*WARNING: re-arranging this data structure could cause VP switching 8.177 - * assembly code to fail -- hard-codes offsets of fields 8.178 - */ 8.179 -struct _VirtProcr 8.180 - { int procrID; //for debugging -- count up each time create 8.181 - int coreAnimatedBy; 8.182 - void *startOfStack; 8.183 - void *stackPtr; 8.184 - void *framePtr; 8.185 - void *nextInstrPt; 8.186 - 8.187 - void *coreLoopStartPt; //allows proto-runtime to be linked later 8.188 - void *coreLoopFramePtr; //restore before jmp back to core loop 8.189 - void *coreLoopStackPtr; //restore before jmp back to core loop 8.190 - 8.191 - void *initialData; 8.192 - 8.193 - SchedSlot *schedSlot; 8.194 - VMSReqst *requests; 8.195 - 8.196 - void *semanticData; //this livesUSE_GNU here for the life of VP 8.197 - void *dataRetFromReq;//values returned from plugin to VP go here 8.198 - 8.199 - //=========== 
MEASUREMENT STUFF ========== 8.200 - #ifdef MEAS__TIME_STAMP_SUSP 8.201 - unsigned int preSuspTSCLow; 8.202 - unsigned int postSuspTSCLow; 8.203 - #endif 8.204 - #ifdef MEAS__TIME_MASTER /* in VirtProcr because multiple masterVPs*/ 8.205 - unsigned int startMasterTSCLow;USE_GNU 8.206 - unsigned int endMasterTSCLow; 8.207 - #endif 8.208 - //======================================== 8.209 - 8.210 - float64 createPtInSecs; //have space but don't use on some configs 8.211 - }; 8.212 -//VirtProcr 8.213 - 8.214 - 8.215 -/*WARNING: re-arranging this data structure could cause VP-switching 8.216 - * assembly code to fail -- hard-codes offsets of fields 8.217 - * (because -O3 messes with things otherwise) 8.218 - */ 8.219 -typedef struct 8.220 - { 8.221 - SlaveScheduler slaveScheduler; 8.222 - RequestHandler requestHandler; 8.223 - 8.224 - SchedSlot ***allSchedSlots; 8.225 - VMSQueueStruc **readyToAnimateQs; 8.226 - VirtProcr **masterVPs; 8.227 - 8.228 - void *semanticEnv; 8.229 - void *OSEventStruc; //for future, when add I/O to BLIS 8.230 - MallocProlog *freeListHead; 8.231 - int32 amtOfOutstandingMem; //total currently allocated 8.232 - 8.233 - void *coreLoopReturnPt;//addr to jump to to re-enter coreLoop 8.234 - 8.235 - int32 setupComplete; 8.236 - volatile int32 masterLock; 8.237 - 8.238 - int32 numMasterInARow[NUM_CORES];//detect back-to-back masterVP 8.239 - GateStruc *workStealingGates[ NUM_CORES ]; //concurrent work-steal 8.240 - int32 workStealingLock; 8.241 - 8.242 - int32 numProcrsCreated; //gives ordering to processor creation 8.243 - 8.244 - //=========== MEASUREMENT STUFF ============= 8.245 - IntervalProbe **intervalProbes; 8.246 - PrivDynArrayInfo *dynIntervalProbesInfo; 8.247 - HashTable *probeNameHashTbl; 8.248 - int32 masterCreateProbeID; 8.249 - float64 createPtInSecs; 8.250 - Histogram **measHists; 8.251 - PrivDynArrayInfo *measHistsInfo; 8.252 - #ifdef MEAS__TIME_PLUGIN 8.253 - Histogram *reqHdlrLowTimeHist; 8.254 - Histogram *reqHdlrHighTimeHist; 
8.255 - #endif 8.256 - #ifdef MEAS__TIME_MALLOC 8.257 - Histogram *mallocTimeHist; 8.258 - Histogram *freeTimeHist; 8.259 - #endif 8.260 - #ifdef MEAS__TIME_MASTER_LOCK 8.261 - Histogram *masterLockLowTimeHist; 8.262 - Histogram *masterLockHighTimeHist; 8.263 - #endif 8.264 - } 8.265 -MasterEnv; 8.266 - 8.267 -//========================= Extra Stuff Data Strucs ======================= 8.268 -typedef struct 8.269 - { 8.270 - 8.271 - } 8.272 -VMSExcp; 8.273 - 8.274 -struct _GateStruc 8.275 - { 8.276 - int32 gateClosed; 8.277 - int32 preGateProgress; 8.278 - int32 waitProgress; 8.279 - int32 exitProgress; 8.280 - }; 8.281 -//GateStruc 8.282 - 8.283 -//======================= OS Thread related =============================== 8.284 - 8.285 -void * coreLoop( void *paramsIn ); //standard PThreads fn prototype 8.286 -void * coreLoop_Seq( void *paramsIn ); //standard PThreads fn prototype 8.287 -void masterLoop( void *initData, VirtProcr *masterPr ); 8.288 - 8.289 - 8.290 -typedef struct 8.291 - { 8.292 - void *endThdPt; 8.293 - unsigned int coreNum; 8.294 - } 8.295 -ThdParams; 8.296 - 8.297 -pthread_t coreLoopThdHandles[ NUM_CORES ]; //pthread's virt-procr state 8.298 -ThdParams *coreLoopThdParams [ NUM_CORES ]; 8.299 -pthread_mutex_t suspendLock; 8.300 -pthread_cond_t suspend_cond; 8.301 - 8.302 - 8.303 - 8.304 -//===================== Global Vars =================== 8.305 - 8.306 -volatile MasterEnv *_VMSMasterEnv; 8.307 - 8.308 - 8.309 - 8.310 - 8.311 -//=========================== Function Prototypes ========================= 8.312 - 8.313 - 8.314 -//========== Setup and shutdown ========== 8.315 -void 8.316 -VMS__init(); 8.317 - 8.318 -void 8.319 -VMS__init_Seq(); 8.320 - 8.321 -void 8.322 -VMS__start_the_work_then_wait_until_done(); 8.323 - 8.324 -void 8.325 -VMS__start_the_work_then_wait_until_done_Seq(); 8.326 - 8.327 -inline VirtProcr * 8.328 -VMS__create_procr( VirtProcrFnPtr fnPtr, void *initialData ); 8.329 - 8.330 -void 8.331 -VMS__dissipate_procr( VirtProcr 
*procrToDissipate ); 8.332 - 8.333 - //Use this to create processor inside entry point & other places outside 8.334 - // the VMS system boundary (IE, not run in slave nor Master) 8.335 -VirtProcr * 8.336 -VMS_ext__create_procr( VirtProcrFnPtr fnPtr, void *initialData ); 8.337 - 8.338 -void 8.339 -VMS_ext__dissipate_procr( VirtProcr *procrToDissipate ); 8.340 - 8.341 -void 8.342 -VMS__throw_exception( char *msgStr, VirtProcr *reqstPr, VMSExcp *excpData ); 8.343 - 8.344 -void 8.345 -VMS__shutdown(); 8.346 - 8.347 -void 8.348 -VMS__cleanup_at_end_of_shutdown(); 8.349 - 8.350 -void * 8.351 -VMS__give_sem_env_for( VirtProcr *animPr ); 8.352 - 8.353 - 8.354 -//============== Request Related =============== 8.355 - 8.356 -void 8.357 -VMS__suspend_procr( VirtProcr *callingPr ); 8.358 - 8.359 -inline void 8.360 -VMS__add_sem_request_in_mallocd_VMSReqst( void *semReqData, VirtProcr *callingPr ); 8.361 - 8.362 -inline void 8.363 -VMS__send_sem_request( void *semReqData, VirtProcr *callingPr ); 8.364 - 8.365 -void 8.366 -VMS__send_create_procr_req( void *semReqData, VirtProcr *reqstingPr ); 8.367 - 8.368 -void inline 8.369 -VMS__send_dissipate_req( VirtProcr *prToDissipate ); 8.370 - 8.371 -inline void 8.372 -VMS__send_VMSSem_request( void *semReqData, VirtProcr *callingPr ); 8.373 - 8.374 -VMSReqst * 8.375 -VMS__take_next_request_out_of( VirtProcr *procrWithReq ); 8.376 - 8.377 -inline void * 8.378 -VMS__take_sem_reqst_from( VMSReqst *req ); 8.379 - 8.380 -void inline 8.381 -VMS__handle_VMSSemReq( VMSReqst *req, VirtProcr *requestingPr, void *semEnv, 8.382 - ResumePrFnPtr resumePrFnPtr ); 8.383 - 8.384 -//======================== STATS ====================== 8.385 - 8.386 -//===== RDTSC wrapper ===== //Also runs with x86_64 code 8.387 - 8.388 -#define saveTimeStampCountInto(low, high) \ 8.389 - asm volatile("RDTSC; \ 8.390 - movl %%eax, %0; \ 8.391 - movl %%edx, %1;" \ 8.392 - /* outputs */ : "=m" (low), "=m" (high)\ 8.393 - /* inputs */ : \ 8.394 - /* clobber */ : "%eax", 
"%edx" \ 8.395 - ); 8.396 - 8.397 -#define saveLowTimeStampCountInto(low) \ 8.398 - asm volatile("RDTSC; \ 8.399 - movl %%eax, %0;" \ 8.400 - /* outputs */ : "=m" (low) \ 8.401 - /* inputs */ : \ 8.402 - /* clobber */ : "%eax", "%edx" \ 8.403 - ); 8.404 - 8.405 -//==================== 8.406 -#define makeAMeasHist( idx, name, numBins, startVal, binWidth ) \ 8.407 - makeHighestDynArrayIndexBeAtLeast( _VMSMasterEnv->measHistsInfo, idx ); \ 8.408 - _VMSMasterEnv->measHists[idx] = \ 8.409 - makeFixedBinHist( numBins, startVal, binWidth, name ); 8.410 - 8.411 - 8.412 -#define MEAS__SUB_CREATE /*turn on/off subtraction of create from plugin*/ 8.413 - 8.414 -#ifdef VPTHREAD 8.415 - 8.416 -//VPThread 8.417 -#define createHistIdx 0 8.418 -#define mutexLockHistIdx 1 8.419 -#define mutexUnlockHistIdx 2 8.420 -#define condWaitHistIdx 3 8.421 -#define condSignalHistIdx 4 8.422 - 8.423 -#define MakeTheMeasHists() \ 8.424 - _VMSMasterEnv->measHistsInfo = \ 8.425 - makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \ 8.426 - makeAMeasHist( createHistIdx, "create", 250, 0, 100 ) \ 8.427 - makeAMeasHist( mutexLockHistIdx, "mutex_lock", 50, 0, 100 ) \ 8.428 - makeAMeasHist( mutexUnlockHistIdx, "mutex_unlock", 50, 0, 100 ) \ 8.429 - makeAMeasHist( condWaitHistIdx, "cond_wait", 50, 0, 100 ) \ 8.430 - makeAMeasHist( condSignalHistIdx, "cond_signal", 50, 0, 100 ) 8.431 - 8.432 -#endif 8.433 - 8.434 - 8.435 -#ifdef VCILK 8.436 - 8.437 -//VCilk 8.438 -#define spawnHistIdx 0 8.439 -#define syncHistIdx 1 8.440 - 8.441 -#define MakeTheMeasHists() \ 8.442 - _VMSMasterEnv->measHistsInfo = \ 8.443 - makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \ 8.444 - makeAMeasHist( spawnHistIdx, "Spawn", 50, 0, 200 ) \ 8.445 - makeAMeasHist( syncHistIdx, "Sync", 50, 0, 200 ) 8.446 - 8.447 - 8.448 -#endif 8.449 - 8.450 -#ifdef SSR 8.451 - 8.452 -//SSR 8.453 -#define SendFromToHistIdx 0 8.454 -#define SendOfTypeHistIdx 1 8.455 -#define ReceiveFromToHistIdx 2 8.456 -#define 
ReceiveOfTypeHistIdx 3 8.457 - 8.458 -#define MakeTheMeasHists() \ 8.459 - _VMSMasterEnv->measHistsInfo = \ 8.460 - makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \ 8.461 - makeAMeasHist( SendFromToHistIdx, "SendFromTo", 50, 0, 100 ) \ 8.462 - makeAMeasHist( SendOfTypeHistIdx, "SendOfType", 50, 0, 100 ) \ 8.463 - makeAMeasHist( ReceiveFromToHistIdx,"ReceiveFromTo", 50, 0, 100 ) \ 8.464 - makeAMeasHist( ReceiveOfTypeHistIdx,"ReceiveOfType", 50, 0, 100 ) 8.465 - 8.466 -#endif 8.467 - 8.468 -//=========================================================================== 8.469 -//VPThread 8.470 - 8.471 - 8.472 -#define Meas_startCreate \ 8.473 - int32 startStamp, endStamp; \ 8.474 - saveLowTimeStampCountInto( startStamp ); \ 8.475 - 8.476 -#define Meas_endCreate \ 8.477 - saveLowTimeStampCountInto( endStamp ); \ 8.478 - addIntervalToHist( startStamp, endStamp, \ 8.479 - _VMSMasterEnv->measHists[ createHistIdx ] ); 8.480 - 8.481 -#define Meas_startMutexLock \ 8.482 - int32 startStamp, endStamp; \ 8.483 - saveLowTimeStampCountInto( startStamp ); \ 8.484 - 8.485 -#define Meas_endMutexLock \ 8.486 - saveLowTimeStampCountInto( endStamp ); \ 8.487 - addIntervalToHist( startStamp, endStamp, \ 8.488 - _VMSMasterEnv->measHists[ mutexLockHistIdx ] ); 8.489 - 8.490 -#define Meas_startMutexUnlock \ 8.491 - int32 startStamp, endStamp; \ 8.492 - saveLowTimeStampCountInto( startStamp ); \ 8.493 - 8.494 -#define Meas_endMutexUnlock \ 8.495 - saveLowTimeStampCountInto( endStamp ); \ 8.496 - addIntervalToHist( startStamp, endStamp, \ 8.497 - _VMSMasterEnv->measHists[ mutexUnlockHistIdx ] ); 8.498 - 8.499 -#define Meas_startCondWait \ 8.500 - int32 startStamp, endStamp; \ 8.501 - saveLowTimeStampCountInto( startStamp ); \ 8.502 - 8.503 -#define Meas_endCondWait \ 8.504 - saveLowTimeStampCountInto( endStamp ); \ 8.505 - addIntervalToHist( startStamp, endStamp, \ 8.506 - _VMSMasterEnv->measHists[ condWaitHistIdx ] ); 8.507 - 8.508 -#define Meas_startCondSignal \ 8.509 - 
int32 startStamp, endStamp; \ 8.510 - saveLowTimeStampCountInto( startStamp ); \ 8.511 - 8.512 -#define Meas_endCondSignal \ 8.513 - saveLowTimeStampCountInto( endStamp ); \ 8.514 - addIntervalToHist( startStamp, endStamp, \ 8.515 - _VMSMasterEnv->measHists[ condSignalHistIdx ] ); 8.516 - 8.517 -//=========================================================================== 8.518 -// VCilk 8.519 -#define Meas_startSpawn \ 8.520 - int32 startStamp, endStamp; \ 8.521 - saveLowTimeStampCountInto( startStamp ); \ 8.522 - 8.523 -#define Meas_endSpawn \ 8.524 - saveLowTimeStampCountInto( endStamp ); \ 8.525 - addIntervalToHist( startStamp, endStamp, \ 8.526 - _VMSMasterEnv->measHists[ spawnHistIdx ] ); 8.527 - 8.528 -#define Meas_startSync \ 8.529 - int32 startStamp, endStamp; \ 8.530 - saveLowTimeStampCountInto( startStamp ); \ 8.531 - 8.532 -#define Meas_endSync \ 8.533 - saveLowTimeStampCountInto( endStamp ); \ 8.534 - addIntervalToHist( startStamp, endStamp, \ 8.535 - _VMSMasterEnv->measHists[ syncHistIdx ] ); 8.536 - 8.537 -//=========================================================================== 8.538 -// SSR 8.539 -#define Meas_startSendFromTo \ 8.540 - int32 startStamp, endStamp; \ 8.541 - saveLowTimeStampCountInto( startStamp ); \ 8.542 - 8.543 -#define Meas_endSendFromTo \ 8.544 - saveLowTimeStampCountInto( endStamp ); \ 8.545 - addIntervalToHist( startStamp, endStamp, \ 8.546 - _VMSMasterEnv->measHists[ SendFromToHistIdx ] ); 8.547 - 8.548 -#define Meas_startSendOfType \ 8.549 - int32 startStamp, endStamp; \ 8.550 - saveLowTimeStampCountInto( startStamp ); \ 8.551 - 8.552 -#define Meas_endSendOfType \ 8.553 - saveLowTimeStampCountInto( endStamp ); \ 8.554 - addIntervalToHist( startStamp, endStamp, \ 8.555 - _VMSMasterEnv->measHists[ SendOfTypeHistIdx ] ); 8.556 - 8.557 -#define Meas_startReceiveFromTo \ 8.558 - int32 startStamp, endStamp; \ 8.559 - saveLowTimeStampCountInto( startStamp ); \ 8.560 - 8.561 -#define Meas_endReceiveFromTo \ 8.562 - 
saveLowTimeStampCountInto( endStamp ); \ 8.563 - addIntervalToHist( startStamp, endStamp, \ 8.564 - _VMSMasterEnv->measHists[ ReceiveFromToHistIdx ] ); 8.565 - 8.566 -#define Meas_startReceiveOfType \ 8.567 - int32 startStamp, endStamp; \ 8.568 - saveLowTimeStampCountInto( startStamp ); \ 8.569 - 8.570 -#define Meas_endReceiveOfType \ 8.571 - saveLowTimeStampCountInto( endStamp ); \ 8.572 - addIntervalToHist( startStamp, endStamp, \ 8.573 - _VMSMasterEnv->measHists[ReceiveOfTypeHistIdx ] ); 8.574 - 8.575 -//===== 8.576 - 8.577 -#include "ProcrContext.h" 8.578 -#include "probes.h" 8.579 -#include "vutilities.h" 8.580 - 8.581 -#endif /* _VMS_H */ 8.582 -
9.1 --- a/VMS_primitive_data_types.h Fri Feb 10 12:05:17 2012 +0100 9.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 9.3 @@ -1,53 +0,0 @@ 9.4 -/* 9.5 - * Copyright 2009 OpenSourceStewardshipFoundation.org 9.6 - * Licensed under GNU General Public License version 2 9.7 - * 9.8 - * Author: seanhalle@yahoo.com 9.9 - * 9.10 - 9.11 - */ 9.12 - 9.13 -#ifndef _BLIS_PRIMITIVE_DATA_TYPES_H 9.14 -#define _BLIS_PRIMITIVE_DATA_TYPES_H 9.15 - 9.16 - 9.17 -/*For portability, need primitive data types that have a well defined 9.18 - * size, and well-defined layout into bytes 9.19 - *To do this, provide BLIS standard aliases for all primitive data types 9.20 - *These aliases must be used in all BLIS functions instead of the ANSI types 9.21 - * 9.22 - *These definitions will be replaced inside each specialization module 9.23 - * according to the compiler used in that module and the hardware being 9.24 - * specialized to. 9.25 - */ 9.26 -/* 9.27 -#define int8 char 9.28 -#define uint8 char 9.29 -#define int16 short 9.30 -#define uint16 unsigned short 9.31 -#define int32 int 9.32 -#define uint32 unsigned int 9.33 -#define int64 long long 9.34 -#define uint64 unsigned long long 9.35 -#define float32 float 9.36 -#define float64 double 9.37 -*/ 9.38 -typedef char bool8; 9.39 -typedef char int8; 9.40 -typedef char uint8; 9.41 -typedef short int16; 9.42 -typedef unsigned short uint16; 9.43 -typedef int int32; 9.44 -typedef unsigned int uint32; 9.45 -typedef long long int64; 9.46 -typedef unsigned long long uint64; 9.47 -typedef float float32; 9.48 -typedef double float64; 9.49 -//typedef double double float128; 9.50 -#define float128 double double 9.51 - 9.52 -#define TRUE 1 9.53 -#define FALSE 0 9.54 - 9.55 -#endif /* _BLIS_PRIMITIVE_DATA_TYPES_H */ 9.56 -
10.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 10.2 +++ b/__brch__DEPRECATED_README Sat Feb 11 21:47:25 2012 -0800 10.3 @@ -0,0 +1,29 @@ 10.4 +*DEPRECATED* as of Feb 2012, this branch should not be used. Too many variations of VMS for MC_shared exist. 10.5 + 10.6 +Instead, choose a branch that has the best implementation for the machine being run on. For example, single-socket with 2 cores, or with 4 cores, or with 8 cores all have their own branches with code tuned to that number of cores. AMD processors require different low-level tweaking than Intel, and so on. 10.7 + 10.8 +============== Background on Branch Naming ============ 10.9 + 10.10 +There are two kinds of branches: ones used to develop features, and ones tuned to particular hardware. A given HW branch may combine features from several feature-branches, picking and choosing among them. 10.11 + 10.12 +Legacy branches, from before Feb 2012, have random names. After Feb 2012, they're named by the scheme: 10.13 + 10.14 +feat__<feat_descr>__<HW_feat_dev_on> 10.15 + 10.16 +HW__<desc_of_HW_brch_tuned_for> 10.17 + 10.18 +where <HW_feat_dev_on> and <desc_of_HW_brch_tuned_for> follow the pattern: 10.19 + 10.20 +<num_socket> x <num_cores>_<Manuf>_<special_features> 10.21 + 10.22 +Examples: 10.23 + 10.24 +feat__exp_array_malloc 10.25 + 10.26 +feat__rand_backoff__4x10_Intel_WestmereEx 10.27 + 10.28 +HW__1x4_Intel_SandyBridge 10.29 + 10.30 +HW__4x10_Intel_WestmereEx 10.31 + 10.32 +HW__1x4_AMD_mobile
11.1 --- a/contextSwitch.s Fri Feb 10 12:05:17 2012 +0100 11.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 11.3 @@ -1,149 +0,0 @@ 11.4 -.data 11.5 - 11.6 - 11.7 -.text 11.8 - 11.9 -//Save return label address for the coreLoop to pointer 11.10 -//Arguments: Pointer to variable holding address 11.11 -.globl saveCoreLoopReturnAddr 11.12 -saveCoreLoopReturnAddr: 11.13 - movq $coreLoopReturn, %rcx #load label address 11.14 - movq %rcx, (%rdi) #save address to pointer 11.15 - ret 11.16 - 11.17 - 11.18 -//Initializes VirtProcrFn at first run for 64 bit mode 11.19 -//Puts argument from stack into registers 11.20 -.globl startVirtProcrFn 11.21 -startVirtProcrFn: 11.22 - movq %rdi , %rsi #get second argument from first argument of switchVP 11.23 - movq 0x08(%rsp), %rdi #get first argument 11.24 - movq (%rsp) , %rax #get function addr 11.25 - jmp *%rax 11.26 - 11.27 -//Switches form CoreLoop to VP ether a normal VP or the Master Loop 11.28 -//switch to virt procr's stack and frame ptr then jump to virt procr fn 11.29 -/* VirtProcr offsets: 11.30 - * 0x10 stackPtr 11.31 - * 0x18 framePtr 11.32 - * 0x20 nextInstrPt 11.33 - * 0x30 coreLoopFramePtr 11.34 - * 0x38 coreLoopStackPtr 11.35 - * 11.36 - * _VMSMasterEnv offsets: 11.37 - * 0x48 coreLoopReturnPt 11.38 - * 0x54 masterLock 11.39 - */ 11.40 -.globl switchToVP 11.41 -switchToVP: 11.42 - #VirtProcr in %rdi 11.43 - movq %rsp , 0x38(%rdi) #save core loop stack pointer 11.44 - movq %rbp , 0x30(%rdi) #save core loop frame pointer 11.45 - movq 0x10(%rdi), %rsp #restore stack pointer 11.46 - movq 0x18(%rdi), %rbp #restore frame pointer 11.47 - movq 0x20(%rdi), %rax #get jmp pointer 11.48 - jmp *%rax #jmp to VP 11.49 -coreLoopReturn: 11.50 - ret 11.51 - 11.52 - 11.53 -//switches to core loop. 
saves return address 11.54 -/* VirtProcr offsets: 11.55 - * 0x10 stackPtr 11.56 - * 0x18 framePtr 11.57 - * 0x20 nextInstrPt 11.58 - * 0x30 coreLoopFramePtr 11.59 - * 0x38 coreLoopStackPtr 11.60 - * 11.61 - * _VMSMasterEnv offsets: 11.62 - * 0x48 coreLoopReturnPt 11.63 - * 0x54 masterLock 11.64 - */ 11.65 -.globl switchToCoreLoop 11.66 -switchToCoreLoop: 11.67 - #VirtProcr in %rdi 11.68 - movq $VPReturn , 0x20(%rdi) #store return address 11.69 - movq %rsp , 0x10(%rdi) #save stack pointer 11.70 - movq %rbp , 0x18(%rdi) #save frame pointer 11.71 - movq 0x38(%rdi), %rsp #restore stack pointer 11.72 - movq 0x30(%rdi), %rbp #restore frame pointer 11.73 - movq $_VMSMasterEnv, %rcx 11.74 - movq (%rcx) , %rcx 11.75 - movq 0x48(%rcx), %rax #get CoreLoopStartPt 11.76 - jmp *%rax #jmp to CoreLoop 11.77 -VPReturn: 11.78 - ret 11.79 - 11.80 - 11.81 - 11.82 -//switches to core loop from master. saves return address 11.83 -//Releases masterLock so the next MasterLoop can be executed 11.84 -/* VirtProcr offsets: 11.85 - * 0x10 stackPtr 11.86 - * 0x18 framePtr 11.87 - * 0x20 nextInstrPt 11.88 - * 0x30 coreLoopFramePtr 11.89 - * 0x38 coreLoopStackPtr 11.90 - * 11.91 - * _VMSMasterEnv offsets: 11.92 - * 0x48 coreLoopReturnPt 11.93 - * 0x54 masterLock 11.94 - */ 11.95 -.globl masterSwitchToCoreLoop 11.96 -masterSwitchToCoreLoop: 11.97 - #VirtProcr in %rdi 11.98 - movq $MasterReturn, 0x20(%rdi) #store return address 11.99 - movq %rsp , 0x10(%rdi) #save stack pointer 11.100 - movq %rbp , 0x18(%rdi) #save frame pointer 11.101 - movq 0x38(%rdi), %rsp #restore stack pointer 11.102 - movq 0x30(%rdi), %rbp #restore frame pointer 11.103 - movq $_VMSMasterEnv, %rcx 11.104 - movq (%rcx) , %rcx 11.105 - movq 0x48(%rcx), %rax #get CoreLoopStartPt 11.106 - movl $0x0 , 0x54(%rcx) #release lock 11.107 - jmp *%rax #jmp to CoreLoop 11.108 -MasterReturn: 11.109 - ret 11.110 - 11.111 - 11.112 -//Switch to terminateCoreLoop 11.113 -//therefor switch to coreLoop context from master context 11.114 -// no 
need to call because the stack is already set up for switchVP 11.115 -// and virtPr is in %rdi 11.116 -// and both functions have the same argument. 11.117 -// do not save register of VP because this function will never return 11.118 -/* VirtProcr offsets: 11.119 - * 0x10 stackPtr 11.120 - * 0x18 framePtr 11.121 - * 0x20 nextInstrPt 11.122 - * 0x30 coreLoopFramePtr 11.123 - * 0x38 coreLoopStackPtr 11.124 - * 11.125 - * _VMSMasterEnv offsets: 11.126 - * 0x48 coreLoopReturnPt 11.127 - * 0x58 masterLock 11.128 - */ 11.129 -.globl asmTerminateCoreLoop 11.130 -asmTerminateCoreLoop: 11.131 - #VirtProcr in %rdi 11.132 - movq 0x38(%rdi), %rsp #restore stack pointer 11.133 - movq 0x30(%rdi), %rbp #restore frame pointer 11.134 - movq $terminateCoreLoop, %rax 11.135 - jmp *%rax #jmp to CoreLoop 11.136 - 11.137 - 11.138 -/* 11.139 - * This one for the sequential version is special. It discards the current stack 11.140 - * and returns directly from the coreLoop after VMS__dissipate_procr was called 11.141 - */ 11.142 -.globl asmTerminateCoreLoopSeq 11.143 -asmTerminateCoreLoopSeq: 11.144 - #VirtProcr in %rdi 11.145 - movq 0x38(%rdi), %rsp #restore stack pointer 11.146 - movq 0x30(%rdi), %rbp #restore frame pointer 11.147 - #argument is in %rdi 11.148 - call VMS__dissipate_procr 11.149 - movq %rbp , %rsp #goto the coreLoops stack 11.150 - pop %rbp #restore the old framepointer 11.151 - ret #return from core loop 11.152 -
12.1 --- a/probes.c Fri Feb 10 12:05:17 2012 +0100 12.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 12.3 @@ -1,354 +0,0 @@ 12.4 -/* 12.5 - * Copyright 2010 OpenSourceStewardshipFoundation 12.6 - * 12.7 - * Licensed under BSD 12.8 - */ 12.9 - 12.10 -#include <stdio.h> 12.11 -#include <malloc.h> 12.12 -#include <sys/time.h> 12.13 - 12.14 -#include "VMS.h" 12.15 -#include "Queue_impl/BlockingQueue.h" 12.16 -#include "Histogram/Histogram.h" 12.17 - 12.18 - 12.19 -//================================ STATS ==================================== 12.20 - 12.21 -inline TSCount getTSCount() 12.22 - { unsigned int low, high; 12.23 - TSCount out; 12.24 - 12.25 - saveTimeStampCountInto( low, high ); 12.26 - out = high; 12.27 - out = (out << 32) + low; 12.28 - return out; 12.29 - } 12.30 - 12.31 - 12.32 - 12.33 -//==================== Probes ================= 12.34 -#ifdef STATS__USE_TSC_PROBES 12.35 - 12.36 -int32 12.37 -VMS__create_histogram_probe( int32 numBins, float32 startValue, 12.38 - float32 binWidth, char *nameStr ) 12.39 - { IntervalProbe *newProbe; 12.40 - int32 idx; 12.41 - FloatHist *hist; 12.42 - 12.43 - idx = VMS__create_single_interval_probe( nameStr ); 12.44 - newProbe = _VMSMasterEnv->intervalProbes[ idx ]; 12.45 - 12.46 - hist = makeFloatHistogram( numBins, startValue, binWidth ); 12.47 - newProbe->hist = hist; 12.48 - return idx; 12.49 - } 12.50 - 12.51 -void 12.52 -VMS_impl__record_interval_start_in_probe( int32 probeID ) 12.53 - { IntervalProbe *probe; 12.54 - 12.55 - probe = _VMSMasterEnv->intervalProbes[ probeID ]; 12.56 - probe->startStamp = getTSCount(); 12.57 - } 12.58 - 12.59 -void 12.60 -VMS_impl__record_interval_end_in_probe( int32 probeID ) 12.61 - { IntervalProbe *probe; 12.62 - TSCount endStamp; 12.63 - 12.64 - endStamp = getTSCount(); 12.65 - 12.66 - probe = _VMSMasterEnv->intervalProbes[ probeID ]; 12.67 - probe->endStamp = endStamp; 12.68 - 12.69 - if( probe->hist != NULL ) 12.70 - { TSCount interval = probe->endStamp - probe->startStamp; 12.71 
- //if the interval is sane, then add to histogram 12.72 - if( interval < probe->hist->endOfRange * 10 ) 12.73 - addToFloatHist( interval, probe->hist ); 12.74 - } 12.75 - } 12.76 - 12.77 -void 12.78 -VMS_impl__print_stats_of_probe( int32 probeID ) 12.79 - { IntervalProbe *probe; 12.80 - 12.81 - probe = _VMSMasterEnv->intervalProbes[ probeID ]; 12.82 - 12.83 - if( probe->hist == NULL ) 12.84 - { 12.85 - printf("probe: %s, interval: %.6lf\n", probe->nameStr,probe->interval); 12.86 - } 12.87 - 12.88 - else 12.89 - { 12.90 - printf( "probe: %s\n", probe->nameStr ); 12.91 - printFloatHist( probe->hist ); 12.92 - } 12.93 - } 12.94 -#else 12.95 - 12.96 -/* 12.97 - * In practice, probe operations are called from the app, from inside slaves 12.98 - * -- so have to be sure each probe is single-VP owned, and be sure that 12.99 - * any place common structures are modified it's done inside the master. 12.100 - * So -- the only place common structures are modified is during creation. 12.101 - * after that, all mods are to individual instances. 12.102 - * 12.103 - * Thniking perhaps should change the semantics to be that probes are 12.104 - * attached to the virtual processor -- and then everything is guaranteed 12.105 - * to be isolated -- except then can't take any intervals that span VPs, 12.106 - * and would have to transfer the probes to Master env when VP dissipates.. 12.107 - * gets messy.. 
12.108 - * 12.109 - * For now, just making so that probe creation causes a suspend, so that 12.110 - * the dynamic array in the master env is only modified from the master 12.111 - * 12.112 - */ 12.113 -IntervalProbe * 12.114 -create_generic_probe( char *nameStr, VirtProcr *animPr ) 12.115 -{ 12.116 - VMSSemReq reqData; 12.117 - 12.118 - reqData.reqType = createProbe; 12.119 - reqData.nameStr = nameStr; 12.120 - 12.121 - VMS__send_VMSSem_request( &reqData, animPr ); 12.122 - 12.123 - return animPr->dataRetFromReq; 12.124 - } 12.125 - 12.126 -/*Use this version from outside VMS -- it uses external malloc, and modifies 12.127 - * dynamic array, so can't be animated in a slave VP 12.128 - */ 12.129 -IntervalProbe * 12.130 -ext__create_generic_probe( char *nameStr ) 12.131 - { IntervalProbe *newProbe; 12.132 - int32 nameLen; 12.133 - 12.134 - newProbe = malloc( sizeof(IntervalProbe) ); 12.135 - nameLen = strlen( nameStr ); 12.136 - newProbe->nameStr = malloc( nameLen ); 12.137 - memcpy( newProbe->nameStr, nameStr, nameLen ); 12.138 - newProbe->hist = NULL; 12.139 - newProbe->schedChoiceWasRecorded = FALSE; 12.140 - newProbe->probeID = 12.141 - addToDynArray( newProbe, _VMSMasterEnv->dynIntervalProbesInfo ); 12.142 - 12.143 - return newProbe; 12.144 - } 12.145 - 12.146 - 12.147 -/*Only call from inside master or main startup/shutdown thread 12.148 - */ 12.149 -void 12.150 -VMS_impl__free_probe( IntervalProbe *probe ) 12.151 - { if( probe->hist != NULL ) freeDblHist( probe->hist ); 12.152 - if( probe->nameStr != NULL) VMS__free( probe->nameStr ); 12.153 - VMS__free( probe ); 12.154 - } 12.155 - 12.156 - 12.157 -int32 12.158 -VMS_impl__record_time_point_into_new_probe( char *nameStr, VirtProcr *animPr) 12.159 - { IntervalProbe *newProbe; 12.160 - struct timeval *startStamp; 12.161 - float64 startSecs; 12.162 - 12.163 - newProbe = create_generic_probe( nameStr, animPr ); 12.164 - newProbe->endSecs = 0; 12.165 - 12.166 - gettimeofday( &(newProbe->startStamp), NULL); 12.167 
- 12.168 - //turn into a double 12.169 - startStamp = &(newProbe->startStamp); 12.170 - startSecs = startStamp->tv_sec + ( startStamp->tv_usec / 1000000.0 ); 12.171 - newProbe->startSecs = startSecs; 12.172 - 12.173 - return newProbe->probeID; 12.174 - } 12.175 - 12.176 -int32 12.177 -VMS_ext_impl__record_time_point_into_new_probe( char *nameStr ) 12.178 - { IntervalProbe *newProbe; 12.179 - struct timeval *startStamp; 12.180 - float64 startSecs; 12.181 - 12.182 - newProbe = ext__create_generic_probe( nameStr ); 12.183 - newProbe->endSecs = 0; 12.184 - 12.185 - gettimeofday( &(newProbe->startStamp), NULL); 12.186 - 12.187 - //turn into a double 12.188 - startStamp = &(newProbe->startStamp); 12.189 - startSecs = startStamp->tv_sec + ( startStamp->tv_usec / 1000000.0 ); 12.190 - newProbe->startSecs = startSecs; 12.191 - 12.192 - return newProbe->probeID; 12.193 - } 12.194 - 12.195 -int32 12.196 -VMS_impl__create_single_interval_probe( char *nameStr, VirtProcr *animPr ) 12.197 - { IntervalProbe *newProbe; 12.198 - 12.199 - newProbe = create_generic_probe( nameStr, animPr ); 12.200 - 12.201 - return newProbe->probeID; 12.202 - } 12.203 - 12.204 -int32 12.205 -VMS_impl__create_histogram_probe( int32 numBins, float64 startValue, 12.206 - float64 binWidth, char *nameStr, VirtProcr *animPr ) 12.207 - { IntervalProbe *newProbe; 12.208 - DblHist *hist; 12.209 - 12.210 - newProbe = create_generic_probe( nameStr, animPr ); 12.211 - 12.212 - hist = makeDblHistogram( numBins, startValue, binWidth ); 12.213 - newProbe->hist = hist; 12.214 - return newProbe->probeID; 12.215 - } 12.216 - 12.217 -void 12.218 -VMS_impl__index_probe_by_its_name( int32 probeID, VirtProcr *animPr ) 12.219 - { IntervalProbe *probe; 12.220 - 12.221 - //TODO: fix this To be in Master -- race condition 12.222 - probe = _VMSMasterEnv->intervalProbes[ probeID ]; 12.223 - 12.224 - addValueIntoTable(probe->nameStr, probe, _VMSMasterEnv->probeNameHashTbl); 12.225 - } 12.226 - 12.227 -IntervalProbe * 12.228 
-VMS_impl__get_probe_by_name( char *probeName, VirtProcr *animPr ) 12.229 - { 12.230 - //TODO: fix this To be in Master -- race condition 12.231 - return getValueFromTable( probeName, _VMSMasterEnv->probeNameHashTbl ); 12.232 - } 12.233 - 12.234 - 12.235 -/*Everything is local to the animating procr, so no need for request, do 12.236 - * work locally, in the anim Pr 12.237 - */ 12.238 -void 12.239 -VMS_impl__record_sched_choice_into_probe( int32 probeID, VirtProcr *animatingPr ) 12.240 - { IntervalProbe *probe; 12.241 - 12.242 - probe = _VMSMasterEnv->intervalProbes[ probeID ]; 12.243 - probe->schedChoiceWasRecorded = TRUE; 12.244 - probe->coreNum = animatingPr->coreAnimatedBy; 12.245 - probe->procrID = animatingPr->procrID; 12.246 - probe->procrCreateSecs = animatingPr->createPtInSecs; 12.247 - } 12.248 - 12.249 -/*Everything is local to the animating procr, so no need for request, do 12.250 - * work locally, in the anim Pr 12.251 - */ 12.252 -void 12.253 -VMS_impl__record_interval_start_in_probe( int32 probeID ) 12.254 - { IntervalProbe *probe; 12.255 - 12.256 - DEBUG( dbgProbes, "record start of interval\n" ) 12.257 - probe = _VMSMasterEnv->intervalProbes[ probeID ]; 12.258 - gettimeofday( &(probe->startStamp), NULL ); 12.259 - } 12.260 - 12.261 - 12.262 -/*Everything is local to the animating procr, so no need for request, do 12.263 - * work locally, in the anim Pr 12.264 - */ 12.265 -void 12.266 -VMS_impl__record_interval_end_in_probe( int32 probeID ) 12.267 - { IntervalProbe *probe; 12.268 - struct timeval *endStamp, *startStamp; 12.269 - float64 startSecs, endSecs; 12.270 - 12.271 - DEBUG( dbgProbes, "record end of interval\n" ) 12.272 - //possible seg-fault if array resized by diff core right after this 12.273 - // one gets probe..? Something like that? Might be safe.. 
don't care 12.274 - probe = _VMSMasterEnv->intervalProbes[ probeID ]; 12.275 - gettimeofday( &(probe->endStamp), NULL); 12.276 - 12.277 - //now turn into an interval held in a double 12.278 - startStamp = &(probe->startStamp); 12.279 - endStamp = &(probe->endStamp); 12.280 - 12.281 - startSecs = startStamp->tv_sec + ( startStamp->tv_usec / 1000000.0 ); 12.282 - endSecs = endStamp->tv_sec + ( endStamp->tv_usec / 1000000.0 ); 12.283 - 12.284 - probe->interval = endSecs - startSecs; 12.285 - probe->startSecs = startSecs; 12.286 - probe->endSecs = endSecs; 12.287 - 12.288 - if( probe->hist != NULL ) 12.289 - { 12.290 - //if the interval is sane, then add to histogram 12.291 - if( probe->interval < probe->hist->endOfRange * 10 ) 12.292 - addToDblHist( probe->interval, probe->hist ); 12.293 - } 12.294 - } 12.295 - 12.296 -void 12.297 -print_probe_helper( IntervalProbe *probe ) 12.298 - { 12.299 - printf( "\nprobe: %s, ", probe->nameStr ); 12.300 - 12.301 - 12.302 - if( probe->schedChoiceWasRecorded ) 12.303 - { printf( "coreNum: %d, procrID: %d, procrCreated: %0.6f | ", 12.304 - probe->coreNum, probe->procrID, probe->procrCreateSecs ); 12.305 - } 12.306 - 12.307 - if( probe->endSecs == 0 ) //just a single point in time 12.308 - { 12.309 - printf( " time point: %.6f\n", 12.310 - probe->startSecs - _VMSMasterEnv->createPtInSecs ); 12.311 - } 12.312 - else if( probe->hist == NULL ) //just an interval 12.313 - { 12.314 - printf( " startSecs: %.6f interval: %.6f\n", 12.315 - (probe->startSecs - _VMSMasterEnv->createPtInSecs), probe->interval); 12.316 - } 12.317 - else //a full histogram of intervals 12.318 - { 12.319 - printDblHist( probe->hist ); 12.320 - } 12.321 - } 12.322 - 12.323 -//TODO: change so pass around pointer to probe instead of its array-index.. 12.324 -// will eliminate chance for timing of resize to cause problems with the 12.325 -// lookup -- even though don't think it actually can cause problems.. 
12.326 -// there's no need to pass index around -- have hash table for names, and 12.327 -// only need it once, then have ptr to probe.. the thing about enum the 12.328 -// index and use that as name is clunky in practice -- just hash. 12.329 -void 12.330 -VMS_impl__print_stats_of_probe( int32 probeID ) 12.331 - { IntervalProbe *probe; 12.332 - 12.333 - probe = _VMSMasterEnv->intervalProbes[ probeID ]; 12.334 - 12.335 - print_probe_helper( probe ); 12.336 - } 12.337 - 12.338 - 12.339 -inline void doNothing(){}; 12.340 - 12.341 -void 12.342 -generic_print_probe( void *_probe ) 12.343 - { 12.344 - IntervalProbe *probe = (IntervalProbe *)_probe; 12.345 - 12.346 - //TODO segfault in printf 12.347 - //print_probe_helper( probe ); 12.348 - } 12.349 - 12.350 -void 12.351 -VMS_impl__print_stats_of_all_probes() 12.352 - { 12.353 - forAllInDynArrayDo( _VMSMasterEnv->dynIntervalProbesInfo, 12.354 - &generic_print_probe ); 12.355 - fflush( stdout ); 12.356 - } 12.357 -#endif
13.1 --- a/probes.h Fri Feb 10 12:05:17 2012 +0100 13.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 13.3 @@ -1,195 +0,0 @@ 13.4 -/* 13.5 - * Copyright 2009 OpenSourceStewardshipFoundation.org 13.6 - * Licensed under GNU General Public License version 2 13.7 - * 13.8 - * Author: seanhalle@yahoo.com 13.9 - * 13.10 - */ 13.11 - 13.12 -#ifndef _PROBES_H 13.13 -#define _PROBES_H 13.14 -#define _GNU_SOURCE 13.15 - 13.16 -#include "VMS_primitive_data_types.h" 13.17 - 13.18 -#include <sys/time.h> 13.19 - 13.20 - 13.21 - //when STATS__TURN_ON_PROBES is defined allows using probes to measure 13.22 - // time intervals. The probes are macros that only compile to something 13.23 - // when STATS__TURN_ON_PROBES is defined. The probes are saved in the 13.24 - // master env -- but only when this is defined. 13.25 - //The TSC probes use RDTSC instr, can be unreliable, Dbl uses gettimeofday 13.26 -#define STATS__TURN_ON_PROBES 13.27 -//#define STATS__USE_TSC_PROBES 13.28 -#define STATS__USE_DBL_PROBES 13.29 - 13.30 -//typedef struct _IntervalProbe IntervalProbe; //in VMS.h 13.31 - 13.32 -struct _IntervalProbe 13.33 - { 13.34 - char *nameStr; 13.35 - int32 probeID; 13.36 - 13.37 - int32 schedChoiceWasRecorded; 13.38 - int32 coreNum; 13.39 - int32 procrID; 13.40 - float64 procrCreateSecs; 13.41 - 13.42 - #ifdef STATS__USE_TSC_PROBES 13.43 - TSCount startStamp; 13.44 - TSCount endStamp; 13.45 - #else 13.46 - struct timeval startStamp; 13.47 - struct timeval endStamp; 13.48 - #endif 13.49 - float64 startSecs; 13.50 - float64 endSecs; 13.51 - float64 interval; 13.52 - DblHist *hist;//if NULL, then is single interval probe 13.53 - }; 13.54 - 13.55 - 13.56 -//============================= Statistics ================================== 13.57 - 13.58 - //Frequency of TS counts 13.59 - //TODO: change freq for each machine 13.60 -#define TSCOUNT_FREQ 3180000000 13.61 - 13.62 -inline TSCount getTSCount(); 13.63 - 13.64 - 13.65 -//======================== Probes ============================= 
13.66 -// 13.67 -// Use macros to allow turning probes off with a #define switch 13.68 -#ifdef STATS__ENABLE_PROBES 13.69 -int32 13.70 -VMS_impl__record_time_point_into_new_probe( char *nameStr,VirtProcr *animPr); 13.71 -#define VMS__record_time_point_into_new_probe( nameStr, animPr ) \ 13.72 - VMS_impl__record_time_point_in_new_probe( nameStr, animPr ) 13.73 - 13.74 -int32 13.75 -VMS_ext_impl__record_time_point_into_new_probe( char *nameStr ); 13.76 -#define VMS_ext__record_time_point_into_new_probe( nameStr ) \ 13.77 - VMS_ext_impl__record_time_point_into_new_probe( nameStr ) 13.78 - 13.79 - 13.80 -int32 13.81 -VMS_impl__create_single_interval_probe( char *nameStr, VirtProcr *animPr ); 13.82 -#define VMS__create_single_interval_probe( nameStr, animPr ) \ 13.83 - VMS_impl__create_single_interval_probe( nameStr, animPr ) 13.84 - 13.85 - 13.86 -int32 13.87 -VMS_impl__create_histogram_probe( int32 numBins, float64 startValue, 13.88 - float64 binWidth, char *nameStr, VirtProcr *animPr ); 13.89 -#define VMS__create_histogram_probe( numBins, startValue, \ 13.90 - binWidth, nameStr, animPr ) \ 13.91 - VMS_impl__create_histogram_probe( numBins, startValue, \ 13.92 - binWidth, nameStr, animPr ) 13.93 -void 13.94 -VMS_impl__free_probe( IntervalProbe *probe ); 13.95 -#define VMS__free_probe( probe ) \ 13.96 - VMS_impl__free_probe( probe ) 13.97 - 13.98 -void 13.99 -VMS_impl__index_probe_by_its_name( int32 probeID, VirtProcr *animPr ); 13.100 -#define VMS__index_probe_by_its_name( probeID, animPr ) \ 13.101 - VMS_impl__index_probe_by_its_name( probeID, animPr ) 13.102 - 13.103 -IntervalProbe * 13.104 -VMS_impl__get_probe_by_name( char *probeName, VirtProcr *animPr ); 13.105 -#define VMS__get_probe_by_name( probeID, animPr ) \ 13.106 - VMS_impl__get_probe_by_name( probeName, animPr ) 13.107 - 13.108 -void 13.109 -VMS_impl__record_sched_choice_into_probe( int32 probeID, VirtProcr *animPr ); 13.110 -#define VMS__record_sched_choice_into_probe( probeID, animPr ) \ 13.111 - 
VMS_impl__record_sched_choice_into_probe( probeID, animPr ) 13.112 - 13.113 -void 13.114 -VMS_impl__record_interval_start_in_probe( int32 probeID ); 13.115 -#define VMS__record_interval_start_in_probe( probeID ) \ 13.116 - VMS_impl__record_interval_start_in_probe( probeID ) 13.117 - 13.118 -void 13.119 -VMS_impl__record_interval_end_in_probe( int32 probeID ); 13.120 -#define VMS__record_interval_end_in_probe( probeID ) \ 13.121 - VMS_impl__record_interval_end_in_probe( probeID ) 13.122 - 13.123 -void 13.124 -VMS_impl__print_stats_of_probe( int32 probeID ); 13.125 -#define VMS__print_stats_of_probe( probeID ) \ 13.126 - VMS_impl__print_stats_of_probe( probeID ) 13.127 - 13.128 -void 13.129 -VMS_impl__print_stats_of_all_probes(); 13.130 -#define VMS__print_stats_of_all_probes() \ 13.131 - VMS_impl__print_stats_of_all_probes() 13.132 - 13.133 - 13.134 -#else 13.135 -int32 13.136 -VMS_impl__record_time_point_into_new_probe( char *nameStr,VirtProcr *animPr); 13.137 -#define VMS__record_time_point_into_new_probe( nameStr, animPr ) \ 13.138 - 0 /* do nothing */ 13.139 - 13.140 -int32 13.141 -VMS_ext_impl__record_time_point_into_new_probe( char *nameStr ); 13.142 -#define VMS_ext__record_time_point_into_new_probe( nameStr ) \ 13.143 - 0 /* do nothing */ 13.144 - 13.145 - 13.146 -int32 13.147 -VMS_impl__create_single_interval_probe( char *nameStr, VirtProcr *animPr ); 13.148 -#define VMS__create_single_interval_probe( nameStr, animPr ) \ 13.149 - 0 /* do nothing */ 13.150 - 13.151 - 13.152 -int32 13.153 -VMS_impl__create_histogram_probe( int32 numBins, float64 startValue, 13.154 - float64 binWidth, char *nameStr, VirtProcr *animPr ); 13.155 -#define VMS__create_histogram_probe( numBins, startValue, \ 13.156 - binWidth, nameStr, animPr ) \ 13.157 - 0 /* do nothing */ 13.158 - 13.159 -void 13.160 -VMS_impl__index_probe_by_its_name( int32 probeID, VirtProcr *animPr ); 13.161 -#define VMS__index_probe_by_its_name( probeID, animPr ) \ 13.162 - /* do nothing */ 13.163 - 13.164 
-IntervalProbe * 13.165 -VMS_impl__get_probe_by_name( char *probeName, VirtProcr *animPr ); 13.166 -#define VMS__get_probe_by_name( probeID, animPr ) \ 13.167 - NULL /* do nothing */ 13.168 - 13.169 -void 13.170 -VMS_impl__record_sched_choice_into_probe( int32 probeID, VirtProcr *animPr ); 13.171 -#define VMS__record_sched_choice_into_probe( probeID, animPr ) \ 13.172 - /* do nothing */ 13.173 - 13.174 -void 13.175 -VMS_impl__record_interval_start_in_probe( int32 probeID ); 13.176 -#define VMS__record_interval_start_in_probe( probeID ) \ 13.177 - /* do nothing */ 13.178 - 13.179 -void 13.180 -VMS_impl__record_interval_end_in_probe( int32 probeID ); 13.181 -#define VMS__record_interval_end_in_probe( probeID ) \ 13.182 - /* do nothing */ 13.183 - 13.184 -inline void doNothing(); 13.185 -void 13.186 -VMS_impl__print_stats_of_probe( int32 probeID ); 13.187 -#define VMS__print_stats_of_probe( probeID ) \ 13.188 - doNothing/* do nothing */ 13.189 - 13.190 -void 13.191 -VMS_impl__print_stats_of_all_probes(); 13.192 -#define VMS__print_stats_of_all_probes \ 13.193 - doNothing/* do nothing */ 13.194 - 13.195 -#endif /* defined STATS__ENABLE_PROBES */ 13.196 - 13.197 -#endif /* _PROBES_H */ 13.198 -
14.1 --- a/vmalloc.c Fri Feb 10 12:05:17 2012 +0100 14.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 14.3 @@ -1,495 +0,0 @@ 14.4 -/* 14.5 - * Copyright 2009 OpenSourceCodeStewardshipFoundation.org 14.6 - * Licensed under GNU General Public License version 2 14.7 - * 14.8 - * Author: seanhalle@yahoo.com 14.9 - * 14.10 - * Created on November 14, 2009, 9:07 PM 14.11 - */ 14.12 - 14.13 -#include <malloc.h> 14.14 -#include <inttypes.h> 14.15 -#include <stdlib.h> 14.16 -#include <stdio.h> 14.17 - 14.18 -#include "VMS.h" 14.19 -#include "Histogram/Histogram.h" 14.20 - 14.21 -/*Helper function 14.22 - *Insert a newly generated free chunk into the first spot on the free list. 14.23 - * The chunk is cast as a MallocProlog, so the various pointers in it are 14.24 - * accessed with C's help -- and the size of the prolog is easily added to 14.25 - * the pointer when a chunk is returned to the app -- so C handles changes 14.26 - * in pointer sizes among machines. 14.27 - * 14.28 - *The list head is a normal MallocProlog struct -- identified by its 14.29 - * prevChunkInFreeList being NULL -- the only one. 14.30 - * 14.31 - *The end of the list is identified by next chunk being NULL, as usual. 14.32 - */ 14.33 -void inline 14.34 -add_chunk_to_free_list( MallocProlog *chunk, MallocProlog *listHead ) 14.35 - { 14.36 - chunk->nextChunkInFreeList = listHead->nextChunkInFreeList; 14.37 - if( chunk->nextChunkInFreeList != NULL ) //if not last in free list 14.38 - chunk->nextChunkInFreeList->prevChunkInFreeList = chunk; 14.39 - chunk->prevChunkInFreeList = listHead; 14.40 - listHead->nextChunkInFreeList = chunk; 14.41 - } 14.42 - 14.43 - 14.44 -/*This is sequential code, meant to only be called from the Master, not from 14.45 - * any slave VPs. 14.46 - *Search down list, checking size by the nextHigherInMem pointer, to find 14.47 - * first chunk bigger than size needed. 
14.48 - *Shave off the extra and make it into a new free-list element, hook it in 14.49 - * then return the address of the found element plus size of prolog. 14.50 - * 14.51 - *Will find a 14.52 - */ 14.53 -void *VMS__malloc( size_t sizeRequested ) 14.54 - { MallocProlog *foundElem = NULL, *currElem, *newElem; 14.55 - ssize_t amountExtra, sizeConsumed,sizeOfFound; 14.56 - uint32 foundElemIsTopOfHeap; 14.57 - 14.58 - //============================= MEASUREMENT STUFF ======================== 14.59 - #ifdef MEAS__TIME_MALLOC 14.60 - int32 startStamp, endStamp; 14.61 - saveLowTimeStampCountInto( startStamp ); 14.62 - #endif 14.63 - //======================================================================== 14.64 - 14.65 - //step up the size to be aligned at 16-byte boundary, prob better ways 14.66 - sizeRequested = (sizeRequested + 16) & ~15; 14.67 - currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList; 14.68 - 14.69 - while( currElem != NULL ) 14.70 - { //check if size of currElem is big enough 14.71 - sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem); 14.72 - amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog); 14.73 - if( amountExtra > 0 ) 14.74 - { //found it, get out of loop 14.75 - foundElem = currElem; 14.76 - currElem = NULL; 14.77 - } 14.78 - else 14.79 - currElem = currElem->nextChunkInFreeList; 14.80 - } 14.81 - 14.82 - if( foundElem == NULL ) 14.83 - { ERROR("\nmalloc failed\n") 14.84 - return (void *)NULL; //indicates malloc failed 14.85 - } 14.86 - //Using a kludge to identify the element that is the top chunk in the 14.87 - // heap -- saving top-of-heap addr in head's nextHigherInMem -- and 14.88 - // save addr of start of heap in head's nextLowerInMem 14.89 - //Will handle top of Heap specially 14.90 - foundElemIsTopOfHeap = foundElem->nextHigherInMem == 14.91 - _VMSMasterEnv->freeListHead->nextHigherInMem; 14.92 - 14.93 - //before shave off and try to insert new elem, remove found elem 14.94 - //note, 
foundElem will never be the head, so always has valid prevChunk 14.95 - foundElem->prevChunkInFreeList->nextChunkInFreeList = 14.96 - foundElem->nextChunkInFreeList; 14.97 - if( foundElem->nextChunkInFreeList != NULL ) 14.98 - { foundElem->nextChunkInFreeList->prevChunkInFreeList = 14.99 - foundElem->prevChunkInFreeList; 14.100 - } 14.101 - foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated 14.102 - 14.103 - //if enough, turn extra into new elem & insert it 14.104 - if( amountExtra > 64 ) 14.105 - { //make new elem by adding to addr of curr elem then casting 14.106 - sizeConsumed = sizeof(MallocProlog) + sizeRequested; 14.107 - newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed ); 14.108 - newElem->nextLowerInMem = foundElem; //This is evil (but why?) 14.109 - newElem->nextHigherInMem = foundElem->nextHigherInMem; //This is evil (but why?) 14.110 - foundElem->nextHigherInMem = newElem; 14.111 - if( ! foundElemIsTopOfHeap ) 14.112 - { //there is no next higher for top of heap, so can't write to it 14.113 - newElem->nextHigherInMem->nextLowerInMem = newElem; 14.114 - } 14.115 - add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead ); 14.116 - } 14.117 - else 14.118 - { 14.119 - sizeConsumed = sizeOfFound; 14.120 - } 14.121 - _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed; 14.122 - 14.123 - //============================= MEASUREMENT STUFF ======================== 14.124 - #ifdef MEAS__TIME_MALLOC 14.125 - saveLowTimeStampCountInto( endStamp ); 14.126 - addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist ); 14.127 - #endif 14.128 - //======================================================================== 14.129 - 14.130 - //skip over the prolog by adding its size to the pointer return 14.131 - return (void*)((uintptr_t)foundElem + sizeof(MallocProlog)); 14.132 - } 14.133 - 14.134 -/*This is sequential code, meant to only be called from the Master, not from 14.135 - * any slave VPs. 
14.136 - *Search down list, checking size by the nextHigherInMem pointer, to find 14.137 - * first chunk bigger than size needed. 14.138 - *Shave off the extra and make it into a new free-list element, hook it in 14.139 - * then return the address of the found element plus size of prolog. 14.140 - * 14.141 - * The difference to the regular malloc is, that all the allocated chunks are 14.142 - * aligned and padded to the size of a CACHE_LINE. Thus creating a new chunk 14.143 - * before the aligned chunk. 14.144 - */ 14.145 -void *VMS__malloc_aligned( size_t sizeRequested ) 14.146 - { MallocProlog *foundElem = NULL, *currElem, *newElem; 14.147 - ssize_t amountExtra, sizeConsumed,sizeOfFound,prevAmount; 14.148 - uint32 foundElemIsTopOfHeap; 14.149 - 14.150 - //============================= MEASUREMENT STUFF ======================== 14.151 - #ifdef MEAS__TIME_MALLOC 14.152 - uint32 startStamp, endStamp; 14.153 - saveLowTimeStampCountInto( startStamp ); 14.154 - #endif 14.155 - //======================================================================== 14.156 - 14.157 - //step up the size to be multiple of the cache line size 14.158 - sizeRequested = (sizeRequested + CACHE_LINE) & ~(CACHE_LINE-1); 14.159 - currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList; 14.160 - 14.161 - while( currElem != NULL ) 14.162 - { //check if size of currElem is big enough 14.163 - sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem); 14.164 - amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog); 14.165 - if( amountExtra > 0 ) 14.166 - { 14.167 - //look if the found element is already aligned 14.168 - if((((uintptr_t)currElem+sizeof(MallocProlog)) & (uintptr_t)(CACHE_LINE-1)) == 0){ 14.169 - //found it, get out of loop 14.170 - foundElem = currElem; 14.171 - break; 14.172 - }else{ 14.173 - //find first aligned address and check if it's still big enough 14.174 - //check also if the space before the aligned address is big enough 14.175 - //for 
a new element 14.176 - void *firstAlignedAddr = (void*)(((uintptr_t)currElem + 2*CACHE_LINE) & ~((uintptr_t)(CACHE_LINE-1))); 14.177 - prevAmount = (uintptr_t)firstAlignedAddr - (uintptr_t)currElem; 14.178 - sizeOfFound=(uintptr_t)currElem->nextHigherInMem -(uintptr_t)firstAlignedAddr + sizeof(MallocProlog); 14.179 - amountExtra= sizeOfFound - sizeRequested - sizeof(MallocProlog); 14.180 - if(prevAmount > 2*sizeof(MallocProlog) && amountExtra > 0 ){ 14.181 - //found suitable element 14.182 - //create new previous element and exit loop 14.183 - MallocProlog *newAlignedElem = (MallocProlog*)firstAlignedAddr - 1; 14.184 - 14.185 - //insert new element into free list 14.186 - if(currElem->nextChunkInFreeList != NULL) 14.187 - currElem->nextChunkInFreeList->prevChunkInFreeList = newAlignedElem; 14.188 - newAlignedElem->prevChunkInFreeList = currElem; 14.189 - newAlignedElem->nextChunkInFreeList = currElem->nextChunkInFreeList; 14.190 - currElem->nextChunkInFreeList = newAlignedElem; 14.191 - 14.192 - //set higherInMem and lowerInMem 14.193 - newAlignedElem->nextHigherInMem = currElem->nextHigherInMem; 14.194 - foundElemIsTopOfHeap = currElem->nextHigherInMem == 14.195 - _VMSMasterEnv->freeListHead->nextHigherInMem; 14.196 - if(!foundElemIsTopOfHeap) 14.197 - currElem->nextHigherInMem->nextLowerInMem = newAlignedElem; 14.198 - currElem->nextHigherInMem = newAlignedElem; 14.199 - newAlignedElem->nextLowerInMem = currElem; 14.200 - 14.201 - //Found new element leaving loop 14.202 - foundElem = newAlignedElem; 14.203 - break; 14.204 - } 14.205 - } 14.206 - 14.207 - } 14.208 - currElem = currElem->nextChunkInFreeList; 14.209 - } 14.210 - 14.211 - if( foundElem == NULL ) 14.212 - { ERROR("\nmalloc failed\n") 14.213 - return (void *)NULL; //indicates malloc failed 14.214 - } 14.215 - //Using a kludge to identify the element that is the top chunk in the 14.216 - // heap -- saving top-of-heap addr in head's nextHigherInMem -- and 14.217 - // save addr of start of heap in head's 
nextLowerInMem 14.218 - //Will handle top of Heap specially 14.219 - foundElemIsTopOfHeap = foundElem->nextHigherInMem == 14.220 - _VMSMasterEnv->freeListHead->nextHigherInMem; 14.221 - 14.222 - //before shave off and try to insert new elem, remove found elem 14.223 - //note, foundElem will never be the head, so always has valid prevChunk 14.224 - foundElem->prevChunkInFreeList->nextChunkInFreeList = 14.225 - foundElem->nextChunkInFreeList; 14.226 - if( foundElem->nextChunkInFreeList != NULL ) 14.227 - { foundElem->nextChunkInFreeList->prevChunkInFreeList = 14.228 - foundElem->prevChunkInFreeList; 14.229 - } 14.230 - foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated 14.231 - 14.232 - //if enough, turn extra into new elem & insert it 14.233 - if( amountExtra > 64 ) 14.234 - { //make new elem by adding to addr of curr elem then casting 14.235 - sizeConsumed = sizeof(MallocProlog) + sizeRequested; 14.236 - newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed ); 14.237 - newElem->nextHigherInMem = foundElem->nextHigherInMem; 14.238 - newElem->nextLowerInMem = foundElem; 14.239 - foundElem->nextHigherInMem = newElem; 14.240 - 14.241 - if( ! 
foundElemIsTopOfHeap ) 14.242 - { //there is no next higher for top of heap, so can't write to it 14.243 - newElem->nextHigherInMem->nextLowerInMem = newElem; 14.244 - } 14.245 - add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead ); 14.246 - } 14.247 - else 14.248 - { 14.249 - sizeConsumed = sizeOfFound; 14.250 - } 14.251 - _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed; 14.252 - 14.253 - //============================= MEASUREMENT STUFF ======================== 14.254 - #ifdef MEAS__TIME_MALLOC 14.255 - saveLowTimeStampCountInto( endStamp ); 14.256 - addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist ); 14.257 - #endif 14.258 - //======================================================================== 14.259 - 14.260 - //skip over the prolog by adding its size to the pointer return 14.261 - return (void*)((uintptr_t)foundElem + sizeof(MallocProlog)); 14.262 - } 14.263 - 14.264 - 14.265 -/*This is sequential code -- only to be called from the Master 14.266 - * When free, subtract the size of prolog from pointer, then cast it to a 14.267 - * MallocProlog. Then check the nextLower and nextHigher chunks to see if 14.268 - * one or both are also free, and coalesce if so, and if neither free, then 14.269 - * add this one to free-list. 
14.270 - */ 14.271 -void 14.272 -VMS__free( void *ptrToFree ) 14.273 - { MallocProlog *elemToFree, *nextLowerElem, *nextHigherElem; 14.274 - size_t sizeOfElem; 14.275 - uint32 lowerExistsAndIsFree, higherExistsAndIsFree; 14.276 - 14.277 - //============================= MEASUREMENT STUFF ======================== 14.278 - #ifdef MEAS__TIME_MALLOC 14.279 - int32 startStamp, endStamp; 14.280 - saveLowTimeStampCountInto( startStamp ); 14.281 - #endif 14.282 - //======================================================================== 14.283 - 14.284 - if( ptrToFree < (void*)_VMSMasterEnv->freeListHead->nextLowerInMem || 14.285 - ptrToFree > (void*)_VMSMasterEnv->freeListHead->nextHigherInMem ) 14.286 - { //outside the range of data owned by VMS's malloc, so do nothing 14.287 - return; 14.288 - } 14.289 - //subtract size of prolog to get pointer to prolog, then cast 14.290 - elemToFree = (MallocProlog *)((uintptr_t)ptrToFree - sizeof(MallocProlog)); 14.291 - sizeOfElem =(size_t)((uintptr_t)elemToFree->nextHigherInMem-(uintptr_t)elemToFree); 14.292 - 14.293 - if( elemToFree->prevChunkInFreeList != NULL ) 14.294 - { printf( "error: freeing same element twice!" 
); exit(1); 14.295 - } 14.296 - 14.297 - _VMSMasterEnv->amtOfOutstandingMem -= sizeOfElem; 14.298 - 14.299 - nextLowerElem = elemToFree->nextLowerInMem; 14.300 - nextHigherElem = elemToFree->nextHigherInMem; 14.301 - 14.302 - if( nextHigherElem == NULL ) 14.303 - higherExistsAndIsFree = FALSE; 14.304 - else //okay exists, now check if in the free-list by checking back ptr 14.305 - higherExistsAndIsFree = (nextHigherElem->prevChunkInFreeList != NULL); 14.306 - 14.307 - if( nextLowerElem == NULL ) 14.308 - lowerExistsAndIsFree = FALSE; 14.309 - else //okay, it exists, now check if it's free 14.310 - lowerExistsAndIsFree = (nextLowerElem->prevChunkInFreeList != NULL); 14.311 - 14.312 - 14.313 - //now, know what exists and what's free 14.314 - if( lowerExistsAndIsFree ) 14.315 - { if( higherExistsAndIsFree ) 14.316 - { //both exist and are free, so coalesce all three 14.317 - //First, remove higher from free-list 14.318 - nextHigherElem->prevChunkInFreeList->nextChunkInFreeList = 14.319 - nextHigherElem->nextChunkInFreeList; 14.320 - if( nextHigherElem->nextChunkInFreeList != NULL ) //end-of-list? 14.321 - nextHigherElem->nextChunkInFreeList->prevChunkInFreeList = 14.322 - nextHigherElem->prevChunkInFreeList; 14.323 - //Now, fix-up sequence-in-mem list -- by side-effect, this also 14.324 - // changes size of the lower elem, which is still in free-list 14.325 - nextLowerElem->nextHigherInMem = nextHigherElem->nextHigherInMem; 14.326 - if( nextHigherElem->nextHigherInMem != 14.327 - _VMSMasterEnv->freeListHead->nextHigherInMem ) 14.328 - nextHigherElem->nextHigherInMem->nextLowerInMem = nextLowerElem; 14.329 - //notice didn't do anything to elemToFree -- it simply is no 14.330 - // longer reachable from any of the lists. Wonder if could be a 14.331 - // security leak because left valid addresses in it, 14.332 - // but don't care for now. 
14.333 - } 14.334 - else 14.335 - { //lower is the only of the two that exists and is free, 14.336 - //In this case, no adjustment to free-list, just change mem-list. 14.337 - // By side-effect, changes size of the lower elem 14.338 - nextLowerElem->nextHigherInMem = elemToFree->nextHigherInMem; 14.339 - if( elemToFree->nextHigherInMem != 14.340 - _VMSMasterEnv->freeListHead->nextHigherInMem ) 14.341 - elemToFree->nextHigherInMem->nextLowerInMem = nextLowerElem; 14.342 - } 14.343 - } 14.344 - else 14.345 - { //lower either doesn't exist or isn't free, so check higher 14.346 - if( higherExistsAndIsFree ) 14.347 - { //higher exists and is the only of the two free 14.348 - //First, in free-list, replace higher elem with the one to free 14.349 - elemToFree->nextChunkInFreeList=nextHigherElem->nextChunkInFreeList; 14.350 - elemToFree->prevChunkInFreeList=nextHigherElem->prevChunkInFreeList; 14.351 - elemToFree->prevChunkInFreeList->nextChunkInFreeList = elemToFree; 14.352 - if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list? 14.353 - elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree; 14.354 - //Now chg mem-list. By side-effect, changes size of elemToFree 14.355 - elemToFree->nextHigherInMem = nextHigherElem->nextHigherInMem; 14.356 - if( elemToFree->nextHigherInMem != 14.357 - _VMSMasterEnv->freeListHead->nextHigherInMem ) 14.358 - elemToFree->nextHigherInMem->nextLowerInMem = elemToFree; 14.359 - } 14.360 - else 14.361 - { //neither lower nor higher is availabe to coalesce so add to list 14.362 - // this makes prev chunk ptr non-null, which indicates it's free 14.363 - elemToFree->nextChunkInFreeList = 14.364 - _VMSMasterEnv->freeListHead->nextChunkInFreeList; 14.365 - _VMSMasterEnv->freeListHead->nextChunkInFreeList = elemToFree; 14.366 - if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list? 
14.367 - elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree; 14.368 - elemToFree->prevChunkInFreeList = _VMSMasterEnv->freeListHead; 14.369 - } 14.370 - } 14.371 - //============================= MEASUREMENT STUFF ======================== 14.372 - #ifdef MEAS__TIME_MALLOC 14.373 - saveLowTimeStampCountInto( endStamp ); 14.374 - addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->freeTimeHist ); 14.375 - #endif 14.376 - //======================================================================== 14.377 - 14.378 - } 14.379 - 14.380 - 14.381 -/*Allocates memory from the external system -- higher overhead 14.382 - * 14.383 - *Because of Linux's malloc throwing bizarre random faults when malloc is 14.384 - * used inside a VMS virtual processor, have to pass this as a request and 14.385 - * have the core loop do it when it gets around to it -- will look for these 14.386 - * chores leftover from the previous animation of masterVP the next time it 14.387 - * goes to animate the masterVP -- so it takes two separate masterVP 14.388 - * animations, separated by work, to complete an external malloc or 14.389 - * external free request. 14.390 - * 14.391 - *Thinking core loop accepts signals -- just looks if signal-location is 14.392 - * empty or not -- 14.393 - */ 14.394 -void * 14.395 -VMS__malloc_in_ext( size_t sizeRequested ) 14.396 - { 14.397 - /* 14.398 - //This is running in the master, so no chance for multiple cores to be 14.399 - // competing for the core's flag. 14.400 - if( *(_VMSMasterEnv->coreLoopSignalAddr[ 0 ]) != 0 ) 14.401 - { //something has already signalled to core loop, so save the signal 14.402 - // and look, next time master animated, to see if can send it. 14.403 - //Note, the addr to put a signal is in the coreloop's frame, so just 14.404 - // checks it each time through -- make it volatile to avoid GCC 14.405 - // optimizations -- it's a coreloop local var that only changes 14.406 - // after jumping away. 
The signal includes the addr to send the 14.407 - //return to -- even if just empty return completion-signal 14.408 - // 14.409 - //save the signal in some queue that the master looks at each time 14.410 - // it starts up -- one loc says if empty for fast common case -- 14.411 - //something like that -- want to hide this inside this call -- but 14.412 - // think this has to come as a request -- req handler gives procr 14.413 - // back to master loop, which gives it back to req handler at point 14.414 - // it sees that core loop has sent return signal. Something like 14.415 - // that. 14.416 - saveTheSignal 14.417 - 14.418 - } 14.419 - coreSigData->type = malloc; 14.420 - coreSigData->sizeToMalloc = sizeRequested; 14.421 - coreSigData->locToSignalCompletion = &figureOut; 14.422 - _VMSMasterEnv->coreLoopSignals[ 0 ] = coreSigData; 14.423 - */ 14.424 - //just risk system-stack faults until get this figured out 14.425 - return malloc( sizeRequested ); 14.426 - } 14.427 - 14.428 - 14.429 -/*Frees memory that was allocated in the external system -- higher overhead 14.430 - * 14.431 - *As noted in external malloc comment, this is clunky 'cause the free has 14.432 - * to be called in the core loop. 
14.433 - */ 14.434 -void 14.435 -VMS__free_in_ext( void *ptrToFree ) 14.436 - { 14.437 - //just risk system-stack faults until get this figured out 14.438 - free( ptrToFree ); 14.439 - 14.440 - //TODO: fix this -- so 14.441 - } 14.442 - 14.443 - 14.444 -/*Designed to be called from the main thread outside of VMS, during init 14.445 - */ 14.446 -MallocProlog * 14.447 -VMS_ext__create_free_list() 14.448 - { MallocProlog *freeListHead, *firstChunk; 14.449 - 14.450 - //Note, this is running in the main thread -- all increases in malloc 14.451 - // mem and all frees of it must be done in this thread, with the 14.452 - // thread's original stack available 14.453 - freeListHead = malloc( sizeof(MallocProlog) ); 14.454 - firstChunk = malloc( MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE ); 14.455 - if( firstChunk == NULL ) {printf("malloc error\n"); exit(1);} 14.456 - 14.457 - //Touch memory to avoid page faults 14.458 - void *ptr,*endPtr; 14.459 - endPtr = (void*)firstChunk+MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE; 14.460 - for(ptr = firstChunk; ptr < endPtr; ptr+=PAGE_SIZE) 14.461 - { 14.462 - *(char*)ptr = 0; 14.463 - } 14.464 - 14.465 - freeListHead->prevChunkInFreeList = NULL; 14.466 - //Use this addr to free the heap when cleanup 14.467 - freeListHead->nextLowerInMem = firstChunk; 14.468 - //to identify top-of-heap elem, compare this addr to elem's next higher 14.469 - freeListHead->nextHigherInMem = (void*)( (uintptr_t)firstChunk + 14.470 - MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE); 14.471 - freeListHead->nextChunkInFreeList = firstChunk; 14.472 - 14.473 - firstChunk->nextChunkInFreeList = NULL; 14.474 - firstChunk->prevChunkInFreeList = freeListHead; 14.475 - //next Higher has to be set to top of chunk, so can calc size in malloc 14.476 - firstChunk->nextHigherInMem = (void*)( (uintptr_t)firstChunk + 14.477 - MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE); 14.478 - firstChunk->nextLowerInMem = NULL; //identifies as bott of heap 14.479 - 14.480 - _VMSMasterEnv->amtOfOutstandingMem = 0; //none 
allocated yet 14.481 - 14.482 - return freeListHead; 14.483 - } 14.484 - 14.485 - 14.486 -/*Designed to be called from the main thread outside of VMS, during cleanup 14.487 - */ 14.488 -void 14.489 -VMS_ext__free_free_list( MallocProlog *freeListHead ) 14.490 - { 14.491 - //stashed a ptr to the one and only bug chunk malloc'd from OS in the 14.492 - // free list head's next lower in mem pointer 14.493 - free( freeListHead->nextLowerInMem ); 14.494 - 14.495 - //don't free the head -- it'll be in an array eventually -- free whole 14.496 - // array when all the free lists linked from it have already been freed 14.497 - } 14.498 -
15.1 --- a/vmalloc.h Fri Feb 10 12:05:17 2012 +0100 15.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 15.3 @@ -1,61 +0,0 @@ 15.4 -/* 15.5 - * Copyright 2009 OpenSourceCodeStewardshipFoundation.org 15.6 - * Licensed under GNU General Public License version 2 15.7 - * 15.8 - * Author: seanhalle@yahoo.com 15.9 - * 15.10 - * Created on November 14, 2009, 9:07 PM 15.11 - */ 15.12 - 15.13 -#ifndef _VMALLOC_H 15.14 -#define _VMALLOC_H 15.15 - 15.16 -#include <malloc.h> 15.17 -#include <inttypes.h> 15.18 -#include "VMS_primitive_data_types.h" 15.19 - 15.20 -typedef struct _MallocProlog MallocProlog; 15.21 - 15.22 -struct _MallocProlog 15.23 - { 15.24 - MallocProlog *nextChunkInFreeList; 15.25 - MallocProlog *prevChunkInFreeList; 15.26 - MallocProlog *nextHigherInMem; 15.27 - MallocProlog *nextLowerInMem; 15.28 - }; 15.29 -//MallocProlog 15.30 - 15.31 -typedef struct 15.32 - { 15.33 - MallocProlog *firstChunkInFreeList; 15.34 - int32 numInList; //TODO not used 15.35 - } 15.36 -FreeListHead; 15.37 - 15.38 -void * 15.39 -VMS__malloc( size_t sizeRequested ); 15.40 - 15.41 -void * 15.42 -VMS__malloc_aligned( size_t sizeRequested ); 15.43 - 15.44 -void 15.45 -VMS__free( void *ptrToFree ); 15.46 - 15.47 -/*Allocates memory from the external system -- higher overhead 15.48 - */ 15.49 -void * 15.50 -VMS__malloc_in_ext( size_t sizeRequested ); 15.51 - 15.52 -/*Frees memory that was allocated in the external system -- higher overhead 15.53 - */ 15.54 -void 15.55 -VMS__free_in_ext( void *ptrToFree ); 15.56 - 15.57 - 15.58 -MallocProlog * 15.59 -VMS_ext__create_free_list(); 15.60 - 15.61 -void 15.62 -VMS_ext__free_free_list( MallocProlog *freeListHead ); 15.63 - 15.64 -#endif 15.65 \ No newline at end of file
16.1 --- a/vutilities.c Fri Feb 10 12:05:17 2012 +0100 16.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 16.3 @@ -1,25 +0,0 @@ 16.4 -/* 16.5 - * Copyright 2009 OpenSourceCodeStewardshipFoundation.org 16.6 - * Licensed under GNU General Public License version 2 16.7 - * 16.8 - * Author: seanhalle@yahoo.com 16.9 - * 16.10 - * Created on November 14, 2009, 9:07 PM 16.11 - */ 16.12 - 16.13 -#include <malloc.h> 16.14 -#include <stdlib.h> 16.15 - 16.16 -#include "VMS.h" 16.17 - 16.18 - 16.19 -inline char * 16.20 -VMS__strDup( char *str ) 16.21 - { char *retStr; 16.22 - 16.23 - retStr = VMS__malloc( strlen(str) + 1 ); 16.24 - if( str == NULL ) return str; 16.25 - strcpy( retStr, str ); 16.26 - 16.27 - return retStr; 16.28 - }
17.1 --- a/vutilities.h Fri Feb 10 12:05:17 2012 +0100 17.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 17.3 @@ -1,20 +0,0 @@ 17.4 -/* 17.5 - * Copyright 2009 OpenSourceCodeStewardshipFoundation.org 17.6 - * Licensed under GNU General Public License version 2 17.7 - * 17.8 - * Author: seanhalle@yahoo.com 17.9 - * 17.10 - * Created on November 14, 2009, 9:07 PM 17.11 - */ 17.12 - 17.13 - 17.14 -#ifndef _UTILITIES_H 17.15 -#define _UTILITIES_H 17.16 - 17.17 -#include <string.h> 17.18 -#include "VMS_primitive_data_types.h" 17.19 - 17.20 -inline char * 17.21 -VMS__strDup( char *str ); 17.22 - 17.23 -#endif
