Mercurial > cgi-bin > hgwebdir.cgi > VMS > VMS_Implementations > VMS_impls > VMS__MC_shared_impl
changeset 61:984f7d78bfdf measure_brch
Merge See what happens -- merged test stuff into Nov 8 VMS version
| author | SeanHalle |
|---|---|
| date | Thu, 11 Nov 2010 06:19:51 -0800 |
| parents | 4fbc2165e493 7b799a46cc87 |
| children | |
| files | CoreLoop.c DESIGN_NOTES__VMS.txt MasterLoop.c VMS.c VMS.h |
| diffstat | 11 files changed, 1821 insertions(+), 478 deletions(-) [+] |
line diff
1.1 --- a/CoreLoop.c Tue Oct 26 18:31:34 2010 -0700 1.2 +++ b/CoreLoop.c Thu Nov 11 06:19:51 2010 -0800 1.3 @@ -41,10 +41,32 @@ 1.4 VMSQueueStruc *readyToAnimateQ; 1.5 unsigned long coreMask; //has 1 in bit positions of allowed cores 1.6 int errorCode; 1.7 - 1.8 + 1.9 + //work-stealing struc on stack to prevent false-sharing in cache-line 1.10 + volatile GateStruc gate; 1.11 + //preGateProgress, waitProgress, exitProgress, gateClosed; 1.12 + 1.13 + 1.14 coreLoopThdParams = (ThdParams *)paramsIn; 1.15 thisCoresIdx = coreLoopThdParams->coreNum; 1.16 1.17 + gate.gateClosed = FALSE; 1.18 + gate.preGateProgress = 0; 1.19 + gate.waitProgress = 0; 1.20 + gate.exitProgress = 0; 1.21 + _VMSMasterEnv->workStealingGates[ thisCoresIdx ] = &gate;//race @startup 1.22 + 1.23 + //wait until signalled that setup is complete 1.24 + pthread_mutex_lock( &suspendLock ); 1.25 + while( !(_VMSMasterEnv->setupComplete) ) 1.26 + { 1.27 + pthread_cond_wait( &suspend_cond, 1.28 + &suspendLock ); 1.29 + } 1.30 + pthread_mutex_unlock( &suspendLock ); 1.31 + 1.32 + //printf( "\nCore unsuspended: %d\n", coreLoopThdParams->coreNum ); 1.33 + 1.34 //set thread affinity 1.35 //Linux requires pinning thd to core inside thread-function 1.36 //Designate a core by a 1 in bit-position corresponding to the core 1.37 @@ -53,25 +75,9 @@ 1.38 pthread_t selfThd = pthread_self(); 1.39 errorCode = 1.40 pthread_setaffinity_np( selfThd, sizeof(coreMask), &coreMask); 1.41 - 1.42 + 1.43 if(errorCode){ printf("\nset affinity failure\n"); exit(0); } 1.44 1.45 - //measure offsets between TSCs 1.46 - //Core 0 is the reference core, the rest react to it. 
1.47 - if( thisCoresIdx == 0 ) measureTSCOffsetsAsCore0(); 1.48 - else measureTSCOffsetsAsRemoteCore( thisCoresIdx ); 1.49 - 1.50 - //wait until signalled that setup is complete 1.51 - pthread_mutex_lock( &suspendLock ); 1.52 - while( !(_VMSMasterEnv->setupComplete) ) 1.53 - { pthread_cond_wait( &suspend_cond, &suspendLock ); 1.54 - } 1.55 - pthread_mutex_unlock( &suspendLock ); 1.56 - 1.57 - 1.58 - //printf( "\nCore unsuspended: %d\n", coreLoopThdParams->coreNum ); 1.59 - 1.60 - 1.61 1.62 //Save addr of "end core loop" label - jump to it to shut down coreloop 1.63 //To get label addr in non-gcc compiler, can trick it by making a call 1.64 @@ -88,82 +94,64 @@ 1.65 1.66 // Get to work! -- virt procr jumps back here when suspends 1.67 //Note, have to restore the frame-pointer before jump to here, to get 1.68 - // this code to work right (readyToAnimateQ and so forth are frame-ptr 1.69 - // relative) 1.70 + // this code to work right (readyToAnimateQ and so forth are frame-ptr relative) 1.71 CoreLoopStartPt: 1.72 1.73 //Get virtual processor from queue 1.74 - //_VMSWorkQ must be a global, static volatile var, so not kept in reg, 1.75 + //The Q must be a global, static volatile var, so not kept in reg, 1.76 // which forces reloading the pointer after each jmp to this point 1.77 readyToAnimateQ = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx]; 1.78 1.79 - currPr = (VirtProcr *) readSRSWQ_NonBlocking( readyToAnimateQ ); 1.80 + #ifdef USE_WORK_STEALING 1.81 + //Alg for work-stealing designed to make common case fast. Comment 1.82 + // in stealer code explains. 1.83 + gate.preGateProgress++; 1.84 + if( gate.gateClosed ) 1.85 + { //now, set coreloop's progress, so stealer can see that core loop 1.86 + // has made it into the waiting area. 
1.87 + gate.waitProgress = gate.preGateProgress; 1.88 + while( gate.gateClosed ) /*busy wait*/; 1.89 + } 1.90 + 1.91 + currPr = (VirtProcr *) readVMSQ( readyToAnimateQ ); 1.92 + 1.93 + //Set the coreloop's progress, so stealer can see it has made it out 1.94 + // of the protected area 1.95 + gate.exitProgress = gate.preGateProgress; 1.96 + #else 1.97 + currPr = (VirtProcr *) readVMSQ( readyToAnimateQ ); 1.98 + #endif 1.99 + 1.100 + if( currPr != NULL ) _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0; 1.101 + 1.102 int tries = 0; int gotLock = 0; 1.103 - while( currPr == NULL ) 1.104 - { //no VPs ready to animate, so run MasterVP --later make "try Master" 1.105 - // VPs & put one in every queue at strategic point -- so have work 1.106 - // avail if don't get lock & short-circuit out of it if master has 1.107 - // recently run on another core 1.108 - //TODO: perf -- "try Master" VP that checks if should run Master Fn 1.109 - //But just letting queue run empty is quickest to see if pinning VP 1.110 - // to core will solve the bizarre random seg-faults in system stack. 
1.111 - 1.112 - //check if get the MasterLock 1.113 + while( currPr == NULL ) //if queue was empty, enter get masterLock loop 1.114 + { //queue was empty, so get master lock 1.115 gotLock = __sync_bool_compare_and_swap( &(_VMSMasterEnv->masterLock), \ 1.116 - UNLOCKED, LOCKED ); 1.117 - 1.118 + UNLOCKED, LOCKED ); 1.119 if( gotLock ) 1.120 - { 1.121 - //run own MasterVP -- when its done, unlocks MasterLock and 1.122 - // jumps back to coreLoops's startPt 1.123 + { //run own MasterVP -- jmps to coreLoops startPt when done 1.124 currPr = _VMSMasterEnv->masterVPs[thisCoresIdx]; 1.125 - addToHist( tries, _VMSMasterEnv->stats->masterLockHist ); 1.126 + if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 ) 1.127 + { DEBUG( dbgB2BMaster,"Many back to back MasterVPs\n"); 1.128 + pthread_yield(); 1.129 + } 1.130 + _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1; 1.131 break; //end while -- have a VP to animate now 1.132 } 1.133 1.134 - tries++; 1.135 - 1.136 - if( tries % READYTOANIMATE_RETRIES == 0 ) pthread_yield(); 1.137 + tries++; //if too many, means master on other core taking too long 1.138 + if( tries > MASTERLOCK_RETRIES ) { tries = 0; pthread_yield(); } 1.139 } 1.140 1.141 - //switch to virt procr's stack and frame ptr then jump to virt procr fn 1.142 - void *stackPtr, *framePtr, *jmpPt, *coreLoopFramePtrAddr, \ 1.143 - *coreLoopStackPtrAddr; 1.144 - 1.145 - stackPtr = currPr->stackPtr; 1.146 - framePtr = currPr->framePtr; 1.147 - jmpPt = currPr->nextInstrPt; 1.148 - coreLoopFramePtrAddr = &(currPr->coreLoopFramePtr); 1.149 - coreLoopStackPtrAddr = &(currPr->coreLoopStackPtr); 1.150 1.151 - //Save the core loop's stack and frame pointers into virt procr struct 1.152 - // then switch to stack ptr and frame ptr of virt procr & jmp to it 1.153 - //This was a pain to get right because GCC converts the "(jmpPt)" to 1.154 - // frame-relative mem-op -- so generated machine code first changed the 1.155 - // frame pointer, then tried to jump to an addr stored on 
stack, which 1.156 - // it accessed as an offset from frame-ptr! (wrong frame-ptr now) 1.157 - //Explicitly loading into eax before changing frame-ptr fixed it 1.158 - //Also, it turns "(currPr->coreLoopFramePtr)" into a temporary on the 1.159 - // stack, so "movl %%ebp, %0" saves to the temp, NOT the data-struc! 1.160 - asm volatile("movl %0, %%eax; \ 1.161 - movl %%esp, (%%eax); \ 1.162 - movl %1, %%eax; \ 1.163 - movl %%ebp, (%%eax); \ 1.164 - movl %2, %%eax; \ 1.165 - movl %3, %%esp; \ 1.166 - movl %4, %%ebp; \ 1.167 - jmp %%eax" \ 1.168 - /* outputs */ : "=g"(coreLoopStackPtrAddr), \ 1.169 - "=g"(coreLoopFramePtrAddr) \ 1.170 - /* inputs */ : "g" (jmpPt), "g" (stackPtr), "g" (framePtr) \ 1.171 - /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi" \ 1.172 - ); 1.173 + SwitchToVP( currPr ) 1.174 1.175 //=========== jmp to here when want to shut down the VMS system ========== 1.176 CoreLoopEndPt: 1.177 //first free shutdown VP that jumped here -- it first restores the 1.178 // coreloop's stack, so addr of currPr in stack frame is still correct 1.179 - VMS__handle_dissipate_reqst( currPr ); 1.180 + VMS__dissipate_procr( currPr ); 1.181 pthread_exit( NULL ); 1.182 } 1.183 1.184 @@ -195,62 +183,33 @@ 1.185 _VMSMasterEnv->coreLoopStartPt = &&SeqCoreLoopStartPt; 1.186 _VMSMasterEnv->coreLoopEndPt = &&SeqCoreLoopEndPt; 1.187 1.188 - //Core loop has no values live upon CoreLoopStartPt except 1.189 - // readyToAnimateQ 1.190 + //Core loop has no values live upon CoreLoopStartPt except readyToAnimateQ 1.191 // every value in the code is defined by a statement in core loop, 1.192 // after the start point -- with the one exception of _VMSWorkQ 1.193 1.194 1.195 // Get to work! 
-- virt procr jumps back here when done or suspends 1.196 //Note, have to restore the frame-pointer before jump to here, to get 1.197 - // this code to work right (readyToAnimateQ and so forth are frame-ptr 1.198 - // relative) 1.199 + // this code to work right (readyToAnimateQ and so forth are frame-ptr relative) 1.200 SeqCoreLoopStartPt: 1.201 1.202 //Get virtual processor from queue 1.203 //_VMSWorkQ must be a global, static volatile var, so not kept in reg, 1.204 // which forces reloading the pointer after each jmp to this point 1.205 readyToAnimateQ = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx]; 1.206 - currPr = (VirtProcr *) readSRSWQ_NonBlocking( readyToAnimateQ ); 1.207 + currPr = (VirtProcr *) readVMSQ( readyToAnimateQ ); 1.208 if( currPr == NULL ) 1.209 + { if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 ) 1.210 + { printf("too many back to back MasterVP\n"); exit(1); } 1.211 + _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1; 1.212 + 1.213 currPr = _VMSMasterEnv->masterVPs[thisCoresIdx]; 1.214 - 1.215 + } 1.216 + else 1.217 + _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0; 1.218 1.219 -// printf("core %d loop procr addr: %d\n", coreLoopThdParams->coreNum, \ 1.220 -// (int)currPr ); fflush(stdin); 1.221 1.222 - //switch to virt procr's stack and frame ptr then jump to virt procr 1.223 - void *stackPtr, *framePtr, *jmpPt, *coreLoopFramePtrAddr, \ 1.224 - *coreLoopStackPtrAddr; 1.225 - 1.226 - stackPtr = currPr->stackPtr; 1.227 - framePtr = currPr->framePtr; 1.228 - jmpPt = currPr->nextInstrPt; 1.229 - coreLoopFramePtrAddr = &(currPr->coreLoopFramePtr); 1.230 - coreLoopStackPtrAddr = &(currPr->coreLoopStackPtr); 1.231 - 1.232 - //Save the core loop's stack and frame pointers into virt procr struct 1.233 - // then switch to stack ptr and frame ptr of virt procr & jmp to it 1.234 - //This was a pain to get right because GCC converts the "(jmpPt)" to 1.235 - // frame-relative mem-op -- so generated machine code first changed the 1.236 - // frame 
pointer, then tried to jump to an addr stored on stack, which 1.237 - // it accessed as an offset from frame-ptr! (wrong frame-ptr now) 1.238 - //Explicitly loading into eax before changing frame-ptr fixed it 1.239 - //Also, it turns "(currPr->coreLoopFramePtr)" into a temporary on the 1.240 - // stack, so "movl %%ebp, %0" saves to the temp, NOT the data-struc! 1.241 - asm volatile("movl %0, %%eax; \ 1.242 - movl %%esp, (%%eax); \ 1.243 - movl %1, %%eax; \ 1.244 - movl %%ebp, (%%eax); \ 1.245 - movl %2, %%eax; \ 1.246 - movl %3, %%esp; \ 1.247 - movl %4, %%ebp; \ 1.248 - jmp %%eax" \ 1.249 - /* outputs */ : "=g"(coreLoopStackPtrAddr), \ 1.250 - "=g"(coreLoopFramePtrAddr) \ 1.251 - /* inputs */ : "g" (jmpPt), "g" (stackPtr), "g" (framePtr) \ 1.252 - /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi" \ 1.253 - ); 1.254 + SwitchToVP( currPr ) 1.255 1.256 //======================================================================== 1.257 //jmp to here when want to shut down the VMS system. A shutdown VP is 1.258 @@ -260,7 +219,7 @@ 1.259 // all the threads to die will proceed, gather the result, and 1.260 // return to the calling application. 1.261 SeqCoreLoopEndPt: 1.262 - VMS__handle_dissipate_reqst( currPr ); //free shutdown pr, that jmpd here 1.263 + VMS__dissipate_procr( currPr ); //free shutdown pr, that jmpd here 1.264 return; 1.265 } 1.266 1.267 @@ -380,6 +339,3 @@ 1.268 } 1.269 1.270 1.271 - 1.272 - 1.273 -
2.1 --- a/DESIGN_NOTES__VMS.txt Tue Oct 26 18:31:34 2010 -0700 2.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 2.3 @@ -1,2 +0,0 @@ 2.4 - 2.5 -Implement VMS this way:
3.1 --- a/MasterLoop.c Tue Oct 26 18:31:34 2010 -0700 3.2 +++ b/MasterLoop.c Thu Nov 11 06:19:51 2010 -0800 3.3 @@ -7,12 +7,19 @@ 3.4 3.5 3.6 #include <stdio.h> 3.7 -#include <malloc.h> 3.8 #include <stddef.h> 3.9 3.10 #include "VMS.h" 3.11 3.12 3.13 +//=========================================================================== 3.14 +void inline 3.15 +stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ, 3.16 + VirtProcr *masterPr ); 3.17 + 3.18 +//=========================================================================== 3.19 + 3.20 + 3.21 3.22 /*This code is animated by the virtual Master processor. 3.23 * 3.24 @@ -65,7 +72,7 @@ 3.25 */ 3.26 void masterLoop( void *initData, VirtProcr *animatingPr ) 3.27 { 3.28 - int slotIdx; 3.29 + int32 slotIdx, numSlotsFilled; 3.30 VirtProcr *schedVirtPr; 3.31 SchedSlot *currSlot, **schedSlots; 3.32 MasterEnv *masterEnv; 3.33 @@ -75,7 +82,7 @@ 3.34 RequestHandler requestHandler; 3.35 void *semanticEnv; 3.36 3.37 - int thisCoresIdx; 3.38 + int32 thisCoresIdx; 3.39 VirtProcr *masterPr; 3.40 volatile VirtProcr *volatileMasterPr; 3.41 3.42 @@ -110,7 +117,7 @@ 3.43 3.44 masterEnv = _VMSMasterEnv; 3.45 3.46 -//TODO: check that compiles so that always re-define from frame-storage 3.47 + //GCC may optimize so doesn't always re-define from frame-storage 3.48 masterPr = volatileMasterPr; //just to make sure after jmp 3.49 thisCoresIdx = masterPr->coreAnimatedBy; 3.50 readyToAnimateQ = masterEnv->readyToAnimateQs[thisCoresIdx]; 3.51 @@ -122,6 +129,7 @@ 3.52 3.53 3.54 //Poll each slot's Done flag 3.55 + numSlotsFilled = 0; 3.56 for( slotIdx = 0; slotIdx < NUM_SCHED_SLOTS; slotIdx++) 3.57 { 3.58 currSlot = schedSlots[ slotIdx ]; 3.59 @@ -143,27 +151,21 @@ 3.60 { currSlot->procrAssignedToSlot = schedVirtPr; 3.61 schedVirtPr->schedSlot = currSlot; 3.62 currSlot->needsProcrAssigned = FALSE; 3.63 - 3.64 - writeSRSWQ( schedVirtPr, readyToAnimateQ ); 3.65 + numSlotsFilled += 1; 3.66 + 3.67 + writeVMSQ( schedVirtPr, readyToAnimateQ 
); 3.68 } 3.69 } 3.70 } 3.71 3.72 + 3.73 + #ifdef USE_WORK_STEALING 3.74 + //If no slots filled, means no more work, look for work to steal. 3.75 + if( numSlotsFilled == 0 ) 3.76 + { gateProtected_stealWorkInto( currSlot, readyToAnimateQ, masterPr ); 3.77 + } 3.78 + #endif 3.79 3.80 - //Save stack ptr and frame, restore CoreLoop's stack and frame, 3.81 - // and clear the MasterLock 3.82 - //TODO: cafefully verify don't need to force saving anything to stack 3.83 - // before jumping back to core loop. 3.84 - void *stackPtrAddr, *framePtrAddr, *masterLockAddr; 3.85 - void *jmpPt, *coreLoopFramePtr, *coreLoopStackPtr; 3.86 - 3.87 - stackPtrAddr = &(masterPr->stackPtr); 3.88 - framePtrAddr = &(masterPr->framePtr); 3.89 - masterLockAddr = &(_VMSMasterEnv->masterLock); 3.90 - 3.91 - jmpPt = _VMSMasterEnv->coreLoopStartPt; 3.92 - coreLoopFramePtr = masterPr->coreLoopFramePtr;//need this only 3.93 - coreLoopStackPtr = masterPr->coreLoopStackPtr;//shouldn't need -- safety 3.94 3.95 //============================= MEASUREMENT STUFF ======================== 3.96 #ifdef MEAS__TIME_MASTER 3.97 @@ -172,21 +174,183 @@ 3.98 #endif 3.99 //======================================================================== 3.100 3.101 - asm volatile("movl %0, %%eax; \ 3.102 - movl %%esp, (%%eax); \ 3.103 - movl %1, %%eax; \ 3.104 - movl %%ebp, (%%eax); \ 3.105 - movl %2, %%ebx; \ 3.106 - movl %3, %%eax; \ 3.107 - movl %4, %%esp; \ 3.108 - movl %5, %%ebp; \ 3.109 - movl $0x0, (%%ebx); \ 3.110 - jmp %%eax;" \ 3.111 - /* outputs */ : "=g" (stackPtrAddr), "=g" (framePtrAddr), \ 3.112 - "=g"(masterLockAddr) \ 3.113 - /* inputs */ : "g" (jmpPt), "g"(coreLoopStackPtr), "g"(coreLoopFramePtr)\ 3.114 - /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi" \ 3.115 - );//can probably make clobber list empty -- but safe for now 3.116 + 3.117 + masterSwitchToCoreLoop( masterPr ) 3.118 } 3.119 3.120 3.121 + 3.122 +/*This has a race condition -- the coreloops are accessing their own queues 
3.123 + * at the same time that this work-stealer on a different core is trying to take work from them 3.124 + */ 3.125 +void inline 3.126 +stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ, 3.127 + VirtProcr *masterPr ) 3.128 + { 3.129 + VirtProcr *stolenPr; 3.130 + int32 coreIdx, i; 3.131 + VMSQueueStruc *currQ; 3.132 + 3.133 + stolenPr = NULL; 3.134 + coreIdx = masterPr->coreAnimatedBy; 3.135 + for( i = 0; i < NUM_CORES -1; i++ ) 3.136 + { 3.137 + if( coreIdx >= NUM_CORES -1 ) 3.138 + { coreIdx = 0; 3.139 + } 3.140 + else 3.141 + { coreIdx++; 3.142 + } 3.143 + currQ = _VMSMasterEnv->readyToAnimateQs[coreIdx]; 3.144 + if( numInVMSQ( currQ ) > 0 ) 3.145 + { stolenPr = readVMSQ (currQ ); 3.146 + break; 3.147 + } 3.148 + } 3.149 + 3.150 + if( stolenPr != NULL ) 3.151 + { currSlot->procrAssignedToSlot = stolenPr; 3.152 + stolenPr->schedSlot = currSlot; 3.153 + currSlot->needsProcrAssigned = FALSE; 3.154 + 3.155 + writeVMSQ( stolenPr, readyToAnimateQ ); 3.156 + } 3.157 + } 3.158 + 3.159 +/*This algorithm makes the common case fast. Make the coreloop passive, 3.160 + * and show its progress. Make the stealer control a gate that coreloop 3.161 + * has to pass. 3.162 + *To avoid interference, only one stealer at a time. Use a global 3.163 + * stealer-lock. 3.164 + * 3.165 + *The pattern is based on a gate -- stealer shuts the gate, then monitors 3.166 + * to be sure any already past make it all the way out, before starting. 3.167 + *So, have a "progress" measure just before the gate, then have two after it, 3.168 + * one is in a "waiting room" outside the gate, the other is at the exit. 3.169 + *Then, the stealer first shuts the gate, then checks the progress measure 3.170 + * outside it, then looks to see if the progress measure at the exit is the 3.171 + * same. If yes, it knows the protected area is empty 'cause no other way 3.172 + * to get in and the last to get in also exited. 
3.173 + *If the progress measure at the exit is not the same, then the stealer goes 3.174 + * into a loop checking both the waiting-area and the exit progress-measures 3.175 + * until one of them shows the same as the measure outside the gate. Might 3.176 + * as well re-read the measure outside the gate each go around, just to be 3.177 + * sure. It is guaranteed that one of the two will eventually match the one 3.178 + * outside the gate. 3.179 + * 3.180 + *Here's an informal proof of correctness: 3.181 + *The gate can be closed at any point, and have only four cases: 3.182 + * 1) coreloop made it past the gate-closing but not yet past the exit 3.183 + * 2) coreloop made it past the pre-gate progress update but not yet past 3.184 + * the gate, 3.185 + * 3) coreloop is right before the pre-gate update 3.186 + * 4) coreloop is past the exit and far from the pre-gate update. 3.187 + * 3.188 + * Covering the cases in reverse order, 3.189 + * 4) is not a problem -- stealer will read pre-gate progress, see that it 3.190 + * matches exit progress, and the gate is closed, so stealer can proceed. 3.191 + * 3) stealer will read pre-gate progress just after coreloop updates it.. 3.192 + * so stealer goes into a loop until the coreloop causes wait-progress 3.193 + * to match pre-gate progress, so then stealer can proceed 3.194 + * 2) same as 3.. 3.195 + * 1) stealer reads pre-gate progress, sees that it's different than exit, 3.196 + * so goes into loop until exit matches pre-gate, now it knows coreloop 3.197 + * is not in protected and cannot get back in, so can proceed. 3.198 + * 3.199 + *Implementation for the stealer: 3.200 + * 3.201 + *First, acquire the stealer lock -- only cores with no work to do will 3.202 + * compete to steal, so not a big performance penalty having only one -- 3.203 + * will rarely have multiple stealers in a system with plenty of work -- and 3.204 + * in a system with little work, it doesn't matter. 
3.205 + * 3.206 + *Note, have single-reader, single-writer pattern for all variables used to 3.207 + * communicate between stealer and victims 3.208 + * 3.209 + *So, scan the queues of the core loops, until find non-empty. Each core 3.210 + * has its own list that it scans. The list goes in order from closest to 3.211 + * furthest core, so it steals first from close cores. Later can add 3.212 + * taking info from the app about overlapping footprints, and scan all the 3.213 + * others then choose work with the most footprint overlap with the contents 3.214 + * of this core's cache. 3.215 + * 3.216 + *Now, have a victim to take work from. So, shut the gate in that 3.217 + * coreloop, by setting the "gate closed" var on its stack to TRUE. 3.218 + *Then, read the core's pre-gate progress and compare to the core's exit 3.219 + * progress. 3.220 + *If same, can proceed to take work from the coreloop's queue. When done, 3.221 + * write FALSE to gate closed var. 3.222 + *If different, then enter a loop that reads the pre-gate progress, then 3.223 + * compares to exit progress then to wait progress. When one of the two 3.224 + * matches, proceed. Take work from the coreloop's queue. When done, 3.225 + * write FALSE to the gate closed var. 
3.226 + * 3.227 + */ 3.228 +void inline 3.229 +gateProtected_stealWorkInto( SchedSlot *currSlot, 3.230 + VMSQueueStruc *myReadyToAnimateQ, 3.231 + VirtProcr *masterPr ) 3.232 + { 3.233 + VirtProcr *stolenPr; 3.234 + int32 coreIdx, i, haveAVictim, gotLock; 3.235 + VMSQueueStruc *victimsQ; 3.236 + 3.237 + volatile GateStruc *vicGate; 3.238 + int32 coreMightBeInProtected; 3.239 + 3.240 + 3.241 + 3.242 + //see if any other cores have work available to steal 3.243 + haveAVictim = FALSE; 3.244 + coreIdx = masterPr->coreAnimatedBy; 3.245 + for( i = 0; i < NUM_CORES -1; i++ ) 3.246 + { 3.247 + if( coreIdx >= NUM_CORES -1 ) 3.248 + { coreIdx = 0; 3.249 + } 3.250 + else 3.251 + { coreIdx++; 3.252 + } 3.253 + victimsQ = _VMSMasterEnv->readyToAnimateQs[coreIdx]; 3.254 + if( numInVMSQ( victimsQ ) > 0 ) 3.255 + { haveAVictim = TRUE; 3.256 + vicGate = _VMSMasterEnv->workStealingGates[ coreIdx ]; 3.257 + break; 3.258 + } 3.259 + } 3.260 + if( !haveAVictim ) return; //no work to steal, exit 3.261 + 3.262 + //have a victim core, now get the stealer-lock 3.263 + gotLock =__sync_bool_compare_and_swap( &(_VMSMasterEnv->workStealingLock), 3.264 + UNLOCKED, LOCKED ); 3.265 + if( !gotLock ) return; //go back to core loop, which will re-start master 3.266 + 3.267 + 3.268 + //====== Start Gate-protection ======= 3.269 + vicGate->gateClosed = TRUE; 3.270 + coreMightBeInProtected= vicGate->preGateProgress != vicGate->exitProgress; 3.271 + while( coreMightBeInProtected ) 3.272 + { //wait until sure 3.273 + if( vicGate->preGateProgress == vicGate->waitProgress ) 3.274 + coreMightBeInProtected = FALSE; 3.275 + if( vicGate->preGateProgress == vicGate->exitProgress ) 3.276 + coreMightBeInProtected = FALSE; 3.277 + } 3.278 + 3.279 + stolenPr = readVMSQ ( victimsQ ); 3.280 + 3.281 + vicGate->gateClosed = FALSE; 3.282 + //======= End Gate-protection ======= 3.283 + 3.284 + 3.285 + if( stolenPr != NULL ) //victim could have been in protected and taken 3.286 + { currSlot->procrAssignedToSlot = 
stolenPr; 3.287 + stolenPr->schedSlot = currSlot; 3.288 + currSlot->needsProcrAssigned = FALSE; 3.289 + 3.290 + writeVMSQ( stolenPr, myReadyToAnimateQ ); 3.291 + } 3.292 + 3.293 + //unlock the work stealing lock 3.294 + _VMSMasterEnv->workStealingLock = UNLOCKED; 3.295 + }
4.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 4.2 +++ b/SwitchAnimators.h Thu Nov 11 06:19:51 2010 -0800 4.3 @@ -0,0 +1,138 @@ 4.4 +/* 4.5 + * Copyright 2009 OpenSourceStewardshipFoundation.org 4.6 + * Licensed under GNU General Public License version 2 4.7 + * 4.8 + * Author: seanhalle@yahoo.com 4.9 + * 4.10 + */ 4.11 + 4.12 +#ifndef _SwitchAnimators_H 4.13 +#define _SwitchAnimators_H 4.14 +#define __USE_GNU 4.15 + 4.16 +/*Isolating code for switching between animators within these macros -- at 4.17 + * some point will make switches to compile for 32 bit or for 64 bit, which 4.18 + * having these isolated will make cleaner 4.19 + * 4.20 + *This also makes it easier to change architectures, at some point 4.21 + *And it cleans the code up, having the ugly assembly out of the way 4.22 + */ 4.23 + 4.24 +//=========================== MasterVP to CoreLoop ========================== 4.25 +// 4.26 + //Save stack ptr and frame, restore CoreLoop's stack and frame, 4.27 + // and clear the MasterLock 4.28 + //GCC's -O3 messes with this -- go through generated -- protect somehow 4.29 + // 4.30 +#define masterSwitchToCoreLoop( masterPr ) \ 4.31 + void *stackPtrAddr, *framePtrAddr, *masterLockAddr; \ 4.32 + void *jmpPt, *coreLoopFramePtr, *coreLoopStackPtr; \ 4.33 +\ 4.34 + stackPtrAddr = &(masterPr->stackPtr); \ 4.35 + framePtrAddr = &(masterPr->framePtr); \ 4.36 + masterLockAddr = &(_VMSMasterEnv->masterLock); \ 4.37 +\ 4.38 + jmpPt = _VMSMasterEnv->coreLoopStartPt; \ 4.39 + coreLoopFramePtr = masterPr->coreLoopFramePtr; \ 4.40 + coreLoopStackPtr = masterPr->coreLoopStackPtr; \ 4.41 +\ 4.42 + asm volatile("movl %0, %%eax; \ 4.43 + movl %%esp, (%%eax); \ 4.44 + movl %1, %%eax; \ 4.45 + movl %%ebp, (%%eax); \ 4.46 + movl %2, %%ebx; \ 4.47 + movl %3, %%eax; \ 4.48 + movl %4, %%esp; \ 4.49 + movl %5, %%ebp; \ 4.50 + movl $0x0, (%%ebx); \ 4.51 + jmp %%eax;" \ 4.52 + /* outputs */ : "=g" (stackPtrAddr), "=g" (framePtrAddr), \ 4.53 + "=g"(masterLockAddr) \ 4.54 + /* inputs */ : 
"g" (jmpPt), "g"(coreLoopStackPtr), "g"(coreLoopFramePtr)\ 4.55 + /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi" \ 4.56 + );//can probably make clobber list empty -- but safe for now 4.57 + 4.58 + 4.59 +//=========================== SlaveVP to CoreLoop =========================== 4.60 +// 4.61 + 4.62 +#define SwitchToCoreLoop( animatingPr ) \ 4.63 + void *jmpPt, *stackPtrAddr, *framePtrAddr, *coreLoopStackPtr; \ 4.64 + void *coreLoopFramePtr; \ 4.65 +\ 4.66 + stackPtrAddr = &(animatingPr->stackPtr); \ 4.67 + framePtrAddr = &(animatingPr->framePtr); \ 4.68 +\ 4.69 + jmpPt = _VMSMasterEnv->coreLoopStartPt; \ 4.70 + coreLoopFramePtr = animatingPr->coreLoopFramePtr; \ 4.71 + coreLoopStackPtr = animatingPr->coreLoopStackPtr; \ 4.72 +\ 4.73 + /*Save the virt procr's stack and frame ptrs*/ \ 4.74 + asm volatile("movl %0, %%eax; \ 4.75 + movl %%esp, (%%eax); \ 4.76 + movl %1, %%eax; \ 4.77 + movl %%ebp, (%%eax) "\ 4.78 + /* outputs */ : "=g" (stackPtrAddr), "=g" (framePtrAddr) \ 4.79 + /* inputs */ : \ 4.80 + /* clobber */ : "%eax" \ 4.81 + ); \ 4.82 +\ 4.83 + /*restore coreloop's frame ptr, then jump back to "start" of core loop*/\ 4.84 + /*Note, GCC compiles to assembly that saves esp and ebp in the stack*/ \ 4.85 + /* frame -- so have to explicitly do assembly that saves to memory*/ \ 4.86 + asm volatile("movl %0, %%eax; \ 4.87 + movl %1, %%esp; \ 4.88 + movl %2, %%ebp; \ 4.89 + jmp %%eax " \ 4.90 + /* outputs */ : \ 4.91 + /* inputs */ : "m" (jmpPt), "m"(coreLoopStackPtr), "m"(coreLoopFramePtr)\ 4.92 + /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx", "%edi","%esi" \ 4.93 + ); 4.94 + //list everything as clobbered to force GCC to save all 4.95 + // live vars that are in regs on stack before this 4.96 + // assembly, so that stack pointer is correct, before jmp 4.97 + 4.98 + 4.99 + 4.100 +//============================== CoreLoop to VP ============================= 4.101 +// 4.102 + //Save the core loop's stack and frame pointers into 
virt procr struct 4.103 + // then switch to stack ptr and frame ptr of virt procr & jmp to it 4.104 + //This was a pain to get right because GCC converts the "(jmpPt)" to 4.105 + // frame-relative mem-op -- so generated machine code first changed the 4.106 + // frame pointer, then tried to jump to an addr stored on stack, which 4.107 + // it accessed as an offset from frame-ptr! (wrong frame-ptr now) 4.108 + //Explicitly loading into eax before changing frame-ptr fixed it 4.109 + //Also, it turns "(currPr->coreLoopFramePtr)" into a temporary on the 4.110 + // stack, so "movl %%ebp, %0" saves to the temp, NOT the data-struc! 4.111 + 4.112 + 4.113 + //switch to virt procr's stack and frame ptr then jump to virt procr fn 4.114 + 4.115 +#define SwitchToVP( currPr ) \ 4.116 + void *stackPtr, *framePtr, *jmpPt, *coreLoopFramePtrAddr, \ 4.117 + *coreLoopStackPtrAddr; \ 4.118 +\ 4.119 + stackPtr = currPr->stackPtr; \ 4.120 + framePtr = currPr->framePtr; \ 4.121 + jmpPt = currPr->nextInstrPt; \ 4.122 + coreLoopFramePtrAddr = &(currPr->coreLoopFramePtr); \ 4.123 + coreLoopStackPtrAddr = &(currPr->coreLoopStackPtr); \ 4.124 +\ 4.125 + asm volatile("movl %0, %%eax; \ 4.126 + movl %%esp, (%%eax); \ 4.127 + movl %1, %%eax; \ 4.128 + movl %%ebp, (%%eax); \ 4.129 + movl %2, %%eax; \ 4.130 + movl %3, %%esp; \ 4.131 + movl %4, %%ebp; \ 4.132 + jmp %%eax" \ 4.133 + /* outputs */ : "=g"(coreLoopStackPtrAddr), \ 4.134 + "=g"(coreLoopFramePtrAddr) \ 4.135 + /* inputs */ : "g" (jmpPt), "g" (stackPtr), "g" (framePtr) \ 4.136 + /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi" \ 4.137 + ); 4.138 + 4.139 + 4.140 +#endif /* _SwitchAnimators_H */ 4.141 +
5.1 --- a/VMS.c Tue Oct 26 18:31:34 2010 -0700 5.2 +++ b/VMS.c Thu Nov 11 06:19:51 2010 -0800 5.3 @@ -6,7 +6,9 @@ 5.4 5.5 #include <stdio.h> 5.6 #include <stdlib.h> 5.7 +#include <string.h> 5.8 #include <malloc.h> 5.9 +#include <sys/time.h> 5.10 5.11 #include "VMS.h" 5.12 #include "Queue_impl/BlockingQueue.h" 5.13 @@ -28,6 +30,12 @@ 5.14 void 5.15 create_the_coreLoop_OS_threads(); 5.16 5.17 +MallocProlog * 5.18 +create_free_list(); 5.19 + 5.20 +void 5.21 +endOSThreadFn( void *initData, VirtProcr *animatingPr ); 5.22 + 5.23 pthread_mutex_t suspendLock = PTHREAD_MUTEX_INITIALIZER; 5.24 pthread_cond_t suspend_cond = PTHREAD_COND_INITIALIZER; 5.25 5.26 @@ -83,34 +91,43 @@ 5.27 int coreIdx; 5.28 VirtProcr **masterVPs; 5.29 SchedSlot ***allSchedSlots; //ptr to array of ptrs 5.30 - 5.31 + 5.32 + 5.33 //Make the master env, which holds everything else 5.34 _VMSMasterEnv = malloc( sizeof(MasterEnv) ); 5.35 + 5.36 + //Very first thing put into the master env is the free-list, seeded 5.37 + // with a massive initial chunk of memory. 5.38 + //After this, all other mallocs are VMS__malloc. 5.39 + _VMSMasterEnv->freeListHead = VMS_ext__create_free_list(); 5.40 + 5.41 + //===================== Only VMS__malloc after this ==================== 5.42 masterEnv = _VMSMasterEnv; 5.43 - //Need to set start pt here 'cause used by seed procr, which is created 5.44 - // before the first core loop starts up. -- not sure how yet.. 
5.45 -// masterEnv->coreLoopStartPt = ; 5.46 -// masterEnv->coreLoopEndPt = ; 5.47 5.48 //Make a readyToAnimateQ for each core loop 5.49 - readyToAnimateQs = malloc( NUM_CORES * sizeof(VMSQueueStruc *) ); 5.50 - masterVPs = malloc( NUM_CORES * sizeof(VirtProcr *) ); 5.51 + readyToAnimateQs = VMS__malloc( NUM_CORES * sizeof(VMSQueueStruc *) ); 5.52 + masterVPs = VMS__malloc( NUM_CORES * sizeof(VirtProcr *) ); 5.53 5.54 //One array for each core, 3 in array, core's masterVP scheds all 5.55 - allSchedSlots = malloc( NUM_CORES * sizeof(SchedSlot *) ); 5.56 + allSchedSlots = VMS__malloc( NUM_CORES * sizeof(SchedSlot *) ); 5.57 5.58 + _VMSMasterEnv->numProcrsCreated = 0; //used by create procr 5.59 for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ ) 5.60 - { 5.61 - readyToAnimateQs[ coreIdx ] = makeSRSWQ(); 5.62 + { 5.63 + readyToAnimateQs[ coreIdx ] = makeVMSQ(); 5.64 5.65 - //Q: should give masterVP core-specific into as its init data? 5.66 + //Q: should give masterVP core-specific info as its init data? 
5.67 masterVPs[ coreIdx ] = VMS__create_procr( &masterLoop, masterEnv ); 5.68 masterVPs[ coreIdx ]->coreAnimatedBy = coreIdx; 5.69 allSchedSlots[ coreIdx ] = create_sched_slots(); //makes for one core 5.70 + _VMSMasterEnv->numMasterInARow[ coreIdx ] = 0; 5.71 + _VMSMasterEnv->workStealingGates[ coreIdx ] = NULL; 5.72 } 5.73 _VMSMasterEnv->readyToAnimateQs = readyToAnimateQs; 5.74 _VMSMasterEnv->masterVPs = masterVPs; 5.75 + _VMSMasterEnv->masterLock = UNLOCKED; 5.76 _VMSMasterEnv->allSchedSlots = allSchedSlots; 5.77 + _VMSMasterEnv->workStealingLock = UNLOCKED; 5.78 5.79 //============================= MEASUREMENT STUFF ======================== 5.80 #ifdef MEAS__TIME_MASTER 5.81 @@ -125,59 +142,33 @@ 5.82 // because coreLoop now controls -- animates its masterVP when no work 5.83 5.84 5.85 - //==================== malloc substitute ======================== 5.86 - // 5.87 - //Testing whether malloc is using thread-local storage and therefore 5.88 - // causing unreliable behavior. 5.89 - //Just allocate a massive chunk of memory and roll own malloc/free and 5.90 - // make app use VMS__malloc_to, which will suspend and perform malloc 5.91 - // in the master, taking from this massive chunk. 
5.92 + //============================= MEASUREMENT STUFF ======================== 5.93 + #ifdef STATS__TURN_ON_PROBES 5.94 + _VMSMasterEnv->dynIntervalProbesInfo = 5.95 + makePrivDynArrayOfSize( &(_VMSMasterEnv->intervalProbes), 200); 5.96 5.97 -// initFreeList(); 5.98 + _VMSMasterEnv->probeNameHashTbl = makeHashTable( 1000, &VMS__free ); 5.99 + 5.100 + //put creation time directly into master env, for fast retrieval 5.101 + struct timeval timeStamp; 5.102 + gettimeofday( &(timeStamp), NULL); 5.103 + _VMSMasterEnv->createPtInSecs = 5.104 + timeStamp.tv_sec +(timeStamp.tv_usec/1000000.0); 5.105 + #endif 5.106 + //======================================================================== 5.107 5.108 } 5.109 5.110 -/* 5.111 -void 5.112 -initMasterMalloc() 5.113 - { 5.114 - _VMSMasterEnv->mallocChunk = malloc( MASSIVE_MALLOC_SIZE ); 5.115 - 5.116 - //The free-list element is the first several locations of an 5.117 - // allocated chunk -- the address given to the application is pre- 5.118 - // pended with both the ownership structure and the free-list struc. 5.119 - //So, write the values of these into the first locations of 5.120 - // mallocChunk -- which marks it as free & puts in its size. 
5.121 - listElem = (FreeListElem *)_VMSMasterEnv->mallocChunk; 5.122 - listElem->size = MASSIVE_MALLOC_SIZE - NUM_PREPEND_BYTES 5.123 - listElem->next = NULL; 5.124 - } 5.125 - 5.126 -void 5.127 -dissipateMasterMalloc() 5.128 - { 5.129 - //Just foo code -- to get going -- doing as if free list were link-list 5.130 - currElem = _VMSMasterEnv->freeList; 5.131 - while( currElem != NULL ) 5.132 - { 5.133 - nextElem = currElem->next; 5.134 - masterFree( currElem ); 5.135 - currElem = nextElem; 5.136 - } 5.137 - free( _VMSMasterEnv->freeList ); 5.138 - } 5.139 - */ 5.140 - 5.141 SchedSlot ** 5.142 create_sched_slots() 5.143 { SchedSlot **schedSlots; 5.144 int i; 5.145 5.146 - schedSlots = malloc( NUM_SCHED_SLOTS * sizeof(SchedSlot *) ); 5.147 + schedSlots = VMS__malloc( NUM_SCHED_SLOTS * sizeof(SchedSlot *) ); 5.148 5.149 for( i = 0; i < NUM_SCHED_SLOTS; i++ ) 5.150 { 5.151 - schedSlots[i] = malloc( sizeof(SchedSlot) ); 5.152 + schedSlots[i] = VMS__malloc( sizeof(SchedSlot) ); 5.153 5.154 //Set state to mean "handling requests done, slot needs filling" 5.155 schedSlots[i]->workIsDone = FALSE; 5.156 @@ -192,9 +183,9 @@ 5.157 { int i; 5.158 for( i = 0; i < NUM_SCHED_SLOTS; i++ ) 5.159 { 5.160 - free( schedSlots[i] ); 5.161 + VMS__free( schedSlots[i] ); 5.162 } 5.163 - free( schedSlots ); 5.164 + VMS__free( schedSlots ); 5.165 } 5.166 5.167 5.168 @@ -203,7 +194,7 @@ 5.169 { 5.170 //======================================================================== 5.171 // Create the Threads 5.172 - int coreIdx, retCode, i; 5.173 + int coreIdx, retCode; 5.174 5.175 //create the arrays used to measure TSC offsets between cores 5.176 pongNums = malloc( NUM_CORES * sizeof( int ) ); 5.177 @@ -227,7 +218,7 @@ 5.178 5.179 //Make the threads that animate the core loops 5.180 for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ ) 5.181 - { coreLoopThdParams[coreIdx] = malloc( sizeof(ThdParams) ); 5.182 + { coreLoopThdParams[coreIdx] = VMS__malloc( sizeof(ThdParams) ); 5.183 
coreLoopThdParams[coreIdx]->coreNum = coreIdx; 5.184 5.185 retCode = 5.186 @@ -235,7 +226,7 @@ 5.187 thdAttrs, 5.188 &coreLoop, 5.189 (void *)(coreLoopThdParams[coreIdx]) ); 5.190 - if(retCode){printf("ERROR creating thread: %d\n", retCode); exit(0);} 5.191 + if(retCode){printf("ERROR creating thread: %d\n", retCode); exit(1);} 5.192 } 5.193 } 5.194 5.195 @@ -307,10 +298,11 @@ 5.196 * animator state to return to -- 5.197 * 5.198 */ 5.199 -VirtProcr * 5.200 -VMS__create_procr( VirtProcrFnPtr fnPtr, void *initialData ) 5.201 - { VirtProcr *newPr; 5.202 - char *stackLocs, *stackPtr; 5.203 +inline VirtProcr * 5.204 +create_procr_helper( VirtProcr *newPr, VirtProcrFnPtr fnPtr, 5.205 + void *initialData, char *stackLocs ) 5.206 + { 5.207 + char *stackPtr; 5.208 5.209 //============================= MEASUREMENT STUFF ======================== 5.210 #ifdef MEAS__TIME_MASTER 5.211 @@ -318,23 +310,19 @@ 5.212 saveLowTimeStampCountInto( startStamp ); 5.213 #endif 5.214 //======================================================================== 5.215 - 5.216 - newPr = malloc( sizeof(VirtProcr) ); 5.217 - newPr->procrID = numProcrsCreated++; 5.218 - newPr->nextInstrPt = fnPtr; 5.219 - newPr->initialData = initialData; 5.220 - newPr->requests = NULL; 5.221 - newPr->schedSlot = NULL; 5.222 -// newPr->coreLoopStartPt = _VMSMasterEnv->coreLoopStartPt; 5.223 + newPr->startOfStack = stackLocs; 5.224 + newPr->procrID = _VMSMasterEnv->numProcrsCreated++; 5.225 + newPr->nextInstrPt = fnPtr; 5.226 + newPr->initialData = initialData; 5.227 + newPr->requests = NULL; 5.228 + newPr->schedSlot = NULL; 5.229 5.230 //fnPtr takes two params -- void *initData & void *animProcr 5.231 //alloc stack locations, make stackPtr be the highest addr minus room 5.232 // for 2 params + return addr. 
Return addr (NULL) is in loc pointed to 5.233 // by stackPtr, initData at stackPtr + 4 bytes, animatingPr just above 5.234 - stackLocs = malloc( VIRT_PROCR_STACK_SIZE ); 5.235 - if(stackLocs == 0) {perror("error: malloc stack"); exit(1);} 5.236 - newPr->startOfStack = stackLocs; 5.237 stackPtr = ( (char *)stackLocs + VIRT_PROCR_STACK_SIZE - 0x10 ); 5.238 + 5.239 //setup __cdecl on stack -- coreloop will switch to stackPtr before jmp 5.240 *( (int *)stackPtr + 2 ) = (int) newPr; //rightmost param -- 32bit pointer 5.241 *( (int *)stackPtr + 1 ) = (int) initialData; //next param to left 5.242 @@ -347,12 +335,48 @@ 5.243 saveLowTimeStampCountInto( endStamp ); 5.244 addIntervalToHist( startStamp, endStamp, 5.245 _VMSMasterEnv->stats->createHist ); 5.246 + //============================= MEASUREMENT STUFF ======================== 5.247 + #ifdef STATS__TURN_ON_PROBES 5.248 + struct timeval timeStamp; 5.249 + gettimeofday( &(timeStamp), NULL); 5.250 + newPr->createPtInSecs = timeStamp.tv_sec +(timeStamp.tv_usec/1000000.0) - 5.251 + _VMSMasterEnv->createPtInSecs; 5.252 #endif 5.253 //======================================================================== 5.254 - 5.255 + 5.256 return newPr; 5.257 } 5.258 5.259 +inline VirtProcr * 5.260 +VMS__create_procr( VirtProcrFnPtr fnPtr, void *initialData ) 5.261 + { VirtProcr *newPr; 5.262 + char *stackLocs; 5.263 + 5.264 + newPr = VMS__malloc( sizeof(VirtProcr) ); 5.265 + stackLocs = VMS__malloc( VIRT_PROCR_STACK_SIZE ); 5.266 + if( stackLocs == 0 ) 5.267 + { perror("VMS__malloc stack"); exit(1); } 5.268 + 5.269 + return create_procr_helper( newPr, fnPtr, initialData, stackLocs ); 5.270 + } 5.271 + 5.272 +/* "ext" designates that it's for use outside the VMS system -- should only 5.273 + * be called from main thread or other thread -- never from code animated by 5.274 + * a VMS virtual processor. 
5.275 + */ 5.276 +inline VirtProcr * 5.277 +VMS_ext__create_procr( VirtProcrFnPtr fnPtr, void *initialData ) 5.278 + { VirtProcr *newPr; 5.279 + char *stackLocs; 5.280 + 5.281 + newPr = malloc( sizeof(VirtProcr) ); 5.282 + stackLocs = malloc( VIRT_PROCR_STACK_SIZE ); 5.283 + if( stackLocs == 0 ) 5.284 + { perror("malloc stack"); exit(1); } 5.285 + 5.286 + return create_procr_helper( newPr, fnPtr, initialData, stackLocs ); 5.287 + } 5.288 + 5.289 5.290 /*there is a label inside this function -- save the addr of this label in 5.291 * the callingPr struc, as the pick-up point from which to start the next 5.292 @@ -365,8 +389,7 @@ 5.293 */ 5.294 void 5.295 VMS__suspend_procr( VirtProcr *animatingPr ) 5.296 - { void *jmpPt, *stackPtrAddr, *framePtrAddr, *coreLoopStackPtr; 5.297 - void *coreLoopFramePtr; 5.298 + { 5.299 5.300 //The request to master will cause this suspended virt procr to get 5.301 // scheduled again at some future point -- to resume, core loop jumps 5.302 @@ -376,24 +399,6 @@ 5.303 5.304 //return ownership of the virt procr and sched slot to Master virt pr 5.305 animatingPr->schedSlot->workIsDone = TRUE; 5.306 -// coreIdx = callingPr->coreAnimatedBy; 5.307 - 5.308 - stackPtrAddr = &(animatingPr->stackPtr); 5.309 - framePtrAddr = &(animatingPr->framePtr); 5.310 - 5.311 - jmpPt = _VMSMasterEnv->coreLoopStartPt; 5.312 - coreLoopFramePtr = animatingPr->coreLoopFramePtr;//need this only 5.313 - coreLoopStackPtr = animatingPr->coreLoopStackPtr;//safety 5.314 - 5.315 - //Save the virt procr's stack and frame ptrs, 5.316 - asm volatile("movl %0, %%eax; \ 5.317 - movl %%esp, (%%eax); \ 5.318 - movl %1, %%eax; \ 5.319 - movl %%ebp, (%%eax) "\ 5.320 - /* outputs */ : "=g" (stackPtrAddr), "=g" (framePtrAddr) \ 5.321 - /* inputs */ : \ 5.322 - /* clobber */ : "%eax" \ 5.323 - ); 5.324 5.325 //=========================== Measurement stuff ======================== 5.326 #ifdef MEAS__TIME_STAMP_SUSP 5.327 @@ -402,20 +407,10 @@ 5.328 #endif 5.329 
//======================================================================= 5.330 5.331 - //restore coreloop's frame ptr, then jump back to "start" of core loop 5.332 - //Note, GCC compiles to assembly that saves esp and ebp in the stack 5.333 - // frame -- so have to explicitly do assembly that saves to memory 5.334 - asm volatile("movl %0, %%eax; \ 5.335 - movl %1, %%esp; \ 5.336 - movl %2, %%ebp; \ 5.337 - jmp %%eax " \ 5.338 - /* outputs */ : \ 5.339 - /* inputs */ : "m" (jmpPt), "m"(coreLoopStackPtr), "m"(coreLoopFramePtr)\ 5.340 - /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx", "%edi","%esi" \ 5.341 - ); //list everything as clobbered to force GCC to save all 5.342 - // live vars that are in regs on stack before this 5.343 - // assembly, so that stack pointer is correct, before jmp 5.344 5.345 + SwitchToCoreLoop( animatingPr ) 5.346 + 5.347 + //======================================================================= 5.348 ResumePt: 5.349 #ifdef MEAS__TIME_STAMP_SUSP 5.350 //NOTE: only take low part of count -- do sanity check when take diff 5.351 @@ -427,6 +422,31 @@ 5.352 5.353 5.354 5.355 +/*For this implementation of VMS, it may not make much sense to have the 5.356 + * system of requests for creating a new processor done this way.. but over 5.357 + * the scope of single-master, multi-master, mult-tasking, OS-implementing, 5.358 + * distributed-memory, and so on, this gives VMS implementation a chance to 5.359 + * do stuff before suspend, in the AppVP, and in the Master before the plugin 5.360 + * is called, as well as in the lang-lib before this is called, and in the 5.361 + * plugin. So, this gives both VMS and language implementations a chance to 5.362 + * intercept at various points and do order-dependent stuff. 
5.363 + *Having a standard VMSNewPrReqData struc allows the language to create and 5.364 + * free the struc, while VMS knows how to get the newPr if it wants it, and 5.365 + * it lets the lang have lang-specific data related to creation transported 5.366 + * to the plugin. 5.367 + */ 5.368 +void 5.369 +VMS__send_create_procr_req( void *semReqData, VirtProcr *reqstingPr ) 5.370 + { VMSReqst req; 5.371 + 5.372 + req.reqType = createReq; 5.373 + req.semReqData = semReqData; 5.374 + req.nextReqst = reqstingPr->requests; 5.375 + reqstingPr->requests = &req; 5.376 + 5.377 + VMS__suspend_procr( reqstingPr ); 5.378 + } 5.379 + 5.380 5.381 /* 5.382 *This adds a request to dissipate, then suspends the processor so that the 5.383 @@ -450,81 +470,102 @@ 5.384 * pears -- making that suspend the last thing in the virt procr's trace. 5.385 */ 5.386 void 5.387 -VMS__dissipate_procr( VirtProcr *procrToDissipate ) 5.388 +VMS__send_dissipate_req( VirtProcr *procrToDissipate ) 5.389 + { VMSReqst req; 5.390 + 5.391 + req.reqType = dissipate; 5.392 + req.nextReqst = procrToDissipate->requests; 5.393 + procrToDissipate->requests = &req; 5.394 + 5.395 + VMS__suspend_procr( procrToDissipate ); 5.396 + } 5.397 + 5.398 + 5.399 +/* "ext" designates that it's for use outside the VMS system -- should only 5.400 + * be called from main thread or other thread -- never from code animated by 5.401 + * a VMS virtual processor. 5.402 + * 5.403 + *Use this version to dissipate VPs created outside the VMS system. 5.404 + */ 5.405 +void 5.406 +VMS_ext__dissipate_procr( VirtProcr *procrToDissipate ) 5.407 + { 5.408 + //NOTE: initialData was given to the processor, so should either have 5.409 + // been alloc'd with VMS__malloc, or freed by the level above animPr. 5.410 + //So, all that's left to free here is the stack and the VirtProcr struc 5.411 + // itself 5.412 + //Note, should not stack-allocate initial data -- no guarantee, in 5.413 + // general that creating processor will outlive ones it creates. 
5.414 + free( procrToDissipate->startOfStack ); 5.415 + free( procrToDissipate ); 5.416 + } 5.417 + 5.418 + 5.419 + 5.420 +/*This call's name indicates that request is malloc'd -- so req handler 5.421 + * has to free any extra requests tacked on before a send, using this. 5.422 + * 5.423 + * This inserts the semantic-layer's request data into standard VMS carrier 5.424 + * request data-struct that is mallocd. The sem request doesn't need to 5.425 + * be malloc'd if this is called inside the same call chain before the 5.426 + * send of the last request is called. 5.427 + * 5.428 + *The request handler has to call VMS__free_VMSReq for any of these 5.429 + */ 5.430 +inline void 5.431 +VMS__add_sem_request_in_mallocd_VMSReqst( void *semReqData, 5.432 + VirtProcr *callingPr ) 5.433 { VMSReqst *req; 5.434 5.435 - req = malloc( sizeof(VMSReqst) ); 5.436 -// req->virtProcrFrom = callingPr; 5.437 - req->reqType = dissipate; 5.438 - req->nextReqst = procrToDissipate->requests; 5.439 - procrToDissipate->requests = req; 5.440 - 5.441 - VMS__suspend_procr( procrToDissipate ); 5.442 -} 5.443 - 5.444 - 5.445 -/*This inserts the semantic-layer's request data into standard VMS carrier 5.446 - */ 5.447 -inline void 5.448 -VMS__add_sem_request( void *semReqData, VirtProcr *callingPr ) 5.449 - { VMSReqst *req; 5.450 - 5.451 - req = malloc( sizeof(VMSReqst) ); 5.452 -// req->virtProcrFrom = callingPr; 5.453 - req->reqType = semantic; 5.454 - req->semReqData = semReqData; 5.455 - req->nextReqst = callingPr->requests; 5.456 + req = VMS__malloc( sizeof(VMSReqst) ); 5.457 + req->reqType = semantic; 5.458 + req->semReqData = semReqData; 5.459 + req->nextReqst = callingPr->requests; 5.460 callingPr->requests = req; 5.461 } 5.462 5.463 +/*This inserts the semantic-layer's request data into standard VMS carrier 5.464 + * request data-struct is allocated on stack of this call & ptr to it sent 5.465 + * to plugin 5.466 + *Then it does suspend, to cause request to be sent. 
5.467 + */ 5.468 +inline void 5.469 +VMS__send_sem_request( void *semReqData, VirtProcr *callingPr ) 5.470 + { VMSReqst req; 5.471 5.472 -/*Use this to get first request before starting request handler's loop 5.473 + req.reqType = semantic; 5.474 + req.semReqData = semReqData; 5.475 + req.nextReqst = callingPr->requests; 5.476 + callingPr->requests = &req; 5.477 + 5.478 + VMS__suspend_procr( callingPr ); 5.479 + } 5.480 + 5.481 + 5.482 +inline void 5.483 +VMS__send_VMSSem_request( void *semReqData, VirtProcr *callingPr ) 5.484 + { VMSReqst req; 5.485 + 5.486 + req.reqType = VMSSemantic; 5.487 + req.semReqData = semReqData; 5.488 + req.nextReqst = callingPr->requests; //gab any other preceeding 5.489 + callingPr->requests = &req; 5.490 + 5.491 + VMS__suspend_procr( callingPr ); 5.492 + } 5.493 + 5.494 + 5.495 +/* 5.496 */ 5.497 VMSReqst * 5.498 -VMS__take_top_request_from( VirtProcr *procrWithReq ) 5.499 - { VMSReqst *req; 5.500 - 5.501 - req = procrWithReq->requests; 5.502 - if( req == NULL ) return req; 5.503 - 5.504 - procrWithReq->requests = procrWithReq->requests->nextReqst; 5.505 - return req; 5.506 - } 5.507 - 5.508 -/*A subtle bug due to freeing then accessing "next" after freed caused this 5.509 - * form of call to be put in -- so call this at end of request handler loop 5.510 - * that iterates through the requests. 
5.511 - */ 5.512 -VMSReqst * 5.513 -VMS__free_top_and_give_next_request_from( VirtProcr *procrWithReq ) 5.514 +VMS__take_next_request_out_of( VirtProcr *procrWithReq ) 5.515 { VMSReqst *req; 5.516 5.517 req = procrWithReq->requests; 5.518 if( req == NULL ) return NULL; 5.519 5.520 procrWithReq->requests = procrWithReq->requests->nextReqst; 5.521 - VMS__free_request( req ); 5.522 - return procrWithReq->requests; 5.523 - } 5.524 - 5.525 - 5.526 -//TODO: add a semantic-layer supplied "freer" for the semantic-data portion 5.527 -// of a request -- IE call with both a virt procr and a fn-ptr to request 5.528 -// freer (also maybe put sem request freer as a field in virt procr?) 5.529 -//MeasVMS relies right now on this only freeing VMS layer of request -- the 5.530 -// semantic portion of request is alloc'd and freed by request handler 5.531 -void 5.532 -VMS__free_request( VMSReqst *req ) 5.533 - { 5.534 - free( req ); 5.535 - } 5.536 - 5.537 - 5.538 - 5.539 -inline int 5.540 -VMS__isSemanticReqst( VMSReqst *req ) 5.541 - { 5.542 - return ( req->reqType == semantic ); 5.543 + return req; 5.544 } 5.545 5.546 5.547 @@ -534,36 +575,52 @@ 5.548 return req->semReqData; 5.549 } 5.550 5.551 -inline int 5.552 -VMS__isDissipateReqst( VMSReqst *req ) 5.553 - { 5.554 - return ( req->reqType == dissipate ); 5.555 - } 5.556 5.557 -inline int 5.558 -VMS__isCreateReqst( VMSReqst *req ) 5.559 - { 5.560 - return ( req->reqType == regCreated ); 5.561 - } 5.562 5.563 -void 5.564 -VMS__send_req_to_register_new_procr(VirtProcr *newPr, VirtProcr *reqstingPr) 5.565 - { VMSReqst *req; 5.566 +/* This is for OS requests and VMS infrastructure requests, such as to create 5.567 + * a probe -- a probe is inside the heart of VMS-core, it's not part of any 5.568 + * language -- but it's also a semantic thing that's triggered from and used 5.569 + * in the application.. so it crosses abstractions.. so, need some special 5.570 + * pattern here for handling such requests. 
5.571 + * Doing this just like it were a second language sharing VMS-core. 5.572 + * 5.573 + * This is called from the language's request handler when it sees a request 5.574 + * of type VMSSemReq 5.575 + * 5.576 + * TODO: Later change this, to give probes their own separate plugin & have 5.577 + * VMS-core steer the request to appropriate plugin 5.578 + * Do the same for OS calls -- look later at it.. 5.579 + */ 5.580 +void inline 5.581 +VMS__handle_VMSSemReq( VMSReqst *req, VirtProcr *requestingPr, void *semEnv, 5.582 + ResumePrFnPtr resumePrFnPtr ) 5.583 + { VMSSemReq *semReq; 5.584 + IntervalProbe *newProbe; 5.585 + int32 nameLen; 5.586 5.587 - req = malloc( sizeof(VMSReqst) ); 5.588 - req->reqType = regCreated; 5.589 - req->semReqData = newPr; 5.590 - req->nextReqst = reqstingPr->requests; 5.591 - reqstingPr->requests = req; 5.592 + semReq = req->semReqData; 5.593 5.594 - VMS__suspend_procr( reqstingPr ); 5.595 + newProbe = VMS__malloc( sizeof(IntervalProbe) ); 5.596 + nameLen = strlen( semReq->nameStr ); 5.597 + newProbe->nameStr = VMS__malloc( nameLen ); 5.598 + memcpy( newProbe->nameStr, semReq->nameStr, nameLen ); 5.599 + newProbe->hist = NULL; 5.600 + newProbe->schedChoiceWasRecorded = FALSE; 5.601 + 5.602 + //This runs in masterVP, so no race-condition worries 5.603 + newProbe->probeID = 5.604 + addToDynArray( newProbe, _VMSMasterEnv->dynIntervalProbesInfo ); 5.605 + 5.606 + requestingPr->dataRetFromReq = newProbe; 5.607 + 5.608 + (*resumePrFnPtr)( requestingPr, semEnv ); 5.609 } 5.610 5.611 5.612 5.613 /*This must be called by the request handler plugin -- it cannot be called 5.614 * from the semantic library "dissipate processor" function -- instead, the 5.615 - * semantic layer has to generate a request for the plug-in to call this 5.616 + * semantic layer has to generate a request, and the plug-in calls this 5.617 * function. 
5.618 *The reason is that this frees the virtual processor's stack -- which is 5.619 * still in use inside semantic library calls! 5.620 @@ -579,33 +636,31 @@ 5.621 * of dis-owning it. 5.622 */ 5.623 void 5.624 -VMS__handle_dissipate_reqst( VirtProcr *animatingPr ) 5.625 +VMS__dissipate_procr( VirtProcr *animatingPr ) 5.626 { 5.627 //dis-own all locations owned by this processor, causing to be freed 5.628 // any locations that it is (was) sole owner of 5.629 //TODO: implement VMS__malloc system, including "give up ownership" 5.630 5.631 - //The dissipate request might still be attached, so remove and free it 5.632 - VMS__free_top_and_give_next_request_from( animatingPr ); 5.633 5.634 //NOTE: initialData was given to the processor, so should either have 5.635 // been alloc'd with VMS__malloc, or freed by the level above animPr. 5.636 //So, all that's left to free here is the stack and the VirtProcr struc 5.637 // itself 5.638 - free( animatingPr->startOfStack ); 5.639 - free( animatingPr ); 5.640 + //Note, should not stack-allocate initial data -- no guarantee, in 5.641 + // general that creating processor will outlive ones it creates. 5.642 + VMS__free( animatingPr->startOfStack ); 5.643 + VMS__free( animatingPr ); 5.644 } 5.645 5.646 5.647 -//TODO: re-architect so that have clean separation between request handler 5.648 +//TODO: look at architecting cleanest separation between request handler 5.649 // and master loop, for dissipate, create, shutdown, and other non-semantic 5.650 // requests. Issue is chain: one removes requests from AppVP, one dispatches 5.651 // on type of request, and one handles each type.. 
but some types require 5.652 // action from both request handler and master loop -- maybe just give the 5.653 // request handler calls like: VMS__handle_X_request_type 5.654 5.655 -void 5.656 -endOSThreadFn( void *initData, VirtProcr *animatingPr ); 5.657 5.658 /*This is called by the semantic layer's request handler when it decides its 5.659 * time to shut down the VMS system. Calling this causes the core loop OS 5.660 @@ -619,10 +674,9 @@ 5.661 * masterVP any AppVPs that might still be allocated and sitting in the 5.662 * semantic environment, or have been orphaned in the _VMSWorkQ. 5.663 * 5.664 - *NOTE: the semantic plug-in is expected to use VMS__malloc_to to get all the 5.665 + *NOTE: the semantic plug-in is expected to use VMS__malloc to get all the 5.666 * locations it needs, and give ownership to masterVP. Then, they will be 5.667 - * automatically freed when the masterVP is dissipated. (This happens after 5.668 - * the core loop threads have all exited) 5.669 + * automatically freed. 5.670 * 5.671 *In here,create one core-loop shut-down processor for each core loop and put 5.672 * them all directly into the readyToAnimateQ. 5.673 @@ -633,16 +687,16 @@ 5.674 * point is it sure that all results have completed. 
5.675 */ 5.676 void 5.677 -VMS__handle_shutdown_reqst( void *dummy, VirtProcr *animatingPr ) 5.678 +VMS__shutdown() 5.679 { int coreIdx; 5.680 VirtProcr *shutDownPr; 5.681 5.682 //create the shutdown processors, one for each core loop -- put them 5.683 // directly into the Q -- each core will die when gets one 5.684 for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ ) 5.685 - { 5.686 + { //Note, this is running in the master 5.687 shutDownPr = VMS__create_procr( &endOSThreadFn, NULL ); 5.688 - writeSRSWQ( shutDownPr, _VMSMasterEnv->readyToAnimateQs[coreIdx] ); 5.689 + writeVMSQ( shutDownPr, _VMSMasterEnv->readyToAnimateQs[coreIdx] ); 5.690 } 5.691 5.692 } 5.693 @@ -681,49 +735,60 @@ 5.694 } 5.695 5.696 5.697 -/*This is called after the threads have shut down and control has returned 5.698 - * to the semantic layer, in the entry point function in the main thread. 5.699 - * It has to free anything allocated during VMS_init, and any other alloc'd 5.700 - * locations that might be left over. 5.701 +/*This is called from the startup & shutdown 5.702 */ 5.703 void 5.704 -VMS__cleanup_after_shutdown() 5.705 +VMS__cleanup_at_end_of_shutdown() 5.706 { 5.707 VMSQueueStruc **readyToAnimateQs; 5.708 int coreIdx; 5.709 VirtProcr **masterVPs; 5.710 SchedSlot ***allSchedSlots; //ptr to array of ptrs 5.711 5.712 + //All the environment data has been allocated with VMS__malloc, so just 5.713 + // free its internal big-chunk and all inside it disappear. 
5.714 +/* 5.715 readyToAnimateQs = _VMSMasterEnv->readyToAnimateQs; 5.716 masterVPs = _VMSMasterEnv->masterVPs; 5.717 allSchedSlots = _VMSMasterEnv->allSchedSlots; 5.718 5.719 for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ ) 5.720 { 5.721 - freeSRSWQ( readyToAnimateQs[ coreIdx ] ); 5.722 - 5.723 - VMS__handle_dissipate_reqst( masterVPs[ coreIdx ] ); 5.724 + freeVMSQ( readyToAnimateQs[ coreIdx ] ); 5.725 + //master VPs were created external to VMS, so use external free 5.726 + VMS__dissipate_procr( masterVPs[ coreIdx ] ); 5.727 5.728 freeSchedSlots( allSchedSlots[ coreIdx ] ); 5.729 } 5.730 5.731 - free( _VMSMasterEnv->readyToAnimateQs ); 5.732 - free( _VMSMasterEnv->masterVPs ); 5.733 - free( _VMSMasterEnv->allSchedSlots ); 5.734 - 5.735 - free( _VMSMasterEnv ); 5.736 + VMS__free( _VMSMasterEnv->readyToAnimateQs ); 5.737 + VMS__free( _VMSMasterEnv->masterVPs ); 5.738 + VMS__free( _VMSMasterEnv->allSchedSlots ); 5.739 + 5.740 + //============================= MEASUREMENT STUFF ======================== 5.741 + #ifdef STATS__TURN_ON_PROBES 5.742 + freeDynArrayDeep( _VMSMasterEnv->dynIntervalProbesInfo, &VMS__free_probe); 5.743 + #endif 5.744 + //======================================================================== 5.745 +*/ 5.746 + //These are the only two that use system free 5.747 + VMS_ext__free_free_list( _VMSMasterEnv->freeListHead ); 5.748 + free( (void *)_VMSMasterEnv ); 5.749 } 5.750 5.751 5.752 -//=========================================================================== 5.753 +//================================ 5.754 5.755 -inline TSCount getTSC() 5.756 - { unsigned int low, high; 5.757 - TSCount out; 5.758 5.759 - saveTimeStampCountInto( low, high ); 5.760 - out = high; 5.761 - out = (out << 32) + low; 5.762 - return out; 5.763 +/*Later, improve this -- for now, just exits the application after printing 5.764 + * the error message. 
5.765 + */ 5.766 +void 5.767 +VMS__throw_exception( char *msgStr, VirtProcr *reqstPr, VMSExcp *excpData ) 5.768 + { 5.769 + printf(msgStr); 5.770 + fflush(stdin); 5.771 + exit(1); 5.772 } 5.773 5.774 +
6.1 --- a/VMS.h Tue Oct 26 18:31:34 2010 -0700 6.2 +++ b/VMS.h Thu Nov 11 06:19:51 2010 -0800 6.3 @@ -7,22 +7,54 @@ 6.4 */ 6.5 6.6 #ifndef _VMS_H 6.7 -#define _VMS_H 6.8 +#define _VMS_H 6.9 #define __USE_GNU 6.10 6.11 #include "VMS_primitive_data_types.h" 6.12 -#include "Queue_impl/BlockingQueue.h" 6.13 +#include "Queue_impl/PrivateQueue.h" 6.14 #include "Histogram/Histogram.h" 6.15 +#include "DynArray/DynArray.h" 6.16 +#include "Hash_impl/PrivateHash.h" 6.17 +#include "vmalloc.h" 6.18 + 6.19 #include <pthread.h> 6.20 +#include <sys/time.h> 6.21 6.22 + 6.23 +//=============================== Debug =================================== 6.24 +// 6.25 //When SEQUENTIAL is defined, VMS does sequential exe in the main thread 6.26 // It still does co-routines and all the mechanisms are the same, it just 6.27 // has only a single thread and animates VPs one at a time 6.28 //#define SEQUENTIAL 6.29 6.30 -#define PRINT_DEBUG(msg) //printf(msg); fflush(stdin); 6.31 -#define PRINT1_DEBUG(msg, param) //printf(msg, param); fflush(stdin); 6.32 -#define PRINT2_DEBUG(msg, p1, p2) //printf(msg, p1, p2); fflush(stdin); 6.33 +//#define USE_WORK_STEALING 6.34 + 6.35 + //turns on the probe-instrumentation in the application -- when not 6.36 + // defined, the calls to the probe functions turn into comments 6.37 +#define STATS__ENABLE_PROBES 6.38 +//#define TURN_ON_DEBUG_PROBES 6.39 + 6.40 + //These defines turn types of bug messages on and off 6.41 + // be sure debug messages are un-commented (next block of defines) 6.42 +#define dbgProbes FALSE /* for issues inside probes themselves*/ 6.43 +#define dbgAppFlow FALSE /* Top level flow of application code -- general*/ 6.44 +#define dbgB2BMaster FALSE /* in coreloop, back to back master VPs*/ 6.45 +#define dbgRqstHdlr FALSE /* in request handler code*/ 6.46 + 6.47 + //Comment or un- the substitute half to turn on/off types of debug message 6.48 +#define DEBUG( bool, msg) \ 6.49 +// if( bool){ printf(msg); fflush(stdin);} 6.50 +#define 
DEBUG1( bool, msg, param) \ 6.51 +// if(bool){printf(msg, param); fflush(stdin);} 6.52 +#define DEBUG2( bool, msg, p1, p2) \ 6.53 +// if(bool) {printf(msg, p1, p2); fflush(stdin);} 6.54 + 6.55 +#define ERROR(msg) printf(msg); fflush(stdin); 6.56 +#define ERROR1(msg, param) printf(msg, param); fflush(stdin); 6.57 +#define ERROR2(msg, p1, p2) printf(msg, p1, p2); fflush(stdin); 6.58 + 6.59 +//=========================== STATS ======================= 6.60 6.61 //when MEAS__TIME_STAMP_SUSP is defined, causes code to be inserted and 6.62 // compiled-in that saves the low part of the time stamp count just before 6.63 @@ -33,53 +65,97 @@ 6.64 #define MEAS__TIME_MASTER 6.65 #define MEAS__NUM_TIMES_TO_RUN 100000 6.66 6.67 + //For code that calculates normalization-offset between TSC counts of 6.68 + // different cores. 6.69 #define NUM_TSC_ROUND_TRIPS 10 6.70 6.71 + 6.72 +//========================= Hardware related Constants ===================== 6.73 //This value is the number of hardware threads in the shared memory 6.74 // machine 6.75 #define NUM_CORES 4 6.76 6.77 - // balance amortizing master fixed overhead vs imbalance potential 6.78 -#define NUM_SCHED_SLOTS 3 6.79 + // tradeoff amortizing master fixed overhead vs imbalance potential 6.80 + // when work-stealing, can make bigger, at risk of losing cache affinity 6.81 +#define NUM_SCHED_SLOTS 5 6.82 6.83 #define MIN_WORK_UNIT_CYCLES 20000 6.84 6.85 -#define READYTOANIMATE_RETRIES 10000 6.86 +#define MASTERLOCK_RETRIES 10000 6.87 6.88 - // stack 6.89 -#define VIRT_PROCR_STACK_SIZE 0x10000 6.90 + // stack size in virtual processors created 6.91 +#define VIRT_PROCR_STACK_SIZE 0x4000 /* 16K */ 6.92 6.93 - //256M of total memory for VMS__malloc 6.94 -#define MASSIVE_MALLOC_SIZE 0x10000000 6.95 + // memory for VMS__malloc 6.96 +#define MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE 0x10000000 /* 256M */ 6.97 6.98 -#define NUM_PREPEND_BYTES sizeof(FreeListElem) + sizeof(ownerElem); 6.99 + 6.100 +//============================== 6.101 
6.102 #define SUCCESS 0 6.103 6.104 -#define writeVMSQ writeSRSWQ 6.105 -#define readVMSQ readSRSWQ 6.106 -#define makeVMSQ makeSRSWQ 6.107 -#define VMSQueueStruc SRSWQueueStruc 6.108 +#define writeVMSQ writePrivQ 6.109 +#define readVMSQ readPrivQ 6.110 +#define makeVMSQ makePrivQ 6.111 +#define numInVMSQ numInPrivQ 6.112 +#define VMSQueueStruc PrivQueueStruc 6.113 6.114 -//#define thdAttrs NULL //For PThreads 6.115 6.116 -typedef struct _SchedSlot SchedSlot; 6.117 -typedef struct _VMSReqst VMSReqst; 6.118 -typedef struct _VirtProcr VirtProcr; 6.119 + 6.120 +//=========================================================================== 6.121 +typedef unsigned long long TSCount; 6.122 + 6.123 +typedef struct _SchedSlot SchedSlot; 6.124 +typedef struct _VMSReqst VMSReqst; 6.125 +typedef struct _VirtProcr VirtProcr; 6.126 +typedef struct _IntervalProbe IntervalProbe; 6.127 +typedef struct _GateStruc GateStruc; 6.128 + 6.129 6.130 typedef VirtProcr * (*SlaveScheduler) ( void *, int ); //semEnv, coreIdx 6.131 typedef void (*RequestHandler) ( VirtProcr *, void * ); //prWReqst, semEnv 6.132 typedef void (*VirtProcrFnPtr) ( void *, VirtProcr * ); //initData, animPr 6.133 typedef void VirtProcrFn ( void *, VirtProcr * ); //initData, animPr 6.134 +typedef void (*ResumePrFnPtr) ( VirtProcr *, void * ); 6.135 + 6.136 + 6.137 +//============= Requests =========== 6.138 +// 6.139 + 6.140 +enum VMSReqstType //avoid starting enums at 0, for debug reasons 6.141 + { 6.142 + semantic = 1, 6.143 + createReq, 6.144 + dissipate, 6.145 + VMSSemantic //goes with VMSSemReqst below 6.146 + }; 6.147 + 6.148 +struct _VMSReqst 6.149 + { 6.150 + enum VMSReqstType reqType;//used for dissipate and in future for IO requests 6.151 + void *semReqData; 6.152 + 6.153 + VMSReqst *nextReqst; 6.154 + }; 6.155 +//VMSReqst 6.156 + 6.157 +enum VMSSemReqstType //These are equivalent to semantic requests, but for 6.158 + { // VMS's services available directly to app, like OS 6.159 + createProbe = 1, // and 
probe services -- like a VMS-wide built-in lang 6.160 + openFile, 6.161 + otherIO 6.162 + }; 6.163 6.164 typedef struct 6.165 - { 6.166 - void *endThdPt; 6.167 - unsigned int coreNum; 6.168 + { enum VMSSemReqstType reqType; 6.169 + VirtProcr *requestingPr; 6.170 + char *nameStr; //for create probe 6.171 } 6.172 -ThdParams; 6.173 + VMSSemReq; 6.174 6.175 6.176 +//==================== Core data structures =================== 6.177 + 6.178 struct _SchedSlot 6.179 { 6.180 int workIsDone; 6.181 @@ -87,24 +163,6 @@ 6.182 VirtProcr *procrAssignedToSlot; 6.183 }; 6.184 //SchedSlot 6.185 - 6.186 -enum ReqstType 6.187 - { 6.188 - semantic = 1, 6.189 - dissipate, 6.190 - regCreated, 6.191 - IO 6.192 - }; 6.193 - 6.194 -struct _VMSReqst 6.195 - { 6.196 -// VirtProcr *virtProcrFrom; 6.197 - enum ReqstType reqType;//used for dissipate and in future for IO requests 6.198 - void *semReqData; 6.199 - 6.200 - VMSReqst *nextReqst; 6.201 - }; 6.202 -//VMSReqst 6.203 6.204 struct _VirtProcr 6.205 { int procrID; //for debugging -- count up each time create 6.206 @@ -123,9 +181,10 @@ 6.207 SchedSlot *schedSlot; 6.208 VMSReqst *requests; 6.209 6.210 - void *semanticData; 6.211 + void *semanticData; //this lives here for the life of VP 6.212 + void *dataRetFromReq;//values returned from plugin to VP go here 6.213 6.214 - //============================= MEASUREMENT STUFF ======================== 6.215 + //=========== MEASUREMENT STUFF ========== 6.216 #ifdef MEAS__TIME_STAMP_SUSP 6.217 unsigned int preSuspTSCLow; 6.218 unsigned int postSuspTSCLow; 6.219 @@ -134,7 +193,8 @@ 6.220 unsigned int startMasterTSCLow; 6.221 unsigned int endMasterTSCLow; 6.222 #endif 6.223 - //======================================================================== 6.224 + 6.225 + float64 createPtInSecs; //have space but don't use on some configs 6.226 }; 6.227 //VirtProcr 6.228 6.229 @@ -158,37 +218,79 @@ 6.230 6.231 void *semanticEnv; 6.232 void *OSEventStruc; //for future, when add I/O to BLIS 6.233 + 
MallocProlog *freeListHead; 6.234 + int32 amtOfOutstandingMem; //total currently allocated 6.235 6.236 void *coreLoopStartPt;//addr to jump to to re-enter coreLoop 6.237 void *coreLoopEndPt; //addr to jump to to shut down a coreLoop 6.238 6.239 - int setupComplete; 6.240 - int masterLock; 6.241 + int32 setupComplete; 6.242 + int32 masterLock; 6.243 6.244 VMSStats *stats; 6.245 + int32 numMasterInARow[NUM_CORES];//detect back-to-back masterVP 6.246 + GateStruc *workStealingGates[ NUM_CORES ]; //concurrent work-steal 6.247 + int32 workStealingLock; 6.248 + 6.249 + int32 numProcrsCreated; //gives ordering to processor creation 6.250 + 6.251 + //=========== MEASUREMENT STUFF ============= 6.252 + IntervalProbe **intervalProbes; 6.253 + PrivDynArrayInfo *dynIntervalProbesInfo; 6.254 + HashTable *probeNameHashTbl; 6.255 + int32 masterCreateProbeID; 6.256 + float64 createPtInSecs; 6.257 } 6.258 MasterEnv; 6.259 6.260 +//========================= Extra Stuff Data Strucs ======================= 6.261 +typedef struct 6.262 + { 6.263 6.264 -//========================================================== 6.265 + } 6.266 +VMSExcp; 6.267 + 6.268 +struct _GateStruc 6.269 + { 6.270 + int32 gateClosed; 6.271 + int32 preGateProgress; 6.272 + int32 waitProgress; 6.273 + int32 exitProgress; 6.274 + }; 6.275 +//GateStruc 6.276 + 6.277 +//======================= OS Thread related =============================== 6.278 6.279 void * coreLoop( void *paramsIn ); //standard PThreads fn prototype 6.280 void * coreLoop_Seq( void *paramsIn ); //standard PThreads fn prototype 6.281 void masterLoop( void *initData, VirtProcr *masterPr ); 6.282 6.283 6.284 -//===================== Global Vars =================== 6.285 - 6.286 +typedef struct 6.287 + { 6.288 + void *endThdPt; 6.289 + unsigned int coreNum; 6.290 + } 6.291 +ThdParams; 6.292 6.293 pthread_t coreLoopThdHandles[ NUM_CORES ]; //pthread's virt-procr state 6.294 ThdParams *coreLoopThdParams [ NUM_CORES ]; 6.295 pthread_mutex_t suspendLock; 
6.296 pthread_cond_t suspend_cond; 6.297 6.298 + 6.299 + 6.300 +//===================== Global Vars =================== 6.301 + 6.302 volatile MasterEnv *_VMSMasterEnv; 6.303 6.304 6.305 -//========================== 6.306 + 6.307 + 6.308 +//=========================== Function Prototypes ========================= 6.309 + 6.310 + 6.311 +//========== Setup and shutdown ========== 6.312 void 6.313 VMS__init(); 6.314 6.315 @@ -204,69 +306,59 @@ 6.316 VirtProcr * 6.317 VMS__create_procr( VirtProcrFnPtr fnPtr, void *initialData ); 6.318 6.319 +void 6.320 +VMS__dissipate_procr( VirtProcr *procrToDissipate ); 6.321 + 6.322 + //Use this to create processor inside entry point & other places outside 6.323 + // the VMS system boundary (IE, not run in slave nor Master) 6.324 VirtProcr * 6.325 -VMS__create_the_shutdown_procr(); 6.326 - 6.327 -//========================== 6.328 -inline void 6.329 -VMS__add_sem_request( void *semReqData, VirtProcr *callingPr ); 6.330 +VMS_ext__create_procr( VirtProcrFnPtr fnPtr, void *initialData ); 6.331 6.332 void 6.333 -VMS__send_req_to_register_new_procr( VirtProcr *newPrToRegister, 6.334 - VirtProcr *reqstingPr ); 6.335 +VMS_ext__dissipate_procr( VirtProcr *procrToDissipate ); 6.336 6.337 void 6.338 -VMS__free_request( VMSReqst *req ); 6.339 +VMS__throw_exception( char *msgStr, VirtProcr *reqstPr, VMSExcp *excpData ); 6.340 6.341 void 6.342 -VMS__remove_and_free_top_request( VirtProcr *reqstingPr ); 6.343 +VMS__shutdown(); 6.344 + 6.345 +void 6.346 +VMS__cleanup_at_end_of_shutdown(); 6.347 + 6.348 + 6.349 +//============== Request Related =============== 6.350 + 6.351 +void 6.352 +VMS__suspend_procr( VirtProcr *callingPr ); 6.353 + 6.354 +inline void 6.355 +VMS__add_sem_request_in_mallocd_VMSReqst( void *semReqData, VirtProcr *callingPr ); 6.356 + 6.357 +inline void 6.358 +VMS__send_sem_request( void *semReqData, VirtProcr *callingPr ); 6.359 + 6.360 +void 6.361 +VMS__send_create_procr_req( void *semReqData, VirtProcr *reqstingPr ); 6.362 + 
6.363 +void inline 6.364 +VMS__send_dissipate_req( VirtProcr *prToDissipate ); 6.365 + 6.366 +inline void 6.367 +VMS__send_VMSSem_request( void *semReqData, VirtProcr *callingPr ); 6.368 6.369 VMSReqst * 6.370 -VMS__take_top_request_from( VirtProcr *reqstingPr ); 6.371 - 6.372 -VMSReqst * 6.373 -VMS__free_top_and_give_next_request_from( VirtProcr *procrWithReq ); 6.374 +VMS__take_next_request_out_of( VirtProcr *procrWithReq ); 6.375 6.376 inline void * 6.377 VMS__take_sem_reqst_from( VMSReqst *req ); 6.378 6.379 -inline int 6.380 -VMS__isSemanticReqst( VMSReqst *req ); 6.381 - 6.382 -inline int 6.383 -VMS__isDissipateReqst( VMSReqst *req ); 6.384 - 6.385 -inline int 6.386 -VMS__isCreateReqst( VMSReqst *req ); 6.387 - 6.388 -//========================== 6.389 - 6.390 -void 6.391 -VMS__suspend_procr( VirtProcr *callingPr ); 6.392 - 6.393 -void 6.394 -VMS__dissipate_procr( VirtProcr *prToDissipate ); 6.395 - 6.396 -void 6.397 -VMS__handle_dissipate_reqst( VirtProcr *procrToDissipate ); 6.398 - 6.399 -void 6.400 -VMS__cleanup_after_shutdown(); 6.401 - 6.402 -//========================== 6.403 -void 6.404 -measureTSCOffsetsAsCore0(); 6.405 - 6.406 -void 6.407 -measureTSCOffsetsAsRemoteCore( int coreIdx ); 6.408 - 6.409 -//============================= Statistics ================================== 6.410 - 6.411 -typedef unsigned long long TSCount; 6.412 - 6.413 //Frequency of TS counts 6.414 //TODO: change freq for each machine 6.415 #define TSCOUNT_FREQ 3180000000 6.416 +//======================== STATS ====================== 6.417 + 6.418 +//===== RDTSC wrapper ===== 6.419 6.420 #define saveTimeStampCountInto(low, high) \ 6.421 asm volatile("RDTSC; \ 6.422 @@ -284,10 +376,12 @@ 6.423 /* inputs */ : \ 6.424 /* clobber */ : "%eax", "%edx" \ 6.425 ); 6.426 +//===== 6.427 6.428 -inline TSCount getTSC(); 6.429 +#include "SwitchAnimators.h" 6.430 +#include "probes.h" 6.431 6.432 -inline TSCount getTSC(); 6.433 + 6.434 6.435 //===================== Debug 
========================== 6.436 int numProcrsCreated; 6.437 @@ -298,4 +392,3 @@ 6.438 TSCount *pingTimes; 6.439 6.440 #endif /* _VMS_H */ 6.441 -
7.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 7.2 +++ b/VMS__DESIGN_NOTES.txt Thu Nov 11 06:19:51 2010 -0800 7.3 @@ -0,0 +1,2 @@ 7.4 + 7.5 +Implement VMS this way:
8.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 8.2 +++ b/probes.c Thu Nov 11 06:19:51 2010 -0800 8.3 @@ -0,0 +1,354 @@ 8.4 +/* 8.5 + * Copyright 2010 OpenSourceStewardshipFoundation 8.6 + * 8.7 + * Licensed under BSD 8.8 + */ 8.9 + 8.10 +#include <stdio.h> 8.11 +#include <stdlib.h> 8.12 +#include <malloc.h> 8.13 +#include <sys/time.h> 8.14 +#include <string.h> 8.15 + 8.16 +#include "VMS.h" 8.17 +#include "Queue_impl/BlockingQueue.h" 8.18 +#include "Histogram/Histogram.h" 8.19 + 8.20 + 8.21 +//================================ STATS ==================================== 8.22 + 8.23 +inline TSCount getTSCount() 8.24 + { unsigned int low, high; 8.25 + TSCount out; 8.26 + 8.27 + saveTimeStampCountInto( low, high ); 8.28 + out = high; 8.29 + out = (out << 32) + low; 8.30 + return out; 8.31 + } 8.32 + 8.33 + 8.34 + 8.35 +//==================== Probes ================= 8.36 +#ifdef STATS__USE_TSC_PROBES 8.37 + 8.38 +int32 8.39 +VMS__create_histogram_probe( int32 numBins, float32 startValue, 8.40 + float32 binWidth, char *nameStr ) 8.41 + { IntervalProbe *newProbe; 8.42 + int32 idx; 8.43 + FloatHist *hist; 8.44 + 8.45 + idx = VMS__create_single_interval_probe( nameStr ); 8.46 + newProbe = _VMSMasterEnv->intervalProbes[ idx ]; 8.47 + 8.48 + hist = makeFloatHistogram( numBins, startValue, binWidth ); 8.49 + newProbe->hist = hist; 8.50 + return idx; 8.51 + } 8.52 + 8.53 +void 8.54 +VMS_impl__record_interval_start_in_probe( int32 probeID ) 8.55 + { IntervalProbe *probe; 8.56 + 8.57 + probe = _VMSMasterEnv->intervalProbes[ probeID ]; 8.58 + probe->startStamp = getTSCount(); 8.59 + } 8.60 + 8.61 +void 8.62 +VMS_impl__record_interval_end_in_probe( int32 probeID ) 8.63 + { IntervalProbe *probe; 8.64 + TSCount endStamp; 8.65 + 8.66 + endStamp = getTSCount(); 8.67 + 8.68 + probe = _VMSMasterEnv->intervalProbes[ probeID ]; 8.69 + probe->endStamp = endStamp; 8.70 + 8.71 + if( probe->hist != NULL ) 8.72 + { TSCount interval = probe->endStamp - probe->startStamp; 8.73 + //if the 
interval is sane, then add to histogram 8.74 + if( interval < probe->hist->endOfRange * 10 ) 8.75 + addToFloatHist( interval, probe->hist ); 8.76 + } 8.77 + } 8.78 + 8.79 +void 8.80 +VMS_impl__print_stats_of_probe( int32 probeID ) 8.81 + { IntervalProbe *probe; 8.82 + 8.83 + probe = _VMSMasterEnv->intervalProbes[ probeID ]; 8.84 + 8.85 + if( probe->hist == NULL ) 8.86 + { 8.87 + printf("probe: %s, interval: %.6lf\n", probe->nameStr,probe->interval); 8.88 + } 8.89 + 8.90 + else 8.91 + { 8.92 + printf( "probe: %s\n", probe->nameStr ); 8.93 + printFloatHist( probe->hist ); 8.94 + } 8.95 + } 8.96 +#else 8.97 + 8.98 +/* 8.99 + * In practice, probe operations are called from the app, from inside slaves 8.100 + * -- so have to be sure each probe is single-VP owned, and be sure that 8.101 + * any place common structures are modified it's done inside the master. 8.102 + * So -- the only place common structures are modified is during creation. 8.103 + * after that, all mods are to individual instances. 8.104 + * 8.105 + * Thniking perhaps should change the semantics to be that probes are 8.106 + * attached to the virtual processor -- and then everything is guaranteed 8.107 + * to be isolated -- except then can't take any intervals that span VPs, 8.108 + * and would have to transfer the probes to Master env when VP dissipates.. 8.109 + * gets messy.. 
8.110 + * 8.111 + * For now, just making so that probe creation causes a suspend, so that 8.112 + * the dynamic array in the master env is only modified from the master 8.113 + * 8.114 + */ 8.115 +IntervalProbe * 8.116 +create_generic_probe( char *nameStr, VirtProcr *animPr ) 8.117 + { IntervalProbe *newProbe; 8.118 + VMSSemReq reqData; 8.119 + 8.120 + reqData.reqType = createProbe; 8.121 + reqData.nameStr = nameStr; 8.122 + 8.123 + VMS__send_VMSSem_request( &reqData, animPr ); 8.124 + 8.125 + return animPr->dataRetFromReq; 8.126 + } 8.127 + 8.128 +/*Use this version from outside VMS -- it uses external malloc, and modifies 8.129 + * dynamic array, so can't be animated in a slave VP 8.130 + */ 8.131 +IntervalProbe * 8.132 +ext__create_generic_probe( char *nameStr ) 8.133 + { IntervalProbe *newProbe; 8.134 + int32 nameLen; 8.135 + 8.136 + newProbe = malloc( sizeof(IntervalProbe) ); 8.137 + nameLen = strlen( nameStr ); 8.138 + newProbe->nameStr = malloc( nameLen ); 8.139 + memcpy( newProbe->nameStr, nameStr, nameLen ); 8.140 + newProbe->hist = NULL; 8.141 + newProbe->schedChoiceWasRecorded = FALSE; 8.142 + newProbe->probeID = 8.143 + addToDynArray( newProbe, _VMSMasterEnv->dynIntervalProbesInfo ); 8.144 + 8.145 + return newProbe; 8.146 + } 8.147 + 8.148 + 8.149 +/*Only call from inside master or main startup/shutdown thread 8.150 + */ 8.151 +void 8.152 +VMS_impl__free_probe( IntervalProbe *probe ) 8.153 + { if( probe->hist != NULL ) freeDblHist( probe->hist ); 8.154 + if( probe->nameStr != NULL) VMS__free( probe->nameStr ); 8.155 + VMS__free( probe ); 8.156 + } 8.157 + 8.158 + 8.159 +int32 8.160 +VMS_impl__record_time_point_into_new_probe( char *nameStr, VirtProcr *animPr) 8.161 + { IntervalProbe *newProbe; 8.162 + struct timeval *startStamp; 8.163 + float64 startSecs; 8.164 + 8.165 + newProbe = create_generic_probe( nameStr, animPr ); 8.166 + newProbe->endSecs = 0; 8.167 + 8.168 + gettimeofday( &(newProbe->startStamp), NULL); 8.169 + 8.170 + //turn into a double 
8.171 + startStamp = &(newProbe->startStamp); 8.172 + startSecs = startStamp->tv_sec + ( startStamp->tv_usec / 1000000.0 ); 8.173 + newProbe->startSecs = startSecs; 8.174 + 8.175 + return newProbe->probeID; 8.176 + } 8.177 + 8.178 +int32 8.179 +VMS_ext_impl__record_time_point_into_new_probe( char *nameStr ) 8.180 + { IntervalProbe *newProbe; 8.181 + struct timeval *startStamp; 8.182 + float64 startSecs; 8.183 + 8.184 + newProbe = ext__create_generic_probe( nameStr ); 8.185 + newProbe->endSecs = 0; 8.186 + 8.187 + gettimeofday( &(newProbe->startStamp), NULL); 8.188 + 8.189 + //turn into a double 8.190 + startStamp = &(newProbe->startStamp); 8.191 + startSecs = startStamp->tv_sec + ( startStamp->tv_usec / 1000000.0 ); 8.192 + newProbe->startSecs = startSecs; 8.193 + 8.194 + return newProbe->probeID; 8.195 + } 8.196 + 8.197 +int32 8.198 +VMS_impl__create_single_interval_probe( char *nameStr, VirtProcr *animPr ) 8.199 + { IntervalProbe *newProbe; 8.200 + 8.201 + newProbe = create_generic_probe( nameStr, animPr ); 8.202 + 8.203 + return newProbe->probeID; 8.204 + } 8.205 + 8.206 +int32 8.207 +VMS_impl__create_histogram_probe( int32 numBins, float64 startValue, 8.208 + float64 binWidth, char *nameStr, VirtProcr *animPr ) 8.209 + { IntervalProbe *newProbe; 8.210 + DblHist *hist; 8.211 + 8.212 + newProbe = create_generic_probe( nameStr, animPr ); 8.213 + 8.214 + hist = makeDblHistogram( numBins, startValue, binWidth ); 8.215 + newProbe->hist = hist; 8.216 + return newProbe->probeID; 8.217 + } 8.218 + 8.219 +void 8.220 +VMS_impl__index_probe_by_its_name( int32 probeID, VirtProcr *animPr ) 8.221 + { IntervalProbe *probe; 8.222 + 8.223 + //TODO: fix this To be in Master -- race condition 8.224 + probe = _VMSMasterEnv->intervalProbes[ probeID ]; 8.225 + 8.226 + addValueIntoTable(probe->nameStr, probe, _VMSMasterEnv->probeNameHashTbl); 8.227 + } 8.228 + 8.229 +IntervalProbe * 8.230 +VMS_impl__get_probe_by_name( char *probeName, VirtProcr *animPr ) 8.231 + { 8.232 + //TODO: fix 
this To be in Master -- race condition 8.233 + return getValueFromTable( probeName, _VMSMasterEnv->probeNameHashTbl ); 8.234 + } 8.235 + 8.236 + 8.237 +/*Everything is local to the animating procr, so no need for request, do 8.238 + * work locally, in the anim Pr 8.239 + */ 8.240 +void 8.241 +VMS_impl__record_sched_choice_into_probe( int32 probeID, VirtProcr *animatingPr ) 8.242 + { IntervalProbe *probe; 8.243 + 8.244 + probe = _VMSMasterEnv->intervalProbes[ probeID ]; 8.245 + probe->schedChoiceWasRecorded = TRUE; 8.246 + probe->coreNum = animatingPr->coreAnimatedBy; 8.247 + probe->procrID = animatingPr->procrID; 8.248 + probe->procrCreateSecs = animatingPr->createPtInSecs; 8.249 + } 8.250 + 8.251 +/*Everything is local to the animating procr, so no need for request, do 8.252 + * work locally, in the anim Pr 8.253 + */ 8.254 +void 8.255 +VMS_impl__record_interval_start_in_probe( int32 probeID ) 8.256 + { IntervalProbe *probe; 8.257 + 8.258 + DEBUG( dbgProbes, "record start of interval\n" ) 8.259 + probe = _VMSMasterEnv->intervalProbes[ probeID ]; 8.260 + gettimeofday( &(probe->startStamp), NULL ); 8.261 + } 8.262 + 8.263 + 8.264 +/*Everything is local to the animating procr, so no need for request, do 8.265 + * work locally, in the anim Pr 8.266 + */ 8.267 +void 8.268 +VMS_impl__record_interval_end_in_probe( int32 probeID ) 8.269 + { IntervalProbe *probe; 8.270 + struct timeval *endStamp, *startStamp; 8.271 + float64 startSecs, endSecs; 8.272 + 8.273 + DEBUG( dbgProbes, "record end of interval\n" ) 8.274 + //possible seg-fault if array resized by diff core right after this 8.275 + // one gets probe..? Something like that? Might be safe.. 
don't care 8.276 + probe = _VMSMasterEnv->intervalProbes[ probeID ]; 8.277 + gettimeofday( &(probe->endStamp), NULL); 8.278 + 8.279 + //now turn into an interval held in a double 8.280 + startStamp = &(probe->startStamp); 8.281 + endStamp = &(probe->endStamp); 8.282 + 8.283 + startSecs = startStamp->tv_sec + ( startStamp->tv_usec / 1000000.0 ); 8.284 + endSecs = endStamp->tv_sec + ( endStamp->tv_usec / 1000000.0 ); 8.285 + 8.286 + probe->interval = endSecs - startSecs; 8.287 + probe->startSecs = startSecs; 8.288 + probe->endSecs = endSecs; 8.289 + 8.290 + if( probe->hist != NULL ) 8.291 + { 8.292 + //if the interval is sane, then add to histogram 8.293 + if( probe->interval < probe->hist->endOfRange * 10 ) 8.294 + addToDblHist( probe->interval, probe->hist ); 8.295 + } 8.296 + } 8.297 + 8.298 +void 8.299 +print_probe_helper( IntervalProbe *probe ) 8.300 + { 8.301 + printf( "\nprobe: %s, ", probe->nameStr ); 8.302 + 8.303 + if( probe->schedChoiceWasRecorded ) 8.304 + { printf( "coreNum: %d, procrID: %d, procrCreated: %.6lf | ", 8.305 + probe->coreNum, probe->procrID, probe->procrCreateSecs ); 8.306 + } 8.307 + 8.308 + if( probe->endSecs == 0 ) //just a single point in time 8.309 + { 8.310 + printf( " time point: %.6lf\n", 8.311 + probe->startSecs - _VMSMasterEnv->createPtInSecs ); 8.312 + } 8.313 + else if( probe->hist == NULL ) //just an interval 8.314 + { 8.315 + printf( " startSecs: %.6lf, interval: %.6lf\n", 8.316 + probe->startSecs - _VMSMasterEnv->createPtInSecs, probe->interval); 8.317 + } 8.318 + else //a full histogram of intervals 8.319 + { 8.320 + printDblHist( probe->hist ); 8.321 + } 8.322 + } 8.323 + 8.324 +//TODO: change so pass around pointer to probe instead of its array-index.. 8.325 +// will eliminate chance for timing of resize to cause problems with the 8.326 +// lookup -- even though don't think it actually can cause problems.. 
8.327 +// there's no need to pass index around -- have hash table for names, and 8.328 +// only need it once, then have ptr to probe.. the thing about enum the 8.329 +// index and use that as name is clunky in practice -- just hash. 8.330 +void 8.331 +VMS_impl__print_stats_of_probe( int32 probeID ) 8.332 + { IntervalProbe *probe; 8.333 + 8.334 + probe = _VMSMasterEnv->intervalProbes[ probeID ]; 8.335 + 8.336 + print_probe_helper( probe ); 8.337 + } 8.338 + 8.339 + 8.340 + 8.341 +void 8.342 +generic_print_probe( void *_probe ) 8.343 + { IntervalProbe *probe; 8.344 + 8.345 + probe = (IntervalProbe *)_probe; 8.346 + print_probe_helper( probe ); 8.347 + } 8.348 + 8.349 +void 8.350 +VMS_impl__print_stats_of_all_probes() 8.351 + { IntervalProbe *probe; 8.352 + 8.353 + forAllInDynArrayDo( _VMSMasterEnv->dynIntervalProbesInfo, 8.354 + &generic_print_probe ); 8.355 + fflush( stdout ); 8.356 + } 8.357 +#endif
9.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 9.2 +++ b/probes.h Thu Nov 11 06:19:51 2010 -0800 9.3 @@ -0,0 +1,194 @@ 9.4 +/* 9.5 + * Copyright 2009 OpenSourceStewardshipFoundation.org 9.6 + * Licensed under GNU General Public License version 2 9.7 + * 9.8 + * Author: seanhalle@yahoo.com 9.9 + * 9.10 + */ 9.11 + 9.12 +#ifndef _PROBES_H 9.13 +#define _PROBES_H 9.14 +#define __USE_GNU 9.15 + 9.16 +#include "VMS_primitive_data_types.h" 9.17 + 9.18 +#include <sys/time.h> 9.19 + 9.20 + 9.21 + //when STATS__TURN_ON_PROBES is defined allows using probes to measure 9.22 + // time intervals. The probes are macros that only compile to something 9.23 + // when STATS__TURN_ON_PROBES is defined. The probes are saved in the 9.24 + // master env -- but only when this is defined. 9.25 + //The TSC probes use RDTSC instr, can be unreliable, Dbl uses gettimeofday 9.26 +#define STATS__TURN_ON_PROBES 9.27 +//#define STATS__USE_TSC_PROBES 9.28 +#define STATS__USE_DBL_PROBES 9.29 + 9.30 +//typedef struct _IntervalProbe IntervalProbe; //in VMS.h 9.31 + 9.32 +struct _IntervalProbe 9.33 + { 9.34 + char *nameStr; 9.35 + int32 probeID; 9.36 + 9.37 + int32 schedChoiceWasRecorded; 9.38 + int32 coreNum; 9.39 + int32 procrID; 9.40 + float64 procrCreateSecs; 9.41 + 9.42 + #ifdef STATS__USE_TSC_PROBES 9.43 + TSCount startStamp; 9.44 + TSCount endStamp; 9.45 + #else 9.46 + struct timeval startStamp; 9.47 + struct timeval endStamp; 9.48 + #endif 9.49 + float64 startSecs; 9.50 + float64 endSecs; 9.51 + float64 interval; 9.52 + DblHist *hist;//if NULL, then is single interval probe 9.53 + }; 9.54 + 9.55 + 9.56 +//============================= Statistics ================================== 9.57 + 9.58 + //Frequency of TS counts 9.59 + //TODO: change freq for each machine 9.60 +#define TSCOUNT_FREQ 3180000000 9.61 + 9.62 +inline TSCount getTSCount(); 9.63 + 9.64 + 9.65 +//======================== Probes ============================= 9.66 +// 9.67 +// Use macros to allow turning probes off with a 
#define switch 9.68 +#ifdef STATS__ENABLE_PROBES 9.69 +int32 9.70 +VMS_impl__record_time_point_into_new_probe( char *nameStr,VirtProcr *animPr); 9.71 +#define VMS__record_time_point_into_new_probe( nameStr, animPr ) \ 9.72 + VMS_impl__record_time_point_in_new_probe( nameStr, animPr ) 9.73 + 9.74 +int32 9.75 +VMS_ext_impl__record_time_point_into_new_probe( char *nameStr ); 9.76 +#define VMS_ext__record_time_point_into_new_probe( nameStr ) \ 9.77 + VMS_ext_impl__record_time_point_into_new_probe( nameStr ) 9.78 + 9.79 + 9.80 +int32 9.81 +VMS_impl__create_single_interval_probe( char *nameStr, VirtProcr *animPr ); 9.82 +#define VMS__create_single_interval_probe( nameStr, animPr ) \ 9.83 + VMS_impl__create_single_interval_probe( nameStr, animPr ) 9.84 + 9.85 + 9.86 +int32 9.87 +VMS_impl__create_histogram_probe( int32 numBins, float64 startValue, 9.88 + float64 binWidth, char *nameStr, VirtProcr *animPr ); 9.89 +#define VMS__create_histogram_probe( numBins, startValue, \ 9.90 + binWidth, nameStr, animPr ) \ 9.91 + VMS_impl__create_histogram_probe( numBins, startValue, \ 9.92 + binWidth, nameStr, animPr ) 9.93 +void 9.94 +VMS_impl__free_probe( IntervalProbe *probe ); 9.95 +#define VMS__free_probe( probe ) \ 9.96 + VMS_impl__free_probe( probe ) 9.97 + 9.98 +void 9.99 +VMS_impl__index_probe_by_its_name( int32 probeID, VirtProcr *animPr ); 9.100 +#define VMS__index_probe_by_its_name( probeID, animPr ) \ 9.101 + VMS_impl__index_probe_by_its_name( probeID, animPr ) 9.102 + 9.103 +IntervalProbe * 9.104 +VMS_impl__get_probe_by_name( char *probeName, VirtProcr *animPr ); 9.105 +#define VMS__get_probe_by_name( probeID, animPr ) \ 9.106 + VMS_impl__get_probe_by_name( probeName, animPr ) 9.107 + 9.108 +void 9.109 +VMS_impl__record_sched_choice_into_probe( int32 probeID, VirtProcr *animPr ); 9.110 +#define VMS__record_sched_choice_into_probe( probeID, animPr ) \ 9.111 + VMS_impl__record_sched_choice_into_probe( probeID, animPr ) 9.112 + 9.113 +void 9.114 
+VMS_impl__record_interval_start_in_probe( int32 probeID ); 9.115 +#define VMS__record_interval_start_in_probe( probeID ) \ 9.116 + VMS_impl__record_interval_start_in_probe( probeID ) 9.117 + 9.118 +void 9.119 +VMS_impl__record_interval_end_in_probe( int32 probeID ); 9.120 +#define VMS__record_interval_end_in_probe( probeID ) \ 9.121 + VMS_impl__record_interval_end_in_probe( probeID ) 9.122 + 9.123 +void 9.124 +VMS_impl__print_stats_of_probe( int32 probeID ); 9.125 +#define VMS__print_stats_of_probe( probeID ) \ 9.126 + VMS_impl__print_stats_of_probe( probeID ) 9.127 + 9.128 +void 9.129 +VMS_impl__print_stats_of_all_probes(); 9.130 +#define VMS__print_stats_of_all_probes \ 9.131 + VMS_impl__print_stats_of_all_probes 9.132 + 9.133 + 9.134 +#else 9.135 +int32 9.136 +VMS_impl__record_time_point_into_new_probe( char *nameStr,VirtProcr *animPr); 9.137 +#define VMS__record_time_point_into_new_probe( nameStr, animPr ) \ 9.138 + 0 /* do nothing */ 9.139 + 9.140 +int32 9.141 +VMS_ext_impl__record_time_point_into_new_probe( char *nameStr, VirtProcr *animPr); 9.142 +#define VMS_ext__record_time_point_into_new_probe( nameStr ) \ 9.143 + 0 /* do nothing */ 9.144 + 9.145 + 9.146 +int32 9.147 +VMS_impl__create_single_interval_probe( char *nameStr, VirtProcr *animPr ); 9.148 +#define VMS__create_single_interval_probe( nameStr, animPr ) \ 9.149 + 0 /* do nothing */ 9.150 + 9.151 + 9.152 +int32 9.153 +VMS_impl__create_histogram_probe( int32 numBins, float64 startValue, 9.154 + float64 binWidth, char *nameStr, VirtProcr *animPr ); 9.155 +#define VMS__create_histogram_probe( numBins, startValue, \ 9.156 + binWidth, nameStr, animPr ) \ 9.157 + 0 /* do nothing */ 9.158 + 9.159 +void 9.160 +VMS_impl__index_probe_by_its_name( int32 probeID, VirtProcr *animPr ); 9.161 +#define VMS__index_probe_by_its_name( probeID, animPr ) \ 9.162 + /* do nothing */ 9.163 + 9.164 +IntervalProbe * 9.165 +VMS_impl__get_probe_by_name( char *probeName, VirtProcr *animPr ); 9.166 +#define 
VMS__get_probe_by_name( probeID, animPr ) \ 9.167 + NULL /* do nothing */ 9.168 + 9.169 +void 9.170 +VMS_impl__record_sched_choice_into_probe( int32 probeID, VirtProcr *animPr ); 9.171 +#define VMS__record_sched_choice_into_probe( probeID, animPr ) \ 9.172 + /* do nothing */ 9.173 + 9.174 +void 9.175 +VMS_impl__record_interval_start_in_probe( int32 probeID ); 9.176 +#define VMS__record_interval_start_in_probe( probeID ) \ 9.177 + /* do nothing */ 9.178 + 9.179 +void 9.180 +VMS_impl__record_interval_end_in_probe( int32 probeID ); 9.181 +#define VMS__record_interval_end_in_probe( probeID ) \ 9.182 + /* do nothing */ 9.183 + 9.184 +void 9.185 +VMS_impl__print_stats_of_probe( int32 probeID ); 9.186 +#define VMS__print_stats_of_probe( probeID ) \ 9.187 + /* do nothing */ 9.188 + 9.189 +void 9.190 +VMS_impl__print_stats_of_all_probes(); 9.191 +#define VMS__print_stats_of_all_probes \ 9.192 + /* do nothing */ 9.193 + 9.194 +#endif /* defined STATS__ENABLE_PROBES */ 9.195 + 9.196 +#endif /* _PROBES_H */ 9.197 +
10.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 10.2 +++ b/vmalloc.c Thu Nov 11 06:19:51 2010 -0800 10.3 @@ -0,0 +1,327 @@ 10.4 +/* 10.5 + * Copyright 2009 OpenSourceCodeStewardshipFoundation.org 10.6 + * Licensed under GNU General Public License version 2 10.7 + * 10.8 + * Author: seanhalle@yahoo.com 10.9 + * 10.10 + * Created on November 14, 2009, 9:07 PM 10.11 + */ 10.12 + 10.13 +#include <malloc.h> 10.14 +#include <stdlib.h> 10.15 + 10.16 +#include "VMS.h" 10.17 + 10.18 +/*Helper function 10.19 + *Insert a newly generated free chunk into the first spot on the free list. 10.20 + * The chunk is cast as a MallocProlog, so the various pointers in it are 10.21 + * accessed with C's help -- and the size of the prolog is easily added to 10.22 + * the pointer when a chunk is returned to the app -- so C handles changes 10.23 + * in pointer sizes among machines. 10.24 + * 10.25 + *The list head is a normal MallocProlog struct -- identified by its 10.26 + * prevChunkInFreeList being NULL -- the only one. 10.27 + * 10.28 + *The end of the list is identified by next chunk being NULL, as usual. 10.29 + */ 10.30 +void inline 10.31 +add_chunk_to_free_list( MallocProlog *chunk, MallocProlog *listHead ) 10.32 + { 10.33 + chunk->nextChunkInFreeList = listHead->nextChunkInFreeList; 10.34 + if( chunk->nextChunkInFreeList != NULL ) //if not last in free list 10.35 + chunk->nextChunkInFreeList->prevChunkInFreeList = chunk; 10.36 + chunk->prevChunkInFreeList = listHead; 10.37 + listHead->nextChunkInFreeList = chunk; 10.38 + } 10.39 + 10.40 + 10.41 +/*This is sequential code, meant to only be called from the Master, not from 10.42 + * any slave VPs. 10.43 + *Search down list, checking size by the nextHigherInMem pointer, to find 10.44 + * first chunk bigger than size needed. 10.45 + *Shave off the extra and make it into a new free-list element, hook it in 10.46 + * then return the address of the found element plus size of prolog. 
10.47 + * 10.48 + *Will find a 10.49 + */ 10.50 +void * 10.51 +VMS__malloc( int32 sizeRequested ) 10.52 + { MallocProlog *foundElem = NULL, *currElem, *newElem; 10.53 + int32 amountExtra, foundElemIsTopOfHeap, sizeConsumed,sizeOfFound; 10.54 + 10.55 + //step up the size to be aligned at 16-byte boundary, prob better ways 10.56 + sizeRequested = ((sizeRequested + 16) >> 4) << 4; 10.57 + currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList; 10.58 + 10.59 + while( currElem != NULL ) 10.60 + { //check if size of currElem is big enough 10.61 + sizeOfFound=(int32)((char*)currElem->nextHigherInMem -(char*)currElem); 10.62 + amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog); 10.63 + if( amountExtra > 0 ) 10.64 + { //found it, get out of loop 10.65 + foundElem = currElem; 10.66 + currElem = NULL; 10.67 + } 10.68 + else 10.69 + currElem = currElem->nextChunkInFreeList; 10.70 + } 10.71 + 10.72 + if( foundElem == NULL ) 10.73 + { ERROR("\nmalloc failed\n") 10.74 + return (void *)NULL; //indicates malloc failed 10.75 + } 10.76 + //Using a kludge to identify the element that is the top chunk in the 10.77 + // heap -- saving top-of-heap addr in head's nextHigherInMem -- and 10.78 + // save addr of start of heap in head's nextLowerInMem 10.79 + //Will handle top of Heap specially 10.80 + foundElemIsTopOfHeap = foundElem->nextHigherInMem == 10.81 + _VMSMasterEnv->freeListHead->nextHigherInMem; 10.82 + 10.83 + //before shave off and try to insert new elem, remove found elem 10.84 + //note, foundElem will never be the head, so always has valid prevChunk 10.85 + foundElem->prevChunkInFreeList->nextChunkInFreeList = 10.86 + foundElem->nextChunkInFreeList; 10.87 + if( foundElem->nextChunkInFreeList != NULL ) 10.88 + { foundElem->nextChunkInFreeList->prevChunkInFreeList = 10.89 + foundElem->prevChunkInFreeList; 10.90 + } 10.91 + foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated 10.92 + 10.93 + //if enough, turn extra into new elem & insert 
it 10.94 + if( amountExtra > 64 ) 10.95 + { //make new elem by adding to addr of curr elem then casting 10.96 + sizeConsumed = sizeof(MallocProlog) + sizeRequested; 10.97 + newElem = (MallocProlog *)( (char *)foundElem + sizeConsumed ); 10.98 + newElem->nextHigherInMem = foundElem->nextHigherInMem; 10.99 + newElem->nextLowerInMem = foundElem; 10.100 + foundElem->nextHigherInMem = newElem; 10.101 + 10.102 + if( ! foundElemIsTopOfHeap ) 10.103 + { //there is no next higher for top of heap, so can't write to it 10.104 + newElem->nextHigherInMem->nextLowerInMem = newElem; 10.105 + } 10.106 + add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead ); 10.107 + } 10.108 + else 10.109 + { 10.110 + sizeConsumed = sizeOfFound; 10.111 + } 10.112 + _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed; 10.113 + 10.114 + //skip over the prolog by adding its size to the pointer return 10.115 + return (void *)((char *)foundElem + sizeof(MallocProlog)); 10.116 + } 10.117 + 10.118 + 10.119 +/*This is sequential code -- only to be called from the Master 10.120 + * When free, subtract the size of prolog from pointer, then cast it to a 10.121 + * MallocProlog. Then check the nextLower and nextHigher chunks to see if 10.122 + * one or both are also free, and coalesce if so, and if neither free, then 10.123 + * add this one to free-list. 
10.124 + */ 10.125 +void 10.126 +VMS__free( void *ptrToFree ) 10.127 + { MallocProlog *elemToFree, *nextLowerElem, *nextHigherElem; 10.128 + int32 lowerExistsAndIsFree, higherExistsAndIsFree, sizeOfElem; 10.129 + 10.130 + if( ptrToFree < (void*)_VMSMasterEnv->freeListHead->nextLowerInMem || 10.131 + ptrToFree > (void*)_VMSMasterEnv->freeListHead->nextHigherInMem ) 10.132 + { //outside the range of data owned by VMS's malloc, so do nothing 10.133 + return; 10.134 + } 10.135 + //subtract size of prolog to get pointer to prolog, then cast 10.136 + elemToFree = (MallocProlog *)((char *)ptrToFree - sizeof(MallocProlog)); 10.137 + sizeOfElem =(int32)((char*)elemToFree->nextHigherInMem-(char*)elemToFree); 10.138 + 10.139 + if( elemToFree->prevChunkInFreeList != NULL ) 10.140 + { printf( "error: freeing same element twice!" ); exit(1); 10.141 + } 10.142 + 10.143 + _VMSMasterEnv->amtOfOutstandingMem -= sizeOfElem; 10.144 + 10.145 + nextLowerElem = elemToFree->nextLowerInMem; 10.146 + nextHigherElem = elemToFree->nextHigherInMem; 10.147 + 10.148 + if( nextHigherElem == NULL ) 10.149 + higherExistsAndIsFree = FALSE; 10.150 + else //okay exists, now check if in the free-list by checking back ptr 10.151 + higherExistsAndIsFree = (nextHigherElem->prevChunkInFreeList != NULL); 10.152 + 10.153 + if( nextLowerElem == NULL ) 10.154 + lowerExistsAndIsFree = FALSE; 10.155 + else //okay, it exists, now check if it's free 10.156 + lowerExistsAndIsFree = (nextLowerElem->prevChunkInFreeList != NULL); 10.157 + 10.158 + 10.159 + //now, know what exists and what's free 10.160 + if( lowerExistsAndIsFree ) 10.161 + { if( higherExistsAndIsFree ) 10.162 + { //both exist and are free, so coalesce all three 10.163 + //First, remove higher from free-list 10.164 + nextHigherElem->prevChunkInFreeList->nextChunkInFreeList = 10.165 + nextHigherElem->nextChunkInFreeList; 10.166 + if( nextHigherElem->nextChunkInFreeList != NULL ) //end-of-list? 
10.167 + nextHigherElem->nextChunkInFreeList->prevChunkInFreeList = 10.168 + nextHigherElem->prevChunkInFreeList; 10.169 + //Now, fix-up sequence-in-mem list -- by side-effect, this also 10.170 + // changes size of the lower elem, which is still in free-list 10.171 + nextLowerElem->nextHigherInMem = nextHigherElem->nextHigherInMem; 10.172 + if( nextHigherElem->nextHigherInMem != 10.173 + _VMSMasterEnv->freeListHead->nextHigherInMem ) 10.174 + nextHigherElem->nextHigherInMem->nextLowerInMem = nextLowerElem; 10.175 + //notice didn't do anything to elemToFree -- it simply is no 10.176 + // longer reachable from any of the lists. Wonder if could be a 10.177 + // security leak because left valid addresses in it, 10.178 + // but don't care for now. 10.179 + } 10.180 + else 10.181 + { //lower is the only of the two that exists and is free, 10.182 + //In this case, no adjustment to free-list, just change mem-list. 10.183 + // By side-effect, changes size of the lower elem 10.184 + nextLowerElem->nextHigherInMem = elemToFree->nextHigherInMem; 10.185 + if( elemToFree->nextHigherInMem != 10.186 + _VMSMasterEnv->freeListHead->nextHigherInMem ) 10.187 + elemToFree->nextHigherInMem->nextLowerInMem = nextLowerElem; 10.188 + } 10.189 + } 10.190 + else 10.191 + { //lower either doesn't exist or isn't free, so check higher 10.192 + if( higherExistsAndIsFree ) 10.193 + { //higher exists and is the only of the two free 10.194 + //First, in free-list, replace higher elem with the one to free 10.195 + elemToFree->nextChunkInFreeList=nextHigherElem->nextChunkInFreeList; 10.196 + elemToFree->prevChunkInFreeList=nextHigherElem->prevChunkInFreeList; 10.197 + elemToFree->prevChunkInFreeList->nextChunkInFreeList = elemToFree; 10.198 + if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list? 10.199 + elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree; 10.200 + //Now chg mem-list. 
By side-effect, changes size of elemToFree 10.201 + elemToFree->nextHigherInMem = nextHigherElem->nextHigherInMem; 10.202 + if( elemToFree->nextHigherInMem != 10.203 + _VMSMasterEnv->freeListHead->nextHigherInMem ) 10.204 + elemToFree->nextHigherInMem->nextLowerInMem = elemToFree; 10.205 + } 10.206 + else 10.207 + { //neither lower nor higher is availabe to coalesce so add to list 10.208 + // this makes prev chunk ptr non-null, which indicates it's free 10.209 + elemToFree->nextChunkInFreeList = 10.210 + _VMSMasterEnv->freeListHead->nextChunkInFreeList; 10.211 + _VMSMasterEnv->freeListHead->nextChunkInFreeList = elemToFree; 10.212 + if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list? 10.213 + elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree; 10.214 + elemToFree->prevChunkInFreeList = _VMSMasterEnv->freeListHead; 10.215 + } 10.216 + } 10.217 + 10.218 + } 10.219 + 10.220 + 10.221 +/*Allocates memory from the external system -- higher overhead 10.222 + * 10.223 + *Because of Linux's malloc throwing bizarre random faults when malloc is 10.224 + * used inside a VMS virtual processor, have to pass this as a request and 10.225 + * have the core loop do it when it gets around to it -- will look for these 10.226 + * chores leftover from the previous animation of masterVP the next time it 10.227 + * goes to animate the masterVP -- so it takes two separate masterVP 10.228 + * animations, separated by work, to complete an external malloc or 10.229 + * external free request. 10.230 + * 10.231 + *Thinking core loop accepts signals -- just looks if signal-location is 10.232 + * empty or not -- 10.233 + */ 10.234 +void * 10.235 +VMS__malloc_in_ext( int32 sizeRequested ) 10.236 + { 10.237 + /* 10.238 + //This is running in the master, so no chance for multiple cores to be 10.239 + // competing for the core's flag. 
10.240 + if( *(_VMSMasterEnv->coreLoopSignalAddr[ 0 ]) != 0 ) 10.241 + { //something has already signalled to core loop, so save the signal 10.242 + // and look, next time master animated, to see if can send it. 10.243 + //Note, the addr to put a signal is in the coreloop's frame, so just 10.244 + // checks it each time through -- make it volatile to avoid GCC 10.245 + // optimizations -- it's a coreloop local var that only changes 10.246 + // after jumping away. The signal includes the addr to send the 10.247 + //return to -- even if just empty return completion-signal 10.248 + // 10.249 + //save the signal in some queue that the master looks at each time 10.250 + // it starts up -- one loc says if empty for fast common case -- 10.251 + //something like that -- want to hide this inside this call -- but 10.252 + // think this has to come as a request -- req handler gives procr 10.253 + // back to master loop, which gives it back to req handler at point 10.254 + // it sees that core loop has sent return signal. Something like 10.255 + // that. 10.256 + saveTheSignal 10.257 + 10.258 + } 10.259 + coreSigData->type = malloc; 10.260 + coreSigData->sizeToMalloc = sizeRequested; 10.261 + coreSigData->locToSignalCompletion = &figureOut; 10.262 + _VMSMasterEnv->coreLoopSignals[ 0 ] = coreSigData; 10.263 + */ 10.264 + //just risk system-stack faults until get this figured out 10.265 + return malloc( sizeRequested ); 10.266 + } 10.267 + 10.268 + 10.269 +/*Frees memory that was allocated in the external system -- higher overhead 10.270 + * 10.271 + *As noted in external malloc comment, this is clunky 'cause the free has 10.272 + * to be called in the core loop. 
10.273 + */ 10.274 +void 10.275 +VMS__free_in_ext( void *ptrToFree ) 10.276 + { 10.277 + //just risk system-stack faults until get this figured out 10.278 + free( ptrToFree ); 10.279 + 10.280 + //TODO: fix this -- so 10.281 + } 10.282 + 10.283 + 10.284 +/*Designed to be called from the main thread outside of VMS, during init 10.285 + */ 10.286 +MallocProlog * 10.287 +VMS_ext__create_free_list() 10.288 + { MallocProlog *freeListHead, *firstChunk; 10.289 + 10.290 + //Note, this is running in the main thread -- all increases in malloc 10.291 + // mem and all frees of it must be done in this thread, with the 10.292 + // thread's original stack available 10.293 + freeListHead = malloc( sizeof(MallocProlog) ); 10.294 + firstChunk = malloc( MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE ); 10.295 + if( firstChunk == NULL ) {printf("malloc error\n"); exit(1);} 10.296 + 10.297 + freeListHead->prevChunkInFreeList = NULL; 10.298 + //Use this addr to free the heap when cleanup 10.299 + freeListHead->nextLowerInMem = firstChunk; 10.300 + //to identify top-of-heap elem, compare this addr to elem's next higher 10.301 + freeListHead->nextHigherInMem = (void*)( (char*)firstChunk + 10.302 + MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE); 10.303 + freeListHead->nextChunkInFreeList = firstChunk; 10.304 + 10.305 + firstChunk->nextChunkInFreeList = NULL; 10.306 + firstChunk->prevChunkInFreeList = freeListHead; 10.307 + //next Higher has to be set to top of chunk, so can calc size in malloc 10.308 + firstChunk->nextHigherInMem = (void*)( (char*)firstChunk + 10.309 + MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE); 10.310 + firstChunk->nextLowerInMem = NULL; //identifies as bott of heap 10.311 + 10.312 + _VMSMasterEnv->amtOfOutstandingMem = 0; //none allocated yet 10.313 + 10.314 + return freeListHead; 10.315 + } 10.316 + 10.317 + 10.318 +/*Designed to be called from the main thread outside of VMS, during cleanup 10.319 + */ 10.320 +void 10.321 +VMS_ext__free_free_list( MallocProlog *freeListHead ) 10.322 + { 10.323 + 
//stashed a ptr to the one and only bug chunk malloc'd from OS in the 10.324 + // free list head's next lower in mem pointer 10.325 + free( freeListHead->nextLowerInMem ); 10.326 + 10.327 + //don't free the head -- it'll be in an array eventually -- free whole 10.328 + // array when all the free lists linked from it have already been freed 10.329 + } 10.330 +
11.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 11.2 +++ b/vmalloc.h Thu Nov 11 06:19:51 2010 -0800 11.3 @@ -0,0 +1,52 @@ 11.4 +/* 11.5 + * Copyright 2009 OpenSourceCodeStewardshipFoundation.org 11.6 + * Licensed under GNU General Public License version 2 11.7 + * 11.8 + * Author: seanhalle@yahoo.com 11.9 + * 11.10 + * Created on November 14, 2009, 9:07 PM 11.11 + */ 11.12 + 11.13 +#include <malloc.h> 11.14 +#include "VMS_primitive_data_types.h" 11.15 + 11.16 +typedef struct _MallocProlog MallocProlog; 11.17 + 11.18 +struct _MallocProlog 11.19 + { 11.20 + MallocProlog *nextChunkInFreeList; 11.21 + MallocProlog *prevChunkInFreeList; 11.22 + MallocProlog *nextHigherInMem; 11.23 + MallocProlog *nextLowerInMem; 11.24 + }; 11.25 +//MallocProlog 11.26 + 11.27 +typedef struct 11.28 + { 11.29 + MallocProlog *firstChunkInFreeList; 11.30 + int32 numInList; 11.31 + } 11.32 +FreeListHead; 11.33 + 11.34 +void * 11.35 +VMS__malloc( int32 sizeRequested ); 11.36 + 11.37 +void 11.38 +VMS__free( void *ptrToFree ); 11.39 + 11.40 +/*Allocates memory from the external system -- higher overhead 11.41 + */ 11.42 +void * 11.43 +VMS__malloc_in_ext( int32 sizeRequested ); 11.44 + 11.45 +/*Frees memory that was allocated in the external system -- higher overhead 11.46 + */ 11.47 +void 11.48 +VMS__free_in_ext( void *ptrToFree ); 11.49 + 11.50 + 11.51 +MallocProlog * 11.52 +VMS_ext__create_free_list(); 11.53 + 11.54 +void 11.55 +VMS_ext__free_free_list( MallocProlog *freeListHead );
