# HG changeset patch # User SeanHalle # Date 1289485191 28800 # Node ID 984f7d78bfdf0b3d976557bdf8511e95fed3368b # Parent 4fbc2165e493d10b78cccfd8b8298e46cef04dd3# Parent 7b799a46cc8721572b30ffa19773b7a4b54bb582 Merge See what happens -- merged test stuff into Nov 8 VMS version diff -r 4fbc2165e493 -r 984f7d78bfdf CoreLoop.c --- a/CoreLoop.c Tue Oct 26 18:31:34 2010 -0700 +++ b/CoreLoop.c Thu Nov 11 06:19:51 2010 -0800 @@ -41,10 +41,32 @@ VMSQueueStruc *readyToAnimateQ; unsigned long coreMask; //has 1 in bit positions of allowed cores int errorCode; - + + //work-stealing struc on stack to prevent false-sharing in cache-line + volatile GateStruc gate; + //preGateProgress, waitProgress, exitProgress, gateClosed; + + coreLoopThdParams = (ThdParams *)paramsIn; thisCoresIdx = coreLoopThdParams->coreNum; + gate.gateClosed = FALSE; + gate.preGateProgress = 0; + gate.waitProgress = 0; + gate.exitProgress = 0; + _VMSMasterEnv->workStealingGates[ thisCoresIdx ] = &gate;//race @startup + + //wait until signalled that setup is complete + pthread_mutex_lock( &suspendLock ); + while( !(_VMSMasterEnv->setupComplete) ) + { + pthread_cond_wait( &suspend_cond, + &suspendLock ); + } + pthread_mutex_unlock( &suspendLock ); + + //printf( "\nCore unsuspended: %d\n", coreLoopThdParams->coreNum ); + //set thread affinity //Linux requires pinning thd to core inside thread-function //Designate a core by a 1 in bit-position corresponding to the core @@ -53,25 +75,9 @@ pthread_t selfThd = pthread_self(); errorCode = pthread_setaffinity_np( selfThd, sizeof(coreMask), &coreMask); - + if(errorCode){ printf("\nset affinity failure\n"); exit(0); } - //measure offsets between TSCs - //Core 0 is the reference core, the rest react to it. 
- if( thisCoresIdx == 0 ) measureTSCOffsetsAsCore0(); - else measureTSCOffsetsAsRemoteCore( thisCoresIdx ); - - //wait until signalled that setup is complete - pthread_mutex_lock( &suspendLock ); - while( !(_VMSMasterEnv->setupComplete) ) - { pthread_cond_wait( &suspend_cond, &suspendLock ); - } - pthread_mutex_unlock( &suspendLock ); - - - //printf( "\nCore unsuspended: %d\n", coreLoopThdParams->coreNum ); - - //Save addr of "end core loop" label - jump to it to shut down coreloop //To get label addr in non-gcc compiler, can trick it by making a call @@ -88,82 +94,64 @@ // Get to work! -- virt procr jumps back here when suspends //Note, have to restore the frame-pointer before jump to here, to get - // this code to work right (readyToAnimateQ and so forth are frame-ptr - // relative) + // this code to work right (readyToAnimateQ and so forth are frame-ptr relative) CoreLoopStartPt: //Get virtual processor from queue - //_VMSWorkQ must be a global, static volatile var, so not kept in reg, + //The Q must be a global, static volatile var, so not kept in reg, // which forces reloading the pointer after each jmp to this point readyToAnimateQ = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx]; - currPr = (VirtProcr *) readSRSWQ_NonBlocking( readyToAnimateQ ); + #ifdef USE_WORK_STEALING + //Alg for work-stealing designed to make common case fast. Comment + // in stealer code explains. + gate.preGateProgress++; + if( gate.gateClosed ) + { //now, set coreloop's progress, so stealer can see that core loop + // has made it into the waiting area. 
+ gate.waitProgress = gate.preGateProgress; + while( gate.gateClosed ) /*busy wait*/; + } + + currPr = (VirtProcr *) readVMSQ( readyToAnimateQ ); + + //Set the coreloop's progress, so stealer can see it has made it out + // of the protected area + gate.exitProgress = gate.preGateProgress; + #else + currPr = (VirtProcr *) readVMSQ( readyToAnimateQ ); + #endif + + if( currPr != NULL ) _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0; + int tries = 0; int gotLock = 0; - while( currPr == NULL ) - { //no VPs ready to animate, so run MasterVP --later make "try Master" - // VPs & put one in every queue at strategic point -- so have work - // avail if don't get lock & short-circuit out of it if master has - // recently run on another core - //TODO: perf -- "try Master" VP that checks if should run Master Fn - //But just letting queue run empty is quickest to see if pinning VP - // to core will solve the bizarre random seg-faults in system stack. - - //check if get the MasterLock + while( currPr == NULL ) //if queue was empty, enter get masterLock loop + { //queue was empty, so get master lock gotLock = __sync_bool_compare_and_swap( &(_VMSMasterEnv->masterLock), \ - UNLOCKED, LOCKED ); - + UNLOCKED, LOCKED ); if( gotLock ) - { - //run own MasterVP -- when its done, unlocks MasterLock and - // jumps back to coreLoops's startPt + { //run own MasterVP -- jmps to coreLoops startPt when done currPr = _VMSMasterEnv->masterVPs[thisCoresIdx]; - addToHist( tries, _VMSMasterEnv->stats->masterLockHist ); + if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 ) + { DEBUG( dbgB2BMaster,"Many back to back MasterVPs\n"); + pthread_yield(); + } + _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1; break; //end while -- have a VP to animate now } - tries++; - - if( tries % READYTOANIMATE_RETRIES == 0 ) pthread_yield(); + tries++; //if too many, means master on other core taking too long + if( tries > MASTERLOCK_RETRIES ) { tries = 0; pthread_yield(); } } - //switch to virt procr's stack 
and frame ptr then jump to virt procr fn - void *stackPtr, *framePtr, *jmpPt, *coreLoopFramePtrAddr, \ - *coreLoopStackPtrAddr; - - stackPtr = currPr->stackPtr; - framePtr = currPr->framePtr; - jmpPt = currPr->nextInstrPt; - coreLoopFramePtrAddr = &(currPr->coreLoopFramePtr); - coreLoopStackPtrAddr = &(currPr->coreLoopStackPtr); - //Save the core loop's stack and frame pointers into virt procr struct - // then switch to stack ptr and frame ptr of virt procr & jmp to it - //This was a pain to get right because GCC converts the "(jmpPt)" to - // frame-relative mem-op -- so generated machine code first changed the - // frame pointer, then tried to jump to an addr stored on stack, which - // it accessed as an offset from frame-ptr! (wrong frame-ptr now) - //Explicitly loading into eax before changing frame-ptr fixed it - //Also, it turns "(currPr->coreLoopFramePtr)" into a temporary on the - // stack, so "movl %%ebp, %0" saves to the temp, NOT the data-struc! - asm volatile("movl %0, %%eax; \ - movl %%esp, (%%eax); \ - movl %1, %%eax; \ - movl %%ebp, (%%eax); \ - movl %2, %%eax; \ - movl %3, %%esp; \ - movl %4, %%ebp; \ - jmp %%eax" \ - /* outputs */ : "=g"(coreLoopStackPtrAddr), \ - "=g"(coreLoopFramePtrAddr) \ - /* inputs */ : "g" (jmpPt), "g" (stackPtr), "g" (framePtr) \ - /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi" \ - ); + SwitchToVP( currPr ) //=========== jmp to here when want to shut down the VMS system ========== CoreLoopEndPt: //first free shutdown VP that jumped here -- it first restores the // coreloop's stack, so addr of currPr in stack frame is still correct - VMS__handle_dissipate_reqst( currPr ); + VMS__dissipate_procr( currPr ); pthread_exit( NULL ); } @@ -195,62 +183,33 @@ _VMSMasterEnv->coreLoopStartPt = &&SeqCoreLoopStartPt; _VMSMasterEnv->coreLoopEndPt = &&SeqCoreLoopEndPt; - //Core loop has no values live upon CoreLoopStartPt except - // readyToAnimateQ + //Core loop has no values live upon CoreLoopStartPt except 
readyToAnimateQ // every value in the code is defined by a statement in core loop, // after the start point -- with the one exception of _VMSWorkQ // Get to work! -- virt procr jumps back here when done or suspends //Note, have to restore the frame-pointer before jump to here, to get - // this code to work right (readyToAnimateQ and so forth are frame-ptr - // relative) + // this code to work right (readyToAnimateQ and so forth are frame-ptr relative) SeqCoreLoopStartPt: //Get virtual processor from queue //_VMSWorkQ must be a global, static volatile var, so not kept in reg, // which forces reloading the pointer after each jmp to this point readyToAnimateQ = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx]; - currPr = (VirtProcr *) readSRSWQ_NonBlocking( readyToAnimateQ ); + currPr = (VirtProcr *) readVMSQ( readyToAnimateQ ); if( currPr == NULL ) + { if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 ) + { printf("too many back to back MasterVP\n"); exit(1); } + _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1; + currPr = _VMSMasterEnv->masterVPs[thisCoresIdx]; - + } + else + _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0; -// printf("core %d loop procr addr: %d\n", coreLoopThdParams->coreNum, \ -// (int)currPr ); fflush(stdin); - //switch to virt procr's stack and frame ptr then jump to virt procr - void *stackPtr, *framePtr, *jmpPt, *coreLoopFramePtrAddr, \ - *coreLoopStackPtrAddr; - - stackPtr = currPr->stackPtr; - framePtr = currPr->framePtr; - jmpPt = currPr->nextInstrPt; - coreLoopFramePtrAddr = &(currPr->coreLoopFramePtr); - coreLoopStackPtrAddr = &(currPr->coreLoopStackPtr); - - //Save the core loop's stack and frame pointers into virt procr struct - // then switch to stack ptr and frame ptr of virt procr & jmp to it - //This was a pain to get right because GCC converts the "(jmpPt)" to - // frame-relative mem-op -- so generated machine code first changed the - // frame pointer, then tried to jump to an addr stored on stack, which - // it accessed as 
an offset from frame-ptr! (wrong frame-ptr now) - //Explicitly loading into eax before changing frame-ptr fixed it - //Also, it turns "(currPr->coreLoopFramePtr)" into a temporary on the - // stack, so "movl %%ebp, %0" saves to the temp, NOT the data-struc! - asm volatile("movl %0, %%eax; \ - movl %%esp, (%%eax); \ - movl %1, %%eax; \ - movl %%ebp, (%%eax); \ - movl %2, %%eax; \ - movl %3, %%esp; \ - movl %4, %%ebp; \ - jmp %%eax" \ - /* outputs */ : "=g"(coreLoopStackPtrAddr), \ - "=g"(coreLoopFramePtrAddr) \ - /* inputs */ : "g" (jmpPt), "g" (stackPtr), "g" (framePtr) \ - /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi" \ - ); + SwitchToVP( currPr ) //======================================================================== //jmp to here when want to shut down the VMS system. A shutdown VP is @@ -260,7 +219,7 @@ // all the threads to die will proceed, gather the result, and // return to the calling application. SeqCoreLoopEndPt: - VMS__handle_dissipate_reqst( currPr ); //free shutdown pr, that jmpd here + VMS__dissipate_procr( currPr ); //free shutdown pr, that jmpd here return; } @@ -380,6 +339,3 @@ } - - - diff -r 4fbc2165e493 -r 984f7d78bfdf DESIGN_NOTES__VMS.txt --- a/DESIGN_NOTES__VMS.txt Tue Oct 26 18:31:34 2010 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2 +0,0 @@ - -Implement VMS this way: diff -r 4fbc2165e493 -r 984f7d78bfdf MasterLoop.c --- a/MasterLoop.c Tue Oct 26 18:31:34 2010 -0700 +++ b/MasterLoop.c Thu Nov 11 06:19:51 2010 -0800 @@ -7,12 +7,19 @@ #include -#include #include #include "VMS.h" +//=========================================================================== +void inline +stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ, + VirtProcr *masterPr ); + +//=========================================================================== + + /*This code is animated by the virtual Master processor. 
* @@ -65,7 +72,7 @@ */ void masterLoop( void *initData, VirtProcr *animatingPr ) { - int slotIdx; + int32 slotIdx, numSlotsFilled; VirtProcr *schedVirtPr; SchedSlot *currSlot, **schedSlots; MasterEnv *masterEnv; @@ -75,7 +82,7 @@ RequestHandler requestHandler; void *semanticEnv; - int thisCoresIdx; + int32 thisCoresIdx; VirtProcr *masterPr; volatile VirtProcr *volatileMasterPr; @@ -110,7 +117,7 @@ masterEnv = _VMSMasterEnv; -//TODO: check that compiles so that always re-define from frame-storage + //GCC may optimize so doesn't always re-define from frame-storage masterPr = volatileMasterPr; //just to make sure after jmp thisCoresIdx = masterPr->coreAnimatedBy; readyToAnimateQ = masterEnv->readyToAnimateQs[thisCoresIdx]; @@ -122,6 +129,7 @@ //Poll each slot's Done flag + numSlotsFilled = 0; for( slotIdx = 0; slotIdx < NUM_SCHED_SLOTS; slotIdx++) { currSlot = schedSlots[ slotIdx ]; @@ -143,27 +151,21 @@ { currSlot->procrAssignedToSlot = schedVirtPr; schedVirtPr->schedSlot = currSlot; currSlot->needsProcrAssigned = FALSE; - - writeSRSWQ( schedVirtPr, readyToAnimateQ ); + numSlotsFilled += 1; + + writeVMSQ( schedVirtPr, readyToAnimateQ ); } } } + + #ifdef USE_WORK_STEALING + //If no slots filled, means no more work, look for work to steal. + if( numSlotsFilled == 0 ) + { gateProtected_stealWorkInto( currSlot, readyToAnimateQ, masterPr ); + } + #endif - //Save stack ptr and frame, restore CoreLoop's stack and frame, - // and clear the MasterLock - //TODO: cafefully verify don't need to force saving anything to stack - // before jumping back to core loop. 
- void *stackPtrAddr, *framePtrAddr, *masterLockAddr; - void *jmpPt, *coreLoopFramePtr, *coreLoopStackPtr; - - stackPtrAddr = &(masterPr->stackPtr); - framePtrAddr = &(masterPr->framePtr); - masterLockAddr = &(_VMSMasterEnv->masterLock); - - jmpPt = _VMSMasterEnv->coreLoopStartPt; - coreLoopFramePtr = masterPr->coreLoopFramePtr;//need this only - coreLoopStackPtr = masterPr->coreLoopStackPtr;//shouldn't need -- safety //============================= MEASUREMENT STUFF ======================== #ifdef MEAS__TIME_MASTER @@ -172,21 +174,183 @@ #endif //======================================================================== - asm volatile("movl %0, %%eax; \ - movl %%esp, (%%eax); \ - movl %1, %%eax; \ - movl %%ebp, (%%eax); \ - movl %2, %%ebx; \ - movl %3, %%eax; \ - movl %4, %%esp; \ - movl %5, %%ebp; \ - movl $0x0, (%%ebx); \ - jmp %%eax;" \ - /* outputs */ : "=g" (stackPtrAddr), "=g" (framePtrAddr), \ - "=g"(masterLockAddr) \ - /* inputs */ : "g" (jmpPt), "g"(coreLoopStackPtr), "g"(coreLoopFramePtr)\ - /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi" \ - );//can probably make clobber list empty -- but safe for now + + masterSwitchToCoreLoop( masterPr ) } + +/*This has a race condition -- the coreloops are accessing their own queues + * at the same time that this work-stealer on a different core is trying to read them + */ +void inline +stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ, + VirtProcr *masterPr ) + { + VirtProcr *stolenPr; + int32 coreIdx, i; + VMSQueueStruc *currQ; + + stolenPr = NULL; + coreIdx = masterPr->coreAnimatedBy; + for( i = 0; i < NUM_CORES -1; i++ ) + { + if( coreIdx >= NUM_CORES -1 ) + { coreIdx = 0; + } + else + { coreIdx++; + } + currQ = _VMSMasterEnv->readyToAnimateQs[coreIdx]; + if( numInVMSQ( currQ ) > 0 ) + { stolenPr = readVMSQ (currQ ); + break; + } + } + + if( stolenPr != NULL ) + { currSlot->procrAssignedToSlot = stolenPr; + stolenPr->schedSlot = currSlot; + currSlot->needsProcrAssigned = FALSE; + + 
writeVMSQ( stolenPr, readyToAnimateQ ); + } + } + +/*This algorithm makes the common case fast. Make the coreloop passive, + * and show its progress. Make the stealer control a gate that coreloop + * has to pass. + *To avoid interference, only one stealer at a time. Use a global + * stealer-lock. + * + *The pattern is based on a gate -- stealer shuts the gate, then monitors + * to be sure any already past make it all the way out, before starting. + *So, have a "progress" measure just before the gate, then have two after it, + * one is in a "waiting room" outside the gate, the other is at the exit. + *Then, the stealer first shuts the gate, then checks the progress measure + * outside it, then looks to see if the progress measure at the exit is the + * same. If yes, it knows the protected area is empty 'cause no other way + * to get in and the last to get in also exited. + *If the progress measure at the exit is not the same, then the stealer goes + * into a loop checking both the waiting-area and the exit progress-measures + * until one of them shows the same as the measure outside the gate. Might + * as well re-read the measure outside the gate each go around, just to be + * sure. It is guaranteed that one of the two will eventually match the one + * outside the gate. + * + *Here's an informal proof of correctness: + *The gate can be closed at any point, and have only four cases: + * 1) coreloop made it past the gate-closing but not yet past the exit + * 2) coreloop made it past the pre-gate progress update but not yet past + * the gate, + * 3) coreloop is right before the pre-gate update + * 4) coreloop is past the exit and far from the pre-gate update. + * + * Covering the cases in reverse order, + * 4) is not a problem -- stealer will read pre-gate progress, see that it + * matches exit progress, and the gate is closed, so stealer can proceed. + * 3) stealer will read pre-gate progress just after coreloop updates it.. 
+ * so stealer goes into a loop until the coreloop causes wait-progress + * to match pre-gate progress, so then stealer can proceed + * 2) same as 3.. + * 1) stealer reads pre-gate progress, sees that it's different than exit, + * so goes into loop until exit matches pre-gate, now it knows coreloop + * is not in protected and cannot get back in, so can proceed. + * + *Implementation for the stealer: + * + *First, acquire the stealer lock -- only cores with no work to do will + * compete to steal, so not a big performance penalty having only one -- + * will rarely have multiple stealers in a system with plenty of work -- and + * in a system with little work, it doesn't matter. + * + *Note, have single-reader, single-writer pattern for all variables used to + * communicate between stealer and victims + * + *So, scan the queues of the core loops, until find non-empty. Each core + * has its own list that it scans. The list goes in order from closest to + * furthest core, so it steals first from close cores. Later can add + * taking info from the app about overlapping footprints, and scan all the + * others then choose work with the most footprint overlap with the contents + * of this core's cache. + * + *Now, have a victim to take work from. So, shut the gate in that + * coreloop, by setting the "gate closed" var on its stack to TRUE. + *Then, read the core's pre-gate progress and compare to the core's exit + * progress. + *If same, can proceed to take work from the coreloop's queue. When done, + * write FALSE to gate closed var. + *If different, then enter a loop that reads the pre-gate progress, then + * compares to exit progress then to wait progress. When one of two + * matches, proceed. Take work from the coreloop's queue. When done, + * write FALSE to the gate closed var. 
+ * + */ +void inline +gateProtected_stealWorkInto( SchedSlot *currSlot, + VMSQueueStruc *myReadyToAnimateQ, + VirtProcr *masterPr ) + { + VirtProcr *stolenPr; + int32 coreIdx, i, haveAVictim, gotLock; + VMSQueueStruc *victimsQ; + + volatile GateStruc *vicGate; + int32 coreMightBeInProtected; + + + + //see if any other cores have work available to steal + haveAVictim = FALSE; + coreIdx = masterPr->coreAnimatedBy; + for( i = 0; i < NUM_CORES -1; i++ ) + { + if( coreIdx >= NUM_CORES -1 ) + { coreIdx = 0; + } + else + { coreIdx++; + } + victimsQ = _VMSMasterEnv->readyToAnimateQs[coreIdx]; + if( numInVMSQ( victimsQ ) > 0 ) + { haveAVictim = TRUE; + vicGate = _VMSMasterEnv->workStealingGates[ coreIdx ]; + break; + } + } + if( !haveAVictim ) return; //no work to steal, exit + + //have a victim core, now get the stealer-lock + gotLock =__sync_bool_compare_and_swap( &(_VMSMasterEnv->workStealingLock), + UNLOCKED, LOCKED ); + if( !gotLock ) return; //go back to core loop, which will re-start master + + + //====== Start Gate-protection ======= + vicGate->gateClosed = TRUE; + coreMightBeInProtected= vicGate->preGateProgress != vicGate->exitProgress; + while( coreMightBeInProtected ) + { //wait until sure + if( vicGate->preGateProgress == vicGate->waitProgress ) + coreMightBeInProtected = FALSE; + if( vicGate->preGateProgress == vicGate->exitProgress ) + coreMightBeInProtected = FALSE; + } + + stolenPr = readVMSQ ( victimsQ ); + + vicGate->gateClosed = FALSE; + //======= End Gate-protection ======= + + + if( stolenPr != NULL ) //victim could have been in protected and taken + { currSlot->procrAssignedToSlot = stolenPr; + stolenPr->schedSlot = currSlot; + currSlot->needsProcrAssigned = FALSE; + + writeVMSQ( stolenPr, myReadyToAnimateQ ); + } + + //unlock the work stealing lock + _VMSMasterEnv->workStealingLock = UNLOCKED; + } diff -r 4fbc2165e493 -r 984f7d78bfdf SwitchAnimators.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/SwitchAnimators.h Thu Nov 11 06:19:51 2010 -0800 
@@ -0,0 +1,138 @@ +/* + * Copyright 2009 OpenSourceStewardshipFoundation.org + * Licensed under GNU General Public License version 2 + * + * Author: seanhalle@yahoo.com + * + */ + +#ifndef _SwitchAnimators_H +#define _SwitchAnimators_H +#define __USE_GNU + +/*Isolating code for switching between animators within these macros -- at + * some point will make switches to compile for 32 bit or for 64 bit, which + * having these isolated will make cleaner + * + *This also makes it easier to change architectures, at some point + *And it cleans the code up, having the ugly assembly out of the way + */ + +//=========================== MasterVP to CoreLoop ========================== +// + //Save stack ptr and frame, restore CoreLoop's stack and frame, + // and clear the MasterLock + //GCC's -O3 messes with this -- go through generated -- protect somehow + // +#define masterSwitchToCoreLoop( masterPr ) \ + void *stackPtrAddr, *framePtrAddr, *masterLockAddr; \ + void *jmpPt, *coreLoopFramePtr, *coreLoopStackPtr; \ +\ + stackPtrAddr = &(masterPr->stackPtr); \ + framePtrAddr = &(masterPr->framePtr); \ + masterLockAddr = &(_VMSMasterEnv->masterLock); \ +\ + jmpPt = _VMSMasterEnv->coreLoopStartPt; \ + coreLoopFramePtr = masterPr->coreLoopFramePtr; \ + coreLoopStackPtr = masterPr->coreLoopStackPtr; \ +\ + asm volatile("movl %0, %%eax; \ + movl %%esp, (%%eax); \ + movl %1, %%eax; \ + movl %%ebp, (%%eax); \ + movl %2, %%ebx; \ + movl %3, %%eax; \ + movl %4, %%esp; \ + movl %5, %%ebp; \ + movl $0x0, (%%ebx); \ + jmp %%eax;" \ + /* outputs */ : "=g" (stackPtrAddr), "=g" (framePtrAddr), \ + "=g"(masterLockAddr) \ + /* inputs */ : "g" (jmpPt), "g"(coreLoopStackPtr), "g"(coreLoopFramePtr)\ + /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi" \ + );//can probably make clobber list empty -- but safe for now + + +//=========================== SlaveVP to CoreLoop =========================== +// + +#define SwitchToCoreLoop( animatingPr ) \ + void *jmpPt, *stackPtrAddr, 
*framePtrAddr, *coreLoopStackPtr; \ + void *coreLoopFramePtr; \ +\ + stackPtrAddr = &(animatingPr->stackPtr); \ + framePtrAddr = &(animatingPr->framePtr); \ +\ + jmpPt = _VMSMasterEnv->coreLoopStartPt; \ + coreLoopFramePtr = animatingPr->coreLoopFramePtr; \ + coreLoopStackPtr = animatingPr->coreLoopStackPtr; \ +\ + /*Save the virt procr's stack and frame ptrs*/ \ + asm volatile("movl %0, %%eax; \ + movl %%esp, (%%eax); \ + movl %1, %%eax; \ + movl %%ebp, (%%eax) "\ + /* outputs */ : "=g" (stackPtrAddr), "=g" (framePtrAddr) \ + /* inputs */ : \ + /* clobber */ : "%eax" \ + ); \ +\ + /*restore coreloop's frame ptr, then jump back to "start" of core loop*/\ + /*Note, GCC compiles to assembly that saves esp and ebp in the stack*/ \ + /* frame -- so have to explicitly do assembly that saves to memory*/ \ + asm volatile("movl %0, %%eax; \ + movl %1, %%esp; \ + movl %2, %%ebp; \ + jmp %%eax " \ + /* outputs */ : \ + /* inputs */ : "m" (jmpPt), "m"(coreLoopStackPtr), "m"(coreLoopFramePtr)\ + /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx", "%edi","%esi" \ + ); + //list everything as clobbered to force GCC to save all + // live vars that are in regs on stack before this + // assembly, so that stack pointer is correct, before jmp + + + +//============================== CoreLoop to VP ============================= +// + //Save the core loop's stack and frame pointers into virt procr struct + // then switch to stack ptr and frame ptr of virt procr & jmp to it + //This was a pain to get right because GCC converts the "(jmpPt)" to + // frame-relative mem-op -- so generated machine code first changed the + // frame pointer, then tried to jump to an addr stored on stack, which + // it accessed as an offset from frame-ptr! (wrong frame-ptr now) + //Explicitly loading into eax before changing frame-ptr fixed it + //Also, it turns "(currPr->coreLoopFramePtr)" into a temporary on the + // stack, so "movl %%ebp, %0" saves to the temp, NOT the data-struc! 
+ + + //switch to virt procr's stack and frame ptr then jump to virt procr fn + +#define SwitchToVP( currPr ) \ + void *stackPtr, *framePtr, *jmpPt, *coreLoopFramePtrAddr, \ + *coreLoopStackPtrAddr; \ +\ + stackPtr = currPr->stackPtr; \ + framePtr = currPr->framePtr; \ + jmpPt = currPr->nextInstrPt; \ + coreLoopFramePtrAddr = &(currPr->coreLoopFramePtr); \ + coreLoopStackPtrAddr = &(currPr->coreLoopStackPtr); \ +\ + asm volatile("movl %0, %%eax; \ + movl %%esp, (%%eax); \ + movl %1, %%eax; \ + movl %%ebp, (%%eax); \ + movl %2, %%eax; \ + movl %3, %%esp; \ + movl %4, %%ebp; \ + jmp %%eax" \ + /* outputs */ : "=g"(coreLoopStackPtrAddr), \ + "=g"(coreLoopFramePtrAddr) \ + /* inputs */ : "g" (jmpPt), "g" (stackPtr), "g" (framePtr) \ + /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi" \ + ); + + +#endif /* _SwitchAnimators_H */ + diff -r 4fbc2165e493 -r 984f7d78bfdf VMS.c --- a/VMS.c Tue Oct 26 18:31:34 2010 -0700 +++ b/VMS.c Thu Nov 11 06:19:51 2010 -0800 @@ -6,7 +6,9 @@ #include #include +#include #include +#include #include "VMS.h" #include "Queue_impl/BlockingQueue.h" @@ -28,6 +30,12 @@ void create_the_coreLoop_OS_threads(); +MallocProlog * +create_free_list(); + +void +endOSThreadFn( void *initData, VirtProcr *animatingPr ); + pthread_mutex_t suspendLock = PTHREAD_MUTEX_INITIALIZER; pthread_cond_t suspend_cond = PTHREAD_COND_INITIALIZER; @@ -83,34 +91,43 @@ int coreIdx; VirtProcr **masterVPs; SchedSlot ***allSchedSlots; //ptr to array of ptrs - + + //Make the master env, which holds everything else _VMSMasterEnv = malloc( sizeof(MasterEnv) ); + + //Very first thing put into the master env is the free-list, seeded + // with a massive initial chunk of memory. + //After this, all other mallocs are VMS__malloc. 
+ _VMSMasterEnv->freeListHead = VMS_ext__create_free_list(); + + //===================== Only VMS__malloc after this ==================== masterEnv = _VMSMasterEnv; - //Need to set start pt here 'cause used by seed procr, which is created - // before the first core loop starts up. -- not sure how yet.. -// masterEnv->coreLoopStartPt = ; -// masterEnv->coreLoopEndPt = ; //Make a readyToAnimateQ for each core loop - readyToAnimateQs = malloc( NUM_CORES * sizeof(VMSQueueStruc *) ); - masterVPs = malloc( NUM_CORES * sizeof(VirtProcr *) ); + readyToAnimateQs = VMS__malloc( NUM_CORES * sizeof(VMSQueueStruc *) ); + masterVPs = VMS__malloc( NUM_CORES * sizeof(VirtProcr *) ); //One array for each core, 3 in array, core's masterVP scheds all - allSchedSlots = malloc( NUM_CORES * sizeof(SchedSlot *) ); + allSchedSlots = VMS__malloc( NUM_CORES * sizeof(SchedSlot *) ); + _VMSMasterEnv->numProcrsCreated = 0; //used by create procr for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ ) - { - readyToAnimateQs[ coreIdx ] = makeSRSWQ(); + { + readyToAnimateQs[ coreIdx ] = makeVMSQ(); - //Q: should give masterVP core-specific into as its init data? + //Q: should give masterVP core-specific info as its init data? 
masterVPs[ coreIdx ] = VMS__create_procr( &masterLoop, masterEnv ); masterVPs[ coreIdx ]->coreAnimatedBy = coreIdx; allSchedSlots[ coreIdx ] = create_sched_slots(); //makes for one core + _VMSMasterEnv->numMasterInARow[ coreIdx ] = 0; + _VMSMasterEnv->workStealingGates[ coreIdx ] = NULL; } _VMSMasterEnv->readyToAnimateQs = readyToAnimateQs; _VMSMasterEnv->masterVPs = masterVPs; + _VMSMasterEnv->masterLock = UNLOCKED; _VMSMasterEnv->allSchedSlots = allSchedSlots; + _VMSMasterEnv->workStealingLock = UNLOCKED; //============================= MEASUREMENT STUFF ======================== #ifdef MEAS__TIME_MASTER @@ -125,59 +142,33 @@ // because coreLoop now controls -- animates its masterVP when no work - //==================== malloc substitute ======================== - // - //Testing whether malloc is using thread-local storage and therefore - // causing unreliable behavior. - //Just allocate a massive chunk of memory and roll own malloc/free and - // make app use VMS__malloc_to, which will suspend and perform malloc - // in the master, taking from this massive chunk. + //============================= MEASUREMENT STUFF ======================== + #ifdef STATS__TURN_ON_PROBES + _VMSMasterEnv->dynIntervalProbesInfo = + makePrivDynArrayOfSize( &(_VMSMasterEnv->intervalProbes), 200); -// initFreeList(); + _VMSMasterEnv->probeNameHashTbl = makeHashTable( 1000, &VMS__free ); + + //put creation time directly into master env, for fast retrieval + struct timeval timeStamp; + gettimeofday( &(timeStamp), NULL); + _VMSMasterEnv->createPtInSecs = + timeStamp.tv_sec +(timeStamp.tv_usec/1000000.0); + #endif + //======================================================================== } -/* -void -initMasterMalloc() - { - _VMSMasterEnv->mallocChunk = malloc( MASSIVE_MALLOC_SIZE ); - - //The free-list element is the first several locations of an - // allocated chunk -- the address given to the application is pre- - // pended with both the ownership structure and the free-list struc. 
- //So, write the values of these into the first locations of - // mallocChunk -- which marks it as free & puts in its size. - listElem = (FreeListElem *)_VMSMasterEnv->mallocChunk; - listElem->size = MASSIVE_MALLOC_SIZE - NUM_PREPEND_BYTES - listElem->next = NULL; - } - -void -dissipateMasterMalloc() - { - //Just foo code -- to get going -- doing as if free list were link-list - currElem = _VMSMasterEnv->freeList; - while( currElem != NULL ) - { - nextElem = currElem->next; - masterFree( currElem ); - currElem = nextElem; - } - free( _VMSMasterEnv->freeList ); - } - */ - SchedSlot ** create_sched_slots() { SchedSlot **schedSlots; int i; - schedSlots = malloc( NUM_SCHED_SLOTS * sizeof(SchedSlot *) ); + schedSlots = VMS__malloc( NUM_SCHED_SLOTS * sizeof(SchedSlot *) ); for( i = 0; i < NUM_SCHED_SLOTS; i++ ) { - schedSlots[i] = malloc( sizeof(SchedSlot) ); + schedSlots[i] = VMS__malloc( sizeof(SchedSlot) ); //Set state to mean "handling requests done, slot needs filling" schedSlots[i]->workIsDone = FALSE; @@ -192,9 +183,9 @@ { int i; for( i = 0; i < NUM_SCHED_SLOTS; i++ ) { - free( schedSlots[i] ); + VMS__free( schedSlots[i] ); } - free( schedSlots ); + VMS__free( schedSlots ); } @@ -203,7 +194,7 @@ { //======================================================================== // Create the Threads - int coreIdx, retCode, i; + int coreIdx, retCode; //create the arrays used to measure TSC offsets between cores pongNums = malloc( NUM_CORES * sizeof( int ) ); @@ -227,7 +218,7 @@ //Make the threads that animate the core loops for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ ) - { coreLoopThdParams[coreIdx] = malloc( sizeof(ThdParams) ); + { coreLoopThdParams[coreIdx] = VMS__malloc( sizeof(ThdParams) ); coreLoopThdParams[coreIdx]->coreNum = coreIdx; retCode = @@ -235,7 +226,7 @@ thdAttrs, &coreLoop, (void *)(coreLoopThdParams[coreIdx]) ); - if(retCode){printf("ERROR creating thread: %d\n", retCode); exit(0);} + if(retCode){printf("ERROR creating thread: %d\n", retCode); 
exit(1);} } } @@ -307,10 +298,11 @@ * animator state to return to -- * */ -VirtProcr * -VMS__create_procr( VirtProcrFnPtr fnPtr, void *initialData ) - { VirtProcr *newPr; - char *stackLocs, *stackPtr; +inline VirtProcr * +create_procr_helper( VirtProcr *newPr, VirtProcrFnPtr fnPtr, + void *initialData, char *stackLocs ) + { + char *stackPtr; //============================= MEASUREMENT STUFF ======================== #ifdef MEAS__TIME_MASTER @@ -318,23 +310,19 @@ saveLowTimeStampCountInto( startStamp ); #endif //======================================================================== - - newPr = malloc( sizeof(VirtProcr) ); - newPr->procrID = numProcrsCreated++; - newPr->nextInstrPt = fnPtr; - newPr->initialData = initialData; - newPr->requests = NULL; - newPr->schedSlot = NULL; -// newPr->coreLoopStartPt = _VMSMasterEnv->coreLoopStartPt; + newPr->startOfStack = stackLocs; + newPr->procrID = _VMSMasterEnv->numProcrsCreated++; + newPr->nextInstrPt = fnPtr; + newPr->initialData = initialData; + newPr->requests = NULL; + newPr->schedSlot = NULL; //fnPtr takes two params -- void *initData & void *animProcr //alloc stack locations, make stackPtr be the highest addr minus room // for 2 params + return addr. 
Return addr (NULL) is in loc pointed to // by stackPtr, initData at stackPtr + 4 bytes, animatingPr just above - stackLocs = malloc( VIRT_PROCR_STACK_SIZE ); - if(stackLocs == 0) {perror("error: malloc stack"); exit(1);} - newPr->startOfStack = stackLocs; stackPtr = ( (char *)stackLocs + VIRT_PROCR_STACK_SIZE - 0x10 ); + //setup __cdecl on stack -- coreloop will switch to stackPtr before jmp *( (int *)stackPtr + 2 ) = (int) newPr; //rightmost param -- 32bit pointer *( (int *)stackPtr + 1 ) = (int) initialData; //next param to left @@ -347,12 +335,48 @@ saveLowTimeStampCountInto( endStamp ); addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->stats->createHist ); + //============================= MEASUREMENT STUFF ======================== + #ifdef STATS__TURN_ON_PROBES + struct timeval timeStamp; + gettimeofday( &(timeStamp), NULL); + newPr->createPtInSecs = timeStamp.tv_sec +(timeStamp.tv_usec/1000000.0) - + _VMSMasterEnv->createPtInSecs; #endif //======================================================================== - + return newPr; } +inline VirtProcr * +VMS__create_procr( VirtProcrFnPtr fnPtr, void *initialData ) + { VirtProcr *newPr; + char *stackLocs; + + newPr = VMS__malloc( sizeof(VirtProcr) ); + stackLocs = VMS__malloc( VIRT_PROCR_STACK_SIZE ); + if( stackLocs == 0 ) + { perror("VMS__malloc stack"); exit(1); } + + return create_procr_helper( newPr, fnPtr, initialData, stackLocs ); + } + +/* "ext" designates that it's for use outside the VMS system -- should only + * be called from main thread or other thread -- never from code animated by + * a VMS virtual processor. 
+ */ +inline VirtProcr * +VMS_ext__create_procr( VirtProcrFnPtr fnPtr, void *initialData ) + { VirtProcr *newPr; + char *stackLocs; + + newPr = malloc( sizeof(VirtProcr) ); + stackLocs = malloc( VIRT_PROCR_STACK_SIZE ); + if( stackLocs == 0 ) + { perror("malloc stack"); exit(1); } + + return create_procr_helper( newPr, fnPtr, initialData, stackLocs ); + } + /*there is a label inside this function -- save the addr of this label in * the callingPr struc, as the pick-up point from which to start the next @@ -365,8 +389,7 @@ */ void VMS__suspend_procr( VirtProcr *animatingPr ) - { void *jmpPt, *stackPtrAddr, *framePtrAddr, *coreLoopStackPtr; - void *coreLoopFramePtr; + { //The request to master will cause this suspended virt procr to get // scheduled again at some future point -- to resume, core loop jumps @@ -376,24 +399,6 @@ //return ownership of the virt procr and sched slot to Master virt pr animatingPr->schedSlot->workIsDone = TRUE; -// coreIdx = callingPr->coreAnimatedBy; - - stackPtrAddr = &(animatingPr->stackPtr); - framePtrAddr = &(animatingPr->framePtr); - - jmpPt = _VMSMasterEnv->coreLoopStartPt; - coreLoopFramePtr = animatingPr->coreLoopFramePtr;//need this only - coreLoopStackPtr = animatingPr->coreLoopStackPtr;//safety - - //Save the virt procr's stack and frame ptrs, - asm volatile("movl %0, %%eax; \ - movl %%esp, (%%eax); \ - movl %1, %%eax; \ - movl %%ebp, (%%eax) "\ - /* outputs */ : "=g" (stackPtrAddr), "=g" (framePtrAddr) \ - /* inputs */ : \ - /* clobber */ : "%eax" \ - ); //=========================== Measurement stuff ======================== #ifdef MEAS__TIME_STAMP_SUSP @@ -402,20 +407,10 @@ #endif //======================================================================= - //restore coreloop's frame ptr, then jump back to "start" of core loop - //Note, GCC compiles to assembly that saves esp and ebp in the stack - // frame -- so have to explicitly do assembly that saves to memory - asm volatile("movl %0, %%eax; \ - movl %1, %%esp; \ - movl %2, 
%%ebp; \ - jmp %%eax " \ - /* outputs */ : \ - /* inputs */ : "m" (jmpPt), "m"(coreLoopStackPtr), "m"(coreLoopFramePtr)\ - /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx", "%edi","%esi" \ - ); //list everything as clobbered to force GCC to save all - // live vars that are in regs on stack before this - // assembly, so that stack pointer is correct, before jmp + SwitchToCoreLoop( animatingPr ) + + //======================================================================= ResumePt: #ifdef MEAS__TIME_STAMP_SUSP //NOTE: only take low part of count -- do sanity check when take diff @@ -427,6 +422,31 @@ +/*For this implementation of VMS, it may not make much sense to have the + * system of requests for creating a new processor done this way.. but over + * the scope of single-master, multi-master, mult-tasking, OS-implementing, + * distributed-memory, and so on, this gives VMS implementation a chance to + * do stuff before suspend, in the AppVP, and in the Master before the plugin + * is called, as well as in the lang-lib before this is called, and in the + * plugin. So, this gives both VMS and language implementations a chance to + * intercept at various points and do order-dependent stuff. + *Having a standard VMSNewPrReqData struc allows the language to create and + * free the struc, while VMS knows how to get the newPr if it wants it, and + * it lets the lang have lang-specific data related to creation transported + * to the plugin. + */ +void +VMS__send_create_procr_req( void *semReqData, VirtProcr *reqstingPr ) + { VMSReqst req; + + req.reqType = createReq; + req.semReqData = semReqData; + req.nextReqst = reqstingPr->requests; + reqstingPr->requests = &req; + + VMS__suspend_procr( reqstingPr ); + } + /* *This adds a request to dissipate, then suspends the processor so that the @@ -450,81 +470,102 @@ * pears -- making that suspend the last thing in the virt procr's trace. 
*/ void -VMS__dissipate_procr( VirtProcr *procrToDissipate ) +VMS__send_dissipate_req( VirtProcr *procrToDissipate ) + { VMSReqst req; + + req.reqType = dissipate; + req.nextReqst = procrToDissipate->requests; + procrToDissipate->requests = &req; + + VMS__suspend_procr( procrToDissipate ); + } + + +/* "ext" designates that it's for use outside the VMS system -- should only + * be called from main thread or other thread -- never from code animated by + * a VMS virtual processor. + * + *Use this version to dissipate VPs created outside the VMS system. + */ +void +VMS_ext__dissipate_procr( VirtProcr *procrToDissipate ) + { + //NOTE: initialData was given to the processor, so should either have + // been alloc'd with VMS__malloc, or freed by the level above animPr. + //So, all that's left to free here is the stack and the VirtProcr struc + // itself + //Note, should not stack-allocate initial data -- no guarantee, in + // general that creating processor will outlive ones it creates. + free( procrToDissipate->startOfStack ); + free( procrToDissipate ); + } + + + +/*This call's name indicates that request is malloc'd -- so req handler + * has to free any extra requests tacked on before a send, using this. + * + * This inserts the semantic-layer's request data into standard VMS carrier + * request data-struct that is mallocd. The sem request doesn't need to + * be malloc'd if this is called inside the same call chain before the + * send of the last request is called. 
+ * + *The request handler has to call VMS__free_VMSReq for any of these + */ +inline void +VMS__add_sem_request_in_mallocd_VMSReqst( void *semReqData, + VirtProcr *callingPr ) { VMSReqst *req; - req = malloc( sizeof(VMSReqst) ); -// req->virtProcrFrom = callingPr; - req->reqType = dissipate; - req->nextReqst = procrToDissipate->requests; - procrToDissipate->requests = req; - - VMS__suspend_procr( procrToDissipate ); -} - - -/*This inserts the semantic-layer's request data into standard VMS carrier - */ -inline void -VMS__add_sem_request( void *semReqData, VirtProcr *callingPr ) - { VMSReqst *req; - - req = malloc( sizeof(VMSReqst) ); -// req->virtProcrFrom = callingPr; - req->reqType = semantic; - req->semReqData = semReqData; - req->nextReqst = callingPr->requests; + req = VMS__malloc( sizeof(VMSReqst) ); + req->reqType = semantic; + req->semReqData = semReqData; + req->nextReqst = callingPr->requests; callingPr->requests = req; } +/*This inserts the semantic-layer's request data into standard VMS carrier + * request data-struct is allocated on stack of this call & ptr to it sent + * to plugin + *Then it does suspend, to cause request to be sent. 
+ */ +inline void +VMS__send_sem_request( void *semReqData, VirtProcr *callingPr ) + { VMSReqst req; -/*Use this to get first request before starting request handler's loop + req.reqType = semantic; + req.semReqData = semReqData; + req.nextReqst = callingPr->requests; + callingPr->requests = &req; + + VMS__suspend_procr( callingPr ); + } + + +inline void +VMS__send_VMSSem_request( void *semReqData, VirtProcr *callingPr ) + { VMSReqst req; + + req.reqType = VMSSemantic; + req.semReqData = semReqData; + req.nextReqst = callingPr->requests; //gab any other preceeding + callingPr->requests = &req; + + VMS__suspend_procr( callingPr ); + } + + +/* */ VMSReqst * -VMS__take_top_request_from( VirtProcr *procrWithReq ) - { VMSReqst *req; - - req = procrWithReq->requests; - if( req == NULL ) return req; - - procrWithReq->requests = procrWithReq->requests->nextReqst; - return req; - } - -/*A subtle bug due to freeing then accessing "next" after freed caused this - * form of call to be put in -- so call this at end of request handler loop - * that iterates through the requests. - */ -VMSReqst * -VMS__free_top_and_give_next_request_from( VirtProcr *procrWithReq ) +VMS__take_next_request_out_of( VirtProcr *procrWithReq ) { VMSReqst *req; req = procrWithReq->requests; if( req == NULL ) return NULL; procrWithReq->requests = procrWithReq->requests->nextReqst; - VMS__free_request( req ); - return procrWithReq->requests; - } - - -//TODO: add a semantic-layer supplied "freer" for the semantic-data portion -// of a request -- IE call with both a virt procr and a fn-ptr to request -// freer (also maybe put sem request freer as a field in virt procr?) 
-//MeasVMS relies right now on this only freeing VMS layer of request -- the -// semantic portion of request is alloc'd and freed by request handler -void -VMS__free_request( VMSReqst *req ) - { - free( req ); - } - - - -inline int -VMS__isSemanticReqst( VMSReqst *req ) - { - return ( req->reqType == semantic ); + return req; } @@ -534,36 +575,52 @@ return req->semReqData; } -inline int -VMS__isDissipateReqst( VMSReqst *req ) - { - return ( req->reqType == dissipate ); - } -inline int -VMS__isCreateReqst( VMSReqst *req ) - { - return ( req->reqType == regCreated ); - } -void -VMS__send_req_to_register_new_procr(VirtProcr *newPr, VirtProcr *reqstingPr) - { VMSReqst *req; +/* This is for OS requests and VMS infrastructure requests, such as to create + * a probe -- a probe is inside the heart of VMS-core, it's not part of any + * language -- but it's also a semantic thing that's triggered from and used + * in the application.. so it crosses abstractions.. so, need some special + * pattern here for handling such requests. + * Doing this just like it were a second language sharing VMS-core. + * + * This is called from the language's request handler when it sees a request + * of type VMSSemReq + * + * TODO: Later change this, to give probes their own separate plugin & have + * VMS-core steer the request to appropriate plugin + * Do the same for OS calls -- look later at it.. 
+ */ +void inline +VMS__handle_VMSSemReq( VMSReqst *req, VirtProcr *requestingPr, void *semEnv, + ResumePrFnPtr resumePrFnPtr ) + { VMSSemReq *semReq; + IntervalProbe *newProbe; + int32 nameLen; - req = malloc( sizeof(VMSReqst) ); - req->reqType = regCreated; - req->semReqData = newPr; - req->nextReqst = reqstingPr->requests; - reqstingPr->requests = req; + semReq = req->semReqData; - VMS__suspend_procr( reqstingPr ); + newProbe = VMS__malloc( sizeof(IntervalProbe) ); + nameLen = strlen( semReq->nameStr ); + newProbe->nameStr = VMS__malloc( nameLen ); + memcpy( newProbe->nameStr, semReq->nameStr, nameLen ); + newProbe->hist = NULL; + newProbe->schedChoiceWasRecorded = FALSE; + + //This runs in masterVP, so no race-condition worries + newProbe->probeID = + addToDynArray( newProbe, _VMSMasterEnv->dynIntervalProbesInfo ); + + requestingPr->dataRetFromReq = newProbe; + + (*resumePrFnPtr)( requestingPr, semEnv ); } /*This must be called by the request handler plugin -- it cannot be called * from the semantic library "dissipate processor" function -- instead, the - * semantic layer has to generate a request for the plug-in to call this + * semantic layer has to generate a request, and the plug-in calls this * function. *The reason is that this frees the virtual processor's stack -- which is * still in use inside semantic library calls! @@ -579,33 +636,31 @@ * of dis-owning it. */ void -VMS__handle_dissipate_reqst( VirtProcr *animatingPr ) +VMS__dissipate_procr( VirtProcr *animatingPr ) { //dis-own all locations owned by this processor, causing to be freed // any locations that it is (was) sole owner of //TODO: implement VMS__malloc system, including "give up ownership" - //The dissipate request might still be attached, so remove and free it - VMS__free_top_and_give_next_request_from( animatingPr ); //NOTE: initialData was given to the processor, so should either have // been alloc'd with VMS__malloc, or freed by the level above animPr. 
//So, all that's left to free here is the stack and the VirtProcr struc // itself - free( animatingPr->startOfStack ); - free( animatingPr ); + //Note, should not stack-allocate initial data -- no guarantee, in + // general that creating processor will outlive ones it creates. + VMS__free( animatingPr->startOfStack ); + VMS__free( animatingPr ); } -//TODO: re-architect so that have clean separation between request handler +//TODO: look at architecting cleanest separation between request handler // and master loop, for dissipate, create, shutdown, and other non-semantic // requests. Issue is chain: one removes requests from AppVP, one dispatches // on type of request, and one handles each type.. but some types require // action from both request handler and master loop -- maybe just give the // request handler calls like: VMS__handle_X_request_type -void -endOSThreadFn( void *initData, VirtProcr *animatingPr ); /*This is called by the semantic layer's request handler when it decides its * time to shut down the VMS system. Calling this causes the core loop OS @@ -619,10 +674,9 @@ * masterVP any AppVPs that might still be allocated and sitting in the * semantic environment, or have been orphaned in the _VMSWorkQ. * - *NOTE: the semantic plug-in is expected to use VMS__malloc_to to get all the + *NOTE: the semantic plug-in is expected to use VMS__malloc to get all the * locations it needs, and give ownership to masterVP. Then, they will be - * automatically freed when the masterVP is dissipated. (This happens after - * the core loop threads have all exited) + * automatically freed. * *In here,create one core-loop shut-down processor for each core loop and put * them all directly into the readyToAnimateQ. @@ -633,16 +687,16 @@ * point is it sure that all results have completed. 
*/ void -VMS__handle_shutdown_reqst( void *dummy, VirtProcr *animatingPr ) +VMS__shutdown() { int coreIdx; VirtProcr *shutDownPr; //create the shutdown processors, one for each core loop -- put them // directly into the Q -- each core will die when gets one for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ ) - { + { //Note, this is running in the master shutDownPr = VMS__create_procr( &endOSThreadFn, NULL ); - writeSRSWQ( shutDownPr, _VMSMasterEnv->readyToAnimateQs[coreIdx] ); + writeVMSQ( shutDownPr, _VMSMasterEnv->readyToAnimateQs[coreIdx] ); } } @@ -681,49 +735,60 @@ } -/*This is called after the threads have shut down and control has returned - * to the semantic layer, in the entry point function in the main thread. - * It has to free anything allocated during VMS_init, and any other alloc'd - * locations that might be left over. +/*This is called from the startup & shutdown */ void -VMS__cleanup_after_shutdown() +VMS__cleanup_at_end_of_shutdown() { VMSQueueStruc **readyToAnimateQs; int coreIdx; VirtProcr **masterVPs; SchedSlot ***allSchedSlots; //ptr to array of ptrs + //All the environment data has been allocated with VMS__malloc, so just + // free its internal big-chunk and all inside it disappear. 
+/* readyToAnimateQs = _VMSMasterEnv->readyToAnimateQs; masterVPs = _VMSMasterEnv->masterVPs; allSchedSlots = _VMSMasterEnv->allSchedSlots; for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ ) { - freeSRSWQ( readyToAnimateQs[ coreIdx ] ); - - VMS__handle_dissipate_reqst( masterVPs[ coreIdx ] ); + freeVMSQ( readyToAnimateQs[ coreIdx ] ); + //master VPs were created external to VMS, so use external free + VMS__dissipate_procr( masterVPs[ coreIdx ] ); freeSchedSlots( allSchedSlots[ coreIdx ] ); } - free( _VMSMasterEnv->readyToAnimateQs ); - free( _VMSMasterEnv->masterVPs ); - free( _VMSMasterEnv->allSchedSlots ); - - free( _VMSMasterEnv ); + VMS__free( _VMSMasterEnv->readyToAnimateQs ); + VMS__free( _VMSMasterEnv->masterVPs ); + VMS__free( _VMSMasterEnv->allSchedSlots ); + + //============================= MEASUREMENT STUFF ======================== + #ifdef STATS__TURN_ON_PROBES + freeDynArrayDeep( _VMSMasterEnv->dynIntervalProbesInfo, &VMS__free_probe); + #endif + //======================================================================== +*/ + //These are the only two that use system free + VMS_ext__free_free_list( _VMSMasterEnv->freeListHead ); + free( (void *)_VMSMasterEnv ); } -//=========================================================================== +//================================ -inline TSCount getTSC() - { unsigned int low, high; - TSCount out; - saveTimeStampCountInto( low, high ); - out = high; - out = (out << 32) + low; - return out; +/*Later, improve this -- for now, just exits the application after printing + * the error message. 
+ */ +void +VMS__throw_exception( char *msgStr, VirtProcr *reqstPr, VMSExcp *excpData ) + { + printf(msgStr); + fflush(stdin); + exit(1); } + diff -r 4fbc2165e493 -r 984f7d78bfdf VMS.h --- a/VMS.h Tue Oct 26 18:31:34 2010 -0700 +++ b/VMS.h Thu Nov 11 06:19:51 2010 -0800 @@ -7,22 +7,54 @@ */ #ifndef _VMS_H -#define _VMS_H +#define _VMS_H #define __USE_GNU #include "VMS_primitive_data_types.h" -#include "Queue_impl/BlockingQueue.h" +#include "Queue_impl/PrivateQueue.h" #include "Histogram/Histogram.h" +#include "DynArray/DynArray.h" +#include "Hash_impl/PrivateHash.h" +#include "vmalloc.h" + #include +#include + +//=============================== Debug =================================== +// //When SEQUENTIAL is defined, VMS does sequential exe in the main thread // It still does co-routines and all the mechanisms are the same, it just // has only a single thread and animates VPs one at a time //#define SEQUENTIAL -#define PRINT_DEBUG(msg) //printf(msg); fflush(stdin); -#define PRINT1_DEBUG(msg, param) //printf(msg, param); fflush(stdin); -#define PRINT2_DEBUG(msg, p1, p2) //printf(msg, p1, p2); fflush(stdin); +//#define USE_WORK_STEALING + + //turns on the probe-instrumentation in the application -- when not + // defined, the calls to the probe functions turn into comments +#define STATS__ENABLE_PROBES +//#define TURN_ON_DEBUG_PROBES + + //These defines turn types of bug messages on and off + // be sure debug messages are un-commented (next block of defines) +#define dbgProbes FALSE /* for issues inside probes themselves*/ +#define dbgAppFlow FALSE /* Top level flow of application code -- general*/ +#define dbgB2BMaster FALSE /* in coreloop, back to back master VPs*/ +#define dbgRqstHdlr FALSE /* in request handler code*/ + + //Comment or un- the substitute half to turn on/off types of debug message +#define DEBUG( bool, msg) \ +// if( bool){ printf(msg); fflush(stdin);} +#define DEBUG1( bool, msg, param) \ +// if(bool){printf(msg, param); fflush(stdin);} +#define 
DEBUG2( bool, msg, p1, p2) \ +// if(bool) {printf(msg, p1, p2); fflush(stdin);} + +#define ERROR(msg) printf(msg); fflush(stdin); +#define ERROR1(msg, param) printf(msg, param); fflush(stdin); +#define ERROR2(msg, p1, p2) printf(msg, p1, p2); fflush(stdin); + +//=========================== STATS ======================= //when MEAS__TIME_STAMP_SUSP is defined, causes code to be inserted and // compiled-in that saves the low part of the time stamp count just before @@ -33,53 +65,97 @@ #define MEAS__TIME_MASTER #define MEAS__NUM_TIMES_TO_RUN 100000 + //For code that calculates normalization-offset between TSC counts of + // different cores. #define NUM_TSC_ROUND_TRIPS 10 + +//========================= Hardware related Constants ===================== //This value is the number of hardware threads in the shared memory // machine #define NUM_CORES 4 - // balance amortizing master fixed overhead vs imbalance potential -#define NUM_SCHED_SLOTS 3 + // tradeoff amortizing master fixed overhead vs imbalance potential + // when work-stealing, can make bigger, at risk of losing cache affinity +#define NUM_SCHED_SLOTS 5 #define MIN_WORK_UNIT_CYCLES 20000 -#define READYTOANIMATE_RETRIES 10000 +#define MASTERLOCK_RETRIES 10000 - // stack -#define VIRT_PROCR_STACK_SIZE 0x10000 + // stack size in virtual processors created +#define VIRT_PROCR_STACK_SIZE 0x4000 /* 16K */ - //256M of total memory for VMS__malloc -#define MASSIVE_MALLOC_SIZE 0x10000000 + // memory for VMS__malloc +#define MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE 0x10000000 /* 256M */ -#define NUM_PREPEND_BYTES sizeof(FreeListElem) + sizeof(ownerElem); + +//============================== #define SUCCESS 0 -#define writeVMSQ writeSRSWQ -#define readVMSQ readSRSWQ -#define makeVMSQ makeSRSWQ -#define VMSQueueStruc SRSWQueueStruc +#define writeVMSQ writePrivQ +#define readVMSQ readPrivQ +#define makeVMSQ makePrivQ +#define numInVMSQ numInPrivQ +#define VMSQueueStruc PrivQueueStruc -//#define thdAttrs NULL //For PThreads -typedef 
struct _SchedSlot SchedSlot; -typedef struct _VMSReqst VMSReqst; -typedef struct _VirtProcr VirtProcr; + +//=========================================================================== +typedef unsigned long long TSCount; + +typedef struct _SchedSlot SchedSlot; +typedef struct _VMSReqst VMSReqst; +typedef struct _VirtProcr VirtProcr; +typedef struct _IntervalProbe IntervalProbe; +typedef struct _GateStruc GateStruc; + typedef VirtProcr * (*SlaveScheduler) ( void *, int ); //semEnv, coreIdx typedef void (*RequestHandler) ( VirtProcr *, void * ); //prWReqst, semEnv typedef void (*VirtProcrFnPtr) ( void *, VirtProcr * ); //initData, animPr typedef void VirtProcrFn ( void *, VirtProcr * ); //initData, animPr +typedef void (*ResumePrFnPtr) ( VirtProcr *, void * ); + + +//============= Requests =========== +// + +enum VMSReqstType //avoid starting enums at 0, for debug reasons + { + semantic = 1, + createReq, + dissipate, + VMSSemantic //goes with VMSSemReqst below + }; + +struct _VMSReqst + { + enum VMSReqstType reqType;//used for dissipate and in future for IO requests + void *semReqData; + + VMSReqst *nextReqst; + }; +//VMSReqst + +enum VMSSemReqstType //These are equivalent to semantic requests, but for + { // VMS's services available directly to app, like OS + createProbe = 1, // and probe services -- like a VMS-wide built-in lang + openFile, + otherIO + }; typedef struct - { - void *endThdPt; - unsigned int coreNum; + { enum VMSSemReqstType reqType; + VirtProcr *requestingPr; + char *nameStr; //for create probe } -ThdParams; + VMSSemReq; +//==================== Core data structures =================== + struct _SchedSlot { int workIsDone; @@ -87,24 +163,6 @@ VirtProcr *procrAssignedToSlot; }; //SchedSlot - -enum ReqstType - { - semantic = 1, - dissipate, - regCreated, - IO - }; - -struct _VMSReqst - { -// VirtProcr *virtProcrFrom; - enum ReqstType reqType;//used for dissipate and in future for IO requests - void *semReqData; - - VMSReqst *nextReqst; - }; -//VMSReqst 
struct _VirtProcr { int procrID; //for debugging -- count up each time create @@ -123,9 +181,10 @@ SchedSlot *schedSlot; VMSReqst *requests; - void *semanticData; + void *semanticData; //this lives here for the life of VP + void *dataRetFromReq;//values returned from plugin to VP go here - //============================= MEASUREMENT STUFF ======================== + //=========== MEASUREMENT STUFF ========== #ifdef MEAS__TIME_STAMP_SUSP unsigned int preSuspTSCLow; unsigned int postSuspTSCLow; @@ -134,7 +193,8 @@ unsigned int startMasterTSCLow; unsigned int endMasterTSCLow; #endif - //======================================================================== + + float64 createPtInSecs; //have space but don't use on some configs }; //VirtProcr @@ -158,37 +218,79 @@ void *semanticEnv; void *OSEventStruc; //for future, when add I/O to BLIS + MallocProlog *freeListHead; + int32 amtOfOutstandingMem; //total currently allocated void *coreLoopStartPt;//addr to jump to to re-enter coreLoop void *coreLoopEndPt; //addr to jump to to shut down a coreLoop - int setupComplete; - int masterLock; + int32 setupComplete; + int32 masterLock; VMSStats *stats; + int32 numMasterInARow[NUM_CORES];//detect back-to-back masterVP + GateStruc *workStealingGates[ NUM_CORES ]; //concurrent work-steal + int32 workStealingLock; + + int32 numProcrsCreated; //gives ordering to processor creation + + //=========== MEASUREMENT STUFF ============= + IntervalProbe **intervalProbes; + PrivDynArrayInfo *dynIntervalProbesInfo; + HashTable *probeNameHashTbl; + int32 masterCreateProbeID; + float64 createPtInSecs; } MasterEnv; +//========================= Extra Stuff Data Strucs ======================= +typedef struct + { -//========================================================== + } +VMSExcp; + +struct _GateStruc + { + int32 gateClosed; + int32 preGateProgress; + int32 waitProgress; + int32 exitProgress; + }; +//GateStruc + +//======================= OS Thread related =============================== void * 
coreLoop( void *paramsIn ); //standard PThreads fn prototype void * coreLoop_Seq( void *paramsIn ); //standard PThreads fn prototype void masterLoop( void *initData, VirtProcr *masterPr ); -//===================== Global Vars =================== - +typedef struct + { + void *endThdPt; + unsigned int coreNum; + } +ThdParams; pthread_t coreLoopThdHandles[ NUM_CORES ]; //pthread's virt-procr state ThdParams *coreLoopThdParams [ NUM_CORES ]; pthread_mutex_t suspendLock; pthread_cond_t suspend_cond; + + +//===================== Global Vars =================== + volatile MasterEnv *_VMSMasterEnv; -//========================== + + +//=========================== Function Prototypes ========================= + + +//========== Setup and shutdown ========== void VMS__init(); @@ -204,69 +306,59 @@ VirtProcr * VMS__create_procr( VirtProcrFnPtr fnPtr, void *initialData ); +void +VMS__dissipate_procr( VirtProcr *procrToDissipate ); + + //Use this to create processor inside entry point & other places outside + // the VMS system boundary (IE, not run in slave nor Master) VirtProcr * -VMS__create_the_shutdown_procr(); - -//========================== -inline void -VMS__add_sem_request( void *semReqData, VirtProcr *callingPr ); +VMS_ext__create_procr( VirtProcrFnPtr fnPtr, void *initialData ); void -VMS__send_req_to_register_new_procr( VirtProcr *newPrToRegister, - VirtProcr *reqstingPr ); +VMS_ext__dissipate_procr( VirtProcr *procrToDissipate ); void -VMS__free_request( VMSReqst *req ); +VMS__throw_exception( char *msgStr, VirtProcr *reqstPr, VMSExcp *excpData ); void -VMS__remove_and_free_top_request( VirtProcr *reqstingPr ); +VMS__shutdown(); + +void +VMS__cleanup_at_end_of_shutdown(); + + +//============== Request Related =============== + +void +VMS__suspend_procr( VirtProcr *callingPr ); + +inline void +VMS__add_sem_request_in_mallocd_VMSReqst( void *semReqData, VirtProcr *callingPr ); + +inline void +VMS__send_sem_request( void *semReqData, VirtProcr *callingPr ); + +void 
+VMS__send_create_procr_req( void *semReqData, VirtProcr *reqstingPr ); + +void inline +VMS__send_dissipate_req( VirtProcr *prToDissipate ); + +inline void +VMS__send_VMSSem_request( void *semReqData, VirtProcr *callingPr ); VMSReqst * -VMS__take_top_request_from( VirtProcr *reqstingPr ); - -VMSReqst * -VMS__free_top_and_give_next_request_from( VirtProcr *procrWithReq ); +VMS__take_next_request_out_of( VirtProcr *procrWithReq ); inline void * VMS__take_sem_reqst_from( VMSReqst *req ); -inline int -VMS__isSemanticReqst( VMSReqst *req ); - -inline int -VMS__isDissipateReqst( VMSReqst *req ); - -inline int -VMS__isCreateReqst( VMSReqst *req ); - -//========================== - -void -VMS__suspend_procr( VirtProcr *callingPr ); - -void -VMS__dissipate_procr( VirtProcr *prToDissipate ); - -void -VMS__handle_dissipate_reqst( VirtProcr *procrToDissipate ); - -void -VMS__cleanup_after_shutdown(); - -//========================== -void -measureTSCOffsetsAsCore0(); - -void -measureTSCOffsetsAsRemoteCore( int coreIdx ); - -//============================= Statistics ================================== - -typedef unsigned long long TSCount; - //Frequency of TS counts //TODO: change freq for each machine #define TSCOUNT_FREQ 3180000000 +//======================== STATS ====================== + +//===== RDTSC wrapper ===== #define saveTimeStampCountInto(low, high) \ asm volatile("RDTSC; \ @@ -284,10 +376,12 @@ /* inputs */ : \ /* clobber */ : "%eax", "%edx" \ ); +//===== -inline TSCount getTSC(); +#include "SwitchAnimators.h" +#include "probes.h" -inline TSCount getTSC(); + //===================== Debug ========================== int numProcrsCreated; @@ -298,4 +392,3 @@ TSCount *pingTimes; #endif /* _VMS_H */ - diff -r 4fbc2165e493 -r 984f7d78bfdf VMS__DESIGN_NOTES.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VMS__DESIGN_NOTES.txt Thu Nov 11 06:19:51 2010 -0800 @@ -0,0 +1,2 @@ + +Implement VMS this way: diff -r 4fbc2165e493 -r 984f7d78bfdf probes.c --- /dev/null Thu Jan 
01 00:00:00 1970 +0000
+++ b/probes.c Thu Nov 11 06:19:51 2010 -0800
@@ -0,0 +1,354 @@
+/*
+ * Copyright 2010 OpenSourceStewardshipFoundation
+ *
+ * Licensed under BSD
+ */
+
+//NOTE(review): the five system header names below were lost when this patch
+// was extracted (only bare "#include" survived).  They are reconstructed
+// from the calls made in this file (printf, malloc, strlen/memcpy,
+// gettimeofday) -- confirm against the repository.
+#include <stdio.h>
+#include <malloc.h>
+#include <sys/time.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "VMS.h"
+#include "Queue_impl/BlockingQueue.h"
+#include "Histogram/Histogram.h"
+
+
+//================================ STATS ====================================
+
+/*Reads the time-stamp counter as two halves via saveTimeStampCountInto and
+ * returns them combined as (high << 32) + low.
+ */
+inline TSCount getTSCount()
+ { unsigned int low, high;
+   TSCount out;
+
+   saveTimeStampCountInto( low, high );
+   out = high;
+   out = (out << 32) + low;
+   return out;
+ }
+
+
+
+//==================== Probes =================
+#ifdef STATS__USE_TSC_PROBES
+
+/*TSC variant: creates a single-interval probe, attaches a float histogram
+ * to it, and returns the probe's index in the master env's probe array.
+ */
+int32
+VMS__create_histogram_probe( int32 numBins, float32 startValue,
+                             float32 binWidth, char *nameStr )
+ { IntervalProbe *newProbe;
+   int32 idx;
+   FloatHist *hist;
+
+   idx = VMS__create_single_interval_probe( nameStr );
+   newProbe = _VMSMasterEnv->intervalProbes[ idx ];
+
+   hist = makeFloatHistogram( numBins, startValue, binWidth );
+   newProbe->hist = hist;
+   return idx;
+ }
+
+/*TSC variant: stamps the start of an interval into the probe.
+ */
+void
+VMS_impl__record_interval_start_in_probe( int32 probeID )
+ { IntervalProbe *probe;
+
+   probe = _VMSMasterEnv->intervalProbes[ probeID ];
+   probe->startStamp = getTSCount();
+ }
+
+/*TSC variant: stamps the end of an interval and, when the probe has a
+ * histogram, adds the interval to it.  The stamp is taken before the probe
+ * lookup so the lookup is not counted in the interval.
+ */
+void
+VMS_impl__record_interval_end_in_probe( int32 probeID )
+ { IntervalProbe *probe;
+   TSCount endStamp;
+
+   endStamp = getTSCount();
+
+   probe = _VMSMasterEnv->intervalProbes[ probeID ];
+   probe->endStamp = endStamp;
+
+   if( probe->hist != NULL )
+    { TSCount interval = probe->endStamp - probe->startStamp;
+         //if the interval is sane, then add to histogram
+      if( interval < probe->hist->endOfRange * 10 )
+         addToFloatHist( interval, probe->hist );
+    }
+ }
+
+/*TSC variant: prints the probe's name plus either its single interval or
+ * its histogram.
+ */
+void
+VMS_impl__print_stats_of_probe( int32 probeID )
+ { IntervalProbe *probe;
+
+   probe = _VMSMasterEnv->intervalProbes[ probeID ];
+
+   if( probe->hist == NULL )
+    {
+      printf("probe: %s, interval: %.6lf\n", probe->nameStr,probe->interval);
+    }
+
+   else
+    {
+      printf( "probe: %s\n", probe->nameStr );
+      printFloatHist( probe->hist );
+    }
+ }
+#else
+
+/*
+ * In practice, probe operations are called from the app, from inside slaves
+ *  -- so have to be sure each probe is single-VP owned, and be sure that
+ *  any place common structures are modified it's done inside the master.
+ * So -- the only place common structures are modified is during creation.
+ *  after that, all mods are to individual instances.
+ *
+ * Thinking perhaps should change the semantics to be that probes are
+ *  attached to the virtual processor -- and then everything is guaranteed
+ *  to be isolated -- except then can't take any intervals that span VPs,
+ *  and would have to transfer the probes to Master env when VP dissipates..
+ *  gets messy..
+ *
+ * For now, just making so that probe creation causes a suspend, so that
+ *  the dynamic array in the master env is only modified from the master
+ *
+ */
+IntervalProbe *
+create_generic_probe( char *nameStr, VirtProcr *animPr )
+ { VMSSemReq reqData;
+
+   reqData.reqType  = createProbe;
+   reqData.nameStr  = nameStr;
+
+      //the request's result comes back in animPr->dataRetFromReq
+   VMS__send_VMSSem_request( &reqData, animPr );
+
+   return animPr->dataRetFromReq;
+ }
+
+/*Use this version from outside VMS -- it uses external malloc, and modifies
+ * dynamic array, so can't be animated in a slave VP
+ *
+ *The name is copied, so the caller keeps ownership of nameStr.
+ */
+IntervalProbe *
+ext__create_generic_probe( char *nameStr )
+ { IntervalProbe *newProbe;
+   int32          nameLen;
+
+   newProbe = malloc( sizeof(IntervalProbe) );
+   nameLen = strlen( nameStr );
+      //copy the name together with its terminating NUL -- the name is
+      // later printed with "%s", which needs the terminator
+   newProbe->nameStr = malloc( nameLen + 1 );
+   memcpy( newProbe->nameStr, nameStr, nameLen + 1 );
+   newProbe->hist = NULL;
+   newProbe->schedChoiceWasRecorded = FALSE;
+   newProbe->probeID =
+             addToDynArray( newProbe, _VMSMasterEnv->dynIntervalProbesInfo );
+
+   return newProbe;
+ }
+
+
+/*Only call from inside master or main startup/shutdown thread
+ *
+ *NOTE(review): probes made by ext__create_generic_probe come from the
+ * system malloc, while this releases with VMS__free -- confirm that is
+ * intended for such probes.
+ */
+void
+VMS_impl__free_probe( IntervalProbe *probe )
+ { if( probe->hist != NULL )   freeDblHist( probe->hist );
+   if( probe->nameStr != NULL) VMS__free( probe->nameStr );
+   VMS__free( probe );
+ }
+
+
+/*Creates a new probe (via request, so animPr suspends) and records the
+ * current time in it as a single time point.  endSecs is set to 0, which
+ * print_probe_helper uses to recognise a time-point probe.
+ *Returns the new probe's ID.
+ */
+int32
+VMS_impl__record_time_point_into_new_probe( char *nameStr, VirtProcr *animPr)
+ { IntervalProbe *newProbe;
+   struct timeval *startStamp;
+   float64 startSecs;
+
+   newProbe           = create_generic_probe( nameStr, animPr );
+   newProbe->endSecs  = 0;
+
+   gettimeofday( &(newProbe->startStamp), NULL);
+
+      //turn into a double
+   startStamp = &(newProbe->startStamp);
+   startSecs = startStamp->tv_sec + ( startStamp->tv_usec / 1000000.0 );
+   newProbe->startSecs = startSecs;
+
+   return newProbe->probeID;
+ }
+
+/*Same as VMS_impl__record_time_point_into_new_probe, but creates the probe
+ * with ext__create_generic_probe, for use from outside VMS.
+ */
+int32
+VMS_ext_impl__record_time_point_into_new_probe( char *nameStr )
+ { IntervalProbe *newProbe;
+   struct timeval *startStamp;
+   float64 startSecs;
+
+   newProbe           = ext__create_generic_probe( nameStr );
+   newProbe->endSecs  = 0;
+
+   gettimeofday( &(newProbe->startStamp), NULL);
+
+      //turn into a double
+   startStamp = &(newProbe->startStamp);
+   startSecs = startStamp->tv_sec + ( startStamp->tv_usec / 1000000.0 );
+   newProbe->startSecs = startSecs;
+
+   return newProbe->probeID;
+ }
+
+/*Creates a probe with no histogram and returns its ID.
+ */
+int32
+VMS_impl__create_single_interval_probe( char *nameStr, VirtProcr *animPr )
+ { IntervalProbe *newProbe;
+
+   newProbe = create_generic_probe( nameStr, animPr );
+
+   return newProbe->probeID;
+ }
+
+/*Creates a probe, attaches a double histogram built from the given bin
+ * parameters, and returns the probe's ID.
+ */
+int32
+VMS_impl__create_histogram_probe( int32   numBins, float64    startValue,
+               float64 binWidth, char    *nameStr, VirtProcr *animPr )
+ { IntervalProbe *newProbe;
+   DblHist *hist;
+
+   newProbe = create_generic_probe( nameStr, animPr );
+
+   hist = makeDblHistogram( numBins, startValue, binWidth );
+   newProbe->hist = hist;
+   return newProbe->probeID;
+ }
+
+/*Adds the probe to the master env's name hash table, keyed by its name.
+ */
+void
+VMS_impl__index_probe_by_its_name( int32 probeID, VirtProcr *animPr )
+ { IntervalProbe *probe;
+
+      //TODO: fix this To be in Master -- race condition
+   probe = _VMSMasterEnv->intervalProbes[ probeID ];
+
+   addValueIntoTable(probe->nameStr, probe, _VMSMasterEnv->probeNameHashTbl);
+ }
+
+/*Looks the probe up by name in the master env's name hash table.
+ */
+IntervalProbe *
+VMS_impl__get_probe_by_name( char *probeName, VirtProcr *animPr )
+ {
+      //TODO: fix this To be in Master -- race condition
+   return getValueFromTable( probeName, _VMSMasterEnv->probeNameHashTbl );
+ }
+
+
+/*Everything is local to the animating procr, so no need for request, do
+ * work locally, in the anim Pr
+ */
+void
+VMS_impl__record_sched_choice_into_probe( int32 probeID, VirtProcr *animatingPr )
+ { IntervalProbe *probe;
+
+   probe = _VMSMasterEnv->intervalProbes[ probeID ];
+   probe->schedChoiceWasRecorded = TRUE;
+   probe->coreNum = animatingPr->coreAnimatedBy;
+   probe->procrID = animatingPr->procrID;
+   probe->procrCreateSecs = animatingPr->createPtInSecs;
+ }
+
+/*Everything is local to the animating procr, so no need for request, do
+ * work locally, in the anim Pr
+ */
+void
+VMS_impl__record_interval_start_in_probe( int32 probeID )
+ { IntervalProbe *probe;
+
+         DEBUG( dbgProbes, "record start of interval\n" )
+   probe = _VMSMasterEnv->intervalProbes[ probeID ];
+   gettimeofday( &(probe->startStamp), NULL );
+ }
+
+
+/*Everything is local to the animating procr, so no need for request, do
+ * work locally, in the anim Pr
+ */
+void
+VMS_impl__record_interval_end_in_probe( int32 probeID )
+ { IntervalProbe *probe;
+   struct timeval *endStamp, *startStamp;
+   float64 startSecs, endSecs;
+
+         DEBUG( dbgProbes, "record end of interval\n" )
+      //possible seg-fault if array resized by diff core right after this
+      // one gets probe..?  Something like that?  Might be safe.. don't care
+   probe = _VMSMasterEnv->intervalProbes[ probeID ];
+   gettimeofday( &(probe->endStamp), NULL);
+
+      //now turn into an interval held in a double
+   startStamp = &(probe->startStamp);
+   endStamp   = &(probe->endStamp);
+
+   startSecs = startStamp->tv_sec + ( startStamp->tv_usec / 1000000.0 );
+   endSecs   = endStamp->tv_sec   + ( endStamp->tv_usec / 1000000.0 );
+
+   probe->interval  = endSecs - startSecs;
+   probe->startSecs = startSecs;
+   probe->endSecs   = endSecs;
+
+   if( probe->hist != NULL )
+    {
+         //if the interval is sane, then add to histogram
+      if( probe->interval < probe->hist->endOfRange * 10 )
+         addToDblHist( probe->interval, probe->hist );
+    }
+ }
+
+/*Prints one probe: its name, the scheduling choice if one was recorded,
+ * then a time point (endSecs == 0), a single interval (no histogram), or
+ * the histogram.  Start times are printed relative to the master env's
+ * createPtInSecs.
+ */
+void
+print_probe_helper( IntervalProbe *probe )
+ {
+   printf( "\nprobe: %s, ", probe->nameStr );
+
+   if( probe->schedChoiceWasRecorded )
+    { printf( "coreNum: %d, procrID: %d, procrCreated: %.6lf | ",
+              probe->coreNum, probe->procrID, probe->procrCreateSecs );
+    }
+
+   if( probe->endSecs == 0 ) //just a single point in time
+    {
+      printf( " time point: %.6lf\n",
+              probe->startSecs - _VMSMasterEnv->createPtInSecs );
+    }
+   else if( probe->hist == NULL ) //just an interval
+    {
+      printf( " startSecs: %.6lf, interval: %.6lf\n",
+         probe->startSecs - _VMSMasterEnv->createPtInSecs, probe->interval);
+    }
+   else  //a full histogram of intervals
+    {
+      printDblHist( probe->hist );
+    }
+ }
+
+//TODO: change so pass around pointer to probe instead of its array-index..
+// will eliminate chance for timing of resize to cause problems with the
+// lookup -- even though don't think it actually can cause problems..
+// there's no need to pass index around -- have hash table for names, and
+// only need it once, then have ptr to probe..  the thing about enum the
+// index and use that as name is clunky in practice -- just hash.
+/*Prints the stats of the probe stored at index probeID in the master env's
+ * probe array.
+ */
+void
+VMS_impl__print_stats_of_probe( int32 probeID )
+ { IntervalProbe *probe;
+
+   probe = _VMSMasterEnv->intervalProbes[ probeID ];
+
+   print_probe_helper( probe );
+ }
+
+
+
+/*Adapter with a void* parameter so print_probe_helper can be handed to
+ * forAllInDynArrayDo.
+ */
+void
+generic_print_probe( void *_probe )
+ { IntervalProbe *probe;
+
+   probe = (IntervalProbe *)_probe;
+   print_probe_helper( probe );
+ }
+
+/*Prints every probe held in the master env's dynamic probe array, then
+ * flushes stdout.
+ */
+void
+VMS_impl__print_stats_of_all_probes()
+ {
+   forAllInDynArrayDo( _VMSMasterEnv->dynIntervalProbesInfo,
+                       &generic_print_probe );
+   fflush( stdout );
+ }
+#endif
diff -r 4fbc2165e493 -r 984f7d78bfdf probes.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/probes.h Thu Nov 11 06:19:51 2010 -0800
@@ -0,0 +1,194 @@
+/*
+ * Copyright 2009 OpenSourceStewardshipFoundation.org
+ * Licensed under GNU General Public License version 2
+ *
+ * Author: seanhalle@yahoo.com
+ *
+ */
+
+#ifndef _PROBES_H
+#define _PROBES_H
+#define __USE_GNU
+
+#include "VMS_primitive_data_types.h"
+
+//NOTE(review): the header name was lost from this line when the patch was
+// extracted; <sys/time.h> is assumed because the probe struct below holds
+// struct timeval members -- confirm against the repository.
+#include <sys/time.h>
+
+
+   //when STATS__TURN_ON_PROBES is defined allows using probes to measure
+   // time intervals.  The probes are macros that only compile to something
+   // when STATS__TURN_ON_PROBES is defined.  The probes are saved in the
+   // master env -- but only when this is defined.
+   //The TSC probes use RDTSC instr, can be unreliable, Dbl uses gettimeofday
+#define STATS__TURN_ON_PROBES
+//#define STATS__USE_TSC_PROBES
+#define STATS__USE_DBL_PROBES
+
+//typedef struct _IntervalProbe IntervalProbe; //in VMS.h
+
+struct _IntervalProbe
+ {
+   char           *nameStr;
+   int32           probeID;
+
+   int32           schedChoiceWasRecorded;
+   int32           coreNum;
+   int32           procrID;
+   float64         procrCreateSecs;
+
+   #ifdef STATS__USE_TSC_PROBES
+   TSCount    startStamp;
+   TSCount    endStamp;
+   #else
+   struct timeval  startStamp;
+   struct timeval  endStamp;
+   #endif
+   float64         startSecs;
+   float64         endSecs;
+   float64         interval;
+   DblHist        *hist;//if NULL, then is single interval probe
+ };
+
+
+//============================= Statistics ==================================
+
+   //Frequency of TS counts
+   //TODO: change freq for each machine
+#define TSCOUNT_FREQ 3180000000
+
+inline TSCount getTSCount();
+
+
+//======================== Probes =============================
+//
+// Use macros to allow turning probes off with a #define switch
+//
+//The switch documented and defined at the top of this header is
+// STATS__TURN_ON_PROBES; STATS__ENABLE_PROBES is accepted as well so that
+// code defining the other spelling keeps working.
+#if defined( STATS__TURN_ON_PROBES ) || defined( STATS__ENABLE_PROBES )
+int32
+VMS_impl__record_time_point_into_new_probe( char *nameStr,VirtProcr *animPr);
+#define VMS__record_time_point_into_new_probe( nameStr, animPr ) \
+        VMS_impl__record_time_point_into_new_probe( nameStr, animPr )
+
+int32
+VMS_ext_impl__record_time_point_into_new_probe( char *nameStr );
+#define VMS_ext__record_time_point_into_new_probe( nameStr ) \
+        VMS_ext_impl__record_time_point_into_new_probe( nameStr )
+
+
+int32
+VMS_impl__create_single_interval_probe( char *nameStr, VirtProcr *animPr );
+#define VMS__create_single_interval_probe( nameStr, animPr ) \
+        VMS_impl__create_single_interval_probe( nameStr, animPr )
+
+
+int32
+VMS_impl__create_histogram_probe( int32   numBins, float64    startValue,
+               float64 binWidth, char    *nameStr, VirtProcr *animPr );
+#define VMS__create_histogram_probe(      numBins, startValue,              \
+                                          binWidth, nameStr, animPr )       \
+        VMS_impl__create_histogram_probe( numBins, startValue,              \
+                                          binWidth, nameStr, animPr )
+void
+VMS_impl__free_probe( IntervalProbe *probe );
+#define VMS__free_probe( probe ) \
+        VMS_impl__free_probe( probe )
+
+void
+VMS_impl__index_probe_by_its_name( int32 probeID, VirtProcr *animPr );
+#define VMS__index_probe_by_its_name( probeID, animPr ) \
+        VMS_impl__index_probe_by_its_name( probeID, animPr )
+
+IntervalProbe *
+VMS_impl__get_probe_by_name( char *probeName, VirtProcr *animPr );
+#define VMS__get_probe_by_name( probeName, animPr ) \
+        VMS_impl__get_probe_by_name( probeName, animPr )
+
+void
+VMS_impl__record_sched_choice_into_probe( int32 probeID, VirtProcr *animPr );
+#define VMS__record_sched_choice_into_probe( probeID, animPr ) \
+        VMS_impl__record_sched_choice_into_probe( probeID, animPr )
+
+void
+VMS_impl__record_interval_start_in_probe( int32 probeID );
+#define VMS__record_interval_start_in_probe( probeID ) \
+        VMS_impl__record_interval_start_in_probe( probeID )
+
+void
+VMS_impl__record_interval_end_in_probe( int32 probeID );
+#define VMS__record_interval_end_in_probe( probeID ) \
+        VMS_impl__record_interval_end_in_probe( probeID )
+
+void
+VMS_impl__print_stats_of_probe( int32 probeID );
+#define VMS__print_stats_of_probe( probeID ) \
+        VMS_impl__print_stats_of_probe( probeID )
+
+void
+VMS_impl__print_stats_of_all_probes();
+#define VMS__print_stats_of_all_probes \
+        VMS_impl__print_stats_of_all_probes
+
+
+#else
+int32
+VMS_impl__record_time_point_into_new_probe( char *nameStr,VirtProcr *animPr);
+#define VMS__record_time_point_into_new_probe( nameStr, animPr ) \
+       0 /* do nothing */
+
+   //prototype matches the one-parameter definition in probes.c
+int32
+VMS_ext_impl__record_time_point_into_new_probe( char *nameStr );
+#define VMS_ext__record_time_point_into_new_probe( nameStr ) \
+       0 /* do nothing */
+
+
+int32
+VMS_impl__create_single_interval_probe( char *nameStr, VirtProcr *animPr );
+#define VMS__create_single_interval_probe( nameStr, animPr ) \
+       0 /* do nothing */
+
+
+int32
+VMS_impl__create_histogram_probe( int32   numBins, float64    startValue,
+               float64 binWidth, char    *nameStr, VirtProcr *animPr );
+#define VMS__create_histogram_probe(      numBins, startValue,              \
+                                          binWidth, nameStr, animPr )       \
+       0 /* do nothing */
+
+void
+VMS_impl__free_probe( IntervalProbe *probe );
+#define VMS__free_probe( probe ) \
+       /* do nothing */
+
+void
+VMS_impl__index_probe_by_its_name( int32 probeID, VirtProcr *animPr );
+#define VMS__index_probe_by_its_name( probeID, animPr ) \
+       /* do nothing */
+
+IntervalProbe *
+VMS_impl__get_probe_by_name( char *probeName, VirtProcr *animPr );
+#define VMS__get_probe_by_name( probeName, animPr ) \
+       NULL /* do nothing */
+
+void
+VMS_impl__record_sched_choice_into_probe( int32 probeID, VirtProcr *animPr );
+#define VMS__record_sched_choice_into_probe( probeID, animPr ) \
+       /* do nothing */
+
+void
+VMS_impl__record_interval_start_in_probe( int32 probeID );
+#define VMS__record_interval_start_in_probe( probeID ) \
+       /* do nothing */
+
+void
+VMS_impl__record_interval_end_in_probe( int32 probeID );
+#define VMS__record_interval_end_in_probe( probeID ) \
+       /* do nothing */
+
+void
+VMS_impl__print_stats_of_probe( int32 probeID );
+#define VMS__print_stats_of_probe( probeID ) \
+       /* do nothing */
+
+void
+VMS_impl__print_stats_of_all_probes();
+#define VMS__print_stats_of_all_probes \
+       /* do nothing */
+
+#endif   /* STATS__TURN_ON_PROBES || STATS__ENABLE_PROBES */
+
+#endif	/* _PROBES_H */
+
diff -r 4fbc2165e493 -r 984f7d78bfdf vmalloc.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/vmalloc.c Thu Nov 11 06:19:51 2010 -0800
@@ -0,0 +1,327 @@
+/*
+ * Copyright 2009 OpenSourceCodeStewardshipFoundation.org
+ * Licensed under GNU General Public License version 2
+ *
+ * Author: seanhalle@yahoo.com
+ *
+ * Created on November 14, 2009, 9:07 PM
+ */
+
+//NOTE(review): the two system header names were lost when this patch was
+// extracted; these are inferred from the printf, malloc, free and exit
+// calls in this file -- confirm against the repository.
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "VMS.h"
+
+/*Helper function
+ *Insert a newly generated free chunk into the first spot on the free list.
+ * The chunk is cast as a MallocProlog, so the various pointers in it are
+ * accessed with C's help -- and the size of the prolog is easily added to
+ * the pointer when a chunk is returned to the app -- so C handles changes
+ * in pointer sizes among machines.
+ *
+ *The list head is a normal MallocProlog struct -- identified by its
+ * prevChunkInFreeList being NULL -- the only one.
+ *
+ *The end of the list is identified by next chunk being NULL, as usual.
+ */
+void inline
+add_chunk_to_free_list( MallocProlog *chunk, MallocProlog *listHead )
+ {
+      //splice chunk in directly after the head
+   chunk->nextChunkInFreeList = listHead->nextChunkInFreeList;
+   if( chunk->nextChunkInFreeList != NULL ) //if not last in free list
+      chunk->nextChunkInFreeList->prevChunkInFreeList = chunk;
+   chunk->prevChunkInFreeList = listHead;
+   listHead->nextChunkInFreeList = chunk;
+ }
+
+
+/*This is sequential code, meant to only be called from the Master, not from
+ * any slave VPs.
+ *Search down list, checking size by the nextHigherInMem pointer, to find
+ * first chunk bigger than size needed.
+ *Shave off the extra and make it into a new free-list element, hook it in
+ * then return the address of the found element plus size of prolog.
+ *
+ *Returns NULL (after reporting through ERROR) when no chunk in the free
+ * list is large enough.
+ */
+void *
+VMS__malloc( int32 sizeRequested )
+ { MallocProlog *foundElem = NULL, *currElem, *newElem;
+   int32         amountExtra, foundElemIsTopOfHeap, sizeConsumed,sizeOfFound;
+
+      //step up the size to be aligned at 16-byte boundary, prob better ways
+      //note: this always grows the request, by 16 when it is already a
+      // multiple of 16
+   sizeRequested = ((sizeRequested + 16) >> 4) << 4;
+   currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList;
+
+      //first-fit search down the free list
+   while( currElem != NULL )
+    {    //check if size of currElem is big enough
+         //a chunk's size, prolog included, is the distance to its
+         // next-higher neighbour
+      sizeOfFound=(int32)((char*)currElem->nextHigherInMem -(char*)currElem);
+      amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog);
+      if( amountExtra > 0 )
+       {    //found it, get out of loop
+         foundElem = currElem;
+         currElem = NULL;
+       }
+      else
+         currElem = currElem->nextChunkInFreeList;
+    }
+
+   if( foundElem == NULL )
+    { ERROR("\nmalloc failed\n")
+      return (void *)NULL;  //indicates malloc failed
+    }
+      //Using a kludge to identify the element that is the top chunk in the
+      // heap -- saving top-of-heap addr in head's nextHigherInMem -- and
+      // save addr of start of heap in head's nextLowerInMem
+      //Will handle top of Heap specially
+   foundElemIsTopOfHeap = foundElem->nextHigherInMem ==
+                          _VMSMasterEnv->freeListHead->nextHigherInMem;
+
+      //before shave off and try to insert new elem, remove found elem
+      //note, foundElem will never be the head, so always has valid prevChunk
+   foundElem->prevChunkInFreeList->nextChunkInFreeList =
+                                              foundElem->nextChunkInFreeList;
+   if( foundElem->nextChunkInFreeList != NULL )
+    { foundElem->nextChunkInFreeList->prevChunkInFreeList =
+                                              foundElem->prevChunkInFreeList;
+    }
+   foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated
+
+      //if enough, turn extra into new elem & insert it
+      //a remainder of 64 bytes or less is not split off; the caller gets
+      // the whole chunk
+   if( amountExtra > 64 )
+    {    //make new elem by adding to addr of curr elem then casting
+      sizeConsumed = sizeof(MallocProlog) + sizeRequested;
+      newElem = (MallocProlog *)( (char *)foundElem + sizeConsumed );
+      newElem->nextHigherInMem = foundElem->nextHigherInMem;
+      newElem->nextLowerInMem  = foundElem;
+      foundElem->nextHigherInMem = newElem;
+
+      if( ! foundElemIsTopOfHeap )
+       {    //there is no next higher for top of heap, so can't write to it
+         newElem->nextHigherInMem->nextLowerInMem = newElem;
+       }
+      add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead );
+    }
+   else
+    {
+      sizeConsumed = sizeOfFound;
+    }
+      //running total of bytes handed out, prologs included
+   _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed;
+
+      //skip over the prolog by adding its size to the pointer return
+   return (void *)((char *)foundElem + sizeof(MallocProlog));
+ }
+
+
+/*This is sequential code -- only to be called from the Master
+ * When free, subtract the size of prolog from pointer, then cast it to a
+ * MallocProlog.  Then check the nextLower and nextHigher chunks to see if
+ * one or both are also free, and coalesce if so, and if neither free, then
+ * add this one to free-list.
+ */
+void
+VMS__free( void *ptrToFree )
+ { MallocProlog *elemToFree, *nextLowerElem, *nextHigherElem, *topOfHeap;
+   int32         lowerExistsAndIsFree, higherExistsAndIsFree, sizeOfElem;
+
+   if( ptrToFree < (void*)_VMSMasterEnv->freeListHead->nextLowerInMem ||
+       ptrToFree > (void*)_VMSMasterEnv->freeListHead->nextHigherInMem )
+    {    //outside the range of data owned by VMS's malloc, so do nothing
+      return;
+    }
+      //subtract size of prolog to get pointer to prolog, then cast
+   elemToFree = (MallocProlog *)((char *)ptrToFree - sizeof(MallocProlog));
+   sizeOfElem =(int32)((char*)elemToFree->nextHigherInMem-(char*)elemToFree);
+
+      //allocated chunks carry a NULL free-list back pointer
+   if( elemToFree->prevChunkInFreeList != NULL )
+    { printf( "error: freeing same element twice!" ); exit(1);
+    }
+
+   _VMSMasterEnv->amtOfOutstandingMem -= sizeOfElem;
+
+   nextLowerElem  = elemToFree->nextLowerInMem;
+   nextHigherElem = elemToFree->nextHigherInMem;
+
+      //The top chunk's nextHigherInMem is not NULL -- it holds the
+      // top-of-heap address, the same value kept in the list head's
+      // nextHigherInMem.  That address is one past the heap, so it must be
+      // treated as "no higher chunk" and never dereferenced.
+   topOfHeap = _VMSMasterEnv->freeListHead->nextHigherInMem;
+   if( nextHigherElem == NULL || nextHigherElem == topOfHeap )
+      higherExistsAndIsFree = FALSE;
+   else //okay exists, now check if in the free-list by checking back ptr
+      higherExistsAndIsFree = (nextHigherElem->prevChunkInFreeList != NULL);
+
+   if( nextLowerElem == NULL )
+      lowerExistsAndIsFree = FALSE;
+   else //okay, it exists, now check if it's free
+      lowerExistsAndIsFree = (nextLowerElem->prevChunkInFreeList != NULL);
+
+
+      //now, know what exists and what's free
+   if( lowerExistsAndIsFree )
+    { if( higherExistsAndIsFree )
+       {    //both exist and are free, so coalesce all three
+            //First, remove higher from free-list
+         nextHigherElem->prevChunkInFreeList->nextChunkInFreeList =
+                                         nextHigherElem->nextChunkInFreeList;
+         if( nextHigherElem->nextChunkInFreeList != NULL ) //end-of-list?
+            nextHigherElem->nextChunkInFreeList->prevChunkInFreeList =
+                                         nextHigherElem->prevChunkInFreeList;
+            //Now, fix-up sequence-in-mem list -- by side-effect, this also
+            // changes size of the lower elem, which is still in free-list
+         nextLowerElem->nextHigherInMem = nextHigherElem->nextHigherInMem;
+         if( nextHigherElem->nextHigherInMem !=
+             _VMSMasterEnv->freeListHead->nextHigherInMem )
+            nextHigherElem->nextHigherInMem->nextLowerInMem = nextLowerElem;
+            //notice didn't do anything to elemToFree -- it simply is no
+            // longer reachable from any of the lists.  Wonder if could be a
+            // security leak because left valid addresses in it,
+            // but don't care for now.
+       }
+      else
+       {    //lower is the only of the two that exists and is free,
+            //In this case, no adjustment to free-list, just change mem-list.
+            // By side-effect, changes size of the lower elem
+         nextLowerElem->nextHigherInMem = elemToFree->nextHigherInMem;
+         if( elemToFree->nextHigherInMem !=
+             _VMSMasterEnv->freeListHead->nextHigherInMem )
+            elemToFree->nextHigherInMem->nextLowerInMem = nextLowerElem;
+       }
+    }
+   else
+    {    //lower either doesn't exist or isn't free, so check higher
+      if( higherExistsAndIsFree )
+       {    //higher exists and is the only of the two free
+            //First, in free-list, replace higher elem with the one to free
+         elemToFree->nextChunkInFreeList=nextHigherElem->nextChunkInFreeList;
+         elemToFree->prevChunkInFreeList=nextHigherElem->prevChunkInFreeList;
+         elemToFree->prevChunkInFreeList->nextChunkInFreeList = elemToFree;
+         if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list?
+            elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree;
+            //Now chg mem-list. By side-effect, changes size of elemToFree
+         elemToFree->nextHigherInMem = nextHigherElem->nextHigherInMem;
+         if( elemToFree->nextHigherInMem !=
+             _VMSMasterEnv->freeListHead->nextHigherInMem )
+            elemToFree->nextHigherInMem->nextLowerInMem = elemToFree;
+       }
+      else
+       {    //neither lower nor higher is availabe to coalesce so add to list
+            // this makes prev chunk ptr non-null, which indicates it's free
+         elemToFree->nextChunkInFreeList =
+                            _VMSMasterEnv->freeListHead->nextChunkInFreeList;
+         _VMSMasterEnv->freeListHead->nextChunkInFreeList = elemToFree;
+         if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list?
+            elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree;
+         elemToFree->prevChunkInFreeList = _VMSMasterEnv->freeListHead;
+       }
+    }
+
+ }
+
+
+/*Allocates memory from the external system -- higher overhead
+ *
+ *Because of Linux's malloc throwing bizarre random faults when malloc is
+ * used inside a VMS virtual processor, have to pass this as a request and
+ * have the core loop do it when it gets around to it -- will look for these
+ * chores leftover from the previous animation of masterVP the next time it
+ * goes to animate the masterVP -- so it takes two separate masterVP
+ * animations, separated by work, to complete an external malloc or
+ * external free request.
+ *
+ *Thinking core loop accepts signals -- just looks if signal-location is
+ * empty or not --
+ *
+ *For now the request mechanism described above is only sketched in the
+ * commented-out code; the function calls the system malloc directly.
+ */
+void *
+VMS__malloc_in_ext( int32 sizeRequested )
+ {
+ /*
+      //This is running in the master, so no chance for multiple cores to be
+      // competing for the core's flag.
+   if( *(_VMSMasterEnv->coreLoopSignalAddr[ 0 ]) != 0 )
+    {    //something has already signalled to core loop, so save the signal
+         // and look, next time master animated, to see if can send it.
+         //Note, the addr to put a signal is in the coreloop's frame, so just
+         // checks it each time through -- make it volatile to avoid GCC
+         // optimizations -- it's a coreloop local var that only changes
+         // after jumping away.  The signal includes the addr to send the
+         //return to -- even if just empty return completion-signal
+         //
+         //save the signal in some queue that the master looks at each time
+         // it starts up -- one loc says if empty for fast common case --
+         //something like that -- want to hide this inside this call -- but
+         // think this has to come as a request -- req handler gives procr
+         // back to master loop, which gives it back to req handler at point
+         // it sees that core loop has sent return signal.  Something like
+         // that.
+      saveTheSignal
+
+    }
+   coreSigData->type = malloc;
+   coreSigData->sizeToMalloc = sizeRequested;
+   coreSigData->locToSignalCompletion = &figureOut;
+   _VMSMasterEnv->coreLoopSignals[ 0 ] = coreSigData;
+  */
+      //just risk system-stack faults until get this figured out
+   return malloc( sizeRequested );
+ }
+
+
+/*Frees memory that was allocated in the external system -- higher overhead
+ *
+ *As noted in external malloc comment, this is clunky 'cause the free has
+ * to be called in the core loop.
+ */
+void
+VMS__free_in_ext( void *ptrToFree )
+ {
+      //just risk system-stack faults until get this figured out
+   free( ptrToFree );
+
+      //TODO: fix this -- so
+ }
+
+
+/*Designed to be called from the main thread outside of VMS, during init
+ *
+ *Allocates the list head and one chunk of
+ * MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE bytes from the system malloc, links
+ * the chunk in as the only free element, and returns the head.  Prints a
+ * message and exits if either allocation fails.
+ */
+MallocProlog *
+VMS_ext__create_free_list()
+ { MallocProlog *freeListHead, *firstChunk;
+
+      //Note, this is running in the main thread -- all increases in malloc
+      // mem and all frees of it must be done in this thread, with the
+      // thread's original stack available
+   freeListHead = malloc( sizeof(MallocProlog) );
+   firstChunk   = malloc( MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE );
+      //both pointers are written through below, so check both
+   if( freeListHead == NULL || firstChunk == NULL )
+    { printf("malloc error\n"); exit(1); }
+
+   freeListHead->prevChunkInFreeList = NULL;
+      //Use this addr to free the heap when cleanup
+   freeListHead->nextLowerInMem      = firstChunk;
+      //to identify top-of-heap elem, compare this addr to elem's next higher
+   freeListHead->nextHigherInMem     = (void*)( (char*)firstChunk +
+                                         MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE);
+   freeListHead->nextChunkInFreeList = firstChunk;
+
+   firstChunk->nextChunkInFreeList = NULL;
+   firstChunk->prevChunkInFreeList = freeListHead;
+      //next Higher has to be set to top of chunk, so can calc size in malloc
+   firstChunk->nextHigherInMem     = (void*)( (char*)firstChunk +
+                                         MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE);
+   firstChunk->nextLowerInMem      = NULL; //identifies as bott of heap
+
+   _VMSMasterEnv->amtOfOutstandingMem = 0; //none allocated yet
+
+   return freeListHead;
+ }
+
+
+/*Designed to be called from the main thread outside of VMS, during cleanup
+ */
+void
+VMS_ext__free_free_list( MallocProlog *freeListHead )
+ {
+      //stashed a ptr to the one and only big chunk malloc'd from OS in the
+      // free list head's next lower in mem pointer
+   free( freeListHead->nextLowerInMem );
+
+      //don't free the head -- it'll be in an array eventually -- free whole
+      // array when all the free lists linked from it have already been freed
+ }
+
diff -r 4fbc2165e493 -r 984f7d78bfdf vmalloc.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/vmalloc.h Thu Nov 11 06:19:51 2010 -0800
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2009 OpenSourceCodeStewardshipFoundation.org
+ * Licensed under GNU General Public License version 2
+ *
+ * Author: seanhalle@yahoo.com
+ *
+ * Created on November 14, 2009, 9:07 PM
+ */
+
+#ifndef _VMALLOC_H
+#define _VMALLOC_H
+
+//NOTE(review): the system header name was lost from the next line (only a
+// bare "#include" survived); <stdlib.h> is assumed -- confirm against repo
+#include <stdlib.h>
+#include "VMS_primitive_data_types.h"
+
+typedef struct _MallocProlog MallocProlog;
+
+/*Bookkeeping header for one chunk: two links for the free list and two
+ * links to the chunk's neighbours in address order.
+ */
+struct _MallocProlog
+ {
+   MallocProlog *nextChunkInFreeList;
+   MallocProlog *prevChunkInFreeList;
+   MallocProlog *nextHigherInMem;
+   MallocProlog *nextLowerInMem;
+ };
+//MallocProlog
+
+typedef struct
+ {
+   MallocProlog *firstChunkInFreeList;
+   int32         numInList;
+ }
+FreeListHead;
+
+/*Allocates from the VMS-managed heap
+ */
+void *
+VMS__malloc( int32 sizeRequested );
+
+/*Returns a chunk to the VMS-managed heap
+ */
+void
+VMS__free( void *ptrToFree );
+
+/*Allocates memory from the external system -- higher overhead
+ */
+void *
+VMS__malloc_in_ext( int32 sizeRequested );
+
+/*Frees memory that was allocated in the external system -- higher overhead
+ */
+void
+VMS__free_in_ext( void *ptrToFree );
+
+
+/*Builds the free list and its initial chunk; called from the main thread
+ */
+MallocProlog *
+VMS_ext__create_free_list();
+
+/*Releases the chunk obtained by VMS_ext__create_free_list
+ */
+void
+VMS_ext__free_free_list( MallocProlog *freeListHead );
+
+#endif /* _VMALLOC_H */