# HG changeset patch # User Me@portablequad # Date 1329025645 28800 # Node ID 651ee45615ae3692f7b0e077bd81c8f6ab0eade9 # Parent 7cff4e13d5c423299ec9e8ea2d687a90f75f8a50 made default brch deprecated (see README) diff -r 7cff4e13d5c4 -r 651ee45615ae .hgignore --- a/.hgignore Fri Feb 10 12:05:17 2012 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,3 +0,0 @@ -syntax: glob - -*.o diff -r 7cff4e13d5c4 -r 651ee45615ae .hgtags --- a/.hgtags Fri Feb 10 12:05:17 2012 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -9c3107044f86c36fea3a8f72f64910b1363555be Dec27_2010_about_to_add_sched_record diff -r 7cff4e13d5c4 -r 651ee45615ae CoreLoop.c --- a/CoreLoop.c Fri Feb 10 12:05:17 2012 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,215 +0,0 @@ -/* - * Copyright 2010 OpenSourceStewardshipFoundation - * - * Licensed under BSD - */ - - -#include "VMS.h" -#include "Queue_impl/BlockingQueue.h" -#include "ProcrContext.h" - -#include -#include -#include - -#include -#include - -void *terminateCoreLoop(VirtProcr *currPr); - -/*This is the loop that runs in the OS Thread pinned to each core - *Get virt procr from queue, - * save state of current animator, then load in state of virt procr, using - * jmp instr to switch the program-counter state -- making the virt procr - * the new animator. - *At some point, the virt procr will suspend itself by saving out its - * animator state (stack ptr, frame ptr, program counter) and switching - * back to the OS Thread's animator state, which means restoring the - * stack and frame and jumping to the core loop start point. - *This cycle then repeats, until a special shutdown virtual processor is - * animated, which jumps to the end point at the bottom of core loop. 
- */ -void * -coreLoop( void *paramsIn ) - { - ThdParams *coreLoopThdParams; - int thisCoresIdx; - VirtProcr *currPr; - VMSQueueStruc *readyToAnimateQ; - cpu_set_t coreMask; //has 1 in bit positions of allowed cores - int errorCode; - - //work-stealing struc on stack to prevent false-sharing in cache-line - volatile GateStruc gate; - //preGateProgress, waitProgress, exitProgress, gateClosed; - - - coreLoopThdParams = (ThdParams *)paramsIn; - thisCoresIdx = coreLoopThdParams->coreNum; - - gate.gateClosed = FALSE; - gate.preGateProgress = 0; - gate.waitProgress = 0; - gate.exitProgress = 0; - _VMSMasterEnv->workStealingGates[ thisCoresIdx ] = (GateStruc*)&gate;//race @startup - - //wait until signalled that setup is complete - pthread_mutex_lock( &suspendLock ); - while( !(_VMSMasterEnv->setupComplete) ) - { - pthread_cond_wait( &suspend_cond, - &suspendLock ); - } - pthread_mutex_unlock( &suspendLock ); - - //printf( "\nCore unsuspended: %d\n", coreLoopThdParams->coreNum ); - - //set thread affinity - //Linux requires pinning thd to core inside thread-function - //Designate a core by a 1 in bit-position corresponding to the core - CPU_ZERO(&coreMask); - CPU_SET(coreLoopThdParams->coreNum,&coreMask); - //coreMask = 1L << coreLoopThdParams->coreNum; - - pthread_t selfThd = pthread_self(); - errorCode = - pthread_setaffinity_np( selfThd, sizeof(coreMask), &coreMask); - - if(errorCode){ printf("\nset affinity failure\n"); exit(0); } - - - //Save the return address in the SwitchVP function - saveCoreLoopReturnAddr((void**)&(_VMSMasterEnv->coreLoopReturnPt)); - - - while(1){ - - //Get virtual processor from queue - //The Q must be a global, static volatile var, so not kept in reg, - // which forces reloading the pointer after each jmp to this point - readyToAnimateQ = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx]; - - #ifdef USE_WORK_STEALING - //Alg for work-stealing designed to make common case fast. Comment - // in stealer code explains. 
- gate.preGateProgress++; - if( gate.gateClosed ) - { //now, set coreloop's progress, so stealer can see that core loop - // has made it into the waiting area. - gate.waitProgress = gate.preGateProgress; - while( gate.gateClosed ) /*busy wait*/; - } - - currPr = (VirtProcr *) readVMSQ( readyToAnimateQ ); - - //Set the coreloop's progress, so stealer can see it has made it out - // of the protected area - gate.exitProgress = gate.preGateProgress; - #else - currPr = (VirtProcr *) readVMSQ( readyToAnimateQ ); - #endif - - if( currPr != NULL ) _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0; - else - { - //============================= MEASUREMENT STUFF ===================== - #ifdef MEAS__TIME_MASTER_LOCK - int32 startStamp, endStamp; - saveLowTimeStampCountInto( startStamp ); - #endif - //===================================================================== - int tries = 0; int gotLock = 0; - while( currPr == NULL ) //if queue was empty, enter get masterLock loop - { //queue was empty, so get master lock - - gotLock = __sync_bool_compare_and_swap(&(_VMSMasterEnv->masterLock), - UNLOCKED, LOCKED ); - if( gotLock ) - { //run own MasterVP -- jmps to coreLoops startPt when done - currPr = _VMSMasterEnv->masterVPs[thisCoresIdx]; - if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 ) - { DEBUG( dbgB2BMaster,"Many back to back MasterVPs\n"); - pthread_yield(); - } - _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1; - break; //end while -- have a VP to animate now - } - - tries++; //if too many, means master on other core taking too long - if( tries > MASTERLOCK_RETRIES ) { tries = 0; pthread_yield(); } - } - //============================= MEASUREMENT STUFF ===================== - #ifdef MEAS__TIME_MASTER_LOCK - saveLowTimeStampCountInto( endStamp ); - addIntervalToHist( startStamp, endStamp, - _VMSMasterEnv->masterLockLowTimeHist ); - addIntervalToHist( startStamp, endStamp, - _VMSMasterEnv->masterLockHighTimeHist ); - #endif - 
//===================================================================== - - } - - - switchToVP(currPr); //The VPs return in here - flushRegisters(); - }//CoreLoop - } - - -void * -terminateCoreLoop(VirtProcr *currPr){ - //first free shutdown VP that jumped here -- it first restores the - // coreloop's stack, so addr of currPr in stack frame is still correct - VMS__dissipate_procr( currPr ); - pthread_exit( NULL ); -} - - - -#ifdef SEQUENTIAL - -//=========================================================================== -/*This sequential version is exact same as threaded, except doesn't do the - * pin-threads part, nor the wait until setup complete part. - */ -void * -coreLoop_Seq( void *paramsIn ) - { - VirtProcr *currPr; - VMSQueueStruc *readyToAnimateQ; - - ThdParams *coreLoopThdParams; - int thisCoresIdx; - - coreLoopThdParams = (ThdParams *)paramsIn; -// thisCoresIdx = coreLoopThdParams->coreNum; - thisCoresIdx = 0; - - //Save the return address in the SwitchVP function - saveCoreLoopReturnAddr(&(_VMSMasterEnv->coreLoopReturnPt)); - - - while(1){ - //Get virtual processor from queue - //_VMSWorkQ must be a global, static volatile var, so not kept in reg, - // which forces reloading the pointer after each jmp to this point - readyToAnimateQ = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx]; - currPr = (VirtProcr *) readVMSQ( readyToAnimateQ ); - if( currPr == NULL ) - { if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 ) - { printf("too many back to back MasterVP\n"); exit(1); } - _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1; - - currPr = _VMSMasterEnv->masterVPs[thisCoresIdx]; - } - else - _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0; - - - switchToVP( currPr ); - flushRegisters(); - } - } -#endif diff -r 7cff4e13d5c4 -r 651ee45615ae MasterLoop.c --- a/MasterLoop.c Fri Feb 10 12:05:17 2012 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,373 +0,0 @@ -/* - * Copyright 2010 OpenSourceStewardshipFoundation - * - * Licensed under BSD - */ - 
- - -#include -#include - -#include "VMS.h" -#include "ProcrContext.h" - - -//=========================================================================== -void inline -stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ, - VirtProcr *masterPr ); - -//=========================================================================== - - - -/*This code is animated by the virtual Master processor. - * - *Polls each sched slot exactly once, hands any requests made by a newly - * done slave to the "request handler" plug-in function - * - *Any slots that need a virt procr assigned are given to the "schedule" - * plug-in function, which tries to assign a virt procr (slave) to it. - * - *When all slots needing a processor have been given to the schedule plug-in, - * a fraction of the procrs successfully scheduled are put into the - * work queue, then a continuation of this function is put in, then the rest - * of the virt procrs that were successfully scheduled. - * - *The first thing the continuation does is busy-wait until the previous - * animation completes. This is because an (unlikely) continuation may - * sneak through queue before previous continuation is done putting second - * part of scheduled slaves in, which is the only race condition. - * - */ - -/*May 29, 2010 -- birth a Master during init so that first core loop to - * start running gets it and does all the stuff for a newly born -- - * from then on, will be doing continuation, but do suspension self - * directly at end of master loop - *So VMS__init just births the master virtual processor same way it births - * all the others -- then does any extra setup needed and puts it into the - * work queue. - *However means have to make masterEnv a global static volatile the same way - * did with readyToAnimateQ in core loop. -- for performance, put the - * jump to the core loop directly in here, and have it directly jump back. 
- * - * - *Aug 18, 2010 -- Going to a separate MasterVP for each core, to see if this - * avoids the suspected bug in the system stack that causes bizarre faults - * at random places in the system code. - * - *So, this function is coupled to each of the MasterVPs, -- meaning this - * function can't rely on a particular stack and frame -- each MasterVP that - * animates this function has a different one. - * - *At this point, the masterLoop does not write itself into the queue anymore, - * instead, the coreLoop acquires the masterLock when it has nothing to - * animate, and then animates its own masterLoop. However, still try to put - * several AppVPs into the queue to amortize the startup cost of switching - * to the MasterVP. Note, don't have to worry about latency of requests much - * because most requests generate work for same core -- only latency issue - * is case when other cores starved and one core's requests generate work - * for them -- so keep max in queue to 3 or 4.. - */ -void masterLoop( void *initData, VirtProcr *animatingPr ) - { - int32 slotIdx, numSlotsFilled; - VirtProcr *schedVirtPr; - SchedSlot *currSlot, **schedSlots; - MasterEnv *masterEnv; - VMSQueueStruc *readyToAnimateQ; - - SlaveScheduler slaveScheduler; - RequestHandler requestHandler; - void *semanticEnv; - - int32 thisCoresIdx; - VirtProcr *masterPr; - volatile VirtProcr *volatileMasterPr; - - volatileMasterPr = animatingPr; - masterPr = (VirtProcr*)volatileMasterPr; //used to force re-define after jmp - - //First animation of each MasterVP will in turn animate this part - // of setup code.. 
(VP creator sets up the stack as if this function - // was called normally, but actually get here by jmp) - //So, setup values about stack ptr, jmp pt and all that - //masterPr->nextInstrPt = &&masterLoopStartPt; - - - //Note, got rid of writing the stack and frame ptr up here, because - // only one - // core can ever animate a given MasterVP, so don't need to communicate - // new frame and stack ptr to the MasterVP storage before a second - // version of that MasterVP can get animated on a different core. - //Also got rid of the busy-wait. - - - //masterLoopStartPt: - while(1){ - - //============================= MEASUREMENT STUFF ======================== - #ifdef MEAS__TIME_MASTER - //Total Master time includes one coreloop time -- just assume the core - // loop time is same for Master as for AppVPs, even though it may be - // smaller due to higher predictability of the fixed jmp. - saveLowTimeStampCountInto( masterPr->startMasterTSCLow ); - #endif - //======================================================================== - - masterEnv = (MasterEnv*)_VMSMasterEnv; - - //GCC may optimize so doesn't always re-define from frame-storage - masterPr = (VirtProcr*)volatileMasterPr; //just to make sure after jmp - thisCoresIdx = masterPr->coreAnimatedBy; - readyToAnimateQ = masterEnv->readyToAnimateQs[thisCoresIdx]; - schedSlots = masterEnv->allSchedSlots[thisCoresIdx]; - - requestHandler = masterEnv->requestHandler; - slaveScheduler = masterEnv->slaveScheduler; - semanticEnv = masterEnv->semanticEnv; - - - //Poll each slot's Done flag - numSlotsFilled = 0; - for( slotIdx = 0; slotIdx < NUM_SCHED_SLOTS; slotIdx++) - { - currSlot = schedSlots[ slotIdx ]; - - if( currSlot->workIsDone ) - { - currSlot->workIsDone = FALSE; - currSlot->needsProcrAssigned = TRUE; - - //process requests from slave to master - //====================== MEASUREMENT STUFF =================== - #ifdef MEAS__TIME_PLUGIN - int32 startStamp1, endStamp1; - saveLowTimeStampCountInto( startStamp1 ); - 
#endif - //============================================================ - (*requestHandler)( currSlot->procrAssignedToSlot, semanticEnv ); - //====================== MEASUREMENT STUFF =================== - #ifdef MEAS__TIME_PLUGIN - saveLowTimeStampCountInto( endStamp1 ); - addIntervalToHist( startStamp1, endStamp1, - _VMSMasterEnv->reqHdlrLowTimeHist ); - addIntervalToHist( startStamp1, endStamp1, - _VMSMasterEnv->reqHdlrHighTimeHist ); - #endif - //============================================================ - } - if( currSlot->needsProcrAssigned ) - { //give slot a new virt procr - schedVirtPr = - (*slaveScheduler)( semanticEnv, thisCoresIdx ); - - if( schedVirtPr != NULL ) - { currSlot->procrAssignedToSlot = schedVirtPr; - schedVirtPr->schedSlot = currSlot; - currSlot->needsProcrAssigned = FALSE; - numSlotsFilled += 1; - - writeVMSQ( schedVirtPr, readyToAnimateQ ); - } - } - } - - - #ifdef USE_WORK_STEALING - //If no slots filled, means no more work, look for work to steal. - if( numSlotsFilled == 0 ) - { gateProtected_stealWorkInto( currSlot, readyToAnimateQ, masterPr ); - } - #endif - - - #ifdef MEAS__TIME_MASTER - saveLowTimeStampCountInto( masterPr->endMasterTSCLow ); - #endif - - masterSwitchToCoreLoop(animatingPr); - flushRegisters(); - }//MasterLoop - - - } - - - -/*This has a race condition -- the coreloops are accessing their own queues - * at the same time that this work-stealer on a different core is trying to - */ -void inline -stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ, - VirtProcr *masterPr ) - { - VirtProcr *stolenPr; - int32 coreIdx, i; - VMSQueueStruc *currQ; - - stolenPr = NULL; - coreIdx = masterPr->coreAnimatedBy; - for( i = 0; i < NUM_CORES -1; i++ ) - { - if( coreIdx >= NUM_CORES -1 ) - { coreIdx = 0; - } - else - { coreIdx++; - } - currQ = _VMSMasterEnv->readyToAnimateQs[coreIdx]; - if( numInVMSQ( currQ ) > 0 ) - { stolenPr = readVMSQ (currQ ); - break; - } - } - - if( stolenPr != NULL ) - { 
currSlot->procrAssignedToSlot = stolenPr; - stolenPr->schedSlot = currSlot; - currSlot->needsProcrAssigned = FALSE; - - writeVMSQ( stolenPr, readyToAnimateQ ); - } - } - -/*This algorithm makes the common case fast. Make the coreloop passive, - * and show its progress. Make the stealer control a gate that coreloop - * has to pass. - *To avoid interference, only one stealer at a time. Use a global - * stealer-lock. - * - *The pattern is based on a gate -- stealer shuts the gate, then monitors - * to be sure any already past make it all the way out, before starting. - *So, have a "progress" measure just before the gate, then have two after it, - * one is in a "waiting room" outside the gate, the other is at the exit. - *Then, the stealer first shuts the gate, then checks the progress measure - * outside it, then looks to see if the progress measure at the exit is the - * same. If yes, it knows the protected area is empty 'cause no other way - * to get in and the last to get in also exited. - *If the progress measure at the exit is not the same, then the stealer goes - * into a loop checking both the waiting-area and the exit progress-measures - * until one of them shows the same as the measure outside the gate. Might - * as well re-read the measure outside the gate each go around, just to be - * sure. It is guaranteed that one of the two will eventually match the one - * outside the gate. - * - *Here's an informal proof of correctness: - *The gate can be closed at any point, and have only four cases: - * 1) coreloop made it past the gate-closing but not yet past the exit - * 2) coreloop made it past the pre-gate progress update but not yet past - * the gate, - * 3) coreloop is right before the pre-gate update - * 4) coreloop is past the exit and far from the pre-gate update. - * - * Covering the cases in reverse order, - * 4) is not a problem -- stealer will read pre-gate progress, see that it - * matches exit progress, and the gate is closed, so stealer can proceed. 
- * 3) stealer will read pre-gate progress just after coreloop updates it.. - * so stealer goes into a loop until the coreloop causes wait-progress - * to match pre-gate progress, so then stealer can proceed - * 2) same as 3.. - * 1) stealer reads pre-gate progress, sees that it's different than exit, - * so goes into loop until exit matches pre-gate, now it knows coreloop - * is not in protected and cannot get back in, so can proceed. - * - *Implementation for the stealer: - * - *First, acquire the stealer lock -- only cores with no work to do will - * compete to steal, so not a big performance penalty having only one -- - * will rarely have multiple stealers in a system with plenty of work -- and - * in a system with little work, it doesn't matter. - * - *Note, have single-reader, single-writer pattern for all variables used to - * communicate between stealer and victims - * - *So, scan the queues of the core loops, until find non-empty. Each core - * has its own list that it scans. The list goes in order from closest to - * furthest core, so it steals first from close cores. Later can add - * taking info from the app about overlapping footprints, and scan all the - * others then choose work with the most footprint overlap with the contents - * of this core's cache. - * - *Now, have a victim want to take work from. So, shut the gate in that - * coreloop, by setting the "gate closed" var on its stack to TRUE. - *Then, read the core's pre-gate progress and compare to the core's exit - * progress. - *If same, can proceed to take work from the coreloop's queue. When done, - * write FALSE to gate closed var. - *If different, then enter a loop that reads the pre-gate progress, then - * compares to exit progress then to wait progress. When one of two - * matches, proceed. Take work from the coreloop's queue. When done, - * write FALSE to the gate closed var. 
- * - */ -void inline -gateProtected_stealWorkInto( SchedSlot *currSlot, - VMSQueueStruc *myReadyToAnimateQ, - VirtProcr *masterPr ) - { - VirtProcr *stolenPr; - int32 coreIdx, i, haveAVictim, gotLock; - VMSQueueStruc *victimsQ; - - volatile GateStruc *vicGate; - int32 coreMightBeInProtected; - - - - //see if any other cores have work available to steal - haveAVictim = FALSE; - coreIdx = masterPr->coreAnimatedBy; - for( i = 0; i < NUM_CORES -1; i++ ) - { - if( coreIdx >= NUM_CORES -1 ) - { coreIdx = 0; - } - else - { coreIdx++; - } - victimsQ = _VMSMasterEnv->readyToAnimateQs[coreIdx]; - if( numInVMSQ( victimsQ ) > 0 ) - { haveAVictim = TRUE; - vicGate = _VMSMasterEnv->workStealingGates[ coreIdx ]; - break; - } - } - if( !haveAVictim ) return; //no work to steal, exit - - //have a victim core, now get the stealer-lock - gotLock =__sync_bool_compare_and_swap( &(_VMSMasterEnv->workStealingLock), - UNLOCKED, LOCKED ); - if( !gotLock ) return; //go back to core loop, which will re-start master - - - //====== Start Gate-protection ======= - vicGate->gateClosed = TRUE; - coreMightBeInProtected= vicGate->preGateProgress != vicGate->exitProgress; - while( coreMightBeInProtected ) - { //wait until sure - if( vicGate->preGateProgress == vicGate->waitProgress ) - coreMightBeInProtected = FALSE; - if( vicGate->preGateProgress == vicGate->exitProgress ) - coreMightBeInProtected = FALSE; - } - - stolenPr = readVMSQ ( victimsQ ); - - vicGate->gateClosed = FALSE; - //======= End Gate-protection ======= - - - if( stolenPr != NULL ) //victim could have been in protected and taken - { currSlot->procrAssignedToSlot = stolenPr; - stolenPr->schedSlot = currSlot; - currSlot->needsProcrAssigned = FALSE; - - writeVMSQ( stolenPr, myReadyToAnimateQ ); - } - - //unlock the work stealing lock - _VMSMasterEnv->workStealingLock = UNLOCKED; - } diff -r 7cff4e13d5c4 -r 651ee45615ae ProcrContext.c --- a/ProcrContext.c Fri Feb 10 12:05:17 2012 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ 
-1,65 +0,0 @@ -/* - * This File contains all hardware dependent C code. - */ - - -#include "VMS.h" - -/*Create stack, then create __cdecl structure on it and put initialData and - * pointer to the new structure instance into the parameter positions on - * the stack - *Then put function pointer into nextInstrPt -- the stack is setup in std - * call structure, so jumping to function ptr is same as a GCC generated - * function call - *No need to save registers on old stack frame, because there's no old - * animator state to return to -- - * - */ -inline VirtProcr * -create_procr_helper( VirtProcr *newPr, VirtProcrFnPtr fnPtr, - void *initialData, void *stackLocs ) - { - void *stackPtr; - - newPr->startOfStack = stackLocs; - newPr->procrID = _VMSMasterEnv->numProcrsCreated++; - newPr->initialData = initialData; - newPr->requests = NULL; - newPr->schedSlot = NULL; - - /* - * Hardware dependent part - */ - //instead of calling the function directly, call a wrapper function to fetch - //arguments from stack - newPr->nextInstrPt = (VirtProcrFnPtr)&startVirtProcrFn; - - //fnPtr takes two params -- void *initData & void *animProcr - //alloc stack locations, make stackPtr be the highest addr minus room - // for 2 params + return addr. 
Return addr (NULL) is in loc pointed to - // by stackPtr, initData at stackPtr + 8 bytes, animatingPr just above - stackPtr = ( (void *)stackLocs + VIRT_PROCR_STACK_SIZE - 4*sizeof(void*)); - - //setup __cdecl on stack -- coreloop will switch to stackPtr before jmp - *((VirtProcr**)stackPtr + 2 ) = newPr; //rightmost param - *((void**)stackPtr + 1 ) = initialData; //next param to left - *((void**)stackPtr) = (void*)fnPtr; - - /* - * end of Hardware dependent part - */ - - newPr->stackPtr = stackPtr; //core loop will switch to this, then - newPr->framePtr = stackPtr; //suspend loop will save new stack & frame ptr - - //============================= MEASUREMENT STUFF ======================== - #ifdef STATS__TURN_ON_PROBES - //struct timeval timeStamp; - //gettimeofday( &(timeStamp), NULL); - //newPr->createPtInSecs = timeStamp.tv_sec +(timeStamp.tv_usec/1000000.0) - - // _VMSMasterEnv->createPtInSecs; - #endif - //======================================================================== - - return newPr; - } \ No newline at end of file diff -r 7cff4e13d5c4 -r 651ee45615ae ProcrContext.h --- a/ProcrContext.h Fri Feb 10 12:05:17 2012 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,33 +0,0 @@ -/* - * Copyright 2009 OpenSourceStewardshipFoundation.org - * Licensed under GNU General Public License version 2 - * - * Author: seanhalle@yahoo.com - * - */ - -#ifndef _ProcrContext_H -#define _ProcrContext_H -#define _GNU_SOURCE - -void saveCoreLoopReturnAddr(void **returnAddress); - -void switchToVP(VirtProcr *nextProcr); - -void switchToCoreLoop(VirtProcr *nextProcr); - -void masterSwitchToCoreLoop(VirtProcr *nextProcr); - -void startVirtProcrFn(); - -void *asmTerminateCoreLoop(VirtProcr *currPr); - -#define flushRegisters() \ - asm volatile ("":::"%rbx", "%r12", "%r13","%r14","%r15") - -inline VirtProcr * -create_procr_helper( VirtProcr *newPr, VirtProcrFnPtr fnPtr, - void *initialData, void *stackLocs ); - -#endif /* _ProcrContext_H */ - diff -r 7cff4e13d5c4 -r 
651ee45615ae VMS.c --- a/VMS.c Fri Feb 10 12:05:17 2012 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,772 +0,0 @@ -/* - * Copyright 2010 OpenSourceStewardshipFoundation - * - * Licensed under BSD - */ - -#include -#include -#include -#include -#include -#include - -#include "VMS.h" -#include "ProcrContext.h" -#include "Queue_impl/BlockingQueue.h" -#include "Histogram/Histogram.h" - - -#define thdAttrs NULL - -//=========================================================================== -void -shutdownFn( void *dummy, VirtProcr *dummy2 ); - -SchedSlot ** -create_sched_slots(); - -void -create_masterEnv(); - -void -create_the_coreLoop_OS_threads(); - -MallocProlog * -create_free_list(); - -void -endOSThreadFn( void *initData, VirtProcr *animatingPr ); - -pthread_mutex_t suspendLock = PTHREAD_MUTEX_INITIALIZER; -pthread_cond_t suspend_cond = PTHREAD_COND_INITIALIZER; - -//=========================================================================== - -/*Setup has two phases: - * 1) Semantic layer first calls init_VMS, which creates masterEnv, and puts - * the master virt procr into the work-queue, ready for first "call" - * 2) Semantic layer then does its own init, which creates the seed virt - * procr inside the semantic layer, ready to schedule it when - * asked by the first run of the masterLoop. - * - *This part is bit weird because VMS really wants to be "always there", and - * have applications attach and detach.. for now, this VMS is part of - * the app, so the VMS system starts up as part of running the app. - * - *The semantic layer is isolated from the VMS internals by making the - * semantic layer do setup to a state that it's ready with its - * initial virt procrs, ready to schedule them to slots when the masterLoop - * asks. Without this pattern, the semantic layer's setup would - * have to modify slots directly to assign the initial virt-procrs, and put - * them into the readyToAnimateQ itself, breaking the isolation completely. 
- * - * - *The semantic layer creates the initial virt procr(s), and adds its - * own environment to masterEnv, and fills in the pointers to - * the requestHandler and slaveScheduler plug-in functions - */ - -/*This allocates VMS data structures, populates the master VMSProc, - * and master environment, and returns the master environment to the semantic - * layer. - */ -void -VMS__init() - { - create_masterEnv(); - create_the_coreLoop_OS_threads(); - } - -#ifdef SEQUENTIAL - -/*To initialize the sequential version, just don't create the threads - */ -void -VMS__init_Seq() - { - create_masterEnv(); - } - -#endif - -void -create_masterEnv() - { MasterEnv *masterEnv; - VMSQueueStruc **readyToAnimateQs; - int coreIdx; - VirtProcr **masterVPs; - SchedSlot ***allSchedSlots; //ptr to array of ptrs - - - //Make the master env, which holds everything else - _VMSMasterEnv = malloc( sizeof(MasterEnv) ); - - //Very first thing put into the master env is the free-list, seeded - // with a massive initial chunk of memory. - //After this, all other mallocs are VMS__malloc. 
- _VMSMasterEnv->freeListHead = VMS_ext__create_free_list(); - - - //============================= MEASUREMENT STUFF ======================== - #ifdef MEAS__TIME_MALLOC - _VMSMasterEnv->mallocTimeHist = makeFixedBinHistExt( 100, 0, 30, - "malloc_time_hist"); - _VMSMasterEnv->freeTimeHist = makeFixedBinHistExt( 100, 0, 30, - "free_time_hist"); - #endif - #ifdef MEAS__TIME_PLUGIN - _VMSMasterEnv->reqHdlrLowTimeHist = makeFixedBinHistExt( 100, 0, 200, - "plugin_low_time_hist"); - _VMSMasterEnv->reqHdlrHighTimeHist = makeFixedBinHistExt( 100, 0, 200, - "plugin_high_time_hist"); - #endif - //======================================================================== - - //===================== Only VMS__malloc after this ==================== - masterEnv = (MasterEnv*)_VMSMasterEnv; - - //Make a readyToAnimateQ for each core loop - readyToAnimateQs = VMS__malloc( NUM_CORES * sizeof(VMSQueueStruc *) ); - masterVPs = VMS__malloc( NUM_CORES * sizeof(VirtProcr *) ); - - //One array for each core, 3 in array, core's masterVP scheds all - allSchedSlots = VMS__malloc( NUM_CORES * sizeof(SchedSlot *) ); - - _VMSMasterEnv->numProcrsCreated = 0; //used by create procr - for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ ) - { - readyToAnimateQs[ coreIdx ] = makeVMSQ(); - - //Q: should give masterVP core-specific info as its init data? 
- masterVPs[ coreIdx ] = VMS__create_procr( (VirtProcrFnPtr)&masterLoop, (void*)masterEnv ); - masterVPs[ coreIdx ]->coreAnimatedBy = coreIdx; - allSchedSlots[ coreIdx ] = create_sched_slots(); //makes for one core - _VMSMasterEnv->numMasterInARow[ coreIdx ] = 0; - _VMSMasterEnv->workStealingGates[ coreIdx ] = NULL; - } - _VMSMasterEnv->readyToAnimateQs = readyToAnimateQs; - _VMSMasterEnv->masterVPs = masterVPs; - _VMSMasterEnv->masterLock = UNLOCKED; - _VMSMasterEnv->allSchedSlots = allSchedSlots; - _VMSMasterEnv->workStealingLock = UNLOCKED; - - - //Aug 19, 2010: no longer need to place initial masterVP into queue - // because coreLoop now controls -- animates its masterVP when no work - - - //============================= MEASUREMENT STUFF ======================== - #ifdef STATS__TURN_ON_PROBES - _VMSMasterEnv->dynIntervalProbesInfo = - makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->intervalProbes), 200); - - _VMSMasterEnv->probeNameHashTbl = makeHashTable( 1000, &VMS__free ); - - //put creation time directly into master env, for fast retrieval - struct timeval timeStamp; - gettimeofday( &(timeStamp), NULL); - _VMSMasterEnv->createPtInSecs = - timeStamp.tv_sec +(timeStamp.tv_usec/1000000.0); - #endif - #ifdef MEAS__TIME_MASTER_LOCK - _VMSMasterEnv->masterLockLowTimeHist = makeFixedBinHist( 50, 0, 2, - "master lock low time hist"); - _VMSMasterEnv->masterLockHighTimeHist = makeFixedBinHist( 50, 0, 100, - "master lock high time hist"); - #endif - - MakeTheMeasHists(); - //======================================================================== - - } - -SchedSlot ** -create_sched_slots() - { SchedSlot **schedSlots; - int i; - - schedSlots = VMS__malloc( NUM_SCHED_SLOTS * sizeof(SchedSlot *) ); - - for( i = 0; i < NUM_SCHED_SLOTS; i++ ) - { - schedSlots[i] = VMS__malloc( sizeof(SchedSlot) ); - - //Set state to mean "handling requests done, slot needs filling" - schedSlots[i]->workIsDone = FALSE; - schedSlots[i]->needsProcrAssigned = TRUE; - } - return schedSlots; 
- } - - -void -freeSchedSlots( SchedSlot **schedSlots ) - { int i; - for( i = 0; i < NUM_SCHED_SLOTS; i++ ) - { - VMS__free( schedSlots[i] ); - } - VMS__free( schedSlots ); - } - - -void -create_the_coreLoop_OS_threads() - { - //======================================================================== - // Create the Threads - int coreIdx, retCode; - - //Need the threads to be created suspended, and wait for a signal - // before proceeding -- gives time after creating to initialize other - // stuff before the coreLoops set off. - _VMSMasterEnv->setupComplete = 0; - - //Make the threads that animate the core loops - for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ ) - { coreLoopThdParams[coreIdx] = VMS__malloc( sizeof(ThdParams) ); - coreLoopThdParams[coreIdx]->coreNum = coreIdx; - - retCode = - pthread_create( &(coreLoopThdHandles[coreIdx]), - thdAttrs, - &coreLoop, - (void *)(coreLoopThdParams[coreIdx]) ); - if(retCode){printf("ERROR creating thread: %d\n", retCode); exit(1);} - } - } - -/*Semantic layer calls this when it want the system to start running.. - * - *This starts the core loops running then waits for them to exit. 
- */ -void -VMS__start_the_work_then_wait_until_done() - { int coreIdx; - //Start the core loops running - - //tell the core loop threads that setup is complete - //get lock, to lock out any threads still starting up -- they'll see - // that setupComplete is true before entering while loop, and so never - // wait on the condition - pthread_mutex_lock( &suspendLock ); - _VMSMasterEnv->setupComplete = 1; - pthread_mutex_unlock( &suspendLock ); - pthread_cond_broadcast( &suspend_cond ); - - - //wait for all to complete - for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ ) - { - pthread_join( coreLoopThdHandles[coreIdx], NULL ); - } - - //NOTE: do not clean up VMS env here -- semantic layer has to have - // a chance to clean up its environment first, then do a call to free - // the Master env and rest of VMS locations - } - -#ifdef SEQUENTIAL -/*Only difference between version with an OS thread pinned to each core and - * the sequential version of VMS is VMS__init_Seq, this, and coreLoop_Seq. - */ -void -VMS__start_the_work_then_wait_until_done_Seq() - { - //Instead of un-suspending threads, just call the one and only - // core loop (sequential version), in the main thread. - coreLoop_Seq( NULL ); - flushRegisters(); - - } -#endif - -inline VirtProcr * -VMS__create_procr( VirtProcrFnPtr fnPtr, void *initialData ) - { VirtProcr *newPr; - void *stackLocs; - - newPr = VMS__malloc( sizeof(VirtProcr) ); - stackLocs = VMS__malloc( VIRT_PROCR_STACK_SIZE ); - if( stackLocs == 0 ) - { perror("VMS__malloc stack"); exit(1); } - - return create_procr_helper( newPr, fnPtr, initialData, stackLocs ); - } - -/* "ext" designates that it's for use outside the VMS system -- should only - * be called from main thread or other thread -- never from code animated by - * a VMS virtual processor. 
- */ -inline VirtProcr * -VMS_ext__create_procr( VirtProcrFnPtr fnPtr, void *initialData ) - { VirtProcr *newPr; - char *stackLocs; - - newPr = malloc( sizeof(VirtProcr) ); - stackLocs = malloc( VIRT_PROCR_STACK_SIZE ); - if( stackLocs == 0 ) - { perror("malloc stack"); exit(1); } - - return create_procr_helper( newPr, fnPtr, initialData, stackLocs ); - } - - -/*Anticipating multi-tasking - */ -void * -VMS__give_sem_env_for( VirtProcr *animPr ) - { - return _VMSMasterEnv->semanticEnv; - } -//=========================================================================== -/*there is a label inside this function -- save the addr of this label in - * the callingPr struc, as the pick-up point from which to start the next - * work-unit for that procr. If turns out have to save registers, then - * save them in the procr struc too. Then do assembly jump to the CoreLoop's - * "done with work-unit" label. The procr struc is in the request in the - * slave that animated the just-ended work-unit, so all the state is saved - * there, and will get passed along, inside the request handler, to the - * next work-unit for that procr. - */ -void -VMS__suspend_procr( VirtProcr *animatingPr ) - { - - //The request to master will cause this suspended virt procr to get - // scheduled again at some future point -- to resume, core loop jumps - // to the resume point (below), which causes restore of saved regs and - // "return" from this call. 
- //animatingPr->nextInstrPt = &&ResumePt; - - //return ownership of the virt procr and sched slot to Master virt pr - animatingPr->schedSlot->workIsDone = TRUE; - - //=========================== Measurement stuff ======================== - #ifdef MEAS__TIME_STAMP_SUSP - //record time stamp: compare to time-stamp recorded below - saveLowTimeStampCountInto( animatingPr->preSuspTSCLow ); - #endif - //======================================================================= - - switchToCoreLoop(animatingPr); - flushRegisters(); - - //======================================================================= - - #ifdef MEAS__TIME_STAMP_SUSP - //NOTE: only take low part of count -- do sanity check when take diff - saveLowTimeStampCountInto( animatingPr->postSuspTSCLow ); - #endif - - return; - } - - - -/*For this implementation of VMS, it may not make much sense to have the - * system of requests for creating a new processor done this way.. but over - * the scope of single-master, multi-master, mult-tasking, OS-implementing, - * distributed-memory, and so on, this gives VMS implementation a chance to - * do stuff before suspend, in the AppVP, and in the Master before the plugin - * is called, as well as in the lang-lib before this is called, and in the - * plugin. So, this gives both VMS and language implementations a chance to - * intercept at various points and do order-dependent stuff. - *Having a standard VMSNewPrReqData struc allows the language to create and - * free the struc, while VMS knows how to get the newPr if it wants it, and - * it lets the lang have lang-specific data related to creation transported - * to the plugin. 
- */ -void -VMS__send_create_procr_req( void *semReqData, VirtProcr *reqstingPr ) - { VMSReqst req; - - req.reqType = createReq; - req.semReqData = semReqData; - req.nextReqst = reqstingPr->requests; - reqstingPr->requests = &req; - - VMS__suspend_procr( reqstingPr ); - } - - -/* - *This adds a request to dissipate, then suspends the processor so that the - * request handler will receive the request. The request handler is what - * does the work of freeing memory and removing the processor from the - * semantic environment's data structures. - *The request handler also is what figures out when to shutdown the VMS - * system -- which causes all the core loop threads to die, and returns from - * the call that started up VMS to perform the work. - * - *This form is a bit misleading to understand if one is trying to figure out - * how VMS works -- it looks like a normal function call, but inside it - * sends a request to the request handler and suspends the processor, which - * jumps out of the VMS__dissipate_procr function, and out of all nestings - * above it, transferring the work of dissipating to the request handler, - * which then does the actual work -- causing the processor that animated - * the call of this function to disappear and the "hanging" state of this - * function to just poof into thin air -- the virtual processor's trace - * never returns from this call, but instead the virtual processor's trace - * gets suspended in this call and all the virt processor's state disap- - * pears -- making that suspend the last thing in the virt procr's trace. 
- */ -void -VMS__send_dissipate_req( VirtProcr *procrToDissipate ) - { VMSReqst req; - - req.reqType = dissipate; - req.nextReqst = procrToDissipate->requests; - procrToDissipate->requests = &req; - - VMS__suspend_procr( procrToDissipate ); - } - - -/* "ext" designates that it's for use outside the VMS system -- should only - * be called from main thread or other thread -- never from code animated by - * a VMS virtual processor. - * - *Use this version to dissipate VPs created outside the VMS system. - */ -void -VMS_ext__dissipate_procr( VirtProcr *procrToDissipate ) - { - //NOTE: initialData was given to the processor, so should either have - // been alloc'd with VMS__malloc, or freed by the level above animPr. - //So, all that's left to free here is the stack and the VirtProcr struc - // itself - //Note, should not stack-allocate initial data -- no guarantee, in - // general that creating processor will outlive ones it creates. - free( procrToDissipate->startOfStack ); - free( procrToDissipate ); - } - - - -/*This call's name indicates that request is malloc'd -- so req handler - * has to free any extra requests tacked on before a send, using this. - * - * This inserts the semantic-layer's request data into standard VMS carrier - * request data-struct that is mallocd. The sem request doesn't need to - * be malloc'd if this is called inside the same call chain before the - * send of the last request is called. 
- * - *The request handler has to call VMS__free_VMSReq for any of these - */ -inline void -VMS__add_sem_request_in_mallocd_VMSReqst( void *semReqData, - VirtProcr *callingPr ) - { VMSReqst *req; - - req = VMS__malloc( sizeof(VMSReqst) ); - req->reqType = semantic; - req->semReqData = semReqData; - req->nextReqst = callingPr->requests; - callingPr->requests = req; - } - -/*This inserts the semantic-layer's request data into standard VMS carrier - * request data-struct is allocated on stack of this call & ptr to it sent - * to plugin - *Then it does suspend, to cause request to be sent. - */ -inline void -VMS__send_sem_request( void *semReqData, VirtProcr *callingPr ) - { VMSReqst req; - - req.reqType = semantic; - req.semReqData = semReqData; - req.nextReqst = callingPr->requests; - callingPr->requests = &req; - - VMS__suspend_procr( callingPr ); - } - - -inline void -VMS__send_VMSSem_request( void *semReqData, VirtProcr *callingPr ) - { VMSReqst req; - - req.reqType = VMSSemantic; - req.semReqData = semReqData; - req.nextReqst = callingPr->requests; //gab any other preceeding - callingPr->requests = &req; - - VMS__suspend_procr( callingPr ); - } - - -/* - */ -VMSReqst * -VMS__take_next_request_out_of( VirtProcr *procrWithReq ) - { VMSReqst *req; - - req = procrWithReq->requests; - if( req == NULL ) return NULL; - - procrWithReq->requests = procrWithReq->requests->nextReqst; - return req; - } - - -inline void * -VMS__take_sem_reqst_from( VMSReqst *req ) - { - return req->semReqData; - } - - - -/* This is for OS requests and VMS infrastructure requests, such as to create - * a probe -- a probe is inside the heart of VMS-core, it's not part of any - * language -- but it's also a semantic thing that's triggered from and used - * in the application.. so it crosses abstractions.. so, need some special - * pattern here for handling such requests. - * Doing this just like it were a second language sharing VMS-core. 
- * - * This is called from the language's request handler when it sees a request - * of type VMSSemReq - * - * TODO: Later change this, to give probes their own separate plugin & have - * VMS-core steer the request to appropriate plugin - * Do the same for OS calls -- look later at it.. - */ -void inline -VMS__handle_VMSSemReq( VMSReqst *req, VirtProcr *requestingPr, void *semEnv, - ResumePrFnPtr resumePrFnPtr ) - { VMSSemReq *semReq; - IntervalProbe *newProbe; - - semReq = req->semReqData; - - newProbe = VMS__malloc( sizeof(IntervalProbe) ); - newProbe->nameStr = VMS__strDup( semReq->nameStr ); - newProbe->hist = NULL; - newProbe->schedChoiceWasRecorded = FALSE; - - //This runs in masterVP, so no race-condition worries - newProbe->probeID = - addToDynArray( newProbe, _VMSMasterEnv->dynIntervalProbesInfo ); - - requestingPr->dataRetFromReq = newProbe; - - (*resumePrFnPtr)( requestingPr, semEnv ); - } - - - -/*This must be called by the request handler plugin -- it cannot be called - * from the semantic library "dissipate processor" function -- instead, the - * semantic layer has to generate a request, and the plug-in calls this - * function. - *The reason is that this frees the virtual processor's stack -- which is - * still in use inside semantic library calls! - * - *This frees or recycles all the state owned by and comprising the VMS - * portion of the animating virtual procr. The request handler must first - * free any semantic data created for the processor that didn't use the - * VMS_malloc mechanism. Then it calls this, which first asks the malloc - * system to disown any state that did use VMS_malloc, and then frees the - * statck and the processor-struct itself. - *If the dissipated processor is the sole (remaining) owner of VMS__malloc'd - * state, then that state gets freed (or sent to recycling) as a side-effect - * of dis-owning it. 
- */ -void -VMS__dissipate_procr( VirtProcr *animatingPr ) - { - //dis-own all locations owned by this processor, causing to be freed - // any locations that it is (was) sole owner of -//TODO: implement VMS__malloc system, including "give up ownership" - - - //NOTE: initialData was given to the processor, so should either have - // been alloc'd with VMS__malloc, or freed by the level above animPr. - //So, all that's left to free here is the stack and the VirtProcr struc - // itself - //Note, should not stack-allocate initial data -- no guarantee, in - // general that creating processor will outlive ones it creates. - VMS__free( animatingPr->startOfStack ); - VMS__free( animatingPr ); - } - - -//TODO: look at architecting cleanest separation between request handler -// and master loop, for dissipate, create, shutdown, and other non-semantic -// requests. Issue is chain: one removes requests from AppVP, one dispatches -// on type of request, and one handles each type.. but some types require -// action from both request handler and master loop -- maybe just give the -// request handler calls like: VMS__handle_X_request_type - - -/*This is called by the semantic layer's request handler when it decides its - * time to shut down the VMS system. Calling this causes the core loop OS - * threads to exit, which unblocks the entry-point function that started up - * VMS, and allows it to grab the result and return to the original single- - * threaded application. - * - *The _VMSMasterEnv is needed by this shut down function, so the create-seed- - * and-wait function has to free a bunch of stuff after it detects the - * threads have all died: the masterEnv, the thread-related locations, - * masterVP any AppVPs that might still be allocated and sitting in the - * semantic environment, or have been orphaned in the _VMSWorkQ. - * - *NOTE: the semantic plug-in is expected to use VMS__malloc to get all the - * locations it needs, and give ownership to masterVP. 
Then, they will be - * automatically freed. - * - *In here,create one core-loop shut-down processor for each core loop and put - * them all directly into the readyToAnimateQ. - *Note, this function can ONLY be called after the semantic environment no - * longer cares if AppVPs get animated after the point this is called. In - * other words, this can be used as an abort, or else it should only be - * called when all AppVPs have finished dissipate requests -- only at that - * point is it sure that all results have completed. - */ -void -VMS__shutdown() - { int coreIdx; - VirtProcr *shutDownPr; - - //create the shutdown processors, one for each core loop -- put them - // directly into the Q -- each core will die when gets one - for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ ) - { //Note, this is running in the master - shutDownPr = VMS__create_procr( &endOSThreadFn, NULL ); - writeVMSQ( shutDownPr, _VMSMasterEnv->readyToAnimateQs[coreIdx] ); - } - - } - - -/*Am trying to be cute, avoiding IF statement in coreLoop that checks for - * a special shutdown procr. Ended up with extra-complex shutdown sequence. - *This function has the sole purpose of setting the stack and framePtr - * to the coreLoop's stack and framePtr.. it does that then jumps to the - * core loop's shutdown point -- might be able to just call Pthread_exit - * from here, but am going back to the pthread's stack and setting everything - * up just as if it never jumped out, before calling pthread_exit. - *The end-point of core loop will free the stack and so forth of the - * processor that animates this function, (this fn is transfering the - * animator of the AppVP that is in turn animating this function over - * to core loop function -- note that this slices out a level of virtual - * processors). 
- */ -void -endOSThreadFn( void *initData, VirtProcr *animatingPr ) - { -#ifdef SEQUENTIAL - asmTerminateCoreLoopSeq(animatingPr); -#else - asmTerminateCoreLoop(animatingPr); -#endif - } - - -/*This is called from the startup & shutdown - */ -void -VMS__cleanup_at_end_of_shutdown() - { - //unused - //VMSQueueStruc **readyToAnimateQs; - //int coreIdx; - //VirtProcr **masterVPs; - //SchedSlot ***allSchedSlots; //ptr to array of ptrs - - //Before getting rid of everything, print out any measurements made - forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, (DynArrayFnPtr)&printHist ); - forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, (DynArrayFnPtr)&saveHistToFile); - forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, &freeHist ); - #ifdef MEAS__TIME_PLUGIN - printHist( _VMSMasterEnv->reqHdlrLowTimeHist ); - saveHistToFile( _VMSMasterEnv->reqHdlrLowTimeHist ); - printHist( _VMSMasterEnv->reqHdlrHighTimeHist ); - saveHistToFile( _VMSMasterEnv->reqHdlrHighTimeHist ); - freeHistExt( _VMSMasterEnv->reqHdlrLowTimeHist ); - freeHistExt( _VMSMasterEnv->reqHdlrHighTimeHist ); - #endif - #ifdef MEAS__TIME_MALLOC - printHist( _VMSMasterEnv->mallocTimeHist ); - saveHistToFile( _VMSMasterEnv->mallocTimeHist ); - printHist( _VMSMasterEnv->freeTimeHist ); - saveHistToFile( _VMSMasterEnv->freeTimeHist ); - freeHistExt( _VMSMasterEnv->mallocTimeHist ); - freeHistExt( _VMSMasterEnv->freeTimeHist ); - #endif - #ifdef MEAS__TIME_MASTER_LOCK - printHist( _VMSMasterEnv->masterLockLowTimeHist ); - printHist( _VMSMasterEnv->masterLockHighTimeHist ); - #endif - #ifdef MEAS__TIME_MASTER - printHist( _VMSMasterEnv->pluginTimeHist ); - for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ ) - { - freeVMSQ( readyToAnimateQs[ coreIdx ] ); - //master VPs were created external to VMS, so use external free - VMS__dissipate_procr( masterVPs[ coreIdx ] ); - - freeSchedSlots( allSchedSlots[ coreIdx ] ); - } - #endif - #ifdef MEAS__TIME_STAMP_SUSP - printHist( _VMSMasterEnv->pluginTimeHist ); - for( coreIdx 
= 0; coreIdx < NUM_CORES; coreIdx++ ) - { - freeVMSQ( readyToAnimateQs[ coreIdx ] ); - //master VPs were created external to VMS, so use external free - VMS__dissipate_procr( masterVPs[ coreIdx ] ); - - freeSchedSlots( allSchedSlots[ coreIdx ] ); - } - #endif - - //All the environment data has been allocated with VMS__malloc, so just - // free its internal big-chunk and all inside it disappear. -/* - readyToAnimateQs = _VMSMasterEnv->readyToAnimateQs; - masterVPs = _VMSMasterEnv->masterVPs; - allSchedSlots = _VMSMasterEnv->allSchedSlots; - - for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ ) - { - freeVMSQ( readyToAnimateQs[ coreIdx ] ); - //master VPs were created external to VMS, so use external free - VMS__dissipate_procr( masterVPs[ coreIdx ] ); - - freeSchedSlots( allSchedSlots[ coreIdx ] ); - } - - VMS__free( _VMSMasterEnv->readyToAnimateQs ); - VMS__free( _VMSMasterEnv->masterVPs ); - VMS__free( _VMSMasterEnv->allSchedSlots ); - - //============================= MEASUREMENT STUFF ======================== - #ifdef STATS__TURN_ON_PROBES - freeDynArrayDeep( _VMSMasterEnv->dynIntervalProbesInfo, &VMS__free_probe); - #endif - //======================================================================== -*/ - //These are the only two that use system free - VMS_ext__free_free_list( _VMSMasterEnv->freeListHead ); - free( (void *)_VMSMasterEnv ); - } - - -//================================ - - -/*Later, improve this -- for now, just exits the application after printing - * the error message. 
- */ -void -VMS__throw_exception( char *msgStr, VirtProcr *reqstPr, VMSExcp *excpData ) - { - printf("%s",msgStr); - fflush(stdin); - exit(1); - } - diff -r 7cff4e13d5c4 -r 651ee45615ae VMS.h --- a/VMS.h Fri Feb 10 12:05:17 2012 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,579 +0,0 @@ -/* - * Copyright 2009 OpenSourceStewardshipFoundation.org - * Licensed under GNU General Public License version 2 - * - * Author: seanhalle@yahoo.com - * - */ - -#ifndef _VMS_H -#define _VMS_H -#define _GNU_SOURCE - -#include "VMS_primitive_data_types.h" -#include "../../C_Libraries/Queue_impl/PrivateQueue.h" -#include "../../C_Libraries/Histogram/Histogram.h" -#include "../../C_Libraries/DynArray/DynArray.h" -#include "../../C_Libraries/Hash_impl/PrivateHash.h" -#include "vmalloc.h" - -#include -#include - - -//=============================== Debug =================================== -// -//When SEQUENTIAL is defined, VMS does sequential exe in the main thread -// It still does co-routines and all the mechanisms are the same, it just -// has only a single thread and animates VPs one at a time -//#define SEQUENTIAL - -//#define USE_WORK_STEALING - -//turns on the probe-instrumentation in the application -- when not -// defined, the calls to the probe functions turn into comments -#define STATS__ENABLE_PROBES -//#define TURN_ON_DEBUG_PROBES - -//These defines turn types of bug messages on and off -// be sure debug messages are un-commented (next block of defines) -#define dbgAppFlow TRUE /* Top level flow of application code -- general*/ -#define dbgProbes FALSE /* for issues inside probes themselves*/ -#define dbgB2BMaster FALSE /* in coreloop, back to back master VPs*/ -#define dbgRqstHdlr FALSE /* in request handler code*/ - -//Comment or un- the substitute half to turn on/off types of debug message -#define DEBUG( bool, msg) \ -// if( bool){ printf(msg); fflush(stdin);} -#define DEBUG1( bool, msg, param) \ -// if(bool){printf(msg, param); fflush(stdin);} -#define 
DEBUG2( bool, msg, p1, p2) \ -// if(bool) {printf(msg, p1, p2); fflush(stdin);} - -#define ERROR(msg) printf(msg); -#define ERROR1(msg, param) printf(msg, param); -#define ERROR2(msg, p1, p2) printf(msg, p1, p2); - -//=========================== STATS ======================= - - //when MEAS__TIME_STAMP_SUSP is defined, causes code to be inserted and - // compiled-in that saves the low part of the time stamp count just before - // suspending a processor and just after resuming that processorsrc/VPThread_lib/VMS/VMS.h:322: warning: previous declaration of ‘VMS__create_procr’ was here. It is - // saved into a field added to VirtProcr. Have to sanity-check for - // rollover of low portion into high portion. -//#define MEAS__TIME_STAMP_SUSP -//#define MEAS__TIME_MASTER -#define MEAS__TIME_PLUGIN -#define MEAS__TIME_MALLOC -//#define MEAS__TIME_MASTER_LOCK -#define MEAS__NUM_TIMES_TO_RUN 100000 - - //For code that calculates normalization-offset between TSC counts of - // different cores. -#define NUM_TSC_ROUND_TRIPS 10 - - -//========================= Hardware related Constants ===================== - //This value is the number of hardware threads in the shared memory - // machine -//#define NUM_CORES 8 - - // tradeoff amortizing master fixed overhead vs imbalance potential - // when work-stealing, can make bigger, at risk of losing cache affinity -#define NUM_SCHED_SLOTS 5 - -#define MIN_WORK_UNIT_CYCLES 20000 - -#define MASTERLOCK_RETRIES 10000 - - // stack size in virtual processors created -#define VIRT_PROCR_STACK_SIZE 0x8000 /* 32K */ - - // memory for VMS__malloc -#define MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE 0x10000000 /* 256M */ - -#define CACHE_LINE 64 -#define PAGE_SIZE 4096 - - -//============================== - -#define SUCCESS 0 - -#define writeVMSQ writePrivQ -#define readVMSQ readPrivQ -#define makeVMSQ makeVMSPrivQ -#define numInVMSQ numInPrivQ -#define VMSQueueStruc PrivQueueStruc - - - 
-//=========================================================================== -typedef unsigned long long TSCount; - -typedef struct _SchedSlot SchedSlot; -typedef struct _VMSReqst VMSReqst; -typedef struct _VirtProcr VirtProcr; -typedef struct _IntervalProbe IntervalProbe; -typedef struct _GateStruc GateStruc; - - -typedef VirtProcr * (*SlaveScheduler) ( void *, int ); //semEnv, coreIdx -typedef void (*RequestHandler) ( VirtProcr *, void * ); //prWReqst, semEnv -typedef void (*VirtProcrFnPtr) ( void *, VirtProcr * ); //initData, animPr -typedef void VirtProcrFn ( void *, VirtProcr * ); //initData, animPr -typedef void (*ResumePrFnPtr) ( VirtProcr *, void * ); - - -//============= Requests =========== -// - -enum VMSReqstType //avoid starting enums at 0, for debug reasons - { - semantic = 1, - createReq, - dissipate, - VMSSemantic //goes with VMSSemReqst below - }; - -struct _VMSReqst - { - enum VMSReqstType reqType;//used for dissipate and in future for IO requests - void *semReqData; - - VMSReqst *nextReqst; - }; -//VMSReqst - -enum VMSSemReqstType //These are equivalent to semantic requests, but for - { // VMS's services available directly to app, like OS - createProbe = 1, // and probe services -- like a VMS-wide built-in lang - openFile, - otherIO - }; - -typedef struct - { enum VMSSemReqstType reqType; - VirtProcr *requestingPr; - char *nameStr; //for create probe - } - VMSSemReq; - - -//==================== Core data structures =================== - -struct _SchedSlot - { - int workIsDone; - int needsProcrAssigned; - VirtProcr *procrAssignedToSlot; - }; -//SchedSlot - -/*WARNING: re-arranging this data structure could cause VP switching - * assembly code to fail -- hard-codes offsets of fields - */ -struct _VirtProcr - { int procrID; //for debugging -- count up each time create - int coreAnimatedBy; - void *startOfStack; - void *stackPtr; - void *framePtr; - void *nextInstrPt; - - void *coreLoopStartPt; //allows proto-runtime to be linked later - void 
*coreLoopFramePtr; //restore before jmp back to core loop - void *coreLoopStackPtr; //restore before jmp back to core loop - - void *initialData; - - SchedSlot *schedSlot; - VMSReqst *requests; - - void *semanticData; //this livesUSE_GNU here for the life of VP - void *dataRetFromReq;//values returned from plugin to VP go here - - //=========== MEASUREMENT STUFF ========== - #ifdef MEAS__TIME_STAMP_SUSP - unsigned int preSuspTSCLow; - unsigned int postSuspTSCLow; - #endif - #ifdef MEAS__TIME_MASTER /* in VirtProcr because multiple masterVPs*/ - unsigned int startMasterTSCLow;USE_GNU - unsigned int endMasterTSCLow; - #endif - //======================================== - - float64 createPtInSecs; //have space but don't use on some configs - }; -//VirtProcr - - -/*WARNING: re-arranging this data structure could cause VP-switching - * assembly code to fail -- hard-codes offsets of fields - * (because -O3 messes with things otherwise) - */ -typedef struct - { - SlaveScheduler slaveScheduler; - RequestHandler requestHandler; - - SchedSlot ***allSchedSlots; - VMSQueueStruc **readyToAnimateQs; - VirtProcr **masterVPs; - - void *semanticEnv; - void *OSEventStruc; //for future, when add I/O to BLIS - MallocProlog *freeListHead; - int32 amtOfOutstandingMem; //total currently allocated - - void *coreLoopReturnPt;//addr to jump to to re-enter coreLoop - - int32 setupComplete; - volatile int32 masterLock; - - int32 numMasterInARow[NUM_CORES];//detect back-to-back masterVP - GateStruc *workStealingGates[ NUM_CORES ]; //concurrent work-steal - int32 workStealingLock; - - int32 numProcrsCreated; //gives ordering to processor creation - - //=========== MEASUREMENT STUFF ============= - IntervalProbe **intervalProbes; - PrivDynArrayInfo *dynIntervalProbesInfo; - HashTable *probeNameHashTbl; - int32 masterCreateProbeID; - float64 createPtInSecs; - Histogram **measHists; - PrivDynArrayInfo *measHistsInfo; - #ifdef MEAS__TIME_PLUGIN - Histogram *reqHdlrLowTimeHist; - Histogram 
*reqHdlrHighTimeHist; - #endif - #ifdef MEAS__TIME_MALLOC - Histogram *mallocTimeHist; - Histogram *freeTimeHist; - #endif - #ifdef MEAS__TIME_MASTER_LOCK - Histogram *masterLockLowTimeHist; - Histogram *masterLockHighTimeHist; - #endif - } -MasterEnv; - -//========================= Extra Stuff Data Strucs ======================= -typedef struct - { - - } -VMSExcp; - -struct _GateStruc - { - int32 gateClosed; - int32 preGateProgress; - int32 waitProgress; - int32 exitProgress; - }; -//GateStruc - -//======================= OS Thread related =============================== - -void * coreLoop( void *paramsIn ); //standard PThreads fn prototype -void * coreLoop_Seq( void *paramsIn ); //standard PThreads fn prototype -void masterLoop( void *initData, VirtProcr *masterPr ); - - -typedef struct - { - void *endThdPt; - unsigned int coreNum; - } -ThdParams; - -pthread_t coreLoopThdHandles[ NUM_CORES ]; //pthread's virt-procr state -ThdParams *coreLoopThdParams [ NUM_CORES ]; -pthread_mutex_t suspendLock; -pthread_cond_t suspend_cond; - - - -//===================== Global Vars =================== - -volatile MasterEnv *_VMSMasterEnv; - - - - -//=========================== Function Prototypes ========================= - - -//========== Setup and shutdown ========== -void -VMS__init(); - -void -VMS__init_Seq(); - -void -VMS__start_the_work_then_wait_until_done(); - -void -VMS__start_the_work_then_wait_until_done_Seq(); - -inline VirtProcr * -VMS__create_procr( VirtProcrFnPtr fnPtr, void *initialData ); - -void -VMS__dissipate_procr( VirtProcr *procrToDissipate ); - - //Use this to create processor inside entry point & other places outside - // the VMS system boundary (IE, not run in slave nor Master) -VirtProcr * -VMS_ext__create_procr( VirtProcrFnPtr fnPtr, void *initialData ); - -void -VMS_ext__dissipate_procr( VirtProcr *procrToDissipate ); - -void -VMS__throw_exception( char *msgStr, VirtProcr *reqstPr, VMSExcp *excpData ); - -void -VMS__shutdown(); - -void 
-VMS__cleanup_at_end_of_shutdown(); - -void * -VMS__give_sem_env_for( VirtProcr *animPr ); - - -//============== Request Related =============== - -void -VMS__suspend_procr( VirtProcr *callingPr ); - -inline void -VMS__add_sem_request_in_mallocd_VMSReqst( void *semReqData, VirtProcr *callingPr ); - -inline void -VMS__send_sem_request( void *semReqData, VirtProcr *callingPr ); - -void -VMS__send_create_procr_req( void *semReqData, VirtProcr *reqstingPr ); - -void inline -VMS__send_dissipate_req( VirtProcr *prToDissipate ); - -inline void -VMS__send_VMSSem_request( void *semReqData, VirtProcr *callingPr ); - -VMSReqst * -VMS__take_next_request_out_of( VirtProcr *procrWithReq ); - -inline void * -VMS__take_sem_reqst_from( VMSReqst *req ); - -void inline -VMS__handle_VMSSemReq( VMSReqst *req, VirtProcr *requestingPr, void *semEnv, - ResumePrFnPtr resumePrFnPtr ); - -//======================== STATS ====================== - -//===== RDTSC wrapper ===== //Also runs with x86_64 code - -#define saveTimeStampCountInto(low, high) \ - asm volatile("RDTSC; \ - movl %%eax, %0; \ - movl %%edx, %1;" \ - /* outputs */ : "=m" (low), "=m" (high)\ - /* inputs */ : \ - /* clobber */ : "%eax", "%edx" \ - ); - -#define saveLowTimeStampCountInto(low) \ - asm volatile("RDTSC; \ - movl %%eax, %0;" \ - /* outputs */ : "=m" (low) \ - /* inputs */ : \ - /* clobber */ : "%eax", "%edx" \ - ); - -//==================== -#define makeAMeasHist( idx, name, numBins, startVal, binWidth ) \ - makeHighestDynArrayIndexBeAtLeast( _VMSMasterEnv->measHistsInfo, idx ); \ - _VMSMasterEnv->measHists[idx] = \ - makeFixedBinHist( numBins, startVal, binWidth, name ); - - -#define MEAS__SUB_CREATE /*turn on/off subtraction of create from plugin*/ - -#ifdef VPTHREAD - -//VPThread -#define createHistIdx 0 -#define mutexLockHistIdx 1 -#define mutexUnlockHistIdx 2 -#define condWaitHistIdx 3 -#define condSignalHistIdx 4 - -#define MakeTheMeasHists() \ - _VMSMasterEnv->measHistsInfo = \ - makePrivDynArrayOfSize( 
(void***)&(_VMSMasterEnv->measHists), 200); \ - makeAMeasHist( createHistIdx, "create", 250, 0, 100 ) \ - makeAMeasHist( mutexLockHistIdx, "mutex_lock", 50, 0, 100 ) \ - makeAMeasHist( mutexUnlockHistIdx, "mutex_unlock", 50, 0, 100 ) \ - makeAMeasHist( condWaitHistIdx, "cond_wait", 50, 0, 100 ) \ - makeAMeasHist( condSignalHistIdx, "cond_signal", 50, 0, 100 ) - -#endif - - -#ifdef VCILK - -//VCilk -#define spawnHistIdx 0 -#define syncHistIdx 1 - -#define MakeTheMeasHists() \ - _VMSMasterEnv->measHistsInfo = \ - makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \ - makeAMeasHist( spawnHistIdx, "Spawn", 50, 0, 200 ) \ - makeAMeasHist( syncHistIdx, "Sync", 50, 0, 200 ) - - -#endif - -#ifdef SSR - -//SSR -#define SendFromToHistIdx 0 -#define SendOfTypeHistIdx 1 -#define ReceiveFromToHistIdx 2 -#define ReceiveOfTypeHistIdx 3 - -#define MakeTheMeasHists() \ - _VMSMasterEnv->measHistsInfo = \ - makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \ - makeAMeasHist( SendFromToHistIdx, "SendFromTo", 50, 0, 100 ) \ - makeAMeasHist( SendOfTypeHistIdx, "SendOfType", 50, 0, 100 ) \ - makeAMeasHist( ReceiveFromToHistIdx,"ReceiveFromTo", 50, 0, 100 ) \ - makeAMeasHist( ReceiveOfTypeHistIdx,"ReceiveOfType", 50, 0, 100 ) - -#endif - -//=========================================================================== -//VPThread - - -#define Meas_startCreate \ - int32 startStamp, endStamp; \ - saveLowTimeStampCountInto( startStamp ); \ - -#define Meas_endCreate \ - saveLowTimeStampCountInto( endStamp ); \ - addIntervalToHist( startStamp, endStamp, \ - _VMSMasterEnv->measHists[ createHistIdx ] ); - -#define Meas_startMutexLock \ - int32 startStamp, endStamp; \ - saveLowTimeStampCountInto( startStamp ); \ - -#define Meas_endMutexLock \ - saveLowTimeStampCountInto( endStamp ); \ - addIntervalToHist( startStamp, endStamp, \ - _VMSMasterEnv->measHists[ mutexLockHistIdx ] ); - -#define Meas_startMutexUnlock \ - int32 startStamp, endStamp; \ - 
saveLowTimeStampCountInto( startStamp ); \ - -#define Meas_endMutexUnlock \ - saveLowTimeStampCountInto( endStamp ); \ - addIntervalToHist( startStamp, endStamp, \ - _VMSMasterEnv->measHists[ mutexUnlockHistIdx ] ); - -#define Meas_startCondWait \ - int32 startStamp, endStamp; \ - saveLowTimeStampCountInto( startStamp ); \ - -#define Meas_endCondWait \ - saveLowTimeStampCountInto( endStamp ); \ - addIntervalToHist( startStamp, endStamp, \ - _VMSMasterEnv->measHists[ condWaitHistIdx ] ); - -#define Meas_startCondSignal \ - int32 startStamp, endStamp; \ - saveLowTimeStampCountInto( startStamp ); \ - -#define Meas_endCondSignal \ - saveLowTimeStampCountInto( endStamp ); \ - addIntervalToHist( startStamp, endStamp, \ - _VMSMasterEnv->measHists[ condSignalHistIdx ] ); - -//=========================================================================== -// VCilk -#define Meas_startSpawn \ - int32 startStamp, endStamp; \ - saveLowTimeStampCountInto( startStamp ); \ - -#define Meas_endSpawn \ - saveLowTimeStampCountInto( endStamp ); \ - addIntervalToHist( startStamp, endStamp, \ - _VMSMasterEnv->measHists[ spawnHistIdx ] ); - -#define Meas_startSync \ - int32 startStamp, endStamp; \ - saveLowTimeStampCountInto( startStamp ); \ - -#define Meas_endSync \ - saveLowTimeStampCountInto( endStamp ); \ - addIntervalToHist( startStamp, endStamp, \ - _VMSMasterEnv->measHists[ syncHistIdx ] ); - -//=========================================================================== -// SSR -#define Meas_startSendFromTo \ - int32 startStamp, endStamp; \ - saveLowTimeStampCountInto( startStamp ); \ - -#define Meas_endSendFromTo \ - saveLowTimeStampCountInto( endStamp ); \ - addIntervalToHist( startStamp, endStamp, \ - _VMSMasterEnv->measHists[ SendFromToHistIdx ] ); - -#define Meas_startSendOfType \ - int32 startStamp, endStamp; \ - saveLowTimeStampCountInto( startStamp ); \ - -#define Meas_endSendOfType \ - saveLowTimeStampCountInto( endStamp ); \ - addIntervalToHist( startStamp, endStamp, \ - 
_VMSMasterEnv->measHists[ SendOfTypeHistIdx ] ); - -#define Meas_startReceiveFromTo \ - int32 startStamp, endStamp; \ - saveLowTimeStampCountInto( startStamp ); \ - -#define Meas_endReceiveFromTo \ - saveLowTimeStampCountInto( endStamp ); \ - addIntervalToHist( startStamp, endStamp, \ - _VMSMasterEnv->measHists[ ReceiveFromToHistIdx ] ); - -#define Meas_startReceiveOfType \ - int32 startStamp, endStamp; \ - saveLowTimeStampCountInto( startStamp ); \ - -#define Meas_endReceiveOfType \ - saveLowTimeStampCountInto( endStamp ); \ - addIntervalToHist( startStamp, endStamp, \ - _VMSMasterEnv->measHists[ReceiveOfTypeHistIdx ] ); - -//===== - -#include "ProcrContext.h" -#include "probes.h" -#include "vutilities.h" - -#endif /* _VMS_H */ - diff -r 7cff4e13d5c4 -r 651ee45615ae VMS_primitive_data_types.h --- a/VMS_primitive_data_types.h Fri Feb 10 12:05:17 2012 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,53 +0,0 @@ -/* - * Copyright 2009 OpenSourceStewardshipFoundation.org - * Licensed under GNU General Public License version 2 - * - * Author: seanhalle@yahoo.com - * - - */ - -#ifndef _BLIS_PRIMITIVE_DATA_TYPES_H -#define _BLIS_PRIMITIVE_DATA_TYPES_H - - -/*For portability, need primitive data types that have a well defined - * size, and well-defined layout into bytes - *To do this, provide BLIS standard aliases for all primitive data types - *These aliases must be used in all BLIS functions instead of the ANSI types - * - *These definitions will be replaced inside each specialization module - * according to the compiler used in that module and the hardware being - * specialized to. 
- */ -/* -#define int8 char -#define uint8 char -#define int16 short -#define uint16 unsigned short -#define int32 int -#define uint32 unsigned int -#define int64 long long -#define uint64 unsigned long long -#define float32 float -#define float64 double -*/ -typedef char bool8; -typedef char int8; -typedef char uint8; -typedef short int16; -typedef unsigned short uint16; -typedef int int32; -typedef unsigned int uint32; -typedef long long int64; -typedef unsigned long long uint64; -typedef float float32; -typedef double float64; -//typedef double double float128; -#define float128 double double - -#define TRUE 1 -#define FALSE 0 - -#endif /* _BLIS_PRIMITIVE_DATA_TYPES_H */ - diff -r 7cff4e13d5c4 -r 651ee45615ae __brch__DEPRECATED_README --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/__brch__DEPRECATED_README Sat Feb 11 21:47:25 2012 -0800 @@ -0,0 +1,29 @@ +*DEPRECATED* as of Feb 2012, this branch should not be used. Too many variations of VMS for MC_shared exist. + +Instead, choose a branch that has the best implementation for the machine being run on. For example, single-socket with 2 cores, or with 4 cores, or with 8 cores all have their own branches with code tuned to that number of cores. AMD processors require different low-level tweaking than Intel, and so on. + +============== Background on Branch Naming ============ + +There are two kinds of branches: ones used to develop features, and ones tuned to particular hardware. A given HW branch may combine features from several feature-branches, picking and choosing among them. + +Legacy branches, from before Feb 2012, have random names. 
After Feb 2012, they're named by the scheme: + +feat__<feature_name>__<HW_name> + +HW__<HW_desc> + +where <HW_name> and <HW_desc> follow the pattern: + + <num_sockets>x<cores_per_socket>_<vendor>_<processor_name> + +Examples: + +feat__exp_array_malloc + +feat__rand_backoff__4x10_Intel_WestmereEx + +HW__1x4_Intel_SandyBridge + +HW__4x10_Intel_WestmereEx + +HW__1x4_AMD_mobile diff -r 7cff4e13d5c4 -r 651ee45615ae contextSwitch.s --- a/contextSwitch.s Fri Feb 10 12:05:17 2012 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,149 +0,0 @@ -.data - - -.text - -//Save return label address for the coreLoop to pointer -//Arguments: Pointer to variable holding address -.globl saveCoreLoopReturnAddr -saveCoreLoopReturnAddr: - movq $coreLoopReturn, %rcx #load label address - movq %rcx, (%rdi) #save address to pointer - ret - - -//Initializes VirtProcrFn at first run for 64 bit mode -//Puts argument from stack into registers -.globl startVirtProcrFn -startVirtProcrFn: - movq %rdi , %rsi #get second argument from first argument of switchVP - movq 0x08(%rsp), %rdi #get first argument - movq (%rsp) , %rax #get function addr - jmp *%rax - -//Switches form CoreLoop to VP ether a normal VP or the Master Loop -//switch to virt procr's stack and frame ptr then jump to virt procr fn -/* VirtProcr offsets: - * 0x10 stackPtr - * 0x18 framePtr - * 0x20 nextInstrPt - * 0x30 coreLoopFramePtr - * 0x38 coreLoopStackPtr - * - * _VMSMasterEnv offsets: - * 0x48 coreLoopReturnPt - * 0x54 masterLock - */ -.globl switchToVP -switchToVP: - #VirtProcr in %rdi - movq %rsp , 0x38(%rdi) #save core loop stack pointer - movq %rbp , 0x30(%rdi) #save core loop frame pointer - movq 0x10(%rdi), %rsp #restore stack pointer - movq 0x18(%rdi), %rbp #restore frame pointer - movq 0x20(%rdi), %rax #get jmp pointer - jmp *%rax #jmp to VP -coreLoopReturn: - ret - - -//switches to core loop. 
saves return address -/* VirtProcr offsets: - * 0x10 stackPtr - * 0x18 framePtr - * 0x20 nextInstrPt - * 0x30 coreLoopFramePtr - * 0x38 coreLoopStackPtr - * - * _VMSMasterEnv offsets: - * 0x48 coreLoopReturnPt - * 0x54 masterLock - */ -.globl switchToCoreLoop -switchToCoreLoop: - #VirtProcr in %rdi - movq $VPReturn , 0x20(%rdi) #store return address - movq %rsp , 0x10(%rdi) #save stack pointer - movq %rbp , 0x18(%rdi) #save frame pointer - movq 0x38(%rdi), %rsp #restore stack pointer - movq 0x30(%rdi), %rbp #restore frame pointer - movq $_VMSMasterEnv, %rcx - movq (%rcx) , %rcx - movq 0x48(%rcx), %rax #get CoreLoopStartPt - jmp *%rax #jmp to CoreLoop -VPReturn: - ret - - - -//switches to core loop from master. saves return address -//Releases masterLock so the next MasterLoop can be executed -/* VirtProcr offsets: - * 0x10 stackPtr - * 0x18 framePtr - * 0x20 nextInstrPt - * 0x30 coreLoopFramePtr - * 0x38 coreLoopStackPtr - * - * _VMSMasterEnv offsets: - * 0x48 coreLoopReturnPt - * 0x54 masterLock - */ -.globl masterSwitchToCoreLoop -masterSwitchToCoreLoop: - #VirtProcr in %rdi - movq $MasterReturn, 0x20(%rdi) #store return address - movq %rsp , 0x10(%rdi) #save stack pointer - movq %rbp , 0x18(%rdi) #save frame pointer - movq 0x38(%rdi), %rsp #restore stack pointer - movq 0x30(%rdi), %rbp #restore frame pointer - movq $_VMSMasterEnv, %rcx - movq (%rcx) , %rcx - movq 0x48(%rcx), %rax #get CoreLoopStartPt - movl $0x0 , 0x54(%rcx) #release lock - jmp *%rax #jmp to CoreLoop -MasterReturn: - ret - - -//Switch to terminateCoreLoop -//therefor switch to coreLoop context from master context -// no need to call because the stack is already set up for switchVP -// and virtPr is in %rdi -// and both functions have the same argument. 
-// do not save register of VP because this function will never return -/* VirtProcr offsets: - * 0x10 stackPtr - * 0x18 framePtr - * 0x20 nextInstrPt - * 0x30 coreLoopFramePtr - * 0x38 coreLoopStackPtr - * - * _VMSMasterEnv offsets: - * 0x48 coreLoopReturnPt - * 0x58 masterLock - */ -.globl asmTerminateCoreLoop -asmTerminateCoreLoop: - #VirtProcr in %rdi - movq 0x38(%rdi), %rsp #restore stack pointer - movq 0x30(%rdi), %rbp #restore frame pointer - movq $terminateCoreLoop, %rax - jmp *%rax #jmp to CoreLoop - - -/* - * This one for the sequential version is special. It discards the current stack - * and returns directly from the coreLoop after VMS__dissipate_procr was called - */ -.globl asmTerminateCoreLoopSeq -asmTerminateCoreLoopSeq: - #VirtProcr in %rdi - movq 0x38(%rdi), %rsp #restore stack pointer - movq 0x30(%rdi), %rbp #restore frame pointer - #argument is in %rdi - call VMS__dissipate_procr - movq %rbp , %rsp #goto the coreLoops stack - pop %rbp #restore the old framepointer - ret #return from core loop - diff -r 7cff4e13d5c4 -r 651ee45615ae probes.c --- a/probes.c Fri Feb 10 12:05:17 2012 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,354 +0,0 @@ -/* - * Copyright 2010 OpenSourceStewardshipFoundation - * - * Licensed under BSD - */ - -#include -#include -#include - -#include "VMS.h" -#include "Queue_impl/BlockingQueue.h" -#include "Histogram/Histogram.h" - - -//================================ STATS ==================================== - -inline TSCount getTSCount() - { unsigned int low, high; - TSCount out; - - saveTimeStampCountInto( low, high ); - out = high; - out = (out << 32) + low; - return out; - } - - - -//==================== Probes ================= -#ifdef STATS__USE_TSC_PROBES - -int32 -VMS__create_histogram_probe( int32 numBins, float32 startValue, - float32 binWidth, char *nameStr ) - { IntervalProbe *newProbe; - int32 idx; - FloatHist *hist; - - idx = VMS__create_single_interval_probe( nameStr ); - newProbe = 
_VMSMasterEnv->intervalProbes[ idx ]; - - hist = makeFloatHistogram( numBins, startValue, binWidth ); - newProbe->hist = hist; - return idx; - } - -void -VMS_impl__record_interval_start_in_probe( int32 probeID ) - { IntervalProbe *probe; - - probe = _VMSMasterEnv->intervalProbes[ probeID ]; - probe->startStamp = getTSCount(); - } - -void -VMS_impl__record_interval_end_in_probe( int32 probeID ) - { IntervalProbe *probe; - TSCount endStamp; - - endStamp = getTSCount(); - - probe = _VMSMasterEnv->intervalProbes[ probeID ]; - probe->endStamp = endStamp; - - if( probe->hist != NULL ) - { TSCount interval = probe->endStamp - probe->startStamp; - //if the interval is sane, then add to histogram - if( interval < probe->hist->endOfRange * 10 ) - addToFloatHist( interval, probe->hist ); - } - } - -void -VMS_impl__print_stats_of_probe( int32 probeID ) - { IntervalProbe *probe; - - probe = _VMSMasterEnv->intervalProbes[ probeID ]; - - if( probe->hist == NULL ) - { - printf("probe: %s, interval: %.6lf\n", probe->nameStr,probe->interval); - } - - else - { - printf( "probe: %s\n", probe->nameStr ); - printFloatHist( probe->hist ); - } - } -#else - -/* - * In practice, probe operations are called from the app, from inside slaves - * -- so have to be sure each probe is single-VP owned, and be sure that - * any place common structures are modified it's done inside the master. - * So -- the only place common structures are modified is during creation. - * after that, all mods are to individual instances. - * - * Thniking perhaps should change the semantics to be that probes are - * attached to the virtual processor -- and then everything is guaranteed - * to be isolated -- except then can't take any intervals that span VPs, - * and would have to transfer the probes to Master env when VP dissipates.. - * gets messy.. 
- * - * For now, just making so that probe creation causes a suspend, so that - * the dynamic array in the master env is only modified from the master - * - */ -IntervalProbe * -create_generic_probe( char *nameStr, VirtProcr *animPr ) -{ - VMSSemReq reqData; - - reqData.reqType = createProbe; - reqData.nameStr = nameStr; - - VMS__send_VMSSem_request( &reqData, animPr ); - - return animPr->dataRetFromReq; - } - -/*Use this version from outside VMS -- it uses external malloc, and modifies - * dynamic array, so can't be animated in a slave VP - */ -IntervalProbe * -ext__create_generic_probe( char *nameStr ) - { IntervalProbe *newProbe; - int32 nameLen; - - newProbe = malloc( sizeof(IntervalProbe) ); - nameLen = strlen( nameStr ); - newProbe->nameStr = malloc( nameLen ); - memcpy( newProbe->nameStr, nameStr, nameLen ); - newProbe->hist = NULL; - newProbe->schedChoiceWasRecorded = FALSE; - newProbe->probeID = - addToDynArray( newProbe, _VMSMasterEnv->dynIntervalProbesInfo ); - - return newProbe; - } - - -/*Only call from inside master or main startup/shutdown thread - */ -void -VMS_impl__free_probe( IntervalProbe *probe ) - { if( probe->hist != NULL ) freeDblHist( probe->hist ); - if( probe->nameStr != NULL) VMS__free( probe->nameStr ); - VMS__free( probe ); - } - - -int32 -VMS_impl__record_time_point_into_new_probe( char *nameStr, VirtProcr *animPr) - { IntervalProbe *newProbe; - struct timeval *startStamp; - float64 startSecs; - - newProbe = create_generic_probe( nameStr, animPr ); - newProbe->endSecs = 0; - - gettimeofday( &(newProbe->startStamp), NULL); - - //turn into a double - startStamp = &(newProbe->startStamp); - startSecs = startStamp->tv_sec + ( startStamp->tv_usec / 1000000.0 ); - newProbe->startSecs = startSecs; - - return newProbe->probeID; - } - -int32 -VMS_ext_impl__record_time_point_into_new_probe( char *nameStr ) - { IntervalProbe *newProbe; - struct timeval *startStamp; - float64 startSecs; - - newProbe = ext__create_generic_probe( nameStr ); - 
newProbe->endSecs = 0; - - gettimeofday( &(newProbe->startStamp), NULL); - - //turn into a double - startStamp = &(newProbe->startStamp); - startSecs = startStamp->tv_sec + ( startStamp->tv_usec / 1000000.0 ); - newProbe->startSecs = startSecs; - - return newProbe->probeID; - } - -int32 -VMS_impl__create_single_interval_probe( char *nameStr, VirtProcr *animPr ) - { IntervalProbe *newProbe; - - newProbe = create_generic_probe( nameStr, animPr ); - - return newProbe->probeID; - } - -int32 -VMS_impl__create_histogram_probe( int32 numBins, float64 startValue, - float64 binWidth, char *nameStr, VirtProcr *animPr ) - { IntervalProbe *newProbe; - DblHist *hist; - - newProbe = create_generic_probe( nameStr, animPr ); - - hist = makeDblHistogram( numBins, startValue, binWidth ); - newProbe->hist = hist; - return newProbe->probeID; - } - -void -VMS_impl__index_probe_by_its_name( int32 probeID, VirtProcr *animPr ) - { IntervalProbe *probe; - - //TODO: fix this To be in Master -- race condition - probe = _VMSMasterEnv->intervalProbes[ probeID ]; - - addValueIntoTable(probe->nameStr, probe, _VMSMasterEnv->probeNameHashTbl); - } - -IntervalProbe * -VMS_impl__get_probe_by_name( char *probeName, VirtProcr *animPr ) - { - //TODO: fix this To be in Master -- race condition - return getValueFromTable( probeName, _VMSMasterEnv->probeNameHashTbl ); - } - - -/*Everything is local to the animating procr, so no need for request, do - * work locally, in the anim Pr - */ -void -VMS_impl__record_sched_choice_into_probe( int32 probeID, VirtProcr *animatingPr ) - { IntervalProbe *probe; - - probe = _VMSMasterEnv->intervalProbes[ probeID ]; - probe->schedChoiceWasRecorded = TRUE; - probe->coreNum = animatingPr->coreAnimatedBy; - probe->procrID = animatingPr->procrID; - probe->procrCreateSecs = animatingPr->createPtInSecs; - } - -/*Everything is local to the animating procr, so no need for request, do - * work locally, in the anim Pr - */ -void -VMS_impl__record_interval_start_in_probe( int32 
probeID ) - { IntervalProbe *probe; - - DEBUG( dbgProbes, "record start of interval\n" ) - probe = _VMSMasterEnv->intervalProbes[ probeID ]; - gettimeofday( &(probe->startStamp), NULL ); - } - - -/*Everything is local to the animating procr, so no need for request, do - * work locally, in the anim Pr - */ -void -VMS_impl__record_interval_end_in_probe( int32 probeID ) - { IntervalProbe *probe; - struct timeval *endStamp, *startStamp; - float64 startSecs, endSecs; - - DEBUG( dbgProbes, "record end of interval\n" ) - //possible seg-fault if array resized by diff core right after this - // one gets probe..? Something like that? Might be safe.. don't care - probe = _VMSMasterEnv->intervalProbes[ probeID ]; - gettimeofday( &(probe->endStamp), NULL); - - //now turn into an interval held in a double - startStamp = &(probe->startStamp); - endStamp = &(probe->endStamp); - - startSecs = startStamp->tv_sec + ( startStamp->tv_usec / 1000000.0 ); - endSecs = endStamp->tv_sec + ( endStamp->tv_usec / 1000000.0 ); - - probe->interval = endSecs - startSecs; - probe->startSecs = startSecs; - probe->endSecs = endSecs; - - if( probe->hist != NULL ) - { - //if the interval is sane, then add to histogram - if( probe->interval < probe->hist->endOfRange * 10 ) - addToDblHist( probe->interval, probe->hist ); - } - } - -void -print_probe_helper( IntervalProbe *probe ) - { - printf( "\nprobe: %s, ", probe->nameStr ); - - - if( probe->schedChoiceWasRecorded ) - { printf( "coreNum: %d, procrID: %d, procrCreated: %0.6f | ", - probe->coreNum, probe->procrID, probe->procrCreateSecs ); - } - - if( probe->endSecs == 0 ) //just a single point in time - { - printf( " time point: %.6f\n", - probe->startSecs - _VMSMasterEnv->createPtInSecs ); - } - else if( probe->hist == NULL ) //just an interval - { - printf( " startSecs: %.6f interval: %.6f\n", - (probe->startSecs - _VMSMasterEnv->createPtInSecs), probe->interval); - } - else //a full histogram of intervals - { - printDblHist( probe->hist ); - } - } 
- -//TODO: change so pass around pointer to probe instead of its array-index.. -// will eliminate chance for timing of resize to cause problems with the -// lookup -- even though don't think it actually can cause problems.. -// there's no need to pass index around -- have hash table for names, and -// only need it once, then have ptr to probe.. the thing about enum the -// index and use that as name is clunky in practice -- just hash. -void -VMS_impl__print_stats_of_probe( int32 probeID ) - { IntervalProbe *probe; - - probe = _VMSMasterEnv->intervalProbes[ probeID ]; - - print_probe_helper( probe ); - } - - -inline void doNothing(){}; - -void -generic_print_probe( void *_probe ) - { - IntervalProbe *probe = (IntervalProbe *)_probe; - - //TODO segfault in printf - //print_probe_helper( probe ); - } - -void -VMS_impl__print_stats_of_all_probes() - { - forAllInDynArrayDo( _VMSMasterEnv->dynIntervalProbesInfo, - &generic_print_probe ); - fflush( stdout ); - } -#endif diff -r 7cff4e13d5c4 -r 651ee45615ae probes.h --- a/probes.h Fri Feb 10 12:05:17 2012 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,195 +0,0 @@ -/* - * Copyright 2009 OpenSourceStewardshipFoundation.org - * Licensed under GNU General Public License version 2 - * - * Author: seanhalle@yahoo.com - * - */ - -#ifndef _PROBES_H -#define _PROBES_H -#define _GNU_SOURCE - -#include "VMS_primitive_data_types.h" - -#include - - - //when STATS__TURN_ON_PROBES is defined allows using probes to measure - // time intervals. The probes are macros that only compile to something - // when STATS__TURN_ON_PROBES is defined. The probes are saved in the - // master env -- but only when this is defined. 
- //The TSC probes use RDTSC instr, can be unreliable, Dbl uses gettimeofday -#define STATS__TURN_ON_PROBES -//#define STATS__USE_TSC_PROBES -#define STATS__USE_DBL_PROBES - -//typedef struct _IntervalProbe IntervalProbe; //in VMS.h - -struct _IntervalProbe - { - char *nameStr; - int32 probeID; - - int32 schedChoiceWasRecorded; - int32 coreNum; - int32 procrID; - float64 procrCreateSecs; - - #ifdef STATS__USE_TSC_PROBES - TSCount startStamp; - TSCount endStamp; - #else - struct timeval startStamp; - struct timeval endStamp; - #endif - float64 startSecs; - float64 endSecs; - float64 interval; - DblHist *hist;//if NULL, then is single interval probe - }; - - -//============================= Statistics ================================== - - //Frequency of TS counts - //TODO: change freq for each machine -#define TSCOUNT_FREQ 3180000000 - -inline TSCount getTSCount(); - - -//======================== Probes ============================= -// -// Use macros to allow turning probes off with a #define switch -#ifdef STATS__ENABLE_PROBES -int32 -VMS_impl__record_time_point_into_new_probe( char *nameStr,VirtProcr *animPr); -#define VMS__record_time_point_into_new_probe( nameStr, animPr ) \ - VMS_impl__record_time_point_in_new_probe( nameStr, animPr ) - -int32 -VMS_ext_impl__record_time_point_into_new_probe( char *nameStr ); -#define VMS_ext__record_time_point_into_new_probe( nameStr ) \ - VMS_ext_impl__record_time_point_into_new_probe( nameStr ) - - -int32 -VMS_impl__create_single_interval_probe( char *nameStr, VirtProcr *animPr ); -#define VMS__create_single_interval_probe( nameStr, animPr ) \ - VMS_impl__create_single_interval_probe( nameStr, animPr ) - - -int32 -VMS_impl__create_histogram_probe( int32 numBins, float64 startValue, - float64 binWidth, char *nameStr, VirtProcr *animPr ); -#define VMS__create_histogram_probe( numBins, startValue, \ - binWidth, nameStr, animPr ) \ - VMS_impl__create_histogram_probe( numBins, startValue, \ - binWidth, nameStr, animPr ) -void 
-VMS_impl__free_probe( IntervalProbe *probe ); -#define VMS__free_probe( probe ) \ - VMS_impl__free_probe( probe ) - -void -VMS_impl__index_probe_by_its_name( int32 probeID, VirtProcr *animPr ); -#define VMS__index_probe_by_its_name( probeID, animPr ) \ - VMS_impl__index_probe_by_its_name( probeID, animPr ) - -IntervalProbe * -VMS_impl__get_probe_by_name( char *probeName, VirtProcr *animPr ); -#define VMS__get_probe_by_name( probeID, animPr ) \ - VMS_impl__get_probe_by_name( probeName, animPr ) - -void -VMS_impl__record_sched_choice_into_probe( int32 probeID, VirtProcr *animPr ); -#define VMS__record_sched_choice_into_probe( probeID, animPr ) \ - VMS_impl__record_sched_choice_into_probe( probeID, animPr ) - -void -VMS_impl__record_interval_start_in_probe( int32 probeID ); -#define VMS__record_interval_start_in_probe( probeID ) \ - VMS_impl__record_interval_start_in_probe( probeID ) - -void -VMS_impl__record_interval_end_in_probe( int32 probeID ); -#define VMS__record_interval_end_in_probe( probeID ) \ - VMS_impl__record_interval_end_in_probe( probeID ) - -void -VMS_impl__print_stats_of_probe( int32 probeID ); -#define VMS__print_stats_of_probe( probeID ) \ - VMS_impl__print_stats_of_probe( probeID ) - -void -VMS_impl__print_stats_of_all_probes(); -#define VMS__print_stats_of_all_probes() \ - VMS_impl__print_stats_of_all_probes() - - -#else -int32 -VMS_impl__record_time_point_into_new_probe( char *nameStr,VirtProcr *animPr); -#define VMS__record_time_point_into_new_probe( nameStr, animPr ) \ - 0 /* do nothing */ - -int32 -VMS_ext_impl__record_time_point_into_new_probe( char *nameStr ); -#define VMS_ext__record_time_point_into_new_probe( nameStr ) \ - 0 /* do nothing */ - - -int32 -VMS_impl__create_single_interval_probe( char *nameStr, VirtProcr *animPr ); -#define VMS__create_single_interval_probe( nameStr, animPr ) \ - 0 /* do nothing */ - - -int32 -VMS_impl__create_histogram_probe( int32 numBins, float64 startValue, - float64 binWidth, char *nameStr, VirtProcr 
*animPr ); -#define VMS__create_histogram_probe( numBins, startValue, \ - binWidth, nameStr, animPr ) \ - 0 /* do nothing */ - -void -VMS_impl__index_probe_by_its_name( int32 probeID, VirtProcr *animPr ); -#define VMS__index_probe_by_its_name( probeID, animPr ) \ - /* do nothing */ - -IntervalProbe * -VMS_impl__get_probe_by_name( char *probeName, VirtProcr *animPr ); -#define VMS__get_probe_by_name( probeID, animPr ) \ - NULL /* do nothing */ - -void -VMS_impl__record_sched_choice_into_probe( int32 probeID, VirtProcr *animPr ); -#define VMS__record_sched_choice_into_probe( probeID, animPr ) \ - /* do nothing */ - -void -VMS_impl__record_interval_start_in_probe( int32 probeID ); -#define VMS__record_interval_start_in_probe( probeID ) \ - /* do nothing */ - -void -VMS_impl__record_interval_end_in_probe( int32 probeID ); -#define VMS__record_interval_end_in_probe( probeID ) \ - /* do nothing */ - -inline void doNothing(); -void -VMS_impl__print_stats_of_probe( int32 probeID ); -#define VMS__print_stats_of_probe( probeID ) \ - doNothing/* do nothing */ - -void -VMS_impl__print_stats_of_all_probes(); -#define VMS__print_stats_of_all_probes \ - doNothing/* do nothing */ - -#endif /* defined STATS__ENABLE_PROBES */ - -#endif /* _PROBES_H */ - diff -r 7cff4e13d5c4 -r 651ee45615ae vmalloc.c --- a/vmalloc.c Fri Feb 10 12:05:17 2012 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,495 +0,0 @@ -/* - * Copyright 2009 OpenSourceCodeStewardshipFoundation.org - * Licensed under GNU General Public License version 2 - * - * Author: seanhalle@yahoo.com - * - * Created on November 14, 2009, 9:07 PM - */ - -#include -#include -#include -#include - -#include "VMS.h" -#include "Histogram/Histogram.h" - -/*Helper function - *Insert a newly generated free chunk into the first spot on the free list. 
- * The chunk is cast as a MallocProlog, so the various pointers in it are - * accessed with C's help -- and the size of the prolog is easily added to - * the pointer when a chunk is returned to the app -- so C handles changes - * in pointer sizes among machines. - * - *The list head is a normal MallocProlog struct -- identified by its - * prevChunkInFreeList being NULL -- the only one. - * - *The end of the list is identified by next chunk being NULL, as usual. - */ -void inline -add_chunk_to_free_list( MallocProlog *chunk, MallocProlog *listHead ) - { - chunk->nextChunkInFreeList = listHead->nextChunkInFreeList; - if( chunk->nextChunkInFreeList != NULL ) //if not last in free list - chunk->nextChunkInFreeList->prevChunkInFreeList = chunk; - chunk->prevChunkInFreeList = listHead; - listHead->nextChunkInFreeList = chunk; - } - - -/*This is sequential code, meant to only be called from the Master, not from - * any slave VPs. - *Search down list, checking size by the nextHigherInMem pointer, to find - * first chunk bigger than size needed. - *Shave off the extra and make it into a new free-list element, hook it in - * then return the address of the found element plus size of prolog. 
- * - *Will find a - */ -void *VMS__malloc( size_t sizeRequested ) - { MallocProlog *foundElem = NULL, *currElem, *newElem; - ssize_t amountExtra, sizeConsumed,sizeOfFound; - uint32 foundElemIsTopOfHeap; - - //============================= MEASUREMENT STUFF ======================== - #ifdef MEAS__TIME_MALLOC - int32 startStamp, endStamp; - saveLowTimeStampCountInto( startStamp ); - #endif - //======================================================================== - - //step up the size to be aligned at 16-byte boundary, prob better ways - sizeRequested = (sizeRequested + 16) & ~15; - currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList; - - while( currElem != NULL ) - { //check if size of currElem is big enough - sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem); - amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog); - if( amountExtra > 0 ) - { //found it, get out of loop - foundElem = currElem; - currElem = NULL; - } - else - currElem = currElem->nextChunkInFreeList; - } - - if( foundElem == NULL ) - { ERROR("\nmalloc failed\n") - return (void *)NULL; //indicates malloc failed - } - //Using a kludge to identify the element that is the top chunk in the - // heap -- saving top-of-heap addr in head's nextHigherInMem -- and - // save addr of start of heap in head's nextLowerInMem - //Will handle top of Heap specially - foundElemIsTopOfHeap = foundElem->nextHigherInMem == - _VMSMasterEnv->freeListHead->nextHigherInMem; - - //before shave off and try to insert new elem, remove found elem - //note, foundElem will never be the head, so always has valid prevChunk - foundElem->prevChunkInFreeList->nextChunkInFreeList = - foundElem->nextChunkInFreeList; - if( foundElem->nextChunkInFreeList != NULL ) - { foundElem->nextChunkInFreeList->prevChunkInFreeList = - foundElem->prevChunkInFreeList; - } - foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated - - //if enough, turn extra into new elem & insert it 
- if( amountExtra > 64 ) - { //make new elem by adding to addr of curr elem then casting - sizeConsumed = sizeof(MallocProlog) + sizeRequested; - newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed ); - newElem->nextLowerInMem = foundElem; //This is evil (but why?) - newElem->nextHigherInMem = foundElem->nextHigherInMem; //This is evil (but why?) - foundElem->nextHigherInMem = newElem; - if( ! foundElemIsTopOfHeap ) - { //there is no next higher for top of heap, so can't write to it - newElem->nextHigherInMem->nextLowerInMem = newElem; - } - add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead ); - } - else - { - sizeConsumed = sizeOfFound; - } - _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed; - - //============================= MEASUREMENT STUFF ======================== - #ifdef MEAS__TIME_MALLOC - saveLowTimeStampCountInto( endStamp ); - addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist ); - #endif - //======================================================================== - - //skip over the prolog by adding its size to the pointer return - return (void*)((uintptr_t)foundElem + sizeof(MallocProlog)); - } - -/*This is sequential code, meant to only be called from the Master, not from - * any slave VPs. - *Search down list, checking size by the nextHigherInMem pointer, to find - * first chunk bigger than size needed. - *Shave off the extra and make it into a new free-list element, hook it in - * then return the address of the found element plus size of prolog. - * - * The difference to the regular malloc is, that all the allocated chunks are - * aligned and padded to the size of a CACHE_LINE. Thus creating a new chunk - * before the aligned chunk. 
- */ -void *VMS__malloc_aligned( size_t sizeRequested ) - { MallocProlog *foundElem = NULL, *currElem, *newElem; - ssize_t amountExtra, sizeConsumed,sizeOfFound,prevAmount; - uint32 foundElemIsTopOfHeap; - - //============================= MEASUREMENT STUFF ======================== - #ifdef MEAS__TIME_MALLOC - uint32 startStamp, endStamp; - saveLowTimeStampCountInto( startStamp ); - #endif - //======================================================================== - - //step up the size to be multiple of the cache line size - sizeRequested = (sizeRequested + CACHE_LINE) & ~(CACHE_LINE-1); - currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList; - - while( currElem != NULL ) - { //check if size of currElem is big enough - sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem); - amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog); - if( amountExtra > 0 ) - { - //look if the found element is already aligned - if((((uintptr_t)currElem+sizeof(MallocProlog)) & (uintptr_t)(CACHE_LINE-1)) == 0){ - //found it, get out of loop - foundElem = currElem; - break; - }else{ - //find first aligned address and check if it's still big enough - //check also if the space before the aligned address is big enough - //for a new element - void *firstAlignedAddr = (void*)(((uintptr_t)currElem + 2*CACHE_LINE) & ~((uintptr_t)(CACHE_LINE-1))); - prevAmount = (uintptr_t)firstAlignedAddr - (uintptr_t)currElem; - sizeOfFound=(uintptr_t)currElem->nextHigherInMem -(uintptr_t)firstAlignedAddr + sizeof(MallocProlog); - amountExtra= sizeOfFound - sizeRequested - sizeof(MallocProlog); - if(prevAmount > 2*sizeof(MallocProlog) && amountExtra > 0 ){ - //found suitable element - //create new previous element and exit loop - MallocProlog *newAlignedElem = (MallocProlog*)firstAlignedAddr - 1; - - //insert new element into free list - if(currElem->nextChunkInFreeList != NULL) - currElem->nextChunkInFreeList->prevChunkInFreeList = newAlignedElem; - 
newAlignedElem->prevChunkInFreeList = currElem; - newAlignedElem->nextChunkInFreeList = currElem->nextChunkInFreeList; - currElem->nextChunkInFreeList = newAlignedElem; - - //set higherInMem and lowerInMem - newAlignedElem->nextHigherInMem = currElem->nextHigherInMem; - foundElemIsTopOfHeap = currElem->nextHigherInMem == - _VMSMasterEnv->freeListHead->nextHigherInMem; - if(!foundElemIsTopOfHeap) - currElem->nextHigherInMem->nextLowerInMem = newAlignedElem; - currElem->nextHigherInMem = newAlignedElem; - newAlignedElem->nextLowerInMem = currElem; - - //Found new element leaving loop - foundElem = newAlignedElem; - break; - } - } - - } - currElem = currElem->nextChunkInFreeList; - } - - if( foundElem == NULL ) - { ERROR("\nmalloc failed\n") - return (void *)NULL; //indicates malloc failed - } - //Using a kludge to identify the element that is the top chunk in the - // heap -- saving top-of-heap addr in head's nextHigherInMem -- and - // save addr of start of heap in head's nextLowerInMem - //Will handle top of Heap specially - foundElemIsTopOfHeap = foundElem->nextHigherInMem == - _VMSMasterEnv->freeListHead->nextHigherInMem; - - //before shave off and try to insert new elem, remove found elem - //note, foundElem will never be the head, so always has valid prevChunk - foundElem->prevChunkInFreeList->nextChunkInFreeList = - foundElem->nextChunkInFreeList; - if( foundElem->nextChunkInFreeList != NULL ) - { foundElem->nextChunkInFreeList->prevChunkInFreeList = - foundElem->prevChunkInFreeList; - } - foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated - - //if enough, turn extra into new elem & insert it - if( amountExtra > 64 ) - { //make new elem by adding to addr of curr elem then casting - sizeConsumed = sizeof(MallocProlog) + sizeRequested; - newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed ); - newElem->nextHigherInMem = foundElem->nextHigherInMem; - newElem->nextLowerInMem = foundElem; - foundElem->nextHigherInMem = newElem; - 
- if( ! foundElemIsTopOfHeap ) - { //there is no next higher for top of heap, so can't write to it - newElem->nextHigherInMem->nextLowerInMem = newElem; - } - add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead ); - } - else - { - sizeConsumed = sizeOfFound; - } - _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed; - - //============================= MEASUREMENT STUFF ======================== - #ifdef MEAS__TIME_MALLOC - saveLowTimeStampCountInto( endStamp ); - addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist ); - #endif - //======================================================================== - - //skip over the prolog by adding its size to the pointer return - return (void*)((uintptr_t)foundElem + sizeof(MallocProlog)); - } - - -/*This is sequential code -- only to be called from the Master - * When free, subtract the size of prolog from pointer, then cast it to a - * MallocProlog. Then check the nextLower and nextHigher chunks to see if - * one or both are also free, and coalesce if so, and if neither free, then - * add this one to free-list. 
- */ -void -VMS__free( void *ptrToFree ) - { MallocProlog *elemToFree, *nextLowerElem, *nextHigherElem; - size_t sizeOfElem; - uint32 lowerExistsAndIsFree, higherExistsAndIsFree; - - //============================= MEASUREMENT STUFF ======================== - #ifdef MEAS__TIME_MALLOC - int32 startStamp, endStamp; - saveLowTimeStampCountInto( startStamp ); - #endif - //======================================================================== - - if( ptrToFree < (void*)_VMSMasterEnv->freeListHead->nextLowerInMem || - ptrToFree > (void*)_VMSMasterEnv->freeListHead->nextHigherInMem ) - { //outside the range of data owned by VMS's malloc, so do nothing - return; - } - //subtract size of prolog to get pointer to prolog, then cast - elemToFree = (MallocProlog *)((uintptr_t)ptrToFree - sizeof(MallocProlog)); - sizeOfElem =(size_t)((uintptr_t)elemToFree->nextHigherInMem-(uintptr_t)elemToFree); - - if( elemToFree->prevChunkInFreeList != NULL ) - { printf( "error: freeing same element twice!" ); exit(1); - } - - _VMSMasterEnv->amtOfOutstandingMem -= sizeOfElem; - - nextLowerElem = elemToFree->nextLowerInMem; - nextHigherElem = elemToFree->nextHigherInMem; - - if( nextHigherElem == NULL ) - higherExistsAndIsFree = FALSE; - else //okay exists, now check if in the free-list by checking back ptr - higherExistsAndIsFree = (nextHigherElem->prevChunkInFreeList != NULL); - - if( nextLowerElem == NULL ) - lowerExistsAndIsFree = FALSE; - else //okay, it exists, now check if it's free - lowerExistsAndIsFree = (nextLowerElem->prevChunkInFreeList != NULL); - - - //now, know what exists and what's free - if( lowerExistsAndIsFree ) - { if( higherExistsAndIsFree ) - { //both exist and are free, so coalesce all three - //First, remove higher from free-list - nextHigherElem->prevChunkInFreeList->nextChunkInFreeList = - nextHigherElem->nextChunkInFreeList; - if( nextHigherElem->nextChunkInFreeList != NULL ) //end-of-list? 
- nextHigherElem->nextChunkInFreeList->prevChunkInFreeList = - nextHigherElem->prevChunkInFreeList; - //Now, fix-up sequence-in-mem list -- by side-effect, this also - // changes size of the lower elem, which is still in free-list - nextLowerElem->nextHigherInMem = nextHigherElem->nextHigherInMem; - if( nextHigherElem->nextHigherInMem != - _VMSMasterEnv->freeListHead->nextHigherInMem ) - nextHigherElem->nextHigherInMem->nextLowerInMem = nextLowerElem; - //notice didn't do anything to elemToFree -- it simply is no - // longer reachable from any of the lists. Wonder if could be a - // security leak because left valid addresses in it, - // but don't care for now. - } - else - { //lower is the only of the two that exists and is free, - //In this case, no adjustment to free-list, just change mem-list. - // By side-effect, changes size of the lower elem - nextLowerElem->nextHigherInMem = elemToFree->nextHigherInMem; - if( elemToFree->nextHigherInMem != - _VMSMasterEnv->freeListHead->nextHigherInMem ) - elemToFree->nextHigherInMem->nextLowerInMem = nextLowerElem; - } - } - else - { //lower either doesn't exist or isn't free, so check higher - if( higherExistsAndIsFree ) - { //higher exists and is the only of the two free - //First, in free-list, replace higher elem with the one to free - elemToFree->nextChunkInFreeList=nextHigherElem->nextChunkInFreeList; - elemToFree->prevChunkInFreeList=nextHigherElem->prevChunkInFreeList; - elemToFree->prevChunkInFreeList->nextChunkInFreeList = elemToFree; - if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list? - elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree; - //Now chg mem-list. 
By side-effect, changes size of elemToFree - elemToFree->nextHigherInMem = nextHigherElem->nextHigherInMem; - if( elemToFree->nextHigherInMem != - _VMSMasterEnv->freeListHead->nextHigherInMem ) - elemToFree->nextHigherInMem->nextLowerInMem = elemToFree; - } - else - { //neither lower nor higher is availabe to coalesce so add to list - // this makes prev chunk ptr non-null, which indicates it's free - elemToFree->nextChunkInFreeList = - _VMSMasterEnv->freeListHead->nextChunkInFreeList; - _VMSMasterEnv->freeListHead->nextChunkInFreeList = elemToFree; - if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list? - elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree; - elemToFree->prevChunkInFreeList = _VMSMasterEnv->freeListHead; - } - } - //============================= MEASUREMENT STUFF ======================== - #ifdef MEAS__TIME_MALLOC - saveLowTimeStampCountInto( endStamp ); - addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->freeTimeHist ); - #endif - //======================================================================== - - } - - -/*Allocates memory from the external system -- higher overhead - * - *Because of Linux's malloc throwing bizarre random faults when malloc is - * used inside a VMS virtual processor, have to pass this as a request and - * have the core loop do it when it gets around to it -- will look for these - * chores leftover from the previous animation of masterVP the next time it - * goes to animate the masterVP -- so it takes two separate masterVP - * animations, separated by work, to complete an external malloc or - * external free request. - * - *Thinking core loop accepts signals -- just looks if signal-location is - * empty or not -- - */ -void * -VMS__malloc_in_ext( size_t sizeRequested ) - { - /* - //This is running in the master, so no chance for multiple cores to be - // competing for the core's flag. 
- if( *(_VMSMasterEnv->coreLoopSignalAddr[ 0 ]) != 0 ) - { //something has already signalled to core loop, so save the signal - // and look, next time master animated, to see if can send it. - //Note, the addr to put a signal is in the coreloop's frame, so just - // checks it each time through -- make it volatile to avoid GCC - // optimizations -- it's a coreloop local var that only changes - // after jumping away. The signal includes the addr to send the - //return to -- even if just empty return completion-signal - // - //save the signal in some queue that the master looks at each time - // it starts up -- one loc says if empty for fast common case -- - //something like that -- want to hide this inside this call -- but - // think this has to come as a request -- req handler gives procr - // back to master loop, which gives it back to req handler at point - // it sees that core loop has sent return signal. Something like - // that. - saveTheSignal - - } - coreSigData->type = malloc; - coreSigData->sizeToMalloc = sizeRequested; - coreSigData->locToSignalCompletion = &figureOut; - _VMSMasterEnv->coreLoopSignals[ 0 ] = coreSigData; - */ - //just risk system-stack faults until get this figured out - return malloc( sizeRequested ); - } - - -/*Frees memory that was allocated in the external system -- higher overhead - * - *As noted in external malloc comment, this is clunky 'cause the free has - * to be called in the core loop. 
- */ -void -VMS__free_in_ext( void *ptrToFree ) - { - //just risk system-stack faults until get this figured out - free( ptrToFree ); - - //TODO: fix this -- so - } - - -/*Designed to be called from the main thread outside of VMS, during init - */ -MallocProlog * -VMS_ext__create_free_list() - { MallocProlog *freeListHead, *firstChunk; - - //Note, this is running in the main thread -- all increases in malloc - // mem and all frees of it must be done in this thread, with the - // thread's original stack available - freeListHead = malloc( sizeof(MallocProlog) ); - firstChunk = malloc( MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE ); - if( firstChunk == NULL ) {printf("malloc error\n"); exit(1);} - - //Touch memory to avoid page faults - void *ptr,*endPtr; - endPtr = (void*)firstChunk+MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE; - for(ptr = firstChunk; ptr < endPtr; ptr+=PAGE_SIZE) - { - *(char*)ptr = 0; - } - - freeListHead->prevChunkInFreeList = NULL; - //Use this addr to free the heap when cleanup - freeListHead->nextLowerInMem = firstChunk; - //to identify top-of-heap elem, compare this addr to elem's next higher - freeListHead->nextHigherInMem = (void*)( (uintptr_t)firstChunk + - MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE); - freeListHead->nextChunkInFreeList = firstChunk; - - firstChunk->nextChunkInFreeList = NULL; - firstChunk->prevChunkInFreeList = freeListHead; - //next Higher has to be set to top of chunk, so can calc size in malloc - firstChunk->nextHigherInMem = (void*)( (uintptr_t)firstChunk + - MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE); - firstChunk->nextLowerInMem = NULL; //identifies as bott of heap - - _VMSMasterEnv->amtOfOutstandingMem = 0; //none allocated yet - - return freeListHead; - } - - -/*Designed to be called from the main thread outside of VMS, during cleanup - */ -void -VMS_ext__free_free_list( MallocProlog *freeListHead ) - { - //stashed a ptr to the one and only bug chunk malloc'd from OS in the - // free list head's next lower in mem pointer - free( 
freeListHead->nextLowerInMem ); - - //don't free the head -- it'll be in an array eventually -- free whole - // array when all the free lists linked from it have already been freed - } - diff -r 7cff4e13d5c4 -r 651ee45615ae vmalloc.h --- a/vmalloc.h Fri Feb 10 12:05:17 2012 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,61 +0,0 @@ -/* - * Copyright 2009 OpenSourceCodeStewardshipFoundation.org - * Licensed under GNU General Public License version 2 - * - * Author: seanhalle@yahoo.com - * - * Created on November 14, 2009, 9:07 PM - */ - -#ifndef _VMALLOC_H -#define _VMALLOC_H - -#include -#include -#include "VMS_primitive_data_types.h" - -typedef struct _MallocProlog MallocProlog; - -struct _MallocProlog - { - MallocProlog *nextChunkInFreeList; - MallocProlog *prevChunkInFreeList; - MallocProlog *nextHigherInMem; - MallocProlog *nextLowerInMem; - }; -//MallocProlog - -typedef struct - { - MallocProlog *firstChunkInFreeList; - int32 numInList; //TODO not used - } -FreeListHead; - -void * -VMS__malloc( size_t sizeRequested ); - -void * -VMS__malloc_aligned( size_t sizeRequested ); - -void -VMS__free( void *ptrToFree ); - -/*Allocates memory from the external system -- higher overhead - */ -void * -VMS__malloc_in_ext( size_t sizeRequested ); - -/*Frees memory that was allocated in the external system -- higher overhead - */ -void -VMS__free_in_ext( void *ptrToFree ); - - -MallocProlog * -VMS_ext__create_free_list(); - -void -VMS_ext__free_free_list( MallocProlog *freeListHead ); - -#endif \ No newline at end of file diff -r 7cff4e13d5c4 -r 651ee45615ae vutilities.c --- a/vutilities.c Fri Feb 10 12:05:17 2012 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,25 +0,0 @@ -/* - * Copyright 2009 OpenSourceCodeStewardshipFoundation.org - * Licensed under GNU General Public License version 2 - * - * Author: seanhalle@yahoo.com - * - * Created on November 14, 2009, 9:07 PM - */ - -#include -#include - -#include "VMS.h" - - -inline char * -VMS__strDup( char *str 
) - { char *retStr; - - retStr = VMS__malloc( strlen(str) + 1 ); - if( str == NULL ) return str; - strcpy( retStr, str ); - - return retStr; - } diff -r 7cff4e13d5c4 -r 651ee45615ae vutilities.h --- a/vutilities.h Fri Feb 10 12:05:17 2012 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,20 +0,0 @@ -/* - * Copyright 2009 OpenSourceCodeStewardshipFoundation.org - * Licensed under GNU General Public License version 2 - * - * Author: seanhalle@yahoo.com - * - * Created on November 14, 2009, 9:07 PM - */ - - -#ifndef _UTILITIES_H -#define _UTILITIES_H - -#include -#include "VMS_primitive_data_types.h" - -inline char * -VMS__strDup( char *str ); - -#endif