# HG changeset patch # User Me # Date 1287227475 25200 # Node ID 72373405c8163cc74f64f6ff5c9613b0b386c20d # Parent 5388f1c2da6f17bfa3ba1bbbac67ea2062f7891a Adding TSC normalization -- still in progress, not working diff -r 5388f1c2da6f -r 72373405c816 CoreLoop.c --- a/CoreLoop.c Thu Oct 14 17:07:23 2010 -0700 +++ b/CoreLoop.c Sat Oct 16 04:11:15 2010 -0700 @@ -15,7 +15,11 @@ #include #include +//=========================================================================== +void +calcOffsets(); +//=========================================================================== /*This is the loop that runs in the OS Thread pinned to each core *Get virt procr from queue, * save state of current animator, then load in state of virt procr, using @@ -34,24 +38,13 @@ ThdParams *coreLoopThdParams; int thisCoresIdx; VirtProcr *currPr; - SRSWQueueStruc *readyToAnimateQ; + VMSQueueStruc *readyToAnimateQ; unsigned long coreMask; //has 1 in bit positions of allowed cores int errorCode; coreLoopThdParams = (ThdParams *)paramsIn; thisCoresIdx = coreLoopThdParams->coreNum; - //wait until signalled that setup is complete - pthread_mutex_lock( &suspendLock ); - while( !(_VMSMasterEnv->setupComplete) ) - { - pthread_cond_wait( &suspend_cond, - &suspendLock ); - } - pthread_mutex_unlock( &suspendLock ); - - //printf( "\nCore unsuspended: %d\n", coreLoopThdParams->coreNum ); - //set thread affinity //Linux requires pinning thd to core inside thread-function //Designate a core by a 1 in bit-position corresponding to the core @@ -60,8 +53,24 @@ pthread_t selfThd = pthread_self(); errorCode = pthread_setaffinity_np( selfThd, sizeof(coreMask), &coreMask); + + if(errorCode){ printf("\nset affinity failure\n"); exit(0); } + + //measure offsets between TSCs + //Core 0 is the reference core, the rest react to it. 
+ if( thisCoresIdx == 0 ) measureTSCOffsetsAsCore0(); + else measureTSCOffsetsAsRemoteCore( thisCoresIdx ); - if(errorCode){ printf("\nset affinity failure\n"); exit(0); } + //wait until signalled that setup is complete + pthread_mutex_lock( &suspendLock ); + while( !(_VMSMasterEnv->setupComplete) ) + { pthread_cond_wait( &suspend_cond, &suspendLock ); + } + pthread_mutex_unlock( &suspendLock ); + + + //printf( "\nCore unsuspended: %d\n", coreLoopThdParams->coreNum ); + //Save addr of "end core loop" label - jump to it to shut down coreloop @@ -79,7 +88,8 @@ // Get to work! -- virt procr jumps back here when suspends //Note, have to restore the frame-pointer before jump to here, to get - // this code to work right (readyToAnimateQ and so forth are frame-ptr relative) + // this code to work right (readyToAnimateQ and so forth are frame-ptr + // relative) CoreLoopStartPt: //Get virtual processor from queue @@ -172,7 +182,7 @@ coreLoop_Seq( void *paramsIn ) { VirtProcr *currPr; - SRSWQueueStruc *readyToAnimateQ; + VMSQueueStruc *readyToAnimateQ; ThdParams *coreLoopThdParams; int thisCoresIdx; @@ -189,14 +199,16 @@ _VMSMasterEnv->coreLoopStartPt = &&SeqCoreLoopStartPt; _VMSMasterEnv->coreLoopEndPt = &&SeqCoreLoopEndPt; - //Core loop has no values live upon CoreLoopStartPt except readyToAnimateQ + //Core loop has no values live upon CoreLoopStartPt except + // readyToAnimateQ // every value in the code is defined by a statement in core loop, // after the start point -- with the one exception of _VMSWorkQ // Get to work! 
-- virt procr jumps back here when done or suspends //Note, have to restore the frame-pointer before jump to here, to get - // this code to work right (readyToAnimateQ and so forth are frame-ptr relative) + // this code to work right (readyToAnimateQ and so forth are frame-ptr + // relative) SeqCoreLoopStartPt: //Get virtual processor from queue @@ -255,3 +267,123 @@ VMS__handle_dissipate_reqst( currPr ); //free shutdown pr, that jmpd here return; } + + +/*Core 0 does a poll-loop, with a stop for each other core. + * (Later do more sophisticated, pairing cores with least comm time, or maybe + * all cores to all cores to get better statistics.) + *It has an array of TSC stamps for each remote core. + *It looks in the core loop param of each remote, checks if the flag is + * reset. + * If yes, records its own TSC into its array for that core, then sets flag. + * Each time sees flag cleared, increases a counter of num times it's seen + * that. When reaches NUM_TSC_OFFSET_SAMPLES it stops. + *Then, uses values in the TSC arrays to estimate the offset between TSCs in + * different cores. + *Here's how: + * 1) throw out first round-trip (mis-match btwn times the different cores + * enter the loop shows up in first round-trip). + * 2) Take difference in local TSC between two successive sightings of flag + * being cleared. This is the round-trip time. + * 3) Take difference between local TSC at a given index in array and the + * remote TSC at the same index. This is one-way time plus offset. + * 4) Take difference between the two remote TSCs. This is remote's view of + * round-trip time. + * 5) take half the round-trip time as one-way time, subtract that from the + * "one-way+offset" value, for local round-trip and remote round-trip. 
+ */
+void
+measureTSCOffsetsAsCore0()
+ {
+   int coreIdx, coreOffset, pongNum, numRemotesDone = 0;
+   volatile TSCount *pings = pingTimes, *pongs = pongTimes; //re-read per poll
+
+   //Core 0 never pongs itself, so only cores 1..NUM_CORES-1 take part.
+   //A remote pongs only after it sees a ping, so core 0 sends the first one.
+   for( coreIdx = 1; coreIdx < NUM_CORES; coreIdx++ )
+      pings[ coreIdx * NUM_TSC_ROUND_TRIPS ] = getTSC();
+
+   //Do a poll-loop until every remote has answered its last ping
+   while( numRemotesDone < NUM_CORES - 1 )
+    {
+      for( coreIdx = 1; coreIdx < NUM_CORES; coreIdx++ )
+       { coreOffset = coreIdx * NUM_TSC_ROUND_TRIPS; // column * num in row
+         pongNum = pongNums[ coreIdx ];
+         if( pongNum >= NUM_TSC_ROUND_TRIPS ) continue; //remote finished
+         if( pongs[ coreOffset + pongNum ] == 0 ) continue; //no pong yet
+
+         //remote has set pong, so set the next ping for it to see -- except
+         // after the last pong, where the next slot is outside this row
+         if( pongNum < NUM_TSC_ROUND_TRIPS - 1 )
+            pings[ coreOffset + pongNum + 1 ] = getTSC();
+         else
+            numRemotesDone += 1;
+         pongNums[ coreIdx ] = pongNum + 1;
+       }//for
+    }//while
+
+   calcOffsets();
+ }
+
+
+/*Remote side of the ping-pong: for each round, spins until core 0 has put a
+ * non-zero ping in this core's row of pingTimes, then stores this core's own
+ * TSC in the matching slot of pongTimes.  Returns after NUM_TSC_ROUND_TRIPS
+ * pongs.  coreIdx selects the row; it is not range-checked here.
+ */
+void
+measureTSCOffsetsAsRemoteCore( int coreIdx )
+ {
+   int coreOffset, pongNum;
+   volatile TSCount *pings = pingTimes, *pongs = pongTimes; //re-read per poll
+
+   //The round number is kept locally: core 0 advances pongNums[] only after
+   // it has seen a pong, so polling it would re-stamp the same pong meanwhile
+   coreOffset = coreIdx * NUM_TSC_ROUND_TRIPS; // column * num in row
+   for( pongNum = 0; pongNum < NUM_TSC_ROUND_TRIPS; pongNum++ )
+    {
+      while( pings[ coreOffset + pongNum ] == 0 )
+         ; //core0 has not set this round's ping yet
+      //core0 has set the ping, so set the pong back to it
+      pongs[ coreOffset + pongNum ] = getTSC();
+    }
+ }
+
+
+/*Have the sets of times from the ping-pongs, now from those estimate the
+ * offsets. 
+ *Prints two guesses of (remote TSC - core 0 TSC) for every kept round trip.
+ */
+void
+calcOffsets()
+ {
+   int i, coreIdx, coreOffset;
+   long long localRoundTrip, remoteRoundTrip;   //signed 64-bit TSC differences
+   long long localToRemoteDiff, remoteToLocalDiff;
+   long long offsetGuessL2R, offsetGuessR2L;
+   TSCount *pings, *pongs;                       //this remote's rows
+
+   //Go through each remote's round trips, skipping the first (start-up skew)
+   for( coreIdx = 1; coreIdx < NUM_CORES; coreIdx++ )
+    { coreOffset = coreIdx * NUM_TSC_ROUND_TRIPS; // column * num in row
+      pings = pingTimes + coreOffset;   pongs = pongTimes + coreOffset;
+      for( i = 1; i < NUM_TSC_ROUND_TRIPS - 1; i++ )
+       { //stamps were taken in the order pings[i], pongs[i], pings[i+1], ...
+         localRoundTrip    = (long long)( pings[ i + 1 ] - pings[ i ] );
+         remoteRoundTrip   = (long long)( pongs[ i + 1 ] - pongs[ i ] );
+         localToRemoteDiff = (long long)( pongs[ i ] - pings[ i ] );     //one-way + offset
+         remoteToLocalDiff = (long long)( pings[ i + 1 ] - pongs[ i ] ); //one-way - offset
+         offsetGuessL2R    =   localToRemoteDiff - localRoundTrip/2;
+         offsetGuessR2L    = -(remoteToLocalDiff - localRoundTrip/2);
+         printf("offL2R: %lld | ", offsetGuessL2R);
+         printf("offR2L: %lld | ", offsetGuessR2L);
+         printf("localRT: %lld | ", localRoundTrip);
+         printf("remRT: %lld \n", remoteRoundTrip);
+       }
+    }
+ }
+
+
+
+
+
diff -r 5388f1c2da6f -r 72373405c816 VMS.c --- a/VMS.c Thu Oct 14 17:07:23 2010 -0700 +++ b/VMS.c Sat Oct 16 04:11:15 2010 -0700 @@ -79,7 +79,7 @@ void create_masterEnv() { MasterEnv *masterEnv; - SRSWQueueStruc **readyToAnimateQs; + VMSQueueStruc **readyToAnimateQs; int coreIdx; VirtProcr **masterVPs; SchedSlot ***allSchedSlots; //ptr to array of ptrs @@ -93,7 +93,7 @@ // masterEnv->coreLoopEndPt = ; //Make a readyToAnimateQ for each core loop - readyToAnimateQs = malloc( NUM_CORES * sizeof(SRSWQueueStruc *) ); + readyToAnimateQs = malloc( NUM_CORES * sizeof(VMSQueueStruc *) ); masterVPs = malloc( NUM_CORES * sizeof(VirtProcr *) ); //One array for each core, 3 in array, core's masterVP scheds all @@ -196,7 +196,22 @@ { //======================================================================== // Create the Threads - int coreIdx, retCode; + int coreIdx, retCode, i; + + //create the arrays used to measure TSC offsets between cores + pongNums = malloc( 
NUM_CORES * sizeof( int ) );
+   pingTimes = malloc( NUM_CORES * NUM_TSC_ROUND_TRIPS * sizeof( TSCount ) );
+   pongTimes = malloc( NUM_CORES * NUM_TSC_ROUND_TRIPS * sizeof( TSCount ) );
+
+   for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ )
+    {
+      pongNums[ coreIdx ] = 0;
+      for( i = 0; i < NUM_TSC_ROUND_TRIPS; i++ )
+       { //0 is what the poll-loops test for as "not stamped yet": clear BOTH
+         pingTimes[ coreIdx * NUM_TSC_ROUND_TRIPS + i ] = (TSCount) 0;
+         pongTimes[ coreIdx * NUM_TSC_ROUND_TRIPS + i ] = (TSCount) 0;
+       }
+    }
 //Need the threads to be created suspended, and wait for a signal // before proceeding -- gives time after creating to initialize other @@ -230,7 +245,7 @@ unsigned long long count = 0, freq = 0; double runTime; - startCount = getTSCount(); + startCount = getTSC(); //tell the core loop threads that setup is complete //get lock, to lock out any threads still starting up -- they'll see @@ -253,7 +268,7 @@ // the Master env and rest of VMS locations - endCount = getTSCount(); + endCount = getTSC(); count = endCount - startCount; runTime = (double)count / (double)TSCOUNT_FREQ; @@ -303,8 +318,7 @@ // for 2 params + return addr. 
Return addr (NULL) is in loc pointed to // by stackPtr, initData at stackPtr + 4 bytes, animatingPr just above stackLocs = malloc( VIRT_PROCR_STACK_SIZE ); - if(stackLocs == 0) - {perror("malloc stack"); exit(1);} + if(stackLocs == 0) {perror("error: malloc stack"); exit(1);} newPr->startOfStack = stackLocs; stackPtr = ( (char *)stackLocs + VIRT_PROCR_STACK_SIZE - 0x10 ); //setup __cdecl on stack -- coreloop will switch to stackPtr before jmp @@ -652,7 +666,7 @@ void VMS__cleanup_after_shutdown() { - SRSWQueueStruc **readyToAnimateQs; + VMSQueueStruc **readyToAnimateQs; int coreIdx; VirtProcr **masterVPs; SchedSlot ***allSchedSlots; //ptr to array of ptrs @@ -680,7 +694,7 @@ //=========================================================================== -inline TSCount getTSCount() +inline TSCount getTSC() { unsigned int low, high; TSCount out; diff -r 5388f1c2da6f -r 72373405c816 VMS.h --- a/VMS.h Thu Oct 14 17:07:23 2010 -0700 +++ b/VMS.h Sat Oct 16 04:11:15 2010 -0700 @@ -7,7 +7,7 @@ */ #ifndef _VMS_H -#define _VMS_H +#define _VMS_H #define __USE_GNU #include "VMS_primitive_data_types.h" @@ -56,10 +56,10 @@ #define SUCCESS 0 -#define writeVMSQ writeCASQ -#define readVMSQ readCASQ -#define makeVMSQ makeCASQ -#define VMSQueueStruc CASQueueStruc +#define writeVMSQ writeSRSWQ +#define readVMSQ readSRSWQ +#define makeVMSQ makeSRSWQ +#define VMSQueueStruc SRSWQueueStruc //#define thdAttrs NULL //For PThreads @@ -146,7 +146,7 @@ RequestHandler requestHandler; SchedSlot ***allSchedSlots; - SRSWQueueStruc **readyToAnimateQs; + VMSQueueStruc **readyToAnimateQs; VirtProcr **masterVPs; void *semanticEnv; @@ -179,6 +179,7 @@ volatile MasterEnv *_VMSMasterEnv; + //========================== void VMS__init(); @@ -244,6 +245,13 @@ void VMS__cleanup_after_shutdown(); +//========================== +void +measureTSCOffsetsAsCore0(); + +void +measureTSCOffsetsAsRemoteCore( int coreIdx ); + //============================= Statistics ================================== typedef unsigned 
long long TSCount; @@ -269,11 +277,17 @@ /* clobber */ : "%eax", "%edx" \ ); -inline TSCount getTSCount(); +inline TSCount getTSC(); + +inline TSCount getTSC(); //===================== Debug ========================== int numProcrsCreated; +int *pongNums; +TSCount *pongTimes; +TSCount *pingTimes; + #endif /* _VMS_H */