changeset 47:72373405c816 measure_brch

Adding TSC normalization -- still in progress, not working
author Me
date Sat, 16 Oct 2010 04:11:15 -0700
parents 5388f1c2da6f
children 054006c26b92
files CoreLoop.c VMS.c VMS.h
diffstat 3 files changed, 193 insertions(+), 33 deletions(-) [+]
line diff
     1.1 --- a/CoreLoop.c	Thu Oct 14 17:07:23 2010 -0700
     1.2 +++ b/CoreLoop.c	Sat Oct 16 04:11:15 2010 -0700
     1.3 @@ -15,7 +15,11 @@
     1.4  #include <pthread.h>
     1.5  #include <sched.h>
     1.6  
     1.7 +//===========================================================================
     1.8 +void
     1.9 +calcOffsets();
    1.10  
    1.11 +//===========================================================================
    1.12  /*This is the loop that runs in the OS Thread pinned to each core
    1.13   *Get virt procr from queue,
    1.14   * save state of current animator, then load in state of virt procr, using
    1.15 @@ -34,24 +38,13 @@
    1.16     ThdParams      *coreLoopThdParams;
    1.17     int             thisCoresIdx;
    1.18     VirtProcr      *currPr;
    1.19 -   SRSWQueueStruc *readyToAnimateQ;
    1.20 +   VMSQueueStruc *readyToAnimateQ;
    1.21     unsigned long   coreMask;  //has 1 in bit positions of allowed cores
    1.22     int             errorCode;
    1.23     
    1.24     coreLoopThdParams = (ThdParams *)paramsIn;
    1.25     thisCoresIdx = coreLoopThdParams->coreNum;
    1.26  
    1.27 -      //wait until signalled that setup is complete
    1.28 -   pthread_mutex_lock(   &suspendLock );
    1.29 -   while( !(_VMSMasterEnv->setupComplete) )
    1.30 -    {
    1.31 -      pthread_cond_wait( &suspend_cond,
    1.32 -                         &suspendLock );
    1.33 -    }
    1.34 -   pthread_mutex_unlock( &suspendLock );
    1.35 -
    1.36 -      //printf( "\nCore unsuspended: %d\n", coreLoopThdParams->coreNum );
    1.37 -
    1.38        //set thread affinity
    1.39        //Linux requires pinning thd to core inside thread-function
    1.40        //Designate a core by a 1 in bit-position corresponding to the core
    1.41 @@ -60,8 +53,24 @@
    1.42     pthread_t selfThd = pthread_self();
    1.43     errorCode =
    1.44     pthread_setaffinity_np( selfThd, sizeof(coreMask), &coreMask);
    1.45 +
    1.46 +   if(errorCode){ printf("\nset affinity failure\n"); exit(0); }
    1.47 +
    1.48 +      //measure offsets between TSCs
    1.49 +      //Core 0 is the reference core, the rest react to it.
    1.50 +   if( thisCoresIdx == 0 ) measureTSCOffsetsAsCore0();
    1.51 +   else measureTSCOffsetsAsRemoteCore( thisCoresIdx );
    1.52     
    1.53 -   if(errorCode){ printf("\nset affinity failure\n"); exit(0); }
    1.54 +      //wait until signalled that setup is complete
    1.55 +   pthread_mutex_lock(   &suspendLock );
    1.56 +   while( !(_VMSMasterEnv->setupComplete) )
    1.57 +    { pthread_cond_wait( &suspend_cond, &suspendLock );
    1.58 +    }
    1.59 +   pthread_mutex_unlock( &suspendLock );
    1.60 +
    1.61 +
    1.62 +      //printf( "\nCore unsuspended: %d\n", coreLoopThdParams->coreNum );
    1.63 +
    1.64  
    1.65     
    1.66        //Save addr of "end core loop" label - jump to it to shut down coreloop
    1.67 @@ -79,7 +88,8 @@
    1.68     
    1.69        // Get to work!  --  virt procr jumps back here when suspends
    1.70        //Note, have to restore the frame-pointer before jump to here, to get
    1.71 -      // this code to work right (readyToAnimateQ and so forth are frame-ptr relative)
    1.72 +      // this code to work right (readyToAnimateQ and so forth are frame-ptr
    1.73 +      // relative)
    1.74  CoreLoopStartPt:
    1.75     
    1.76        //Get virtual processor from queue
    1.77 @@ -172,7 +182,7 @@
    1.78  coreLoop_Seq( void *paramsIn )
    1.79   {
    1.80     VirtProcr      *currPr;
    1.81 -   SRSWQueueStruc *readyToAnimateQ;
    1.82 +   VMSQueueStruc *readyToAnimateQ;
    1.83     
    1.84     ThdParams      *coreLoopThdParams;
    1.85     int             thisCoresIdx;
    1.86 @@ -189,14 +199,16 @@
    1.87     _VMSMasterEnv->coreLoopStartPt = &&SeqCoreLoopStartPt;
    1.88     _VMSMasterEnv->coreLoopEndPt   = &&SeqCoreLoopEndPt;
    1.89  
    1.90 -      //Core loop has no values live upon CoreLoopStartPt except readyToAnimateQ
    1.91 +      //Core loop has no values live upon CoreLoopStartPt except
    1.92 +      // readyToAnimateQ
    1.93        // every value in the code is defined by a statement in core loop,
    1.94        // after the start point -- with the one exception of _VMSWorkQ
    1.95  
    1.96  
    1.97        // Get to work!  --  virt procr jumps back here when done or suspends
    1.98        //Note, have to restore the frame-pointer before jump to here, to get
    1.99 -      // this code to work right (readyToAnimateQ and so forth are frame-ptr relative)
   1.100 +      // this code to work right (readyToAnimateQ and so forth are frame-ptr
   1.101 +      // relative)
   1.102  SeqCoreLoopStartPt:
   1.103  
   1.104        //Get virtual processor from queue
   1.105 @@ -255,3 +267,123 @@
   1.106     VMS__handle_dissipate_reqst( currPr ); //free shutdown pr, that jmpd here
   1.107     return;
   1.108   }
   1.109 +
   1.110 +
   1.111 +/*Core 0 does a poll-loop, with a stop for each other core.
   1.112 + * (Later do more sophisticated, pairing cores with least comm time, or maybe
   1.113 + *  all cores to all cores to get better statistics.)
   1.114 + *It has an array of TSC stamps for each remote core.
   1.115 + *It looks in the core loop param of each remote, checks if the flag is
   1.116 + * reset.
   1.117 + * If yes, records its own TSC into its array for that core, then sets flag.
   1.118 + * Each time sees flag cleared, increases a counter of num times it's seen
   1.119 + * that.  When the counter reaches NUM_TSC_ROUND_TRIPS it stops.
   1.120 + *Then, uses values in the TSC arrays to estimate the offset between TSCs in
   1.121 + * different cores.
   1.122 + *Here's how:
   1.123 + * 1) throw out the first round-trip (the mismatch between the times at
   1.124 + *    which the different cores enter the loop shows up in the first one).
   1.125 + * 2) Take difference in local TSC between two successive sightings of flag
   1.126 + *    being cleared.  This is the round-trip time.
   1.127 + * 3) Take difference between local TSC at a given index in array and the
   1.128 + *    remote TSC at the same index.  This is one-way time plus offset.
   1.129 + * 4) Take difference between the two remote TSCs.  This is remote's view of
   1.130 + *    round-trip time.
   1.131 + * 5) take half the round-trip time as one-way time, subtract that from the
   1.132 + *    "one-way+offset" value, for local round-trip and remote round-trip.
   1.133 + */
   1.134 +void
   1.135 +measureTSCOffsetsAsCore0()
   1.136 + {
   1.137 +   int coreIdx, coreOffset, pongNum, numRemotesDone = 0, moreToDo = TRUE;
   1.138 +   TSCount timeStamp;
   1.139 +
   1.140 +      //Do a poll-loop, see if other cores have responded
   1.141 +   while( moreToDo )
   1.142 +    {
   1.143 +     // printf("error: TSC\n");
   1.144 +      for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ )
   1.145 +       { coreOffset = coreIdx * NUM_TSC_ROUND_TRIPS; // column * num in row
   1.146 +         pongNum = pongNums[ coreIdx ];
   1.147 +         if( pongTimes[ coreOffset + pongNum ] != 0 )
   1.148 +          {    //remote has set pong, so set the next ping for it to see
   1.149 +            timeStamp = getTSC();
   1.150 +            pingTimes[ coreOffset + pongNum + 1 ] = timeStamp;
   1.151 +
   1.152 +            if( pongNum == NUM_TSC_ROUND_TRIPS - 1 )
   1.153 +             {    //last pong, check if all are done
   1.154 +               numRemotesDone += 1;
   1.155 +               if( numRemotesDone == NUM_CORES )
   1.156 +                  moreToDo = FALSE;
   1.157 +             }
   1.158 +            if( pongNum >= NUM_TSC_ROUND_TRIPS ) printf("error: TSC\n");
   1.159 +            pongNums[ coreIdx ] += 1;
   1.160 +          }
   1.161 +       }//for
   1.162 +    }//while
   1.163 +
   1.164 +   calcOffsets();
   1.165 + }
   1.166 +
   1.167 +
   1.168 +void
   1.169 +measureTSCOffsetsAsRemoteCore( int coreIdx )
   1.170 + {
   1.171 +   int coreOffset, pongNum, numRemotesDone = 0, moreToDo = TRUE;
   1.172 +   TSCount timeStamp;
   1.173 +
   1.174 +      //Do a poll-loop, see if core 0 has set the next ping for this core
   1.175 +   coreOffset = coreIdx * NUM_TSC_ROUND_TRIPS; // column * num in row
   1.176 +   while( moreToDo )
   1.177 +    {
   1.178 +      pongNum = pongNums[ coreIdx ];
   1.179 +      if( pingTimes[ coreOffset + pongNum ] != 0 )
   1.180 +       {    //core0 has set next ping, so set the next pong back to it
   1.181 +         timeStamp = getTSC();
   1.182 +         pongTimes[ coreOffset + pongNum ] = timeStamp;
   1.183 +
   1.184 +         if( pongNum >= NUM_TSC_ROUND_TRIPS - 1 )
   1.185 +          { moreToDo = FALSE;
   1.186 +          }
   1.187 +       }
   1.188 +    }
   1.189 + }
   1.190 +
   1.191 +
   1.192 +/*Have the sets of times from the ping-pongs, now from those estimate the
   1.193 + * offsets.
   1.194 + * 
   1.195 + */
   1.196 +void
   1.197 +calcOffsets()
   1.198 + {
   1.199 +   int i, coreIdx, coreOffset;
   1.200 +   int localRoundTrip, remoteRoundTrip;
   1.201 +   int localToRemoteDiff, remoteToLocalDiff;
   1.202 +   int offsetGuessL2R, offsetGuessR2L;
   1.203 +
   1.204 +      //Take all round-trip times, skipping the first, adding them up
   1.205 +   for( coreIdx = 1; coreIdx < NUM_CORES; coreIdx++ )
   1.206 +    { coreOffset = coreIdx * NUM_CORES;
   1.207 +      for( i = 1; i < NUM_TSC_ROUND_TRIPS - 1; i++ )
   1.208 +       {
   1.209 +         localRoundTrip    = pingTimes[ i ] - pingTimes[ i + 1 ];
   1.210 +         remoteRoundTrip   = pongTimes[ coreOffset + i ] -
   1.211 +                             pongTimes[ coreOffset + i + 1 ];
   1.212 +            //Take diff btwn local TSC and remote TSC
   1.213 +         localToRemoteDiff = pongTimes[ coreOffset + i + 1 ] - pingTimes[ i];
   1.214 +         remoteToLocalDiff = pingTimes[ i ] - pongTimes[ coreOffset + i ];
   1.215 +         offsetGuessL2R    = localToRemoteDiff - localRoundTrip/2;
   1.216 +         offsetGuessR2L    = -(remoteToLocalDiff - localRoundTrip/2);
   1.217 +               printf("offL2R:  %d | ", offsetGuessL2R);
   1.218 +               printf("offR2L:  %d | ", offsetGuessR2L);
   1.219 +               printf("localRT: %d | ", localRoundTrip);
   1.220 +               printf("remRT:   %d \n", remoteRoundTrip);
   1.221 +       }
   1.222 +    }
   1.223 + }
   1.224 +
   1.225 +
   1.226 +
   1.227 +
   1.228 +
     2.1 --- a/VMS.c	Thu Oct 14 17:07:23 2010 -0700
     2.2 +++ b/VMS.c	Sat Oct 16 04:11:15 2010 -0700
     2.3 @@ -79,7 +79,7 @@
     2.4  void
     2.5  create_masterEnv()
     2.6   { MasterEnv       *masterEnv;
     2.7 -   SRSWQueueStruc **readyToAnimateQs;
     2.8 +   VMSQueueStruc **readyToAnimateQs;
     2.9     int              coreIdx;
    2.10     VirtProcr      **masterVPs;
    2.11     SchedSlot     ***allSchedSlots; //ptr to array of ptrs
    2.12 @@ -93,7 +93,7 @@
    2.13  //   masterEnv->coreLoopEndPt   = ;
    2.14     
    2.15        //Make a readyToAnimateQ for each core loop
    2.16 -   readyToAnimateQs = malloc( NUM_CORES * sizeof(SRSWQueueStruc *) );
    2.17 +   readyToAnimateQs = malloc( NUM_CORES * sizeof(VMSQueueStruc *) );
    2.18     masterVPs        = malloc( NUM_CORES * sizeof(VirtProcr *) );
    2.19  
    2.20        //One array for each core, 3 in array, core's masterVP scheds all
    2.21 @@ -196,7 +196,22 @@
    2.22   {
    2.23     //========================================================================
    2.24     //                      Create the Threads
    2.25 -   int coreIdx, retCode;
    2.26 +   int coreIdx, retCode, i;
    2.27 +
    2.28 +      //create the arrays used to measure TSC offsets between cores
    2.29 +   pongNums  = malloc( NUM_CORES * sizeof( int ) );
    2.30 +   pingTimes = malloc( NUM_CORES * NUM_TSC_ROUND_TRIPS * sizeof( TSCount ) );
    2.31 +   pongTimes = malloc( NUM_CORES * NUM_TSC_ROUND_TRIPS * sizeof( TSCount ) );
    2.32 +
    2.33 +   for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ )
    2.34 +    {
    2.35 +      pongNums[ coreIdx ] = 0;
    2.36 +      for( i = 0; i < NUM_TSC_ROUND_TRIPS; i++ )
    2.37 +       {
    2.38 +         pingTimes[ coreIdx * NUM_TSC_ROUND_TRIPS + i ] = (TSCount) 0;
    2.39 +         pingTimes[ coreIdx * NUM_TSC_ROUND_TRIPS + i ] = (TSCount) 0;
    2.40 +       }
    2.41 +    }
    2.42  
    2.43        //Need the threads to be created suspended, and wait for a signal
    2.44        // before proceeding -- gives time after creating to initialize other
    2.45 @@ -230,7 +245,7 @@
    2.46     unsigned long long count = 0, freq = 0;
    2.47     double   runTime;
    2.48  
    2.49 -      startCount = getTSCount();
    2.50 +      startCount = getTSC();
    2.51     
    2.52        //tell the core loop threads that setup is complete
    2.53        //get lock, to lock out any threads still starting up -- they'll see
    2.54 @@ -253,7 +268,7 @@
    2.55        // the Master env and rest of VMS locations
    2.56  
    2.57  
    2.58 -      endCount = getTSCount();
    2.59 +      endCount = getTSC();
    2.60        count = endCount - startCount;
    2.61  
    2.62        runTime = (double)count / (double)TSCOUNT_FREQ;
    2.63 @@ -303,8 +318,7 @@
    2.64        // for 2 params + return addr.  Return addr (NULL) is in loc pointed to
    2.65        // by stackPtr, initData at stackPtr + 4 bytes, animatingPr just above
    2.66     stackLocs = malloc( VIRT_PROCR_STACK_SIZE );
    2.67 -   if(stackLocs == 0)
    2.68 -   {perror("malloc stack"); exit(1);}
    2.69 +         if(stackLocs == 0) {perror("error: malloc stack"); exit(1);}
    2.70     newPr->startOfStack = stackLocs;
    2.71     stackPtr = ( (char *)stackLocs + VIRT_PROCR_STACK_SIZE - 0x10 );
    2.72        //setup __cdecl on stack -- coreloop will switch to stackPtr before jmp
    2.73 @@ -652,7 +666,7 @@
    2.74  void
    2.75  VMS__cleanup_after_shutdown()
    2.76   { 
    2.77 -   SRSWQueueStruc **readyToAnimateQs;
    2.78 +   VMSQueueStruc **readyToAnimateQs;
    2.79     int              coreIdx;
    2.80     VirtProcr      **masterVPs;
    2.81     SchedSlot     ***allSchedSlots; //ptr to array of ptrs
    2.82 @@ -680,7 +694,7 @@
    2.83  
    2.84  //===========================================================================
    2.85  
    2.86 -inline TSCount getTSCount()
    2.87 +inline TSCount getTSC()
    2.88   { unsigned int low, high;
    2.89     TSCount  out;
    2.90  
     3.1 --- a/VMS.h	Thu Oct 14 17:07:23 2010 -0700
     3.2 +++ b/VMS.h	Sat Oct 16 04:11:15 2010 -0700
     3.3 @@ -7,7 +7,7 @@
     3.4   */
     3.5  
     3.6  #ifndef _VMS_H
     3.7 -#define	_VMS_H
     3.8 +#define _VMS_H
     3.9  #define __USE_GNU
    3.10  
    3.11  #include "VMS_primitive_data_types.h"
    3.12 @@ -56,10 +56,10 @@
    3.13  
    3.14  #define SUCCESS 0
    3.15  
    3.16 -#define writeVMSQ     writeCASQ
    3.17 -#define readVMSQ      readCASQ
    3.18 -#define makeVMSQ      makeCASQ
    3.19 -#define VMSQueueStruc CASQueueStruc
    3.20 +#define writeVMSQ     writeSRSWQ
    3.21 +#define readVMSQ      readSRSWQ
    3.22 +#define makeVMSQ      makeSRSWQ
    3.23 +#define VMSQueueStruc SRSWQueueStruc
    3.24  
    3.25  //#define thdAttrs NULL  //For PThreads
    3.26  
    3.27 @@ -146,7 +146,7 @@
    3.28     RequestHandler   requestHandler;
    3.29     
    3.30     SchedSlot     ***allSchedSlots;
    3.31 -   SRSWQueueStruc **readyToAnimateQs;
    3.32 +   VMSQueueStruc **readyToAnimateQs;
    3.33     VirtProcr      **masterVPs;
    3.34  
    3.35     void            *semanticEnv;
    3.36 @@ -179,6 +179,7 @@
    3.37  
    3.38  volatile MasterEnv      *_VMSMasterEnv;
    3.39  
    3.40 +
    3.41  //==========================
    3.42  void
    3.43  VMS__init();
    3.44 @@ -244,6 +245,13 @@
    3.45  void
    3.46  VMS__cleanup_after_shutdown();
    3.47  
    3.48 +//==========================
    3.49 +void
    3.50 +measureTSCOffsetsAsCore0();
    3.51 +
    3.52 +void
    3.53 +measureTSCOffsetsAsRemoteCore( int coreIdx );
    3.54 +
    3.55  //============================= Statistics ==================================
    3.56  
    3.57  typedef unsigned long long TSCount;
    3.58 @@ -269,11 +277,17 @@
    3.59     /* clobber */ : "%eax", "%edx"         \
    3.60                  );
    3.61  
    3.62 -inline TSCount getTSCount();
    3.63 +inline TSCount getTSC();
    3.64 +
    3.65 +inline TSCount getTSC();
    3.66  
    3.67  //===================== Debug ==========================
    3.68  int numProcrsCreated;
    3.69  
    3.70  
    3.71 +int      *pongNums;
    3.72 +TSCount  *pongTimes;
    3.73 +TSCount  *pingTimes;
    3.74 +
    3.75  #endif	/* _VMS_H */
    3.76