changeset 238:b95711c6965c Common_Ancestor

counters work now
author Nina Engelhardt <nengel@mailbox.tu-berlin.de>
date Wed, 21 Mar 2012 11:09:11 +0100
parents ce1f57e10fac
children 7ed97c961901
files AnimationMaster.c CoreController.c Hardware_Dependent/VMS__HW_measurement.c Hardware_Dependent/VMS__HW_measurement.h Services_Offered_by_VMS/Measurement_and_Stats/MEAS__macros.h VMS__int.c
diffstat 6 files changed, 229 insertions(+), 15 deletions(-) [+]
line diff
     1.1 --- a/AnimationMaster.c	Mon Mar 19 10:03:45 2012 -0700
     1.2 +++ b/AnimationMaster.c	Wed Mar 21 11:09:11 2012 +0100
     1.3 @@ -130,7 +130,7 @@
     1.4     RequestHandler  requestHandler;
     1.5     void           *semanticEnv;
     1.6     int32           thisCoresIdx;
     1.7 -      
     1.8 +  
     1.9     //======================== Initializations ========================
    1.10     masterEnv        = (MasterEnv*)_VMSMasterEnv;
    1.11     
    1.12 @@ -140,7 +140,8 @@
    1.13     requestHandler   = masterEnv->requestHandler;
    1.14     slaveAssigner    = masterEnv->slaveAssigner;
    1.15     semanticEnv      = masterEnv->semanticEnv;
    1.16 -
    1.17 +   
    1.18 +      HOLISTIC__Insert_Master_Global_Vars;
    1.19     
    1.20     //======================== animationMaster ========================
    1.21     while(1){
    1.22 @@ -158,17 +159,20 @@
    1.23         {
    1.24           currSlot->workIsDone         = FALSE;
    1.25           currSlot->needsSlaveAssigned = TRUE;
    1.26 -
    1.27 +         
    1.28 +       HOLISTIC__Record_AppResponder_start;
    1.29                 MEAS__startReqHdlr;
    1.30                 
    1.31              //process the requests made by the slave (held inside slave struc)
    1.32           (*requestHandler)( currSlot->slaveAssignedToSlot, semanticEnv );
    1.33           
    1.34 +         HOLISTIC__Record_AppResponder_end;
    1.35                 MEAS__endReqHdlr;
    1.36         }
    1.37           //If slot empty, hand to Assigner to fill with a slave
    1.38        if( currSlot->needsSlaveAssigned )
    1.39         {    //Call plugin's Assigner to give slot a new slave
    1.40 +          HOLISTIC__Record_Assigner_start;
    1.41           assignedSlaveVP =
    1.42            (*slaveAssigner)( semanticEnv, currSlot );
    1.43           
    1.44 @@ -178,6 +182,8 @@
    1.45              assignedSlaveVP->animSlotAssignedTo       = currSlot;
    1.46              currSlot->needsSlaveAssigned  = FALSE;
    1.47              numSlotsFilled               += 1;
    1.48 +            
    1.49 +            HOLISTIC__Record_Assigner_end;
    1.50            }
    1.51         }
    1.52      }
     2.1 --- a/CoreController.c	Mon Mar 19 10:03:45 2012 -0700
     2.2 +++ b/CoreController.c	Wed Mar 21 11:09:11 2012 +0100
     2.3 @@ -77,7 +77,7 @@
     2.4     volatile int32 *addrOfMasterLock; //thing pointed to is volatile, not ptr
     2.5     SlaveVP        *thisCoresMasterVP;
     2.6        //Variables used for pthread related things
     2.7 -   ThdParams      *coreCtlrThdParams;
     2.8 +   ThdParams      *thisCoresThdParams;
     2.9     cpu_set_t       coreMask;  //used during pinning pthread to CPU core
    2.10     int32           errorCode;
    2.11        //Variables used during measurements
    2.12 @@ -88,8 +88,8 @@
    2.13  
    2.14     
    2.15     //===============  Initializations ===================
    2.16 -   coreCtlrThdParams = (ThdParams *)paramsIn;
    2.17 -   thisCoresIdx = coreCtlrThdParams->coreNum;
    2.18 +   thisCoresThdParams = (ThdParams *)paramsIn;
    2.19 +   thisCoresIdx = thisCoresThdParams->coreNum;
    2.20  
    2.21        //Assembly that saves addr of label of return instr -- label in assmbly
    2.22     recordCoreCtlrReturnLabelAddr((void**)&(_VMSMasterEnv->coreCtlrReturnPt));
    2.23 @@ -105,7 +105,7 @@
    2.24        //Linux requires pinning to be done inside the thread-function
    2.25        //Designate a core by a 1 in bit-position corresponding to the core
    2.26     CPU_ZERO(&coreMask); //initialize mask bits to zero
    2.27 -   CPU_SET(coreCtlrThdParams->coreNum,&coreMask); //set bit repr the coreNum
    2.28 +   CPU_SET(thisCoresThdParams->coreNum,&coreMask); //set bit repr the coreNum
    2.29     pthread_t selfThd = pthread_self();
    2.30     errorCode =
    2.31     pthread_setaffinity_np( selfThd, sizeof(coreMask), &coreMask);
    2.32 @@ -118,8 +118,10 @@
    2.33      }
    2.34     pthread_mutex_unlock( &suspendLock );
    2.35     
    2.36 +            HOLISTIC__CoreCtrl_Setup;
    2.37 +   
    2.38           DEBUG__printf1(TRUE, "started coreCtrlr", thisCoresIdx );
    2.39 -
    2.40 +         
    2.41     //====================== The Core Controller ======================
    2.42     while(1)  //An endless loop is just one way of doing the control structure
    2.43      {        //Assembly code switches the core between animating a VP and
    2.44 @@ -141,6 +143,7 @@
    2.45         { numRepetitionsWithNoWork = 0;     //reset back2back master count
    2.46           currSlotIdx ++;
    2.47           currVP = currSlot->slaveAssignedToSlot;
    2.48 +         HOLISTIC__Record_last_work;
    2.49         }
    2.50        else //slot is empty, so switch to master
    2.51         {
    2.52 @@ -149,6 +152,7 @@
    2.53           currVP = NULL;
    2.54  
    2.55                 MEAS__Capture_Pre_Master_Lock_Point;
    2.56 +               HOLISTIC__Record_AppResponderInvocation_start;
    2.57  
    2.58           int numTriesToGetLock = 0; int gotLock = 0;
    2.59           while( currVP == NULL ) //keep going until get master lock
    2.60 @@ -189,10 +193,13 @@
    2.61                 MEAS__Capture_Post_Master_Lock_Point;
    2.62         }
    2.63  
    2.64 +        HOLISTIC__Record_Work_start;
    2.65  
    2.66        switchToSlv(currVP); //Slave suspend makes core "return" from this call
    2.67        flushRegisters();    //prevent GCC optimization from doing bad things 
    2.68  
    2.69 +        HOLISTIC__Record_Work_end;
    2.70 +      
    2.71               MEAS__Capture_End_Susp_in_CoreCtlr_ForSys;
    2.72            
    2.73      }//while(1)
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/Hardware_Dependent/VMS__HW_measurement.c	Wed Mar 21 11:09:11 2012 +0100
     3.3 @@ -0,0 +1,74 @@
     3.4 +#include <unistd.h>
     3.5 +#include <fcntl.h>
     3.6 +#include <linux/types.h>
     3.7 +#include <linux/perf_event.h>
     3.8 +#include <errno.h>
     3.9 +#include <sys/syscall.h>
    3.10 +#include <linux/prctl.h>
    3.11 +
    3.12 +#include "../VMS.h"
    3.13 +
    3.14 +void setup_perf_counters(){ // Opens one cycles counter and one instructions counter per core and stores the fds in _VMSMasterEnv
    3.15 +#ifdef HOLISTIC__TURN_ON_PERF_COUNTERS
    3.16 +    struct perf_event_attr hw_event;
    3.17 +   memset(&hw_event,0,sizeof(hw_event));
    3.18 +   	hw_event.type = PERF_TYPE_HARDWARE;
    3.19 +	hw_event.size = sizeof(hw_event);
    3.20 +	hw_event.disabled = 1; /* created disabled; enabled by the prctl() call at the end */
    3.21 +        hw_event.freq = 0;
    3.22 +	hw_event.inherit = 1; /* 1: children inherit it */
    3.23 +	hw_event.pinned = 1; /* 1: must always be on PMU */
    3.24 +	hw_event.exclusive = 0; /* 0: need not be the only group on the PMU */
    3.25 +	hw_event.exclude_user = 0; /* 0: user-space events ARE counted */
    3.26 +	hw_event.exclude_kernel = 0; /* 0: kernel events ARE counted */
    3.27 +	hw_event.exclude_hv = 0; /* 0: hypervisor events ARE counted */
    3.28 +	hw_event.exclude_idle = 0; /* 0: keeps counting when idle */
    3.29 +	hw_event.mmap = 0; /* 0: no mmap data */
    3.30 +	hw_event.comm = 0; /* 0: no comm data */
    3.31 +
    3.32 +        int coreIdx;
    3.33 +   for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ )
    3.34 +    {
    3.35 +       	hw_event.config = 0x0000000000000000; //cycles (0 is PERF_COUNT_HW_CPU_CYCLES in linux/perf_event.h)
    3.36 +        _VMSMasterEnv->cycles_counter_fd[coreIdx] = syscall(__NR_perf_event_open, &hw_event,
    3.37 + 		0,//pid_t pid, (0 = the calling thread)
    3.38 +		coreIdx,//int cpu, (count only while running on this core)
    3.39 +		-1,//int group_fd, (-1 = no event group)
    3.40 +		0//unsigned long flags
    3.41 +	);
    3.42 +        if (_VMSMasterEnv->cycles_counter_fd[coreIdx]<0){ // failure is only reported; the negative fd stays stored
    3.43 +            fprintf(stderr,"On core %d: ",coreIdx);
    3.44 +            perror("Failed to open cycles counter");
    3.45 +        }
    3.46 +        hw_event.config = 0x0000000000000001; //instrs (1 is PERF_COUNT_HW_INSTRUCTIONS in linux/perf_event.h)
    3.47 +        _VMSMasterEnv->instrs_counter_fd[coreIdx] = syscall(__NR_perf_event_open, &hw_event,
    3.48 + 		0,//pid_t pid, (0 = the calling thread)
    3.49 +		coreIdx,//int cpu, (count only while running on this core)
    3.50 +		-1,//int group_fd, (-1 = no event group)
    3.51 +		0//unsigned long flags
    3.52 +	);
    3.53 +        if (_VMSMasterEnv->instrs_counter_fd[coreIdx]<0){ // failure is only reported; the negative fd stays stored
    3.54 +            fprintf(stderr,"On core %d: ",coreIdx);
    3.55 +            perror("Failed to open instrs counter");
    3.56 +        }
    3.57 +   }
    3.58 +        
    3.59 +   prctl(PR_TASK_PERF_EVENTS_ENABLE); // NOTE(review): return value unchecked; prctl() is declared in <sys/prctl.h> -- confirm VMS.h pulls it in
    3.60 +#endif
    3.61 +}
    3.62 +
    3.63 +__inline__ uint64_t rdtsc(){ // Returns the 64-bit time-stamp counter, read right after a cpuid instruction
    3.64 +    uint32_t lo, hi;
    3.65 +    __asm__ __volatile__ (      // serialize: cpuid (with eax zeroed) is issued before rdtsc
    3.66 +    "xorl %%eax,%%eax \n        cpuid"
    3.67 +    ::: "%rax", "%rbx", "%rcx", "%rdx"); // NOTE(review): 64-bit register names in the clobber list -- presumably x86-64 only, confirm
    3.68 +    __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); // eax -> lo, edx -> hi
    3.69 +   /* asm volatile("RDTSC;"                   
    3.70 +                 "movl %%eax, %0;"         
    3.71 +                 "movl %%edx, %1;"         
    3.72 +               : "=m" (lo), "=m" (hi)
    3.73 +               :                        
    3.74 +               : "%eax", "%edx"         
    3.75 +                ); */
    3.76 +    return (uint64_t)hi << 32 | lo; // hi forms the upper 32 bits, lo the lower 32 bits
    3.77 +}
    3.78 \ No newline at end of file
     4.1 --- a/Hardware_Dependent/VMS__HW_measurement.h	Mon Mar 19 10:03:45 2012 -0700
     4.2 +++ b/Hardware_Dependent/VMS__HW_measurement.h	Wed Mar 21 11:09:11 2012 +0100
     4.3 @@ -58,5 +58,6 @@
     4.4  //#define NUM_TSC_ROUND_TRIPS 10
     4.5  
     4.6  void setup_perf_counters();
     4.7 +uint64_t rdtsc(void);
     4.8  #endif	/* */
     4.9  
     5.1 --- a/Services_Offered_by_VMS/Measurement_and_Stats/MEAS__macros.h	Mon Mar 19 10:03:45 2012 -0700
     5.2 +++ b/Services_Offered_by_VMS/Measurement_and_Stats/MEAS__macros.h	Wed Mar 21 11:09:11 2012 +0100
     5.3 @@ -313,6 +313,23 @@
     5.4      Timestamp_end
     5.5     };
     5.6     
     5.7 +   #define saveCyclesAndInstrs(core,cycles,instrs) do{ /* reads both perf counters of `core` into the lvalues `cycles` and `instrs`; a failed read is reported with perror and the value is set to 0 */ \
     5.8 +   int cycles_fd = _VMSMasterEnv->cycles_counter_fd[(core)]; \
     5.9 +   int instrs_fd = _VMSMasterEnv->instrs_counter_fd[(core)]; \
    5.10 +   int nread;                                           \
    5.11 +                                                        \
    5.12 +   nread = read(cycles_fd,&(cycles),sizeof(cycles));    \
    5.13 +   if(nread<0){                                         \
    5.14 +       perror("Error reading cycles counter");          \
    5.15 +       (cycles) = 0;                                    \
    5.16 +   }                                                    \
    5.17 +                                                        \
    5.18 +   nread = read(instrs_fd,&(instrs),sizeof(instrs));    \
    5.19 +   if(nread<0){ /* message now names the counter that actually failed */ \
    5.20 +       perror("Error reading instrs counter");          \
    5.21 +       (instrs) = 0;                                    \
    5.22 +   }                                                    \
    5.23 +   } while (0) 
    5.24  
    5.25     #define MEAS__Insert_Counter_Meas_Fields_into_MasterEnv \
    5.26       int cycles_counter_fd[NUM_CORES]; \
    5.27 @@ -320,22 +337,130 @@
    5.28       uint64 start_master_lock[NUM_CORES][2]; \
    5.29       CounterHandler counterHandler;
    5.30  
    5.31 -   #define HOLISTIC__Setup_Perf_Counters void setup_perf_counters();
    5.32 +   #define HOLISTIC__Setup_Perf_Counters setup_perf_counters(); /* expands to a call statement (own ';' included), no longer to a prototype */
    5.33     
    5.34 -   #define HOLISTIC__Start_Perf_Counters prctl(PR_TASK_PERF_EVENTS_ENABLE);
    5.35 +
    5.36 +   #define HOLISTIC__CoreCtrl_Setup /* declares the locals counterHandler and lastVPBeforeMaster at the expansion site; the tsc_offset handshake that follows is commented out */ \
    5.37 +   CounterHandler counterHandler = _VMSMasterEnv->counterHandler; \
    5.38 +   SlaveVP      *lastVPBeforeMaster = NULL; \
    5.39 +   /*if(thisCoresThdParams->coreNum == 0){ \
    5.40 +       uint64 initval = tsc_offset_send(thisCoresThdParams,0); \
    5.41 +       while(!coreCtlrThdParams[NUM_CORES - 2]->ret_tsc); \
    5.42 +   } \
    5.43 +   if(0 < (thisCoresThdParams->coreNum) && (thisCoresThdParams->coreNum) < (NUM_CORES - 1)){ \
    5.44 +       ThdParams* sendCoresThdParams = coreCtlrThdParams[thisCoresThdParams->coreNum - 1]; \
    5.45 +       int sndctr = tsc_offset_resp(sendCoresThdParams, 0); \
    5.46 +       uint64 initval = tsc_offset_send(thisCoresThdParams,0); \
    5.47 +       while(!coreCtlrThdParams[NUM_CORES - 2]->ret_tsc); \
    5.48 +   }  \
    5.49 +   if(thisCoresThdParams->coreNum == (NUM_CORES - 1)){ \
    5.50 +       ThdParams* sendCoresThdParams = coreCtlrThdParams[thisCoresThdParams->coreNum - 1]; \
    5.51 +       int sndctr = tsc_offset_resp(sendCoresThdParams,0); \
    5.52 +   }*/
    5.53 +   
    5.54 +#define HOLISTIC__Record_last_work lastVPBeforeMaster = currVP; /* remembers the VP just picked; consumed by HOLISTIC__Record_AppResponderInvocation_start */
    5.55 +   
    5.56 +   #define HOLISTIC__Insert_Master_Global_Vars /* declares the master-loop locals used by the HOLISTIC__Record_AppResponder_* and HOLISTIC__Record_Assigner_* macros */ \
    5.57 +        int vpid = 0, task = 0; /* 0 = no VP pending; HOLISTIC__Record_Assigner_start tests vpid > 0 and may run before HOLISTIC__Record_AppResponder_start has assigned them */ \
    5.58 +        CounterHandler counterHandler = masterEnv->counterHandler;
    5.59 +   
    5.60 +   #define HOLISTIC__Record_AppResponderInvocation_start /* samples this core's counters, then either reports them or stashes them */ \
    5.61 +      uint64 cycles,instrs; \
    5.62 +      saveCyclesAndInstrs(thisCoresIdx,cycles, instrs); \
    5.63 +      if(lastVPBeforeMaster){ /* set by HOLISTIC__Record_last_work: report against that VP, then clear it */ \
    5.64 +        (*counterHandler)(AppResponderInvocation_start,lastVPBeforeMaster->slaveID,lastVPBeforeMaster->assignCount,lastVPBeforeMaster,cycles,instrs); \
    5.65 +        lastVPBeforeMaster = NULL; \
    5.66 +      } else { /* no VP recorded: keep the readings per core; HOLISTIC__Record_Assigner_end reads them back */ \
    5.67 +          _VMSMasterEnv->start_master_lock[thisCoresIdx][0] = cycles; \
    5.68 +          _VMSMasterEnv->start_master_lock[thisCoresIdx][1] = instrs; \
    5.69 +      }
    5.70 + 
    5.71 +           /* Request Handler may call resume() on the VP, but we want to 
    5.72 +                * account the whole interval to the same task. Therefore, need
    5.73 +                * to save task ID at the beginning.
    5.74 +                * 
    5.75 +                * Using this value as "end of AppResponder Invocation Time"
    5.76 +                * is possible if there is only one SchedSlot per core -
    5.77 +                * invoking processor is last to be treated here! If more than
    5.78 +                * one slot, MasterLoop processing time for all but the last VP
    5.79 +                * would be erroneously counted as invocation time.
    5.80 +                */
    5.81 +   #define HOLISTIC__Record_AppResponder_start /* latches vpid/task from the slot's slave, then reports AppResponder_start with fresh counter readings */ \
    5.82 +               vpid = currSlot->slaveAssignedToSlot->slaveID; \
    5.83 +               task = currSlot->slaveAssignedToSlot->assignCount; \
    5.84 +               uint64 cycles, instrs; \
    5.85 +               saveCyclesAndInstrs(thisCoresIdx,cycles, instrs); \
    5.86 +               (*counterHandler)(AppResponder_start,vpid,task,currSlot->slaveAssignedToSlot,cycles,instrs);
    5.87 +
    5.88 +   #define HOLISTIC__Record_AppResponder_end /* reports AppResponder_end with fresh readings, then Timestamp_end with rdtsc(); uses the vpid/task latched by HOLISTIC__Record_AppResponder_start */ \
    5.89 +        uint64 cycles2,instrs2; \
    5.90 +        saveCyclesAndInstrs(thisCoresIdx,cycles2, instrs2); \
    5.91 +        (*counterHandler)(AppResponder_end,vpid,task,currSlot->slaveAssignedToSlot,cycles2,instrs2); \
    5.92 +        (*counterHandler)(Timestamp_end,vpid,task,currSlot->slaveAssignedToSlot,rdtsc(),0);
    5.93 +
    5.94 +   
    5.95 +   /* Don't know who to account time to yet - goes to assigned VP
    5.96 +    * after the call.
    5.97 +    */
    5.98 +   #define HOLISTIC__Record_Assigner_start /* declares empty, tmp_cycles, tmp_instrs and tsc at the expansion site; HOLISTIC__Record_Assigner_end reads them */ \
    5.99 +       int empty = FALSE; \
   5.100 +       if(currSlot->slaveAssignedToSlot == NULL){ /* slot held no slave before the assigner ran */ \
   5.101 +           empty= TRUE; \
   5.102 +       } \
   5.103 +       uint64 tmp_cycles; \
   5.104 +       uint64 tmp_instrs; \
   5.105 +       saveCyclesAndInstrs(thisCoresIdx,tmp_cycles,tmp_instrs); \
   5.106 +       uint64 tsc = rdtsc(); \
   5.107 +       if(vpid > 0) { /* a VP is still pending from HOLISTIC__Record_AppResponder_start: report once, then clear */ \
   5.108 +           (*counterHandler)(NextAssigner_start,vpid,task,currSlot->slaveAssignedToSlot,tmp_cycles,tmp_instrs); \
   5.109 +           vpid = 0; \
   5.110 +           task = 0; \
   5.111 +        }
   5.112 +
   5.113 +   #define HOLISTIC__Record_Assigner_end /* reports the assigner interval against assignedSlaveVP; relies on empty, tsc, tmp_cycles and tmp_instrs from HOLISTIC__Record_Assigner_start */ \
   5.114 +        uint64 cycles; \
   5.115 +        uint64 instrs; \
   5.116 +        saveCyclesAndInstrs(thisCoresIdx,cycles,instrs); \
   5.117 +        if(empty){ /* slot was empty: report the readings saved in start_master_lock as AssignerInvocation_start */ \
   5.118 +            (*counterHandler)(AssignerInvocation_start,assignedSlaveVP->slaveID,assignedSlaveVP->assignCount,assignedSlaveVP,masterEnv->start_master_lock[thisCoresIdx][0],masterEnv->start_master_lock[thisCoresIdx][1]); \
   5.119 +        } \
   5.120 +        (*counterHandler)(Timestamp_start,assignedSlaveVP->slaveID,assignedSlaveVP->assignCount,assignedSlaveVP,tsc,0); \
   5.121 +        (*counterHandler)(Assigner_start,assignedSlaveVP->slaveID,assignedSlaveVP->assignCount,assignedSlaveVP,tmp_cycles,tmp_instrs); \
   5.122 +        (*counterHandler)(Assigner_end,assignedSlaveVP->slaveID,assignedSlaveVP->assignCount,assignedSlaveVP,cycles,instrs);
   5.123 +
   5.124 +   #define HOLISTIC__Record_Work_start /* reports Work_start with fresh counter readings; does nothing when currVP is NULL */ \
   5.125 +        if(currVP){ \
   5.126 +                uint64 cycles,instrs; \
   5.127 +                saveCyclesAndInstrs(thisCoresIdx,cycles, instrs); \
   5.128 +                (*counterHandler)(Work_start,currVP->slaveID,currVP->assignCount,currVP,cycles,instrs); \
   5.129 +        }
   5.130 +   
   5.131 +   #define HOLISTIC__Record_Work_end /* reports Work_end with fresh counter readings; does nothing when currVP is NULL */ \
   5.132 +       if(currVP){ \
   5.133 +               uint64 cycles,instrs; \
   5.134 +               saveCyclesAndInstrs(thisCoresIdx,cycles, instrs); \
   5.135 +               (*counterHandler)(Work_end,currVP->slaveID,currVP->assignCount,currVP,cycles,instrs); \
   5.136 +       }
   5.137  
   5.138     #define HOLISTIC__Record_HwResponderInvocation_start /* samples the counters of the core in animatingSlv->coreAnimatedBy and reports HwResponderInvocation_start for animatingSlv */ \
   5.139          uint64 cycles,instrs; \
   5.140 -        saveCyclesAndInstrs(animatingPr->coreAnimatedBy,cycles, instrs); \
   5.141 -        (*(_VMSMasterEnv->counterHandler))(HwResponderInvocation_start,animatingPr->procrID,animatingPr->numTimesScheduled,animatingPr,cycles,instrs); 
   5.142 +        saveCyclesAndInstrs(animatingSlv->coreAnimatedBy,cycles, instrs); \
   5.143 +        (*(_VMSMasterEnv->counterHandler))(HwResponderInvocation_start,animatingSlv->slaveID,animatingSlv->assignCount,animatingSlv,cycles,instrs); 
   5.144 +        
   5.145  
   5.146 -   
   5.147 -   
   5.148 +
   5.149  #else  /* counters disabled: every measurement macro expands to nothing. NOTE(review): HOLISTIC__CoreCtrl_Setup, HOLISTIC__Record_last_work and HOLISTIC__Insert_Master_Global_Vars have no empty definition in this branch although AnimationMaster.c and CoreController.c use them unguarded -- confirm they are stubbed elsewhere, otherwise this configuration will not compile */
   5.150     #define MEAS__Insert_Counter_Handler
   5.151     #define MEAS__Insert_Counter_Meas_Fields_into_MasterEnv
   5.152     #define HOLISTIC__Setup_Perf_Counters
   5.153 -   #define HOLISTIC__Start_Perf_Counters
   5.154 +   #define HOLISTIC__Record_AppResponderInvocation_start
   5.155 +   #define HOLISTIC__Record_AppResponder_start
   5.156 +   #define HOLISTIC__Record_AppResponder_end
   5.157 +   #define HOLISTIC__Record_Assigner_start
   5.158 +   #define HOLISTIC__Record_Assigner_end
   5.159 +   #define HOLISTIC__Record_Work_start
   5.160 +   #define HOLISTIC__Record_Work_end
   5.161 +   #define HOLISTIC__Record_HwResponderInvocation_start
   5.162  #endif
   5.163  
   5.164  //Experiment in two-step macros -- if doesn't work, insert each separately
     6.1 --- a/VMS__int.c	Mon Mar 19 10:03:45 2012 -0700
     6.2 +++ b/VMS__int.c	Wed Mar 21 11:09:11 2012 +0100
     6.3 @@ -81,6 +81,7 @@
     6.4        //return ownership of the Slv and anim slot to Master virt pr
     6.5     animatingSlv->animSlotAssignedTo->workIsDone = TRUE;
     6.6  
     6.7 +        HOLISTIC__Record_HwResponderInvocation_start;
     6.8           MEAS__Capture_Pre_Susp_Point;
     6.9     switchToCoreCtlr(animatingSlv);
    6.10     flushRegisters();