changeset 108:3bc3b89630c7 perf_counters

perf counters
author engelhardt@cray1
date Tue, 26 Jul 2011 15:36:24 +0200
parents 388af85fe190
children 659299627e70
files CoreLoop.c MasterLoop.c ProcrContext.c VMS.c VMS.h
diffstat 5 files changed, 158 insertions(+), 5 deletions(-) [+]
line diff
     1.1 --- a/CoreLoop.c	Tue Jul 26 15:35:57 2011 +0200
     1.2 +++ b/CoreLoop.c	Tue Jul 26 15:36:24 2011 +0200
     1.3 @@ -71,7 +71,7 @@
     1.4     CPU_ZERO(&coreMask);
     1.5     CPU_SET(coreLoopThdParams->coreNum,&coreMask);
     1.6     //coreMask = 1L << coreLoopThdParams->coreNum;
     1.7 -
     1.8 +   
     1.9     pthread_t selfThd = pthread_self();
    1.10     errorCode =
    1.11     pthread_setaffinity_np( selfThd, sizeof(coreMask), &coreMask);
    1.12 @@ -151,8 +151,38 @@
    1.13  
    1.14      }
    1.15  
    1.16 -   
    1.17 +   #ifdef MEAS__PERF_COUNTER
    1.18 +        //start work
    1.19 +   int lastRecordIdx = currPr->counter_history_array_info->numInArray -1;
    1.20 +   CounterRecord* lastRecord = currPr->counter_history[lastRecordIdx];
    1.21 +   lastRecord->work_core = thisCoresIdx;
    1.22 +   int cycles_fd = _VMSMasterEnv->cycles_counter_fd[thisCoresIdx];
    1.23 +   int instrs_fd = _VMSMasterEnv->instrs_counter_fd[thisCoresIdx];
    1.24 +   int nread;
    1.25 +               
    1.26 +   nread = read(cycles_fd,&(lastRecord->start_work_cycles),sizeof(lastRecord->start_work_cycles));
    1.27 +   if(nread<0){
    1.28 +       lastRecord->start_work_cycles = 0;
    1.29 +   }
    1.30 +
    1.31 +   nread = read(instrs_fd,&(lastRecord->start_work_instrs),sizeof(lastRecord->start_work_instrs));
    1.32 +   if(nread<0){
    1.33 +       lastRecord->start_work_instrs = 0;
    1.34 +   }
    1.35 +   #endif
    1.36     switchToVP(currPr); //The VPs return in here
    1.37 +   #ifdef MEAS__PERF_COUNTER
    1.38 +        //end work: sample into end_work_* so the start-of-work values read before switchToVP are kept (assumes CounterRecord declares end_work_cycles/end_work_instrs -- TODO confirm in Counters/Counters.h)
    1.39 +   nread = read(cycles_fd,&(lastRecord->end_work_cycles),sizeof(lastRecord->end_work_cycles));
    1.40 +   if(nread<0){
    1.41 +       lastRecord->end_work_cycles = 0;
    1.42 +   }
    1.43 +
    1.44 +   nread = read(instrs_fd,&(lastRecord->end_work_instrs),sizeof(lastRecord->end_work_instrs));
    1.45 +   if(nread<0){
    1.46 +       lastRecord->end_work_instrs = 0;
    1.47 +   }
    1.48 +   #endif
    1.49     flushRegisters();
    1.50     }//CoreLoop      
    1.51   }
     2.1 --- a/MasterLoop.c	Tue Jul 26 15:35:57 2011 +0200
     2.2 +++ b/MasterLoop.c	Tue Jul 26 15:36:24 2011 +0200
     2.3 @@ -147,6 +147,32 @@
     2.4                 int32 startStamp1, endStamp1;
     2.5                 saveLowTimeStampCountInto( startStamp1 );
     2.6                 #endif
     2.7 +               #ifdef MEAS__PERF_COUNTER
     2.8 +               int lastRecordIdx = currSlot->procrAssignedToSlot->counter_history_array_info->numInArray -1;
     2.9 +               CounterRecord* lastRecord = currSlot->procrAssignedToSlot->counter_history[lastRecordIdx];
    2.10 +               lastRecord->req_core = thisCoresIdx;
    2.11 +               int cycles_fd = masterEnv->cycles_counter_fd[thisCoresIdx];
    2.12 +               int instrs_fd = masterEnv->instrs_counter_fd[thisCoresIdx];
    2.13 +               int nread;
    2.14 +               
    2.15 +               nread = read(cycles_fd,&(lastRecord->req_cycles),sizeof(lastRecord->req_cycles));
    2.16 +               if(nread<0){
    2.17 +                   lastRecord->req_cycles = 0;
    2.18 +               }
    2.19 +
    2.20 +               nread = read(instrs_fd,&(lastRecord->req_instrs),sizeof(lastRecord->req_instrs));
    2.21 +               if(nread<0){
    2.22 +                   lastRecord->req_instrs = 0;
    2.23 +               }
    2.24 +               //End of task, start of next task
    2.25 +               //print counters from last run
    2.26 +               print_record(lastRecord);
    2.27 +               //create new entry in record array here
    2.28 +               CounterRecord* newRecord = VMS__malloc(sizeof(CounterRecord));
    2.29 +               newRecord->req_core = thisCoresIdx;
    2.30 +               addToDynArray( (void*) newRecord, currSlot->procrAssignedToSlot->counter_history_array_info);
    2.31 +               lastRecord = newRecord;
    2.32 +               #endif
    2.33                 //============================================================
    2.34           (*requestHandler)( currSlot->procrAssignedToSlot, semanticEnv );
    2.35                 //====================== MEASUREMENT STUFF ===================
    2.36 @@ -157,13 +183,59 @@
    2.37                 addIntervalToHist( startStamp1, endStamp1,
    2.38                                          _VMSMasterEnv->reqHdlrHighTimeHist );
    2.39                 #endif
    2.40 +               #ifdef MEAS__PERF_COUNTER
    2.41 +
    2.42 +               nread = read(cycles_fd,&(lastRecord->sc_done_cycles),sizeof(lastRecord->sc_done_cycles));
    2.43 +               if(nread<0){
    2.44 +                   lastRecord->sc_done_cycles = 0;
    2.45 +               }
    2.46 +               nread = read(instrs_fd,&(lastRecord->sc_done_instrs),sizeof(lastRecord->sc_done_instrs));
    2.47 +               if(nread<0){
    2.48 +                   lastRecord->sc_done_instrs = 0;
    2.49 +               }
    2.50 +               #endif
    2.51                 //============================================================
    2.52         }
    2.53        if( currSlot->needsProcrAssigned )
    2.54         {    //give slot a new virt procr
    2.55 +               #ifdef MEAS__PERF_COUNTER
    2.56 +                //start assigner
    2.57 +               int cycles_fd = masterEnv->cycles_counter_fd[thisCoresIdx];
    2.58 +               int instrs_fd = masterEnv->instrs_counter_fd[thisCoresIdx];
    2.59 +               uint64 tmp_cycles;
    2.60 +               uint64 tmp_instrs;
    2.61 +               int nread=0;
    2.62 +               
    2.63 +               nread = read(cycles_fd,&tmp_cycles,sizeof(uint64));
    2.64 +               if(nread<0){
    2.65 +                   tmp_cycles = 0;
    2.66 +               }
    2.67 +
    2.68 +               nread = read(instrs_fd,&tmp_instrs,sizeof(uint64));
    2.69 +               if(nread<0){
    2.70 +                   tmp_instrs = 0;
    2.71 +               }
    2.72 +               #endif
    2.73           schedVirtPr =
    2.74            (*slaveScheduler)( semanticEnv, thisCoresIdx );
    2.75 -         
    2.76 +               #ifdef MEAS__PERF_COUNTER
    2.77 +               //end assigner -- NOTE(review): the record is looked up through currSlot->procrAssignedToSlot, i.e. before schedVirtPr is stored into the slot below; confirm which procr's history should receive the assign counters
    2.78 +               int lastRecordIdx = currSlot->procrAssignedToSlot->counter_history_array_info->numInArray -1;
    2.79 +               CounterRecord* lastRecord = currSlot->procrAssignedToSlot->counter_history[lastRecordIdx];
    2.80 +               lastRecord->assigning_core = thisCoresIdx;
    2.81 +               lastRecord->start_assign_cycles = tmp_cycles;
    2.82 +               lastRecord->start_assign_instrs = tmp_instrs;
    2.83 +               
    2.84 +               nread = read(cycles_fd,&(lastRecord->end_assign_cycles),sizeof(lastRecord->end_assign_cycles));
    2.85 +               if(nread<0){
    2.86 +                   lastRecord->end_assign_cycles = 0;
    2.87 +               }
    2.88 +
    2.89 +               nread = read(instrs_fd,&(lastRecord->end_assign_instrs),sizeof(lastRecord->end_assign_instrs));
    2.90 +               if(nread<0){
    2.91 +                   lastRecord->end_assign_instrs = 0;
    2.92 +               }
    2.93 +               #endif
    2.94           if( schedVirtPr != NULL )
    2.95            { currSlot->procrAssignedToSlot = schedVirtPr;
    2.96              schedVirtPr->schedSlot        = currSlot;
     3.1 --- a/ProcrContext.c	Tue Jul 26 15:35:57 2011 +0200
     3.2 +++ b/ProcrContext.c	Tue Jul 26 15:36:24 2011 +0200
     3.3 @@ -59,6 +59,7 @@
     3.4     //newPr->createPtInSecs = timeStamp.tv_sec +(timeStamp.tv_usec/1000000.0) -
     3.5     //                                            _VMSMasterEnv->createPtInSecs;
     3.6     #endif
     3.7 +
     3.8     //========================================================================
     3.9  
    3.10     return newPr;
     4.1 --- a/VMS.c	Tue Jul 26 15:35:57 2011 +0200
     4.2 +++ b/VMS.c	Tue Jul 26 15:36:24 2011 +0200
     4.3 @@ -16,6 +16,9 @@
     4.4  #include "Queue_impl/BlockingQueue.h"
     4.5  #include "Histogram/Histogram.h"
     4.6  
     4.7 +#include <linux/perf_event.h>
     4.8 +#include <syscall.h>
     4.9 +#include <sys/prctl.h>
    4.10  
    4.11  #define thdAttrs NULL
    4.12  
    4.13 @@ -177,6 +180,43 @@
    4.14     #endif
    4.15     
    4.16     MakeTheMeasHists();
    4.17 +   
    4.18 +   #ifdef MEAS__PERF_COUNTER //NOTE(review): VMS.h defines and tests MEAS__PERF_COUNTERS (plural); confirm which spelling is intended
    4.19 +   printf("Creating HW counters...");
    4.20 +   struct perf_event_attr hw_event = {0}; /* zero every field that is not set explicitly below */
    4.21 +   	hw_event.type = PERF_TYPE_HARDWARE;
    4.22 +	hw_event.size = sizeof(struct perf_event_attr);
    4.23 +	hw_event.disabled = 1; /* created stopped; enabled by the prctl below */
    4.24 +	hw_event.inherit = 1; /* children inherit it   */
    4.25 +	hw_event.pinned = 1; /* must always be on PMU */
    4.26 +	hw_event.exclusive = 0; /* may share the PMU with other groups */
    4.27 +	hw_event.exclude_user = 0; /* count user-space events */
    4.28 +	hw_event.exclude_kernel = 1; /* do not count kernel events */
    4.29 +	hw_event.exclude_hv = 1; /* do not count hypervisor events */
    4.30 +	hw_event.exclude_idle = 0; /* keep counting when idle */
    4.31 +	hw_event.mmap = 0; /* no mmap records */
    4.32 +	hw_event.comm = 0; /* no comm records */
    4.33 +
    4.34 +   for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ )
    4.35 +    {
    4.36 +       	hw_event.config = PERF_COUNT_HW_CPU_CYCLES; //cycles
    4.37 +        _VMSMasterEnv->cycles_counter_fd[coreIdx] = syscall(__NR_perf_event_open, &hw_event,
    4.38 + 		0,//pid_t pid, 
    4.39 +		coreIdx,//int cpu, 
    4.40 +		-1,//int group_fd,
    4.41 +		0//unsigned long flags
    4.42 +	);
    4.43 +        hw_event.config = PERF_COUNT_HW_INSTRUCTIONS; //instrs
    4.44 +        _VMSMasterEnv->instrs_counter_fd[coreIdx] = syscall(__NR_perf_event_open, &hw_event,
    4.45 + 		0,//pid_t pid, 
    4.46 +		coreIdx,//int cpu, 
    4.47 +		-1,//int group_fd,
    4.48 +		0//unsigned long flags
    4.49 +	);
    4.50 +   }
    4.51 +   prctl(PR_TASK_PERF_EVENTS_ENABLE);
    4.52 +   #endif
    4.53 +   
    4.54     //========================================================================
    4.55  
    4.56   }
     5.1 --- a/VMS.h	Tue Jul 26 15:35:57 2011 +0200
     5.2 +++ b/VMS.h	Tue Jul 26 15:36:24 2011 +0200
     5.3 @@ -16,6 +16,7 @@
     5.4  #include "DynArray/DynArray.h"
     5.5  #include "Hash_impl/PrivateHash.h"
     5.6  #include "vmalloc.h"
     5.7 +#include "Counters/Counters.h"
     5.8  
     5.9  #include <pthread.h>
    5.10  #include <sys/time.h>
    5.11 @@ -58,7 +59,7 @@
    5.12  
    5.13     //when MEAS__TIME_STAMP_SUSP is defined, causes code to be inserted and
    5.14     // compiled-in that saves the low part of the time stamp count just before
    5.15 -   // suspending a processor and just after resuming that processorsrc/VPThread_lib/VMS/VMS.h:322: warning: previous declaration of ‘VMS__create_procr’ was here.  It is
    5.16 +   // suspending a processor and just after resuming that processor.  It is
    5.17     // saved into a field added to VirtProcr.  Have to sanity-check for
    5.18     // rollover of low portion into high portion.
    5.19  //#define MEAS__TIME_STAMP_SUSP
    5.20 @@ -72,11 +73,12 @@
    5.21     // different cores.
    5.22  #define NUM_TSC_ROUND_TRIPS 10
    5.23  
    5.24 +#define MEAS__PERF_COUNTERS
    5.25  
    5.26  //=========================  Hardware related Constants =====================
    5.27     //This value is the number of hardware threads in the shared memory
    5.28     // machine
    5.29 -#define NUM_CORES        8
    5.30 +#define NUM_CORES        2
    5.31  
    5.32     // tradeoff amortizing master fixed overhead vs imbalance potential
    5.33     // when work-stealing, can make bigger, at risk of losing cache affinity
    5.34 @@ -202,6 +204,10 @@
    5.35     unsigned int startMasterTSCLow;USE_GNU
    5.36     unsigned int endMasterTSCLow;
    5.37     #endif
    5.38 +   #ifdef MEAS__PERF_COUNTERS //
    5.39 +   CounterRecord** counter_history;
    5.40 +   PrivDynArrayInfo* counter_history_array_info;
    5.41 +   #endif
    5.42        //========================================
    5.43     
    5.44     float64      createPtInSecs;  //have space but don't use on some configs
    5.45 @@ -258,6 +264,10 @@
    5.46     Histogram       *masterLockLowTimeHist;
    5.47     Histogram       *masterLockHighTimeHist;
    5.48     #endif
    5.49 +   #ifdef MEAS__PERF_COUNTERS
    5.50 +   int cycles_counter_fd[NUM_CORES];
    5.51 +   int instrs_counter_fd[NUM_CORES];
    5.52 +   #endif
    5.53   }
    5.54  MasterEnv;
    5.55