changeset 15:a1269b1549fc

added preprocessor defines to disable perf_counters for vtune measurement
author Merten Sach <msach@mailbox.tu-berlin.de>
date Fri, 06 Jan 2012 19:09:38 +0100
parents 1320dd56673a
children 5887fbce425f
files src/Application/main.c
diffstat 1 files changed, 19 insertions(+), 2 deletions(-) [+]
line diff
     1.1 --- a/src/Application/main.c	Fri Dec 16 16:40:07 2011 +0100
     1.2 +++ b/src/Application/main.c	Fri Jan 06 19:09:38 2012 +0100
     1.3 @@ -19,6 +19,8 @@
     1.4  #undef DEBUG
     1.5  //#define DEBUG
     1.6  
     1.7 +#define MEASURE_PERF
     1.8 +
     1.9  #if !defined(unix) && !defined(__unix__)
    1.10  #ifdef __MACH__
    1.11  #define unix		1
    1.12 @@ -104,8 +106,10 @@
    1.13     barr->counter++;
    1.14     if(barr->counter == barr->nthreads)
    1.15      { 
    1.16 +#ifdef MEASURE_PERF
    1.17        read(cycles_counter_main_fd, &(barr->endBarrierCycles.cycles), \
    1.18                  sizeof(barr->endBarrierCycles.cycles));
    1.19 +#endif
    1.20         
    1.21        barr->counter = 0;
    1.22        for(i=0; i < barr->nthreads; i++)
    1.23 @@ -183,8 +187,9 @@
    1.24     uint64 numCycles;
    1.25     for(o=0; o < outer_iters; o++)
    1.26      {
    1.27 -       
    1.28 +#ifdef MEASURE_PERF
    1.29            saveCyclesAndInstrs(cpuid,startWorkload.cycles);
    1.30 +#endif
    1.31         
    1.32        //workltask
    1.33        for(i=0; i < inner_iters; i++)
    1.34 @@ -192,12 +197,14 @@
    1.35           workspace1 += (workspace1 + 32)/2;
    1.36           workspace2 += (workspace2 + 23.2)/1.4;
    1.37         }
    1.38 -      
    1.39 +  
    1.40 +#ifdef MEASURE_PERF
    1.41            saveCyclesAndInstrs(cpuid,endWorkload.cycles);
    1.42            numCycles = endWorkload.cycles - startWorkload.cycles;
    1.43            //sanity check (400K is about 20K iters)
    1.44            if( numCycles < 400000 ) {totalWorkCycles += numCycles; numGoodTasks++;}
    1.45            else                     {totalBadCycles  += numCycles; }
    1.46 +#endif
    1.47  
    1.48        //mutex access often causes switch to different Slave VP
    1.49        VPThread__mutex_lock(privateMutex, animatingPr);
    1.50 @@ -266,9 +273,11 @@
    1.51     measurement_t *startExeCycles, *endExeCycles;
    1.52     startExeCycles = params->startExeCycles;
    1.53     
    1.54 +#ifdef MEASURE_PERF
    1.55     int nread = read(cycles_counter_main_fd, &(startExeCycles->cycles),
    1.56                  sizeof(startExeCycles->cycles));
    1.57     if(nread<0) perror("Error reading cycles counter");
    1.58 +#endif
    1.59     
    1.60     //create (which starts running) all threads
    1.61     for(i=0; i<num_threads; i++)
    1.62 @@ -277,8 +286,10 @@
    1.63     //wait for all threads to finish
    1.64     barrier_wait(&barr, animatingPr);
    1.65    
    1.66 +#ifdef MEASURE_PERF
    1.67     //endBarrierCycles read in barrier_wait()!  Merten, email me if want to chg
    1.68     params->endExeCycles->cycles = barr.endBarrierCycles.cycles;
    1.69 +#endif
    1.70     
    1.71  
    1.72  /*
    1.73 @@ -358,6 +369,7 @@
    1.74      }//for
    1.75     
    1.76     
    1.77 +#ifdef MEASURE_PERF
    1.78     //setup performance counters
    1.79      hw_event = malloc(sizeof(struct perf_event_attr));
    1.80      memset(hw_event,0,sizeof(struct perf_event_attr));
    1.81 @@ -419,6 +431,7 @@
    1.82        fprintf(stderr,"in main ");
    1.83        perror("Failed to open cycles counter");
    1.84      }
    1.85 +#endif
    1.86     
    1.87     measurement_t startExeCycles, endExeCycles;
    1.88     BenchParams *benchParams;
    1.89 @@ -435,6 +448,7 @@
    1.90     //This is the transition to the VMS runtime
    1.91     VPThread__create_seed_procr_and_do_work( &benchmark, benchParams );
    1.92     
    1.93 +#ifdef MEASURE_PERF
    1.94     uint64_t totalWorkCyclesAcrossCores = 0, totalBadCyclesAcrossCores = 0;
    1.95     uint64_t totalSyncCyclesAcrossCores = 0, totalBadSyncCyclesAcrossCores = 0;
    1.96     for(i=0; i<num_threads; i++){ 
    1.97 @@ -459,5 +473,8 @@
    1.98     printf("Overhead per sync: %f\n", (double)totalOverhead / (double)numSyncs );
    1.99     printf("ExeCycles/WorkCycles Ratio %f\n", 
   1.100            (double)totalExeCycles / (double)totalWorkCyclesAcrossCores);
   1.101 +#else
   1.102 +   printf("No measurement done!\n");
   1.103 +#endif
   1.104     return 0;
   1.105   }