changeset 10:662089f010bb

Measure cycles with the perf counter instead of the TSC; fix command-line parsing so the thread count is no longer multiplied by NUM_CORES
author Merten Sach <msach@mailbox.tu-berlin.de>
date Fri, 09 Dec 2011 15:28:12 +0100
parents 5d3b5e58456e
children bef7a9083bd4
files src/Application/main.c
diffstat 1 files changed, 16 insertions(+), 12 deletions(-) [+]
line diff
     1.1 --- a/src/Application/main.c	Wed Dec 07 06:17:46 2011 +0100
     1.2 +++ b/src/Application/main.c	Fri Dec 09 15:28:12 2011 +0100
     1.3 @@ -185,8 +185,8 @@
     1.4     for(o=0; o < outer_iters; o++)
     1.5      {
     1.6         
     1.7 -//          saveCyclesAndInstrs(cpuid,startWorkload.cycles);
     1.8 -          saveTSCLowHigh(startTask);
     1.9 +          saveCyclesAndInstrs(cpuid,startWorkload.cycles);
    1.10 +//          saveTSCLowHigh(startTask);
    1.11         
    1.12        //workload
    1.13        for(i=0; i < inner_iters; i++)
    1.14 @@ -195,10 +195,10 @@
    1.15           workspace2 += (workspace2 + 23.2)/1.4;
    1.16         }
    1.17        
    1.18 -          saveTSCLowHigh(endTask);
    1.19 -          numCycles = endTask.longVal - startTask.longVal;
    1.20 -//          saveCyclesAndInstrs(cpuid,endWorkload.cycles);
    1.21 -//          numCycles = endWorkload.cycles - startWorkload.cycles;
    1.22 +//          saveTSCLowHigh(endTask);
    1.23 +//          numCycles = endTask.longVal - startTask.longVal;
    1.24 +          saveCyclesAndInstrs(cpuid,endWorkload.cycles);
    1.25 +          numCycles = endWorkload.cycles - startWorkload.cycles;
    1.26  
    1.27            //sanity check (400K is about 20K iters)
    1.28            if( numCycles < 400000 ) {totalWorkCycles += numCycles; numGoodTasks++;}
    1.29 @@ -206,18 +206,22 @@
    1.30  
    1.31        //mutex access often causes switch to different Slave VP
    1.32        VPThread__mutex_lock(privateMutex, animatingPr);
    1.33 +/*
    1.34            saveTSCLowHigh(endSync1);
    1.35            numCycles = endSync1.longVal - endTask.longVal;
    1.36            //sanity check (400K is about 20K iters)
    1.37            if( numCycles < 400000 ) {totalSyncCycles += numCycles; numGoodSyncs++;}
    1.38            else                     totalBadSyncCycles  += numCycles;
    1.39 +*/
    1.40        
    1.41        VPThread__mutex_unlock(privateMutex, animatingPr);
    1.42 +/*
    1.43            saveTSCLowHigh(endSync2);
    1.44            numCycles = endSync2.longVal - endSync1.longVal;
    1.45            //sanity check (400K is about 20K iters)
    1.46            if( numCycles < 400000 ) {totalSyncCycles += numCycles; numGoodSyncs++;}
    1.47            else                     totalBadSyncCycles  += numCycles;
    1.48 +*/
    1.49  
    1.50      }
    1.51  
    1.52 @@ -322,7 +326,6 @@
    1.53                    fprintf(stderr, "invalid number of threads specified: %d\n", num_threads);
    1.54                    return EXIT_FAILURE;
    1.55                  }
    1.56 -               num_threads *= NUM_CORES;
    1.57              break;
    1.58              case 'o':
    1.59                 if(!isdigit(argv[++i][0]))
    1.60 @@ -451,13 +454,14 @@
    1.61  
    1.62     uint64_t totalExeCycles = endExeCycles.cycles - startExeCycles.cycles;
    1.63     totalExeCycles -= totalBadCyclesAcrossCores;
    1.64 -   
    1.65 +   uint64 totalOverhead = totalExeCycles - totalWorkCyclesAcrossCores;
    1.66 +   int32  numSyncs = outer_iters * num_threads * 2;
    1.67 +   printf("Total Execution Cycles: %lu\n", totalExeCycles);
    1.68     printf("Sum across threads of work cycles: %lu\n", totalWorkCyclesAcrossCores);
    1.69 -   printf("Total Execution Cycles: %lu\n", totalExeCycles);
    1.70 -   printf("Sum across threads of Sync cycles: %lu\n", totalSyncCyclesAcrossCores);
    1.71 -   printf("Sum across threads of Bad Sync cycles: %lu\n", totalBadSyncCyclesAcrossCores);
    1.72 +   printf("Sum across threads of bad work cycles: %lu\n", totalBadCyclesAcrossCores);
    1.73 +//   printf("Sum across threads of Bad Sync cycles: %lu\n", totalBadSyncCyclesAcrossCores);
    1.74 +   printf("Overhead per sync: %f\n", (double)totalOverhead / (double)numSyncs );
    1.75     printf("ExeCycles/WorkCycles Ratio %f\n", 
    1.76            (double)totalExeCycles / (double)totalWorkCyclesAcrossCores);
    1.77 -
    1.78     return 0;
    1.79   }