# HG changeset patch # User Merten Sach # Date 1323440892 -3600 # Node ID 662089f010bbe57e8771f393a53ffdcc1255dfea # Parent 5d3b5e58456e325009300a03dd688333fb78d46e Cycles measurement done with perf counter, correct command line parsing diff -r 5d3b5e58456e -r 662089f010bb src/Application/main.c --- a/src/Application/main.c Wed Dec 07 06:17:46 2011 +0100 +++ b/src/Application/main.c Fri Dec 09 15:28:12 2011 +0100 @@ -185,8 +185,8 @@ for(o=0; o < outer_iters; o++) { -// saveCyclesAndInstrs(cpuid,startWorkload.cycles); - saveTSCLowHigh(startTask); + saveCyclesAndInstrs(cpuid,startWorkload.cycles); +// saveTSCLowHigh(startTask); //workload for(i=0; i < inner_iters; i++) @@ -195,10 +195,10 @@ workspace2 += (workspace2 + 23.2)/1.4; } - saveTSCLowHigh(endTask); - numCycles = endTask.longVal - startTask.longVal; -// saveCyclesAndInstrs(cpuid,endWorkload.cycles); -// numCycles = endWorkload.cycles - startWorkload.cycles; +// saveTSCLowHigh(endTask); +// numCycles = endTask.longVal - startTask.longVal; + saveCyclesAndInstrs(cpuid,endWorkload.cycles); + numCycles = endWorkload.cycles - startWorkload.cycles; //sanity check (400K is about 20K iters) if( numCycles < 400000 ) {totalWorkCycles += numCycles; numGoodTasks++;} @@ -206,18 +206,22 @@ //mutex access often causes switch to different Slave VP VPThread__mutex_lock(privateMutex, animatingPr); +/* saveTSCLowHigh(endSync1); numCycles = endSync1.longVal - endTask.longVal; //sanity check (400K is about 20K iters) if( numCycles < 400000 ) {totalSyncCycles += numCycles; numGoodSyncs++;} else totalBadSyncCycles += numCycles; +*/ VPThread__mutex_unlock(privateMutex, animatingPr); +/* saveTSCLowHigh(endSync2); numCycles = endSync2.longVal - endSync1.longVal; //sanity check (400K is about 20K iters) if( numCycles < 400000 ) {totalSyncCycles += numCycles; numGoodSyncs++;} else totalBadSyncCycles += numCycles; +*/ } @@ -322,7 +326,6 @@ fprintf(stderr, "invalid number of threads specified: %d\n", num_threads); return EXIT_FAILURE; } - num_threads *= NUM_CORES; break; case 'o': if(!isdigit(argv[++i][0])) @@ -451,13 +454,14 @@ uint64_t totalExeCycles = endExeCycles.cycles - startExeCycles.cycles; totalExeCycles -= totalBadCyclesAcrossCores; - + uint64 totalOverhead = totalExeCycles - totalWorkCyclesAcrossCores; + int32 numSyncs = outer_iters * num_threads * 2; + printf("Total Execution Cycles: %lu\n", totalExeCycles); printf("Sum across threads of work cycles: %lu\n", totalWorkCyclesAcrossCores); - printf("Total Execution Cycles: %lu\n", totalExeCycles); - printf("Sum across threads of Sync cycles: %lu\n", totalSyncCyclesAcrossCores); - printf("Sum across threads of Bad Sync cycles: %lu\n", totalBadSyncCyclesAcrossCores); + printf("Sum across threads of bad work cycles: %lu\n", totalBadCyclesAcrossCores); +// printf("Sum across threads of Bad Sync cycles: %lu\n", totalBadSyncCyclesAcrossCores); + printf("Overhead per sync: %f\n", (double)totalOverhead / (double)numSyncs ); printf("ExeCycles/WorkCycles Ratio %f\n", (double)totalExeCycles / (double)totalWorkCyclesAcrossCores); - return 0; }