Mercurial > cgi-bin > hgwebdir.cgi > PR > Applications > Vthread > Vthread__Best_Effort_Msg__Bench
changeset 10:662089f010bb
Cycle measurement now done with the perf counter; command-line parsing corrected
| author | Merten Sach <msach@mailbox.tu-berlin.de> |
|---|---|
| date | Fri, 09 Dec 2011 15:28:12 +0100 |
| parents | 5d3b5e58456e |
| children | bef7a9083bd4 |
| files | src/Application/main.c |
| diffstat | 1 files changed, 16 insertions(+), 12 deletions(-) [+] |
line diff
1.1 --- a/src/Application/main.c Wed Dec 07 06:17:46 2011 +0100 1.2 +++ b/src/Application/main.c Fri Dec 09 15:28:12 2011 +0100 1.3 @@ -185,8 +185,8 @@ 1.4 for(o=0; o < outer_iters; o++) 1.5 { 1.6 1.7 -// saveCyclesAndInstrs(cpuid,startWorkload.cycles); 1.8 - saveTSCLowHigh(startTask); 1.9 + saveCyclesAndInstrs(cpuid,startWorkload.cycles); 1.10 +// saveTSCLowHigh(startTask); 1.11 1.12 //workload 1.13 for(i=0; i < inner_iters; i++) 1.14 @@ -195,10 +195,10 @@ 1.15 workspace2 += (workspace2 + 23.2)/1.4; 1.16 } 1.17 1.18 - saveTSCLowHigh(endTask); 1.19 - numCycles = endTask.longVal - startTask.longVal; 1.20 -// saveCyclesAndInstrs(cpuid,endWorkload.cycles); 1.21 -// numCycles = endWorkload.cycles - startWorkload.cycles; 1.22 +// saveTSCLowHigh(endTask); 1.23 +// numCycles = endTask.longVal - startTask.longVal; 1.24 + saveCyclesAndInstrs(cpuid,endWorkload.cycles); 1.25 + numCycles = endWorkload.cycles - startWorkload.cycles; 1.26 1.27 //sanity check (400K is about 20K iters) 1.28 if( numCycles < 400000 ) {totalWorkCycles += numCycles; numGoodTasks++;} 1.29 @@ -206,18 +206,22 @@ 1.30 1.31 //mutex access often causes switch to different Slave VP 1.32 VPThread__mutex_lock(privateMutex, animatingPr); 1.33 +/* 1.34 saveTSCLowHigh(endSync1); 1.35 numCycles = endSync1.longVal - endTask.longVal; 1.36 //sanity check (400K is about 20K iters) 1.37 if( numCycles < 400000 ) {totalSyncCycles += numCycles; numGoodSyncs++;} 1.38 else totalBadSyncCycles += numCycles; 1.39 +*/ 1.40 1.41 VPThread__mutex_unlock(privateMutex, animatingPr); 1.42 +/* 1.43 saveTSCLowHigh(endSync2); 1.44 numCycles = endSync2.longVal - endSync1.longVal; 1.45 //sanity check (400K is about 20K iters) 1.46 if( numCycles < 400000 ) {totalSyncCycles += numCycles; numGoodSyncs++;} 1.47 else totalBadSyncCycles += numCycles; 1.48 +*/ 1.49 1.50 } 1.51 1.52 @@ -322,7 +326,6 @@ 1.53 fprintf(stderr, "invalid number of threads specified: %d\n", num_threads); 1.54 return EXIT_FAILURE; 1.55 } 1.56 - num_threads *= NUM_CORES; 
1.57 break; 1.58 case 'o': 1.59 if(!isdigit(argv[++i][0])) 1.60 @@ -451,13 +454,14 @@ 1.61 1.62 uint64_t totalExeCycles = endExeCycles.cycles - startExeCycles.cycles; 1.63 totalExeCycles -= totalBadCyclesAcrossCores; 1.64 - 1.65 + uint64 totalOverhead = totalExeCycles - totalWorkCyclesAcrossCores; 1.66 + int32 numSyncs = outer_iters * num_threads * 2; 1.67 + printf("Total Execution Cycles: %lu\n", totalExeCycles); 1.68 printf("Sum across threads of work cycles: %lu\n", totalWorkCyclesAcrossCores); 1.69 - printf("Total Execution Cycles: %lu\n", totalExeCycles); 1.70 - printf("Sum across threads of Sync cycles: %lu\n", totalSyncCyclesAcrossCores); 1.71 - printf("Sum across threads of Bad Sync cycles: %lu\n", totalBadSyncCyclesAcrossCores); 1.72 + printf("Sum across threads of bad work cycles: %lu\n", totalBadCyclesAcrossCores); 1.73 +// printf("Sum across threads of Bad Sync cycles: %lu\n", totalBadSyncCyclesAcrossCores); 1.74 + printf("Overhead per sync: %f\n", (double)totalOverhead / (double)numSyncs ); 1.75 printf("ExeCycles/WorkCycles Ratio %f\n", 1.76 (double)totalExeCycles / (double)totalWorkCyclesAcrossCores); 1.77 - 1.78 return 0; 1.79 }
