Mercurial > cgi-bin > hgwebdir.cgi > PR > Applications > Vthread > Vthread__Best_Effort_Msg__Bench
changeset 14:c3561dbac1dc false_sharing
added cache misses counter
| author | Merten Sach <msach@mailbox.tu-berlin.de> |
|---|---|
| date | Tue, 20 Dec 2011 17:21:27 +0100 |
| parents | 85f55731f6cd |
| children | 281cadcbb796 |
| files | src/Application/main.c |
| diffstat | 1 files changed, 55 insertions(+), 16 deletions(-) [+] |
line diff
1.1 --- a/src/Application/main.c Tue Dec 20 15:00:07 2011 +0100 1.2 +++ b/src/Application/main.c Tue Dec 20 17:21:27 2011 +0100 1.3 @@ -60,6 +60,21 @@ 1.4 //#error "I don't know how to measure time on your platform" 1.5 #endif 1.6 1.7 +//======================== Globals ========================= 1.8 +char __ProgrammName[] = "overhead_test"; 1.9 +char __DataSet[255]; 1.10 + 1.11 +int outer_iters, inner_iters, num_threads; 1.12 +size_t chunk_size = 0; 1.13 + 1.14 +int cycles_counter_main_fd; 1.15 +int misses_counter_fd; 1.16 + 1.17 +uint64_t cache_misses; 1.18 + 1.19 +int cycles_counter_fd[NUM_CORES]; 1.20 +struct perf_event_attr* hw_event; 1.21 + 1.22 //======================== Defines ========================= 1.23 typedef struct perfData measurement_t; 1.24 struct perfData{ 1.25 @@ -95,7 +110,6 @@ 1.26 barr->cond = VPThread__make_cond(barr->mutex, animatingPr); 1.27 } 1.28 1.29 -int cycles_counter_main_fd; 1.30 void inline barrier_wait(barrier *barr, VirtProcr *animatingPr) 1.31 { int i; 1.32 1.33 @@ -133,24 +147,14 @@ 1.34 struct WorkerParams_t data; 1.35 char padding[CACHELINE_SIZE]; 1.36 } WorkerParams __align_to_cacheline__; 1.37 + 1.38 +WorkerParams *workerParamsArray; 1.39 1.40 typedef struct 1.41 { measurement_t *startExeCycles; 1.42 measurement_t *endExeCycles; 1.43 } BenchParams __align_to_cacheline__; 1.44 1.45 -//======================== Globals ========================= 1.46 -char __ProgrammName[] = "overhead_test"; 1.47 -char __DataSet[255]; 1.48 - 1.49 -int outer_iters, inner_iters, num_threads; 1.50 -size_t chunk_size = 0; 1.51 - 1.52 -int cycles_counter_fd[NUM_CORES]; 1.53 -struct perf_event_attr* hw_event; 1.54 - 1.55 -WorkerParams *workerParamsArray; 1.56 - 1.57 //======================== App Code ========================= 1.58 /* 1.59 p* Workload 1.60 @@ -166,6 +170,16 @@ 1.61 cycles = 0; \ 1.62 } \ 1.63 } while (0) //macro magic for scoping 1.64 + 1.65 +#define saveMisses(misses) do{ \ 1.66 + int nread; \ 1.67 + \ 1.68 + nread = read(misses_counter_fd,&(misses),sizeof(misses)); \ 1.69 + if(nread<0){ \ 1.70 + perror("Error reading misses counter"); \ 1.71 + misses = 0; \ 1.72 + } \ 1.73 +} while (0) //macro magic for scoping 1.74 1.75 1.76 double 1.77 @@ -264,7 +278,9 @@ 1.78 { 1.79 workerParamsArray[i].data.barrier = &barr; 1.80 } 1.81 - 1.82 + 1.83 + uint64_t cache_misses_at_start, cache_misses_at_end; 1.84 + saveMisses(cache_misses_at_start); 1.85 //save cycles before execution of threads, to get total exe cycles 1.86 int nread = read(cycles_counter_main_fd, &(params->startExeCycles->cycles), 1.87 sizeof(params->startExeCycles->cycles)); 1.88 @@ -279,8 +295,8 @@ 1.89 1.90 //endBarrierCycles read in barrier_wait()! Merten, email me if want to chg 1.91 params->endExeCycles->cycles = barr.endBarrierCycles.cycles; 1.92 - 1.93 - 1.94 + saveMisses(cache_misses_at_end); 1.95 + cache_misses = cache_misses_at_end-cache_misses_at_start; 1.96 /* 1.97 uint64_t overallWorkCycles = 0; 1.98 for(i=0; i<num_threads; i++){ 1.99 @@ -420,6 +436,28 @@ 1.100 perror("Failed to open cycles counter"); 1.101 } 1.102 1.103 + //Set up counters to count cache misses 1.104 + hw_event->type = PERF_TYPE_HARDWARE; 1.105 + hw_event->config = PERF_COUNT_HW_CACHE_MISSES; //misses 1.106 + 1.107 + retries = 0; 1.108 + do 1.109 + { retries += 1; 1.110 + misses_counter_fd = 1.111 + syscall(__NR_perf_event_open, hw_event, 1.112 + 0,//pid_t: 0 is "pid of calling process" 1.113 + -1,//int: cpu, -1 means accumulate from all cores 1.114 + -1,//int: group_fd, -1 is "leader" == independent 1.115 + 0//unsigned long: flags 1.116 + ); 1.117 + } 1.118 + while(misses_counter_fd<0 && retries < 100); 1.119 + if(retries >= 100) 1.120 + { 1.121 + fprintf(stderr,"in main "); 1.122 + perror("Failed to misses counter"); 1.123 + } 1.124 + 1.125 measurement_t startExeCycles, endExeCycles; 1.126 BenchParams *benchParams; 1.127 1.128 @@ -453,6 +491,7 @@ 1.129 uint64 totalOverhead = totalExeCycles - totalWorkCyclesAcrossCores; 1.130 int32 numSyncs = outer_iters * num_threads * 2; 1.131 printf("Total Execution Cycles: %lu\n", totalExeCycles); 1.132 + printf("Total number of cache misses: %lu\n", cache_misses); 1.133 printf("Sum across threads of work cycles: %lu\n", totalWorkCyclesAcrossCores); 1.134 printf("Sum across threads of bad work cycles: %lu\n", totalBadCyclesAcrossCores); 1.135 // printf("Sum across threads of Bad Sync cycles: %lu\n", totalBadSyncCyclesAcrossCores);
