# ---------------------------------------------------------------------------
# Review notes (comments only; the patch text below is unchanged).
#
# Purpose: this changeset adds a hardware cache-miss counter to
# src/Application/main.c. Visible hunks, in order:
#   * move the "Globals" section above "Defines" and add two globals,
#     misses_counter_fd and cache_misses; cycles_counter_main_fd now lives
#     in that section instead of next to barrier_wait();
#   * declare workerParamsArray directly after the WorkerParams typedef;
#   * add the saveMisses(misses) macro: read() from misses_counter_fd into
#     the argument; on a negative return it calls perror() and sets the
#     argument to 0;
#   * sample the counter before and after the threaded run and store the
#     difference in cache_misses;
#   * set hw_event to PERF_TYPE_HARDWARE / PERF_COUNT_HW_CACHE_MISSES and
#     open it with the perf_event_open syscall, trying at most 100 times
#     (presumably inside main(), judging by the "in main " message);
#   * print "Total number of cache misses".
#
# NOTE(review): this copy is damaged. All line breaks and indentation are
#   collapsed, and text is missing between "for(i=0; i" and
#   "type = PERF_TYPE_HARDWARE;" (apparently the end of the -279,8 hunk and
#   the "@@" header of the following hunk). It will not apply as-is; restore
#   it from the repository before use.
# NOTE(review): the failure check after the retry loop tests
#   "retries >= 100", not "misses_counter_fd<0", so a successful open on the
#   100th attempt would still print the failure message - confirm intent.
# NOTE(review): the perror text "Failed to misses counter" looks like it is
#   missing a verb (e.g. "open").
# NOTE(review): cache_misses is uint64_t but is printed with %lu; that only
#   matches where uint64_t is unsigned long - consider PRIu64.
# ---------------------------------------------------------------------------
# HG changeset patch # User Merten Sach # Date 1324398087 -3600 # Node ID c3561dbac1dcd2156032a1cecca7ac65a25055ab # Parent 85f55731f6cd919fef6816481b17076531f75ae0 added cache misses counter diff -r 85f55731f6cd -r c3561dbac1dc src/Application/main.c --- a/src/Application/main.c Tue Dec 20 15:00:07 2011 +0100 +++ b/src/Application/main.c Tue Dec 20 17:21:27 2011 +0100 @@ -60,6 +60,21 @@ //#error "I don't know how to measure time on your platform" #endif +//======================== Globals ========================= +char __ProgrammName[] = "overhead_test"; +char __DataSet[255]; + +int outer_iters, inner_iters, num_threads; +size_t chunk_size = 0; + +int cycles_counter_main_fd; +int misses_counter_fd; + +uint64_t cache_misses; + +int cycles_counter_fd[NUM_CORES]; +struct perf_event_attr* hw_event; + //======================== Defines ========================= typedef struct perfData measurement_t; struct perfData{ @@ -95,7 +110,6 @@ barr->cond = VPThread__make_cond(barr->mutex, animatingPr); } -int cycles_counter_main_fd; void inline barrier_wait(barrier *barr, VirtProcr *animatingPr) { int i; @@ -133,24 +147,14 @@ struct WorkerParams_t data; char padding[CACHELINE_SIZE]; } WorkerParams __align_to_cacheline__; + +WorkerParams *workerParamsArray; typedef struct { measurement_t *startExeCycles; measurement_t *endExeCycles; } BenchParams __align_to_cacheline__; -//======================== Globals ========================= -char __ProgrammName[] = "overhead_test"; -char __DataSet[255]; - -int outer_iters, inner_iters, num_threads; -size_t chunk_size = 0; - -int cycles_counter_fd[NUM_CORES]; -struct perf_event_attr* hw_event; - -WorkerParams *workerParamsArray; - //======================== App Code ========================= /* p* Workload @@ -166,6 +170,16 @@ cycles = 0; \ } \ } while (0) //macro magic for scoping + +#define saveMisses(misses) do{ \ + int nread; \ + \ + nread = read(misses_counter_fd,&(misses),sizeof(misses)); \ + if(nread<0){ \ + perror("Error reading 
misses counter"); \ + misses = 0; \ + } \ +} while (0) //macro magic for scoping double @@ -264,7 +278,9 @@ { workerParamsArray[i].data.barrier = &barr; } - + + uint64_t cache_misses_at_start, cache_misses_at_end; + saveMisses(cache_misses_at_start); //save cycles before execution of threads, to get total exe cycles int nread = read(cycles_counter_main_fd, &(params->startExeCycles->cycles), sizeof(params->startExeCycles->cycles)); @@ -279,8 +295,8 @@ //endBarrierCycles read in barrier_wait()! Merten, email me if want to chg params->endExeCycles->cycles = barr.endBarrierCycles.cycles; - - + saveMisses(cache_misses_at_end); + cache_misses = cache_misses_at_end-cache_misses_at_start; /* uint64_t overallWorkCycles = 0; for(i=0; itype = PERF_TYPE_HARDWARE; + hw_event->config = PERF_COUNT_HW_CACHE_MISSES; //misses + + retries = 0; + do + { retries += 1; + misses_counter_fd = + syscall(__NR_perf_event_open, hw_event, + 0,//pid_t: 0 is "pid of calling process" + -1,//int: cpu, -1 means accumulate from all cores + -1,//int: group_fd, -1 is "leader" == independent + 0//unsigned long: flags + ); + } + while(misses_counter_fd<0 && retries < 100); + if(retries >= 100) + { + fprintf(stderr,"in main "); + perror("Failed to misses counter"); + } + measurement_t startExeCycles, endExeCycles; BenchParams *benchParams; @@ -453,6 +491,7 @@ uint64 totalOverhead = totalExeCycles - totalWorkCyclesAcrossCores; int32 numSyncs = outer_iters * num_threads * 2; printf("Total Execution Cycles: %lu\n", totalExeCycles); + printf("Total number of cache misses: %lu\n", cache_misses); printf("Sum across threads of work cycles: %lu\n", totalWorkCyclesAcrossCores); printf("Sum across threads of bad work cycles: %lu\n", totalBadCyclesAcrossCores); // printf("Sum across threads of Bad Sync cycles: %lu\n", totalBadSyncCyclesAcrossCores);