# HG changeset patch # User Merten Sach # Date 1324389607 -3600 # Node ID 85f55731f6cd919fef6816481b17076531f75ae0 # Parent 1320dd56673ace11ad8766eeec9df4875dbdaed2 Padded variables to avoid false sharing in the application diff -r 1320dd56673a -r 85f55731f6cd src/Application/main.c --- a/src/Application/main.c Fri Dec 16 16:40:07 2011 +0100 +++ b/src/Application/main.c Tue Dec 20 15:00:07 2011 +0100 @@ -64,8 +64,7 @@ typedef struct perfData measurement_t; struct perfData{ uint64 cycles; - uint64 instructions; -}; +} __align_to_cacheline__; const char *usage = { "Usage: malloc_test [options]\n" @@ -85,7 +84,7 @@ int32 cond; measurement_t endBarrierCycles; -}; +} __align_to_cacheline__; typedef struct barrier_t barrier; void inline barrier_init(barrier *barr, int nthreads, VirtProcr *animatingPr) @@ -104,7 +103,7 @@ barr->counter++; if(barr->counter == barr->nthreads) { - read(cycles_counter_main_fd, &(barr->endBarrierCycles.cycles), \ + read(cycles_counter_main_fd, &(barr->endBarrierCycles.cycles), \ sizeof(barr->endBarrierCycles.cycles)); barr->counter = 0; @@ -119,7 +118,7 @@ -typedef struct +struct WorkerParams_t { struct barrier_t* barrier; uint64_t totalWorkCycles; uint64_t totalBadCycles; @@ -127,15 +126,18 @@ uint64_t totalBadSyncCycles; uint64 numGoodSyncs; uint64 numGoodTasks; - } -WorkerParams; - + }; + + typedef union + { + struct WorkerParams_t data; + char padding[CACHELINE_SIZE]; + } WorkerParams __align_to_cacheline__; typedef struct { measurement_t *startExeCycles; measurement_t *endExeCycles; - } -BenchParams; + } BenchParams __align_to_cacheline__; //======================== Globals ========================= char __ProgrammName[] = "overhead_test"; @@ -151,7 +153,7 @@ //======================== App Code ========================= /* - * Workload + p* Workload */ #define saveCyclesAndInstrs(core,cycles) do{ \ @@ -179,14 +181,14 @@ int cpuid = sched_getcpu(); - measurement_t startWorkload, endWorkload, startWorkload2, endWorkload2; + measurement_t startWorkload, endWorkload; uint64 numCycles; for(o=0; o < outer_iters; o++) { saveCyclesAndInstrs(cpuid,startWorkload.cycles); - //workltask + //task for(i=0; i < inner_iters; i++) { workspace1 += (workspace1 + 32)/2; @@ -221,12 +223,12 @@ VPThread__mutex_unlock(privateMutex, animatingPr); } - params->totalWorkCycles = totalWorkCycles; - params->totalBadCycles = totalBadCycles; - params->numGoodTasks = numGoodTasks; - params->totalSyncCycles = totalSyncCycles; - params->totalBadSyncCycles = totalBadSyncCycles; - params->numGoodSyncs = numGoodSyncs; + params->data.totalWorkCycles = totalWorkCycles; + params->data.totalBadCycles = totalBadCycles; + params->data.numGoodTasks = numGoodTasks; + params->data.totalSyncCycles = totalSyncCycles; + params->data.totalBadSyncCycles = totalBadSyncCycles; + params->data.numGoodSyncs = numGoodSyncs; /* params->totalSyncCycles = VMS__give_num_plugin_cycles(); params->totalBadSyncCycles = 0; @@ -235,7 +237,7 @@ //Wait for all threads to end - barrier_wait(params->barrier, animatingPr); + barrier_wait(params->data.barrier, animatingPr); //Shutdown worker VPThread__dissipate_thread(animatingPr); @@ -244,14 +246,15 @@ return (workspace1 + workspace2); //to prevent gcc from optimizing work out } +//local variables of benchmark, made global for alignment +struct barrier_t barr __align_to_cacheline__; +BenchParams *params __align_to_cacheline__; /* this is run after the VMS is set up*/ void benchmark(void *_params, VirtProcr *animatingPr) { - int i, cpuID; - struct barrier_t barr; - BenchParams *params; - + int i; + params = (BenchParams *)_params; barrier_init(&barr, num_threads+1, animatingPr); @@ -259,15 +262,12 @@ //prepare input for(i=0; istartExeCycles; - - int nread = read(cycles_counter_main_fd, &(startExeCycles->cycles), - sizeof(startExeCycles->cycles)); + int nread = read(cycles_counter_main_fd, &(params->startExeCycles->cycles), + sizeof(params->startExeCycles->cycles)); if(nread<0) perror("Error reading cycles counter"); //create (which starts running) all threads @@ -438,14 +438,14 @@ uint64_t totalWorkCyclesAcrossCores = 0, totalBadCyclesAcrossCores = 0; uint64_t totalSyncCyclesAcrossCores = 0, totalBadSyncCyclesAcrossCores = 0; for(i=0; i