# HG changeset patch # User Merten Sach # Date 1321899363 -3600 # Node ID c8995a602b4663d7d98ccf6e4be43a550dea003f # Parent 535c119ba09046657913be8b65ce09785c65cfa6 mallocing hw_event diff -r 535c119ba090 -r c8995a602b46 .hgignore --- a/.hgignore Fri Oct 28 06:56:35 2011 -0700 +++ b/.hgignore Mon Nov 21 19:16:03 2011 +0100 @@ -7,3 +7,4 @@ c-ray-mt *.ppm *.o +*~ diff -r 535c119ba090 -r c8995a602b46 Makefile --- a/Makefile Fri Oct 28 06:56:35 2011 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,51 +0,0 @@ -obj = \ - src/VPThread_lib/VMS/Histogram/Histogram.o \ - src/VPThread_lib/VMS/Histogram/FloatHist.o \ - src/VPThread_lib/VMS/CoreLoop.o \ - src/VPThread_lib/VMS/VMS.o \ - src/VPThread_lib/VMS/MasterLoop.o \ - src/VPThread_lib/VMS/Queue_impl/PrivateQueue.o \ - src/VPThread_lib/VMS/Hash_impl/PrivateHash.o \ - src/VPThread_lib/VMS/DynArray/DynArray.o \ - src/VPThread_lib/VPThread_PluginFns.o \ - src/VPThread_lib/VPThread_lib.o \ - src/VPThread_lib/VMS/Histogram/DblHist.o \ - src/VPThread_lib/VPThread.o \ - src/VPThread_lib/VMS/probes.o \ - src/VPThread_lib/VMS/ProcrContext.o \ - src/VPThread_lib/VPThread_Request_Handlers.o \ - src/VPThread_lib/VPThread_helper.o \ - src/VPThread_lib/VMS/Hash_impl/MurmurHash2.o \ - src/VPThread_lib/VMS/vmalloc.o \ - src/VPThread_lib/VMS/contextSwitch.o \ - src/VPThread_lib/VMS/Queue_impl/BlockingQueue.o \ - src/VPThread_lib/VMS/vutilities.o \ - src/Application/main.o - -bin = task_size_vs_exe_time - -NUM_CORES=4 - -CC = gcc -CFLAGS = -m64 -ffast-math -fwrapv -fno-omit-frame-pointer -O3 -D VPTHREAD -D APPLICATION=C-RAY -D NUM_CORES=$(NUM_CORES) -g -Wall - -$(bin): $(obj) - $(CC) -o $@ $(obj) -lm -lpthread - -%.o : %.c - $(CC) -c $(CFLAGS) -o $@ $< - -.PHONY: clean -clean: - rm -f $(obj) $(bin) - -.PHONY: install -install: - cp $(bin) /usr/local/bin/$(bin) - -.PHONY: uninstall -uninstall: - rm -f /usr/local/bin/$(bin) - - -# $@ Name des Targets diff -r 535c119ba090 -r c8995a602b46 src/Application/main.c --- a/src/Application/main.c Fri Oct 28 06:56:35 2011 -0700 +++ b/src/Application/main.c Mon Nov 21 19:16:03 2011 +0100 @@ -8,9 +8,14 @@ #include #include #include +#include #include "VPThread_lib/VPThread.h" #include "VPThread_lib/VMS/Queue_impl/PrivateQueue.h" +#include +#include +#include + #undef DEBUG //#define DEBUG @@ -61,9 +66,9 @@ "Usage: malloc_test [options]\n" " Spwans a number of threads and allocates memory.\n\n" "Options:\n" - " -t how many threads to use (default: 1)\n" - " -m repeat workload and sync operation times\n" - " -n size of workload, repeat times\n" + " -t how many threads to use (default: 1). This is internaly multiplied by the number of cores.\n" + " -o repeat workload and sync operation times\n" + " -i size of workload, repeat times\n" " -h this help screen\n\n" }; @@ -100,19 +105,16 @@ VPThread__mutex_unlock(barr->mutex, animatingPr); } +struct perfData{ + uint64 cycles; + uint64 instructions; +}; - -union timeStamp{ - uint32 highLow[2]; - uint64 total; -}; +typedef struct perfData measurement_t; struct input_t{ struct barrier_t* barrier; uint64 totalWorkCycles; - uint64 workPlusMutexCycles; - union timeStamp startTime; - union timeStamp endTime; }; @@ -123,11 +125,24 @@ int repetitions, workload_size, num_threads; size_t chunk_size = 0; +int cycles_counter_fd[NUM_CORES]; //======================== App Code ========================= /* * Workload */ + +#define saveCyclesAndInstrs(core,cycles) do{ \ + int cycles_fd = cycles_counter_fd[core]; \ + int nread; \ + \ + nread = read(cycles_fd,&(cycles),sizeof(cycles)); \ + if(nread<=0){ \ + perror("Error reading cycles counter"); \ + cycles = 0; \ + } \ +} while (0) //macro magic for scoping + void work(void* input, VirtProcr* animatingPr) { int n,m; @@ -136,35 +151,48 @@ unsigned int workspace1; double workspace2; int32 privateMutex = VPThread__make_mutex(animatingPr); - - saveTimeStampCountInto(in->startTime.highLow[0], in->startTime.highLow[1]); + + int cpuid = sched_getcpu(); + for(m=0; mendTime.highLow[0], in->endTime.highLow[1] ); in->totalWorkCycles = totalWorkCycles; - in->workPlusMutexCycles = in->endTime.total - in->startTime.total; + printf("Cycles: %lu on CPU %lu\n", totalWorkCycles, cpuid); + + + //Wait for all threads to end barrier_wait(in->barrier, animatingPr); //Shutdown worker VPThread__dissipate_thread(animatingPr); -// printf("%d", workspace1); //Should never execute! VMS bug if does -// printf("%f", workspace2); + + printf("%d", workspace1); //This is to prevent gcc from optimizing out the + printf("%f", workspace2); //two workspace variables } /* this is run after the VMS is set up*/ @@ -174,29 +202,89 @@ struct input_t input[num_threads]; struct barrier_t barr; barrier_init(&barr, num_threads+1, animatingPr); + + //setup performance counters + struct perf_event_attr* hw_event; + hw_event = VMS__malloc(sizeof(struct perf_event_attr)); + memset(hw_event,0,sizeof(hw_event)); + hw_event->type = PERF_TYPE_HARDWARE; + hw_event->size = sizeof(hw_event); + hw_event->disabled = 0; + hw_event->freq = 0; + hw_event->inherit = 1; /* children inherit it */ + hw_event->pinned = 1; /* must always be on PMU */ + hw_event->exclusive = 0; /* only group on PMU */ + hw_event->exclude_user = 0; /* don't count user */ + hw_event->exclude_kernel = 1; /* ditto kernel */ + hw_event->exclude_hv = 1; /* ditto hypervisor */ + hw_event->exclude_idle = 1; /* don't count when idle */ + hw_event->mmap = 0; /* include mmap data */ + hw_event->comm = 0; /* include comm data */ - for(i=0; iconfig = PERF_COUNT_HW_CPU_CYCLES; //cycles + cycles_counter_fd[i] = syscall(__NR_perf_event_open, hw_event, + 0,//pid_t pid, + i,//int cpu, + -1,//int group_fd, + 0//unsigned long flags + ); + if (cycles_counter_fd[i]<0){ + fprintf(stderr,"On core %d: ",i); + perror("Failed to open cycles counter"); + } } - union timeStamp startBenchTime, endBenchTime; - uint64 lastThreadFinishTime = 0L; - saveTimeStampCountInto(startBenchTime.highLow[0], startBenchTime.highLow[1]); + //Count on all CPUs + int cycles_counter_main_fd = syscall(__NR_perf_event_open, hw_event, + 0,//pid_t pid, + -1,//int cpu, + -1,//int group_fd, + 0//unsigned long flags + ); + if (cycles_counter_main_fd<0){ + fprintf(stderr,"On core %d: ",i); + perror("Failed to open cycles counter"); + } + + //prepare input for(i=0; i lastThreadFinishTime) - lastThreadFinishTime = input[i].endTime.total; + uint64_t overallWorkCycles = 0; + for(i=0; i