Mercurial > cgi-bin > hgwebdir.cgi > PR > Applications > Vthread > Vthread__Best_Effort_Msg__Bench
changeset 7:28650a4df2b9
working version
| author | Merten Sach <msach@mailbox.tu-berlin.de> |
|---|---|
| date | Mon, 21 Nov 2011 21:39:03 +0100 |
| parents | c8995a602b46 |
| children | b2a84bc2b274 |
| files | src/Application/main.c |
| diffstat | 1 files changed, 87 insertions(+), 61 deletions(-) [+] |
line diff
1.1 --- a/src/Application/main.c Mon Nov 21 19:16:03 2011 +0100 1.2 +++ b/src/Application/main.c Mon Nov 21 21:39:03 2011 +0100 1.3 @@ -126,6 +126,8 @@ 1.4 size_t chunk_size = 0; 1.5 1.6 int cycles_counter_fd[NUM_CORES]; 1.7 +int cycles_counter_main_fd; 1.8 +struct perf_event_attr* hw_event; 1.9 1.10 //======================== App Code ========================= 1.11 /* 1.12 @@ -137,15 +139,16 @@ 1.13 int nread; \ 1.14 \ 1.15 nread = read(cycles_fd,&(cycles),sizeof(cycles)); \ 1.16 - if(nread<=0){ \ 1.17 + if(nread<0){ \ 1.18 perror("Error reading cycles counter"); \ 1.19 cycles = 0; \ 1.20 } \ 1.21 } while (0) //macro magic for scoping 1.22 1.23 + 1.24 void work(void* input, VirtProcr* animatingPr) 1.25 { 1.26 - int n,m; 1.27 + int i,o; 1.28 struct input_t* in = (struct input_t*)input; 1.29 unsigned int totalWorkCycles = 0; 1.30 unsigned int workspace1; 1.31 @@ -154,7 +157,7 @@ 1.32 1.33 int cpuid = sched_getcpu(); 1.34 1.35 - for(m=0; m<repetitions; m++) 1.36 + for(o=0; o<repetitions; o++) 1.37 { 1.38 1.39 //measure inner workload to determine synchronisation overhead by subtraction 1.40 @@ -162,7 +165,7 @@ 1.41 saveCyclesAndInstrs(cpuid,startWorkload.cycles); 1.42 1.43 //workload 1.44 - for(n=0; n<workload_size; n++) 1.45 + for(i=0; i<workload_size; i++) 1.46 { 1.47 workspace1 += (workspace1 + 32)/2; 1.48 workspace2 += (workspace2 + 23.2)/1.4; 1.49 @@ -174,10 +177,11 @@ 1.50 uint64 numCycles = endWorkload.cycles - startWorkload.cycles; 1.51 1.52 1.53 - if( numCycles < 100000000 ) totalWorkCycles += numCycles;//sanity check 1.54 + if( numCycles < 4000000 ) //sanity check (4M is about 200K iters) 1.55 + totalWorkCycles += numCycles; 1.56 1.57 + //mutex access causes switch to different Slave VP 1.58 VPThread__mutex_lock(privateMutex, animatingPr); 1.59 - //lock access to switch to different tast 1.60 VPThread__mutex_unlock(privateMutex, animatingPr); 1.61 } 1.62 1.63 @@ -190,7 +194,7 @@ 1.64 1.65 //Shutdown worker 1.66 VPThread__dissipate_thread(animatingPr); 1.67 - 1.68 + //below printfs never reached --> there for gcc 1.69 printf("%d", workspace1); //This is to prevent gcc from optimizing out the 1.70 printf("%f", workspace2); //two workspace variables 1.71 } 1.72 @@ -198,63 +202,21 @@ 1.73 /* this is run after the VMS is set up*/ 1.74 void benchmark(void *in, VirtProcr *animatingPr) 1.75 { 1.76 - int i; 1.77 + int i, cpuID; 1.78 struct input_t input[num_threads]; 1.79 struct barrier_t barr; 1.80 barrier_init(&barr, num_threads+1, animatingPr); 1.81 1.82 - //setup performance counters 1.83 - struct perf_event_attr* hw_event; 1.84 - hw_event = VMS__malloc(sizeof(struct perf_event_attr)); 1.85 - memset(hw_event,0,sizeof(hw_event)); 1.86 - hw_event->type = PERF_TYPE_HARDWARE; 1.87 - hw_event->size = sizeof(hw_event); 1.88 - hw_event->disabled = 0; 1.89 - hw_event->freq = 0; 1.90 - hw_event->inherit = 1; /* children inherit it */ 1.91 - hw_event->pinned = 1; /* must always be on PMU */ 1.92 - hw_event->exclusive = 0; /* only group on PMU */ 1.93 - hw_event->exclude_user = 0; /* don't count user */ 1.94 - hw_event->exclude_kernel = 1; /* ditto kernel */ 1.95 - hw_event->exclude_hv = 1; /* ditto hypervisor */ 1.96 - hw_event->exclude_idle = 1; /* don't count when idle */ 1.97 - hw_event->mmap = 0; /* include mmap data */ 1.98 - hw_event->comm = 0; /* include comm data */ 1.99 - 1.100 - 1.101 - for( i = 0; i < NUM_CORES; i++ ) 1.102 - { 1.103 - hw_event->config = PERF_COUNT_HW_CPU_CYCLES; //cycles 1.104 - cycles_counter_fd[i] = syscall(__NR_perf_event_open, hw_event, 1.105 - 0,//pid_t pid, 1.106 - i,//int cpu, 1.107 - -1,//int group_fd, 1.108 - 0//unsigned long flags 1.109 - ); 1.110 - if (cycles_counter_fd[i]<0){ 1.111 - fprintf(stderr,"On core %d: ",i); 1.112 - perror("Failed to open cycles counter"); 1.113 - } 1.114 - } 1.115 - 1.116 - //Count on all CPUs 1.117 - int cycles_counter_main_fd = syscall(__NR_perf_event_open, hw_event, 1.118 - 0,//pid_t pid, 1.119 - -1,//int cpu, 1.120 - -1,//int group_fd, 1.121 - 0//unsigned long flags 1.122 - ); 1.123 - if (cycles_counter_main_fd<0){ 1.124 - fprintf(stderr,"On core %d: ",i); 1.125 - perror("Failed to open cycles counter"); 1.126 - } 1.127 + 1.128 1.129 //prepare input 1.130 for(i=0; i<num_threads; i++) 1.131 { 1.132 input[i].barrier = &barr; 1.133 } 1.134 - 1.135 + 1.136 + printf("just before first counter read, inside benchmark\n"); 1.137 + 1.138 //save cycles before execution of threads to get longest runtime 1.139 measurement_t startBenchTime, endBenchTime; 1.140 int nread = read(cycles_counter_main_fd,&(startBenchTime.cycles), 1.141 @@ -262,7 +224,7 @@ 1.142 if(nread<0){ 1.143 perror("Error reading cycles counter"); 1.144 } 1.145 - 1.146 + printf("finished first counter read, inside benchmark\n"); 1.147 //create all threads 1.148 for(i=0; i<num_threads; i++) 1.149 { VPThread__create_thread((VirtProcrFnPtr)work, (void*)&input[i], animatingPr);} 1.150 @@ -270,7 +232,7 @@ 1.151 barrier_wait(&barr, animatingPr); 1.152 1.153 1.154 - //longest thread measurement 1.155 + //accumulated cycles of all cores 1.156 nread = read(cycles_counter_main_fd,&(endBenchTime.cycles), 1.157 sizeof(endBenchTime.cycles)); 1.158 if(nread<0){ 1.159 @@ -279,7 +241,7 @@ 1.160 1.161 uint64_t overallWorkCycles = 0; 1.162 for(i=0; i<num_threads; i++){ 1.163 - printf("WorkCycles: %d\n",input[i].totalWorkCycles); 1.164 + printf("WorkCycles: %lu\n",input[i].totalWorkCycles); 1.165 overallWorkCycles += input[i].totalWorkCycles; 1.166 } 1.167 1.168 @@ -349,9 +311,73 @@ 1.169 return EXIT_FAILURE; 1.170 } 1.171 }//for 1.172 - 1.173 - //This is the transition to the VMS runtime 1.174 - VPThread__create_seed_procr_and_do_work(benchmark, NULL); 1.175 + 1.176 + //setup performance counters 1.177 + hw_event = malloc(sizeof(struct perf_event_attr)); 1.178 + memset(hw_event,0,sizeof(struct perf_event_attr)); 1.179 + 1.180 + hw_event->type = PERF_TYPE_HARDWARE; 1.181 + hw_event->size = sizeof(hw_event); 1.182 + hw_event->disabled = 0; 1.183 + hw_event->freq = 0; 1.184 + hw_event->inherit = 1; /* children inherit it */ 1.185 + hw_event->pinned = 1; /* says this virt counter must always be on HW */ 1.186 + hw_event->exclusive = 0; /* only group on PMU */ 1.187 + hw_event->exclude_user = 0; /* don't count user */ 1.188 + hw_event->exclude_kernel = 1; /* don't count kernel */ 1.189 + hw_event->exclude_hv = 1; /* ditto hypervisor */ 1.190 + hw_event->exclude_idle = 1; /* don't count when idle */ 1.191 + hw_event->mmap = 0; /* include mmap data */ 1.192 + hw_event->comm = 0; /* include comm data */ 1.193 + 1.194 + hw_event->config = PERF_COUNT_HW_CPU_CYCLES; //cycles 1.195 + 1.196 + int cpuID, retries; 1.197 + 1.198 + for( cpuID = 0; cpuID < NUM_CORES; cpuID++ ) 1.199 + { retries = 0; 1.200 + do 1.201 + { retries += 1; 1.202 + cycles_counter_fd[cpuID] = 1.203 + syscall(__NR_perf_event_open, hw_event, 1.204 + 0,//pid_t: 0 is "pid of calling process" 1.205 + cpuID,//int: cpu, the value returned by "CPUID" instr(?) 1.206 + -1,//int: group_fd, -1 is "leader" or independent 1.207 + 0//unsigned long: flags 1.208 + ); 1.209 + } 1.210 + while(cycles_counter_fd[cpuID]<0 && retries < 100); 1.211 + if(retries >= 100) 1.212 + { 1.213 + fprintf(stderr,"On core %d: ",cpuID); 1.214 + perror("Failed to open cycles counter"); 1.215 + } 1.216 + } 1.217 + printf("counters now set up\n"); 1.218 + 1.219 + //Set up counter to accumulate total cycles to process, across all CPUs 1.220 + 1.221 + retries = 0; 1.222 + do 1.223 + { retries += 1; 1.224 + cycles_counter_main_fd = 1.225 + syscall(__NR_perf_event_open, hw_event, 1.226 + 0,//pid_t: 0 is "pid of calling process" 1.227 + -1,//int: cpu, -1 means accumulate from all cores 1.228 + -1,//int: group_fd, -1 is "leader" == independent 1.229 + 0//unsigned long: flags 1.230 + ); 1.231 + } 1.232 + while(cycles_counter_main_fd<0 && retries < 100); 1.233 + if(retries >= 100) 1.234 + { 1.235 + fprintf(stderr,"in main "); 1.236 + perror("Failed to open cycles counter"); 1.237 + } 1.238 + 1.239 + 1.240 + //This is the transition to the VMS runtime 1.241 + VPThread__create_seed_procr_and_do_work(benchmark, NULL); 1.242 1.243 return 0; 1.244 -} 1.245 + }
