Mercurial > cgi-bin > hgwebdir.cgi > PR > Applications > Vthread > Vthread__Best_Effort_Msg__Bench
changeset 8:b2a84bc2b274
working version of exe time vs task size
| author | kshalle |
|---|---|
| date | Mon, 28 Nov 2011 23:58:58 +0100 |
| parents | 28650a4df2b9 |
| children | 5d3b5e58456e |
| files | src/Application/main.c |
| diffstat | 1 files changed, 95 insertions(+), 44 deletions(-) [+] |
line diff
1.1 --- a/src/Application/main.c Mon Nov 21 21:39:03 2011 +0100 1.2 +++ b/src/Application/main.c Mon Nov 28 23:58:58 2011 +0100 1.3 @@ -61,6 +61,11 @@ 1.4 #endif 1.5 1.6 //======================== Defines ========================= 1.7 +typedef struct perfData measurement_t; 1.8 +struct perfData{ 1.9 + uint64 cycles; 1.10 + uint64 instructions; 1.11 +}; 1.12 1.13 const char *usage = { 1.14 "Usage: malloc_test [options]\n" 1.15 @@ -78,6 +83,8 @@ 1.16 int nthreads; 1.17 int32 mutex; 1.18 int32 cond; 1.19 + measurement_t endBarrierCycles; 1.20 + 1.21 }; 1.22 typedef struct barrier_t barrier; 1.23 1.24 @@ -89,13 +96,18 @@ 1.25 barr->cond = VPThread__make_cond(barr->mutex, animatingPr); 1.26 } 1.27 1.28 +int cycles_counter_main_fd; 1.29 void inline barrier_wait(barrier *barr, VirtProcr *animatingPr) 1.30 { int i; 1.31 1.32 VPThread__mutex_lock(barr->mutex, animatingPr); 1.33 barr->counter++; 1.34 if(barr->counter == barr->nthreads) 1.35 - { barr->counter = 0; 1.36 + { 1.37 + read(cycles_counter_main_fd, &(barr->endBarrierCycles.cycles), \ 1.38 + sizeof(barr->endBarrierCycles.cycles)); 1.39 + 1.40 + barr->counter = 0; 1.41 for(i=0; i < barr->nthreads; i++) 1.42 VPThread__cond_signal(barr->cond, animatingPr); 1.43 } 1.44 @@ -105,18 +117,20 @@ 1.45 VPThread__mutex_unlock(barr->mutex, animatingPr); 1.46 } 1.47 1.48 -struct perfData{ 1.49 - uint64 cycles; 1.50 - uint64 instructions; 1.51 + 1.52 + 1.53 +struct WorkerParamsStr{ 1.54 + struct barrier_t* barrier; 1.55 + uint64_t totalWorkCycles; 1.56 }; 1.57 1.58 -typedef struct perfData measurement_t; 1.59 +typedef struct WorkerParamsStr WorkerParams; 1.60 1.61 -struct input_t{ 1.62 - struct barrier_t* barrier; 1.63 - uint64 totalWorkCycles; 1.64 -}; 1.65 - 1.66 +typedef struct 1.67 + { measurement_t *startExeCycles; 1.68 + measurement_t *endExeCycles; 1.69 + } 1.70 +BenchParams; 1.71 1.72 //======================== Globals ========================= 1.73 char __ProgrammName[] = "overhead_test"; 1.74 @@ -126,9 +140,10 @@ 1.75 size_t chunk_size = 0; 1.76 1.77 int cycles_counter_fd[NUM_CORES]; 1.78 -int cycles_counter_main_fd; 1.79 struct perf_event_attr* hw_event; 1.80 1.81 +WorkerParams *workerParamsArray; 1.82 + 1.83 //======================== App Code ========================= 1.84 /* 1.85 * Workload 1.86 @@ -146,13 +161,13 @@ 1.87 } while (0) //macro magic for scoping 1.88 1.89 1.90 -void work(void* input, VirtProcr* animatingPr) 1.91 +void worker_TLF(void* _params, VirtProcr* animatingPr) 1.92 { 1.93 int i,o; 1.94 - struct input_t* in = (struct input_t*)input; 1.95 + WorkerParams* params = (struct WorkerParamsStr*)_params; 1.96 unsigned int totalWorkCycles = 0; 1.97 - unsigned int workspace1; 1.98 - double workspace2; 1.99 + unsigned int workspace1=0; 1.100 + double workspace2=0.0; 1.101 int32 privateMutex = VPThread__make_mutex(animatingPr); 1.102 1.103 int cpuid = sched_getcpu(); 1.104 @@ -185,12 +200,12 @@ 1.105 VPThread__mutex_unlock(privateMutex, animatingPr); 1.106 } 1.107 1.108 - in->totalWorkCycles = totalWorkCycles; 1.109 - printf("Cycles: %lu on CPU %lu\n", totalWorkCycles, cpuid); 1.110 + params->totalWorkCycles = totalWorkCycles; 1.111 + //printf("Cycles: %lu on CPU %lu\n", totalWorkCycles, cpuid); 1.112 1.113 1.114 //Wait for all threads to end 1.115 - barrier_wait(in->barrier, animatingPr); 1.116 + barrier_wait(params->barrier, animatingPr); 1.117 1.118 //Shutdown worker 1.119 VPThread__dissipate_thread(animatingPr); 1.120 @@ -199,46 +214,55 @@ 1.121 printf("%f", workspace2); //two workspace variables 1.122 } 1.123 1.124 + 1.125 /* this is run after the VMS is set up*/ 1.126 -void benchmark(void *in, VirtProcr *animatingPr) 1.127 +void benchmark(void *_params, VirtProcr *animatingPr) 1.128 { 1.129 int i, cpuID; 1.130 - struct input_t input[num_threads]; 1.131 - struct barrier_t barr; 1.132 + struct barrier_t barr; 1.133 + BenchParams *params; 1.134 + 1.135 + params = (BenchParams *)_params; 1.136 + 1.137 barrier_init(&barr, num_threads+1, animatingPr); 1.138 - 1.139 - 1.140 - 1.141 + 1.142 //prepare input 1.143 for(i=0; i<num_threads; i++) 1.144 { 1.145 - input[i].barrier = &barr; 1.146 + workerParamsArray[i].barrier = &barr; 1.147 } 1.148 1.149 - printf("just before first counter read, inside benchmark\n"); 1.150 + //printf("just before first counter read, inside benchmark\n"); 1.151 1.152 - //save cycles before execution of threads to get longest runtime 1.153 - measurement_t startBenchTime, endBenchTime; 1.154 - int nread = read(cycles_counter_main_fd,&(startBenchTime.cycles), 1.155 - sizeof(startBenchTime.cycles)); 1.156 - if(nread<0){ 1.157 - perror("Error reading cycles counter"); 1.158 - } 1.159 - printf("finished first counter read, inside benchmark\n"); 1.160 + //save cycles before execution of threads, to get total exe cycles 1.161 + measurement_t *startExeCycles, *endExeCycles; 1.162 + startExeCycles = params->startExeCycles; 1.163 + //endExeCycles = params->endExeCycles; 1.164 + 1.165 + //printf("finished first counter read, inside benchmark\n"); 1.166 //create all threads 1.167 for(i=0; i<num_threads; i++) 1.168 - { VPThread__create_thread((VirtProcrFnPtr)work, (void*)&input[i], animatingPr);} 1.169 + { VPThread__create_thread((VirtProcrFnPtr)worker_TLF, &(workerParamsArray[i]), animatingPr); 1.170 + } 1.171 + 1.172 + int nread = read(cycles_counter_main_fd, &(startExeCycles->cycles), 1.173 + sizeof(startExeCycles->cycles)); 1.174 + if(nread<0) 1.175 + { perror("Error reading cycles counter"); 1.176 + } 1.177 //wait for all threads to finish 1.178 barrier_wait(&barr, animatingPr); 1.179 1.180 1.181 //accumulated cycles of all cores 1.182 - nread = read(cycles_counter_main_fd,&(endBenchTime.cycles), 1.183 - sizeof(endBenchTime.cycles)); 1.184 - if(nread<0){ 1.185 - perror("Error reading cycles counter"); 1.186 - } 1.187 + // nread = read(cycles_counter_main_fd, &(endExeCycles->cycles), \ 1.188 + sizeof(endExeCycles->cycles)); 1.189 + // if(nread<0){ 1.190 + // perror("Error reading cycles counter"); 1.191 + params->endExeCycles->cycles = barr.endBarrierCycles.cycles; 1.192 + 1.193 1.194 +/* 1.195 uint64_t overallWorkCycles = 0; 1.196 for(i=0; i<num_threads; i++){ 1.197 printf("WorkCycles: %lu\n",input[i].totalWorkCycles); 1.198 @@ -247,6 +271,9 @@ 1.199 1.200 printf("Sum across threads of work cycles: %lu\n", overallWorkCycles); 1.201 printf("Total Execution: %lu\n", endBenchTime.cycles-startBenchTime.cycles); 1.202 + printf("Runtime/Workcycle Ratio %lu\n", 1.203 + ((endBenchTime.cycles-startBenchTime.cycles)*100)/overallWorkCycles); 1.204 +*/ 1.205 1.206 //====================================================== 1.207 1.208 @@ -256,6 +283,10 @@ 1.209 int main(int argc, char **argv) 1.210 { 1.211 int i; 1.212 + measurement_t startExeCycles, endExeCycles; 1.213 + BenchParams *benchParams; 1.214 + 1.215 + benchParams = malloc(sizeof(BenchParams)); 1.216 1.217 //set global static variables, based on cmd-line args 1.218 for(i=1; i<argc; i++) 1.219 @@ -267,7 +298,7 @@ 1.220 case 't': 1.221 if(!isdigit(argv[++i][0])) 1.222 { 1.223 - fprintf(stderr, "-t mus be followed by the number of worker threads to spawn\n"); 1.224 + fprintf(stderr, "-t must be followed by the number of worker threads to spawn\n"); 1.225 return EXIT_FAILURE; 1.226 } 1.227 num_threads = atoi(argv[i]); 1.228 @@ -312,6 +343,7 @@ 1.229 } 1.230 }//for 1.231 1.232 + 1.233 //setup performance counters 1.234 hw_event = malloc(sizeof(struct perf_event_attr)); 1.235 memset(hw_event,0,sizeof(struct perf_event_attr)); 1.236 @@ -353,7 +385,6 @@ 1.237 perror("Failed to open cycles counter"); 1.238 } 1.239 } 1.240 - printf("counters now set up\n"); 1.241 1.242 //Set up counter to accumulate total cycles to process, across all CPUs 1.243 1.244 @@ -374,10 +405,30 @@ 1.245 fprintf(stderr,"in main "); 1.246 perror("Failed to open cycles counter"); 1.247 } 1.248 + 1.249 + //printf("counters now set up\n"); 1.250 + workerParamsArray = (WorkerParams *)malloc( (num_threads + 1) * sizeof(WorkerParams) ); 1.251 + if(workerParamsArray == NULL ) printf("error mallocing worker params array\n"); 1.252 + 1.253 + workerParamsArray[0].totalWorkCycles = 0; 1.254 + 1.255 + benchParams->startExeCycles = &startExeCycles; 1.256 + benchParams->endExeCycles = &endExeCycles; 1.257 1.258 + //This is the transition to the VMS runtime 1.259 + VPThread__create_seed_procr_and_do_work( &benchmark, benchParams ); 1.260 + 1.261 + uint64_t totalWorkCyclesAcrossCores = 0; 1.262 + for(i=0; i<num_threads; i++){ 1.263 + printf("WorkCycles: %lu\n",workerParamsArray[i].totalWorkCycles); 1.264 + totalWorkCyclesAcrossCores += workerParamsArray[i].totalWorkCycles; 1.265 + } 1.266 1.267 - //This is the transition to the VMS runtime 1.268 - VPThread__create_seed_procr_and_do_work(benchmark, NULL); 1.269 + uint64_t totalExeCycles = endExeCycles.cycles - startExeCycles.cycles; 1.270 + printf("Sum across threads of work cycles: %lu\n", totalWorkCyclesAcrossCores); 1.271 + printf("Total Execution Cycles: %lu\n", totalExeCycles); 1.272 + printf("ExeCycles/WorkCycles Ratio %f\n", 1.273 + (double)totalExeCycles / (double)totalWorkCyclesAcrossCores); 1.274 1.275 return 0; 1.276 }
