Mercurial > cgi-bin > hgwebdir.cgi > VMS > 2__runs_and_data
changeset 1:75c55af8338f
Overhead.py: Script to generate the results of the exec_vs_task_size benchmarks
author | Merten Sach <msach@mailbox.tu-berlin.de> |
---|---|
date | Fri, 09 Dec 2011 15:09:08 +0100 |
parents | 21573f5b2e84 |
children | c2e8c3b49545 |
files | scripts/overhead.py |
diffstat | 1 files changed, 144 insertions(+), 0 deletions(-) [+] |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/scripts/overhead.py Fri Dec 09 15:09:08 2011 +0100 1.3 @@ -0,0 +1,144 @@ 1.4 +#! /usr/bin/env python 1.5 +# -*- coding: utf-8 -*- 1.6 + 1.7 +import sys 1.8 +from re import match, search 1.9 +from datetime import datetime 1.10 +from subprocess import call,Popen,PIPE 1.11 + 1.12 +""" 1.13 +This script generates a graph that represents the overhead 1.14 + 1.15 +involved in synchronisation operations 1.16 +""" 1.17 + 1.18 +usage=""" 1.19 + This runs the exec time vs task size in three levels of loop nest. The outer most iterates through 1.20 + a selection of numbers-of-thread. For each of those, the next lever iterates over a number of work-loops-per-task 1.21 + values. The innermost repeats several times and chooses the best. 1.22 + Finally, it generates an output file for each value of number-of-threads that a companion gluplot script turns 1.23 + into a .eps graph. 1.24 + It is expected that the output directory's path is meaningful, such as machine-name, date, and so on 1.25 + Usage: 1.26 + overhead.py [executable binary] [path to output dir] 1.27 +""" 1.28 + 1.29 +NUM_CORES = 4 #Number of Cores the code was compiled for 1.30 +ITERS_PER_TASK_TABLE = [2, 5, 10, 20, 40, 80, 160, 320, 640] #Number of iterations of inner loop 1.31 +TASKS_PER_THREAD = 30000 #Number of interations of outer loop 1.32 +TOTAL_THREADS_TABLE = [8, 32, 128, 512] 1.33 + 1.34 +def getNumber(line): 1.35 + match_obj = search("(\d+\.?\d*)", line) 1.36 + if match_obj != None: 1.37 + return match_obj.groups()[0] 1.38 + else: 1.39 + raise ValueError 1.40 + 1.41 +if len(sys.argv) != 3: 1.42 + print usage 1.43 + sys.exit(0) 1.44 + 1.45 +cmd=sys.argv[1] 1.46 +try: 1.47 + f = open(cmd) 1.48 +except IOError: 1.49 + print "Please provide a valid executable." 1.50 + f.close() 1.51 + sys.exit(1) 1.52 +finally: 1.53 + f.close() 1.54 + 1.55 +output_dir_path = sys.argv[2] 1.56 + 1.57 +#=================================================================== 1.58 +# Done with parsing cmd line inputs, start doing the runs 1.59 +# 1.60 + 1.61 +for totalThreads in TOTAL_THREADS_TABLE: 1.62 + print "\nDoing run with %d threads" % totalThreads 1.63 + output = "%s/%d_thds__o%d__perfCtrs.meas" % (output_dir_path, totalThreads, TASKS_PER_THREAD) 1.64 + print "output file: %s" % output 1.65 + threadsPerCore = totalThreads/NUM_CORES 1.66 + array_of_results = [] 1.67 + for workload_iterations_in_task in ITERS_PER_TASK_TABLE: 1.68 + print "Run for %s workload iterations in a task" % workload_iterations_in_task 1.69 + results = [] 1.70 + for run in range(5): 1.71 + print "Run %d" % run, 1.72 + program_output = Popen("%s -t %d -i %d -o %d" % (cmd, 1.73 + totalThreads, 1.74 + workload_iterations_in_task, 1.75 + TASKS_PER_THREAD), 1.76 + stdout=PIPE, stderr=None, shell=True).stdout.read() 1.77 + #parse arguments for 1.78 + for line in program_output.split("\n"): 1.79 + if match("^Sum across threads of work cycles:", line) != None: 1.80 + total_workcycles = int(getNumber(line)) 1.81 + if match("^Total Execution Cycles:", line) != None: 1.82 + total_exe_cycles = int(getNumber(line)) 1.83 + if match("^ExeCycles/WorkCycles Ratio", line) != None: 1.84 + exeCycles_workCycles_ratio = float(getNumber(line)) 1.85 + results.append({"total_workcycles" : total_workcycles, 1.86 + "total_exe_cycles" : total_exe_cycles, 1.87 + "exeCycles_workCycles_ratio" : exeCycles_workCycles_ratio}) 1.88 + print "ratio %f" % exeCycles_workCycles_ratio 1.89 + array_of_results.append(results) 1.90 + 1.91 + 1.92 + #open gnuplot output 1.93 + try: 1.94 + gnuplot_output = open(output,"w") 1.95 + except IOError: 1.96 + print "Cannot open output file %s" % output 1.97 + sys.exit(1) 1.98 + 1.99 + table_header = "# %20s\t%20s\t%20s\t%20s\t%20s\t%20s\t%20s\t%20s\n" % ( 1.100 + "<iters per task>", 1.101 + "<total exe cycles>", 1.102 + "<total work cyc>", 1.103 + "<one task cyc>", 1.104 + "<total overhead cyc>", 1.105 + "<num syncs>", 1.106 + "<overhead per Sync cyc>", 1.107 + "<Exe/Work ratio>") 1.108 + 1.109 + #write header to file 1.110 + gnuplot_output.writelines(["# Output file name: %s\n" % output, 1.111 + "# Date of Run: %s\n" % str(datetime.now()), 1.112 + "# Number of Cores: %d\n" % NUM_CORES, 1.113 + "# Number of Threads: %f per Core, %d total\n" % (threadsPerCore, totalThreads), 1.114 + table_header, 1.115 + "# " + (len(table_header)-3)*"-" + "\n"]) 1.116 + 1.117 + #Now print the results out 1.118 + idx = -1 1.119 + for workload_iterations_in_task in ITERS_PER_TASK_TABLE: 1.120 + idx += 1 1.121 + results = array_of_results[idx] 1.122 + 1.123 + #take shortest run 1.124 + results.sort(lambda x,y: cmp(x["total_exe_cycles"],y["total_exe_cycles"])) 1.125 + total_workcycles = results[0]["total_workcycles"] 1.126 + total_exe_cycles = results[0]["total_exe_cycles"] 1.127 + exeCycles_workCycles_ratio = results[0]["exeCycles_workCycles_ratio"] 1.128 + 1.129 + #Calculate numbers 1.130 + overhead = total_exe_cycles - total_workcycles 1.131 + total_syncs = totalThreads * TASKS_PER_THREAD * 2 1.132 + overhead_per_sync = float(overhead) / float(total_syncs) 1.133 + cycles_of_task = float(total_workcycles) / float(TASKS_PER_THREAD * totalThreads) 1.134 + overhead_per_core = float(overhead) / NUM_CORES 1.135 + workcycles_per_core = total_workcycles / NUM_CORES 1.136 + 1.137 + gnuplot_output.write("%20d\t%20d\t%20d\t%20f\t%20d\t%20d\t%20f\t%20f\n" % ( 1.138 + workload_iterations_in_task, 1.139 + total_exe_cycles, 1.140 + total_workcycles, 1.141 + cycles_of_task, 1.142 + overhead, 1.143 + total_syncs, 1.144 + overhead_per_sync, 1.145 + exeCycles_workCycles_ratio)) 1.146 + 1.147 + gnuplot_output.close();