changeset 15:db3409eab322
exec_time_vs_task_size: data generation script for xoanon, because of higher core count
author   | Merten Sach <msach@mailbox.tu-berlin.de>
date     | Mon, 06 Feb 2012 16:29:36 +0100
parents  | 3716aef85ba7
children | eea8cf5846c7
files    | scripts/overhead_2_tasks_per_outer_iter.py
diffstat | 1 files changed, 147 insertions(+), 0 deletions(-)
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/overhead_2_tasks_per_outer_iter.py	Mon Feb 06 16:29:36 2012 +0100
@@ -0,0 +1,147 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+from re import match, search
+from datetime import datetime
+from subprocess import Popen, PIPE
+
+"""
+This script generates the data for a graph that represents the overhead
+involved in synchronisation operations.
+"""
+
+usage = """
+    This runs the exec-time-vs-task-size measurement in a three-level loop nest. The outermost level iterates
+    over a selection of thread counts. For each of those, the next level iterates over a number of
+    work-loops-per-task values. The innermost level repeats the run several times and keeps the best result.
+    Finally, it generates one output file per thread count, which a companion gnuplot script turns
+    into a .eps graph.
+    The output directory's path is expected to be meaningful, e.g. machine name, date, and so on.
+    Usage:
+    overhead.py [executable binary] [path to output dir]
+"""
+
+NUM_CORES = 4                                                  # number of cores the code was compiled for
+ITERS_PER_TASK_TABLE = [2, 5, 10, 20, 40, 80, 160, 320, 640]   # number of iterations of the inner loop
+ITERS_PER_OUTER_LOOP = 30000                                   # number of iterations of the outer loop
+TOTAL_THREADS_TABLE = [8, 32, 128, 512]
+
+# each outer-loop iteration creates two tasks per thread
+tasks_per_thread = ITERS_PER_OUTER_LOOP * 2
+
+def getNumber(line):
+    """Return the first number (integer or decimal) found in line."""
+    match_obj = search(r"(\d+\.?\d*)", line)
+    if match_obj is not None:
+        return match_obj.groups()[0]
+    else:
+        raise ValueError
+
+if len(sys.argv) != 3:
+    print usage
+    sys.exit(0)
+
+cmd = sys.argv[1]
+# make sure the given executable can at least be opened
+try:
+    f = open(cmd)
+    f.close()
+except IOError:
+    print "Please provide a valid executable."
+    sys.exit(1)
+
+output_dir_path = sys.argv[2]
+
+#===================================================================
+# Done with parsing cmd line inputs, start doing the runs
+#
+
+for totalThreads in TOTAL_THREADS_TABLE:
+    print "\nDoing run with %d threads" % totalThreads
+    output = "%s/%d_thds__o%d__perfCtrs.meas" % (output_dir_path, totalThreads, ITERS_PER_OUTER_LOOP)
+    print "output file: %s" % output
+    threadsPerCore = totalThreads / NUM_CORES
+    array_of_results = []
+    for workload_iterations_in_task in ITERS_PER_TASK_TABLE:
+        print "Run for %s workload iterations in a task" % workload_iterations_in_task
+        results = []
+        for run in range(5):
+            print "Run %d" % run,
+            program_output = Popen("%s -t %d -i %d -o %d" % (cmd,
+                                                             totalThreads,
+                                                             workload_iterations_in_task,
+                                                             ITERS_PER_OUTER_LOOP),
+                                   stdout=PIPE, stderr=None, shell=True).stdout.read()
+            # parse the performance counters reported by the benchmark
+            for line in program_output.split("\n"):
+                if match("^Sum across threads of work cycles:", line) is not None:
+                    total_workcycles = int(getNumber(line))
+                if match("^Total Execution Cycles:", line) is not None:
+                    total_exe_cycles = int(getNumber(line))
+                if match("^ExeCycles/WorkCycles Ratio", line) is not None:
+                    exeCycles_workCycles_ratio = float(getNumber(line))
+            results.append({"total_workcycles": total_workcycles,
+                            "total_exe_cycles": total_exe_cycles,
+                            "exeCycles_workCycles_ratio": exeCycles_workCycles_ratio})
+            print "ratio %f" % exeCycles_workCycles_ratio
+        array_of_results.append(results)
+
+    # open gnuplot output
+    try:
+        gnuplot_output = open(output, "w")
+    except IOError:
+        print "Cannot open output file %s" % output
+        sys.exit(1)
+
+    table_header = "# %20s\t%20s\t%20s\t%20s\t%20s\t%20s\t%20s\t%20s\n" % (
+        "<iters per task>",
+        "<total exe cycles>",
+        "<total work cyc>",
+        "<one task cyc>",
+        "<total overhead cyc>",
+        "<num syncs>",
+        "<overhead per Sync cyc>",
+        "<Exe/Work ratio>")
+
+    # write header to file
+    gnuplot_output.writelines(["# Output file name: %s\n" % output,
+                               "# Date of Run: %s\n" % str(datetime.now()),
+                               "# Number of Cores: %d\n" % NUM_CORES,
+                               "# Number of Threads: %f per Core, %d total\n" % (threadsPerCore, totalThreads),
+                               table_header,
+                               "# " + (len(table_header) - 3) * "-" + "\n"])
+
+    # now write the results out, one line per iters-per-task value
+    for idx, workload_iterations_in_task in enumerate(ITERS_PER_TASK_TABLE):
+        results = array_of_results[idx]
+
+        # take the shortest (best) run
+        results.sort(lambda x, y: cmp(x["total_exe_cycles"], y["total_exe_cycles"]))
+        total_workcycles = results[0]["total_workcycles"]
+        total_exe_cycles = results[0]["total_exe_cycles"]
+        # recompute the ratio from the best run rather than using the reported value
+        #exeCycles_workCycles_ratio = results[0]["exeCycles_workCycles_ratio"]
+        exeCycles_workCycles_ratio = float(total_exe_cycles) / float(total_workcycles)
+
+        # calculate derived numbers
+        overhead = total_exe_cycles - total_workcycles
+        total_syncs = totalThreads * tasks_per_thread
+        overhead_per_sync = float(overhead) / float(total_syncs)
+        cycles_of_task = float(total_workcycles) / float(tasks_per_thread * totalThreads)
+        overhead_per_core = float(overhead) / NUM_CORES
+        workcycles_per_core = total_workcycles / NUM_CORES
+
+        gnuplot_output.write("%20d\t%20d\t%20d\t%20f\t%20d\t%20d\t%20f\t%20f\n" % (
+            workload_iterations_in_task,
+            total_exe_cycles,
+            total_workcycles,
+            cycles_of_task,
+            overhead,
+            total_syncs,
+            overhead_per_sync,
+            exeCycles_workCycles_ratio))
+
+    gnuplot_output.close()
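
The script takes two positional arguments, e.g. scripts/overhead_2_tasks_per_outer_iter.py ./benchmark_binary results/xoanon (both paths are hypothetical examples). The companion gnuplot script mentioned in the usage text is not part of this changeset; as a rough sketch of how the generated .meas files could be consumed, the snippet below reads one output file and prints two of its columns. The file name is a made-up example, and the column positions follow the table header written by the script (column 0 is <iters per task>, column 6 is <overhead per Sync cyc>).

#! /usr/bin/env python
# Sketch: read one generated .meas file and print the iters-per-task and
# overhead-per-sync columns. The path below is a hypothetical example.

meas_file = "results/8_thds__o30000__perfCtrs.meas"

with open(meas_file) as f:
    for line in f:
        if line.startswith("#") or not line.strip():
            continue  # skip the comment header the script writes
        cols = line.split()
        iters_per_task = int(cols[0])
        overhead_per_sync = float(cols[6])
        print("%6d iters per task -> %12.1f overhead cycles per sync" %
              (iters_per_task, overhead_per_sync))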