changeset 1:75c55af8338f

Overhead.py: Script to generate the results of the exec_vs_task_size benchmarks
author Merten Sach <msach@mailbox.tu-berlin.de>
date Fri, 09 Dec 2011 15:09:08 +0100
parents 21573f5b2e84
children c2e8c3b49545
files scripts/overhead.py
diffstat 1 files changed, 144 insertions(+), 0 deletions(-) [+]
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/overhead.py	Fri Dec 09 15:09:08 2011 +0100
@@ -0,0 +1,144 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+from re import match, search
+from datetime import datetime
+from subprocess import Popen, PIPE
+
+"""
+This script generates the data behind a graph of the overhead
+involved in synchronisation operations.
+"""
+
+usage="""
+	This runs the exec time vs task size benchmark in three levels of loop nest.  The outermost level
+	iterates through a selection of thread counts.  For each of those, the next level iterates over a
+	number of work-loop-iterations-per-task values.  The innermost level repeats the run several times
+	and keeps the best.
+	Finally, it generates an output file for each thread count, which a companion gnuplot script turns
+	into a .eps graph.
+	It is expected that the output directory's path is meaningful, encoding machine name, date, and so on.
+	Usage:
+		overhead.py [executable binary] [path to output dir]
+"""
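+# Example invocation (binary name and output path are illustrative only):
+#	./overhead.py ./exec_vs_task_size results/machineA_2011-12-09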
+
+NUM_CORES = 4	#Number of cores the code was compiled for
+ITERS_PER_TASK_TABLE = [2, 5, 10, 20, 40, 80, 160, 320, 640]	#Number of iterations of the inner work loop
+TASKS_PER_THREAD = 30000	#Number of iterations of the outer loop
+TOTAL_THREADS_TABLE = [8, 32, 128, 512]	#thread counts to sweep over
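+# The loop nest below thus performs len(TOTAL_THREADS_TABLE) * len(ITERS_PER_TASK_TABLE) * 5
+# = 4 * 9 * 5 = 180 individual benchmark runs per invocation.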
+
+def getNumber(line):
+	"""Return the first (possibly fractional) number in line, as a string,
+	e.g. getNumber("Total Execution Cycles: 1234567") -> "1234567"."""
+	match_obj = search(r"(\d+\.?\d*)", line)
+	if match_obj is not None:
+		return match_obj.group(1)
+	else:
+		raise ValueError("no number found in line: %r" % line)
+
+if len(sys.argv) != 3:
+	print usage
+	sys.exit(0)
+
+cmd = sys.argv[1]
+try:
+	f = open(cmd)
+except IOError:
+	print "Please provide a valid executable."
+	sys.exit(1)
+else:
+	f.close()
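+# Note: open() only verifies that the file exists and is readable; a stricter
+# check of the execute bit would be os.access(cmd, os.X_OK) (requires "import os").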
+
+output_dir_path = sys.argv[2]
+
+#===================================================================
+#  Done with parsing cmd line inputs, start doing the runs
+#
+
+for totalThreads in TOTAL_THREADS_TABLE:
+	print "\nDoing run with %d threads" % totalThreads
+	#e.g. "<output dir>/8_thds__o30000__perfCtrs.meas" for the 8-thread run
+	output = "%s/%d_thds__o%d__perfCtrs.meas" % (output_dir_path, totalThreads, TASKS_PER_THREAD)
+	print "output file: %s" % output
+	threadsPerCore = float(totalThreads) / NUM_CORES	#written as %f in the file header below
+	array_of_results = []
+	for workload_iterations_in_task in ITERS_PER_TASK_TABLE:
+		print "Run for %s workload iterations in a task" % workload_iterations_in_task
+		results = []
+		for run in range(5):
+			print "Run %d" % run,
+			program_output = Popen("%s -t %d -i %d -o %d" % (cmd,
+							totalThreads,
+							workload_iterations_in_task,
+							TASKS_PER_THREAD),
+						stdout=PIPE, stderr=None, shell=True).stdout.read()
+			#parse the performance counters out of the benchmark's output
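+			# The benchmark binary is expected to emit lines of the following shape
+			# (values illustrative; only the prefixes below are matched):
+			#	Sum across threads of work cycles: 123456789
+			#	Total Execution Cycles: 234567890
+			#	ExeCycles/WorkCycles Ratio: 1.9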
+			for line in program_output.split("\n"):
+				if match("^Sum across threads of work cycles:", line) is not None:
+					total_workcycles = int(getNumber(line))
+				if match("^Total Execution Cycles:", line) is not None:
+					total_exe_cycles = int(getNumber(line))
+				if match("^ExeCycles/WorkCycles Ratio", line) is not None:
+					exeCycles_workCycles_ratio = float(getNumber(line))
+			results.append({"total_workcycles"           : total_workcycles,
+							"total_exe_cycles"           : total_exe_cycles,
+							"exeCycles_workCycles_ratio" : exeCycles_workCycles_ratio})
+			print "ratio %f" % exeCycles_workCycles_ratio
+		array_of_results.append(results)
+
+	#open the gnuplot data file for this thread count
+	try:
+		gnuplot_output = open(output, "w")
+	except IOError:
+		print "Cannot open output file %s" % output
+		sys.exit(1)
+
+	table_header = "# %20s\t%20s\t%20s\t%20s\t%20s\t%20s\t%20s\t%20s\n" % (
+							"<iters per task>",
+							"<total exe cycles>",
+							"<total work cyc>",
+							"<one task cyc>",
+							"<total overhead cyc>",
+							"<num syncs>",
+							"<overhead per Sync cyc>",
+							"<Exe/Work ratio>")
+
+	#write the file header
+	gnuplot_output.writelines(["# Output file name: %s\n" % output,
+							"# Date of Run: %s\n" % str(datetime.now()),
+							"# Number of Cores: %d\n" % NUM_CORES,
+							"# Number of Threads: %f per Core, %d total\n" % (threadsPerCore, totalThreads),
+							table_header,
+							"# " + (len(table_header) - 3) * "-" + "\n"])
+
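+	# Each data row below becomes one point for the companion gnuplot script
+	# (not part of this changeset); e.g. column 1 (<iters per task>) plotted
+	# against column 7 (<overhead per Sync cyc>) would give the per-sync overhead curve.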
+	#Now print the results out, one row per task size
+	for idx, workload_iterations_in_task in enumerate(ITERS_PER_TASK_TABLE):
+		results = array_of_results[idx]
+
+		#keep the fastest of the five runs (fewest total execution cycles)
+		results.sort(key=lambda x: x["total_exe_cycles"])
+		total_workcycles = results[0]["total_workcycles"]
+		total_exe_cycles = results[0]["total_exe_cycles"]
+		exeCycles_workCycles_ratio = results[0]["exeCycles_workCycles_ratio"]
+
+		#Calculate the derived numbers
+		overhead             = total_exe_cycles - total_workcycles
+		total_syncs          = totalThreads * TASKS_PER_THREAD * 2	#the factor 2 reflects two syncs per task
+		overhead_per_sync    = float(overhead) / float(total_syncs)
+		cycles_of_task       = float(total_workcycles) / float(TASKS_PER_THREAD * totalThreads)
+		overhead_per_core    = float(overhead) / NUM_CORES	#computed for reference, not written out
+		workcycles_per_core  = total_workcycles / NUM_CORES	#computed for reference, not written out
+
+		gnuplot_output.write("%20d\t%20d\t%20d\t%20f\t%20d\t%20d\t%20f\t%20f\n" % (
+						workload_iterations_in_task,
+						total_exe_cycles,
+						total_workcycles,
+						cycles_of_task,
+						overhead,
+						total_syncs,
+						overhead_per_sync,
+						exeCycles_workCycles_ratio))
+
+	gnuplot_output.close()