changeset 5:535c119ba090

rearranged code to fit project patterns
author Me@portablequad
date Fri, 28 Oct 2011 06:56:35 -0700
parents e512fcf3748e
children c8995a602b46
files .hgignore Makefile README.txt c-ray-mt.c scene sphfract src/Application/main.c
diffstat 7 files changed, 77 insertions(+), 1016 deletions(-) [+]
line diff
     1.1 --- a/.hgignore	Sat Oct 22 19:27:29 2011 -0700
     1.2 +++ b/.hgignore	Fri Oct 28 06:56:35 2011 -0700
     1.3 @@ -2,6 +2,8 @@
     1.4  
     1.5  histograms
     1.6  nbproject
     1.7 +build
     1.8 +dist
     1.9  c-ray-mt
    1.10  *.ppm
    1.11  *.o
     2.1 --- a/Makefile	Sat Oct 22 19:27:29 2011 -0700
     2.2 +++ b/Makefile	Fri Oct 28 06:56:35 2011 -0700
     2.3 @@ -1,31 +1,33 @@
     2.4  obj = 	\
     2.5 -	VPThread_lib/VMS/Histogram/Histogram.o \
     2.6 -	VPThread_lib/VMS/Histogram/FloatHist.o \
     2.7 -	VPThread_lib/VMS/CoreLoop.o \
     2.8 -	VPThread_lib/VMS/VMS.o \
     2.9 -	VPThread_lib/VMS/MasterLoop.o \
    2.10 -	VPThread_lib/VMS/Queue_impl/PrivateQueue.o \
    2.11 -	VPThread_lib/VMS/Hash_impl/PrivateHash.o \
    2.12 -	VPThread_lib/VMS/DynArray/DynArray.o \
    2.13 -	VPThread_lib/VPThread_PluginFns.o \
    2.14 -	VPThread_lib/VPThread_lib.o \
    2.15 -	VPThread_lib/VMS/Histogram/DblHist.o \
    2.16 -	VPThread_lib/VPThread.o \
    2.17 -	VPThread_lib/VMS/probes.o \
    2.18 -	VPThread_lib/VMS/ProcrContext.o \
    2.19 -	VPThread_lib/VPThread_Request_Handlers.o \
    2.20 -	VPThread_lib/VPThread_helper.o \
    2.21 -	VPThread_lib/VMS/Hash_impl/MurmurHash2.o \
    2.22 -	VPThread_lib/VMS/vmalloc.o \
    2.23 -	VPThread_lib/VMS/contextSwitch.o \
    2.24 -	VPThread_lib/VMS/Queue_impl/BlockingQueue.o \
    2.25 -	VPThread_lib/VMS/vutilities.o \
    2.26 -	c-ray-mt.o
    2.27 +	src/VPThread_lib/VMS/Histogram/Histogram.o \
    2.28 +	src/VPThread_lib/VMS/Histogram/FloatHist.o \
    2.29 +	src/VPThread_lib/VMS/CoreLoop.o \
    2.30 +	src/VPThread_lib/VMS/VMS.o \
    2.31 +	src/VPThread_lib/VMS/MasterLoop.o \
    2.32 +	src/VPThread_lib/VMS/Queue_impl/PrivateQueue.o \
    2.33 +	src/VPThread_lib/VMS/Hash_impl/PrivateHash.o \
    2.34 +	src/VPThread_lib/VMS/DynArray/DynArray.o \
    2.35 +	src/VPThread_lib/VPThread_PluginFns.o \
    2.36 +	src/VPThread_lib/VPThread_lib.o \
    2.37 +	src/VPThread_lib/VMS/Histogram/DblHist.o \
    2.38 +	src/VPThread_lib/VPThread.o \
    2.39 +	src/VPThread_lib/VMS/probes.o \
    2.40 +	src/VPThread_lib/VMS/ProcrContext.o \
    2.41 +	src/VPThread_lib/VPThread_Request_Handlers.o \
    2.42 +	src/VPThread_lib/VPThread_helper.o \
    2.43 +	src/VPThread_lib/VMS/Hash_impl/MurmurHash2.o \
    2.44 +	src/VPThread_lib/VMS/vmalloc.o \
    2.45 +	src/VPThread_lib/VMS/contextSwitch.o \
    2.46 +	src/VPThread_lib/VMS/Queue_impl/BlockingQueue.o \
    2.47 +	src/VPThread_lib/VMS/vutilities.o \
    2.48 +	src/Application/main.o
    2.49  
    2.50 -bin = c-ray-mt
    2.51 +bin = task_size_vs_exe_time
    2.52 +
    2.53 +NUM_CORES=4
    2.54  
    2.55  CC = gcc
    2.56 -CFLAGS = -m64 -ffast-math -fwrapv -fno-omit-frame-pointer -O3 -D VPTHREAD -D APPLICATION=C-RAY -g -Wall
    2.57 +CFLAGS = -m64 -ffast-math -fwrapv -fno-omit-frame-pointer -O3 -D VPTHREAD -D APPLICATION=C-RAY -D NUM_CORES=$(NUM_CORES) -g -Wall
    2.58  
    2.59  $(bin): $(obj)
    2.60  	$(CC) -o $@ $(obj) -lm -lpthread
     3.1 --- a/README.txt	Sat Oct 22 19:27:29 2011 -0700
     3.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.3 @@ -1,27 +0,0 @@
     3.4 -Kernel: Ray Tracing
     3.5 -
     3.6 -This is a kernel-type benchmark of a very simple and brute-force ray tracer.
     3.7 -
     3.8 -Installation:
     3.9 -
    3.10 -To install the kernel benchmark, navigate to the directory this file is located in, open up a terminal and simply type 'make'. For certain architectures 
    3.11 -or special compilation options, you might need to change compilation parameters in the makefile.
    3.12 -
    3.13 -Usage:
    3.14 -
    3.15 -You may execute the benchmark by navigating to this directory after compilation and typing
    3.16 -
    3.17 -./c-ray-mt -i FILENAME -s RESOLUTION -o OUTPUT.ppm 
    3.18 -
    3.19 -'FILENAME' has to be either "scene" or "sphfract" or another predefined scene description file if there is one.
    3.20 -'RESOLUTION' specifies the resolution of the produced image and has to be given in the form 1920x1200, for example.
    3.21 -'OUTPUT' is the name of the file that will contain the rendered image after the benchmark has run.
    3.22 -
    3.23 -The specification of how many threads are used to perform the rendering depends on the parallel programming model.
    3.24 -
    3.25 -Benchmark Versions:
    3.26 -
    3.27 -Serial
    3.28 -POSIX Threads
    3.29 -OpenMP SuperScalar
    3.30 -
     4.1 --- a/c-ray-mt.c	Sat Oct 22 19:27:29 2011 -0700
     4.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.3 @@ -1,718 +0,0 @@
     4.4 -/* c-ray-mt - a simple multithreaded raytracing filter.
     4.5 - * Copyright (C) 2006 John Tsiombikas <nuclear@siggraph.org>
     4.6 - *
     4.7 - * You are free to use, modify and redistribute this program under the
     4.8 - * terms of the GNU General Public License v2 or (at your option) later.
     4.9 - * see "http://www.gnu.org/licenses/gpl.txt" for details.
    4.10 - * ---------------------------------------------------------------------
    4.11 - * Usage:
    4.12 - *   compile:  just type make
    4.13 - *              (add any arch-specific optimizations for your compiler in CFLAGS first)
    4.14 - *       run:  cat scene | ./c-ray-mt [-t num-threads] >foo.ppm
    4.15 - *              (on broken systems such as windows try: c-ray-mt -i scene -o foo.ppm)
    4.16 - *     enjoy:  display foo.ppm
    4.17 - *              (with imagemagick, or use your favorite image viewer)
    4.18 - * ---------------------------------------------------------------------
    4.19 - * Scene file format:
    4.20 - *   # sphere (many)
    4.21 - *   s  x y z  rad   r g b   shininess   reflectivity
    4.22 - *   # light (many)
    4.23 - *   l  x y z
    4.24 - *   # camera (one)
    4.25 - *   c  x y z  fov   tx ty tz
    4.26 - * ---------------------------------------------------------------------
    4.27 - */
    4.28 -#include <stdio.h>
    4.29 -#include <stdlib.h>
    4.30 -#include <string.h>
    4.31 -#include <math.h>
    4.32 -#include <ctype.h>
    4.33 -#include <errno.h>
    4.34 -#include <pthread.h>
    4.35 -#include "VPThread_lib/VPThread.h"
    4.36 -
    4.37 -#define VER_MAJOR	1
    4.38 -#define VER_MINOR	1
    4.39 -#define VER_STR		"c-ray-mt v%d.%d\n"
    4.40 -
    4.41 -#if !defined(unix) && !defined(__unix__)
    4.42 -#ifdef __MACH__
    4.43 -#define unix		1
    4.44 -#define __unix__	1
    4.45 -#endif	/* __MACH__ */
    4.46 -#endif	/* unix */
    4.47 -
    4.48 -/* find the appropriate way to define explicitly sized types */
    4.49 -/* for C99 or GNU libc (also mach's libc) we can use stdint.h */
    4.50 -#if (__STDC_VERSION__ >= 199900) || defined(__GLIBC__) || defined(__MACH__)
    4.51 -#include <stdint.h>
    4.52 -#elif defined(unix) || defined(__unix__)	/* some UNIX systems have them in sys/types.h */
    4.53 -#include <sys/types.h>
    4.54 -#elif defined(__WIN32__) || defined(WIN32)	/* the nameless one */
    4.55 -typedef unsigned __int8 uint8_t;
    4.56 -typedef unsigned __int32 uint32_t;
    4.57 -#endif	/* sized type detection */
    4.58 -
    4.59 -struct vec3 {
    4.60 -	double x, y, z;
    4.61 -};
    4.62 -
    4.63 -struct ray {
    4.64 -	struct vec3 orig, dir;
    4.65 -};
    4.66 -
    4.67 -struct material {
    4.68 -	struct vec3 col;	/* color */
    4.69 -	double spow;		/* specular power */
    4.70 -	double refl;		/* reflection intensity */
    4.71 -};
    4.72 -
    4.73 -struct sphere {
    4.74 -	struct vec3 pos;
    4.75 -	double rad;
    4.76 -	struct material mat;
    4.77 -	struct sphere *next;
    4.78 -};
    4.79 -
    4.80 -struct spoint {
    4.81 -	struct vec3 pos, normal, vref;	/* position, normal and view reflection */
    4.82 -	double dist;		/* parametric distance of intersection along the ray */
    4.83 -};
    4.84 -
    4.85 -struct camera {
    4.86 -	struct vec3 pos, targ;
    4.87 -	double fov;
    4.88 -};
    4.89 -
    4.90 -struct thread_data {
    4.91 -	VirtProcr *VP;
    4.92 -	int sl_start, sl_count;
    4.93 -	uint32_t *pixels;
    4.94 -};
    4.95 -typedef struct thread_data thread_data;
    4.96 -
    4.97 -void render_scanline(int xsz, int ysz, int sl, uint32_t *fb, int samples);
    4.98 -struct vec3 trace(struct ray ray, int depth);
    4.99 -struct vec3 shade(struct sphere *obj, struct spoint *sp, int depth);
   4.100 -struct vec3 reflect(struct vec3 v, struct vec3 n);
   4.101 -struct vec3 cross_product(struct vec3 v1, struct vec3 v2);
   4.102 -struct ray get_primary_ray(int x, int y, int sample);
   4.103 -struct vec3 get_sample_pos(int x, int y, int sample);
   4.104 -struct vec3 jitter(int x, int y, int s);
   4.105 -int ray_sphere(const struct sphere *sph, struct ray ray, struct spoint *sp);
   4.106 -void load_scene(FILE *fp);
   4.107 -unsigned long get_msec(void);
   4.108 -
   4.109 -void thread_func(void *tdata, VirtProcr *VProc);
   4.110 -
   4.111 -#define MAX_LIGHTS		16				/* maximum number of lights */
   4.112 -#define RAY_MAG			1000.0			/* trace rays of this magnitude */
   4.113 -#define MAX_RAY_DEPTH	5				/* raytrace recursion limit */
   4.114 -#define FOV				0.78539816		/* field of view in rads (pi/4) */
   4.115 -#define HALF_FOV		(FOV * 0.5)
   4.116 -#define ERR_MARGIN		1e-6			/* an arbitrary error margin to avoid surface acne */
   4.117 -
   4.118 -/* bit-shift amount for packing each color into a 32-bit uint */
   4.119 -#ifdef LITTLE_ENDIAN
   4.120 -#define RSHIFT	16
   4.121 -#define BSHIFT	0
   4.122 -#else	/* big endian */
   4.123 -#define RSHIFT	0
   4.124 -#define BSHIFT	16
   4.125 -#endif	/* endianess */
   4.126 -#define GSHIFT	8	/* this is the same in both byte orders */
   4.127 -
   4.128 -/* some helpful macros... */
   4.129 -#define SQ(x)		((x) * (x))
   4.130 -#define MAX(a, b)	((a) > (b) ? (a) : (b))
   4.131 -#define MIN(a, b)	((a) < (b) ? (a) : (b))
   4.132 -#define DOT(a, b)	((a).x * (b).x + (a).y * (b).y + (a).z * (b).z)
   4.133 -#define NORMALIZE(a)  do {\
   4.134 -	double len = sqrt(DOT(a, a));\
   4.135 -	(a).x /= len; (a).y /= len; (a).z /= len;\
   4.136 -} while(0);
   4.137 -
   4.138 -/* global state */
   4.139 -int xres = 800;
   4.140 -int yres = 600;
   4.141 -int rays_per_pixel = 1;
   4.142 -double aspect = 1.333333;
   4.143 -struct sphere *obj_list;
   4.144 -struct vec3 lights[MAX_LIGHTS];
   4.145 -int lnum = 0;
   4.146 -struct camera cam;
   4.147 -
   4.148 -int thread_num = 1;
   4.149 -struct thread_data *threads;
   4.150 -
   4.151 -volatile int end = 0;
   4.152 -volatile int start = 0;
   4.153 -int32 end_mutex, end_cond;
   4.154 -int32 start_cond, start_mutex;
   4.155 -
   4.156 -#define NRAN	1024
   4.157 -#define MASK	(NRAN - 1)
   4.158 -struct vec3 urand[NRAN];
   4.159 -int irand[NRAN];
   4.160 -
   4.161 -unsigned long rend_time, start_time;
   4.162 -
   4.163 -const char *usage = {
   4.164 -	"Usage: c-ray-mt [options]\n"
   4.165 -	"  Reads a scene file from stdin, writes the image to stdout, and stats to stderr.\n\n"
   4.166 -	"Options:\n"
   4.167 -	"  -t <num>   how many threads to use (default: 1)\n"
   4.168 -	"  -s WxH     where W is the width and H the height of the image\n"
   4.169 -	"  -r <rays>  shoot <rays> rays per pixel (antialiasing)\n"
   4.170 -	"  -i <file>  read from <file> instead of stdin\n"
   4.171 -	"  -o <file>  write to <file> instead of stdout\n"
   4.172 -	"  -h         this help screen\n\n"
   4.173 -};
   4.174 -
   4.175 -char __ProgrammName[] = "c-ray";
   4.176 -char __DataSet[255];
   4.177 -
   4.178 -
   4.179 -void raytrace(void *pixels, VirtProcr *Vprocr);
   4.180 -
   4.181 -int main(int argc, char **argv) {
   4.182 -	int i;
   4.183 -	uint32_t *pixels;
   4.184 -	FILE *infile = stdin, *outfile = stdout;
   4.185 -
   4.186 -	for(i=1; i<argc; i++) {
   4.187 -		if(argv[i][0] == '-' && argv[i][2] == 0) {
   4.188 -			char *sep;
   4.189 -			switch(argv[i][1]) {
   4.190 -			case 't':
   4.191 -				if(!isdigit(argv[++i][0])) {
   4.192 -					fprintf(stderr, "-t mus be followed by the number of worker threads to spawn\n");
   4.193 -					return EXIT_FAILURE;
   4.194 -				}
   4.195 -				thread_num = atoi(argv[i]);
   4.196 -				if(!thread_num) {
   4.197 -					fprintf(stderr, "invalid number of threads specified: %d\n", thread_num);
   4.198 -					return EXIT_FAILURE;
   4.199 -				}
   4.200 -				break;
   4.201 -					
   4.202 -			case 's':
   4.203 -				if(!isdigit(argv[++i][0]) || !(sep = strchr(argv[i], 'x')) || !isdigit(*(sep + 1))) {
   4.204 -					fputs("-s must be followed by something like \"640x480\"\n", stderr);
   4.205 -					return EXIT_FAILURE;
   4.206 -				}
   4.207 -				xres = atoi(argv[i]);
   4.208 -				yres = atoi(sep + 1);
   4.209 -				aspect = (double)xres / (double)yres;
   4.210 -				break;
   4.211 -
   4.212 -			case 'i':
   4.213 -				if(!(infile = fopen(argv[++i], "rb"))) {
   4.214 -					fprintf(stderr, "failed to open input file %s: %s\n", argv[i], strerror(errno));
   4.215 -					return EXIT_FAILURE;
   4.216 -				}
   4.217 -				break;
   4.218 -
   4.219 -			case 'o':
   4.220 -				if(!(outfile = fopen(argv[++i], "wb"))) {
   4.221 -					fprintf(stderr, "failed to open output file %s: %s\n", argv[i], strerror(errno));
   4.222 -					return EXIT_FAILURE;
   4.223 -				}
   4.224 -				break;
   4.225 -
   4.226 -			case 'r':
   4.227 -				if(!isdigit(argv[++i][0])) {
   4.228 -					fputs("-r must be followed by a number (rays per pixel)\n", stderr);
   4.229 -					return EXIT_FAILURE;
   4.230 -				}
   4.231 -				rays_per_pixel = atoi(argv[i]);
   4.232 -				break;
   4.233 -
   4.234 -			case 'h':
   4.235 -				fputs(usage, stdout);
   4.236 -				return 0;
   4.237 -				
   4.238 -			default:
   4.239 -				fprintf(stderr, "unrecognized argument: %s\n", argv[i]);
   4.240 -				fputs(usage, stderr);
   4.241 -				return EXIT_FAILURE;
   4.242 -			}
   4.243 -		} else {
   4.244 -			fprintf(stderr, "unrecognized argument: %s\n", argv[i]);
   4.245 -			fputs(usage, stderr);
   4.246 -			return EXIT_FAILURE;
   4.247 -		}
   4.248 -	}
   4.249 -        
   4.250 -        snprintf(__DataSet,255,"file: %s\nsize: %dx%d\nrays per pixel: %d\nthreads: %d\n",
   4.251 -                        infile, xres, yres, rays_per_pixel, thread_num);
   4.252 -
   4.253 -        
   4.254 -        if(!(pixels = malloc(xres * yres * sizeof *pixels))) {
   4.255 -		perror("pixel buffer allocation failed");
   4.256 -		return EXIT_FAILURE;
   4.257 -	}
   4.258 -	load_scene(infile);
   4.259 -        
   4.260 -        //This is the transition to the VMS runtime
   4.261 -        VPThread__create_seed_procr_and_do_work(raytrace, (void*)pixels);
   4.262 -	
   4.263 -	/* output statistics to stderr */
   4.264 -	fprintf(stderr, "Rendering took: %lu seconds (%lu milliseconds)\n", rend_time / 1000, rend_time);
   4.265 -
   4.266 -	/* output the image */
   4.267 -	fprintf(outfile, "P6\n%d %d\n255\n", xres, yres);
   4.268 -	for(i=0; i<xres * yres; i++) {
   4.269 -		fputc((pixels[i] >> RSHIFT) & 0xff, outfile);
   4.270 -		fputc((pixels[i] >> GSHIFT) & 0xff, outfile);
   4.271 -		fputc((pixels[i] >> BSHIFT) & 0xff, outfile);
   4.272 -	}
   4.273 -	fflush(outfile);
   4.274 -
   4.275 -	if(infile != stdin) fclose(infile);
   4.276 -	if(outfile != stdout) fclose(outfile);
   4.277 -
   4.278 -	struct sphere *walker = obj_list;
   4.279 -	while(walker) {
   4.280 -		struct sphere *tmp = walker;
   4.281 -		walker = walker->next;
   4.282 -		free(tmp);
   4.283 -	}
   4.284 -	free(pixels);
   4.285 -	return 0;
   4.286 -}
   4.287 -
   4.288 -/* this is run after the VMS is set up*/
   4.289 -void raytrace(void *pixels, VirtProcr *VProc)
   4.290 -{
   4.291 -    int i;
   4.292 -    double sl, sl_per_thread;
   4.293 -    
   4.294 -    /* initialize the random number tables for the jitter */
   4.295 -    for(i=0; i<NRAN; i++) urand[i].x = (double)rand() / RAND_MAX - 0.5;
   4.296 -    for(i=0; i<NRAN; i++) urand[i].y = (double)rand() / RAND_MAX - 0.5;
   4.297 -    for(i=0; i<NRAN; i++) irand[i] = (int)(NRAN * ((double)rand() / RAND_MAX));
   4.298 -
   4.299 -    if(thread_num > yres) {
   4.300 -            fprintf(stderr, "more threads than scanlines specified, reducing number of threads to %d\n", yres);
   4.301 -            thread_num = yres;
   4.302 -    }
   4.303 -
   4.304 -    
   4.305 -    if(!(threads = VPThread__malloc(thread_num * sizeof(thread_data), VProc))) {
   4.306 -            perror("failed to allocate thread table");
   4.307 -            exit(EXIT_FAILURE);
   4.308 -    }
   4.309 -    
   4.310 -    end_mutex = VPThread__make_mutex(VProc);
   4.311 -    end_cond  = VPThread__make_cond(end_mutex, VProc);
   4.312 -    start_mutex = VPThread__make_mutex(VProc);
   4.313 -    start_cond  = VPThread__make_cond(start_mutex, VProc);    
   4.314 -    
   4.315 -    sl = 0.0;
   4.316 -    sl_per_thread = (double)yres / (double)thread_num;
   4.317 -    for(i=0; i<thread_num; i++) {
   4.318 -            threads[i].sl_start = (int)sl;
   4.319 -            sl += sl_per_thread;
   4.320 -            threads[i].sl_count = (int)sl - threads[i].sl_start;
   4.321 -            threads[i].pixels = (uint32_t*)pixels;
   4.322 -
   4.323 -            threads[i].VP = 
   4.324 -                    VPThread__create_thread((VirtProcrFnPtr)thread_func,
   4.325 -                                   (void*)(&threads[i]), VProc);  
   4.326 -    }
   4.327 -    
   4.328 -    threads[thread_num - 1].sl_count = yres - threads[thread_num - 1].sl_start;
   4.329 -    
   4.330 -    fprintf(stderr, VER_STR, VER_MAJOR, VER_MINOR);
   4.331 -    
   4.332 -    // start worker threads
   4.333 -    //printf("start of worker thread (%d)\n", VProc->procrID);      
   4.334 -    VPThread__mutex_lock(start_mutex, VProc);
   4.335 -    start_time = get_msec();
   4.336 -    start = 1;
   4.337 -    for(i=0; i<thread_num; i++)
   4.338 -        VPThread__cond_signal(start_cond, VProc);
   4.339 -    VPThread__mutex_unlock(start_mutex, VProc);
   4.340 -    
   4.341 -    //printf("wait for worker (%d)\n", VProc->procrID);      
   4.342 -    VPThread__mutex_lock(end_mutex, VProc);
   4.343 -    while(end < thread_num)
   4.344 -        VPThread__cond_wait(end_cond, VProc);
   4.345 -    VPThread__mutex_unlock(end_mutex, VProc);
   4.346 -    
   4.347 -    rend_time = get_msec() - start_time;
   4.348 -    
   4.349 -    VPThread__free(threads,VProc);
   4.350 -    VPThread__dissipate_thread(VProc);
   4.351 -}
   4.352 -
   4.353 -/* render a frame of xsz/ysz dimensions into the provided framebuffer */
   4.354 -void render_scanline(int xsz, int ysz, int sl, uint32_t *fb, int samples) {
   4.355 -	int i, s;
   4.356 -	double rcp_samples = 1.0 / (double)samples;
   4.357 -
   4.358 -	for(i=0; i<xsz; i++) {
   4.359 -		double r, g, b;
   4.360 -		r = g = b = 0.0;
   4.361 -			
   4.362 -		for(s=0; s<samples; s++) {
   4.363 -			struct vec3 col = trace(get_primary_ray(i, sl, s), 0);
   4.364 -			r += col.x;
   4.365 -			g += col.y;
   4.366 -			b += col.z;
   4.367 -		}
   4.368 -
   4.369 -		r = r * rcp_samples;
   4.370 -		g = g * rcp_samples;
   4.371 -		b = b * rcp_samples;
   4.372 -			
   4.373 -		fb[sl * xsz + i] = ((uint32_t)(MIN(r, 1.0) * 255.0) & 0xff) << RSHIFT |
   4.374 -							((uint32_t)(MIN(g, 1.0) * 255.0) & 0xff) << GSHIFT |
   4.375 -							((uint32_t)(MIN(b, 1.0) * 255.0) & 0xff) << BSHIFT;
   4.376 -	}
   4.377 -}
   4.378 -
   4.379 -/* trace a ray through the scene recursively (the recursion happens through
   4.380 - * shade() to calculate reflection rays if necessary).
   4.381 - */
   4.382 -struct vec3 trace(struct ray ray, int depth) {
   4.383 -	struct vec3 col;
   4.384 -	struct spoint sp, nearest_sp;
   4.385 -	struct sphere *nearest_obj = 0;
   4.386 -	struct sphere *iter = obj_list->next;
   4.387 -
   4.388 -	/* if we reached the recursion limit, bail out */
   4.389 -	if(depth >= MAX_RAY_DEPTH) {
   4.390 -		col.x = col.y = col.z = 0.0;
   4.391 -		return col;
   4.392 -	}
   4.393 -	
   4.394 -	/* find the nearest intersection ... */
   4.395 -	while(iter) {
   4.396 -		if(ray_sphere(iter, ray, &sp)) {
   4.397 -			if(!nearest_obj || sp.dist < nearest_sp.dist) {
   4.398 -				nearest_obj = iter;
   4.399 -				nearest_sp = sp;
   4.400 -			}
   4.401 -		}
   4.402 -		iter = iter->next;
   4.403 -	}
   4.404 -
   4.405 -	/* and perform shading calculations as needed by calling shade() */
   4.406 -	if(nearest_obj) {
   4.407 -		col = shade(nearest_obj, &nearest_sp, depth);
   4.408 -	} else {
   4.409 -		col.x = col.y = col.z = 0.0;
   4.410 -	}
   4.411 -
   4.412 -	return col;
   4.413 -}
   4.414 -
   4.415 -/* Calculates direct illumination with the phong reflectance model.
   4.416 - * Also handles reflections by calling trace again, if necessary.
   4.417 - */
   4.418 -struct vec3 shade(struct sphere *obj, struct spoint *sp, int depth) {
   4.419 -	int i;
   4.420 -	struct vec3 col = {0, 0, 0};
   4.421 -
   4.422 -	/* for all lights ... */
   4.423 -	for(i=0; i<lnum; i++) {
   4.424 -		double ispec, idiff;
   4.425 -		struct vec3 ldir;
   4.426 -		struct ray shadow_ray;
   4.427 -		struct sphere *iter = obj_list->next;
   4.428 -		int in_shadow = 0;
   4.429 -
   4.430 -		ldir.x = lights[i].x - sp->pos.x;
   4.431 -		ldir.y = lights[i].y - sp->pos.y;
   4.432 -		ldir.z = lights[i].z - sp->pos.z;
   4.433 -
   4.434 -		shadow_ray.orig = sp->pos;
   4.435 -		shadow_ray.dir = ldir;
   4.436 -
   4.437 -		/* shoot shadow rays to determine if we have a line of sight with the light */
   4.438 -		while(iter) {
   4.439 -			if(ray_sphere(iter, shadow_ray, 0)) {
   4.440 -				in_shadow = 1;
   4.441 -				break;
   4.442 -			}
   4.443 -			iter = iter->next;
   4.444 -		}
   4.445 -
   4.446 -		/* and if we're not in shadow, calculate direct illumination with the phong model. */
   4.447 -		if(!in_shadow) {
   4.448 -			NORMALIZE(ldir);
   4.449 -
   4.450 -			idiff = MAX(DOT(sp->normal, ldir), 0.0);
   4.451 -			ispec = obj->mat.spow > 0.0 ? pow(MAX(DOT(sp->vref, ldir), 0.0), obj->mat.spow) : 0.0;
   4.452 -
   4.453 -			col.x += idiff * obj->mat.col.x + ispec;
   4.454 -			col.y += idiff * obj->mat.col.y + ispec;
   4.455 -			col.z += idiff * obj->mat.col.z + ispec;
   4.456 -		}
   4.457 -	}
   4.458 -
   4.459 -	/* Also, if the object is reflective, spawn a reflection ray, and call trace()
   4.460 -	 * to calculate the light arriving from the mirror direction.
   4.461 -	 */
   4.462 -	if(obj->mat.refl > 0.0) {
   4.463 -		struct ray ray;
   4.464 -		struct vec3 rcol;
   4.465 -
   4.466 -		ray.orig = sp->pos;
   4.467 -		ray.dir = sp->vref;
   4.468 -		ray.dir.x *= RAY_MAG;
   4.469 -		ray.dir.y *= RAY_MAG;
   4.470 -		ray.dir.z *= RAY_MAG;
   4.471 -
   4.472 -		rcol = trace(ray, depth + 1);
   4.473 -		col.x += rcol.x * obj->mat.refl;
   4.474 -		col.y += rcol.y * obj->mat.refl;
   4.475 -		col.z += rcol.z * obj->mat.refl;
   4.476 -	}
   4.477 -
   4.478 -	return col;
   4.479 -}
   4.480 -
   4.481 -/* calculate reflection vector */
   4.482 -struct vec3 reflect(struct vec3 v, struct vec3 n) {
   4.483 -	struct vec3 res;
   4.484 -	double dot = v.x * n.x + v.y * n.y + v.z * n.z;
   4.485 -	res.x = -(2.0 * dot * n.x - v.x);
   4.486 -	res.y = -(2.0 * dot * n.y - v.y);
   4.487 -	res.z = -(2.0 * dot * n.z - v.z);
   4.488 -	return res;
   4.489 -}
   4.490 -
   4.491 -struct vec3 cross_product(struct vec3 v1, struct vec3 v2) {
   4.492 -	struct vec3 res;
   4.493 -	res.x = v1.y * v2.z - v1.z * v2.y;
   4.494 -	res.y = v1.z * v2.x - v1.x * v2.z;
   4.495 -	res.z = v1.x * v2.y - v1.y * v2.x;
   4.496 -	return res;
   4.497 -}
   4.498 -
   4.499 -/* determine the primary ray corresponding to the specified pixel (x, y) */
   4.500 -struct ray get_primary_ray(int x, int y, int sample) {
   4.501 -	struct ray ray;
   4.502 -	float m[3][3];
   4.503 -	struct vec3 i, j = {0, 1, 0}, k, dir, orig, foo;
   4.504 -
   4.505 -	k.x = cam.targ.x - cam.pos.x;
   4.506 -	k.y = cam.targ.y - cam.pos.y;
   4.507 -	k.z = cam.targ.z - cam.pos.z;
   4.508 -	NORMALIZE(k);
   4.509 -
   4.510 -	i = cross_product(j, k);
   4.511 -	j = cross_product(k, i);
   4.512 -	m[0][0] = i.x; m[0][1] = j.x; m[0][2] = k.x;
   4.513 -	m[1][0] = i.y; m[1][1] = j.y; m[1][2] = k.y;
   4.514 -	m[2][0] = i.z; m[2][1] = j.z; m[2][2] = k.z;
   4.515 -	
   4.516 -	ray.orig.x = ray.orig.y = ray.orig.z = 0.0;
   4.517 -	ray.dir = get_sample_pos(x, y, sample);
   4.518 -	ray.dir.z = 1.0 / HALF_FOV;
   4.519 -	ray.dir.x *= RAY_MAG;
   4.520 -	ray.dir.y *= RAY_MAG;
   4.521 -	ray.dir.z *= RAY_MAG;
   4.522 -	
   4.523 -	dir.x = ray.dir.x + ray.orig.x;
   4.524 -	dir.y = ray.dir.y + ray.orig.y;
   4.525 -	dir.z = ray.dir.z + ray.orig.z;
   4.526 -	foo.x = dir.x * m[0][0] + dir.y * m[0][1] + dir.z * m[0][2];
   4.527 -	foo.y = dir.x * m[1][0] + dir.y * m[1][1] + dir.z * m[1][2];
   4.528 -	foo.z = dir.x * m[2][0] + dir.y * m[2][1] + dir.z * m[2][2];
   4.529 -
   4.530 -	orig.x = ray.orig.x * m[0][0] + ray.orig.y * m[0][1] + ray.orig.z * m[0][2] + cam.pos.x;
   4.531 -	orig.y = ray.orig.x * m[1][0] + ray.orig.y * m[1][1] + ray.orig.z * m[1][2] + cam.pos.y;
   4.532 -	orig.z = ray.orig.x * m[2][0] + ray.orig.y * m[2][1] + ray.orig.z * m[2][2] + cam.pos.z;
   4.533 -
   4.534 -	ray.orig = orig;
   4.535 -	ray.dir.x = foo.x + orig.x;
   4.536 -	ray.dir.y = foo.y + orig.y;
   4.537 -	ray.dir.z = foo.z + orig.z;
   4.538 -	
   4.539 -	return ray;
   4.540 -}
   4.541 -
   4.542 -
   4.543 -struct vec3 get_sample_pos(int x, int y, int sample) {
   4.544 -	struct vec3 pt;
   4.545 -	static double sf = 0.0;
   4.546 -
   4.547 -	if(sf == 0.0) {
   4.548 -		sf = 1.5 / (double)xres;
   4.549 -	}
   4.550 -
   4.551 -	pt.x = ((double)x / (double)xres) - 0.5;
   4.552 -	pt.y = -(((double)y / (double)yres) - 0.65) / aspect;
   4.553 -
   4.554 -	if(sample) {
   4.555 -		struct vec3 jt = jitter(x, y, sample);
   4.556 -		pt.x += jt.x * sf;
   4.557 -		pt.y += jt.y * sf / aspect;
   4.558 -	}
   4.559 -	return pt;
   4.560 -}
   4.561 -
   4.562 -/* jitter function taken from Graphics Gems I. */
   4.563 -struct vec3 jitter(int x, int y, int s) {
   4.564 -	struct vec3 pt;
   4.565 -	pt.x = urand[(x + (y << 2) + irand[(x + s) & MASK]) & MASK].x;
   4.566 -	pt.y = urand[(y + (x << 2) + irand[(y + s) & MASK]) & MASK].y;
   4.567 -	return pt;
   4.568 -}
   4.569 -
   4.570 -/* Calculate ray-sphere intersection, and return {1, 0} to signify hit or no hit.
   4.571 - * Also the surface point parameters like position, normal, etc are returned through
   4.572 - * the sp pointer if it is not NULL.
   4.573 - */
   4.574 -int ray_sphere(const struct sphere *sph, struct ray ray, struct spoint *sp) {
   4.575 -	double a, b, c, d, sqrt_d, t1, t2;
   4.576 -	
   4.577 -	a = SQ(ray.dir.x) + SQ(ray.dir.y) + SQ(ray.dir.z);
   4.578 -	b = 2.0 * ray.dir.x * (ray.orig.x - sph->pos.x) +
   4.579 -				2.0 * ray.dir.y * (ray.orig.y - sph->pos.y) +
   4.580 -				2.0 * ray.dir.z * (ray.orig.z - sph->pos.z);
   4.581 -	c = SQ(sph->pos.x) + SQ(sph->pos.y) + SQ(sph->pos.z) +
   4.582 -				SQ(ray.orig.x) + SQ(ray.orig.y) + SQ(ray.orig.z) +
   4.583 -				2.0 * (-sph->pos.x * ray.orig.x - sph->pos.y * ray.orig.y - sph->pos.z * ray.orig.z) - SQ(sph->rad);
   4.584 -	
   4.585 -	if((d = SQ(b) - 4.0 * a * c) < 0.0) return 0;
   4.586 -
   4.587 -	sqrt_d = sqrt(d);
   4.588 -	t1 = (-b + sqrt_d) / (2.0 * a);
   4.589 -	t2 = (-b - sqrt_d) / (2.0 * a);
   4.590 -
   4.591 -	if((t1 < ERR_MARGIN && t2 < ERR_MARGIN) || (t1 > 1.0 && t2 > 1.0)) return 0;
   4.592 -
   4.593 -	if(sp) {
   4.594 -		if(t1 < ERR_MARGIN) t1 = t2;
   4.595 -		if(t2 < ERR_MARGIN) t2 = t1;
   4.596 -		sp->dist = t1 < t2 ? t1 : t2;
   4.597 -		
   4.598 -		sp->pos.x = ray.orig.x + ray.dir.x * sp->dist;
   4.599 -		sp->pos.y = ray.orig.y + ray.dir.y * sp->dist;
   4.600 -		sp->pos.z = ray.orig.z + ray.dir.z * sp->dist;
   4.601 -		
   4.602 -		sp->normal.x = (sp->pos.x - sph->pos.x) / sph->rad;
   4.603 -		sp->normal.y = (sp->pos.y - sph->pos.y) / sph->rad;
   4.604 -		sp->normal.z = (sp->pos.z - sph->pos.z) / sph->rad;
   4.605 -
   4.606 -		sp->vref = reflect(ray.dir, sp->normal);
   4.607 -		NORMALIZE(sp->vref);
   4.608 -	}
   4.609 -	return 1;
   4.610 -}
   4.611 -
   4.612 -/* Load the scene from an extremely simple scene description file */
   4.613 -#define DELIM	" \t\n"
   4.614 -void load_scene(FILE *fp) {
   4.615 -	char line[256], *ptr, type;
   4.616 -
   4.617 -	obj_list = malloc(sizeof(struct sphere));
   4.618 -	obj_list->next = 0;
   4.619 -	
   4.620 -	while((ptr = fgets(line, 256, fp))) {
   4.621 -		int i;
   4.622 -		struct vec3 pos, col;
   4.623 -		double rad, spow, refl;
   4.624 -		
   4.625 -		while(*ptr == ' ' || *ptr == '\t') ptr++;
   4.626 -		if(*ptr == '#' || *ptr == '\n') continue;
   4.627 -
   4.628 -		if(!(ptr = strtok(line, DELIM))) continue;
   4.629 -		type = *ptr;
   4.630 -		
   4.631 -		for(i=0; i<3; i++) {
   4.632 -			if(!(ptr = strtok(0, DELIM))) break;
   4.633 -			*((double*)&pos.x + i) = atof(ptr);
   4.634 -		}
   4.635 -
   4.636 -		if(type == 'l') {
   4.637 -			lights[lnum++] = pos;
   4.638 -			continue;
   4.639 -		}
   4.640 -
   4.641 -		if(!(ptr = strtok(0, DELIM))) continue;
   4.642 -		rad = atof(ptr);
   4.643 -
   4.644 -		for(i=0; i<3; i++) {
   4.645 -			if(!(ptr = strtok(0, DELIM))) break;
   4.646 -			*((double*)&col.x + i) = atof(ptr);
   4.647 -		}
   4.648 -
   4.649 -		if(type == 'c') {
   4.650 -			cam.pos = pos;
   4.651 -			cam.targ = col;
   4.652 -			cam.fov = rad;
   4.653 -			continue;
   4.654 -		}
   4.655 -
   4.656 -		if(!(ptr = strtok(0, DELIM))) continue;
   4.657 -		spow = atof(ptr);
   4.658 -
   4.659 -		if(!(ptr = strtok(0, DELIM))) continue;
   4.660 -		refl = atof(ptr);
   4.661 -
   4.662 -		if(type == 's') {
   4.663 -			struct sphere *sph = malloc(sizeof *sph);
   4.664 -			sph->next = obj_list->next;
   4.665 -			obj_list->next = sph;
   4.666 -
   4.667 -			sph->pos = pos;
   4.668 -			sph->rad = rad;
   4.669 -			sph->mat.col = col;
   4.670 -			sph->mat.spow = spow;
   4.671 -			sph->mat.refl = refl;
   4.672 -		} else {
   4.673 -			fprintf(stderr, "unknown type: %c\n", type);
   4.674 -		}
   4.675 -	}
   4.676 -}
   4.677 -
   4.678 -
   4.679 -/* provide a millisecond-resolution timer for each system */
   4.680 -#if defined(unix) || defined(__unix__)
   4.681 -#include <time.h>
   4.682 -#include <sys/time.h>
   4.683 -unsigned long get_msec(void) {
   4.684 -	static struct timeval timeval, first_timeval;
   4.685 -	
   4.686 -	gettimeofday(&timeval, 0);
   4.687 -	if(first_timeval.tv_sec == 0) {
   4.688 -		first_timeval = timeval;
   4.689 -		return 0;
   4.690 -	}
   4.691 -	return (timeval.tv_sec - first_timeval.tv_sec) * 1000 + (timeval.tv_usec - first_timeval.tv_usec) / 1000;
   4.692 -}
   4.693 -#elif defined(__WIN32__) || defined(WIN32)
   4.694 -#include <windows.h>
   4.695 -unsigned long get_msec(void) {
   4.696 -	return GetTickCount();
   4.697 -}
   4.698 -#else
   4.699 -#error "I don't know how to measure time on your platform"
   4.700 -#endif
   4.701 -
   4.702 -void thread_func(void *tdata, VirtProcr *VProc) {
   4.703 -	int i;
   4.704 -	struct thread_data *td = (struct thread_data*)tdata;
   4.705 -
   4.706 -        VPThread__mutex_lock(start_mutex, VProc);
   4.707 -        while(!start)
   4.708 -            VPThread__cond_wait(start_cond, VProc);
   4.709 -        VPThread__mutex_unlock(start_mutex, VProc);        
   4.710 -        
   4.711 -	for(i=0; i<td->sl_count; i++) {
   4.712 -		render_scanline(xres, yres, i + td->sl_start, td->pixels, rays_per_pixel);
   4.713 -	}
   4.714 -        
   4.715 -        VPThread__mutex_lock(end_mutex, VProc);
   4.716 -        end++;
   4.717 -        VPThread__cond_signal(end_cond, VProc);
   4.718 -        VPThread__mutex_unlock(end_mutex, VProc);   
   4.719 -
   4.720 -	VPThread__dissipate_thread(VProc);
   4.721 -}
     5.1 --- a/scene	Sat Oct 22 19:27:29 2011 -0700
     5.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.3 @@ -1,18 +0,0 @@
     5.4 -# spheres
     5.5 -#	position		radius	color			shininess	reflectivity
     5.6 -s	-1.5 -0.3 -1	0.7		1.0 0.2 0.05		50.0	0.3
     5.7 -s	1.5 -0.4 0		0.6		0.1 0.85 1.0		50.0	0.4
     5.8 -
     5.9 -# walls
    5.10 -s	0 -1000 2		999		0.1 0.2 0.6			80.0	0.5
    5.11 -
    5.12 -# bouncing ball
    5.13 -s	0 0 2			1		0.0 0.0 0.0			60.0	0.7
    5.14 -
    5.15 -# lights...
    5.16 -l	-50 100 -50
    5.17 -l	40 40 150
    5.18 -
    5.19 -# camera (there can be only one!)
    5.20 -#	position	FOV		target
    5.21 -c	0 6 -17		45		0 -1 0
     6.1 --- a/sphfract	Sat Oct 22 19:27:29 2011 -0700
     6.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.3 @@ -1,192 +0,0 @@
     6.4 -s	0 0 0 	1.0	0.25 0.25 0.25  50.0	0.65
     6.5 -s	1.4 0 0 	0.4	0.25 0.25 0.25  50.0	0.65
     6.6 -s	1.96 0 0 	0.16	0.25 0.25 0.25  50.0	0.65
     6.7 -s	2.184 0 0 	0.064	0.25 0.25 0.25  50.0	0.65
     6.8 -s	1.96 0.224 0 	0.064	0.25 0.25 0.25  50.0	0.65
     6.9 -s	1.96 -0.224 0 	0.064	0.25 0.25 0.25  50.0	0.65
    6.10 -s	1.96 0 0.224 	0.064	0.25 0.25 0.25  50.0	0.65
    6.11 -s	1.96 0 -0.224 	0.064	0.25 0.25 0.25  50.0	0.65
    6.12 -s	1.4 0.56 0 	0.16	0.25 0.25 0.25  50.0	0.65
    6.13 -s	1.624 0.56 0 	0.064	0.25 0.25 0.25  50.0	0.65
    6.14 -s	1.176 0.56 0 	0.064	0.25 0.25 0.25  50.0	0.65
    6.15 -s	1.4 0.784 0 	0.064	0.25 0.25 0.25  50.0	0.65
    6.16 -s	1.4 0.56 0.224 	0.064	0.25 0.25 0.25  50.0	0.65
    6.17 -s	1.4 0.56 -0.224 	0.064	0.25 0.25 0.25  50.0	0.65
    6.18 -s	1.4 -0.56 0 	0.16	0.25 0.25 0.25  50.0	0.65
    6.19 -s	1.624 -0.56 0 	0.064	0.25 0.25 0.25  50.0	0.65
    6.20 -s	1.176 -0.56 0 	0.064	0.25 0.25 0.25  50.0	0.65
    6.21 -s	1.4 -0.784 0 	0.064	0.25 0.25 0.25  50.0	0.65
    6.22 -s	1.4 -0.56 0.224 	0.064	0.25 0.25 0.25  50.0	0.65
    6.23 -s	1.4 -0.56 -0.224 	0.064	0.25 0.25 0.25  50.0	0.65
    6.24 -s	1.4 0 0.56 	0.16	0.25 0.25 0.25  50.0	0.65
    6.25 -s	1.624 0 0.56 	0.064	0.25 0.25 0.25  50.0	0.65
    6.26 -s	1.176 0 0.56 	0.064	0.25 0.25 0.25  50.0	0.65
    6.27 -s	1.4 0.224 0.56 	0.064	0.25 0.25 0.25  50.0	0.65
    6.28 -s	1.4 -0.224 0.56 	0.064	0.25 0.25 0.25  50.0	0.65
    6.29 -s	1.4 0 0.784 	0.064	0.25 0.25 0.25  50.0	0.65
    6.30 -s	1.4 0 -0.56 	0.16	0.25 0.25 0.25  50.0	0.65
    6.31 -s	1.624 0 -0.56 	0.064	0.25 0.25 0.25  50.0	0.65
    6.32 -s	1.176 0 -0.56 	0.064	0.25 0.25 0.25  50.0	0.65
    6.33 -s	1.4 0.224 -0.56 	0.064	0.25 0.25 0.25  50.0	0.65
    6.34 -s	1.4 -0.224 -0.56 	0.064	0.25 0.25 0.25  50.0	0.65
    6.35 -s	1.4 0 -0.784 	0.064	0.25 0.25 0.25  50.0	0.65
    6.36 -s	-1.4 0 0 	0.4	0.25 0.25 0.25  50.0	0.65
    6.37 -s	-1.96 0 0 	0.16	0.25 0.25 0.25  50.0	0.65
    6.38 -s	-2.184 0 0 	0.064	0.25 0.25 0.25  50.0	0.65
    6.39 -s	-1.96 0.224 0 	0.064	0.25 0.25 0.25  50.0	0.65
    6.40 -s	-1.96 -0.224 0 	0.064	0.25 0.25 0.25  50.0	0.65
    6.41 -s	-1.96 0 0.224 	0.064	0.25 0.25 0.25  50.0	0.65
    6.42 -s	-1.96 0 -0.224 	0.064	0.25 0.25 0.25  50.0	0.65
    6.43 -s	-1.4 0.56 0 	0.16	0.25 0.25 0.25  50.0	0.65
    6.44 -s	-1.176 0.56 0 	0.064	0.25 0.25 0.25  50.0	0.65
    6.45 -s	-1.624 0.56 0 	0.064	0.25 0.25 0.25  50.0	0.65
    6.46 -s	-1.4 0.784 0 	0.064	0.25 0.25 0.25  50.0	0.65
    6.47 -s	-1.4 0.56 0.224 	0.064	0.25 0.25 0.25  50.0	0.65
    6.48 -s	-1.4 0.56 -0.224 	0.064	0.25 0.25 0.25  50.0	0.65
    6.49 -s	-1.4 -0.56 0 	0.16	0.25 0.25 0.25  50.0	0.65
    6.50 -s	-1.176 -0.56 0 	0.064	0.25 0.25 0.25  50.0	0.65
    6.51 -s	-1.624 -0.56 0 	0.064	0.25 0.25 0.25  50.0	0.65
    6.52 -s	-1.4 -0.784 0 	0.064	0.25 0.25 0.25  50.0	0.65
    6.53 -s	-1.4 -0.56 0.224 	0.064	0.25 0.25 0.25  50.0	0.65
    6.54 -s	-1.4 -0.56 -0.224 	0.064	0.25 0.25 0.25  50.0	0.65
    6.55 -s	-1.4 0 0.56 	0.16	0.25 0.25 0.25  50.0	0.65
    6.56 -s	-1.176 0 0.56 	0.064	0.25 0.25 0.25  50.0	0.65
    6.57 -s	-1.624 0 0.56 	0.064	0.25 0.25 0.25  50.0	0.65
    6.58 -s	-1.4 0.224 0.56 	0.064	0.25 0.25 0.25  50.0	0.65
    6.59 -s	-1.4 -0.224 0.56 	0.064	0.25 0.25 0.25  50.0	0.65
    6.60 -s	-1.4 0 0.784 	0.064	0.25 0.25 0.25  50.0	0.65
    6.61 -s	-1.4 0 -0.56 	0.16	0.25 0.25 0.25  50.0	0.65
    6.62 -s	-1.176 0 -0.56 	0.064	0.25 0.25 0.25  50.0	0.65
    6.63 -s	-1.624 0 -0.56 	0.064	0.25 0.25 0.25  50.0	0.65
    6.64 -s	-1.4 0.224 -0.56 	0.064	0.25 0.25 0.25  50.0	0.65
    6.65 -s	-1.4 -0.224 -0.56 	0.064	0.25 0.25 0.25  50.0	0.65
    6.66 -s	-1.4 0 -0.784 	0.064	0.25 0.25 0.25  50.0	0.65
    6.67 -s	0 1.4 0 	0.4	0.25 0.25 0.25  50.0	0.65
    6.68 -s	0.56 1.4 0 	0.16	0.25 0.25 0.25  50.0	0.65
    6.69 -s	0.784 1.4 0 	0.064	0.25 0.25 0.25  50.0	0.65
    6.70 -s	0.56 1.624 0 	0.064	0.25 0.25 0.25  50.0	0.65
    6.71 -s	0.56 1.176 0 	0.064	0.25 0.25 0.25  50.0	0.65
    6.72 -s	0.56 1.4 0.224 	0.064	0.25 0.25 0.25  50.0	0.65
    6.73 -s	0.56 1.4 -0.224 	0.064	0.25 0.25 0.25  50.0	0.65
    6.74 -s	-0.56 1.4 0 	0.16	0.25 0.25 0.25  50.0	0.65
    6.75 -s	-0.784 1.4 0 	0.064	0.25 0.25 0.25  50.0	0.65
    6.76 -s	-0.56 1.624 0 	0.064	0.25 0.25 0.25  50.0	0.65
    6.77 -s	-0.56 1.176 0 	0.064	0.25 0.25 0.25  50.0	0.65
    6.78 -s	-0.56 1.4 0.224 	0.064	0.25 0.25 0.25  50.0	0.65
    6.79 -s	-0.56 1.4 -0.224 	0.064	0.25 0.25 0.25  50.0	0.65
    6.80 -s	0 1.96 0 	0.16	0.25 0.25 0.25  50.0	0.65
    6.81 -s	0.224 1.96 0 	0.064	0.25 0.25 0.25  50.0	0.65
    6.82 -s	-0.224 1.96 0 	0.064	0.25 0.25 0.25  50.0	0.65
    6.83 -s	0 2.184 0 	0.064	0.25 0.25 0.25  50.0	0.65
    6.84 -s	0 1.96 0.224 	0.064	0.25 0.25 0.25  50.0	0.65
    6.85 -s	0 1.96 -0.224 	0.064	0.25 0.25 0.25  50.0	0.65
    6.86 -s	0 1.4 0.56 	0.16	0.25 0.25 0.25  50.0	0.65
    6.87 -s	0.224 1.4 0.56 	0.064	0.25 0.25 0.25  50.0	0.65
    6.88 -s	-0.224 1.4 0.56 	0.064	0.25 0.25 0.25  50.0	0.65
    6.89 -s	0 1.624 0.56 	0.064	0.25 0.25 0.25  50.0	0.65
    6.90 -s	0 1.176 0.56 	0.064	0.25 0.25 0.25  50.0	0.65
    6.91 -s	0 1.4 0.784 	0.064	0.25 0.25 0.25  50.0	0.65
    6.92 -s	0 1.4 -0.56 	0.16	0.25 0.25 0.25  50.0	0.65
    6.93 -s	0.224 1.4 -0.56 	0.064	0.25 0.25 0.25  50.0	0.65
    6.94 -s	-0.224 1.4 -0.56 	0.064	0.25 0.25 0.25  50.0	0.65
    6.95 -s	0 1.624 -0.56 	0.064	0.25 0.25 0.25  50.0	0.65
    6.96 -s	0 1.176 -0.56 	0.064	0.25 0.25 0.25  50.0	0.65
    6.97 -s	0 1.4 -0.784 	0.064	0.25 0.25 0.25  50.0	0.65
    6.98 -s	0 -1.4 0 	0.4	0.25 0.25 0.25  50.0	0.65
    6.99 -s	0.56 -1.4 0 	0.16	0.25 0.25 0.25  50.0	0.65
   6.100 -s	0.784 -1.4 0 	0.064	0.25 0.25 0.25  50.0	0.65
   6.101 -s	0.56 -1.176 0 	0.064	0.25 0.25 0.25  50.0	0.65
   6.102 -s	0.56 -1.624 0 	0.064	0.25 0.25 0.25  50.0	0.65
   6.103 -s	0.56 -1.4 0.224 	0.064	0.25 0.25 0.25  50.0	0.65
   6.104 -s	0.56 -1.4 -0.224 	0.064	0.25 0.25 0.25  50.0	0.65
   6.105 -s	-0.56 -1.4 0 	0.16	0.25 0.25 0.25  50.0	0.65
   6.106 -s	-0.784 -1.4 0 	0.064	0.25 0.25 0.25  50.0	0.65
   6.107 -s	-0.56 -1.176 0 	0.064	0.25 0.25 0.25  50.0	0.65
   6.108 -s	-0.56 -1.624 0 	0.064	0.25 0.25 0.25  50.0	0.65
   6.109 -s	-0.56 -1.4 0.224 	0.064	0.25 0.25 0.25  50.0	0.65
   6.110 -s	-0.56 -1.4 -0.224 	0.064	0.25 0.25 0.25  50.0	0.65
   6.111 -s	0 -1.96 0 	0.16	0.25 0.25 0.25  50.0	0.65
   6.112 -s	0.224 -1.96 0 	0.064	0.25 0.25 0.25  50.0	0.65
   6.113 -s	-0.224 -1.96 0 	0.064	0.25 0.25 0.25  50.0	0.65
   6.114 -s	0 -2.184 0 	0.064	0.25 0.25 0.25  50.0	0.65
   6.115 -s	0 -1.96 0.224 	0.064	0.25 0.25 0.25  50.0	0.65
   6.116 -s	0 -1.96 -0.224 	0.064	0.25 0.25 0.25  50.0	0.65
   6.117 -s	0 -1.4 0.56 	0.16	0.25 0.25 0.25  50.0	0.65
   6.118 -s	0.224 -1.4 0.56 	0.064	0.25 0.25 0.25  50.0	0.65
   6.119 -s	-0.224 -1.4 0.56 	0.064	0.25 0.25 0.25  50.0	0.65
   6.120 -s	0 -1.176 0.56 	0.064	0.25 0.25 0.25  50.0	0.65
   6.121 -s	0 -1.624 0.56 	0.064	0.25 0.25 0.25  50.0	0.65
   6.122 -s	0 -1.4 0.784 	0.064	0.25 0.25 0.25  50.0	0.65
   6.123 -s	0 -1.4 -0.56 	0.16	0.25 0.25 0.25  50.0	0.65
   6.124 -s	0.224 -1.4 -0.56 	0.064	0.25 0.25 0.25  50.0	0.65
   6.125 -s	-0.224 -1.4 -0.56 	0.064	0.25 0.25 0.25  50.0	0.65
   6.126 -s	0 -1.176 -0.56 	0.064	0.25 0.25 0.25  50.0	0.65
   6.127 -s	0 -1.624 -0.56 	0.064	0.25 0.25 0.25  50.0	0.65
   6.128 -s	0 -1.4 -0.784 	0.064	0.25 0.25 0.25  50.0	0.65
   6.129 -s	0 0 1.4 	0.4	0.25 0.25 0.25  50.0	0.65
   6.130 -s	0.56 0 1.4 	0.16	0.25 0.25 0.25  50.0	0.65
   6.131 -s	0.784 0 1.4 	0.064	0.25 0.25 0.25  50.0	0.65
   6.132 -s	0.56 0.224 1.4 	0.064	0.25 0.25 0.25  50.0	0.65
   6.133 -s	0.56 -0.224 1.4 	0.064	0.25 0.25 0.25  50.0	0.65
   6.134 -s	0.56 0 1.624 	0.064	0.25 0.25 0.25  50.0	0.65
   6.135 -s	0.56 0 1.176 	0.064	0.25 0.25 0.25  50.0	0.65
   6.136 -s	-0.56 0 1.4 	0.16	0.25 0.25 0.25  50.0	0.65
   6.137 -s	-0.784 0 1.4 	0.064	0.25 0.25 0.25  50.0	0.65
   6.138 -s	-0.56 0.224 1.4 	0.064	0.25 0.25 0.25  50.0	0.65
   6.139 -s	-0.56 -0.224 1.4 	0.064	0.25 0.25 0.25  50.0	0.65
   6.140 -s	-0.56 0 1.624 	0.064	0.25 0.25 0.25  50.0	0.65
   6.141 -s	-0.56 0 1.176 	0.064	0.25 0.25 0.25  50.0	0.65
   6.142 -s	0 0.56 1.4 	0.16	0.25 0.25 0.25  50.0	0.65
   6.143 -s	0.224 0.56 1.4 	0.064	0.25 0.25 0.25  50.0	0.65
   6.144 -s	-0.224 0.56 1.4 	0.064	0.25 0.25 0.25  50.0	0.65
   6.145 -s	0 0.784 1.4 	0.064	0.25 0.25 0.25  50.0	0.65
   6.146 -s	0 0.56 1.624 	0.064	0.25 0.25 0.25  50.0	0.65
   6.147 -s	0 0.56 1.176 	0.064	0.25 0.25 0.25  50.0	0.65
   6.148 -s	0 -0.56 1.4 	0.16	0.25 0.25 0.25  50.0	0.65
   6.149 -s	0.224 -0.56 1.4 	0.064	0.25 0.25 0.25  50.0	0.65
   6.150 -s	-0.224 -0.56 1.4 	0.064	0.25 0.25 0.25  50.0	0.65
   6.151 -s	0 -0.784 1.4 	0.064	0.25 0.25 0.25  50.0	0.65
   6.152 -s	0 -0.56 1.624 	0.064	0.25 0.25 0.25  50.0	0.65
   6.153 -s	0 -0.56 1.176 	0.064	0.25 0.25 0.25  50.0	0.65
   6.154 -s	0 0 1.96 	0.16	0.25 0.25 0.25  50.0	0.65
   6.155 -s	0.224 0 1.96 	0.064	0.25 0.25 0.25  50.0	0.65
   6.156 -s	-0.224 0 1.96 	0.064	0.25 0.25 0.25  50.0	0.65
   6.157 -s	0 0.224 1.96 	0.064	0.25 0.25 0.25  50.0	0.65
   6.158 -s	0 -0.224 1.96 	0.064	0.25 0.25 0.25  50.0	0.65
   6.159 -s	0 0 2.184 	0.064	0.25 0.25 0.25  50.0	0.65
   6.160 -s	0 0 -1.4 	0.4	0.25 0.25 0.25  50.0	0.65
   6.161 -s	0.56 0 -1.4 	0.16	0.25 0.25 0.25  50.0	0.65
   6.162 -s	0.784 0 -1.4 	0.064	0.25 0.25 0.25  50.0	0.65
   6.163 -s	0.56 0.224 -1.4 	0.064	0.25 0.25 0.25  50.0	0.65
   6.164 -s	0.56 -0.224 -1.4 	0.064	0.25 0.25 0.25  50.0	0.65
   6.165 -s	0.56 0 -1.176 	0.064	0.25 0.25 0.25  50.0	0.65
   6.166 -s	0.56 0 -1.624 	0.064	0.25 0.25 0.25  50.0	0.65
   6.167 -s	-0.56 0 -1.4 	0.16	0.25 0.25 0.25  50.0	0.65
   6.168 -s	-0.784 0 -1.4 	0.064	0.25 0.25 0.25  50.0	0.65
   6.169 -s	-0.56 0.224 -1.4 	0.064	0.25 0.25 0.25  50.0	0.65
   6.170 -s	-0.56 -0.224 -1.4 	0.064	0.25 0.25 0.25  50.0	0.65
   6.171 -s	-0.56 0 -1.176 	0.064	0.25 0.25 0.25  50.0	0.65
   6.172 -s	-0.56 0 -1.624 	0.064	0.25 0.25 0.25  50.0	0.65
   6.173 -s	0 0.56 -1.4 	0.16	0.25 0.25 0.25  50.0	0.65
   6.174 -s	0.224 0.56 -1.4 	0.064	0.25 0.25 0.25  50.0	0.65
   6.175 -s	-0.224 0.56 -1.4 	0.064	0.25 0.25 0.25  50.0	0.65
   6.176 -s	0 0.784 -1.4 	0.064	0.25 0.25 0.25  50.0	0.65
   6.177 -s	0 0.56 -1.176 	0.064	0.25 0.25 0.25  50.0	0.65
   6.178 -s	0 0.56 -1.624 	0.064	0.25 0.25 0.25  50.0	0.65
   6.179 -s	0 -0.56 -1.4 	0.16	0.25 0.25 0.25  50.0	0.65
   6.180 -s	0.224 -0.56 -1.4 	0.064	0.25 0.25 0.25  50.0	0.65
   6.181 -s	-0.224 -0.56 -1.4 	0.064	0.25 0.25 0.25  50.0	0.65
   6.182 -s	0 -0.784 -1.4 	0.064	0.25 0.25 0.25  50.0	0.65
   6.183 -s	0 -0.56 -1.176 	0.064	0.25 0.25 0.25  50.0	0.65
   6.184 -s	0 -0.56 -1.624 	0.064	0.25 0.25 0.25  50.0	0.65
   6.185 -s	0 0 -1.96 	0.16	0.25 0.25 0.25  50.0	0.65
   6.186 -s	0.224 0 -1.96 	0.064	0.25 0.25 0.25  50.0	0.65
   6.187 -s	-0.224 0 -1.96 	0.064	0.25 0.25 0.25  50.0	0.65
   6.188 -s	0 0.224 -1.96 	0.064	0.25 0.25 0.25  50.0	0.65
   6.189 -s	0 -0.224 -1.96 	0.064	0.25 0.25 0.25  50.0	0.65
   6.190 -s	0 0 -2.184 	0.064	0.25 0.25 0.25  50.0	0.65
   6.191 -s  0 -10002.25 0  10000  0.2 0.35 0.5  80.0  0.4
   6.192 -s  0  10100.00 0  10000  0.5 0.2 0.1  40.0  0.0
   6.193 -l	-50 68 -50
   6.194 -l	40 40 150
   6.195 -c	-7 6 -12 45  0 -0.65 0
     7.1 --- a/src/Application/main.c	Sat Oct 22 19:27:29 2011 -0700
     7.2 +++ b/src/Application/main.c	Fri Oct 28 06:56:35 2011 -0700
     7.3 @@ -102,14 +102,17 @@
     7.4  
     7.5  
     7.6  
     7.7 -union workload{
     7.8 +union timeStamp{
     7.9      uint32 highLow[2];
    7.10      uint64 total;
    7.11  };
    7.12  
    7.13  struct input_t{
    7.14      struct barrier_t* barrier;
    7.15 -    uint64   workcycles;
    7.16 +    uint64  totalWorkCycles;
    7.17 +    uint64  workPlusMutexCycles;
    7.18 +    union timeStamp startTime;
    7.19 +    union timeStamp endTime;
    7.20  };
    7.21  
    7.22  
    7.23 @@ -126,39 +129,43 @@
    7.24   * Workload
    7.25   */
    7.26  void work(void* input, VirtProcr* animatingPr)
    7.27 -{
    7.28 -    int n,m;
    7.29 -    struct input_t* in = (struct input_t*)input;
    7.30 -    unsigned int totalCycles = 0;
    7.31 -    unsigned int workspace1;
    7.32 -    double workspace2;
    7.33 -    int32 privateMutex = VPThread__make_mutex(animatingPr);
    7.34 + {
    7.35 +   int n,m;
    7.36 +   struct input_t* in = (struct input_t*)input;
    7.37 +   unsigned int totalWorkCycles = 0;
    7.38 +   unsigned int workspace1;
    7.39 +   double workspace2;
    7.40 +   int32 privateMutex = VPThread__make_mutex(animatingPr);
    7.41  
    7.42 -    for(m=0; m<repetitions; m++)
    7.43 +   saveTimeStampCountInto(in->startTime.highLow[0], in->startTime.highLow[1]);
    7.44 +   for(m=0; m<repetitions; m++)
    7.45      {
    7.46 -        int32 stamp_startWorkload, stamp_endWorkload;
    7.47 -        saveLowTimeStampCountInto( stamp_startWorkload );
    7.48 -        for(n=0; n<workload_size; n++)
    7.49 -        {
    7.50 -            workspace1 += (workspace1 + 32)/2;
    7.51 -            workspace2 += (workspace2 + 23.2)/1.4;
    7.52 -        }
    7.53 -        saveLowTimeStampCountInto( stamp_endWorkload );
    7.54 -        int32 numCycles = stamp_endWorkload-stamp_startWorkload;
    7.55 -        if( numCycles < 100000000 ) totalCycles += numCycles; //sanity check
    7.56 -        
    7.57 -        VPThread__mutex_lock(privateMutex, animatingPr);
    7.58 -        //access queue
    7.59 -        VPThread__mutex_unlock(privateMutex, animatingPr);
    7.60 +      int32 stamp_startWorkload, stamp_endWorkload;
    7.61 +      saveLowTimeStampCountInto( stamp_startWorkload );
    7.62 +      for(n=0; n<workload_size; n++)
    7.63 +       {
    7.64 +         workspace1 += (workspace1 + 32)/2;
    7.65 +         workspace2 += (workspace2 + 23.2)/1.4;
    7.66 +       }
    7.67 +      saveLowTimeStampCountInto( stamp_endWorkload );
    7.68 +      int32 numCycles = stamp_endWorkload-stamp_startWorkload;
    7.69 +      if( numCycles < 100000000 ) totalWorkCycles += numCycles;//sanity check
    7.70 +
    7.71 +      VPThread__mutex_lock(privateMutex, animatingPr);
    7.72 +      //access queue
    7.73 +      VPThread__mutex_unlock(privateMutex, animatingPr);
    7.74      }
    7.75 -    
    7.76 -    in->workcycles = totalCycles;
    7.77 -    barrier_wait(in->barrier,animatingPr);
    7.78 -    //Shutdown worker
    7.79 -    VPThread__dissipate_thread(animatingPr);
    7.80 -    printf("%d", workspace1);
    7.81 -    printf("%f", workspace2);
    7.82 -}
    7.83 +
    7.84 +   saveTimeStampCountInto( in->endTime.highLow[0], in->endTime.highLow[1] );
    7.85 +   in->totalWorkCycles = totalWorkCycles;
    7.86 +   in->workPlusMutexCycles = in->endTime.total - in->startTime.total;
    7.87 +   barrier_wait(in->barrier, animatingPr);
    7.88 +   
    7.89 +   //Shutdown worker
    7.90 +   VPThread__dissipate_thread(animatingPr);
    7.91 +//   printf("%d", workspace1);  //Should never execute!  VMS bug if does
    7.92 +//   printf("%f", workspace2);
    7.93 + }
    7.94  
    7.95  /* this is run after the VMS is set up*/
    7.96  void benchmark(void *in, VirtProcr *animatingPr)
    7.97 @@ -172,19 +179,24 @@
    7.98      { input[i].barrier = &barr;
    7.99      }
   7.100  
   7.101 -   union workload stamp_startThread, stamp_endThread;
   7.102 -   saveTimeStampCountInto(stamp_startThread.highLow[0], stamp_startThread.highLow[1]);
   7.103 +   union timeStamp startBenchTime, endBenchTime;
   7.104 +   uint64 lastThreadFinishTime = 0L;
   7.105 +   saveTimeStampCountInto(startBenchTime.highLow[0], startBenchTime.highLow[1]);
   7.106     for(i=0; i<num_threads; i++)
   7.107      { VPThread__create_thread((VirtProcrFnPtr)work, (void*)&input[i], animatingPr);
   7.108      }
   7.109     barrier_wait(&barr, animatingPr);
   7.110 -   saveTimeStampCountInto(stamp_endThread.highLow[0], stamp_endThread.highLow[1]);
   7.111 +   saveTimeStampCountInto(endBenchTime.highLow[0], endBenchTime.highLow[1]);
   7.112  
   7.113  
   7.114     for(i=0; i<num_threads; i++)
   7.115 -    { printf("Workcycles: %d\n",input[i].workcycles);
   7.116 +    { printf("WorkCycles: %d\n",input[i].totalWorkCycles);
   7.117 +      printf("Work + Sync Cycles: %lu\n", input[i].workPlusMutexCycles);
   7.118 +      if(input[i].endTime.total > lastThreadFinishTime)
   7.119 +         lastThreadFinishTime = input[i].endTime.total;
   7.120      }
   7.121 -   printf("Total cycles %lu\n", stamp_endThread.total-stamp_startThread.total);
   7.122 +   printf("Time inside Barrier: %lu\n", endBenchTime.total-startBenchTime.total);
   7.123 +   printf("Longest Span: %lu\n", lastThreadFinishTime-startBenchTime.total);
   7.124  
   7.125     //======================================================
   7.126