Mercurial > cgi-bin > hgwebdir.cgi > PR > Applications > Vthread > Vthread__Best_Effort_Msg__Bench
changeset 5:535c119ba090
rearranged code to fit project patterns
| author | Me@portablequad |
|---|---|
| date | Fri, 28 Oct 2011 06:56:35 -0700 |
| parents | e512fcf3748e |
| children | c8995a602b46 |
| files | .hgignore Makefile README.txt c-ray-mt.c scene sphfract src/Application/main.c |
| diffstat | 7 files changed, 77 insertions(+), 1016 deletions(-) [+] |
line diff
1.1 --- a/.hgignore Sat Oct 22 19:27:29 2011 -0700 1.2 +++ b/.hgignore Fri Oct 28 06:56:35 2011 -0700 1.3 @@ -2,6 +2,8 @@ 1.4 1.5 histograms 1.6 nbproject 1.7 +build 1.8 +dist 1.9 c-ray-mt 1.10 *.ppm 1.11 *.o
2.1 --- a/Makefile Sat Oct 22 19:27:29 2011 -0700 2.2 +++ b/Makefile Fri Oct 28 06:56:35 2011 -0700 2.3 @@ -1,31 +1,33 @@ 2.4 obj = \ 2.5 - VPThread_lib/VMS/Histogram/Histogram.o \ 2.6 - VPThread_lib/VMS/Histogram/FloatHist.o \ 2.7 - VPThread_lib/VMS/CoreLoop.o \ 2.8 - VPThread_lib/VMS/VMS.o \ 2.9 - VPThread_lib/VMS/MasterLoop.o \ 2.10 - VPThread_lib/VMS/Queue_impl/PrivateQueue.o \ 2.11 - VPThread_lib/VMS/Hash_impl/PrivateHash.o \ 2.12 - VPThread_lib/VMS/DynArray/DynArray.o \ 2.13 - VPThread_lib/VPThread_PluginFns.o \ 2.14 - VPThread_lib/VPThread_lib.o \ 2.15 - VPThread_lib/VMS/Histogram/DblHist.o \ 2.16 - VPThread_lib/VPThread.o \ 2.17 - VPThread_lib/VMS/probes.o \ 2.18 - VPThread_lib/VMS/ProcrContext.o \ 2.19 - VPThread_lib/VPThread_Request_Handlers.o \ 2.20 - VPThread_lib/VPThread_helper.o \ 2.21 - VPThread_lib/VMS/Hash_impl/MurmurHash2.o \ 2.22 - VPThread_lib/VMS/vmalloc.o \ 2.23 - VPThread_lib/VMS/contextSwitch.o \ 2.24 - VPThread_lib/VMS/Queue_impl/BlockingQueue.o \ 2.25 - VPThread_lib/VMS/vutilities.o \ 2.26 - c-ray-mt.o 2.27 + src/VPThread_lib/VMS/Histogram/Histogram.o \ 2.28 + src/VPThread_lib/VMS/Histogram/FloatHist.o \ 2.29 + src/VPThread_lib/VMS/CoreLoop.o \ 2.30 + src/VPThread_lib/VMS/VMS.o \ 2.31 + src/VPThread_lib/VMS/MasterLoop.o \ 2.32 + src/VPThread_lib/VMS/Queue_impl/PrivateQueue.o \ 2.33 + src/VPThread_lib/VMS/Hash_impl/PrivateHash.o \ 2.34 + src/VPThread_lib/VMS/DynArray/DynArray.o \ 2.35 + src/VPThread_lib/VPThread_PluginFns.o \ 2.36 + src/VPThread_lib/VPThread_lib.o \ 2.37 + src/VPThread_lib/VMS/Histogram/DblHist.o \ 2.38 + src/VPThread_lib/VPThread.o \ 2.39 + src/VPThread_lib/VMS/probes.o \ 2.40 + src/VPThread_lib/VMS/ProcrContext.o \ 2.41 + src/VPThread_lib/VPThread_Request_Handlers.o \ 2.42 + src/VPThread_lib/VPThread_helper.o \ 2.43 + src/VPThread_lib/VMS/Hash_impl/MurmurHash2.o \ 2.44 + src/VPThread_lib/VMS/vmalloc.o \ 2.45 + src/VPThread_lib/VMS/contextSwitch.o \ 2.46 + src/VPThread_lib/VMS/Queue_impl/BlockingQueue.o \ 2.47 + src/VPThread_lib/VMS/vutilities.o \ 2.48 + src/Application/main.o 2.49 2.50 -bin = c-ray-mt 2.51 +bin = task_size_vs_exe_time 2.52 + 2.53 +NUM_CORES=4 2.54 2.55 CC = gcc 2.56 -CFLAGS = -m64 -ffast-math -fwrapv -fno-omit-frame-pointer -O3 -D VPTHREAD -D APPLICATION=C-RAY -g -Wall 2.57 +CFLAGS = -m64 -ffast-math -fwrapv -fno-omit-frame-pointer -O3 -D VPTHREAD -D APPLICATION=C-RAY -D NUM_CORES=$(NUM_CORES) -g -Wall 2.58 2.59 $(bin): $(obj) 2.60 $(CC) -o $@ $(obj) -lm -lpthread
3.1 --- a/README.txt Sat Oct 22 19:27:29 2011 -0700 3.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 3.3 @@ -1,27 +0,0 @@ 3.4 -Kernel: Ray Tracing 3.5 - 3.6 -This is a kernel-type benchmark of a very simple and brute-force ray tracer. 3.7 - 3.8 -Installation: 3.9 - 3.10 -To install the kernel benchmark, navigate to the directory this file is located in, open up a terminal and simply type 'make'. For certain architectures 3.11 -or special compilation options, you might need to change compilation parameters in the makefile. 3.12 - 3.13 -Usage: 3.14 - 3.15 -You may execute the benchmark by navigating to this directory after compilation and typing 3.16 - 3.17 -./c-ray-mt -i FILENAME -s RESOLUTION -o OUTPUT.ppm 3.18 - 3.19 -'FILENAME' has to be either "scene" or "sphfract" or another predefined scene description file if there is one. 3.20 -'RESOLUTION' specifies the resolution of the produced image and has to be given in the form 1920x1200, for example. 3.21 -'OUTPUT' is the name of the file the rendered image will be contained in after the benchmark ran. 3.22 - 3.23 -The specification of how many threads are used to perform the rendering depends on the parallel programming model. 3.24 - 3.25 -Benchmark Versions: 3.26 - 3.27 -Serial 3.28 -POSIX Threads 3.29 -OpenMP SuperScalar 3.30 -
4.1 --- a/c-ray-mt.c Sat Oct 22 19:27:29 2011 -0700 4.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 4.3 @@ -1,718 +0,0 @@ 4.4 -/* c-ray-mt - a simple multithreaded raytracing filter. 4.5 - * Copyright (C) 2006 John Tsiombikas <nuclear@siggraph.org> 4.6 - * 4.7 - * You are free to use, modify and redistribute this program under the 4.8 - * terms of the GNU General Public License v2 or (at your option) later. 4.9 - * see "http://www.gnu.org/licenses/gpl.txt" for details. 4.10 - * --------------------------------------------------------------------- 4.11 - * Usage: 4.12 - * compile: just type make 4.13 - * (add any arch-specific optimizations for your compiler in CFLAGS first) 4.14 - * run: cat scene | ./c-ray-mt [-t num-threads] >foo.ppm 4.15 - * (on broken systems such as windows try: c-ray-mt -i scene -o foo.ppm) 4.16 - * enjoy: display foo.ppm 4.17 - * (with imagemagick, or use your favorite image viewer) 4.18 - * --------------------------------------------------------------------- 4.19 - * Scene file format: 4.20 - * # sphere (many) 4.21 - * s x y z rad r g b shininess reflectivity 4.22 - * # light (many) 4.23 - * l x y z 4.24 - * # camera (one) 4.25 - * c x y z fov tx ty tz 4.26 - * --------------------------------------------------------------------- 4.27 - */ 4.28 -#include <stdio.h> 4.29 -#include <stdlib.h> 4.30 -#include <string.h> 4.31 -#include <math.h> 4.32 -#include <ctype.h> 4.33 -#include <errno.h> 4.34 -#include <pthread.h> 4.35 -#include "VPThread_lib/VPThread.h" 4.36 - 4.37 -#define VER_MAJOR 1 4.38 -#define VER_MINOR 1 4.39 -#define VER_STR "c-ray-mt v%d.%d\n" 4.40 - 4.41 -#if !defined(unix) && !defined(__unix__) 4.42 -#ifdef __MACH__ 4.43 -#define unix 1 4.44 -#define __unix__ 1 4.45 -#endif /* __MACH__ */ 4.46 -#endif /* unix */ 4.47 - 4.48 -/* find the appropriate way to define explicitly sized types */ 4.49 -/* for C99 or GNU libc (also mach's libc) we can use stdint.h */ 4.50 -#if (__STDC_VERSION__ >= 199900) || defined(__GLIBC__) || defined(__MACH__) 4.51 -#include <stdint.h> 4.52 -#elif defined(unix) || defined(__unix__) /* some UNIX systems have them in sys/types.h */ 4.53 -#include <sys/types.h> 4.54 -#elif defined(__WIN32__) || defined(WIN32) /* the nameless one */ 4.55 -typedef unsigned __int8 uint8_t; 4.56 -typedef unsigned __int32 uint32_t; 4.57 -#endif /* sized type detection */ 4.58 - 4.59 -struct vec3 { 4.60 - double x, y, z; 4.61 -}; 4.62 - 4.63 -struct ray { 4.64 - struct vec3 orig, dir; 4.65 -}; 4.66 - 4.67 -struct material { 4.68 - struct vec3 col; /* color */ 4.69 - double spow; /* specular power */ 4.70 - double refl; /* reflection intensity */ 4.71 -}; 4.72 - 4.73 -struct sphere { 4.74 - struct vec3 pos; 4.75 - double rad; 4.76 - struct material mat; 4.77 - struct sphere *next; 4.78 -}; 4.79 - 4.80 -struct spoint { 4.81 - struct vec3 pos, normal, vref; /* position, normal and view reflection */ 4.82 - double dist; /* parametric distance of intersection along the ray */ 4.83 -}; 4.84 - 4.85 -struct camera { 4.86 - struct vec3 pos, targ; 4.87 - double fov; 4.88 -}; 4.89 - 4.90 -struct thread_data { 4.91 - VirtProcr *VP; 4.92 - int sl_start, sl_count; 4.93 - uint32_t *pixels; 4.94 -}; 4.95 -typedef struct thread_data thread_data; 4.96 - 4.97 -void render_scanline(int xsz, int ysz, int sl, uint32_t *fb, int samples); 4.98 -struct vec3 trace(struct ray ray, int depth); 4.99 -struct vec3 shade(struct sphere *obj, struct spoint *sp, int depth); 4.100 -struct vec3 reflect(struct vec3 v, struct vec3 n); 4.101 -struct vec3 cross_product(struct vec3 v1, struct vec3 v2); 4.102 -struct ray get_primary_ray(int x, int y, int sample); 4.103 -struct vec3 get_sample_pos(int x, int y, int sample); 4.104 -struct vec3 jitter(int x, int y, int s); 4.105 -int ray_sphere(const struct sphere *sph, struct ray ray, struct spoint *sp); 4.106 -void load_scene(FILE *fp); 4.107 -unsigned long get_msec(void); 4.108 - 4.109 -void thread_func(void *tdata, VirtProcr *VProc); 4.110 - 4.111 -#define MAX_LIGHTS 16 /* maximum number of lights */ 4.112 -#define RAY_MAG 1000.0 /* trace rays of this magnitude */ 4.113 -#define MAX_RAY_DEPTH 5 /* raytrace recursion limit */ 4.114 -#define FOV 0.78539816 /* field of view in rads (pi/4) */ 4.115 -#define HALF_FOV (FOV * 0.5) 4.116 -#define ERR_MARGIN 1e-6 /* an arbitrary error margin to avoid surface acne */ 4.117 - 4.118 -/* bit-shift ammount for packing each color into a 32bit uint */ 4.119 -#ifdef LITTLE_ENDIAN 4.120 -#define RSHIFT 16 4.121 -#define BSHIFT 0 4.122 -#else /* big endian */ 4.123 -#define RSHIFT 0 4.124 -#define BSHIFT 16 4.125 -#endif /* endianess */ 4.126 -#define GSHIFT 8 /* this is the same in both byte orders */ 4.127 - 4.128 -/* some helpful macros... */ 4.129 -#define SQ(x) ((x) * (x)) 4.130 -#define MAX(a, b) ((a) > (b) ? (a) : (b)) 4.131 -#define MIN(a, b) ((a) < (b) ? (a) : (b)) 4.132 -#define DOT(a, b) ((a).x * (b).x + (a).y * (b).y + (a).z * (b).z) 4.133 -#define NORMALIZE(a) do {\ 4.134 - double len = sqrt(DOT(a, a));\ 4.135 - (a).x /= len; (a).y /= len; (a).z /= len;\ 4.136 -} while(0); 4.137 - 4.138 -/* global state */ 4.139 -int xres = 800; 4.140 -int yres = 600; 4.141 -int rays_per_pixel = 1; 4.142 -double aspect = 1.333333; 4.143 -struct sphere *obj_list; 4.144 -struct vec3 lights[MAX_LIGHTS]; 4.145 -int lnum = 0; 4.146 -struct camera cam; 4.147 - 4.148 -int thread_num = 1; 4.149 -struct thread_data *threads; 4.150 - 4.151 -volatile int end = 0; 4.152 -volatile int start = 0; 4.153 -int32 end_mutex, end_cond; 4.154 -int32 start_cond, start_mutex; 4.155 - 4.156 -#define NRAN 1024 4.157 -#define MASK (NRAN - 1) 4.158 -struct vec3 urand[NRAN]; 4.159 -int irand[NRAN]; 4.160 - 4.161 -unsigned long rend_time, start_time; 4.162 - 4.163 -const char *usage = { 4.164 - "Usage: c-ray-mt [options]\n" 4.165 - " Reads a scene file from stdin, writes the image to stdout, and stats to stderr.\n\n" 4.166 - "Options:\n" 4.167 - " -t <num> how many threads to use (default: 1)\n" 4.168 - " -s WxH where W is the width and H the height of the image\n" 4.169 - " -r <rays> shoot <rays> rays per pixel (antialiasing)\n" 4.170 - " -i <file> read from <file> instead of stdin\n" 4.171 - " -o <file> write to <file> instead of stdout\n" 4.172 - " -h this help screen\n\n" 4.173 -}; 4.174 - 4.175 -char __ProgrammName[] = "c-ray"; 4.176 -char __DataSet[255]; 4.177 - 4.178 - 4.179 -void raytrace(void *pixels, VirtProcr *Vprocr); 4.180 - 4.181 -int main(int argc, char **argv) { 4.182 - int i; 4.183 - uint32_t *pixels; 4.184 - FILE *infile = stdin, *outfile = stdout; 4.185 - 4.186 - for(i=1; i<argc; i++) { 4.187 - if(argv[i][0] == '-' && argv[i][2] == 0) { 4.188 - char *sep; 4.189 - switch(argv[i][1]) { 4.190 - case 't': 4.191 - if(!isdigit(argv[++i][0])) { 4.192 - fprintf(stderr, "-t mus be followed by the number of worker threads to spawn\n"); 4.193 - return EXIT_FAILURE; 4.194 - } 4.195 - thread_num = atoi(argv[i]); 4.196 - if(!thread_num) { 4.197 - fprintf(stderr, "invalid number of threads specified: %d\n", thread_num); 4.198 - return EXIT_FAILURE; 4.199 - } 4.200 - break; 4.201 - 4.202 - case 's': 4.203 - if(!isdigit(argv[++i][0]) || !(sep = strchr(argv[i], 'x')) || !isdigit(*(sep + 1))) { 4.204 - fputs("-s must be followed by something like \"640x480\"\n", stderr); 4.205 - return EXIT_FAILURE; 4.206 - } 4.207 - xres = atoi(argv[i]); 4.208 - yres = atoi(sep + 1); 4.209 - aspect = (double)xres / (double)yres; 4.210 - break; 4.211 - 4.212 - case 'i': 4.213 - if(!(infile = fopen(argv[++i], "rb"))) { 4.214 - fprintf(stderr, "failed to open input file %s: %s\n", argv[i], strerror(errno)); 4.215 - return EXIT_FAILURE; 4.216 - } 4.217 - break; 4.218 - 4.219 - case 'o': 4.220 - if(!(outfile = fopen(argv[++i], "wb"))) { 4.221 - fprintf(stderr, "failed to open output file %s: %s\n", argv[i], strerror(errno)); 4.222 - return EXIT_FAILURE; 4.223 - } 4.224 - break; 4.225 - 4.226 - case 'r': 4.227 - if(!isdigit(argv[++i][0])) { 4.228 - fputs("-r must be followed by a number (rays per pixel)\n", stderr); 4.229 - return EXIT_FAILURE; 4.230 - } 4.231 - rays_per_pixel = atoi(argv[i]); 4.232 - break; 4.233 - 4.234 - case 'h': 4.235 - fputs(usage, stdout); 4.236 - return 0; 4.237 - 4.238 - default: 4.239 - fprintf(stderr, "unrecognized argument: %s\n", argv[i]); 4.240 - fputs(usage, stderr); 4.241 - return EXIT_FAILURE; 4.242 - } 4.243 - } else { 4.244 - fprintf(stderr, "unrecognized argument: %s\n", argv[i]); 4.245 - fputs(usage, stderr); 4.246 - return EXIT_FAILURE; 4.247 - } 4.248 - } 4.249 - 4.250 - snprintf(__DataSet,255,"file: %s\nsize: %dx%d\nrays per pixel: %d\nthreads: %d\n", 4.251 - infile, xres, yres, rays_per_pixel, thread_num); 4.252 - 4.253 - 4.254 - if(!(pixels = malloc(xres * yres * sizeof *pixels))) { 4.255 - perror("pixel buffer allocation failed"); 4.256 - return EXIT_FAILURE; 4.257 - } 4.258 - load_scene(infile); 4.259 - 4.260 - //This is the transition to the VMS runtime 4.261 - VPThread__create_seed_procr_and_do_work(raytrace, (void*)pixels); 4.262 - 4.263 - /* output statistics to stderr */ 4.264 - fprintf(stderr, "Rendering took: %lu seconds (%lu milliseconds)\n", rend_time / 1000, rend_time); 4.265 - 4.266 - /* output the image */ 4.267 - fprintf(outfile, "P6\n%d %d\n255\n", xres, yres); 4.268 - for(i=0; i<xres * yres; i++) { 4.269 - fputc((pixels[i] >> RSHIFT) & 0xff, outfile); 4.270 - fputc((pixels[i] >> GSHIFT) & 0xff, outfile); 4.271 - fputc((pixels[i] >> BSHIFT) & 0xff, outfile); 4.272 - } 4.273 - fflush(outfile); 4.274 - 4.275 - if(infile != stdin) fclose(infile); 4.276 - if(outfile != stdout) fclose(outfile); 4.277 - 4.278 - struct sphere *walker = obj_list; 4.279 - while(walker) { 4.280 - struct sphere *tmp = walker; 4.281 - walker = walker->next; 4.282 - free(tmp); 4.283 - } 4.284 - free(pixels); 4.285 - return 0; 4.286 -} 4.287 - 4.288 -/* this is run after the VMS is set up*/ 4.289 -void raytrace(void *pixels, VirtProcr *VProc) 4.290 -{ 4.291 - int i; 4.292 - double sl, sl_per_thread; 4.293 - 4.294 - /* initialize the random number tables for the jitter */ 4.295 - for(i=0; i<NRAN; i++) urand[i].x = (double)rand() / RAND_MAX - 0.5; 4.296 - for(i=0; i<NRAN; i++) urand[i].y = (double)rand() / RAND_MAX - 0.5; 4.297 - for(i=0; i<NRAN; i++) irand[i] = (int)(NRAN * ((double)rand() / RAND_MAX)); 4.298 - 4.299 - if(thread_num > yres) { 4.300 - fprintf(stderr, "more threads than scanlines specified, reducing number of threads to %d\n", yres); 4.301 - thread_num = yres; 4.302 - } 4.303 - 4.304 - 4.305 - if(!(threads = VPThread__malloc(thread_num * sizeof(thread_data), VProc))) { 4.306 - perror("failed to allocate thread table"); 4.307 - exit(EXIT_FAILURE); 4.308 - } 4.309 - 4.310 - end_mutex = VPThread__make_mutex(VProc); 4.311 - end_cond = VPThread__make_cond(end_mutex, VProc); 4.312 - start_mutex = VPThread__make_mutex(VProc); 4.313 - start_cond = VPThread__make_cond(start_mutex, VProc); 4.314 - 4.315 - sl = 0.0; 4.316 - sl_per_thread = (double)yres / (double)thread_num; 4.317 - for(i=0; i<thread_num; i++) { 4.318 - threads[i].sl_start = (int)sl; 4.319 - sl += sl_per_thread; 4.320 - threads[i].sl_count = (int)sl - threads[i].sl_start; 4.321 - threads[i].pixels = (uint32_t*)pixels; 4.322 - 4.323 - threads[i].VP = 4.324 - VPThread__create_thread((VirtProcrFnPtr)thread_func, 4.325 - (void*)(&threads[i]), VProc); 4.326 - } 4.327 - 4.328 - threads[thread_num - 1].sl_count = yres - threads[thread_num - 1].sl_start; 4.329 - 4.330 - fprintf(stderr, VER_STR, VER_MAJOR, VER_MINOR); 4.331 - 4.332 - // start worker threads 4.333 - //printf("start of worker thread (%d)\n", VProc->procrID); 4.334 - VPThread__mutex_lock(start_mutex, VProc); 4.335 - start_time = get_msec(); 4.336 - start = 1; 4.337 - for(i=0; i<thread_num; i++) 4.338 - VPThread__cond_signal(start_cond, VProc); 4.339 - VPThread__mutex_unlock(start_mutex, VProc); 4.340 - 4.341 - //printf("wait for worker (%d)\n", VProc->procrID); 4.342 - VPThread__mutex_lock(end_mutex, VProc); 4.343 - while(end < thread_num) 4.344 - VPThread__cond_wait(end_cond, VProc); 4.345 - VPThread__mutex_unlock(end_mutex, VProc); 4.346 - 4.347 - rend_time = get_msec() - start_time; 4.348 - 4.349 - VPThread__free(threads,VProc); 4.350 - VPThread__dissipate_thread(VProc); 4.351 -} 4.352 - 4.353 -/* render a frame of xsz/ysz dimensions into the provided framebuffer */ 4.354 -void render_scanline(int xsz, int ysz, int sl, uint32_t *fb, int samples) { 4.355 - int i, s; 4.356 - double rcp_samples = 1.0 / (double)samples; 4.357 - 4.358 - for(i=0; i<xsz; i++) { 4.359 - double r, g, b; 4.360 - r = g = b = 0.0; 4.361 - 4.362 - for(s=0; s<samples; s++) { 4.363 - struct vec3 col = trace(get_primary_ray(i, sl, s), 0); 4.364 - r += col.x; 4.365 - g += col.y; 4.366 - b += col.z; 4.367 - } 4.368 - 4.369 - r = r * rcp_samples; 4.370 - g = g * rcp_samples; 4.371 - b = b * rcp_samples; 4.372 - 4.373 - fb[sl * xsz + i] = ((uint32_t)(MIN(r, 1.0) * 255.0) & 0xff) << RSHIFT | 4.374 - ((uint32_t)(MIN(g, 1.0) * 255.0) & 0xff) << GSHIFT | 4.375 - ((uint32_t)(MIN(b, 1.0) * 255.0) & 0xff) << BSHIFT; 4.376 - } 4.377 -} 4.378 - 4.379 -/* trace a ray throught the scene recursively (the recursion happens through 4.380 - * shade() to calculate reflection rays if necessary). 4.381 - */ 4.382 -struct vec3 trace(struct ray ray, int depth) { 4.383 - struct vec3 col; 4.384 - struct spoint sp, nearest_sp; 4.385 - struct sphere *nearest_obj = 0; 4.386 - struct sphere *iter = obj_list->next; 4.387 - 4.388 - /* if we reached the recursion limit, bail out */ 4.389 - if(depth >= MAX_RAY_DEPTH) { 4.390 - col.x = col.y = col.z = 0.0; 4.391 - return col; 4.392 - } 4.393 - 4.394 - /* find the nearest intersection ... */ 4.395 - while(iter) { 4.396 - if(ray_sphere(iter, ray, &sp)) { 4.397 - if(!nearest_obj || sp.dist < nearest_sp.dist) { 4.398 - nearest_obj = iter; 4.399 - nearest_sp = sp; 4.400 - } 4.401 - } 4.402 - iter = iter->next; 4.403 - } 4.404 - 4.405 - /* and perform shading calculations as needed by calling shade() */ 4.406 - if(nearest_obj) { 4.407 - col = shade(nearest_obj, &nearest_sp, depth); 4.408 - } else { 4.409 - col.x = col.y = col.z = 0.0; 4.410 - } 4.411 - 4.412 - return col; 4.413 -} 4.414 - 4.415 -/* Calculates direct illumination with the phong reflectance model. 4.416 - * Also handles reflections by calling trace again, if necessary. 4.417 - */ 4.418 -struct vec3 shade(struct sphere *obj, struct spoint *sp, int depth) { 4.419 - int i; 4.420 - struct vec3 col = {0, 0, 0}; 4.421 - 4.422 - /* for all lights ... */ 4.423 - for(i=0; i<lnum; i++) { 4.424 - double ispec, idiff; 4.425 - struct vec3 ldir; 4.426 - struct ray shadow_ray; 4.427 - struct sphere *iter = obj_list->next; 4.428 - int in_shadow = 0; 4.429 - 4.430 - ldir.x = lights[i].x - sp->pos.x; 4.431 - ldir.y = lights[i].y - sp->pos.y; 4.432 - ldir.z = lights[i].z - sp->pos.z; 4.433 - 4.434 - shadow_ray.orig = sp->pos; 4.435 - shadow_ray.dir = ldir; 4.436 - 4.437 - /* shoot shadow rays to determine if we have a line of sight with the light */ 4.438 - while(iter) { 4.439 - if(ray_sphere(iter, shadow_ray, 0)) { 4.440 - in_shadow = 1; 4.441 - break; 4.442 - } 4.443 - iter = iter->next; 4.444 - } 4.445 - 4.446 - /* and if we're not in shadow, calculate direct illumination with the phong model. */ 4.447 - if(!in_shadow) { 4.448 - NORMALIZE(ldir); 4.449 - 4.450 - idiff = MAX(DOT(sp->normal, ldir), 0.0); 4.451 - ispec = obj->mat.spow > 0.0 ? pow(MAX(DOT(sp->vref, ldir), 0.0), obj->mat.spow) : 0.0; 4.452 - 4.453 - col.x += idiff * obj->mat.col.x + ispec; 4.454 - col.y += idiff * obj->mat.col.y + ispec; 4.455 - col.z += idiff * obj->mat.col.z + ispec; 4.456 - } 4.457 - } 4.458 - 4.459 - /* Also, if the object is reflective, spawn a reflection ray, and call trace() 4.460 - * to calculate the light arriving from the mirror direction. 4.461 - */ 4.462 - if(obj->mat.refl > 0.0) { 4.463 - struct ray ray; 4.464 - struct vec3 rcol; 4.465 - 4.466 - ray.orig = sp->pos; 4.467 - ray.dir = sp->vref; 4.468 - ray.dir.x *= RAY_MAG; 4.469 - ray.dir.y *= RAY_MAG; 4.470 - ray.dir.z *= RAY_MAG; 4.471 - 4.472 - rcol = trace(ray, depth + 1); 4.473 - col.x += rcol.x * obj->mat.refl; 4.474 - col.y += rcol.y * obj->mat.refl; 4.475 - col.z += rcol.z * obj->mat.refl; 4.476 - } 4.477 - 4.478 - return col; 4.479 -} 4.480 - 4.481 -/* calculate reflection vector */ 4.482 -struct vec3 reflect(struct vec3 v, struct vec3 n) { 4.483 - struct vec3 res; 4.484 - double dot = v.x * n.x + v.y * n.y + v.z * n.z; 4.485 - res.x = -(2.0 * dot * n.x - v.x); 4.486 - res.y = -(2.0 * dot * n.y - v.y); 4.487 - res.z = -(2.0 * dot * n.z - v.z); 4.488 - return res; 4.489 -} 4.490 - 4.491 -struct vec3 cross_product(struct vec3 v1, struct vec3 v2) { 4.492 - struct vec3 res; 4.493 - res.x = v1.y * v2.z - v1.z * v2.y; 4.494 - res.y = v1.z * v2.x - v1.x * v2.z; 4.495 - res.z = v1.x * v2.y - v1.y * v2.x; 4.496 - return res; 4.497 -} 4.498 - 4.499 -/* determine the primary ray corresponding to the specified pixel (x, y) */ 4.500 -struct ray get_primary_ray(int x, int y, int sample) { 4.501 - struct ray ray; 4.502 - float m[3][3]; 4.503 - struct vec3 i, j = {0, 1, 0}, k, dir, orig, foo; 4.504 - 4.505 - k.x = cam.targ.x - cam.pos.x; 4.506 - k.y = cam.targ.y - cam.pos.y; 4.507 - k.z = cam.targ.z - cam.pos.z; 4.508 - NORMALIZE(k); 4.509 - 4.510 - i = cross_product(j, k); 4.511 - j = cross_product(k, i); 4.512 - m[0][0] = i.x; m[0][1] = j.x; m[0][2] = k.x; 4.513 - m[1][0] = i.y; m[1][1] = j.y; m[1][2] = k.y; 4.514 - m[2][0] = i.z; m[2][1] = j.z; m[2][2] = k.z; 4.515 - 4.516 - ray.orig.x = ray.orig.y = ray.orig.z = 0.0; 4.517 - ray.dir = get_sample_pos(x, y, sample); 4.518 - ray.dir.z = 1.0 / HALF_FOV; 4.519 - ray.dir.x *= RAY_MAG; 4.520 - ray.dir.y *= RAY_MAG; 4.521 - ray.dir.z *= RAY_MAG; 4.522 - 4.523 - dir.x = ray.dir.x + ray.orig.x; 4.524 - dir.y = ray.dir.y + ray.orig.y; 4.525 - dir.z = ray.dir.z + ray.orig.z; 4.526 - foo.x = dir.x * m[0][0] + dir.y * m[0][1] + dir.z * m[0][2]; 4.527 - foo.y = dir.x * m[1][0] + dir.y * m[1][1] + dir.z * m[1][2]; 4.528 - foo.z = dir.x * m[2][0] + dir.y * m[2][1] + dir.z * m[2][2]; 4.529 - 4.530 - orig.x = ray.orig.x * m[0][0] + ray.orig.y * m[0][1] + ray.orig.z * m[0][2] + cam.pos.x; 4.531 - orig.y = ray.orig.x * m[1][0] + ray.orig.y * m[1][1] + ray.orig.z * m[1][2] + cam.pos.y; 4.532 - orig.z = ray.orig.x * m[2][0] + ray.orig.y * m[2][1] + ray.orig.z * m[2][2] + cam.pos.z; 4.533 - 4.534 - ray.orig = orig; 4.535 - ray.dir.x = foo.x + orig.x; 4.536 - ray.dir.y = foo.y + orig.y; 4.537 - ray.dir.z = foo.z + orig.z; 4.538 - 4.539 - return ray; 4.540 -} 4.541 - 4.542 - 4.543 -struct vec3 get_sample_pos(int x, int y, int sample) { 4.544 - struct vec3 pt; 4.545 - static double sf = 0.0; 4.546 - 4.547 - if(sf == 0.0) { 4.548 - sf = 1.5 / (double)xres; 4.549 - } 4.550 - 4.551 - pt.x = ((double)x / (double)xres) - 0.5; 4.552 - pt.y = -(((double)y / (double)yres) - 0.65) / aspect; 4.553 - 4.554 - if(sample) { 4.555 - struct vec3 jt = jitter(x, y, sample); 4.556 - pt.x += jt.x * sf; 4.557 - pt.y += jt.y * sf / aspect; 4.558 - } 4.559 - return pt; 4.560 -} 4.561 - 4.562 -/* jitter function taken from Graphics Gems I. */ 4.563 -struct vec3 jitter(int x, int y, int s) { 4.564 - struct vec3 pt; 4.565 - pt.x = urand[(x + (y << 2) + irand[(x + s) & MASK]) & MASK].x; 4.566 - pt.y = urand[(y + (x << 2) + irand[(y + s) & MASK]) & MASK].y; 4.567 - return pt; 4.568 -} 4.569 - 4.570 -/* Calculate ray-sphere intersection, and return {1, 0} to signify hit or no hit. 4.571 - * Also the surface point parameters like position, normal, etc are returned through 4.572 - * the sp pointer if it is not NULL. 4.573 - */ 4.574 -int ray_sphere(const struct sphere *sph, struct ray ray, struct spoint *sp) { 4.575 - double a, b, c, d, sqrt_d, t1, t2; 4.576 - 4.577 - a = SQ(ray.dir.x) + SQ(ray.dir.y) + SQ(ray.dir.z); 4.578 - b = 2.0 * ray.dir.x * (ray.orig.x - sph->pos.x) + 4.579 - 2.0 * ray.dir.y * (ray.orig.y - sph->pos.y) + 4.580 - 2.0 * ray.dir.z * (ray.orig.z - sph->pos.z); 4.581 - c = SQ(sph->pos.x) + SQ(sph->pos.y) + SQ(sph->pos.z) + 4.582 - SQ(ray.orig.x) + SQ(ray.orig.y) + SQ(ray.orig.z) + 4.583 - 2.0 * (-sph->pos.x * ray.orig.x - sph->pos.y * ray.orig.y - sph->pos.z * ray.orig.z) - SQ(sph->rad); 4.584 - 4.585 - if((d = SQ(b) - 4.0 * a * c) < 0.0) return 0; 4.586 - 4.587 - sqrt_d = sqrt(d); 4.588 - t1 = (-b + sqrt_d) / (2.0 * a); 4.589 - t2 = (-b - sqrt_d) / (2.0 * a); 4.590 - 4.591 - if((t1 < ERR_MARGIN && t2 < ERR_MARGIN) || (t1 > 1.0 && t2 > 1.0)) return 0; 4.592 - 4.593 - if(sp) { 4.594 - if(t1 < ERR_MARGIN) t1 = t2; 4.595 - if(t2 < ERR_MARGIN) t2 = t1; 4.596 - sp->dist = t1 < t2 ? t1 : t2; 4.597 - 4.598 - sp->pos.x = ray.orig.x + ray.dir.x * sp->dist; 4.599 - sp->pos.y = ray.orig.y + ray.dir.y * sp->dist; 4.600 - sp->pos.z = ray.orig.z + ray.dir.z * sp->dist; 4.601 - 4.602 - sp->normal.x = (sp->pos.x - sph->pos.x) / sph->rad; 4.603 - sp->normal.y = (sp->pos.y - sph->pos.y) / sph->rad; 4.604 - sp->normal.z = (sp->pos.z - sph->pos.z) / sph->rad; 4.605 - 4.606 - sp->vref = reflect(ray.dir, sp->normal); 4.607 - NORMALIZE(sp->vref); 4.608 - } 4.609 - return 1; 4.610 -} 4.611 - 4.612 -/* Load the scene from an extremely simple scene description file */ 4.613 -#define DELIM " \t\n" 4.614 -void load_scene(FILE *fp) { 4.615 - char line[256], *ptr, type; 4.616 - 4.617 - obj_list = malloc(sizeof(struct sphere)); 4.618 - obj_list->next = 0; 4.619 - 4.620 - while((ptr = fgets(line, 256, fp))) { 4.621 - int i; 4.622 - struct vec3 pos, col; 4.623 - double rad, spow, refl; 4.624 - 4.625 - while(*ptr == ' ' || *ptr == '\t') ptr++; 4.626 - if(*ptr == '#' || *ptr == '\n') continue; 4.627 - 4.628 - if(!(ptr = strtok(line, DELIM))) continue; 4.629 - type = *ptr; 4.630 - 4.631 - for(i=0; i<3; i++) { 4.632 - if(!(ptr = strtok(0, DELIM))) break; 4.633 - *((double*)&pos.x + i) = atof(ptr); 4.634 - } 4.635 - 4.636 - if(type == 'l') { 4.637 - lights[lnum++] = pos; 4.638 - continue; 4.639 - } 4.640 - 4.641 - if(!(ptr = strtok(0, DELIM))) continue; 4.642 - rad = atof(ptr); 4.643 - 4.644 - for(i=0; i<3; i++) { 4.645 - if(!(ptr = strtok(0, DELIM))) break; 4.646 - *((double*)&col.x + i) = atof(ptr); 4.647 - } 4.648 - 4.649 - if(type == 'c') { 4.650 - cam.pos = pos; 4.651 - cam.targ = col; 4.652 - cam.fov = rad; 4.653 - continue; 4.654 - } 4.655 - 4.656 - if(!(ptr = strtok(0, DELIM))) continue; 4.657 - spow = atof(ptr); 4.658 - 4.659 - if(!(ptr = strtok(0, DELIM))) continue; 4.660 - refl = atof(ptr); 4.661 - 4.662 - if(type == 's') { 4.663 - struct sphere *sph = malloc(sizeof *sph); 4.664 - sph->next = obj_list->next; 4.665 - obj_list->next = sph; 4.666 - 4.667 - sph->pos = pos; 4.668 - sph->rad = rad; 4.669 - sph->mat.col = col; 4.670 - sph->mat.spow = spow; 4.671 - sph->mat.refl = refl; 4.672 - } else { 4.673 - fprintf(stderr, "unknown type: %c\n", type); 4.674 - } 4.675 - } 4.676 -} 4.677 - 4.678 - 4.679 -/* provide a millisecond-resolution timer for each system */ 4.680 -#if defined(unix) || defined(__unix__) 4.681 -#include <time.h> 4.682 -#include <sys/time.h> 4.683 -unsigned long get_msec(void) { 4.684 - static struct timeval timeval, first_timeval; 4.685 - 4.686 - gettimeofday(&timeval, 0); 4.687 - if(first_timeval.tv_sec == 0) { 4.688 - first_timeval = timeval; 4.689 - return 0; 4.690 - } 4.691 - return (timeval.tv_sec - first_timeval.tv_sec) * 1000 + (timeval.tv_usec - first_timeval.tv_usec) / 1000; 4.692 -} 4.693 -#elif defined(__WIN32__) || defined(WIN32) 4.694 -#include <windows.h> 4.695 -unsigned long get_msec(void) { 4.696 - return GetTickCount(); 4.697 -} 4.698 -#else 4.699 -#error "I don't know how to measure time on your platform" 4.700 -#endif 4.701 - 4.702 -void thread_func(void *tdata, VirtProcr *VProc) { 4.703 - int i; 4.704 - struct thread_data *td = (struct thread_data*)tdata; 4.705 - 4.706 - VPThread__mutex_lock(start_mutex, VProc); 4.707 - while(!start) 4.708 - VPThread__cond_wait(start_cond, VProc); 4.709 - VPThread__mutex_unlock(start_mutex, VProc); 4.710 - 4.711 - for(i=0; i<td->sl_count; i++) { 4.712 - render_scanline(xres, yres, i + td->sl_start, td->pixels, rays_per_pixel); 4.713 - } 4.714 - 4.715 - VPThread__mutex_lock(end_mutex, VProc); 4.716 - end++; 4.717 - VPThread__cond_signal(end_cond, VProc); 4.718 - VPThread__mutex_unlock(end_mutex, VProc); 4.719 - 4.720 - VPThread__dissipate_thread(VProc); 4.721 -}
5.1 --- a/scene Sat Oct 22 19:27:29 2011 -0700 5.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 5.3 @@ -1,18 +0,0 @@ 5.4 -# spheres 5.5 -# position radius color shininess reflectivity 5.6 -s -1.5 -0.3 -1 0.7 1.0 0.2 0.05 50.0 0.3 5.7 -s 1.5 -0.4 0 0.6 0.1 0.85 1.0 50.0 0.4 5.8 - 5.9 -# walls 5.10 -s 0 -1000 2 999 0.1 0.2 0.6 80.0 0.5 5.11 - 5.12 -# bouncing ball 5.13 -s 0 0 2 1 0.0 0.0 0.0 60.0 0.7 5.14 - 5.15 -# lights... 5.16 -l -50 100 -50 5.17 -l 40 40 150 5.18 - 5.19 -# camera (there can be only one!) 5.20 -# position FOV target 5.21 -c 0 6 -17 45 0 -1 0
6.1 --- a/sphfract Sat Oct 22 19:27:29 2011 -0700 6.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 6.3 @@ -1,192 +0,0 @@ 6.4 -s 0 0 0 1.0 0.25 0.25 0.25 50.0 0.65 6.5 -s 1.4 0 0 0.4 0.25 0.25 0.25 50.0 0.65 6.6 -s 1.96 0 0 0.16 0.25 0.25 0.25 50.0 0.65 6.7 -s 2.184 0 0 0.064 0.25 0.25 0.25 50.0 0.65 6.8 -s 1.96 0.224 0 0.064 0.25 0.25 0.25 50.0 0.65 6.9 -s 1.96 -0.224 0 0.064 0.25 0.25 0.25 50.0 0.65 6.10 -s 1.96 0 0.224 0.064 0.25 0.25 0.25 50.0 0.65 6.11 -s 1.96 0 -0.224 0.064 0.25 0.25 0.25 50.0 0.65 6.12 -s 1.4 0.56 0 0.16 0.25 0.25 0.25 50.0 0.65 6.13 -s 1.624 0.56 0 0.064 0.25 0.25 0.25 50.0 0.65 6.14 -s 1.176 0.56 0 0.064 0.25 0.25 0.25 50.0 0.65 6.15 -s 1.4 0.784 0 0.064 0.25 0.25 0.25 50.0 0.65 6.16 -s 1.4 0.56 0.224 0.064 0.25 0.25 0.25 50.0 0.65 6.17 -s 1.4 0.56 -0.224 0.064 0.25 0.25 0.25 50.0 0.65 6.18 -s 1.4 -0.56 0 0.16 0.25 0.25 0.25 50.0 0.65 6.19 -s 1.624 -0.56 0 0.064 0.25 0.25 0.25 50.0 0.65 6.20 -s 1.176 -0.56 0 0.064 0.25 0.25 0.25 50.0 0.65 6.21 -s 1.4 -0.784 0 0.064 0.25 0.25 0.25 50.0 0.65 6.22 -s 1.4 -0.56 0.224 0.064 0.25 0.25 0.25 50.0 0.65 6.23 -s 1.4 -0.56 -0.224 0.064 0.25 0.25 0.25 50.0 0.65 6.24 -s 1.4 0 0.56 0.16 0.25 0.25 0.25 50.0 0.65 6.25 -s 1.624 0 0.56 0.064 0.25 0.25 0.25 50.0 0.65 6.26 -s 1.176 0 0.56 0.064 0.25 0.25 0.25 50.0 0.65 6.27 -s 1.4 0.224 0.56 0.064 0.25 0.25 0.25 50.0 0.65 6.28 -s 1.4 -0.224 0.56 0.064 0.25 0.25 0.25 50.0 0.65 6.29 -s 1.4 0 0.784 0.064 0.25 0.25 0.25 50.0 0.65 6.30 -s 1.4 0 -0.56 0.16 0.25 0.25 0.25 50.0 0.65 6.31 -s 1.624 0 -0.56 0.064 0.25 0.25 0.25 50.0 0.65 6.32 -s 1.176 0 -0.56 0.064 0.25 0.25 0.25 50.0 0.65 6.33 -s 1.4 0.224 -0.56 0.064 0.25 0.25 0.25 50.0 0.65 6.34 -s 1.4 -0.224 -0.56 0.064 0.25 0.25 0.25 50.0 0.65 6.35 -s 1.4 0 -0.784 0.064 0.25 0.25 0.25 50.0 0.65 6.36 -s -1.4 0 0 0.4 0.25 0.25 0.25 50.0 0.65 6.37 -s -1.96 0 0 0.16 0.25 0.25 0.25 50.0 0.65 6.38 -s -2.184 0 0 0.064 0.25 0.25 0.25 50.0 0.65 6.39 -s -1.96 0.224 0 0.064 0.25 0.25 0.25 50.0 0.65 6.40 -s -1.96 -0.224 0 0.064 0.25 0.25 0.25 50.0 0.65 6.41 -s -1.96 0 0.224 0.064 0.25 0.25 0.25 50.0 0.65 6.42 -s -1.96 0 -0.224 0.064 0.25 0.25 0.25 50.0 0.65 6.43 -s -1.4 0.56 0 0.16 0.25 0.25 0.25 50.0 0.65 6.44 -s -1.176 0.56 0 0.064 0.25 0.25 0.25 50.0 0.65 6.45 -s -1.624 0.56 0 0.064 0.25 0.25 0.25 50.0 0.65 6.46 -s -1.4 0.784 0 0.064 0.25 0.25 0.25 50.0 0.65 6.47 -s -1.4 0.56 0.224 0.064 0.25 0.25 0.25 50.0 0.65 6.48 -s -1.4 0.56 -0.224 0.064 0.25 0.25 0.25 50.0 0.65 6.49 -s -1.4 -0.56 0 0.16 0.25 0.25 0.25 50.0 0.65 6.50 -s -1.176 -0.56 0 0.064 0.25 0.25 0.25 50.0 0.65 6.51 -s -1.624 -0.56 0 0.064 0.25 0.25 0.25 50.0 0.65 6.52 -s -1.4 -0.784 0 0.064 0.25 0.25 0.25 50.0 0.65 6.53 -s -1.4 -0.56 0.224 0.064 0.25 0.25 0.25 50.0 0.65 6.54 -s -1.4 -0.56 -0.224 0.064 0.25 0.25 0.25 50.0 0.65 6.55 -s -1.4 0 0.56 0.16 0.25 0.25 0.25 50.0 0.65 6.56 -s -1.176 0 0.56 0.064 0.25 0.25 0.25 50.0 0.65 6.57 -s -1.624 0 0.56 0.064 0.25 0.25 0.25 50.0 0.65 6.58 -s -1.4 0.224 0.56 0.064 0.25 0.25 0.25 50.0 0.65 6.59 -s -1.4 -0.224 0.56 0.064 0.25 0.25 0.25 50.0 0.65 6.60 -s -1.4 0 0.784 0.064 0.25 0.25 0.25 50.0 0.65 6.61 -s -1.4 0 -0.56 0.16 0.25 0.25 0.25 50.0 0.65 6.62 -s -1.176 0 -0.56 0.064 0.25 0.25 0.25 50.0 0.65 6.63 -s -1.624 0 -0.56 0.064 0.25 0.25 0.25 50.0 0.65 6.64 -s -1.4 0.224 -0.56 0.064 0.25 0.25 0.25 50.0 0.65 6.65 -s -1.4 -0.224 -0.56 0.064 0.25 0.25 0.25 50.0 0.65 6.66 -s -1.4 0 -0.784 0.064 0.25 0.25 0.25 50.0 0.65 6.67 -s 0 1.4 0 0.4 0.25 0.25 0.25 50.0 0.65 6.68 -s 0.56 1.4 0 0.16 0.25 0.25 0.25 50.0 0.65 6.69 -s 0.784 1.4 0 0.064 0.25 0.25 0.25 50.0 0.65 6.70 -s 0.56 1.624 0 0.064 0.25 0.25 0.25 50.0 0.65 6.71 -s 0.56 1.176 0 0.064 0.25 0.25 0.25 50.0 0.65 6.72 -s 0.56 1.4 0.224 0.064 0.25 0.25 0.25 50.0 0.65 6.73 -s 0.56 1.4 -0.224 0.064 0.25 0.25 0.25 50.0 0.65 6.74 -s -0.56 1.4 0 0.16 0.25 0.25 0.25 50.0 0.65 6.75 -s -0.784 1.4 0 0.064 0.25 0.25 0.25 50.0 0.65 6.76 -s -0.56 1.624 0 0.064 0.25 0.25 0.25 50.0 0.65 6.77 -s -0.56 1.176 0 0.064 0.25 0.25 0.25 50.0 0.65 6.78 -s -0.56 1.4 0.224 0.064 0.25 0.25 0.25 50.0 0.65 6.79 -s -0.56 1.4 -0.224 0.064 0.25 0.25 0.25 50.0 0.65 6.80 -s 0 1.96 0 0.16 0.25 0.25 0.25 50.0 0.65 6.81 -s 0.224 1.96 0 0.064 0.25 0.25 0.25 50.0 0.65 6.82 -s -0.224 1.96 0 0.064 0.25 0.25 0.25 50.0 0.65 6.83 -s 0 2.184 0 0.064 0.25 0.25 0.25 50.0 0.65 6.84 -s 0 1.96 0.224 0.064 0.25 0.25 0.25 50.0 0.65 6.85 -s 0 1.96 -0.224 0.064 0.25 0.25 0.25 50.0 0.65 6.86 -s 0 1.4 0.56 0.16 0.25 0.25 0.25 50.0 0.65 6.87 -s 0.224 1.4 0.56 0.064 0.25 0.25 0.25 50.0 0.65 6.88 -s -0.224 1.4 0.56 0.064 0.25 0.25 0.25 50.0 0.65 6.89 -s 0 1.624 0.56 0.064 0.25 0.25 0.25 50.0 0.65 6.90 -s 0 1.176 0.56 0.064 0.25 0.25 0.25 50.0 0.65 6.91 -s 0 1.4 0.784 0.064 0.25 0.25 0.25 50.0 0.65 6.92 -s 0 1.4 -0.56 0.16 0.25 0.25 0.25 50.0 0.65 6.93 -s 0.224 1.4 -0.56 0.064 0.25 0.25 0.25 50.0 0.65 6.94 -s -0.224 1.4 -0.56 0.064 0.25 0.25 0.25 50.0 0.65 6.95 -s 0 1.624 -0.56 0.064 0.25 0.25 0.25 50.0 0.65 6.96 -s 0 1.176 -0.56 0.064 0.25 0.25 0.25 50.0 0.65 6.97 -s 0 1.4 -0.784 0.064 0.25 0.25 0.25 50.0 0.65 6.98 -s 0 -1.4 0 0.4 0.25 0.25 0.25 50.0 0.65 6.99 -s 0.56 -1.4 0 0.16 0.25 0.25 0.25 50.0 0.65 6.100 -s 0.784 -1.4 0 0.064 0.25 0.25 0.25 50.0 0.65 6.101 -s 0.56 -1.176 0 0.064 0.25 0.25 0.25 50.0 0.65 6.102 -s 0.56 -1.624 0 0.064 0.25 0.25 0.25 50.0 0.65 6.103 -s 0.56 -1.4 0.224 0.064 0.25 0.25 0.25 50.0 0.65 6.104 -s 0.56 -1.4 -0.224 0.064 0.25 0.25 0.25 50.0 0.65 6.105 -s -0.56 -1.4 0 0.16 0.25 0.25 0.25 50.0 0.65 6.106 -s -0.784 -1.4 0 0.064 0.25 0.25 0.25 50.0 0.65 6.107 -s -0.56 -1.176 0 0.064 0.25 0.25 0.25 50.0 0.65 6.108 -s -0.56 -1.624 0 0.064 0.25 0.25 0.25 50.0 0.65 6.109 -s -0.56 -1.4 0.224 0.064 0.25 0.25 0.25 50.0 0.65 6.110 -s -0.56 -1.4 -0.224 0.064 0.25 0.25 0.25 50.0 0.65 6.111 -s 0 -1.96 0 0.16 0.25 0.25 0.25 50.0 0.65 6.112 -s 0.224 -1.96 0 0.064 0.25 0.25 0.25 50.0 0.65 6.113 -s -0.224 -1.96 0 0.064 0.25 0.25 0.25 50.0 0.65 6.114 -s 0 -2.184 0 0.064 0.25 0.25 0.25 50.0 0.65 6.115 -s 0 -1.96 0.224 0.064 0.25 0.25 0.25 50.0 0.65 6.116 -s 0 -1.96 -0.224 0.064 0.25 0.25 0.25 50.0 0.65 6.117 -s 0 -1.4 0.56 0.16 0.25 0.25 0.25 50.0 0.65 6.118 -s 0.224 -1.4 0.56 0.064 0.25 0.25 0.25 50.0 0.65 6.119 -s -0.224 -1.4 0.56 0.064 0.25 0.25 0.25 50.0 0.65 6.120 -s 0 -1.176 0.56 0.064 0.25 0.25 0.25 50.0 0.65 6.121 -s 0 -1.624 0.56 0.064 0.25 0.25 0.25 50.0 0.65 6.122 -s 0 -1.4 0.784 0.064 0.25 0.25 0.25 50.0 0.65 6.123 -s 0 -1.4 -0.56 0.16 0.25 0.25 0.25 50.0 0.65 6.124 -s 0.224 -1.4 -0.56 0.064 0.25 0.25 0.25 50.0 0.65 6.125 -s -0.224 -1.4 -0.56 0.064 0.25 0.25 0.25 50.0 0.65 6.126 -s 0 -1.176 -0.56 0.064 0.25 0.25 0.25 50.0 0.65 6.127 -s 0 -1.624 -0.56 0.064 0.25 0.25 0.25 50.0 0.65 6.128 -s 0 -1.4 -0.784 0.064 0.25 0.25 0.25 50.0 0.65 6.129 -s 0 0 1.4 0.4 0.25 0.25 0.25 50.0 0.65 6.130 -s 0.56 0 1.4 0.16 0.25 0.25 0.25 50.0 0.65 6.131 -s 0.784 0 1.4 0.064 0.25 0.25 0.25 50.0 0.65 6.132 -s 0.56 0.224 1.4 0.064 0.25 0.25 0.25 50.0 0.65 6.133 -s 0.56 -0.224 1.4 0.064 0.25 0.25 0.25 50.0 0.65 6.134 -s 0.56 0 1.624 0.064 0.25 0.25 0.25 50.0 0.65 6.135 -s 0.56 0 1.176 0.064 0.25 0.25 0.25 50.0 0.65 6.136 -s -0.56 0 1.4 0.16 0.25 0.25 0.25 50.0 0.65 6.137 -s -0.784 0 1.4 0.064 0.25 0.25 0.25 50.0 0.65 6.138 -s -0.56 0.224 1.4 0.064 0.25 0.25 0.25 50.0 0.65 6.139 -s -0.56 -0.224 1.4 0.064 0.25 0.25 0.25 50.0 0.65 6.140 -s -0.56 0 1.624 0.064 0.25 0.25 0.25 50.0 0.65 6.141 -s -0.56 0 1.176 0.064 0.25 0.25 0.25 50.0 0.65 6.142 -s 0 0.56 1.4 0.16 0.25 0.25 0.25 50.0 0.65 6.143 -s 0.224 0.56 1.4 0.064 0.25 0.25 0.25 50.0 0.65 6.144 -s -0.224 0.56 1.4 0.064 0.25 0.25 0.25 50.0 0.65 6.145 -s 0 0.784 1.4 0.064 0.25 0.25 0.25 50.0 0.65 6.146 -s 0 0.56 1.624 0.064 0.25 0.25 0.25 50.0 0.65 6.147 -s 0 0.56 1.176 0.064 0.25 0.25 0.25 50.0 0.65 6.148 -s 0 -0.56 1.4 0.16 0.25 0.25 0.25 50.0 0.65 6.149 -s 0.224 -0.56 1.4 0.064 0.25 0.25 0.25 50.0 0.65 6.150 -s -0.224 -0.56 1.4 0.064 0.25 0.25 0.25 50.0 0.65 6.151 -s 0 -0.784 1.4 0.064 0.25 0.25 0.25 50.0 0.65 6.152 -s 0 -0.56 1.624 0.064 0.25 0.25 0.25 50.0 0.65 6.153 -s 0 -0.56 1.176 0.064 0.25 0.25 0.25 50.0 0.65 6.154 -s 0 0 1.96 0.16 0.25 0.25 0.25 50.0 0.65 6.155 -s 0.224 0 1.96 0.064 0.25 0.25 0.25 50.0 0.65 6.156 -s -0.224 0 1.96 0.064 0.25 0.25 0.25 50.0 0.65 6.157 -s 0 0.224 1.96 0.064 0.25 0.25 0.25 50.0 0.65 6.158 -s 0 -0.224 1.96 0.064 0.25 0.25 0.25 50.0 0.65 6.159 -s 0 0 2.184 0.064 0.25 0.25 0.25 50.0 0.65 6.160 -s 0 0 -1.4 0.4 0.25 0.25 0.25 50.0 0.65 6.161 -s 0.56 0 -1.4 0.16 0.25 0.25 0.25 50.0 0.65 6.162 -s 0.784 0 -1.4 0.064 0.25 0.25 0.25 50.0 0.65 6.163 -s 0.56 0.224 -1.4 0.064 0.25 0.25 0.25 50.0 0.65 6.164 -s 0.56 -0.224 -1.4 0.064 0.25 0.25 0.25 50.0 0.65 6.165 -s 0.56 0 -1.176 0.064 0.25 0.25 0.25 50.0 0.65 6.166 -s 0.56 0 -1.624 0.064 0.25 0.25 0.25 50.0 0.65 6.167 -s -0.56 0 -1.4 0.16 0.25 0.25 0.25 50.0 0.65 6.168 -s -0.784 0 -1.4 0.064 0.25 0.25 0.25 50.0 0.65 6.169 -s -0.56 0.224 -1.4 0.064 0.25 0.25 0.25 50.0 0.65 6.170 -s -0.56 -0.224 -1.4 0.064 0.25 0.25 0.25 50.0 0.65 6.171 -s -0.56 0 -1.176 0.064 0.25 0.25 0.25 50.0 0.65 6.172 -s -0.56 0 -1.624 0.064 0.25 0.25 0.25 50.0 0.65 6.173 -s 0 0.56 -1.4 0.16 0.25 0.25 0.25 50.0 0.65 6.174 -s 0.224 0.56 -1.4 0.064 0.25 0.25 0.25 50.0 0.65 6.175 -s -0.224 0.56 -1.4 0.064 0.25 0.25 0.25 50.0 0.65 6.176 -s 0 0.784 -1.4 0.064 0.25 0.25 0.25 50.0 0.65 6.177 -s 0 0.56 -1.176 0.064 0.25 0.25 0.25 50.0 0.65 6.178 -s 0 0.56 -1.624 0.064 0.25 0.25 0.25 50.0 0.65 6.179 -s 0 -0.56 -1.4 0.16 0.25 0.25 0.25 50.0 0.65 6.180 -s 0.224 -0.56 -1.4 0.064 0.25 0.25 0.25 50.0 0.65 6.181 -s -0.224 -0.56 -1.4 0.064 0.25 0.25 0.25 50.0 0.65 6.182 -s 0 -0.784 -1.4 0.064 0.25 0.25 0.25 50.0 0.65 6.183 -s 0 -0.56 -1.176 0.064 0.25 0.25 0.25 50.0 0.65 6.184 -s 0 -0.56 -1.624 0.064 0.25 0.25 0.25 50.0 0.65 6.185 -s 0 0 -1.96 0.16 0.25 0.25 0.25 50.0 0.65 6.186 -s 0.224 0 -1.96 0.064 0.25 0.25 0.25 50.0 0.65 6.187 -s -0.224 0 -1.96 0.064 0.25 0.25 0.25 50.0 0.65 6.188 -s 0 0.224 -1.96 0.064 0.25 0.25 0.25 50.0 0.65 6.189 -s 0 -0.224 -1.96 0.064 0.25 0.25 0.25 50.0 0.65 6.190 -s 0 0 -2.184 0.064 0.25 0.25 0.25 50.0 0.65 6.191 -s 0 -10002.25 0 10000 0.2 0.35 0.5 80.0 0.4 6.192 -s 0 10100.00 0 10000 0.5 0.2 0.1 40.0 0.0 6.193 -l -50 68 -50 6.194 -l 40 40 150 6.195 -c -7 6 -12 45 0 -0.65 0
7.1 --- a/src/Application/main.c Sat Oct 22 19:27:29 2011 -0700 7.2 +++ b/src/Application/main.c Fri Oct 28 06:56:35 2011 -0700 7.3 @@ -102,14 +102,17 @@ 7.4 7.5 7.6 7.7 -union workload{ 7.8 +union timeStamp{ 7.9 uint32 highLow[2]; 7.10 uint64 total; 7.11 }; 7.12 7.13 struct input_t{ 7.14 struct barrier_t* barrier; 7.15 - uint64 workcycles; 7.16 + uint64 totalWorkCycles; 7.17 + uint64 workPlusMutexCycles; 7.18 + union timeStamp startTime; 7.19 + union timeStamp endTime; 7.20 }; 7.21 7.22 7.23 @@ -126,39 +129,43 @@ 7.24 * Workload 7.25 */ 7.26 void work(void* input, VirtProcr* animatingPr) 7.27 -{ 7.28 - int n,m; 7.29 - struct input_t* in = (struct input_t*)input; 7.30 - unsigned int totalCycles = 0; 7.31 - unsigned int workspace1; 7.32 - double workspace2; 7.33 - int32 privateMutex = VPThread__make_mutex(animatingPr); 7.34 + { 7.35 + int n,m; 7.36 + struct input_t* in = (struct input_t*)input; 7.37 + unsigned int totalWorkCycles = 0; 7.38 + unsigned int workspace1; 7.39 + double workspace2; 7.40 + int32 privateMutex = VPThread__make_mutex(animatingPr); 7.41 7.42 - for(m=0; m<repetitions; m++) 7.43 + saveTimeStampCountInto(in->startTime.highLow[0], in->startTime.highLow[1]); 7.44 + for(m=0; m<repetitions; m++) 7.45 { 7.46 - int32 stamp_startWorkload, stamp_endWorkload; 7.47 - saveLowTimeStampCountInto( stamp_startWorkload ); 7.48 - for(n=0; n<workload_size; n++) 7.49 - { 7.50 - workspace1 += (workspace1 + 32)/2; 7.51 - workspace2 += (workspace2 + 23.2)/1.4; 7.52 - } 7.53 - saveLowTimeStampCountInto( stamp_endWorkload ); 7.54 - int32 numCycles = stamp_endWorkload-stamp_startWorkload; 7.55 - if( numCycles < 100000000 ) totalCycles += numCycles; //sanity check 7.56 - 7.57 - VPThread__mutex_lock(privateMutex, animatingPr); 7.58 - //access queue 7.59 - VPThread__mutex_unlock(privateMutex, animatingPr); 7.60 + int32 stamp_startWorkload, stamp_endWorkload; 7.61 + saveLowTimeStampCountInto( stamp_startWorkload ); 7.62 + for(n=0; n<workload_size; n++) 7.63 + { 7.64 + workspace1 += (workspace1 + 32)/2; 7.65 + workspace2 += (workspace2 + 23.2)/1.4; 7.66 + } 7.67 + saveLowTimeStampCountInto( stamp_endWorkload ); 7.68 + int32 numCycles = stamp_endWorkload-stamp_startWorkload; 7.69 + if( numCycles < 100000000 ) totalWorkCycles += numCycles;//sanity check 7.70 + 7.71 + VPThread__mutex_lock(privateMutex, animatingPr); 7.72 + //access queue 7.73 + VPThread__mutex_unlock(privateMutex, animatingPr); 7.74 } 7.75 - 7.76 - in->workcycles = totalCycles; 7.77 - barrier_wait(in->barrier,animatingPr); 7.78 - //Shutdown worker 7.79 - VPThread__dissipate_thread(animatingPr); 7.80 - printf("%d", workspace1); 7.81 - printf("%f", workspace2); 7.82 -} 7.83 + 7.84 + saveTimeStampCountInto( in->endTime.highLow[0], in->endTime.highLow[1] ); 7.85 + in->totalWorkCycles = totalWorkCycles; 7.86 + in->workPlusMutexCycles = in->endTime.total - in->startTime.total; 7.87 + barrier_wait(in->barrier, animatingPr); 7.88 + 7.89 + //Shutdown worker 7.90 + VPThread__dissipate_thread(animatingPr); 7.91 +// printf("%d", workspace1); //Should never execute! VMS bug if does 7.92 +// printf("%f", workspace2); 7.93 + } 7.94 7.95 /* this is run after the VMS is set up*/ 7.96 void benchmark(void *in, VirtProcr *animatingPr) 7.97 @@ -172,19 +179,24 @@ 7.98 { input[i].barrier = &barr; 7.99 } 7.100 7.101 - union workload stamp_startThread, stamp_endThread; 7.102 - saveTimeStampCountInto(stamp_startThread.highLow[0], stamp_startThread.highLow[1]); 7.103 + union timeStamp startBenchTime, endBenchTime; 7.104 + uint64 lastThreadFinishTime = 0L; 7.105 + saveTimeStampCountInto(startBenchTime.highLow[0], startBenchTime.highLow[1]); 7.106 for(i=0; i<num_threads; i++) 7.107 { VPThread__create_thread((VirtProcrFnPtr)work, (void*)&input[i], animatingPr); 7.108 } 7.109 barrier_wait(&barr, animatingPr); 7.110 - saveTimeStampCountInto(stamp_endThread.highLow[0], stamp_endThread.highLow[1]); 7.111 + saveTimeStampCountInto(endBenchTime.highLow[0], endBenchTime.highLow[1]); 7.112 7.113 7.114 for(i=0; i<num_threads; i++) 7.115 - { printf("Workcycles: %d\n",input[i].workcycles); 7.116 + { printf("WorkCycles: %d\n",input[i].totalWorkCycles); 7.117 + printf("Work + Sync Cycles: %lu\n", input[i].workPlusMutexCycles); 7.118 + if(input[i].endTime.total > lastThreadFinishTime) 7.119 + lastThreadFinishTime = input[i].endTime.total; 7.120 } 7.121 - printf("Total cycles %lu\n", stamp_endThread.total-stamp_startThread.total); 7.122 + printf("Time inside Barrier: %lu\n", endBenchTime.total-startBenchTime.total); 7.123 + printf("Longest Span: %lu\n", lastThreadFinishTime-startBenchTime.total); 7.124 7.125 //====================================================== 7.126
