diff c-ray-mt.c @ 0:4ae1d7ffb1ae

Initial pthreads version
author Merten Sach <msach@mailbox.tu-berlin.de>
date Wed, 03 Aug 2011 14:26:31 +0200
parents
children 3840d91821c4
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/c-ray-mt.c	Wed Aug 03 14:26:31 2011 +0200
     1.3 @@ -0,0 +1,692 @@
     1.4 +/* c-ray-mt - a simple multithreaded raytracing filter.
     1.5 + * Copyright (C) 2006 John Tsiombikas <nuclear@siggraph.org>
     1.6 + *
     1.7 + * You are free to use, modify and redistribute this program under the
     1.8 + * terms of the GNU General Public License v2 or (at your option) later.
     1.9 + * see "http://www.gnu.org/licenses/gpl.txt" for details.
    1.10 + * ---------------------------------------------------------------------
    1.11 + * Usage:
    1.12 + *   compile:  just type make
    1.13 + *              (add any arch-specific optimizations for your compiler in CFLAGS first)
    1.14 + *       run:  cat scene | ./c-ray-mt [-t num-threads] >foo.ppm
    1.15 + *              (on broken systems such as windows try: c-ray-mt -i scene -o foo.ppm)
    1.16 + *     enjoy:  display foo.ppm
    1.17 + *              (with imagemagick, or use your favorite image viewer)
    1.18 + * ---------------------------------------------------------------------
    1.19 + * Scene file format:
    1.20 + *   # sphere (many)
    1.21 + *   s  x y z  rad   r g b   shininess   reflectivity
    1.22 + *   # light (many)
    1.23 + *   l  x y z
    1.24 + *   # camera (one)
    1.25 + *   c  x y z  fov   tx ty tz
    1.26 + * ---------------------------------------------------------------------
    1.27 + */
    1.28 +#include <stdio.h>
    1.29 +#include <stdlib.h>
    1.30 +#include <string.h>
    1.31 +#include <math.h>
    1.32 +#include <ctype.h>
    1.33 +#include <errno.h>
    1.34 +#include <pthread.h>
    1.35 +#include "VPThread_lib/VPThread.h"
    1.36 +
    1.37 +#define VER_MAJOR	1
    1.38 +#define VER_MINOR	1
    1.39 +#define VER_STR		"c-ray-mt v%d.%d\n"
    1.40 +
    1.41 +#if !defined(unix) && !defined(__unix__)
    1.42 +#ifdef __MACH__
    1.43 +#define unix		1
    1.44 +#define __unix__	1
    1.45 +#endif	/* __MACH__ */
    1.46 +#endif	/* unix */
    1.47 +
    1.48 +/* find the appropriate way to define explicitly sized types */
    1.49 +/* for C99 or GNU libc (also mach's libc) we can use stdint.h */
    1.50 +#if (__STDC_VERSION__ >= 199900) || defined(__GLIBC__) || defined(__MACH__)
    1.51 +#include <stdint.h>
    1.52 +#elif defined(unix) || defined(__unix__)	/* some UNIX systems have them in sys/types.h */
    1.53 +#include <sys/types.h>
    1.54 +#elif defined(__WIN32__) || defined(WIN32)	/* the nameless one */
    1.55 +typedef unsigned __int8 uint8_t;
    1.56 +typedef unsigned __int32 uint32_t;
    1.57 +#endif	/* sized type detection */
    1.58 +
    1.59 +struct vec3 {
    1.60 +	double x, y, z;
    1.61 +};
    1.62 +
    1.63 +struct ray {
    1.64 +	struct vec3 orig, dir;
    1.65 +};
    1.66 +
    1.67 +struct material {
    1.68 +	struct vec3 col;	/* color */
    1.69 +	double spow;		/* specular power */
    1.70 +	double refl;		/* reflection intensity */
    1.71 +};
    1.72 +
    1.73 +struct sphere {
    1.74 +	struct vec3 pos;
    1.75 +	double rad;
    1.76 +	struct material mat;
    1.77 +	struct sphere *next;
    1.78 +};
    1.79 +
    1.80 +struct spoint {
    1.81 +	struct vec3 pos, normal, vref;	/* position, normal and view reflection */
    1.82 +	double dist;		/* parametric distance of intersection along the ray */
    1.83 +};
    1.84 +
    1.85 +struct camera {
    1.86 +	struct vec3 pos, targ;
    1.87 +	double fov;
    1.88 +};
    1.89 +
    1.90 +struct thread_data {
    1.91 +	pthread_t tid;
    1.92 +	int sl_start, sl_count;
    1.93 +
    1.94 +	uint32_t *pixels;
    1.95 +};
    1.96 +
    1.97 +void render_scanline(int xsz, int ysz, int sl, uint32_t *fb, int samples);
    1.98 +struct vec3 trace(struct ray ray, int depth);
    1.99 +struct vec3 shade(struct sphere *obj, struct spoint *sp, int depth);
   1.100 +struct vec3 reflect(struct vec3 v, struct vec3 n);
   1.101 +struct vec3 cross_product(struct vec3 v1, struct vec3 v2);
   1.102 +struct ray get_primary_ray(int x, int y, int sample);
   1.103 +struct vec3 get_sample_pos(int x, int y, int sample);
   1.104 +struct vec3 jitter(int x, int y, int s);
   1.105 +int ray_sphere(const struct sphere *sph, struct ray ray, struct spoint *sp);
   1.106 +void load_scene(FILE *fp);
   1.107 +unsigned long get_msec(void);
   1.108 +
   1.109 +void *thread_func(void *tdata);
   1.110 +
   1.111 +#define MAX_LIGHTS		16				/* maximum number of lights */
   1.112 +#define RAY_MAG			1000.0			/* trace rays of this magnitude */
   1.113 +#define MAX_RAY_DEPTH	5				/* raytrace recursion limit */
   1.114 +#define FOV				0.78539816		/* field of view in rads (pi/4) */
   1.115 +#define HALF_FOV		(FOV * 0.5)
   1.116 +#define ERR_MARGIN		1e-6			/* an arbitrary error margin to avoid surface acne */
   1.117 +
   1.118 +/* bit-shift ammount for packing each color into a 32bit uint */
   1.119 +#ifdef LITTLE_ENDIAN
   1.120 +#define RSHIFT	16
   1.121 +#define BSHIFT	0
   1.122 +#else	/* big endian */
   1.123 +#define RSHIFT	0
   1.124 +#define BSHIFT	16
   1.125 +#endif	/* endianess */
   1.126 +#define GSHIFT	8	/* this is the same in both byte orders */
   1.127 +
   1.128 +/* some helpful macros... */
   1.129 +#define SQ(x)		((x) * (x))
   1.130 +#define MAX(a, b)	((a) > (b) ? (a) : (b))
   1.131 +#define MIN(a, b)	((a) < (b) ? (a) : (b))
   1.132 +#define DOT(a, b)	((a).x * (b).x + (a).y * (b).y + (a).z * (b).z)
   1.133 +#define NORMALIZE(a)  do {\
   1.134 +	double len = sqrt(DOT(a, a));\
   1.135 +	(a).x /= len; (a).y /= len; (a).z /= len;\
   1.136 +} while(0);
   1.137 +
   1.138 +/* global state */
   1.139 +int xres = 800;
   1.140 +int yres = 600;
   1.141 +int rays_per_pixel = 1;
   1.142 +double aspect = 1.333333;
   1.143 +struct sphere *obj_list;
   1.144 +struct vec3 lights[MAX_LIGHTS];
   1.145 +int lnum = 0;
   1.146 +struct camera cam;
   1.147 +
   1.148 +int thread_num = 1;
   1.149 +struct thread_data *threads;
   1.150 +
   1.151 +int start = 0;
   1.152 +pthread_mutex_t start_mutex = PTHREAD_MUTEX_INITIALIZER;
   1.153 +pthread_cond_t start_cond = PTHREAD_COND_INITIALIZER;
   1.154 +
   1.155 +#define NRAN	1024
   1.156 +#define MASK	(NRAN - 1)
   1.157 +struct vec3 urand[NRAN];
   1.158 +int irand[NRAN];
   1.159 +
   1.160 +unsigned long rend_time, start_time;
   1.161 +
   1.162 +const char *usage = {
   1.163 +	"Usage: c-ray-mt [options]\n"
   1.164 +	"  Reads a scene file from stdin, writes the image to stdout, and stats to stderr.\n\n"
   1.165 +	"Options:\n"
   1.166 +	"  -t <num>   how many threads to use (default: 1)\n"
   1.167 +	"  -s WxH     where W is the width and H the height of the image\n"
   1.168 +	"  -r <rays>  shoot <rays> rays per pixel (antialiasing)\n"
   1.169 +	"  -i <file>  read from <file> instead of stdin\n"
   1.170 +	"  -o <file>  write to <file> instead of stdout\n"
   1.171 +	"  -h         this help screen\n\n"
   1.172 +};
   1.173 +
   1.174 +void raytrace(uint32_t *pixels);
   1.175 +
   1.176 +int main(int argc, char **argv) {
   1.177 +	int i;
   1.178 +	uint32_t *pixels;
   1.179 +        double sl, sl_per_thread;
   1.180 +	FILE *infile = stdin, *outfile = stdout;
   1.181 +
   1.182 +	for(i=1; i<argc; i++) {
   1.183 +		if(argv[i][0] == '-' && argv[i][2] == 0) {
   1.184 +			char *sep;
   1.185 +			switch(argv[i][1]) {
   1.186 +			case 't':
   1.187 +				if(!isdigit(argv[++i][0])) {
   1.188 +					fprintf(stderr, "-t mus be followed by the number of worker threads to spawn\n");
   1.189 +					return EXIT_FAILURE;
   1.190 +				}
   1.191 +				thread_num = atoi(argv[i]);
   1.192 +				if(!thread_num) {
   1.193 +					fprintf(stderr, "invalid number of threads specified: %d\n", thread_num);
   1.194 +					return EXIT_FAILURE;
   1.195 +				}
   1.196 +				break;
   1.197 +					
   1.198 +			case 's':
   1.199 +				if(!isdigit(argv[++i][0]) || !(sep = strchr(argv[i], 'x')) || !isdigit(*(sep + 1))) {
   1.200 +					fputs("-s must be followed by something like \"640x480\"\n", stderr);
   1.201 +					return EXIT_FAILURE;
   1.202 +				}
   1.203 +				xres = atoi(argv[i]);
   1.204 +				yres = atoi(sep + 1);
   1.205 +				aspect = (double)xres / (double)yres;
   1.206 +				break;
   1.207 +
   1.208 +			case 'i':
   1.209 +				if(!(infile = fopen(argv[++i], "rb"))) {
   1.210 +					fprintf(stderr, "failed to open input file %s: %s\n", argv[i], strerror(errno));
   1.211 +					return EXIT_FAILURE;
   1.212 +				}
   1.213 +				break;
   1.214 +
   1.215 +			case 'o':
   1.216 +				if(!(outfile = fopen(argv[++i], "wb"))) {
   1.217 +					fprintf(stderr, "failed to open output file %s: %s\n", argv[i], strerror(errno));
   1.218 +					return EXIT_FAILURE;
   1.219 +				}
   1.220 +				break;
   1.221 +
   1.222 +			case 'r':
   1.223 +				if(!isdigit(argv[++i][0])) {
   1.224 +					fputs("-r must be followed by a number (rays per pixel)\n", stderr);
   1.225 +					return EXIT_FAILURE;
   1.226 +				}
   1.227 +				rays_per_pixel = atoi(argv[i]);
   1.228 +				break;
   1.229 +
   1.230 +			case 'h':
   1.231 +				fputs(usage, stdout);
   1.232 +				return 0;
   1.233 +				
   1.234 +			default:
   1.235 +				fprintf(stderr, "unrecognized argument: %s\n", argv[i]);
   1.236 +				fputs(usage, stderr);
   1.237 +				return EXIT_FAILURE;
   1.238 +			}
   1.239 +		} else {
   1.240 +			fprintf(stderr, "unrecognized argument: %s\n", argv[i]);
   1.241 +			fputs(usage, stderr);
   1.242 +			return EXIT_FAILURE;
   1.243 +		}
   1.244 +	}
   1.245 +
   1.246 +        
   1.247 +        if(!(pixels = malloc(xres * yres * sizeof *pixels))) {
   1.248 +		perror("pixel buffer allocation failed");
   1.249 +		return EXIT_FAILURE;
   1.250 +	}
   1.251 +	load_scene(infile);
   1.252 +        
   1.253 +        raytrace(pixels);
   1.254 +	
   1.255 +	/* output statistics to stderr */
   1.256 +	fprintf(stderr, "Rendering took: %lu seconds (%lu milliseconds)\n", rend_time / 1000, rend_time);
   1.257 +
   1.258 +	/* output the image */
   1.259 +	fprintf(outfile, "P6\n%d %d\n255\n", xres, yres);
   1.260 +	for(i=0; i<xres * yres; i++) {
   1.261 +		fputc((pixels[i] >> RSHIFT) & 0xff, outfile);
   1.262 +		fputc((pixels[i] >> GSHIFT) & 0xff, outfile);
   1.263 +		fputc((pixels[i] >> BSHIFT) & 0xff, outfile);
   1.264 +	}
   1.265 +	fflush(outfile);
   1.266 +
   1.267 +	if(infile != stdin) fclose(infile);
   1.268 +	if(outfile != stdout) fclose(outfile);
   1.269 +
   1.270 +	struct sphere *walker = obj_list;
   1.271 +	while(walker) {
   1.272 +		struct sphere *tmp = walker;
   1.273 +		walker = walker->next;
   1.274 +		free(tmp);
   1.275 +	}
   1.276 +	free(pixels);
   1.277 +	free(threads);
   1.278 +	return 0;
   1.279 +}
   1.280 +
   1.281 +/* this is run after the VMS is set up*/
   1.282 +void raytrace(uint32_t *pixels)
   1.283 +{
   1.284 +    int i;
   1.285 +    double sl, sl_per_thread;
   1.286 +    
   1.287 +    /* initialize the random number tables for the jitter */
   1.288 +    for(i=0; i<NRAN; i++) urand[i].x = (double)rand() / RAND_MAX - 0.5;
   1.289 +    for(i=0; i<NRAN; i++) urand[i].y = (double)rand() / RAND_MAX - 0.5;
   1.290 +    for(i=0; i<NRAN; i++) irand[i] = (int)(NRAN * ((double)rand() / RAND_MAX));
   1.291 +
   1.292 +    if(thread_num > yres) {
   1.293 +            fprintf(stderr, "more threads than scanlines specified, reducing number of threads to %d\n", yres);
   1.294 +            thread_num = yres;
   1.295 +    }
   1.296 +
   1.297 +    if(!(threads = malloc(thread_num * sizeof *threads))) {
   1.298 +            perror("failed to allocate thread table");
   1.299 +            exit(EXIT_FAILURE);
   1.300 +    }
   1.301 +
   1.302 +    sl = 0.0;
   1.303 +    sl_per_thread = (double)yres / (double)thread_num;
   1.304 +    for(i=0; i<thread_num; i++) {
   1.305 +            threads[i].sl_start = (int)sl;
   1.306 +            sl += sl_per_thread;
   1.307 +            threads[i].sl_count = (int)sl - threads[i].sl_start;
   1.308 +            threads[i].pixels = pixels;
   1.309 +
   1.310 +            if(pthread_create(&threads[i].tid, 0, thread_func, &threads[i]) != 0) {
   1.311 +                    perror("failed to spawn thread");
   1.312 +                    exit(EXIT_FAILURE);
   1.313 +            }
   1.314 +    }
   1.315 +    threads[thread_num - 1].sl_count = yres - threads[thread_num - 1].sl_start;
   1.316 +
   1.317 +    fprintf(stderr, VER_STR, VER_MAJOR, VER_MINOR);
   1.318 +
   1.319 +    pthread_mutex_lock(&start_mutex);
   1.320 +    start_time = get_msec();
   1.321 +    start = 1;
   1.322 +    pthread_cond_broadcast(&start_cond);
   1.323 +    pthread_mutex_unlock(&start_mutex);
   1.324 +
   1.325 +    for(i=0; i<thread_num; i++) {
   1.326 +            pthread_join(threads[i].tid, 0);
   1.327 +    }
   1.328 +    rend_time = get_msec() - start_time;
   1.329 +}
   1.330 +
   1.331 +/* render a frame of xsz/ysz dimensions into the provided framebuffer */
   1.332 +void render_scanline(int xsz, int ysz, int sl, uint32_t *fb, int samples) {
   1.333 +	int i, s;
   1.334 +	double rcp_samples = 1.0 / (double)samples;
   1.335 +
   1.336 +	for(i=0; i<xsz; i++) {
   1.337 +		double r, g, b;
   1.338 +		r = g = b = 0.0;
   1.339 +			
   1.340 +		for(s=0; s<samples; s++) {
   1.341 +			struct vec3 col = trace(get_primary_ray(i, sl, s), 0);
   1.342 +			r += col.x;
   1.343 +			g += col.y;
   1.344 +			b += col.z;
   1.345 +		}
   1.346 +
   1.347 +		r = r * rcp_samples;
   1.348 +		g = g * rcp_samples;
   1.349 +		b = b * rcp_samples;
   1.350 +			
   1.351 +		fb[sl * xsz + i] = ((uint32_t)(MIN(r, 1.0) * 255.0) & 0xff) << RSHIFT |
   1.352 +							((uint32_t)(MIN(g, 1.0) * 255.0) & 0xff) << GSHIFT |
   1.353 +							((uint32_t)(MIN(b, 1.0) * 255.0) & 0xff) << BSHIFT;
   1.354 +	}
   1.355 +}
   1.356 +
   1.357 +/* trace a ray throught the scene recursively (the recursion happens through
   1.358 + * shade() to calculate reflection rays if necessary).
   1.359 + */
   1.360 +struct vec3 trace(struct ray ray, int depth) {
   1.361 +	struct vec3 col;
   1.362 +	struct spoint sp, nearest_sp;
   1.363 +	struct sphere *nearest_obj = 0;
   1.364 +	struct sphere *iter = obj_list->next;
   1.365 +
   1.366 +	/* if we reached the recursion limit, bail out */
   1.367 +	if(depth >= MAX_RAY_DEPTH) {
   1.368 +		col.x = col.y = col.z = 0.0;
   1.369 +		return col;
   1.370 +	}
   1.371 +	
   1.372 +	/* find the nearest intersection ... */
   1.373 +	while(iter) {
   1.374 +		if(ray_sphere(iter, ray, &sp)) {
   1.375 +			if(!nearest_obj || sp.dist < nearest_sp.dist) {
   1.376 +				nearest_obj = iter;
   1.377 +				nearest_sp = sp;
   1.378 +			}
   1.379 +		}
   1.380 +		iter = iter->next;
   1.381 +	}
   1.382 +
   1.383 +	/* and perform shading calculations as needed by calling shade() */
   1.384 +	if(nearest_obj) {
   1.385 +		col = shade(nearest_obj, &nearest_sp, depth);
   1.386 +	} else {
   1.387 +		col.x = col.y = col.z = 0.0;
   1.388 +	}
   1.389 +
   1.390 +	return col;
   1.391 +}
   1.392 +
   1.393 +/* Calculates direct illumination with the phong reflectance model.
   1.394 + * Also handles reflections by calling trace again, if necessary.
   1.395 + */
   1.396 +struct vec3 shade(struct sphere *obj, struct spoint *sp, int depth) {
   1.397 +	int i;
   1.398 +	struct vec3 col = {0, 0, 0};
   1.399 +
   1.400 +	/* for all lights ... */
   1.401 +	for(i=0; i<lnum; i++) {
   1.402 +		double ispec, idiff;
   1.403 +		struct vec3 ldir;
   1.404 +		struct ray shadow_ray;
   1.405 +		struct sphere *iter = obj_list->next;
   1.406 +		int in_shadow = 0;
   1.407 +
   1.408 +		ldir.x = lights[i].x - sp->pos.x;
   1.409 +		ldir.y = lights[i].y - sp->pos.y;
   1.410 +		ldir.z = lights[i].z - sp->pos.z;
   1.411 +
   1.412 +		shadow_ray.orig = sp->pos;
   1.413 +		shadow_ray.dir = ldir;
   1.414 +
   1.415 +		/* shoot shadow rays to determine if we have a line of sight with the light */
   1.416 +		while(iter) {
   1.417 +			if(ray_sphere(iter, shadow_ray, 0)) {
   1.418 +				in_shadow = 1;
   1.419 +				break;
   1.420 +			}
   1.421 +			iter = iter->next;
   1.422 +		}
   1.423 +
   1.424 +		/* and if we're not in shadow, calculate direct illumination with the phong model. */
   1.425 +		if(!in_shadow) {
   1.426 +			NORMALIZE(ldir);
   1.427 +
   1.428 +			idiff = MAX(DOT(sp->normal, ldir), 0.0);
   1.429 +			ispec = obj->mat.spow > 0.0 ? pow(MAX(DOT(sp->vref, ldir), 0.0), obj->mat.spow) : 0.0;
   1.430 +
   1.431 +			col.x += idiff * obj->mat.col.x + ispec;
   1.432 +			col.y += idiff * obj->mat.col.y + ispec;
   1.433 +			col.z += idiff * obj->mat.col.z + ispec;
   1.434 +		}
   1.435 +	}
   1.436 +
   1.437 +	/* Also, if the object is reflective, spawn a reflection ray, and call trace()
   1.438 +	 * to calculate the light arriving from the mirror direction.
   1.439 +	 */
   1.440 +	if(obj->mat.refl > 0.0) {
   1.441 +		struct ray ray;
   1.442 +		struct vec3 rcol;
   1.443 +
   1.444 +		ray.orig = sp->pos;
   1.445 +		ray.dir = sp->vref;
   1.446 +		ray.dir.x *= RAY_MAG;
   1.447 +		ray.dir.y *= RAY_MAG;
   1.448 +		ray.dir.z *= RAY_MAG;
   1.449 +
   1.450 +		rcol = trace(ray, depth + 1);
   1.451 +		col.x += rcol.x * obj->mat.refl;
   1.452 +		col.y += rcol.y * obj->mat.refl;
   1.453 +		col.z += rcol.z * obj->mat.refl;
   1.454 +	}
   1.455 +
   1.456 +	return col;
   1.457 +}
   1.458 +
   1.459 +/* calculate reflection vector */
   1.460 +struct vec3 reflect(struct vec3 v, struct vec3 n) {
   1.461 +	struct vec3 res;
   1.462 +	double dot = v.x * n.x + v.y * n.y + v.z * n.z;
   1.463 +	res.x = -(2.0 * dot * n.x - v.x);
   1.464 +	res.y = -(2.0 * dot * n.y - v.y);
   1.465 +	res.z = -(2.0 * dot * n.z - v.z);
   1.466 +	return res;
   1.467 +}
   1.468 +
   1.469 +struct vec3 cross_product(struct vec3 v1, struct vec3 v2) {
   1.470 +	struct vec3 res;
   1.471 +	res.x = v1.y * v2.z - v1.z * v2.y;
   1.472 +	res.y = v1.z * v2.x - v1.x * v2.z;
   1.473 +	res.z = v1.x * v2.y - v1.y * v2.x;
   1.474 +	return res;
   1.475 +}
   1.476 +
   1.477 +/* determine the primary ray corresponding to the specified pixel (x, y) */
   1.478 +struct ray get_primary_ray(int x, int y, int sample) {
   1.479 +	struct ray ray;
   1.480 +	float m[3][3];
   1.481 +	struct vec3 i, j = {0, 1, 0}, k, dir, orig, foo;
   1.482 +
   1.483 +	k.x = cam.targ.x - cam.pos.x;
   1.484 +	k.y = cam.targ.y - cam.pos.y;
   1.485 +	k.z = cam.targ.z - cam.pos.z;
   1.486 +	NORMALIZE(k);
   1.487 +
   1.488 +	i = cross_product(j, k);
   1.489 +	j = cross_product(k, i);
   1.490 +	m[0][0] = i.x; m[0][1] = j.x; m[0][2] = k.x;
   1.491 +	m[1][0] = i.y; m[1][1] = j.y; m[1][2] = k.y;
   1.492 +	m[2][0] = i.z; m[2][1] = j.z; m[2][2] = k.z;
   1.493 +	
   1.494 +	ray.orig.x = ray.orig.y = ray.orig.z = 0.0;
   1.495 +	ray.dir = get_sample_pos(x, y, sample);
   1.496 +	ray.dir.z = 1.0 / HALF_FOV;
   1.497 +	ray.dir.x *= RAY_MAG;
   1.498 +	ray.dir.y *= RAY_MAG;
   1.499 +	ray.dir.z *= RAY_MAG;
   1.500 +	
   1.501 +	dir.x = ray.dir.x + ray.orig.x;
   1.502 +	dir.y = ray.dir.y + ray.orig.y;
   1.503 +	dir.z = ray.dir.z + ray.orig.z;
   1.504 +	foo.x = dir.x * m[0][0] + dir.y * m[0][1] + dir.z * m[0][2];
   1.505 +	foo.y = dir.x * m[1][0] + dir.y * m[1][1] + dir.z * m[1][2];
   1.506 +	foo.z = dir.x * m[2][0] + dir.y * m[2][1] + dir.z * m[2][2];
   1.507 +
   1.508 +	orig.x = ray.orig.x * m[0][0] + ray.orig.y * m[0][1] + ray.orig.z * m[0][2] + cam.pos.x;
   1.509 +	orig.y = ray.orig.x * m[1][0] + ray.orig.y * m[1][1] + ray.orig.z * m[1][2] + cam.pos.y;
   1.510 +	orig.z = ray.orig.x * m[2][0] + ray.orig.y * m[2][1] + ray.orig.z * m[2][2] + cam.pos.z;
   1.511 +
   1.512 +	ray.orig = orig;
   1.513 +	ray.dir.x = foo.x + orig.x;
   1.514 +	ray.dir.y = foo.y + orig.y;
   1.515 +	ray.dir.z = foo.z + orig.z;
   1.516 +	
   1.517 +	return ray;
   1.518 +}
   1.519 +
   1.520 +
   1.521 +struct vec3 get_sample_pos(int x, int y, int sample) {
   1.522 +	struct vec3 pt;
   1.523 +	static double sf = 0.0;
   1.524 +
   1.525 +	if(sf == 0.0) {
   1.526 +		sf = 1.5 / (double)xres;
   1.527 +	}
   1.528 +
   1.529 +	pt.x = ((double)x / (double)xres) - 0.5;
   1.530 +	pt.y = -(((double)y / (double)yres) - 0.65) / aspect;
   1.531 +
   1.532 +	if(sample) {
   1.533 +		struct vec3 jt = jitter(x, y, sample);
   1.534 +		pt.x += jt.x * sf;
   1.535 +		pt.y += jt.y * sf / aspect;
   1.536 +	}
   1.537 +	return pt;
   1.538 +}
   1.539 +
   1.540 +/* jitter function taken from Graphics Gems I. */
   1.541 +struct vec3 jitter(int x, int y, int s) {
   1.542 +	struct vec3 pt;
   1.543 +	pt.x = urand[(x + (y << 2) + irand[(x + s) & MASK]) & MASK].x;
   1.544 +	pt.y = urand[(y + (x << 2) + irand[(y + s) & MASK]) & MASK].y;
   1.545 +	return pt;
   1.546 +}
   1.547 +
   1.548 +/* Calculate ray-sphere intersection, and return {1, 0} to signify hit or no hit.
   1.549 + * Also the surface point parameters like position, normal, etc are returned through
   1.550 + * the sp pointer if it is not NULL.
   1.551 + */
   1.552 +int ray_sphere(const struct sphere *sph, struct ray ray, struct spoint *sp) {
   1.553 +	double a, b, c, d, sqrt_d, t1, t2;
   1.554 +	
   1.555 +	a = SQ(ray.dir.x) + SQ(ray.dir.y) + SQ(ray.dir.z);
   1.556 +	b = 2.0 * ray.dir.x * (ray.orig.x - sph->pos.x) +
   1.557 +				2.0 * ray.dir.y * (ray.orig.y - sph->pos.y) +
   1.558 +				2.0 * ray.dir.z * (ray.orig.z - sph->pos.z);
   1.559 +	c = SQ(sph->pos.x) + SQ(sph->pos.y) + SQ(sph->pos.z) +
   1.560 +				SQ(ray.orig.x) + SQ(ray.orig.y) + SQ(ray.orig.z) +
   1.561 +				2.0 * (-sph->pos.x * ray.orig.x - sph->pos.y * ray.orig.y - sph->pos.z * ray.orig.z) - SQ(sph->rad);
   1.562 +	
   1.563 +	if((d = SQ(b) - 4.0 * a * c) < 0.0) return 0;
   1.564 +
   1.565 +	sqrt_d = sqrt(d);
   1.566 +	t1 = (-b + sqrt_d) / (2.0 * a);
   1.567 +	t2 = (-b - sqrt_d) / (2.0 * a);
   1.568 +
   1.569 +	if((t1 < ERR_MARGIN && t2 < ERR_MARGIN) || (t1 > 1.0 && t2 > 1.0)) return 0;
   1.570 +
   1.571 +	if(sp) {
   1.572 +		if(t1 < ERR_MARGIN) t1 = t2;
   1.573 +		if(t2 < ERR_MARGIN) t2 = t1;
   1.574 +		sp->dist = t1 < t2 ? t1 : t2;
   1.575 +		
   1.576 +		sp->pos.x = ray.orig.x + ray.dir.x * sp->dist;
   1.577 +		sp->pos.y = ray.orig.y + ray.dir.y * sp->dist;
   1.578 +		sp->pos.z = ray.orig.z + ray.dir.z * sp->dist;
   1.579 +		
   1.580 +		sp->normal.x = (sp->pos.x - sph->pos.x) / sph->rad;
   1.581 +		sp->normal.y = (sp->pos.y - sph->pos.y) / sph->rad;
   1.582 +		sp->normal.z = (sp->pos.z - sph->pos.z) / sph->rad;
   1.583 +
   1.584 +		sp->vref = reflect(ray.dir, sp->normal);
   1.585 +		NORMALIZE(sp->vref);
   1.586 +	}
   1.587 +	return 1;
   1.588 +}
   1.589 +
   1.590 +/* Load the scene from an extremely simple scene description file */
   1.591 +#define DELIM	" \t\n"
   1.592 +void load_scene(FILE *fp) {
   1.593 +	char line[256], *ptr, type;
   1.594 +
   1.595 +	obj_list = malloc(sizeof(struct sphere));
   1.596 +	obj_list->next = 0;
   1.597 +	
   1.598 +	while((ptr = fgets(line, 256, fp))) {
   1.599 +		int i;
   1.600 +		struct vec3 pos, col;
   1.601 +		double rad, spow, refl;
   1.602 +		
   1.603 +		while(*ptr == ' ' || *ptr == '\t') ptr++;
   1.604 +		if(*ptr == '#' || *ptr == '\n') continue;
   1.605 +
   1.606 +		if(!(ptr = strtok(line, DELIM))) continue;
   1.607 +		type = *ptr;
   1.608 +		
   1.609 +		for(i=0; i<3; i++) {
   1.610 +			if(!(ptr = strtok(0, DELIM))) break;
   1.611 +			*((double*)&pos.x + i) = atof(ptr);
   1.612 +		}
   1.613 +
   1.614 +		if(type == 'l') {
   1.615 +			lights[lnum++] = pos;
   1.616 +			continue;
   1.617 +		}
   1.618 +
   1.619 +		if(!(ptr = strtok(0, DELIM))) continue;
   1.620 +		rad = atof(ptr);
   1.621 +
   1.622 +		for(i=0; i<3; i++) {
   1.623 +			if(!(ptr = strtok(0, DELIM))) break;
   1.624 +			*((double*)&col.x + i) = atof(ptr);
   1.625 +		}
   1.626 +
   1.627 +		if(type == 'c') {
   1.628 +			cam.pos = pos;
   1.629 +			cam.targ = col;
   1.630 +			cam.fov = rad;
   1.631 +			continue;
   1.632 +		}
   1.633 +
   1.634 +		if(!(ptr = strtok(0, DELIM))) continue;
   1.635 +		spow = atof(ptr);
   1.636 +
   1.637 +		if(!(ptr = strtok(0, DELIM))) continue;
   1.638 +		refl = atof(ptr);
   1.639 +
   1.640 +		if(type == 's') {
   1.641 +			struct sphere *sph = malloc(sizeof *sph);
   1.642 +			sph->next = obj_list->next;
   1.643 +			obj_list->next = sph;
   1.644 +
   1.645 +			sph->pos = pos;
   1.646 +			sph->rad = rad;
   1.647 +			sph->mat.col = col;
   1.648 +			sph->mat.spow = spow;
   1.649 +			sph->mat.refl = refl;
   1.650 +		} else {
   1.651 +			fprintf(stderr, "unknown type: %c\n", type);
   1.652 +		}
   1.653 +	}
   1.654 +}
   1.655 +
   1.656 +
   1.657 +/* provide a millisecond-resolution timer for each system */
   1.658 +#if defined(unix) || defined(__unix__)
   1.659 +#include <time.h>
   1.660 +#include <sys/time.h>
   1.661 +unsigned long get_msec(void) {
   1.662 +	static struct timeval timeval, first_timeval;
   1.663 +	
   1.664 +	gettimeofday(&timeval, 0);
   1.665 +	if(first_timeval.tv_sec == 0) {
   1.666 +		first_timeval = timeval;
   1.667 +		return 0;
   1.668 +	}
   1.669 +	return (timeval.tv_sec - first_timeval.tv_sec) * 1000 + (timeval.tv_usec - first_timeval.tv_usec) / 1000;
   1.670 +}
   1.671 +#elif defined(__WIN32__) || defined(WIN32)
   1.672 +#include <windows.h>
   1.673 +unsigned long get_msec(void) {
   1.674 +	return GetTickCount();
   1.675 +}
   1.676 +#else
   1.677 +#error "I don't know how to measure time on your platform"
   1.678 +#endif
   1.679 +
   1.680 +void *thread_func(void *tdata) {
   1.681 +	int i;
   1.682 +	struct thread_data *td = (struct thread_data*)tdata;
   1.683 +
   1.684 +	pthread_mutex_lock(&start_mutex);
   1.685 +	while(!start) {
   1.686 +		pthread_cond_wait(&start_cond, &start_mutex);
   1.687 +	}
   1.688 +	pthread_mutex_unlock(&start_mutex);
   1.689 +
   1.690 +	for(i=0; i<td->sl_count; i++) {
   1.691 +		render_scanline(xres, yres, i + td->sl_start, td->pixels, rays_per_pixel);
   1.692 +	}
   1.693 +
   1.694 +	return 0;
   1.695 +}