msach@0: /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ msach@0: /* File: pthreads_main.c (an OpenMP version) */ msach@0: /* Description: This program shows an example on how to call a subroutine */ msach@0: /* that implements a simple k-means clustering algorithm */ msach@0: /* based on Euclid distance. */ msach@0: /* Input file format: */ msach@0: /* ascii file: each line contains 1 data object */ msach@0: /* binary file: first 4-byte integer is the number of data */ msach@0: /* objects and 2nd integer is the no. of features (or */ msach@0: /* coordinates) of each object */ msach@0: /* */ msach@0: /* Author: Wei-keng Liao */ msach@0: /* ECE Department Northwestern University */ msach@0: /* email: wkliao@ece.northwestern.edu */ msach@0: /* Copyright, 2005, Wei-keng Liao */ msach@0: /* */ msach@0: /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ msach@0: msach@0: #include msach@0: #include msach@0: #include /* strtok() */ msach@0: #include /* open() */ msach@0: #include msach@0: #include msach@0: #include msach@0: #include /* getopt() */ msach@0: #include msach@0: #include "kmeans.h" msach@0: msach@0: #include "SSR_lib/SSR.h" msach@0: msach@0: char __ProgrammName[] = "kmeans"; msach@0: char __DataSet[255]; msach@0: msach@0: #define seconds(tm) gettimeofday(&tp,(struct timezone *)0);\ msach@0: tm=tp.tv_sec+tp.tv_usec/1000000.0 msach@0: msach@0: struct timeval tp; msach@0: msach@0: int numClusters, numCoords, numObjs, nthreads; msach@0: msach@0: /* msach@0: * Function: usage msach@0: * --------------- msach@0: * Prints information on how to call the program. msach@0: */ msach@0: static void usage(char *argv0) { msach@0: char *help = msach@0: "Usage: %s [switches] -i filename -n num_clusters [OPTIONS]\n" msach@0: " -i filename : file containing data to be clustered\n" msach@0: " -b : input file is in binary format (default no)\n" msach@0: " -n num_clusters: number of clusters (K must be > 1)\n" msach@0: " -p nproc : number of threads (default 1)\n" msach@0: " -o filename : write output to file\n"; msach@0: fprintf(stderr, help, argv0); msach@0: exit(-1); msach@0: } msach@0: msach@0: /*---< main() >-------------------------------------------------------------*/ msach@0: int main(int argc, char **argv) { msach@0: int opt; msach@0: extern char *optarg; msach@0: extern int optind; msach@0: int j; msach@0: int isBinaryFile; msach@0: msach@0: int *membership; /* [numObjs] */ msach@0: char *filename, *outfile; msach@0: double **objects; /* [numObjs][numCoords] data objects */ msach@0: double **clusters; /* [numClusters][numCoords] cluster center */ msach@0: double threshold; msach@0: double timing, io_timing, clustering_timing; msach@0: msach@0: /* some default values */ msach@0: nthreads = 1; /* Amount of threads to use */ msach@0: numClusters = 1; /* Amount of cluster centers */ msach@0: threshold = 0.001; /* Percentage of objects that need to change membership for the clusting to continue */ msach@0: isBinaryFile = 0; /* 0 if the input file is in ASCII format, 1 for binary format */ msach@0: filename = NULL; /* Name of the input file */ msach@0: outfile = NULL; /* Name of the output file */ msach@0: msach@0: /* Parse command line options */ msach@0: while ( (opt=getopt(argc,argv,"o:p:i:n:t:bh"))!= EOF) { msach@0: switch (opt) { msach@0: case 'i': filename=optarg; msach@0: break; msach@0: case 'b': isBinaryFile = 1; msach@0: break; msach@0: case 'n': numClusters = atoi(optarg); msach@0: break; msach@0: case 'p': nthreads = atoi(optarg); msach@0: break; msach@0: case 'h': usage(argv[0]); msach@0: break; msach@0: case 'o': outfile=optarg; msach@0: break; msach@0: default: usage(argv[0]); msach@0: break; msach@0: } msach@0: } msach@0: msach@0: if (filename == NULL) usage(argv[0]); msach@0: msach@0: seconds(io_timing); msach@0: msach@0: /* Read input data points from given input file */ msach@0: objects = file_read(isBinaryFile, filename, &numObjs, &numCoords); msach@0: assert(objects != NULL); msach@0: msach@0: seconds(timing); msach@0: io_timing = timing - io_timing; msach@0: clustering_timing = timing; msach@0: msach@0: membership = (int*) malloc(numObjs * sizeof(int)); msach@0: assert(membership != NULL); msach@0: msach@0: clusters = malloc(numClusters * sizeof(double*)); msach@0: assert(clusters != NULL); msach@0: clusters[0] = malloc(numClusters * numCoords * sizeof(double)); msach@0: assert(clusters[0] != NULL); msach@0: msach@0: struct call_data data = { 0, objects, numCoords, numObjs, msach@0: numClusters, threshold, membership, clusters }; msach@0: msach@0: /* Launch the core computation algorithm */ msach@0: SSR__create_seed_procr_and_do_work(kmeans, (void*)&data); msach@0: msach@0: free(objects[0]); msach@0: free(objects); msach@0: msach@0: seconds(timing); msach@0: clustering_timing = timing - clustering_timing; msach@0: msach@0: /* Memory cleanup */ msach@0: free(membership); msach@0: msach@0: if(outfile != NULL) { msach@0: int l; msach@0: FILE* fp = fopen(outfile, "w"); msach@0: for(j = 0; j < numClusters; j++) { msach@0: fprintf(fp, "Cluster %d: ", j); msach@0: for(l = 0; l < numCoords; l++) msach@0: fprintf(fp, "%f ", clusters[j][l]); msach@0: fprintf(fp, "\n"); msach@0: } msach@0: fclose(fp); msach@0: } msach@0: msach@0: free(clusters[0]); msach@0: free(clusters); msach@0: msach@0: /* Print performance numbers on stdout */ msach@0: double t1; msach@0: io_timing += seconds(t1) - timing; msach@0: msach@0: printf("\n---- kMeans Clustering ----\n"); msach@0: printf("Number of threads = %d\n", nthreads); msach@0: printf("Input file: %s\n", filename); msach@0: printf("numObjs = %d\n", numObjs); msach@0: printf("numCoords = %d\n", numCoords); msach@0: printf("numClusters = %d\n", numClusters); msach@0: printf("threshold = %.4f\n", threshold); msach@0: msach@0: printf("I/O time = %10.4f sec\n", io_timing); msach@0: printf("Computation timing = %10.4f sec\n", clustering_timing); msach@0: msach@0: return(0); msach@0: } msach@0: