annotate main.c @ 0:0ce47c784647

Initial commit
author Merten Sach <msach@mailbox.tu-berlin.de>
date Tue, 27 Sep 2011 15:08:02 +0200
parents
children d906272ff3a3
rev   line source
msach@0 1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
msach@0 2 /* File: pthreads_main.c (an OpenMP version) */
msach@0 3 /* Description: This program shows an example of how to call a subroutine */
msach@0 4 /* that implements a simple k-means clustering algorithm */
msach@0 5 /* based on Euclidean distance. */
msach@0 6 /* Input file format: */
msach@0 7 /* ascii file: each line contains 1 data object */
msach@0 8 /* binary file: first 4-byte integer is the number of data */
msach@0 9 /* objects and 2nd integer is the no. of features (or */
msach@0 10 /* coordinates) of each object */
msach@0 11 /* */
msach@0 12 /* Author: Wei-keng Liao */
msach@0 13 /* ECE Department Northwestern University */
msach@0 14 /* email: wkliao@ece.northwestern.edu */
msach@0 15 /* Copyright, 2005, Wei-keng Liao */
msach@0 16 /* */
msach@0 17 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
msach@0 18
msach@0 19 #include <stdio.h>
msach@0 20 #include <stdlib.h>
msach@0 21 #include <string.h> /* strtok() */
msach@0 22 #include <sys/types.h> /* open() */
msach@0 23 #include <sys/stat.h>
msach@0 24 #include <sys/time.h>
msach@0 25 #include <fcntl.h>
msach@0 26 #include <unistd.h> /* getopt() */
msach@0 27 #include <time.h>
msach@0 28 #include "kmeans.h"
msach@0 29
msach@0 30 #include "SSR_lib/SSR.h"
msach@0 31
/*
 * Program name and data-set label, defined with external linkage.
 * Nothing in this file reads them; presumably they are consumed by the
 * SSR library -- TODO confirm against SSR_lib.
 */
char __ProgrammName[] = "kmeans";
char __DataSet[255];

/* Scratch buffer that gettimeofday() fills every time seconds() expands. */
struct timeval tp;

/*
 * seconds(tm): store the current wall-clock time in seconds (with a
 * microsecond fraction) into the lvalue tm, using the global tp.
 *
 * The expansion is two statements -- a gettimeofday() call followed by an
 * assignment -- so it must be used as a stand-alone statement, never
 * inside a larger expression.
 */
#define seconds(tm)                              \
    gettimeofday(&tp, (struct timezone *)0);     \
    tm = tp.tv_sec + tp.tv_usec / 1000000.0

/* Problem dimensions and requested thread count (external linkage). */
int numClusters;
int numCoords;
int numObjs;
int nthreads;
msach@0 41
/*
 * Function: usage
 * ---------------
 * Writes the command-line synopsis to stderr and terminates the process
 * with exit status -1. This function never returns.
 *
 *   argv0: program name substituted for the %s in the synopsis.
 */
static void usage(char *argv0) {
    /* The text doubles as the fprintf() format; its only conversion is %s. */
    static const char synopsis[] =
        "Usage: %s [switches] -i filename -n num_clusters [OPTIONS]\n"
        " -i filename : file containing data to be clustered\n"
        " -b : input file is in binary format (default no)\n"
        " -n num_clusters: number of clusters (K must be > 1)\n"
        " -p nproc : number of threads (default 1)\n"
        " -o filename : write output to file\n";

    fprintf(stderr, synopsis, argv0);
    exit(-1);
}
msach@0 58
msach@0 59 /*---< main() >-------------------------------------------------------------*/
msach@0 60 int main(int argc, char **argv) {
msach@0 61 int opt;
msach@0 62 extern char *optarg;
msach@0 63 extern int optind;
msach@0 64 int j;
msach@0 65 int isBinaryFile;
msach@0 66
msach@0 67 int *membership; /* [numObjs] */
msach@0 68 char *filename, *outfile;
msach@0 69 double **objects; /* [numObjs][numCoords] data objects */
msach@0 70 double **clusters; /* [numClusters][numCoords] cluster center */
msach@0 71 double threshold;
msach@0 72 double timing, io_timing, clustering_timing;
msach@0 73
msach@0 74 /* some default values */
msach@0 75 nthreads = 1; /* Amount of threads to use */
msach@0 76 numClusters = 1; /* Amount of cluster centers */
msach@0 77 threshold = 0.001; /* Percentage of objects that need to change membership for the clusting to continue */
msach@0 78 isBinaryFile = 0; /* 0 if the input file is in ASCII format, 1 for binary format */
msach@0 79 filename = NULL; /* Name of the input file */
msach@0 80 outfile = NULL; /* Name of the output file */
msach@0 81
msach@0 82 /* Parse command line options */
msach@0 83 while ( (opt=getopt(argc,argv,"o:p:i:n:t:bh"))!= EOF) {
msach@0 84 switch (opt) {
msach@0 85 case 'i': filename=optarg;
msach@0 86 break;
msach@0 87 case 'b': isBinaryFile = 1;
msach@0 88 break;
msach@0 89 case 'n': numClusters = atoi(optarg);
msach@0 90 break;
msach@0 91 case 'p': nthreads = atoi(optarg);
msach@0 92 break;
msach@0 93 case 'h': usage(argv[0]);
msach@0 94 break;
msach@0 95 case 'o': outfile=optarg;
msach@0 96 break;
msach@0 97 default: usage(argv[0]);
msach@0 98 break;
msach@0 99 }
msach@0 100 }
msach@0 101
msach@0 102 if (filename == NULL) usage(argv[0]);
msach@0 103
msach@0 104 seconds(io_timing);
msach@0 105
msach@0 106 /* Read input data points from given input file */
msach@0 107 objects = file_read(isBinaryFile, filename, &numObjs, &numCoords);
msach@0 108 assert(objects != NULL);
msach@0 109
msach@0 110 seconds(timing);
msach@0 111 io_timing = timing - io_timing;
msach@0 112 clustering_timing = timing;
msach@0 113
msach@0 114 membership = (int*) malloc(numObjs * sizeof(int));
msach@0 115 assert(membership != NULL);
msach@0 116
msach@0 117 clusters = malloc(numClusters * sizeof(double*));
msach@0 118 assert(clusters != NULL);
msach@0 119 clusters[0] = malloc(numClusters * numCoords * sizeof(double));
msach@0 120 assert(clusters[0] != NULL);
msach@0 121
msach@0 122 struct call_data data = { 0, objects, numCoords, numObjs,
msach@0 123 numClusters, threshold, membership, clusters };
msach@0 124
msach@0 125 /* Launch the core computation algorithm */
msach@0 126 SSR__create_seed_procr_and_do_work(kmeans, (void*)&data);
msach@0 127
msach@0 128 free(objects[0]);
msach@0 129 free(objects);
msach@0 130
msach@0 131 seconds(timing);
msach@0 132 clustering_timing = timing - clustering_timing;
msach@0 133
msach@0 134 /* Memory cleanup */
msach@0 135 free(membership);
msach@0 136
msach@0 137 if(outfile != NULL) {
msach@0 138 int l;
msach@0 139 FILE* fp = fopen(outfile, "w");
msach@0 140 for(j = 0; j < numClusters; j++) {
msach@0 141 fprintf(fp, "Cluster %d: ", j);
msach@0 142 for(l = 0; l < numCoords; l++)
msach@0 143 fprintf(fp, "%f ", clusters[j][l]);
msach@0 144 fprintf(fp, "\n");
msach@0 145 }
msach@0 146 fclose(fp);
msach@0 147 }
msach@0 148
msach@0 149 free(clusters[0]);
msach@0 150 free(clusters);
msach@0 151
msach@0 152 /* Print performance numbers on stdout */
msach@0 153 double t1;
msach@0 154 io_timing += seconds(t1) - timing;
msach@0 155
msach@0 156 printf("\n---- kMeans Clustering ----\n");
msach@0 157 printf("Number of threads = %d\n", nthreads);
msach@0 158 printf("Input file: %s\n", filename);
msach@0 159 printf("numObjs = %d\n", numObjs);
msach@0 160 printf("numCoords = %d\n", numCoords);
msach@0 161 printf("numClusters = %d\n", numClusters);
msach@0 162 printf("threshold = %.4f\n", threshold);
msach@0 163
msach@0 164 printf("I/O time = %10.4f sec\n", io_timing);
msach@0 165 printf("Computation timing = %10.4f sec\n", clustering_timing);
msach@0 166
msach@0 167 return(0);
msach@0 168 }
msach@0 169