| rev |
line source |
|
msach@0
|
1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
|
|
msach@0
|
2 /* File: pthreads_main.c (an OpenMP version) */
|
|
msach@0
|
3 /* Description: This program shows an example on how to call a subroutine */
|
|
msach@0
|
4 /* that implements a simple k-means clustering algorithm */
|
|
msach@0
|
5 /* based on Euclidean distance. */
|
|
msach@0
|
6 /* Input file format: */
|
|
msach@0
|
7 /* ascii file: each line contains 1 data object */
|
|
msach@0
|
8 /* binary file: first 4-byte integer is the number of data */
|
|
msach@0
|
9 /* objects and 2nd integer is the no. of features (or */
|
|
msach@0
|
10 /* coordinates) of each object */
|
|
msach@0
|
11 /* */
|
|
msach@0
|
12 /* Author: Wei-keng Liao */
|
|
msach@0
|
13 /* ECE Department Northwestern University */
|
|
msach@0
|
14 /* email: wkliao@ece.northwestern.edu */
|
|
msach@0
|
15 /* Copyright, 2005, Wei-keng Liao */
|
|
msach@0
|
16 /* */
|
|
msach@0
|
17 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
|
|
msach@0
|
18
|
|
msach@0
|
19 #include <stdio.h>
|
|
msach@0
|
20 #include <stdlib.h>
|
|
msach@0
|
21 #include <string.h> /* strtok() */
|
|
msach@0
|
22 #include <sys/types.h> /* open() */
|
|
msach@0
|
23 #include <sys/stat.h>
|
|
msach@0
|
24 #include <sys/time.h>
|
|
msach@0
|
25 #include <fcntl.h>
|
|
msach@0
|
26 #include <unistd.h> /* getopt() */
|
|
msach@0
|
27 #include <time.h>
|
|
msach@0
|
28 #include "kmeans.h"
|
|
msach@0
|
29
|
|
msach@0
|
30 #include "SSR_lib/SSR.h"
|
|
msach@0
|
31
|
|
msach@0
|
/*
 * Program name and data-set name buffers.
 * NOTE(review): nothing in this file reads these; presumably the SSR library
 * looks them up by these exact symbol names -- confirm before renaming.
 */
char __ProgrammName[] = "kmeans";
char __DataSet[255];

/*
 * seconds(tm): store the current wall-clock time, in seconds (as a double),
 * into tm, using the file-scope scratch variable tp.
 *
 * The macro is a single parenthesized comma expression, so it is safe both
 * as a statement (`seconds(t);`) and inside a larger expression
 * (`x += seconds(t) - start;`), where it evaluates to the value stored in tm.
 * Do not read tm elsewhere in the same expression: that access would be
 * unsequenced relative to the assignment.
 */
#define seconds(tm) \
    (gettimeofday(&tp, NULL), (tm) = tp.tv_sec + tp.tv_usec / 1000000.0)

/* Scratch buffer filled by gettimeofday() inside seconds(). */
struct timeval tp;

/* Problem dimensions and thread count; set in main(). */
int numClusters, numCoords, numObjs, nthreads;
|
|
msach@0
|
41
|
|
msach@0
|
/*
 * Function: usage
 * ---------------
 * Writes the command-line synopsis to stderr, substituting argv0 for the
 * program name, and then terminates the process with exit(-1).
 * This function does not return.
 */
static void usage(char *argv0) {
    fprintf(stderr,
            "Usage: %s [switches] -i filename -n num_clusters [OPTIONS]\n"
            " -i filename : file containing data to be clustered\n"
            " -b : input file is in binary format (default no)\n"
            " -n num_clusters: number of clusters (K must be > 1)\n"
            " -p nproc : number of threads (default 1)\n"
            " -o filename : write output to file\n",
            argv0);
    exit(-1);
}
|
|
msach@0
|
58
|
|
msach@0
|
59 /*---< main() >-------------------------------------------------------------*/
|
|
msach@0
|
60 int main(int argc, char **argv) {
|
|
msach@0
|
61 int opt;
|
|
msach@0
|
62 extern char *optarg;
|
|
msach@0
|
63 extern int optind;
|
|
msach@0
|
64 int j;
|
|
msach@0
|
65 int isBinaryFile;
|
|
msach@0
|
66
|
|
msach@0
|
67 int *membership; /* [numObjs] */
|
|
msach@0
|
68 char *filename, *outfile;
|
|
msach@0
|
69 double **objects; /* [numObjs][numCoords] data objects */
|
|
msach@0
|
70 double **clusters; /* [numClusters][numCoords] cluster center */
|
|
msach@0
|
71 double threshold;
|
|
msach@0
|
72 double timing, io_timing, clustering_timing;
|
|
msach@0
|
73
|
|
msach@0
|
74 /* some default values */
|
|
msach@0
|
75 nthreads = 1; /* Amount of threads to use */
|
|
msach@0
|
76 numClusters = 1; /* Amount of cluster centers */
|
|
msach@0
|
77 threshold = 0.001; /* Percentage of objects that need to change membership for the clusting to continue */
|
|
msach@0
|
78 isBinaryFile = 0; /* 0 if the input file is in ASCII format, 1 for binary format */
|
|
msach@0
|
79 filename = NULL; /* Name of the input file */
|
|
msach@0
|
80 outfile = NULL; /* Name of the output file */
|
|
msach@0
|
81
|
|
msach@0
|
82 /* Parse command line options */
|
|
msach@0
|
83 while ( (opt=getopt(argc,argv,"o:p:i:n:t:bh"))!= EOF) {
|
|
msach@0
|
84 switch (opt) {
|
|
msach@0
|
85 case 'i': filename=optarg;
|
|
msach@0
|
86 break;
|
|
msach@0
|
87 case 'b': isBinaryFile = 1;
|
|
msach@0
|
88 break;
|
|
msach@0
|
89 case 'n': numClusters = atoi(optarg);
|
|
msach@0
|
90 break;
|
|
msach@0
|
91 case 'p': nthreads = atoi(optarg);
|
|
msach@0
|
92 break;
|
|
msach@0
|
93 case 'h': usage(argv[0]);
|
|
msach@0
|
94 break;
|
|
msach@0
|
95 case 'o': outfile=optarg;
|
|
msach@0
|
96 break;
|
|
msach@0
|
97 default: usage(argv[0]);
|
|
msach@0
|
98 break;
|
|
msach@0
|
99 }
|
|
msach@0
|
100 }
|
|
msach@0
|
101
|
|
msach@0
|
102 if (filename == NULL) usage(argv[0]);
|
|
msach@0
|
103
|
|
msach@0
|
104 seconds(io_timing);
|
|
msach@0
|
105
|
|
msach@0
|
106 /* Read input data points from given input file */
|
|
msach@0
|
107 objects = file_read(isBinaryFile, filename, &numObjs, &numCoords);
|
|
msach@0
|
108 assert(objects != NULL);
|
|
msach@0
|
109
|
|
msach@0
|
110 seconds(timing);
|
|
msach@0
|
111 io_timing = timing - io_timing;
|
|
msach@0
|
112 clustering_timing = timing;
|
|
msach@0
|
113
|
|
msach@0
|
114 membership = (int*) malloc(numObjs * sizeof(int));
|
|
msach@0
|
115 assert(membership != NULL);
|
|
msach@0
|
116
|
|
msach@0
|
117 clusters = malloc(numClusters * sizeof(double*));
|
|
msach@0
|
118 assert(clusters != NULL);
|
|
msach@0
|
119 clusters[0] = malloc(numClusters * numCoords * sizeof(double));
|
|
msach@0
|
120 assert(clusters[0] != NULL);
|
|
msach@0
|
121
|
|
msach@0
|
122 struct call_data data = { 0, objects, numCoords, numObjs,
|
|
msach@0
|
123 numClusters, threshold, membership, clusters };
|
|
msach@0
|
124
|
|
msach@0
|
125 /* Launch the core computation algorithm */
|
|
msach@0
|
126 SSR__create_seed_procr_and_do_work(kmeans, (void*)&data);
|
|
msach@0
|
127
|
|
msach@0
|
128 free(objects[0]);
|
|
msach@0
|
129 free(objects);
|
|
msach@0
|
130
|
|
msach@0
|
131 seconds(timing);
|
|
msach@0
|
132 clustering_timing = timing - clustering_timing;
|
|
msach@0
|
133
|
|
msach@0
|
134 /* Memory cleanup */
|
|
msach@0
|
135 free(membership);
|
|
msach@0
|
136
|
|
msach@0
|
137 if(outfile != NULL) {
|
|
msach@0
|
138 int l;
|
|
msach@0
|
139 FILE* fp = fopen(outfile, "w");
|
|
msach@0
|
140 for(j = 0; j < numClusters; j++) {
|
|
msach@0
|
141 fprintf(fp, "Cluster %d: ", j);
|
|
msach@0
|
142 for(l = 0; l < numCoords; l++)
|
|
msach@0
|
143 fprintf(fp, "%f ", clusters[j][l]);
|
|
msach@0
|
144 fprintf(fp, "\n");
|
|
msach@0
|
145 }
|
|
msach@0
|
146 fclose(fp);
|
|
msach@0
|
147 }
|
|
msach@0
|
148
|
|
msach@0
|
149 free(clusters[0]);
|
|
msach@0
|
150 free(clusters);
|
|
msach@0
|
151
|
|
msach@0
|
152 /* Print performance numbers on stdout */
|
|
msach@0
|
153 double t1;
|
|
msach@0
|
154 io_timing += seconds(t1) - timing;
|
|
msach@0
|
155
|
|
msach@0
|
156 printf("\n---- kMeans Clustering ----\n");
|
|
msach@0
|
157 printf("Number of threads = %d\n", nthreads);
|
|
msach@0
|
158 printf("Input file: %s\n", filename);
|
|
msach@0
|
159 printf("numObjs = %d\n", numObjs);
|
|
msach@0
|
160 printf("numCoords = %d\n", numCoords);
|
|
msach@0
|
161 printf("numClusters = %d\n", numClusters);
|
|
msach@0
|
162 printf("threshold = %.4f\n", threshold);
|
|
msach@0
|
163
|
|
msach@0
|
164 printf("I/O time = %10.4f sec\n", io_timing);
|
|
msach@0
|
165 printf("Computation timing = %10.4f sec\n", clustering_timing);
|
|
msach@0
|
166
|
|
msach@0
|
167 return(0);
|
|
msach@0
|
168 }
|
|
msach@0
|
169
|