view main.c @ 3:d906272ff3a3

DataSet print
author Merten Sach <msach@mailbox.tu-berlin.de>
date Wed, 28 Sep 2011 15:04:24 +0200
parents 0ce47c784647
children
line source
1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2 /* File: pthreads_main.c (an OpenMP version) */
3 /* Description: This program shows an example on how to call a subroutine */
4 /* that implements a simple k-means clustering algorithm */
5 /* based on Euclid distance. */
6 /* Input file format: */
7 /* ascii file: each line contains 1 data object */
8 /* binary file: first 4-byte integer is the number of data */
9 /* objects and 2nd integer is the no. of features (or */
10 /* coordinates) of each object */
11 /* */
12 /* Author: Wei-keng Liao */
13 /* ECE Department Northwestern University */
14 /* email: wkliao@ece.northwestern.edu */
15 /* Copyright, 2005, Wei-keng Liao */
16 /* */
17 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h> /* strtok() */
22 #include <sys/types.h> /* open() */
23 #include <sys/stat.h>
24 #include <sys/time.h>
25 #include <fcntl.h>
26 #include <unistd.h> /* getopt() */
27 #include <time.h>
28 #include "kmeans.h"
30 #include "SSR_lib/SSR.h"
/* Program name and data-set description. Nothing in this file reads
 * __ProgrammName; __DataSet is filled in by main(). */
/* NOTE(review): presumably both are consumed by the SSR library -- confirm. */
char __ProgrammName[] = "kmeans";
char __DataSet[255];

/*
 * seconds(tm): store the current wall-clock time, in seconds (with a
 * microsecond fraction), into the lvalue `tm` and yield that value.
 *
 * The macro is a single parenthesized comma expression so that it works both
 * as a statement (`seconds(t);`) and inside a larger expression
 * (`x += seconds(t) - t0;`). The previous two-statement form silently
 * mis-evaluated the latter: the arithmetic bound to gettimeofday()'s return
 * value and to the trailing assignment separately.
 *
 * Uses the file-scope scratch variable `tp` below; not reentrant.
 */
#define seconds(tm) \
    (gettimeofday(&tp, NULL), (tm) = tp.tv_sec + tp.tv_usec / 1000000.0)

/* Scratch buffer written by every seconds() invocation. */
struct timeval tp;

/* Problem dimensions and thread count shared across the program. */
int numClusters, numCoords, numObjs, nthreads;
/*
 * Function: usage
 * ---------------
 * Writes the command-line synopsis to stderr, substituting argv0 for the
 * program name, then terminates the process via exit(-1). Does not return.
 */
static void usage(char *argv0) {
    fprintf(stderr,
            "Usage: %s [switches] -i filename -n num_clusters [OPTIONS]\n"
            " -i filename : file containing data to be clustered\n"
            " -b : input file is in binary format (default no)\n"
            " -n num_clusters: number of clusters (K must be > 1)\n"
            " -p nproc : number of threads (default 1)\n"
            " -o filename : write output to file\n",
            argv0);
    exit(-1);
}
59 /*---< main() >-------------------------------------------------------------*/
60 int main(int argc, char **argv) {
61 int opt;
62 extern char *optarg;
63 extern int optind;
64 int j;
65 int isBinaryFile;
67 int *membership; /* [numObjs] */
68 char *filename, *outfile;
69 double **objects; /* [numObjs][numCoords] data objects */
70 double **clusters; /* [numClusters][numCoords] cluster center */
71 double threshold;
72 double timing, io_timing, clustering_timing;
74 /* some default values */
75 nthreads = 1; /* Amount of threads to use */
76 numClusters = 1; /* Amount of cluster centers */
77 threshold = 0.001; /* Percentage of objects that need to change membership for the clusting to continue */
78 isBinaryFile = 0; /* 0 if the input file is in ASCII format, 1 for binary format */
79 filename = NULL; /* Name of the input file */
80 outfile = NULL; /* Name of the output file */
82 /* Parse command line options */
83 while ( (opt=getopt(argc,argv,"o:p:i:n:t:bh"))!= EOF) {
84 switch (opt) {
85 case 'i': filename=optarg;
86 break;
87 case 'b': isBinaryFile = 1;
88 break;
89 case 'n': numClusters = atoi(optarg);
90 break;
91 case 'p': nthreads = atoi(optarg);
92 break;
93 case 'h': usage(argv[0]);
94 break;
95 case 'o': outfile=optarg;
96 break;
97 default: usage(argv[0]);
98 break;
99 }
100 }
102 snprintf(__DataSet,255,"File: %s\nClusters %d\nThreads: %d\n",
103 filename, numClusters, nthreads);
105 if (filename == NULL) usage(argv[0]);
107 seconds(io_timing);
109 /* Read input data points from given input file */
110 objects = file_read(isBinaryFile, filename, &numObjs, &numCoords);
111 assert(objects != NULL);
113 seconds(timing);
114 io_timing = timing - io_timing;
115 clustering_timing = timing;
117 membership = (int*) malloc(numObjs * sizeof(int));
118 assert(membership != NULL);
120 clusters = malloc(numClusters * sizeof(double*));
121 assert(clusters != NULL);
122 clusters[0] = malloc(numClusters * numCoords * sizeof(double));
123 assert(clusters[0] != NULL);
125 struct call_data data = { 0, objects, numCoords, numObjs,
126 numClusters, threshold, membership, clusters };
128 /* Launch the core computation algorithm */
129 SSR__create_seed_procr_and_do_work(kmeans, (void*)&data);
131 free(objects[0]);
132 free(objects);
134 seconds(timing);
135 clustering_timing = timing - clustering_timing;
137 /* Memory cleanup */
138 free(membership);
140 if(outfile != NULL) {
141 int l;
142 FILE* fp = fopen(outfile, "w");
143 for(j = 0; j < numClusters; j++) {
144 fprintf(fp, "Cluster %d: ", j);
145 for(l = 0; l < numCoords; l++)
146 fprintf(fp, "%f ", clusters[j][l]);
147 fprintf(fp, "\n");
148 }
149 fclose(fp);
150 }
152 free(clusters[0]);
153 free(clusters);
155 /* Print performance numbers on stdout */
156 double t1;
157 io_timing += seconds(t1) - timing;
159 printf("\n---- kMeans Clustering ----\n");
160 printf("Number of threads = %d\n", nthreads);
161 printf("Input file: %s\n", filename);
162 printf("numObjs = %d\n", numObjs);
163 printf("numCoords = %d\n", numCoords);
164 printf("numClusters = %d\n", numClusters);
165 printf("threshold = %.4f\n", threshold);
167 printf("I/O time = %10.4f sec\n", io_timing);
168 printf("Computation timing = %10.4f sec\n", clustering_timing);
170 return(0);
171 }