diff main.c @ 0:0ce47c784647

Initial commit
author Merten Sach <msach@mailbox.tu-berlin.de>
date Tue, 27 Sep 2011 15:08:02 +0200
parents
children d906272ff3a3
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/main.c	Tue Sep 27 15:08:02 2011 +0200
     1.3 @@ -0,0 +1,169 @@
     1.4 +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
     1.5 +/*   File:         main.c   (an SSR version)                                 */
     1.6 +/*   Description:  This program shows an example on how to call a subroutine */
     1.7 +/*                 that implements a simple k-means clustering algorithm     */
     1.8 +/*                 based on Euclid distance.                                 */
     1.9 +/*   Input file format:                                                      */
    1.10 +/*                 ascii  file: each line contains 1 data object             */
    1.11 +/*                 binary file: first 4-byte integer is the number of data   */
    1.12 +/*                 objects and 2nd integer is the no. of features (or        */
    1.13 +/*                 coordinates) of each object                               */
    1.14 +/*                                                                           */
    1.15 +/*   Author:  Wei-keng Liao                                                  */
    1.16 +/*            ECE Department Northwestern University                         */
    1.17 +/*            email: wkliao@ece.northwestern.edu                             */
    1.18 +/*   Copyright, 2005, Wei-keng Liao                                          */
    1.19 +/*                                                                           */
    1.20 +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
    1.21 +
    1.22 +#include <stdio.h>
    1.23 +#include <stdlib.h>
    1.24 +#include <string.h>     /* strtok() */
    1.25 +#include <sys/types.h>  /* open() */
    1.26 +#include <sys/stat.h>
    1.27 +#include <sys/time.h>
    1.28 +#include <fcntl.h>
    1.29 +#include <unistd.h>     /* getopt() */
    1.30 +#include <time.h>
    1.31 +#include "kmeans.h"
    1.32 +
    1.33 +#include "SSR_lib/SSR.h"
    1.34 +
    1.35 +char __ProgrammName[] = "kmeans";
    1.36 +char __DataSet[255];
    1.37 +
    1.38 +#define seconds(tm) gettimeofday(&tp,(struct timezone *)0);\
    1.39 +   tm=tp.tv_sec+tp.tv_usec/1000000.0
    1.40 +
    1.41 +struct timeval tp;
    1.42 +
    1.43 +int numClusters, numCoords, numObjs, nthreads;
    1.44 +
    1.45 +/*
    1.46 +*	Function: usage
    1.47 +*	---------------
    1.48 +*	Prints information on how to call the program.
    1.49 +*/
    1.50 +static void usage(char *argv0) {
    1.51 +    char *help =
    1.52 +        "Usage: %s [switches] -i filename -n num_clusters [OPTIONS]\n"
    1.53 +        "       -i filename    : file containing data to be clustered\n"
    1.54 +        "       -b             : input file is in binary format (default no)\n"
    1.55 +        "       -n num_clusters: number of clusters (K must be > 1)\n"
    1.56 +        "       -p nproc       : number of threads (default 1)\n"
    1.57 +        "       -o filename    : write output to file\n";
    1.58 +    fprintf(stderr, help, argv0);
    1.59 +    exit(-1);
    1.60 +}
    1.61 +
    1.62 +/*---< main() >-------------------------------------------------------------*/
    1.63 +int main(int argc, char **argv) {
    1.64 +    int     opt;
    1.65 +    extern char   *optarg;
    1.66 +    extern int     optind;
    1.67 +    int     j;
    1.68 +    int     isBinaryFile;
    1.69 +    
    1.70 +    int    *membership;    /* [numObjs] */
    1.71 +    char   *filename, *outfile;
    1.72 +    double **objects;       /* [numObjs][numCoords] data objects */
    1.73 +    double **clusters;      /* [numClusters][numCoords] cluster center */
    1.74 +    double   threshold;
    1.75 +    double  timing, io_timing, clustering_timing;
    1.76 +    
    1.77 +    /* some default values */
    1.78 +    nthreads          = 1;		/* Amount of threads to use */
    1.79 +    numClusters       = 1;		/* Amount of cluster centers */
    1.80 +    threshold         = 0.001; 	/* Percentage of objects that need to change membership for the clusting to continue */
    1.81 +    isBinaryFile      = 0;		/* 0 if the input file is in ASCII format, 1 for binary format */
    1.82 +    filename          = NULL;	/* Name of the input file */
    1.83 +	outfile   		  = NULL;	/* Name of the output file */
    1.84 +
    1.85 +	/* Parse command line options */
    1.86 +    while ( (opt=getopt(argc,argv,"o:p:i:n:t:bh"))!= EOF) {
    1.87 +        switch (opt) {
    1.88 +            case 'i': filename=optarg;
    1.89 +                      break;
    1.90 +            case 'b': isBinaryFile = 1;
    1.91 +                      break;
    1.92 +            case 'n': numClusters = atoi(optarg);
    1.93 +                      break;
    1.94 +            case 'p': nthreads = atoi(optarg);
    1.95 +                      break;
    1.96 +            case 'h': usage(argv[0]);
    1.97 +                      break;
    1.98 +			case 'o': outfile=optarg;
    1.99 +					  break;
   1.100 +            default: usage(argv[0]);
   1.101 +                      break;
   1.102 +        }
   1.103 +    }
   1.104 +
   1.105 +    if (filename == NULL) usage(argv[0]);
   1.106 +
   1.107 +	seconds(io_timing);
   1.108 +
   1.109 +    /* Read input data points from given input file */
   1.110 +    objects = file_read(isBinaryFile, filename, &numObjs, &numCoords);
   1.111 +    assert(objects != NULL);
   1.112 +
   1.113 +	seconds(timing);
   1.114 +	io_timing         = timing - io_timing;
   1.115 +	clustering_timing = timing;      
   1.116 +
   1.117 +    membership = (int*) malloc(numObjs * sizeof(int));
   1.118 +	assert(membership != NULL);
   1.119 +        
   1.120 +    clusters = malloc(numClusters * sizeof(double*));
   1.121 +    assert(clusters != NULL);
   1.122 +    clusters[0] = malloc(numClusters * numCoords * sizeof(double));
   1.123 +    assert(clusters[0] != NULL);
   1.124 +
   1.125 +    struct call_data data = { 0, objects, numCoords, numObjs,
   1.126 +                          numClusters, threshold, membership, clusters };
   1.127 +    
   1.128 +    /* Launch the core computation algorithm */
   1.129 +    SSR__create_seed_procr_and_do_work(kmeans, (void*)&data);
   1.130 +
   1.131 +    free(objects[0]);
   1.132 +    free(objects);
   1.133 +
   1.134 +    seconds(timing);
   1.135 +    clustering_timing = timing - clustering_timing;
   1.136 +
   1.137 +    /* Memory cleanup */
   1.138 +    free(membership);
   1.139 +
   1.140 +	if(outfile != NULL) {
   1.141 +        int l;
   1.142 +        FILE* fp = fopen(outfile, "w");
   1.143 +        for(j = 0; j < numClusters; j++) {
   1.144 +            fprintf(fp, "Cluster %d: ", j);
   1.145 +            for(l = 0; l < numCoords; l++)
   1.146 +                fprintf(fp, "%f ", clusters[j][l]);
   1.147 +            fprintf(fp, "\n");
   1.148 +        }
   1.149 +        fclose(fp);
   1.150 +    }
   1.151 +
   1.152 +    free(clusters[0]);
   1.153 +    free(clusters);
   1.154 +
   1.155 +    /* Print performance numbers on stdout */
   1.156 +	double t1;
   1.157 +	io_timing += seconds(t1) - timing;
   1.158 +
   1.159 +    printf("\n---- kMeans Clustering ----\n");
   1.160 +    printf("Number of threads = %d\n", nthreads);
   1.161 +    printf("Input file:     %s\n", filename);
   1.162 +    printf("numObjs       = %d\n", numObjs);
   1.163 +    printf("numCoords     = %d\n", numCoords);
   1.164 +    printf("numClusters   = %d\n", numClusters);
   1.165 +    printf("threshold     = %.4f\n", threshold);
   1.166 +
   1.167 +    printf("I/O time           = %10.4f sec\n", io_timing);
   1.168 +    printf("Computation timing = %10.4f sec\n", clustering_timing);
   1.169 +
   1.170 +    return(0);
   1.171 +}
   1.172 +