Mercurial > cgi-bin > hgwebdir.cgi > PR > Applications > SSR > SSR__KMeans__Bench
diff file_io.c @ 0:0ce47c784647
Initial commit
| author | Merten Sach <msach@mailbox.tu-berlin.de> |
|---|---|
| date | Tue, 27 Sep 2011 15:08:02 +0200 |
| parents | |
| children | |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/file_io.c Tue Sep 27 15:08:02 2011 +0200 1.3 @@ -0,0 +1,162 @@ 1.4 +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 1.5 +/* File: file_io.c */ 1.6 +/* Description: This program reads point data from a file */ 1.7 +/* and write cluster output to files */ 1.8 +/* Input file format: */ 1.9 +/* ascii file: each line contains 1 data object */ 1.10 +/* binary file: first 4-byte integer is the number of data */ 1.11 +/* objects and 2nd integer is the no. of features (or */ 1.12 +/* coordinates) of each object */ 1.13 +/* */ 1.14 +/* Author: Wei-keng Liao */ 1.15 +/* ECE Department Northwestern University */ 1.16 +/* email: wkliao@ece.northwestern.edu */ 1.17 +/* Copyright, 2005, Wei-keng Liao */ 1.18 +/* */ 1.19 +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 1.20 + 1.21 +#include <stdio.h> 1.22 +#include <stdlib.h> 1.23 +#include <string.h> /* strtok() */ 1.24 +#include <sys/types.h> /* open() */ 1.25 +#include <sys/stat.h> 1.26 +#include <fcntl.h> 1.27 +#include <unistd.h> /* read(), close() */ 1.28 + 1.29 +#include "kmeans.h" 1.30 + 1.31 +#define MAX_CHAR_PER_LINE 128 1.32 + 1.33 + 1.34 +/* 1.35 +* Function: file_read 1.36 +* ------------------- 1.37 +* Function for loading input data into memory. 
1.38 +*/ 1.39 +double** file_read(int isBinaryFile, /* flag: 0 or 1 */ 1.40 + char *filename, /* input file name */ 1.41 + int *numObjs, /* count of data objects (local) */ 1.42 + int *numCoords) /* count of coordinates */ 1.43 +{ 1.44 + float **objects; 1.45 + int i, j, len; 1.46 + ssize_t numBytesRead; 1.47 + 1.48 + if (isBinaryFile) { /* input file is in raw binary format -------------*/ 1.49 + int infile; 1.50 + fprintf(stderr, "Trying to read from binary file: %s", filename); 1.51 + if ((infile = open(filename, O_RDONLY, "0600")) == -1) { 1.52 + fprintf(stderr, "Error: Input File Not Found\n"); 1.53 + exit(EXIT_FAILURE); 1.54 + } 1.55 + numBytesRead = read(infile, numObjs, sizeof(int)); 1.56 + assert(numBytesRead == sizeof(int)); 1.57 + numBytesRead = read(infile, numCoords, sizeof(int)); 1.58 + assert(numBytesRead == sizeof(int)); 1.59 + 1.60 + /* allocate space for objects[][] and read all objects */ 1.61 + len = (*numObjs) * (*numCoords); 1.62 + objects = (float**)malloc((*numObjs) * sizeof(float*)); 1.63 + objects[0] = (float*) malloc(len * sizeof(float)); 1.64 + 1.65 + if(objects == NULL || objects[0] == NULL) { 1.66 + fprintf(stderr, "Could Not Allocate Memory\n"); 1.67 + exit(EXIT_FAILURE); 1.68 + } 1.69 + 1.70 + for (i = 1; i < (*numObjs); i++) 1.71 + objects[i] = objects[i-1] + (*numCoords); 1.72 + 1.73 + numBytesRead = read(infile, objects[0], len*sizeof(float)); 1.74 + assert(numBytesRead == len*sizeof(float)); 1.75 + fprintf(stderr, " ... 
Input read successfully!\n"); 1.76 + close(infile); 1.77 + 1.78 + } else { /* input file is in ASCII format -------------------------------*/ 1.79 + FILE *infile; 1.80 + char *line, *ret; 1.81 + int lineLen; 1.82 + 1.83 + fprintf(stderr, "Trying to read from ASCII file: %s", filename); 1.84 + if ((infile = fopen(filename, "r")) == NULL) { 1.85 + fprintf(stderr, "Error: Input File Not Found\n"); 1.86 + exit(EXIT_FAILURE); 1.87 + } 1.88 + 1.89 + /* first find the number of objects */ 1.90 + lineLen = MAX_CHAR_PER_LINE; 1.91 + line = (char*) malloc(lineLen); 1.92 + assert(line != NULL); 1.93 + 1.94 + (*numObjs) = 0; 1.95 + while (fgets(line, lineLen, infile) != NULL) { 1.96 + /* check each line to find the max line length */ 1.97 + while (strlen(line) == lineLen-1) { 1.98 + /* this line read is not complete */ 1.99 + len = strlen(line); 1.100 + fseek(infile, -len, SEEK_CUR); 1.101 + 1.102 + /* increase lineLen */ 1.103 + lineLen += MAX_CHAR_PER_LINE; 1.104 + line = (char*) realloc(line, lineLen); 1.105 + assert(line != NULL); 1.106 + 1.107 + ret = fgets(line, lineLen, infile); 1.108 + assert(ret != NULL); 1.109 + } 1.110 + 1.111 + if (strtok(line, " \t\n") != 0) 1.112 + (*numObjs)++; 1.113 + } 1.114 + rewind(infile); 1.115 + 1.116 + /* find the no. 
objects of each object */ 1.117 + (*numCoords) = 0; 1.118 + while (fgets(line, lineLen, infile) != NULL) { 1.119 + if (strtok(line, " \t\n") != 0) { 1.120 + /* ignore the id (first coordiinate): numCoords = 1; */ 1.121 + while (strtok(NULL, " ,\t\n") != NULL) (*numCoords)++; 1.122 + break; /* this makes read from 1st object */ 1.123 + } 1.124 + } 1.125 + rewind(infile); 1.126 + 1.127 + /* allocate space for objects[][] and read all objects */ 1.128 + len = (*numObjs) * (*numCoords); 1.129 + objects = (float**)malloc((*numObjs) * sizeof(float*)); 1.130 + assert(objects != NULL); 1.131 + objects[0] = (float*) malloc(len * sizeof(float)); 1.132 + assert(objects[0] != NULL); 1.133 + for (i=1; i<(*numObjs); i++) 1.134 + objects[i] = objects[i-1] + (*numCoords); 1.135 + 1.136 + i = 0; 1.137 + /* read all objects */ 1.138 + while (fgets(line, lineLen, infile) != NULL) { 1.139 + if (strtok(line, " \t\n") == NULL) continue; 1.140 + for (j=0; j<(*numCoords); j++) 1.141 + objects[i][j] = atof(strtok(NULL, " ,\t\n")); 1.142 + i++; 1.143 + } 1.144 + fprintf(stderr, " ... Input read successfully!\n"); 1.145 + fclose(infile); 1.146 + free(line); 1.147 + } 1.148 + 1.149 + 1.150 + double** objects_d = (double**)malloc((*numObjs) * sizeof(double*)); 1.151 + objects_d[0] = (double*) malloc(len * sizeof(double)); 1.152 + for (i = 1; i < (*numObjs); i++) 1.153 + objects_d[i] = objects_d[i-1] + (*numCoords); 1.154 + 1.155 + for (i=0; i< (*numObjs); i++){ 1.156 + for (j=0; j<(*numCoords); j++){ 1.157 + objects_d[i][j] = objects[i][j]; 1.158 + } 1.159 + } 1.160 + free(objects[0]); 1.161 + free(objects); 1.162 + 1.163 + return objects_d; 1.164 +} 1.165 +
