| rev |
line source |
|
msach@0
|
1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
|
|
msach@0
|
2 /* File: file_io.c */
|
|
msach@0
|
3 /* Description: This program reads point data from a file */
|
|
msach@0
|
4 /* and writes cluster output to files */
|
|
msach@0
|
5 /* Input file format: */
|
|
msach@0
|
6 /* ascii file: each line contains 1 data object */
|
|
msach@0
|
7 /* binary file: first 4-byte integer is the number of data */
|
|
msach@0
|
8 /* objects and 2nd integer is the no. of features (or */
|
|
msach@0
|
9 /* coordinates) of each object */
|
|
msach@0
|
10 /* */
|
|
msach@0
|
11 /* Author: Wei-keng Liao */
|
|
msach@0
|
12 /* ECE Department Northwestern University */
|
|
msach@0
|
13 /* email: wkliao@ece.northwestern.edu */
|
|
msach@0
|
14 /* Copyright, 2005, Wei-keng Liao */
|
|
msach@0
|
15 /* */
|
|
msach@0
|
16 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
|
|
msach@0
|
17
|
|
msach@0
|
18 #include <stdio.h>
|
|
msach@0
|
19 #include <stdlib.h>
|
|
msach@0
|
20 #include <string.h> /* strtok() */
|
|
msach@0
|
21 #include <sys/types.h> /* open() */
|
|
msach@0
|
22 #include <sys/stat.h>
|
|
msach@0
|
23 #include <fcntl.h>
|
|
msach@0
|
24 #include <unistd.h> /* read(), close() */
|
|
msach@0
|
25
|
|
msach@0
|
26 #include "kmeans.h"
|
|
msach@0
|
27
|
|
msach@0
|
#define MAX_CHAR_PER_LINE 128   /* initial size / growth step of the line buffer */

/* Delimiters that end the leading object id on an ASCII line. */
static const char FILE_READ_ID_DELIMS[]    = " \t\n";
/* Delimiters that separate the coordinates following the id. */
static const char FILE_READ_COORD_DELIMS[] = " ,\t\n";

/* Print msg (plus a newline) on stderr and terminate with EXIT_FAILURE. */
static void file_read_fail(const char *msg)
{
    fprintf(stderr, "%s\n", msg);
    exit(EXIT_FAILURE);
}

/*
 * Read exactly `count` bytes from fd into buf, retrying after short reads.
 * Returns 0 on success, -1 on a read error or if end-of-file is hit first.
 */
static int file_read_exact(int fd, void *buf, size_t count)
{
    char *dst = buf;

    while (count > 0) {
        ssize_t got = read(fd, dst, count);
        if (got <= 0)
            return -1;
        dst   += got;
        count -= (size_t)got;
    }
    return 0;
}

/*
 * Allocate a rows x cols matrix of doubles as one contiguous cell array
 * (matrix[0]) plus an array of row pointers (matrix).  Both allocations
 * are always at least one element long, so matrix and matrix[0] can be
 * passed to free() even when rows or cols is 0.
 * Returns NULL if the size would overflow size_t or memory is exhausted.
 */
static double **file_read_alloc_matrix(int rows, int cols)
{
    const size_t maxSize = (size_t)-1;
    size_t nRows = (size_t)rows;
    size_t nCols = (size_t)cols;
    size_t nCells, r;
    double **matrix;
    double  *cells;

    if (rows < 0 || cols < 0)
        return NULL;
    if (nRows > maxSize / sizeof *matrix)
        return NULL;
    if (nCols != 0 && nRows > (maxSize / sizeof *cells) / nCols)
        return NULL;
    nCells = nRows * nCols;

    matrix = malloc((nRows ? nRows : 1) * sizeof *matrix);
    if (matrix == NULL)
        return NULL;
    cells = malloc((nCells ? nCells : 1) * sizeof *cells);
    if (cells == NULL) {
        free(matrix);
        return NULL;
    }

    matrix[0] = cells;
    for (r = 1; r < nRows; r++)
        matrix[r] = matrix[r-1] + nCols;
    return matrix;
}

/*
 * Read one complete text line from fp into *line, growing the buffer in
 * steps of MAX_CHAR_PER_LINE whenever the line does not fit.  *line may be
 * NULL on the first call; the caller frees it when done.
 * Returns 1 if a line (possibly without trailing newline) was read,
 * 0 at end of file.  Terminates the program if memory runs out.
 */
static int file_read_line(FILE *fp, char **line, size_t *cap)
{
    size_t used = 0;

    if (*line == NULL) {
        *cap  = MAX_CHAR_PER_LINE;
        *line = malloc(*cap);
        if (*line == NULL)
            file_read_fail("Could Not Allocate Memory");
    }

    for (;;) {
        char *bigger;

        if (fgets(*line + used, (int)(*cap - used), fp) == NULL) {
            (*line)[used] = '\0';
            return used > 0;
        }
        used += strlen(*line + used);

        if (used > 0 && (*line)[used-1] == '\n')
            return 1;               /* whole line is in the buffer          */
        if (used + 1 < *cap)
            return 1;               /* buffer not full: last line, no '\n'  */

        /* buffer is full and the line continues: enlarge and keep reading */
        bigger = realloc(*line, *cap + MAX_CHAR_PER_LINE);
        if (bigger == NULL)
            file_read_fail("Could Not Allocate Memory");
        *line = bigger;
        *cap += MAX_CHAR_PER_LINE;
    }
}

/*
 * Load a raw binary file: an int object count, an int coordinate count,
 * then count*coords float values, all in the machine's native layout.
 */
static double **file_read_binary(const char *filename,
                                 int *numObjs, int *numCoords)
{
    int      infile;
    size_t   total, k;
    float   *raw;
    double **objects;

    fprintf(stderr, "Trying to read from binary file: %s", filename);
    infile = open(filename, O_RDONLY);
    if (infile == -1)
        file_read_fail("Error: Input File Not Found");

    if (file_read_exact(infile, numObjs,   sizeof *numObjs)   != 0 ||
        file_read_exact(infile, numCoords, sizeof *numCoords) != 0)
        file_read_fail("Error: Could Not Read Input File Header");
    if (*numObjs < 0 || *numCoords < 0)
        file_read_fail("Error: Invalid Input File Header");

    /* the matrix allocator also rejects sizes that overflow size_t */
    objects = file_read_alloc_matrix(*numObjs, *numCoords);
    if (objects == NULL)
        file_read_fail("Could Not Allocate Memory");

    total = (size_t)(*numObjs) * (size_t)(*numCoords);
    raw   = malloc((total ? total : 1) * sizeof *raw);
    if (raw == NULL)
        file_read_fail("Could Not Allocate Memory");

    if (file_read_exact(infile, raw, total * sizeof *raw) != 0)
        file_read_fail("Error: Input File Is Shorter Than Its Header Claims");

    /* widen the stored floats to the doubles handed back to the caller */
    for (k = 0; k < total; k++)
        objects[0][k] = raw[k];

    free(raw);
    fprintf(stderr, " ... Input read successfully!\n");
    close(infile);
    return objects;
}

/*
 * Load an ASCII file holding one object per non-blank line.  The first
 * token of a line (ended by blank, tab or newline) is an id and is
 * skipped; the remaining tokens (also comma separated) are coordinates.
 * The coordinate count is taken from the first non-blank line.
 */
static double **file_read_ascii(const char *filename,
                                int *numObjs, int *numCoords)
{
    FILE    *infile;
    char    *line = NULL;
    size_t   cap  = 0;
    int      coordsCounted = 0;
    int      row, col;
    double **objects;

    fprintf(stderr, "Trying to read from ASCII file: %s", filename);
    infile = fopen(filename, "r");
    if (infile == NULL)
        file_read_fail("Error: Input File Not Found");

    /* pass 1: count the objects and the coordinates per object */
    *numObjs   = 0;
    *numCoords = 0;
    while (file_read_line(infile, &line, &cap)) {
        if (strtok(line, FILE_READ_ID_DELIMS) == NULL)
            continue;                               /* blank line */
        (*numObjs)++;
        if (!coordsCounted) {
            while (strtok(NULL, FILE_READ_COORD_DELIMS) != NULL)
                (*numCoords)++;
            coordsCounted = 1;
        }
    }
    rewind(infile);

    objects = file_read_alloc_matrix(*numObjs, *numCoords);
    if (objects == NULL)
        file_read_fail("Could Not Allocate Memory");

    /* pass 2: parse the coordinates of every object */
    row = 0;
    while (row < *numObjs && file_read_line(infile, &line, &cap)) {
        if (strtok(line, FILE_READ_ID_DELIMS) == NULL)
            continue;
        for (col = 0; col < *numCoords; col++) {
            const char *tok = strtok(NULL, FILE_READ_COORD_DELIMS);
            if (tok == NULL) {
                fprintf(stderr,
                        "\nError: Object %d Has Fewer Than %d Coordinates\n",
                        row + 1, *numCoords);
                exit(EXIT_FAILURE);
            }
            /* Unparsable text yields 0, as with atof(); the value is
             * rounded to float precision before being stored as double. */
            objects[row][col] = (double)(float)strtod(tok, NULL);
        }
        row++;
    }
    if (row != *numObjs)
        file_read_fail("Error: Input File Changed While Being Read");

    fprintf(stderr, " ... Input read successfully!\n");
    fclose(infile);
    free(line);
    return objects;
}

/*
 * Function: file_read
 * -------------------
 * Loads the input data set into memory.
 *
 *   isBinaryFile  non-zero: raw binary input, zero: ASCII input
 *   filename      path of the input file
 *   numObjs       out: number of data objects read
 *   numCoords     out: number of coordinates per object
 *
 * Returns a numObjs x numCoords matrix of doubles.  All cells live in one
 * contiguous allocation (result[0]); result[i] points at row i.  Release
 * it with free(result[0]) followed by free(result).
 *
 * Progress is reported on stderr.  Any failure (missing or truncated file,
 * invalid header, a line with too few coordinates, out of memory) prints a
 * message on stderr and ends the program with EXIT_FAILURE.
 */
double** file_read(int   isBinaryFile,  /* flag: 0 or 1 */
                   char *filename,      /* input file name */
                   int  *numObjs,       /* count of data objects (local) */
                   int  *numCoords)     /* count of coordinates */
{
    if (isBinaryFile)
        return file_read_binary(filename, numObjs, numCoords);
    return file_read_ascii(filename, numObjs, numCoords);
}
|
|
msach@0
|
162
|