view file_io.c @ 3:d906272ff3a3

DataSet print
author Merten Sach <msach@mailbox.tu-berlin.de>
date Wed, 28 Sep 2011 15:04:24 +0200
parents
children
line source
1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2 /* File: file_io.c */
3 /* Description: This program reads point data from a file */
4 /* and writes cluster output to files */
5 /* Input file format: */
6 /* ascii file: each line contains 1 data object */
7 /* binary file: first 4-byte integer is the number of data */
8 /* objects and 2nd integer is the no. of features (or */
9 /* coordinates) of each object */
10 /* */
11 /* Author: Wei-keng Liao */
12 /* ECE Department Northwestern University */
13 /* email: wkliao@ece.northwestern.edu */
14 /* Copyright, 2005, Wei-keng Liao */
15 /* */
16 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h> /* strtok() */
21 #include <sys/types.h> /* open() */
22 #include <sys/stat.h>
23 #include <fcntl.h>
24 #include <unistd.h> /* read(), close() */
26 #include "kmeans.h"
/* Initial size, and growth step, of the buffer used to read ASCII lines. */
#define MAX_CHAR_PER_LINE 128

/* Upper bound on the ASCII line buffer so its size always fits in the `int`
 * that fgets() takes. */
enum { FIO_LINE_CAP_LIMIT = 1 << 30 };

/* Print msg to stderr and terminate the process; every failure inside
 * file_read() is reported this way. */
static void fio_fail(const char *msg)
{
    fprintf(stderr, "%s", msg);
    exit(EXIT_FAILURE);
}

/* Allocate count elements of elemSize bytes, exiting on overflow or failure. */
static void *fio_alloc(size_t count, size_t elemSize)
{
    void *mem;

    if (count == 0)
        count = 1;              /* malloc(0) may legally return NULL */
    if (count > (size_t)-1 / elemSize)
        fio_fail("Could Not Allocate Memory\n");
    mem = malloc(count * elemSize);
    if (mem == NULL)
        fio_fail("Could Not Allocate Memory\n");
    return mem;
}

/* Allocate a rows x cols float matrix: a row-pointer table whose entries
 * point into one contiguous slab stored at m[0]. */
static float **fio_new_float_matrix(size_t rows, size_t cols)
{
    float **m;
    size_t r;

    if (cols != 0 && rows > (size_t)-1 / cols)
        fio_fail("Could Not Allocate Memory\n");
    m = fio_alloc(rows, sizeof *m);
    m[0] = fio_alloc(rows * cols, sizeof **m);
    for (r = 1; r < rows; r++)
        m[r] = m[r - 1] + cols;
    return m;
}

/* Same layout as fio_new_float_matrix(), for doubles. */
static double **fio_new_double_matrix(size_t rows, size_t cols)
{
    double **m;
    size_t r;

    if (cols != 0 && rows > (size_t)-1 / cols)
        fio_fail("Could Not Allocate Memory\n");
    m = fio_alloc(rows, sizeof *m);
    m[0] = fio_alloc(rows * cols, sizeof **m);
    for (r = 1; r < rows; r++)
        m[r] = m[r - 1] + cols;
    return m;
}

/* Read exactly want bytes from fd, looping over short reads; exits if the
 * file ends early or read() reports an error. */
static void fio_read_exact(int fd, void *dst, size_t want)
{
    char *cursor = dst;

    while (want > 0) {
        ssize_t got = read(fd, cursor, want);

        if (got <= 0)
            fio_fail("Error: Could Not Read Input File\n");
        cursor += got;
        want -= (size_t)got;
    }
}

/* Read one complete line into *line, growing the buffer (and *cap) in
 * MAX_CHAR_PER_LINE steps when the line does not fit.
 * Returns 1 if a line was read, 0 at end of file. */
static int fio_next_line(FILE *fp, char **line, int *cap)
{
    size_t used;

    if (fgets(*line, *cap, fp) == NULL)
        return 0;
    used = strlen(*line);

    /* A full buffer that does not end in '\n' means fgets() stopped early. */
    while (used == (size_t)*cap - 1 && (*line)[used - 1] != '\n') {
        char *bigger;

        if (*cap > FIO_LINE_CAP_LIMIT - MAX_CHAR_PER_LINE)
            fio_fail("Error: Input Line Too Long\n");
        bigger = realloc(*line, (size_t)*cap + MAX_CHAR_PER_LINE);
        if (bigger == NULL)
            fio_fail("Could Not Allocate Memory\n");
        *line = bigger;
        *cap += MAX_CHAR_PER_LINE;

        if (fgets(*line + used, *cap - (int)used, fp) == NULL) {
            (*line)[used] = '\0';   /* EOF exactly at the buffer boundary */
            break;
        }
        used += strlen(*line + used);
    }
    return 1;
}

/* Load a raw binary file: two ints (object count, coordinate count)
 * followed by count*coords floats stored object after object. */
static float **fio_load_binary(const char *filename, int *numObjs,
                               int *numCoords)
{
    float **objs;
    size_t rows, cols;
    int fd;

    fprintf(stderr, "Trying to read from binary file: %s", filename);
    fd = open(filename, O_RDONLY);      /* no mode argument without O_CREAT */
    if (fd == -1)
        fio_fail("Error: Input File Not Found\n");

    fio_read_exact(fd, numObjs, sizeof *numObjs);
    fio_read_exact(fd, numCoords, sizeof *numCoords);
    if (*numObjs < 0 || *numCoords < 0)
        fio_fail("Error: Invalid Input File Header\n");

    rows = (size_t)*numObjs;
    cols = (size_t)*numCoords;
    objs = fio_new_float_matrix(rows, cols);
    fio_read_exact(fd, objs[0], rows * cols * sizeof **objs);

    fprintf(stderr, " ... Input read successfully!\n");
    close(fd);
    return objs;
}

/* Load an ASCII file with one object per line.  The first token of a line
 * (delimited by space, tab or newline) is an id and is skipped; the remaining
 * tokens (delimited by space, comma, tab or newline) are the coordinates.
 * Lines without any token are ignored. */
static float **fio_load_ascii(const char *filename, int *numObjs,
                              int *numCoords)
{
    FILE *fp;
    char *line;
    float **objs;
    int cap = MAX_CHAR_PER_LINE;
    int count = 0, coords = 0, row = 0, col;

    fprintf(stderr, "Trying to read from ASCII file: %s", filename);
    fp = fopen(filename, "r");
    if (fp == NULL)
        fio_fail("Error: Input File Not Found\n");
    line = fio_alloc((size_t)cap, 1);

    /* Pass 1: count the non-blank lines (this also sizes the line buffer). */
    while (fio_next_line(fp, &line, &cap)) {
        if (strtok(line, " \t\n") != NULL)
            count++;
    }
    rewind(fp);

    /* Pass 2: the first non-blank line fixes the number of coordinates. */
    while (fio_next_line(fp, &line, &cap)) {
        if (strtok(line, " \t\n") == NULL)
            continue;
        while (strtok(NULL, " ,\t\n") != NULL)
            coords++;
        break;
    }
    rewind(fp);

    /* Pass 3: parse every object into the matrix. */
    objs = fio_new_float_matrix((size_t)count, (size_t)coords);
    while (row < count && fio_next_line(fp, &line, &cap)) {
        if (strtok(line, " \t\n") == NULL)
            continue;
        for (col = 0; col < coords; col++) {
            const char *tok = strtok(NULL, " ,\t\n");

            if (tok == NULL)
                fio_fail("Error: Input Line Has Too Few Coordinates\n");
            objs[row][col] = (float)strtod(tok, NULL);
        }
        row++;
    }

    fprintf(stderr, " ... Input read successfully!\n");
    fclose(fp);
    free(line);

    *numObjs = count;
    *numCoords = coords;
    return objs;
}

/*
 * Function: file_read
 * -------------------
 * Loads the input data set into memory.
 *
 *   isBinaryFile  non-zero: raw binary input (two int header fields followed
 *                 by float data); zero: ASCII input, one object per line
 *                 with a leading id token that is skipped
 *   filename      path of the input file
 *   numObjs       out: number of objects read
 *   numCoords     out: number of coordinates per object
 *
 * Values are read at float precision and then widened to double.
 *
 * Returns a numObjs x numCoords matrix laid out as a row-pointer table over
 * one contiguous slab; the caller releases it with free(result[0]) followed
 * by free(result).  An empty input yields *numObjs == 0 and a matrix that is
 * still valid to free.
 *
 * On any failure (file missing, truncated or malformed input, out of memory)
 * a message is written to stderr and the process exits with EXIT_FAILURE.
 */
double** file_read(int   isBinaryFile,  /* flag: 0 or 1 */
                   char *filename,      /* input file name */
                   int  *numObjs,       /* count of data objects (local) */
                   int  *numCoords)     /* count of coordinates */
{
    float **raw;
    double **result;
    size_t rows, cols, total, k;

    if (isBinaryFile)
        raw = fio_load_binary(filename, numObjs, numCoords);
    else
        raw = fio_load_ascii(filename, numObjs, numCoords);

    rows = (size_t)*numObjs;
    cols = (size_t)*numCoords;
    total = rows * cols;

    /* Both matrices are contiguous, so one flat pass widens every value. */
    result = fio_new_double_matrix(rows, cols);
    for (k = 0; k < total; k++)
        result[0][k] = raw[0][k];

    free(raw[0]);
    free(raw);
    return result;
}