annotate file_io.c @ 0:0ce47c784647

Initial commit
author Merten Sach <msach@mailbox.tu-berlin.de>
date Tue, 27 Sep 2011 15:08:02 +0200
parents
children
rev   line source
msach@0 1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
msach@0 2 /* File: file_io.c */
msach@0 3 /* Description: This program reads point data from a file */
msach@0 4 /* and writes cluster output to files */
msach@0 5 /* Input file format: */
msach@0 6 /* ascii file: each line contains 1 data object */
msach@0 7 /* binary file: first 4-byte integer is the number of data */
msach@0 8 /* objects and 2nd integer is the no. of features (or */
msach@0 9 /* coordinates) of each object */
msach@0 10 /* */
msach@0 11 /* Author: Wei-keng Liao */
msach@0 12 /* ECE Department Northwestern University */
msach@0 13 /* email: wkliao@ece.northwestern.edu */
msach@0 14 /* Copyright, 2005, Wei-keng Liao */
msach@0 15 /* */
msach@0 16 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
msach@0 17
msach@0 18 #include <stdio.h>
msach@0 19 #include <stdlib.h>
msach@0 20 #include <string.h> /* strtok() */
msach@0 21 #include <sys/types.h> /* open() */
msach@0 22 #include <sys/stat.h>
msach@0 23 #include <fcntl.h>
msach@0 24 #include <unistd.h> /* read(), close() */
msach@0 25
msach@0 26 #include "kmeans.h"
msach@0 27
#define MAX_CHAR_PER_LINE 128   /* initial line-buffer size and growth step */

/* Delimiters that end the leading id token of an ASCII line. */
static const char FILE_READ_ID_DELIMS[]    = " \t\r\n";
/* Delimiters between coordinates; a comma is accepted here but not after the id. */
static const char FILE_READ_COORD_DELIMS[] = " ,\t\r\n";

/* Write msg to stderr and terminate the process with EXIT_FAILURE. */
static void file_read_die(const char *msg)
{
    fputs(msg, stderr);
    exit(EXIT_FAILURE);
}

/*
 * Read exactly `count` bytes from `fd` into `buf`, retrying after short reads.
 * Returns 0 on success, -1 on a read error or premature end of file.
 */
static int file_read_fully(int fd, void *buf, size_t count)
{
    char *dst = buf;

    while (count > 0) {
        ssize_t got = read(fd, dst, count);
        if (got <= 0)
            return -1;
        dst   += got;
        count -= (size_t)got;
    }
    return 0;
}

/*
 * Allocate a rows x cols matrix of doubles as one contiguous data block plus
 * an array of row pointers into it (m[0] is the data block).  At least one
 * row pointer and one element are always allocated so that m[0] is valid and
 * free(m[0]); free(m); works even for an empty matrix.
 * Returns NULL on size overflow or allocation failure.
 */
static double **file_read_alloc_rows(int rows, int cols)
{
    size_t   nrows = (size_t)rows;
    size_t   ncols = (size_t)cols;
    size_t   total;
    size_t   r;
    double **m;

    if (rows < 0 || cols < 0)
        return NULL;
    if (nrows > (size_t)-1 / sizeof *m)
        return NULL;
    if (ncols != 0 && nrows > ((size_t)-1 / sizeof **m) / ncols)
        return NULL;
    total = nrows * ncols;

    m = malloc((nrows ? nrows : 1) * sizeof *m);
    if (m == NULL)
        return NULL;
    m[0] = malloc((total ? total : 1) * sizeof **m);
    if (m[0] == NULL) {
        free(m);
        return NULL;
    }
    for (r = 1; r < nrows; r++)
        m[r] = m[r - 1] + ncols;
    return m;
}

/*
 * Read one complete text line of any length into *buf (allocated on first
 * use, grown in MAX_CHAR_PER_LINE steps; *cap tracks its size).
 * Returns 1 if a line was read, 0 at end of file.
 */
static int file_read_line(FILE *fp, char **buf, size_t *cap)
{
    size_t used = 0;

    if (*buf == NULL) {
        *buf = malloc(MAX_CHAR_PER_LINE);
        if (*buf == NULL)
            file_read_die("Could Not Allocate Memory\n");
        *cap = MAX_CHAR_PER_LINE;
    }
    (*buf)[0] = '\0';

    while (fgets(*buf + used, (int)(*cap - used), fp) != NULL) {
        char *grown;

        used += strlen(*buf + used);
        if (used > 0 && (*buf)[used - 1] == '\n')
            return 1;                   /* got the newline: line complete */
        if (used + 1 < *cap)
            return 1;                   /* buffer not full: last line, no newline */

        /* Buffer is full mid-line: enlarge it and keep appending. */
        grown = realloc(*buf, *cap + MAX_CHAR_PER_LINE);
        if (grown == NULL)
            file_read_die("Could Not Allocate Memory\n");
        *buf  = grown;
        *cap += MAX_CHAR_PER_LINE;
    }
    (*buf)[used] = '\0';
    return used > 0;
}

/*
 * Reentrant equivalent of strtok(): return the next token in *cursor that is
 * delimited by any character of `delims`, NUL-terminate it in place and
 * advance *cursor past it.  Returns NULL when no token is left.
 */
static char *file_read_next_token(char **cursor, const char *delims)
{
    char *tok = *cursor + strspn(*cursor, delims);
    char *end;

    if (*tok == '\0') {
        *cursor = tok;
        return NULL;
    }
    end = tok + strcspn(tok, delims);
    if (*end != '\0')
        *end++ = '\0';
    *cursor = end;
    return tok;
}

/*
 * Load a raw binary file: two native ints (object count, coordinate count)
 * followed by count*coords native floats in row order.
 */
static double **file_read_binary(const char *filename,
                                 int        *numObjs,
                                 int        *numCoords)
{
    int      infile;
    double **objects;
    float   *raw;
    size_t   total;
    size_t   k;

    fprintf(stderr, "Trying to read from binary file: %s", filename);
    /* O_RDONLY never creates the file, so no mode argument is passed. */
    if ((infile = open(filename, O_RDONLY)) == -1)
        file_read_die("Error: Input File Not Found\n");

    if (file_read_fully(infile, numObjs,   sizeof(int)) != 0 ||
        file_read_fully(infile, numCoords, sizeof(int)) != 0)
        file_read_die("Error: Could Not Read Input File Header\n");
    if (*numObjs < 0 || *numCoords < 0)
        file_read_die("Error: Invalid Input File Header\n");

    objects = file_read_alloc_rows(*numObjs, *numCoords);
    if (objects == NULL)
        file_read_die("Could Not Allocate Memory\n");

    /* The size was validated by file_read_alloc_rows(); float is no wider than double. */
    total = (size_t)(*numObjs) * (size_t)(*numCoords);
    raw   = malloc((total ? total : 1) * sizeof *raw);
    if (raw == NULL)
        file_read_die("Could Not Allocate Memory\n");

    if (file_read_fully(infile, raw, total * sizeof *raw) != 0)
        file_read_die("Error: Input File Is Truncated\n");

    /* The file stores floats; the caller works on doubles. */
    for (k = 0; k < total; k++)
        objects[0][k] = raw[k];

    free(raw);
    fprintf(stderr, " ... Input read successfully!\n");
    close(infile);
    return objects;
}

/*
 * Load an ASCII file: every non-blank line is one object made of a leading
 * id token (ignored) followed by its coordinates.  The number of coordinates
 * is taken from the first non-blank line.
 */
static double **file_read_ascii(const char *filename,
                                int        *numObjs,
                                int        *numCoords)
{
    FILE    *infile;
    char    *line = NULL;
    size_t   cap  = 0;
    double **objects;
    int      row;

    fprintf(stderr, "Trying to read from ASCII file: %s", filename);
    if ((infile = fopen(filename, "r")) == NULL)
        file_read_die("Error: Input File Not Found\n");

    /* Pass 1: count the objects and the coordinates of the first one. */
    *numObjs   = 0;
    *numCoords = 0;
    while (file_read_line(infile, &line, &cap)) {
        char *cursor = line;

        if (file_read_next_token(&cursor, FILE_READ_ID_DELIMS) == NULL)
            continue;                               /* blank line */
        if (*numObjs == 0) {
            while (file_read_next_token(&cursor, FILE_READ_COORD_DELIMS) != NULL)
                (*numCoords)++;
        }
        (*numObjs)++;
    }
    rewind(infile);

    objects = file_read_alloc_rows(*numObjs, *numCoords);
    if (objects == NULL)
        file_read_die("Could Not Allocate Memory\n");

    /* Pass 2: parse the coordinates of every object. */
    row = 0;
    while (row < *numObjs && file_read_line(infile, &line, &cap)) {
        char *cursor = line;
        int   col;

        if (file_read_next_token(&cursor, FILE_READ_ID_DELIMS) == NULL)
            continue;
        for (col = 0; col < *numCoords; col++) {
            char *tok = file_read_next_token(&cursor, FILE_READ_COORD_DELIMS);

            if (tok == NULL)
                file_read_die("Error: Input Line Has Too Few Coordinates\n");
            /* strtod(tok, NULL) is what atof() does.  The value is rounded to
             * float precision on purpose: the data used to be staged in a
             * float matrix and results must stay numerically identical. */
            objects[row][col] = (double)(float)strtod(tok, NULL);
        }
        row++;
    }

    fprintf(stderr, " ... Input read successfully!\n");
    fclose(infile);
    free(line);
    return objects;
}

/*
 * Function: file_read
 * -------------------
 * Load the input data set into memory.
 *
 *   isBinaryFile  non-zero: raw binary input; zero: ASCII input
 *   filename      path of the input file
 *   numObjs       out: number of data objects read
 *   numCoords     out: number of coordinates per object
 *
 * Returns a numObjs x numCoords matrix as an array of row pointers into a
 * single contiguous block; release it with free(result[0]); free(result);.
 * The result is never NULL: on any I/O, format or allocation error a message
 * is written to stderr and the process exits with EXIT_FAILURE.
 */
double** file_read(int   isBinaryFile,  /* flag: 0 or 1 */
                   char *filename,      /* input file name */
                   int  *numObjs,       /* count of data objects (local) */
                   int  *numCoords)     /* count of coordinates */
{
    if (isBinaryFile)
        return file_read_binary(filename, numObjs, numCoords);
    return file_read_ascii(filename, numObjs, numCoords);
}
msach@0 162