/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*   File:         file_io.c                                                 */
/*   Description:  This program reads point data from a file                 */
/*                 and write cluster output to files                         */
/*   Input file format:                                                      */
/*                 ascii  file: each line contains 1 data object             */
/*                 binary file: first 4-byte integer is the number of data   */
/*                 objects and 2nd integer is the no. of features (or        */
/*                 coordinates) of each object                               */
/*                                                                           */
/*   Author:  Wei-keng Liao                                                  */
/*            ECE Department Northwestern University                         */
/*            email: wkliao@ece.northwestern.edu                             */
/*   Copyright, 2005, Wei-keng Liao                                          */
/*                                                                           */
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

#include <errno.h>      /* errno, EINTR */
#include <limits.h>     /* INT_MAX */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>     /* strtok() */
#include <sys/types.h>  /* open() */
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>     /* read(), close() */

#include "kmeans.h"

#define MAX_CHAR_PER_LINE 128


/*
 * Function: file_read
 * -------------------
 * Function for loading input data into memory.
/* Private helpers for file_read() come first; file_read() itself is last. */

#ifndef MAX_CHAR_PER_LINE
#define MAX_CHAR_PER_LINE 128   /* fallback if the file-level definition is absent */
#endif

/* Print `msg` and a newline on stderr, then terminate with EXIT_FAILURE. */
static void fio_fatal(const char *msg)
{
    fprintf(stderr, "%s\n", msg);
    exit(EXIT_FAILURE);
}

/*
 * Read exactly `count` bytes from `fd` into `buf`.
 * read() may legitimately return fewer bytes than requested, so keep calling
 * it until the request is satisfied; a call interrupted by a signal is retried.
 * Returns 0 on success, -1 on I/O error or premature end of file.
 */
static int fio_read_fully(int fd, void *buf, size_t count)
{
    char *dst = buf;

    while (count > 0) {
        ssize_t got = read(fd, dst, count);

        if (got < 0 && errno == EINTR)
            continue;
        if (got <= 0)
            return -1;
        dst   += got;
        count -= (size_t)got;
    }
    return 0;
}

/*
 * Allocate a numObjs x numCoords matrix of doubles: one contiguous data block
 * (pointed to by m[0]) plus an array of row pointers into it.
 * Terminates the process if either count is not positive, if the size would
 * overflow size_t, or if memory cannot be obtained.
 */
static double **fio_alloc_matrix(int numObjs, int numCoords)
{
    const size_t size_max = (size_t)-1;
    size_t       rows, cols, i;
    double     **m;

    if (numObjs <= 0 || numCoords <= 0)
        fio_fatal("Error: Input File Contains No Data");

    rows = (size_t)numObjs;
    cols = (size_t)numCoords;
    if (rows > size_max / sizeof *m || cols > size_max / sizeof **m / rows)
        fio_fatal("Could Not Allocate Memory");

    m = malloc(rows * sizeof *m);
    if (m == NULL)
        fio_fatal("Could Not Allocate Memory");
    m[0] = malloc(rows * cols * sizeof **m);
    if (m[0] == NULL)
        fio_fatal("Could Not Allocate Memory");

    for (i = 1; i < rows; i++)
        m[i] = m[i - 1] + cols;
    return m;
}

/*
 * Read one complete text line of any length from `fp` into the heap buffer
 * *line (capacity *cap), enlarging the buffer in MAX_CHAR_PER_LINE steps when
 * a line does not fit.  Both *line and *cap are updated on growth.
 * Returns 1 if a line (possibly the last one, without '\n') was read and
 * 0 at end of file.  Terminates the process if the buffer cannot grow.
 */
static int fio_read_line(FILE *fp, char **line, size_t *cap)
{
    size_t used = 0;

    for (;;) {
        char *grown;

        if (fgets(*line + used, (int)(*cap - used), fp) == NULL) {
            (*line)[used] = '\0';
            return used > 0;    /* EOF right after a buffer-filling chunk */
        }
        used += strlen(*line + used);

        /* Done if the newline arrived or fgets stopped before filling up. */
        if ((used > 0 && (*line)[used - 1] == '\n') || used + 1 < *cap)
            return 1;

        /* fgets() takes an int size, so the buffer must stay below INT_MAX. */
        if (*cap > (size_t)(INT_MAX - MAX_CHAR_PER_LINE))
            fio_fatal("Error: Input Line Too Long");
        grown = realloc(*line, *cap + MAX_CHAR_PER_LINE);
        if (grown == NULL)
            fio_fatal("Could Not Allocate Memory");
        *line = grown;
        *cap += MAX_CHAR_PER_LINE;
    }
}

/*
 * Function: file_read
 * -------------------
 * Load a set of data objects from `filename` into a newly allocated
 * (*numObjs) x (*numCoords) matrix of doubles.
 *
 * Binary input (isBinaryFile != 0): two native ints (number of objects, number
 * of coordinates) followed by numObjs*numCoords native floats, object by
 * object.
 *
 * ASCII input (isBinaryFile == 0): every non-blank line is one object.  The
 * first token, delimited by space/tab, is an id and is ignored; the remaining
 * tokens, delimited by space, tab or comma, are the coordinates.  The number
 * of coordinates is taken from the first non-blank line; extra tokens on
 * later lines are ignored.
 *
 * In both formats the values pass through single precision before being
 * widened to double.
 *
 * Returns the matrix; rows are contiguous, so the caller releases it with
 * free(objects[0]); free(objects);.  *numObjs and *numCoords receive the
 * dimensions.  Progress messages go to stderr.
 *
 * Any failure (file cannot be opened, truncated or empty input, a line with
 * too few coordinates, out of memory) prints a message on stderr and ends the
 * process with EXIT_FAILURE; the function never returns NULL.
 */
double** file_read(int   isBinaryFile,  /* flag: 0 or 1 */
                   char *filename,      /* input file name */
                   int  *numObjs,       /* count of data objects (local) */
                   int  *numCoords)     /* count of coordinates */
{
    double **objects;

    if (isBinaryFile) {  /* input file is in raw binary format -------------*/
        int    infile;
        float *raw;
        size_t total, k;

        fprintf(stderr, "Trying to read from binary file: %s", filename);
        infile = open(filename, O_RDONLY);
        if (infile == -1)
            fio_fatal("Error: Input File Not Found");

        if (fio_read_fully(infile, numObjs,   sizeof *numObjs)   != 0 ||
            fio_read_fully(infile, numCoords, sizeof *numCoords) != 0)
            fio_fatal("Error: Could Not Read Input File Header");

        /* Validates the header counts and the total size before any use. */
        objects = fio_alloc_matrix(*numObjs, *numCoords);
        total   = (size_t)*numObjs * (size_t)*numCoords;

        raw = malloc(total * sizeof *raw);
        if (raw == NULL)
            fio_fatal("Could Not Allocate Memory");
        if (fio_read_fully(infile, raw, total * sizeof *raw) != 0)
            fio_fatal("Error: Input File Is Truncated");

        for (k = 0; k < total; k++)
            objects[0][k] = raw[k];

        free(raw);
        fprintf(stderr, " ... Input read successfully!\n");
        close(infile);

    } else {  /* input file is in ASCII format ------------------------------*/
        FILE  *infile;
        char  *line;
        size_t lineCap = MAX_CHAR_PER_LINE;
        int    row = 0;

        fprintf(stderr, "Trying to read from ASCII file: %s", filename);
        infile = fopen(filename, "r");
        if (infile == NULL)
            fio_fatal("Error: Input File Not Found");

        line = malloc(lineCap);
        if (line == NULL)
            fio_fatal("Could Not Allocate Memory");

        /* Pass 1: count the objects; size the rows from the first object. */
        *numObjs   = 0;
        *numCoords = 0;
        while (fio_read_line(infile, &line, &lineCap)) {
            if (strtok(line, " \t\n") == NULL)
                continue;                       /* blank line */
            if (*numObjs == 0) {
                while (strtok(NULL, " ,\t\n") != NULL)
                    (*numCoords)++;
            }
            if (*numObjs == INT_MAX)
                fio_fatal("Error: Too Many Data Objects");
            (*numObjs)++;
        }

        objects = fio_alloc_matrix(*numObjs, *numCoords);

        /* Pass 2: parse the coordinates of every object. */
        rewind(infile);
        while (row < *numObjs && fio_read_line(infile, &line, &lineCap)) {
            int col;

            if (strtok(line, " \t\n") == NULL)
                continue;                       /* blank line */
            for (col = 0; col < *numCoords; col++) {
                const char *tok = strtok(NULL, " ,\t\n");

                if (tok == NULL)
                    fio_fatal("Error: Input Line Has Too Few Coordinates");
                /* Round to float first so ASCII and binary input agree. */
                objects[row][col] = (double)(float)atof(tok);
            }
            row++;
        }
        if (row != *numObjs)
            fio_fatal("Error: Input File Changed While Reading");

        fprintf(stderr, " ... Input read successfully!\n");
        fclose(infile);
        free(line);
    }

    return objects;
}