Source Code

< Back to The Program | Forward to Program Output >

/* start read.c */

/*******************************************************************************
*  Name.......: read.c                                                         *
*                                                                              *
*  Description: C program to test text file reading.                           *
*                                                                              *
*  Author.....: Scott Brueckner (99001; COP2222 E002; Tue, 7:00-9:45 pm)       *
*                                                                              *
*  Date.......: 11/09/1999                                                     *
*                                                                              *
*  Arguments..: int   argc   = Count of command line arguments                 *
*               char *argv[] = Array of pointers to command line arguments     *
*                                argv[0] = path to executable                  *
*                                argv[1] = name of file to process             *
*                                argv[2] = file open mode                      *
*                                            t = text                          *
*                                            b = binary                        *
*                                argv[3] = read function                       *
*                                            fscanf, fgets, fgetc, fread       *
*                                                                              *
*  Return.....: int: 0 = normal completion                                     *
*                    1 = error occurred                                        *
*                                                                              *
*  Compilers..: Visual C++ 6.0   (Win32)                                       *
*               Borland C++ 4.52 (16-bit DOS)                                  *
*               GNU gcc 2.7.2.1  (Linux)                                       *
*                                                                              *
*  Notes......: This program opens the text file specified in argv[1] in the   *
*               mode (text or binary) specified in argv[2] and reads it using  *
*               the C function specified in argv[3]. It prints out the         *
*               results line-by-line in a modified "hex dump."                 *
*                                                                              *
*               The allowable C read functions (fscanf(), fgets(), fgetc(),    *
*               and fread()) are implemented in separate functions in this     *
*               program (T_fscanf(), T_fgets(), T_fgetc(), and T_fread(),      *
*               respectively).                                                 *
*                                                                              *
*               All of the functions (except T_fread()) are called by          *
*               dereferencing a pointer to the appropriate function. Each      *
*               function (except T_fread()) returns the next line of the text  *
*               file, and the overall logic is controlled in main(). The       *
*               T_fread() function is "stand-alone" and contains its own       *
*               logic for traversing the file.                                 *
*                                                                              *
*               This is an academic exercise for demonstration and testing     *
*               purposes. It contains some limitations that are inappropriate  *
*               in the "real world." These are noted in the comments. Also,    *
*               this program doesn't "do" anything with the information in     *
*               the text file; it simply displays exactly what it read.        *
*                                                                              *
*               The sample text files included with this program are examples  *
*               of delimited data files. In a "real" application, you would    *
*               need to perform additional processing, such as removing        *
*               carriage-return and line-feed characters, splitting the lines  *
*               into individual fields, removing the quotes from the text      *
*               strings, and validating the resulting data. After that, you'd  *
*               likely need to write the data back to disk in some other       *
*               format, such as a specific database.                           *
*******************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

#define CR 13            /* Decimal code of Carriage Return char */
#define LF 10            /* Decimal code of Line Feed char */
#define EOF_MARKER 26    /* Decimal code of DOS end-of-file marker */
#define MAX_REC_LEN 1024 /* Maximum size of input buffer */

/* Read functions */
int T_fscanf(FILE *InputFile, char *ReadBuffer);
int T_fgets(FILE *InputFile, char *ReadBuffer);
int T_fgetc(FILE *InputFile, char *ReadBuffer);
int T_fread(FILE *InputFile);

/* Output functions */
void PrintHeader(char *CommandLineArgs[], long FileLength);
void PrintLine(
                char *TextReadFromFile,
                long  CurrentLineNumber,
                long  LengthOfLine,
                long  OffsetOfStartOfCurrentLine,
                long *OffsetOfEndOfPreviousLine,
                int   OffsetError
              );

/* Utility functions */
void HR(int LengthOfHorizontalRule);
void syntax(void);

/******************************************************************************/
int main(int argc, char *argv[])
/******************************************************************************/
{
  /*
  *  Validates the command line, opens the input file in the requested
  *  mode, prints the header, and then dumps the file with the requested
  *  read function.
  *
  *  Returns 0 on normal completion and 1 if the arguments are invalid,
  *  the file cannot be opened, or a read error occurs.
  */

  /* Array of pointers to read functions */
  int (*GetLine[3])(FILE*, char*) = { T_fscanf, T_fgets, T_fgetc };

  int   iExitCode = 0;           /* Process exit code (0 = okay, 1 = error) */
  int   iOpenErr;                /* Copy of errno taken after fopen() fails */
  int   iReadMode;               /* Index into *GetLine[] array (3 = fread) */
  int   iReadReturn;             /* Result of read function */
  int   isFilePosErr;            /* Boolean indicating file offset error */
  long  lFileLen;                /* Length of file */
  long  lLastFilePos;            /* Byte offset of end of previous line */
  long  lLineCount;              /* Line count accumulator */
  long  lLineLen;                /* Length of current line */
  long  lThisFilePos;            /* Byte offset of start of current line */
  char  szReadLine[MAX_REC_LEN]; /* Input buffer */
  FILE *inputFilePtr;            /* Pointer to input file */

  if (argc < 4)                  /* All arguments are required */
  {
    syntax();
    return 1;
  }

  /* Set index into function pointer array based on command line */
  if (strcmp(argv[3], "fscanf") == 0)
    iReadMode = 0;

  else if (strcmp(argv[3], "fgets") == 0)
    iReadMode = 1;

  else if (strcmp(argv[3], "fgetc") == 0)
    iReadMode = 2;

  else if (strcmp(argv[3], "fread") == 0)
    iReadMode = 3;               /* Handled separately; never indexes GetLine */

  else /* Oops */
  {
    syntax();
    return 1;
  }

  if (strcmp(argv[2], "t") == 0)
    inputFilePtr = fopen(argv[1], "r");  /* Open in TEXT mode */

  else if (strcmp(argv[2], "b") == 0)
    inputFilePtr = fopen(argv[1], "rb"); /* Open in BINARY mode */

  else /* Oops */
  {
    syntax();
    return 1;
  }

  if (inputFilePtr == NULL)              /* Could not open file */
  {
    iOpenErr = errno;                    /* Save it before it can be changed */
    printf("Error opening %s: %s (%d)\n",
           argv[1], strerror(iOpenErr), iOpenErr);
    return 1;
  }

  fseek(inputFilePtr, 0L, SEEK_END);     /* Position to end of file */
  lFileLen = ftell(inputFilePtr);        /* Get file length */
  rewind(inputFilePtr);                  /* Back to start of file */

  PrintHeader(argv, lFileLen);           /* Print the header info */

  /*
  *  The implementation of the fread() function in this program is
  *  different enough from the other read methods that it's easiest
  *  to just call it explicitly and quit.
  */

  if (iReadMode == 3)                    /* Use fread() */
  {
    iReadReturn = T_fread(inputFilePtr); /* Read the file and print output */
    fclose(inputFilePtr);                /* Close it */
    HR(80);                              /* Print a separator line */
    return (iReadReturn ? 0 : 1);        /* Exit with success or error code */
  }

  /* At this point, we'll be using fscanf(), fgets(), or fgetc() */

  lLineCount   =  0L; /* No lines read yet */
  lLastFilePos = -1L; /* So first line doesn't show an offset error */

  while (1)
  {
    isFilePosErr = 0;                   /* Clear error flag */
    lThisFilePos = ftell(inputFilePtr); /* Offset of start of line */
                                           /* This will not necessarily be */
                                           /* the absolute file offset if  */
                                           /* the file is opened in TEXT   */
                                           /* mode.                        */

    if (lThisFilePos != lLastFilePos + 1)  /* Set error flag if not next byte */
      isFilePosErr = 1;

    szReadLine[0] = '\0';                  /* Clear buffer for next line */

    /* Read the next line with the appropriate read function */
    iReadReturn = (*GetLine[iReadMode])(inputFilePtr, szReadLine);

    if (iReadReturn < 0)  /* Error reading line */
    {
      /*
      *  Any system error code generated in the read functions is returned
      *  as a negative number so we can use positive numbers to indicate
      *  success. Therefore, we need to 're-negative' the RETURNED value
      *  (not errno itself) to convert it back to the original error code.
      *
      *  Error codes are implementation-dependent, but as far as I know,
      *  they are always positive integers.
      */

      printf("Error reading %s: %s (%d)\n",
             argv[1], strerror(-iReadReturn), -iReadReturn);
      iExitCode = 1;      /* Report the failure to the caller */
      break;
    }

    lLineLen = (long)strlen(szReadLine); /* Get length of line */

    if (lLineLen)                  /* Got some data */
    {
      ++lLineCount;                /* Increment line counter */

      /* Print the line's detail */
      PrintLine(szReadLine, lLineCount, lLineLen,
                lThisFilePos, &lLastFilePos, isFilePosErr);
    }

    if (iReadReturn == 0)          /* End of file reached */
    {
      lThisFilePos = ftell(inputFilePtr); /* EOF offset */
      HR(80); /* Print a separator line */
      printf("EOF at offset %#x (dec %ld)\n",
             (unsigned int)lThisFilePos, lThisFilePos);
      break;
    }

  }/* end while (1) */

  fclose(inputFilePtr); /* Close the file */
  HR(80);               /* Print a separator line */
  return iExitCode;     /* 0 = success, 1 = read error */

} /* end main() */

/******************************************************************************/
int T_fscanf(FILE *input, char *output) /* Use:       Read text file w/fscanf */
                                        /*                                    */
                                        /* Arguments: FILE *input             */
                                        /*              Pointer to input file */
                                        /*            char *output            */
                                        /*              Read buffer (at least */
                                        /*              MAX_REC_LEN bytes)    */
                                        /*                                    */
                                        /* Return:    int                     */
                                        /*              <0 = error            */
                                        /*               0 = end of file      */
                                        /*              >0 = # of fields read */
/******************************************************************************/
{
  /*
  *  The fscanf() function has some limitations that usually make it
  *  inappropriate for reading text files, the main one being that
  *  it will stop reading at the first space character. It is included
  *  here for completeness.
  *
  *  A bare "%s" conversion places no limit on the number of characters
  *  stored, so the format is built with an explicit field width of
  *  MAX_REC_LEN - 1. A longer run of non-space characters is returned
  *  in pieces over successive calls instead of overrunning the buffer.
  */

  char szFormat[32]; /* Holds "%<width>s"; ample room for any int width */
  int  iReturn;      /* Result of fscanf() */

  sprintf(szFormat, "%%%ds", MAX_REC_LEN - 1); /* e.g. "%1023s" */

  iReturn = fscanf(input, szFormat, output);   /* Read from file */

  if (iReturn == EOF)     /* Nothing converted: end of file or read error */
  {
    if (ferror(input))    /* Error reading */
      return -errno;      /* Convert code to negative number */

    return 0;             /* End of file reached */
  }

  return iReturn;

} /* end T_fscanf() */

/******************************************************************************/
int T_fgets(FILE *input, char *output) /* Use:       Read next line of text   */
                                       /*            file with fgets          */
                                       /*                                     */
                                       /* Arguments: FILE *input              */
                                       /*              Pointer to input file  */
                                       /*            char *output             */
                                       /*              Read buffer            */
                                       /*                                     */
                                       /* Return:    int                      */
                                       /*              <0 = error             */
                                       /*               0 = end of file       */
                                       /*               1 = line read okay    */
/******************************************************************************/
{
  /*
  *  The fgets() function will read up to 'MAX_REC_LEN' - 1 characters
  *  (the buffer is 1K in this program), but will stop after the first
  *  newline (which is a LF in the three compilers tested).
  *
  *  If the line length is greater than that, we won't get the entire
  *  line. A real application should take this into account.
  *
  *  A return of 0 does not mean the buffer is empty: when the last line
  *  of the file has no trailing newline, that line is in 'output' and 0
  *  is returned on the same call.
  */

  if (fgets(output, MAX_REC_LEN, input) == NULL) /* Nothing was read */
  {
    /*
    *  fgets() leaves the buffer untouched at end of file and in an
    *  unspecified state after an error, so hand back an empty string
    *  rather than whatever the caller had in there before.
    */

    output[0] = '\0';

    if (ferror(input))               /* Error reading */
      return -errno;                 /* Convert code to negative number */

    return 0;                        /* End of file reached */
  }

  if (feof(input))                   /* Last line, no trailing newline */
    return 0;

  return 1;

} /* end T_fgets() */

/******************************************************************************/
int T_fgetc(FILE *input, char *output) /* Use:       Read next line of text   */
                                       /*            file with fgetc          */
                                       /*                                     */
                                       /* Arguments: FILE *input              */
                                       /*              Pointer to input file  */
                                       /*            char *output             */
                                       /*              Read buffer            */
                                       /*                                     */
                                       /* Return:    int                      */
                                       /*              <0 = error             */
                                       /*               0 = end of file       */
                                       /*               1 = line read okay    */
/******************************************************************************/
{
  /*
  *  This function repeatedly calls fgetc(), reading one character at
  *  a time, until it encounters the first character FOLLOWING a CR
  *  or LF that is NOT a CR or LF (or reaches the end of file). It
  *  assumes that everything up to (but NOT including) this character
  *  is part of the current line.
  *
  *  As a result, this function will NOT read BLANK lines correctly.
  *  It will include ALL CRs and LFs as the trailing characters
  *  on the current line. A real application should take this into
  *  account.
  *
  *  The read buffer is assumed to hold MAX_REC_LEN bytes. Once
  *  MAX_REC_LEN - 1 characters have been stored, the line is cut off
  *  and the remainder is returned by the following call(s).
  *
  *  This function works okay, but reading a file one byte at a time
  *  is rather inefficient. See the T_fread() function in this program
  *  for a better approach.
  */

  int  iReturn   = 1;  /* Return value (Innocent until proved guilty) */
  int  iThisChar;      /* Current character */
  int  isNewline = 0;  /* Boolean indicating we've read a CR or LF */
  long lIndex    = 0L; /* Index into read buffer */

  while (1) /* Will exit on error, end of line, full buffer, or end of file */
  {
    iThisChar = fgetc(input);     /* Read the next character */

    if (iThisChar == EOF)         /* Read error or end of file */
    {
      if (ferror(input))          /* Error reading */
        iReturn = -errno;         /* Convert to negative number */

      /*
      *  If we've already read characters on this line, report it as a
      *  normal line. The stream stays at end of file, so the NEXT call
      *  to this function reads nothing and returns 0.
      */

      else if (lIndex == 0)       /* Nothing read but EOF; we're done */
        iReturn = 0;

      break;
    }

    if (lIndex >= MAX_REC_LEN - 1) /* No room left (keep 1 byte for '\0') */
    {
      ungetc(iThisChar, input);    /* Save this char for the next call */
      break;
    }

    if (iThisChar == CR || iThisChar == LF) /* This char IS a CR or LF */
      isNewline = 1;                        /* Set flag */

    else if (isNewline)           /* First char after the CRs and LFs */
    {
      ungetc(iThisChar, input);   /* Put char back in stream */
      break;                      /* Done reading this line */
    }

    output[lIndex++] = (char)iThisChar;     /* Put char in read buffer */

  } /* end while (1) */

  output[lIndex] = '\0';                    /* Terminate the read buffer */
  return iReturn;

} /* end T_fgetc() */

/******************************************************************************/
int T_fread(FILE *input) /* Use:       Read text file using fread()           */
                         /*                                                   */
                         /* Arguments: FILE *input                            */
                         /*              Pointer to input file                */
                         /*                                                   */
                         /* Return:    int                                    */
                         /*              0 = error                            */
                         /*              1 = success                          */
/******************************************************************************/
{
  /*
  *  This function reads the ENTIRE FILE into a character array and
  *  then parses the array to determine the contents of each line.
  *  This is lightning-fast, but may not work for large files. (See the
  *  notes preceding the call to calloc() in this function.)
  *
  *  This routine combines the functionality of the main() and T_fgetc()
  *  functions in this program (although, unlike T_fgetc(), it parses
  *  the lines from memory rather than directly from disk). I wrote it
  *  this way so I could keep everything in one source file and easily
  *  share the output routines.
  *
  *  As in the T_fgetc() function, this function will "collapse" any
  *  blank lines. This may not be appropriate in a real application.
  *
  *  Parsing stops at the first null character in the array, so a file
  *  containing embedded nulls is only displayed up to that point. A
  *  line longer than MAX_REC_LEN - 1 characters is displayed in pieces.
  */

  int    isNewline;              /* Boolean indicating we've read a CR or LF */
  long   lFileLen;               /* Length of file */
  long   lIndex;                 /* Index into cThisLine array */
  long   lLineCount;             /* Current line number */
  long   lStartPos;              /* Offset of start of current line */
  long   lTotalChars;            /* Total characters read */
  size_t nArrayLen;              /* Length of the string held in cFile */
  char   cThisLine[MAX_REC_LEN]; /* Contents of current line */
  char  *cFile;                  /* Dynamically allocated buffer (entire file) */
  char  *cThisPtr;               /* Pointer to current position in cFile */

  /* Find the file length; ftell() returns -1 if it cannot tell us */
  if (fseek(input, 0L, SEEK_END) != 0 || (lFileLen = ftell(input)) < 0L)
  {
    printf("\nUnable to determine length of file.\n");
    return 0;
  }

  rewind(input);               /* Back to start of file */

  /*
  *  The next line attempts to reserve enough memory to read the
  *  entire file into memory (plus 1 byte for the null-terminator).
  *
  *  The program will simply quit if the memory isn't available.
  *  This normally won't happen on computers that use virtual
  *  memory (such as Windows PCs), but a real application should
  *  make provisions for reading the file in smaller blocks.
  *
  *  We could use malloc() to allocate the memory, but calloc()
  *  has the advantage of initializing all of the bits to 0, so
  *  we don't have to worry about adding the null-terminator
  *  (Essentially, every character initially IS a null-terminator).
  *
  *  Only the POINTER cFile is a local variable. The memory it points
  *  to stays allocated until free() is called, so every exit path
  *  below this point releases it explicitly.
  */

  cFile = calloc((size_t)lFileLen + 1, sizeof(char));

  if (cFile == NULL)
  {
    printf("\nInsufficient memory to read file.\n");
    return 0;
  }

  /*
  *  Read the entire file into cFile. In TEXT mode fewer than lFileLen
  *  characters may arrive (e.g. CR/LF pairs translated to LF); that is
  *  fine because the rest of the array is already zero-filled. Only a
  *  genuine stream error is treated as a failure.
  */

  fread(cFile, sizeof(char), (size_t)lFileLen, input);

  if (ferror(input))
  {
    printf("\nError reading file.\n");
    free(cFile);
    return 0;
  }

  lLineCount  = 0L;
  lTotalChars = 0L;

  cThisPtr    = cFile;              /* Point to beginning of array */

  while (*cThisPtr)                 /* Read until reaching null char */
  {
    lIndex    = 0L;                 /* Reset counters and flags */
    isNewline = 0;
    lStartPos = lTotalChars;

    /* Copy one line, always leaving room for the null-terminator */
    while (*cThisPtr && lIndex < MAX_REC_LEN - 1)
    {
      if (*cThisPtr == CR || *cThisPtr == LF) /* This char IS a CR or LF */
        isNewline = 1;                        /* Set flag */

      else if (isNewline)           /* First char after the CRs and LFs */
        break;                      /* Done with line */

      cThisLine[lIndex++] = *cThisPtr++; /* Add char to output and increment */
      ++lTotalChars;

    } /* end while (*cThisPtr && room in cThisLine) */

    cThisLine[lIndex] = '\0';     /* Terminate the string */
    ++lLineCount;                 /* Increment the line counter */

    /* Print the detail for this line (lIndex is its length) */
    PrintLine(cThisLine, lLineCount, lIndex, lStartPos, NULL, 0);

  } /* end while (*cThisPtr) */

  HR(80); /* Print a separator line */

  /* strlen() returns a size_t; convert it to types the format expects */
  nArrayLen = strlen(cFile);
  printf("Length of file array=%#x (dec %ld)\n",
         (unsigned int)nArrayLen, (long)nArrayLen);

  free(cFile); /* Release the file buffer */

  return 1;

} /* end T_fread() */

/******************************************************************************/
void PrintHeader(char *argv[], long lFileLen) /* Use:       Print header info */
                                              /*                              */
                                              /* Arguments: char *argv[]      */
                                              /*             Command line args*/
                                              /*            long lFileLen     */
                                              /*             Length of file   */
                                              /*                              */
                                              /* Return:    void              */
/******************************************************************************/
{
  const char *pszOpenMode;  /* "Text" or "Binary", chosen from argv[2] */
  const char *pszReadLabel; /* Label preceding the read function name */
  const char *pszReadFunc;  /* Read function name from argv[3], if any */

  /* Anything other than "t" is reported as binary */
  if (strcmp(argv[2], "t") == 0)
    pszOpenMode = "Text";
  else
    pszOpenMode = "Binary";

  /* The read mode is only shown when a fourth argument is present */
  if (argv[3] != NULL)
  {
    pszReadLabel = ", Read mode=";
    pszReadFunc  = argv[3];
  }
  else
  {
    pszReadLabel = "";
    pszReadFunc  = "";
  }

  HR(80); /* Separator line above the header */

  /*
  *  The file length is shown twice: once in hex and once in decimal.
  *  The hex copy is cast to (int) because the Borland compiler couldn't
  *  convert a (long) to hex, so it will be wrong for files larger than
  *  the maximum signed (int).
  */

  printf("File=%s, Size=%#x (dec %ld), Open mode=%s%s%s\n",
         argv[1], (int)lFileLen, lFileLen,
         pszOpenMode, pszReadLabel, pszReadFunc);

} /* end PrintHeader() */

/******************************************************************************/
void PrintLine(char *szReadLine,  long lLineCount,    long lLineLen,
               long lThisFilePos, long *lLastFilePos, int  isFilePosErr)
/******************************************************************************/
/* Use:       Print detail for current line                                   */
/*                                                                            */
/* Arguments: char *szReadLine   = Read buffer containing text line           */
/*            long  lLineCount   = Current line number                        */
/*            long  lLineLen     = Current line length                        */
/*            long  lThisFilePos = Offset of start of current line            */
/*            long *lLastFilePos = Offset of end of current line              */
/*            int   isFilePosErr = True if start of current line is not       */
/*                                   1 greater than end of last line          */
/*                                                                            */
/* Return:    void                                                            */
/******************************************************************************/
{
  /*
  *  lLastFilePos is an OUTPUT argument and may be NULL if the caller is
  *  not interested in the end offset.
  *
  *  Every value printed with a hex format is first cast to (unsigned int),
  *  the type the %x conversion requires. The Borland compiler couldn't
  *  handle converting a (long) to hex, so the hex display will be wrong
  *  for values that don't fit in an (unsigned int).
  */

  char *cPtr;                            /* Pointer to current character */
  char *cEnd = szReadLine + lLineLen;    /* The null terminator, given that */
                                         /* lLineLen is the string length   */

  HR(80); /* Print a separator line */
  printf("LINE %ld, Length=%#x (dec %ld)\n",
         lLineCount, (unsigned int)lLineLen, lLineLen);
  printf(" Offset:");

  if (isFilePosErr)                               /* Indicates offset error */
    printf("*%2x", (unsigned int)lThisFilePos);   /* '*' plus start offset */
  else                                            /* Offset okay */
    printf("%3x", (unsigned int)lThisFilePos);    /* Just the start offset */

  ++lThisFilePos;

  /* Remaining offsets, one per character after the first */
  for (cPtr = szReadLine + 1; cPtr < cEnd; cPtr++)
  {
    printf("%3x", (unsigned int)lThisFilePos);
    ++lThisFilePos;
  }

  if (lLastFilePos != NULL)           /* Set end position if arg passed */
    *lLastFilePos = lThisFilePos - 1;

  printf("\n Hex:   ");

  /*
  *  Print the hex values, including null terminator. Going through
  *  (unsigned char) keeps bytes of 0x80 and above from being
  *  sign-extended (e.g. shown as ffffffe9) where plain char is signed.
  */
  for (cPtr = szReadLine; cPtr <= cEnd; cPtr++)
    printf("%3x", (unsigned int)(unsigned char)*cPtr);

  printf("\n Char:  ");

  /* Print the characters, including null terminator */
  for (cPtr = szReadLine; cPtr <= cEnd; cPtr++)
  {
    switch (*cPtr)
    {
      case 0:                 /* Null terminator */
        printf(" \\0");
        break;

      case CR:                /* Carriage return */
        printf(" cr");
        break;

      case LF:                /* Line feed */
        printf(" lf");
        break;

      case EOF_MARKER:        /* DOS end-of-file marker */
        printf(" em");
        break;

      default:                /* A 'real' character */
        printf("%3c", *cPtr);
        break;

    } /* end switch (*cPtr) */

  } /* end for (cPtr) */

  printf("\n");
  return;

} /* end PrintLine() */

/******************************************************************************/
void HR(int iLen) /* Print a horizontal line of iLen length                   */
/******************************************************************************/
{
  int iRemaining; /* Dashes still to be written */

  /* Nothing but the newline is written when iLen is zero or negative */
  for (iRemaining = iLen; iRemaining > 0; --iRemaining)
    putchar('-');

  putchar('\n');

} /* end HR() */

/******************************************************************************/
void syntax(void) /* Print correct command line syntax                        */
/******************************************************************************/
{
  /* The usage text, one entry per piece, written to stdout in order */
  static const char *const apszUsage[] =
  {
    "\nSyntax: READ FileName OpenMode ReadMode\n\n",
    "  OpenMode = \"t\" (text mode),  or\n",
    "             \"b\" (binary mode)\n\n",
    "  ReadMode = \"fscanf\", or\n",
    "             \"fgets\",  or\n",
    "             \"fgetc\",  or\n",
    "             \"fread\"\n"
  };

  size_t iPiece; /* Index into apszUsage */

  for (iPiece = 0; iPiece < sizeof apszUsage / sizeof apszUsage[0]; iPiece++)
    fputs(apszUsage[iPiece], stdout);

} /* end syntax() */

/* end read.c */
< Back to The Program | ^ Up to Top | Forward to Program Output >