// textread.cc
// Source code from the octave-io package.

/* Copyright (C) 2005 Stefan van der Walt <stefan@sun.ac.za>

   Redistribution and use in source  and binary forms, with or without
   modification, are permitted  provided that the following conditions
   are met:

   1. Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above
      copyright notice, this list of conditions and the following
      disclaimer in the documentation and/or other materials provided
      with the distribution.

  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
  IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
  IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#include <octave/oct.h>
#include <octave/Cell.h>
#include <octave/quit.h>

#include <string>
#include <sstream>
#include <iostream>
#include <fstream>

#define BUFFER_SIZE 4096

/*
 * Manage all information relevant to the text file read.  This includes
 * file I/O and the format descriptions of the different fields.
 */
00042 class TextFile
{
public:
    TextFile(std::string filename)
      : _lines(0)
    {
      this->filename = filename;
      open();
    }

    ~TextFile()
    {
      if (data.is_open()) {
          data.close();
      }
    }

    bool
    open()
    {
      data.open(filename.c_str());
      if (!data) {
          error("textread: couldn't open data file %s", filename.c_str());
          return false;
      }
      return true;
    }

    void
    close()
    {
      data.close();
    }

    void
    ignore_whitespace()
    {
      data >> std::skipws;
    }

    long unsigned int
    lines()
    {
      if (_lines == 0) {

          std::ifstream tmpdata(filename.c_str());
          if (!tmpdata) { 
            warning("textread: couldn't open file for line counting");
            return 0;
          }

          char buf[BUFFER_SIZE];
          while (!tmpdata.eof()) {
            tmpdata.getline(buf, BUFFER_SIZE);
            if (std::string(buf).length() != 0) {
                _lines++;
            }
          }
          
          tmpdata.close();
      }

      return _lines;
    }
    
    template <typename container_type, typename column_type>
    void
    set_container_data(std::vector<container_type> &v, const unsigned int &field, const long unsigned int &record)
    {
      column_type p;
      line >> p;

      if (!data) {
          return;
      }
      
      v[field](0, record) = p;
    }

    void
    skip_column()
    {
      std::vector<Cell> a;
      Cell c = Cell(1,1);
      a.push_back(c);
      set_container_data<Cell, std::string>(a, 0, 0);
    }
    
    void
    readline()
    {
      data.getline(buffer, BUFFER_SIZE);
      if (std::string(buffer).length() != 0) {
          line.str(buffer);
          line.clear();
      }
    }

    bool
    is_valid() {
      return data ? true : false;
    }

    void
    set_format(const std::string &fmt) {
      std::istringstream format_s(fmt);
      while (format_s && !format_s.eof()) {
          std::string p;
          format_s >> p;
          columns.push_back(p);
      }
    }

    /// Column format descriptor
00156     std::vector<std::string> columns;

private:
    std::string filename;
    std::ifstream data;

    char buffer[BUFFER_SIZE];
    std::istringstream line;

    unsigned long int _lines;
};


/* Return true when FMT is a column specifier whose values are read as
   numbers and returned in a matrix. */
static bool
textread_is_numeric_column(const std::string &fmt)
{
    return (fmt == "%d") || (fmt == "%f") || (fmt == "%u");
}

DEFUN_DLD(textread, args, ,
"-*- texinfo -*-\n\
@deftypefn {Loadable Function} {[@var{a}, @var{b}, @var{c}, ...] =} textread (@var{filename}, @var{format}[, @var{N}])\n\
@deftypefnx {Loadable Function} {[@var{a}, @var{b}, @var{c}, ...] =} textread (..., @var{prop}, @var{value})\n\
\n\
Read data from the columns of a text file.\n\
\n\
Columns containing strings are returned as cell-arrays, while numeric values\n\
are returned as matrices.\n\
\n\
The string @var{format} describes the different columns of the text file and\n\
is repeated @var{N} times.  It may contain the following specifiers:\n\
\n\
@table @code\n\
@item %s\n\
for a string,\n\
\n\
@item %d,%f\n\
for a double, floating-point or integer number and\n\
\n\
@item %*\n\
to ignore a column.\n\
@end table\n\
\n\
For example, the textfile containing\n\
\n\
@example\n\
@group\n\
Bunny Bugs   5.5\n\
Duck Daffy  -7.5e-5\n\
Penguin Tux   6\n\
@end group\n\
@end example\n\
\n\
can be read using\n\
\n\
@example\n\
@code{[a,b,c] = textread(\"test.txt\", \"%s %s %f\").}\n\
@end example\n\
\n\
Currently implemented @var{prop} arguments are:\n\
@itemize \n\
@item \"headerlines\": \n\
@var{value} represents the number of header lines to skip.\n\
@end itemize \n\
@end deftypefn\n\
@seealso{load, dlmread, fscanf}")
{
    octave_value_list retval;
    const int nargin = args.length();

    if (nargin < 2) {
      print_usage ();
      return retval;
    }

    std::string filename = args(0).string_value();
    std::string format = args(1).string_value();
    unsigned int headerlines = 0;

    // Optional third argument: a non-string value is the repeat count N,
    // in which case the property/value pairs start one argument later.
    int repeat = 0;
    int param_arg = 2;

    if ((nargin > 2) && !args(2).is_string()) {
      repeat = args(2).int_value();
      param_arg = 3;
    }

    // Property/value pairs.
    for (int i = param_arg; i < nargin-1; i += 2) {
      std::string prop = args(i).string_value();

      if (prop == "headerlines") {
        // Reject negative counts instead of letting them wrap around in
        // the unsigned headerlines variable.
        const int value = args(i+1).int_value();
        if (value < 0)
          error("textread: headerlines must be non-negative");
        else
          headerlines = value;
      }
      else
        error("Unknown property %s.", prop.c_str());
    }

    if (error_state) {
      error("Invalid argument specified");
      print_usage ();
      return retval;
    }

    TextFile input(filename);
    if (!input.is_valid()) {
      return retval;
    }

    // Build the full format: the given format repeated `repeat` times
    // (at least once), with a blank between repetitions.
    format += " ";
    std::string repeated_fmt(format);
    for (int i = 0; i < (repeat-1); i++) {
      repeated_fmt += format;
    }

    input.set_format(repeated_fmt);
    input.ignore_whitespace();

    long unsigned int nr_rows = input.lines();

    if (nr_rows == 0) {
        return octave_value (Matrix ());
    }

    // nr_rows represents the number of lines in the file minus the ignored
    // header lines.  Clamp at zero: an unsigned subtraction would wrap to a
    // huge row count when more header lines are requested than were counted.
    nr_rows = (headerlines < nr_rows) ? (nr_rows - headerlines) : 0;

    // One entry per format column in both vectors; the entry that does not
    // match the column type stays an empty placeholder so that indices
    // line up with input.columns.
    std::vector<Matrix> matrix_output;
    std::vector<Cell> cell_output;

    const unsigned int nr_columns = input.columns.size();

    for (unsigned int i = 0; i < nr_columns; i++) {
      if (textread_is_numeric_column(input.columns[i])) {
          // Storage matrix
          matrix_output.push_back(Matrix(1, nr_rows, 0));

          // Placeholder
          cell_output.push_back(Cell());
      }
      else if (input.columns[i] == "%s") {
          cell_output.push_back(Cell(1, nr_rows));
          matrix_output.push_back(Matrix());
      }
      else {
          // Ignored column: placeholders only.
          cell_output.push_back(Cell());
          matrix_output.push_back(Matrix());
      }
    }

    long unsigned int row = 0;
    try
      {
          // Skip the header lines.
          while ((row < headerlines) && input.is_valid()) {
            input.readline();
            row++;
          }

          row = 0;

          // Read one record per line, one field per format column.
          while ((row < nr_rows) && input.is_valid()) {
            input.readline();

            for (unsigned int i = 0; i < nr_columns; i++) {
                OCTAVE_QUIT;

                if (textread_is_numeric_column(input.columns[i])) {
                  input.set_container_data<Matrix, double>(matrix_output, i, row);
                } else if (input.columns[i] == "%s") {
                  input.set_container_data<Cell, std::string>(cell_output, i, row);
                } else {
                  input.skip_column();
                }
            }
            row++;
          }
      }
    catch (const std::exception &e)
      {
          // Caught by reference so that what() of the dynamic exception
          // type is reported rather than that of a sliced copy.
          error("textread: cannot read from %s (%s)", filename.c_str(), e.what());
          return retval;
      }

    // Return every non-placeholder column as a column vector / cell column.
    dim_vector dim(1, nr_rows);
    for (unsigned int i = 0; i < nr_columns; i++) {
      long unsigned int cl = cell_output[i].length();
      long unsigned int ml = matrix_output[i].length();

      if ((cl == 0) && (ml == 0)) {
          continue;
      }
      else if (cl == 0) {
          matrix_output[i].resize(1, nr_rows);
          retval.append(octave_value(matrix_output[i].transpose()));
      } else {
          cell_output[i].resize(dim);
          retval.append(octave_value(cell_output[i].transpose()));
      }
    }

    input.close();

    return retval;
}

// Generated by Doxygen 1.6.0