• Main Page
  • Related Pages
  • Modules
  • Namespaces
  • Classes
  • Files
  • File List
  • File Members

src/DelimitedFile.cpp

Go to the documentation of this file.
00001 /*
00002  *   This file is part of the Standard Portable Library (SPL).
00003  *
00004  *   SPL is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   SPL is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with SPL.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00022 #include <spl/types.h>
00023 #include <spl/Debug.h>
00024 #include <spl/io/DelimitedFile.h>
00025 #include <spl/io/File.h>
00026 #include <spl/io/StreamBuffer.h>
00027 
00028 using namespace spl;
00029 
00030 class DelimitedFileRowParser : public IMemoryValidate
00031 {
00032 private:
00033         // copy constructor doesn't make sense for this class
00034         inline DelimitedFileRowParser(const DelimitedFileRowParser& csv) : m_reader(spl::IStreamPtr()) {}
00035 
00036 protected:
00037         char m_delimchar;
00038         Vector<StringBuffer> m_cols;
00039         TextReader m_reader;
00040         StringBuffer m_line;
00041 
00042 public:
00043         DelimitedFileRowParser(char delimchar, spl::IStreamPtr strm);
00044         virtual ~DelimitedFileRowParser();
00045 
00046         bool Next();
00047 
00048         bool RowHasData() const;
00049         inline int ColCount() const { return m_cols.Count(); }
00050         inline const StringBuffer& CellAt(int col) { return m_cols.ElementAtRef(col); }
00051 
00052 #ifdef DEBUG
00053         virtual void ValidateMem() const;
00054         virtual void CheckMem() const;
00055 #endif
00056 };
00057 
00058 DelimitedFile::DelimitedFile(  )
00059 : m_table()
00060 {
00061 }
00062 
00063 DelimitedFile::DelimitedFile(const DelimitedFile& csv)
00064 : m_table()
00065 {
00066         *this = csv;
00067 }
00068 
00069 
00070 DelimitedFile::~DelimitedFile( )
00071 {
00072         Clear();
00073 }
00074 
00075 DelimitedFile& DelimitedFile::operator =(const DelimitedFile& csv)
00076 {
00077         Clear();
00078         m_table = csv.m_table;
00079         return *this;
00080 }
00081 
00082 void DelimitedFile::Clear()
00083 {
00084         m_table.Clear();
00085 }
00086 
00087 DataRowPtr DelimitedFile::RowAt(int idx) const
00088 { 
00089         if ( idx >= m_table.RowCount() )
00090         {
00091                 return DataRowPtr();
00092         }
00093         return m_table.Row(idx);
00094 }
00095 
00096 DataRowPtr DelimitedFile::operator[] (int idx) const
00097 {
00098         return m_table.Row(idx);
00099 }
00100 
00101 bool DelimitedFile::RowHasData(int rowNum) const
00102 {
00103         DataRowPtr row = m_table.Row(rowNum);
00104 
00105         int count = row->Count();
00106         for ( int x = 0; x < count; x++ )
00107         {
00108                 if ( ! row->Cell(x)->IsUndefined() )
00109                 {
00110                         return true;
00111                 }
00112         }
00113         return false;
00114 }
00115 
00116 DelimitedFilePtr DelimitedFile::Parse( TextReader& reader, char coldelim )
00117 {
00118         Array<byte> buf(512);
00119         DelimitedFilePtr dfile = DelimitedFilePtr(new DelimitedFile());
00120         int lineLen;
00121 
00122         while ( reader.ReadLine(buf, lineLen) )
00123         {
00124                 reader.ValidateMem();
00125                 DataRowPtr row = Parse(buf, lineLen - 1, coldelim);
00126                 dfile->AddRow( row );
00127         }
00128         reader.Close();
00129 
00130         dfile.ValidateMem();
00131         return dfile;
00132 }
00133 
00134 #ifdef DEBUG
00135 void DelimitedFile::ValidateMem() const
00136 {
00137         m_table.ValidateMem();
00138 }
00139 
00140 void DelimitedFile::CheckMem() const
00141 {
00142         m_table.CheckMem();
00143 }
00144 #endif
00145 
00146 DelimitedFilePtr DelimitedFile::Parse( const String& filename, char coldelim )
00147 {
00148         if ( ! File::Exists(filename) )
00149         {
00150                 return DelimitedFilePtr();
00151         }
00152 
00153         IStreamPtr fs = File::OpenText(filename);
00154         TextReader reader(StreamBufferPtr(new StreamBuffer(fs, true)));
00155         DelimitedFilePtr df;
00156 
00157         try
00158         {
00159                 df = Parse(reader, coldelim);
00160         }
00161         catch (Exception *ex)
00162         {
00163                 reader.Close();
00164                 throw ex;
00165         }
00166 
00167         // reader is closed by Parse.
00168         return df;
00169 }
00170 
/**
 *  States of the per-line tokenizer used by the row parsers in this file.
 *  (The identifier keeps its historical spelling because the parsers refer
 *  to it by that name.)
 */
enum DilimitedRowParseState
{
	DRP_STATE_CHARS,        // initial state: scanning an unquoted field
	DRP_STATE_QUOTE,        // inside a double-quoted field
	DRP_STATE_QUOTE_COMMA   // closing quote seen; waiting for the delimiter
};
00178 
00179 DataRowPtr DelimitedFile::Parse( Array<byte>& cstr, int cstrLen, char coldelim )
00180 {
00181         DataColumnPtr col(new DataColumn("dummy"));
00182         DataRowPtr row = DataRowPtr(new DataRow());
00183         enum DilimitedRowParseState state = DRP_STATE_CHARS;
00184 
00185         bool trailingComma = false;
00186         int start = 0;
00187         int len = cstrLen;
00188         for ( int x = 0; x < len; x++ )
00189         {
00190                 char ch = cstr[x];
00191                 switch ( state )
00192                 {
00193                 case DRP_STATE_CHARS:
00194                         if ( start == x && ch == '"' )
00195                         {
00196                                 state = DRP_STATE_QUOTE;
00197                                 start = x + 1;
00198                                 break;
00199                         }
00200                         trailingComma = false;
00201                         if ( ch == coldelim )
00202                         {
00203                                 int cplen = x - start;
00204                                 row->AddColumn(col, VariantPtr(new Variant(String(cstr, start, cplen))));
00205 
00206                                 start = x + 1;
00207                                 trailingComma = true;
00208                         }
00209                         break;
00210                 case DRP_STATE_QUOTE:
00211                         if ( ch == '"' )
00212                         {
00213                                 int cplen = x - start;
00214                                 row->AddColumn(col, VariantPtr(new Variant(String(cstr, start, cplen))));
00215 
00216                                 state = DRP_STATE_QUOTE_COMMA;
00217                                 trailingComma = false;
00218                         }
00219                         break;
00220                 case DRP_STATE_QUOTE_COMMA:
00221                         if ( ch == ',' )
00222                         {
00223                                 start = x + 1;
00224                                 state = DRP_STATE_QUOTE;
00225                                 trailingComma = true;
00226                         }
00227                         break;
00228                 }
00229         }
00230         int cplen = len - start;
00231         if ( cplen > 0 || trailingComma )
00232         {
00233                 row->AddColumn(col, VariantPtr(new Variant(String(cstr, start, cplen))));
00234         }
00235 
00236         row.ValidateMem();
00237         return row;
00238 }
00239 
00240 DelimitedFileRowParser::DelimitedFileRowParser(char delimchar, spl::IStreamPtr strm)
00241 : m_cols(), m_reader(strm), m_delimchar(delimchar), m_line(121)
00242 {
00243 }
00244 
00245 DelimitedFileRowParser::~DelimitedFileRowParser()
00246 {
00247         m_reader.Close();
00248 }
00249 
00250 bool DelimitedFileRowParser::RowHasData() const
00251 {
00252         int colcount = m_cols.Count();
00253         for ( int x = 0; x < colcount; x++ )
00254         {
00255                 if ( m_cols.ElementAtRef(x).Length() > 0 )
00256                 {
00257                         return true;
00258                 }
00259         }
00260         return false;
00261 }
00262 
00263 bool DelimitedFileRowParser::Next()
00264 {
00265         int x;
00266         int colcount = m_cols.Count();
00267         for ( x = 0; x < colcount; x++ )
00268         {
00269                 m_cols.ElementAtRef(x).Clear();
00270         }
00271 
00272         m_line.SetLength(0);
00273         if ( ! m_reader.ReadLine(m_line) )
00274         {
00275                 return false;
00276         }
00277 
00278         if ( 0 == m_cols.Count() )
00279         {
00280                 StringBuffer sb;
00281                 m_cols.Add(sb);
00282         }
00283 
00284         int curcol = 0;
00285 
00286         enum DilimitedRowParseState state = DRP_STATE_CHARS;
00287         bool trailingComma = false;
00288         int len = m_line.Length();
00289         for ( x = 0; x < len; x++ )
00290         {
00291                 char ch = m_line.CharAt(x);
00292                 switch ( state )
00293                 {
00294                 case DRP_STATE_CHARS:
00295                         if ( ch == '"' )
00296                         {
00297                                 state = DRP_STATE_QUOTE;
00298                                 break;
00299                         }
00300                         trailingComma = false;
00301                         if ( ch == m_delimchar )
00302                         {
00303                                 curcol++;
00304                                 if ( curcol >= m_cols.Count() )
00305                                 {
00306                                         StringBuffer sb;
00307                                         m_cols.Add(sb);
00308                                 }
00309                                 trailingComma = true;
00310                         }
00311                         else
00312                         {
00313                                 m_cols.ElementAtRef(curcol).Append( ch );
00314                         }
00315                         break;
00316                 case DRP_STATE_QUOTE:
00317                         if ( ch == '"' )
00318                         {
00319                                 curcol++;
00320                                 if ( curcol >= m_cols.Count() )
00321                                 {
00322                                         StringBuffer sb;
00323                                         m_cols.Add(sb);
00324                                 }
00325                                 state = DRP_STATE_QUOTE_COMMA;
00326                                 trailingComma = false;
00327                         }
00328                         break;
00329                 case DRP_STATE_QUOTE_COMMA:
00330                         if ( ch == ',' )
00331                         {
00332                                 state = DRP_STATE_QUOTE;
00333                                 trailingComma = true;
00334                         }
00335                         break;
00336                 }
00337         }
00338         return true;
00339 }
00340 
00341 #ifdef DEBUG
00342 void DelimitedFileRowParser::ValidateMem() const
00343 {
00344         m_reader.ValidateMem();
00345         m_cols.ValidateMem();
00346 }
00347 
00348 void DelimitedFileRowParser::CheckMem() const
00349 {
00350         m_reader.CheckMem();
00351         m_cols.CheckMem();
00352 }
00353 #endif