00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00022 #include <spl/types.h>
00023 #include <spl/Debug.h>
00024 #include <spl/io/DelimitedFile.h>
00025 #include <spl/io/File.h>
00026 #include <spl/io/StreamBuffer.h>
00027
00028 using namespace spl;
00029
00030 class DelimitedFileRowParser : public IMemoryValidate
00031 {
00032 private:
00033
00034 inline DelimitedFileRowParser(const DelimitedFileRowParser& csv) : m_reader(spl::IStreamPtr()) {}
00035
00036 protected:
00037 char m_delimchar;
00038 Vector<StringBuffer> m_cols;
00039 TextReader m_reader;
00040 StringBuffer m_line;
00041
00042 public:
00043 DelimitedFileRowParser(char delimchar, spl::IStreamPtr strm);
00044 virtual ~DelimitedFileRowParser();
00045
00046 bool Next();
00047
00048 bool RowHasData() const;
00049 inline int ColCount() const { return m_cols.Count(); }
00050 inline const StringBuffer& CellAt(int col) { return m_cols.ElementAtRef(col); }
00051
00052 #ifdef DEBUG
00053 virtual void ValidateMem() const;
00054 virtual void CheckMem() const;
00055 #endif
00056 };
00057
00058 DelimitedFile::DelimitedFile( )
00059 : m_table()
00060 {
00061 }
00062
00063 DelimitedFile::DelimitedFile(const DelimitedFile& csv)
00064 : m_table()
00065 {
00066 *this = csv;
00067 }
00068
00069
00070 DelimitedFile::~DelimitedFile( )
00071 {
00072 Clear();
00073 }
00074
00075 DelimitedFile& DelimitedFile::operator =(const DelimitedFile& csv)
00076 {
00077 Clear();
00078 m_table = csv.m_table;
00079 return *this;
00080 }
00081
00082 void DelimitedFile::Clear()
00083 {
00084 m_table.Clear();
00085 }
00086
00087 DataRowPtr DelimitedFile::RowAt(int idx) const
00088 {
00089 if ( idx >= m_table.RowCount() )
00090 {
00091 return DataRowPtr();
00092 }
00093 return m_table.Row(idx);
00094 }
00095
00096 DataRowPtr DelimitedFile::operator[] (int idx) const
00097 {
00098 return m_table.Row(idx);
00099 }
00100
00101 bool DelimitedFile::RowHasData(int rowNum) const
00102 {
00103 DataRowPtr row = m_table.Row(rowNum);
00104
00105 int count = row->Count();
00106 for ( int x = 0; x < count; x++ )
00107 {
00108 if ( ! row->Cell(x)->IsUndefined() )
00109 {
00110 return true;
00111 }
00112 }
00113 return false;
00114 }
00115
00116 DelimitedFilePtr DelimitedFile::Parse( TextReader& reader, char coldelim )
00117 {
00118 Array<byte> buf(512);
00119 DelimitedFilePtr dfile = DelimitedFilePtr(new DelimitedFile());
00120 int lineLen;
00121
00122 while ( reader.ReadLine(buf, lineLen) )
00123 {
00124 reader.ValidateMem();
00125 DataRowPtr row = Parse(buf, lineLen - 1, coldelim);
00126 dfile->AddRow( row );
00127 }
00128 reader.Close();
00129
00130 dfile.ValidateMem();
00131 return dfile;
00132 }
00133
00134 #ifdef DEBUG
00135 void DelimitedFile::ValidateMem() const
00136 {
00137 m_table.ValidateMem();
00138 }
00139
00140 void DelimitedFile::CheckMem() const
00141 {
00142 m_table.CheckMem();
00143 }
00144 #endif
00145
00146 DelimitedFilePtr DelimitedFile::Parse( const String& filename, char coldelim )
00147 {
00148 if ( ! File::Exists(filename) )
00149 {
00150 return DelimitedFilePtr();
00151 }
00152
00153 IStreamPtr fs = File::OpenText(filename);
00154 TextReader reader(StreamBufferPtr(new StreamBuffer(fs, true)));
00155 DelimitedFilePtr df;
00156
00157 try
00158 {
00159 df = Parse(reader, coldelim);
00160 }
00161 catch (Exception *ex)
00162 {
00163 reader.Close();
00164 throw ex;
00165 }
00166
00167
00168 return df;
00169 }
00170
/// States of the row-splitting state machine used by the parsers in this file.
/// (Declared as a plain type: the former declaration also created an unused
/// static variable of the same name, which hid the type name.)
enum DilimitedRowParseState
{
	DRP_STATE_CHARS,        ///< Initial state: reading an unquoted field.
	DRP_STATE_QUOTE,        ///< Entered on an opening '"': inside a quoted field.
	DRP_STATE_QUOTE_COMMA   ///< Entered on the closing '"': waiting for the delimiter.
};
00178
00179 DataRowPtr DelimitedFile::Parse( Array<byte>& cstr, int cstrLen, char coldelim )
00180 {
00181 DataColumnPtr col(new DataColumn("dummy"));
00182 DataRowPtr row = DataRowPtr(new DataRow());
00183 enum DilimitedRowParseState state = DRP_STATE_CHARS;
00184
00185 bool trailingComma = false;
00186 int start = 0;
00187 int len = cstrLen;
00188 for ( int x = 0; x < len; x++ )
00189 {
00190 char ch = cstr[x];
00191 switch ( state )
00192 {
00193 case DRP_STATE_CHARS:
00194 if ( start == x && ch == '"' )
00195 {
00196 state = DRP_STATE_QUOTE;
00197 start = x + 1;
00198 break;
00199 }
00200 trailingComma = false;
00201 if ( ch == coldelim )
00202 {
00203 int cplen = x - start;
00204 row->AddColumn(col, VariantPtr(new Variant(String(cstr, start, cplen))));
00205
00206 start = x + 1;
00207 trailingComma = true;
00208 }
00209 break;
00210 case DRP_STATE_QUOTE:
00211 if ( ch == '"' )
00212 {
00213 int cplen = x - start;
00214 row->AddColumn(col, VariantPtr(new Variant(String(cstr, start, cplen))));
00215
00216 state = DRP_STATE_QUOTE_COMMA;
00217 trailingComma = false;
00218 }
00219 break;
00220 case DRP_STATE_QUOTE_COMMA:
00221 if ( ch == ',' )
00222 {
00223 start = x + 1;
00224 state = DRP_STATE_QUOTE;
00225 trailingComma = true;
00226 }
00227 break;
00228 }
00229 }
00230 int cplen = len - start;
00231 if ( cplen > 0 || trailingComma )
00232 {
00233 row->AddColumn(col, VariantPtr(new Variant(String(cstr, start, cplen))));
00234 }
00235
00236 row.ValidateMem();
00237 return row;
00238 }
00239
00240 DelimitedFileRowParser::DelimitedFileRowParser(char delimchar, spl::IStreamPtr strm)
00241 : m_cols(), m_reader(strm), m_delimchar(delimchar), m_line(121)
00242 {
00243 }
00244
00245 DelimitedFileRowParser::~DelimitedFileRowParser()
00246 {
00247 m_reader.Close();
00248 }
00249
00250 bool DelimitedFileRowParser::RowHasData() const
00251 {
00252 int colcount = m_cols.Count();
00253 for ( int x = 0; x < colcount; x++ )
00254 {
00255 if ( m_cols.ElementAtRef(x).Length() > 0 )
00256 {
00257 return true;
00258 }
00259 }
00260 return false;
00261 }
00262
00263 bool DelimitedFileRowParser::Next()
00264 {
00265 int x;
00266 int colcount = m_cols.Count();
00267 for ( x = 0; x < colcount; x++ )
00268 {
00269 m_cols.ElementAtRef(x).Clear();
00270 }
00271
00272 m_line.SetLength(0);
00273 if ( ! m_reader.ReadLine(m_line) )
00274 {
00275 return false;
00276 }
00277
00278 if ( 0 == m_cols.Count() )
00279 {
00280 StringBuffer sb;
00281 m_cols.Add(sb);
00282 }
00283
00284 int curcol = 0;
00285
00286 enum DilimitedRowParseState state = DRP_STATE_CHARS;
00287 bool trailingComma = false;
00288 int len = m_line.Length();
00289 for ( x = 0; x < len; x++ )
00290 {
00291 char ch = m_line.CharAt(x);
00292 switch ( state )
00293 {
00294 case DRP_STATE_CHARS:
00295 if ( ch == '"' )
00296 {
00297 state = DRP_STATE_QUOTE;
00298 break;
00299 }
00300 trailingComma = false;
00301 if ( ch == m_delimchar )
00302 {
00303 curcol++;
00304 if ( curcol >= m_cols.Count() )
00305 {
00306 StringBuffer sb;
00307 m_cols.Add(sb);
00308 }
00309 trailingComma = true;
00310 }
00311 else
00312 {
00313 m_cols.ElementAtRef(curcol).Append( ch );
00314 }
00315 break;
00316 case DRP_STATE_QUOTE:
00317 if ( ch == '"' )
00318 {
00319 curcol++;
00320 if ( curcol >= m_cols.Count() )
00321 {
00322 StringBuffer sb;
00323 m_cols.Add(sb);
00324 }
00325 state = DRP_STATE_QUOTE_COMMA;
00326 trailingComma = false;
00327 }
00328 break;
00329 case DRP_STATE_QUOTE_COMMA:
00330 if ( ch == ',' )
00331 {
00332 state = DRP_STATE_QUOTE;
00333 trailingComma = true;
00334 }
00335 break;
00336 }
00337 }
00338 return true;
00339 }
00340
00341 #ifdef DEBUG
00342 void DelimitedFileRowParser::ValidateMem() const
00343 {
00344 m_reader.ValidateMem();
00345 m_cols.ValidateMem();
00346 }
00347
00348 void DelimitedFileRowParser::CheckMem() const
00349 {
00350 m_reader.CheckMem();
00351 m_cols.CheckMem();
00352 }
00353 #endif