// CSV.h #ifndef CSVh #define CSVh #if _MSC_VER > 1000 #pragma once #endif // _MSC_VER > 1000 #include "Bytes.h" /* Usage: This class was designed to read Comma Separated Value (.CSV) files but has been extended to read other similar formats as well. CSV Files are text files using one line to represent one Record (Record Separators are \r\n characters). Fields are separated by a particular character (comma , by default). eg: one,2,three,four,5.0 If a Field contains a Field Separator character it should be enclosed in Text Separators (Speech Marks " by default). The StringToField method does this. eg: one,2,three,"four, or 4",5.0 If a Field contains a Text Separator, that Text Separator should be doubled up, and the Field should be enclosed in Text Separators. The StringToField method does this. eg: the fourth field of: one,2,three,"4'5"" tall",5.0 would be read as: 4'5" tall Leading and trailing white space characters are removed. You can specify multiple field separators: CBytes S; CCSV CSV(&S, "1234,432.234;346:787 65",",;: "); Assert(CSV.GetFieldCount()==5); The class also handles e-mail recipient lists where a field may contain a section in Text Separators followed by a section which will never contain Text or Field separators. eg: "Smith, John" , "Jones David" , Me@Home.com would be read as the following three fields: "Smith, John" "Jones David" Me@Home.com Note that the white-space characters between the two sections are replaced with a single space character. 
    The Automatic Tester has lots of usage examples at the end of CSV.h

    To read from a file:
        CFile File;
        if(!File.Open("Test.csv", CFile::modeRead|CFile::shareDenyNone)) return;
        CArchive Ar(&File, CArchive::load);
        CBytes S;
        while(Ar.ReadString(S)) { // For all lines of the file
            CCSV CSV;
            CBytes Field;
            CSV.Set(&Field,S);
            while(CSV.GetNextField()) { // for all Fields of the Record
                // The current Field is now stored in CBytes Field
            }
        }

    To read command line arguments from GetCommandLine() (you should really use __argc and __argv[]):
        CBytes S;
        CBytes First ;
        CBytes Second;
        CBytes Third ;
        CCSV CSV(&S, GetCommandLine(), ' ');
        CSV.GetNextField(); // Application Path
        if(!CSV.GetNextField()) return true; // No Parameters
        CBytes argv(S);
        if(CSV.GetNextField(&First ) // First Parameter (GetNextField takes a CBytes*)
        && CSV.GetNextField(&Second) // Second Parameter
        && CSV.GetNextField(&Third ) // Third Parameter
        && !CSV.GetNextField()) {
            ... // got the parameters
        }

    To create a folder from a Path:
        CBytes CreateBranch(const CBytes& Path) { // returns the path that was successfully created.
            CCSV CSV(0, Path, '\\');
            if(Path.Left(2)=="\\\\") { // \\ComputerName\Share\Directory format
                CSV.GetNextField(); // Skip the first two "empty fields"
                CSV.GetNextField();
                CSV.GetNextField(); // Win9x needs the ComputerName and Share to be used together, so we have to skip the ComputerName too.
            }
            while(CSV.GetNextField() && ((GetFileAttributes(CSV.GetDone())!=-1) || ::CreateDirectory(CSV.GetDone(),0)));
            return CSV.GetDone(); // could be return CSV.GetEnd().IsEmpty() if you wanted the function to return a bool...
        }
*/

// CCSV splits one Record (a null-terminated string) into Fields.
// Fields are delimited by any character contained in FS; a Field that starts with the
// Text Separator TS is read as a quoted section in which a doubled TS stands for one TS.
// Fields can be read sequentially (GetFirstField / GetNextField) or by index
// (GetFieldCount / GetFieldAt / operator[]), the latter via an array built by Index().
//
// NOTE(review): Array is an owning raw pointer (released with delete[] in the destructor)
// and no copy constructor / assignment operator is visible in this text — copying an
// indexed CCSV would presumably double-delete; confirm before copying instances.
class CCSV {
    const char* src;    // Character iterator pointing within the Record
protected:
    CBytes* dst;        // Destination CBytes: receives every parsed Field when non-null
    CBytes Rec;         // Our copy of the Record
    CBytes FS;          // Field Separator(s)
    char TS;            // Text Separator
public:
    // Default constructor: only src and Array are initialised; GetNextField() returns
    // false while src is null, i.e. until Set() supplies a Record.
    CCSV() : src(0), Array(0) {}

    // Releases the index array. Any exception thrown while doing so is swallowed after ASSERT(0).
    virtual ~CCSV() {try{delete[] Array;}catch(...){ASSERT(0);}}

    // Constructing with arguments is the same as calling Set() with those arguments:
    //   out  - destination for parsed Fields (may be 0)
    //   in   - the Record text
    //   _FS  - a single Field Separator character, or a CBytes holding several (default ",")
    //   _TS  - the Text Separator (default '"')
    CCSV (CBytes* out, const char* in, char _FS, char _TS='"') {Set(out,in,_FS,_TS);}
    CCSV (CBytes* out, const char* in, CBytes _FS=",", char _TS='"') {Set(out,in,_FS,_TS);}

    // Single-separator overload: wraps _FS in a CBytes and forwards to the overload below.
    void Set (CBytes* out, const char* in, char _FS, char _TS='"') {Set(out,in,CBytes(_FS), _TS);}

    // Stores the destination, copies the Record into Rec, positions src from the result of
    // that assignment (presumably the start of Rec's buffer, as Home() does), stores the
    // separators and clears Array so the index is rebuilt on demand.
    // NOTE(review): Array is set to 0 without delete[]; if Set() is called on an object that
    // has already been indexed the previous array looks like it is leaked — the allocation
    // itself is not visible in this text, so confirm against the full Index().
    void Set (CBytes* out, const char* in, CBytes _FS=",", char _TS='"') {dst=out; src=Rec=in; FS=_FS; TS=_TS; Array=0;}

    // GetFirstField overloads: (re)start parsing and return the result of GetNextField().
    // The first two also call Set() with a new Record; the third only replaces dst;
    // the last simply rewinds src to the start of Rec.
    bool GetFirstField(CBytes* out, const char* in, char _FS, char _TS='"') {Set(out,in,_FS,_TS); return GetNextField();}
    bool GetFirstField(CBytes* out, const char* in, CBytes _FS=",", char _TS='"') {Set(out,in,_FS,_TS); return GetNextField();}
    bool GetFirstField(CBytes* out) {dst=out; return GetFirstField();}
    bool GetFirstField() {src=Rec; return GetNextField();}

    // Rewinds the iterator to the start of the Record without reading a Field.
    void Home() {src=Rec;}

    // True when c is a space or a tab that is neither the Text Separator nor one of the
    // Field Separators (so a space used as a separator is not skipped as white-space).
    bool IsSpace(char c) const {return ((c==' ')&&(TS!=' ')&&(FS.Find(' ')==-1)) || ((c=='\t')&&(TS!='\t')&&(FS.Find('\t')==-1));}

    // Parses the next Field starting at src.
    //   out - optional; when non-null it receives the Field (or is emptied when none is left).
    // The Field is also written to dst when dst is non-null.
    // Returns true when a Field was read (it may be empty); returns false when src is null
    // or only white-space / the terminator remains, leaving src on the terminator.
    bool GetNextField(CBytes* out=0) {
        if(!src) return false;
        bool Quoted=false;
        while(char c=*src++) {
            if(IsSpace(c)) continue; // Ignore leading white-space characters.
            CBytes Text(""); // Using Null-terminated Strings
            if(c==TS) { // Quoted String:
                Quoted=true;
                while(c=*src++) { // runs to the closing TS, or to the terminator if the quote is never closed
                    if(c!=TS) Text+=c;
                    else { // Trailing "
                        if((c=*src++)==TS) Text+=TS; // We have "" so insert a single " into the string
                        else {
                            while(IsSpace(c) && (c=*src++)); // Ignore trailing white-space characters.
                            break; // We've got the quoted section, now try to get the rest:
                        }
                    }
                }
            }
            // c is now the first character after the quoted section (or the first character
            // of an unquoted Field). Copy up to the next Field Separator or the terminator.
            if(c && (FS.Find(c)==-1)) {
                if(Quoted) Text=TS+Text+TS+' '; // Parse e-mail Recipient lists like: "Jones, David" , JS@Work.com
                do Text+=c; // This is the normal Field copying loop:
                while((c=*src++) && (FS.Find(c)==-1));
            }
            if(!c) --src; // Point back at the NULL Terminator
            if(out) *out=Text;
            if(dst) *dst=Text;
            return true;
        }
        --src; // Point back at the NULL Terminator for GetDone and GetEnd.
        if(out) out->Empty();
        if(dst) dst->Empty();
        return false;
    }

    // True when there is no Record (src is null) or src is on the terminator.
    bool IsLast () const {return (src==0) || (*src==0);}
    CBytes GetDone() const {return Rec.Left(src-Rec);} // Returns the parsed section of the Record.
    CBytes GetEnd () const {return Rec.Mid (src-Rec);} // Returns the section of the Record that is yet to be parsed.
    CBytes GetRecord() const {return Rec;} // Returns a copy of the whole Record.

    // Intended to turn Text into a Field that can be written to a Record (doubling embedded
    // Text Separators and enclosing the Field in Text Separators where needed).
    // NOTE(review): elsewhere in this class Find() is compared with -1 for "not found", but
    // here its result is used directly as a boolean, and the enclosed form is returned only
    // when the condition is FALSE. That looks inverted / mis-tested — verify against CBytes
    // (return values of Replace and Find) before relying on these two functions.
    static CBytes StringToCSVField(CBytes Text) {return (Text.Replace("\"","\"\"") || Text.Find(',')) ? Text : '\"' + Text + '\"';}
    // NOTE(review): TS+TS adds two chars, which in C++ yields an int sum rather than a
    // two-character string — confirm which CBytes::Replace overload this selects.
    CBytes StringToField(CBytes Text) const {return (Text.Replace(TS,TS+TS) || Text.Find(FS)) ? Text : TS + Text + TS;}

protected:
    // CtmpString helps shorten the following functions by storing the current state and providing a temporary CBytes:
    // it remembers the values of src and dst when constructed and writes them back in its
    // destructor, so a function can re-scan the Record without disturbing the caller's position.
    class CtmpString : public CBytes { // Store the current position:
        const char*& src; // references to the src and dst pointers:
        CBytes*& dst;
        const char* tmpsrc; // the stored src and dst values:
        CBytes* tmpdst;
    public:
        CtmpString(const char*& src, CBytes*& dst) : src(src), tmpsrc(src), dst(dst), tmpdst(dst) {}
        virtual ~CtmpString() {src=tmpsrc; dst=tmpdst;}
    };

public:
    // Scans the Record from the start and returns the zero-based position of the first Field
    // equal to Field, or -1 when no Field matches. The caller's src/dst are restored on exit
    // by the CtmpString destructor.
    int FindField(const CBytes& Field) { // int so we can return -1 when not found.
        CtmpString S(src, dst);
        int i=GetFirstField(&S)-1; // 0 when a first Field exists, -1 when the Record has none
        for(;S!=Field; ++i) if(!GetNextField()) return -1;
        return i;
    }

    // The following functions parse the Record and store an array of the Fields for quick index-based access:
    // each of them calls Index() first when Array is still null.
    WORD GetFieldCount() {if(!Array) Index(); return FieldCount;}
    const CBytes& operator[](WORD Pos) {return GetFieldAt(Pos);}
    // Returns the Field at Pos, or a reference to a static empty CBytes when Pos is out of range.
    const CBytes& GetFieldAt(WORD Pos) {
        if(!Array) Index();
        static const CBytes MT;
        return Pos < FieldCount ? Array[Pos] : MT;
    }

private:
    CBytes* Array;      // Indexed Fields; null until Index() has run (released with delete[] in the destructor)
    WORD ArraySize;     // NOTE(review): not initialised by any visible constructor or by Set()
    WORD FieldCount;    // Number of Fields counted by Index()

    // Counts the Fields of the Record (restoring src/dst afterwards via CtmpString) and,
    // presumably, fills Array — the remainder of the function is not visible.
    void Index() {
        CtmpString S(src, dst);
        for(FieldCount=GetFirstField(&S); GetNextField(); ++FieldCount);
        // NOTE(review): the source text is truncated in the middle of the next statement.
        // Everything between "ArraySize" and the closing quote of a string literal is missing:
        // the rest of Index(), the end of class CCSV, and the opening of the self-test code
        // (including the CCSV construction used by the first test). It looks as if a
        // "<...>" span was stripped from the file; restore it from the original CSV.h.
        if(Array && (ArraySize");
        // First self-test (its function header and input Record are in the missing text).
        // It checks sequential access (GetNextField + S), indexed access and FindField
        // against each other for a Record of five Fields.
        Assert(CSV.GetFieldCount()==5);
        Assert(CSV.FindField("none")==-1);
        Assert(CSV.GetNextField());
        Assert(S=="one");
        Assert(CSV.GetFieldAt(0)=="one");
        Assert(CSV[0]=="one");
        Assert(CSV.FindField("one")==0);
        Assert(CSV.GetFieldCount()==5);
        Assert(CSV.GetNextField());
        Assert(S=="two");
        Assert(CSV.GetFieldAt(1)=="two");
        Assert(CSV[1]=="two");
        Assert(CSV.FindField("two")==1);
        Assert(CSV.GetFieldCount()==5);
        Assert(CSV.GetNextField());
        Assert(S=="thr,ee");
        Assert(CSV.GetFieldAt(2)=="thr,ee");
        Assert(CSV.FindField("thr,ee")==2);
        Assert(CSV.GetFieldCount()==5);
        Assert(CSV.GetNextField());
        Assert(S=="fo\"ur");
        Assert(CSV.GetFieldAt(3)=="fo\"ur");
        Assert(CSV[3]=="fo\"ur");
        Assert(CSV.FindField("fo\"ur")==3);
        Assert(CSV.GetFieldCount()==5);
        Assert(CSV.GetNextField());
        // NOTE(review): the expected strings below end in a space right after the closing
        // quote; they may have lost a trailing "<...>" part in the same way as the text above.
        Assert(S=="\"Smith, John\" "); // #x in the Assert macro doesn't like escaped quotes!
        Assert(CSV.GetFieldAt(4)=="\"Smith, John\" ");
        Assert(CSV[4]=="\"Smith, John\" ");
        Assert(CSV.FindField("\"Smith, John\" ")==4);
        Assert(CSV.GetFieldCount()==5);
        // Past the last Field: GetNextField fails, the destination is emptied and
        // out-of-range indexed access yields an empty Field.
        Assert(!CSV.GetNextField());
        Assert(S=="");
        Assert(CSV.GetFieldAt(5)=="");
        Assert(CSV[5]=="");
        Assert(CSV.GetFieldCount()==5);
    }

    // Same checks with a space as the Field Separator and '#' as the Text Separator.
    // The last Field opens with '#' and is never closed, so it runs to the end of the Record.
    void PRNTest() {
        CCSV CSV(&S,"one #two# #thr,ee# #fo##ur# #\"Smith, John\",",' ','#');
        Assert(CSV.GetFieldCount()==5);
        Assert(CSV.FindField("none")==-1);
        Assert(CSV.GetNextField());
        Assert(S=="one");
        Assert(CSV.GetFieldAt(0)=="one");
        Assert(CSV[0]=="one");
        Assert(CSV.FindField("one")==0);
        Assert(CSV.GetFieldCount()==5);
        Assert(CSV.GetNextField());
        Assert(S=="two");
        Assert(CSV.GetFieldAt(1)=="two");
        Assert(CSV[1]=="two");
        Assert(CSV.FindField("two")==1);
        Assert(CSV.GetFieldCount()==5);
        Assert(CSV.GetNextField());
        Assert(S=="thr,ee");
        Assert(CSV.GetFieldAt(2)=="thr,ee");
        Assert(CSV.FindField("thr,ee")==2);
        Assert(CSV.GetFieldCount()==5);
        Assert(CSV.GetNextField());
        Assert(S=="fo#ur");
        Assert(CSV.GetFieldAt(3)=="fo#ur");
        Assert(CSV[3]=="fo#ur");
        Assert(CSV.FindField("fo#ur")==3);
        Assert(CSV.GetFieldCount()==5);
        Assert(CSV.GetNextField());
        ASSERT(S=="\"Smith, John\","); // #x in the Assert macro doesn't like escaped quotes!
        ASSERT(CSV.GetFieldAt(4)=="\"Smith, John\",");
        ASSERT(CSV[4]=="\"Smith, John\",");
        ASSERT(CSV.FindField("\"Smith, John\",")==4);
        Assert(CSV.GetFieldCount()==5);
        Assert(!CSV.GetNextField());
        Assert(S=="");
        Assert(CSV.GetFieldAt(5)=="");
        Assert(CSV[5]=="");
        Assert(CSV.GetFieldCount()==5);
    }
}_CSVTester; // instance of the enclosing (not visible) tester type
}
#endif // def Assert
#endif // ndef CSVh