//BlockFile.h #ifndef BlockFileh #define BlockFileh #if _MSC_VER > 1000 #pragma once #endif // _MSC_VER > 1000 #include //For FileInterface using low level I/O routines #include //For _O_CREAT, _O_BINARY, _O_RANDOM, _O_RDWR, _O_TRUNC #include //_S_IREAD | _S_IWRITE /* CBlockFile was developed to keep many small files in one file, a little like a .tar or .zip file or a database holding many records. Hard Disks are formatted with fixed Block sizes. One File may occupy many Blocks, but one Block can only hold the data for one File. I had a Server with 4kB Block size which meant that every file that was just a few Bytes long would take up 4kB of Disk space... The Server was supposed to be used with a few large files but ended up having thousands of tiny files for a particular application (ESSI files for a CNC Profiler). I created an application which tokenised and compressed the ESSI files and stored them all in a single BlockFile. That File currently holds more than 7000 small files and gets Defragmented at midnight every night. It's been running now (with Uninterruptible Power Supply) for four years without a problem. CBlockFile was modelled on the malloc and free routines used in the RAM Memory Allocation routines on all Operating Systems. Instead of RAM, though, CBlockFile allocates a block in a Disk File (Code is included to make it operate on RAM for debugging). This implementation uses a struct FileInterface to hold all the file handling routines and uses the standard Low-Level routines defined in io.h MFC users can change this to use CFile if they prefer. If any of the boolean routines that aren't called Is...() return false, you must assume that the File is corrupt. When a Block is Freed, the class will make one large Free Block from adjacent Free Blocks where possible. If a Freed Block is at the end of the File, the File is shortened. The Defragment() function will remove all Free blocks from a file. I Defragment when my Application opens the File: CopyFile("MyFile.bf", "MyFile.bak", FALSE); //Save File incase of failure if(!BlockFile.Open("MyFile.bf")) return false; if(!BlockFile.Defragment()) { //If defragmentation fails: BlockFile.Close(); CopyFile("MyFile.bak", "MyFile.bf", FALSE); //Restore the File if(!BlockFile.Open("MyFile.bf")) return false; } This looks a little paranoid, but when you have many files within one file you have cause to be paranoid! This code has been thoroughly tested using a "Thrasher" which randomly calls random functions with random parameters... The code was considered finished when the Thrasher ran through 20,000 calls to its functions without failure. The following is true for all files, but this is a good place to make sure you know: It only takes one bit to flip in the File and you could loose all the data it held. For this reason I recommend that the File is closed as soon as possible to minimise the chance of power failure while the File is open (which is the most likely cause of corruption). If you leave the File open and your Application hangs or the power fails you may end up with corrupt data. */ struct CBlockFile { CBlockFile() {} virtual ~CBlockFile() {}//if(File.IsOpen()) Defragment(File.GetFilePath()); bool Create(const char* Path) { if(File.IsOpen()) File.Close(); if(!File.Create(Path)) return false; First.Used=First.Free=Pos=0; File.Write(&First, sizeof(First)); //Set next and prev to 0. return true; } bool Open(const char* Path) { if(File.IsOpen()) File.Close(); if(!File.Open(Path)) return Create(Path); Home(); return true; } bool IsOpen() const {return File.IsOpen();} bool Close() {Pos=0; return File.Close();} bool IsValid() const {return Pos!=0;} bool IsEmpty() const {return First.Free==First.Used;} //Only true when empty as both are then zero. //>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Block Functions <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< bool Home() { Pos=0; return File.IsOpen() && File.SeekToBegin() &&(File.Read(&First, sizeof(First))==sizeof(First)) && Goto(First.Used); } bool Next() {return Pos ? Goto(Node.Next) : true;} //false implies corruption, so we return true is Pos is 0. bool Prev() {return Pos ? Goto(Node.Prev) : true;} bool GetSpace(DWORD& Blocks, DWORD& Bytes) { DWORD SafePos=Pos; if(!File.IsOpen()) return false; Blocks=Bytes=0; if(!Goto(First.Free)) return true; do { ++Blocks; Bytes+=Node.Size; }while(Next()); return Goto(SafePos); } DWORD Alloc(DWORD Size) { //returns a Handle to the Block or 0 if something went wrong. if(!File.IsOpen()) return 0; Home(); for(Goto(First.Free); Pos; Next()) { //if First.Free==0 falls through to Append... if((sizeof(Node)+Size+sizeof(EndNode)) < Node.Size) { //Split Free block in two: Node.Size-=sizeof(Node)+Size+sizeof(EndNode); //shrink the free block Node.Flags|=FREE; WriteNode(); Pos+=sizeof(Node)+Node.Size+sizeof(EndNode); break; }else if(Size<=Node.Size) { //Use the Free Block as it is: if(!UnLink()) return 0; //if it fails the File is Corrupted. Size=Node.Size; break; } } if(!Pos) { //Append: Pos=File.SeekToEnd(); //Check we have disk space before committing: if(!File.SetLength(File.GetLength()+sizeof(Node)+Size+sizeof(EndNode))) { File.SetLength(Pos); Home(); return 0; } } Node.Next=First.Used; //Make new Header: Node.Prev=0; Node.Size=Size; Node.Flags&=~FREE; return WriteNode() && SetNextsPrev(Pos) //Next->Prev=this; if it fails the File is Corrupted. && SetFirstUsed(Pos) && SeekToBegin() ? Pos : 0; //Point to the start of the Data section for this Block. } bool Free(DWORD NewPos) {return Goto(NewPos) && Free();} bool Free() { if(!File.IsOpen() || !Pos) return false; if(Node.Flags & FREE) return Home(); //it's already Free! DWORD Size=Node.Size; //Final size of Free block if(!UnLink()) return false; //if this fails the File is Corrupted. DWORD Me=Pos; DWORD NextBlock=Pos+sizeof(Node)+Size+sizeof(EndNode); bool LastBlock=(NextBlock==File.GetLength()); if(!LastBlock) { //Try to merge this Block with the Next Block: if(!Goto(NextBlock)) return false; //if this fails the File is Corrupted. if(Node.Flags & FREE) { // Next Block is also Free: Size+=sizeof(Node)+Node.Size+sizeof(EndNode); //Include this block in our final Free block if(!UnLink()) return false; //From Free list } } //only Pos & Size are valid now. Try to merge this block with the Previous Block: bool Linked=false; if(Me!=sizeof(First)) { //If this isn't the first Block in the File: if(!File.Seek(Me-sizeof(EndNode)) //Load the Previous Block: || (File.Read(&EndNode, sizeof(EndNode))!=sizeof(EndNode)) || (EndNode.Pos>=File.GetLength()) //Check that we haven't read in garbage || !Goto(EndNode.Pos)) return false; //if this fails the File is Corrupted. Linked=Node.Flags & FREE; //true if the previous Block is also in the list (we'll grow it) } if(Linked) Node.Size+=sizeof(Node)+Size+sizeof(EndNode); //Extend the previous Free Block to include this Block. else{ //Previous block is not in the Free List: EndNode.Pos=Pos=Me; Node.Next=First.Free; //Insert this Block into the Free list: Node.Prev=0; Node.Size=Size; Node.Flags|=FREE; } if(LastBlock) { //At end of file if(Linked && !UnLink(Pos)) return false; //if this fails the File is Corrupted. if(!File.SetLength(Pos)) return false; //if this fails the File is Corrupted. Home(); //Don't care if this fails return true; } if(!Linked) { if(!SetNextsPrev(Pos) || !SetFirstFree(Pos)) return false; //if this fails the File is Corrupted. } if(!WriteNode()) return false; //if this fails the File is Corrupted. Home(); //Don't care if this fails return true; } bool Defragment() { //If this returns false, the File is corrupt. if(!File.IsOpen() || !First.Free) return true; //No Fragments DWORD FileLength=File.GetLength(); DWORD Done=sizeof(First); //Start at first the Block in the File. DWORD Size; for(DWORD Next=Done; Next>>>>>>>>>>>>>>> Functions handling Data within a Block <<<<<<<<<<<<<<<<<<<<< public: DWORD GetHandle() const {return Pos;} DWORD SizeLeft() const {return Node.Size-(uPos-Pos-sizeof(Node));} DWORD GetSize() const {return Pos ? Node.Size : 0;} //Seeking within the current Block: bool SeekToBegin() {return Seek(0);} bool SeekToEnd() {return Seek(Node.Size);} bool Seek(DWORD NewPos) { uPos=0; return Pos && File.IsOpen() && (NewPos<=Node.Size) && File.Seek(uPos=Pos+sizeof(Node)+NewPos); } DWORD Read(DWORD* n) {return Read(n, sizeof(DWORD));} DWORD Read(void* Buffer, DWORD Size) { if((!File.IsOpen()) || !Pos || (Size>SizeLeft())) return 0; DWORD BytesIn=File.Read(Buffer, min(Size, SizeLeft())); if(BytesIn==Size) uPos+=BytesIn; else Seek(uPos); return BytesIn; } DWORD Write(DWORD n) {return Write(&n, sizeof(DWORD));} DWORD Write(void* Buffer, DWORD Size) { if((!File.IsOpen()) || (!Pos || (Size>SizeLeft()))) return 0; DWORD BytesOut=File.Write(Buffer, min(Size, SizeLeft())); if(BytesOut==Size) uPos+=BytesOut; else Seek(uPos); return BytesOut; } bool WriteNew(void* Buffer, DWORD Size) { //Create and write a block in one go. bool OK=Alloc(Size) && Pos && File.Seek(Pos+sizeof(Node)) && (File.Write(Buffer, min(Size, SizeLeft()))); if(OK) File.Seek(Pos); //Point where we should else Free(); return OK; } //>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> File and Block Guts <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< protected: DWORD Pos; // Pos is the file position of the beginning of the current Node. Zero is invalid because of the two DWORDs that start the file. DWORD uPos; //uPos is the user's file position (relative to start of File). struct FileInterface { // A version of FileInterface which will use memory rather than a file is listed at the bottom of this file. int File; FileInterface() : File(-1) {} bool Create(const char* Path) {return (File=_open(Path, _O_CREAT | _O_BINARY | _O_RANDOM | _O_RDWR | _O_TRUNC, _S_IREAD | _S_IWRITE))!=-1;} bool Open (const char* Path) {return (File=_open(Path, _O_BINARY | _O_RANDOM | _O_RDWR , _S_IREAD | _S_IWRITE))!=-1;} bool Close () {bool Result=(_close(File)!=-1); File=-1; return Result;} bool IsOpen() const {return File!=-1;} bool Seek(DWORD Pos) {return IsOpen() ? _lseek(File, Pos, SEEK_SET)!=-1 : false;} bool SeekToBegin() {return IsOpen() ? _lseek(File, 0, SEEK_SET)!=-1 : false;} DWORD SeekToEnd() {DWORD Result=IsOpen() ? _lseek(File, 0, SEEK_END) : -1; return Result==-1 ? 0 : Result;} bool SetLength(DWORD L) {return IsOpen() ? _chsize(File, L)!=-1 : 0;} // If you change the suite of File handling routines, make sure you pick one that can change the File Length! DWORD GetLength() {return IsOpen() ? _filelength(File) : 0;} DWORD Read (void* Buffer, DWORD Size) {DWORD Result=IsOpen() ? _read (File, Buffer, Size) : -1; return Result==-1 ? 0 : Result;} DWORD Write(void* Buffer, DWORD Size) {DWORD Result=IsOpen() ? _write(File, Buffer, Size) : -1; return Result==-1 ? 0 : Result;} } File; struct FIRST { //This is the first two DWORDs of the file, loaded when the file is opened and saved every time anything changes. FIRST() : Used(0), Free(0) {} DWORD Used; DWORD Free; } First; struct NODE { //This is loaded when Pos is valid. Each File Block starts with a NODE as a Header. NODE() : Next(0), Prev(0), Size(0), Flags(0) {} DWORD Next; DWORD Prev; DWORD Size; BYTE Flags; //See NodeFlags } Node; enum NodeFlags { //Derived classes may add other flags to denote special blocks: FREE=0x01, }; struct ENDNODE { //Each File Block ends with an ENDNODE Footer which is just a DWORD pointing to the Blocks NODE Header. ENDNODE() : Pos(0) {} DWORD Pos; } EndNode; bool Goto(DWORD NewPos) {Pos=NewPos; return Pos && ReadNode() && SeekToBegin();} bool ReadNode() { bool Result=Pos && File.Seek(Pos) && (File.Read(&Node, sizeof(Node))==sizeof(Node)) && Node.SizePrev=Prev && (File.Write(&ptr, sizeof(DWORD))==sizeof(DWORD))); } bool SetPrevsNext(DWORD ptr) { //A little more complex because if there is no Prev we must set the appropriate "First" pointer instead. if(Node.Prev) return File.Seek(Node.Prev) //Prev->Next=Next && (File.Write(&ptr, sizeof(DWORD))==sizeof(DWORD)); return Node.Flags & FREE ? SetFirstFree(ptr) : SetFirstUsed(ptr); //First=Next } }; /* Use this version to write to memory so that you can debug using the Memory Debug Window: struct FileInterface { char* File; DWORD Pos; DWORD Length; DWORD BufferLength; FileInterface() : File(0), Pos(0), Length(0), BufferLength(1024) {} ~FileInterface() {delete File; File=0;} bool Create(const char* Path) {File=new char[BufferLength]; memset(File, 0x11, BufferLength); return File!=0;} bool Open (const char* Path) {return File ? true : Create(Path);} bool Close () {return true;} bool IsOpen() const {return File!=0;} bool Seek(DWORD NewPos) {Pos=NewPos; return true;} bool SeekToBegin() {Pos=0; return true;} DWORD SeekToEnd() {return Pos=Length;} bool SetLength(DWORD L) { if(LLength) Length=Pos; return File!=0 ? Size : 0; } } File; */ #endif//ndef BlockFileh