// LZW.h #ifndef LZWh #define LZWh #if _MSC_VER > 1000 #pragma once #endif // _MSC_VER > 1000 /* This is a small suite of simple compression classes that use the LZW algorithm optionally using the GIF coding format. Unisys patented the LZW alogorithm but the patent has expired, so you are free to use this code without licence. That's why everyone uses other LZs instead. There is a CLZWEncoder class and a CLZWDecoder class (both derived from CLZWBase). They were designed to store short text messages efficiently. You can specify a File or a CString or an area of memory for each class to act upon. The Encoder can be told to create GIF codes in the Stream. The Decoder automatically decodes GIF or normal LZW. The code has the following behaviour: The Encoder checks to see if Encoding really did compress the data. If the data wan't compressed the Encoder output is just a copy of the input. If the data was compressed the Encoder output starts with a 0xC3 Byte (Supposed to look like the 'C' and 'M' in "CoMpressed"). Following the 0xC3 Byte is a DWORD containing the Uncompressed data length. Use this to create the buffer for your Decompression. The Decoder looks for a leading byte of 0xC3 and if it doesn't find one, just copies input to output. So if the original data has 0xC3 as the first byte and doesn't get smaller after LZW then the Decoder will fail. For all other cases you can happily Encode data and know you'll never end up with larger data. If you used the default constructor, the function you called to Compress or Decompress will return an error message if anything went wrong. The simplest way to use a CString to hold the data. Here's example code for some of the constructors: CString S("anna and nanna banned bananas and bandannas"); S+=S+S+S+S+S+S+S+S+S+S+S+S+S+S+S+S+S+S+S+S+S+S+S+S+S+S+S+S; int Length=S.GetLength(); CLZWEncoder E(S); // S is now the compressed data (or the original data if no compression was possible). DWORD CompressedLength=S.GetLength(); // At this point: note the leading 0xC3 Byte. S may contain NULLs before its end. GetLength returns the correct length for the compressed data. if((BYTE)*S==0xC3) { DWORD UnCompressedLength=*((DWORD*)(&*S+1)); //You can access the UnCompressed data Length from the compressed code } CLZWDecoder D(S); //S is now back to the original data or empty if we ran out of memory or the data was corrupt. if(S.IsEmpty() && CompressedLength) AfxMessageBox("LZW Decompression Failed"); if(Length!=S.GetLength()) AfxMessageBox("LZW Decompression lost Byte(s)"); //A file is specified simply by its Path, a section of memory by its Base and Length. CLZWEncoder Encoder1; const char* src="anna and nanna banned bananas and bandannas"; DWORD UnCompressedLength=strlen(src)+1; // +1 to include the NULL terminator. char* LZW=new char[UnCompressedLength]; S=Encoder1.Encode(src, UnCompressedLength, LZW, UnCompressedLength); if(!S.IsEmpty()) AfxMessageBox(S); else{ CompressedLength=Encoder1.GetOutSize(); double Ratio=UnCompressedLength+1./CompressedLength; CLZWDecoder Decoder1; char* dst=LZW; //start assuming no compression happened, so destination is the same as the source. if((BYTE)*LZW==0xC3) { // Decoding is necessary ASSERT(UnCompressedLength==*((DWORD*)(LZW+1))); //You can access the UnCompressed data Length from the compressed code dst=new char[UnCompressedLength]; Decoder1.Decode(LZW, CompressedLength, dst, UnCompressedLength); ASSERT(strcmp(src,dst)==0); delete dst; } } delete LZW; DeleteFile("ReadMeNow.txt"); CLZWEncoder Encoder2; Encoder2.Encode("ReadMe.txt", "ReadMe.lzw"); // You could use a memory buffer instead of the .lzw file CLZWDecoder Decoder2; Decoder2.Decode("ReadMe.lzw", "ReadMeNow.txt"); The bulk of the code handles the input/output, the compression and decompression are short sections of code. If you remove the GIF decoding block, the Decoder involves very little code: it should be very clear and easy to alter. */ #include "Bytes.h" #ifndef Languageh static const char* sBadFileName ="Bad File Name: "; static const char* sBadMAlloc ="Out of Memory (RAM)."; static const char* sBadDataCode ="Bad Data Code."; static const char* sDictOverflow="LZW Dictionary overflowed."; static const char* sOutOfSpace ="Couldn't Write: out of space."; #endif struct CLZWBase : public CByteStream { // Things shared by the Encoder and Decoder DWORD GetOutSize() {return OutSize;} void UseGIFEncoding(bool GIF=true) {GIFEncoding=GIF;} // When true, use marker codes: InitCode & EndCode. protected: // No public constructors for this Base Class. CLZWBase(CByteStream& Data, bool GIF=false) : CByteStream(Data), GIFEncoding(GIF), OutSize(0), WriteVal(0), ReadVal(0), WriteBitCtr(0), ReadBitCtr(0) {} virtual ~CLZWBase() {} bool GIFEncoding; // When true, use marker codes: InitCode & EndCode. DWORD OutSize; // Bytes output when completed. BYTE DataBits; // Bits-per-Pixel in GIF terminology. BYTE MinBitCtr; // Bit counter to encode 'InitCode'. BYTE BitCtr; // Current Code Size in Bits. BYTE ReadBitCtr; // BitStream index into ReadVal. BYTE WriteBitCtr; // BitStream index into WriteVal. DWORD ReadVal; // BitStream reading Codes. DWORD WriteVal; // BitStream writing Codes. WORD DictIdx; // Current position in the Dictionary. enum {WordCounterBits=12}; // 3 to 25. 2^WordCounterBits. Maximum word counter in the DictNodes during *all* the compressions. Beyond 12, you can have memory allocation errors depending on your compiler and computer. WORD MaxDictIdx; // EndCode to 2^WordCounterBits. Maximum Word counter in the DictNodes during *one* compression. WORD InitCode, EndCode; // InitCode and EndCode are both consecutive coming up just after the last known word in the initial DictNodes. }; //>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> CLZWEncoder <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< class CLZWEncoder : public CLZWBase { struct DictValNode { DictValNode(WORD i) : Character((BYTE)i), Code(i), Left(0), Right(0) {} BYTE Character; WORD Code; DictValNode* Left; DictValNode* Right; }*DictNodes[1<>=8)); WriteByte((BYTE)(UnCompressedLength>>=8)); WriteByte((BYTE)(UnCompressedLength>>=8)); if(!EndData()) { MaxDictIdx=1<<12; // Range: 2^3 to 2^WordCounterBits. DataBits=8; // Range: 1 to WordCounterBits-1 (usually, for pictures, set up to 1, or 4, or 8, in the case of monochrome, or 16-colors, or 256-colors picture). MinBitCtr=(DataBits==1 ? 3 : DataBits+1); InitCode=1<<(MinBitCtr-1); EndCode=InitCode+(GIFEncoding ? 1 : -1); WORD i; for(i=0; i<=EndCode; ++i) { if((DictNodes[i]=new DictValNode(i))==0) { while(i) delete DictNodes[--i]; SetDestinationLength(0); return; } } memset(&DictNodes[i], 0, (MaxDictIdx-i)*sizeof(DictValNode*)); // Zero the remaining array values DictIdx=EndCode+1; BitCtr=MinBitCtr; if(GIFEncoding) WriteCodeLR(InitCode); DictValNode* it=DictNodes[ReadByte()]; while(CanWrite() && !EndData()) { BYTE Symbol=ReadByte(); DictValNode* NewNode=FindNode(it, Symbol); if((NewNode!=it)&&(NewNode->Character==Symbol)) it=NewNode; else { WriteCodeLR(it->Code); if(!AddNode(it, NewNode, Symbol)) { SetDestinationLength(0); return; } if(DictIdx==MaxDictIdx) { if(GIFEncoding) WriteCodeLR(InitCode); //Re-initialise DictNodes: for(i=0; iLeft=DictNodes[i]->Right=0; DictIdx=EndCode+1; BitCtr=MinBitCtr; } it=DictNodes[Symbol]; } } WriteCodeLR(it->Code); if(GIFEncoding) WriteCodeLR(EndCode); WriteEndLR(); FreeDict(); } if(GetSourcePos()<=GetDestinationPos()) { CopyFile(); //If we didn't compress things then just copy the file. SetDestinationLength(GetSourceLength()); }else SetDestinationLength(GetDestinationPos()); if(!CanWrite()) SetDestinationLength(0); } virtual ~CLZWEncoder() {} private: DictValNode* FindNode(DictValNode* it, BYTE Symbol) { //Looks for 'Symbol' from 'it->Left'. DictValNode* NewNode; if(!it || it->Left==0) return it; for(NewNode=it->Left; (NewNode->Character!=Symbol)&&(NewNode->Right); NewNode=NewNode->Right); return NewNode; } bool AddNode(DictValNode* it, DictValNode* NewNode, BYTE Symbol) { DictValNode* ptr=DictNodes[DictIdx]; if(!ptr && ((ptr=DictNodes[DictIdx]=new DictValNode(DictIdx))==0)) { FreeDict(); return false; } ptr->Character=Symbol; if(it==NewNode) NewNode->Left =ptr; else NewNode->Right=ptr; if(++DictIdx==(WORD)(1<=8) { WriteBitCtr-=8; WriteByte((BYTE)(WriteVal >> WriteBitCtr)); WriteVal &= ((1<0) WriteByte((BYTE)(WriteVal << (8-WriteBitCtr))); WriteVal=WriteBitCtr=0; } }; //>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> CLZWDecoder <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< class CLZWDecoder : public CLZWBase { /* IgnoreOverflow is only considered when using GIFEncoding The reception of the Code 'InitCode' will force the dictionary to by flushed. If IgnoreOverflow=false and the dictionary is filled we return an Overflow Error. If IgnoreOverflow=true, (default) the dictionary is just not updated when full. */ bool IgnoreOverflow; struct DictLinkNode { DictLinkNode(BYTE i) : Character(i), Predecessor(0) {} BYTE Character; DictLinkNode* Predecessor; } *DictNodes[1<Predecessor=0; DictIdx=EndCode+1; BitCtr=MinBitCtr; CurCode=ReadCodeLR(); if(CurCodeDictIdx) { SetDestinationLength(0); return; } if(IgnoreOverflow) { WriteString(PrevCode, CurCode, FirstChar); if(DictIdx=BitCtr))) { CurCode=ReadCodeLR(); if(CurCode>DictIdx) { SetDestinationLength(0); return; } WriteString(PrevCode, CurCode, FirstChar); if(DictIdx==MaxDictIdx-2) { for(WORD i=InitCode; (iPredecessor=0; DictIdx=EndCode+1; BitCtr=MinBitCtr; if((!EndData())||(ReadBitCtr>=BitCtr)) { PrevCode=(CurCode=ReadCodeLR()); WriteString(PrevCode, CurCode, FirstChar); } } else AddString(PrevCode, FirstChar); PrevCode=CurCode; } } FreeDict(); SetDestinationLength(CanWrite() ? GetDestinationPos() : 0); }else{ //It's not compressed so just copy the file: CopyFile(); SetDestinationLength(GetSourceLength()); } } virtual ~CLZWDecoder() {} private: void AddString(WORD Code, BYTE FirstChar) { DictNodes[DictIdx]->Character=FirstChar; DictNodes[DictIdx]->Predecessor=DictNodes[Code]; if(++DictIdx+1==(WORD)(1<Predecessor) { WriteLink(Link->Predecessor, Character); //Recursive WriteByte(Link->Character); }else{ WriteByte(Link->Character); *Character=Link->Character; } } /* The bits are stored from left to right. Example: aaabbbbcccc is written: Bits 7 6 5 4 3 2 1 0 Byte 1 a a a b b b b c Byte 2 c c c ? ? ? ? ? */ WORD ReadCodeLR() { while(ReadBitCtr>ReadBitCtr); ReadVal &= ((1<=iBytes.Length) return; // return if it wasn't compressed. oBytes[++oBytes.Length-1]=iBytes.NullTerminated; // append the Null Terminated Flag oBytes.MoveTo(iBytes); } static void Decode(CBytes& iBytes, bool Auto=true) { if(iBytes.IsEmpty() || (iBytes[0]!=0xC3)) return; CBytes oBytes(*((DWORD*)(iBytes.Bytes+1)), iBytes[iBytes.Length-1]!=0); --iBytes.Length; // for the NullTerminated flag CLZWDecoder(CByteStream(iBytes,oBytes), Auto); // -1 for the Null Terminated Flag if(!oBytes.IsEmpty()) oBytes.MoveTo(iBytes); // Don't decode if an error occurred } }; #endif //ndef LZWh