// URL.h #ifndef URLh #define URLh #if _MSC_VER > 1000 #pragma once #endif // _MSC_VER > 1000 #include "Bytes.h" // For Fromatting URLs to the HTTP standard class __declspec(novtable) URL { URL() {} // Private Constructor: you shouldn't create one of these. public: static void Byte2Hex(BYTE lo, BYTE*& dst) { BYTE hi=lo>>4; lo&=0x0F; *dst++='%'; *dst++=(hi+(hi>9 ? 'A'-10 : '0')); *dst++=(lo+(lo>9 ? 'A'-10 : '0')); } static void Hex2Byte(BYTE*& src, BYTE*& dst) { BYTE hi=*src++; if(hi==0) {--src; return;} hi-=(hi<'A' ? '0' : 'A'-10); BYTE lo=*src++; if(lo==0) {--src; return;} lo-=(lo<'A' ? '0' : 'A'-10); *dst++=(hi<<4) | (lo & 0x0F); // " & 0x0F" deals with lower-case characters } /* http://www.w3.org/International/O-URL-code.html Encode a string to the "x-www-form-urlencoded" form, enhanced with the UTF-8-in-URL proposal. This is what happens: The ASCII characters 'a' through 'z', 'A' through 'Z', and '0' through '9' remain the same. The unreserved characters - _ . ! ~ * ' ( ) remain the same. The space character ' ' is converted into a plus sign '+'. All other ASCII characters are converted into the 3-character string "%xy", where xy is the two-digit hexadecimal representation of the character code. All non-ASCII characters are encoded in two steps: first to a sequence of 2 or 3 bytes, using the UTF-8 algorithm; secondly each of these bytes is encoded as "%xx". */ static void Encode(CBytes& iBytes, bool IsParams=false) { // Assumes no Parameters are present! static const char Type[]={ // 0=Safe, >0=UnSafe in URL, >1=Unsafe in Parameter //0,1,2,3,4,5,6,7,8,9,A,B,C,D,E,F // X 0,1,2,3,4,5,6,7,8,9,A,B,C,D,E,F 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0 , , , , , , , , , , , , , , ,. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 1 .,.,.,.,.,.,.,.,.,.,.,.,.,.,.,. 1,0,1,1,2,1,2,0,0,0,0,2,2,0,0,0, // 2 ,!,",#,$,%,&,',(,),*,+,,,-,.,/ 0,0,0,0,0,0,0,0,0,0,0,2,1,2,1,2, // 3 0,1,2,3,4,5,6,7,8,9,:,;,<,=,>,? 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 4 @,A,B,C,D,E,F,G,H,I,J,K,L,M,N,O 0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0, // 5 P,Q,R,S,T,U,V,W,X,Y,Z,[,\,],^,_ 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 6 `,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o 0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,1, // 7 p,q,r,s,t,u,v,w,x,y,z,{,|,},~, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 8 ,,,,,,,,,,,,,,, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 9 ,,,,,,,,,,,,,,, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // A ,,,,,,,,,,,,,,, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // B ,,,,,,,,,,,,,,, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // C ,,,,,,,,,,,,,,, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // D ,,,,,,,,,,,,,,, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // E ,,,,,,,,,,,,,,, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // F ,,,,,,,,,,,,,,, }; CBytes oBytes(iBytes.GetLength()*3+1, true); // Allocate enough space for every character to be expanded. BYTE* src=iBytes.Bytes; BYTE* dst=oBytes.Bytes; BYTE c; while(c=*src++) { if(c==' ') *dst++='+'; else if(Type[c] > static_cast(IsParams)) Byte2Hex(c,dst); else *dst++=c; } *dst++=0; // Null terminator oBytes.Length=dst-oBytes.Bytes; oBytes.MoveTo(iBytes); } static void EncodeParameters(CBytes& Bytes) {Encode(Bytes,true);} static void Decode(CBytes& Bytes) { BYTE* src=Bytes.Bytes; BYTE* dst=src; for(char c; c=*src++; (c=='%') ? Hex2Byte(src,dst) : ((c=='+') ? (*dst++=' ') : (*dst++=c))); *dst++=0; // Null terminator Bytes.Length=dst-Bytes.Bytes; } }; #ifdef Assert // Run tests if Tester.h is included in stdafx.h namespace { // namespace prevents multiple definitions if instantiating in the header like this: struct CURLTester : Tester { CURLTester() { // "http://user:password@www.domain.co.uk:80/root/subdirectory/page.htm?parameter=value&setting=this#fragment" for(BYTE b=255; b; --b) { CBytes Bytes(static_cast(2)); Bytes[0]=b; URL::Encode(Bytes); if(IsUnsafeUrlChar(b)) { if(b==' ') Assert((Bytes.GetLength()==1) && (Bytes[0]=='+')); else Assert((Bytes.GetLength()==3) && (Bytes[0]=='%')); }else Assert(Bytes[0]==b); URL::Decode(Bytes); Assert(Bytes[0]==b); } } bool IsUnsafeUrlChar(BYTE ch) { switch(ch) { case ';': case '\\': case '?': case '@': case '&': case '=': case '+': case '$': case ',': case ' ': case '<': case '>': case '#': case '%': case '\"': case '{': case '}': case '|': case '^': case '[': case ']': case '`': return true; default: return (ch < 32 || ch > 126); } } } URLTester; } #endif //def Assert #endif // ndef URLh