• Main Page
  • Related Pages
  • Modules
  • Namespaces
  • Classes
  • Files
  • File List
  • File Members

src/web/HttpUtility.cpp

Go to the documentation of this file.
00001 
00004 // 
00005 // System.Web.HttpUtility
00006 //
00007 // Authors:
00008 //   Patrik Torstensson (Patrik.Torstensson@labs2.com)
00009 //   Wictor Wilén (decode/encode functions) (wictor@ibizkit.se)
00010 //   Tim Coleman (tim@timcoleman.com)
00011 //   Gonzalo Paniagua Javier (gonzalo@ximian.com)
00012 //
00013 // Copyright (C) 2005 Novell, Inc (http://www.novell.com)
00014 //
00015 // Permission is hereby granted, free of charge, to any person obtaining
00016 // a copy of this software and associated documentation files (the
00017 // "Software"), to deal in the Software without restriction, including
00018 // without limitation the rights to use, copy, modify, merge, publish,
00019 // distribute, sublicense, and/or sell copies of the Software, and to
00020 // permit persons to whom the Software is furnished to do so, subject to
00021 // the following conditions:
00022 // 
00023 // The above copyright notice and this permission notice shall be
00024 // included in all copies or substantial portions of the Software.
00025 // 
00026 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
00027 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
00028 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
00029 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
00030 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
00031 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
00032 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
00033 //
00034 #include <ctype.h>
00035 #include <spl/Exception.h>
00036 #include <spl/Int32.h>
00037 #include <spl/text/StringBuffer.h>
00038 #include <spl/web/HttpUtility.h>
00039 
00040 using namespace spl;
00041 
00042 bool HttpUtility::HtmlAttributeEncodeRequired (const char *cp, int len)
00043 {
00044         for (int i = 0; i < len; i++) 
00045         {
00046                 if ( cp[i] == '&' || cp[i] == '"' || cp[i] == '<') 
00047                 {
00048                         return true;
00049                 }
00050         }
00051         return false;
00052 }
00053 
00054 StringPtr HttpUtility::HtmlAttributeEncode (const char *cp, int len)
00055 {
00056         int i;
00057 
00058         if (NULL == cp) 
00059         {
00060                 return StringPtr(new String());
00061         }
00062 
00063 
00064         if (!HtmlAttributeEncodeRequired(cp, len))
00065         {
00066                 return StringPtr(new String(cp));
00067         }
00068 
00069         StringBuffer output;
00070 
00071         for (i = 0; i < len; i++)
00072         {
00073                 switch ( cp[i] ) 
00074                 {
00075                 case '&' : 
00076                         output.Append ("&amp;");
00077                         break;
00078                 case '"' :
00079                         output.Append ("&quot;");
00080                         break;
00081                 case '<':
00082                         output.Append ("&lt;");
00083                         break;
00084                 default:
00085                         output.Append ( cp[i] );
00086                         break;
00087                 }
00088         }
00089 
00090         return output.ToString();
00091 }
00092 
// Converts one hexadecimal digit character to its numeric value.
// Accepts '0'-'9', 'a'-'f' and 'A'-'F'; returns -1 for anything else.
static int _GetInt (char c)
{
        if ('0' <= c && c <= '9')
        {
                return c - '0';
        }

        const bool isLowerHex = ('a' <= c && c <= 'f');
        const bool isUpperHex = ('A' <= c && c <= 'F');

        if (isLowerHex || isUpperHex)
        {
                // Letters a-f / A-F stand for the values 10-15.
                return (c - (isLowerHex ? 'a' : 'A')) + 10;
        }

        return -1;
}
00106 
00107 static int _GetChar (const char *bytes, int offset, int length)
00108 {
00109         int value = 0;
00110         int end = length + offset;
00111         for (int i = offset; i < end; i++) 
00112         {
00113                 int current = _GetInt (bytes[i]);
00114                 if (current == -1)
00115                 {
00116                         return -1;
00117                 }
00118                 value = (value << 4) + current;
00119         }
00120 
00121         return value;
00122 }
00123 
00124 StringPtr HttpUtility::UrlDecode (const char *cp, int len)
00125 {
00126         if (NULL == cp) 
00127         {
00128                 return StringPtr(new String());
00129         }
00130 
00131         if ( IndexOfCh(cp, '%') < 0 && IndexOfCh(cp, '+') < 0 )
00132         {
00133                 return StringPtr(new String(cp));
00134         }
00135 
00136         StringBuffer output;
00137         StringBuffer bytes;
00138         int xchar;
00139 
00140         for (int i = 0; i < len; i++) 
00141         {
00142                 if (cp[i] == '%' && i + 2 < len && cp[i + 1] != '%') 
00143                 {
00144                         if (cp[i + 1] == 'u' && i + 5 < len) 
00145                         {
00146                                 if (bytes.Length() > 0) 
00147                                 {
00148                                         output.Append (bytes.GetChars());
00149                                         bytes.SetLength(0);
00150                                 }
00151 
00152                                 xchar = _GetChar (cp, i + 2, 4);
00153                                 if (xchar != -1) 
00154                                 {
00155                                         output.Append ((char) xchar);
00156                                         i += 5;
00157                                 } 
00158                                 else 
00159                                 {
00160                                         output.Append ('%');
00161                                 }
00162                         } 
00163                         else if ((xchar = _GetChar(cp, i + 1, 2)) != -1) 
00164                         {
00165                                 bytes.Append ( (char)xchar );
00166                                 i += 2;
00167                         } 
00168                         else 
00169                         {
00170                                 output.Append ('%');
00171                         }
00172                         continue;
00173                 }
00174 
00175                 if (bytes.Length() > 0) 
00176                 {
00177                         output.Append (bytes.GetChars());
00178                         bytes.SetLength(0);
00179                 }
00180 
00181                 if (cp[i] == '+') 
00182                 {
00183                         output.Append (' ');
00184                 } 
00185                 else 
00186                 {
00187                         output.Append (cp[i]);
00188                 }
00189         }
00190 
00191         if (bytes.Length() > 0) 
00192         {
00193                 output.Append (bytes.GetChars());
00194         }
00195 
00196         return output.ToString();
00197 }
00198 
// Lowercase hexadecimal digits, indexed by nibble value (0-15).
static const char _urlHexChars[] = {
        '0', '1', '2', '3', '4', '5', '6', '7',
        '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
};
00200 
00201 bool HttpUtility::UrlEncodeRequired (const char *cp, int len)
00202 {
00203         for (int i = 0; i < len; i++) 
00204         {
00205                 char c = cp[i];
00206                 if ((c < '0') || (c < 'A' && c > '9') || (c > 'Z' && c < 'a') || (c > 'z')) 
00207                 {
00208                         if (NotEncoded (c))
00209                         {
00210                                 continue;
00211                         }
00212                         return true;
00213                 }
00214         }
00215         return false;
00216 }
00217 
00218 StringPtr HttpUtility::UrlEncode (const char *cp, int len)
00219 {
00220         if (cp == NULL)
00221         {
00222                 return StringPtr(new String());
00223         }
00224 
00225         if (cp[0] == '\0')
00226         {
00227                 return StringPtr(new String());
00228         }
00229 
00230         int i;
00231         bool isUnicode = false;
00232 
00233 
00234         if (!UrlEncodeRequired(cp, len))
00235         {
00236                 return StringPtr(new String(cp));
00237         }
00238 
00239         StringBuffer result;
00240 
00241         for ( i = 0; i < len; i++ )
00242         {
00243                 char c = cp[i];
00244 
00245                 if (sizeof(char) > 1 && c > 255) 
00246                 {
00247                         int idx;
00248                         int i = (int) c;
00249 
00250                         result.Append('%');
00251                         result.Append ('u');
00252                         idx = i >> 12;
00253                         result.Append (_urlHexChars [idx]);
00254                         idx = (i >> 8) & 0x0F;
00255                         result.Append (_urlHexChars [idx]);
00256                         idx = (i >> 4) & 0x0F;
00257                         result.Append (_urlHexChars [idx]);
00258                         idx = i & 0x0F;
00259                         result.Append (_urlHexChars [idx]);
00260                         continue;
00261                 }
00262                 
00263                 if (c > ' ' && NotEncoded (c)) 
00264                 {
00265                         result.Append(c);
00266                         continue;
00267                 }
00268                 if (c==' ') 
00269                 {
00270                         result.Append('+');
00271                         continue;
00272                 }
00273                 if ( (c < '0') ||
00274                         (c < 'A' && c > '9') ||
00275                         (c > 'Z' && c < 'a') ||
00276                         (c > 'z')) 
00277                 {
00278                         if (isUnicode && c > 127) 
00279                         {
00280                                 result.Append ('%');
00281                                 result.Append ('u');
00282                                 result.Append ('0');
00283                                 result.Append ('0');
00284                         }
00285                         else
00286                         {
00287                                 result.Append('%');
00288                         }
00289                         int idx = ((int) c) >> 4;
00290                         result.Append (_urlHexChars [idx]);
00291                         idx = ((int) c) & 0x0F;
00292                         result.Append (_urlHexChars [idx]);
00293                 }
00294                 else
00295                 {
00296                         result.Append (c);
00297                 }
00298         }
00299 
00300         return result.ToString();
00301 }
00302 
00303 StringPtr HttpUtility::HtmlDecode (const char *cp, int len)
00304 {
00305         if (cp == NULL)
00306         {
00307                 throw new InvalidArgumentException("HtmlDecode: argument was null");
00308         }
00309 
00310         if (IndexOfCh(cp, '&') < 0)
00311         {
00312                 return StringPtr(new String(cp));
00313         }
00314 
00315         StringBuffer entity;
00316         StringBuffer output;
00317 
00318         // 0 -> nothing,
00319         // 1 -> right after '&'
00320         // 2 -> between '&' and ';' but no '#'
00321         // 3 -> '#' found after '&' and getting numbers
00322         int state = 0;
00323         int number = 0;
00324         bool have_trailing_digits = false;
00325 
00326         for (int i = 0; i < len; i++) 
00327         {
00328                 char c = cp[i];
00329                 if (state == 0) 
00330                 {
00331                         if (c == '&') 
00332                         {
00333                                 entity.Append (c);
00334                                 state = 1;
00335                         } 
00336                         else 
00337                         {
00338                                 output.Append (c);
00339                         }
00340                         continue;
00341                 }
00342 
00343                 if (c == '&') 
00344                 {
00345                         state = 1;
00346                         if (have_trailing_digits) 
00347                         {
00348                                 entity.Append(Int32::ToString(number));
00349                                 have_trailing_digits = false;
00350                         }
00351 
00352                         output.Append(entity.GetChars());
00353                         entity.SetLength(0);
00354                         entity.Append ('&');
00355                         continue;
00356                 }
00357 
00358                 if (state == 1) 
00359                 {
00360                         if (c == ';') 
00361                         {
00362                                 state = 0;
00363                                 output.Append(entity.GetChars());
00364                                 output.Append(c);
00365                                 entity.SetLength(0);
00366                         } 
00367                         else 
00368                         {
00369                                 number = 0;
00370                                 if (c != '#') 
00371                                 {
00372                                         state = 2;
00373                                 } 
00374                                 else 
00375                                 {
00376                                         state = 3;
00377                                 }
00378                                 entity.Append (c);
00379                         }
00380                 } 
00381                 else if (state == 2) 
00382                 {
00383                         entity.Append (c);
00384                         if (c == ';') 
00385                         {
00386                                 // This only checks the most common entities, since I don't want a static hashtable in the library.
00387                                 if (entity.Equals("&nbsp;"))
00388                                 {
00389                                         output.Append(' ');
00390                                 }
00391                                 else if (entity.Equals("&amp;"))
00392                                 {
00393                                         output.Append('&');
00394                                 }
00395                                 else if (entity.Equals("&lt;"))
00396                                 {
00397                                         output.Append('<');
00398                                 }
00399                                 else if (entity.Equals("&gt;"))
00400                                 {
00401                                         output.Append('>');
00402                                 }
00403                                 else if (entity.Equals("&quot;"))
00404                                 {
00405                                         output.Append('"');
00406                                 }
00407                                 else
00408                                 {
00409                                         output.Append(entity.GetChars());
00410                                 }
00411                                 state = 0;
00412                                 entity.SetLength(0);;
00413                         }
00414                 } 
00415                 else if (state == 3) 
00416                 {
00417                         if (c == ';') 
00418                         {
00419                                 if (number > 65535) 
00420                                 {
00421                                         output.Append("&#");
00422                                         output.Append(Int32::ToString(number));
00423                                         output.Append(";");
00424                                 } 
00425                                 else 
00426                                 {
00427                                         output.Append ((char) number);
00428                                 }
00429                                 state = 0;
00430                                 entity.SetLength(0);
00431                                 have_trailing_digits = false;
00432                         } 
00433                         else if (isdigit(c)) 
00434                         {
00435                                 number = number * 10 + ((int) c - '0');
00436                                 have_trailing_digits = true;
00437                         } 
00438                         else 
00439                         {
00440                                 state = 2;
00441                                 if (have_trailing_digits) 
00442                                 {
00443                                         entity.Append (Int32::ToString(number));
00444                                         have_trailing_digits = false;
00445                                 }
00446                                 entity.Append(c);
00447                         }
00448                 }
00449         }
00450 
00451         if (entity.Length() > 0) 
00452         {
00453                 output.Append(entity.GetChars());
00454         } 
00455         else if (have_trailing_digits) 
00456         {
00457                 output.Append (Int32::ToString(number));
00458         }
00459         return output.ToString ();
00460 }
00461 
00462 bool HttpUtility::HtmlEncodeRequired (const char *cp, int len)
00463 {
00464         for (int i = 0; i < len; i++) 
00465         {
00466                 char c = cp[i];
00467                 if (c == '&' || c == '"' || c == '<' || c == '>' || c > 159) 
00468                 {
00469                         return true;
00470                 }
00471         }
00472         return false;
00473 }
00474 
00475 StringPtr HttpUtility::HtmlEncode (const char *cp, int len) 
00476 {
00477         if (cp == NULL)
00478         {
00479                 return StringPtr(new String());
00480         }
00481 
00482         int i;
00483 
00484 
00485         if (!HtmlEncodeRequired(cp, len))
00486         {
00487                 return StringPtr(new String(cp));
00488         }
00489 
00490         StringBuffer output;
00491 
00492         for (i = 0; i < len; i++)
00493         {
00494                 switch (cp[i]) 
00495                 {
00496                 case '&':
00497                         output.Append ("&amp;");
00498                         break;
00499                 case '>' : 
00500                         output.Append ("&gt;");
00501                         break;
00502                 case '<':
00503                         output.Append ("&lt;");
00504                         break;
00505                 case '"':
00506                         output.Append ("&quot;");
00507                         break;
00508                 default:
00509                         // MS starts encoding with &# from 160 and stops at 255.
00510                         // We don't do that. One reason is the 65308/65310 unicode
00511                         // characters that look like '<' and '>'.
00512                         if (cp[i] > 159) 
00513                         {
00514                                 output.Append ("&#");
00515                                 output.Append (Int32::ToString((int)cp[i]));
00516                                 output.Append (";");
00517                         } 
00518                         else 
00519                         {
00520                                 output.Append (cp[i]);
00521                         }
00522                         break;
00523                 }
00524         }
00525         return output.ToString();
00526 }
00527