Standard Portable Library: src/xml/TinyXmlParser.cpp Source File

00001 /* Modified for the SPL project. */
00002 /*
00003 www.sourceforge.net/projects/tinyxml
00004 Original code (2.0 and earlier )copyright (c) 2000-2002 Lee Thomason (www.grinninglizard.com)
00005 
00006 This software is provided 'as-is', without any express or implied 
00007 warranty. In no event will the authors be held liable for any 
00008 damages arising from the use of this software.
00009 
00010 Permission is granted to anyone to use this software for any 
00011 purpose, including commercial applications, and to alter it and 
00012 redistribute it freely, subject to the following restrictions:
00013 
00014 1. The origin of this software must not be misrepresented; you must 
00015 not claim that you wrote the original software. If you use this
00016 software in a product, an acknowledgment in the product documentation
00017 would be appreciated but is not required.
00018 
00019 2. Altered source versions must be plainly marked as such, and 
00020 must not be misrepresented as being the original software.
00021 
00022 3. This notice may not be removed or altered from any source 
00023 distribution.
00024 */
00025 
00026 #include <ctype.h>
00027 #include <stdio.h>
00028 #include <stddef.h>
00029 
00030 #include <spl/xml/XmlAttribute.h>
00031 #include <spl/xml/XmlComment.h>
00032 #include <spl/xml/XmlDeclaration.h>
00033 #include <spl/xml/XmlDocument.h>
00034 #include <spl/xml/XmlElement.h>
00035 #include <spl/xml/XmlNode.h>
00036 #include <spl/xml/XmlText.h>
00037 #include <spl/xml/XmlUnknown.h>
00038 
00039 #ifdef TIXML_SAFE
00040 #if defined(_MSC_VER) && (_MSC_VER >= 1400 )
00041 // Microsoft visual studio, version 2005 and higher.
00042 #define TIXML_SNPRINTF _snprintf_s
00043 #define TIXML_SNSCANF  _snscanf_s
00044 #define TIXML_SSCANF   sscanf_s
00045 #elif defined(_MSC_VER) && (_MSC_VER >= 1200 )
00046 // Microsoft visual studio, version 6 and higher.
00047 //#pragma message( "Using _sn* functions." )
00048 #define TIXML_SNPRINTF _snprintf
00049 #define TIXML_SNSCANF  _snscanf
00050 #define TIXML_SSCANF   sscanf
00051 #elif defined(__GNUC__) && (__GNUC__ >= 3 )
00052 // GCC version 3 and higher.
00053 //#warning( "Using sn* functions." )
00054 #define TIXML_SNPRINTF snprintf
00055 #define TIXML_SNSCANF  snscanf
00056 #define TIXML_SSCANF   sscanf
00057 #else
00058 #define TIXML_SSCANF   sscanf
00059 #endif
00060 #endif  
00061 
// Describes one predefined XML entity: its escaped spelling, the length of
// that spelling in bytes (excluding the terminator), and the single
// character it stands for.
struct Entity
{
    const char*     str;
    unsigned int    strLength;
    char            chr;
};

#define NUM_ENTITY 5
#define MAX_ENTITY_LENGTH 6

// The five predefined XML entities.
// Note that _XmlEncodeString indexes this table by position
// (entity[0] .. entity[4]), so this is less flexible than it appears:
// changing the entries or their order will break the encoder.
// The table is read-only lookup data, hence const.
static const struct Entity entity[ NUM_ENTITY ] =
{
    { "&amp;",  5, '&'  },
    { "&lt;",   4, '<'  },
    { "&gt;",   4, '>'  },
    { "&quot;", 6, '\"' },
    { "&apos;", 6, '\'' }
};
00083 
00084 // Bunch of unicode info at:
00085 //              http://www.unicode.org/faq/utf_bom.html
00086 // Including the basic of this table, which determines the #bytes in the
00087 // sequence from the lead byte. 1 placed for invalid sequences --
00088 // although the result will be junk, pass it through as much as possible.
00089 // Beware of the non-characters in UTF-8:       
00090 //                              ef bb bf (Microsoft "lead bytes")
00091 //                              ef bf be
00092 //                              ef bf bf 
00093 
// The three bytes EF BB BF that make up the UTF-8 encoded byte-order mark.
const unsigned char TIXML_UTF_LEAD_0 = 0xEFu;
const unsigned char TIXML_UTF_LEAD_1 = 0xBBu;
const unsigned char TIXML_UTF_LEAD_2 = 0xBFu;
00097 
// Table that returns, for a given lead byte, the total number of bytes
// in the UTF-8 sequence it starts. Bytes that cannot start a sequence
// map to 1 so malformed input is still consumed one byte at a time.
const int utf8ByteTable[256] =
{
    // 0x00 - 0x7f : single-byte (ASCII) range
    1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,     // 0x00
    1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,     // 0x10
    1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,     // 0x20
    1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,     // 0x30
    1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,     // 0x40
    1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,     // 0x50
    1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,     // 0x60
    1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,     // 0x70

    // 0x80 - 0xc1 : not valid as a lead byte
    1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,     // 0x80
    1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,     // 0x90
    1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,     // 0xa0
    1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,     // 0xb0

    // 0xc2 - 0xdf : two-byte sequences (0xc0 and 0xc1 are invalid)
    1,1,2,2, 2,2,2,2, 2,2,2,2, 2,2,2,2,     // 0xc0
    2,2,2,2, 2,2,2,2, 2,2,2,2, 2,2,2,2,     // 0xd0

    // 0xe0 - 0xef : three-byte sequences
    3,3,3,3, 3,3,3,3, 3,3,3,3, 3,3,3,3,     // 0xe0

    // 0xf0 - 0xf4 : four-byte sequences; 0xf5 and above are invalid
    4,4,4,4, 4,1,1,1, 1,1,1,1, 1,1,1,1      // 0xf0
};
00120 
// Encodes the Unicode code point `input` as UTF-8.
//
//   input   the code point to encode.
//   output  receives the encoded bytes; must have room for up to 4 bytes.
//           No terminating NUL is written.
//   length  receives the number of bytes written (1 to 4), or 0 when
//           `input` lies above U+10FFFF, the last Unicode code point. In
//           that case `output` is left untouched.
//
// Values above U+10FFFF are rejected because encoding them would need the
// lead bytes 0xF5..0xF7, which utf8ByteTable treats as invalid.
void ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
{
    const unsigned long BYTE_MASK = 0xBF;   // clears bit 6 of a trailing byte
    const unsigned long BYTE_MARK = 0x80;   // sets bit 7 of a trailing byte
    // Marker bits of the lead byte, indexed by total sequence length.
    const unsigned long FIRST_BYTE_MARK[5] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0 };

    if ( input < 0x80 )
        *length = 1;
    else if ( input < 0x800 )
        *length = 2;
    else if ( input < 0x10000 )
        *length = 3;
    else if ( input <= 0x10FFFF )
        *length = 4;
    else
    {
        // Not a Unicode code point; nothing sensible can be written.
        *length = 0;
        return;
    }

    // Fill the trailing bytes from last to first; each carries the next
    // six low-order bits of the code point.
    for ( int i = *length - 1; i > 0; --i )
    {
        output[i] = (char)( ( input | BYTE_MARK ) & BYTE_MASK );
        input >>= 6;
    }

    // Whatever bits remain go into the lead byte together with its marker.
    output[0] = (char)( input | FIRST_BYTE_MARK[*length] );
}
00160 
00161 // Return true if the next characters in the stream are any of the endTag sequences.
00162 // Ignore case only works for english, and should only be relied on when comparing
00163 // to English words: StringEqual( p, "version", true ) is fine.
00164 static bool StringEqual(        const char* p,
00165                                                 const char* endTag,
00166                                                 bool ignoreCase,
00167                                                 XmlEncoding encoding );
00168 
00169 const char* _xmlErrorStrings[ XmlNode::TIXML_ERROR_STRING_COUNT ] =
00170 {
00171         "No error",
00172         "Error",
00173         "Failed to open file",
00174         "Memory allocation failed.",
00175         "Error parsing Element.",
00176         "Failed to read Element name",
00177         "Error reading Element value.",
00178         "Error reading Attributes.",
00179         "Error: empty tag.",
00180         "Error reading end tag.",
00181         "Error parsing Unknown.",
00182         "Error parsing Comment.",
00183         "Error parsing Declaration.",
00184         "Error document empty.",
00185         "Error null (0) or unexpected EOF found in input stream.",
00186         "Error parsing CDATA.",
00187         "Error when TiXmlDocument added to document, because TiXmlDocument can only be at the root.",
00188 };
00189 
00190 // None of these methods are reliable for any language except English.
00191 // Good for approximation, not great for accuracy.
00192 static int IsAlpha( unsigned char anyByte, XmlEncoding encoding );
00193 
00194 static int IsAlphaNum( unsigned char anyByte, XmlEncoding encoding );
00195 
00196 inline static int ToLower( int v, XmlEncoding encoding )
00197 {
00198         if ( encoding == TIXML_ENCODING_UTF8 )
00199         {
00200                 if ( v < 128 ) 
00201                         return tolower( v );
00202                 return v;
00203         }
00204         else
00205         {
00206                 return tolower( v );
00207         }
00208 }
00209 
00210 void _XmlEncodeString( const String& str, StringBuffer& outString )
00211 {
00212         int i = 0;
00213 
00214         while( i < str.Length() )
00215         {
00216                 unsigned char c = (unsigned char) str[i];
00217 
00218                 if (    c == '&' 
00219                         && i < ( (int)str.Length() - 2 )
00220                         && str[i+1] == '#'
00221                         && str[i+2] == 'x' )
00222                 {
00223                         // Hexadecimal character reference.
00224                         // Pass through unchanged.
00225                         // &#xA9;       -- copyright symbol, for example.
00226                         //
00227                         // The -1 is a bug fix from Rob Laveaux. It keeps
00228                         // an overflow from happening if there is no ';'.
00229                         // There are actually 2 ways to exit this loop -
00230                         // while fails (error case) and break (semicolon found).
00231                         // However, there is no mechanism (currently) for
00232                         // this function to return an error.
00233                         while ( i < str.Length()-1 )
00234                         {
00235                                 outString.Append( str.GetChars() + i, 1 );
00236                                 ++i;
00237                                 if ( str[i] == ';' )
00238                                         break;
00239                         }
00240                 }
00241                 else if ( c == '&' )
00242                 {
00243                         outString.Append( entity[0].str, entity[0].strLength );
00244                         ++i;
00245                 }
00246                 else if ( c == '<' )
00247                 {
00248                         outString.Append( entity[1].str, entity[1].strLength );
00249                         ++i;
00250                 }
00251                 else if ( c == '>' )
00252                 {
00253                         outString.Append( entity[2].str, entity[2].strLength );
00254                         ++i;
00255                 }
00256                 else if ( c == '\"' )
00257                 {
00258                         outString.Append( entity[3].str, entity[3].strLength );
00259                         ++i;
00260                 }
00261                 else if ( c == '\'' )
00262                 {
00263                         outString.Append( entity[4].str, entity[4].strLength );
00264                         ++i;
00265                 }
00266                 else if ( c < 32 )
00267                 {
00268                         // Easy pass at non-alpha/numeric/symbol
00269                         // Below 32 is symbolic.
00270                         char buf[ 32 ];
00271 
00272 #if defined(TIXML_SNPRINTF)             
00273                         TIXML_SNPRINTF( buf, sizeof(buf), "&#x%02X;", (unsigned) ( c & 0xff ) );
00274 #else
00275                         sprintf( buf, "&#x%02X;", (unsigned) ( c & 0xff ) );
00276 #endif          
00277 
00278                         //*ME:  warning C4267: convert 'size_t' to 'int'
00279                         //*ME:  Int-Cast to make compiler happy ...
00280                         outString.Append( buf, (int)strlen( buf ) );
00281                         ++i;
00282                 }
00283                 else
00284                 {
00285                         //char realc = (char) c;
00286                         //outString->append( &realc, 1 );
00287                         outString.Append((char) c);     // somewhat more efficient function call.
00288                         ++i;
00289                 }
00290         }
00291 }
00292 
00293 static const char* SkipWhiteSpace( const char*, XmlEncoding encoding );
00294 
// Returns true when `c` is a line break or anything isspace() accepts.
inline static bool IsWhiteSpace( char c )
{
    if ( c == '\n' || c == '\r' )
        return true;
    return isspace( (unsigned char) c ) != 0;
}

// Integer overload: values of 256 and above are never white space;
// smaller values are narrowed to char and tested as above.
// Only truly correct for English/Latin text, but usually works.
inline static bool IsWhiteSpace( int c )
{
    return ( c < 256 ) && IsWhiteSpace( (char) c );
}
00306 
00307 /*      Reads an XML name into the string provided. Returns
00308 a pointer just past the last character of the name,
00309 or 0 if the function has an error.
00310 */
00311 static const char* ReadName( const char* p, String* name, XmlEncoding encoding );
00312 
00313 /*      Reads text. Returns a pointer past the given end tag.
00314 Wickedly complex options, but it keeps the (sensitive) code in one place.
00315 */
00316 static const char* ReadText(    const char* in,                         // where to start
00317                                                         StringBuffer& text,                     // the string read
00318                                                         bool ignoreWhiteSpace,          // whether to keep the white space
00319                                                         const char* endTag,                     // what ends this text
00320                                                         bool ignoreCase,                        // whether to ignore case in the end tag
00321                                                         XmlEncoding encoding ); // the current encoding
00322 
00323 // If an entity has been found, transform it into a character.
00324 static const char* GetEntity( const char* in, char* value, int* length, XmlEncoding encoding );
00325 
00326 // Get a character, while interpreting entities.
00327 // The length can be from 0 to 4 bytes.
00328 inline static const char* GetChar( const char* p, char* _value, int* length, XmlEncoding encoding )
00329 {
00330         ASSERT( NULL != p );
00331         if ( encoding == TIXML_ENCODING_UTF8 )
00332         {
00333                 *length = utf8ByteTable[ *((const unsigned char*)p) ];
00334                 assert( *length >= 0 && *length < 5 );
00335         }
00336         else
00337         {
00338                 *length = 1;
00339         }
00340 
00341         if ( *length == 1 )
00342         {
00343                 if ( *p == '&' )
00344                         return GetEntity( p, _value, length, encoding );
00345                 *_value = *p;
00346                 return p+1;
00347         }
00348         else if ( *length )
00349         {
00350                 //strncpy( _value, p, *length );        // lots of compilers don't like this function (unsafe),
00351                 // and the null terminator isn't needed
00352                 for( int i=0; p[i] && i<*length; ++i ) {
00353                         _value[i] = p[i];
00354                 }
00355                 return p + (*length);
00356         }
00357         else
00358         {
00359                 // Not valid text.
00360                 return 0;
00361         }
00362 }
00363 
00364 /*static*/ int IsAlpha( unsigned char anyByte, XmlEncoding /*encoding*/ )
00365 {
00366         // This will only work for low-ascii, everything else is assumed to be a valid
00367         // letter. I'm not sure this is the best approach, but it is quite tricky trying
00368         // to figure out alhabetical vs. not across encoding. So take a very 
00369         // conservative approach.
00370 
00371         //      if ( encoding == TIXML_ENCODING_UTF8 )
00372         //      {
00373         if ( anyByte < 127 )
00374                 return isalpha( anyByte );
00375         else
00376                 return 1;       // What else to do? The unicode set is huge...get the english ones right.
00377         //      }
00378         //      else
00379         //      {
00380         //              return isalpha( anyByte );
00381         //      }
00382 }
00383 
00384 
00385 /*static*/ int IsAlphaNum( unsigned char anyByte, XmlEncoding /*encoding*/ )
00386 {
00387         // This will only work for low-ascii, everything else is assumed to be a valid
00388         // letter. I'm not sure this is the best approach, but it is quite tricky trying
00389         // to figure out alhabetical vs. not across encoding. So take a very 
00390         // conservative approach.
00391 
00392         //      if ( encoding == TIXML_ENCODING_UTF8 )
00393         //      {
00394         if ( anyByte < 127 )
00395                 return isalnum( anyByte );
00396         else
00397                 return 1;       // What else to do? The unicode set is huge...get the english ones right.
00398         //      }
00399         //      else
00400         //      {
00401         //              return isalnum( anyByte );
00402         //      }
00403 }
00404 
00405 void XmlParsingData::Stamp( const char* now, XmlEncoding encoding )
00406 {
00407         assert( now );
00408 
00409         // Do nothing if the tabsize is 0.
00410         if ( tabsize < 1 )
00411         {
00412                 return;
00413         }
00414 
00415         // Get the current row, column.
00416         int row = cursor.row;
00417         int col = cursor.col;
00418         const char* p = stamp;
00419         assert( p );
00420 
00421         while ( p < now )
00422         {
00423                 // Treat p as unsigned, so we have a happy compiler.
00424                 const unsigned char* pU = (const unsigned char*)p;
00425 
00426                 // Code contributed by Fletcher Dunn: (modified by lee)
00427                 switch (*pU) {
00428 case 0:
00429         // We *should* never get here, but in case we do, don't
00430         // advance past the terminating null character, ever
00431         return;
00432 
00433 case '\r':
00434         // bump down to the next line
00435         ++row;
00436         col = 0;                                
00437         // Eat the character
00438         ++p;
00439 
00440         // Check for \r\n sequence, and treat this as a single character
00441         if (*p == '\n') 
00442         {
00443                 ++p;
00444         }
00445         break;
00446 
00447 case '\n':
00448         // bump down to the next line
00449         ++row;
00450         col = 0;
00451 
00452         // Eat the character
00453         ++p;
00454 
00455         // Check for \n\r sequence, and treat this as a single
00456         // character.  (Yes, this bizarre thing does occur still
00457         // on some arcane platforms...)
00458         if (*p == '\r') 
00459         {
00460                 ++p;
00461         }
00462         break;
00463 
00464 case '\t':
00465         // Eat the character
00466         ++p;
00467 
00468         // Skip to next tab stop
00469         col = (col / tabsize + 1) * tabsize;
00470         break;
00471 
00472 case TIXML_UTF_LEAD_0:
00473         if ( encoding == TIXML_ENCODING_UTF8 )
00474         {
00475                 if ( *(p+1) && *(p+2) )
00476                 {
00477                         // In these cases, don't advance the column. These are
00478                         // 0-width spaces.
00479                         if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 )
00480                                 p += 3; 
00481                         else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU )
00482                                 p += 3; 
00483                         else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU )
00484                                 p += 3; 
00485                         else
00486                         { p +=3; ++col; }       // A normal character.
00487                 }
00488         }
00489         else
00490         {
00491                 ++p;
00492                 ++col;
00493         }
00494         break;
00495 
00496 default:
00497         if ( encoding == TIXML_ENCODING_UTF8 )
00498         {
00499                 // Eat the 1 to 4 byte utf8 character.
00500                 int step = utf8ByteTable[*((const unsigned char*)p)];
00501                 if ( step == 0 )
00502                         step = 1;               // Error case from bad encoding, but handle gracefully.
00503                 p += step;
00504 
00505                 // Just advance one column, of course.
00506                 ++col;
00507         }
00508         else
00509         {
00510                 ++p;
00511                 ++col;
00512         }
00513         break;
00514                 }
00515         }
00516         cursor.row = row;
00517         cursor.col = col;
00518         assert( cursor.row >= -1 );
00519         assert( cursor.col >= -1 );
00520         stamp = p;
00521         assert( stamp );
00522 }
00523 
00524 
00525 const char* SkipWhiteSpace( const char* p, XmlEncoding encoding )
00526 {
00527         if ( !p || !*p )
00528         {
00529                 return 0;
00530         }
00531         if ( encoding == TIXML_ENCODING_UTF8 )
00532         {
00533                 while ( *p )
00534                 {
00535                         const unsigned char* pU = (const unsigned char*)p;
00536 
00537                         // Skip the stupid Microsoft UTF-8 Byte order marks
00538                         if (    *(pU+0)==TIXML_UTF_LEAD_0
00539                                 && *(pU+1)==TIXML_UTF_LEAD_1 
00540                                 && *(pU+2)==TIXML_UTF_LEAD_2 )
00541                         {
00542                                 p += 3;
00543                                 continue;
00544                         }
00545                         else if(*(pU+0)==TIXML_UTF_LEAD_0
00546                                 && *(pU+1)==0xbfU
00547                                 && *(pU+2)==0xbeU )
00548                         {
00549                                 p += 3;
00550                                 continue;
00551                         }
00552                         else if(*(pU+0)==TIXML_UTF_LEAD_0
00553                                 && *(pU+1)==0xbfU
00554                                 && *(pU+2)==0xbfU )
00555                         {
00556                                 p += 3;
00557                                 continue;
00558                         }
00559 
00560                         if ( IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' )            // Still using old rules for white space.
00561                                 ++p;
00562                         else
00563                                 break;
00564                 }
00565         }
00566         else
00567         {
00568                 while ( *p && IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' )
00569                         ++p;
00570         }
00571 
00572         return p;
00573 }
00574 
00575 // One of TinyXML's more performance demanding functions. Try to keep the memory overhead down. The
00576 // "assign" optimization removes over 10% of the execution time.
00577 //
00578 const char* ReadName( const char* p, String * name, XmlEncoding encoding )
00579 {
00580         ASSERT(NULL != p);
00581         name->Set("");
00582 
00583         // Names start with letters or underscores.
00584         // Of course, in unicode, tinyxml has no idea what a letter *is*. The
00585         // algorithm is generous.
00586         //
00587         // After that, they can be letters, underscores, numbers,
00588         // hyphens, or colons. (Colons are valid ony for namespaces,
00589         // but tinyxml can't tell namespaces from names.)
00590         if ( NULL != p && *p && ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) )
00591         {
00592                 int len = 0;
00593                 const char *start = p;
00594                 while( p && *p  &&      
00595                         (               
00596                         IsAlphaNum( (unsigned char ) *p, encoding ) 
00597                         || *p == '_'
00598                         || *p == '-'
00599                         || *p == '.'
00600                         || *p == ':' ) 
00601                         )
00602                 {
00603                         ++p;
00604                         len++;
00605                 }
00606                 if ( len > 0 ) 
00607                 {
00608                         name->Set( String(start, len) );
00609                 }
00610                 return p;
00611         }
00612         return NULL;
00613 }
00614 
00615 const char* GetEntity( const char* p, char* value, int* length, XmlEncoding encoding )
00616 {
00617         // Presume an entity, and pull it out.
00618         String ent;
00619         int i;
00620         *length = 0;
00621 
00622         if ( *(p+1) && *(p+1) == '#' && *(p+2) )
00623         {
00624                 unsigned long ucs = 0;
00625                 ptrdiff_t delta = 0;
00626                 unsigned mult = 1;
00627 
00628                 if ( *(p+2) == 'x' )
00629                 {
00630                         // Hexadecimal.
00631                         if ( !*(p+3) ) 
00632                                 return 0;
00633 
00634                         const char* q = p+3;
00635                         q = strchr( q, ';' );
00636 
00637                         if ( !q || !*q ) 
00638                                 return 0;
00639 
00640                         delta = q-p;
00641                         --q;
00642 
00643                         while ( *q != 'x' )
00644                         {
00645                                 if ( *q >= '0' && *q <= '9' )
00646                                         ucs += mult * (*q - '0');
00647                                 else if ( *q >= 'a' && *q <= 'f' )
00648                                         ucs += mult * (*q - 'a' + 10);
00649                                 else if ( *q >= 'A' && *q <= 'F' )
00650                                         ucs += mult * (*q - 'A' + 10 );
00651                                 else 
00652                                         return 0;
00653                                 mult *= 16;
00654                                 --q;
00655                         }
00656                 }
00657                 else
00658                 {
00659                         // Decimal.
00660                         if ( !*(p+2) ) 
00661                                 return 0;
00662 
00663                         const char* q = p+2;
00664                         q = strchr( q, ';' );
00665 
00666                         if ( !q || !*q ) 
00667                                 return 0;
00668 
00669                         delta = q-p;
00670                         --q;
00671 
00672                         while ( *q != '#' )
00673                         {
00674                                 if ( *q >= '0' && *q <= '9' )
00675                                         ucs += mult * (*q - '0');
00676                                 else 
00677                                         return 0;
00678                                 mult *= 10;
00679                                 --q;
00680                         }
00681                 }
00682                 if ( encoding == TIXML_ENCODING_UTF8 )
00683                 {
00684                         // convert the UCS to UTF-8
00685                         ConvertUTF32ToUTF8( ucs, value, length );
00686                 }
00687                 else
00688                 {
00689                         *value = (char)ucs;
00690                         *length = 1;
00691                 }
00692                 return p + delta + 1;
00693         }
00694 
00695         // Now try to match it.
00696         for( i=0; i<NUM_ENTITY; ++i )
00697         {
00698                 if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
00699                 {
00700                         assert( strlen( entity[i].str ) == entity[i].strLength );
00701                         *value = entity[i].chr;
00702                         *length = 1;
00703                         return ( p + entity[i].strLength );
00704                 }
00705         }
00706 
00707         // So it wasn't an entity, its unrecognized, or something like that.
00708         *value = *p;    // Don't put back the last one, since we return it!
00709         //*length = 1;  // Leave unrecognized entities - this doesn't really work.
00710         // Just writes strange XML.
00711         return p+1;
00712 }
00713 
00714 
00715 bool StringEqual( const char* p,
00716                                  const char* tag,
00717                                  bool ignoreCase,
00718                                  XmlEncoding encoding )
00719 {
00720         assert( p );
00721         assert( tag );
00722         if ( !p || !*p )
00723         {
00724                 assert( 0 );
00725                 return false;
00726         }
00727 
00728         const char* q = p;
00729 
00730         if ( ignoreCase )
00731         {
00732                 while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) )
00733                 {
00734                         ++q;
00735                         ++tag;
00736                 }
00737 
00738                 if ( *tag == 0 )
00739                         return true;
00740         }
00741         else
00742         {
00743                 while ( *q && *tag && *q == *tag )
00744                 {
00745                         ++q;
00746                         ++tag;
00747                 }
00748 
00749                 if ( *tag == 0 )                // Have we found the end of the tag, and everything equal?
00750                         return true;
00751         }
00752         return false;
00753 }
00754 
00755 const char* ReadText(   const char* p, 
00756                                          StringBuffer& text, 
00757                                          bool trimWhiteSpace, 
00758                                          const char* endTag, 
00759                                          bool caseInsensitive,
00760                                          XmlEncoding encoding )
00761 {
00762         text.SetLength(0);
00763 
00764         if (    !trimWhiteSpace                 // certain tags always keep whitespace
00765                 || !XmlDocument::IsWhiteSpaceCondensed() )      // if true, whitespace is always kept
00766         {
00767                 // Keep all the white space.
00768                 while (    p && *p
00769                         && !StringEqual( p, endTag, caseInsensitive, encoding )
00770                         )
00771                 {
00772                         int len;
00773                         char cArr[4] = { 0, 0, 0, 0 };
00774                         p = GetChar( p, cArr, &len, encoding );
00775                         text.Append( cArr, len );
00776                 }
00777         }
00778         else
00779         {
00780                 bool whitespace = false;
00781 
00782                 // Remove leading white space:
00783                 p = SkipWhiteSpace( p, encoding );
00784                 while (    p && *p
00785                         && !StringEqual( p, endTag, caseInsensitive, encoding ) )
00786                 {
00787                         if ( *p == '\r' || *p == '\n' )
00788                         {
00789                                 whitespace = true;
00790                                 ++p;
00791                         }
00792                         else if ( IsWhiteSpace( *p ) )
00793                         {
00794                                 whitespace = true;
00795                                 ++p;
00796                         }
00797                         else
00798                         {
00799                                 // If we've found whitespace, add it before the
00800                                 // new character. Any whitespace just becomes a space.
00801                                 if ( whitespace )
00802                                 {
00803                                         text.Append(' ');
00804                                         whitespace = false;
00805                                 }
00806                                 int len;
00807                                 char cArr[4] = { 0, 0, 0, 0 };
00808                                 p = GetChar( p, cArr, &len, encoding );
00809                                 if ( len == 1 )
00810                                         text.Append(cArr[0]);   
00811                                 else
00812                                         text.Append( cArr, len );
00813                         }
00814                 }
00815         }
00816         if ( p ) 
00817                 p += strlen( endTag );
00818         return p;
00819 }
00820 
00821 const char *XmlDocument::_Parse( const char *p, XmlParsingData* prevData, XmlEncoding encoding )
00822 {
00823         if (NULL != prevData)
00824         {
00825                 m_condenseWhiteSpace = prevData->condenseWs;
00826                 m_tabsize = prevData->tabsize;
00827         }
00828 
00829         // Parse away, at the document level. Since a document
00830         // contains nothing but other tags, most of what happens
00831         // here is skipping white space.
00832         if ( NULL == p || '\0' == p[0] )
00833         {
00834                 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_DOCUMENT_EMPTY], 0, 0 );
00835         }
00836 
00837         // Note that, for a document, this needs to come
00838         // before the while space skip, so that parsing
00839         // starts from the pointer we are given.
00840         m_location.Clear();
00841         if ( prevData )
00842         {
00843                 m_location.row = prevData->Cursor().row;
00844                 m_location.col = prevData->Cursor().col;
00845         }
00846         else
00847         {
00848                 m_location.row = 0;
00849                 m_location.col = 0;
00850         }
00851         XmlParsingData data( p, TabSize(), m_location.row, m_location.col, m_condenseWhiteSpace);
00852         m_location = data.Cursor();
00853 
00854         if ( encoding == TIXML_ENCODING_UNKNOWN )
00855         {
00856                 // Check for the Microsoft UTF-8 lead bytes.
00857                 const unsigned char* pU = (const unsigned char*)p;
00858                 if (    *(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0
00859                         && *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1
00860                         && *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 )
00861                 {
00862                         encoding = TIXML_ENCODING_UTF8;
00863                         m_useMicrosoftBOM = true;
00864                 }
00865         }
00866 
00867         p = SkipWhiteSpace( p, encoding );
00868         if ( !p )
00869         {
00870                 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_DOCUMENT_EMPTY], m_location.row, m_location.col );
00871         }
00872 
00873         while ( p && *p )
00874         {
00875                 ValidateMem();
00876 
00877                 XmlNodePtr node = Identify( p, encoding );
00878                 if ( node.IsNotNull() )
00879                 {
00880                         p = node->_Parse( p, &data, encoding );
00881                         AppendChild( node );
00882                         ValidateMem();
00883                 }
00884                 else
00885                 {
00886                         break;
00887                 }
00888 
00889                 // Did we get encoding info?
00890                 if (encoding == TIXML_ENCODING_UNKNOWN && node->IsDeclaration() )
00891                 {
00892                         XmlDeclarationPtr dec = node->ToDeclaration();
00893                         const String& enc = dec->Encoding();
00894 
00895                         if ( enc.Length() == 0 )
00896                                 encoding = TIXML_ENCODING_UTF8;
00897                         else if ( StringEqual( enc.GetChars(), "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
00898                                 encoding = TIXML_ENCODING_UTF8;
00899                         else if ( StringEqual( enc.GetChars(), "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
00900                                 encoding = TIXML_ENCODING_UTF8; // incorrect, but be nice
00901                         else 
00902                                 encoding = TIXML_ENCODING_LEGACY;
00903                 }
00904 
00905                 p = SkipWhiteSpace( p, encoding );
00906         }
00907 
00908         // Was this empty?
00909         if ( m_firstChild.IsNull() ) 
00910         {
00911                 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_DOCUMENT_EMPTY], 0, 0 );
00912         }
00913 
00914         // All is well.
00915         return p;
00916 }
00917 
00918 //void XmlDocument::SetError( int err, const char* pError, XmlParsingData* data, XmlEncoding encoding )
00919 //{     
00920 //      // The first error in a chain is more accurate - don't set again!
00921 //      if ( m_error )
00922 //              return;
00923 //
00924 //      assert( err > 0 && err < TIXML_ERROR_STRING_COUNT );
00925 //      m_error   = true;
00926 //      m_errorId = err;
00927 //      m_errorDesc = errorString[ m_errorId ];
00928 //
00929 //      m_errorLocation.Clear();
00930 //      if ( pError && data )
00931 //      {
00932 //              data->Stamp( pError, encoding );
00933 //              m_errorLocation = data->Cursor();
00934 //      }
00935 //}
00936 //
// Markup prefixes that XmlNode::Identify() matches against to classify a node.
static const char* _xmlHeader = "<?xml";
static const char* _commentHeader = "<!--";
static const char* _dtdHeader = "<!";
static const char* _cdataHeader = "<![CDATA[";
00941 
00942 XmlNodePtr XmlNode::Identify( const char* p, XmlEncoding encoding )
00943 {
00944         XmlNodePtr returnNode;
00945 
00946         p = SkipWhiteSpace( p, encoding );
00947         if( !p || !*p || *p != '<' )
00948         {
00949                 return returnNode;
00950         }
00951 
00952         p = SkipWhiteSpace( p, encoding );
00953 
00954         if ( !p || !*p )
00955         {
00956                 return returnNode;
00957         }
00958 
00959         // What is this thing? 
00960         // - Elements start with a letter or underscore, but xml is reserved.
00961         // - Comments: <!--
00962         // - Decleration: <?xml
00963         // - Everthing else is unknown to tinyxml.
00964         //
00965 
00966         if ( StringEqual( p, _xmlHeader, true, encoding ) )
00967         {
00968 #ifdef DEBUG_PARSER
00969                 TIXML_LOG( "XML parsing Declaration\n" );
00970 #endif
00971                 returnNode = (XmlNodePtr)XmlDeclarationPtr(new XmlDeclaration());
00972         }
00973         else if ( StringEqual( p, _commentHeader, false, encoding ) )
00974         {
00975 #ifdef DEBUG_PARSER
00976                 TIXML_LOG( "XML parsing Comment\n" );
00977 #endif
00978                 returnNode = (XmlNodePtr)XmlCommentPtr(new XmlComment());
00979         }
00980         else if ( StringEqual( p, _cdataHeader, false, encoding ) )
00981         {
00982 #ifdef DEBUG_PARSER
00983                 TIXML_LOG( "XML parsing CDATA\n" );
00984 #endif
00985                 XmlTextPtr text = XmlTextPtr(new XmlText( "" ));
00986                 text->SetCDATA( true );
00987                 returnNode = (XmlNodePtr)text;
00988         }
00989         else if ( StringEqual( p, _dtdHeader, false, encoding ) )
00990         {
00991 #ifdef DEBUG_PARSER
00992                 TIXML_LOG( "XML parsing Unknown(1)\n" );
00993 #endif
00994                 returnNode = (XmlNodePtr)XmlUnknownPtr(new XmlUnknown());
00995         }
00996         else if (    IsAlpha( *(p+1), encoding )
00997                 || *(p+1) == '_' )
00998         {
00999 #ifdef DEBUG_PARSER
01000                 TIXML_LOG( "XML parsing Element\n" );
01001 #endif
01002                 returnNode = (XmlNodePtr)XmlElementPtr(new XmlElement( "" ));
01003         }
01004         else
01005         {
01006 #ifdef DEBUG_PARSER
01007                 TIXML_LOG( "XML parsing Unknown(2)\n" );
01008 #endif
01009                 returnNode = (XmlNodePtr)XmlUnknownPtr(new XmlUnknown());
01010         }
01011 
01012         if ( returnNode.IsNotNull() )
01013         {
01014                 // Set the parent, so it can report errors
01015                 returnNode->m_parent = this;
01016 
01017                 returnNode->m_self = returnNode;
01018         }
01019         return returnNode;
01020 }
01021 
01022 const char* XmlElement::_Parse( const char* p, XmlParsingData* data, XmlEncoding encoding )
01023 {
01024         p = SkipWhiteSpace( p, encoding );
01025 
01026         if ( !p || !*p )
01027         {
01028                 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_PARSING_ELEMENT], NULL == data ? 0 : data->Cursor().row, NULL == data ? 0 : data->Cursor().col );
01029         }
01030 
01031         if ( data )
01032         {
01033                 data->Stamp( p, encoding );
01034                 m_location = data->Cursor();
01035         }
01036 
01037         if ( *p != '<' )
01038         {
01039                 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_PARSING_ELEMENT], NULL == data ? 0 : data->Cursor().row, NULL == data ? 0 : data->Cursor().col );
01040         }
01041 
01042         p = SkipWhiteSpace( p+1, encoding );
01043 
01044         // Read the name.
01045         const char* pErr = p;
01046 
01047         p = ReadName( p, &m_name, encoding );
01048         if ( !p || !*p )
01049         {
01050                 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME], NULL == data ? 0 : data->Cursor().row, NULL == data ? 0 : data->Cursor().col );
01051         }
01052 
01053         StringBuffer endTag ("</");
01054         endTag.Append( m_name );
01055         endTag.Append( ">" );
01056 
01057         // Check for and read attributes. Also look for an empty
01058         // tag or an end tag.
01059         while ( p && *p )
01060         {
01061                 pErr = p;
01062                 p = SkipWhiteSpace( p, encoding );
01063                 if ( !p || !*p )
01064                 {
01065                         throw new XmlException( _xmlErrorStrings[TIXML_ERROR_READING_ATTRIBUTES], NULL == data ? 0 : data->Cursor().row, NULL == data ? 0 : data->Cursor().col );
01066                 }
01067                 if ( *p == '/' )
01068                 {
01069                         ++p;
01070                         // Empty tag.
01071                         if ( *p  != '>' )
01072                         {
01073                                 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_PARSING_EMPTY], NULL == data ? 0 : data->Cursor().row, NULL == data ? 0 : data->Cursor().col );
01074                         }
01075                         return (p+1);
01076                 }
01077                 else if ( *p == '>' )
01078                 {
01079                         // Done with attributes (if there were any.)
01080                         // Read the value -- which can include other
01081                         // elements -- read the end tag, and return.
01082                         ++p;
01083                         p = ReadValue( p, data, encoding );             // Note this is an Element method, and will set the error if one happens.
01084                         if ( !p || !*p ) 
01085                         {
01086                                 // We were looking for the end tag, but found nothing.
01087                                 // Fix for [ 1663758 ] Failure to report error on bad XML
01088                                 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_READING_END_TAG], NULL == data ? 0 : data->Cursor().row, NULL == data ? 0 : data->Cursor().col );
01089                         }
01090 
01091                         // We should find the end tag now
01092                         if ( StringEqual( p, endTag.GetChars(), false, encoding ) )
01093                         {
01094                                 p += endTag.Length();
01095                                 return p;
01096                         }
01097                         else
01098                         {
01099                                 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_READING_END_TAG], NULL == data ? 0 : data->Cursor().row, NULL == data ? 0 : data->Cursor().col );
01100                         }
01101                 }
01102                 else
01103                 {
01104                         // Try to read an attribute:
01105                         XmlAttributePtr attrib = XmlAttributePtr(new XmlAttribute());
01106                         XmlDocumentPtr document = GetDocument();
01107                         attrib->SetDocument( document );
01108                         pErr = p;
01109                         p = attrib->_Parse( p, data, encoding );
01110 
01111                         if ( !p || !*p )
01112                         {
01113                                 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_PARSING_ELEMENT], NULL == data ? 0 : data->Cursor().row, NULL == data ? 0 : data->Cursor().col );
01114                         }
01115 
01116                         if ( m_attribs.IsNull() )
01117                         {
01118                                 m_attribs = XmlAttributeCollectionPtr(new XmlAttributeCollection());
01119                         }
01120 
01121                         // Handle the strange case of double attributes:
01122                         XmlAttributePtr node = m_attribs->Find( attrib->Name() );
01123                         if ( node.IsNotNull() )
01124                         {
01125                                 node.ValidateMem();
01126                                 node->SetValue( *attrib->Value() );
01127                                 return NULL;
01128                         }
01129 
01130                         m_attribs->Add( attrib );
01131                 }
01132         }
01133         return p;
01134 }
01135 
01136 const char* XmlElement::ReadValue( const char* p, XmlParsingData* data, XmlEncoding encoding )
01137 {
01138         // Read in text and elements in any order.
01139         const char* pWithWhiteSpace = p;
01140         p = SkipWhiteSpace( p, encoding );
01141 
01142         while ( p && *p )
01143         {
01144                 if ( *p != '<' )
01145                 {
01146                         // Take what we have, make a text element.
01147                         XmlTextPtr textNode = XmlTextPtr(new XmlText( "" ));
01148                         XmlNodePtr xtextNode = (XmlNodePtr)textNode;
01149                         textNode->m_self = xtextNode;
01150 
01151                         if ( XmlDocument::IsWhiteSpaceCondensed() )
01152                         {
01153                                 p = textNode->_Parse( p, data, encoding );
01154                         }
01155                         else
01156                         {
01157                                 // Special case: we want to keep the white space
01158                                 // so that leading spaces aren't removed.
01159                                 p = textNode->_Parse( pWithWhiteSpace, data, encoding );
01160                         }
01161 
01162                         if ( !textNode->IsBlank() )
01163                         {
01164                                 AppendChild( textNode );
01165                         }
01166                         else
01167                         {
01168                         }
01169                 } 
01170                 else 
01171                 {
01172                         // We hit a '<'
01173                         // Have we hit a new element or an end tag? This could also be
01174                         // a TiXmlText in the "CDATA" style.
01175                         if ( StringEqual( p, "</", false, encoding ) )
01176                         {
01177                                 return p;
01178                         }
01179                         else
01180                         {
01181                                 XmlNodePtr node = Identify( p, encoding );
01182                                 if ( node.IsNotNull() )
01183                                 {
01184                                         p = node->_Parse( p, data, encoding );
01185                                         AppendChild( node );
01186                                 }                               
01187                                 else
01188                                 {
01189                                         return NULL;
01190                                 }
01191                         }
01192                 }
01193                 pWithWhiteSpace = p;
01194                 p = SkipWhiteSpace( p, encoding );
01195         }
01196 
01197         if ( !p )
01198         {
01199                 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_READING_ELEMENT_VALUE], 0, 0 );
01200         }       
01201         return p;
01202 }
01203 
01204 const char* XmlUnknown::_Parse( const char* p, XmlParsingData* data, XmlEncoding encoding )
01205 {
01206         p = SkipWhiteSpace( p, encoding );
01207 
01208         if ( data )
01209         {
01210                 data->Stamp( p, encoding );
01211                 m_location = data->Cursor();
01212         }
01213         if ( !p || !*p || *p != '<' )
01214         {
01215                 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_PARSING_UNKNOWN], data->Cursor().row, data->Cursor().col );
01216         }
01217 
01218         ++p;
01219         m_value = "";
01220 
01221         while ( p && *p && *p != '>' )
01222         {
01223                 m_value.Append( *p );
01224                 ++p;
01225         }
01226 
01227         if ( !p )
01228         {
01229                 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_PARSING_UNKNOWN], data->Cursor().row, data->Cursor().col );
01230         }
01231         if ( *p == '>' )
01232                 return p+1;
01233         return p;
01234 }
01235 
01236 const char* XmlComment::_Parse( const char* p, XmlParsingData* data, XmlEncoding encoding )
01237 {
01238         m_value = "";
01239 
01240         p = SkipWhiteSpace( p, encoding );
01241 
01242         if ( data )
01243         {
01244                 data->Stamp( p, encoding );
01245                 m_location = data->Cursor();
01246         }
01247         const char* startTag = "<!--";
01248         const char* endTag   = "-->";
01249 
01250         if ( !StringEqual( p, startTag, false, encoding ) )
01251         {
01252                 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_PARSING_COMMENT], data->Cursor().row, data->Cursor().col );
01253         }
01254         p += strlen( startTag );
01255 
01256         // [ 1475201 ] TinyXML parses entities in comments
01257         // Oops - ReadText doesn't work, because we don't want to parse the entities.
01258         // p = ReadText( p, &value, false, endTag, false, encoding );
01259         //
01260         // from the XML spec:
01261         /*
01262         [Definition: Comments may appear anywhere in a document outside other markup; in addition, 
01263         they may appear within the document type declaration at places allowed by the grammar. 
01264         They are not part of the document's character data; an XML processor MAY, but need not, 
01265         make it possible for an application to retrieve the text of comments. For compatibility, 
01266         the string "--" (double-hyphen) MUST NOT occur within comments.] Parameter entity 
01267         references MUST NOT be recognized within comments.
01268 
01269         An example of a comment:
01270 
01271         <!-- declarations for <head> & <body> -->
01272         */
01273 
01274         m_value = "";
01275         // Keep all the white space.
01276         while ( p && *p && !StringEqual( p, endTag, false, encoding ) )
01277         {
01278                 m_value.Append( p, 1 );
01279                 ++p;
01280         }
01281         if ( p ) 
01282                 p += strlen( endTag );
01283 
01284         return p;
01285 }
01286 
// The two quote characters that may delimit an attribute value.
static const char SINGLE_QUOTE = '\'';
static const char DOUBLE_QUOTE = '"';
01289 
01290 const char* XmlAttribute::_Parse( const char* p, XmlParsingData* data, XmlEncoding encoding )
01291 {
01292         ASSERT(NULL != p);
01293 
01294         p = SkipWhiteSpace( p, encoding );
01295 
01296         if ( !p || !*p ) 
01297                 return NULL;
01298 
01299         //      int tabsize = 4;
01300         //      if ( document )
01301         //              tabsize = document->TabSize();
01302 
01303         if ( data )
01304         {
01305                 data->Stamp( p, encoding );
01306                 m_location = data->Cursor();
01307         }
01308 
01309         // Read the name, the '=' and the value.
01310         const char* pErr = p;
01311         p = ReadName( p, &m_name, encoding );
01312         if ( !p || !*p )
01313         {
01314                 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_READING_ATTRIBUTES], data->Cursor().row, data->Cursor().col );
01315         }
01316 
01317         p = SkipWhiteSpace( p, encoding );
01318         if ( !p || !*p || *p != '=' )
01319         {
01320                 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_READING_ATTRIBUTES], data->Cursor().row, data->Cursor().col );
01321         }
01322 
01323         ++p;    // skip '='
01324         p = SkipWhiteSpace( p, encoding );
01325         if ( !p || !*p )
01326         {
01327                 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_READING_ATTRIBUTES], data->Cursor().row, data->Cursor().col );
01328         }
01329 
01330         const char* end;
01331 
01332         if ( *p == SINGLE_QUOTE )
01333         {
01334                 ++p;
01335                 end = "\'";             // single quote in string
01336                 p = ReadText( p, m_value, false, end, false, encoding );
01337         }
01338         else if ( *p == DOUBLE_QUOTE )
01339         {
01340                 ++p;
01341                 end = "\"";             // double quote in string
01342                 p = ReadText( p, m_value, false, end, false, encoding );
01343         }
01344         else
01345         {
01346                 // All attribute values should be in single or double quotes.
01347                 // But this is such a common error that the parser will try
01348                 // its best, even without them.
01349                 m_value = "";
01350                 while (    p && *p                                                                                      // existence
01351                         && !IsWhiteSpace( *p ) && *p != '\n' && *p != '\r'      // whitespace
01352                         && *p != '/' && *p != '>' )                                                     // tag end
01353                 {
01354                         if ( *p == SINGLE_QUOTE || *p == DOUBLE_QUOTE ) 
01355                         {
01356                                 // [ 1451649 ] Attribute values with trailing quotes not handled correctly
01357                                 // We did not have an opening quote but seem to have a 
01358                                 // closing one. Give up and throw an error.
01359                                 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_READING_ATTRIBUTES], data->Cursor().row, data->Cursor().col );
01360                         }
01361                         m_value.Append( *p );
01362                         ++p;
01363                 }
01364         }
01365         return p;
01366 }
01367 
01368 const char* XmlText::_Parse( const char* p, XmlParsingData* data, XmlEncoding encoding )
01369 {
01370         m_value = "";
01371 
01372         if ( data )
01373         {
01374                 data->Stamp( p, encoding );
01375                 m_location = data->Cursor();
01376         }
01377 
01378         const char* const startTag = "<![CDATA[";
01379         const char* const endTag   = "]]>";
01380 
01381         if ( m_cdata || StringEqual( p, startTag, false, encoding ) )
01382         {
01383                 m_cdata = true;
01384 
01385                 if ( !StringEqual( p, startTag, false, encoding ) )
01386                 {
01387                         throw new XmlException( _xmlErrorStrings[TIXML_ERROR_PARSING_CDATA], data->Cursor().row, data->Cursor().col );
01388                 }
01389                 p += strlen( startTag );
01390 
01391                 // Keep all the white space, ignore the encoding, etc.
01392                 while ( p && *p && !StringEqual( p, endTag, false, encoding ) )
01393                 {
01394                         m_value.Append( *p );
01395                         ++p;
01396                 }
01397 
01398                 StringBuffer dummy;
01399                 p = ReadText( p, dummy, false, endTag, false, encoding );
01400                 return p;
01401         }
01402         else
01403         {
01404                 bool ignoreWhite = true;
01405 
01406                 const char* end = "<";
01407                 p = ReadText( p, m_value, ignoreWhite, end, false, encoding );
01408                 if ( p )
01409                         return p-1;     // don't truncate the '<'
01410                 return 0;
01411         }
01412 }
01413 
01414 const char* XmlDeclaration::_Parse( const char* p, XmlParsingData* data, XmlEncoding _encoding )
01415 {
01416         p = SkipWhiteSpace( p, _encoding );
01417         // Find the beginning, find the end, and look for
01418         // the stuff in-between.
01419 
01420         if ( !p || !*p || !StringEqual( p, "<?xml", true, _encoding ) )
01421         {
01422                 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_PARSING_DECLARATION], data->Cursor().row, data->Cursor().col );
01423         }
01424         if ( data )
01425         {
01426                 data->Stamp( p, _encoding );
01427                 m_location = data->Cursor();
01428         }
01429         p += 5;
01430 
01431         version = "";
01432         encoding = "";
01433         standalone = "";
01434 
01435         while ( p && *p )
01436         {
01437                 if ( *p == '>' )
01438                 {
01439                         ++p;
01440                         return p;
01441                 }
01442 
01443                 p = SkipWhiteSpace( p, _encoding );
01444                 if ( StringEqual( p, "version", true, _encoding ) )
01445                 {
01446                         XmlAttribute attrib;
01447                         p = attrib._Parse( p, data, _encoding );                
01448                         version = *attrib.Value();
01449                 }
01450                 else if ( StringEqual( p, "encoding", true, _encoding ) )
01451                 {
01452                         XmlAttribute attrib;
01453                         p = attrib._Parse( p, data, _encoding );                
01454                         encoding = *attrib.Value();
01455                 }
01456                 else if ( StringEqual( p, "standalone", true, _encoding ) )
01457                 {
01458                         XmlAttribute attrib;
01459                         p = attrib._Parse( p, data, _encoding );                
01460                         standalone = *attrib.Value();
01461                 }
01462                 else
01463                 {
01464                         // Read over whatever it is.
01465                         while( p && *p && *p != '>' && !IsWhiteSpace( *p ) )
01466                                 ++p;
01467                 }
01468                 ValidateMem();
01469         }
01470         return NULL;
01471 }
01472 
01473 bool XmlText::IsBlank() const
01474 {
01475         for ( int i=0; i<m_value.Length(); i++ )
01476         {
01477                 if ( !IsWhiteSpace( m_value[i] ) )
01478                 {
01479                         return false;
01480                 }
01481         }
01482         return true;
01483 }
01484