00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 
00025 
00026 #include <ctype.h>
00027 #include <stdio.h>
00028 #include <stddef.h>
00029 
00030 #include <spl/xml/XmlAttribute.h>
00031 #include <spl/xml/XmlComment.h>
00032 #include <spl/xml/XmlDeclaration.h>
00033 #include <spl/xml/XmlDocument.h>
00034 #include <spl/xml/XmlElement.h>
00035 #include <spl/xml/XmlNode.h>
00036 #include <spl/xml/XmlText.h>
00037 #include <spl/xml/XmlUnknown.h>
00038 
// Selects the formatted-I/O functions used by the parser.  The macros are
// only defined when TIXML_SAFE is set, so code using them has to test for
// their presence first (e.g. `#if defined(TIXML_SNPRINTF)`).
#ifdef TIXML_SAFE
#if defined(_MSC_VER) && (_MSC_VER >= 1400 )
// _MSC_VER >= 1400: use the "_s" variants.
#define TIXML_SNPRINTF _snprintf_s
#define TIXML_SNSCANF  _snscanf_s
#define TIXML_SSCANF   sscanf_s
#elif defined(_MSC_VER) && (_MSC_VER >= 1200 )
// _MSC_VER in [1200, 1400): use the underscore-prefixed functions.

#define TIXML_SNPRINTF _snprintf
#define TIXML_SNSCANF  _snscanf
#define TIXML_SSCANF   sscanf
#elif defined(__GNUC__) && (__GNUC__ >= 3 )
// GCC 3 or newer.
// NOTE(review): `snscanf` is not a standard C library function - confirm that
// TIXML_SNSCANF is never expanded on this branch.
#define TIXML_SNPRINTF snprintf
#define TIXML_SNSCANF  snscanf
#define TIXML_SSCANF   sscanf
#else
// Any other compiler: only TIXML_SSCANF is provided.
#define TIXML_SSCANF   sscanf
#endif
#endif  
00061 
// One predefined XML entity: its escaped spelling, the length of that
// spelling, and the character it stands for.
struct Entity
{
        const char*     str;            // escaped form, e.g. "&amp;"
        unsigned int    strLength;      // number of characters in str (excluding the NUL)
        char                chr;        // the literal character the entity represents
};

#define NUM_ENTITY 5
#define MAX_ENTITY_LENGTH 6

// The five predefined XML entities.
//
// Order matters: _XmlEncodeString indexes this table directly
// (0 = '&', 1 = '<', 2 = '>', 3 = '"', 4 = '\'').
// strLength must equal strlen( str ); GetEntity asserts this and uses it
// both for strncmp() and to advance the read pointer.
static struct Entity entity[ NUM_ENTITY ] = 
{
        { "&amp;",  5, '&' },
        { "&lt;",   4, '<' },
        { "&gt;",   4, '>' },
        { "&quot;", 6, '\"' },
        { "&apos;", 6, '\'' }
};
00083 
00084 
00085 
00086 
00087 
00088 
00089 
00090 
00091 
00092 
00093 
// The three-byte sequence EF BB BF.  The parser compares the start of the
// input against these bytes and skips the sequence wherever white space is
// skipped.
const unsigned char TIXML_UTF_LEAD_0 = 0xefU;
const unsigned char TIXML_UTF_LEAD_1 = 0xbbU;
const unsigned char TIXML_UTF_LEAD_2 = 0xbfU;

// Number of bytes to consume for a character, indexed by its first byte.
// Bytes that cannot start a multi-byte sequence map to 1 so that the
// parser always makes progress.
const int utf8ByteTable[256] = 
{
        // 0x00 - 0x7f : single byte
        1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,     // 0x00
        1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,     // 0x10
        1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,     // 0x20
        1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,     // 0x30
        1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,     // 0x40
        1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,     // 0x50
        1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,     // 0x60
        1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,     // 0x70
        // 0x80 - 0xbf : continuation bytes, stepped over one at a time
        1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,     // 0x80
        1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,     // 0x90
        1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,     // 0xa0
        1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,     // 0xb0
        // 0xc0 - 0xdf : two-byte leads (0xc0 and 0xc1 count as 1)
        1, 1, 2, 2,  2, 2, 2, 2,  2, 2, 2, 2,  2, 2, 2, 2,     // 0xc0
        2, 2, 2, 2,  2, 2, 2, 2,  2, 2, 2, 2,  2, 2, 2, 2,     // 0xd0
        // 0xe0 - 0xef : three-byte leads
        3, 3, 3, 3,  3, 3, 3, 3,  3, 3, 3, 3,  3, 3, 3, 3,     // 0xe0
        // 0xf0 - 0xf4 : four-byte leads; 0xf5 - 0xff count as 1
        4, 4, 4, 4,  4, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1      // 0xf0
};
00120 
// Encodes one code point as UTF-8.
//
// input  - the value to encode.
// output - receives the encoded bytes (up to four); not NUL-terminated.
// length - receives the number of bytes written, or 0 when input is
//          0x200000 or larger (nothing is written in that case).
void ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
{
        // Prefix bits of the first byte, indexed by sequence length.
        static const unsigned long leadPrefix[5] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0 };

        int byteCount = 0;
        if ( input < 0x80 )
                byteCount = 1;
        else if ( input < 0x800 )
                byteCount = 2;
        else if ( input < 0x10000 )
                byteCount = 3;
        else if ( input < 0x200000 )
                byteCount = 4;

        *length = byteCount;
        if ( byteCount == 0 )
                return;

        // Trailing bytes are filled back to front: each carries the low six
        // bits of what is left of the value, tagged with the 10xxxxxx marker.
        for ( int slot = byteCount - 1; slot > 0; --slot )
        {
                output[slot] = (char)( 0x80 | ( input & 0x3F ) );
                input >>= 6;
        }

        // Whatever remains goes into the first byte together with its prefix.
        output[0] = (char)( input | leadPrefix[byteCount] );
}
00160 
00161 
00162 
00163 
// Returns true when the text at p begins with the whole of endTag,
// optionally ignoring case.  Defined further down in this file.
static bool StringEqual(        const char* p,
                                                const char* endTag,
                                                bool ignoreCase,
                                                XmlEncoding encoding );

// Human-readable messages for the parser's error codes; looked up by code,
// e.g. _xmlErrorStrings[TIXML_ERROR_DOCUMENT_EMPTY].
// NOTE(review): the entries presumably have to stay in the same order as the
// error-code enumeration that ends with TIXML_ERROR_STRING_COUNT - confirm
// against XmlNode before adding or reordering messages.
const char* _xmlErrorStrings[ XmlNode::TIXML_ERROR_STRING_COUNT ] =
{
        "No error",
        "Error",
        "Failed to open file",
        "Memory allocation failed.",
        "Error parsing Element.",
        "Failed to read Element name",
        "Error reading Element value.",
        "Error reading Attributes.",
        "Error: empty tag.",
        "Error reading end tag.",
        "Error parsing Unknown.",
        "Error parsing Comment.",
        "Error parsing Declaration.",
        "Error document empty.",
        "Error null (0) or unexpected EOF found in input stream.",
        "Error parsing CDATA.",
        "Error when TiXmlDocument added to document, because TiXmlDocument can only be at the root.",
};


// Non-zero when anyByte may start a name: a letter according to isalpha(),
// or any byte value of 127 and above.
static int IsAlpha( unsigned char anyByte, XmlEncoding encoding );

// Same as IsAlpha, but digits are accepted as well (isalnum()).
static int IsAlphaNum( unsigned char anyByte, XmlEncoding encoding );
00195 
00196 inline static int ToLower( int v, XmlEncoding encoding )
00197 {
00198         if ( encoding == TIXML_ENCODING_UTF8 )
00199         {
00200                 if ( v < 128 ) 
00201                         return tolower( v );
00202                 return v;
00203         }
00204         else
00205         {
00206                 return tolower( v );
00207         }
00208 }
00209 
00210 void _XmlEncodeString( const String& str, StringBuffer& outString )
00211 {
00212         int i = 0;
00213 
00214         while( i < str.Length() )
00215         {
00216                 unsigned char c = (unsigned char) str[i];
00217 
00218                 if (    c == '&' 
00219                         && i < ( (int)str.Length() - 2 )
00220                         && str[i+1] == '#'
00221                         && str[i+2] == 'x' )
00222                 {
00223                         
00224                         
00225                         
00226                         
00227                         
00228                         
00229                         
00230                         
00231                         
00232                         
00233                         while ( i < str.Length()-1 )
00234                         {
00235                                 outString.Append( str.GetChars() + i, 1 );
00236                                 ++i;
00237                                 if ( str[i] == ';' )
00238                                         break;
00239                         }
00240                 }
00241                 else if ( c == '&' )
00242                 {
00243                         outString.Append( entity[0].str, entity[0].strLength );
00244                         ++i;
00245                 }
00246                 else if ( c == '<' )
00247                 {
00248                         outString.Append( entity[1].str, entity[1].strLength );
00249                         ++i;
00250                 }
00251                 else if ( c == '>' )
00252                 {
00253                         outString.Append( entity[2].str, entity[2].strLength );
00254                         ++i;
00255                 }
00256                 else if ( c == '\"' )
00257                 {
00258                         outString.Append( entity[3].str, entity[3].strLength );
00259                         ++i;
00260                 }
00261                 else if ( c == '\'' )
00262                 {
00263                         outString.Append( entity[4].str, entity[4].strLength );
00264                         ++i;
00265                 }
00266                 else if ( c < 32 )
00267                 {
00268                         
00269                         
00270                         char buf[ 32 ];
00271 
00272 #if defined(TIXML_SNPRINTF)             
00273                         TIXML_SNPRINTF( buf, sizeof(buf), "&#x%02X;", (unsigned) ( c & 0xff ) );
00274 #else
00275                         sprintf( buf, "&#x%02X;", (unsigned) ( c & 0xff ) );
00276 #endif          
00277 
00278                         
00279                         
00280                         outString.Append( buf, (int)strlen( buf ) );
00281                         ++i;
00282                 }
00283                 else
00284                 {
00285                         
00286                         
00287                         outString.Append((char) c);     
00288                         ++i;
00289                 }
00290         }
00291 }
00292 
// Returns the first position at or after the argument that is not white
// space, or null when the argument is null or points at an empty string.
// Defined further down in this file.
static const char* SkipWhiteSpace( const char*, XmlEncoding encoding );
00294 
// True when c is a line break or anything isspace() classifies as white space.
inline static bool IsWhiteSpace( char c )
{
        if ( c == '\n' || c == '\r' )
                return true;
        return isspace( (unsigned char) c ) != 0;
}

// Overload for wider values: anything from 256 upward is never white space,
// smaller values are narrowed to char and classified as above.
inline static bool IsWhiteSpace( int c )
{
        return ( c < 256 ) ? IsWhiteSpace( (char) c ) : false;
}
00306 
00307 
00308 
00309 
00310 
// Reads an XML name starting at p into *name.  A name starts with a letter
// or '_' and continues with letters, digits, '_', '-', '.' or ':'.
// Returns the position after the name, or NULL (with *name left empty) when
// no name starts at p.
static const char* ReadName( const char* p, String* name, XmlEncoding encoding );


// Reads text, decoding entities, until endTag is found or the input ends.
// On success the returned position is just past endTag; null is returned
// when a character could not be decoded.
static const char* ReadText(    const char* in,                         // where to start reading
                                                        StringBuffer& text,                     // receives the text (cleared first)
                                                        bool ignoreWhiteSpace,          // collapse white space runs when condensing is enabled
                                                        const char* endTag,                     // marker that terminates the text
                                                        bool ignoreCase,                        // compare endTag case-insensitively
                                                        XmlEncoding encoding ); 

// Decodes the entity or character reference at `in` (which points at '&')
// into value/length and returns the position after it; returns null for a
// malformed numeric character reference.
static const char* GetEntity( const char* in, char* value, int* length, XmlEncoding encoding );
00325 
00326 
00327 
00328 inline static const char* GetChar( const char* p, char* _value, int* length, XmlEncoding encoding )
00329 {
00330         ASSERT( NULL != p );
00331         if ( encoding == TIXML_ENCODING_UTF8 )
00332         {
00333                 *length = utf8ByteTable[ *((const unsigned char*)p) ];
00334                 assert( *length >= 0 && *length < 5 );
00335         }
00336         else
00337         {
00338                 *length = 1;
00339         }
00340 
00341         if ( *length == 1 )
00342         {
00343                 if ( *p == '&' )
00344                         return GetEntity( p, _value, length, encoding );
00345                 *_value = *p;
00346                 return p+1;
00347         }
00348         else if ( *length )
00349         {
00350                 
00351                 
00352                 for( int i=0; p[i] && i<*length; ++i ) {
00353                         _value[i] = p[i];
00354                 }
00355                 return p + (*length);
00356         }
00357         else
00358         {
00359                 
00360                 return 0;
00361         }
00362 }
00363 
00364  int IsAlpha( unsigned char anyByte, XmlEncoding  )
00365 {
00366         
00367         
00368         
00369         
00370 
00371         
00372         
00373         if ( anyByte < 127 )
00374                 return isalpha( anyByte );
00375         else
00376                 return 1;       
00377         
00378         
00379         
00380         
00381         
00382 }
00383 
00384 
00385  int IsAlphaNum( unsigned char anyByte, XmlEncoding  )
00386 {
00387         
00388         
00389         
00390         
00391 
00392         
00393         
00394         if ( anyByte < 127 )
00395                 return isalnum( anyByte );
00396         else
00397                 return 1;       
00398         
00399         
00400         
00401         
00402         
00403 }
00404 
00405 void XmlParsingData::Stamp( const char* now, XmlEncoding encoding )
00406 {
00407         assert( now );
00408 
00409         
00410         if ( tabsize < 1 )
00411         {
00412                 return;
00413         }
00414 
00415         
00416         int row = cursor.row;
00417         int col = cursor.col;
00418         const char* p = stamp;
00419         assert( p );
00420 
00421         while ( p < now )
00422         {
00423                 
00424                 const unsigned char* pU = (const unsigned char*)p;
00425 
00426                 
00427                 switch (*pU) {
00428 case 0:
00429         
00430         
00431         return;
00432 
00433 case '\r':
00434         
00435         ++row;
00436         col = 0;                                
00437         
00438         ++p;
00439 
00440         
00441         if (*p == '\n') 
00442         {
00443                 ++p;
00444         }
00445         break;
00446 
00447 case '\n':
00448         
00449         ++row;
00450         col = 0;
00451 
00452         
00453         ++p;
00454 
00455         
00456         
00457         
00458         if (*p == '\r') 
00459         {
00460                 ++p;
00461         }
00462         break;
00463 
00464 case '\t':
00465         
00466         ++p;
00467 
00468         
00469         col = (col / tabsize + 1) * tabsize;
00470         break;
00471 
00472 case TIXML_UTF_LEAD_0:
00473         if ( encoding == TIXML_ENCODING_UTF8 )
00474         {
00475                 if ( *(p+1) && *(p+2) )
00476                 {
00477                         
00478                         
00479                         if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 )
00480                                 p += 3; 
00481                         else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU )
00482                                 p += 3; 
00483                         else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU )
00484                                 p += 3; 
00485                         else
00486                         { p +=3; ++col; }       
00487                 }
00488         }
00489         else
00490         {
00491                 ++p;
00492                 ++col;
00493         }
00494         break;
00495 
00496 default:
00497         if ( encoding == TIXML_ENCODING_UTF8 )
00498         {
00499                 
00500                 int step = utf8ByteTable[*((const unsigned char*)p)];
00501                 if ( step == 0 )
00502                         step = 1;               
00503                 p += step;
00504 
00505                 
00506                 ++col;
00507         }
00508         else
00509         {
00510                 ++p;
00511                 ++col;
00512         }
00513         break;
00514                 }
00515         }
00516         cursor.row = row;
00517         cursor.col = col;
00518         assert( cursor.row >= -1 );
00519         assert( cursor.col >= -1 );
00520         stamp = p;
00521         assert( stamp );
00522 }
00523 
00524 
00525 const char* SkipWhiteSpace( const char* p, XmlEncoding encoding )
00526 {
00527         if ( !p || !*p )
00528         {
00529                 return 0;
00530         }
00531         if ( encoding == TIXML_ENCODING_UTF8 )
00532         {
00533                 while ( *p )
00534                 {
00535                         const unsigned char* pU = (const unsigned char*)p;
00536 
00537                         
00538                         if (    *(pU+0)==TIXML_UTF_LEAD_0
00539                                 && *(pU+1)==TIXML_UTF_LEAD_1 
00540                                 && *(pU+2)==TIXML_UTF_LEAD_2 )
00541                         {
00542                                 p += 3;
00543                                 continue;
00544                         }
00545                         else if(*(pU+0)==TIXML_UTF_LEAD_0
00546                                 && *(pU+1)==0xbfU
00547                                 && *(pU+2)==0xbeU )
00548                         {
00549                                 p += 3;
00550                                 continue;
00551                         }
00552                         else if(*(pU+0)==TIXML_UTF_LEAD_0
00553                                 && *(pU+1)==0xbfU
00554                                 && *(pU+2)==0xbfU )
00555                         {
00556                                 p += 3;
00557                                 continue;
00558                         }
00559 
00560                         if ( IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' )            
00561                                 ++p;
00562                         else
00563                                 break;
00564                 }
00565         }
00566         else
00567         {
00568                 while ( *p && IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' )
00569                         ++p;
00570         }
00571 
00572         return p;
00573 }
00574 
00575 
00576 
00577 
00578 const char* ReadName( const char* p, String * name, XmlEncoding encoding )
00579 {
00580         ASSERT(NULL != p);
00581         name->Set("");
00582 
00583         
00584         
00585         
00586         
00587         
00588         
00589         
00590         if ( NULL != p && *p && ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) )
00591         {
00592                 int len = 0;
00593                 const char *start = p;
00594                 while( p && *p  &&      
00595                         (               
00596                         IsAlphaNum( (unsigned char ) *p, encoding ) 
00597                         || *p == '_'
00598                         || *p == '-'
00599                         || *p == '.'
00600                         || *p == ':' ) 
00601                         )
00602                 {
00603                         ++p;
00604                         len++;
00605                 }
00606                 if ( len > 0 ) 
00607                 {
00608                         name->Set( String(start, len) );
00609                 }
00610                 return p;
00611         }
00612         return NULL;
00613 }
00614 
00615 const char* GetEntity( const char* p, char* value, int* length, XmlEncoding encoding )
00616 {
00617         
00618         String ent;
00619         int i;
00620         *length = 0;
00621 
00622         if ( *(p+1) && *(p+1) == '#' && *(p+2) )
00623         {
00624                 unsigned long ucs = 0;
00625                 ptrdiff_t delta = 0;
00626                 unsigned mult = 1;
00627 
00628                 if ( *(p+2) == 'x' )
00629                 {
00630                         
00631                         if ( !*(p+3) ) 
00632                                 return 0;
00633 
00634                         const char* q = p+3;
00635                         q = strchr( q, ';' );
00636 
00637                         if ( !q || !*q ) 
00638                                 return 0;
00639 
00640                         delta = q-p;
00641                         --q;
00642 
00643                         while ( *q != 'x' )
00644                         {
00645                                 if ( *q >= '0' && *q <= '9' )
00646                                         ucs += mult * (*q - '0');
00647                                 else if ( *q >= 'a' && *q <= 'f' )
00648                                         ucs += mult * (*q - 'a' + 10);
00649                                 else if ( *q >= 'A' && *q <= 'F' )
00650                                         ucs += mult * (*q - 'A' + 10 );
00651                                 else 
00652                                         return 0;
00653                                 mult *= 16;
00654                                 --q;
00655                         }
00656                 }
00657                 else
00658                 {
00659                         
00660                         if ( !*(p+2) ) 
00661                                 return 0;
00662 
00663                         const char* q = p+2;
00664                         q = strchr( q, ';' );
00665 
00666                         if ( !q || !*q ) 
00667                                 return 0;
00668 
00669                         delta = q-p;
00670                         --q;
00671 
00672                         while ( *q != '#' )
00673                         {
00674                                 if ( *q >= '0' && *q <= '9' )
00675                                         ucs += mult * (*q - '0');
00676                                 else 
00677                                         return 0;
00678                                 mult *= 10;
00679                                 --q;
00680                         }
00681                 }
00682                 if ( encoding == TIXML_ENCODING_UTF8 )
00683                 {
00684                         
00685                         ConvertUTF32ToUTF8( ucs, value, length );
00686                 }
00687                 else
00688                 {
00689                         *value = (char)ucs;
00690                         *length = 1;
00691                 }
00692                 return p + delta + 1;
00693         }
00694 
00695         
00696         for( i=0; i<NUM_ENTITY; ++i )
00697         {
00698                 if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
00699                 {
00700                         assert( strlen( entity[i].str ) == entity[i].strLength );
00701                         *value = entity[i].chr;
00702                         *length = 1;
00703                         return ( p + entity[i].strLength );
00704                 }
00705         }
00706 
00707         
00708         *value = *p;    
00709         
00710         
00711         return p+1;
00712 }
00713 
00714 
00715 bool StringEqual( const char* p,
00716                                  const char* tag,
00717                                  bool ignoreCase,
00718                                  XmlEncoding encoding )
00719 {
00720         assert( p );
00721         assert( tag );
00722         if ( !p || !*p )
00723         {
00724                 assert( 0 );
00725                 return false;
00726         }
00727 
00728         const char* q = p;
00729 
00730         if ( ignoreCase )
00731         {
00732                 while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) )
00733                 {
00734                         ++q;
00735                         ++tag;
00736                 }
00737 
00738                 if ( *tag == 0 )
00739                         return true;
00740         }
00741         else
00742         {
00743                 while ( *q && *tag && *q == *tag )
00744                 {
00745                         ++q;
00746                         ++tag;
00747                 }
00748 
00749                 if ( *tag == 0 )                
00750                         return true;
00751         }
00752         return false;
00753 }
00754 
00755 const char* ReadText(   const char* p, 
00756                                          StringBuffer& text, 
00757                                          bool trimWhiteSpace, 
00758                                          const char* endTag, 
00759                                          bool caseInsensitive,
00760                                          XmlEncoding encoding )
00761 {
00762         text.SetLength(0);
00763 
00764         if (    !trimWhiteSpace                 
00765                 || !XmlDocument::IsWhiteSpaceCondensed() )      
00766         {
00767                 
00768                 while (    p && *p
00769                         && !StringEqual( p, endTag, caseInsensitive, encoding )
00770                         )
00771                 {
00772                         int len;
00773                         char cArr[4] = { 0, 0, 0, 0 };
00774                         p = GetChar( p, cArr, &len, encoding );
00775                         text.Append( cArr, len );
00776                 }
00777         }
00778         else
00779         {
00780                 bool whitespace = false;
00781 
00782                 
00783                 p = SkipWhiteSpace( p, encoding );
00784                 while (    p && *p
00785                         && !StringEqual( p, endTag, caseInsensitive, encoding ) )
00786                 {
00787                         if ( *p == '\r' || *p == '\n' )
00788                         {
00789                                 whitespace = true;
00790                                 ++p;
00791                         }
00792                         else if ( IsWhiteSpace( *p ) )
00793                         {
00794                                 whitespace = true;
00795                                 ++p;
00796                         }
00797                         else
00798                         {
00799                                 
00800                                 
00801                                 if ( whitespace )
00802                                 {
00803                                         text.Append(' ');
00804                                         whitespace = false;
00805                                 }
00806                                 int len;
00807                                 char cArr[4] = { 0, 0, 0, 0 };
00808                                 p = GetChar( p, cArr, &len, encoding );
00809                                 if ( len == 1 )
00810                                         text.Append(cArr[0]);   
00811                                 else
00812                                         text.Append( cArr, len );
00813                         }
00814                 }
00815         }
00816         if ( p ) 
00817                 p += strlen( endTag );
00818         return p;
00819 }
00820 
00821 const char *XmlDocument::_Parse( const char *p, XmlParsingData* prevData, XmlEncoding encoding )
00822 {
00823         if (NULL != prevData)
00824         {
00825                 m_condenseWhiteSpace = prevData->condenseWs;
00826                 m_tabsize = prevData->tabsize;
00827         }
00828 
00829         
00830         
00831         
00832         if ( NULL == p || '\0' == p[0] )
00833         {
00834                 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_DOCUMENT_EMPTY], 0, 0 );
00835         }
00836 
00837         
00838         
00839         
00840         m_location.Clear();
00841         if ( prevData )
00842         {
00843                 m_location.row = prevData->Cursor().row;
00844                 m_location.col = prevData->Cursor().col;
00845         }
00846         else
00847         {
00848                 m_location.row = 0;
00849                 m_location.col = 0;
00850         }
00851         XmlParsingData data( p, TabSize(), m_location.row, m_location.col, m_condenseWhiteSpace);
00852         m_location = data.Cursor();
00853 
00854         if ( encoding == TIXML_ENCODING_UNKNOWN )
00855         {
00856                 
00857                 const unsigned char* pU = (const unsigned char*)p;
00858                 if (    *(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0
00859                         && *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1
00860                         && *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 )
00861                 {
00862                         encoding = TIXML_ENCODING_UTF8;
00863                         m_useMicrosoftBOM = true;
00864                 }
00865         }
00866 
00867         p = SkipWhiteSpace( p, encoding );
00868         if ( !p )
00869         {
00870                 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_DOCUMENT_EMPTY], m_location.row, m_location.col );
00871         }
00872 
00873         while ( p && *p )
00874         {
00875                 ValidateMem();
00876 
00877                 XmlNodePtr node = Identify( p, encoding );
00878                 if ( node.IsNotNull() )
00879                 {
00880                         p = node->_Parse( p, &data, encoding );
00881                         AppendChild( node );
00882                         ValidateMem();
00883                 }
00884                 else
00885                 {
00886                         break;
00887                 }
00888 
00889                 
00890                 if (encoding == TIXML_ENCODING_UNKNOWN && node->IsDeclaration() )
00891                 {
00892                         XmlDeclarationPtr dec = node->ToDeclaration();
00893                         const String& enc = dec->Encoding();
00894 
00895                         if ( enc.Length() == 0 )
00896                                 encoding = TIXML_ENCODING_UTF8;
00897                         else if ( StringEqual( enc.GetChars(), "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
00898                                 encoding = TIXML_ENCODING_UTF8;
00899                         else if ( StringEqual( enc.GetChars(), "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
00900                                 encoding = TIXML_ENCODING_UTF8; 
00901                         else 
00902                                 encoding = TIXML_ENCODING_LEGACY;
00903                 }
00904 
00905                 p = SkipWhiteSpace( p, encoding );
00906         }
00907 
00908         
00909         if ( m_firstChild.IsNull() ) 
00910         {
00911                 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_DOCUMENT_EMPTY], 0, 0 );
00912         }
00913 
00914         
00915         return p;
00916 }
00917 
00918 
00919 
00920 
00921 
00922 
00923 
00924 
00925 
00926 
00927 
00928 
00929 
00930 
00931 
00932 
00933 
00934 
00935 
00936 
// Prefixes used to classify the node that starts at a '<'.  "<!" is a
// prefix of both "<!--" and "<![CDATA[", so it has to be tested after them.
static const char* _xmlHeader     = "<?xml";
static const char* _commentHeader = "<!--";
static const char* _dtdHeader     = "<!";
static const char* _cdataHeader   = "<![CDATA[";
00941 
00942 XmlNodePtr XmlNode::Identify( const char* p, XmlEncoding encoding )
00943 {
00944         XmlNodePtr returnNode;
00945 
00946         p = SkipWhiteSpace( p, encoding );
00947         if( !p || !*p || *p != '<' )
00948         {
00949                 return returnNode;
00950         }
00951 
00952         p = SkipWhiteSpace( p, encoding );
00953 
00954         if ( !p || !*p )
00955         {
00956                 return returnNode;
00957         }
00958 
00959         
00960         
00961         
00962         
00963         
00964         
00965 
00966         if ( StringEqual( p, _xmlHeader, true, encoding ) )
00967         {
00968 #ifdef DEBUG_PARSER
00969                 TIXML_LOG( "XML parsing Declaration\n" );
00970 #endif
00971                 returnNode = (XmlNodePtr)XmlDeclarationPtr(new XmlDeclaration());
00972         }
00973         else if ( StringEqual( p, _commentHeader, false, encoding ) )
00974         {
00975 #ifdef DEBUG_PARSER
00976                 TIXML_LOG( "XML parsing Comment\n" );
00977 #endif
00978                 returnNode = (XmlNodePtr)XmlCommentPtr(new XmlComment());
00979         }
00980         else if ( StringEqual( p, _cdataHeader, false, encoding ) )
00981         {
00982 #ifdef DEBUG_PARSER
00983                 TIXML_LOG( "XML parsing CDATA\n" );
00984 #endif
00985                 XmlTextPtr text = XmlTextPtr(new XmlText( "" ));
00986                 text->SetCDATA( true );
00987                 returnNode = (XmlNodePtr)text;
00988         }
00989         else if ( StringEqual( p, _dtdHeader, false, encoding ) )
00990         {
00991 #ifdef DEBUG_PARSER
00992                 TIXML_LOG( "XML parsing Unknown(1)\n" );
00993 #endif
00994                 returnNode = (XmlNodePtr)XmlUnknownPtr(new XmlUnknown());
00995         }
00996         else if (    IsAlpha( *(p+1), encoding )
00997                 || *(p+1) == '_' )
00998         {
00999 #ifdef DEBUG_PARSER
01000                 TIXML_LOG( "XML parsing Element\n" );
01001 #endif
01002                 returnNode = (XmlNodePtr)XmlElementPtr(new XmlElement( "" ));
01003         }
01004         else
01005         {
01006 #ifdef DEBUG_PARSER
01007                 TIXML_LOG( "XML parsing Unknown(2)\n" );
01008 #endif
01009                 returnNode = (XmlNodePtr)XmlUnknownPtr(new XmlUnknown());
01010         }
01011 
01012         if ( returnNode.IsNotNull() )
01013         {
01014                 
01015                 returnNode->m_parent = this;
01016 
01017                 returnNode->m_self = returnNode;
01018         }
01019         return returnNode;
01020 }
01021 
01022 const char* XmlElement::_Parse( const char* p, XmlParsingData* data, XmlEncoding encoding )
01023 {
01024         p = SkipWhiteSpace( p, encoding );
01025 
01026         if ( !p || !*p )
01027         {
01028                 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_PARSING_ELEMENT], NULL == data ? 0 : data->Cursor().row, NULL == data ? 0 : data->Cursor().col );
01029         }
01030 
01031         if ( data )
01032         {
01033                 data->Stamp( p, encoding );
01034                 m_location = data->Cursor();
01035         }
01036 
01037         if ( *p != '<' )
01038         {
01039                 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_PARSING_ELEMENT], NULL == data ? 0 : data->Cursor().row, NULL == data ? 0 : data->Cursor().col );
01040         }
01041 
01042         p = SkipWhiteSpace( p+1, encoding );
01043 
01044         
01045         const char* pErr = p;
01046 
01047         p = ReadName( p, &m_name, encoding );
01048         if ( !p || !*p )
01049         {
01050                 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME], NULL == data ? 0 : data->Cursor().row, NULL == data ? 0 : data->Cursor().col );
01051         }
01052 
01053         StringBuffer endTag ("</");
01054         endTag.Append( m_name );
01055         endTag.Append( ">" );
01056 
01057         
01058         
01059         while ( p && *p )
01060         {
01061                 pErr = p;
01062                 p = SkipWhiteSpace( p, encoding );
01063                 if ( !p || !*p )
01064                 {
01065                         throw new XmlException( _xmlErrorStrings[TIXML_ERROR_READING_ATTRIBUTES], NULL == data ? 0 : data->Cursor().row, NULL == data ? 0 : data->Cursor().col );
01066                 }
01067                 if ( *p == '/' )
01068                 {
01069                         ++p;
01070                         
01071                         if ( *p  != '>' )
01072                         {
01073                                 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_PARSING_EMPTY], NULL == data ? 0 : data->Cursor().row, NULL == data ? 0 : data->Cursor().col );
01074                         }
01075                         return (p+1);
01076                 }
01077                 else if ( *p == '>' )
01078                 {
01079                         
01080                         
01081                         
01082                         ++p;
01083                         p = ReadValue( p, data, encoding );             
01084                         if ( !p || !*p ) 
01085                         {
01086                                 
01087                                 
01088                                 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_READING_END_TAG], NULL == data ? 0 : data->Cursor().row, NULL == data ? 0 : data->Cursor().col );
01089                         }
01090 
01091                         
01092                         if ( StringEqual( p, endTag.GetChars(), false, encoding ) )
01093                         {
01094                                 p += endTag.Length();
01095                                 return p;
01096                         }
01097                         else
01098                         {
01099                                 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_READING_END_TAG], NULL == data ? 0 : data->Cursor().row, NULL == data ? 0 : data->Cursor().col );
01100                         }
01101                 }
01102                 else
01103                 {
01104                         
01105                         XmlAttributePtr attrib = XmlAttributePtr(new XmlAttribute());
01106                         XmlDocumentPtr document = GetDocument();
01107                         attrib->SetDocument( document );
01108                         pErr = p;
01109                         p = attrib->_Parse( p, data, encoding );
01110 
01111                         if ( !p || !*p )
01112                         {
01113                                 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_PARSING_ELEMENT], NULL == data ? 0 : data->Cursor().row, NULL == data ? 0 : data->Cursor().col );
01114                         }
01115 
01116                         if ( m_attribs.IsNull() )
01117                         {
01118                                 m_attribs = XmlAttributeCollectionPtr(new XmlAttributeCollection());
01119                         }
01120 
01121                         
01122                         XmlAttributePtr node = m_attribs->Find( attrib->Name() );
01123                         if ( node.IsNotNull() )
01124                         {
01125                                 node.ValidateMem();
01126                                 node->SetValue( *attrib->Value() );
01127                                 return NULL;
01128                         }
01129 
01130                         m_attribs->Add( attrib );
01131                 }
01132         }
01133         return p;
01134 }
01135 
01136 const char* XmlElement::ReadValue( const char* p, XmlParsingData* data, XmlEncoding encoding )
01137 {
01138         
01139         const char* pWithWhiteSpace = p;
01140         p = SkipWhiteSpace( p, encoding );
01141 
01142         while ( p && *p )
01143         {
01144                 if ( *p != '<' )
01145                 {
01146                         
01147                         XmlTextPtr textNode = XmlTextPtr(new XmlText( "" ));
01148                         XmlNodePtr xtextNode = (XmlNodePtr)textNode;
01149                         textNode->m_self = xtextNode;
01150 
01151                         if ( XmlDocument::IsWhiteSpaceCondensed() )
01152                         {
01153                                 p = textNode->_Parse( p, data, encoding );
01154                         }
01155                         else
01156                         {
01157                                 
01158                                 
01159                                 p = textNode->_Parse( pWithWhiteSpace, data, encoding );
01160                         }
01161 
01162                         if ( !textNode->IsBlank() )
01163                         {
01164                                 AppendChild( textNode );
01165                         }
01166                         else
01167                         {
01168                         }
01169                 } 
01170                 else 
01171                 {
01172                         
01173                         
01174                         
01175                         if ( StringEqual( p, "</", false, encoding ) )
01176                         {
01177                                 return p;
01178                         }
01179                         else
01180                         {
01181                                 XmlNodePtr node = Identify( p, encoding );
01182                                 if ( node.IsNotNull() )
01183                                 {
01184                                         p = node->_Parse( p, data, encoding );
01185                                         AppendChild( node );
01186                                 }                               
01187                                 else
01188                                 {
01189                                         return NULL;
01190                                 }
01191                         }
01192                 }
01193                 pWithWhiteSpace = p;
01194                 p = SkipWhiteSpace( p, encoding );
01195         }
01196 
01197         if ( !p )
01198         {
01199                 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_READING_ELEMENT_VALUE], 0, 0 );
01200         }       
01201         return p;
01202 }
01203 
01204 const char* XmlUnknown::_Parse( const char* p, XmlParsingData* data, XmlEncoding encoding )
01205 {
01206         p = SkipWhiteSpace( p, encoding );
01207 
01208         if ( data )
01209         {
01210                 data->Stamp( p, encoding );
01211                 m_location = data->Cursor();
01212         }
01213         if ( !p || !*p || *p != '<' )
01214         {
01215                 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_PARSING_UNKNOWN], data->Cursor().row, data->Cursor().col );
01216         }
01217 
01218         ++p;
01219         m_value = "";
01220 
01221         while ( p && *p && *p != '>' )
01222         {
01223                 m_value.Append( *p );
01224                 ++p;
01225         }
01226 
01227         if ( !p )
01228         {
01229                 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_PARSING_UNKNOWN], data->Cursor().row, data->Cursor().col );
01230         }
01231         if ( *p == '>' )
01232                 return p+1;
01233         return p;
01234 }
01235 
01236 const char* XmlComment::_Parse( const char* p, XmlParsingData* data, XmlEncoding encoding )
01237 {
01238         m_value = "";
01239 
01240         p = SkipWhiteSpace( p, encoding );
01241 
01242         if ( data )
01243         {
01244                 data->Stamp( p, encoding );
01245                 m_location = data->Cursor();
01246         }
01247         const char* startTag = "<!--";
01248         const char* endTag   = "-->";
01249 
01250         if ( !StringEqual( p, startTag, false, encoding ) )
01251         {
01252                 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_PARSING_COMMENT], data->Cursor().row, data->Cursor().col );
01253         }
01254         p += strlen( startTag );
01255 
01256         
01257         
01258         
01259         
01260         
01261         
01262 
01263 
01264 
01265 
01266 
01267 
01268 
01269 
01270 
01271 
01272 
01273 
01274         m_value = "";
01275         
01276         while ( p && *p && !StringEqual( p, endTag, false, encoding ) )
01277         {
01278                 m_value.Append( p, 1 );
01279                 ++p;
01280         }
01281         if ( p ) 
01282                 p += strlen( endTag );
01283 
01284         return p;
01285 }
01286 
// Quote characters that XmlAttribute::_Parse() accepts around a value.
static const char SINGLE_QUOTE = '\'';
static const char DOUBLE_QUOTE = '"';
01289 
01290 const char* XmlAttribute::_Parse( const char* p, XmlParsingData* data, XmlEncoding encoding )
01291 {
01292         ASSERT(NULL != p);
01293 
01294         p = SkipWhiteSpace( p, encoding );
01295 
01296         if ( !p || !*p ) 
01297                 return NULL;
01298 
01299         
01300         
01301         
01302 
01303         if ( data )
01304         {
01305                 data->Stamp( p, encoding );
01306                 m_location = data->Cursor();
01307         }
01308 
01309         
01310         const char* pErr = p;
01311         p = ReadName( p, &m_name, encoding );
01312         if ( !p || !*p )
01313         {
01314                 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_READING_ATTRIBUTES], data->Cursor().row, data->Cursor().col );
01315         }
01316 
01317         p = SkipWhiteSpace( p, encoding );
01318         if ( !p || !*p || *p != '=' )
01319         {
01320                 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_READING_ATTRIBUTES], data->Cursor().row, data->Cursor().col );
01321         }
01322 
01323         ++p;    
01324         p = SkipWhiteSpace( p, encoding );
01325         if ( !p || !*p )
01326         {
01327                 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_READING_ATTRIBUTES], data->Cursor().row, data->Cursor().col );
01328         }
01329 
01330         const char* end;
01331 
01332         if ( *p == SINGLE_QUOTE )
01333         {
01334                 ++p;
01335                 end = "\'";             
01336                 p = ReadText( p, m_value, false, end, false, encoding );
01337         }
01338         else if ( *p == DOUBLE_QUOTE )
01339         {
01340                 ++p;
01341                 end = "\"";             
01342                 p = ReadText( p, m_value, false, end, false, encoding );
01343         }
01344         else
01345         {
01346                 
01347                 
01348                 
01349                 m_value = "";
01350                 while (    p && *p                                                                                      
01351                         && !IsWhiteSpace( *p ) && *p != '\n' && *p != '\r'      
01352                         && *p != '/' && *p != '>' )                                                     
01353                 {
01354                         if ( *p == SINGLE_QUOTE || *p == DOUBLE_QUOTE ) 
01355                         {
01356                                 
01357                                 
01358                                 
01359                                 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_READING_ATTRIBUTES], data->Cursor().row, data->Cursor().col );
01360                         }
01361                         m_value.Append( *p );
01362                         ++p;
01363                 }
01364         }
01365         return p;
01366 }
01367 
01368 const char* XmlText::_Parse( const char* p, XmlParsingData* data, XmlEncoding encoding )
01369 {
01370         m_value = "";
01371 
01372         if ( data )
01373         {
01374                 data->Stamp( p, encoding );
01375                 m_location = data->Cursor();
01376         }
01377 
01378         const char* const startTag = "<![CDATA[";
01379         const char* const endTag   = "]]>";
01380 
01381         if ( m_cdata || StringEqual( p, startTag, false, encoding ) )
01382         {
01383                 m_cdata = true;
01384 
01385                 if ( !StringEqual( p, startTag, false, encoding ) )
01386                 {
01387                         throw new XmlException( _xmlErrorStrings[TIXML_ERROR_PARSING_CDATA], data->Cursor().row, data->Cursor().col );
01388                 }
01389                 p += strlen( startTag );
01390 
01391                 
01392                 while ( p && *p && !StringEqual( p, endTag, false, encoding ) )
01393                 {
01394                         m_value.Append( *p );
01395                         ++p;
01396                 }
01397 
01398                 StringBuffer dummy;
01399                 p = ReadText( p, dummy, false, endTag, false, encoding );
01400                 return p;
01401         }
01402         else
01403         {
01404                 bool ignoreWhite = true;
01405 
01406                 const char* end = "<";
01407                 p = ReadText( p, m_value, ignoreWhite, end, false, encoding );
01408                 if ( p )
01409                         return p-1;     
01410                 return 0;
01411         }
01412 }
01413 
01414 const char* XmlDeclaration::_Parse( const char* p, XmlParsingData* data, XmlEncoding _encoding )
01415 {
01416         p = SkipWhiteSpace( p, _encoding );
01417         
01418         
01419 
01420         if ( !p || !*p || !StringEqual( p, "<?xml", true, _encoding ) )
01421         {
01422                 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_PARSING_DECLARATION], data->Cursor().row, data->Cursor().col );
01423         }
01424         if ( data )
01425         {
01426                 data->Stamp( p, _encoding );
01427                 m_location = data->Cursor();
01428         }
01429         p += 5;
01430 
01431         version = "";
01432         encoding = "";
01433         standalone = "";
01434 
01435         while ( p && *p )
01436         {
01437                 if ( *p == '>' )
01438                 {
01439                         ++p;
01440                         return p;
01441                 }
01442 
01443                 p = SkipWhiteSpace( p, _encoding );
01444                 if ( StringEqual( p, "version", true, _encoding ) )
01445                 {
01446                         XmlAttribute attrib;
01447                         p = attrib._Parse( p, data, _encoding );                
01448                         version = *attrib.Value();
01449                 }
01450                 else if ( StringEqual( p, "encoding", true, _encoding ) )
01451                 {
01452                         XmlAttribute attrib;
01453                         p = attrib._Parse( p, data, _encoding );                
01454                         encoding = *attrib.Value();
01455                 }
01456                 else if ( StringEqual( p, "standalone", true, _encoding ) )
01457                 {
01458                         XmlAttribute attrib;
01459                         p = attrib._Parse( p, data, _encoding );                
01460                         standalone = *attrib.Value();
01461                 }
01462                 else
01463                 {
01464                         
01465                         while( p && *p && *p != '>' && !IsWhiteSpace( *p ) )
01466                                 ++p;
01467                 }
01468                 ValidateMem();
01469         }
01470         return NULL;
01471 }
01472 
01473 bool XmlText::IsBlank() const
01474 {
01475         for ( int i=0; i<m_value.Length(); i++ )
01476         {
01477                 if ( !IsWhiteSpace( m_value[i] ) )
01478                 {
01479                         return false;
01480                 }
01481         }
01482         return true;
01483 }
01484