00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026 #include <ctype.h>
00027 #include <stdio.h>
00028 #include <stddef.h>
00029
00030 #include <spl/xml/XmlAttribute.h>
00031 #include <spl/xml/XmlComment.h>
00032 #include <spl/xml/XmlDeclaration.h>
00033 #include <spl/xml/XmlDocument.h>
00034 #include <spl/xml/XmlElement.h>
00035 #include <spl/xml/XmlNode.h>
00036 #include <spl/xml/XmlText.h>
00037 #include <spl/xml/XmlUnknown.h>
00038
// Chooses the bounded printf/scanf family for the active compiler.
// Nothing is defined unless the build sets TIXML_SAFE; code further down
// tests defined(TIXML_SNPRINTF) before using the bounded variant.
#if defined(TIXML_SAFE)
#  if defined(_MSC_VER) && (_MSC_VER >= 1400)
     // _MSC_VER 1400 and newer: the "_s" variants.
#    define TIXML_SNPRINTF _snprintf_s
#    define TIXML_SNSCANF  _snscanf_s
#    define TIXML_SSCANF   sscanf_s
#  elif defined(_MSC_VER) && (_MSC_VER >= 1200)
     // _MSC_VER 1200 up to (not including) 1400: underscore-prefixed variants.
#    define TIXML_SNPRINTF _snprintf
#    define TIXML_SNSCANF  _snscanf
#    define TIXML_SSCANF   sscanf
#  elif defined(__GNUC__) && (__GNUC__ >= 3)
     // GCC 3 and newer.
#    define TIXML_SNPRINTF snprintf
#    define TIXML_SNSCANF  snscanf
#    define TIXML_SSCANF   sscanf
#  else
     // Any other compiler: only the sscanf wrapper is provided.
#    define TIXML_SSCANF   sscanf
#  endif
#endif
00061
// One predefined XML entity: its escaped spelling and the character it
// stands for.
struct Entity
{
    const char*  str;        // escaped spelling, e.g. "&amp;"
    unsigned int strLength;  // strlen( str ), stored so callers need not recompute it
    char         chr;        // the literal character the entity represents
};

#define NUM_ENTITY 5
#define MAX_ENTITY_LENGTH 6

// The five predefined XML entities.
//
// _XmlEncodeString indexes this table by position (0 '&', 1 '<', 2 '>',
// 3 '"', 4 '\''), and GetEntity matches input against str/strLength, so
// both the order and the stored lengths must stay in sync with the strings.
static struct Entity entity[ NUM_ENTITY ] =
{
    { "&amp;",  5, '&'  },
    { "&lt;",   4, '<'  },
    { "&gt;",   4, '>'  },
    { "&quot;", 6, '\"' },
    { "&apos;", 6, '\'' }
};
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
// The three bytes EF BB BF. XmlDocument::_Parse treats this sequence at the
// start of the input as the marker that selects UTF-8.
const unsigned char TIXML_UTF_LEAD_0 = 0xEFU;
const unsigned char TIXML_UTF_LEAD_1 = 0xBBU;
const unsigned char TIXML_UTF_LEAD_2 = 0xBFU;

// Number of bytes to step for a character whose first byte is the index.
// Bytes that cannot start a multi-byte sequence map to 1 so that scanning
// always makes progress.
const int utf8ByteTable[256] =
{
    //  0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   // 0x00
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   // 0x10
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   // 0x20
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   // 0x30
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   // 0x40
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   // 0x50
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   // 0x60
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   // 0x70
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   // 0x80
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   // 0x90
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   // 0xa0
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   // 0xb0
        1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,   // 0xc0: 0xc2.. start 2-byte sequences
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,   // 0xd0
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,   // 0xe0: 3-byte sequences
        4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1    // 0xf0: 0xf0..0xf4 start 4-byte sequences
};
00120
// Encodes one code point as UTF-8.
//
//   input  - the code point to encode
//   output - receives 1 to 4 bytes; NOT NUL-terminated, caller supplies
//            room for at least 4 bytes
//   length - receives the number of bytes written, or 0 when `input`
//            cannot be encoded (above U+10FFFF); `output` is untouched then
//
// Values above U+10FFFF are rejected: encoding them would need a lead byte
// of 0xF5 or higher, which utf8ByteTable in this file treats as invalid.
void ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
{
    const unsigned long BYTE_MASK = 0xBF;
    const unsigned long BYTE_MARK = 0x80;
    const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };

    if ( input < 0x80 )
        *length = 1;
    else if ( input < 0x800 )
        *length = 2;
    else if ( input < 0x10000 )
        *length = 3;
    else if ( input <= 0x10FFFF )
        *length = 4;
    else
    {
        *length = 0;    // not encodable
        return;
    }

    // The bytes are produced last-to-first, so start one past the end.
    output += *length;

    // Every case deliberately falls through to the next one: each trailing
    // byte takes the low six bits, then the lead byte takes what is left.
    switch ( *length )
    {
        case 4:
            --output;
            *output = (char)((input | BYTE_MARK) & BYTE_MASK);
            input >>= 6;
            // fall through
        case 3:
            --output;
            *output = (char)((input | BYTE_MARK) & BYTE_MASK);
            input >>= 6;
            // fall through
        case 2:
            --output;
            *output = (char)((input | BYTE_MARK) & BYTE_MASK);
            input >>= 6;
            // fall through
        case 1:
            --output;
            *output = (char)(input | FIRST_BYTE_MARK[*length]);
    }
}
00160
00161
00162
00163
00164 static bool StringEqual( const char* p,
00165 const char* endTag,
00166 bool ignoreCase,
00167 XmlEncoding encoding );
00168
00169 const char* _xmlErrorStrings[ XmlNode::TIXML_ERROR_STRING_COUNT ] =
00170 {
00171 "No error",
00172 "Error",
00173 "Failed to open file",
00174 "Memory allocation failed.",
00175 "Error parsing Element.",
00176 "Failed to read Element name",
00177 "Error reading Element value.",
00178 "Error reading Attributes.",
00179 "Error: empty tag.",
00180 "Error reading end tag.",
00181 "Error parsing Unknown.",
00182 "Error parsing Comment.",
00183 "Error parsing Declaration.",
00184 "Error document empty.",
00185 "Error null (0) or unexpected EOF found in input stream.",
00186 "Error parsing CDATA.",
00187 "Error when TiXmlDocument added to document, because TiXmlDocument can only be at the root.",
00188 };
00189
00190
00191
00192 static int IsAlpha( unsigned char anyByte, XmlEncoding encoding );
00193
00194 static int IsAlphaNum( unsigned char anyByte, XmlEncoding encoding );
00195
00196 inline static int ToLower( int v, XmlEncoding encoding )
00197 {
00198 if ( encoding == TIXML_ENCODING_UTF8 )
00199 {
00200 if ( v < 128 )
00201 return tolower( v );
00202 return v;
00203 }
00204 else
00205 {
00206 return tolower( v );
00207 }
00208 }
00209
00210 void _XmlEncodeString( const String& str, StringBuffer& outString )
00211 {
00212 int i = 0;
00213
00214 while( i < str.Length() )
00215 {
00216 unsigned char c = (unsigned char) str[i];
00217
00218 if ( c == '&'
00219 && i < ( (int)str.Length() - 2 )
00220 && str[i+1] == '#'
00221 && str[i+2] == 'x' )
00222 {
00223
00224
00225
00226
00227
00228
00229
00230
00231
00232
00233 while ( i < str.Length()-1 )
00234 {
00235 outString.Append( str.GetChars() + i, 1 );
00236 ++i;
00237 if ( str[i] == ';' )
00238 break;
00239 }
00240 }
00241 else if ( c == '&' )
00242 {
00243 outString.Append( entity[0].str, entity[0].strLength );
00244 ++i;
00245 }
00246 else if ( c == '<' )
00247 {
00248 outString.Append( entity[1].str, entity[1].strLength );
00249 ++i;
00250 }
00251 else if ( c == '>' )
00252 {
00253 outString.Append( entity[2].str, entity[2].strLength );
00254 ++i;
00255 }
00256 else if ( c == '\"' )
00257 {
00258 outString.Append( entity[3].str, entity[3].strLength );
00259 ++i;
00260 }
00261 else if ( c == '\'' )
00262 {
00263 outString.Append( entity[4].str, entity[4].strLength );
00264 ++i;
00265 }
00266 else if ( c < 32 )
00267 {
00268
00269
00270 char buf[ 32 ];
00271
00272 #if defined(TIXML_SNPRINTF)
00273 TIXML_SNPRINTF( buf, sizeof(buf), "&#x%02X;", (unsigned) ( c & 0xff ) );
00274 #else
00275 sprintf( buf, "&#x%02X;", (unsigned) ( c & 0xff ) );
00276 #endif
00277
00278
00279
00280 outString.Append( buf, (int)strlen( buf ) );
00281 ++i;
00282 }
00283 else
00284 {
00285
00286
00287 outString.Append((char) c);
00288 ++i;
00289 }
00290 }
00291 }
00292
00293 static const char* SkipWhiteSpace( const char*, XmlEncoding encoding );
00294
// True when `c` is a line break or anything isspace() accepts.
inline static bool IsWhiteSpace( char c )
{
    if ( c == '\n' || c == '\r' )
        return true;
    return isspace( (unsigned char) c ) != 0;
}

// Integer overload: values of 256 and above are never whitespace; anything
// smaller is narrowed to char and classified by the overload above.
inline static bool IsWhiteSpace( int c )
{
    return ( c < 256 ) && IsWhiteSpace( (char) c );
}
00306
00307
00308
00309
00310
00311 static const char* ReadName( const char* p, String* name, XmlEncoding encoding );
00312
00313
00314
00315
00316 static const char* ReadText( const char* in,
00317 StringBuffer& text,
00318 bool ignoreWhiteSpace,
00319 const char* endTag,
00320 bool ignoreCase,
00321 XmlEncoding encoding );
00322
00323
00324 static const char* GetEntity( const char* in, char* value, int* length, XmlEncoding encoding );
00325
00326
00327
00328 inline static const char* GetChar( const char* p, char* _value, int* length, XmlEncoding encoding )
00329 {
00330 ASSERT( NULL != p );
00331 if ( encoding == TIXML_ENCODING_UTF8 )
00332 {
00333 *length = utf8ByteTable[ *((const unsigned char*)p) ];
00334 assert( *length >= 0 && *length < 5 );
00335 }
00336 else
00337 {
00338 *length = 1;
00339 }
00340
00341 if ( *length == 1 )
00342 {
00343 if ( *p == '&' )
00344 return GetEntity( p, _value, length, encoding );
00345 *_value = *p;
00346 return p+1;
00347 }
00348 else if ( *length )
00349 {
00350
00351
00352 for( int i=0; p[i] && i<*length; ++i ) {
00353 _value[i] = p[i];
00354 }
00355 return p + (*length);
00356 }
00357 else
00358 {
00359
00360 return 0;
00361 }
00362 }
00363
00364 int IsAlpha( unsigned char anyByte, XmlEncoding )
00365 {
00366
00367
00368
00369
00370
00371
00372
00373 if ( anyByte < 127 )
00374 return isalpha( anyByte );
00375 else
00376 return 1;
00377
00378
00379
00380
00381
00382 }
00383
00384
00385 int IsAlphaNum( unsigned char anyByte, XmlEncoding )
00386 {
00387
00388
00389
00390
00391
00392
00393
00394 if ( anyByte < 127 )
00395 return isalnum( anyByte );
00396 else
00397 return 1;
00398
00399
00400
00401
00402
00403 }
00404
00405 void XmlParsingData::Stamp( const char* now, XmlEncoding encoding )
00406 {
00407 assert( now );
00408
00409
00410 if ( tabsize < 1 )
00411 {
00412 return;
00413 }
00414
00415
00416 int row = cursor.row;
00417 int col = cursor.col;
00418 const char* p = stamp;
00419 assert( p );
00420
00421 while ( p < now )
00422 {
00423
00424 const unsigned char* pU = (const unsigned char*)p;
00425
00426
00427 switch (*pU) {
00428 case 0:
00429
00430
00431 return;
00432
00433 case '\r':
00434
00435 ++row;
00436 col = 0;
00437
00438 ++p;
00439
00440
00441 if (*p == '\n')
00442 {
00443 ++p;
00444 }
00445 break;
00446
00447 case '\n':
00448
00449 ++row;
00450 col = 0;
00451
00452
00453 ++p;
00454
00455
00456
00457
00458 if (*p == '\r')
00459 {
00460 ++p;
00461 }
00462 break;
00463
00464 case '\t':
00465
00466 ++p;
00467
00468
00469 col = (col / tabsize + 1) * tabsize;
00470 break;
00471
00472 case TIXML_UTF_LEAD_0:
00473 if ( encoding == TIXML_ENCODING_UTF8 )
00474 {
00475 if ( *(p+1) && *(p+2) )
00476 {
00477
00478
00479 if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 )
00480 p += 3;
00481 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU )
00482 p += 3;
00483 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU )
00484 p += 3;
00485 else
00486 { p +=3; ++col; }
00487 }
00488 }
00489 else
00490 {
00491 ++p;
00492 ++col;
00493 }
00494 break;
00495
00496 default:
00497 if ( encoding == TIXML_ENCODING_UTF8 )
00498 {
00499
00500 int step = utf8ByteTable[*((const unsigned char*)p)];
00501 if ( step == 0 )
00502 step = 1;
00503 p += step;
00504
00505
00506 ++col;
00507 }
00508 else
00509 {
00510 ++p;
00511 ++col;
00512 }
00513 break;
00514 }
00515 }
00516 cursor.row = row;
00517 cursor.col = col;
00518 assert( cursor.row >= -1 );
00519 assert( cursor.col >= -1 );
00520 stamp = p;
00521 assert( stamp );
00522 }
00523
00524
00525 const char* SkipWhiteSpace( const char* p, XmlEncoding encoding )
00526 {
00527 if ( !p || !*p )
00528 {
00529 return 0;
00530 }
00531 if ( encoding == TIXML_ENCODING_UTF8 )
00532 {
00533 while ( *p )
00534 {
00535 const unsigned char* pU = (const unsigned char*)p;
00536
00537
00538 if ( *(pU+0)==TIXML_UTF_LEAD_0
00539 && *(pU+1)==TIXML_UTF_LEAD_1
00540 && *(pU+2)==TIXML_UTF_LEAD_2 )
00541 {
00542 p += 3;
00543 continue;
00544 }
00545 else if(*(pU+0)==TIXML_UTF_LEAD_0
00546 && *(pU+1)==0xbfU
00547 && *(pU+2)==0xbeU )
00548 {
00549 p += 3;
00550 continue;
00551 }
00552 else if(*(pU+0)==TIXML_UTF_LEAD_0
00553 && *(pU+1)==0xbfU
00554 && *(pU+2)==0xbfU )
00555 {
00556 p += 3;
00557 continue;
00558 }
00559
00560 if ( IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' )
00561 ++p;
00562 else
00563 break;
00564 }
00565 }
00566 else
00567 {
00568 while ( *p && IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' )
00569 ++p;
00570 }
00571
00572 return p;
00573 }
00574
00575
00576
00577
00578 const char* ReadName( const char* p, String * name, XmlEncoding encoding )
00579 {
00580 ASSERT(NULL != p);
00581 name->Set("");
00582
00583
00584
00585
00586
00587
00588
00589
00590 if ( NULL != p && *p && ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) )
00591 {
00592 int len = 0;
00593 const char *start = p;
00594 while( p && *p &&
00595 (
00596 IsAlphaNum( (unsigned char ) *p, encoding )
00597 || *p == '_'
00598 || *p == '-'
00599 || *p == '.'
00600 || *p == ':' )
00601 )
00602 {
00603 ++p;
00604 len++;
00605 }
00606 if ( len > 0 )
00607 {
00608 name->Set( String(start, len) );
00609 }
00610 return p;
00611 }
00612 return NULL;
00613 }
00614
00615 const char* GetEntity( const char* p, char* value, int* length, XmlEncoding encoding )
00616 {
00617
00618 String ent;
00619 int i;
00620 *length = 0;
00621
00622 if ( *(p+1) && *(p+1) == '#' && *(p+2) )
00623 {
00624 unsigned long ucs = 0;
00625 ptrdiff_t delta = 0;
00626 unsigned mult = 1;
00627
00628 if ( *(p+2) == 'x' )
00629 {
00630
00631 if ( !*(p+3) )
00632 return 0;
00633
00634 const char* q = p+3;
00635 q = strchr( q, ';' );
00636
00637 if ( !q || !*q )
00638 return 0;
00639
00640 delta = q-p;
00641 --q;
00642
00643 while ( *q != 'x' )
00644 {
00645 if ( *q >= '0' && *q <= '9' )
00646 ucs += mult * (*q - '0');
00647 else if ( *q >= 'a' && *q <= 'f' )
00648 ucs += mult * (*q - 'a' + 10);
00649 else if ( *q >= 'A' && *q <= 'F' )
00650 ucs += mult * (*q - 'A' + 10 );
00651 else
00652 return 0;
00653 mult *= 16;
00654 --q;
00655 }
00656 }
00657 else
00658 {
00659
00660 if ( !*(p+2) )
00661 return 0;
00662
00663 const char* q = p+2;
00664 q = strchr( q, ';' );
00665
00666 if ( !q || !*q )
00667 return 0;
00668
00669 delta = q-p;
00670 --q;
00671
00672 while ( *q != '#' )
00673 {
00674 if ( *q >= '0' && *q <= '9' )
00675 ucs += mult * (*q - '0');
00676 else
00677 return 0;
00678 mult *= 10;
00679 --q;
00680 }
00681 }
00682 if ( encoding == TIXML_ENCODING_UTF8 )
00683 {
00684
00685 ConvertUTF32ToUTF8( ucs, value, length );
00686 }
00687 else
00688 {
00689 *value = (char)ucs;
00690 *length = 1;
00691 }
00692 return p + delta + 1;
00693 }
00694
00695
00696 for( i=0; i<NUM_ENTITY; ++i )
00697 {
00698 if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
00699 {
00700 assert( strlen( entity[i].str ) == entity[i].strLength );
00701 *value = entity[i].chr;
00702 *length = 1;
00703 return ( p + entity[i].strLength );
00704 }
00705 }
00706
00707
00708 *value = *p;
00709
00710
00711 return p+1;
00712 }
00713
00714
00715 bool StringEqual( const char* p,
00716 const char* tag,
00717 bool ignoreCase,
00718 XmlEncoding encoding )
00719 {
00720 assert( p );
00721 assert( tag );
00722 if ( !p || !*p )
00723 {
00724 assert( 0 );
00725 return false;
00726 }
00727
00728 const char* q = p;
00729
00730 if ( ignoreCase )
00731 {
00732 while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) )
00733 {
00734 ++q;
00735 ++tag;
00736 }
00737
00738 if ( *tag == 0 )
00739 return true;
00740 }
00741 else
00742 {
00743 while ( *q && *tag && *q == *tag )
00744 {
00745 ++q;
00746 ++tag;
00747 }
00748
00749 if ( *tag == 0 )
00750 return true;
00751 }
00752 return false;
00753 }
00754
00755 const char* ReadText( const char* p,
00756 StringBuffer& text,
00757 bool trimWhiteSpace,
00758 const char* endTag,
00759 bool caseInsensitive,
00760 XmlEncoding encoding )
00761 {
00762 text.SetLength(0);
00763
00764 if ( !trimWhiteSpace
00765 || !XmlDocument::IsWhiteSpaceCondensed() )
00766 {
00767
00768 while ( p && *p
00769 && !StringEqual( p, endTag, caseInsensitive, encoding )
00770 )
00771 {
00772 int len;
00773 char cArr[4] = { 0, 0, 0, 0 };
00774 p = GetChar( p, cArr, &len, encoding );
00775 text.Append( cArr, len );
00776 }
00777 }
00778 else
00779 {
00780 bool whitespace = false;
00781
00782
00783 p = SkipWhiteSpace( p, encoding );
00784 while ( p && *p
00785 && !StringEqual( p, endTag, caseInsensitive, encoding ) )
00786 {
00787 if ( *p == '\r' || *p == '\n' )
00788 {
00789 whitespace = true;
00790 ++p;
00791 }
00792 else if ( IsWhiteSpace( *p ) )
00793 {
00794 whitespace = true;
00795 ++p;
00796 }
00797 else
00798 {
00799
00800
00801 if ( whitespace )
00802 {
00803 text.Append(' ');
00804 whitespace = false;
00805 }
00806 int len;
00807 char cArr[4] = { 0, 0, 0, 0 };
00808 p = GetChar( p, cArr, &len, encoding );
00809 if ( len == 1 )
00810 text.Append(cArr[0]);
00811 else
00812 text.Append( cArr, len );
00813 }
00814 }
00815 }
00816 if ( p )
00817 p += strlen( endTag );
00818 return p;
00819 }
00820
00821 const char *XmlDocument::_Parse( const char *p, XmlParsingData* prevData, XmlEncoding encoding )
00822 {
00823 if (NULL != prevData)
00824 {
00825 m_condenseWhiteSpace = prevData->condenseWs;
00826 m_tabsize = prevData->tabsize;
00827 }
00828
00829
00830
00831
00832 if ( NULL == p || '\0' == p[0] )
00833 {
00834 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_DOCUMENT_EMPTY], 0, 0 );
00835 }
00836
00837
00838
00839
00840 m_location.Clear();
00841 if ( prevData )
00842 {
00843 m_location.row = prevData->Cursor().row;
00844 m_location.col = prevData->Cursor().col;
00845 }
00846 else
00847 {
00848 m_location.row = 0;
00849 m_location.col = 0;
00850 }
00851 XmlParsingData data( p, TabSize(), m_location.row, m_location.col, m_condenseWhiteSpace);
00852 m_location = data.Cursor();
00853
00854 if ( encoding == TIXML_ENCODING_UNKNOWN )
00855 {
00856
00857 const unsigned char* pU = (const unsigned char*)p;
00858 if ( *(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0
00859 && *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1
00860 && *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 )
00861 {
00862 encoding = TIXML_ENCODING_UTF8;
00863 m_useMicrosoftBOM = true;
00864 }
00865 }
00866
00867 p = SkipWhiteSpace( p, encoding );
00868 if ( !p )
00869 {
00870 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_DOCUMENT_EMPTY], m_location.row, m_location.col );
00871 }
00872
00873 while ( p && *p )
00874 {
00875 ValidateMem();
00876
00877 XmlNodePtr node = Identify( p, encoding );
00878 if ( node.IsNotNull() )
00879 {
00880 p = node->_Parse( p, &data, encoding );
00881 AppendChild( node );
00882 ValidateMem();
00883 }
00884 else
00885 {
00886 break;
00887 }
00888
00889
00890 if (encoding == TIXML_ENCODING_UNKNOWN && node->IsDeclaration() )
00891 {
00892 XmlDeclarationPtr dec = node->ToDeclaration();
00893 const String& enc = dec->Encoding();
00894
00895 if ( enc.Length() == 0 )
00896 encoding = TIXML_ENCODING_UTF8;
00897 else if ( StringEqual( enc.GetChars(), "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
00898 encoding = TIXML_ENCODING_UTF8;
00899 else if ( StringEqual( enc.GetChars(), "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
00900 encoding = TIXML_ENCODING_UTF8;
00901 else
00902 encoding = TIXML_ENCODING_LEGACY;
00903 }
00904
00905 p = SkipWhiteSpace( p, encoding );
00906 }
00907
00908
00909 if ( m_firstChild.IsNull() )
00910 {
00911 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_DOCUMENT_EMPTY], 0, 0 );
00912 }
00913
00914
00915 return p;
00916 }
00917
00918
00919
00920
00921
00922
00923
00924
00925
00926
00927
00928
00929
00930
00931
00932
00933
00934
00935
00936
// Prefixes XmlNode::Identify uses to tell the node kinds apart.
// "<!" is a prefix of the two longer "<!..." markers, so Identify tests it
// after them.
static const char* _xmlHeader     = "<?xml";
static const char* _commentHeader = "<!--";
static const char* _dtdHeader     = "<!";
static const char* _cdataHeader   = "<![CDATA[";
00941
00942 XmlNodePtr XmlNode::Identify( const char* p, XmlEncoding encoding )
00943 {
00944 XmlNodePtr returnNode;
00945
00946 p = SkipWhiteSpace( p, encoding );
00947 if( !p || !*p || *p != '<' )
00948 {
00949 return returnNode;
00950 }
00951
00952 p = SkipWhiteSpace( p, encoding );
00953
00954 if ( !p || !*p )
00955 {
00956 return returnNode;
00957 }
00958
00959
00960
00961
00962
00963
00964
00965
00966 if ( StringEqual( p, _xmlHeader, true, encoding ) )
00967 {
00968 #ifdef DEBUG_PARSER
00969 TIXML_LOG( "XML parsing Declaration\n" );
00970 #endif
00971 returnNode = (XmlNodePtr)XmlDeclarationPtr(new XmlDeclaration());
00972 }
00973 else if ( StringEqual( p, _commentHeader, false, encoding ) )
00974 {
00975 #ifdef DEBUG_PARSER
00976 TIXML_LOG( "XML parsing Comment\n" );
00977 #endif
00978 returnNode = (XmlNodePtr)XmlCommentPtr(new XmlComment());
00979 }
00980 else if ( StringEqual( p, _cdataHeader, false, encoding ) )
00981 {
00982 #ifdef DEBUG_PARSER
00983 TIXML_LOG( "XML parsing CDATA\n" );
00984 #endif
00985 XmlTextPtr text = XmlTextPtr(new XmlText( "" ));
00986 text->SetCDATA( true );
00987 returnNode = (XmlNodePtr)text;
00988 }
00989 else if ( StringEqual( p, _dtdHeader, false, encoding ) )
00990 {
00991 #ifdef DEBUG_PARSER
00992 TIXML_LOG( "XML parsing Unknown(1)\n" );
00993 #endif
00994 returnNode = (XmlNodePtr)XmlUnknownPtr(new XmlUnknown());
00995 }
00996 else if ( IsAlpha( *(p+1), encoding )
00997 || *(p+1) == '_' )
00998 {
00999 #ifdef DEBUG_PARSER
01000 TIXML_LOG( "XML parsing Element\n" );
01001 #endif
01002 returnNode = (XmlNodePtr)XmlElementPtr(new XmlElement( "" ));
01003 }
01004 else
01005 {
01006 #ifdef DEBUG_PARSER
01007 TIXML_LOG( "XML parsing Unknown(2)\n" );
01008 #endif
01009 returnNode = (XmlNodePtr)XmlUnknownPtr(new XmlUnknown());
01010 }
01011
01012 if ( returnNode.IsNotNull() )
01013 {
01014
01015 returnNode->m_parent = this;
01016
01017 returnNode->m_self = returnNode;
01018 }
01019 return returnNode;
01020 }
01021
01022 const char* XmlElement::_Parse( const char* p, XmlParsingData* data, XmlEncoding encoding )
01023 {
01024 p = SkipWhiteSpace( p, encoding );
01025
01026 if ( !p || !*p )
01027 {
01028 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_PARSING_ELEMENT], NULL == data ? 0 : data->Cursor().row, NULL == data ? 0 : data->Cursor().col );
01029 }
01030
01031 if ( data )
01032 {
01033 data->Stamp( p, encoding );
01034 m_location = data->Cursor();
01035 }
01036
01037 if ( *p != '<' )
01038 {
01039 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_PARSING_ELEMENT], NULL == data ? 0 : data->Cursor().row, NULL == data ? 0 : data->Cursor().col );
01040 }
01041
01042 p = SkipWhiteSpace( p+1, encoding );
01043
01044
01045 const char* pErr = p;
01046
01047 p = ReadName( p, &m_name, encoding );
01048 if ( !p || !*p )
01049 {
01050 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME], NULL == data ? 0 : data->Cursor().row, NULL == data ? 0 : data->Cursor().col );
01051 }
01052
01053 StringBuffer endTag ("</");
01054 endTag.Append( m_name );
01055 endTag.Append( ">" );
01056
01057
01058
01059 while ( p && *p )
01060 {
01061 pErr = p;
01062 p = SkipWhiteSpace( p, encoding );
01063 if ( !p || !*p )
01064 {
01065 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_READING_ATTRIBUTES], NULL == data ? 0 : data->Cursor().row, NULL == data ? 0 : data->Cursor().col );
01066 }
01067 if ( *p == '/' )
01068 {
01069 ++p;
01070
01071 if ( *p != '>' )
01072 {
01073 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_PARSING_EMPTY], NULL == data ? 0 : data->Cursor().row, NULL == data ? 0 : data->Cursor().col );
01074 }
01075 return (p+1);
01076 }
01077 else if ( *p == '>' )
01078 {
01079
01080
01081
01082 ++p;
01083 p = ReadValue( p, data, encoding );
01084 if ( !p || !*p )
01085 {
01086
01087
01088 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_READING_END_TAG], NULL == data ? 0 : data->Cursor().row, NULL == data ? 0 : data->Cursor().col );
01089 }
01090
01091
01092 if ( StringEqual( p, endTag.GetChars(), false, encoding ) )
01093 {
01094 p += endTag.Length();
01095 return p;
01096 }
01097 else
01098 {
01099 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_READING_END_TAG], NULL == data ? 0 : data->Cursor().row, NULL == data ? 0 : data->Cursor().col );
01100 }
01101 }
01102 else
01103 {
01104
01105 XmlAttributePtr attrib = XmlAttributePtr(new XmlAttribute());
01106 XmlDocumentPtr document = GetDocument();
01107 attrib->SetDocument( document );
01108 pErr = p;
01109 p = attrib->_Parse( p, data, encoding );
01110
01111 if ( !p || !*p )
01112 {
01113 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_PARSING_ELEMENT], NULL == data ? 0 : data->Cursor().row, NULL == data ? 0 : data->Cursor().col );
01114 }
01115
01116 if ( m_attribs.IsNull() )
01117 {
01118 m_attribs = XmlAttributeCollectionPtr(new XmlAttributeCollection());
01119 }
01120
01121
01122 XmlAttributePtr node = m_attribs->Find( attrib->Name() );
01123 if ( node.IsNotNull() )
01124 {
01125 node.ValidateMem();
01126 node->SetValue( *attrib->Value() );
01127 return NULL;
01128 }
01129
01130 m_attribs->Add( attrib );
01131 }
01132 }
01133 return p;
01134 }
01135
01136 const char* XmlElement::ReadValue( const char* p, XmlParsingData* data, XmlEncoding encoding )
01137 {
01138
01139 const char* pWithWhiteSpace = p;
01140 p = SkipWhiteSpace( p, encoding );
01141
01142 while ( p && *p )
01143 {
01144 if ( *p != '<' )
01145 {
01146
01147 XmlTextPtr textNode = XmlTextPtr(new XmlText( "" ));
01148 XmlNodePtr xtextNode = (XmlNodePtr)textNode;
01149 textNode->m_self = xtextNode;
01150
01151 if ( XmlDocument::IsWhiteSpaceCondensed() )
01152 {
01153 p = textNode->_Parse( p, data, encoding );
01154 }
01155 else
01156 {
01157
01158
01159 p = textNode->_Parse( pWithWhiteSpace, data, encoding );
01160 }
01161
01162 if ( !textNode->IsBlank() )
01163 {
01164 AppendChild( textNode );
01165 }
01166 else
01167 {
01168 }
01169 }
01170 else
01171 {
01172
01173
01174
01175 if ( StringEqual( p, "</", false, encoding ) )
01176 {
01177 return p;
01178 }
01179 else
01180 {
01181 XmlNodePtr node = Identify( p, encoding );
01182 if ( node.IsNotNull() )
01183 {
01184 p = node->_Parse( p, data, encoding );
01185 AppendChild( node );
01186 }
01187 else
01188 {
01189 return NULL;
01190 }
01191 }
01192 }
01193 pWithWhiteSpace = p;
01194 p = SkipWhiteSpace( p, encoding );
01195 }
01196
01197 if ( !p )
01198 {
01199 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_READING_ELEMENT_VALUE], 0, 0 );
01200 }
01201 return p;
01202 }
01203
01204 const char* XmlUnknown::_Parse( const char* p, XmlParsingData* data, XmlEncoding encoding )
01205 {
01206 p = SkipWhiteSpace( p, encoding );
01207
01208 if ( data )
01209 {
01210 data->Stamp( p, encoding );
01211 m_location = data->Cursor();
01212 }
01213 if ( !p || !*p || *p != '<' )
01214 {
01215 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_PARSING_UNKNOWN], data->Cursor().row, data->Cursor().col );
01216 }
01217
01218 ++p;
01219 m_value = "";
01220
01221 while ( p && *p && *p != '>' )
01222 {
01223 m_value.Append( *p );
01224 ++p;
01225 }
01226
01227 if ( !p )
01228 {
01229 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_PARSING_UNKNOWN], data->Cursor().row, data->Cursor().col );
01230 }
01231 if ( *p == '>' )
01232 return p+1;
01233 return p;
01234 }
01235
01236 const char* XmlComment::_Parse( const char* p, XmlParsingData* data, XmlEncoding encoding )
01237 {
01238 m_value = "";
01239
01240 p = SkipWhiteSpace( p, encoding );
01241
01242 if ( data )
01243 {
01244 data->Stamp( p, encoding );
01245 m_location = data->Cursor();
01246 }
01247 const char* startTag = "<!--";
01248 const char* endTag = "-->";
01249
01250 if ( !StringEqual( p, startTag, false, encoding ) )
01251 {
01252 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_PARSING_COMMENT], data->Cursor().row, data->Cursor().col );
01253 }
01254 p += strlen( startTag );
01255
01256
01257
01258
01259
01260
01261
01262
01263
01264
01265
01266
01267
01268
01269
01270
01271
01272
01273
01274 m_value = "";
01275
01276 while ( p && *p && !StringEqual( p, endTag, false, encoding ) )
01277 {
01278 m_value.Append( p, 1 );
01279 ++p;
01280 }
01281 if ( p )
01282 p += strlen( endTag );
01283
01284 return p;
01285 }
01286
// The two delimiters XmlAttribute::_Parse accepts around an attribute value.
static const char SINGLE_QUOTE = '\'';
static const char DOUBLE_QUOTE = '"';
01289
01290 const char* XmlAttribute::_Parse( const char* p, XmlParsingData* data, XmlEncoding encoding )
01291 {
01292 ASSERT(NULL != p);
01293
01294 p = SkipWhiteSpace( p, encoding );
01295
01296 if ( !p || !*p )
01297 return NULL;
01298
01299
01300
01301
01302
01303 if ( data )
01304 {
01305 data->Stamp( p, encoding );
01306 m_location = data->Cursor();
01307 }
01308
01309
01310 const char* pErr = p;
01311 p = ReadName( p, &m_name, encoding );
01312 if ( !p || !*p )
01313 {
01314 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_READING_ATTRIBUTES], data->Cursor().row, data->Cursor().col );
01315 }
01316
01317 p = SkipWhiteSpace( p, encoding );
01318 if ( !p || !*p || *p != '=' )
01319 {
01320 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_READING_ATTRIBUTES], data->Cursor().row, data->Cursor().col );
01321 }
01322
01323 ++p;
01324 p = SkipWhiteSpace( p, encoding );
01325 if ( !p || !*p )
01326 {
01327 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_READING_ATTRIBUTES], data->Cursor().row, data->Cursor().col );
01328 }
01329
01330 const char* end;
01331
01332 if ( *p == SINGLE_QUOTE )
01333 {
01334 ++p;
01335 end = "\'";
01336 p = ReadText( p, m_value, false, end, false, encoding );
01337 }
01338 else if ( *p == DOUBLE_QUOTE )
01339 {
01340 ++p;
01341 end = "\"";
01342 p = ReadText( p, m_value, false, end, false, encoding );
01343 }
01344 else
01345 {
01346
01347
01348
01349 m_value = "";
01350 while ( p && *p
01351 && !IsWhiteSpace( *p ) && *p != '\n' && *p != '\r'
01352 && *p != '/' && *p != '>' )
01353 {
01354 if ( *p == SINGLE_QUOTE || *p == DOUBLE_QUOTE )
01355 {
01356
01357
01358
01359 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_READING_ATTRIBUTES], data->Cursor().row, data->Cursor().col );
01360 }
01361 m_value.Append( *p );
01362 ++p;
01363 }
01364 }
01365 return p;
01366 }
01367
01368 const char* XmlText::_Parse( const char* p, XmlParsingData* data, XmlEncoding encoding )
01369 {
01370 m_value = "";
01371
01372 if ( data )
01373 {
01374 data->Stamp( p, encoding );
01375 m_location = data->Cursor();
01376 }
01377
01378 const char* const startTag = "<![CDATA[";
01379 const char* const endTag = "]]>";
01380
01381 if ( m_cdata || StringEqual( p, startTag, false, encoding ) )
01382 {
01383 m_cdata = true;
01384
01385 if ( !StringEqual( p, startTag, false, encoding ) )
01386 {
01387 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_PARSING_CDATA], data->Cursor().row, data->Cursor().col );
01388 }
01389 p += strlen( startTag );
01390
01391
01392 while ( p && *p && !StringEqual( p, endTag, false, encoding ) )
01393 {
01394 m_value.Append( *p );
01395 ++p;
01396 }
01397
01398 StringBuffer dummy;
01399 p = ReadText( p, dummy, false, endTag, false, encoding );
01400 return p;
01401 }
01402 else
01403 {
01404 bool ignoreWhite = true;
01405
01406 const char* end = "<";
01407 p = ReadText( p, m_value, ignoreWhite, end, false, encoding );
01408 if ( p )
01409 return p-1;
01410 return 0;
01411 }
01412 }
01413
01414 const char* XmlDeclaration::_Parse( const char* p, XmlParsingData* data, XmlEncoding _encoding )
01415 {
01416 p = SkipWhiteSpace( p, _encoding );
01417
01418
01419
01420 if ( !p || !*p || !StringEqual( p, "<?xml", true, _encoding ) )
01421 {
01422 throw new XmlException( _xmlErrorStrings[TIXML_ERROR_PARSING_DECLARATION], data->Cursor().row, data->Cursor().col );
01423 }
01424 if ( data )
01425 {
01426 data->Stamp( p, _encoding );
01427 m_location = data->Cursor();
01428 }
01429 p += 5;
01430
01431 version = "";
01432 encoding = "";
01433 standalone = "";
01434
01435 while ( p && *p )
01436 {
01437 if ( *p == '>' )
01438 {
01439 ++p;
01440 return p;
01441 }
01442
01443 p = SkipWhiteSpace( p, _encoding );
01444 if ( StringEqual( p, "version", true, _encoding ) )
01445 {
01446 XmlAttribute attrib;
01447 p = attrib._Parse( p, data, _encoding );
01448 version = *attrib.Value();
01449 }
01450 else if ( StringEqual( p, "encoding", true, _encoding ) )
01451 {
01452 XmlAttribute attrib;
01453 p = attrib._Parse( p, data, _encoding );
01454 encoding = *attrib.Value();
01455 }
01456 else if ( StringEqual( p, "standalone", true, _encoding ) )
01457 {
01458 XmlAttribute attrib;
01459 p = attrib._Parse( p, data, _encoding );
01460 standalone = *attrib.Value();
01461 }
01462 else
01463 {
01464
01465 while( p && *p && *p != '>' && !IsWhiteSpace( *p ) )
01466 ++p;
01467 }
01468 ValidateMem();
01469 }
01470 return NULL;
01471 }
01472
01473 bool XmlText::IsBlank() const
01474 {
01475 for ( int i=0; i<m_value.Length(); i++ )
01476 {
01477 if ( !IsWhiteSpace( m_value[i] ) )
01478 {
01479 return false;
01480 }
01481 }
01482 return true;
01483 }
01484