00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017 #include <spl/Char.h>
00018 #include <spl/Exception.h>
00019 #include <spl/Int32.h>
00020 #include <spl/text/StringBuffer.h>
00021 #include <spl/xml/xpath/private/XPathLex.h>
00022
00023 XPathLex::XPathLex()
00024 : m_tokens(), m_lexums(), m_pos(0), m_state(XPS_START), m_lexCharPoses()
00025 {
00026 }
00027
00028 XPathLex::XPathLex(const XPathLex& lex)
00029 : m_tokens(lex.m_tokens), m_lexums(lex.m_lexums), m_pos(lex.m_pos), m_state(lex.m_state), m_lexCharPoses(lex.m_lexCharPoses)
00030 {
00031 }
00032
00033 XPathLex::~XPathLex()
00034 {
00035 }
00036
00037 XPathLex& XPathLex::operator =(const XPathLex& lex)
00038 {
00039 m_tokens = lex.m_tokens;
00040 m_lexums = lex.m_lexums;
00041 m_pos = lex.m_pos;
00042 m_state = lex.m_state;
00043 m_lexCharPoses = lex.m_lexCharPoses;
00044
00045 return *this;
00046 }
00047
00048 XPathLex::XPathLexToken XPathLex::Match(XPathLex::XPathLexToken token)
00049 {
00050 if (! HasMoreTokens())
00051 {
00052 throw new IndexOutOfBoundsException("No more tokens");
00053 }
00054 if (token != m_tokens[m_pos++])
00055 {
00056 throw new InvalidArgumentException("Match failure");
00057 }
00058 if (! HasMoreTokens())
00059 {
00060 return XP_EOF;
00061 }
00062 return m_tokens[m_pos];
00063 }
00064
00065 XPathLex::XPathLexToken XPathLex::GetRelativeToken(int distance) const
00066 {
00067 distance += m_pos;
00068 if (distance >= m_tokens.Count() || 0 > distance)
00069 {
00070 return XP_EOF;
00071 }
00072 return m_tokens[distance];
00073 }
00074
00075 #if defined(DEBUG) || defined(_DEBUG)
00076 void XPathLex::CheckMem() const
00077 {
00078 m_lexums.CheckMem();
00079 m_tokens.CheckMem();
00080 m_lexCharPoses.CheckMem();
00081 }
00082
00083 void XPathLex::ValidateMem() const
00084 {
00085 m_lexums.ValidateMem();
00086 m_tokens.ValidateMem();
00087 m_lexCharPoses.ValidateMem();
00088 }
00089 #endif
00090
00091 void XPathLex::TokenizeString(StringBuffer& token, char ch, char termchar, int pos)
00092 {
00093 if (ch == termchar)
00094 {
00095 m_tokens.Add(XP_STRING);
00096 m_lexums.Add(token.ToString());
00097 m_lexCharPoses.Add(pos - token.Length());
00098 token.SetLength(0);
00099 m_state = XPS_START;
00100 }
00101 else
00102 {
00103 token.Append(ch);
00104 }
00105 }
00106
00107 void XPathLex::TokenizeChar2(StringBuffer& token, int& pos, char ch, char chChar2, XPathLexToken char2Token, XPathLexToken char1Token)
00108 {
00109 if (chChar2 == ch)
00110 {
00111 m_tokens.Add(char2Token);
00112 token.Append(ch);
00113 m_lexums.Add(token.ToString());
00114 m_lexCharPoses.Add(pos - token.Length());
00115 }
00116 else
00117 {
00118 m_tokens.Add(char1Token);
00119 m_lexums.Add(token.ToString());
00120 pos--;
00121 m_lexCharPoses.Add(pos - token.Length());
00122 }
00123 token.SetLength(0);
00124 m_state = XPS_START;
00125 }
00126
00127 void XPathLex::Tokenize(const String& text)
00128 {
00129 m_tokens.Clear();
00130 m_lexums.Clear();
00131 m_lexCharPoses.Clear();
00132 m_state = XPS_START;
00133 m_pos = 0;
00134
00135 StringBuffer token;
00136
00137 for ( int x = 0; x < text.Length(); x++ )
00138 {
00139 char ch = text.CharAt(x);
00140 bool complete = false;
00141
00142 switch( m_state )
00143 {
00144 case XPS_START:
00145 if (Char::IsWhiteSpace(ch))
00146 {
00147 break;
00148 }
00149 token.Append(ch);
00150 if (Char::IsDigit(ch))
00151 {
00152 m_state = XPS_NUMERIC;
00153 break;
00154 }
00155 else if (Char::IsLetter(ch) || '_' == ch)
00156 {
00157 m_state = XPS_NAME;
00158 break;
00159 }
00160 switch (ch)
00161 {
00162 case '(':
00163 m_tokens.Add(XP_LPAR);
00164 complete = true;
00165 break;
00166 case ')':
00167 m_tokens.Add(XP_RPAR);
00168 complete = true;
00169 break;
00170 case '[':
00171 m_tokens.Add(XP_LBRAC);
00172 complete = true;
00173 break;
00174 case ']':
00175 m_tokens.Add(XP_RBRAC);
00176 complete = true;
00177 break;
00178 case '.':
00179 m_state = XPS_DOT;
00180 break;
00181 case '@':
00182 m_tokens.Add(XP_AT);
00183 complete = true;
00184 break;
00185 case ',':
00186 m_tokens.Add(XP_COMMA);
00187 complete = true;
00188 break;
00189 case ':':
00190 m_state = XPS_COLON;
00191 break;
00192 case '\'':
00193 m_state = XPS_TICK;
00194 token.Clear();
00195 break;
00196 case '"':
00197 m_state = XPS_QUOTE;
00198 token.Clear();
00199 break;
00200 case '*':
00201 m_tokens.Add(XP_STAR);
00202 complete = true;
00203 break;
00204 case '/':
00205 m_state = XPS_SLASH;
00206 break;
00207 case '|':
00208 m_tokens.Add(XP_PIPE);
00209 complete = true;
00210 break;
00211 case '+':
00212 m_tokens.Add(XP_PLUS);
00213 complete = true;
00214 break;
00215 case '-':
00216 m_state = XPS_NEG;
00217 break;
00218 case '=':
00219 m_state = XPS_EQ;
00220 break;
00221 case '!':
00222 m_state = XPS_BANG;
00223 break;
00224 case '<':
00225 m_state = XPS_LT;
00226 break;
00227 case '>':
00228 m_state = XPS_GT;
00229 break;
00230 case '$':
00231 m_tokens.Add(XP_DOLLAR);
00232 complete = true;
00233 break;
00234 case ' ':
00235 case '\n':
00236 case '\r':
00237 case '\t':
00238 break;
00239
00240 default:
00241 throw new InvalidArgumentException("Invalid char '" + *Char::ToString(ch) + "' at position " + *Int32::ToString(x));
00242 }
00243 if (complete)
00244 {
00245 m_lexums.Add(token.ToString());
00246 m_lexCharPoses.Add(x - token.Length());
00247 token.SetLength(0);
00248 }
00249 break;
00250
00251 case XPS_NUMERIC:
00252
00253 if ('.' == ch)
00254 {
00255 token.Append(ch);
00256 m_state = XPS_FLOAT;
00257 break;
00258 }
00259 else if (Char::IsWhiteSpace(ch) || ispunct(ch) || x == text.Length() - 1)
00260 {
00261 if (x != text.Length() - 1)
00262 {
00263 x--;
00264 }
00265 else
00266 {
00267 token.Append(ch);
00268 }
00269 m_tokens.Add(XP_INT);
00270 m_lexums.Add(token.ToString());
00271 m_lexCharPoses.Add(x - token.Length());
00272 token.SetLength(0);
00273 m_state = XPS_START;
00274 break;
00275 }
00276 else if (Char::IsDigit(ch))
00277 {
00278 token.Append(ch);
00279 }
00280 else if ('e' == ch || 'E' == ch)
00281 {
00282 token.Append(ch);
00283 m_state = XPS_FLOAT_EXP;
00284 break;
00285 }
00286 else
00287 {
00288 throw new InvalidArgumentException("Invalid char '" + *Char::ToString(ch) + "' at position " + *Int32::ToString(x));
00289 }
00290 break;
00291
00292 case XPS_FLOAT:
00293 if (Char::IsWhiteSpace(ch) || ispunct(ch) || x == text.Length() - 1)
00294 {
00295 if (x != text.Length() - 1)
00296 {
00297 x--;
00298 }
00299 else
00300 {
00301 token.Append(ch);
00302 }
00303 m_tokens.Add(XP_FLOAT);
00304 m_lexums.Add(token.ToString());
00305 m_lexCharPoses.Add(x - token.Length());
00306 token.SetLength(0);
00307 m_state = XPS_START;
00308 break;
00309 }
00310 else if (Char::IsDigit(ch))
00311 {
00312 token.Append(ch);
00313 }
00314 else if ('e' == ch || 'E' == ch)
00315 {
00316 token.Append(ch);
00317 m_state = XPS_FLOAT_EXP;
00318 break;
00319 }
00320 else
00321 {
00322 throw new InvalidArgumentException("Invalid char '" + *Char::ToString(ch) + "' at position " + *Int32::ToString(x));
00323 }
00324 break;
00325
00326 case XPS_FLOAT_EXP:
00327 if (Char::IsDigit(ch))
00328 {
00329 token.Append(ch);
00330 }
00331 if (!Char::IsDigit(ch) || x == text.Length() - 1)
00332 {
00333 if (x != text.Length() - 1)
00334 {
00335 x--;
00336 }
00337 else
00338 {
00339 token.Append(ch);
00340 }
00341 m_tokens.Add(XP_FLOAT);
00342 m_lexums.Add(token.ToString());
00343 m_lexCharPoses.Add(x - token.Length());
00344 token.SetLength(0);
00345 m_state = XPS_START;
00346 break;
00347 }
00348 break;
00349
00350 case XPS_NAME:
00351 if ( !IsNameChar(ch) || x == text.Length() - 1)
00352 {
00353 if (!IsNameChar(ch))
00354 {
00355 x--;
00356 }
00357 else
00358 {
00359 token.Append(ch);
00360 }
00361 m_tokens.Add(XP_LITERAL);
00362 m_lexums.Add(token.ToString());
00363 m_lexCharPoses.Add(x - token.Length());
00364 token.SetLength(0);
00365 m_state = XPS_START;
00366 }
00367 else
00368 {
00369 token.Append(ch);
00370 }
00371 break;
00372
00373 case XPS_DOT:
00374
00375 if (Char::IsNumber(ch))
00376 {
00377 token.Append(ch);
00378 m_state = XPS_FLOAT;
00379 break;
00380 }
00381 if (!Char::IsNumber(ch) || x == text.Length() - 1)
00382 {
00383 TokenizeChar2(token, x, ch, '.', XP_DOTDOT, XP_DOT);
00384 }
00385 break;
00386
00387 case XPS_COLON:
00388
00389 TokenizeChar2(token, x, ch, ':', XP_COLONCOLON, XP_COLON);
00390 break;
00391
00392 case XPS_TICK:
00393
00394 TokenizeString(token, ch, '\'', x);
00395 break;
00396
00397 case XPS_QUOTE:
00398
00399 TokenizeString(token, ch, '"', x);
00400 break;
00401
00402 case XPS_SLASH:
00403
00404 TokenizeChar2(token, x, ch, '/', XP_SLASHSLASH, XP_SLASH);
00405 break;
00406
00407 case XPS_BANG:
00408
00409 if ('=' != ch)
00410 {
00411 throw new InvalidArgumentException("Invalid char '" + *Char::ToString(ch) + "' at position " + *Int32::ToString(x));
00412 }
00413 m_tokens.Add(XP_NEQ);
00414 token.Append(ch);
00415 m_lexums.Add(token.ToString());
00416 m_lexCharPoses.Add(x - token.Length());
00417 token.SetLength(0);
00418 m_state = XPS_START;
00419 break;
00420
00421 case XPS_LT:
00422
00423 TokenizeChar2(token, x, ch, '=', XP_LTEQ, XP_LT);
00424 break;
00425
00426 case XPS_GT:
00427
00428 TokenizeChar2(token, x, ch, '=', XP_GTEQ, XP_GT);
00429 break;
00430
00431 case XPS_EQ:
00432
00433 TokenizeChar2(token, x, ch, '=', XP_EQEQ, XP_EQ);
00434 break;
00435
00436 case XPS_NEG:
00437 if (Char::IsDigit(ch))
00438 {
00439 token.Append(ch);
00440 m_state = XPS_NUMERIC;
00441 }
00442 else
00443 {
00444 m_tokens.Add(XP_MIN);
00445 m_lexums.Add(token.ToString());
00446 m_lexCharPoses.Add(x - token.Length());
00447 token.SetLength(0);
00448 x--;
00449 m_state = XPS_START;
00450 }
00451 break;
00452
00453 default:
00454 throw new StateException("Internal error, invalid XPathLex state");
00455 }
00456 }
00457
00458 if (XPS_NUMERIC == m_state)
00459 {
00460 m_tokens.Add(XP_INT);
00461 m_lexums.Add(token.ToString());
00462 m_lexCharPoses.Add(text.Length());
00463 }
00464 else if (XPS_NAME == m_state)
00465 {
00466 m_tokens.Add(XP_STRING);
00467 m_lexums.Add(token.ToString());
00468 m_lexCharPoses.Add(text.Length());
00469 }
00470 else if (XPS_SLASH == m_state)
00471 {
00472 m_tokens.Add(XP_SLASH);
00473 m_lexums.Add(token.ToString());
00474 m_lexCharPoses.Add(text.Length());
00475 }
00476
00477 m_tokens.Add(XP_EOF);
00478 m_lexums.Add(StringPtr(new String()));
00479 }