• Main Page
  • Related Pages
  • Modules
  • Namespaces
  • Classes
  • Files
  • File List
  • File Members

src/xpath/XPathLex.cpp

00001 /*
00002  *   This file is part of the Standard Portable Library (SPL).
00003  *
00004  *   SPL is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   SPL is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with SPL.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 #include <spl/Char.h>
00018 #include <spl/Exception.h>
00019 #include <spl/Int32.h>
00020 #include <spl/text/StringBuffer.h>
00021 #include <spl/xml/xpath/private/XPathLex.h>
00022 
00023 XPathLex::XPathLex()
00024 :       m_tokens(), m_lexums(), m_pos(0), m_state(XPS_START), m_lexCharPoses()
00025 {
00026 }
00027 
00028 XPathLex::XPathLex(const XPathLex& lex)
00029 :       m_tokens(lex.m_tokens), m_lexums(lex.m_lexums), m_pos(lex.m_pos), m_state(lex.m_state), m_lexCharPoses(lex.m_lexCharPoses)
00030 {
00031 }
00032 
00033 XPathLex::~XPathLex()
00034 {
00035 }
00036 
00037 XPathLex& XPathLex::operator =(const XPathLex& lex)
00038 {
00039         m_tokens = lex.m_tokens;
00040         m_lexums = lex.m_lexums;
00041         m_pos = lex.m_pos;
00042         m_state = lex.m_state;
00043         m_lexCharPoses = lex.m_lexCharPoses;
00044 
00045         return *this;
00046 }
00047 
00048 XPathLex::XPathLexToken XPathLex::Match(XPathLex::XPathLexToken token)
00049 {
00050         if (! HasMoreTokens())
00051         {
00052                 throw new IndexOutOfBoundsException("No more tokens");
00053         }
00054         if (token != m_tokens[m_pos++])
00055         {
00056                 throw new InvalidArgumentException("Match failure");
00057         }
00058         if (! HasMoreTokens())
00059         {
00060                 return XP_EOF;
00061         }
00062         return m_tokens[m_pos];
00063 }
00064 
00065 XPathLex::XPathLexToken XPathLex::GetRelativeToken(int distance) const
00066 {
00067         distance += m_pos;
00068         if (distance >= m_tokens.Count() || 0 > distance)
00069         {
00070                 return XP_EOF;
00071         }
00072         return m_tokens[distance];
00073 }
00074 
#if defined(DEBUG) || defined(_DEBUG)
/** Debug builds only: forwards CheckMem() to each of the three member lists. */
void XPathLex::CheckMem() const
{
	m_lexums.CheckMem();
	m_tokens.CheckMem();
	m_lexCharPoses.CheckMem();
}

/** Debug builds only: forwards ValidateMem() to each of the three member lists. */
void XPathLex::ValidateMem() const
{
	m_lexums.ValidateMem();
	m_tokens.ValidateMem();
	m_lexCharPoses.ValidateMem();
}
#endif
00090 
00091 void XPathLex::TokenizeString(StringBuffer& token, char ch, char termchar, int pos)
00092 {
00093         if (ch == termchar)
00094         {
00095                 m_tokens.Add(XP_STRING);
00096                 m_lexums.Add(token.ToString());
00097                 m_lexCharPoses.Add(pos - token.Length());
00098                 token.SetLength(0);
00099                 m_state = XPS_START;
00100         }
00101         else
00102         {
00103                 token.Append(ch);
00104         }
00105 }
00106 
00107 void XPathLex::TokenizeChar2(StringBuffer& token, int& pos, char ch, char chChar2, XPathLexToken char2Token, XPathLexToken char1Token)
00108 {
00109         if (chChar2 == ch)
00110         {
00111                 m_tokens.Add(char2Token);
00112                 token.Append(ch);
00113                 m_lexums.Add(token.ToString());
00114                 m_lexCharPoses.Add(pos - token.Length());
00115         }
00116         else
00117         {
00118                 m_tokens.Add(char1Token);
00119                 m_lexums.Add(token.ToString());
00120                 pos--;
00121                 m_lexCharPoses.Add(pos - token.Length());
00122         }
00123         token.SetLength(0);
00124         m_state = XPS_START;
00125 }
00126 
00127 void XPathLex::Tokenize(const String& text)
00128 {
00129         m_tokens.Clear();
00130         m_lexums.Clear();
00131         m_lexCharPoses.Clear();
00132         m_state = XPS_START;
00133         m_pos = 0;
00134 
00135         StringBuffer token;
00136 
00137         for ( int x = 0; x < text.Length(); x++ )
00138         {
00139                 char ch = text.CharAt(x);
00140                 bool complete = false;
00141 
00142                 switch( m_state )
00143                 {
00144                 case XPS_START:
00145                         if (Char::IsWhiteSpace(ch))
00146                         {
00147                                 break;
00148                         }
00149                         token.Append(ch);
00150                         if (Char::IsDigit(ch))
00151                         {
00152                                 m_state = XPS_NUMERIC;
00153                                 break;
00154                         }
00155                         else if (Char::IsLetter(ch) || '_' == ch)
00156                         {
00157                                 m_state = XPS_NAME;
00158                                 break;
00159                         }
00160                         switch (ch)
00161                         {
00162                         case '(':
00163                                 m_tokens.Add(XP_LPAR);
00164                                 complete = true;
00165                                 break;
00166                         case ')':
00167                                 m_tokens.Add(XP_RPAR);
00168                                 complete = true;
00169                                 break;
00170                         case '[':
00171                                 m_tokens.Add(XP_LBRAC);
00172                                 complete = true;
00173                                 break;
00174                         case ']':
00175                                 m_tokens.Add(XP_RBRAC);
00176                                 complete = true;
00177                                 break;
00178                         case '.':
00179                                 m_state = XPS_DOT;
00180                                 break;
00181                         case '@':
00182                                 m_tokens.Add(XP_AT);
00183                                 complete = true;
00184                                 break;
00185                         case ',':
00186                                 m_tokens.Add(XP_COMMA);
00187                                 complete = true;
00188                                 break;
00189                         case ':':
00190                                 m_state = XPS_COLON;
00191                                 break;
00192                         case '\'':
00193                                 m_state = XPS_TICK;
00194                                 token.Clear();
00195                                 break;
00196                         case '"':
00197                                 m_state = XPS_QUOTE;
00198                                 token.Clear();
00199                                 break;
00200                         case '*':
00201                                 m_tokens.Add(XP_STAR);
00202                                 complete = true;
00203                                 break;
00204                         case '/':
00205                                 m_state = XPS_SLASH;
00206                                 break;
00207                         case '|':
00208                                 m_tokens.Add(XP_PIPE);
00209                                 complete = true;
00210                                 break;
00211                         case '+':
00212                                 m_tokens.Add(XP_PLUS);
00213                                 complete = true;
00214                                 break;
00215                         case '-':
00216                                 m_state = XPS_NEG;
00217                                 break;
00218                         case '=':
00219                                 m_state = XPS_EQ;
00220                                 break;
00221                         case '!':
00222                                 m_state = XPS_BANG;
00223                                 break;
00224                         case '<':
00225                                 m_state = XPS_LT;
00226                                 break;
00227                         case '>':
00228                                 m_state = XPS_GT;
00229                                 break;
00230                         case '$':
00231                                 m_tokens.Add(XP_DOLLAR);
00232                                 complete = true;
00233                                 break;
00234                         case ' ':
00235                         case '\n':
00236                         case '\r':
00237                         case '\t':
00238                                 break;
00239 
00240                         default:
00241                                 throw new InvalidArgumentException("Invalid char '" + *Char::ToString(ch) + "' at position " + *Int32::ToString(x));
00242                         }
00243                         if (complete)
00244                         {
00245                                 m_lexums.Add(token.ToString());
00246                                 m_lexCharPoses.Add(x - token.Length());
00247                                 token.SetLength(0);
00248                         }
00249                         break;
00250 
00251                 case XPS_NUMERIC:
00252                         // int or float
00253                         if ('.' == ch)
00254                         {
00255                                 token.Append(ch);
00256                                 m_state = XPS_FLOAT;
00257                                 break;
00258                         }
00259                         else if (Char::IsWhiteSpace(ch) || ispunct(ch) || x == text.Length() - 1)
00260                         {
00261                                 if (x != text.Length() - 1)
00262                                 {
00263                                         x--;
00264                                 }
00265                                 else
00266                                 {
00267                                         token.Append(ch);
00268                                 }
00269                                 m_tokens.Add(XP_INT);
00270                                 m_lexums.Add(token.ToString());
00271                                 m_lexCharPoses.Add(x - token.Length());
00272                                 token.SetLength(0);
00273                                 m_state = XPS_START;
00274                                 break;
00275                         }
00276                         else if (Char::IsDigit(ch))
00277                         {
00278                                 token.Append(ch);
00279                         }
00280                         else if ('e' == ch || 'E' == ch)
00281                         {
00282                                 token.Append(ch);
00283                                 m_state = XPS_FLOAT_EXP;
00284                                 break;
00285                         }
00286                         else
00287                         {
00288                                 throw new InvalidArgumentException("Invalid char '" + *Char::ToString(ch) + "' at position " + *Int32::ToString(x));
00289                         }
00290                         break;
00291 
00292                 case XPS_FLOAT:
00293                         if (Char::IsWhiteSpace(ch) || ispunct(ch) || x == text.Length() - 1)
00294                         {
00295                                 if (x != text.Length() - 1)
00296                                 {
00297                                         x--;
00298                                 }
00299                                 else
00300                                 {
00301                                         token.Append(ch);
00302                                 }
00303                                 m_tokens.Add(XP_FLOAT);
00304                                 m_lexums.Add(token.ToString());
00305                                 m_lexCharPoses.Add(x - token.Length());
00306                                 token.SetLength(0);
00307                                 m_state = XPS_START;
00308                                 break;
00309                         }
00310                         else if (Char::IsDigit(ch))
00311                         {
00312                                 token.Append(ch);
00313                         }
00314                         else if ('e' == ch || 'E' == ch)
00315                         {
00316                                 token.Append(ch);
00317                                 m_state = XPS_FLOAT_EXP;
00318                                 break;
00319                         }
00320                         else
00321                         {
00322                                 throw new InvalidArgumentException("Invalid char '" + *Char::ToString(ch) + "' at position " + *Int32::ToString(x));
00323                         }
00324                         break;
00325 
00326                 case XPS_FLOAT_EXP:
00327                         if (Char::IsDigit(ch))
00328                         {
00329                                 token.Append(ch);
00330                         }
00331                         if (!Char::IsDigit(ch) || x == text.Length() - 1)
00332                         {
00333                                 if (x != text.Length() - 1)
00334                                 {
00335                                         x--;
00336                                 }
00337                                 else
00338                                 {
00339                                         token.Append(ch);
00340                                 }
00341                                 m_tokens.Add(XP_FLOAT);
00342                                 m_lexums.Add(token.ToString());
00343                                 m_lexCharPoses.Add(x - token.Length());
00344                                 token.SetLength(0);
00345                                 m_state = XPS_START;
00346                                 break;
00347                         }                       
00348                         break;
00349 
00350                 case XPS_NAME:
00351                         if ( !IsNameChar(ch) || x == text.Length() - 1)
00352                         {
00353                                 if (!IsNameChar(ch))
00354                                 {
00355                                         x--;
00356                                 }
00357                                 else
00358                                 {
00359                                         token.Append(ch);
00360                                 }
00361                                 m_tokens.Add(XP_LITERAL);
00362                                 m_lexums.Add(token.ToString());
00363                                 m_lexCharPoses.Add(x - token.Length());
00364                                 token.SetLength(0);
00365                                 m_state = XPS_START;
00366                         }
00367                         else
00368                         {
00369                                 token.Append(ch);
00370                         }
00371                         break;
00372 
00373                 case XPS_DOT:
00374                         // dot or dotdot or number
00375                         if (Char::IsNumber(ch))
00376                         {
00377                                 token.Append(ch);
00378                                 m_state = XPS_FLOAT;
00379                                 break;
00380                         }
00381                         if (!Char::IsNumber(ch) || x == text.Length() - 1)
00382                         {
00383                                 TokenizeChar2(token, x, ch, '.', XP_DOTDOT, XP_DOT);
00384                         }
00385                         break;
00386 
00387                 case XPS_COLON:
00388                         // colon or colon colon
00389                         TokenizeChar2(token, x, ch, ':', XP_COLONCOLON, XP_COLON);
00390                         break;
00391 
00392                 case XPS_TICK:
00393                         // string
00394                         TokenizeString(token, ch, '\'', x);
00395                         break;
00396 
00397                 case XPS_QUOTE:
00398                         // string
00399                         TokenizeString(token, ch, '"', x);
00400                         break;
00401 
00402                 case XPS_SLASH:
00403                         // slash or slashslash
00404                         TokenizeChar2(token, x, ch, '/', XP_SLASHSLASH, XP_SLASH);
00405                         break;
00406                 
00407                 case XPS_BANG:
00408                         // !=
00409                         if ('=' != ch)
00410                         {
00411                                 throw new InvalidArgumentException("Invalid char '" + *Char::ToString(ch) + "' at position " + *Int32::ToString(x));
00412                         }
00413                         m_tokens.Add(XP_NEQ);
00414                         token.Append(ch);
00415                         m_lexums.Add(token.ToString());
00416                         m_lexCharPoses.Add(x - token.Length());
00417                         token.SetLength(0);
00418                         m_state = XPS_START;                    
00419                         break;
00420 
00421                 case XPS_LT:
00422                         // < or <=
00423                         TokenizeChar2(token, x, ch, '=', XP_LTEQ, XP_LT);
00424                         break;
00425 
00426                 case XPS_GT:
00427                         // > or >=
00428                         TokenizeChar2(token, x, ch, '=', XP_GTEQ, XP_GT);
00429                         break;
00430         
00431                 case XPS_EQ:
00432                         // = or ==
00433                         TokenizeChar2(token, x, ch, '=', XP_EQEQ, XP_EQ);
00434                         break;
00435 
00436                 case XPS_NEG:
00437                         if (Char::IsDigit(ch))
00438                         {
00439                                 token.Append(ch);
00440                                 m_state = XPS_NUMERIC;
00441                         }
00442                         else
00443                         {
00444                                 m_tokens.Add(XP_MIN);
00445                                 m_lexums.Add(token.ToString());
00446                                 m_lexCharPoses.Add(x - token.Length());
00447                                 token.SetLength(0);
00448                                 x--;
00449                                 m_state = XPS_START;
00450                         }
00451                         break;
00452 
00453                 default:
00454                         throw new StateException("Internal error, invalid XPathLex state");
00455                 }
00456         }
00457         
00458         if (XPS_NUMERIC == m_state)
00459         {
00460                 m_tokens.Add(XP_INT);
00461                 m_lexums.Add(token.ToString());
00462                 m_lexCharPoses.Add(text.Length());
00463         }
00464         else if (XPS_NAME == m_state)
00465         {
00466                 m_tokens.Add(XP_STRING);
00467                 m_lexums.Add(token.ToString());
00468                 m_lexCharPoses.Add(text.Length());
00469         }
00470         else if (XPS_SLASH == m_state)
00471         {
00472                 m_tokens.Add(XP_SLASH);
00473                 m_lexums.Add(token.ToString());
00474                 m_lexCharPoses.Add(text.Length());
00475         }
00476 
00477         m_tokens.Add(XP_EOF);
00478         m_lexums.Add(StringPtr(new String()));
00479 }