• Main Page
  • Related Pages
  • Modules
  • Namespaces
  • Classes
  • Files
  • File List
  • File Members

src/pcre/pcre_get.c

00001 /*************************************************
00002 *      Perl-Compatible Regular Expressions       *
00003 *************************************************/
00004 
00005 /* PCRE is a library of functions to support regular expressions whose syntax
00006 and semantics are as close as possible to those of the Perl 5 language.
00007 
00008                        Written by Philip Hazel
00009            Copyright (c) 1997-2008 University of Cambridge
00010 
00011 -----------------------------------------------------------------------------
00012 Redistribution and use in source and binary forms, with or without
00013 modification, are permitted provided that the following conditions are met:
00014 
00015     * Redistributions of source code must retain the above copyright notice,
00016       this list of conditions and the following disclaimer.
00017 
00018     * Redistributions in binary form must reproduce the above copyright
00019       notice, this list of conditions and the following disclaimer in the
00020       documentation and/or other materials provided with the distribution.
00021 
00022     * Neither the name of the University of Cambridge nor the names of its
00023       contributors may be used to endorse or promote products derived from
00024       this software without specific prior written permission.
00025 
00026 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
00027 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00028 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00029 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
00030 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
00031 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
00032 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
00033 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
00034 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
00035 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00036 POSSIBILITY OF SUCH DAMAGE.
00037 -----------------------------------------------------------------------------
00038 */
00039 
00040 
00041 /* This module contains some convenience functions for extracting substrings
00042 from the subject string after a regex match has succeeded. The original idea
00043 for these functions came from Scott Wimer. */
00044 
00045 
/* Select the build configuration header: "config.h" when the build defines
HAVE_CONFIG_H, otherwise the Win32 settings when compiling for Windows. This
used to read "#else if defined(_WINDOWS)"; the preprocessor ignores anything
after #else, so that condition was never evaluated. _WIN32 is tested as well
so that Windows builds which do not define _WINDOWS still get the header. */

#ifdef HAVE_CONFIG_H
#include "config.h"
#elif defined(_WINDOWS) || defined(_WIN32)
#include <spl/configwin32.h>
#endif
00051 
00052 
00053 #include "pcre_internal.h"
00054 
00055 
00056 /*************************************************
00057 *           Find number for named string         *
00058 *************************************************/
00059 
00060 /* This function is used by the get_first_set() function below, as well
00061 as being generally available. It assumes that names are unique.
00062 
00063 Arguments:
00064   code        the compiled regex
00065   stringname  the name whose number is required
00066 
00067 Returns:      the number of the named parentheses, or a negative number
00068                 (PCRE_ERROR_NOSUBSTRING) if not found
00069 */
00070 
00071 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
00072 pcre_get_stringnumber(const pcre *code, const char *stringname)
00073 {
00074 int rc;
00075 int entrysize;
00076 int top, bot;
00077 uschar *nametable;
00078 
00079 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
00080   return rc;
00081 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
00082 
00083 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
00084   return rc;
00085 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
00086   return rc;
00087 
00088 bot = 0;
00089 while (top > bot)
00090   {
00091   int mid = (top + bot) / 2;
00092   uschar *entry = nametable + entrysize*mid;
00093   int c = strcmp(stringname, (char *)(entry + 2));
00094   if (c == 0) return (entry[0] << 8) + entry[1];
00095   if (c > 0) bot = mid + 1; else top = mid;
00096   }
00097 
00098 return PCRE_ERROR_NOSUBSTRING;
00099 }
00100 
00101 
00102 
00103 /*************************************************
00104 *     Find (multiple) entries for named string   *
00105 *************************************************/
00106 
00107 /* This is used by the get_first_set() function below, as well as being
00108 generally available. It is used when duplicated names are permitted.
00109 
00110 Arguments:
00111   code        the compiled regex
00112   stringname  the name whose entries required
00113   firstptr    where to put the pointer to the first entry
00114   lastptr     where to put the pointer to the last entry
00115 
00116 Returns:      the length of each entry, or a negative number
00117                 (PCRE_ERROR_NOSUBSTRING) if not found
00118 */
00119 
00120 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
00121 pcre_get_stringtable_entries(const pcre *code, const char *stringname,
00122   char **firstptr, char **lastptr)
00123 {
00124 int rc;
00125 int entrysize;
00126 int top, bot;
00127 uschar *nametable, *lastentry;
00128 
00129 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
00130   return rc;
00131 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
00132 
00133 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
00134   return rc;
00135 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
00136   return rc;
00137 
00138 lastentry = nametable + entrysize * (top - 1);
00139 bot = 0;
00140 while (top > bot)
00141   {
00142   int mid = (top + bot) / 2;
00143   uschar *entry = nametable + entrysize*mid;
00144   int c = strcmp(stringname, (char *)(entry + 2));
00145   if (c == 0)
00146     {
00147     uschar *first = entry;
00148     uschar *last = entry;
00149     while (first > nametable)
00150       {
00151       if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break;
00152       first -= entrysize;
00153       }
00154     while (last < lastentry)
00155       {
00156       if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break;
00157       last += entrysize;
00158       }
00159     *firstptr = (char *)first;
00160     *lastptr = (char *)last;
00161     return entrysize;
00162     }
00163   if (c > 0) bot = mid + 1; else top = mid;
00164   }
00165 
00166 return PCRE_ERROR_NOSUBSTRING;
00167 }
00168 
00169 
00170 
00171 /*************************************************
00172 *    Find first set of multiple named strings    *
00173 *************************************************/
00174 
00175 /* This function allows for duplicate names in the table of named substrings.
00176 It returns the number of the first one that was set in a pattern match.
00177 
00178 Arguments:
00179   code         the compiled regex
00180   stringname   the name of the capturing substring
00181   ovector      the vector of matched substrings
00182 
00183 Returns:       the number of the first that is set,
00184                or the number of the last one if none are set,
00185                or a negative number on error
00186 */
00187 
00188 static int
00189 get_first_set(const pcre *code, const char *stringname, int *ovector)
00190 {
00191 const real_pcre *re = (const real_pcre *)code;
00192 int entrysize;
00193 char *first, *last;
00194 uschar *entry;
00195 if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
00196   return pcre_get_stringnumber(code, stringname);
00197 entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
00198 if (entrysize <= 0) return entrysize;
00199 for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize)
00200   {
00201   int n = (entry[0] << 8) + entry[1];
00202   if (ovector[n*2] >= 0) return n;
00203   }
00204 return (first[0] << 8) + first[1];
00205 }
00206 
00207 
00208 
00209 
00210 /*************************************************
00211 *      Copy captured string to given buffer      *
00212 *************************************************/
00213 
00214 /* This function copies a single captured substring into a given buffer.
00215 Note that we use memcpy() rather than strncpy() in case there are binary zeros
00216 in the string.
00217 
00218 Arguments:
00219   subject        the subject string that was matched
00220   ovector        pointer to the offsets table
00221   stringcount    the number of substrings that were captured
00222                    (i.e. the yield of the pcre_exec call, unless
00223                    that was zero, in which case it should be 1/3
00224                    of the offset table size)
00225   stringnumber   the number of the required substring
00226   buffer         where to put the substring
00227   size           the size of the buffer
00228 
00229 Returns:         if successful:
00230                    the length of the copied string, not including the zero
00231                    that is put on the end; can be zero
00232                  if not successful:
00233                    PCRE_ERROR_NOMEMORY (-6) buffer too small
00234                    PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
00235 */
00236 
00237 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
00238 pcre_copy_substring(const char *subject, int *ovector, int stringcount,
00239   int stringnumber, char *buffer, int size)
00240 {
00241 int yield;
00242 if (stringnumber < 0 || stringnumber >= stringcount)
00243   return PCRE_ERROR_NOSUBSTRING;
00244 stringnumber *= 2;
00245 yield = ovector[stringnumber+1] - ovector[stringnumber];
00246 if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
00247 memcpy(buffer, subject + ovector[stringnumber], yield);
00248 buffer[yield] = 0;
00249 return yield;
00250 }
00251 
00252 
00253 
00254 /*************************************************
00255 *   Copy named captured string to given buffer   *
00256 *************************************************/
00257 
00258 /* This function copies a single captured substring into a given buffer,
00259 identifying it by name. If the regex permits duplicate names, the first
00260 substring that is set is chosen.
00261 
00262 Arguments:
00263   code           the compiled regex
00264   subject        the subject string that was matched
00265   ovector        pointer to the offsets table
00266   stringcount    the number of substrings that were captured
00267                    (i.e. the yield of the pcre_exec call, unless
00268                    that was zero, in which case it should be 1/3
00269                    of the offset table size)
00270   stringname     the name of the required substring
00271   buffer         where to put the substring
00272   size           the size of the buffer
00273 
00274 Returns:         if successful:
00275                    the length of the copied string, not including the zero
00276                    that is put on the end; can be zero
00277                  if not successful:
00278                    PCRE_ERROR_NOMEMORY (-6) buffer too small
00279                    PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
00280 */
00281 
00282 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
00283 pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
00284   int stringcount, const char *stringname, char *buffer, int size)
00285 {
00286 int n = get_first_set(code, stringname, ovector);
00287 if (n <= 0) return n;
00288 return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
00289 }
00290 
00291 
00292 
00293 /*************************************************
00294 *      Copy all captured strings to new store    *
00295 *************************************************/
00296 
00297 /* This function gets one chunk of store and builds a list of pointers and all
00298 of the captured substrings in it. A NULL pointer is put on the end of the list.
00299 
00300 Arguments:
00301   subject        the subject string that was matched
00302   ovector        pointer to the offsets table
00303   stringcount    the number of substrings that were captured
00304                    (i.e. the yield of the pcre_exec call, unless
00305                    that was zero, in which case it should be 1/3
00306                    of the offset table size)
00307   listptr        set to point to the list of pointers
00308 
00309 Returns:         if successful: 0
00310                  if not successful:
00311                    PCRE_ERROR_NOMEMORY (-6) failed to get store
00312 */
00313 
00314 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
00315 pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
00316   const char ***listptr)
00317 {
00318 int i;
00319 int size = sizeof(char *);
00320 int double_count = stringcount * 2;
00321 char **stringlist;
00322 char *p;
00323 
00324 for (i = 0; i < double_count; i += 2)
00325   size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
00326 
00327 stringlist = (char **)(pcre_malloc)(size);
00328 if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
00329 
00330 *listptr = (const char **)stringlist;
00331 p = (char *)(stringlist + stringcount + 1);
00332 
00333 for (i = 0; i < double_count; i += 2)
00334   {
00335   int len = ovector[i+1] - ovector[i];
00336   memcpy(p, subject + ovector[i], len);
00337   *stringlist++ = p;
00338   p += len;
00339   *p++ = 0;
00340   }
00341 
00342 *stringlist = NULL;
00343 return 0;
00344 }
00345 
00346 
00347 
00348 /*************************************************
00349 *   Free store obtained by get_substring_list    *
00350 *************************************************/
00351 
00352 /* This function exists for the benefit of people calling PCRE from non-C
00353 programs that can call its functions, but not free() or (pcre_free)() directly.
00354 
00355 Argument:   the result of a previous pcre_get_substring_list()
00356 Returns:    nothing
00357 */
00358 
00359 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
00360 pcre_free_substring_list(const char **pointer)
00361 {
00362 (pcre_free)((void *)pointer);
00363 }
00364 
00365 
00366 
00367 /*************************************************
00368 *      Copy captured string to new store         *
00369 *************************************************/
00370 
00371 /* This function copies a single captured substring into a piece of new
00372 store
00373 
00374 Arguments:
00375   subject        the subject string that was matched
00376   ovector        pointer to the offsets table
00377   stringcount    the number of substrings that were captured
00378                    (i.e. the yield of the pcre_exec call, unless
00379                    that was zero, in which case it should be 1/3
00380                    of the offset table size)
00381   stringnumber   the number of the required substring
00382   stringptr      where to put a pointer to the substring
00383 
00384 Returns:         if successful:
00385                    the length of the string, not including the zero that
00386                    is put on the end; can be zero
00387                  if not successful:
00388                    PCRE_ERROR_NOMEMORY (-6) failed to get store
00389                    PCRE_ERROR_NOSUBSTRING (-7) substring not present
00390 */
00391 
00392 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
00393 pcre_get_substring(const char *subject, int *ovector, int stringcount,
00394   int stringnumber, const char **stringptr)
00395 {
00396 int yield;
00397 char *substring;
00398 if (stringnumber < 0 || stringnumber >= stringcount)
00399   return PCRE_ERROR_NOSUBSTRING;
00400 stringnumber *= 2;
00401 yield = ovector[stringnumber+1] - ovector[stringnumber];
00402 substring = (char *)(pcre_malloc)(yield + 1);
00403 if (substring == NULL) return PCRE_ERROR_NOMEMORY;
00404 memcpy(substring, subject + ovector[stringnumber], yield);
00405 substring[yield] = 0;
00406 *stringptr = substring;
00407 return yield;
00408 }
00409 
00410 
00411 
00412 /*************************************************
00413 *   Copy named captured string to new store      *
00414 *************************************************/
00415 
00416 /* This function copies a single captured substring, identified by name, into
00417 new store. If the regex permits duplicate names, the first substring that is
00418 set is chosen.
00419 
00420 Arguments:
00421   code           the compiled regex
00422   subject        the subject string that was matched
00423   ovector        pointer to the offsets table
00424   stringcount    the number of substrings that were captured
00425                    (i.e. the yield of the pcre_exec call, unless
00426                    that was zero, in which case it should be 1/3
00427                    of the offset table size)
00428   stringname     the name of the required substring
00429   stringptr      where to put the pointer
00430 
00431 Returns:         if successful:
00432                    the length of the copied string, not including the zero
00433                    that is put on the end; can be zero
00434                  if not successful:
00435                    PCRE_ERROR_NOMEMORY (-6) couldn't get memory
00436                    PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
00437 */
00438 
00439 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
00440 pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
00441   int stringcount, const char *stringname, const char **stringptr)
00442 {
00443 int n = get_first_set(code, stringname, ovector);
00444 if (n <= 0) return n;
00445 return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
00446 }
00447 
00448 
00449 
00450 
00451 /*************************************************
00452 *       Free store obtained by get_substring     *
00453 *************************************************/
00454 
00455 /* This function exists for the benefit of people calling PCRE from non-C
00456 programs that can call its functions, but not free() or (pcre_free)() directly.
00457 
00458 Argument:   the result of a previous pcre_get_substring()
00459 Returns:    nothing
00460 */
00461 
00462 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
00463 pcre_free_substring(const char *pointer)
00464 {
00465 (pcre_free)((void *)pointer);
00466 }
00467 
00468 /* End of pcre_get.c */