/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/

/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
           Copyright (c) 1997-2008 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 00030 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 00031 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 00032 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 00033 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 00034 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 00035 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00036 POSSIBILITY OF SUCH DAMAGE. 00037 ----------------------------------------------------------------------------- 00038 */ 00039 00040 00041 /* This module contains some convenience functions for extracting substrings 00042 from the subject string after a regex match has succeeded. The original idea 00043 for these functions came from Scott Wimer. */ 00044 00045 00046 #ifdef HAVE_CONFIG_H 00047 #include "config.h" 00048 #else if defined(_WINDOWS) 00049 #include <spl/configwin32.h> 00050 #endif 00051 00052 00053 #include "pcre_internal.h" 00054 00055 00056 /************************************************* 00057 * Find number for named string * 00058 *************************************************/ 00059 00060 /* This function is used by the get_first_set() function below, as well 00061 as being generally available. It assumes that names are unique. 
00062 00063 Arguments: 00064 code the compiled regex 00065 stringname the name whose number is required 00066 00067 Returns: the number of the named parentheses, or a negative number 00068 (PCRE_ERROR_NOSUBSTRING) if not found 00069 */ 00070 00071 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION 00072 pcre_get_stringnumber(const pcre *code, const char *stringname) 00073 { 00074 int rc; 00075 int entrysize; 00076 int top, bot; 00077 uschar *nametable; 00078 00079 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) 00080 return rc; 00081 if (top <= 0) return PCRE_ERROR_NOSUBSTRING; 00082 00083 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0) 00084 return rc; 00085 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0) 00086 return rc; 00087 00088 bot = 0; 00089 while (top > bot) 00090 { 00091 int mid = (top + bot) / 2; 00092 uschar *entry = nametable + entrysize*mid; 00093 int c = strcmp(stringname, (char *)(entry + 2)); 00094 if (c == 0) return (entry[0] << 8) + entry[1]; 00095 if (c > 0) bot = mid + 1; else top = mid; 00096 } 00097 00098 return PCRE_ERROR_NOSUBSTRING; 00099 } 00100 00101 00102 00103 /************************************************* 00104 * Find (multiple) entries for named string * 00105 *************************************************/ 00106 00107 /* This is used by the get_first_set() function below, as well as being 00108 generally available. It is used when duplicated names are permitted. 
00109 00110 Arguments: 00111 code the compiled regex 00112 stringname the name whose entries required 00113 firstptr where to put the pointer to the first entry 00114 lastptr where to put the pointer to the last entry 00115 00116 Returns: the length of each entry, or a negative number 00117 (PCRE_ERROR_NOSUBSTRING) if not found 00118 */ 00119 00120 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION 00121 pcre_get_stringtable_entries(const pcre *code, const char *stringname, 00122 char **firstptr, char **lastptr) 00123 { 00124 int rc; 00125 int entrysize; 00126 int top, bot; 00127 uschar *nametable, *lastentry; 00128 00129 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) 00130 return rc; 00131 if (top <= 0) return PCRE_ERROR_NOSUBSTRING; 00132 00133 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0) 00134 return rc; 00135 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0) 00136 return rc; 00137 00138 lastentry = nametable + entrysize * (top - 1); 00139 bot = 0; 00140 while (top > bot) 00141 { 00142 int mid = (top + bot) / 2; 00143 uschar *entry = nametable + entrysize*mid; 00144 int c = strcmp(stringname, (char *)(entry + 2)); 00145 if (c == 0) 00146 { 00147 uschar *first = entry; 00148 uschar *last = entry; 00149 while (first > nametable) 00150 { 00151 if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break; 00152 first -= entrysize; 00153 } 00154 while (last < lastentry) 00155 { 00156 if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break; 00157 last += entrysize; 00158 } 00159 *firstptr = (char *)first; 00160 *lastptr = (char *)last; 00161 return entrysize; 00162 } 00163 if (c > 0) bot = mid + 1; else top = mid; 00164 } 00165 00166 return PCRE_ERROR_NOSUBSTRING; 00167 } 00168 00169 00170 00171 /************************************************* 00172 * Find first set of multiple named strings * 00173 *************************************************/ 00174 00175 /* This function 
allows for duplicate names in the table of named substrings. 00176 It returns the number of the first one that was set in a pattern match. 00177 00178 Arguments: 00179 code the compiled regex 00180 stringname the name of the capturing substring 00181 ovector the vector of matched substrings 00182 00183 Returns: the number of the first that is set, 00184 or the number of the last one if none are set, 00185 or a negative number on error 00186 */ 00187 00188 static int 00189 get_first_set(const pcre *code, const char *stringname, int *ovector) 00190 { 00191 const real_pcre *re = (const real_pcre *)code; 00192 int entrysize; 00193 char *first, *last; 00194 uschar *entry; 00195 if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0) 00196 return pcre_get_stringnumber(code, stringname); 00197 entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last); 00198 if (entrysize <= 0) return entrysize; 00199 for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize) 00200 { 00201 int n = (entry[0] << 8) + entry[1]; 00202 if (ovector[n*2] >= 0) return n; 00203 } 00204 return (first[0] << 8) + first[1]; 00205 } 00206 00207 00208 00209 00210 /************************************************* 00211 * Copy captured string to given buffer * 00212 *************************************************/ 00213 00214 /* This function copies a single captured substring into a given buffer. 00215 Note that we use memcpy() rather than strncpy() in case there are binary zeros 00216 in the string. 00217 00218 Arguments: 00219 subject the subject string that was matched 00220 ovector pointer to the offsets table 00221 stringcount the number of substrings that were captured 00222 (i.e. 
the yield of the pcre_exec call, unless 00223 that was zero, in which case it should be 1/3 00224 of the offset table size) 00225 stringnumber the number of the required substring 00226 buffer where to put the substring 00227 size the size of the buffer 00228 00229 Returns: if successful: 00230 the length of the copied string, not including the zero 00231 that is put on the end; can be zero 00232 if not successful: 00233 PCRE_ERROR_NOMEMORY (-6) buffer too small 00234 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring 00235 */ 00236 00237 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION 00238 pcre_copy_substring(const char *subject, int *ovector, int stringcount, 00239 int stringnumber, char *buffer, int size) 00240 { 00241 int yield; 00242 if (stringnumber < 0 || stringnumber >= stringcount) 00243 return PCRE_ERROR_NOSUBSTRING; 00244 stringnumber *= 2; 00245 yield = ovector[stringnumber+1] - ovector[stringnumber]; 00246 if (size < yield + 1) return PCRE_ERROR_NOMEMORY; 00247 memcpy(buffer, subject + ovector[stringnumber], yield); 00248 buffer[yield] = 0; 00249 return yield; 00250 } 00251 00252 00253 00254 /************************************************* 00255 * Copy named captured string to given buffer * 00256 *************************************************/ 00257 00258 /* This function copies a single captured substring into a given buffer, 00259 identifying it by name. If the regex permits duplicate names, the first 00260 substring that is set is chosen. 00261 00262 Arguments: 00263 code the compiled regex 00264 subject the subject string that was matched 00265 ovector pointer to the offsets table 00266 stringcount the number of substrings that were captured 00267 (i.e. 
the yield of the pcre_exec call, unless 00268 that was zero, in which case it should be 1/3 00269 of the offset table size) 00270 stringname the name of the required substring 00271 buffer where to put the substring 00272 size the size of the buffer 00273 00274 Returns: if successful: 00275 the length of the copied string, not including the zero 00276 that is put on the end; can be zero 00277 if not successful: 00278 PCRE_ERROR_NOMEMORY (-6) buffer too small 00279 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring 00280 */ 00281 00282 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION 00283 pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector, 00284 int stringcount, const char *stringname, char *buffer, int size) 00285 { 00286 int n = get_first_set(code, stringname, ovector); 00287 if (n <= 0) return n; 00288 return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size); 00289 } 00290 00291 00292 00293 /************************************************* 00294 * Copy all captured strings to new store * 00295 *************************************************/ 00296 00297 /* This function gets one chunk of store and builds a list of pointers and all 00298 of the captured substrings in it. A NULL pointer is put on the end of the list. 00299 00300 Arguments: 00301 subject the subject string that was matched 00302 ovector pointer to the offsets table 00303 stringcount the number of substrings that were captured 00304 (i.e. 
the yield of the pcre_exec call, unless 00305 that was zero, in which case it should be 1/3 00306 of the offset table size) 00307 listptr set to point to the list of pointers 00308 00309 Returns: if successful: 0 00310 if not successful: 00311 PCRE_ERROR_NOMEMORY (-6) failed to get store 00312 */ 00313 00314 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION 00315 pcre_get_substring_list(const char *subject, int *ovector, int stringcount, 00316 const char ***listptr) 00317 { 00318 int i; 00319 int size = sizeof(char *); 00320 int double_count = stringcount * 2; 00321 char **stringlist; 00322 char *p; 00323 00324 for (i = 0; i < double_count; i += 2) 00325 size += sizeof(char *) + ovector[i+1] - ovector[i] + 1; 00326 00327 stringlist = (char **)(pcre_malloc)(size); 00328 if (stringlist == NULL) return PCRE_ERROR_NOMEMORY; 00329 00330 *listptr = (const char **)stringlist; 00331 p = (char *)(stringlist + stringcount + 1); 00332 00333 for (i = 0; i < double_count; i += 2) 00334 { 00335 int len = ovector[i+1] - ovector[i]; 00336 memcpy(p, subject + ovector[i], len); 00337 *stringlist++ = p; 00338 p += len; 00339 *p++ = 0; 00340 } 00341 00342 *stringlist = NULL; 00343 return 0; 00344 } 00345 00346 00347 00348 /************************************************* 00349 * Free store obtained by get_substring_list * 00350 *************************************************/ 00351 00352 /* This function exists for the benefit of people calling PCRE from non-C 00353 programs that can call its functions, but not free() or (pcre_free)() directly. 
00354 00355 Argument: the result of a previous pcre_get_substring_list() 00356 Returns: nothing 00357 */ 00358 00359 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION 00360 pcre_free_substring_list(const char **pointer) 00361 { 00362 (pcre_free)((void *)pointer); 00363 } 00364 00365 00366 00367 /************************************************* 00368 * Copy captured string to new store * 00369 *************************************************/ 00370 00371 /* This function copies a single captured substring into a piece of new 00372 store 00373 00374 Arguments: 00375 subject the subject string that was matched 00376 ovector pointer to the offsets table 00377 stringcount the number of substrings that were captured 00378 (i.e. the yield of the pcre_exec call, unless 00379 that was zero, in which case it should be 1/3 00380 of the offset table size) 00381 stringnumber the number of the required substring 00382 stringptr where to put a pointer to the substring 00383 00384 Returns: if successful: 00385 the length of the string, not including the zero that 00386 is put on the end; can be zero 00387 if not successful: 00388 PCRE_ERROR_NOMEMORY (-6) failed to get store 00389 PCRE_ERROR_NOSUBSTRING (-7) substring not present 00390 */ 00391 00392 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION 00393 pcre_get_substring(const char *subject, int *ovector, int stringcount, 00394 int stringnumber, const char **stringptr) 00395 { 00396 int yield; 00397 char *substring; 00398 if (stringnumber < 0 || stringnumber >= stringcount) 00399 return PCRE_ERROR_NOSUBSTRING; 00400 stringnumber *= 2; 00401 yield = ovector[stringnumber+1] - ovector[stringnumber]; 00402 substring = (char *)(pcre_malloc)(yield + 1); 00403 if (substring == NULL) return PCRE_ERROR_NOMEMORY; 00404 memcpy(substring, subject + ovector[stringnumber], yield); 00405 substring[yield] = 0; 00406 *stringptr = substring; 00407 return yield; 00408 } 00409 00410 00411 00412 /************************************************* 00413 * Copy named 
captured string to new store * 00414 *************************************************/ 00415 00416 /* This function copies a single captured substring, identified by name, into 00417 new store. If the regex permits duplicate names, the first substring that is 00418 set is chosen. 00419 00420 Arguments: 00421 code the compiled regex 00422 subject the subject string that was matched 00423 ovector pointer to the offsets table 00424 stringcount the number of substrings that were captured 00425 (i.e. the yield of the pcre_exec call, unless 00426 that was zero, in which case it should be 1/3 00427 of the offset table size) 00428 stringname the name of the required substring 00429 stringptr where to put the pointer 00430 00431 Returns: if successful: 00432 the length of the copied string, not including the zero 00433 that is put on the end; can be zero 00434 if not successful: 00435 PCRE_ERROR_NOMEMORY (-6) couldn't get memory 00436 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring 00437 */ 00438 00439 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION 00440 pcre_get_named_substring(const pcre *code, const char *subject, int *ovector, 00441 int stringcount, const char *stringname, const char **stringptr) 00442 { 00443 int n = get_first_set(code, stringname, ovector); 00444 if (n <= 0) return n; 00445 return pcre_get_substring(subject, ovector, stringcount, n, stringptr); 00446 } 00447 00448 00449 00450 00451 /************************************************* 00452 * Free store obtained by get_substring * 00453 *************************************************/ 00454 00455 /* This function exists for the benefit of people calling PCRE from non-C 00456 programs that can call its functions, but not free() or (pcre_free)() directly. 
00457 00458 Argument: the result of a previous pcre_get_substring() 00459 Returns: nothing 00460 */ 00461 00462 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION 00463 pcre_free_substring(const char *pointer) 00464 { 00465 (pcre_free)((void *)pointer); 00466 } 00467 00468 /* End of pcre_get.c */