00001 /************************************************* 00002 * Perl-Compatible Regular Expressions * 00003 *************************************************/ 00004 00005 /* PCRE is a library of functions to support regular expressions whose syntax 00006 and semantics are as close as possible to those of the Perl 5 language. 00007 00008 Written by Philip Hazel 00009 Copyright (c) 1997-2008 University of Cambridge 00010 00011 ----------------------------------------------------------------------------- 00012 Redistribution and use in source and binary forms, with or without 00013 modification, are permitted provided that the following conditions are met: 00014 00015 * Redistributions of source code must retain the above copyright notice, 00016 this list of conditions and the following disclaimer. 00017 00018 * Redistributions in binary form must reproduce the above copyright 00019 notice, this list of conditions and the following disclaimer in the 00020 documentation and/or other materials provided with the distribution. 00021 00022 * Neither the name of the University of Cambridge nor the names of its 00023 contributors may be used to endorse or promote products derived from 00024 this software without specific prior written permission. 00025 00026 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 00027 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00028 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 00029 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 00030 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 00031 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 00032 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 00033 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 00034 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 00035 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00036 POSSIBILITY OF SUCH DAMAGE. 00037 ----------------------------------------------------------------------------- 00038 */ 00039 00040 00041 /* This module contains the external function pcre_maketables(), which builds 00042 character tables for PCRE in the current locale. The file is compiled on its 00043 own as part of the PCRE library. However, it is also included in the 00044 compilation of dftables.c, in which case the macro DFTABLES is defined. */ 00045 00046 00047 #ifndef DFTABLES 00048 #ifdef HAVE_CONFIG_H 00049 #include "config.h" 00050 #else if defined(_WINDOWS) 00051 #include <spl/configwin32.h> 00052 #endif 00053 # include "pcre_internal.h" 00054 #endif 00055 00056 00057 /************************************************* 00058 * Create PCRE character tables * 00059 *************************************************/ 00060 00061 /* This function builds a set of character tables for use by PCRE and returns 00062 a pointer to them. They are build using the ctype functions, and consequently 00063 their contents will depend upon the current locale setting. When compiled as 00064 part of the library, the store is obtained via pcre_malloc(), but when compiled 00065 inside dftables, use malloc(). 00066 00067 Arguments: none 00068 Returns: pointer to the contiguous block of data 00069 */ 00070 00071 const unsigned char * 00072 pcre_maketables(void) 00073 { 00074 unsigned char *yield, *p; 00075 int i; 00076 00077 #ifndef DFTABLES 00078 yield = (unsigned char*)(pcre_malloc)(tables_length); 00079 #else 00080 yield = (unsigned char*)malloc(tables_length); 00081 #endif 00082 00083 if (yield == NULL) return NULL; 00084 p = yield; 00085 00086 /* First comes the lower casing table */ 00087 00088 for (i = 0; i < 256; i++) *p++ = tolower(i); 00089 00090 /* Next the case-flipping table */ 00091 00092 for (i = 0; i < 256; i++) *p++ = islower(i)? toupper(i) : tolower(i); 00093 00094 /* Then the character class tables. Don't try to be clever and save effort on 00095 exclusive ones - in some locales things may be different. Note that the table 00096 for "space" includes everything "isspace" gives, including VT in the default 00097 locale. This makes it work for the POSIX class [:space:]. Note also that it is 00098 possible for a character to be alnum or alpha without being lower or upper, 00099 such as "male and female ordinals" (\xAA and \xBA) in the fr_FR locale (at 00100 least under Debian Linux's locales as of 12/2005). So we must test for alnum 00101 specially. */ 00102 00103 memset(p, 0, cbit_length); 00104 for (i = 0; i < 256; i++) 00105 { 00106 if (isdigit(i)) p[cbit_digit + i/8] |= 1 << (i&7); 00107 if (isupper(i)) p[cbit_upper + i/8] |= 1 << (i&7); 00108 if (islower(i)) p[cbit_lower + i/8] |= 1 << (i&7); 00109 if (isalnum(i)) p[cbit_word + i/8] |= 1 << (i&7); 00110 if (i == '_') p[cbit_word + i/8] |= 1 << (i&7); 00111 if (isspace(i)) p[cbit_space + i/8] |= 1 << (i&7); 00112 if (isxdigit(i))p[cbit_xdigit + i/8] |= 1 << (i&7); 00113 if (isgraph(i)) p[cbit_graph + i/8] |= 1 << (i&7); 00114 if (isprint(i)) p[cbit_print + i/8] |= 1 << (i&7); 00115 if (ispunct(i)) p[cbit_punct + i/8] |= 1 << (i&7); 00116 if (iscntrl(i)) p[cbit_cntrl + i/8] |= 1 << (i&7); 00117 } 00118 p += cbit_length; 00119 00120 /* Finally, the character type table. In this, we exclude VT from the white 00121 space chars, because Perl doesn't recognize it as such for \s and for comments 00122 within regexes. */ 00123 00124 for (i = 0; i < 256; i++) 00125 { 00126 int x = 0; 00127 if (i != 0x0b && isspace(i)) x += ctype_space; 00128 if (isalpha(i)) x += ctype_letter; 00129 if (isdigit(i)) x += ctype_digit; 00130 if (isxdigit(i)) x += ctype_xdigit; 00131 if (isalnum(i) || i == '_') x += ctype_word; 00132 00133 /* Note: strchr includes the terminating zero in the characters it considers. 00134 In this instance, that is ok because we want binary zero to be flagged as a 00135 meta-character, which in this sense is any character that terminates a run 00136 of data characters. */ 00137 00138 if (strchr("\\*+?{^.$|()[", i) != 0) x += ctype_meta; 00139 *p++ = x; 00140 } 00141 00142 return yield; 00143 } 00144 00145 /* End of pcre_maketables.c */