00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
/* Pull in the build configuration: config.h when HAVE_CONFIG_H is defined,
otherwise the bundled Win32 configuration, but only when building for
Windows. The original used "#else if", which is not a directive: the
preprocessor ignores the trailing tokens and treats it as a bare #else, so the
Win32 header was included on every platform lacking HAVE_CONFIG_H. */

#ifdef HAVE_CONFIG_H
#include "config.h"
#elif defined(_WINDOWS)
#include <spl/configwin32.h>
#endif
00049 /* Identifier substitutions defined before pcre_internal.h is included: NLBLOCK names "cd", PSSTART names "start_pattern", PSEND names "end_pattern". NOTE(review): presumably consumed by macros in that header - the consumers are not visible here, confirm. */
00050 #define NLBLOCK cd
00051 #define PSSTART start_pattern
00052 #define PSEND end_pattern
00053
00054 #include "pcre_internal.h"
00055
00056
00057
00058
00059
00060 #ifdef DEBUG
00061 #include "pcre_printint.src"
00062 #endif
00063
00064
00065
00066
/* Set bit number b in the byte array (bit map) a: byte b/8, bit b%8. The
arguments and the whole expansion are parenthesized so that expressions such
as "k + 6" can be passed and the macro can be used inside larger expressions.
Note that b is still evaluated twice, so it must not have side effects. */

#define SETBIT(a,b) ((a)[(b)/8] |= (1 << ((b)%8)))
00068
00069
00070
00071
00072
00073 /* A value 20 below INT_MAX. NOTE(review): the name suggests it is the ceiling used when checking length arithmetic for int overflow; its users are outside this section - confirm. */
00074 #define OFLOW_MAX (INT_MAX - 20)
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092 /* Fixed size of 4096. NOTE(review): presumably the size of the workspace used while compiling a pattern; the allocation site is outside this section - confirm. */
00093 #define COMPILE_WORK_SIZE (4096)
00094
00095
00096
00097
00098
00099
00100
00101 #ifndef EBCDIC
00102 /* Table consulted by check_escape() for the character that follows a backslash. */
00103 /* A non-zero entry replaces the character: positive entries are data characters, negative entries are negated ESC_ codes. */
00104 /* A zero entry means the character needs the further handling done in check_escape()'s switch. */
00105 /* This variant is indexed by (c - CHAR_0) and has one entry for each code from CHAR_0 to CHAR_z. */
00106 static const short int escapes[] = {
00107 0, 0, /* 0 - 1 */
00108 0, 0, /* 2 - 3 */
00109 0, 0, /* 4 - 5 */
00110 0, 0, /* 6 - 7 */
00111 0, 0, /* 8 - 9 */
00112 CHAR_COLON, CHAR_SEMICOLON, /* : - ; */
00113 CHAR_LESS_THAN_SIGN, CHAR_EQUALS_SIGN, /* < - = */
00114 CHAR_GREATER_THAN_SIGN, CHAR_QUESTION_MARK, /* > - ? */
00115 CHAR_COMMERCIAL_AT, -ESC_A, /* @ - A */
00116 -ESC_B, -ESC_C, /* B - C */
00117 -ESC_D, -ESC_E, /* D - E */
00118 0, -ESC_G, /* F - G */
00119 -ESC_H, 0, /* H - I */
00120 0, -ESC_K, /* J - K */
00121 0, 0, /* L - M */
00122 0, 0, /* N - O */
00123 -ESC_P, -ESC_Q, /* P - Q */
00124 -ESC_R, -ESC_S, /* R - S */
00125 0, 0, /* T - U */
00126 -ESC_V, -ESC_W, /* V - W */
00127 -ESC_X, 0, /* X - Y */
00128 -ESC_Z, CHAR_LEFT_SQUARE_BRACKET, /* Z - [ */
00129 CHAR_BACKSLASH, CHAR_RIGHT_SQUARE_BRACKET, /* backslash - ] */
00130 CHAR_CIRCUMFLEX_ACCENT, CHAR_UNDERSCORE, /* ^ - _ */
00131 CHAR_GRAVE_ACCENT, 7, /* ` - a (the escape for a yields the value 7) */
00132 -ESC_b, 0, /* b - c */
00133 -ESC_d, ESC_e, /* d - e */
00134 ESC_f, 0, /* f - g */
00135 -ESC_h, 0, /* h - i */
00136 0, -ESC_k, /* j - k */
00137 0, 0, /* l - m */
00138 ESC_n, 0, /* n - o */
00139 -ESC_p, 0, /* p - q */
00140 ESC_r, -ESC_s, /* r - s */
00141 ESC_tee, 0, /* t - u */
00142 -ESC_v, -ESC_w, /* v - w */
00143 0, 0, /* x - y */
00144 -ESC_z /* z */
00145 };
00146
00147 #else
00148 /* EBCDIC variant of the same table: check_escape() indexes it by (c - 0x48). */
00149 /* Entries have the same meaning as in the non-EBCDIC table above. */
00150
00151 static const short int escapes[] = {
00152 0, 0, 0, '.', '<', '(', '+', '|',
00153 '&', 0, 0, 0, 0, 0, 0, 0,
00154 0, 0, '!', '$', '*', ')', ';', '~',
00155 '-', '/', 0, 0, 0, 0, 0, 0,
00156 0, 0, '|', ',', '%', '_', '>', '?',
00157 0, 0, 0, 0, 0, 0, 0, 0,
00158 0, '`', ':', '#', '@', '\'', '=', '"',
00159 0, 7, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0,
00160 -ESC_h, 0, 0, '{', 0, 0, 0, 0,
00161 0, 0, -ESC_k, 'l', 0, ESC_n, 0, -ESC_p,
00162 0, ESC_r, 0, '}', 0, 0, 0, 0,
00163 0, '~', -ESC_s, ESC_tee, 0,-ESC_v, -ESC_w, 0,
00164 0,-ESC_z, 0, 0, 0, '[', 0, 0,
00165 0, 0, 0, 0, 0, 0, 0, 0,
00166 0, 0, 0, 0, 0, ']', '=', '-',
00167 '{',-ESC_A, -ESC_B, -ESC_C, -ESC_D,-ESC_E, 0, -ESC_G,
00168 -ESC_H, 0, 0, 0, 0, 0, 0, 0,
00169 '}', 0, -ESC_K, 0, 0, 0, 0, -ESC_P,
00170 -ESC_Q,-ESC_R, 0, 0, 0, 0, 0, 0,
00171 '\\', 0, -ESC_S, 0, 0,-ESC_V, -ESC_W, -ESC_X,
00172 0,-ESC_Z, 0, 0, 0, 0, 0, 0,
00173 0, 0, 0, 0, 0, 0, 0, 0,
00174 0, 0, 0, 0, 0, 0, 0, 0
00175 };
00176 #endif
00177
00178
00179
00180
00181
00182
00183
00184 /* Table of (*VERB) names. Each verbitem holds the length of a name and the opcode it maps to. */
00185 typedef struct verbitem {
00186 int len; /* Length of the verb name */
00187 int op; /* Opcode associated with the verb */
00188 } verbitem;
00189 /* All verb names concatenated into one string. NOTE(review): the STRING_xxx0 macros presumably carry their own terminating zero; they are defined outside this file section - confirm. */
00190 static const char verbnames[] =
00191 STRING_ACCEPT0
00192 STRING_COMMIT0
00193 STRING_F0
00194 STRING_FAIL0
00195 STRING_PRUNE0
00196 STRING_SKIP0
00197 STRING_THEN;
00198 /* One entry per name in verbnames, in the same order; len is the name's length (ACCEPT is 6, F is 1, and so on). */
00199 static const verbitem verbs[] = {
00200 { 6, OP_ACCEPT },
00201 { 6, OP_COMMIT },
00202 { 1, OP_FAIL },
00203 { 4, OP_FAIL },
00204 { 5, OP_PRUNE },
00205 { 4, OP_SKIP },
00206 { 4, OP_THEN }
00207 };
00208 /* Number of entries in verbs[] */
00209 static const int verbcount = sizeof(verbs)/sizeof(verbitem);
00210
00211
00212
00213
00214
00215
00216
00217 /* Names of the POSIX character classes, concatenated into a single string. NOTE(review): the STRING_xxx0 macros presumably include a terminating zero after each name - confirm in the header. */
00218 static const char posix_names[] =
00219 STRING_alpha0 STRING_lower0 STRING_upper0 STRING_alnum0
00220 STRING_ascii0 STRING_blank0 STRING_cntrl0 STRING_digit0
00221 STRING_graph0 STRING_print0 STRING_punct0 STRING_space0
00222 STRING_word0 STRING_xdigit;
00223 /* Length of each name in posix_names, in the same order, followed by a closing 0 entry. */
00224 static const uschar posix_name_lengths[] = {
00225 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };
00226
00227
00228
00229
00230
00231
00232
00233
00234
00235
00236 /* Three ints per POSIX class: fourteen triples, matching the fourteen names in posix_names. NOTE(review): the row labels below assume the same order as posix_names; upstream PCRE describes each triple as (base cbit map, cbit map to add or remove or -1 for none, adjustment code) - the consuming code is outside this section, confirm before relying on this. */
00237 static const int posix_class_maps[] = {
00238 cbit_word, cbit_digit, -2, /* alpha */
00239 cbit_lower, -1, 0, /* lower */
00240 cbit_upper, -1, 0, /* upper */
00241 cbit_word, -1, 2, /* alnum */
00242 cbit_print, cbit_cntrl, 0, /* ascii */
00243 cbit_space, -1, 1, /* blank */
00244 cbit_cntrl, -1, 0, /* cntrl */
00245 cbit_digit, -1, 0, /* digit */
00246 cbit_graph, -1, 0, /* graph */
00247 cbit_print, -1, 0, /* print */
00248 cbit_punct, -1, 0, /* punct */
00249 cbit_space, -1, 0, /* space */
00250 cbit_word, -1, 0, /* word */
00251 cbit_xdigit,-1, 0 /* xdigit */
00252 };
00253
00254 /* STRING(a) turns its argument into a string literal; XSTRING(s) expands s first and then stringifies it. XSTRING is used in error_texts to embed the values of MAX_NAME_SIZE and MAX_NAME_COUNT. */
00255 #define STRING(a) # a
00256 #define XSTRING(s) STRING(s)
00257
00258
00259
00260
00261
00262
00263
00264
00265
00266
00267 /* Compile-time error messages, stored as one string with a zero byte between messages. find_error_text(n) returns message n by skipping n zero-terminated strings, so the order here must match the error numbering (for example, message 61 is "number is too big", which is what the code sets as ERR61). The last message relies on the implicit terminator of the string literal. */
00268 static const char error_texts[] =
00269 "no error\0"
00270 "\\ at end of pattern\0"
00271 "\\c at end of pattern\0"
00272 "unrecognized character follows \\\0"
00273 "numbers out of order in {} quantifier\0"
00274 /* 5 */
00275 "number too big in {} quantifier\0"
00276 "missing terminating ] for character class\0"
00277 "invalid escape sequence in character class\0"
00278 "range out of order in character class\0"
00279 "nothing to repeat\0"
00280 /* 10 */
00281 "operand of unlimited repeat could match the empty string\0"
00282 "internal error: unexpected repeat\0"
00283 "unrecognized character after (? or (?-\0"
00284 "POSIX named classes are supported only within a class\0"
00285 "missing )\0"
00286 /* 15 */
00287 "reference to non-existent subpattern\0"
00288 "erroffset passed as NULL\0"
00289 "unknown option bit(s) set\0"
00290 "missing ) after comment\0"
00291 "parentheses nested too deeply\0"
00292 /* 20 */
00293 "regular expression is too large\0"
00294 "failed to get memory\0"
00295 "unmatched parentheses\0"
00296 "internal error: code overflow\0"
00297 "unrecognized character after (?<\0"
00298 /* 25 */
00299 "lookbehind assertion is not fixed length\0"
00300 "malformed number or name after (?(\0"
00301 "conditional group contains more than two branches\0"
00302 "assertion expected after (?(\0"
00303 "(?R or (?[+-]digits must be followed by )\0"
00304 /* 30 */
00305 "unknown POSIX class name\0"
00306 "POSIX collating elements are not supported\0"
00307 "this version of PCRE is not compiled with PCRE_UTF8 support\0"
00308 "spare error\0"
00309 "character value in \\x{...} sequence is too large\0"
00310 /* 35 */
00311 "invalid condition (?(0)\0"
00312 "\\C not allowed in lookbehind assertion\0"
00313 "PCRE does not support \\L, \\l, \\N, \\U, or \\u\0"
00314 "number after (?C is > 255\0"
00315 "closing ) for (?C expected\0"
00316 /* 40 */
00317 "recursive call could loop indefinitely\0"
00318 "unrecognized character after (?P\0"
00319 "syntax error in subpattern name (missing terminator)\0"
00320 "two named subpatterns have the same name\0"
00321 "invalid UTF-8 string\0"
00322 /* 45 */
00323 "support for \\P, \\p, and \\X has not been compiled\0"
00324 "malformed \\P or \\p sequence\0"
00325 "unknown property name after \\P or \\p\0"
00326 "subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " characters)\0"
00327 "too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")\0"
00328 /* 50 */
00329 "repeated subpattern is too long\0"
00330 "octal value is greater than \\377 (not in UTF-8 mode)\0"
00331 "internal error: overran compiling workspace\0"
00332 "internal error: previously-checked referenced subpattern not found\0"
00333 "DEFINE group contains more than one branch\0"
00334 /* 55 */
00335 "repeating a DEFINE group is not allowed\0"
00336 "inconsistent NEWLINE options\0"
00337 "\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0"
00338 "a numbered reference must not be zero\0"
00339 "(*VERB) with an argument is not supported\0"
00340 /* 60 */
00341 "(*VERB) not recognized\0"
00342 "number is too big\0"
00343 "subpattern name expected\0"
00344 "digit expected after (?+\0"
00345 "] is an invalid data character in JavaScript compatibility mode\0"
00346 /* 65 */
00347 "different names for subpatterns of the same number are not allowed";
00348
00349
00350
00351
00352
00353
00354
00355
00356
00357
00358
00359
00360
00361
00362
00363
00364
00365
00366 #ifndef EBCDIC
00367 /* Per-character flag table indexed by character code (256 entries, 8 per row). */
00368 /* The code in this file tests entries against ctype_digit and ctype_xdigit to recognize decimal and hexadecimal digits. */
00369 /* In this table decimal digits carry 0x0c and the hex letters A-F and a-f carry 0x08; everything else is 0. */
00370
00371 static const unsigned char digitab[] =
00372 {
00373 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00374 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00375 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00376 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00377 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00378 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00379 0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c, /* codes 48-55: 0 - 7 */
00380 0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00, /* codes 56-63: 8 - 9, then : to ? */
00381 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* codes 64-71: @, A - F, G */
00382 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00383 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00384 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00385 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* codes 96-103: `, a - f, g */
00386 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00387 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00388 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00389 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00390 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00391 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00392 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00393 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00394 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00395 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00396 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00397 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00398 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00399 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00400 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00401 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00402 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00403 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00404 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
00405
00406 #else
00407 /* EBCDIC variant of digitab: same flag values, laid out for EBCDIC character codes. */
00408 /* The 0x08 entries sit at codes 129-134 and 193-198, the 0x0c entries at codes 240-249. */
00409
00410 static const unsigned char digitab[] =
00411 {
00412 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00413 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00414 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00415 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00416 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00417 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00418 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00419 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00420 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00421 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00422 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00423 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00424 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00425 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00426 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00427 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00428 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* codes 128-135: hex letters at 129-134 */
00429 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00430 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00431 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00432 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00433 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00434 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00435 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00436 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* codes 192-199: hex letters at 193-198 */
00437 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00438 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00439 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00440 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00441 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00442 0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c, /* codes 240-247: decimal digits */
00443 0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00}; /* codes 248-249: decimal digits */
00444 /* EBCDIC-only character flag table. check_escape() masks an entry with 0x0E to decide whether the character after a backslash is looked up in escapes[]. NOTE(review): the meaning of the individual bits is defined outside this section - confirm. */
00445 static const unsigned char ebcdic_chartab[] = {
00446 0x80,0x00,0x00,0x00,0x00,0x01,0x00,0x00,
00447 0x00,0x00,0x00,0x00,0x01,0x01,0x00,0x00,
00448 0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,
00449 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00450 0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,
00451 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00452 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00453 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00454 0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00455 0x00,0x00,0x00,0x80,0x00,0x80,0x80,0x80,
00456 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00457 0x00,0x00,0x00,0x80,0x80,0x80,0x00,0x00,
00458 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00459 0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x80,
00460 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00461 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00462 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12,
00463 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00,
00464 0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
00465 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00,
00466 0x00,0x00,0x12,0x12,0x12,0x12,0x12,0x12,
00467 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00,
00468 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00469 0x00,0x00,0x80,0x00,0x00,0x00,0x00,0x00,
00470 0x80,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12,
00471 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00,
00472 0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
00473 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00,
00474 0x00,0x00,0x12,0x12,0x12,0x12,0x12,0x12,
00475 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00,
00476 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,
00477 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x00};
00478 #endif
00479
00480
00481
00482 /* Forward declaration of compile_regex(); the definition is not in this section of the file. */
00483 static BOOL
00484 compile_regex(int, int, uschar **, const uschar **, int *, BOOL, BOOL, int,
00485 int *, int *, branch_chain *, compile_data *, int *);
00486
00487
00488
00489
00490
00491
00492
00493
00494
00495
00496
00497
00498
00499
00500
00501
00502 static const char *
00503 find_error_text(int n)
00504 {
00505 const char *s = error_texts;
00506 for (; n > 0; n--) while (*s++ != 0) {};
00507 return s;
00508 }
00509
00510
00511
00512
00513
00514
00515
00516
00517
00518
00519
00520
00521
00522
00523
00524
00525
00526
00527
00528
00529
00530
00531
00532
00533
00534
00535 static int
00536 check_escape(const uschar **ptrptr, int *errorcodeptr, int bracount,
00537 int options, BOOL isclass)
00538 {
00539 BOOL utf8 = (options & PCRE_UTF8) != 0;
00540 const uschar *ptr = *ptrptr + 1;
00541 int c, i;
00542
00543 GETCHARINCTEST(c, ptr);
00544 ptr--;
00545
00546
00547
00548 if (c == 0) *errorcodeptr = ERR1;
00549
00550
00551
00552
00553
00554 #ifndef EBCDIC
00555 else if (c < CHAR_0 || c > CHAR_z) {}
00556 else if ((i = escapes[c - CHAR_0]) != 0) c = i;
00557
00558 #else
00559 else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {}
00560 else if ((i = escapes[c - 0x48]) != 0) c = i;
00561 #endif
00562
00563
00564
00565 else
00566 {
00567 const uschar *oldptr;
00568 BOOL braced, negated;
00569
00570 switch (c)
00571 {
00572
00573
00574
00575 case CHAR_l:
00576 case CHAR_L:
00577 case CHAR_N:
00578 case CHAR_u:
00579 case CHAR_U:
00580 *errorcodeptr = ERR37;
00581 break;
00582
00583
00584
00585
00586
00587
00588
00589
00590
00591
00592
00593
00594
00595
00596
00597
00598
00599 case CHAR_g:
00600 if (ptr[1] == CHAR_LESS_THAN_SIGN || ptr[1] == CHAR_APOSTROPHE)
00601 {
00602 c = -ESC_g;
00603 break;
00604 }
00605
00606
00607
00608 if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
00609 {
00610 const uschar *p;
00611 for (p = ptr+2; *p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET; p++)
00612 if (*p != CHAR_MINUS && (digitab[*p] & ctype_digit) == 0) break;
00613 if (*p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET)
00614 {
00615 c = -ESC_k;
00616 break;
00617 }
00618 braced = TRUE;
00619 ptr++;
00620 }
00621 else braced = FALSE;
00622
00623 if (ptr[1] == CHAR_MINUS)
00624 {
00625 negated = TRUE;
00626 ptr++;
00627 }
00628 else negated = FALSE;
00629
00630 c = 0;
00631 while ((digitab[ptr[1]] & ctype_digit) != 0)
00632 c = c * 10 + *(++ptr) - CHAR_0;
00633
00634 if (c < 0)
00635 {
00636 *errorcodeptr = ERR61;
00637 break;
00638 }
00639
00640 if (braced && *(++ptr) != CHAR_RIGHT_CURLY_BRACKET)
00641 {
00642 *errorcodeptr = ERR57;
00643 break;
00644 }
00645
00646 if (c == 0)
00647 {
00648 *errorcodeptr = ERR58;
00649 break;
00650 }
00651
00652 if (negated)
00653 {
00654 if (c > bracount)
00655 {
00656 *errorcodeptr = ERR15;
00657 break;
00658 }
00659 c = bracount - (c - 1);
00660 }
00661
00662 c = -(ESC_REF + c);
00663 break;
00664
00665
00666
00667
00668
00669
00670
00671
00672
00673
00674
00675
00676
00677 case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4: case CHAR_5:
00678 case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:
00679
00680 if (!isclass)
00681 {
00682 oldptr = ptr;
00683 c -= CHAR_0;
00684 while ((digitab[ptr[1]] & ctype_digit) != 0)
00685 c = c * 10 + *(++ptr) - CHAR_0;
00686 if (c < 0)
00687 {
00688 *errorcodeptr = ERR61;
00689 break;
00690 }
00691 if (c < 10 || c <= bracount)
00692 {
00693 c = -(ESC_REF + c);
00694 break;
00695 }
00696 ptr = oldptr;
00697 }
00698
00699
00700
00701
00702
00703 if ((c = *ptr) >= CHAR_8)
00704 {
00705 ptr--;
00706 c = 0;
00707 break;
00708 }
00709
00710
00711
00712
00713
00714
00715
00716 case CHAR_0:
00717 c -= CHAR_0;
00718 while(i++ < 2 && ptr[1] >= CHAR_0 && ptr[1] <= CHAR_7)
00719 c = c * 8 + *(++ptr) - CHAR_0;
00720 if (!utf8 && c > 255) *errorcodeptr = ERR51;
00721 break;
00722
00723
00724
00725
00726
00727 case CHAR_x:
00728 if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
00729 {
00730 const uschar *pt = ptr + 2;
00731 int count = 0;
00732
00733 c = 0;
00734 while ((digitab[*pt] & ctype_xdigit) != 0)
00735 {
00736 register int cc = *pt++;
00737 if (c == 0 && cc == CHAR_0) continue;
00738 count++;
00739
00740 #ifndef EBCDIC
00741 if (cc >= CHAR_a) cc -= 32;
00742 c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
00743 #else
00744 if (cc >= CHAR_a && cc <= CHAR_z) cc += 64;
00745 c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
00746 #endif
00747 }
00748
00749 if (*pt == CHAR_RIGHT_CURLY_BRACKET)
00750 {
00751 if (c < 0 || count > (utf8? 8 : 2)) *errorcodeptr = ERR34;
00752 ptr = pt;
00753 break;
00754 }
00755
00756
00757
00758 }
00759
00760
00761
00762 c = 0;
00763 while (i++ < 2 && (digitab[ptr[1]] & ctype_xdigit) != 0)
00764 {
00765 int cc;
00766 cc = *(++ptr);
00767 #ifndef EBCDIC
00768 if (cc >= CHAR_a) cc -= 32;
00769 c = c * 16 + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
00770 #else
00771 if (cc <= CHAR_z) cc += 64;
00772 c = c * 16 + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
00773 #endif
00774 }
00775 break;
00776
00777
00778
00779
00780
00781 case CHAR_c:
00782 c = *(++ptr);
00783 if (c == 0)
00784 {
00785 *errorcodeptr = ERR2;
00786 break;
00787 }
00788
00789 #ifndef EBCDIC
00790 if (c >= CHAR_a && c <= CHAR_z) c -= 32;
00791 c ^= 0x40;
00792 #else
00793 if (c >= CHAR_a && c <= CHAR_z) c += 64;
00794 c ^= 0xC0;
00795 #endif
00796 break;
00797
00798
00799
00800
00801
00802
00803
00804 default:
00805 if ((options & PCRE_EXTRA) != 0) switch(c)
00806 {
00807 default:
00808 *errorcodeptr = ERR3;
00809 break;
00810 }
00811 break;
00812 }
00813 }
00814
00815 *ptrptr = ptr;
00816 return c;
00817 }
00818
00819
00820
00821 #ifdef SUPPORT_UCP
00822
00823
00824
00825
00826
00827
00828
00829
00830
00831
00832
00833
00834
00835
00836
00837
00838
00839
00840 static int
00841 get_ucp(const uschar **ptrptr, BOOL *negptr, int *dptr, int *errorcodeptr)
00842 {
00843 int c, i, bot, top;
00844 const uschar *ptr = *ptrptr;
00845 char name[32];
00846
00847 c = *(++ptr);
00848 if (c == 0) goto ERROR_RETURN;
00849
00850 *negptr = FALSE;
00851
00852
00853
00854
00855 if (c == CHAR_LEFT_CURLY_BRACKET)
00856 {
00857 if (ptr[1] == CHAR_CIRCUMFLEX_ACCENT)
00858 {
00859 *negptr = TRUE;
00860 ptr++;
00861 }
00862 for (i = 0; i < (int)sizeof(name) - 1; i++)
00863 {
00864 c = *(++ptr);
00865 if (c == 0) goto ERROR_RETURN;
00866 if (c == CHAR_RIGHT_CURLY_BRACKET) break;
00867 name[i] = c;
00868 }
00869 if (c != CHAR_RIGHT_CURLY_BRACKET) goto ERROR_RETURN;
00870 name[i] = 0;
00871 }
00872
00873
00874
00875 else
00876 {
00877 name[0] = c;
00878 name[1] = 0;
00879 }
00880
00881 *ptrptr = ptr;
00882
00883
00884
00885 bot = 0;
00886 top = _pcre_utt_size;
00887
00888 while (bot < top)
00889 {
00890 i = (bot + top) >> 1;
00891 c = strcmp(name, _pcre_utt_names + _pcre_utt[i].name_offset);
00892 if (c == 0)
00893 {
00894 *dptr = _pcre_utt[i].value;
00895 return _pcre_utt[i].type;
00896 }
00897 if (c > 0) bot = i + 1; else top = i;
00898 }
00899
00900 *errorcodeptr = ERR47;
00901 *ptrptr = ptr;
00902 return -1;
00903
00904 ERROR_RETURN:
00905 *errorcodeptr = ERR46;
00906 *ptrptr = ptr;
00907 return -1;
00908 }
00909 #endif
00910
00911
00912
00913
00914
00915
00916
00917
00918
00919
00920
00921
00922
00923
00924
00925
00926
00927
00928
00929 static BOOL
00930 is_counted_repeat(const uschar *p)
00931 {
00932 if ((digitab[*p++] & ctype_digit) == 0) return FALSE;
00933 while ((digitab[*p] & ctype_digit) != 0) p++;
00934 if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;
00935
00936 if (*p++ != CHAR_COMMA) return FALSE;
00937 if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;
00938
00939 if ((digitab[*p++] & ctype_digit) == 0) return FALSE;
00940 while ((digitab[*p] & ctype_digit) != 0) p++;
00941
00942 return (*p == CHAR_RIGHT_CURLY_BRACKET);
00943 }
00944
00945
00946
00947
00948
00949
00950
00951
00952
00953
00954
00955
00956
00957
00958
00959
00960
00961
00962
00963
00964
00965
00966 static const uschar *
00967 read_repeat_counts(const uschar *p, int *minp, int *maxp, int *errorcodeptr)
00968 {
00969 int min = 0;
00970 int max = -1;
00971
00972
00973
00974
00975 while ((digitab[*p] & ctype_digit) != 0) min = min * 10 + *p++ - CHAR_0;
00976 if (min < 0 || min > 65535)
00977 {
00978 *errorcodeptr = ERR5;
00979 return p;
00980 }
00981
00982
00983
00984
00985 if (*p == CHAR_RIGHT_CURLY_BRACKET) max = min; else
00986 {
00987 if (*(++p) != CHAR_RIGHT_CURLY_BRACKET)
00988 {
00989 max = 0;
00990 while((digitab[*p] & ctype_digit) != 0) max = max * 10 + *p++ - CHAR_0;
00991 if (max < 0 || max > 65535)
00992 {
00993 *errorcodeptr = ERR5;
00994 return p;
00995 }
00996 if (max < min)
00997 {
00998 *errorcodeptr = ERR4;
00999 return p;
01000 }
01001 }
01002 }
01003
01004
01005
01006
01007 *minp = min;
01008 *maxp = max;
01009 return p;
01010 }
01011
01012
01013
01014
01015
01016
01017
01018
01019
01020
01021
01022
01023
01024
01025
01026
01027
01028
01029
01030
01031
01032
01033
01034
01035
01036
01037
01038
01039 static int
01040 find_parens_sub(uschar **ptrptr, compile_data *cd, const uschar *name, int lorn,
01041 BOOL xmode, int *count)
01042 {
01043 uschar *ptr = *ptrptr;
01044 int start_count = *count;
01045 int hwm_count = start_count;
01046 BOOL dup_parens = FALSE;
01047
01048
01049
01050
01051 if (ptr[0] == CHAR_LEFT_PARENTHESIS)
01052 {
01053 if (ptr[1] == CHAR_QUESTION_MARK &&
01054 ptr[2] == CHAR_VERTICAL_LINE)
01055 {
01056 ptr += 3;
01057 dup_parens = TRUE;
01058 }
01059
01060
01061
01062 else if (ptr[1] != CHAR_QUESTION_MARK && ptr[1] != CHAR_ASTERISK)
01063 {
01064 *count += 1;
01065 if (name == NULL && *count == lorn) return *count;
01066 ptr++;
01067 }
01068
01069
01070
01071
01072
01073 else if (ptr[2] == CHAR_LEFT_PARENTHESIS)
01074 {
01075 ptr += 2;
01076 if (ptr[1] != CHAR_QUESTION_MARK)
01077 {
01078 while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
01079 if (*ptr != 0) ptr++;
01080 }
01081 }
01082
01083
01084
01085 else
01086 {
01087 ptr += 2;
01088 if (*ptr == CHAR_P) ptr++;
01089
01090
01091
01092 if ((*ptr == CHAR_LESS_THAN_SIGN && ptr[1] != CHAR_EXCLAMATION_MARK &&
01093 ptr[1] != CHAR_EQUALS_SIGN) || *ptr == CHAR_APOSTROPHE)
01094 {
01095 int term;
01096 const uschar *thisname;
01097 *count += 1;
01098 if (name == NULL && *count == lorn) return *count;
01099 term = *ptr++;
01100 if (term == CHAR_LESS_THAN_SIGN) term = CHAR_GREATER_THAN_SIGN;
01101 thisname = ptr;
01102 while (*ptr != term) ptr++;
01103 if (name != NULL && lorn == ptr - thisname &&
01104 strncmp((const char *)name, (const char *)thisname, lorn) == 0)
01105 return *count;
01106 term++;
01107 }
01108 }
01109 }
01110
01111
01112
01113
01114 for (; *ptr != 0; ptr++)
01115 {
01116
01117
01118 if (*ptr == CHAR_BACKSLASH)
01119 {
01120 if (*(++ptr) == 0) goto FAIL_EXIT;
01121 if (*ptr == CHAR_Q) for (;;)
01122 {
01123 while (*(++ptr) != 0 && *ptr != CHAR_BACKSLASH) {};
01124 if (*ptr == 0) goto FAIL_EXIT;
01125 if (*(++ptr) == CHAR_E) break;
01126 }
01127 continue;
01128 }
01129
01130
01131
01132
01133
01134
01135
01136 if (*ptr == CHAR_LEFT_SQUARE_BRACKET)
01137 {
01138 BOOL negate_class = FALSE;
01139 for (;;)
01140 {
01141 if (ptr[1] == CHAR_BACKSLASH)
01142 {
01143 if (ptr[2] == CHAR_E)
01144 ptr+= 2;
01145 else if (strncmp((const char *)ptr+2,
01146 STR_Q STR_BACKSLASH STR_E, 3) == 0)
01147 ptr += 4;
01148 else
01149 break;
01150 }
01151 else if (!negate_class && ptr[1] == CHAR_CIRCUMFLEX_ACCENT)
01152 {
01153 negate_class = TRUE;
01154 ptr++;
01155 }
01156 else break;
01157 }
01158
01159
01160
01161
01162 if (ptr[1] == CHAR_RIGHT_SQUARE_BRACKET &&
01163 (cd->external_options & PCRE_JAVASCRIPT_COMPAT) == 0)
01164 ptr++;
01165
01166 while (*(++ptr) != CHAR_RIGHT_SQUARE_BRACKET)
01167 {
01168 if (*ptr == 0) return -1;
01169 if (*ptr == CHAR_BACKSLASH)
01170 {
01171 if (*(++ptr) == 0) goto FAIL_EXIT;
01172 if (*ptr == CHAR_Q) for (;;)
01173 {
01174 while (*(++ptr) != 0 && *ptr != CHAR_BACKSLASH) {};
01175 if (*ptr == 0) goto FAIL_EXIT;
01176 if (*(++ptr) == CHAR_E) break;
01177 }
01178 continue;
01179 }
01180 }
01181 continue;
01182 }
01183
01184
01185
01186 if (xmode && *ptr == CHAR_NUMBER_SIGN)
01187 {
01188 while (*(++ptr) != 0 && *ptr != CHAR_NL) {};
01189 if (*ptr == 0) goto FAIL_EXIT;
01190 continue;
01191 }
01192
01193
01194
01195 if (*ptr == CHAR_LEFT_PARENTHESIS)
01196 {
01197 int rc = find_parens_sub(&ptr, cd, name, lorn, xmode, count);
01198 if (rc > 0) return rc;
01199 if (*ptr == 0) goto FAIL_EXIT;
01200 }
01201
01202 else if (*ptr == CHAR_RIGHT_PARENTHESIS)
01203 {
01204 if (dup_parens && *count < hwm_count) *count = hwm_count;
01205 *ptrptr = ptr;
01206 return -1;
01207 }
01208
01209 else if (*ptr == CHAR_VERTICAL_LINE && dup_parens)
01210 {
01211 if (*count > hwm_count) hwm_count = *count;
01212 *count = start_count;
01213 }
01214 }
01215
01216 FAIL_EXIT:
01217 *ptrptr = ptr;
01218 return -1;
01219 }
01220
01221
01222
01223
01224
01225
01226
01227
01228
01229
01230
01231
01232
01233
01234
01235
01236
01237
01238
01239
01240
01241
01242
01243
01244
01245
01246
01247
01248 static int
01249 find_parens(compile_data *cd, const uschar *name, int lorn, BOOL xmode)
01250 {
01251 uschar *ptr = (uschar *)cd->start_pattern;
01252 int count = 0;
01253 int rc;
01254
01255
01256
01257
01258
01259
01260 for (;;)
01261 {
01262 rc = find_parens_sub(&ptr, cd, name, lorn, xmode, &count);
01263 if (rc > 0 || *ptr++ == 0) break;
01264 }
01265
01266 return rc;
01267 }
01268
01269
01270
01271
01272
01273
01274
01275
01276
01277
01278
01279
01280
01281
01282
01283
01284
01285
01286
01287
01288
01289
01290
01291
01292 static const uschar*
01293 first_significant_code(const uschar *code, int *options, int optbit,
01294 BOOL skipassert)
01295 {
01296 for (;;)
01297 {
01298 switch ((int)*code)
01299 {
01300 case OP_OPT:
01301 if (optbit > 0 && ((int)code[1] & optbit) != (*options & optbit))
01302 *options = (int)code[1];
01303 code += 2;
01304 break;
01305
01306 case OP_ASSERT_NOT:
01307 case OP_ASSERTBACK:
01308 case OP_ASSERTBACK_NOT:
01309 if (!skipassert) return code;
01310 do code += GET(code, 1); while (*code == OP_ALT);
01311 code += _pcre_OP_lengths[*code];
01312 break;
01313
01314 case OP_WORD_BOUNDARY:
01315 case OP_NOT_WORD_BOUNDARY:
01316 if (!skipassert) return code;
01317
01318
01319 case OP_CALLOUT:
01320 case OP_CREF:
01321 case OP_NCREF:
01322 case OP_RREF:
01323 case OP_NRREF:
01324 case OP_DEF:
01325 code += _pcre_OP_lengths[*code];
01326 break;
01327
01328 default:
01329 return code;
01330 }
01331 }
01332
01333 }
01334
01335
01336
01337
01338
01339
01340
01341
01342
01343
01344
01345
01346
01347
01348
01349
01350
01351
01352
01353
01354
01355
01356
01357
01358
01359
01360 /* Scans the compiled group that starts at code and works out the length of what it matches, provided every alternative has the same fixed length. */
01361 /* A single character counts as 1 even when it occupies several bytes in UTF-8 mode (the extra bytes are stepped over). */
01362 /* Arguments: code = start of the group; options = compile options (PCRE_UTF8 is consulted); atend = FALSE makes OP_RECURSE return -3; cd = compile data (start_code is used to locate a recursion target). */
01363 /* Returns: the fixed length; -1 if there is no fixed length or an unsupported opcode is met; -2 if OP_ANYBYTE is met; -3 if OP_RECURSE is met while atend is FALSE. */
01364
01365 static int
01366 find_fixedlength(uschar *code, int options, BOOL atend, compile_data *cd)
01367 {
01368 int length = -1;
01369
01370 register int branchlength = 0;
01371 register uschar *cc = code + 1 + LINK_SIZE;
01372
01373 /* Scan along the opcodes for this branch. A length that differs from an earlier branch, or any item without a fixed length, ends the scan with a negative result. */
01374
01375
01376 for (;;)
01377 {
01378 int d;
01379 uschar *ce, *cs;
01380 register int op = *cc;
01381 switch (op)
01382 {
01383 case OP_CBRA:
01384 case OP_BRA:
01385 case OP_ONCE:
01386 case OP_COND:
01387 d = find_fixedlength(cc + ((op == OP_CBRA)? 2:0), options, atend, cd);
01388 if (d < 0) return d;
01389 branchlength += d;
01390 do cc += GET(cc, 1); while (*cc == OP_ALT);
01391 cc += 1 + LINK_SIZE;
01392 break;
01393
01394 /* End of a branch. The first branch fixes the length; every later branch must agree. OP_ALT starts another branch; anything else here ends the group, so the length is returned. */
01395
01396
01397
01398 case OP_ALT:
01399 case OP_KET:
01400 case OP_KETRMAX:
01401 case OP_KETRMIN:
01402 case OP_END:
01403 if (length < 0) length = branchlength;
01404 else if (length != branchlength) return -1;
01405 if (*cc != OP_ALT) return length;
01406 cc += 1 + LINK_SIZE;
01407 branchlength = 0;
01408 break;
01409
01410 /* A recursion or subroutine call. It is handled only when atend is TRUE; the target group is located from cd->start_code, and a call from inside its own target (cc between cs and ce) gives -1. */
01411
01412
01413
01414 case OP_RECURSE:
01415 if (!atend) return -3;
01416 cs = ce = (uschar *)cd->start_code + GET(cc, 1);
01417 do ce += GET(ce, 1); while (*ce == OP_ALT);
01418 if (cc > cs && cc < ce) return -1;
01419 d = find_fixedlength(cs + 2, options, atend, cd);
01420 if (d < 0) return d;
01421 branchlength += d;
01422 cc += 1 + LINK_SIZE;
01423 break;
01424
01425 /* Assertions add nothing to the length: step over all their alternatives, then over the final ket via the fall through below. */
01426
01427 case OP_ASSERT:
01428 case OP_ASSERT_NOT:
01429 case OP_ASSERTBACK:
01430 case OP_ASSERTBACK_NOT:
01431 do cc += GET(cc, 1); while (*cc == OP_ALT);
01432 /* Fall through */
01433
01434 /* Items that add nothing to the length: just step over them. */
01435
01436 case OP_REVERSE:
01437 case OP_CREF:
01438 case OP_NCREF:
01439 case OP_RREF:
01440 case OP_NRREF:
01441 case OP_DEF:
01442 case OP_OPT:
01443 case OP_CALLOUT:
01444 case OP_SOD:
01445 case OP_SOM:
01446 case OP_EOD:
01447 case OP_EODN:
01448 case OP_CIRC:
01449 case OP_DOLL:
01450 case OP_NOT_WORD_BOUNDARY:
01451 case OP_WORD_BOUNDARY:
01452 cc += _pcre_OP_lengths[*cc];
01453 break;
01454
01455 /* A single character item counts 1; in UTF-8 mode the additional bytes of a multibyte character are stepped over as well. */
01456
01457 case OP_CHAR:
01458 case OP_CHARNC:
01459 case OP_NOT:
01460 branchlength++;
01461 cc += 2;
01462 #ifdef SUPPORT_UTF8
01463 if ((options & PCRE_UTF8) != 0 && cc[-1] >= 0xc0)
01464 cc += _pcre_utf8_table4[cc[-1] & 0x3f];
01465 #endif
01466 break;
01467
01468 /* Exact repeats add their 2-byte count, read with GET2, to the length. For OP_TYPEEXACT, a repeated OP_PROP or OP_NOTPROP has two further bytes to step over. */
01469
01470
01471 case OP_EXACT:
01472 branchlength += GET2(cc,1);
01473 cc += 4;
01474 #ifdef SUPPORT_UTF8
01475 if ((options & PCRE_UTF8) != 0 && cc[-1] >= 0xc0)
01476 cc += _pcre_utf8_table4[cc[-1] & 0x3f];
01477 #endif
01478 break;
01479
01480 case OP_TYPEEXACT:
01481 branchlength += GET2(cc,1);
01482 if (cc[3] == OP_PROP || cc[3] == OP_NOTPROP) cc += 2;
01483 cc += 4;
01484 break;
01485
01486 /* Single character types. The property opcodes carry two extra bytes, which are stepped over before falling through. */
01487
01488 case OP_PROP:
01489 case OP_NOTPROP:
01490 cc += 2;
01491 /* Fall through */
01492
01493 case OP_NOT_DIGIT:
01494 case OP_DIGIT:
01495 case OP_NOT_WHITESPACE:
01496 case OP_WHITESPACE:
01497 case OP_NOT_WORDCHAR:
01498 case OP_WORDCHAR:
01499 case OP_ANY:
01500 case OP_ALLANY:
01501 branchlength++;
01502 cc++;
01503 break;
01504
01505 /* OP_ANYBYTE gets its own return value, -2, so that the caller can tell it apart from an ordinary variable-length failure. */
01506
01507 case OP_ANYBYTE:
01508 return -2;
01509
01510 /* Character classes. For OP_XCLASS, cc is adjusted so that the cc += 33 shared with the other class opcodes lands just past the item, whose total length is GET(cc, 1). */
01511
01512 #ifdef SUPPORT_UTF8
01513 case OP_XCLASS:
01514 cc += GET(cc, 1) - 33;
01515 /* Fall through */
01516 #endif
01517
01518 case OP_CLASS:
01519 case OP_NCLASS:
01520 cc += 33;
01521 /* A class may be followed by a repeat. Only a range whose minimum equals its maximum has a fixed length; with no repeat the class counts 1. */
01522 switch (*cc)
01523 {
01524 case OP_CRSTAR:
01525 case OP_CRMINSTAR:
01526 case OP_CRQUERY:
01527 case OP_CRMINQUERY:
01528 return -1;
01529
01530 case OP_CRRANGE:
01531 case OP_CRMINRANGE:
01532 if (GET2(cc,1) != GET2(cc,3)) return -1;
01533 branchlength += GET2(cc,1);
01534 cc += 5;
01535 break;
01536
01537 default:
01538 branchlength++;
01539 }
01540 break;
01541
01542 /* Any other opcode is treated as not having a fixed length. */
01543
01544 default:
01545 return -1;
01546 }
01547 }
01548 /* Control never gets here */
01549 }
01550
01551
01552
01553
01554
01555
01556
01557
01558
01559
01560
01561
01562
01563
01564
01565
01566
01567 /* Scan compiled code from 'code' for one particular bracket-type opcode. */
01568 /* number >= 0: return the OP_CBRA item whose 2-byte number field (GET2 at 1+LINK_SIZE) equals it. */
01569 /* number < 0: return the first OP_REVERSE item that is met instead. */
01570 /* utf8: when TRUE (SUPPORT_UTF8 builds only) extra bytes of multibyte characters are stepped over. */
01571 /* Returns a pointer to the matching opcode (const is cast away), or NULL if OP_END is reached first. */
01572 const uschar *
01573 _pcre_find_bracket(const uschar *code, BOOL utf8, int number)
01574 {
01575 for (;;)
01576 {
01577 register int c = *code;
01578 if (c == OP_END) return NULL;
01579
01580 /* OP_XCLASS is variable length: the step to the next item is read from the */
01581 /* item itself with GET(code, 1) instead of from the _pcre_OP_lengths table. */
01582
01583
01584 if (c == OP_XCLASS) code += GET(code, 1);
01585
01586 /* OP_REVERSE is the item wanted when the caller passed a negative number. */
01587
01588 else if (c == OP_REVERSE)
01589 {
01590 if (number < 0) return (uschar *)code;
01591 code += _pcre_OP_lengths[c];
01592 }
01593
01594 /* OP_CBRA: compare the 2-byte number stored after the link field. */
01595
01596 else if (c == OP_CBRA)
01597 {
01598 int n = GET2(code, 1+LINK_SIZE);
01599 if (n == number) return (uschar *)code;
01600 code += _pcre_OP_lengths[c];
01601 }
01602
01603 /* Any other opcode. For the OP_TYPExxx repeats the repeated item type is at */
01604 /* code[1] (code[3] for the UPTO/EXACT forms); when it is OP_PROP or OP_NOTPROP */
01605 /* two more bytes are stepped over in addition to the table length. */
01606
01607 else
01608 {
01609 switch(c)
01610 {
01611 case OP_TYPESTAR:
01612 case OP_TYPEMINSTAR:
01613 case OP_TYPEPLUS:
01614 case OP_TYPEMINPLUS:
01615 case OP_TYPEQUERY:
01616 case OP_TYPEMINQUERY:
01617 case OP_TYPEPOSSTAR:
01618 case OP_TYPEPOSPLUS:
01619 case OP_TYPEPOSQUERY:
01620 if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
01621 break;
01622
01623 case OP_TYPEUPTO:
01624 case OP_TYPEMINUPTO:
01625 case OP_TYPEEXACT:
01626 case OP_TYPEPOSUPTO:
01627 if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2;
01628 break;
01629 }
01630
01631 /* Fixed part of the step: this opcode's entry in the length table. */
01632
01633 code += _pcre_OP_lengths[c];
01634
01635 /* In UTF-8 mode the opcodes listed below may be followed by extra bytes. */
01636 /* code[-1] is the last byte covered by the table length (presumably the */
01637 /* lead byte of the character); if it is >= 0xc0 the number of additional */
01638 /* bytes to step over is looked up in _pcre_utf8_table4. */
01639 #ifdef SUPPORT_UTF8
01640 if (utf8) switch(c)
01641 {
01642 case OP_CHAR:
01643 case OP_CHARNC:
01644 case OP_EXACT:
01645 case OP_UPTO:
01646 case OP_MINUPTO:
01647 case OP_POSUPTO:
01648 case OP_STAR:
01649 case OP_MINSTAR:
01650 case OP_POSSTAR:
01651 case OP_PLUS:
01652 case OP_MINPLUS:
01653 case OP_POSPLUS:
01654 case OP_QUERY:
01655 case OP_MINQUERY:
01656 case OP_POSQUERY:
01657 if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
01658 break;
01659 }
01660 #else
01661 (void)(utf8); /* utf8 is not referenced anywhere else in this configuration */
01662 #endif
01663 }
01664 }
01665 }
01666
01667
01668
01669
01670
01671
01672
01673
01674
01675
01676
01677
01678 /* Scan compiled code from 'code' for the next OP_RECURSE item. */
01679 /* utf8: when TRUE (SUPPORT_UTF8 builds only) extra bytes of multibyte characters are stepped over. */
01680 /* Returns a pointer to the OP_RECURSE opcode, or NULL if OP_END is reached first. */
01681 /* Items are stepped over with the same rules that _pcre_find_bracket() uses. */
01682
01683 static const uschar *
01684 find_recurse(const uschar *code, BOOL utf8)
01685 {
01686 for (;;)
01687 {
01688 register int c = *code;
01689 if (c == OP_END) return NULL;
01690 if (c == OP_RECURSE) return code;
01691
01692 /* OP_XCLASS is variable length: the step to the next item is read from the */
01693 /* item itself with GET(code, 1) instead of from the _pcre_OP_lengths table. */
01694
01695
01696 if (c == OP_XCLASS) code += GET(code, 1);
01697
01698 /* Any other opcode. For the OP_TYPExxx repeats the repeated item type is at */
01699 /* code[1] (code[3] for the UPTO/EXACT forms); when it is OP_PROP or OP_NOTPROP */
01700 /* two more bytes are stepped over in addition to the table length. */
01701
01702 else
01703 {
01704 switch(c)
01705 {
01706 case OP_TYPESTAR:
01707 case OP_TYPEMINSTAR:
01708 case OP_TYPEPLUS:
01709 case OP_TYPEMINPLUS:
01710 case OP_TYPEQUERY:
01711 case OP_TYPEMINQUERY:
01712 case OP_TYPEPOSSTAR:
01713 case OP_TYPEPOSPLUS:
01714 case OP_TYPEPOSQUERY:
01715 if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
01716 break;
01717
01718 case OP_TYPEPOSUPTO:
01719 case OP_TYPEUPTO:
01720 case OP_TYPEMINUPTO:
01721 case OP_TYPEEXACT:
01722 if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2;
01723 break;
01724 }
01725
01726 /* Fixed part of the step: this opcode's entry in the length table. */
01727
01728 code += _pcre_OP_lengths[c];
01729
01730 /* In UTF-8 mode the opcodes listed below may be followed by extra bytes. */
01731 /* code[-1] is the last byte covered by the table length (presumably the */
01732 /* lead byte of the character); if it is >= 0xc0 the number of additional */
01733 /* bytes to step over is looked up in _pcre_utf8_table4. */
01734 #ifdef SUPPORT_UTF8
01735 if (utf8) switch(c)
01736 {
01737 case OP_CHAR:
01738 case OP_CHARNC:
01739 case OP_EXACT:
01740 case OP_UPTO:
01741 case OP_MINUPTO:
01742 case OP_POSUPTO:
01743 case OP_STAR:
01744 case OP_MINSTAR:
01745 case OP_POSSTAR:
01746 case OP_PLUS:
01747 case OP_MINPLUS:
01748 case OP_POSPLUS:
01749 case OP_QUERY:
01750 case OP_MINQUERY:
01751 case OP_POSQUERY:
01752 if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
01753 break;
01754 }
01755 #else
01756 (void)(utf8); /* utf8 is not referenced anywhere else in this configuration */
01757 #endif
01758 }
01759 }
01760 }
01761
01762
01763
01764
01765
01766
01767
01768
01769
01770
01771
01772
01773
01774
01775
01776
01777
01778
01779 /* Test whether one branch of compiled code could match without consuming anything. */
01780 /* code     points at the opcode that opens the branch; scanning starts after that opcode */
01781 /* endcode  scanning stops once the position is no longer below this address */
01782 /* utf8     TRUE to step over extra bytes of multibyte characters (SUPPORT_UTF8 builds only) */
01783 /* Returns FALSE as soon as an item from the "cannot be empty" lists below is met, else TRUE. */
01784 static BOOL
01785 could_be_empty_branch(const uschar *code, const uschar *endcode, BOOL utf8)
01786 {
01787 register int c;
01788 for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE);
01789 code < endcode;
01790 code = first_significant_code(code + _pcre_OP_lengths[c], NULL, 0, TRUE))
01791 {
01792 const uschar *ccode;
01793
01794 c = *code;
01795
01796 /* An OP_ASSERT group is stepped over whole by following its link fields */
01797 /* through every OP_ALT; the loop increment then passes the item it ends on. */
01798
01799 if (c == OP_ASSERT)
01800 {
01801 do code += GET(code, 1); while (*code == OP_ALT);
01802 c = *code;
01803 continue;
01804 }
01805
01806 /* A group introduced by OP_BRAZERO, OP_BRAMINZERO or OP_SKIPZERO is stepped over the same way. */
01807
01808 if (c == OP_BRAZERO || c == OP_BRAMINZERO || c == OP_SKIPZERO)
01809 {
01810 code += _pcre_OP_lengths[c];
01811 do code += GET(code, 1); while (*code == OP_ALT);
01812 c = *code;
01813 continue;
01814 }
01815
01816 /* Other groups: the group can be empty if any one of its alternatives can. */
01817
01818 if (c == OP_BRA || c == OP_CBRA || c == OP_ONCE || c == OP_COND)
01819 {
01820 BOOL empty_branch;
01821 if (GET(code, 1) == 0) return TRUE;
01822 /* (a zero link field above - presumably a group that is not closed yet; TODO confirm) */
01823
01824 /* An OP_COND whose first branch is not followed by OP_ALT is stepped over */
01825 /* without being examined, i.e. it is treated as possibly empty. */
01826
01827 if (c == OP_COND && code[GET(code, 1)] != OP_ALT)
01828 code += GET(code, 1);
01829 else
01830 {
01831 empty_branch = FALSE;
01832 do
01833 {
01834 if (!empty_branch && could_be_empty_branch(code, endcode, utf8))
01835 empty_branch = TRUE;
01836 code += GET(code, 1);
01837 }
01838 while (*code == OP_ALT);
01839 if (!empty_branch) return FALSE;
01840 }
01841
01842 c = *code;
01843 continue;
01844 }
01845
01846 /* Everything else is decided opcode by opcode. */
01847
01848 switch (c)
01849 {
01850
01851 /* Character classes: the decision depends on the repeat opcode (if any) */
01852 /* that follows the class data. For OP_XCLASS the class length is read */
01853 /* with GET(code, 1), and note that 'code' itself is advanced past the */
01854 /* class here, not just 'ccode'. */
01855
01856 #ifdef SUPPORT_UTF8
01857 case OP_XCLASS:
01858 ccode = code += GET(code, 1);
01859 goto CHECK_CLASS_REPEAT;
01860 #endif
01861
01862 case OP_CLASS:
01863 case OP_NCLASS:
01864 ccode = code + 33;
01865 /* (33 above is presumably 1 opcode byte + a 32-byte bit map; TODO confirm) */
01866 #ifdef SUPPORT_UTF8
01867 CHECK_CLASS_REPEAT:
01868 #endif
01869
01870 switch (*ccode)
01871 {
01872 case OP_CRSTAR:
01873 case OP_CRMINSTAR:
01874 case OP_CRQUERY:
01875 case OP_CRMINQUERY:
01876 break;
01877 /* Any other byte after the class, and the plus repeats, rule emptiness out. */
01878 default:
01879 case OP_CRPLUS:
01880 case OP_CRMINPLUS:
01881 return FALSE;
01882 /* Range repeat: only possibly empty when the first 2-byte count is zero. */
01883 case OP_CRRANGE:
01884 case OP_CRMINRANGE:
01885 if (GET2(ccode, 1) > 0) return FALSE;
01886 break;
01887 }
01888 break;
01889
01890 /* Opcodes for which FALSE is returned at once. */
01891
01892 case OP_PROP:
01893 case OP_NOTPROP:
01894 case OP_EXTUNI:
01895 case OP_NOT_DIGIT:
01896 case OP_DIGIT:
01897 case OP_NOT_WHITESPACE:
01898 case OP_WHITESPACE:
01899 case OP_NOT_WORDCHAR:
01900 case OP_WORDCHAR:
01901 case OP_ANY:
01902 case OP_ALLANY:
01903 case OP_ANYBYTE:
01904 case OP_CHAR:
01905 case OP_CHARNC:
01906 case OP_NOT:
01907 case OP_PLUS:
01908 case OP_MINPLUS:
01909 case OP_POSPLUS:
01910 case OP_EXACT:
01911 case OP_NOTPLUS:
01912 case OP_NOTMINPLUS:
01913 case OP_NOTPOSPLUS:
01914 case OP_NOTEXACT:
01915 case OP_TYPEPLUS:
01916 case OP_TYPEMINPLUS:
01917 case OP_TYPEPOSPLUS:
01918 case OP_TYPEEXACT:
01919 return FALSE;
01920
01921 /* Type repeats not listed above do not rule emptiness out; only the two */
01922 /* extra bytes present when the repeated type is OP_PROP or OP_NOTPROP */
01923 /* have to be stepped over (type at code[1]). */
01924 case OP_TYPESTAR:
01925 case OP_TYPEMINSTAR:
01926 case OP_TYPEPOSSTAR:
01927 case OP_TYPEQUERY:
01928 case OP_TYPEMINQUERY:
01929 case OP_TYPEPOSQUERY:
01930 if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
01931 break;
01932
01933 /* Same for the UPTO forms, where the type is at code[3]. */
01934
01935 case OP_TYPEUPTO:
01936 case OP_TYPEMINUPTO:
01937 case OP_TYPEPOSUPTO:
01938 if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2;
01939 break;
01940
01941 /* Reaching a KET or ALT opcode: nothing before it ruled emptiness out. */
01942
01943 case OP_KET:
01944 case OP_KETRMAX:
01945 case OP_KETRMIN:
01946 case OP_ALT:
01947 return TRUE;
01948
01949 /* Single-character repeats not listed above: in UTF-8 mode step over the */
01950 /* extra bytes when the byte at code[1] (code[3] for UPTO forms) is >= 0xc0. */
01951
01952 #ifdef SUPPORT_UTF8
01953 case OP_STAR:
01954 case OP_MINSTAR:
01955 case OP_POSSTAR:
01956 case OP_QUERY:
01957 case OP_MINQUERY:
01958 case OP_POSQUERY:
01959 if (utf8 && code[1] >= 0xc0) code += _pcre_utf8_table4[code[1] & 0x3f];
01960 break;
01961
01962 case OP_UPTO:
01963 case OP_MINUPTO:
01964 case OP_POSUPTO:
01965 if (utf8 && code[3] >= 0xc0) code += _pcre_utf8_table4[code[3] & 0x3f];
01966 break;
01967 #endif
01968 }
01969 }
01970
01971 return TRUE;
01972 }
01973
01974
01975
01976
01977
01978
01979
01980
01981
01982
01983
01984
01985
01986
01987
01988
01989 /* Apply could_be_empty_branch() to a chain of enclosing branches. */
01990 /* Starting at bcptr and following the 'outer' links, every entry whose */
01991 /* 'current' pointer is at or after 'code' is tested up to 'endcode'. */
01992 /* Returns FALSE at the first branch that cannot be empty; TRUE when the */
01993 /* chain ends or an entry whose 'current' lies before 'code' is reached. */
01994 static BOOL
01995 could_be_empty(const uschar *code, const uschar *endcode, branch_chain *bcptr,
01996   BOOL utf8)
01997 {
01998 branch_chain *level;
01999 for (level = bcptr; level != NULL && level->current >= code; level = level->outer)
02000   {
02001   if (!could_be_empty_branch(level->current, endcode, utf8)) return FALSE;
02002   }
02003 return TRUE;
02004 }
02005
02006
02007
02008
02009
02010
02011
02012
02013
02014
02015
02016
02017
02018
02019
02020
02021
02022
02023
02024
02025
02026
02027
02028
02029
02030
02031
02032
02033
02034 /* Look for the end of a bracketed item of the form [:name:], [.x.] or [=x=]. */
02035 /* ptr     points at the opening '['; ptr[1] is taken as the terminator character */
02036 /* endptr  on success receives the address of the closing terminator character */
02037 /* Returns TRUE when "<terminator>]" is found before any other ']' and before the */
02038 /* end of the string, FALSE otherwise. A "\]" pair is stepped over unexamined. */
02039 static BOOL
02040 check_posix_syntax(const uschar *ptr, const uschar **endptr)
02041 {
02042 const uschar *scan;
02043 int closer;
02044 closer = ptr[1];
02045 for (scan = ptr + 2; *scan != 0; scan++)
02046   {
02047   /* Escaped ']': consume the backslash here, the loop increment takes the ']'. */
02048   if (*scan == CHAR_BACKSLASH && scan[1] == CHAR_RIGHT_SQUARE_BRACKET)
02049     { scan++; continue; }
02050   if (*scan == CHAR_RIGHT_SQUARE_BRACKET) return FALSE;
02051   if (*scan != closer || scan[1] != CHAR_RIGHT_SQUARE_BRACKET) continue;
02052   /* Terminator immediately followed by ']': report where the terminator is. */
02053   *endptr = scan;
02054   return TRUE;
02055   }
02056 return FALSE;
02057 }
02058
02059
02060
02061
02062
02063
02064
02065
02066
02067
02068
02069
02070
02071 /* Look a name up in the posix_names / posix_name_lengths tables. */
02072 /* ptr  start of the name text, compared with strncmp() over len bytes */
02073 /* len  length of the name */
02074 /* Returns the index of the matching table entry, or -1 when none matches. */
02075 /* The list ends at a zero length; each name occupies its length plus one byte. */
02076 static int
02077 check_posix_name(const uschar *ptr, int len)
02078 {
02079 const char *entry = posix_names;
02080 int idx;
02081 for (idx = 0; posix_name_lengths[idx] != 0; idx++)
02082   {
02083   int entry_len = posix_name_lengths[idx];
02084   if (entry_len == len && strncmp((const char *)ptr, entry, len) == 0)
02085     return idx;
02086   entry += entry_len + 1;
02087   }
02088 return -1;
02089 }
02090
02091
02092
02093
02094
02095
02096
02097
02098
02099
02100
02101
02102
02103
02104
02105
02106
02107
02108
02109
02110
02111
02112
02113
02114
02115
02116
02117
02118 /* Add 'adjust' to the offsets that belong to OP_RECURSE items found from */
02119 /* 'group' onwards (located with find_recurse(), honouring utf8). */
02120 /* cd->start_code is the base the stored offsets are relative to; the */
02121 /* list of LINK_SIZE-wide entries between save_hwm and cd->hwm is checked */
02122 /* first for each item. Nothing is returned; code and list are patched in place. */
02123 static void
02124 adjust_recurse(uschar *group, int adjust, BOOL utf8, compile_data *cd,
02125   uschar *save_hwm)
02126 {
02127 uschar *rec;
02128
02129 /* Each search resumes just past the previous item: one opcode byte plus */
02130 /* a LINK_SIZE-wide offset operand. */
02131 for (rec = (uschar *)find_recurse(group, utf8);
02132      rec != NULL;
02133      rec = (uschar *)find_recurse(rec + 1 + LINK_SIZE, utf8))
02134   {
02135   uschar *slot = save_hwm;
02136   BOOL patched = FALSE;
02137   int target;
02138
02139   /* If a list entry holds the offset of this item's operand (rec + 1), */
02140   /* that entry is adjusted and the item itself is left untouched. */
02141   while (slot < cd->hwm)
02142     {
02143     target = GET(slot, 0);
02144     if (cd->start_code + target == rec + 1)
02145       {
02146       PUT(slot, 0, target + adjust);
02147       patched = TRUE;
02148       break;
02149       }
02150     slot += LINK_SIZE;
02151     }
02152   if (patched) continue;
02153
02154   /* Otherwise adjust the item's own operand, but only when it points at or beyond 'group'. */
02155   target = GET(rec, 1);
02156   if (cd->start_code + target >= group) PUT(rec, 1, target + adjust);
02157   }
02158 }
02159
02160
02161
02162
02163
02164
02165
02166
02167
02168
02169
02170
02171
02172 /* Write an automatic callout item at 'code': the OP_CALLOUT opcode, the */
02173 /* number 255, the offset of 'ptr' from cd->start_pattern, and a zero in the */
02174 /* second field (the one complete_callout() later overwrites). */
02175 /* Returns the address just past the item: 2 bytes plus two LINK_SIZE-wide fields. */
02176 static uschar *
02177 auto_callout(uschar *code, const uschar *ptr, compile_data *cd)
02178 {
02179 uschar *fields = code + 2;
02180 code[0] = OP_CALLOUT;
02181 code[1] = 255;
02182 PUT(fields, 0, ptr - cd->start_pattern);
02183 PUT(fields, LINK_SIZE, 0);
02184 return fields + 2*LINK_SIZE;
02185 }
02186
02187
02188
02189
02190
02191
02192
02193
02194
02195
02196
02197
02198
02199
02200 /* Fill in the second field of an earlier callout item. */
02201 /* previous_callout  the callout item; the pattern offset stored at byte 2 is read back */
02202 /* ptr               current pattern position, measured from cd->start_pattern */
02203 /* The difference between the two is written at byte 2 + LINK_SIZE of the item. */
02204 static void
02205 complete_callout(uschar *previous_callout, const uschar *ptr, compile_data *cd)
02206 {
02207 int recorded = GET(previous_callout, 2);
02208 int length = (ptr - cd->start_pattern) - recorded;
02209 PUT(previous_callout, 2 + LINK_SIZE, length);
02210 }
02211
02212
02213
02214 #ifdef SUPPORT_UCP
02215
02216
02217
02218
02219
02220
02221
02222
02223
02224
02225
02226
02227
02228 /* Find the next run of characters in *cptr..d whose other-case values are consecutive. */
02229 /* cptr   in: first character to examine; out: first character not covered by the run */
02230 /* ocptr  out: other case of the first character that has one; odptr out: end of that run */
02231 /* Returns FALSE (outputs untouched) when no character up to d differs from its other case. */
02232 static BOOL
02233 get_othercase_range(unsigned int *cptr, unsigned int d, unsigned int *ocptr,
02234   unsigned int *odptr)
02235 {
02236 unsigned int ch = *cptr, other = 0, expected;
02237 /* Skip characters whose UCD_OTHERCASE value is the character itself. */
02238 while (ch <= d)
02239   {
02240   other = UCD_OTHERCASE(ch);
02241   if (other != ch) break;
02242   ch++;
02243   }
02244 if (ch > d) return FALSE;
02245 /* Extend the run while each following character maps to the next value. */
02246 *ocptr = other;
02247 expected = other + 1;
02248 ch++;
02249 while (ch <= d && UCD_OTHERCASE(ch) == expected)
02250   {
02251   ch++;
02252   expected++;
02253   }
02254 *odptr = expected - 1;
02255 *cptr = ch;
02256 return TRUE;
02257 }
02258 #endif
02259
02260
02261
02262
02263
02264
02265
02266
02267
02268
02269
02270
02271
02272
02273
02274
02275
02276
02277 /* Compare the current item (op_code / item) with whatever comes next in the pattern. */
02278 /* Returns TRUE when the checks below show that the next item cannot match what the */
02279 /* current item matches; FALSE when it might, or when the combination is not handled. */
02280 /* utf8_char: UTF-8 bytes of 'item', decoded with GETCHAR when utf8 is set and item > 127. */
02281 /* ptr: pattern text following the current item; options and cd: compile-time context. */
02282 static BOOL
02283 check_auto_possessive(int op_code, int item, BOOL utf8, uschar *utf8_char,
02284 const uschar *ptr, int options, compile_data *cd)
02285 {
02286 int next;
02287
02288 /* With PCRE_EXTENDED set, step over white space and #-comments (to end of line) first. */
02289
02290 if ((options & PCRE_EXTENDED) != 0)
02291 {
02292 for (;;)
02293 {
02294 while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;
02295 if (*ptr == CHAR_NUMBER_SIGN)
02296 {
02297 while (*(++ptr) != 0)
02298 if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }
02299 }
02300 else break;
02301 }
02302 }
02303
02304 /* Fetch the next item. A backslash is decoded by check_escape() (any error */
02305 /* gives FALSE); any other non-metacharacter is taken as a literal, read as */
02306 /* a whole UTF-8 character when utf8 is set; a metacharacter gives FALSE. */
02307 if (*ptr == CHAR_BACKSLASH)
02308 {
02309 int temperrorcode = 0;
02310 next = check_escape(&ptr, &temperrorcode, cd->bracount, options, FALSE);
02311 if (temperrorcode != 0) return FALSE;
02312 ptr++;
02313 }
02314
02315 else if ((cd->ctypes[*ptr] & ctype_meta) == 0)
02316 {
02317 #ifdef SUPPORT_UTF8
02318 if (utf8) { GETCHARINC(next, ptr); } else
02319 #endif
02320 next = *ptr++;
02321 }
02322
02323 else return FALSE;
02324
02325 /* Same white space / comment skipping, now after the next item. */
02326
02327 if ((options & PCRE_EXTENDED) != 0)
02328 {
02329 for (;;)
02330 {
02331 while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;
02332 if (*ptr == CHAR_NUMBER_SIGN)
02333 {
02334 while (*(++ptr) != 0)
02335 if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }
02336 }
02337 else break;
02338 }
02339 }
02340
02341 /* Give up if the next item is followed by *, ? or the text "{0," - presumably because it may then match nothing. */
02342
02343 if (*ptr == CHAR_ASTERISK || *ptr == CHAR_QUESTION_MARK ||
02344 strncmp((char *)ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0)
02345 return FALSE;
02346
02347 /* next >= 0 means a literal character value. Every case of this first */
02348 /* switch returns, so the second switch further down is reached only when */
02349 /* next is negative. */
02350
02351 /* OP_CHAR: TRUE when the two characters differ. */
02352
02353
02354
02355 if (next >= 0) switch(op_code)
02356 {
02357 case OP_CHAR:
02358 #ifdef SUPPORT_UTF8
02359 if (utf8 && item > 127) { GETCHAR(item, utf8_char); }
02360 #else
02361 (void)(utf8_char);
02362 #endif
02363 return item != next;
02364
02365 /* OP_CHARNC: FALSE when the characters are equal; otherwise TRUE only if */
02366 /* item also differs from the other case of next (cd->fcc[] below 128 in */
02367 /* UTF-8 mode, UCD_OTHERCASE above that when SUPPORT_UCP is available). */
02368
02369 case OP_CHARNC:
02370 #ifdef SUPPORT_UTF8
02371 if (utf8 && item > 127) { GETCHAR(item, utf8_char); }
02372 #endif
02373 if (item == next) return FALSE;
02374 #ifdef SUPPORT_UTF8
02375 if (utf8)
02376 {
02377 unsigned int othercase;
02378 if (next < 128) othercase = cd->fcc[next]; else
02379 #ifdef SUPPORT_UCP
02380 othercase = UCD_OTHERCASE((unsigned int)next);
02381 #else
02382 othercase = NOTACHAR;
02383 #endif
02384 return (unsigned int)item != othercase;
02385 }
02386 else
02387 #endif
02388 return (item != cd->fcc[next]);
02389
02390 /* OP_NOT: the test is inverted - TRUE when next equals item, or equals its other case under PCRE_CASELESS. */
02391
02392 case OP_NOT:
02393 if (item == next) return TRUE;
02394 if ((options & PCRE_CASELESS) == 0) return FALSE;
02395 #ifdef SUPPORT_UTF8
02396 if (utf8)
02397 {
02398 unsigned int othercase;
02399 if (next < 128) othercase = cd->fcc[next]; else
02400 #ifdef SUPPORT_UCP
02401 othercase = UCD_OTHERCASE(next);
02402 #else
02403 othercase = NOTACHAR;
02404 #endif
02405 return (unsigned int)item == othercase;
02406 }
02407 else
02408 #endif
02409 return (item == cd->fcc[next]);
02410 /* Type opcodes: cd->ctypes[] is consulted only for next <= 127; larger values count as outside the type. */
02411 case OP_DIGIT:
02412 return next > 127 || (cd->ctypes[next] & ctype_digit) == 0;
02413
02414 case OP_NOT_DIGIT:
02415 return next <= 127 && (cd->ctypes[next] & ctype_digit) != 0;
02416
02417 case OP_WHITESPACE:
02418 return next > 127 || (cd->ctypes[next] & ctype_space) == 0;
02419
02420 case OP_NOT_WHITESPACE:
02421 return next <= 127 && (cd->ctypes[next] & ctype_space) != 0;
02422
02423 case OP_WORDCHAR:
02424 return next > 127 || (cd->ctypes[next] & ctype_word) == 0;
02425
02426 case OP_NOT_WORDCHAR:
02427 return next <= 127 && (cd->ctypes[next] & ctype_word) != 0;
02428 /* OP_HSPACE / OP_NOT_HSPACE: next is checked against the explicit code point list below. */
02429 case OP_HSPACE:
02430 case OP_NOT_HSPACE:
02431 switch(next)
02432 {
02433 case 0x09:
02434 case 0x20:
02435 case 0xa0:
02436 case 0x1680:
02437 case 0x180e:
02438 case 0x2000:
02439 case 0x2001:
02440 case 0x2002:
02441 case 0x2003:
02442 case 0x2004:
02443 case 0x2005:
02444 case 0x2006:
02445 case 0x2007:
02446 case 0x2008:
02447 case 0x2009:
02448 case 0x200A:
02449 case 0x202f:
02450 case 0x205f:
02451 case 0x3000:
02452 return op_code != OP_HSPACE;
02453 default:
02454 return op_code == OP_HSPACE;
02455 }
02456 /* OP_VSPACE / OP_NOT_VSPACE: likewise, with the list of code points below. */
02457 case OP_VSPACE:
02458 case OP_NOT_VSPACE:
02459 switch(next)
02460 {
02461 case 0x0a:
02462 case 0x0b:
02463 case 0x0c:
02464 case 0x0d:
02465 case 0x85:
02466 case 0x2028:
02467 case 0x2029:
02468 return op_code != OP_VSPACE;
02469 default:
02470 return op_code == OP_VSPACE;
02471 }
02472
02473 default:
02474 return FALSE;
02475 }
02476
02477 /* From here on next is negative: it is the negated ESC_ code returned by */
02478 /* check_escape(), so -next is compared against the ESC_ constants. */
02479
02480 switch(op_code)
02481 {
02482 case OP_CHAR:
02483 case OP_CHARNC:
02484 #ifdef SUPPORT_UTF8
02485 if (utf8 && item > 127) { GETCHAR(item, utf8_char); }
02486 #endif
02487 switch(-next)
02488 {
02489 case ESC_d:
02490 return item > 127 || (cd->ctypes[item] & ctype_digit) == 0;
02491
02492 case ESC_D:
02493 return item <= 127 && (cd->ctypes[item] & ctype_digit) != 0;
02494
02495 case ESC_s:
02496 return item > 127 || (cd->ctypes[item] & ctype_space) == 0;
02497
02498 case ESC_S:
02499 return item <= 127 && (cd->ctypes[item] & ctype_space) != 0;
02500
02501 case ESC_w:
02502 return item > 127 || (cd->ctypes[item] & ctype_word) == 0;
02503
02504 case ESC_W:
02505 return item <= 127 && (cd->ctypes[item] & ctype_word) != 0;
02506
02507 case ESC_h:
02508 case ESC_H:
02509 switch(item)
02510 {
02511 case 0x09:
02512 case 0x20:
02513 case 0xa0:
02514 case 0x1680:
02515 case 0x180e:
02516 case 0x2000:
02517 case 0x2001:
02518 case 0x2002:
02519 case 0x2003:
02520 case 0x2004:
02521 case 0x2005:
02522 case 0x2006:
02523 case 0x2007:
02524 case 0x2008:
02525 case 0x2009:
02526 case 0x200A:
02527 case 0x202f:
02528 case 0x205f:
02529 case 0x3000:
02530 return -next != ESC_h;
02531 default:
02532 return -next == ESC_h;
02533 }
02534
02535 case ESC_v:
02536 case ESC_V:
02537 switch(item)
02538 {
02539 case 0x0a:
02540 case 0x0b:
02541 case 0x0c:
02542 case 0x0d:
02543 case 0x85:
02544 case 0x2028:
02545 case 0x2029:
02546 return -next != ESC_v;
02547 default:
02548 return -next == ESC_v;
02549 }
02550
02551 default:
02552 return FALSE;
02553 }
02554 /* Type opcode followed by a type escape: TRUE only for the pairs listed in each case. */
02555 case OP_DIGIT:
02556 return next == -ESC_D || next == -ESC_s || next == -ESC_W ||
02557 next == -ESC_h || next == -ESC_v;
02558
02559 case OP_NOT_DIGIT:
02560 return next == -ESC_d;
02561
02562 case OP_WHITESPACE:
02563 return next == -ESC_S || next == -ESC_d || next == -ESC_w;
02564
02565 case OP_NOT_WHITESPACE:
02566 return next == -ESC_s || next == -ESC_h || next == -ESC_v;
02567
02568 case OP_HSPACE:
02569 return next == -ESC_S || next == -ESC_H || next == -ESC_d || next == -ESC_w;
02570
02571 case OP_NOT_HSPACE:
02572 return next == -ESC_h;
02573
02574
02575 case OP_VSPACE:
02576 return next == -ESC_V || next == -ESC_d || next == -ESC_w;
02577
02578 case OP_NOT_VSPACE:
02579 return next == -ESC_v;
02580
02581 case OP_WORDCHAR:
02582 return next == -ESC_W || next == -ESC_s || next == -ESC_h || next == -ESC_v;
02583
02584 case OP_NOT_WORDCHAR:
02585 return next == -ESC_w || next == -ESC_d;
02586
02587 default:
02588 return FALSE;
02589 }
02590
02591 /* Not reached: every path through the switch above returns. */
02592 }
02593
02594
02595
02596
02597
02598
02599
02600
02601
02602
02603
02604
02605
02606
02607
02608
02609
02610
02611
02612
02613
02614
02615
02616
02617
02618
02619
02620
02621
02622 static BOOL
02623 compile_branch(int *optionsptr, uschar **codeptr, const uschar **ptrptr,
02624 int *errorcodeptr, int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr,
02625 compile_data *cd, int *lengthptr)
02626 {
02627 int repeat_type, op_type;
02628 int repeat_min = 0, repeat_max = 0;
02629 int bravalue = 0;
02630 int greedy_default, greedy_non_default;
02631 int firstbyte, reqbyte;
02632 int zeroreqbyte, zerofirstbyte;
02633 int req_caseopt, reqvary, tempreqvary;
02634 int options = *optionsptr;
02635 int after_manual_callout = 0;
02636 int length_prevgroup = 0;
02637 register int c;
02638 register uschar *code = *codeptr;
02639 uschar *last_code = code;
02640 uschar *orig_code = code;
02641 uschar *tempcode;
02642 BOOL inescq = FALSE;
02643 BOOL groupsetfirstbyte = FALSE;
02644 const uschar *ptr = *ptrptr;
02645 const uschar *tempptr;
02646 uschar *previous = NULL;
02647 uschar *previous_callout = NULL;
02648 uschar *save_hwm = NULL;
02649 uschar classbits[32];
02650
02651 #ifdef SUPPORT_UTF8
02652 BOOL class_utf8;
02653 BOOL utf8 = (options & PCRE_UTF8) != 0;
02654 uschar *class_utf8data;
02655 uschar *class_utf8data_base;
02656 uschar utf8_char[6];
02657 #else
02658 BOOL utf8 = FALSE;
02659 uschar *utf8_char = NULL;
02660 #endif
02661
02662 #ifdef DEBUG
02663 if (lengthptr != NULL) DPRINTF((">> start branch\n"));
02664 #endif
02665
02666
02667
02668 greedy_default = ((options & PCRE_UNGREEDY) != 0);
02669 greedy_non_default = greedy_default ^ 1;
02670
02671
02672
02673
02674
02675
02676
02677
02678
02679
02680
02681 firstbyte = reqbyte = zerofirstbyte = zeroreqbyte = REQ_UNSET;
02682
02683
02684
02685
02686
02687
02688 req_caseopt = ((options & PCRE_CASELESS) != 0)? REQ_CASELESS : 0;
02689
02690
02691
02692 for (;; ptr++)
02693 {
02694 BOOL negate_class;
02695 BOOL should_flip_negation;
02696 BOOL possessive_quantifier;
02697 BOOL is_quantifier;
02698 BOOL is_recurse;
02699 BOOL reset_bracount;
02700 int class_charcount;
02701 int class_lastchar;
02702 int newoptions;
02703 int recno;
02704 int refsign;
02705 int skipbytes;
02706 int subreqbyte;
02707 int subfirstbyte;
02708 int terminator;
02709 int mclength;
02710 uschar mcbuffer[8];
02711
02712
02713
02714 c = *ptr;
02715
02716
02717
02718
02719 if (lengthptr != NULL)
02720 {
02721 #ifdef DEBUG
02722 if (code > cd->hwm) cd->hwm = code;
02723 #endif
02724 if (code > cd->start_workspace + COMPILE_WORK_SIZE)
02725 {
02726 *errorcodeptr = ERR52;
02727 goto FAILED;
02728 }
02729
02730
02731
02732
02733
02734
02735
02736 if (code < last_code) code = last_code;
02737
02738
02739
02740 if (OFLOW_MAX - *lengthptr < code - last_code)
02741 {
02742 *errorcodeptr = ERR20;
02743 goto FAILED;
02744 }
02745
02746 *lengthptr += code - last_code;
02747 DPRINTF(("length=%d added %d c=%c\n", *lengthptr, code - last_code, c));
02748
02749
02750
02751
02752
02753 if (previous != NULL)
02754 {
02755 if (previous > orig_code)
02756 {
02757 memmove(orig_code, previous, code - previous);
02758 code -= previous - orig_code;
02759 previous = orig_code;
02760 }
02761 }
02762 else code = orig_code;
02763
02764
02765
02766
02767 last_code = code;
02768 }
02769
02770
02771
02772
02773 else if (cd->hwm > cd->start_workspace + COMPILE_WORK_SIZE)
02774 {
02775 *errorcodeptr = ERR52;
02776 goto FAILED;
02777 }
02778
02779
02780
02781 if (inescq && c != 0)
02782 {
02783 if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E)
02784 {
02785 inescq = FALSE;
02786 ptr++;
02787 continue;
02788 }
02789 else
02790 {
02791 if (previous_callout != NULL)
02792 {
02793 if (lengthptr == NULL)
02794 complete_callout(previous_callout, ptr, cd);
02795 previous_callout = NULL;
02796 }
02797 if ((options & PCRE_AUTO_CALLOUT) != 0)
02798 {
02799 previous_callout = code;
02800 code = auto_callout(code, ptr, cd);
02801 }
02802 goto NORMAL_CHAR;
02803 }
02804 }
02805
02806
02807
02808
02809 is_quantifier =
02810 c == CHAR_ASTERISK || c == CHAR_PLUS || c == CHAR_QUESTION_MARK ||
02811 (c == CHAR_LEFT_CURLY_BRACKET && is_counted_repeat(ptr+1));
02812
02813 if (!is_quantifier && previous_callout != NULL &&
02814 after_manual_callout-- <= 0)
02815 {
02816 if (lengthptr == NULL)
02817 complete_callout(previous_callout, ptr, cd);
02818 previous_callout = NULL;
02819 }
02820
02821
02822
02823 if ((options & PCRE_EXTENDED) != 0)
02824 {
02825 if ((cd->ctypes[c] & ctype_space) != 0) continue;
02826 if (c == CHAR_NUMBER_SIGN)
02827 {
02828 while (*(++ptr) != 0)
02829 {
02830 if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; }
02831 }
02832 if (*ptr != 0) continue;
02833
02834
02835 c = 0;
02836 }
02837 }
02838
02839
02840
02841 if ((options & PCRE_AUTO_CALLOUT) != 0 && !is_quantifier)
02842 {
02843 previous_callout = code;
02844 code = auto_callout(code, ptr, cd);
02845 }
02846
02847 switch(c)
02848 {
02849
02850 case 0:
02851 case CHAR_VERTICAL_LINE:
02852 case CHAR_RIGHT_PARENTHESIS:
02853 *firstbyteptr = firstbyte;
02854 *reqbyteptr = reqbyte;
02855 *codeptr = code;
02856 *ptrptr = ptr;
02857 if (lengthptr != NULL)
02858 {
02859 if (OFLOW_MAX - *lengthptr < code - last_code)
02860 {
02861 *errorcodeptr = ERR20;
02862 goto FAILED;
02863 }
02864 *lengthptr += code - last_code;
02865 DPRINTF((">> end branch\n"));
02866 }
02867 return TRUE;
02868
02869
02870
02871
02872
02873
02874 case CHAR_CIRCUMFLEX_ACCENT:
02875 if ((options & PCRE_MULTILINE) != 0)
02876 {
02877 if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
02878 }
02879 previous = NULL;
02880 *code++ = OP_CIRC;
02881 break;
02882
02883 case CHAR_DOLLAR_SIGN:
02884 previous = NULL;
02885 *code++ = OP_DOLL;
02886 break;
02887
02888
02889
02890
02891 case CHAR_DOT:
02892 if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
02893 zerofirstbyte = firstbyte;
02894 zeroreqbyte = reqbyte;
02895 previous = code;
02896 *code++ = ((options & PCRE_DOTALL) != 0)? OP_ALLANY: OP_ANY;
02897 break;
02898
02899
02900
02901
02902
02903
02904
02905
02906
02907
02908
02909
02910
02911
02912
02913
02914
02915 case CHAR_RIGHT_SQUARE_BRACKET:
02916 if ((cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)
02917 {
02918 *errorcodeptr = ERR64;
02919 goto FAILED;
02920 }
02921 goto NORMAL_CHAR;
02922
02923 case CHAR_LEFT_SQUARE_BRACKET:
02924 previous = code;
02925
02926
02927
02928
02929 if ((ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
02930 ptr[1] == CHAR_EQUALS_SIGN) &&
02931 check_posix_syntax(ptr, &tempptr))
02932 {
02933 *errorcodeptr = (ptr[1] == CHAR_COLON)? ERR13 : ERR31;
02934 goto FAILED;
02935 }
02936
02937
02938
02939
02940
02941 negate_class = FALSE;
02942 for (;;)
02943 {
02944 c = *(++ptr);
02945 if (c == CHAR_BACKSLASH)
02946 {
02947 if (ptr[1] == CHAR_E)
02948 ptr++;
02949 else if (strncmp((const char *)ptr+1,
02950 STR_Q STR_BACKSLASH STR_E, 3) == 0)
02951 ptr += 3;
02952 else
02953 break;
02954 }
02955 else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT)
02956 negate_class = TRUE;
02957 else break;
02958 }
02959
02960
02961
02962
02963
02964
02965 if (c == CHAR_RIGHT_SQUARE_BRACKET &&
02966 (cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)
02967 {
02968 *code++ = negate_class? OP_ALLANY : OP_FAIL;
02969 if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
02970 zerofirstbyte = firstbyte;
02971 break;
02972 }
02973
02974
02975
02976
02977
02978 should_flip_negation = FALSE;
02979
02980
02981
02982
02983
02984 class_charcount = 0;
02985 class_lastchar = -1;
02986
02987
02988
02989
02990
02991
02992 memset(classbits, 0, 32 * sizeof(uschar));
02993
02994 #ifdef SUPPORT_UTF8
02995 class_utf8 = FALSE;
02996 class_utf8data = code + LINK_SIZE + 2;
02997 class_utf8data_base = class_utf8data;
02998 #endif
02999
03000
03001
03002
03003
03004 if (c != 0) do
03005 {
03006 const uschar *oldptr;
03007
03008 #ifdef SUPPORT_UTF8
03009 if (utf8 && c > 127)
03010 {
03011 GETCHARLEN(c, ptr, ptr);
03012 }
03013
03014
03015
03016
03017
03018
03019 if (lengthptr != NULL)
03020 {
03021 *lengthptr += class_utf8data - class_utf8data_base;
03022 class_utf8data = class_utf8data_base;
03023 }
03024
03025 #endif
03026
03027
03028
03029 if (inescq)
03030 {
03031 if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E)
03032 {
03033 inescq = FALSE;
03034 ptr++;
03035 continue;
03036 }
03037 goto CHECK_RANGE;
03038 }
03039
03040
03041
03042
03043
03044
03045
03046 if (c == CHAR_LEFT_SQUARE_BRACKET &&
03047 (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
03048 ptr[1] == CHAR_EQUALS_SIGN) && check_posix_syntax(ptr, &tempptr))
03049 {
03050 BOOL local_negate = FALSE;
03051 int posix_class, taboffset, tabopt;
03052 register const uschar *cbits = cd->cbits;
03053 uschar pbits[32];
03054
03055 if (ptr[1] != CHAR_COLON)
03056 {
03057 *errorcodeptr = ERR31;
03058 goto FAILED;
03059 }
03060
03061 ptr += 2;
03062 if (*ptr == CHAR_CIRCUMFLEX_ACCENT)
03063 {
03064 local_negate = TRUE;
03065 should_flip_negation = TRUE;
03066 ptr++;
03067 }
03068
03069 posix_class = check_posix_name(ptr, tempptr - ptr);
03070 if (posix_class < 0)
03071 {
03072 *errorcodeptr = ERR30;
03073 goto FAILED;
03074 }
03075
03076
03077
03078
03079
03080 if ((options & PCRE_CASELESS) != 0 && posix_class <= 2)
03081 posix_class = 0;
03082
03083
03084
03085
03086
03087
03088 posix_class *= 3;
03089
03090
03091
03092 memcpy(pbits, cbits + posix_class_maps[posix_class],
03093 32 * sizeof(uschar));
03094
03095
03096
03097 taboffset = posix_class_maps[posix_class + 1];
03098 tabopt = posix_class_maps[posix_class + 2];
03099
03100 if (taboffset >= 0)
03101 {
03102 if (tabopt >= 0)
03103 for (c = 0; c < 32; c++) pbits[c] |= cbits[c + taboffset];
03104 else
03105 for (c = 0; c < 32; c++) pbits[c] &= ~cbits[c + taboffset];
03106 }
03107
03108
03109
03110
03111 if (tabopt < 0) tabopt = -tabopt;
03112 if (tabopt == 1) pbits[1] &= ~0x3c;
03113 else if (tabopt == 2) pbits[11] &= 0x7f;
03114
03115
03116
03117
03118 if (local_negate)
03119 for (c = 0; c < 32; c++) classbits[c] |= ~pbits[c];
03120 else
03121 for (c = 0; c < 32; c++) classbits[c] |= pbits[c];
03122
03123 ptr = tempptr + 1;
03124 class_charcount = 10;
03125 continue;
03126 }
03127
03128
03129
03130
03131
03132
03133
03134
03135 if (c == CHAR_BACKSLASH)
03136 {
03137 c = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);
03138 if (*errorcodeptr != 0) goto FAILED;
03139
03140 if (-c == ESC_b) c = CHAR_BS;
03141 else if (-c == ESC_X) c = CHAR_X;
03142 else if (-c == ESC_R) c = CHAR_R;
03143 else if (-c == ESC_Q)
03144 {
03145 if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
03146 {
03147 ptr += 2;
03148 }
03149 else inescq = TRUE;
03150 continue;
03151 }
03152 else if (-c == ESC_E) continue;
03153
03154 if (c < 0)
03155 {
03156 register const uschar *cbits = cd->cbits;
03157 class_charcount += 2;
03158
03159
03160
03161 if (lengthptr == NULL) switch (-c)
03162 {
03163 case ESC_d:
03164 for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_digit];
03165 continue;
03166
03167 case ESC_D:
03168 should_flip_negation = TRUE;
03169 for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_digit];
03170 continue;
03171
03172 case ESC_w:
03173 for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_word];
03174 continue;
03175
03176 case ESC_W:
03177 should_flip_negation = TRUE;
03178 for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_word];
03179 continue;
03180
03181 case ESC_s:
03182 for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_space];
03183 classbits[1] &= ~0x08;
03184 continue;
03185
03186 case ESC_S:
03187 should_flip_negation = TRUE;
03188 for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space];
03189 classbits[1] |= 0x08;
03190 continue;
03191
03192 default:
03193 break;
03194 }
03195
03196
03197
03198 else if (c == -ESC_d || c == -ESC_D || c == -ESC_w ||
03199 c == -ESC_W || c == -ESC_s || c == -ESC_S) continue;
03200
03201
03202
03203
03204 if (-c == ESC_h)
03205 {
03206 SETBIT(classbits, 0x09);
03207 SETBIT(classbits, 0x20);
03208 SETBIT(classbits, 0xa0);
03209 #ifdef SUPPORT_UTF8
03210 if (utf8)
03211 {
03212 class_utf8 = TRUE;
03213 *class_utf8data++ = XCL_SINGLE;
03214 class_utf8data += _pcre_ord2utf8(0x1680, class_utf8data);
03215 *class_utf8data++ = XCL_SINGLE;
03216 class_utf8data += _pcre_ord2utf8(0x180e, class_utf8data);
03217 *class_utf8data++ = XCL_RANGE;
03218 class_utf8data += _pcre_ord2utf8(0x2000, class_utf8data);
03219 class_utf8data += _pcre_ord2utf8(0x200A, class_utf8data);
03220 *class_utf8data++ = XCL_SINGLE;
03221 class_utf8data += _pcre_ord2utf8(0x202f, class_utf8data);
03222 *class_utf8data++ = XCL_SINGLE;
03223 class_utf8data += _pcre_ord2utf8(0x205f, class_utf8data);
03224 *class_utf8data++ = XCL_SINGLE;
03225 class_utf8data += _pcre_ord2utf8(0x3000, class_utf8data);
03226 }
03227 #endif
03228 continue;
03229 }
03230
03231 if (-c == ESC_H)
03232 {
03233 for (c = 0; c < 32; c++)
03234 {
03235 int x = 0xff;
03236 switch (c)
03237 {
03238 case 0x09/8: x ^= 1 << (0x09%8); break;
03239 case 0x20/8: x ^= 1 << (0x20%8); break;
03240 case 0xa0/8: x ^= 1 << (0xa0%8); break;
03241 default: break;
03242 }
03243 classbits[c] |= x;
03244 }
03245
03246 #ifdef SUPPORT_UTF8
03247 if (utf8)
03248 {
03249 class_utf8 = TRUE;
03250 *class_utf8data++ = XCL_RANGE;
03251 class_utf8data += _pcre_ord2utf8(0x0100, class_utf8data);
03252 class_utf8data += _pcre_ord2utf8(0x167f, class_utf8data);
03253 *class_utf8data++ = XCL_RANGE;
03254 class_utf8data += _pcre_ord2utf8(0x1681, class_utf8data);
03255 class_utf8data += _pcre_ord2utf8(0x180d, class_utf8data);
03256 *class_utf8data++ = XCL_RANGE;
03257 class_utf8data += _pcre_ord2utf8(0x180f, class_utf8data);
03258 class_utf8data += _pcre_ord2utf8(0x1fff, class_utf8data);
03259 *class_utf8data++ = XCL_RANGE;
03260 class_utf8data += _pcre_ord2utf8(0x200B, class_utf8data);
03261 class_utf8data += _pcre_ord2utf8(0x202e, class_utf8data);
03262 *class_utf8data++ = XCL_RANGE;
03263 class_utf8data += _pcre_ord2utf8(0x2030, class_utf8data);
03264 class_utf8data += _pcre_ord2utf8(0x205e, class_utf8data);
03265 *class_utf8data++ = XCL_RANGE;
03266 class_utf8data += _pcre_ord2utf8(0x2060, class_utf8data);
03267 class_utf8data += _pcre_ord2utf8(0x2fff, class_utf8data);
03268 *class_utf8data++ = XCL_RANGE;
03269 class_utf8data += _pcre_ord2utf8(0x3001, class_utf8data);
03270 class_utf8data += _pcre_ord2utf8(0x7fffffff, class_utf8data);
03271 }
03272 #endif
03273 continue;
03274 }
03275
03276 if (-c == ESC_v)
03277 {
03278 SETBIT(classbits, 0x0a);
03279 SETBIT(classbits, 0x0b);
03280 SETBIT(classbits, 0x0c);
03281 SETBIT(classbits, 0x0d);
03282 SETBIT(classbits, 0x85);
03283 #ifdef SUPPORT_UTF8
03284 if (utf8)
03285 {
03286 class_utf8 = TRUE;
03287 *class_utf8data++ = XCL_RANGE;
03288 class_utf8data += _pcre_ord2utf8(0x2028, class_utf8data);
03289 class_utf8data += _pcre_ord2utf8(0x2029, class_utf8data);
03290 }
03291 #endif
03292 continue;
03293 }
03294
03295 if (-c == ESC_V)
03296 {
03297 for (c = 0; c < 32; c++)
03298 {
03299 int x = 0xff;
03300 switch (c)
03301 {
03302 case 0x0a/8: x ^= 1 << (0x0a%8);
03303 x ^= 1 << (0x0b%8);
03304 x ^= 1 << (0x0c%8);
03305 x ^= 1 << (0x0d%8);
03306 break;
03307 case 0x85/8: x ^= 1 << (0x85%8); break;
03308 default: break;
03309 }
03310 classbits[c] |= x;
03311 }
03312
03313 #ifdef SUPPORT_UTF8
03314 if (utf8)
03315 {
03316 class_utf8 = TRUE;
03317 *class_utf8data++ = XCL_RANGE;
03318 class_utf8data += _pcre_ord2utf8(0x0100, class_utf8data);
03319 class_utf8data += _pcre_ord2utf8(0x2027, class_utf8data);
03320 *class_utf8data++ = XCL_RANGE;
03321 class_utf8data += _pcre_ord2utf8(0x2029, class_utf8data);
03322 class_utf8data += _pcre_ord2utf8(0x7fffffff, class_utf8data);
03323 }
03324 #endif
03325 continue;
03326 }
03327
03328
03329
03330 #ifdef SUPPORT_UCP
03331 if (-c == ESC_p || -c == ESC_P)
03332 {
03333 BOOL negated;
03334 int pdata;
03335 int ptype = get_ucp(&ptr, &negated, &pdata, errorcodeptr);
03336 if (ptype < 0) goto FAILED;
03337 class_utf8 = TRUE;
03338 *class_utf8data++ = ((-c == ESC_p) != negated)?
03339 XCL_PROP : XCL_NOTPROP;
03340 *class_utf8data++ = ptype;
03341 *class_utf8data++ = pdata;
03342 class_charcount -= 2;
03343 continue;
03344 }
03345 #endif
03346
03347
03348
03349
03350 if ((options & PCRE_EXTRA) != 0)
03351 {
03352 *errorcodeptr = ERR7;
03353 goto FAILED;
03354 }
03355
03356 class_charcount -= 2;
03357 c = *ptr;
03358 }
03359
03360
03361
03362
03363 }
03364
03365
03366
03367
03368
03369
03370 CHECK_RANGE:
03371 while (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
03372 {
03373 inescq = FALSE;
03374 ptr += 2;
03375 }
03376
03377 oldptr = ptr;
03378
03379
03380
03381 if (c == CHAR_CR || c == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF;
03382
03383
03384
03385 if (!inescq && ptr[1] == CHAR_MINUS)
03386 {
03387 int d;
03388 ptr += 2;
03389 while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E) ptr += 2;
03390
03391
03392
03393
03394 while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_Q)
03395 {
03396 ptr += 2;
03397 if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E)
03398 { ptr += 2; continue; }
03399 inescq = TRUE;
03400 break;
03401 }
03402
03403 if (*ptr == 0 || (!inescq && *ptr == CHAR_RIGHT_SQUARE_BRACKET))
03404 {
03405 ptr = oldptr;
03406 goto LONE_SINGLE_CHARACTER;
03407 }
03408
03409 #ifdef SUPPORT_UTF8
03410 if (utf8)
03411 {
03412 GETCHARLEN(d, ptr, ptr);
03413 }
03414 else
03415 #endif
03416 d = *ptr;
03417
03418
03419
03420
03421
03422 if (!inescq && d == CHAR_BACKSLASH)
03423 {
03424 d = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);
03425 if (*errorcodeptr != 0) goto FAILED;
03426
03427
03428
03429
03430 if (d < 0)
03431 {
03432 if (d == -ESC_b) d = CHAR_BS;
03433 else if (d == -ESC_X) d = CHAR_X;
03434 else if (d == -ESC_R) d = CHAR_R; else
03435 {
03436 ptr = oldptr;
03437 goto LONE_SINGLE_CHARACTER;
03438 }
03439 }
03440 }
03441
03442
03443
03444
03445 if (d < c)
03446 {
03447 *errorcodeptr = ERR8;
03448 goto FAILED;
03449 }
03450
03451 if (d == c) goto LONE_SINGLE_CHARACTER;
03452
03453
03454
03455 if (d == CHAR_CR || d == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF;
03456
03457
03458
03459
03460
03461
03462 #ifdef SUPPORT_UTF8
03463 if (utf8 && (d > 255 || ((options & PCRE_CASELESS) != 0 && d > 127)))
03464 {
03465 class_utf8 = TRUE;
03466
03467
03468
03469
03470
03471 #ifdef SUPPORT_UCP
03472 if ((options & PCRE_CASELESS) != 0)
03473 {
03474 unsigned int occ, ocd;
03475 unsigned int cc = c;
03476 unsigned int origd = d;
03477 while (get_othercase_range(&cc, origd, &occ, &ocd))
03478 {
03479 if (occ >= (unsigned int)c &&
03480 ocd <= (unsigned int)d)
03481 continue;
03482
03483 if (occ < (unsigned int)c &&
03484 ocd >= (unsigned int)c - 1)
03485 {
03486 c = occ;
03487 continue;
03488 }
03489 if (ocd > (unsigned int)d &&
03490 occ <= (unsigned int)d + 1)
03491 {
03492 d = ocd;
03493 continue;
03494 }
03495
03496 if (occ == ocd)
03497 {
03498 *class_utf8data++ = XCL_SINGLE;
03499 }
03500 else
03501 {
03502 *class_utf8data++ = XCL_RANGE;
03503 class_utf8data += _pcre_ord2utf8(occ, class_utf8data);
03504 }
03505 class_utf8data += _pcre_ord2utf8(ocd, class_utf8data);
03506 }
03507 }
03508 #endif
03509
03510
03511
03512
03513 *class_utf8data++ = XCL_RANGE;
03514 class_utf8data += _pcre_ord2utf8(c, class_utf8data);
03515 class_utf8data += _pcre_ord2utf8(d, class_utf8data);
03516
03517
03518
03519
03520
03521 #ifdef SUPPORT_UCP
03522 continue;
03523 #else
03524 if ((options & PCRE_CASELESS) == 0 || c > 127) continue;
03525
03526
03527
03528 d = 127;
03529
03530 #endif
03531 }
03532 #endif
03533
03534
03535
03536
03537
03538 class_charcount += d - c + 1;
03539 class_lastchar = d;
03540
03541
03542
03543 if (lengthptr == NULL) for (; c <= d; c++)
03544 {
03545 classbits[c/8] |= (1 << (c&7));
03546 if ((options & PCRE_CASELESS) != 0)
03547 {
03548 int uc = cd->fcc[c];
03549 classbits[uc/8] |= (1 << (uc&7));
03550 }
03551 }
03552
03553 continue;
03554 }
03555
03556
03557
03558
03559
03560 LONE_SINGLE_CHARACTER:
03561
03562
03563
03564 #ifdef SUPPORT_UTF8
03565 if (utf8 && (c > 255 || ((options & PCRE_CASELESS) != 0 && c > 127)))
03566 {
03567 class_utf8 = TRUE;
03568 *class_utf8data++ = XCL_SINGLE;
03569 class_utf8data += _pcre_ord2utf8(c, class_utf8data);
03570
03571 #ifdef SUPPORT_UCP
03572 if ((options & PCRE_CASELESS) != 0)
03573 {
03574 unsigned int othercase;
03575 if ((othercase = UCD_OTHERCASE(c)) != c)
03576 {
03577 *class_utf8data++ = XCL_SINGLE;
03578 class_utf8data += _pcre_ord2utf8(othercase, class_utf8data);
03579 }
03580 }
03581 #endif
03582
03583 }
03584 else
03585 #endif
03586
03587
03588 {
03589 classbits[c/8] |= (1 << (c&7));
03590 if ((options & PCRE_CASELESS) != 0)
03591 {
03592 c = cd->fcc[c];
03593 classbits[c/8] |= (1 << (c&7));
03594 }
03595 class_charcount++;
03596 class_lastchar = c;
03597 }
03598 }
03599
03600
03601
03602 while ((c = *(++ptr)) != 0 && (c != CHAR_RIGHT_SQUARE_BRACKET || inescq));
03603
03604 if (c == 0)
03605 {
03606 *errorcodeptr = ERR6;
03607 goto FAILED;
03608 }
03609
03610
03611
03612
03613
03614
03615 #if 0
03616
03617
03618 if (negate_class)
03619 {
03620 if ((classbits[1] & 0x24) != 0x24) cd->external_flags |= PCRE_HASCRORLF;
03621 }
03622 else
03623 {
03624 if ((classbits[1] & 0x24) != 0) cd->external_flags |= PCRE_HASCRORLF;
03625 }
03626 #endif
03627
03628
03629
03630
03631
03632
03633
03634
03635
03636
03637
03638
03639
03640
03641
03642
03643
03644
03645
03646 #ifdef SUPPORT_UTF8
03647 if (class_charcount == 1 && !class_utf8 &&
03648 (!utf8 || !negate_class || class_lastchar < 128))
03649 #else
03650 if (class_charcount == 1)
03651 #endif
03652 {
03653 zeroreqbyte = reqbyte;
03654
03655
03656
03657 if (negate_class)
03658 {
03659 if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
03660 zerofirstbyte = firstbyte;
03661 *code++ = OP_NOT;
03662 *code++ = class_lastchar;
03663 break;
03664 }
03665
03666
03667
03668
03669 #ifdef SUPPORT_UTF8
03670 if (utf8 && class_lastchar > 127)
03671 mclength = _pcre_ord2utf8(class_lastchar, mcbuffer);
03672 else
03673 #endif
03674 {
03675 mcbuffer[0] = class_lastchar;
03676 mclength = 1;
03677 }
03678 goto ONE_CHAR;
03679 }
03680
03681
03682
03683
03684
03685
03686 if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
03687 zerofirstbyte = firstbyte;
03688 zeroreqbyte = reqbyte;
03689
03690
03691
03692
03693
03694
03695
03696
03697 #ifdef SUPPORT_UTF8
03698 if (class_utf8 && !should_flip_negation)
03699 {
03700 *class_utf8data++ = XCL_END;
03701 *code++ = OP_XCLASS;
03702 code += LINK_SIZE;
03703 *code = negate_class? XCL_NOT : 0;
03704
03705
03706
03707
03708 if (class_charcount > 0)
03709 {
03710 *code++ |= XCL_MAP;
03711 memmove(code + 32, code, class_utf8data - code);
03712 memcpy(code, classbits, 32);
03713 code = class_utf8data + 32;
03714 }
03715 else code = class_utf8data;
03716
03717
03718
03719 PUT(previous, 1, code - previous);
03720 break;
03721 }
03722 #endif
03723
03724
03725
03726
03727
03728
03729 *code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS;
03730 if (negate_class)
03731 {
03732 if (lengthptr == NULL)
03733 for (c = 0; c < 32; c++) code[c] = ~classbits[c];
03734 }
03735 else
03736 {
03737 memcpy(code, classbits, 32);
03738 }
03739 code += 32;
03740 break;
03741
03742
03743
03744
03745
03746
03747 case CHAR_LEFT_CURLY_BRACKET:
03748 if (!is_quantifier) goto NORMAL_CHAR;
03749 ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorcodeptr);
03750 if (*errorcodeptr != 0) goto FAILED;
03751 goto REPEAT;
03752
03753 case CHAR_ASTERISK:
03754 repeat_min = 0;
03755 repeat_max = -1;
03756 goto REPEAT;
03757
03758 case CHAR_PLUS:
03759 repeat_min = 1;
03760 repeat_max = -1;
03761 goto REPEAT;
03762
03763 case CHAR_QUESTION_MARK:
03764 repeat_min = 0;
03765 repeat_max = 1;
03766
03767 REPEAT:
03768 if (previous == NULL)
03769 {
03770 *errorcodeptr = ERR9;
03771 goto FAILED;
03772 }
03773
03774 if (repeat_min == 0)
03775 {
03776 firstbyte = zerofirstbyte;
03777 reqbyte = zeroreqbyte;
03778 }
03779
03780
03781
03782 reqvary = (repeat_min == repeat_max)? 0 : REQ_VARY;
03783
03784 op_type = 0;
03785 possessive_quantifier = FALSE;
03786
03787
03788
03789
03790 tempcode = previous;
03791
03792
03793
03794
03795
03796
03797
03798 if (ptr[1] == CHAR_PLUS)
03799 {
03800 repeat_type = 0;
03801 possessive_quantifier = TRUE;
03802 ptr++;
03803 }
03804 else if (ptr[1] == CHAR_QUESTION_MARK)
03805 {
03806 repeat_type = greedy_non_default;
03807 ptr++;
03808 }
03809 else repeat_type = greedy_default;
03810
03811
03812
03813
03814
03815
03816
03817 if (*previous == OP_CHAR || *previous == OP_CHARNC)
03818 {
03819
03820
03821
03822
03823
03824 #ifdef SUPPORT_UTF8
03825 if (utf8 && (code[-1] & 0x80) != 0)
03826 {
03827 uschar *lastchar = code - 1;
03828 while((*lastchar & 0xc0) == 0x80) lastchar--;
03829 c = code - lastchar;
03830 memcpy(utf8_char, lastchar, c);
03831 c |= 0x80;
03832 }
03833 else
03834 #endif
03835
03836
03837
03838
03839 {
03840 c = code[-1];
03841 if (repeat_min > 1) reqbyte = c | req_caseopt | cd->req_varyopt;
03842 }
03843
03844
03845
03846
03847
03848
03849 if (!possessive_quantifier &&
03850 repeat_max < 0 &&
03851 check_auto_possessive(*previous, c, utf8, utf8_char, ptr + 1,
03852 options, cd))
03853 {
03854 repeat_type = 0;
03855 possessive_quantifier = TRUE;
03856 }
03857
03858 goto OUTPUT_SINGLE_REPEAT;
03859 }
03860
03861
03862
03863
03864
03865
03866
03867 else if (*previous == OP_NOT)
03868 {
03869 op_type = OP_NOTSTAR - OP_STAR;
03870 c = previous[1];
03871 if (!possessive_quantifier &&
03872 repeat_max < 0 &&
03873 check_auto_possessive(OP_NOT, c, utf8, NULL, ptr + 1, options, cd))
03874 {
03875 repeat_type = 0;
03876 possessive_quantifier = TRUE;
03877 }
03878 goto OUTPUT_SINGLE_REPEAT;
03879 }
03880
03881
03882
03883
03884
03885
03886
03887
03888 else if (*previous < OP_EODN)
03889 {
03890 uschar *oldcode;
03891 int prop_type, prop_value;
03892 op_type = OP_TYPESTAR - OP_STAR;
03893 c = *previous;
03894
03895 if (!possessive_quantifier &&
03896 repeat_max < 0 &&
03897 check_auto_possessive(c, 0, utf8, NULL, ptr + 1, options, cd))
03898 {
03899 repeat_type = 0;
03900 possessive_quantifier = TRUE;
03901 }
03902
03903 OUTPUT_SINGLE_REPEAT:
03904 if (*previous == OP_PROP || *previous == OP_NOTPROP)
03905 {
03906 prop_type = previous[1];
03907 prop_value = previous[2];
03908 }
03909 else prop_type = prop_value = -1;
03910
03911 oldcode = code;
03912 code = previous;
03913
03914
03915
03916
03917 if (repeat_max == 0) goto END_REPEAT;
03918
03919
03920
03921
03922
03923
03924
03925
03926
03927
03928
03929
03930
03931 repeat_type += op_type;
03932
03933
03934
03935
03936 if (repeat_min == 0)
03937 {
03938 if (repeat_max == -1) *code++ = OP_STAR + repeat_type;
03939 else if (repeat_max == 1) *code++ = OP_QUERY + repeat_type;
03940 else
03941 {
03942 *code++ = OP_UPTO + repeat_type;
03943 PUT2INC(code, 0, repeat_max);
03944 }
03945 }
03946
03947
03948
03949
03950
03951
03952 else if (repeat_min == 1)
03953 {
03954 if (repeat_max == -1)
03955 *code++ = OP_PLUS + repeat_type;
03956 else
03957 {
03958 code = oldcode;
03959 if (repeat_max == 1) goto END_REPEAT;
03960 *code++ = OP_UPTO + repeat_type;
03961 PUT2INC(code, 0, repeat_max - 1);
03962 }
03963 }
03964
03965
03966
03967
03968 else
03969 {
03970 *code++ = OP_EXACT + op_type;
03971 PUT2INC(code, 0, repeat_min);
03972
03973
03974
03975
03976
03977
03978
03979 if (repeat_max < 0)
03980 {
03981 #ifdef SUPPORT_UTF8
03982 if (utf8 && c >= 128)
03983 {
03984 memcpy(code, utf8_char, c & 7);
03985 code += c & 7;
03986 }
03987 else
03988 #endif
03989 {
03990 *code++ = c;
03991 if (prop_type >= 0)
03992 {
03993 *code++ = prop_type;
03994 *code++ = prop_value;
03995 }
03996 }
03997 *code++ = OP_STAR + repeat_type;
03998 }
03999
04000
04001
04002
04003
04004 else if (repeat_max != repeat_min)
04005 {
04006 #ifdef SUPPORT_UTF8
04007 if (utf8 && c >= 128)
04008 {
04009 memcpy(code, utf8_char, c & 7);
04010 code += c & 7;
04011 }
04012 else
04013 #endif
04014 *code++ = c;
04015 if (prop_type >= 0)
04016 {
04017 *code++ = prop_type;
04018 *code++ = prop_value;
04019 }
04020 repeat_max -= repeat_min;
04021
04022 if (repeat_max == 1)
04023 {
04024 *code++ = OP_QUERY + repeat_type;
04025 }
04026 else
04027 {
04028 *code++ = OP_UPTO + repeat_type;
04029 PUT2INC(code, 0, repeat_max);
04030 }
04031 }
04032 }
04033
04034
04035
04036 #ifdef SUPPORT_UTF8
04037 if (utf8 && c >= 128)
04038 {
04039 memcpy(code, utf8_char, c & 7);
04040 code += c & 7;
04041 }
04042 else
04043 #endif
04044 *code++ = c;
04045
04046
04047
04048
04049 #ifdef SUPPORT_UCP
04050 if (prop_type >= 0)
04051 {
04052 *code++ = prop_type;
04053 *code++ = prop_value;
04054 }
04055 #endif
04056 }
04057
04058
04059
04060
04061 else if (*previous == OP_CLASS ||
04062 *previous == OP_NCLASS ||
04063 #ifdef SUPPORT_UTF8
04064 *previous == OP_XCLASS ||
04065 #endif
04066 *previous == OP_REF)
04067 {
04068 if (repeat_max == 0)
04069 {
04070 code = previous;
04071 goto END_REPEAT;
04072 }
04073
04074
04075
04076
04077
04078
04079
04080
04081
04082
04083
04084 if (repeat_min == 0 && repeat_max == -1)
04085 *code++ = OP_CRSTAR + repeat_type;
04086 else if (repeat_min == 1 && repeat_max == -1)
04087 *code++ = OP_CRPLUS + repeat_type;
04088 else if (repeat_min == 0 && repeat_max == 1)
04089 *code++ = OP_CRQUERY + repeat_type;
04090 else
04091 {
04092 *code++ = OP_CRRANGE + repeat_type;
04093 PUT2INC(code, 0, repeat_min);
04094 if (repeat_max == -1) repeat_max = 0;
04095 PUT2INC(code, 0, repeat_max);
04096 }
04097 }
04098
04099
04100
04101
04102 else if (*previous == OP_BRA || *previous == OP_CBRA ||
04103 *previous == OP_ONCE || *previous == OP_COND)
04104 {
04105 register int i;
04106 int ketoffset = 0;
04107 int len = code - previous;
04108 uschar *bralink = NULL;
04109
04110
04111
04112 if (*previous == OP_COND && previous[LINK_SIZE+1] == OP_DEF)
04113 {
04114 *errorcodeptr = ERR55;
04115 goto FAILED;
04116 }
04117
04118
04119
04120
04121
04122
04123
04124 if (repeat_max == -1)
04125 {
04126 register uschar *ket = previous;
04127 do ket += GET(ket, 1); while (*ket != OP_KET);
04128 ketoffset = code - ket;
04129 }
04130
04131
04132
04133
04134
04135
04136
04137
04138 if (repeat_min == 0)
04139 {
04140
04141
04142
04143
04144
04145
04146
04147
04148
04149
04150
04151
04152
04153
04154
04155
04156
04157
04158
04159
04160
04161 if (repeat_max <= 1)
04162 {
04163 *code = OP_END;
04164 adjust_recurse(previous, 1, utf8, cd, save_hwm);
04165 memmove(previous+1, previous, len);
04166 code++;
04167 if (repeat_max == 0)
04168 {
04169 *previous++ = OP_SKIPZERO;
04170 goto END_REPEAT;
04171 }
04172 *previous++ = OP_BRAZERO + repeat_type;
04173 }
04174
04175
04176
04177
04178
04179
04180
04181
04182
04183 else
04184 {
04185 int offset;
04186 *code = OP_END;
04187 adjust_recurse(previous, 2 + LINK_SIZE, utf8, cd, save_hwm);
04188 memmove(previous + 2 + LINK_SIZE, previous, len);
04189 code += 2 + LINK_SIZE;
04190 *previous++ = OP_BRAZERO + repeat_type;
04191 *previous++ = OP_BRA;
04192
04193
04194
04195
04196 offset = (bralink == NULL)? 0 : previous - bralink;
04197 bralink = previous;
04198 PUTINC(previous, 0, offset);
04199 }
04200
04201 repeat_max--;
04202 }
04203
04204
04205
04206
04207
04208
04209
04210
04211 else
04212 {
04213 if (repeat_min > 1)
04214 {
04215
04216
04217
04218
04219 if (lengthptr != NULL)
04220 {
04221 int delta = (repeat_min - 1)*length_prevgroup;
04222 if ((double)(repeat_min - 1)*(double)length_prevgroup >
04223 (double)INT_MAX ||
04224 OFLOW_MAX - *lengthptr < delta)
04225 {
04226 *errorcodeptr = ERR20;
04227 goto FAILED;
04228 }
04229 *lengthptr += delta;
04230 }
04231
04232
04233
04234 else
04235 {
04236 if (groupsetfirstbyte && reqbyte < 0) reqbyte = firstbyte;
04237 for (i = 1; i < repeat_min; i++)
04238 {
04239 uschar *hc;
04240 uschar *this_hwm = cd->hwm;
04241 memcpy(code, previous, len);
04242 for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)
04243 {
04244 PUT(cd->hwm, 0, GET(hc, 0) + len);
04245 cd->hwm += LINK_SIZE;
04246 }
04247 save_hwm = this_hwm;
04248 code += len;
04249 }
04250 }
04251 }
04252
04253 if (repeat_max > 0) repeat_max -= repeat_min;
04254 }
04255
04256
04257
04258
04259
04260
04261
04262
04263 if (repeat_max >= 0)
04264 {
04265
04266
04267
04268
04269
04270
04271 if (lengthptr != NULL && repeat_max > 0)
04272 {
04273 int delta = repeat_max * (length_prevgroup + 1 + 2 + 2*LINK_SIZE) -
04274 2 - 2*LINK_SIZE;
04275 if ((double)repeat_max *
04276 (double)(length_prevgroup + 1 + 2 + 2*LINK_SIZE)
04277 > (double)INT_MAX ||
04278 OFLOW_MAX - *lengthptr < delta)
04279 {
04280 *errorcodeptr = ERR20;
04281 goto FAILED;
04282 }
04283 *lengthptr += delta;
04284 }
04285
04286
04287
04288 else for (i = repeat_max - 1; i >= 0; i--)
04289 {
04290 uschar *hc;
04291 uschar *this_hwm = cd->hwm;
04292
04293 *code++ = OP_BRAZERO + repeat_type;
04294
04295
04296
04297
04298 if (i != 0)
04299 {
04300 int offset;
04301 *code++ = OP_BRA;
04302 offset = (bralink == NULL)? 0 : code - bralink;
04303 bralink = code;
04304 PUTINC(code, 0, offset);
04305 }
04306
04307 memcpy(code, previous, len);
04308 for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)
04309 {
04310 PUT(cd->hwm, 0, GET(hc, 0) + len + ((i != 0)? 2+LINK_SIZE : 1));
04311 cd->hwm += LINK_SIZE;
04312 }
04313 save_hwm = this_hwm;
04314 code += len;
04315 }
04316
04317
04318
04319
04320 while (bralink != NULL)
04321 {
04322 int oldlinkoffset;
04323 int offset = code - bralink + 1;
04324 uschar *bra = code - offset;
04325 oldlinkoffset = GET(bra, 1);
04326 bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset;
04327 *code++ = OP_KET;
04328 PUTINC(code, 0, offset);
04329 PUT(bra, 1, offset);
04330 }
04331 }
04332
04333
04334
04335
04336
04337
04338
04339
04340
04341
04342
04343
04344 else
04345 {
04346 uschar *ketcode = code - ketoffset;
04347 uschar *bracode = ketcode - GET(ketcode, 1);
04348 *ketcode = OP_KETRMAX + repeat_type;
04349 if (lengthptr == NULL && *bracode != OP_ONCE)
04350 {
04351 uschar *scode = bracode;
04352 do
04353 {
04354 if (could_be_empty_branch(scode, ketcode, utf8))
04355 {
04356 *bracode += OP_SBRA - OP_BRA;
04357 break;
04358 }
04359 scode += GET(scode, 1);
04360 }
04361 while (*scode == OP_ALT);
04362 }
04363 }
04364 }
04365
04366
04367
04368
04369
04370
04371 else if (*previous == OP_FAIL) goto END_REPEAT;
04372
04373
04374
04375 else
04376 {
04377 *errorcodeptr = ERR11;
04378 goto FAILED;
04379 }
04380
04381
04382
04383
04384
04385
04386
04387
04388
04389
04390
04391
04392
04393
04394 if (possessive_quantifier)
04395 {
04396 int len;
04397
04398 if (*tempcode == OP_TYPEEXACT)
04399 tempcode += _pcre_OP_lengths[*tempcode] +
04400 ((tempcode[3] == OP_PROP || tempcode[3] == OP_NOTPROP)? 2 : 0);
04401
04402 else if (*tempcode == OP_EXACT || *tempcode == OP_NOTEXACT)
04403 {
04404 tempcode += _pcre_OP_lengths[*tempcode];
04405 #ifdef SUPPORT_UTF8
04406 if (utf8 && tempcode[-1] >= 0xc0)
04407 tempcode += _pcre_utf8_table4[tempcode[-1] & 0x3f];
04408 #endif
04409 }
04410
04411 len = code - tempcode;
04412 if (len > 0) switch (*tempcode)
04413 {
04414 case OP_STAR: *tempcode = OP_POSSTAR; break;
04415 case OP_PLUS: *tempcode = OP_POSPLUS; break;
04416 case OP_QUERY: *tempcode = OP_POSQUERY; break;
04417 case OP_UPTO: *tempcode = OP_POSUPTO; break;
04418
04419 case OP_TYPESTAR: *tempcode = OP_TYPEPOSSTAR; break;
04420 case OP_TYPEPLUS: *tempcode = OP_TYPEPOSPLUS; break;
04421 case OP_TYPEQUERY: *tempcode = OP_TYPEPOSQUERY; break;
04422 case OP_TYPEUPTO: *tempcode = OP_TYPEPOSUPTO; break;
04423
04424 case OP_NOTSTAR: *tempcode = OP_NOTPOSSTAR; break;
04425 case OP_NOTPLUS: *tempcode = OP_NOTPOSPLUS; break;
04426 case OP_NOTQUERY: *tempcode = OP_NOTPOSQUERY; break;
04427 case OP_NOTUPTO: *tempcode = OP_NOTPOSUPTO; break;
04428
04429 default:
04430 memmove(tempcode + 1+LINK_SIZE, tempcode, len);
04431 code += 1 + LINK_SIZE;
04432 len += 1 + LINK_SIZE;
04433 tempcode[0] = OP_ONCE;
04434 *code++ = OP_KET;
04435 PUTINC(code, 0, len);
04436 PUT(tempcode, 1, len);
04437 break;
04438 }
04439 }
04440
04441
04442
04443
04444
04445 END_REPEAT:
04446 previous = NULL;
04447 cd->req_varyopt |= reqvary;
04448 break;
04449
04450
04451
04452
04453
04454
04455
04456 case CHAR_LEFT_PARENTHESIS:
04457 newoptions = options;
04458 skipbytes = 0;
04459 bravalue = OP_CBRA;
04460 save_hwm = cd->hwm;
04461 reset_bracount = FALSE;
04462
04463
04464
04465 if (*(++ptr) == CHAR_ASTERISK && (cd->ctypes[ptr[1]] & ctype_letter) != 0)
04466 {
04467 int i, namelen;
04468 const char *vn = verbnames;
04469 const uschar *name = ++ptr;
04470 previous = NULL;
04471 while ((cd->ctypes[*++ptr] & ctype_letter) != 0) {};
04472 if (*ptr == CHAR_COLON)
04473 {
04474 *errorcodeptr = ERR59;
04475 goto FAILED;
04476 }
04477 if (*ptr != CHAR_RIGHT_PARENTHESIS)
04478 {
04479 *errorcodeptr = ERR60;
04480 goto FAILED;
04481 }
04482 namelen = ptr - name;
04483 for (i = 0; i < verbcount; i++)
04484 {
04485 if (namelen == verbs[i].len &&
04486 strncmp((char *)name, vn, namelen) == 0)
04487 {
04488
04489
04490 if (verbs[i].op == OP_ACCEPT)
04491 {
04492 open_capitem *oc;
04493 cd->had_accept = TRUE;
04494 for (oc = cd->open_caps; oc != NULL; oc = oc->next)
04495 {
04496 *code++ = OP_CLOSE;
04497 PUT2INC(code, 0, oc->number);
04498 }
04499 }
04500 *code++ = verbs[i].op;
04501 break;
04502 }
04503 vn += verbs[i].len + 1;
04504 }
04505 if (i < verbcount) continue;
04506 *errorcodeptr = ERR60;
04507 goto FAILED;
04508 }
04509
04510
04511
04512
04513 else if (*ptr == CHAR_QUESTION_MARK)
04514 {
04515 int i, set, unset, namelen;
04516 int *optset;
04517 const uschar *name;
04518 uschar *slot;
04519
04520 switch (*(++ptr))
04521 {
04522 case CHAR_NUMBER_SIGN:
04523 ptr++;
04524 while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
04525 if (*ptr == 0)
04526 {
04527 *errorcodeptr = ERR18;
04528 goto FAILED;
04529 }
04530 continue;
04531
04532
04533
04534 case CHAR_VERTICAL_LINE:
04535 reset_bracount = TRUE;
04536
04537
04538
04539 case CHAR_COLON:
04540 bravalue = OP_BRA;
04541 ptr++;
04542 break;
04543
04544
04545
04546 case CHAR_LEFT_PARENTHESIS:
04547 bravalue = OP_COND;
04548
04549
04550
04551
04552
04553
04554
04555
04556
04557
04558
04559
04560
04561
04562
04563
04564
04565
04566 if (ptr[1] == CHAR_QUESTION_MARK && (ptr[2] == CHAR_EQUALS_SIGN ||
04567 ptr[2] == CHAR_EXCLAMATION_MARK || ptr[2] == CHAR_LESS_THAN_SIGN))
04568 break;
04569
04570
04571
04572
04573 code[1+LINK_SIZE] = OP_CREF;
04574 skipbytes = 3;
04575 refsign = -1;
04576
04577
04578
04579 if (ptr[1] == CHAR_R && ptr[2] == CHAR_AMPERSAND)
04580 {
04581 terminator = -1;
04582 ptr += 2;
04583 code[1+LINK_SIZE] = OP_RREF;
04584 }
04585
04586
04587
04588
04589 else if (ptr[1] == CHAR_LESS_THAN_SIGN)
04590 {
04591 terminator = CHAR_GREATER_THAN_SIGN;
04592 ptr++;
04593 }
04594 else if (ptr[1] == CHAR_APOSTROPHE)
04595 {
04596 terminator = CHAR_APOSTROPHE;
04597 ptr++;
04598 }
04599 else
04600 {
04601 terminator = 0;
04602 if (ptr[1] == CHAR_MINUS || ptr[1] == CHAR_PLUS) refsign = *(++ptr);
04603 }
04604
04605
04606
04607 if ((cd->ctypes[ptr[1]] & ctype_word) == 0)
04608 {
04609 ptr += 1;
04610 *errorcodeptr = ERR28;
04611 goto FAILED;
04612 }
04613
04614
04615
04616 recno = 0;
04617 name = ++ptr;
04618 while ((cd->ctypes[*ptr] & ctype_word) != 0)
04619 {
04620 if (recno >= 0)
04621 recno = ((digitab[*ptr] & ctype_digit) != 0)?
04622 recno * 10 + *ptr - CHAR_0 : -1;
04623 ptr++;
04624 }
04625 namelen = ptr - name;
04626
04627 if ((terminator > 0 && *ptr++ != terminator) ||
04628 *ptr++ != CHAR_RIGHT_PARENTHESIS)
04629 {
04630 ptr--;
04631 *errorcodeptr = ERR26;
04632 goto FAILED;
04633 }
04634
04635
04636
04637 if (lengthptr != NULL) break;
04638
04639
04640
04641
04642
04643 if (refsign > 0)
04644 {
04645 if (recno <= 0)
04646 {
04647 *errorcodeptr = ERR58;
04648 goto FAILED;
04649 }
04650 recno = (refsign == CHAR_MINUS)?
04651 cd->bracount - recno + 1 : recno +cd->bracount;
04652 if (recno <= 0 || recno > cd->final_bracount)
04653 {
04654 *errorcodeptr = ERR15;
04655 goto FAILED;
04656 }
04657 PUT2(code, 2+LINK_SIZE, recno);
04658 break;
04659 }
04660
04661
04662
04663
04664
04665
04666
04667 slot = cd->name_table;
04668 for (i = 0; i < cd->names_found; i++)
04669 {
04670 if (strncmp((char *)name, (char *)slot+2, namelen) == 0) break;
04671 slot += cd->name_entry_size;
04672 }
04673
04674
04675
04676 if (i < cd->names_found)
04677 {
04678 recno = GET2(slot, 0);
04679 PUT2(code, 2+LINK_SIZE, recno);
04680 code[1+LINK_SIZE]++;
04681 }
04682
04683
04684
04685 else if ((i = find_parens(cd, name, namelen,
04686 (options & PCRE_EXTENDED) != 0)) > 0)
04687 {
04688 PUT2(code, 2+LINK_SIZE, i);
04689 code[1+LINK_SIZE]++;
04690 }
04691
04692
04693
04694
04695
04696
04697
04698 else if (terminator != 0)
04699 {
04700 *errorcodeptr = ERR15;
04701 goto FAILED;
04702 }
04703
04704
04705
04706
04707 else if (*name == CHAR_R)
04708 {
04709 recno = 0;
04710 for (i = 1; i < namelen; i++)
04711 {
04712 if ((digitab[name[i]] & ctype_digit) == 0)
04713 {
04714 *errorcodeptr = ERR15;
04715 goto FAILED;
04716 }
04717 recno = recno * 10 + name[i] - CHAR_0;
04718 }
04719 if (recno == 0) recno = RREF_ANY;
04720 code[1+LINK_SIZE] = OP_RREF;
04721 PUT2(code, 2+LINK_SIZE, recno);
04722 }
04723
04724
04725
04726
04727 else if (namelen == 6 && strncmp((char *)name, STRING_DEFINE, 6) == 0)
04728 {
04729 code[1+LINK_SIZE] = OP_DEF;
04730 skipbytes = 1;
04731 }
04732
04733
04734
04735
04736 else if (recno > 0 && recno <= cd->final_bracount)
04737 {
04738 PUT2(code, 2+LINK_SIZE, recno);
04739 }
04740
04741
04742
04743 else
04744 {
04745 *errorcodeptr = (recno == 0)? ERR35: ERR15;
04746 goto FAILED;
04747 }
04748 break;
04749
04750
04751
04752 case CHAR_EQUALS_SIGN:
04753 bravalue = OP_ASSERT;
04754 ptr++;
04755 break;
04756
04757
04758
04759 case CHAR_EXCLAMATION_MARK:
04760 ptr++;
04761 if (*ptr == CHAR_RIGHT_PARENTHESIS)
04762 {
04763 *code++ = OP_FAIL;
04764 previous = NULL;
04765 continue;
04766 }
04767 bravalue = OP_ASSERT_NOT;
04768 break;
04769
04770
04771
04772 case CHAR_LESS_THAN_SIGN:
04773 switch (ptr[1])
04774 {
04775 case CHAR_EQUALS_SIGN:
04776 bravalue = OP_ASSERTBACK;
04777 ptr += 2;
04778 break;
04779
04780 case CHAR_EXCLAMATION_MARK:
04781 bravalue = OP_ASSERTBACK_NOT;
04782 ptr += 2;
04783 break;
04784
04785 default:
04786 if ((cd->ctypes[ptr[1]] & ctype_word) != 0) goto DEFINE_NAME;
04787 ptr++;
04788 *errorcodeptr = ERR24;
04789 goto FAILED;
04790 }
04791 break;
04792
04793
04794
04795 case CHAR_GREATER_THAN_SIGN:
04796 bravalue = OP_ONCE;
04797 ptr++;
04798 break;
04799
04800
04801
04802 case CHAR_C:
04803 previous_callout = code;
04804 after_manual_callout = 1;
04805 *code++ = OP_CALLOUT;
04806 {
04807 int n = 0;
04808 while ((digitab[*(++ptr)] & ctype_digit) != 0)
04809 n = n * 10 + *ptr - CHAR_0;
04810 if (*ptr != CHAR_RIGHT_PARENTHESIS)
04811 {
04812 *errorcodeptr = ERR39;
04813 goto FAILED;
04814 }
04815 if (n > 255)
04816 {
04817 *errorcodeptr = ERR38;
04818 goto FAILED;
04819 }
04820 *code++ = n;
04821 PUT(code, 0, ptr - cd->start_pattern + 1);
04822 PUT(code, LINK_SIZE, 0);
04823 code += 2 * LINK_SIZE;
04824 }
04825 previous = NULL;
04826 continue;
04827
04828
04829
04830 case CHAR_P:
04831 if (*(++ptr) == CHAR_EQUALS_SIGN ||
04832 *ptr == CHAR_GREATER_THAN_SIGN)
04833 {
04834 is_recurse = *ptr == CHAR_GREATER_THAN_SIGN;
04835 terminator = CHAR_RIGHT_PARENTHESIS;
04836 goto NAMED_REF_OR_RECURSE;
04837 }
04838 else if (*ptr != CHAR_LESS_THAN_SIGN)
04839 {
04840 *errorcodeptr = ERR41;
04841 goto FAILED;
04842 }
04843
04844
04845
04846
04847 DEFINE_NAME:
04848 case CHAR_APOSTROPHE:
04849 {
04850 terminator = (*ptr == CHAR_LESS_THAN_SIGN)?
04851 CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
04852 name = ++ptr;
04853
04854 while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++;
04855 namelen = ptr - name;
04856
04857
04858
04859 if (lengthptr != NULL)
04860 {
04861 if (*ptr != terminator)
04862 {
04863 *errorcodeptr = ERR42;
04864 goto FAILED;
04865 }
04866 if (cd->names_found >= MAX_NAME_COUNT)
04867 {
04868 *errorcodeptr = ERR49;
04869 goto FAILED;
04870 }
04871 if (namelen + 3 > cd->name_entry_size)
04872 {
04873 cd->name_entry_size = namelen + 3;
04874 if (namelen > MAX_NAME_SIZE)
04875 {
04876 *errorcodeptr = ERR48;
04877 goto FAILED;
04878 }
04879 }
04880 }
04881
04882
04883
04884
04885
04886
04887
04888
04889
04890
04891
04892
04893
04894
04895 else
04896 {
04897 BOOL dupname = FALSE;
04898 slot = cd->name_table;
04899
04900 for (i = 0; i < cd->names_found; i++)
04901 {
04902 int crc = memcmp(name, slot+2, namelen);
04903 if (crc == 0)
04904 {
04905 if (slot[2+namelen] == 0)
04906 {
04907 if (GET2(slot, 0) != cd->bracount + 1 &&
04908 (options & PCRE_DUPNAMES) == 0)
04909 {
04910 *errorcodeptr = ERR43;
04911 goto FAILED;
04912 }
04913 else dupname = TRUE;
04914 }
04915 else crc = -1;
04916 }
04917
04918
04919
04920
04921
04922
04923 if (crc < 0)
04924 {
04925 memmove(slot + cd->name_entry_size, slot,
04926 (cd->names_found - i) * cd->name_entry_size);
04927 break;
04928 }
04929
04930
04931
04932 slot += cd->name_entry_size;
04933 }
04934
04935
04936
04937
04938 if (!dupname)
04939 {
04940 uschar *cslot = cd->name_table;
04941 for (i = 0; i < cd->names_found; i++)
04942 {
04943 if (cslot != slot)
04944 {
04945 if (GET2(cslot, 0) == cd->bracount + 1)
04946 {
04947 *errorcodeptr = ERR65;
04948 goto FAILED;
04949 }
04950 }
04951 else i--;
04952 cslot += cd->name_entry_size;
04953 }
04954 }
04955
04956 PUT2(slot, 0, cd->bracount + 1);
04957 memcpy(slot + 2, name, namelen);
04958 slot[2+namelen] = 0;
04959 }
04960 }
04961
04962
04963
04964
04965 cd->names_found++;
04966 ptr++;
04967 goto NUMBERED_GROUP;
04968
04969
04970
04971 case CHAR_AMPERSAND:
04972 terminator = CHAR_RIGHT_PARENTHESIS;
04973 is_recurse = TRUE;
04974
04975
04976
04977
04978
04979
04980
04981
04982 NAMED_REF_OR_RECURSE:
04983 name = ++ptr;
04984 while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++;
04985 namelen = ptr - name;
04986
04987
04988
04989
04990 if (lengthptr != NULL)
04991 {
04992 if (namelen == 0)
04993 {
04994 *errorcodeptr = ERR62;
04995 goto FAILED;
04996 }
04997 if (*ptr != terminator)
04998 {
04999 *errorcodeptr = ERR42;
05000 goto FAILED;
05001 }
05002 if (namelen > MAX_NAME_SIZE)
05003 {
05004 *errorcodeptr = ERR48;
05005 goto FAILED;
05006 }
05007 recno = 0;
05008 }
05009
05010
05011
05012
05013
05014
05015 else
05016 {
05017 slot = cd->name_table;
05018 for (i = 0; i < cd->names_found; i++)
05019 {
05020 if (strncmp((char *)name, (char *)slot+2, namelen) == 0 &&
05021 slot[2+namelen] == 0)
05022 break;
05023 slot += cd->name_entry_size;
05024 }
05025
05026 if (i < cd->names_found)
05027 {
05028 recno = GET2(slot, 0);
05029 }
05030 else if ((recno =
05031 find_parens(cd, name, namelen,
05032 (options & PCRE_EXTENDED) != 0)) <= 0)
05033 {
05034 *errorcodeptr = ERR15;
05035 goto FAILED;
05036 }
05037 }
05038
05039
05040
05041
05042 if (is_recurse) goto HANDLE_RECURSION;
05043 else goto HANDLE_REFERENCE;
05044
05045
05046
05047 case CHAR_R:
05048 ptr++;
05049
05050
05051
05052
05053 case CHAR_MINUS: case CHAR_PLUS:
05054 case CHAR_0: case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4:
05055 case CHAR_5: case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:
05056 {
05057 const uschar *called;
05058 terminator = CHAR_RIGHT_PARENTHESIS;
05059
05060
05061
05062
05063
05064
05065
05066 HANDLE_NUMERICAL_RECURSION:
05067
05068 if ((refsign = *ptr) == CHAR_PLUS)
05069 {
05070 ptr++;
05071 if ((digitab[*ptr] & ctype_digit) == 0)
05072 {
05073 *errorcodeptr = ERR63;
05074 goto FAILED;
05075 }
05076 }
05077 else if (refsign == CHAR_MINUS)
05078 {
05079 if ((digitab[ptr[1]] & ctype_digit) == 0)
05080 goto OTHER_CHAR_AFTER_QUERY;
05081 ptr++;
05082 }
05083
05084 recno = 0;
05085 while((digitab[*ptr] & ctype_digit) != 0)
05086 recno = recno * 10 + *ptr++ - CHAR_0;
05087
05088 if (*ptr != terminator)
05089 {
05090 *errorcodeptr = ERR29;
05091 goto FAILED;
05092 }
05093
05094 if (refsign == CHAR_MINUS)
05095 {
05096 if (recno == 0)
05097 {
05098 *errorcodeptr = ERR58;
05099 goto FAILED;
05100 }
05101 recno = cd->bracount - recno + 1;
05102 if (recno <= 0)
05103 {
05104 *errorcodeptr = ERR15;
05105 goto FAILED;
05106 }
05107 }
05108 else if (refsign == CHAR_PLUS)
05109 {
05110 if (recno == 0)
05111 {
05112 *errorcodeptr = ERR58;
05113 goto FAILED;
05114 }
05115 recno += cd->bracount;
05116 }
05117
05118
05119
05120 HANDLE_RECURSION:
05121
05122 previous = code;
05123 called = cd->start_code;
05124
05125
05126
05127
05128
05129
05130
05131
05132 if (lengthptr == NULL)
05133 {
05134 *code = OP_END;
05135 if (recno != 0)
05136 called = _pcre_find_bracket(cd->start_code, utf8, recno);
05137
05138
05139
05140 if (called == NULL)
05141 {
05142 if (find_parens(cd, NULL, recno,
05143 (options & PCRE_EXTENDED) != 0) < 0)
05144 {
05145 *errorcodeptr = ERR15;
05146 goto FAILED;
05147 }
05148 called = cd->start_code + recno;
05149 PUTINC(cd->hwm, 0, code + 2 + LINK_SIZE - cd->start_code);
05150 }
05151
05152
05153
05154
05155
05156 else if (GET(called, 1) == 0 &&
05157 could_be_empty(called, code, bcptr, utf8))
05158 {
05159 *errorcodeptr = ERR40;
05160 goto FAILED;
05161 }
05162 }
05163
05164
05165
05166
05167
05168 *code = OP_ONCE;
05169 PUT(code, 1, 2 + 2*LINK_SIZE);
05170 code += 1 + LINK_SIZE;
05171
05172 *code = OP_RECURSE;
05173 PUT(code, 1, called - cd->start_code);
05174 code += 1 + LINK_SIZE;
05175
05176 *code = OP_KET;
05177 PUT(code, 1, 2 + 2*LINK_SIZE);
05178 code += 1 + LINK_SIZE;
05179
05180 length_prevgroup = 3 + 3*LINK_SIZE;
05181 }
05182
05183
05184
05185 if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
05186 continue;
05187
05188
05189
05190 default:
05191 OTHER_CHAR_AFTER_QUERY:
05192 set = unset = 0;
05193 optset = &set;
05194
05195 while (*ptr != CHAR_RIGHT_PARENTHESIS && *ptr != CHAR_COLON)
05196 {
05197 switch (*ptr++)
05198 {
05199 case CHAR_MINUS: optset = &unset; break;
05200
05201 case CHAR_J:
05202 *optset |= PCRE_DUPNAMES;
05203 cd->external_flags |= PCRE_JCHANGED;
05204 break;
05205
05206 case CHAR_i: *optset |= PCRE_CASELESS; break;
05207 case CHAR_m: *optset |= PCRE_MULTILINE; break;
05208 case CHAR_s: *optset |= PCRE_DOTALL; break;
05209 case CHAR_x: *optset |= PCRE_EXTENDED; break;
05210 case CHAR_U: *optset |= PCRE_UNGREEDY; break;
05211 case CHAR_X: *optset |= PCRE_EXTRA; break;
05212
05213 default: *errorcodeptr = ERR12;
05214 ptr--;
05215 goto FAILED;
05216 }
05217 }
05218
05219
05220
05221 newoptions = (options | set) & (~unset);
05222
05223
05224
05225
05226
05227
05228
05229
05230
05231
05232
05233
05234
05235
05236
05237
05238
05239
05240
05241
05242
05243
05244
05245 if (*ptr == CHAR_RIGHT_PARENTHESIS)
05246 {
05247 if (code == cd->start_code + 1 + LINK_SIZE &&
05248 (lengthptr == NULL || *lengthptr == 2 + 2*LINK_SIZE))
05249 {
05250 cd->external_options = newoptions;
05251 }
05252 else
05253 {
05254 if ((options & PCRE_IMS) != (newoptions & PCRE_IMS))
05255 {
05256 *code++ = OP_OPT;
05257 *code++ = newoptions & PCRE_IMS;
05258 }
05259 greedy_default = ((newoptions & PCRE_UNGREEDY) != 0);
05260 greedy_non_default = greedy_default ^ 1;
05261 req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS : 0;
05262 }
05263
05264
05265
05266
05267
05268
05269 *optionsptr = options = newoptions;
05270 previous = NULL;
05271 continue;
05272 }
05273
05274
05275
05276
05277
05278
05279 bravalue = OP_BRA;
05280 ptr++;
05281 }
05282 }
05283
05284
05285
05286
05287
05288 else if ((options & PCRE_NO_AUTO_CAPTURE) != 0)
05289 {
05290 bravalue = OP_BRA;
05291 }
05292
05293
05294
05295 else
05296 {
05297 NUMBERED_GROUP:
05298 cd->bracount += 1;
05299 PUT2(code, 1+LINK_SIZE, cd->bracount);
05300 skipbytes = 2;
05301 }
05302
05303
05304
05305
05306
05307
05308
05309 previous = (bravalue >= OP_ONCE)? code : NULL;
05310 *code = bravalue;
05311 tempcode = code;
05312 tempreqvary = cd->req_varyopt;
05313 length_prevgroup = 0;
05314
05315 if (!compile_regex(
05316 newoptions,
05317 options & PCRE_IMS,
05318 &tempcode,
05319 &ptr,
05320 errorcodeptr,
05321 (bravalue == OP_ASSERTBACK ||
05322 bravalue == OP_ASSERTBACK_NOT),
05323 reset_bracount,
05324 skipbytes,
05325 &subfirstbyte,
05326 &subreqbyte,
05327 bcptr,
05328 cd,
05329 (lengthptr == NULL)? NULL :
05330 &length_prevgroup
05331 ))
05332 goto FAILED;
05333
05334
05335
05336
05337
05338
05339
05340
05341
05342
05343
05344 if (bravalue == OP_COND && lengthptr == NULL)
05345 {
05346 uschar *tc = code;
05347 int condcount = 0;
05348
05349 do {
05350 condcount++;
05351 tc += GET(tc,1);
05352 }
05353 while (*tc != OP_KET);
05354
05355
05356
05357
05358 if (code[LINK_SIZE+1] == OP_DEF)
05359 {
05360 if (condcount > 1)
05361 {
05362 *errorcodeptr = ERR54;
05363 goto FAILED;
05364 }
05365 bravalue = OP_DEF;
05366 }
05367
05368
05369
05370
05371
05372 else
05373 {
05374 if (condcount > 2)
05375 {
05376 *errorcodeptr = ERR27;
05377 goto FAILED;
05378 }
05379 if (condcount == 1) subfirstbyte = subreqbyte = REQ_NONE;
05380 }
05381 }
05382
05383
05384
05385 if (*ptr != CHAR_RIGHT_PARENTHESIS)
05386 {
05387 *errorcodeptr = ERR14;
05388 goto FAILED;
05389 }
05390
05391
05392
05393
05394
05395
05396 if (lengthptr != NULL)
05397 {
05398 if (OFLOW_MAX - *lengthptr < length_prevgroup - 2 - 2*LINK_SIZE)
05399 {
05400 *errorcodeptr = ERR20;
05401 goto FAILED;
05402 }
05403 *lengthptr += length_prevgroup - 2 - 2*LINK_SIZE;
05404 *code++ = OP_BRA;
05405 PUTINC(code, 0, 1 + LINK_SIZE);
05406 *code++ = OP_KET;
05407 PUTINC(code, 0, 1 + LINK_SIZE);
05408 break;
05409 }
05410
05411
05412
05413 code = tempcode;
05414
05415
05416
05417
05418 if (bravalue == OP_DEF) break;
05419
05420
05421
05422
05423
05424
05425
05426
05427 zeroreqbyte = reqbyte;
05428 zerofirstbyte = firstbyte;
05429 groupsetfirstbyte = FALSE;
05430
05431 if (bravalue >= OP_ONCE)
05432 {
05433
05434
05435
05436
05437
05438
05439 if (firstbyte == REQ_UNSET)
05440 {
05441 if (subfirstbyte >= 0)
05442 {
05443 firstbyte = subfirstbyte;
05444 groupsetfirstbyte = TRUE;
05445 }
05446 else firstbyte = REQ_NONE;
05447 zerofirstbyte = REQ_NONE;
05448 }
05449
05450
05451
05452
05453
05454 else if (subfirstbyte >= 0 && subreqbyte < 0)
05455 subreqbyte = subfirstbyte | tempreqvary;
05456
05457
05458
05459
05460 if (subreqbyte >= 0) reqbyte = subreqbyte;
05461 }
05462
05463
05464
05465
05466
05467
05468
05469
05470
05471 else if (bravalue == OP_ASSERT && subreqbyte >= 0) reqbyte = subreqbyte;
05472 break;
05473
05474
05475
05476
05477
05478
05479
05480
05481
05482
05483 case CHAR_BACKSLASH:
05484 tempptr = ptr;
05485 c = check_escape(&ptr, errorcodeptr, cd->bracount, options, FALSE);
05486 if (*errorcodeptr != 0) goto FAILED;
05487
05488 if (c < 0)
05489 {
05490 if (-c == ESC_Q)
05491 {
05492 if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
05493 ptr += 2;
05494 else inescq = TRUE;
05495 continue;
05496 }
05497
05498 if (-c == ESC_E) continue;
05499
05500
05501
05502
05503 if (firstbyte == REQ_UNSET && -c > ESC_b && -c < ESC_Z)
05504 firstbyte = REQ_NONE;
05505
05506
05507
05508 zerofirstbyte = firstbyte;
05509 zeroreqbyte = reqbyte;
05510
05511
05512
05513
05514
05515
05516
05517
05518 if (-c == ESC_g)
05519 {
05520 const uschar *p;
05521 save_hwm = cd->hwm;
05522 terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
05523 CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
05524
05525
05526
05527
05528
05529
05530 skipbytes = 0;
05531 reset_bracount = FALSE;
05532
05533
05534
05535 if (ptr[1] != CHAR_PLUS && ptr[1] != CHAR_MINUS)
05536 {
05537 BOOL isnumber = TRUE;
05538 for (p = ptr + 1; *p != 0 && *p != terminator; p++)
05539 {
05540 if ((cd->ctypes[*p] & ctype_digit) == 0) isnumber = FALSE;
05541 if ((cd->ctypes[*p] & ctype_word) == 0) break;
05542 }
05543 if (*p != terminator)
05544 {
05545 *errorcodeptr = ERR57;
05546 break;
05547 }
05548 if (isnumber)
05549 {
05550 ptr++;
05551 goto HANDLE_NUMERICAL_RECURSION;
05552 }
05553 is_recurse = TRUE;
05554 goto NAMED_REF_OR_RECURSE;
05555 }
05556
05557
05558
05559 p = ptr + 2;
05560 while ((digitab[*p] & ctype_digit) != 0) p++;
05561 if (*p != terminator)
05562 {
05563 *errorcodeptr = ERR57;
05564 break;
05565 }
05566 ptr++;
05567 goto HANDLE_NUMERICAL_RECURSION;
05568 }
05569
05570
05571
05572
05573 if (-c == ESC_k && (ptr[1] == CHAR_LESS_THAN_SIGN ||
05574 ptr[1] == CHAR_APOSTROPHE || ptr[1] == CHAR_LEFT_CURLY_BRACKET))
05575 {
05576 is_recurse = FALSE;
05577 terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
05578 CHAR_GREATER_THAN_SIGN : (*ptr == CHAR_APOSTROPHE)?
05579 CHAR_APOSTROPHE : CHAR_RIGHT_CURLY_BRACKET;
05580 goto NAMED_REF_OR_RECURSE;
05581 }
05582
05583
05584
05585
05586
05587 if (-c >= ESC_REF)
05588 {
05589 recno = -c - ESC_REF;
05590
05591 HANDLE_REFERENCE:
05592 if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
05593 previous = code;
05594 *code++ = OP_REF;
05595 PUT2INC(code, 0, recno);
05596 cd->backref_map |= (recno < 32)? (1 << recno) : 1;
05597 if (recno > cd->top_backref) cd->top_backref = recno;
05598 }
05599
05600
05601
05602 #ifdef SUPPORT_UCP
05603 else if (-c == ESC_P || -c == ESC_p)
05604 {
05605 BOOL negated;
05606 int pdata;
05607 int ptype = get_ucp(&ptr, &negated, &pdata, errorcodeptr);
05608 if (ptype < 0) goto FAILED;
05609 previous = code;
05610 *code++ = ((-c == ESC_p) != negated)? OP_PROP : OP_NOTPROP;
05611 *code++ = ptype;
05612 *code++ = pdata;
05613 }
05614 #else
05615
05616
05617
05618
05619 else if (-c == ESC_X || -c == ESC_P || -c == ESC_p)
05620 {
05621 *errorcodeptr = ERR45;
05622 goto FAILED;
05623 }
05624 #endif
05625
05626
05627
05628
05629 else
05630 {
05631 previous = (-c > ESC_b && -c < ESC_Z)? code : NULL;
05632 *code++ = -c;
05633 }
05634 continue;
05635 }
05636
05637
05638
05639
05640
05641 #ifdef SUPPORT_UTF8
05642 if (utf8 && c > 127)
05643 mclength = _pcre_ord2utf8(c, mcbuffer);
05644 else
05645 #endif
05646
05647 {
05648 mcbuffer[0] = c;
05649 mclength = 1;
05650 }
05651 goto ONE_CHAR;
05652
05653
05654
05655
05656
05657
05658
05659 default:
05660 NORMAL_CHAR:
05661 mclength = 1;
05662 mcbuffer[0] = c;
05663
05664 #ifdef SUPPORT_UTF8
05665 if (utf8 && c >= 0xc0)
05666 {
05667 while ((ptr[1] & 0xc0) == 0x80)
05668 mcbuffer[mclength++] = *(++ptr);
05669 }
05670 #endif
05671
05672
05673
05674
05675 ONE_CHAR:
05676 previous = code;
05677 *code++ = ((options & PCRE_CASELESS) != 0)? OP_CHARNC : OP_CHAR;
05678 for (c = 0; c < mclength; c++) *code++ = mcbuffer[c];
05679
05680
05681
05682 if (mcbuffer[0] == CHAR_CR || mcbuffer[0] == CHAR_NL)
05683 cd->external_flags |= PCRE_HASCRORLF;
05684
05685
05686
05687
05688
05689
05690 if (firstbyte == REQ_UNSET)
05691 {
05692 zerofirstbyte = REQ_NONE;
05693 zeroreqbyte = reqbyte;
05694
05695
05696
05697
05698 if (mclength == 1 || req_caseopt == 0)
05699 {
05700 firstbyte = mcbuffer[0] | req_caseopt;
05701 if (mclength != 1) reqbyte = code[-1] | cd->req_varyopt;
05702 }
05703 else firstbyte = reqbyte = REQ_NONE;
05704 }
05705
05706
05707
05708
05709 else
05710 {
05711 zerofirstbyte = firstbyte;
05712 zeroreqbyte = reqbyte;
05713 if (mclength == 1 || req_caseopt == 0)
05714 reqbyte = code[-1] | req_caseopt | cd->req_varyopt;
05715 }
05716
05717 break;
05718 }
05719 }
05720
05721
05722
05723
05724
05725
05726 FAILED:
05727 *ptrptr = ptr;
05728 return FALSE;
05729 }
05730
05731
05732
05733
05734
05735
05736
05737
05738
05739
05740
05741
05742
05743
05744
05745
05746
05747
05748
05749
05750
05751
05752
05753
05754
05755
05756
05757
05758
05759
05760
05761
05762
05763
05764
05765
05766
05767
05768
05769 static BOOL
05770 compile_regex(int options, int oldims, uschar **codeptr, const uschar **ptrptr,
05771 int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, int skipbytes,
05772 int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr, compile_data *cd,
05773 int *lengthptr)
05774 {
05775 const uschar *ptr = *ptrptr;
05776 uschar *code = *codeptr;
05777 uschar *last_branch = code;
05778 uschar *start_bracket = code;
05779 uschar *reverse_count = NULL;
05780 open_capitem capitem;
05781 int capnumber = 0;
05782 int firstbyte, reqbyte;
05783 int branchfirstbyte, branchreqbyte;
05784 int length;
05785 int orig_bracount;
05786 int max_bracount;
05787 branch_chain bc;
05788
05789 bc.outer = bcptr;
05790 bc.current = code;
05791
05792 firstbyte = reqbyte = REQ_UNSET;
05793
05794
05795
05796
05797
05798
05799
05800
05801 length = 2 + 2*LINK_SIZE + skipbytes;
05802
05803
05804
05805
05806
05807
05808
05809
05810
05811 if (*code == OP_CBRA)
05812 {
05813 capnumber = GET2(code, 1 + LINK_SIZE);
05814 capitem.number = capnumber;
05815 capitem.next = cd->open_caps;
05816 cd->open_caps = &capitem;
05817 }
05818
05819
05820
05821 PUT(code, 1, 0);
05822 code += 1 + LINK_SIZE + skipbytes;
05823
05824
05825
05826 orig_bracount = max_bracount = cd->bracount;
05827 for (;;)
05828 {
05829
05830
05831
05832 if (reset_bracount) cd->bracount = orig_bracount;
05833
05834
05835
05836 if ((options & PCRE_IMS) != oldims)
05837 {
05838 *code++ = OP_OPT;
05839 *code++ = options & PCRE_IMS;
05840 length += 2;
05841 }
05842
05843
05844
05845 if (lookbehind)
05846 {
05847 *code++ = OP_REVERSE;
05848 reverse_count = code;
05849 PUTINC(code, 0, 0);
05850 length += 1 + LINK_SIZE;
05851 }
05852
05853
05854
05855
05856 if (!compile_branch(&options, &code, &ptr, errorcodeptr, &branchfirstbyte,
05857 &branchreqbyte, &bc, cd, (lengthptr == NULL)? NULL : &length))
05858 {
05859 *ptrptr = ptr;
05860 return FALSE;
05861 }
05862
05863
05864
05865
05866 if (cd->bracount > max_bracount) max_bracount = cd->bracount;
05867
05868
05869
05870 if (lengthptr == NULL)
05871 {
05872
05873
05874
05875 if (*last_branch != OP_ALT)
05876 {
05877 firstbyte = branchfirstbyte;
05878 reqbyte = branchreqbyte;
05879 }
05880
05881
05882
05883
05884
05885
05886 else
05887 {
05888
05889
05890
05891
05892 if (firstbyte >= 0 && firstbyte != branchfirstbyte)
05893 {
05894 if (reqbyte < 0) reqbyte = firstbyte;
05895 firstbyte = REQ_NONE;
05896 }
05897
05898
05899
05900
05901 if (firstbyte < 0 && branchfirstbyte >= 0 && branchreqbyte < 0)
05902 branchreqbyte = branchfirstbyte;
05903
05904
05905
05906 if ((reqbyte & ~REQ_VARY) != (branchreqbyte & ~REQ_VARY))
05907 reqbyte = REQ_NONE;
05908 else reqbyte |= branchreqbyte;
05909 }
05910
05911
05912
05913
05914
05915
05916
05917
05918
05919 if (lookbehind)
05920 {
05921 int fixed_length;
05922 *code = OP_END;
05923 fixed_length = find_fixedlength(last_branch, options, FALSE, cd);
05924 DPRINTF(("fixed length = %d\n", fixed_length));
05925 if (fixed_length == -3)
05926 {
05927 cd->check_lookbehind = TRUE;
05928 }
05929 else if (fixed_length < 0)
05930 {
05931 *errorcodeptr = (fixed_length == -2)? ERR36 : ERR25;
05932 *ptrptr = ptr;
05933 return FALSE;
05934 }
05935 else { PUT(reverse_count, 0, fixed_length); }
05936 }
05937 }
05938
05939
05940
05941
05942
05943
05944
05945
05946
05947
05948 if (*ptr != CHAR_VERTICAL_LINE)
05949 {
05950 if (lengthptr == NULL)
05951 {
05952 int branch_length = code - last_branch;
05953 do
05954 {
05955 int prev_length = GET(last_branch, 1);
05956 PUT(last_branch, 1, branch_length);
05957 branch_length = prev_length;
05958 last_branch -= branch_length;
05959 }
05960 while (branch_length > 0);
05961 }
05962
05963
05964
05965 if (capnumber > 0) cd->open_caps = cd->open_caps->next;
05966
05967
05968
05969 *code = OP_KET;
05970 PUT(code, 1, code - start_bracket);
05971 code += 1 + LINK_SIZE;
05972
05973
05974
05975 if ((options & PCRE_IMS) != oldims && *ptr == CHAR_RIGHT_PARENTHESIS)
05976 {
05977 *code++ = OP_OPT;
05978 *code++ = oldims;
05979 length += 2;
05980 }
05981
05982
05983
05984 cd->bracount = max_bracount;
05985
05986
05987
05988 *codeptr = code;
05989 *ptrptr = ptr;
05990 *firstbyteptr = firstbyte;
05991 *reqbyteptr = reqbyte;
05992 if (lengthptr != NULL)
05993 {
05994 if (OFLOW_MAX - *lengthptr < length)
05995 {
05996 *errorcodeptr = ERR20;
05997 return FALSE;
05998 }
05999 *lengthptr += length;
06000 }
06001 return TRUE;
06002 }
06003
06004
06005
06006
06007
06008
06009
06010
06011
06012
06013 if (lengthptr != NULL)
06014 {
06015 code = *codeptr + 1 + LINK_SIZE + skipbytes;
06016 length += 1 + LINK_SIZE;
06017 }
06018 else
06019 {
06020 *code = OP_ALT;
06021 PUT(code, 1, code - last_branch);
06022 bc.current = last_branch = code;
06023 code += 1 + LINK_SIZE;
06024 }
06025
06026 ptr++;
06027 }
06028
06029 }
06030
06031
06032
06033
06034
06035
06036
06037
06038
06039
06040
06041
06042
06043
06044
06045
06046
06047
06048
06049
06050
06051
06052
06053
06054
06055
06056
06057
06058
06059
06060
06061
06062
06063
06064
06065
06066
06067
06068
06069
06070
06071
06072 static BOOL
06073 is_anchored(register const uschar *code, int *options, unsigned int bracket_map,
06074 unsigned int backref_map)
06075 {
06076 do {
06077 const uschar *scode = first_significant_code(code + _pcre_OP_lengths[*code],
06078 options, PCRE_MULTILINE, FALSE);
06079 register int op = *scode;
06080
06081
06082
06083 if (op == OP_BRA)
06084 {
06085 if (!is_anchored(scode, options, bracket_map, backref_map)) return FALSE;
06086 }
06087
06088
06089
06090 else if (op == OP_CBRA)
06091 {
06092 int n = GET2(scode, 1+LINK_SIZE);
06093 int new_map = bracket_map | ((n < 32)? (1 << n) : 1);
06094 if (!is_anchored(scode, options, new_map, backref_map)) return FALSE;
06095 }
06096
06097
06098
06099 else if (op == OP_ASSERT || op == OP_ONCE || op == OP_COND)
06100 {
06101 if (!is_anchored(scode, options, bracket_map, backref_map)) return FALSE;
06102 }
06103
06104
06105
06106
06107 else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR ||
06108 op == OP_TYPEPOSSTAR))
06109 {
06110 if (scode[1] != OP_ALLANY || (bracket_map & backref_map) != 0)
06111 return FALSE;
06112 }
06113
06114
06115
06116 else if (op != OP_SOD && op != OP_SOM &&
06117 ((*options & PCRE_MULTILINE) != 0 || op != OP_CIRC))
06118 return FALSE;
06119 code += GET(code, 1);
06120 }
06121 while (*code == OP_ALT);
06122 return TRUE;
06123 }
06124
06125
06126
06127
06128
06129
06130
06131
06132
06133
06134
06135
06136
06137
06138
06139
06140
06141
06142
06143
06144
06145
06146
06147
06148 static BOOL
06149 is_startline(const uschar *code, unsigned int bracket_map,
06150 unsigned int backref_map)
06151 {
06152 do {
06153 const uschar *scode = first_significant_code(code + _pcre_OP_lengths[*code],
06154 NULL, 0, FALSE);
06155 register int op = *scode;
06156
06157
06158
06159
06160
06161
06162 if (op == OP_COND)
06163 {
06164 scode += 1 + LINK_SIZE;
06165 if (*scode == OP_CALLOUT) scode += _pcre_OP_lengths[OP_CALLOUT];
06166 switch (*scode)
06167 {
06168 case OP_CREF:
06169 case OP_NCREF:
06170 case OP_RREF:
06171 case OP_NRREF:
06172 case OP_DEF:
06173 return FALSE;
06174
06175 default:
06176 if (!is_startline(scode, bracket_map, backref_map)) return FALSE;
06177 do scode += GET(scode, 1); while (*scode == OP_ALT);
06178 scode += 1 + LINK_SIZE;
06179 break;
06180 }
06181 scode = first_significant_code(scode, NULL, 0, FALSE);
06182 op = *scode;
06183 }
06184
06185
06186
06187 if (op == OP_BRA)
06188 {
06189 if (!is_startline(scode, bracket_map, backref_map)) return FALSE;
06190 }
06191
06192
06193
06194 else if (op == OP_CBRA)
06195 {
06196 int n = GET2(scode, 1+LINK_SIZE);
06197 int new_map = bracket_map | ((n < 32)? (1 << n) : 1);
06198 if (!is_startline(scode, new_map, backref_map)) return FALSE;
06199 }
06200
06201
06202
06203 else if (op == OP_ASSERT || op == OP_ONCE)
06204 {
06205 if (!is_startline(scode, bracket_map, backref_map)) return FALSE;
06206 }
06207
06208
06209
06210
06211 else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR || op == OP_TYPEPOSSTAR)
06212 {
06213 if (scode[1] != OP_ANY || (bracket_map & backref_map) != 0) return FALSE;
06214 }
06215
06216
06217
06218 else if (op != OP_CIRC) return FALSE;
06219
06220
06221
06222 code += GET(code, 1);
06223 }
06224 while (*code == OP_ALT);
06225 return TRUE;
06226 }
06227
06228
06229
06230
06231
06232
06233
06234
06235
06236
06237
06238
06239
06240
06241
06242
06243
06244
06245
06246
06247
06248
06249
06250 static int
06251 find_firstassertedchar(const uschar *code, int *options, BOOL inassert)
06252 {
06253 register int c = -1;
06254 do {
06255 int d;
06256 const uschar *scode =
06257 first_significant_code(code + 1+LINK_SIZE, options, PCRE_CASELESS, TRUE);
06258 register int op = *scode;
06259
06260 switch(op)
06261 {
06262 default:
06263 return -1;
06264
06265 case OP_BRA:
06266 case OP_CBRA:
06267 case OP_ASSERT:
06268 case OP_ONCE:
06269 case OP_COND:
06270 if ((d = find_firstassertedchar(scode, options, op == OP_ASSERT)) < 0)
06271 return -1;
06272 if (c < 0) c = d; else if (c != d) return -1;
06273 break;
06274
06275 case OP_EXACT:
06276 scode += 2;
06277
06278 case OP_CHAR:
06279 case OP_CHARNC:
06280 case OP_PLUS:
06281 case OP_MINPLUS:
06282 case OP_POSPLUS:
06283 if (!inassert) return -1;
06284 if (c < 0)
06285 {
06286 c = scode[1];
06287 if ((*options & PCRE_CASELESS) != 0) c |= REQ_CASELESS;
06288 }
06289 else if (c != scode[1]) return -1;
06290 break;
06291 }
06292
06293 code += GET(code, 1);
06294 }
06295 while (*code == OP_ALT);
06296 return c;
06297 }
06298
06299
06300
06301
06302
06303
06304
06305
06306
06307
06308
06309
06310
06311
06312
06313
06314
06315
06316
06317
06318
06319
06320
06321
06322
06323 PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
06324 pcre_compile(const char *pattern, int options, const char **errorptr,
06325 int *erroroffset, const unsigned char *tables)
06326 {
06327 return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
06328 }
06329
06330
06331 PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
06332 pcre_compile2(const char *pattern, int options, int *errorcodeptr,
06333 const char **errorptr, int *erroroffset, const unsigned char *tables)
06334 {
06335 real_pcre *re;
06336 int length = 1;
06337 int firstbyte, reqbyte, newline;
06338 int errorcode = 0;
06339 int skipatstart = 0;
06340 BOOL utf8 = (options & PCRE_UTF8) != 0;
06341 size_t size;
06342 uschar *code;
06343 const uschar *codestart;
06344 const uschar *ptr;
06345 compile_data compile_block;
06346 compile_data *cd = &compile_block;
06347
06348
06349
06350
06351
06352
06353
06354 uschar cworkspace[COMPILE_WORK_SIZE];
06355
06356
06357
06358 ptr = (const uschar *)pattern;
06359
06360
06361
06362
06363
06364 if (errorptr == NULL)
06365 {
06366 if (errorcodeptr != NULL) *errorcodeptr = 99;
06367 return NULL;
06368 }
06369
06370 *errorptr = NULL;
06371 if (errorcodeptr != NULL) *errorcodeptr = ERR0;
06372
06373
06374
06375 if (erroroffset == NULL)
06376 {
06377 errorcode = ERR16;
06378 goto PCRE_EARLY_ERROR_RETURN2;
06379 }
06380
06381 *erroroffset = 0;
06382
06383
06384
06385 if (tables == NULL) tables = _pcre_default_tables;
06386 cd->lcc = tables + lcc_offset;
06387 cd->fcc = tables + fcc_offset;
06388 cd->cbits = tables + cbits_offset;
06389 cd->ctypes = tables + ctypes_offset;
06390
06391
06392
06393 if ((options & ~PUBLIC_COMPILE_OPTIONS) != 0)
06394 {
06395 errorcode = ERR17;
06396 goto PCRE_EARLY_ERROR_RETURN;
06397 }
06398
06399
06400
06401
06402 while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
06403 ptr[skipatstart+1] == CHAR_ASTERISK)
06404 {
06405 int newnl = 0;
06406 int newbsr = 0;
06407
06408 if (strncmp((char *)(ptr+skipatstart+2), STRING_UTF8_RIGHTPAR, 5) == 0)
06409 { skipatstart += 7; options |= PCRE_UTF8; continue; }
06410
06411 if (strncmp((char *)(ptr+skipatstart+2), STRING_CR_RIGHTPAR, 3) == 0)
06412 { skipatstart += 5; newnl = PCRE_NEWLINE_CR; }
06413 else if (strncmp((char *)(ptr+skipatstart+2), STRING_LF_RIGHTPAR, 3) == 0)
06414 { skipatstart += 5; newnl = PCRE_NEWLINE_LF; }
06415 else if (strncmp((char *)(ptr+skipatstart+2), STRING_CRLF_RIGHTPAR, 5) == 0)
06416 { skipatstart += 7; newnl = PCRE_NEWLINE_CR + PCRE_NEWLINE_LF; }
06417 else if (strncmp((char *)(ptr+skipatstart+2), STRING_ANY_RIGHTPAR, 4) == 0)
06418 { skipatstart += 6; newnl = PCRE_NEWLINE_ANY; }
06419 else if (strncmp((char *)(ptr+skipatstart+2), STRING_ANYCRLF_RIGHTPAR, 8) == 0)
06420 { skipatstart += 10; newnl = PCRE_NEWLINE_ANYCRLF; }
06421
06422 else if (strncmp((char *)(ptr+skipatstart+2), STRING_BSR_ANYCRLF_RIGHTPAR, 12) == 0)
06423 { skipatstart += 14; newbsr = PCRE_BSR_ANYCRLF; }
06424 else if (strncmp((char *)(ptr+skipatstart+2), STRING_BSR_UNICODE_RIGHTPAR, 12) == 0)
06425 { skipatstart += 14; newbsr = PCRE_BSR_UNICODE; }
06426
06427 if (newnl != 0)
06428 options = (options & ~PCRE_NEWLINE_BITS) | newnl;
06429 else if (newbsr != 0)
06430 options = (options & ~(PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) | newbsr;
06431 else break;
06432 }
06433
06434
06435
06436 #ifdef SUPPORT_UTF8
06437 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&
06438 (*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0)
06439 {
06440 errorcode = ERR44;
06441 goto PCRE_EARLY_ERROR_RETURN2;
06442 }
06443 #else
06444 if (utf8)
06445 {
06446 errorcode = ERR32;
06447 goto PCRE_EARLY_ERROR_RETURN;
06448 }
06449 #endif
06450
06451
06452
06453 switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
06454 {
06455 case 0:
06456 case PCRE_BSR_ANYCRLF:
06457 case PCRE_BSR_UNICODE:
06458 break;
06459 default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;
06460 }
06461
06462
06463
06464
06465
06466 switch (options & PCRE_NEWLINE_BITS)
06467 {
06468 case 0: newline = NEWLINE; break;
06469 case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
06470 case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
06471 case PCRE_NEWLINE_CR+
06472 PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
06473 case PCRE_NEWLINE_ANY: newline = -1; break;
06474 case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
06475 default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;
06476 }
06477
06478 if (newline == -2)
06479 {
06480 cd->nltype = NLTYPE_ANYCRLF;
06481 }
06482 else if (newline < 0)
06483 {
06484 cd->nltype = NLTYPE_ANY;
06485 }
06486 else
06487 {
06488 cd->nltype = NLTYPE_FIXED;
06489 if (newline > 255)
06490 {
06491 cd->nllen = 2;
06492 cd->nl[0] = (newline >> 8) & 255;
06493 cd->nl[1] = newline & 255;
06494 }
06495 else
06496 {
06497 cd->nllen = 1;
06498 cd->nl[0] = newline;
06499 }
06500 }
06501
06502
06503
06504
06505
06506 cd->top_backref = 0;
06507 cd->backref_map = 0;
06508
06509
06510
06511 DPRINTF(("------------------------------------------------------------------\n"));
06512 DPRINTF(("%s\n", pattern));
06513
06514
06515
06516
06517
06518
06519
06520
06521 cd->bracount = cd->final_bracount = 0;
06522 cd->names_found = 0;
06523 cd->name_entry_size = 0;
06524 cd->name_table = NULL;
06525 cd->start_workspace = cworkspace;
06526 cd->start_code = cworkspace;
06527 cd->hwm = cworkspace;
06528 cd->start_pattern = (const uschar *)pattern;
06529 cd->end_pattern = (const uschar *)(pattern + strlen(pattern));
06530 cd->req_varyopt = 0;
06531 cd->external_options = options;
06532 cd->external_flags = 0;
06533 cd->open_caps = NULL;
06534
06535
06536
06537
06538
06539
06540
06541 ptr += skipatstart;
06542 code = cworkspace;
06543 *code = OP_BRA;
06544 (void)compile_regex(cd->external_options, cd->external_options & PCRE_IMS,
06545 &code, &ptr, &errorcode, FALSE, FALSE, 0, &firstbyte, &reqbyte, NULL, cd,
06546 &length);
06547 if (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN;
06548
06549 DPRINTF(("end pre-compile: length=%d workspace=%d\n", length,
06550 cd->hwm - cworkspace));
06551
06552 if (length > MAX_PATTERN_SIZE)
06553 {
06554 errorcode = ERR20;
06555 goto PCRE_EARLY_ERROR_RETURN;
06556 }
06557
06558
06559
06560
06561
06562
06563 size = length + sizeof(real_pcre) + cd->names_found * (cd->name_entry_size + 3);
06564 re = (real_pcre *)(pcre_malloc)(size);
06565
06566 if (re == NULL)
06567 {
06568 errorcode = ERR21;
06569 goto PCRE_EARLY_ERROR_RETURN;
06570 }
06571
06572
06573
06574
06575
06576
06577
06578 re->magic_number = MAGIC_NUMBER;
06579 re->size = size;
06580 re->options = cd->external_options;
06581 re->flags = cd->external_flags;
06582 re->dummy1 = 0;
06583 re->first_byte = 0;
06584 re->req_byte = 0;
06585 re->name_table_offset = sizeof(real_pcre);
06586 re->name_entry_size = cd->name_entry_size;
06587 re->name_count = cd->names_found;
06588 re->ref_count = 0;
06589 re->tables = (tables == _pcre_default_tables)? NULL : tables;
06590 re->nullpad = NULL;
06591
06592
06593
06594
06595
06596
06597
06598
06599 cd->final_bracount = cd->bracount;
06600 cd->bracount = 0;
06601 cd->names_found = 0;
06602 cd->name_table = (uschar *)re + re->name_table_offset;
06603 codestart = cd->name_table + re->name_entry_size * re->name_count;
06604 cd->start_code = codestart;
06605 cd->hwm = cworkspace;
06606 cd->req_varyopt = 0;
06607 cd->had_accept = FALSE;
06608 cd->check_lookbehind = FALSE;
06609 cd->open_caps = NULL;
06610
06611
06612
06613
06614
06615 ptr = (const uschar *)pattern + skipatstart;
06616 code = (uschar *)codestart;
06617 *code = OP_BRA;
06618 (void)compile_regex(re->options, re->options & PCRE_IMS, &code, &ptr,
06619 &errorcode, FALSE, FALSE, 0, &firstbyte, &reqbyte, NULL, cd, NULL);
06620 re->top_bracket = cd->bracount;
06621 re->top_backref = cd->top_backref;
06622 re->flags = cd->external_flags;
06623
06624 if (cd->had_accept) reqbyte = -1;
06625
06626
06627
06628 if (errorcode == 0 && *ptr != 0) errorcode = ERR22;
06629
06630
06631
06632
06633 *code++ = OP_END;
06634
06635 #ifndef DEBUG
06636 if (code - codestart > length) errorcode = ERR23;
06637 #endif
06638
06639
06640
06641 while (errorcode == 0 && cd->hwm > cworkspace)
06642 {
06643 int offset, recno;
06644 const uschar *groupptr;
06645 cd->hwm -= LINK_SIZE;
06646 offset = GET(cd->hwm, 0);
06647 recno = GET(codestart, offset);
06648 groupptr = _pcre_find_bracket(codestart, utf8, recno);
06649 if (groupptr == NULL) errorcode = ERR53;
06650 else PUT(((uschar *)codestart), offset, groupptr - codestart);
06651 }
06652
06653
06654
06655
06656 if (errorcode == 0 && re->top_backref > re->top_bracket) errorcode = ERR15;
06657
06658
06659
06660
06661
06662
06663
06664
06665
06666 if (cd->check_lookbehind)
06667 {
06668 uschar *cc = (uschar *)codestart;
06669
06670
06671
06672
06673
06674
06675 for (cc = (uschar *)_pcre_find_bracket(codestart, utf8, -1);
06676 cc != NULL;
06677 cc = (uschar *)_pcre_find_bracket(cc, utf8, -1))
06678 {
06679 if (GET(cc, 1) == 0)
06680 {
06681 int fixed_length;
06682 uschar *be = cc - 1 - LINK_SIZE + GET(cc, -LINK_SIZE);
06683 int end_op = *be;
06684 *be = OP_END;
06685 fixed_length = find_fixedlength(cc, re->options, TRUE, cd);
06686 *be = end_op;
06687 DPRINTF(("fixed length = %d\n", fixed_length));
06688 if (fixed_length < 0)
06689 {
06690 errorcode = (fixed_length == -2)? ERR36 : ERR25;
06691 break;
06692 }
06693 PUT(cc, 1, fixed_length);
06694 }
06695 cc += 1 + LINK_SIZE;
06696 }
06697 }
06698
06699
06700
06701 if (errorcode != 0)
06702 {
06703 (pcre_free)(re);
06704 PCRE_EARLY_ERROR_RETURN:
06705 *erroroffset = ptr - (const uschar *)pattern;
06706 PCRE_EARLY_ERROR_RETURN2:
06707 *errorptr = find_error_text(errorcode);
06708 if (errorcodeptr != NULL) *errorcodeptr = errorcode;
06709 return NULL;
06710 }
06711
06712
06713
06714
06715
06716
06717
06718
06719
06720
06721
06722 if ((re->options & PCRE_ANCHORED) == 0)
06723 {
06724 int temp_options = re->options;
06725 if (is_anchored(codestart, &temp_options, 0, cd->backref_map))
06726 re->options |= PCRE_ANCHORED;
06727 else
06728 {
06729 if (firstbyte < 0)
06730 firstbyte = find_firstassertedchar(codestart, &temp_options, FALSE);
06731 if (firstbyte >= 0)
06732 {
06733 int ch = firstbyte & 255;
06734 re->first_byte = ((firstbyte & REQ_CASELESS) != 0 &&
06735 cd->fcc[ch] == ch)? ch : firstbyte;
06736 re->flags |= PCRE_FIRSTSET;
06737 }
06738 else if (is_startline(codestart, 0, cd->backref_map))
06739 re->flags |= PCRE_STARTLINE;
06740 }
06741 }
06742
06743
06744
06745
06746
06747 if (reqbyte >= 0 &&
06748 ((re->options & PCRE_ANCHORED) == 0 || (reqbyte & REQ_VARY) != 0))
06749 {
06750 int ch = reqbyte & 255;
06751 re->req_byte = ((reqbyte & REQ_CASELESS) != 0 &&
06752 cd->fcc[ch] == ch)? (reqbyte & ~REQ_CASELESS) : reqbyte;
06753 re->flags |= PCRE_REQCHSET;
06754 }
06755
06756
06757
06758
06759 #ifdef DEBUG_PCRE
06760
06761 printf("Length = %d top_bracket = %d top_backref = %d\n",
06762 length, re->top_bracket, re->top_backref);
06763
06764 printf("Options=%08x\n", re->options);
06765
06766 if ((re->flags & PCRE_FIRSTSET) != 0)
06767 {
06768 int ch = re->first_byte & 255;
06769 const char *caseless = ((re->first_byte & REQ_CASELESS) == 0)?
06770 "" : " (caseless)";
06771 if (isprint(ch)) printf("First char = %c%s\n", ch, caseless);
06772 else printf("First char = \\x%02x%s\n", ch, caseless);
06773 }
06774
06775 if ((re->flags & PCRE_REQCHSET) != 0)
06776 {
06777 int ch = re->req_byte & 255;
06778 const char *caseless = ((re->req_byte & REQ_CASELESS) == 0)?
06779 "" : " (caseless)";
06780 if (isprint(ch)) printf("Req char = %c%s\n", ch, caseless);
06781 else printf("Req char = \\x%02x%s\n", ch, caseless);
06782 }
06783
06784 pcre_printint(re, stdout, TRUE);
06785
06786
06787
06788
06789 if (code - codestart > length)
06790 {
06791 (pcre_free)(re);
06792 *errorptr = find_error_text(ERR23);
06793 *erroroffset = ptr - (uschar *)pattern;
06794 if (errorcodeptr != NULL) *errorcodeptr = ERR23;
06795 return NULL;
06796 }
06797 #endif
06798
06799 return (pcre *)re;
06800 }
06801
06802