00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045 #ifdef HAVE_CONFIG_H
00046 #include "config.h"
00047 #else if defined(_WINDOWS)
00048 #include <spl/configwin32.h>
00049 #endif
00050
00051
00052 #include "pcre_internal.h"
00053
00054
00055
00056
00057 enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE };
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080 static int
00081 find_minlength(const uschar *code, const uschar *startcode, int options)
00082 {
00083 int length = -1;
00084 BOOL utf8 = (options & PCRE_UTF8) != 0;
00085 BOOL had_recurse = FALSE;
00086 register int branchlength = 0;
00087 register uschar *cc = (uschar *)code + 1 + LINK_SIZE;
00088
00089 if (*code == OP_CBRA || *code == OP_SCBRA) cc += 2;
00090
00091
00092
00093
00094 for (;;)
00095 {
00096 int d, min;
00097 uschar *cs, *ce;
00098 register int op = *cc;
00099
00100 switch (op)
00101 {
00102 case OP_CBRA:
00103 case OP_SCBRA:
00104 case OP_BRA:
00105 case OP_SBRA:
00106 case OP_ONCE:
00107 case OP_COND:
00108 case OP_SCOND:
00109 d = find_minlength(cc, startcode, options);
00110 if (d < 0) return d;
00111 branchlength += d;
00112 do cc += GET(cc, 1); while (*cc == OP_ALT);
00113 cc += 1 + LINK_SIZE;
00114 break;
00115
00116
00117
00118
00119
00120 case OP_ALT:
00121 case OP_KET:
00122 case OP_KETRMAX:
00123 case OP_KETRMIN:
00124 case OP_END:
00125 if (length < 0 || (!had_recurse && branchlength < length))
00126 length = branchlength;
00127 if (*cc != OP_ALT) return length;
00128 cc += 1 + LINK_SIZE;
00129 branchlength = 0;
00130 had_recurse = FALSE;
00131 break;
00132
00133
00134
00135 case OP_ASSERT:
00136 case OP_ASSERT_NOT:
00137 case OP_ASSERTBACK:
00138 case OP_ASSERTBACK_NOT:
00139 do cc += GET(cc, 1); while (*cc == OP_ALT);
00140
00141
00142
00143
00144 case OP_REVERSE:
00145 case OP_CREF:
00146 case OP_NCREF:
00147 case OP_RREF:
00148 case OP_NRREF:
00149 case OP_DEF:
00150 case OP_OPT:
00151 case OP_CALLOUT:
00152 case OP_SOD:
00153 case OP_SOM:
00154 case OP_EOD:
00155 case OP_EODN:
00156 case OP_CIRC:
00157 case OP_DOLL:
00158 case OP_NOT_WORD_BOUNDARY:
00159 case OP_WORD_BOUNDARY:
00160 cc += _pcre_OP_lengths[*cc];
00161 break;
00162
00163
00164
00165 case OP_BRAZERO:
00166 case OP_BRAMINZERO:
00167 case OP_SKIPZERO:
00168 cc += _pcre_OP_lengths[*cc];
00169 do cc += GET(cc, 1); while (*cc == OP_ALT);
00170 cc += 1 + LINK_SIZE;
00171 break;
00172
00173
00174
00175 case OP_CHAR:
00176 case OP_CHARNC:
00177 case OP_NOT:
00178 case OP_PLUS:
00179 case OP_MINPLUS:
00180 case OP_POSPLUS:
00181 case OP_NOTPLUS:
00182 case OP_NOTMINPLUS:
00183 case OP_NOTPOSPLUS:
00184 branchlength++;
00185 cc += 2;
00186 #ifdef SUPPORT_UTF8
00187 if (utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];
00188 #endif
00189 break;
00190
00191 case OP_TYPEPLUS:
00192 case OP_TYPEMINPLUS:
00193 case OP_TYPEPOSPLUS:
00194 branchlength++;
00195 cc += (cc[1] == OP_PROP || cc[1] == OP_NOTPROP)? 4 : 2;
00196 break;
00197
00198
00199
00200
00201 case OP_EXACT:
00202 case OP_NOTEXACT:
00203 branchlength += GET2(cc,1);
00204 cc += 4;
00205 #ifdef SUPPORT_UTF8
00206 if (utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];
00207 #endif
00208 break;
00209
00210 case OP_TYPEEXACT:
00211 branchlength += GET2(cc,1);
00212 cc += (cc[3] == OP_PROP || cc[3] == OP_NOTPROP)? 6 : 4;
00213 break;
00214
00215
00216
00217 case OP_PROP:
00218 case OP_NOTPROP:
00219 cc += 2;
00220
00221
00222 case OP_NOT_DIGIT:
00223 case OP_DIGIT:
00224 case OP_NOT_WHITESPACE:
00225 case OP_WHITESPACE:
00226 case OP_NOT_WORDCHAR:
00227 case OP_WORDCHAR:
00228 case OP_ANY:
00229 case OP_ALLANY:
00230 case OP_EXTUNI:
00231 case OP_HSPACE:
00232 case OP_NOT_HSPACE:
00233 case OP_VSPACE:
00234 case OP_NOT_VSPACE:
00235 branchlength++;
00236 cc++;
00237 break;
00238
00239
00240
00241 case OP_ANYNL:
00242 branchlength += 2;
00243 cc++;
00244 break;
00245
00246
00247
00248 case OP_ANYBYTE:
00249 #ifdef SUPPORT_UTF8
00250 if (utf8) return -1;
00251 #endif
00252 branchlength++;
00253 cc++;
00254 break;
00255
00256
00257
00258
00259 case OP_TYPESTAR:
00260 case OP_TYPEMINSTAR:
00261 case OP_TYPEQUERY:
00262 case OP_TYPEMINQUERY:
00263 case OP_TYPEPOSSTAR:
00264 case OP_TYPEPOSQUERY:
00265 if (cc[1] == OP_PROP || cc[1] == OP_NOTPROP) cc += 2;
00266 cc += _pcre_OP_lengths[op];
00267 break;
00268
00269 case OP_TYPEUPTO:
00270 case OP_TYPEMINUPTO:
00271 case OP_TYPEPOSUPTO:
00272 if (cc[3] == OP_PROP || cc[3] == OP_NOTPROP) cc += 2;
00273 cc += _pcre_OP_lengths[op];
00274 break;
00275
00276
00277
00278 #ifdef SUPPORT_UTF8
00279 case OP_XCLASS:
00280 cc += GET(cc, 1) - 33;
00281
00282 #endif
00283
00284 case OP_CLASS:
00285 case OP_NCLASS:
00286 cc += 33;
00287
00288 switch (*cc)
00289 {
00290 case OP_CRPLUS:
00291 case OP_CRMINPLUS:
00292 branchlength++;
00293
00294
00295 case OP_CRSTAR:
00296 case OP_CRMINSTAR:
00297 case OP_CRQUERY:
00298 case OP_CRMINQUERY:
00299 cc++;
00300 break;
00301
00302 case OP_CRRANGE:
00303 case OP_CRMINRANGE:
00304 branchlength += GET2(cc,1);
00305 cc += 5;
00306 break;
00307
00308 default:
00309 branchlength++;
00310 break;
00311 }
00312 break;
00313
00314
00315
00316
00317
00318
00319
00320
00321
00322
00323
00324
00325
00326 case OP_REF:
00327 if ((options & PCRE_JAVASCRIPT_COMPAT) == 0)
00328 {
00329 ce = cs = (uschar *)_pcre_find_bracket(startcode, utf8, GET2(cc, 1));
00330 if (cs == NULL) return -2;
00331 do ce += GET(ce, 1); while (*ce == OP_ALT);
00332 if (cc > cs && cc < ce)
00333 {
00334 d = 0;
00335 had_recurse = TRUE;
00336 }
00337 else d = find_minlength(cs, startcode, options);
00338 }
00339 else d = 0;
00340 cc += 3;
00341
00342
00343
00344 switch (*cc)
00345 {
00346 case OP_CRSTAR:
00347 case OP_CRMINSTAR:
00348 case OP_CRQUERY:
00349 case OP_CRMINQUERY:
00350 min = 0;
00351 cc++;
00352 break;
00353
00354 case OP_CRRANGE:
00355 case OP_CRMINRANGE:
00356 min = GET2(cc, 1);
00357 cc += 5;
00358 break;
00359
00360 default:
00361 min = 1;
00362 break;
00363 }
00364
00365 branchlength += min * d;
00366 break;
00367
00368 case OP_RECURSE:
00369 cs = ce = (uschar *)startcode + GET(cc, 1);
00370 if (cs == NULL) return -2;
00371 do ce += GET(ce, 1); while (*ce == OP_ALT);
00372 if (cc > cs && cc < ce)
00373 had_recurse = TRUE;
00374 else
00375 branchlength += find_minlength(cs, startcode, options);
00376 cc += 1 + LINK_SIZE;
00377 break;
00378
00379
00380
00381
00382
00383 case OP_UPTO:
00384 case OP_NOTUPTO:
00385 case OP_MINUPTO:
00386 case OP_NOTMINUPTO:
00387 case OP_POSUPTO:
00388 case OP_STAR:
00389 case OP_MINSTAR:
00390 case OP_NOTMINSTAR:
00391 case OP_POSSTAR:
00392 case OP_NOTPOSSTAR:
00393 case OP_QUERY:
00394 case OP_MINQUERY:
00395 case OP_NOTMINQUERY:
00396 case OP_POSQUERY:
00397 case OP_NOTPOSQUERY:
00398 cc += _pcre_OP_lengths[op];
00399 #ifdef SUPPORT_UTF8
00400 if (utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];
00401 #endif
00402 break;
00403
00404
00405
00406
00407
00408 default:
00409 cc += _pcre_OP_lengths[op];
00410 break;
00411 }
00412 }
00413
00414 }
00415
00416
00417
00418
00419
00420
00421
00422
00423
00424
00425
00426
00427
00428
00429
00430
00431
00432
00433
00434 static void
00435 set_bit(uschar *start_bits, unsigned int c, BOOL caseless, compile_data *cd)
00436 {
00437 start_bits[c/8] |= (1 << (c&7));
00438 if (caseless && (cd->ctypes[c] & ctype_letter) != 0)
00439 start_bits[cd->fcc[c]/8] |= (1 << (cd->fcc[c]&7));
00440 }
00441
00442
00443
00444
00445
00446
00447
00448
00449
00450
00451
00452
00453
00454
00455
00456
00457
00458
00459
00460
00461
00462
00463
00464
00465
00466
00467
00468 static int
00469 set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless,
00470 BOOL utf8, compile_data *cd)
00471 {
00472 register int c;
00473 int yield = SSB_DONE;
00474
00475 #if 0
00476
00477
00478
00479
00480
00481
00482
00483
00484
00485
00486
00487
00488 volatile int dummy;
00489
00490 #endif
00491
00492 do
00493 {
00494 const uschar *tcode = code + (((int)*code == OP_CBRA)? 3:1) + LINK_SIZE;
00495 BOOL try_next = TRUE;
00496
00497 while (try_next)
00498 {
00499 int rc;
00500 switch(*tcode)
00501 {
00502
00503
00504 default:
00505 return SSB_FAIL;
00506
00507
00508
00509
00510
00511
00512 case OP_BRA:
00513 case OP_SBRA:
00514 case OP_CBRA:
00515 case OP_SCBRA:
00516 case OP_ONCE:
00517 case OP_ASSERT:
00518 rc = set_start_bits(tcode, start_bits, caseless, utf8, cd);
00519 if (rc == SSB_FAIL) return SSB_FAIL;
00520 if (rc == SSB_DONE) try_next = FALSE; else
00521 {
00522 do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
00523 tcode += 1 + LINK_SIZE;
00524 }
00525 break;
00526
00527
00528
00529
00530
00531
00532
00533
00534 case OP_ALT:
00535 yield = SSB_CONTINUE;
00536 try_next = FALSE;
00537 break;
00538
00539 case OP_KET:
00540 case OP_KETRMAX:
00541 case OP_KETRMIN:
00542 return SSB_CONTINUE;
00543
00544
00545
00546 case OP_CALLOUT:
00547 tcode += 2 + 2*LINK_SIZE;
00548 break;
00549
00550
00551
00552 case OP_ASSERT_NOT:
00553 case OP_ASSERTBACK:
00554 case OP_ASSERTBACK_NOT:
00555 do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
00556 tcode += 1 + LINK_SIZE;
00557 break;
00558
00559
00560
00561 case OP_OPT:
00562 caseless = (tcode[1] & PCRE_CASELESS) != 0;
00563 tcode += 2;
00564 break;
00565
00566
00567
00568 case OP_BRAZERO:
00569 case OP_BRAMINZERO:
00570 if (set_start_bits(++tcode, start_bits, caseless, utf8, cd) == SSB_FAIL)
00571 return SSB_FAIL;
00572
00573
00574
00575
00576
00577 do tcode += GET(tcode,1); while (*tcode == OP_ALT);
00578 tcode += 1 + LINK_SIZE;
00579 break;
00580
00581
00582
00583 case OP_SKIPZERO:
00584 tcode++;
00585 do tcode += GET(tcode,1); while (*tcode == OP_ALT);
00586 tcode += 1 + LINK_SIZE;
00587 break;
00588
00589
00590
00591 case OP_STAR:
00592 case OP_MINSTAR:
00593 case OP_POSSTAR:
00594 case OP_QUERY:
00595 case OP_MINQUERY:
00596 case OP_POSQUERY:
00597 set_bit(start_bits, tcode[1], caseless, cd);
00598 tcode += 2;
00599 #ifdef SUPPORT_UTF8
00600 if (utf8 && tcode[-1] >= 0xc0)
00601 tcode += _pcre_utf8_table4[tcode[-1] & 0x3f];
00602 #endif
00603 break;
00604
00605
00606
00607 case OP_UPTO:
00608 case OP_MINUPTO:
00609 case OP_POSUPTO:
00610 set_bit(start_bits, tcode[3], caseless, cd);
00611 tcode += 4;
00612 #ifdef SUPPORT_UTF8
00613 if (utf8 && tcode[-1] >= 0xc0)
00614 tcode += _pcre_utf8_table4[tcode[-1] & 0x3f];
00615 #endif
00616 break;
00617
00618
00619
00620 case OP_EXACT:
00621 tcode += 2;
00622
00623 case OP_CHAR:
00624 case OP_CHARNC:
00625 case OP_PLUS:
00626 case OP_MINPLUS:
00627 case OP_POSPLUS:
00628 set_bit(start_bits, tcode[1], caseless, cd);
00629 try_next = FALSE;
00630 break;
00631
00632
00633
00634 case OP_NOT_DIGIT:
00635 for (c = 0; c < 32; c++)
00636 start_bits[c] |= ~cd->cbits[c+cbit_digit];
00637 try_next = FALSE;
00638 break;
00639
00640 case OP_DIGIT:
00641 for (c = 0; c < 32; c++)
00642 start_bits[c] |= cd->cbits[c+cbit_digit];
00643 try_next = FALSE;
00644 break;
00645
00646
00647
00648
00649 case OP_NOT_WHITESPACE:
00650 for (c = 0; c < 32; c++)
00651 {
00652 int d = cd->cbits[c+cbit_space];
00653 if (c == 1) d &= ~0x08;
00654 start_bits[c] |= ~d;
00655 }
00656 try_next = FALSE;
00657 break;
00658
00659
00660
00661
00662 case OP_WHITESPACE:
00663 for (c = 0; c < 32; c++)
00664 {
00665 int d = cd->cbits[c+cbit_space];
00666 if (c == 1) d &= ~0x08;
00667 start_bits[c] |= d;
00668 }
00669 try_next = FALSE;
00670 break;
00671
00672 case OP_NOT_WORDCHAR:
00673 for (c = 0; c < 32; c++)
00674 start_bits[c] |= ~cd->cbits[c+cbit_word];
00675 try_next = FALSE;
00676 break;
00677
00678 case OP_WORDCHAR:
00679 for (c = 0; c < 32; c++)
00680 start_bits[c] |= cd->cbits[c+cbit_word];
00681 try_next = FALSE;
00682 break;
00683
00684
00685
00686
00687 case OP_TYPEPLUS:
00688 case OP_TYPEMINPLUS:
00689 tcode++;
00690 break;
00691
00692 case OP_TYPEEXACT:
00693 tcode += 3;
00694 break;
00695
00696
00697
00698
00699 case OP_TYPEUPTO:
00700 case OP_TYPEMINUPTO:
00701 case OP_TYPEPOSUPTO:
00702 tcode += 2;
00703
00704 case OP_TYPESTAR:
00705 case OP_TYPEMINSTAR:
00706 case OP_TYPEPOSSTAR:
00707 case OP_TYPEQUERY:
00708 case OP_TYPEMINQUERY:
00709 case OP_TYPEPOSQUERY:
00710 switch(tcode[1])
00711 {
00712 case OP_ANY:
00713 case OP_ALLANY:
00714 return SSB_FAIL;
00715
00716 case OP_NOT_DIGIT:
00717 for (c = 0; c < 32; c++)
00718 start_bits[c] |= ~cd->cbits[c+cbit_digit];
00719 break;
00720
00721 case OP_DIGIT:
00722 for (c = 0; c < 32; c++)
00723 start_bits[c] |= cd->cbits[c+cbit_digit];
00724 break;
00725
00726
00727
00728
00729 case OP_NOT_WHITESPACE:
00730 for (c = 0; c < 32; c++)
00731 {
00732 int d = cd->cbits[c+cbit_space];
00733 if (c == 1) d &= ~0x08;
00734 start_bits[c] |= ~d;
00735 }
00736 break;
00737
00738
00739
00740
00741 case OP_WHITESPACE:
00742 for (c = 0; c < 32; c++)
00743 {
00744 int d = cd->cbits[c+cbit_space];
00745 if (c == 1) d &= ~0x08;
00746 start_bits[c] |= d;
00747 }
00748 break;
00749
00750 case OP_NOT_WORDCHAR:
00751 for (c = 0; c < 32; c++)
00752 start_bits[c] |= ~cd->cbits[c+cbit_word];
00753 break;
00754
00755 case OP_WORDCHAR:
00756 for (c = 0; c < 32; c++)
00757 start_bits[c] |= cd->cbits[c+cbit_word];
00758 break;
00759 }
00760
00761 tcode += 2;
00762 break;
00763
00764
00765
00766
00767
00768
00769
00770 case OP_NCLASS:
00771 #ifdef SUPPORT_UTF8
00772 if (utf8)
00773 {
00774 start_bits[24] |= 0xf0;
00775 memset(start_bits+25, 0xff, 7);
00776 }
00777 #endif
00778
00779
00780 case OP_CLASS:
00781 {
00782 tcode++;
00783
00784
00785
00786
00787
00788
00789
00790 #ifdef SUPPORT_UTF8
00791 if (utf8)
00792 {
00793 for (c = 0; c < 16; c++) start_bits[c] |= tcode[c];
00794 for (c = 128; c < 256; c++)
00795 {
00796 if ((tcode[c/8] && (1 << (c&7))) != 0)
00797 {
00798 int d = (c >> 6) | 0xc0;
00799 start_bits[d/8] |= (1 << (d&7));
00800 c = (c & 0xc0) + 0x40 - 1;
00801 }
00802 }
00803 }
00804
00805
00806
00807 else
00808 #endif
00809 {
00810 for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];
00811 }
00812
00813
00814
00815 tcode += 32;
00816 switch (*tcode)
00817 {
00818 case OP_CRSTAR:
00819 case OP_CRMINSTAR:
00820 case OP_CRQUERY:
00821 case OP_CRMINQUERY:
00822 tcode++;
00823 break;
00824
00825 case OP_CRRANGE:
00826 case OP_CRMINRANGE:
00827 if (((tcode[1] << 8) + tcode[2]) == 0) tcode += 5;
00828 else try_next = FALSE;
00829 break;
00830
00831 default:
00832 try_next = FALSE;
00833 break;
00834 }
00835 }
00836 break;
00837
00838 }
00839 }
00840
00841 code += GET(code, 1);
00842 }
00843 while (*code == OP_ALT);
00844 return yield;
00845 }
00846
00847
00848
00849
00850
00851
00852
00853
00854
00855
00856
00857
00858
00859
00860
00861
00862
00863
00864
00865
00866
00867
00868 PCRE_EXP_DEFN pcre_extra * PCRE_CALL_CONVENTION
00869 pcre_study(const pcre *external_re, int options, const char **errorptr)
00870 {
00871 int min;
00872 BOOL bits_set = FALSE;
00873 uschar start_bits[32];
00874 pcre_extra *extra;
00875 pcre_study_data *study;
00876 const uschar *tables;
00877 uschar *code;
00878 compile_data compile_block;
00879 const real_pcre *re = (const real_pcre *)external_re;
00880
00881 *errorptr = NULL;
00882
00883 if (re == NULL || re->magic_number != MAGIC_NUMBER)
00884 {
00885 *errorptr = "argument is not a compiled regular expression";
00886 return NULL;
00887 }
00888
00889 if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)
00890 {
00891 *errorptr = "unknown or incorrect option bit(s) set";
00892 return NULL;
00893 }
00894
00895 code = (uschar *)re + re->name_table_offset +
00896 (re->name_count * re->name_entry_size);
00897
00898
00899
00900
00901
00902 if ((re->options & PCRE_ANCHORED) == 0 &&
00903 (re->flags & (PCRE_FIRSTSET|PCRE_STARTLINE)) == 0)
00904 {
00905
00906
00907 tables = re->tables;
00908 if (tables == NULL)
00909 (void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
00910 (void *)(&tables));
00911
00912 compile_block.lcc = tables + lcc_offset;
00913 compile_block.fcc = tables + fcc_offset;
00914 compile_block.cbits = tables + cbits_offset;
00915 compile_block.ctypes = tables + ctypes_offset;
00916
00917
00918
00919 memset(start_bits, 0, 32 * sizeof(uschar));
00920 bits_set = set_start_bits(code, start_bits,
00921 (re->options & PCRE_CASELESS) != 0, (re->options & PCRE_UTF8) != 0,
00922 &compile_block) == SSB_DONE;
00923 }
00924
00925
00926
00927 min = find_minlength(code, code, re->options);
00928
00929
00930
00931 if (!bits_set && min < 0) return NULL;
00932
00933
00934
00935
00936
00937
00938
00939
00940 extra = (pcre_extra *)(pcre_malloc)
00941 (sizeof(pcre_extra) + sizeof(pcre_study_data));
00942
00943 if (extra == NULL)
00944 {
00945 *errorptr = "failed to get memory";
00946 return NULL;
00947 }
00948
00949 study = (pcre_study_data *)((char *)extra + sizeof(pcre_extra));
00950 extra->flags = PCRE_EXTRA_STUDY_DATA;
00951 extra->study_data = study;
00952
00953 study->size = sizeof(pcre_study_data);
00954 study->flags = 0;
00955
00956 if (bits_set)
00957 {
00958 study->flags |= PCRE_STUDY_MAPPED;
00959 memcpy(study->start_bits, start_bits, sizeof(start_bits));
00960 }
00961
00962 if (min >= 0)
00963 {
00964 study->flags |= PCRE_STUDY_MINLEN;
00965 study->minlength = min;
00966 }
00967
00968 return extra;
00969 }
00970
00971