00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075 /* Build configuration: use config.h when HAVE_CONFIG_H is defined, otherwise the Win32 configuration header, but only on _WINDOWS builds. */
00076 #ifdef HAVE_CONFIG_H
00077 #include "config.h"
00078 #elif defined(_WINDOWS) /* must be #elif: "#else if ..." is parsed as a plain #else with stray tokens, which would select this branch on every build lacking HAVE_CONFIG_H */
00079 #include <spl/configwin32.h>
00080 #endif
00081 /* Generic names mapped onto identifiers that exist inside internal_dfa_exec: md is its match-data parameter, start_subject and end_subject are its local subject bounds. */
00082 /* NOTE(review): defined ahead of the pcre_internal.h include, presumably so macros from that header (e.g. IS_NEWLINE / WAS_NEWLINE used in this file) can refer to them - confirm against the header. */
00083 #define NLBLOCK md
00084 #define PSSTART start_subject
00085 #define PSEND end_subject
00086
00087 #include "pcre_internal.h"
00088
00089
00090
00091 /* Padding string for debug output: the DPRINTF/printf calls in this file print it with "%.*s" and a precision of rlevel*2-2, so it acts as per-recursion-level indentation. */
00092 #define SP " "
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102 /* Offsets added to a repeat opcode (codevalue >= OP_TYPESTAR) when the repeated item is a property, extended-Unicode, any-newline, horizontal-space or vertical-space type, so the main switch can use a distinct case label such as OP_PROP_EXTRA + OP_TYPEPLUS. */
00103 /* NOTE(review): the values presumably lie above the real opcode range and are spaced widely enough that the sums cannot collide - confirm against the opcode definitions. */
00104 #define OP_PROP_EXTRA 300
00105 #define OP_EXTUNI_EXTRA 320
00106 #define OP_ANYNL_EXTRA 340
00107 #define OP_HSPACE_EXTRA 360
00108 #define OP_VSPACE_EXTRA 380
00109
00110
00111
00112
00113
00114
00115
00116
00117 /* coptable: indexed by opcode value. A non-zero entry is the offset from the opcode to the character (or type item) that follows it, which the matcher loads into d; a zero entry means there is nothing to load and d is set to NOTACHAR. */
00118 /* NOTE(review): the entries are positional, so they must stay in step with the opcode numbering declared outside this file - confirm before adding or removing any entry. */
00119 static const uschar coptable[] = {
00120 0,
00121 0, 0, 0, 0, 0,
00122 0, 0, 0, 0, 0, 0,
00123 0, 0, 0,
00124 0, 0, 0,
00125 0, 0, 0, 0, 0,
00126 0, 0, 0, 0, 0,
00127 1,
00128 1,
00129 1,
00130
00131 1, 1, 1, 1, 1, 1,
00132 3, 3, 3,
00133 1, 1, 1, 3,
00134
00135 1, 1, 1, 1, 1, 1,
00136 3, 3, 3,
00137 1, 1, 1, 3,
00138
00139 1, 1, 1, 1, 1, 1,
00140 3, 3, 3,
00141 1, 1, 1, 3,
00142
00143 0, 0, 0, 0, 0, 0,
00144 0, 0,
00145 0,
00146 0,
00147 0,
00148 0,
00149 0,
00150 0,
00151 0,
00152 0,
00153 0,
00154 0,
00155 0,
00156 0,
00157 0,
00158 0,
00159 0,
00160 0, 0, 0, 0,
00161 0, 0, 0,
00162 0,
00163 0,
00164 0,
00165 0, 0,
00166 0, 0, 0, 0,
00167 0, 0, 0, 0
00168 };
00169
00170
00171
00172
00173 /* poptable: indexed by opcode value. When the subject is exhausted (clen == 0) and the entry for a state's opcode is non-zero, the matcher sets could_continue to TRUE. */
00174 /* NOTE(review): presumably the non-zero entries mark opcodes that need to inspect a subject character; the entries are positional and must track the opcode numbering declared outside this file - confirm. */
00175 static const uschar poptable[] = {
00176 0,
00177 0, 0, 0, 1, 1,
00178 1, 1, 1, 1, 1, 1,
00179 1, 1, 1,
00180 1, 1, 1,
00181 1, 1, 1, 1, 1,
00182 0, 0, 0, 0, 0,
00183 1,
00184 1,
00185 1,
00186
00187 1, 1, 1, 1, 1, 1,
00188 1, 1, 1,
00189 1, 1, 1, 1,
00190
00191 1, 1, 1, 1, 1, 1,
00192 1, 1, 1,
00193 1, 1, 1, 1,
00194
00195 1, 1, 1, 1, 1, 1,
00196 1, 1, 1,
00197 1, 1, 1, 1,
00198
00199 1, 1, 1, 1, 1, 1,
00200 1, 1,
00201 1,
00202 1,
00203 1,
00204 0,
00205 0,
00206 0,
00207 0,
00208 0,
00209 0,
00210 0,
00211 0,
00212 0,
00213 0,
00214 0,
00215 0,
00216 0, 0, 0, 0,
00217 0, 0, 0,
00218 0,
00219 0,
00220 0,
00221 0, 0,
00222 0, 0, 0, 0,
00223 0, 0, 0, 0
00224 };
00225
00226
00227
00228 /* toptable1: AND-mask half of the character-type test ((ctypes[c] & toptable1[i]) ^ toptable2[i]) != 0, where i is an opcode value. Each entry selects which ctype flag is examined; 0 selects none. NOTE(review): entry order presumably follows the character-type opcodes - confirm. */
00229 static const uschar toptable1[] = {
00230 0, 0, 0, 0, 0, 0,
00231 ctype_digit, ctype_digit,
00232 ctype_space, ctype_space,
00233 ctype_word, ctype_word,
00234 0, 0
00235 };
00236 /* toptable2: XOR half of the test ((ctypes[c] & toptable1[i]) ^ toptable2[i]) != 0. Where the entry equals the toptable1 mask the test succeeds when the character lacks the flag; where it is 0 the test succeeds when the flag is present; the trailing 1 entries (mask 0) make the test always succeed, and the leading 0 entries (mask 0) make it always fail. */
00237 static const uschar toptable2[] = {
00238 0, 0, 0, 0, 0, 0,
00239 ctype_digit, 0,
00240 ctype_space, 0,
00241 ctype_word, 0,
00242 1, 1
00243 };
00244
00245
00246
00247
00248
00249 /* stateblock: one entry in the active/new state lists that internal_dfa_exec keeps in the caller-supplied int workspace (the workspace pointer is cast to stateblock *). */
00250 /* All members are int so that the size of a state can be expressed as a whole number of workspace ints. */
00251 typedef struct stateblock {
00252 int offset; /* offset of the state's opcode from start_code; held negated while `data` characters still have to be skipped */
00253 int count; /* repeat counter carried by counted-repeat states; 0 otherwise */
00254 int ims; /* option bits (ims) that were current when the state was added; restored when the state is processed */
00255 int data; /* extra value set only by the *_DATA macros; for negative-offset states, the number of characters still to skip */
00256 } stateblock;
00257 /* Number of ints occupied by one stateblock; used to turn the int-counted workspace size into a number of states. */
00258 #define INTS_PER_STATEBLOCK (sizeof(stateblock)/sizeof(int))
00259
00260
00261 #ifdef DEBUG
00262
00263
00264
00265
00266
00267
00268
00269
00270
00271
00272
00273 /* pchars: debug-only helper, compiled only when DEBUG is defined. */
00274 /* Writes `length` bytes starting at p to stream f: a byte for which isprint() is true is written as-is, any other byte as a backslash, 'x' and two hex digits. */
00275 /* Arguments: p = first byte to print, length = number of bytes, f = destination stream. Returns nothing. */
00276 static void
00277 pchars(unsigned char *p, int length, FILE *f)
00278 {
00279 int c;
00280 while (length-- > 0)
00281 {
00282 if (isprint(c = *(p++))) /* fetch the next byte into c and advance p in one step */
00283 fprintf(f, "%c", c);
00284 else
00285 fprintf(f, "\\x%02x", c);
00286 }
00287 }
00288 #endif
00289
00290
00291
00292
00293
00294
00295
00296
00297
00298
00299
00300
00301
00302
00303
00304
00305
00306
00307
00308
00309
00310
00311
00312
00313
00314
00315
00316
00317
00318
00319
00320 /* ADD_ACTIVE(x,y): append a state with offset x and count y, plus the current ims, at next_active_state and advance that pointer; the data member is not written. */
00321 /* If active_count has already reached wscount, the enclosing function returns PCRE_ERROR_DFA_WSSIZE instead (active_count is incremented either way). Expands to a bare if/else with no trailing semicolon, and needs active_count, wscount, next_active_state, ims and rlevel in scope. */
00322 #define ADD_ACTIVE(x,y) \
00323 if (active_count++ < wscount) \
00324 { \
00325 next_active_state->offset = (x); \
00326 next_active_state->count = (y); \
00327 next_active_state->ims = ims; \
00328 next_active_state++; \
00329 DPRINTF(("%.*sADD_ACTIVE(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
00330 } \
00331 else return PCRE_ERROR_DFA_WSSIZE
00332 /* ADD_ACTIVE_DATA(x,y,z): as ADD_ACTIVE, but also stores z in the new state's data member. Returns PCRE_ERROR_DFA_WSSIZE from the enclosing function when active_count has reached wscount; expands to a bare if/else with no trailing semicolon. */
00333 #define ADD_ACTIVE_DATA(x,y,z) \
00334 if (active_count++ < wscount) \
00335 { \
00336 next_active_state->offset = (x); \
00337 next_active_state->count = (y); \
00338 next_active_state->ims = ims; \
00339 next_active_state->data = (z); \
00340 next_active_state++; \
00341 DPRINTF(("%.*sADD_ACTIVE_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \
00342 } \
00343 else return PCRE_ERROR_DFA_WSSIZE
00344 /* ADD_NEW(x,y): append a state with offset x and count y, plus the current ims, at next_new_state (the list that becomes active for the next character) and advance that pointer; data is not written. Returns PCRE_ERROR_DFA_WSSIZE from the enclosing function when new_count has reached wscount; expands to a bare if/else with no trailing semicolon. */
00345 #define ADD_NEW(x,y) \
00346 if (new_count++ < wscount) \
00347 { \
00348 next_new_state->offset = (x); \
00349 next_new_state->count = (y); \
00350 next_new_state->ims = ims; \
00351 next_new_state++; \
00352 DPRINTF(("%.*sADD_NEW(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
00353 } \
00354 else return PCRE_ERROR_DFA_WSSIZE
00355 /* ADD_NEW_DATA(x,y,z): as ADD_NEW, but also stores z in the new state's data member; callers in this file pass a negated offset together with a character count in z for states that must skip characters first. Returns PCRE_ERROR_DFA_WSSIZE from the enclosing function when new_count has reached wscount; expands to a bare if/else with no trailing semicolon. */
00356 #define ADD_NEW_DATA(x,y,z) \
00357 if (new_count++ < wscount) \
00358 { \
00359 next_new_state->offset = (x); \
00360 next_new_state->count = (y); \
00361 next_new_state->ims = ims; \
00362 next_new_state->data = (z); \
00363 next_new_state++; \
00364 DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \
00365 } \
00366 else return PCRE_ERROR_DFA_WSSIZE
00367
00368
00369
00370 static int
00371 internal_dfa_exec(
00372 dfa_match_data *md,
00373 const uschar *this_start_code,
00374 const uschar *current_subject,
00375 int start_offset,
00376 int *offsets,
00377 int offsetcount,
00378 int *workspace,
00379 int wscount,
00380 int ims,
00381 int rlevel,
00382 int recursing)
00383 {
00384 stateblock *active_states, *new_states, *temp_states;
00385 stateblock *next_active_state, *next_new_state;
00386
00387 const uschar *ctypes, *lcc, *fcc;
00388 const uschar *ptr;
00389 const uschar *end_code, *first_op;
00390
00391 int active_count, new_count, match_count;
00392
00393
00394
00395
00396 const uschar *start_subject = md->start_subject;
00397 const uschar *end_subject = md->end_subject;
00398 const uschar *start_code = md->start_code;
00399
00400 #ifdef SUPPORT_UTF8
00401 BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;
00402 #else
00403 BOOL utf8 = FALSE;
00404 #endif
00405
00406 rlevel++;
00407 offsetcount &= (-2);
00408
00409 wscount -= 2;
00410 wscount = (wscount - (wscount % (INTS_PER_STATEBLOCK * 2))) /
00411 (2 * INTS_PER_STATEBLOCK);
00412
00413 DPRINTF(("\n%.*s---------------------\n"
00414 "%.*sCall to internal_dfa_exec f=%d r=%d\n",
00415 rlevel*2-2, SP, rlevel*2-2, SP, rlevel, recursing));
00416
00417 ctypes = md->tables + ctypes_offset;
00418 lcc = md->tables + lcc_offset;
00419 fcc = md->tables + fcc_offset;
00420
00421 match_count = PCRE_ERROR_NOMATCH;
00422
00423 active_states = (stateblock *)(workspace + 2);
00424 next_new_state = new_states = active_states + wscount;
00425 new_count = 0;
00426
00427 first_op = this_start_code + 1 + LINK_SIZE +
00428 ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);
00429
00430
00431
00432
00433
00434
00435
00436
00437
00438
00439 if (*first_op == OP_REVERSE)
00440 {
00441 int max_back = 0;
00442 int gone_back;
00443
00444 end_code = this_start_code;
00445 do
00446 {
00447 int back = GET(end_code, 2+LINK_SIZE);
00448 if (back > max_back) max_back = back;
00449 end_code += GET(end_code, 1);
00450 }
00451 while (*end_code == OP_ALT);
00452
00453
00454
00455
00456 #ifdef SUPPORT_UTF8
00457
00458
00459 if (utf8)
00460 {
00461 for (gone_back = 0; gone_back < max_back; gone_back++)
00462 {
00463 if (current_subject <= start_subject) break;
00464 current_subject--;
00465 while (current_subject > start_subject &&
00466 (*current_subject & 0xc0) == 0x80)
00467 current_subject--;
00468 }
00469 }
00470 else
00471 #endif
00472
00473
00474
00475 {
00476 gone_back = (current_subject - max_back < start_subject)?
00477 current_subject - start_subject : max_back;
00478 current_subject -= gone_back;
00479 }
00480
00481
00482
00483 if (current_subject < md->start_used_ptr)
00484 md->start_used_ptr = current_subject;
00485
00486
00487
00488 end_code = this_start_code;
00489 do
00490 {
00491 int back = GET(end_code, 2+LINK_SIZE);
00492 if (back <= gone_back)
00493 {
00494 int bstate = end_code - start_code + 2 + 2*LINK_SIZE;
00495 ADD_NEW_DATA(-bstate, 0, gone_back - back);
00496 }
00497 end_code += GET(end_code, 1);
00498 }
00499 while (*end_code == OP_ALT);
00500 }
00501
00502
00503
00504
00505
00506
00507
00508 else
00509 {
00510 end_code = this_start_code;
00511
00512
00513
00514 if (rlevel == 1 && (md->moptions & PCRE_DFA_RESTART) != 0)
00515 {
00516 do { end_code += GET(end_code, 1); } while (*end_code == OP_ALT);
00517 new_count = workspace[1];
00518 if (!workspace[0])
00519 memcpy(new_states, active_states, new_count * sizeof(stateblock));
00520 }
00521
00522
00523
00524 else
00525 {
00526 int length = 1 + LINK_SIZE +
00527 ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);
00528 do
00529 {
00530 ADD_NEW(end_code - start_code + length, 0);
00531 end_code += GET(end_code, 1);
00532 length = 1 + LINK_SIZE;
00533 }
00534 while (*end_code == OP_ALT);
00535 }
00536 }
00537
00538 workspace[0] = 0;
00539
00540 DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, end_code - start_code));
00541
00542
00543
00544 ptr = current_subject;
00545 for (;;)
00546 {
00547 int i, j;
00548 int clen, dlen;
00549 unsigned int c, d;
00550 int forced_fail = 0;
00551 BOOL could_continue = FALSE;
00552
00553
00554
00555
00556 temp_states = active_states;
00557 active_states = new_states;
00558 new_states = temp_states;
00559 active_count = new_count;
00560 new_count = 0;
00561
00562 workspace[0] ^= 1;
00563 workspace[1] = active_count;
00564
00565 #ifdef DEBUG
00566 printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);
00567 pchars((uschar *)ptr, strlen((char *)ptr), stdout);
00568 printf("\"\n");
00569
00570 printf("%.*sActive states: ", rlevel*2-2, SP);
00571 for (i = 0; i < active_count; i++)
00572 printf("%d/%d ", active_states[i].offset, active_states[i].count);
00573 printf("\n");
00574 #endif
00575
00576
00577
00578 next_active_state = active_states + active_count;
00579 next_new_state = new_states;
00580
00581
00582
00583
00584
00585 if (ptr < end_subject)
00586 {
00587 clen = 1;
00588 #ifdef SUPPORT_UTF8
00589 if (utf8) { GETCHARLEN(c, ptr, clen); } else
00590 #endif
00591 c = *ptr;
00592 }
00593 else
00594 {
00595 clen = 0;
00596 c = NOTACHAR;
00597 }
00598
00599
00600
00601
00602
00603
00604 for (i = 0; i < active_count; i++)
00605 {
00606 stateblock *current_state = active_states + i;
00607 const uschar *code;
00608 int state_offset = current_state->offset;
00609 int count, codevalue, rrc;
00610
00611 #ifdef DEBUG
00612 printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
00613 if (clen == 0) printf("EOL\n");
00614 else if (c > 32 && c < 127) printf("'%c'\n", c);
00615 else printf("0x%02x\n", c);
00616 #endif
00617
00618
00619
00620 ims = current_state->ims;
00621
00622
00623
00624
00625
00626 if (state_offset < 0)
00627 {
00628 if (current_state->data > 0)
00629 {
00630 DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));
00631 ADD_NEW_DATA(state_offset, current_state->count,
00632 current_state->data - 1);
00633 continue;
00634 }
00635 else
00636 {
00637 current_state->offset = state_offset = -state_offset;
00638 }
00639 }
00640
00641
00642
00643
00644
00645 for (j = 0; j < i; j++)
00646 {
00647 if (active_states[j].offset == state_offset &&
00648 active_states[j].count == current_state->count)
00649 {
00650 DPRINTF(("%.*sDuplicate state: skipped\n", rlevel*2-2, SP));
00651 goto NEXT_ACTIVE_STATE;
00652 }
00653 }
00654
00655
00656
00657 code = start_code + state_offset;
00658 codevalue = *code;
00659
00660
00661
00662
00663 if (clen == 0 && poptable[codevalue] != 0)
00664 could_continue = TRUE;
00665
00666
00667
00668
00669
00670
00671
00672
00673
00674
00675
00676
00677 if (coptable[codevalue] > 0)
00678 {
00679 dlen = 1;
00680 #ifdef SUPPORT_UTF8
00681 if (utf8) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
00682 #endif
00683 d = code[coptable[codevalue]];
00684 if (codevalue >= OP_TYPESTAR)
00685 {
00686 switch(d)
00687 {
00688 case OP_ANYBYTE: return PCRE_ERROR_DFA_UITEM;
00689 case OP_NOTPROP:
00690 case OP_PROP: codevalue += OP_PROP_EXTRA; break;
00691 case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
00692 case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
00693 case OP_NOT_HSPACE:
00694 case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
00695 case OP_NOT_VSPACE:
00696 case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
00697 default: break;
00698 }
00699 }
00700 }
00701 else
00702 {
00703 dlen = 0;
00704 d = NOTACHAR;
00705 }
00706
00707
00708
00709
00710 switch (codevalue)
00711 {
00712
00713
00714
00715
00716
00717
00718
00719
00720 case OP_KET:
00721 case OP_KETRMIN:
00722 case OP_KETRMAX:
00723 if (code != end_code)
00724 {
00725 ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);
00726 if (codevalue != OP_KET)
00727 {
00728 ADD_ACTIVE(state_offset - GET(code, 1), 0);
00729 }
00730 }
00731 else
00732 {
00733 if (ptr > current_subject ||
00734 ((md->moptions & PCRE_NOTEMPTY) == 0 &&
00735 ((md->moptions & PCRE_NOTEMPTY_ATSTART) == 0 ||
00736 current_subject > start_subject + md->start_offset)))
00737 {
00738 if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
00739 else if (match_count > 0 && ++match_count * 2 >= offsetcount)
00740 match_count = 0;
00741 count = ((match_count == 0)? offsetcount : match_count * 2) - 2;
00742 if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));
00743 if (offsetcount >= 2)
00744 {
00745 offsets[0] = current_subject - start_subject;
00746 offsets[1] = ptr - start_subject;
00747 DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
00748 offsets[1] - offsets[0], current_subject));
00749 }
00750 if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
00751 {
00752 DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
00753 "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel,
00754 match_count, rlevel*2-2, SP));
00755 return match_count;
00756 }
00757 }
00758 }
00759 break;
00760
00761
00762
00763
00764
00765
00766 case OP_ALT:
00767 do { code += GET(code, 1); } while (*code == OP_ALT);
00768 ADD_ACTIVE(code - start_code, 0);
00769 break;
00770
00771
00772 case OP_BRA:
00773 case OP_SBRA:
00774 do
00775 {
00776 ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
00777 code += GET(code, 1);
00778 }
00779 while (*code == OP_ALT);
00780 break;
00781
00782
00783 case OP_CBRA:
00784 case OP_SCBRA:
00785 ADD_ACTIVE(code - start_code + 3 + LINK_SIZE, 0);
00786 code += GET(code, 1);
00787 while (*code == OP_ALT)
00788 {
00789 ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
00790 code += GET(code, 1);
00791 }
00792 break;
00793
00794
00795 case OP_BRAZERO:
00796 case OP_BRAMINZERO:
00797 ADD_ACTIVE(state_offset + 1, 0);
00798 code += 1 + GET(code, 2);
00799 while (*code == OP_ALT) code += GET(code, 1);
00800 ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
00801 break;
00802
00803
00804 case OP_SKIPZERO:
00805 code += 1 + GET(code, 2);
00806 while (*code == OP_ALT) code += GET(code, 1);
00807 ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
00808 break;
00809
00810
00811 case OP_CIRC:
00812 if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
00813 ((ims & PCRE_MULTILINE) != 0 &&
00814 ptr != end_subject &&
00815 WAS_NEWLINE(ptr)))
00816 { ADD_ACTIVE(state_offset + 1, 0); }
00817 break;
00818
00819
00820 case OP_EOD:
00821 if (ptr >= end_subject) { ADD_ACTIVE(state_offset + 1, 0); }
00822 break;
00823
00824
00825 case OP_OPT:
00826 ims = code[1];
00827 ADD_ACTIVE(state_offset + 2, 0);
00828 break;
00829
00830
00831 case OP_SOD:
00832 if (ptr == start_subject) { ADD_ACTIVE(state_offset + 1, 0); }
00833 break;
00834
00835
00836 case OP_SOM:
00837 if (ptr == start_subject + start_offset) { ADD_ACTIVE(state_offset + 1, 0); }
00838 break;
00839
00840
00841
00842
00843
00844
00845
00846
00847
00848 case OP_ANY:
00849 if (clen > 0 && !IS_NEWLINE(ptr))
00850 { ADD_NEW(state_offset + 1, 0); }
00851 break;
00852
00853
00854 case OP_ALLANY:
00855 if (clen > 0)
00856 { ADD_NEW(state_offset + 1, 0); }
00857 break;
00858
00859
00860 case OP_EODN:
00861 if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))
00862 { ADD_ACTIVE(state_offset + 1, 0); }
00863 break;
00864
00865
00866 case OP_DOLL:
00867 if ((md->moptions & PCRE_NOTEOL) == 0)
00868 {
00869 if (clen == 0 ||
00870 ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) &&
00871 ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)
00872 ))
00873 { ADD_ACTIVE(state_offset + 1, 0); }
00874 }
00875 else if ((ims & PCRE_MULTILINE) != 0 && IS_NEWLINE(ptr))
00876 { ADD_ACTIVE(state_offset + 1, 0); }
00877 break;
00878
00879
00880
00881 case OP_DIGIT:
00882 case OP_WHITESPACE:
00883 case OP_WORDCHAR:
00884 if (clen > 0 && c < 256 &&
00885 ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0)
00886 { ADD_NEW(state_offset + 1, 0); }
00887 break;
00888
00889
00890 case OP_NOT_DIGIT:
00891 case OP_NOT_WHITESPACE:
00892 case OP_NOT_WORDCHAR:
00893 if (clen > 0 && (c >= 256 ||
00894 ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0))
00895 { ADD_NEW(state_offset + 1, 0); }
00896 break;
00897
00898
00899 case OP_WORD_BOUNDARY:
00900 case OP_NOT_WORD_BOUNDARY:
00901 {
00902 int left_word, right_word;
00903
00904 if (ptr > start_subject)
00905 {
00906 const uschar *temp = ptr - 1;
00907 if (temp < md->start_used_ptr) md->start_used_ptr = temp;
00908 #ifdef SUPPORT_UTF8
00909 if (utf8) BACKCHAR(temp);
00910 #endif
00911 GETCHARTEST(d, temp);
00912 left_word = d < 256 && (ctypes[d] & ctype_word) != 0;
00913 }
00914 else left_word = 0;
00915
00916 if (clen > 0)
00917 right_word = c < 256 && (ctypes[c] & ctype_word) != 0;
00918 else right_word = 0;
00919
00920 if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))
00921 { ADD_ACTIVE(state_offset + 1, 0); }
00922 }
00923 break;
00924
00925
00926
00927
00928
00929
00930
00931 #ifdef SUPPORT_UCP
00932 case OP_PROP:
00933 case OP_NOTPROP:
00934 if (clen > 0)
00935 {
00936 BOOL OK;
00937 const ucd_record * prop = GET_UCD(c);
00938 switch(code[1])
00939 {
00940 case PT_ANY:
00941 OK = TRUE;
00942 break;
00943
00944 case PT_LAMP:
00945 OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
00946 break;
00947
00948 case PT_GC:
00949 OK = _pcre_ucp_gentype[prop->chartype] == code[2];
00950 break;
00951
00952 case PT_PC:
00953 OK = prop->chartype == code[2];
00954 break;
00955
00956 case PT_SC:
00957 OK = prop->script == code[2];
00958 break;
00959
00960
00961
00962 default:
00963 OK = codevalue != OP_PROP;
00964 break;
00965 }
00966
00967 if (OK == (codevalue == OP_PROP)) { ADD_NEW(state_offset + 3, 0); }
00968 }
00969 break;
00970 #endif
00971
00972
00973
00974
00975
00976
00977
00978
00979
00980 case OP_TYPEPLUS:
00981 case OP_TYPEMINPLUS:
00982 case OP_TYPEPOSPLUS:
00983 count = current_state->count;
00984 if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
00985 if (clen > 0)
00986 {
00987 if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
00988 (c < 256 &&
00989 (d != OP_ANY || !IS_NEWLINE(ptr)) &&
00990 ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
00991 {
00992 if (count > 0 && codevalue == OP_TYPEPOSPLUS)
00993 {
00994 active_count--;
00995 next_active_state--;
00996 }
00997 count++;
00998 ADD_NEW(state_offset, count);
00999 }
01000 }
01001 break;
01002
01003
01004 case OP_TYPEQUERY:
01005 case OP_TYPEMINQUERY:
01006 case OP_TYPEPOSQUERY:
01007 ADD_ACTIVE(state_offset + 2, 0);
01008 if (clen > 0)
01009 {
01010 if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
01011 (c < 256 &&
01012 (d != OP_ANY || !IS_NEWLINE(ptr)) &&
01013 ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
01014 {
01015 if (codevalue == OP_TYPEPOSQUERY)
01016 {
01017 active_count--;
01018 next_active_state--;
01019 }
01020 ADD_NEW(state_offset + 2, 0);
01021 }
01022 }
01023 break;
01024
01025
01026 case OP_TYPESTAR:
01027 case OP_TYPEMINSTAR:
01028 case OP_TYPEPOSSTAR:
01029 ADD_ACTIVE(state_offset + 2, 0);
01030 if (clen > 0)
01031 {
01032 if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
01033 (c < 256 &&
01034 (d != OP_ANY || !IS_NEWLINE(ptr)) &&
01035 ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
01036 {
01037 if (codevalue == OP_TYPEPOSSTAR)
01038 {
01039 active_count--;
01040 next_active_state--;
01041 }
01042 ADD_NEW(state_offset, 0);
01043 }
01044 }
01045 break;
01046
01047
01048 case OP_TYPEEXACT:
01049 count = current_state->count;
01050 if (clen > 0)
01051 {
01052 if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
01053 (c < 256 &&
01054 (d != OP_ANY || !IS_NEWLINE(ptr)) &&
01055 ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
01056 {
01057 if (++count >= GET2(code, 1))
01058 { ADD_NEW(state_offset + 4, 0); }
01059 else
01060 { ADD_NEW(state_offset, count); }
01061 }
01062 }
01063 break;
01064
01065
01066 case OP_TYPEUPTO:
01067 case OP_TYPEMINUPTO:
01068 case OP_TYPEPOSUPTO:
01069 ADD_ACTIVE(state_offset + 4, 0);
01070 count = current_state->count;
01071 if (clen > 0)
01072 {
01073 if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
01074 (c < 256 &&
01075 (d != OP_ANY || !IS_NEWLINE(ptr)) &&
01076 ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
01077 {
01078 if (codevalue == OP_TYPEPOSUPTO)
01079 {
01080 active_count--;
01081 next_active_state--;
01082 }
01083 if (++count >= GET2(code, 1))
01084 { ADD_NEW(state_offset + 4, 0); }
01085 else
01086 { ADD_NEW(state_offset, count); }
01087 }
01088 }
01089 break;
01090
01091
01092
01093
01094
01095
01096
01097 #ifdef SUPPORT_UCP
01098 case OP_PROP_EXTRA + OP_TYPEPLUS:
01099 case OP_PROP_EXTRA + OP_TYPEMINPLUS:
01100 case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
01101 count = current_state->count;
01102 if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }
01103 if (clen > 0)
01104 {
01105 BOOL OK;
01106 const ucd_record * prop = GET_UCD(c);
01107 switch(code[2])
01108 {
01109 case PT_ANY:
01110 OK = TRUE;
01111 break;
01112
01113 case PT_LAMP:
01114 OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
01115 break;
01116
01117 case PT_GC:
01118 OK = _pcre_ucp_gentype[prop->chartype] == code[3];
01119 break;
01120
01121 case PT_PC:
01122 OK = prop->chartype == code[3];
01123 break;
01124
01125 case PT_SC:
01126 OK = prop->script == code[3];
01127 break;
01128
01129
01130
01131 default:
01132 OK = codevalue != OP_PROP;
01133 break;
01134 }
01135
01136 if (OK == (d == OP_PROP))
01137 {
01138 if (count > 0 && codevalue == OP_PROP_EXTRA + OP_TYPEPOSPLUS)
01139 {
01140 active_count--;
01141 next_active_state--;
01142 }
01143 count++;
01144 ADD_NEW(state_offset, count);
01145 }
01146 }
01147 break;
01148
01149
01150 case OP_EXTUNI_EXTRA + OP_TYPEPLUS:
01151 case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:
01152 case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
01153 count = current_state->count;
01154 if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
01155 if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
01156 {
01157 const uschar *nptr = ptr + clen;
01158 int ncount = 0;
01159 if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
01160 {
01161 active_count--;
01162 next_active_state--;
01163 }
01164 while (nptr < end_subject)
01165 {
01166 int nd;
01167 int ndlen = 1;
01168 GETCHARLEN(nd, nptr, ndlen);
01169 if (UCD_CATEGORY(nd) != ucp_M) break;
01170 ncount++;
01171 nptr += ndlen;
01172 }
01173 count++;
01174 ADD_NEW_DATA(-state_offset, count, ncount);
01175 }
01176 break;
01177 #endif
01178
01179
01180 case OP_ANYNL_EXTRA + OP_TYPEPLUS:
01181 case OP_ANYNL_EXTRA + OP_TYPEMINPLUS:
01182 case OP_ANYNL_EXTRA + OP_TYPEPOSPLUS:
01183 count = current_state->count;
01184 if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
01185 if (clen > 0)
01186 {
01187 int ncount = 0;
01188 switch (c)
01189 {
01190 case 0x000b:
01191 case 0x000c:
01192 case 0x0085:
01193 case 0x2028:
01194 case 0x2029:
01195 if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
01196 goto ANYNL01;
01197
01198 case 0x000d:
01199 if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
01200
01201
01202 ANYNL01:
01203 case 0x000a:
01204 if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
01205 {
01206 active_count--;
01207 next_active_state--;
01208 }
01209 count++;
01210 ADD_NEW_DATA(-state_offset, count, ncount);
01211 break;
01212
01213 default:
01214 break;
01215 }
01216 }
01217 break;
01218
01219
01220 case OP_VSPACE_EXTRA + OP_TYPEPLUS:
01221 case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
01222 case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
01223 count = current_state->count;
01224 if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
01225 if (clen > 0)
01226 {
01227 BOOL OK;
01228 switch (c)
01229 {
01230 case 0x000a:
01231 case 0x000b:
01232 case 0x000c:
01233 case 0x000d:
01234 case 0x0085:
01235 case 0x2028:
01236 case 0x2029:
01237 OK = TRUE;
01238 break;
01239
01240 default:
01241 OK = FALSE;
01242 break;
01243 }
01244
01245 if (OK == (d == OP_VSPACE))
01246 {
01247 if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
01248 {
01249 active_count--;
01250 next_active_state--;
01251 }
01252 count++;
01253 ADD_NEW_DATA(-state_offset, count, 0);
01254 }
01255 }
01256 break;
01257
01258
01259 case OP_HSPACE_EXTRA + OP_TYPEPLUS:
01260 case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
01261 case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
01262 count = current_state->count;
01263 if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
01264 if (clen > 0)
01265 {
01266 BOOL OK;
01267 switch (c)
01268 {
01269 case 0x09:
01270 case 0x20:
01271 case 0xa0:
01272 case 0x1680:
01273 case 0x180e:
01274 case 0x2000:
01275 case 0x2001:
01276 case 0x2002:
01277 case 0x2003:
01278 case 0x2004:
01279 case 0x2005:
01280 case 0x2006:
01281 case 0x2007:
01282 case 0x2008:
01283 case 0x2009:
01284 case 0x200A:
01285 case 0x202f:
01286 case 0x205f:
01287 case 0x3000:
01288 OK = TRUE;
01289 break;
01290
01291 default:
01292 OK = FALSE;
01293 break;
01294 }
01295
01296 if (OK == (d == OP_HSPACE))
01297 {
01298 if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
01299 {
01300 active_count--;
01301 next_active_state--;
01302 }
01303 count++;
01304 ADD_NEW_DATA(-state_offset, count, 0);
01305 }
01306 }
01307 break;
01308
01309
01310 #ifdef SUPPORT_UCP
01311 case OP_PROP_EXTRA + OP_TYPEQUERY:
01312 case OP_PROP_EXTRA + OP_TYPEMINQUERY:
01313 case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
01314 count = 4;
01315 goto QS1;
01316
01317 case OP_PROP_EXTRA + OP_TYPESTAR:
01318 case OP_PROP_EXTRA + OP_TYPEMINSTAR:
01319 case OP_PROP_EXTRA + OP_TYPEPOSSTAR:
01320 count = 0;
01321
01322 QS1:
01323
01324 ADD_ACTIVE(state_offset + 4, 0);
01325 if (clen > 0)
01326 {
01327 BOOL OK;
01328 const ucd_record * prop = GET_UCD(c);
01329 switch(code[2])
01330 {
01331 case PT_ANY:
01332 OK = TRUE;
01333 break;
01334
01335 case PT_LAMP:
01336 OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
01337 break;
01338
01339 case PT_GC:
01340 OK = _pcre_ucp_gentype[prop->chartype] == code[3];
01341 break;
01342
01343 case PT_PC:
01344 OK = prop->chartype == code[3];
01345 break;
01346
01347 case PT_SC:
01348 OK = prop->script == code[3];
01349 break;
01350
01351
01352
01353 default:
01354 OK = codevalue != OP_PROP;
01355 break;
01356 }
01357
01358 if (OK == (d == OP_PROP))
01359 {
01360 if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSSTAR ||
01361 codevalue == OP_PROP_EXTRA + OP_TYPEPOSQUERY)
01362 {
01363 active_count--;
01364 next_active_state--;
01365 }
01366 ADD_NEW(state_offset + count, 0);
01367 }
01368 }
01369 break;
01370
01371
01372 case OP_EXTUNI_EXTRA + OP_TYPEQUERY:
01373 case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:
01374 case OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY:
01375 count = 2;
01376 goto QS2;
01377
01378 case OP_EXTUNI_EXTRA + OP_TYPESTAR:
01379 case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:
01380 case OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR:
01381 count = 0;
01382
01383 QS2:
01384
01385 ADD_ACTIVE(state_offset + 2, 0);
01386 if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
01387 {
01388 const uschar *nptr = ptr + clen;
01389 int ncount = 0;
01390 if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
01391 codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
01392 {
01393 active_count--;
01394 next_active_state--;
01395 }
01396 while (nptr < end_subject)
01397 {
01398 int nd;
01399 int ndlen = 1;
01400 GETCHARLEN(nd, nptr, ndlen);
01401 if (UCD_CATEGORY(nd) != ucp_M) break;
01402 ncount++;
01403 nptr += ndlen;
01404 }
01405 ADD_NEW_DATA(-(state_offset + count), 0, ncount);
01406 }
01407 break;
01408 #endif
01409
01410
01411 case OP_ANYNL_EXTRA + OP_TYPEQUERY:
01412 case OP_ANYNL_EXTRA + OP_TYPEMINQUERY:
01413 case OP_ANYNL_EXTRA + OP_TYPEPOSQUERY:
01414 count = 2;
01415 goto QS3;
01416
01417 case OP_ANYNL_EXTRA + OP_TYPESTAR:
01418 case OP_ANYNL_EXTRA + OP_TYPEMINSTAR:
01419 case OP_ANYNL_EXTRA + OP_TYPEPOSSTAR:
01420 count = 0;
01421
01422 QS3:
01423 ADD_ACTIVE(state_offset + 2, 0);
01424 if (clen > 0)
01425 {
01426 int ncount = 0;
01427 switch (c)
01428 {
01429 case 0x000b:
01430 case 0x000c:
01431 case 0x0085:
01432 case 0x2028:
01433 case 0x2029:
01434 if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
01435 goto ANYNL02;
01436
01437 case 0x000d:
01438 if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
01439
01440
01441 ANYNL02:
01442 case 0x000a:
01443 if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
01444 codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
01445 {
01446 active_count--;
01447 next_active_state--;
01448 }
01449 ADD_NEW_DATA(-(state_offset + count), 0, ncount);
01450 break;
01451
01452 default:
01453 break;
01454 }
01455 }
01456 break;
01457
01458
01459 case OP_VSPACE_EXTRA + OP_TYPEQUERY:
01460 case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
01461 case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
01462 count = 2;
01463 goto QS4;
01464
01465 case OP_VSPACE_EXTRA + OP_TYPESTAR:
01466 case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
01467 case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
01468 count = 0;
01469
01470 QS4:
01471 ADD_ACTIVE(state_offset + 2, 0);
01472 if (clen > 0)
01473 {
01474 BOOL OK;
01475 switch (c)
01476 {
01477 case 0x000a:
01478 case 0x000b:
01479 case 0x000c:
01480 case 0x000d:
01481 case 0x0085:
01482 case 0x2028:
01483 case 0x2029:
01484 OK = TRUE;
01485 break;
01486
01487 default:
01488 OK = FALSE;
01489 break;
01490 }
01491 if (OK == (d == OP_VSPACE))
01492 {
01493 if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
01494 codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
01495 {
01496 active_count--;
01497 next_active_state--;
01498 }
01499 ADD_NEW_DATA(-(state_offset + count), 0, 0);
01500 }
01501 }
01502 break;
01503
01504
01505 case OP_HSPACE_EXTRA + OP_TYPEQUERY:
01506 case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
01507 case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
01508 count = 2;
01509 goto QS5;
01510
01511 case OP_HSPACE_EXTRA + OP_TYPESTAR:
01512 case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
01513 case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
01514 count = 0;
01515
01516 QS5:
01517 ADD_ACTIVE(state_offset + 2, 0);
01518 if (clen > 0)
01519 {
01520 BOOL OK;
01521 switch (c)
01522 {
01523 case 0x09:
01524 case 0x20:
01525 case 0xa0:
01526 case 0x1680:
01527 case 0x180e:
01528 case 0x2000:
01529 case 0x2001:
01530 case 0x2002:
01531 case 0x2003:
01532 case 0x2004:
01533 case 0x2005:
01534 case 0x2006:
01535 case 0x2007:
01536 case 0x2008:
01537 case 0x2009:
01538 case 0x200A:
01539 case 0x202f:
01540 case 0x205f:
01541 case 0x3000:
01542 OK = TRUE;
01543 break;
01544
01545 default:
01546 OK = FALSE;
01547 break;
01548 }
01549
01550 if (OK == (d == OP_HSPACE))
01551 {
01552 if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
01553 codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
01554 {
01555 active_count--;
01556 next_active_state--;
01557 }
01558 ADD_NEW_DATA(-(state_offset + count), 0, 0);
01559 }
01560 }
01561 break;
01562
01563
01564 #ifdef SUPPORT_UCP
01565 case OP_PROP_EXTRA + OP_TYPEEXACT:
01566 case OP_PROP_EXTRA + OP_TYPEUPTO:
01567 case OP_PROP_EXTRA + OP_TYPEMINUPTO:
01568 case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
01569 if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
01570 { ADD_ACTIVE(state_offset + 6, 0); }
01571 count = current_state->count;
01572 if (clen > 0)
01573 {
01574 BOOL OK;
01575 const ucd_record * prop = GET_UCD(c);
01576 switch(code[4])
01577 {
01578 case PT_ANY:
01579 OK = TRUE;
01580 break;
01581
01582 case PT_LAMP:
01583 OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
01584 break;
01585
01586 case PT_GC:
01587 OK = _pcre_ucp_gentype[prop->chartype] == code[5];
01588 break;
01589
01590 case PT_PC:
01591 OK = prop->chartype == code[5];
01592 break;
01593
01594 case PT_SC:
01595 OK = prop->script == code[5];
01596 break;
01597
01598
01599
01600 default:
01601 OK = codevalue != OP_PROP;
01602 break;
01603 }
01604
01605 if (OK == (d == OP_PROP))
01606 {
01607 if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO)
01608 {
01609 active_count--;
01610 next_active_state--;
01611 }
01612 if (++count >= GET2(code, 1))
01613 { ADD_NEW(state_offset + 6, 0); }
01614 else
01615 { ADD_NEW(state_offset, count); }
01616 }
01617 }
01618 break;
01619
01620
01621 case OP_EXTUNI_EXTRA + OP_TYPEEXACT:
01622 case OP_EXTUNI_EXTRA + OP_TYPEUPTO:
01623 case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
01624 case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
01625 if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
01626 { ADD_ACTIVE(state_offset + 4, 0); }
01627 count = current_state->count;
01628 if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
01629 {
01630 const uschar *nptr = ptr + clen;
01631 int ncount = 0;
01632 if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
01633 {
01634 active_count--;
01635 next_active_state--;
01636 }
01637 while (nptr < end_subject)
01638 {
01639 int nd;
01640 int ndlen = 1;
01641 GETCHARLEN(nd, nptr, ndlen);
01642 if (UCD_CATEGORY(nd) != ucp_M) break;
01643 ncount++;
01644 nptr += ndlen;
01645 }
01646 if (++count >= GET2(code, 1))
01647 { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }
01648 else
01649 { ADD_NEW_DATA(-state_offset, count, ncount); }
01650 }
01651 break;
01652 #endif
01653
01654
01655 case OP_ANYNL_EXTRA + OP_TYPEEXACT:
01656 case OP_ANYNL_EXTRA + OP_TYPEUPTO:
01657 case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
01658 case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
01659 if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
01660 { ADD_ACTIVE(state_offset + 4, 0); }
01661 count = current_state->count;
01662 if (clen > 0)
01663 {
01664 int ncount = 0;
01665 switch (c)
01666 {
01667 case 0x000b:
01668 case 0x000c:
01669 case 0x0085:
01670 case 0x2028:
01671 case 0x2029:
01672 if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
01673 goto ANYNL03;
01674
01675 case 0x000d:
01676 if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
01677
01678
01679 ANYNL03:
01680 case 0x000a:
01681 if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
01682 {
01683 active_count--;
01684 next_active_state--;
01685 }
01686 if (++count >= GET2(code, 1))
01687 { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }
01688 else
01689 { ADD_NEW_DATA(-state_offset, count, ncount); }
01690 break;
01691
01692 default:
01693 break;
01694 }
01695 }
01696 break;
01697
01698
01699 case OP_VSPACE_EXTRA + OP_TYPEEXACT:
01700 case OP_VSPACE_EXTRA + OP_TYPEUPTO:
01701 case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
01702 case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
01703 if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
01704 { ADD_ACTIVE(state_offset + 4, 0); }
01705 count = current_state->count;
01706 if (clen > 0)
01707 {
01708 BOOL OK;
01709 switch (c)
01710 {
01711 case 0x000a:
01712 case 0x000b:
01713 case 0x000c:
01714 case 0x000d:
01715 case 0x0085:
01716 case 0x2028:
01717 case 0x2029:
01718 OK = TRUE;
01719 break;
01720
01721 default:
01722 OK = FALSE;
01723 }
01724
01725 if (OK == (d == OP_VSPACE))
01726 {
01727 if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
01728 {
01729 active_count--;
01730 next_active_state--;
01731 }
01732 if (++count >= GET2(code, 1))
01733 { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
01734 else
01735 { ADD_NEW_DATA(-state_offset, count, 0); }
01736 }
01737 }
01738 break;
01739
01740
01741 case OP_HSPACE_EXTRA + OP_TYPEEXACT:
01742 case OP_HSPACE_EXTRA + OP_TYPEUPTO:
01743 case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
01744 case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
01745 if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
01746 { ADD_ACTIVE(state_offset + 4, 0); }
01747 count = current_state->count;
01748 if (clen > 0)
01749 {
01750 BOOL OK;
01751 switch (c)
01752 {
01753 case 0x09:
01754 case 0x20:
01755 case 0xa0:
01756 case 0x1680:
01757 case 0x180e:
01758 case 0x2000:
01759 case 0x2001:
01760 case 0x2002:
01761 case 0x2003:
01762 case 0x2004:
01763 case 0x2005:
01764 case 0x2006:
01765 case 0x2007:
01766 case 0x2008:
01767 case 0x2009:
01768 case 0x200A:
01769 case 0x202f:
01770 case 0x205f:
01771 case 0x3000:
01772 OK = TRUE;
01773 break;
01774
01775 default:
01776 OK = FALSE;
01777 break;
01778 }
01779
01780 if (OK == (d == OP_HSPACE))
01781 {
01782 if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
01783 {
01784 active_count--;
01785 next_active_state--;
01786 }
01787 if (++count >= GET2(code, 1))
01788 { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
01789 else
01790 { ADD_NEW_DATA(-state_offset, count, 0); }
01791 }
01792 }
01793 break;
01794
01795
01796
01797
01798
01799
01800
01801
01802 case OP_CHAR:
01803 if (clen > 0 && c == d) { ADD_NEW(state_offset + dlen + 1, 0); }
01804 break;
01805
01806
01807 case OP_CHARNC:
01808 if (clen == 0) break;
01809
01810 #ifdef SUPPORT_UTF8
01811 if (utf8)
01812 {
01813 if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
01814 {
01815 unsigned int othercase;
01816 if (c < 128) othercase = fcc[c]; else
01817
01818
01819
01820
01821 #ifdef SUPPORT_UCP
01822 othercase = UCD_OTHERCASE(c);
01823 #else
01824 othercase = NOTACHAR;
01825 #endif
01826
01827 if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
01828 }
01829 }
01830 else
01831 #endif
01832
01833
01834 {
01835 if (lcc[c] == lcc[d]) { ADD_NEW(state_offset + 2, 0); }
01836 }
01837 break;
01838
01839
01840 #ifdef SUPPORT_UCP
01841
01842
01843
01844
01845
01846 case OP_EXTUNI:
01847 if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
01848 {
01849 const uschar *nptr = ptr + clen;
01850 int ncount = 0;
01851 while (nptr < end_subject)
01852 {
01853 int nclen = 1;
01854 GETCHARLEN(c, nptr, nclen);
01855 if (UCD_CATEGORY(c) != ucp_M) break;
01856 ncount++;
01857 nptr += nclen;
01858 }
01859 ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
01860 }
01861 break;
01862 #endif
01863
01864
01865
01866
01867
01868
01869 case OP_ANYNL:
01870 if (clen > 0) switch(c)
01871 {
01872 case 0x000b:
01873 case 0x000c:
01874 case 0x0085:
01875 case 0x2028:
01876 case 0x2029:
01877 if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
01878
01879 case 0x000a:
01880 ADD_NEW(state_offset + 1, 0);
01881 break;
01882
01883 case 0x000d:
01884 if (ptr + 1 < end_subject && ptr[1] == 0x0a)
01885 {
01886 ADD_NEW_DATA(-(state_offset + 1), 0, 1);
01887 }
01888 else
01889 {
01890 ADD_NEW(state_offset + 1, 0);
01891 }
01892 break;
01893 }
01894 break;
01895
01896
01897 case OP_NOT_VSPACE:
01898 if (clen > 0) switch(c)
01899 {
01900 case 0x000a:
01901 case 0x000b:
01902 case 0x000c:
01903 case 0x000d:
01904 case 0x0085:
01905 case 0x2028:
01906 case 0x2029:
01907 break;
01908
01909 default:
01910 ADD_NEW(state_offset + 1, 0);
01911 break;
01912 }
01913 break;
01914
01915
01916 case OP_VSPACE:
01917 if (clen > 0) switch(c)
01918 {
01919 case 0x000a:
01920 case 0x000b:
01921 case 0x000c:
01922 case 0x000d:
01923 case 0x0085:
01924 case 0x2028:
01925 case 0x2029:
01926 ADD_NEW(state_offset + 1, 0);
01927 break;
01928
01929 default: break;
01930 }
01931 break;
01932
01933
01934 case OP_NOT_HSPACE:
01935 if (clen > 0) switch(c)
01936 {
01937 case 0x09:
01938 case 0x20:
01939 case 0xa0:
01940 case 0x1680:
01941 case 0x180e:
01942 case 0x2000:
01943 case 0x2001:
01944 case 0x2002:
01945 case 0x2003:
01946 case 0x2004:
01947 case 0x2005:
01948 case 0x2006:
01949 case 0x2007:
01950 case 0x2008:
01951 case 0x2009:
01952 case 0x200A:
01953 case 0x202f:
01954 case 0x205f:
01955 case 0x3000:
01956 break;
01957
01958 default:
01959 ADD_NEW(state_offset + 1, 0);
01960 break;
01961 }
01962 break;
01963
01964
01965 case OP_HSPACE:
01966 if (clen > 0) switch(c)
01967 {
01968 case 0x09:
01969 case 0x20:
01970 case 0xa0:
01971 case 0x1680:
01972 case 0x180e:
01973 case 0x2000:
01974 case 0x2001:
01975 case 0x2002:
01976 case 0x2003:
01977 case 0x2004:
01978 case 0x2005:
01979 case 0x2006:
01980 case 0x2007:
01981 case 0x2008:
01982 case 0x2009:
01983 case 0x200A:
01984 case 0x202f:
01985 case 0x205f:
01986 case 0x3000:
01987 ADD_NEW(state_offset + 1, 0);
01988 break;
01989 }
01990 break;
01991
01992
01993
01994
01995
01996
01997 case OP_NOT:
01998 if (clen > 0)
01999 {
02000 unsigned int otherd = ((ims & PCRE_CASELESS) != 0)? fcc[d] : d;
02001 if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); }
02002 }
02003 break;
02004
02005
02006 case OP_PLUS:
02007 case OP_MINPLUS:
02008 case OP_POSPLUS:
02009 case OP_NOTPLUS:
02010 case OP_NOTMINPLUS:
02011 case OP_NOTPOSPLUS:
02012 count = current_state->count;
02013 if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
02014 if (clen > 0)
02015 {
02016 unsigned int otherd = NOTACHAR;
02017 if ((ims & PCRE_CASELESS) != 0)
02018 {
02019 #ifdef SUPPORT_UTF8
02020 if (utf8 && d >= 128)
02021 {
02022 #ifdef SUPPORT_UCP
02023 otherd = UCD_OTHERCASE(d);
02024 #endif
02025 }
02026 else
02027 #endif
02028 otherd = fcc[d];
02029 }
02030 if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
02031 {
02032 if (count > 0 &&
02033 (codevalue == OP_POSPLUS || codevalue == OP_NOTPOSPLUS))
02034 {
02035 active_count--;
02036 next_active_state--;
02037 }
02038 count++;
02039 ADD_NEW(state_offset, count);
02040 }
02041 }
02042 break;
02043
02044
02045 case OP_QUERY:
02046 case OP_MINQUERY:
02047 case OP_POSQUERY:
02048 case OP_NOTQUERY:
02049 case OP_NOTMINQUERY:
02050 case OP_NOTPOSQUERY:
02051 ADD_ACTIVE(state_offset + dlen + 1, 0);
02052 if (clen > 0)
02053 {
02054 unsigned int otherd = NOTACHAR;
02055 if ((ims & PCRE_CASELESS) != 0)
02056 {
02057 #ifdef SUPPORT_UTF8
02058 if (utf8 && d >= 128)
02059 {
02060 #ifdef SUPPORT_UCP
02061 otherd = UCD_OTHERCASE(d);
02062 #endif
02063 }
02064 else
02065 #endif
02066 otherd = fcc[d];
02067 }
02068 if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
02069 {
02070 if (codevalue == OP_POSQUERY || codevalue == OP_NOTPOSQUERY)
02071 {
02072 active_count--;
02073 next_active_state--;
02074 }
02075 ADD_NEW(state_offset + dlen + 1, 0);
02076 }
02077 }
02078 break;
02079
02080
02081 case OP_STAR:
02082 case OP_MINSTAR:
02083 case OP_POSSTAR:
02084 case OP_NOTSTAR:
02085 case OP_NOTMINSTAR:
02086 case OP_NOTPOSSTAR:
02087 ADD_ACTIVE(state_offset + dlen + 1, 0);
02088 if (clen > 0)
02089 {
02090 unsigned int otherd = NOTACHAR;
02091 if ((ims & PCRE_CASELESS) != 0)
02092 {
02093 #ifdef SUPPORT_UTF8
02094 if (utf8 && d >= 128)
02095 {
02096 #ifdef SUPPORT_UCP
02097 otherd = UCD_OTHERCASE(d);
02098 #endif
02099 }
02100 else
02101 #endif
02102 otherd = fcc[d];
02103 }
02104 if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
02105 {
02106 if (codevalue == OP_POSSTAR || codevalue == OP_NOTPOSSTAR)
02107 {
02108 active_count--;
02109 next_active_state--;
02110 }
02111 ADD_NEW(state_offset, 0);
02112 }
02113 }
02114 break;
02115
02116
02117 case OP_EXACT:
02118 case OP_NOTEXACT:
02119 count = current_state->count;
02120 if (clen > 0)
02121 {
02122 unsigned int otherd = NOTACHAR;
02123 if ((ims & PCRE_CASELESS) != 0)
02124 {
02125 #ifdef SUPPORT_UTF8
02126 if (utf8 && d >= 128)
02127 {
02128 #ifdef SUPPORT_UCP
02129 otherd = UCD_OTHERCASE(d);
02130 #endif
02131 }
02132 else
02133 #endif
02134 otherd = fcc[d];
02135 }
02136 if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
02137 {
02138 if (++count >= GET2(code, 1))
02139 { ADD_NEW(state_offset + dlen + 3, 0); }
02140 else
02141 { ADD_NEW(state_offset, count); }
02142 }
02143 }
02144 break;
02145
02146
02147 case OP_UPTO:
02148 case OP_MINUPTO:
02149 case OP_POSUPTO:
02150 case OP_NOTUPTO:
02151 case OP_NOTMINUPTO:
02152 case OP_NOTPOSUPTO:
02153 ADD_ACTIVE(state_offset + dlen + 3, 0);
02154 count = current_state->count;
02155 if (clen > 0)
02156 {
02157 unsigned int otherd = NOTACHAR;
02158 if ((ims & PCRE_CASELESS) != 0)
02159 {
02160 #ifdef SUPPORT_UTF8
02161 if (utf8 && d >= 128)
02162 {
02163 #ifdef SUPPORT_UCP
02164 otherd = UCD_OTHERCASE(d);
02165 #endif
02166 }
02167 else
02168 #endif
02169 otherd = fcc[d];
02170 }
02171 if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
02172 {
02173 if (codevalue == OP_POSUPTO || codevalue == OP_NOTPOSUPTO)
02174 {
02175 active_count--;
02176 next_active_state--;
02177 }
02178 if (++count >= GET2(code, 1))
02179 { ADD_NEW(state_offset + dlen + 3, 0); }
02180 else
02181 { ADD_NEW(state_offset, count); }
02182 }
02183 }
02184 break;
02185
02186
02187
02188
02189
02190 case OP_CLASS:
02191 case OP_NCLASS:
02192 case OP_XCLASS:
02193 {
02194 BOOL isinclass = FALSE;
02195 int next_state_offset;
02196 const uschar *ecode;
02197
02198
02199
02200
02201 if (codevalue != OP_XCLASS)
02202 {
02203 ecode = code + 33;
02204 if (clen > 0)
02205 {
02206 isinclass = (c > 255)? (codevalue == OP_NCLASS) :
02207 ((code[1 + c/8] & (1 << (c&7))) != 0);
02208 }
02209 }
02210
02211
02212
02213
02214
02215 else
02216 {
02217 ecode = code + GET(code, 1);
02218 if (clen > 0) isinclass = _pcre_xclass(c, code + 1 + LINK_SIZE);
02219 }
02220
02221
02222
02223
02224
02225 next_state_offset = ecode - start_code;
02226
02227 switch (*ecode)
02228 {
02229 case OP_CRSTAR:
02230 case OP_CRMINSTAR:
02231 ADD_ACTIVE(next_state_offset + 1, 0);
02232 if (isinclass) { ADD_NEW(state_offset, 0); }
02233 break;
02234
02235 case OP_CRPLUS:
02236 case OP_CRMINPLUS:
02237 count = current_state->count;
02238 if (count > 0) { ADD_ACTIVE(next_state_offset + 1, 0); }
02239 if (isinclass) { count++; ADD_NEW(state_offset, count); }
02240 break;
02241
02242 case OP_CRQUERY:
02243 case OP_CRMINQUERY:
02244 ADD_ACTIVE(next_state_offset + 1, 0);
02245 if (isinclass) { ADD_NEW(next_state_offset + 1, 0); }
02246 break;
02247
02248 case OP_CRRANGE:
02249 case OP_CRMINRANGE:
02250 count = current_state->count;
02251 if (count >= GET2(ecode, 1))
02252 { ADD_ACTIVE(next_state_offset + 5, 0); }
02253 if (isinclass)
02254 {
02255 int max = GET2(ecode, 3);
02256 if (++count >= max && max != 0)
02257 { ADD_NEW(next_state_offset + 5, 0); }
02258 else
02259 { ADD_NEW(state_offset, count); }
02260 }
02261 break;
02262
02263 default:
02264 if (isinclass) { ADD_NEW(next_state_offset, 0); }
02265 break;
02266 }
02267 }
02268 break;
02269
02270
02271
02272
02273
02274
02275
02276 case OP_FAIL:
02277 forced_fail++;
02278 break;
02279
02280 case OP_ASSERT:
02281 case OP_ASSERT_NOT:
02282 case OP_ASSERTBACK:
02283 case OP_ASSERTBACK_NOT:
02284 {
02285 int rc;
02286 int local_offsets[2];
02287 int local_workspace[1000];
02288 const uschar *endasscode = code + GET(code, 1);
02289
02290 while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
02291
02292 rc = internal_dfa_exec(
02293 md,
02294 code,
02295 ptr,
02296 ptr - start_subject,
02297 local_offsets,
02298 sizeof(local_offsets)/sizeof(int),
02299 local_workspace,
02300 sizeof(local_workspace)/sizeof(int),
02301 ims,
02302 rlevel,
02303 recursing);
02304
02305 if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))
02306 { ADD_ACTIVE(endasscode + LINK_SIZE + 1 - start_code, 0); }
02307 }
02308 break;
02309
02310
02311 case OP_COND:
02312 case OP_SCOND:
02313 {
02314 int local_offsets[1000];
02315 int local_workspace[1000];
02316 int codelink = GET(code, 1);
02317 int condcode;
02318
02319
02320
02321
02322
02323 if (code[LINK_SIZE+1] == OP_CALLOUT)
02324 {
02325 rrc = 0;
02326 if (pcre_callout != NULL)
02327 {
02328 pcre_callout_block cb;
02329 cb.version = 1;
02330 cb.callout_number = code[LINK_SIZE+2];
02331 cb.offset_vector = offsets;
02332 cb.subject = (PCRE_SPTR)start_subject;
02333 cb.subject_length = end_subject - start_subject;
02334 cb.start_match = current_subject - start_subject;
02335 cb.current_position = ptr - start_subject;
02336 cb.pattern_position = GET(code, LINK_SIZE + 3);
02337 cb.next_item_length = GET(code, 3 + 2*LINK_SIZE);
02338 cb.capture_top = 1;
02339 cb.capture_last = -1;
02340 cb.callout_data = md->callout_data;
02341 if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;
02342 }
02343 if (rrc > 0) break;
02344 code += _pcre_OP_lengths[OP_CALLOUT];
02345 }
02346
02347 condcode = code[LINK_SIZE+1];
02348
02349
02350
02351 if (condcode == OP_CREF || condcode == OP_NCREF)
02352 return PCRE_ERROR_DFA_UCOND;
02353
02354
02355
02356 if (condcode == OP_DEF)
02357 { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
02358
02359
02360
02361
02362
02363 else if (condcode == OP_RREF || condcode == OP_NRREF)
02364 {
02365 int value = GET2(code, LINK_SIZE+2);
02366 if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
02367 if (recursing > 0)
02368 { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }
02369 else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
02370 }
02371
02372
02373
02374 else
02375 {
02376 int rc;
02377 const uschar *asscode = code + LINK_SIZE + 1;
02378 const uschar *endasscode = asscode + GET(asscode, 1);
02379
02380 while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
02381
02382 rc = internal_dfa_exec(
02383 md,
02384 asscode,
02385 ptr,
02386 ptr - start_subject,
02387 local_offsets,
02388 sizeof(local_offsets)/sizeof(int),
02389 local_workspace,
02390 sizeof(local_workspace)/sizeof(int),
02391 ims,
02392 rlevel,
02393 recursing);
02394
02395 if ((rc >= 0) ==
02396 (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))
02397 { ADD_ACTIVE(endasscode + LINK_SIZE + 1 - start_code, 0); }
02398 else
02399 { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
02400 }
02401 }
02402 break;
02403
02404
02405 case OP_RECURSE:
02406 {
02407 int local_offsets[1000];
02408 int local_workspace[1000];
02409 int rc;
02410
02411 DPRINTF(("%.*sStarting regex recursion %d\n", rlevel*2-2, SP,
02412 recursing + 1));
02413
02414 rc = internal_dfa_exec(
02415 md,
02416 start_code + GET(code, 1),
02417 ptr,
02418 ptr - start_subject,
02419 local_offsets,
02420 sizeof(local_offsets)/sizeof(int),
02421 local_workspace,
02422 sizeof(local_workspace)/sizeof(int),
02423 ims,
02424 rlevel,
02425 recursing + 1);
02426
02427 DPRINTF(("%.*sReturn from regex recursion %d: rc=%d\n", rlevel*2-2, SP,
02428 recursing + 1, rc));
02429
02430
02431
02432 if (rc == 0) return PCRE_ERROR_DFA_RECURSE;
02433
02434
02435
02436
02437
02438 if (rc > 0)
02439 {
02440 for (rc = rc*2 - 2; rc >= 0; rc -= 2)
02441 {
02442 const uschar *p = start_subject + local_offsets[rc];
02443 const uschar *pp = start_subject + local_offsets[rc+1];
02444 int charcount = local_offsets[rc+1] - local_offsets[rc];
02445 while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;
02446 if (charcount > 0)
02447 {
02448 ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1));
02449 }
02450 else
02451 {
02452 ADD_ACTIVE(state_offset + LINK_SIZE + 1, 0);
02453 }
02454 }
02455 }
02456 else if (rc != PCRE_ERROR_NOMATCH) return rc;
02457 }
02458 break;
02459
02460
02461 case OP_ONCE:
02462 {
02463 int local_offsets[2];
02464 int local_workspace[1000];
02465
02466 int rc = internal_dfa_exec(
02467 md,
02468 code,
02469 ptr,
02470 ptr - start_subject,
02471 local_offsets,
02472 sizeof(local_offsets)/sizeof(int),
02473 local_workspace,
02474 sizeof(local_workspace)/sizeof(int),
02475 ims,
02476 rlevel,
02477 recursing);
02478
02479 if (rc >= 0)
02480 {
02481 const uschar *end_subpattern = code;
02482 int charcount = local_offsets[1] - local_offsets[0];
02483 int next_state_offset, repeat_state_offset;
02484
02485 do { end_subpattern += GET(end_subpattern, 1); }
02486 while (*end_subpattern == OP_ALT);
02487 next_state_offset = end_subpattern - start_code + LINK_SIZE + 1;
02488
02489
02490
02491
02492
02493 repeat_state_offset = (*end_subpattern == OP_KETRMAX ||
02494 *end_subpattern == OP_KETRMIN)?
02495 end_subpattern - start_code - GET(end_subpattern, 1) : -1;
02496
02497
02498
02499
02500
02501
02502 if (charcount == 0)
02503 {
02504 ADD_ACTIVE(next_state_offset, 0);
02505 }
02506
02507
02508
02509
02510
02511
02512 else if (i + 1 >= active_count && new_count == 0)
02513 {
02514 ptr += charcount;
02515 clen = 0;
02516 ADD_NEW(next_state_offset, 0);
02517
02518
02519
02520
02521
02522
02523 if (repeat_state_offset >= 0)
02524 {
02525 next_active_state = active_states;
02526 active_count = 0;
02527 i = -1;
02528 ADD_ACTIVE(repeat_state_offset, 0);
02529 }
02530 }
02531 else
02532 {
02533 const uschar *p = start_subject + local_offsets[0];
02534 const uschar *pp = start_subject + local_offsets[1];
02535 while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;
02536 ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
02537 if (repeat_state_offset >= 0)
02538 { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }
02539 }
02540
02541 }
02542 else if (rc != PCRE_ERROR_NOMATCH) return rc;
02543 }
02544 break;
02545
02546
02547
02548
02549
02550 case OP_CALLOUT:
02551 rrc = 0;
02552 if (pcre_callout != NULL)
02553 {
02554 pcre_callout_block cb;
02555 cb.version = 1;
02556 cb.callout_number = code[1];
02557 cb.offset_vector = offsets;
02558 cb.subject = (PCRE_SPTR)start_subject;
02559 cb.subject_length = end_subject - start_subject;
02560 cb.start_match = current_subject - start_subject;
02561 cb.current_position = ptr - start_subject;
02562 cb.pattern_position = GET(code, 2);
02563 cb.next_item_length = GET(code, 2 + LINK_SIZE);
02564 cb.capture_top = 1;
02565 cb.capture_last = -1;
02566 cb.callout_data = md->callout_data;
02567 if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;
02568 }
02569 if (rrc == 0)
02570 { ADD_ACTIVE(state_offset + _pcre_OP_lengths[OP_CALLOUT], 0); }
02571 break;
02572
02573
02574
02575 default:
02576 return PCRE_ERROR_DFA_UITEM;
02577 }
02578
02579 NEXT_ACTIVE_STATE: continue;
02580
02581 }
02582
02583
02584
02585
02586
02587
02588
02589
02590
02591
02592
02593
02594
02595
02596 if (new_count <= 0)
02597 {
02598 if (rlevel == 1 &&
02599 could_continue &&
02600 forced_fail != workspace[1] &&
02601 (
02602 (md->moptions & PCRE_PARTIAL_HARD) != 0
02603 ||
02604 ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&
02605 match_count < 0)
02606 ) &&
02607 ptr >= end_subject &&
02608 ptr > current_subject)
02609 {
02610 if (offsetcount >= 2)
02611 {
02612 offsets[0] = md->start_used_ptr - start_subject;
02613 offsets[1] = end_subject - start_subject;
02614 }
02615 match_count = PCRE_ERROR_PARTIAL;
02616 }
02617
02618 DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
02619 "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,
02620 rlevel*2-2, SP));
02621 break;
02622 }
02623
02624
02625
02626 ptr += clen;
02627 }
02628
02629
02630
02631
02632
02633
02634
02635 return match_count;
02636 }
02637
02638
02639
02640
02641
02642
02643
02644
02645
02646
02647 /* pcre_dfa_exec: external entry point for matching with the DFA engine. */
02648 /* Validates the arguments, sets up the dfa_match_data block, then calls */
02649 /* internal_dfa_exec() at successive start positions until it returns */
02650 /* something other than PCRE_ERROR_NOMATCH (or just once, if anchored). */
02651 /* */
02652 /* Arguments: */
02653 /*   argument_re   compiled pattern (must not be NULL) */
02654 /*   extra_data    optional block: study data, callout data, tables */
02655 /*   subject       subject string (must not be NULL) */
02656 /*   length        length of subject in bytes */
02657 /*   start_offset  byte offset in subject at which matching starts */
02658 /*   options       match-time option bits */
02659 /*   offsets       vector for match offsets (may be NULL if offsetcount is 0) */
02660 /*   offsetcount   number of ints in offsets (must not be negative) */
02661 /*   workspace     working vector (must not be NULL) */
02662 /*   wscount       number of ints in workspace (must be at least 20) */
02663 /* */
02664 /* Returns: the result of internal_dfa_exec(), or PCRE_ERROR_NOMATCH when */
02665 /*          no start position matches, or a PCRE_ERROR_* code when one of */
02666 /*          the argument or subject checks below fails. */
02667 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
02668 pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
02669 const char *subject, int length, int start_offset, int options, int *offsets,
02670 int offsetcount, int *workspace, int wscount)
02671 {
02672 real_pcre *re = (real_pcre *)argument_re;
02673 dfa_match_data match_block;
02674 dfa_match_data *md = &match_block;
02675 BOOL utf8, anchored, startline, firstline;
02676 const uschar *current_subject, *end_subject, *lcc;
02677 /* NOTE(review): NLBLOCK is #defined as md at the top of the file, so the newline macros presumably rely on the local names md and utf8 - confirm before renaming. */
02678 pcre_study_data internal_study;
02679 const pcre_study_data *study = NULL;
02680 real_pcre internal_re;
02681
02682 const uschar *req_byte_ptr;
02683 const uschar *start_bits = NULL;
02684 BOOL first_byte_caseless = FALSE;
02685 BOOL req_byte_caseless = FALSE;
02686 int first_byte = -1;
02687 int req_byte = -1;
02688 int req_byte2 = -1;
02689 int newline;
02690
02691 /* Argument sanity checks; each failure returns its own error code. */
02692
02693 if ((options & ~PUBLIC_DFA_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
02694 if (re == NULL || subject == NULL || workspace == NULL ||
02695 (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
02696 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
02697 if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
02698
02699 /* Initialise the two match-block fields that extra_data may override */
02700 /* (tables and callout_data), then scan extra_data. This happens before */
02701 /* the magic-number test below because that test needs the study pointer. */
02702 /* A match limit or recursion limit in extra_data is rejected with */
02703 /* PCRE_ERROR_DFA_UMLIMIT. */
02704 md->tables = re->tables;
02705 md->callout_data = NULL;
02706
02707 if (extra_data != NULL)
02708 {
02709 unsigned int flags = extra_data->flags;
02710 if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
02711 study = (const pcre_study_data *)extra_data->study_data;
02712 if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) return PCRE_ERROR_DFA_UMLIMIT;
02713 if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
02714 return PCRE_ERROR_DFA_UMLIMIT;
02715 if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
02716 md->callout_data = extra_data->callout_data;
02717 if ((flags & PCRE_EXTRA_TABLES) != 0)
02718 md->tables = extra_data->tables;
02719 }
02720
02721 /* If the magic number is wrong, give _pcre_try_flipped() a chance to */
02722 /* produce a usable copy in internal_re / internal_study (presumably for a */
02723 /* pattern compiled with the opposite byte order - confirm against that */
02724 /* helper). A NULL result is reported as PCRE_ERROR_BADMAGIC. */
02725
02726 if (re->magic_number != MAGIC_NUMBER)
02727 {
02728 re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
02729 if (re == NULL) return PCRE_ERROR_BADMAGIC;
02730 if (study != NULL) study = &internal_study;
02731 }
02732
02733 /* Local subject pointers. req_byte_ptr starts one byte before the start */
02734 /* position so that the first "p > req_byte_ptr" test below succeeds. */
02735 current_subject = (const unsigned char *)subject + start_offset;
02736 end_subject = (const unsigned char *)subject + length;
02737 req_byte_ptr = current_subject - 1;
02738
02739 #ifdef SUPPORT_UTF8
02740 utf8 = (re->options & PCRE_UTF8) != 0;
02741 #else
02742 utf8 = FALSE;
02743 #endif
02744 /* Anchoring can be requested at match time (PCRE_DFA_RESTART also counts) or be a property of the pattern. */
02745 anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
02746 (re->options & PCRE_ANCHORED) != 0;
02747
02748 /* Fill in the rest of the match block. start_code lies past the name */
02749 /* table and is based on argument_re, since re may now be internal_re. */
02750 md->start_code = (const uschar *)argument_re +
02751 re->name_table_offset + re->name_count * re->name_entry_size;
02752 md->start_subject = (const unsigned char *)subject;
02753 md->end_subject = end_subject;
02754 md->start_offset = start_offset;
02755 md->moptions = options;
02756 md->poptions = re->options;
02757
02758 /* If no BSR option was given at match time, inherit the one stored in */
02759 /* the pattern; failing that, use ANYCRLF when BSR_ANYCRLF is defined. */
02760
02761 if ((md->moptions & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) == 0)
02762 {
02763 if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
02764 md->moptions |= re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE);
02765 #ifdef BSR_ANYCRLF
02766 else md->moptions |= PCRE_BSR_ANYCRLF;
02767 #endif
02768 }
02769
02770 /* Decode the newline convention, taking the bits from options when any */
02771 /* are set there and from the pattern otherwise. newline becomes one */
02772 /* character, two characters packed as (first << 8) | second, -1 or -2. */
02773 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
02774 PCRE_NEWLINE_BITS)
02775 {
02776 case 0: newline = NEWLINE; break;
02777 case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
02778 case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
02779 case PCRE_NEWLINE_CR+
02780 PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
02781 case PCRE_NEWLINE_ANY: newline = -1; break;
02782 case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
02783 default: return PCRE_ERROR_BADNEWLINE;
02784 }
02785 /* Translate newline into the nltype / nllen / nl[] fields of the match block. */
02786 if (newline == -2)
02787 {
02788 md->nltype = NLTYPE_ANYCRLF;
02789 }
02790 else if (newline < 0)
02791 {
02792 md->nltype = NLTYPE_ANY;
02793 }
02794 else
02795 {
02796 md->nltype = NLTYPE_FIXED;
02797 if (newline > 255)
02798 {
02799 md->nllen = 2;
02800 md->nl[0] = (newline >> 8) & 255;
02801 md->nl[1] = newline & 255;
02802 }
02803 else
02804 {
02805 md->nllen = 1;
02806 md->nl[0] = newline;
02807 }
02808 }
02809
02810 /* In UTF-8 mode, unless PCRE_NO_UTF8_CHECK is set, validate the subject */
02811 /* (a non-negative result from _pcre_valid_utf8() is treated as an error) */
02812 /* and reject a start_offset that lands on a 10xxxxxx continuation byte. */
02813 #ifdef SUPPORT_UTF8
02814 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
02815 {
02816 if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
02817 return PCRE_ERROR_BADUTF8;
02818 if (start_offset > 0 && start_offset < length)
02819 {
02820 int tb = ((uschar *)subject)[start_offset];
02821 if (tb > 127)
02822 {
02823 tb &= 0xc0;
02824 if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;
02825 }
02826 }
02827 }
02828 #endif
02829
02830
02831 /* Neither the pattern nor extra_data supplied tables: use the defaults. */
02832
02833
02834 if (md->tables == NULL) md->tables = _pcre_default_tables;
02835
02836 /* Cache the lcc table (used for caseless first-byte comparison) and the */
02837 /* two line-related flags that the start-position loop consults. */
02838
02839 lcc = md->tables + lcc_offset;
02840 startline = (re->flags & PCRE_STARTLINE) != 0;
02841 firstline = (re->options & PCRE_FIRSTLINE) != 0;
02842
02843 /* For a match that is not anchored, pick up a start-of-match hint: */
02844 /* either the known first byte (mapped through lcc when flagged caseless) */
02845 /* or, when there is none and the pattern is not PCRE_STARTLINE, the */
02846 /* start_bits bitmap from the study data if PCRE_STUDY_MAPPED is set. */
02847
02848
02849 if (!anchored)
02850 {
02851 if ((re->flags & PCRE_FIRSTSET) != 0)
02852 {
02853 first_byte = re->first_byte & 255;
02854 if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
02855 first_byte = lcc[first_byte];
02856 }
02857 else
02858 {
02859 if (!startline && study != NULL &&
02860 (study->flags & PCRE_STUDY_MAPPED) != 0)
02861 start_bits = study->start_bits;
02862 }
02863 }
02864
02865 /* Pick up the required byte, if the pattern records one. req_byte2 is */
02866 /* its fcc-table mapping, accepted as the alternative in caseless mode. */
02867
02868 if ((re->flags & PCRE_REQCHSET) != 0)
02869 {
02870 req_byte = re->req_byte & 255;
02871 req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
02872 req_byte2 = (md->tables + fcc_offset)[req_byte];
02873 }
02874
02875 /* Main loop: one iteration per candidate start position. It ends by */
02876 /* returning the result of internal_dfa_exec(), or by breaking out to the */
02877 /* final PCRE_ERROR_NOMATCH return. */
02878
02879 for (;;)
02880 {
02881 int rc;
02882 /* All start optimizations are skipped when PCRE_DFA_RESTART is set. */
02883 if ((options & PCRE_DFA_RESTART) == 0)
02884 {
02885 const uschar *save_end_subject = end_subject;
02886
02887 /* With PCRE_FIRSTLINE, end_subject is temporarily pulled back to the */
02888 /* first newline at or after current_subject (or left at the real end */
02889 /* if there is none), which bounds the scans below; it is restored */
02890 /* from save_end_subject afterwards. */
02891
02892 if (firstline)
02893 {
02894 USPTR t = current_subject;
02895 #ifdef SUPPORT_UTF8
02896 if (utf8)
02897 {
02898 while (t < md->end_subject && !IS_NEWLINE(t))
02899 {
02900 t++;
02901 while (t < end_subject && (*t & 0xc0) == 0x80) t++;
02902 }
02903 }
02904 else
02905 #endif
02906 while (t < md->end_subject && !IS_NEWLINE(t)) t++;
02907 end_subject = t;
02908 }
02909
02910
02911 /* Skip ahead to a plausible start, unless PCRE_NO_START_OPTIMIZE is */
02912 /* set. At most one of the three hints below is applied. */
02913
02914 if ((options & PCRE_NO_START_OPTIMIZE) == 0)
02915 {
02916
02917 /* Known first byte: advance to its next occurrence (or to end_subject). */
02918 if (first_byte >= 0)
02919 {
02920 if (first_byte_caseless)
02921 while (current_subject < end_subject &&
02922 lcc[*current_subject] != first_byte)
02923 current_subject++;
02924 else
02925 while (current_subject < end_subject &&
02926 *current_subject != first_byte)
02927 current_subject++;
02928 }
02929
02930 /* PCRE_STARTLINE pattern: unless still at the original start position, */
02931 /* advance until WAS_NEWLINE() reports true for the position. */
02932 else if (startline)
02933 {
02934 if (current_subject > md->start_subject + start_offset)
02935 {
02936 #ifdef SUPPORT_UTF8
02937 if (utf8)
02938 {
02939 while (current_subject < end_subject &&
02940 !WAS_NEWLINE(current_subject))
02941 {
02942 current_subject++;
02943 while(current_subject < end_subject &&
02944 (*current_subject & 0xc0) == 0x80)
02945 current_subject++;
02946 }
02947 }
02948 else
02949 #endif
02950 while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
02951 current_subject++;
02952
02953 /* If the byte just passed is CR and the next one is LF, and the */
02954 /* newline type is ANY or ANYCRLF, step over the LF as well so the */
02955 /* start position is not left in the middle of a CRLF pair. */
02956
02957 if (current_subject[-1] == CHAR_CR &&
02958 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
02959 current_subject < end_subject &&
02960 *current_subject == CHAR_NL)
02961 current_subject++;
02962 }
02963 }
02964
02965 /* Studied pattern: advance to a byte whose bit is set in start_bits. */
02966
02967 else if (start_bits != NULL)
02968 {
02969 while (current_subject < end_subject)
02970 {
02971 register unsigned int c = *current_subject;
02972 if ((start_bits[c/8] & (1 << (c&7))) == 0) current_subject++;
02973 else break;
02974 }
02975 }
02976 }
02977
02978 /* Undo any PCRE_FIRSTLINE shortening of end_subject. */
02979
02980 end_subject = save_end_subject;
02981
02982
02983 /* The remaining checks can abandon the match early, so they run only */
02984 /* when start optimization is allowed and no partial option is set. */
02985
02986 if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&
02987 (options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) == 0)
02988 {
02989
02990
02991 /* Fewer bytes remain than the studied minimum length: return */
02992 /* PCRE_ERROR_NOMATCH at once. */
02993
02994 if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
02995 end_subject - current_subject < study->minlength)
02996 return PCRE_ERROR_NOMATCH;
02997
02998
02999
03000
03001
03002
03003
03004
03005 /* Required-byte check, done only when fewer than REQ_BYTE_MAX bytes */
03006 /* remain. Search forward for req_byte (or req_byte2 when caseless), */
03007 /* starting after the first byte if one is known. req_byte_ptr */
03008 /* remembers where it was last found, so the search is repeated only */
03009 /* once the start position has moved past that point. */
03010
03011 if (req_byte >= 0 && end_subject - current_subject < REQ_BYTE_MAX)
03012 {
03013 register const uschar *p = current_subject + ((first_byte >= 0)? 1 : 0);
03014
03015
03016 /* Search only if the previously found occurrence is now behind p. */
03017
03018 if (p > req_byte_ptr)
03019 {
03020 if (req_byte_caseless)
03021 {
03022 while (p < end_subject)
03023 {
03024 register int pp = *p++;
03025 if (pp == req_byte || pp == req_byte2) { p--; break; }
03026 }
03027 }
03028 else
03029 {
03030 while (p < end_subject)
03031 {
03032 if (*p++ == req_byte) { p--; break; }
03033 }
03034 }
03035
03036
03037 /* Required byte absent from the rest of the subject: leave the loop. */
03038
03039 if (p >= end_subject) break;
03040
03041
03042 /* Remember the occurrence for later start positions. */
03043
03044
03045 req_byte_ptr = p;
03046 }
03047 }
03048 }
03049 }
03050
03051 /* Run the matcher from this start position. start_used_ptr is recorded */
03052 /* first; internal_dfa_exec() uses it for the offsets of a partial match. */
03053 md->start_used_ptr = current_subject;
03054 /* The two trailing zeros are the rlevel and recursing arguments. */
03055 rc = internal_dfa_exec(
03056 md,
03057 md->start_code,
03058 current_subject,
03059 start_offset,
03060 offsets,
03061 offsetcount,
03062 workspace,
03063 wscount,
03064 re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL),
03065 0,
03066 0);
03067
03068
03069 /* Any result other than "no match" is final, as is any result at all */
03070 /* for an anchored match, which gets only this one attempt. */
03071 if (rc != PCRE_ERROR_NOMATCH || anchored) return rc;
03072
03073
03074 /* Move on one character (skipping UTF-8 continuation bytes), giving up */
03075 /* at a newline under PCRE_FIRSTLINE or once past the end of the subject. */
03076 if (firstline && IS_NEWLINE(current_subject)) break;
03077 current_subject++;
03078 if (utf8)
03079 {
03080 while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80)
03081 current_subject++;
03082 }
03083 if (current_subject > end_subject) break;
03084
03085 /* If that step landed between CR and LF, skip the LF too - but only */
03086 /* when the pattern is not flagged PCRE_HASCRORLF and the newline */
03087 /* convention is ANY, ANYCRLF, or a two-character sequence. */
03088
03089 if (current_subject[-1] == CHAR_CR &&
03090 current_subject < end_subject &&
03091 *current_subject == CHAR_NL &&
03092 (re->flags & PCRE_HASCRORLF) == 0 &&
03093 (md->nltype == NLTYPE_ANY ||
03094 md->nltype == NLTYPE_ANYCRLF ||
03095 md->nllen == 2))
03096 current_subject++;
03097
03098 }
03099 /* The loop was left without a match at any start position. */
03100 return PCRE_ERROR_NOMATCH;
03101 }
03102
03103