• Main Page
  • Related Pages
  • Modules
  • Namespaces
  • Classes
  • Files
  • File List
  • File Members

src/pcre/pcre_exec.c

00001 /*************************************************
00002 *      Perl-Compatible Regular Expressions       *
00003 *************************************************/
00004 
00005 /* PCRE is a library of functions to support regular expressions whose syntax
00006 and semantics are as close as possible to those of the Perl 5 language.
00007 
00008                        Written by Philip Hazel
00009            Copyright (c) 1997-2009 University of Cambridge
00010 
00011 -----------------------------------------------------------------------------
00012 Redistribution and use in source and binary forms, with or without
00013 modification, are permitted provided that the following conditions are met:
00014 
00015     * Redistributions of source code must retain the above copyright notice,
00016       this list of conditions and the following disclaimer.
00017 
00018     * Redistributions in binary form must reproduce the above copyright
00019       notice, this list of conditions and the following disclaimer in the
00020       documentation and/or other materials provided with the distribution.
00021 
00022     * Neither the name of the University of Cambridge nor the names of its
00023       contributors may be used to endorse or promote products derived from
00024       this software without specific prior written permission.
00025 
00026 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
00027 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00028 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00029 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
00030 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
00031 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
00032 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
00033 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
00034 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
00035 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00036 POSSIBILITY OF SUCH DAMAGE.
00037 -----------------------------------------------------------------------------
00038 */
00039 
00040 
00041 /* This module contains pcre_exec(), the externally visible function that does
00042 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
00043 possible. There are also some static supporting functions. */
00044 
00045 #ifdef HAVE_CONFIG_H
00046 #include "config.h"
00047 #elif defined(_WINDOWS)   /* no config.h: fall back to the Windows settings */
00048 #include <spl/configwin32.h>
00049 #endif
00050 
00051 
00052 #define NLBLOCK md             /* Block containing newline information */
00053 #define PSSTART start_subject  /* Field containing processed string start */
00054 #define PSEND   end_subject    /* Field containing processed string end */
      /* NOTE(review): the three names above are defined ahead of the include
      below, presumably because macros in pcre_internal.h expand them -- confirm. */
00055
00056 #include "pcre_internal.h"
00057
00058 /* Undefine some potentially clashing cpp symbols. match() below uses "min"
      and "max" as the names of its repeat-count variables, so macros of those
      names that are already defined would break those declarations. */
00059
00060 #undef min
00061 #undef max
00062
00063 /* Flag bits for the match() function */
00064
00065 #define match_condassert     0x01  /* Called to check a condition assertion */
00066 #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
00067
00068 /* Non-error returns from the match() function. Error returns are externally
00069 defined PCRE_ERROR_xxx codes, which are all negative. */
00070
00071 #define MATCH_MATCH        1
00072 #define MATCH_NOMATCH      0
00073
00074 /* Special internal returns from the match() function. Make them sufficiently
00075 negative to avoid the external error codes. Each one is returned by the
      opcode case of the same name in match() (OP_COMMIT, OP_PRUNE, OP_SKIP,
      OP_THEN) when the rest of the pattern gave MATCH_NOMATCH. */
00076
00077 #define MATCH_COMMIT       (-999)
00078 #define MATCH_PRUNE        (-998)
00079 #define MATCH_SKIP         (-997)
00080 #define MATCH_THEN         (-996)
00081
00082 /* Maximum number of ints of offset to save on the stack for recursive calls.
00083 If the offset vector is bigger, malloc is used. This should be a multiple of 3,
00084 because the offset vector is always a multiple of 3 long. */
00085
00086 #define REC_STACK_SAVE_MAX 30
00087
00088 /* Min and max values for the common repeats; for the maxima, 0 => infinity.
      NOTE(review): the six entries look like greedy/minimizing pairs for the
      *, + and ? quantifiers, in that order -- confirm against the code that
      indexes these tables. */
00089
00090 static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
00091 static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
00092 
00093 
00094 
00095 #if defined(DEBUG) || defined(DEBUG_PCRE)  /* callers below test DEBUG_PCRE */
00096 /*************************************************
00097 *        Debugging function to print chars       *
00098 *************************************************/
00099
00100 /* Print a sequence of chars in printable format, stopping at the end of the
00101 subject if requested. Bytes that fail isprint() are written as \xhh escapes.
00102
00103 Arguments:
00104   p           points to characters
00105   length      number to print
00106   is_subject  TRUE if printing from within md->start_subject
00107   md          pointer to matching data block, if is_subject is TRUE
00108
00109 Returns:     nothing
00110 */
00111
00112 static void
00113 pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
00114 {
00115 unsigned int c;
      /* Clip the count to the end of the subject; md is read only if is_subject. */
00116 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
00117 while (length-- > 0)
00118   if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
00119 }
00120 #endif
00121 
00122 
00123 
00124 /*************************************************
00125 *          Match a back-reference                *
00126 *************************************************/
00127 
00128 /* If a back reference hasn't been set, the length that is passed is greater
00129 than the number of characters left in the string, so the match fails.
      Otherwise "length" bytes of the subject starting at eptr are compared with
      the text that starts at md->start_subject + md->offset_vector[offset]. When
      PCRE_CASELESS is set in ims the comparison is caseless.
00130
00131 Arguments:
00132   offset      index into the offset vector
00133   eptr        points into the subject
00134   length      length to be matched
00135   md          points to match data block
00136   ims         the ims flags
00137
00138 Returns:      TRUE if matched
00139 */
00140
00141 static BOOL
00142 match_ref(int offset, register USPTR eptr, int length, match_data *md,
00143   unsigned long int ims)
00144 {
      /* p walks the reference text; eptr walks the subject. */
00145 USPTR p = md->start_subject + md->offset_vector[offset];
00146
      /* NOTE(review): this trace runs before the length check below, and the
      second pchars() call passes is_subject == FALSE, so the reference text is
      printed with the unclipped length. */
00147 #ifdef DEBUG_PCRE
00148 if (eptr >= md->end_subject)
00149   printf("matching subject <null>");
00150 else
00151   {
00152   printf("matching subject ");
00153   pchars(eptr, length, TRUE, md);
00154   }
00155 printf(" against backref ");
00156 pchars(p, length, FALSE, md);
00157 printf("\n");
00158 #endif
00159
00160 /* Always fail if not enough characters left */
00161
00162 if (length > md->end_subject - eptr) return FALSE;
00163
00164 /* Separate the caseless case for speed. In UTF-8 mode we can only do this
00165 properly if Unicode properties are supported. Otherwise, we can check only
00166 ASCII characters. */
00167
00168 if ((ims & PCRE_CASELESS) != 0)
00169   {
00170 #ifdef SUPPORT_UTF8
00171 #ifdef SUPPORT_UCP
00172   if (md->utf8)
00173     {
        /* The loop is bounded on the subject side only. GETCHARINC presumably
        decodes one character and advances the pointer it is given -- confirm in
        pcre_internal.h. */
00174     USPTR endptr = eptr + length;
00175     while (eptr < endptr)
00176       {
00177       int c, d;
00178       GETCHARINC(c, eptr);
00179       GETCHARINC(d, p);
          /* Accept an exact match, or c equal to UCD_OTHERCASE(d). */
00180       if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
00181       }
00182     }
00183   else   /* binds to the while loop below when UTF-8 and UCP are compiled in */
00184 #endif
00185 #endif
00186
00187   /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
00188   is no UCP support. Both bytes are mapped through md->lcc[] (presumably the
        lower-casing table -- confirm) before they are compared. */
00189
00190   while (length-- > 0)
00191     { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
00192   }
00193
00194 /* In the caseful case, we can just compare the bytes, whether or not we
00195 are in UTF-8 mode. */
00196
00197 else
00198   { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
00199
00200 return TRUE;
00201 }
00202 
00203 
00204 
00205 /***************************************************************************
00206 ****************************************************************************
00207                    RECURSION IN THE match() FUNCTION
00208 
00209 The match() function is highly recursive, though not every recursive call
00210 increases the recursive depth. Nevertheless, some regular expressions can cause
00211 it to recurse to a great depth. I was writing for Unix, so I just let it call
00212 itself recursively. This uses the stack for saving everything that has to be
00213 saved for a recursive call. On Unix, the stack can be large, and this works
00214 fine.
00215 
00216 It turns out that on some non-Unix-like systems there are problems with
00217 programs that use a lot of stack. (This despite the fact that every last chip
00218 has oodles of memory these days, and techniques for extending the stack have
00219 been known for decades.) So....
00220 
00221 There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
00222 calls by keeping local variables that need to be preserved in blocks of memory
00223 obtained from malloc() instead of on the stack. Macros are used to
00224 achieve this so that the actual code doesn't look very different to what it
00225 always used to.
00226 
00227 The original heap-recursive code used longjmp(). However, it seems that this
00228 can be very slow on some operating systems. Following a suggestion from Stan
00229 Switzer, the use of longjmp() has been abolished, at the cost of having to
00230 provide a unique number for each call to RMATCH. There is no way of generating
00231 a sequence of numbers at compile time in C. I have given them names, to make
00232 them stand out more clearly.
00233 
00234 Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
00235 FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
00236 tests. Furthermore, not using longjmp() means that local dynamic variables
00237 don't have indeterminate values; this has meant that the frame size can be
00238 reduced because the result can be "passed back" by straight setting of the
00239 variable instead of being passed in the frame.
00240 ****************************************************************************
00241 ***************************************************************************/
00242 
00243 /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
00244 below must be updated in sync. Each name is passed as the last ("rw") argument
      of an RMATCH call. In the NO_RECURSE build RMATCH stores it in the calling
      frame's Xwhere field and pastes it into the resume label L_<name>; the
      recursive build ignores it. Numbering starts at 1. */
00245
00246 enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
00247        RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
00248        RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
00249        RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
00250        RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
00251        RM51,  RM52, RM53, RM54 };
00252 
00253 /* These versions of the macros use the stack, as normal. There are debugging
00254 versions and production versions. Note that the "rw" argument of RMATCH isn't
00255 actually used in this definition. RMATCH makes a real recursive call to match()
      and leaves the result in rrc; mstart and rdepth are not macro arguments but
      are picked up from the variables of the calling match(). RRETURN is a plain
      return. The debugging versions (DEBUG_PCRE) also trace each call and return
      with printf. */
00256
00257 #ifndef NO_RECURSE
00258 #define REGISTER register
00259
00260 #ifdef DEBUG_PCRE
00261 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
00262   { \
00263   printf("match() called in line %d\n", __LINE__); \
00264   rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \
00265   printf("to line %d\n", __LINE__); \
00266   }
00267 #define RRETURN(ra) \
00268   { \
00269   printf("match() returned %d from line %d ", ra, __LINE__); \
00270   return ra; \
00271   }
00272 #else
00273 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
00274   rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)
00275 #define RRETURN(ra) return ra
00276 #endif
00277 
00278 #else
00279 
00280 
00281 /* These versions of the macros manage a private stack on the heap. Note that
00282 the "rd" argument of RMATCH isn't actually used in this definition. It's the md
00283 argument of match(), which never changes. */
00284
00285 #define REGISTER
00286
      /* RMATCH simulates a recursive call. It obtains a new frame, records the
      resume point (rw) in the current frame's Xwhere, copies the "arguments" of
      the call into the new frame, links it to the current frame and jumps to
      HEAP_RECURSE. The label L_<rw> is where execution continues after the
      simulated call comes back. If no frame can be obtained, the current
      incarnation gives up with PCRE_ERROR_NOMEMORY via RRETURN rather than
      writing through a NULL pointer. */

00287 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
00288   {\
00289   heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
        if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
00290   frame->Xwhere = rw; \
00291   newframe->Xeptr = ra;\
00292   newframe->Xecode = rb;\
00293   newframe->Xmstart = mstart;\
00294   newframe->Xoffset_top = rc;\
00295   newframe->Xims = re;\
00296   newframe->Xeptrb = rf;\
00297   newframe->Xflags = rg;\
00298   newframe->Xrdepth = frame->Xrdepth + 1;\
00299   newframe->Xprevframe = frame;\
00300   frame = newframe;\
00301   DPRINTF(("restarting from line %d\n", __LINE__));\
00302   goto HEAP_RECURSE;\
00303   L_##rw:\
00304   DPRINTF(("jumped back to line %d\n", __LINE__));\
00305   }
00306
      /* RRETURN simulates a return. It frees the current frame and makes its
      parent (Xprevframe) the current one. If there is a parent, the result is
      placed in rrc and control goes to HEAP_RETURN; if the freed frame was the
      top-level one (Xprevframe == NULL), match() really returns ra. Note that ra
      is evaluated only after "frame" has been switched to the parent, so it must
      not depend on variables that live in the frame being released. */
00307 #define RRETURN(ra)\
00308   {\
00309   heapframe *newframe = frame;\
00310   frame = newframe->Xprevframe;\
00311   (pcre_stack_free)(newframe);\
00312   if (frame != NULL)\
00313     {\
00314     rrc = ra;\
00315     goto HEAP_RETURN;\
00316     }\
00317   return ra;\
00318   }
00319 
00320 
00321 /* Structure for remembering the local variables in a private frame. Each
      field Xname stands in for the variable "name" of match(), which reaches it
      through the "#define name frame->Xname" lines at the top of that function
      in the NO_RECURSE build. */
00322
00323 typedef struct heapframe {
00324   struct heapframe *Xprevframe;   /* Calling frame; NULL at the top level */
00325
00326   /* Function arguments that may change */
00327
00328   USPTR Xeptr;
00329   const uschar *Xecode;
00330   USPTR Xmstart;
00331   int Xoffset_top;
00332   unsigned long int Xims;         /* Same type as the ims argument of match() */
00333   eptrblock *Xeptrb;
00334   int Xflags;
00335   unsigned int Xrdepth;
00336
00337   /* Function local variables */
00338
00339   USPTR Xcallpat;
00340 #ifdef SUPPORT_UTF8
00341   USPTR Xcharptr;
00342 #endif
00343   USPTR Xdata;
00344   USPTR Xnext;
00345   USPTR Xpp;
00346   USPTR Xprev;
00347   USPTR Xsaved_eptr;
00348
00349   recursion_info Xnew_recursive;
00350
00351   BOOL Xcur_is_word;
00352   BOOL Xcondition;
00353   BOOL Xprev_is_word;
00354
00355   unsigned long int Xoriginal_ims;
00356
00357 #ifdef SUPPORT_UCP
00358   int Xprop_type;
00359   int Xprop_value;
00360   int Xprop_fail_result;
00361   int Xprop_category;
00362   int Xprop_chartype;
00363   int Xprop_script;
00364   int Xoclength;
00365   uschar Xocchars[8];
00366 #endif
00367
00368   int Xcodelink;
00369   int Xctype;
00370   unsigned int Xfc;
00371   int Xfi;
00372   int Xlength;
00373   int Xmax;
00374   int Xmin;
00375   int Xnumber;
00376   int Xoffset;
00377   int Xop;
00378   int Xsave_capture_last;
00379   int Xsave_offset1, Xsave_offset2, Xsave_offset3;
00380   int Xstacksave[REC_STACK_SAVE_MAX];
00381
00382   eptrblock Xnewptrb;
00383
00384   /* Where to jump back to: the RMxx number stored by RMATCH */
00385
00386   int Xwhere;
00387
00388 } heapframe;
00389 
00390 #endif
00391 
00392 
00393 /***************************************************************************
00394 ***************************************************************************/
00395 
00396 
00397 
00398 /*************************************************
00399 *         Match from current position            *
00400 *************************************************/
00401 
00402 /* This function is called recursively in many circumstances. Whenever it
00403 returns a negative (error) response, the outer incarnation must also return the
00404 same response. */
00405 
00406 /* These macros pack up tests that are used for partial matching, and which
00407 appear several times in the code. We set the "hit end" flag if the pointer is
00408 at the end of the subject and also past the start of the subject (i.e.
00409 something has been matched). For hard partial matching, we then return
00410 immediately. The second one is used when we already know we are past the end of
00411 the subject. Both expand to a bare "if" statement that may execute RRETURN,
      so they must be used as complete statements and never directly before an
      "else". md->partial > 1 is the "hard" case that returns PCRE_ERROR_PARTIAL. */
00412
00413 #define CHECK_PARTIAL()\
00414   if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\
00415     {\
00416     md->hitend = TRUE;\
00417     if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
00418     }
00419
00420 #define SCHECK_PARTIAL()\
00421   if (md->partial != 0 && eptr > mstart)\
00422     {\
00423     md->hitend = TRUE;\
00424     if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
00425     }
00426 
00427 
00428 /* Performance note: It might be tempting to extract commonly used fields from
00429 the md structure (e.g. utf8, end_subject) into individual variables to improve
00430 performance. Tests using gcc on a SPARC disproved this; in the first case, it
00431 made performance worse.
00432 
00433 Arguments:
00434    eptr        pointer to current character in subject
00435    ecode       pointer to current position in compiled code
00436    mstart      pointer to the current match start position (can be modified
00437                  by encountering \K)
00438    offset_top  current top pointer
00439    md          pointer to "static" info for the match
00440    ims         current /i, /m, and /s options
00441    eptrb       pointer to chain of blocks containing eptr at start of
00442                  brackets - for testing for empty matches
00443    flags       can contain
00444                  match_condassert - this is an assertion condition
00445                  match_cbegroup - this is the start of an unlimited repeat
00446                    group that can match an empty string
00447    rdepth      the recursion depth
00448 
00449 Returns:       MATCH_MATCH if matched            )  these values are >= 0
00450                MATCH_NOMATCH if failed to match  )
00451                a negative PCRE_ERROR_xxx value if aborted by an error condition
00452                  (e.g. stopped by repeated call or recursion limit)
00453 */
00454 
00455 static int
00456 match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
00457   int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
00458   int flags, unsigned int rdepth)
00459 {
00460 /* These variables do not need to be preserved over recursion in this function,
00461 so they can be ordinary variables in all cases. Mark some of them with
00462 "register" because they are used a lot in loops. */
00463 
00464 register int  rrc;         /* Returns from recursive calls */
00465 register int  i;           /* Used for loops not involving calls to RMATCH() */
00466 register unsigned int c;   /* Character values not kept over RMATCH() calls */
00467 register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
00468 
00469 BOOL minimize, possessive; /* Quantifier options */
00470 int condcode;
00471 
00472 /* When recursion is not being used, all "local" variables that have to be
00473 preserved over calls to RMATCH() are part of a "frame" which is obtained from
00474 heap storage. Set up the top-level frame here; others are obtained from the
00475 heap whenever RMATCH() does a "recursion". See the macro definitions above. */
00476 
00477 #ifdef NO_RECURSE
00478 heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
00479 frame->Xprevframe = NULL;            /* Marks the top level */
00480 
00481 /* Copy in the original argument variables */
00482 
00483 frame->Xeptr = eptr;
00484 frame->Xecode = ecode;
00485 frame->Xmstart = mstart;
00486 frame->Xoffset_top = offset_top;
00487 frame->Xims = ims;
00488 frame->Xeptrb = eptrb;
00489 frame->Xflags = flags;
00490 frame->Xrdepth = rdepth;
00491 
00492 /* This is where control jumps back to in order to effect "recursion" */
00493 
00494 HEAP_RECURSE:
00495 
00496 /* Macros make the argument variables come from the current frame */
00497 
00498 #define eptr               frame->Xeptr
00499 #define ecode              frame->Xecode
00500 #define mstart             frame->Xmstart
00501 #define offset_top         frame->Xoffset_top
00502 #define ims                frame->Xims
00503 #define eptrb              frame->Xeptrb
00504 #define flags              frame->Xflags
00505 #define rdepth             frame->Xrdepth
00506 
00507 /* Ditto for the local variables */
00508 
00509 #ifdef SUPPORT_UTF8
00510 #define charptr            frame->Xcharptr
00511 #endif
00512 #define callpat            frame->Xcallpat
00513 #define codelink           frame->Xcodelink
00514 #define data               frame->Xdata
00515 #define next               frame->Xnext
00516 #define pp                 frame->Xpp
00517 #define prev               frame->Xprev
00518 #define saved_eptr         frame->Xsaved_eptr
00519 
00520 #define new_recursive      frame->Xnew_recursive
00521 
00522 #define cur_is_word        frame->Xcur_is_word
00523 #define condition          frame->Xcondition
00524 #define prev_is_word       frame->Xprev_is_word
00525 
00526 #define original_ims       frame->Xoriginal_ims
00527 
00528 #ifdef SUPPORT_UCP
00529 #define prop_type          frame->Xprop_type
00530 #define prop_value         frame->Xprop_value
00531 #define prop_fail_result   frame->Xprop_fail_result
00532 #define prop_category      frame->Xprop_category
00533 #define prop_chartype      frame->Xprop_chartype
00534 #define prop_script        frame->Xprop_script
00535 #define oclength           frame->Xoclength
00536 #define occhars            frame->Xocchars
00537 #endif
00538 
00539 #define ctype              frame->Xctype
00540 #define fc                 frame->Xfc
00541 #define fi                 frame->Xfi
00542 #define length             frame->Xlength
00543 #define max                frame->Xmax
00544 #define min                frame->Xmin
00545 #define number             frame->Xnumber
00546 #define offset             frame->Xoffset
00547 #define op                 frame->Xop
00548 #define save_capture_last  frame->Xsave_capture_last
00549 #define save_offset1       frame->Xsave_offset1
00550 #define save_offset2       frame->Xsave_offset2
00551 #define save_offset3       frame->Xsave_offset3
00552 #define stacksave          frame->Xstacksave
00553 
00554 #define newptrb            frame->Xnewptrb
00555 
00556 /* When recursion is being used, local variables are allocated on the stack and
00557 get preserved during recursion in the normal way. In this environment, fi and
00558 i, and fc and c, can be the same variables. */
00559 
00560 #else         /* NO_RECURSE not defined */
00561 #define fi i
00562 #define fc c
00563 
00564 
00565 #ifdef SUPPORT_UTF8                /* Many of these variables are used only  */
00566 const uschar *charptr;             /* in small blocks of the code. My normal */
00567 #endif                             /* style of coding would have declared    */
00568 const uschar *callpat;             /* them within each of those blocks.      */
00569 const uschar *data;                /* However, in order to accommodate the   */
00570 const uschar *next;                /* version of this code that uses an      */
00571 USPTR         pp;                  /* external "stack" implemented on the    */
00572 const uschar *prev;                /* heap, it is easier to declare them all */
00573 USPTR         saved_eptr;          /* here, so the declarations can be cut   */
00574                                    /* out in a block. The only declarations  */
00575 recursion_info new_recursive;      /* within blocks below are for variables  */
00576                                    /* that do not have to be preserved over  */
00577 BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
00578 BOOL condition;
00579 BOOL prev_is_word;
00580 
00581 unsigned long int original_ims;
00582 
00583 #ifdef SUPPORT_UCP
00584 int prop_type;
00585 int prop_value;
00586 int prop_fail_result;
00587 int prop_category;
00588 int prop_chartype;
00589 int prop_script;
00590 int oclength;
00591 uschar occhars[8];
00592 #endif
00593 
00594 int codelink;
00595 int ctype;
00596 int length;
00597 int max;
00598 int min;
00599 int number;
00600 int offset;
00601 int op;
00602 int save_capture_last;
00603 int save_offset1, save_offset2, save_offset3;
00604 int stacksave[REC_STACK_SAVE_MAX];
00605 
00606 eptrblock newptrb;
00607 #endif     /* NO_RECURSE */
00608 
00609 /* These statements are here to stop the compiler complaining about uninitialized
00610 variables. */
00611 
00612 #ifdef SUPPORT_UCP
00613 prop_value = 0;
00614 prop_fail_result = 0;
00615 #endif
00616 
00617 
00618 /* This label is used for tail recursion, which is used in a few cases even
00619 when NO_RECURSE is not defined, in order to reduce the amount of stack that is
00620 used. Thanks to Ian Taylor for noticing this possibility and sending the
00621 original patch. */
00622 
00623 TAIL_RECURSE:
00624 
00625 /* OK, now we can get on with the real code of the function. Recursive calls
00626 are specified by the macro RMATCH and RRETURN is used to return. When
00627 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
00628 and a "return", respectively (possibly with some debugging if DEBUG_PCRE is
00629 defined). However, RMATCH isn't like a function call because it's quite a
00630 complicated macro. It has to be used in one particular way. This shouldn't,
00631 however, impact performance when true recursion is being used. */
00632 
00633 #ifdef SUPPORT_UTF8
00634 utf8 = md->utf8;       /* Local copy of the flag */
00635 #else
00636 utf8 = FALSE;
00637 #endif
00638 
00639 /* First check that we haven't called match() too many times, or that we
00640 haven't exceeded the recursive call limit. */
00641 
00642 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
00643 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
00644 
00645 original_ims = ims;    /* Save for resetting on ')' */
00646 
00647 /* At the start of a group with an unlimited repeat that may match an empty
00648 string, the match_cbegroup flag is set. When this is the case, add the current
00649 subject pointer to the chain of such remembered pointers, to be checked when we
00650 hit the closing ket, in order to break infinite loops that match no characters.
00651 When match() is called in other circumstances, don't add to the chain. The
00652 match_cbegroup flag must NOT be used with tail recursion, because the memory
00653 block that is used is on the stack, so a new one may be required for each
00654 match(). */
00655 
00656 if ((flags & match_cbegroup) != 0)
00657   {
00658   newptrb.epb_saved_eptr = eptr;
00659   newptrb.epb_prev = eptrb;
00660   eptrb = &newptrb;
00661   }
00662 
00663 /* Now start processing the opcodes. */
00664 
00665 for (;;)
00666   {
00667   minimize = possessive = FALSE;
00668   op = *ecode;
00669 
00670   switch(op)
00671     {
00672     case OP_FAIL:
00673     RRETURN(MATCH_NOMATCH);
00674 
00675     case OP_PRUNE:
00676     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
00677       ims, eptrb, flags, RM51);
00678     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
00679     RRETURN(MATCH_PRUNE);
00680 
00681     case OP_COMMIT:
00682     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
00683       ims, eptrb, flags, RM52);
00684     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
00685     RRETURN(MATCH_COMMIT);
00686 
00687     case OP_SKIP:
00688     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
00689       ims, eptrb, flags, RM53);
00690     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
00691     md->start_match_ptr = eptr;   /* Pass back current position */
00692     RRETURN(MATCH_SKIP);
00693 
00694     case OP_THEN:
00695     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
00696       ims, eptrb, flags, RM54);
00697     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
00698     RRETURN(MATCH_THEN);
00699 
00700     /* Handle a capturing bracket. If there is space in the offset vector, save
00701     the current subject position in the working slot at the top of the vector.
00702     We mustn't change the current values of the data slot, because they may be
00703     set from a previous iteration of this group, and be referred to by a
00704     reference inside the group.
00705 
00706     If the bracket fails to match, we need to restore this value and also the
00707     values of the final offsets, in case they were set by a previous iteration
00708     of the same bracket.
00709 
00710     If there isn't enough space in the offset vector, treat this as if it were
00711     a non-capturing bracket. Don't worry about setting the flag for the error
00712     case here; that is handled in the code for KET. */
00713 
00714     case OP_CBRA:
00715     case OP_SCBRA:
00716     number = GET2(ecode, 1+LINK_SIZE);
00717     offset = number << 1;
00718 
00719 #ifdef DEBUG_PCRE
00720     printf("start bracket %d\n", number);
00721     printf("subject=");
00722     pchars(eptr, 16, TRUE, md);
00723     printf("\n");
00724 #endif
00725 
00726     if (offset < md->offset_max)
00727       {
00728       save_offset1 = md->offset_vector[offset];
00729       save_offset2 = md->offset_vector[offset+1];
00730       save_offset3 = md->offset_vector[md->offset_end - number];
00731       save_capture_last = md->capture_last;
00732 
00733       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
00734       md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
00735 
00736       flags = (op == OP_SCBRA)? match_cbegroup : 0;
00737       do
00738         {
00739         RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
00740           ims, eptrb, flags, RM1);
00741         if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
00742         md->capture_last = save_capture_last;
00743         ecode += GET(ecode, 1);
00744         }
00745       while (*ecode == OP_ALT);
00746 
00747       DPRINTF(("bracket %d failed\n", number));
00748 
00749       md->offset_vector[offset] = save_offset1;
00750       md->offset_vector[offset+1] = save_offset2;
00751       md->offset_vector[md->offset_end - number] = save_offset3;
00752 
00753       RRETURN(MATCH_NOMATCH);
00754       }
00755 
00756     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
00757     as a non-capturing bracket. */
00758 
00759     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
00760     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
00761 
00762     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
00763 
00764     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
00765     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
00766 
00767     /* Non-capturing bracket. Loop for all the alternatives. When we get to the
00768     final alternative within the brackets, we would return the result of a
00769     recursive call to match() whatever happened. We can reduce stack usage by
00770     turning this into a tail recursion, except in the case when match_cbegroup
00771     is set.*/
00772 
00773     case OP_BRA:
00774     case OP_SBRA:
00775     DPRINTF(("start non-capturing bracket\n"));
00776     flags = (op >= OP_SBRA)? match_cbegroup : 0;
00777     for (;;)
00778       {
00779       if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
00780         {
00781         if (flags == 0)    /* Not a possibly empty group */
00782           {
00783           ecode += _pcre_OP_lengths[*ecode];
00784           DPRINTF(("bracket 0 tail recursion\n"));
00785           goto TAIL_RECURSE;
00786           }
00787 
00788         /* Possibly empty group; can't use tail recursion. */
00789 
00790         RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
00791           eptrb, flags, RM48);
00792         RRETURN(rrc);
00793         }
00794 
00795       /* For non-final alternatives, continue the loop for a NOMATCH result;
00796       otherwise return. */
00797 
00798       RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
00799         eptrb, flags, RM2);
00800       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
00801       ecode += GET(ecode, 1);
00802       }
00803     /* Control never reaches here. */
00804 
00805     /* Conditional group: compilation checked that there are no more than
00806     two branches. If the condition is false, skipping the first branch takes us
00807     past the end if there is only one branch, but that's OK because that is
00808     exactly what going to the ket would do. As there is only one branch to be
00809     obeyed, we can use tail recursion to avoid using another stack frame. */
00810 
00811     case OP_COND:
00812     case OP_SCOND:
00813     codelink= GET(ecode, 1);
00814 
00815     /* Because of the way auto-callout works during compile, a callout item is
00816     inserted between OP_COND and an assertion condition. */
00817 
00818     if (ecode[LINK_SIZE+1] == OP_CALLOUT)
00819       {
00820       if (pcre_callout != NULL)
00821         {
00822         pcre_callout_block cb;
00823         cb.version          = 1;   /* Version 1 of the callout block */
00824         cb.callout_number   = ecode[LINK_SIZE+2];
00825         cb.offset_vector    = md->offset_vector;
00826         cb.subject          = (PCRE_SPTR)md->start_subject;
00827         cb.subject_length   = md->end_subject - md->start_subject;
00828         cb.start_match      = mstart - md->start_subject;
00829         cb.current_position = eptr - md->start_subject;
00830         cb.pattern_position = GET(ecode, LINK_SIZE + 3);
00831         cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
00832         cb.capture_top      = offset_top/2;
00833         cb.capture_last     = md->capture_last;
00834         cb.callout_data     = md->callout_data;
00835         if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
00836         if (rrc < 0) RRETURN(rrc);
00837         }
00838       ecode += _pcre_OP_lengths[OP_CALLOUT];
00839       }
00840 
00841     condcode = ecode[LINK_SIZE+1];
00842 
00843     /* Now see what the actual condition is */
00844 
00845     if (condcode == OP_RREF || condcode == OP_NRREF)    /* Recursion test */
00846       {
00847       if (md->recursive == NULL)                /* Not recursing => FALSE */
00848         {
00849         condition = FALSE;
00850         ecode += GET(ecode, 1);
00851         }
00852       else
00853         {
00854         int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
00855         condition =  (recno == RREF_ANY || recno == md->recursive->group_num);
00856 
00857         /* If the test is for recursion into a specific subpattern, and it is
00858         false, but the test was set up by name, scan the table to see if the
00859         name refers to any other numbers, and test them. The condition is true
00860         if any one is set. */
00861 
00862         if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
00863           {
00864           uschar *slotA = md->name_table;
00865           for (i = 0; i < md->name_count; i++)
00866             {
00867             if (GET2(slotA, 0) == recno) break;
00868             slotA += md->name_entry_size;
00869             }
00870 
00871           /* Found a name for the number - there can be only one; duplicate
00872           names for different numbers are allowed, but not vice versa. First
00873           scan down for duplicates. */
00874 
00875           if (i < md->name_count)
00876             {
00877             uschar *slotB = slotA;
00878             while (slotB > md->name_table)
00879               {
00880               slotB -= md->name_entry_size;
00881               if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
00882                 {
00883                 condition = GET2(slotB, 0) == md->recursive->group_num;
00884                 if (condition) break;
00885                 }
00886               else break;
00887               }
00888 
00889             /* Scan up for duplicates */
00890 
00891             if (!condition)
00892               {
00893               slotB = slotA;
00894               for (i++; i < md->name_count; i++)
00895                 {
00896                 slotB += md->name_entry_size;
00897                 if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
00898                   {
00899                   condition = GET2(slotB, 0) == md->recursive->group_num;
00900                   if (condition) break;
00901                   }
00902                 else break;
00903                 }
00904               }
00905             }
00906           }
00907 
00908         /* Choose branch according to the condition */
00909 
00910         ecode += condition? 3 : GET(ecode, 1);
00911         }
00912       }
00913 
00914     else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */
00915       {
00916       offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
00917       condition = offset < offset_top && md->offset_vector[offset] >= 0;
00918 
00919       /* If the numbered capture is unset, but the reference was by name,
00920       scan the table to see if the name refers to any other numbers, and test
00921       them. The condition is true if any one is set. This is tediously similar
00922       to the code above, but not close enough to try to amalgamate. */
00923 
00924       if (!condition && condcode == OP_NCREF)
00925         {
00926         int refno = offset >> 1;
00927         uschar *slotA = md->name_table;
00928 
00929         for (i = 0; i < md->name_count; i++)
00930           {
00931           if (GET2(slotA, 0) == refno) break;
00932           slotA += md->name_entry_size;
00933           }
00934 
00935         /* Found a name for the number - there can be only one; duplicate names
00936         for different numbers are allowed, but not vice versa. First scan down
00937         for duplicates. */
00938 
00939         if (i < md->name_count)
00940           {
00941           uschar *slotB = slotA;
00942           while (slotB > md->name_table)
00943             {
00944             slotB -= md->name_entry_size;
00945             if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
00946               {
00947               offset = GET2(slotB, 0) << 1;
00948               condition = offset < offset_top &&
00949                 md->offset_vector[offset] >= 0;
00950               if (condition) break;
00951               }
00952             else break;
00953             }
00954 
00955           /* Scan up for duplicates */
00956 
00957           if (!condition)
00958             {
00959             slotB = slotA;
00960             for (i++; i < md->name_count; i++)
00961               {
00962               slotB += md->name_entry_size;
00963               if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
00964                 {
00965                 offset = GET2(slotB, 0) << 1;
00966                 condition = offset < offset_top &&
00967                   md->offset_vector[offset] >= 0;
00968                 if (condition) break;
00969                 }
00970               else break;
00971               }
00972             }
00973           }
00974         }
00975 
00976       /* Choose branch according to the condition */
00977 
00978       ecode += condition? 3 : GET(ecode, 1);
00979       }
00980 
00981     else if (condcode == OP_DEF)     /* DEFINE - always false */
00982       {
00983       condition = FALSE;
00984       ecode += GET(ecode, 1);
00985       }
00986 
00987     /* The condition is an assertion. Call match() to evaluate it - setting
00988     the final argument match_condassert causes it to stop at the end of an
00989     assertion. */
00990 
00991     else
00992       {
00993       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
00994           match_condassert, RM3);
00995       if (rrc == MATCH_MATCH)
00996         {
00997         condition = TRUE;
00998         ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
00999         while (*ecode == OP_ALT) ecode += GET(ecode, 1);
01000         }
01001       else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
01002         {
01003         RRETURN(rrc);         /* Need braces because of following else */
01004         }
01005       else
01006         {
01007         condition = FALSE;
01008         ecode += codelink;
01009         }
01010       }
01011 
01012     /* We are now at the branch that is to be obeyed. As there is only one,
01013     we can use tail recursion to avoid using another stack frame, except when
01014     match_cbegroup is required for an unlimited repeat of a possibly empty
01015     group. If the second alternative doesn't exist, we can just plough on. */
01016 
01017     if (condition || *ecode == OP_ALT)
01018       {
01019       ecode += 1 + LINK_SIZE;
01020       if (op == OP_SCOND)        /* Possibly empty group */
01021         {
01022         RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
01023         RRETURN(rrc);
01024         }
01025       else                       /* Group must match something */
01026         {
01027         flags = 0;
01028         goto TAIL_RECURSE;
01029         }
01030       }
01031     else                         /* Condition false & no alternative */
01032       {
01033       ecode += 1 + LINK_SIZE;
01034       }
01035     break;
01036 
01037 
01038     /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
01039     to close any currently open capturing brackets. */
01040 
01041     case OP_CLOSE:
01042     number = GET2(ecode, 1);
01043     offset = number << 1;
01044 
01045 #ifdef DEBUG_PCRE
01046       printf("end bracket %d at *ACCEPT", number);
01047       printf("\n");
01048 #endif
01049 
01050     md->capture_last = number;
01051     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
01052       {
01053       md->offset_vector[offset] =
01054         md->offset_vector[md->offset_end - number];
01055       md->offset_vector[offset+1] = eptr - md->start_subject;
01056       if (offset_top <= offset) offset_top = offset + 2;
01057       }
01058     ecode += 3;
01059     break;
01060 
01061 
01062     /* End of the pattern, either real or forced. If we are in a top-level
01063     recursion, we should restore the offsets appropriately and continue from
01064     after the call. */
01065 
01066     case OP_ACCEPT:
01067     case OP_END:
01068     if (md->recursive != NULL && md->recursive->group_num == 0)
01069       {
01070       recursion_info *rec = md->recursive;
01071       DPRINTF(("End of pattern in a (?0) recursion\n"));
01072       md->recursive = rec->prevrec;
01073       memmove(md->offset_vector, rec->offset_save,
01074         rec->saved_max * sizeof(int));
01075       offset_top = rec->save_offset_top;
01076       mstart = rec->save_start;
01077       ims = original_ims;
01078       ecode = rec->after_call;
01079       break;
01080       }
01081 
01082     /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
01083     set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
01084     the subject. In both cases, backtracking will then try other alternatives,
01085     if any. */
01086 
01087     if (eptr == mstart &&
01088         (md->notempty ||
01089           (md->notempty_atstart &&
01090             mstart == md->start_subject + md->start_offset)))
01091       RRETURN(MATCH_NOMATCH);
01092 
01093     /* Otherwise, we have a match. */
01094 
01095     md->end_match_ptr = eptr;           /* Record where we ended */
01096     md->end_offset_top = offset_top;    /* and how many extracts were taken */
01097     md->start_match_ptr = mstart;       /* and the start (\K can modify) */
01098     RRETURN(MATCH_MATCH);
01099 
01100     /* Change option settings */
01101 
01102     case OP_OPT:
01103     ims = ecode[1];
01104     ecode += 2;
01105     DPRINTF(("ims set to %02lx\n", ims));
01106     break;
01107 
01108     /* Assertion brackets. Check the alternative branches in turn - the
01109     matching won't pass the KET for an assertion. If any one branch matches,
01110     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
01111     start of each branch to move the current point backwards, so the code at
01112     this level is identical to the lookahead case. */
01113 
01114     case OP_ASSERT:
01115     case OP_ASSERTBACK:
01116     do
01117       {
01118       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
01119         RM4);
01120       if (rrc == MATCH_MATCH) break;
01121       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
01122       ecode += GET(ecode, 1);
01123       }
01124     while (*ecode == OP_ALT);
01125     if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
01126 
01127     /* If checking an assertion for a condition, return MATCH_MATCH. */
01128 
01129     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
01130 
01131     /* Continue from after the assertion, updating the offsets high water
01132     mark, since extracts may have been taken during the assertion. */
01133 
01134     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
01135     ecode += 1 + LINK_SIZE;
01136     offset_top = md->end_offset_top;
01137     continue;
01138 
01139     /* Negative assertion: all branches must fail to match */
01140 
01141     case OP_ASSERT_NOT:
01142     case OP_ASSERTBACK_NOT:
01143     do
01144       {
01145       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
01146         RM5);
01147       if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
01148       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
01149       ecode += GET(ecode,1);
01150       }
01151     while (*ecode == OP_ALT);
01152 
01153     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
01154 
01155     ecode += 1 + LINK_SIZE;
01156     continue;
01157 
01158     /* Move the subject pointer back. This occurs only at the start of
01159     each branch of a lookbehind assertion. If we are too close to the start to
01160     move back, this match function fails. When working with UTF-8 we move
01161     back a number of characters, not bytes. */
01162 
01163     case OP_REVERSE:
01164 #ifdef SUPPORT_UTF8
01165     if (utf8)
01166       {
01167       i = GET(ecode, 1);
01168       while (i-- > 0)
01169         {
01170         eptr--;
01171         if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
01172         BACKCHAR(eptr);
01173         }
01174       }
01175     else
01176 #endif
01177 
01178     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
01179 
01180       {
01181       eptr -= GET(ecode, 1);
01182       if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
01183       }
01184 
01185     /* Save the earliest consulted character, then skip to next op code */
01186 
01187     if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
01188     ecode += 1 + LINK_SIZE;
01189     break;
01190 
01191     /* The callout item calls an external function, if one is provided, passing
01192     details of the match so far. This is mainly for debugging, though the
01193     function is able to force a failure. */
01194 
01195     case OP_CALLOUT:
01196     if (pcre_callout != NULL)
01197       {
01198       pcre_callout_block cb;
01199       cb.version          = 1;   /* Version 1 of the callout block */
01200       cb.callout_number   = ecode[1];
01201       cb.offset_vector    = md->offset_vector;
01202       cb.subject          = (PCRE_SPTR)md->start_subject;
01203       cb.subject_length   = md->end_subject - md->start_subject;
01204       cb.start_match      = mstart - md->start_subject;
01205       cb.current_position = eptr - md->start_subject;
01206       cb.pattern_position = GET(ecode, 2);
01207       cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
01208       cb.capture_top      = offset_top/2;
01209       cb.capture_last     = md->capture_last;
01210       cb.callout_data     = md->callout_data;
01211       if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
01212       if (rrc < 0) RRETURN(rrc);
01213       }
01214     ecode += 2 + 2*LINK_SIZE;
01215     break;
01216 
01217     /* Recursion either matches the current regex, or some subexpression. The
01218     offset data is the offset to the starting bracket from the start of the
01219     whole pattern. (This is so that it works from duplicated subpatterns.)
01220 
01221     If there are any capturing brackets started but not finished, we have to
01222     save their starting points and reinstate them after the recursion. However,
01223     we don't know how many such there are (offset_top records the completed
01224     total) so we just have to save all the potential data. There may be up to
01225     65535 such values, which is too large to put on the stack, but using malloc
01226     for small numbers seems expensive. As a compromise, the stack is used when
01227     there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
01228     is used. If the malloc fails, the offsets cannot be saved, so the match is
01229     abandoned at this point by returning PCRE_ERROR_NOMEMORY from this call of
01230     match().
01231 
01232     There are also other values that have to be saved. We use a chained
01233     sequence of blocks that actually live on the stack. Thanks to Robin Houston
01234     for the original version of this logic. */
01235 
01236     case OP_RECURSE:
01237       {
01238       callpat = md->start_code + GET(ecode, 1);
01239       new_recursive.group_num = (callpat == md->start_code)? 0 :
01240         GET2(callpat, 1 + LINK_SIZE);
01241 
01242       /* Add to "recursing stack" */
01243 
01244       new_recursive.prevrec = md->recursive;
01245       md->recursive = &new_recursive;
01246 
01247       /* Find where to continue from afterwards */
01248 
01249       ecode += 1 + LINK_SIZE;
01250       new_recursive.after_call = ecode;
01251 
01252       /* Now save the offset data. */
01253 
01254       new_recursive.saved_max = md->offset_end;
01255       if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
01256         new_recursive.offset_save = stacksave;
01257       else
01258         {
01259         new_recursive.offset_save =
01260           (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
01261         if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
01262         }
01263 
01264       memcpy(new_recursive.offset_save, md->offset_vector,
01265             new_recursive.saved_max * sizeof(int));
01266       new_recursive.save_start = mstart;
01267       new_recursive.save_offset_top = offset_top;
01268       mstart = eptr;
01269 
01270       /* OK, now we can do the recursion. For each top-level alternative we
01271       restore the offset and recursion data. */
01272 
01273       DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
01274       flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
01275       do
01276         {
01277         RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
01278           md, ims, eptrb, flags, RM6);
01279         if (rrc == MATCH_MATCH)
01280           {
01281           DPRINTF(("Recursion matched\n"));
01282           md->recursive = new_recursive.prevrec;
01283           if (new_recursive.offset_save != stacksave)
01284             (pcre_free)(new_recursive.offset_save);
01285           RRETURN(MATCH_MATCH);
01286           }
01287         else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
01288           {
01289           DPRINTF(("Recursion gave error %d\n", rrc));
01290           if (new_recursive.offset_save != stacksave)
01291             (pcre_free)(new_recursive.offset_save);
01292           RRETURN(rrc);
01293           }
01294 
01295         md->recursive = &new_recursive;
01296         memcpy(md->offset_vector, new_recursive.offset_save,
01297             new_recursive.saved_max * sizeof(int));
01298         callpat += GET(callpat, 1);
01299         }
01300       while (*callpat == OP_ALT);
01301 
01302       DPRINTF(("Recursion didn't match\n"));
01303       md->recursive = new_recursive.prevrec;
01304       if (new_recursive.offset_save != stacksave)
01305         (pcre_free)(new_recursive.offset_save);
01306       RRETURN(MATCH_NOMATCH);
01307       }
01308     /* Control never reaches here */
01309 
01310     /* "Once" brackets are like assertion brackets except that after a match,
01311     the point in the subject string is not moved back. Thus there can never be
01312     a move back into the brackets. Friedl calls these "atomic" subpatterns.
01313     Check the alternative branches in turn - the matching won't pass the KET
01314     for this kind of subpattern. If any one branch matches, we carry on as at
01315     the end of a normal bracket, leaving the subject pointer. */
01316 
01317     case OP_ONCE:
01318     prev = ecode;
01319     saved_eptr = eptr;
01320 
01321     do
01322       {
01323       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
01324       if (rrc == MATCH_MATCH) break;
01325       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
01326       ecode += GET(ecode,1);
01327       }
01328     while (*ecode == OP_ALT);
01329 
01330     /* If we hit the end of the group (which could be repeated), fail */
01331 
01332     if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
01333 
01334     /* Continue as from after the assertion, updating the offsets high water
01335     mark, since extracts may have been taken. */
01336 
01337     do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
01338 
01339     offset_top = md->end_offset_top;
01340     eptr = md->end_match_ptr;
01341 
01342     /* For a non-repeating ket, just continue at this level. This also
01343     happens for a repeating ket if no characters were matched in the group.
01344     This is the forcible breaking of infinite loops as implemented in Perl
01345     5.005. If there is an options reset, it will get obeyed in the normal
01346     course of events. */
01347 
01348     if (*ecode == OP_KET || eptr == saved_eptr)
01349       {
01350       ecode += 1+LINK_SIZE;
01351       break;
01352       }
01353 
01354     /* The repeating kets try the rest of the pattern or restart from the
01355     preceding bracket, in the appropriate order. The second "call" of match()
01356     uses tail recursion, to avoid using another stack frame. We need to reset
01357     any options that changed within the bracket before re-running it, so
01358     check the next opcode. */
01359 
01360     if (ecode[1+LINK_SIZE] == OP_OPT)
01361       {
01362       ims = (ims & ~PCRE_IMS) | ecode[4];
01363       DPRINTF(("ims set to %02lx at group repeat\n", ims));
01364       }
01365 
01366     if (*ecode == OP_KETRMIN)
01367       {
01368       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
01369       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
01370       ecode = prev;
01371       flags = 0;
01372       goto TAIL_RECURSE;
01373       }
01374     else  /* OP_KETRMAX */
01375       {
01376       RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
01377       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
01378       ecode += 1 + LINK_SIZE;
01379       flags = 0;
01380       goto TAIL_RECURSE;
01381       }
01382     /* Control never gets here */
01383 
01384     /* An alternation is the end of a branch; scan along to find the end of the
01385     bracketed group and go to there. */
01386 
01387     case OP_ALT:
01388     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
01389     break;
01390 
01391     /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
01392     indicating that it may occur zero times. It may repeat infinitely, or not
01393     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
01394     with fixed upper repeat limits are compiled as a number of copies, with the
01395     optional ones preceded by BRAZERO or BRAMINZERO. */
01396 
01397     case OP_BRAZERO:
01398       {
01399       next = ecode+1;
01400       RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
01401       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
01402       do next += GET(next,1); while (*next == OP_ALT);
01403       ecode = next + 1 + LINK_SIZE;
01404       }
01405     break;
01406 
01407     case OP_BRAMINZERO:
01408       {
01409       next = ecode+1;
01410       do next += GET(next, 1); while (*next == OP_ALT);
01411       RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
01412       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
01413       ecode++;
01414       }
01415     break;
01416 
01417     case OP_SKIPZERO:
01418       {
01419       next = ecode+1;
01420       do next += GET(next,1); while (*next == OP_ALT);
01421       ecode = next + 1 + LINK_SIZE;
01422       }
01423     break;
01424 
01425     /* End of a group, repeated or non-repeating. */
01426 
01427     case OP_KET:
01428     case OP_KETRMIN:
01429     case OP_KETRMAX:
01430     prev = ecode - GET(ecode, 1);
01431 
01432     /* If this was a group that remembered the subject start, in order to break
01433     infinite repeats of empty string matches, retrieve the subject start from
01434     the chain. Otherwise, set it NULL. */
01435 
01436     if (*prev >= OP_SBRA)
01437       {
01438       saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
01439       eptrb = eptrb->epb_prev;              /* Backup to previous group */
01440       }
01441     else saved_eptr = NULL;
01442 
01443     /* If we are at the end of an assertion group, stop matching and return
01444     MATCH_MATCH, but record the current high water mark for use by positive
01445     assertions. Do this also for the "once" (atomic) groups. */
01446 
01447     if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
01448         *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
01449         *prev == OP_ONCE)
01450       {
01451       md->end_match_ptr = eptr;      /* For ONCE */
01452       md->end_offset_top = offset_top;
01453       RRETURN(MATCH_MATCH);
01454       }
01455 
01456     /* For capturing groups we have to check the group number back at the start
01457     and if necessary complete handling an extraction by setting the offsets and
01458     bumping the high water mark. Note that whole-pattern recursion is coded as
01459     a recurse into group 0, so it won't be picked up here. Instead, we catch it
01460     when the OP_END is reached. Other recursion is handled here. */
01461 
01462     if (*prev == OP_CBRA || *prev == OP_SCBRA)
01463       {
01464       number = GET2(prev, 1+LINK_SIZE);
01465       offset = number << 1;
01466 
01467 #ifdef DEBUG_PCRE
01468       printf("end bracket %d", number);
01469       printf("\n");
01470 #endif
01471 
01472       md->capture_last = number;
01473       if (offset >= md->offset_max) md->offset_overflow = TRUE; else
01474         {
01475         md->offset_vector[offset] =
01476           md->offset_vector[md->offset_end - number];
01477         md->offset_vector[offset+1] = eptr - md->start_subject;
01478         if (offset_top <= offset) offset_top = offset + 2;
01479         }
01480 
01481       /* Handle a recursively called group. Restore the offsets
01482       appropriately and continue from after the call. */
01483 
01484       if (md->recursive != NULL && md->recursive->group_num == number)
01485         {
01486         recursion_info *rec = md->recursive;
01487         DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
01488         md->recursive = rec->prevrec;
01489         mstart = rec->save_start;
01490         memcpy(md->offset_vector, rec->offset_save,
01491           rec->saved_max * sizeof(int));
01492         offset_top = rec->save_offset_top;
01493         ecode = rec->after_call;
01494         ims = original_ims;
01495         break;
01496         }
01497       }
01498 
01499     /* For both capturing and non-capturing groups, reset the value of the ims
01500     flags, in case they got changed during the group. */
01501 
01502     ims = original_ims;
01503     DPRINTF(("ims reset to %02lx\n", ims));
01504 
01505     /* For a non-repeating ket, just continue at this level. This also
01506     happens for a repeating ket if no characters were matched in the group.
01507     This is the forcible breaking of infinite loops as implemented in Perl
01508     5.005. If there is an options reset, it will get obeyed in the normal
01509     course of events. */
01510 
01511     if (*ecode == OP_KET || eptr == saved_eptr)
01512       {
01513       ecode += 1 + LINK_SIZE;
01514       break;
01515       }
01516 
01517     /* The repeating kets try the rest of the pattern or restart from the
01518     preceding bracket, in the appropriate order. In the second case, we can use
01519     tail recursion to avoid using another stack frame, unless we have an
01520     unlimited repeat of a group that can match an empty string. */
01521 
01522     flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
01523 
01524     if (*ecode == OP_KETRMIN)
01525       {
01526       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
01527       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
01528       if (flags != 0)    /* Could match an empty string */
01529         {
01530         RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
01531         RRETURN(rrc);
01532         }
01533       ecode = prev;
01534       goto TAIL_RECURSE;
01535       }
01536     else  /* OP_KETRMAX */
01537       {
01538       RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
01539       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
01540       ecode += 1 + LINK_SIZE;
01541       flags = 0;
01542       goto TAIL_RECURSE;
01543       }
01544     /* Control never gets here */
01545 
01546     /* Start of subject unless notbol, or after internal newline if multiline */
01547 
01548     case OP_CIRC:
01549     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
01550     if ((ims & PCRE_MULTILINE) != 0)
01551       {
01552       if (eptr != md->start_subject &&
01553           (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
01554         RRETURN(MATCH_NOMATCH);
01555       ecode++;
01556       break;
01557       }
01558     /* ... else fall through */
01559 
01560     /* Start of subject assertion */
01561 
01562     case OP_SOD:
01563     if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
01564     ecode++;
01565     break;
01566 
01567     /* Start of match assertion */
01568 
01569     case OP_SOM:
01570     if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
01571     ecode++;
01572     break;
01573 
01574     /* Reset the start of match point */
01575 
01576     case OP_SET_SOM:
01577     mstart = eptr;
01578     ecode++;
01579     break;
01580 
01581     /* Assert before internal newline if multiline, or before a terminating
01582     newline unless endonly is set, else end of subject unless noteol is set. */
01583 
01584     case OP_DOLL:
01585     if ((ims & PCRE_MULTILINE) != 0)
01586       {
01587       if (eptr < md->end_subject)
01588         { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
01589       else
01590         { if (md->noteol) RRETURN(MATCH_NOMATCH); }
01591       ecode++;
01592       break;
01593       }
01594     else
01595       {
01596       if (md->noteol) RRETURN(MATCH_NOMATCH);
01597       if (!md->endonly)
01598         {
01599         if (eptr != md->end_subject &&
01600             (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
01601           RRETURN(MATCH_NOMATCH);
01602         ecode++;
01603         break;
01604         }
01605       }
01606     /* ... else fall through for endonly */
01607 
01608     /* End of subject assertion (\z) */
01609 
01610     case OP_EOD:
01611     if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
01612     ecode++;
01613     break;
01614 
01615     /* End of subject or ending \n assertion (\Z) */
01616 
01617     case OP_EODN:
01618     if (eptr != md->end_subject &&
01619         (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
01620       RRETURN(MATCH_NOMATCH);
01621     ecode++;
01622     break;
01623 
01624     /* Word boundary assertions */
01625 
01626     case OP_NOT_WORD_BOUNDARY:
01627     case OP_WORD_BOUNDARY:
01628       {
01629 
01630       /* Find out if the previous and current characters are "word" characters.
01631       It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
01632       be "non-word" characters. Remember the earliest consulted character for
01633       partial matching. */
01634 
01635 #ifdef SUPPORT_UTF8
01636       if (utf8)
01637         {
01638         if (eptr == md->start_subject) prev_is_word = FALSE; else
01639           {
01640           USPTR lastptr = eptr - 1;
01641           while((*lastptr & 0xc0) == 0x80) lastptr--;
01642           if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
01643           GETCHAR(c, lastptr);
01644           prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
01645           }
01646         if (eptr >= md->end_subject)
01647           {
01648           SCHECK_PARTIAL();
01649           cur_is_word = FALSE;
01650           }
01651         else
01652           {
01653           GETCHAR(c, eptr);
01654           cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
01655           }
01656         }
01657       else
01658 #endif
01659 
01660       /* Not in UTF-8 mode */
01661 
01662         {
01663         if (eptr == md->start_subject) prev_is_word = FALSE; else
01664           {
01665           if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
01666           prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
01667           }
01668         if (eptr >= md->end_subject)
01669           {
01670           SCHECK_PARTIAL();
01671           cur_is_word = FALSE;
01672           }
01673         else cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
01674         }
01675 
01676       /* Now see if the situation is what we want */
01677 
01678       if ((*ecode++ == OP_WORD_BOUNDARY)?
01679            cur_is_word == prev_is_word : cur_is_word != prev_is_word)
01680         RRETURN(MATCH_NOMATCH);
01681       }
01682     break;
01683 
01684     /* Match a single character type; inline for speed */
01685 
01686     case OP_ANY:
01687     if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
01688     /* Fall through */
01689 
01690     case OP_ALLANY:
01691     if (eptr++ >= md->end_subject)
01692       {
01693       SCHECK_PARTIAL();
01694       RRETURN(MATCH_NOMATCH);
01695       }
01696     if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
01697     ecode++;
01698     break;
01699 
01700     /* Match a single byte, even in UTF-8 mode. This opcode really does match
01701     any byte, even newline, independent of the setting of PCRE_DOTALL. */
01702 
01703     case OP_ANYBYTE:
01704     if (eptr++ >= md->end_subject)
01705       {
01706       SCHECK_PARTIAL();
01707       RRETURN(MATCH_NOMATCH);
01708       }
01709     ecode++;
01710     break;
01711 
01712     case OP_NOT_DIGIT:
01713     if (eptr >= md->end_subject)
01714       {
01715       SCHECK_PARTIAL();
01716       RRETURN(MATCH_NOMATCH);
01717       }
01718     GETCHARINCTEST(c, eptr);
01719     if (
01720 #ifdef SUPPORT_UTF8
01721        c < 256 &&
01722 #endif
01723        (md->ctypes[c] & ctype_digit) != 0
01724        )
01725       RRETURN(MATCH_NOMATCH);
01726     ecode++;
01727     break;
01728 
01729     case OP_DIGIT:
01730     if (eptr >= md->end_subject)
01731       {
01732       SCHECK_PARTIAL();
01733       RRETURN(MATCH_NOMATCH);
01734       }
01735     GETCHARINCTEST(c, eptr);
01736     if (
01737 #ifdef SUPPORT_UTF8
01738        c >= 256 ||
01739 #endif
01740        (md->ctypes[c] & ctype_digit) == 0
01741        )
01742       RRETURN(MATCH_NOMATCH);
01743     ecode++;
01744     break;
01745 
01746     case OP_NOT_WHITESPACE:
01747     if (eptr >= md->end_subject)
01748       {
01749       SCHECK_PARTIAL();
01750       RRETURN(MATCH_NOMATCH);
01751       }
01752     GETCHARINCTEST(c, eptr);
01753     if (
01754 #ifdef SUPPORT_UTF8
01755        c < 256 &&
01756 #endif
01757        (md->ctypes[c] & ctype_space) != 0
01758        )
01759       RRETURN(MATCH_NOMATCH);
01760     ecode++;
01761     break;
01762 
01763     case OP_WHITESPACE:
01764     if (eptr >= md->end_subject)
01765       {
01766       SCHECK_PARTIAL();
01767       RRETURN(MATCH_NOMATCH);
01768       }
01769     GETCHARINCTEST(c, eptr);
01770     if (
01771 #ifdef SUPPORT_UTF8
01772        c >= 256 ||
01773 #endif
01774        (md->ctypes[c] & ctype_space) == 0
01775        )
01776       RRETURN(MATCH_NOMATCH);
01777     ecode++;
01778     break;
01779 
01780     case OP_NOT_WORDCHAR:
01781     if (eptr >= md->end_subject)
01782       {
01783       SCHECK_PARTIAL();
01784       RRETURN(MATCH_NOMATCH);
01785       }
01786     GETCHARINCTEST(c, eptr);
01787     if (
01788 #ifdef SUPPORT_UTF8
01789        c < 256 &&
01790 #endif
01791        (md->ctypes[c] & ctype_word) != 0
01792        )
01793       RRETURN(MATCH_NOMATCH);
01794     ecode++;
01795     break;
01796 
01797     case OP_WORDCHAR:
01798     if (eptr >= md->end_subject)
01799       {
01800       SCHECK_PARTIAL();
01801       RRETURN(MATCH_NOMATCH);
01802       }
01803     GETCHARINCTEST(c, eptr);
01804     if (
01805 #ifdef SUPPORT_UTF8
01806        c >= 256 ||
01807 #endif
01808        (md->ctypes[c] & ctype_word) == 0
01809        )
01810       RRETURN(MATCH_NOMATCH);
01811     ecode++;
01812     break;
01813 
01814     case OP_ANYNL:
01815     if (eptr >= md->end_subject)
01816       {
01817       SCHECK_PARTIAL();
01818       RRETURN(MATCH_NOMATCH);
01819       }
01820     GETCHARINCTEST(c, eptr);
01821     switch(c)
01822       {
01823       default: RRETURN(MATCH_NOMATCH);
01824       case 0x000d:
01825       if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
01826       break;
01827 
01828       case 0x000a:
01829       break;
01830 
01831       case 0x000b:
01832       case 0x000c:
01833       case 0x0085:
01834       case 0x2028:
01835       case 0x2029:
01836       if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
01837       break;
01838       }
01839     ecode++;
01840     break;
01841 
01842     case OP_NOT_HSPACE:
01843     if (eptr >= md->end_subject)
01844       {
01845       SCHECK_PARTIAL();
01846       RRETURN(MATCH_NOMATCH);
01847       }
01848     GETCHARINCTEST(c, eptr);
01849     switch(c)
01850       {
01851       default: break;
01852       case 0x09:      /* HT */
01853       case 0x20:      /* SPACE */
01854       case 0xa0:      /* NBSP */
01855       case 0x1680:    /* OGHAM SPACE MARK */
01856       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
01857       case 0x2000:    /* EN QUAD */
01858       case 0x2001:    /* EM QUAD */
01859       case 0x2002:    /* EN SPACE */
01860       case 0x2003:    /* EM SPACE */
01861       case 0x2004:    /* THREE-PER-EM SPACE */
01862       case 0x2005:    /* FOUR-PER-EM SPACE */
01863       case 0x2006:    /* SIX-PER-EM SPACE */
01864       case 0x2007:    /* FIGURE SPACE */
01865       case 0x2008:    /* PUNCTUATION SPACE */
01866       case 0x2009:    /* THIN SPACE */
01867       case 0x200A:    /* HAIR SPACE */
01868       case 0x202f:    /* NARROW NO-BREAK SPACE */
01869       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
01870       case 0x3000:    /* IDEOGRAPHIC SPACE */
01871       RRETURN(MATCH_NOMATCH);
01872       }
01873     ecode++;
01874     break;
01875 
01876     case OP_HSPACE:
01877     if (eptr >= md->end_subject)
01878       {
01879       SCHECK_PARTIAL();
01880       RRETURN(MATCH_NOMATCH);
01881       }
01882     GETCHARINCTEST(c, eptr);
01883     switch(c)
01884       {
01885       default: RRETURN(MATCH_NOMATCH);
01886       case 0x09:      /* HT */
01887       case 0x20:      /* SPACE */
01888       case 0xa0:      /* NBSP */
01889       case 0x1680:    /* OGHAM SPACE MARK */
01890       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
01891       case 0x2000:    /* EN QUAD */
01892       case 0x2001:    /* EM QUAD */
01893       case 0x2002:    /* EN SPACE */
01894       case 0x2003:    /* EM SPACE */
01895       case 0x2004:    /* THREE-PER-EM SPACE */
01896       case 0x2005:    /* FOUR-PER-EM SPACE */
01897       case 0x2006:    /* SIX-PER-EM SPACE */
01898       case 0x2007:    /* FIGURE SPACE */
01899       case 0x2008:    /* PUNCTUATION SPACE */
01900       case 0x2009:    /* THIN SPACE */
01901       case 0x200A:    /* HAIR SPACE */
01902       case 0x202f:    /* NARROW NO-BREAK SPACE */
01903       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
01904       case 0x3000:    /* IDEOGRAPHIC SPACE */
01905       break;
01906       }
01907     ecode++;
01908     break;
01909 
01910     case OP_NOT_VSPACE:
01911     if (eptr >= md->end_subject)
01912       {
01913       SCHECK_PARTIAL();
01914       RRETURN(MATCH_NOMATCH);
01915       }
01916     GETCHARINCTEST(c, eptr);
01917     switch(c)
01918       {
01919       default: break;
01920       case 0x0a:      /* LF */
01921       case 0x0b:      /* VT */
01922       case 0x0c:      /* FF */
01923       case 0x0d:      /* CR */
01924       case 0x85:      /* NEL */
01925       case 0x2028:    /* LINE SEPARATOR */
01926       case 0x2029:    /* PARAGRAPH SEPARATOR */
01927       RRETURN(MATCH_NOMATCH);
01928       }
01929     ecode++;
01930     break;
01931 
01932     case OP_VSPACE:
01933     if (eptr >= md->end_subject)
01934       {
01935       SCHECK_PARTIAL();
01936       RRETURN(MATCH_NOMATCH);
01937       }
01938     GETCHARINCTEST(c, eptr);
01939     switch(c)
01940       {
01941       default: RRETURN(MATCH_NOMATCH);
01942       case 0x0a:      /* LF */
01943       case 0x0b:      /* VT */
01944       case 0x0c:      /* FF */
01945       case 0x0d:      /* CR */
01946       case 0x85:      /* NEL */
01947       case 0x2028:    /* LINE SEPARATOR */
01948       case 0x2029:    /* PARAGRAPH SEPARATOR */
01949       break;
01950       }
01951     ecode++;
01952     break;
01953 
01954 #ifdef SUPPORT_UCP
01955     /* Check the next character by Unicode property. We will get here only
01956     if the support is in the binary; otherwise a compile-time error occurs. */
01957 
01958     case OP_PROP:
01959     case OP_NOTPROP:
01960     if (eptr >= md->end_subject)
01961       {
01962       SCHECK_PARTIAL();
01963       RRETURN(MATCH_NOMATCH);
01964       }
01965     GETCHARINCTEST(c, eptr);
01966       {
01967       const ucd_record *prop = GET_UCD(c);
01968 
01969       switch(ecode[1])
01970         {
01971         case PT_ANY:
01972         if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
01973         break;
01974 
01975         case PT_LAMP:
01976         if ((prop->chartype == ucp_Lu ||
01977              prop->chartype == ucp_Ll ||
01978              prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
01979           RRETURN(MATCH_NOMATCH);
01980          break;
01981 
01982         case PT_GC:
01983         if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
01984           RRETURN(MATCH_NOMATCH);
01985         break;
01986 
01987         case PT_PC:
01988         if ((ecode[2] != prop->chartype) == (op == OP_PROP))
01989           RRETURN(MATCH_NOMATCH);
01990         break;
01991 
01992         case PT_SC:
01993         if ((ecode[2] != prop->script) == (op == OP_PROP))
01994           RRETURN(MATCH_NOMATCH);
01995         break;
01996 
01997         default:
01998         RRETURN(PCRE_ERROR_INTERNAL);
01999         }
02000 
02001       ecode += 3;
02002       }
02003     break;
02004 
02005     /* Match an extended Unicode sequence. We will get here only if the support
02006     is in the binary; otherwise a compile-time error occurs. */
02007 
02008     case OP_EXTUNI:
02009     if (eptr >= md->end_subject)
02010       {
02011       SCHECK_PARTIAL();
02012       RRETURN(MATCH_NOMATCH);
02013       }
02014     GETCHARINCTEST(c, eptr);
02015       {
02016       int category = UCD_CATEGORY(c);
02017       if (category == ucp_M) RRETURN(MATCH_NOMATCH);
02018       while (eptr < md->end_subject)
02019         {
02020         int len = 1;
02021         if (!utf8) c = *eptr; else
02022           {
02023           GETCHARLEN(c, eptr, len);
02024           }
02025         category = UCD_CATEGORY(c);
02026         if (category != ucp_M) break;
02027         eptr += len;
02028         }
02029       }
02030     ecode++;
02031     break;
02032 #endif
02033 
02034 
02035     /* Match a back reference, possibly repeatedly. Look past the end of the
02036     item to see if there is repeat information following. The code is similar
02037     to that for character classes, but repeated for efficiency. Then obey
02038     similar code to character type repeats - written out again for speed.
02039     However, if the referenced string is the empty string, always treat
02040     it as matched, any number of times (otherwise there could be infinite
02041     loops). */
02042 
02043     case OP_REF:
02044       {
02045       offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
02046       ecode += 3;
02047 
02048       /* If the reference is unset, there are two possibilities:
02049 
02050       (a) In the default, Perl-compatible state, set the length to be longer
02051       than the amount of subject left; this ensures that every attempt at a
02052       match fails. We can't just fail here, because of the possibility of
02053       quantifiers with zero minima.
02054 
02055       (b) If the JavaScript compatibility flag is set, set the length to zero
02056       so that the back reference matches an empty string.
02057 
02058       Otherwise, set the length to the length of what was matched by the
02059       referenced subpattern. */
02060 
02061       if (offset >= offset_top || md->offset_vector[offset] < 0)
02062         length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
02063       else
02064         length = md->offset_vector[offset+1] - md->offset_vector[offset];
02065 
02066       /* Set up for repetition, or handle the non-repeated case */
02067 
02068       switch (*ecode)
02069         {
02070         case OP_CRSTAR:
02071         case OP_CRMINSTAR:
02072         case OP_CRPLUS:
02073         case OP_CRMINPLUS:
02074         case OP_CRQUERY:
02075         case OP_CRMINQUERY:
02076         c = *ecode++ - OP_CRSTAR;
02077         minimize = (c & 1) != 0;
02078         min = rep_min[c];                 /* Pick up values from tables; */
02079         max = rep_max[c];                 /* zero for max => infinity */
02080         if (max == 0) max = INT_MAX;
02081         break;
02082 
02083         case OP_CRRANGE:
02084         case OP_CRMINRANGE:
02085         minimize = (*ecode == OP_CRMINRANGE);
02086         min = GET2(ecode, 1);
02087         max = GET2(ecode, 3);
02088         if (max == 0) max = INT_MAX;
02089         ecode += 5;
02090         break;
02091 
02092         default:               /* No repeat follows */
02093         if (!match_ref(offset, eptr, length, md, ims))
02094           {
02095           CHECK_PARTIAL();
02096           RRETURN(MATCH_NOMATCH);
02097           }
02098         eptr += length;
02099         continue;              /* With the main loop */
02100         }
02101 
02102       /* If the length of the reference is zero, just continue with the
02103       main loop. */
02104 
02105       if (length == 0) continue;
02106 
02107       /* First, ensure the minimum number of matches are present. We get back
02108       the length of the reference string explicitly rather than passing the
02109       address of eptr, so that eptr can be a register variable. */
02110 
02111       for (i = 1; i <= min; i++)
02112         {
02113         if (!match_ref(offset, eptr, length, md, ims))
02114           {
02115           CHECK_PARTIAL();
02116           RRETURN(MATCH_NOMATCH);
02117           }
02118         eptr += length;
02119         }
02120 
02121       /* If min = max, continue at the same level without recursion.
02122       They are not both allowed to be zero. */
02123 
02124       if (min == max) continue;
02125 
02126       /* If minimizing, keep trying and advancing the pointer */
02127 
02128       if (minimize)
02129         {
02130         for (fi = min;; fi++)
02131           {
02132           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
02133           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02134           if (fi >= max) RRETURN(MATCH_NOMATCH);
02135           if (!match_ref(offset, eptr, length, md, ims))
02136             {
02137             CHECK_PARTIAL();
02138             RRETURN(MATCH_NOMATCH);
02139             }
02140           eptr += length;
02141           }
02142         /* Control never gets here */
02143         }
02144 
02145       /* If maximizing, find the longest string and work backwards */
02146 
02147       else
02148         {
02149         pp = eptr;
02150         for (i = min; i < max; i++)
02151           {
02152           if (!match_ref(offset, eptr, length, md, ims))
02153             {
02154             CHECK_PARTIAL();
02155             break;
02156             }
02157           eptr += length;
02158           }
02159         while (eptr >= pp)
02160           {
02161           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
02162           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02163           eptr -= length;
02164           }
02165         RRETURN(MATCH_NOMATCH);
02166         }
02167       }
02168     /* Control never gets here */
02169 
02170     /* Match a bit-mapped character class, possibly repeatedly. This op code is
02171     used when all the characters in the class have values in the range 0-255,
02172     and either the matching is caseful, or the characters are in the range
02173     0-127 when UTF-8 processing is enabled. The only difference between
02174     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
02175     encountered.
02176 
02177     First, look past the end of the item to see if there is repeat information
02178     following. Then obey similar code to character type repeats - written out
02179     again for speed. */
02180 
02181     case OP_NCLASS:
02182     case OP_CLASS:
02183       {
02184       data = ecode + 1;                /* Save for matching */
02185       ecode += 33;                     /* Advance past the item */
02186 
02187       switch (*ecode)
02188         {
02189         case OP_CRSTAR:
02190         case OP_CRMINSTAR:
02191         case OP_CRPLUS:
02192         case OP_CRMINPLUS:
02193         case OP_CRQUERY:
02194         case OP_CRMINQUERY:
02195         c = *ecode++ - OP_CRSTAR;
02196         minimize = (c & 1) != 0;
02197         min = rep_min[c];                 /* Pick up values from tables; */
02198         max = rep_max[c];                 /* zero for max => infinity */
02199         if (max == 0) max = INT_MAX;
02200         break;
02201 
02202         case OP_CRRANGE:
02203         case OP_CRMINRANGE:
02204         minimize = (*ecode == OP_CRMINRANGE);
02205         min = GET2(ecode, 1);
02206         max = GET2(ecode, 3);
02207         if (max == 0) max = INT_MAX;
02208         ecode += 5;
02209         break;
02210 
02211         default:               /* No repeat follows */
02212         min = max = 1;
02213         break;
02214         }
02215 
02216       /* First, ensure the minimum number of matches are present. */
02217 
02218 #ifdef SUPPORT_UTF8
02219       /* UTF-8 mode */
02220       if (utf8)
02221         {
02222         for (i = 1; i <= min; i++)
02223           {
02224           if (eptr >= md->end_subject)
02225             {
02226             SCHECK_PARTIAL();
02227             RRETURN(MATCH_NOMATCH);
02228             }
02229           GETCHARINC(c, eptr);
02230           if (c > 255)
02231             {
02232             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
02233             }
02234           else
02235             {
02236             if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
02237             }
02238           }
02239         }
02240       else
02241 #endif
02242       /* Not UTF-8 mode */
02243         {
02244         for (i = 1; i <= min; i++)
02245           {
02246           if (eptr >= md->end_subject)
02247             {
02248             SCHECK_PARTIAL();
02249             RRETURN(MATCH_NOMATCH);
02250             }
02251           c = *eptr++;
02252           if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
02253           }
02254         }
02255 
02256       /* If max == min we can continue with the main loop without the
02257       need to recurse. */
02258 
02259       if (min == max) continue;
02260 
02261       /* If minimizing, keep testing the rest of the expression and advancing
02262       the pointer while it matches the class. */
02263 
02264       if (minimize)
02265         {
02266 #ifdef SUPPORT_UTF8
02267         /* UTF-8 mode */
02268         if (utf8)
02269           {
02270           for (fi = min;; fi++)
02271             {
02272             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
02273             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02274             if (fi >= max) RRETURN(MATCH_NOMATCH);
02275             if (eptr >= md->end_subject)
02276               {
02277               SCHECK_PARTIAL();
02278               RRETURN(MATCH_NOMATCH);
02279               }
02280             GETCHARINC(c, eptr);
02281             if (c > 255)
02282               {
02283               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
02284               }
02285             else
02286               {
02287               if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
02288               }
02289             }
02290           }
02291         else
02292 #endif
02293         /* Not UTF-8 mode */
02294           {
02295           for (fi = min;; fi++)
02296             {
02297             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
02298             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02299             if (fi >= max) RRETURN(MATCH_NOMATCH);
02300             if (eptr >= md->end_subject)
02301               {
02302               SCHECK_PARTIAL();
02303               RRETURN(MATCH_NOMATCH);
02304               }
02305             c = *eptr++;
02306             if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
02307             }
02308           }
02309         /* Control never gets here */
02310         }
02311 
02312       /* If maximizing, find the longest possible run, then work backwards. */
02313 
02314       else
02315         {
02316         pp = eptr;
02317 
02318 #ifdef SUPPORT_UTF8
02319         /* UTF-8 mode */
02320         if (utf8)
02321           {
02322           for (i = min; i < max; i++)
02323             {
02324             int len = 1;
02325             if (eptr >= md->end_subject)
02326               {
02327               SCHECK_PARTIAL();
02328               break;
02329               }
02330             GETCHARLEN(c, eptr, len);
02331             if (c > 255)
02332               {
02333               if (op == OP_CLASS) break;
02334               }
02335             else
02336               {
02337               if ((data[c/8] & (1 << (c&7))) == 0) break;
02338               }
02339             eptr += len;
02340             }
02341           for (;;)
02342             {
02343             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
02344             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02345             if (eptr-- == pp) break;        /* Stop if tried at original pos */
02346             BACKCHAR(eptr);
02347             }
02348           }
02349         else
02350 #endif
02351           /* Not UTF-8 mode */
02352           {
02353           for (i = min; i < max; i++)
02354             {
02355             if (eptr >= md->end_subject)
02356               {
02357               SCHECK_PARTIAL();
02358               break;
02359               }
02360             c = *eptr;
02361             if ((data[c/8] & (1 << (c&7))) == 0) break;
02362             eptr++;
02363             }
02364           while (eptr >= pp)
02365             {
02366             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
02367             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02368             eptr--;
02369             }
02370           }
02371 
02372         RRETURN(MATCH_NOMATCH);
02373         }
02374       }
02375     /* Control never gets here */
02376 
02377 
02378     /* Match an extended character class. This opcode is encountered only
02379     when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
02380     mode, because Unicode properties are supported in non-UTF-8 mode. */
02381 
02382 #ifdef SUPPORT_UTF8
02383     case OP_XCLASS:
02384       {
02385       data = ecode + 1 + LINK_SIZE;                /* Save for matching */
02386       ecode += GET(ecode, 1);                      /* Advance past the item */
02387 
02388       switch (*ecode)
02389         {
02390         case OP_CRSTAR:
02391         case OP_CRMINSTAR:
02392         case OP_CRPLUS:
02393         case OP_CRMINPLUS:
02394         case OP_CRQUERY:
02395         case OP_CRMINQUERY:
02396         c = *ecode++ - OP_CRSTAR;
02397         minimize = (c & 1) != 0;
02398         min = rep_min[c];                 /* Pick up values from tables; */
02399         max = rep_max[c];                 /* zero for max => infinity */
02400         if (max == 0) max = INT_MAX;
02401         break;
02402 
02403         case OP_CRRANGE:
02404         case OP_CRMINRANGE:
02405         minimize = (*ecode == OP_CRMINRANGE);
02406         min = GET2(ecode, 1);
02407         max = GET2(ecode, 3);
02408         if (max == 0) max = INT_MAX;
02409         ecode += 5;
02410         break;
02411 
02412         default:               /* No repeat follows */
02413         min = max = 1;
02414         break;
02415         }
02416 
02417       /* First, ensure the minimum number of matches are present. */
02418 
02419       for (i = 1; i <= min; i++)
02420         {
02421         if (eptr >= md->end_subject)
02422           {
02423           SCHECK_PARTIAL();
02424           RRETURN(MATCH_NOMATCH);
02425           }
02426         GETCHARINCTEST(c, eptr);
02427         if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
02428         }
02429 
02430       /* If max == min we can continue with the main loop without the
02431       need to recurse. */
02432 
02433       if (min == max) continue;
02434 
02435       /* If minimizing, keep testing the rest of the expression and advancing
02436       the pointer while it matches the class. */
02437 
02438       if (minimize)
02439         {
02440         for (fi = min;; fi++)
02441           {
02442           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
02443           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02444           if (fi >= max) RRETURN(MATCH_NOMATCH);
02445           if (eptr >= md->end_subject)
02446             {
02447             SCHECK_PARTIAL();
02448             RRETURN(MATCH_NOMATCH);
02449             }
02450           GETCHARINCTEST(c, eptr);
02451           if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
02452           }
02453         /* Control never gets here */
02454         }
02455 
02456       /* If maximizing, find the longest possible run, then work backwards. */
02457 
02458       else
02459         {
02460         pp = eptr;
02461         for (i = min; i < max; i++)
02462           {
02463           int len = 1;
02464           if (eptr >= md->end_subject)
02465             {
02466             SCHECK_PARTIAL();
02467             break;
02468             }
02469           GETCHARLENTEST(c, eptr, len);
02470           if (!_pcre_xclass(c, data)) break;
02471           eptr += len;
02472           }
02473         for(;;)
02474           {
02475           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
02476           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02477           if (eptr-- == pp) break;        /* Stop if tried at original pos */
02478           if (utf8) BACKCHAR(eptr);
02479           }
02480         RRETURN(MATCH_NOMATCH);
02481         }
02482 
02483       /* Control never gets here */
02484       }
02485 #endif    /* End of XCLASS */
02486 
02487     /* Match a single character, casefully */
02488 
02489     case OP_CHAR:
02490 #ifdef SUPPORT_UTF8
02491     if (utf8)
02492       {
02493       length = 1;
02494       ecode++;
02495       GETCHARLEN(fc, ecode, length);
02496       if (length > md->end_subject - eptr)
02497         {
02498         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
02499         RRETURN(MATCH_NOMATCH);
02500         }
02501       while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
02502       }
02503     else
02504 #endif
02505 
02506     /* Non-UTF-8 mode */
02507       {
02508       if (md->end_subject - eptr < 1)
02509         {
02510         SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
02511         RRETURN(MATCH_NOMATCH);
02512         }
02513       if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
02514       ecode += 2;
02515       }
02516     break;
02517 
02518     /* Match a single character, caselessly */
02519 
02520     case OP_CHARNC:
02521 #ifdef SUPPORT_UTF8
02522     if (utf8)
02523       {
02524       length = 1;
02525       ecode++;
02526       GETCHARLEN(fc, ecode, length);
02527 
02528       if (length > md->end_subject - eptr)
02529         {
02530         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
02531         RRETURN(MATCH_NOMATCH);
02532         }
02533 
02534       /* If the pattern character's value is < 128, we have only one byte, and
02535       can use the fast lookup table. */
02536 
02537       if (fc < 128)
02538         {
02539         if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
02540         }
02541 
02542       /* Otherwise we must pick up the subject character */
02543 
02544       else
02545         {
02546         unsigned int dc;
02547         GETCHARINC(dc, eptr);
02548         ecode += length;
02549 
02550         /* If we have Unicode property support, we can use it to test the other
02551         case of the character, if there is one. */
02552 
02553         if (fc != dc)
02554           {
02555 #ifdef SUPPORT_UCP
02556           if (dc != UCD_OTHERCASE(fc))
02557 #endif
02558             RRETURN(MATCH_NOMATCH);
02559           }
02560         }
02561       }
02562     else
02563 #endif   /* SUPPORT_UTF8 */
02564 
02565     /* Non-UTF-8 mode */
02566       {
02567       if (md->end_subject - eptr < 1)
02568         {
02569         SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
02570         RRETURN(MATCH_NOMATCH);
02571         }
02572       if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
02573       ecode += 2;
02574       }
02575     break;
02576 
02577     /* Match a single character repeatedly. */
02578 
02579     case OP_EXACT:
02580     min = max = GET2(ecode, 1);
02581     ecode += 3;
02582     goto REPEATCHAR;
02583 
02584     case OP_POSUPTO:
02585     possessive = TRUE;
02586     /* Fall through */
02587 
02588     case OP_UPTO:
02589     case OP_MINUPTO:
02590     min = 0;
02591     max = GET2(ecode, 1);
02592     minimize = *ecode == OP_MINUPTO;
02593     ecode += 3;
02594     goto REPEATCHAR;
02595 
02596     case OP_POSSTAR:
02597     possessive = TRUE;
02598     min = 0;
02599     max = INT_MAX;
02600     ecode++;
02601     goto REPEATCHAR;
02602 
02603     case OP_POSPLUS:
02604     possessive = TRUE;
02605     min = 1;
02606     max = INT_MAX;
02607     ecode++;
02608     goto REPEATCHAR;
02609 
02610     case OP_POSQUERY:
02611     possessive = TRUE;
02612     min = 0;
02613     max = 1;
02614     ecode++;
02615     goto REPEATCHAR;
02616 
02617     case OP_STAR:
02618     case OP_MINSTAR:
02619     case OP_PLUS:
02620     case OP_MINPLUS:
02621     case OP_QUERY:
02622     case OP_MINQUERY:
02623     c = *ecode++ - OP_STAR;
02624     minimize = (c & 1) != 0;
02625 
02626     min = rep_min[c];                 /* Pick up values from tables; */
02627     max = rep_max[c];                 /* zero for max => infinity */
02628     if (max == 0) max = INT_MAX;
02629 
02630     /* Common code for all repeated single-character matches. */
02631 
02632     REPEATCHAR:
02633 #ifdef SUPPORT_UTF8
02634     if (utf8)
02635       {
02636       length = 1;
02637       charptr = ecode;
02638       GETCHARLEN(fc, ecode, length);
02639       ecode += length;
02640 
02641       /* Handle multibyte character matching specially here. There is
02642       support for caseless matching if UCP support is present. */
02643 
02644       if (length > 1)
02645         {
02646 #ifdef SUPPORT_UCP
02647         unsigned int othercase;
02648         if ((ims & PCRE_CASELESS) != 0 &&
02649             (othercase = UCD_OTHERCASE(fc)) != fc)
02650           oclength = _pcre_ord2utf8(othercase, occhars);
02651         else oclength = 0;
02652 #endif  /* SUPPORT_UCP */
02653 
02654         for (i = 1; i <= min; i++)
02655           {
02656           if (eptr <= md->end_subject - length &&
02657             memcmp(eptr, charptr, length) == 0) eptr += length;
02658 #ifdef SUPPORT_UCP
02659           else if (oclength > 0 &&
02660                    eptr <= md->end_subject - oclength &&
02661                    memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
02662 #endif  /* SUPPORT_UCP */
02663           else
02664             {
02665             CHECK_PARTIAL();
02666             RRETURN(MATCH_NOMATCH);
02667             }
02668           }
02669 
02670         if (min == max) continue;
02671 
02672         if (minimize)
02673           {
02674           for (fi = min;; fi++)
02675             {
02676             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
02677             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02678             if (fi >= max) RRETURN(MATCH_NOMATCH);
02679             if (eptr <= md->end_subject - length &&
02680               memcmp(eptr, charptr, length) == 0) eptr += length;
02681 #ifdef SUPPORT_UCP
02682             else if (oclength > 0 &&
02683                      eptr <= md->end_subject - oclength &&
02684                      memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
02685 #endif  /* SUPPORT_UCP */
02686             else
02687               {
02688               CHECK_PARTIAL();
02689               RRETURN(MATCH_NOMATCH);
02690               }
02691             }
02692           /* Control never gets here */
02693           }
02694 
02695         else  /* Maximize */
02696           {
02697           pp = eptr;
02698           for (i = min; i < max; i++)
02699             {
02700             if (eptr <= md->end_subject - length &&
02701                 memcmp(eptr, charptr, length) == 0) eptr += length;
02702 #ifdef SUPPORT_UCP
02703             else if (oclength > 0 &&
02704                      eptr <= md->end_subject - oclength &&
02705                      memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
02706 #endif  /* SUPPORT_UCP */
02707             else
02708               {
02709               CHECK_PARTIAL();
02710               break;
02711               }
02712             }
02713 
02714           if (possessive) continue;
02715 
02716           for(;;)
02717             {
02718             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
02719             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02720             if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
02721 #ifdef SUPPORT_UCP
02722             eptr--;
02723             BACKCHAR(eptr);
02724 #else   /* without SUPPORT_UCP */
02725             eptr -= length;
02726 #endif  /* SUPPORT_UCP */
02727             }
02728           }
02729         /* Control never gets here */
02730         }
02731 
02732       /* If the length of a UTF-8 character is 1, we fall through here, and
02733       obey the code as for non-UTF-8 characters below, though in this case the
02734       value of fc will always be < 128. */
02735       }
02736     else
02737 #endif  /* SUPPORT_UTF8 */
02738 
02739     /* When not in UTF-8 mode, load a single-byte character. */
02740 
02741     fc = *ecode++;
02742 
02743     /* The value of fc at this point is always less than 256, though we may or
02744     may not be in UTF-8 mode. The code is duplicated for the caseless and
02745     caseful cases, for speed, since matching characters is likely to be quite
02746     common. First, ensure the minimum number of matches are present. If min =
02747     max, continue at the same level without recursing. Otherwise, if
02748     minimizing, keep trying the rest of the expression and advancing one
02749     matching character if failing, up to the maximum. Alternatively, if
02750     maximizing, find the maximum number of characters and work backwards. */
02751 
02752     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
02753       max, eptr));
02754 
02755     if ((ims & PCRE_CASELESS) != 0)
02756       {
02757       fc = md->lcc[fc];
02758       for (i = 1; i <= min; i++)
02759         {
02760         if (eptr >= md->end_subject)
02761           {
02762           SCHECK_PARTIAL();
02763           RRETURN(MATCH_NOMATCH);
02764           }
02765         if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
02766         }
02767       if (min == max) continue;
02768       if (minimize)
02769         {
02770         for (fi = min;; fi++)
02771           {
02772           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
02773           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02774           if (fi >= max) RRETURN(MATCH_NOMATCH);
02775           if (eptr >= md->end_subject)
02776             {
02777             SCHECK_PARTIAL();
02778             RRETURN(MATCH_NOMATCH);
02779             }
02780           if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
02781           }
02782         /* Control never gets here */
02783         }
02784       else  /* Maximize */
02785         {
02786         pp = eptr;
02787         for (i = min; i < max; i++)
02788           {
02789           if (eptr >= md->end_subject)
02790             {
02791             SCHECK_PARTIAL();
02792             break;
02793             }
02794           if (fc != md->lcc[*eptr]) break;
02795           eptr++;
02796           }
02797 
02798         if (possessive) continue;
02799 
02800         while (eptr >= pp)
02801           {
02802           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
02803           eptr--;
02804           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02805           }
02806         RRETURN(MATCH_NOMATCH);
02807         }
02808       /* Control never gets here */
02809       }
02810 
02811     /* Caseful comparisons (multi-byte characters were fully handled above) */
02812 
02813     else
02814       {
02815       for (i = 1; i <= min; i++)
02816         {
02817         if (eptr >= md->end_subject)
02818           {
02819           SCHECK_PARTIAL();
02820           RRETURN(MATCH_NOMATCH);
02821           }
02822         if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
02823         }
02824 
02825       if (min == max) continue;
02826 
02827       if (minimize)
02828         {
02829         for (fi = min;; fi++)
02830           {
02831           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
02832           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02833           if (fi >= max) RRETURN(MATCH_NOMATCH);
02834           if (eptr >= md->end_subject)
02835             {
02836             SCHECK_PARTIAL();
02837             RRETURN(MATCH_NOMATCH);
02838             }
02839           if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
02840           }
02841         /* Control never gets here */
02842         }
02843       else  /* Maximize */
02844         {
02845         pp = eptr;
02846         for (i = min; i < max; i++)
02847           {
02848           if (eptr >= md->end_subject)
02849             {
02850             SCHECK_PARTIAL();
02851             break;
02852             }
02853           if (fc != *eptr) break;
02854           eptr++;
02855           }
02856         if (possessive) continue;
02857 
02858         while (eptr >= pp)
02859           {
02860           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
02861           eptr--;
02862           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02863           }
02864         RRETURN(MATCH_NOMATCH);
02865         }
02866       }
02867     /* Control never gets here */
02868 
02869     /* Match a negated single one-byte character. The pattern character is
02870     one byte, but the subject character tested against it can be multibyte. */
02871 
02872     case OP_NOT:
02873     if (eptr >= md->end_subject)
02874       {
02875       SCHECK_PARTIAL();
02876       RRETURN(MATCH_NOMATCH);
02877       }
02878     ecode++;
02879     GETCHARINCTEST(c, eptr);
02880     if ((ims & PCRE_CASELESS) != 0)
02881       {
02882 #ifdef SUPPORT_UTF8
02883       if (c < 256)
02884 #endif
02885       c = md->lcc[c];
02886       if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);
02887       }
02888     else
02889       {
02890       if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
02891       }
02892     break;
02893 
02894     /* Match a negated single one-byte character repeatedly. This is almost a
02895     repeat of the code for a repeated single character, but I haven't found a
02896     nice way of commoning these up that doesn't require a test of the
02897     positive/negative option for each character match. Maybe that wouldn't add
02898     very much to the time taken, but character matching *is* what this is all
02899     about... */
02900 
02901     case OP_NOTEXACT:
02902     min = max = GET2(ecode, 1);
02903     ecode += 3;
02904     goto REPEATNOTCHAR;
02905 
02906     case OP_NOTUPTO:
02907     case OP_NOTMINUPTO:
02908     min = 0;
02909     max = GET2(ecode, 1);
02910     minimize = *ecode == OP_NOTMINUPTO;
02911     ecode += 3;
02912     goto REPEATNOTCHAR;
02913 
02914     case OP_NOTPOSSTAR:
02915     possessive = TRUE;
02916     min = 0;
02917     max = INT_MAX;
02918     ecode++;
02919     goto REPEATNOTCHAR;
02920 
02921     case OP_NOTPOSPLUS:
02922     possessive = TRUE;
02923     min = 1;
02924     max = INT_MAX;
02925     ecode++;
02926     goto REPEATNOTCHAR;
02927 
02928     case OP_NOTPOSQUERY:
02929     possessive = TRUE;
02930     min = 0;
02931     max = 1;
02932     ecode++;
02933     goto REPEATNOTCHAR;
02934 
02935     case OP_NOTPOSUPTO:
02936     possessive = TRUE;
02937     min = 0;
02938     max = GET2(ecode, 1);
02939     ecode += 3;
02940     goto REPEATNOTCHAR;
02941 
02942     case OP_NOTSTAR:
02943     case OP_NOTMINSTAR:
02944     case OP_NOTPLUS:
02945     case OP_NOTMINPLUS:
02946     case OP_NOTQUERY:
02947     case OP_NOTMINQUERY:
02948     c = *ecode++ - OP_NOTSTAR;
02949     minimize = (c & 1) != 0;
02950     min = rep_min[c];                 /* Pick up values from tables; */
02951     max = rep_max[c];                 /* zero for max => infinity */
02952     if (max == 0) max = INT_MAX;
02953 
02954     /* Common code for all repeated single-byte matches. */
02955 
02956     REPEATNOTCHAR:
02957     fc = *ecode++;
02958 
02959     /* The code is duplicated for the caseless and caseful cases, for speed,
02960     since matching characters is likely to be quite common. First, ensure the
02961     minimum number of matches are present. If min = max, continue at the same
02962     level without recursing. Otherwise, if minimizing, keep trying the rest of
02963     the expression and advancing one matching character if failing, up to the
02964     maximum. Alternatively, if maximizing, find the maximum number of
02965     characters and work backwards. */
02966 
02967     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
02968       max, eptr));
02969 
02970     if ((ims & PCRE_CASELESS) != 0)
02971       {
02972       fc = md->lcc[fc];
02973 
02974 #ifdef SUPPORT_UTF8
02975       /* UTF-8 mode */
02976       if (utf8)
02977         {
02978         register unsigned int d;
02979         for (i = 1; i <= min; i++)
02980           {
02981           if (eptr >= md->end_subject)
02982             {
02983             SCHECK_PARTIAL();
02984             RRETURN(MATCH_NOMATCH);
02985             }
02986           GETCHARINC(d, eptr);
02987           if (d < 256) d = md->lcc[d];
02988           if (fc == d) RRETURN(MATCH_NOMATCH);
02989           }
02990         }
02991       else
02992 #endif
02993 
02994       /* Not UTF-8 mode */
02995         {
02996         for (i = 1; i <= min; i++)
02997           {
02998           if (eptr >= md->end_subject)
02999             {
03000             SCHECK_PARTIAL();
03001             RRETURN(MATCH_NOMATCH);
03002             }
03003           if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
03004           }
03005         }
03006 
03007       if (min == max) continue;
03008 
03009       if (minimize)
03010         {
03011 #ifdef SUPPORT_UTF8
03012         /* UTF-8 mode */
03013         if (utf8)
03014           {
03015           register unsigned int d;
03016           for (fi = min;; fi++)
03017             {
03018             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
03019             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
03020             if (fi >= max) RRETURN(MATCH_NOMATCH);
03021             if (eptr >= md->end_subject)
03022               {
03023               SCHECK_PARTIAL();
03024               RRETURN(MATCH_NOMATCH);
03025               }
03026             GETCHARINC(d, eptr);
03027             if (d < 256) d = md->lcc[d];
03028             if (fc == d) RRETURN(MATCH_NOMATCH);
03029             }
03030           }
03031         else
03032 #endif
03033         /* Not UTF-8 mode */
03034           {
03035           for (fi = min;; fi++)
03036             {
03037             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
03038             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
03039             if (fi >= max) RRETURN(MATCH_NOMATCH);
03040             if (eptr >= md->end_subject)
03041               {
03042               SCHECK_PARTIAL();
03043               RRETURN(MATCH_NOMATCH);
03044               }
03045             if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
03046             }
03047           }
03048         /* Control never gets here */
03049         }
03050 
03051       /* Maximize case */
03052 
03053       else
03054         {
03055         pp = eptr;
03056 
03057 #ifdef SUPPORT_UTF8
03058         /* UTF-8 mode */
03059         if (utf8)
03060           {
03061           register unsigned int d;
03062           for (i = min; i < max; i++)
03063             {
03064             int len = 1;
03065             if (eptr >= md->end_subject)
03066               {
03067               SCHECK_PARTIAL();
03068               break;
03069               }
03070             GETCHARLEN(d, eptr, len);
03071             if (d < 256) d = md->lcc[d];
03072             if (fc == d) break;
03073             eptr += len;
03074             }
03075         if (possessive) continue;
03076         for(;;)
03077             {
03078             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
03079             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
03080             if (eptr-- == pp) break;        /* Stop if tried at original pos */
03081             BACKCHAR(eptr);
03082             }
03083           }
03084         else
03085 #endif
03086         /* Not UTF-8 mode */
03087           {
03088           for (i = min; i < max; i++)
03089             {
03090             if (eptr >= md->end_subject)
03091               {
03092               SCHECK_PARTIAL();
03093               break;
03094               }
03095             if (fc == md->lcc[*eptr]) break;
03096             eptr++;
03097             }
03098           if (possessive) continue;
03099           while (eptr >= pp)
03100             {
03101             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
03102             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
03103             eptr--;
03104             }
03105           }
03106 
03107         RRETURN(MATCH_NOMATCH);
03108         }
03109       /* Control never gets here */
03110       }
03111 
03112     /* Caseful comparisons */
03113 
03114     else
03115       {
03116 #ifdef SUPPORT_UTF8
03117       /* UTF-8 mode */
03118       if (utf8)
03119         {
03120         register unsigned int d;
03121         for (i = 1; i <= min; i++)
03122           {
03123           if (eptr >= md->end_subject)
03124             {
03125             SCHECK_PARTIAL();
03126             RRETURN(MATCH_NOMATCH);
03127             }
03128           GETCHARINC(d, eptr);
03129           if (fc == d) RRETURN(MATCH_NOMATCH);
03130           }
03131         }
03132       else
03133 #endif
03134       /* Not UTF-8 mode */
03135         {
03136         for (i = 1; i <= min; i++)
03137           {
03138           if (eptr >= md->end_subject)
03139             {
03140             SCHECK_PARTIAL();
03141             RRETURN(MATCH_NOMATCH);
03142             }
03143           if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
03144           }
03145         }
03146 
03147       if (min == max) continue;
03148 
03149       if (minimize)
03150         {
03151 #ifdef SUPPORT_UTF8
03152         /* UTF-8 mode */
03153         if (utf8)
03154           {
03155           register unsigned int d;
03156           for (fi = min;; fi++)
03157             {
03158             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
03159             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
03160             if (fi >= max) RRETURN(MATCH_NOMATCH);
03161             if (eptr >= md->end_subject)
03162               {
03163               SCHECK_PARTIAL();
03164               RRETURN(MATCH_NOMATCH);
03165               }
03166             GETCHARINC(d, eptr);
03167             if (fc == d) RRETURN(MATCH_NOMATCH);
03168             }
03169           }
03170         else
03171 #endif
03172         /* Not UTF-8 mode */
03173           {
03174           for (fi = min;; fi++)
03175             {
03176             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
03177             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
03178             if (fi >= max) RRETURN(MATCH_NOMATCH);
03179             if (eptr >= md->end_subject)
03180               {
03181               SCHECK_PARTIAL();
03182               RRETURN(MATCH_NOMATCH);
03183               }
03184             if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
03185             }
03186           }
03187         /* Control never gets here */
03188         }
03189 
03190       /* Maximize case */
03191 
03192       else
03193         {
03194         pp = eptr;
03195 
03196 #ifdef SUPPORT_UTF8
03197         /* UTF-8 mode */
03198         if (utf8)
03199           {
03200           register unsigned int d;
03201           for (i = min; i < max; i++)
03202             {
03203             int len = 1;
03204             if (eptr >= md->end_subject)
03205               {
03206               SCHECK_PARTIAL();
03207               break;
03208               }
03209             GETCHARLEN(d, eptr, len);
03210             if (fc == d) break;
03211             eptr += len;
03212             }
03213           if (possessive) continue;
03214           for(;;)
03215             {
03216             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
03217             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
03218             if (eptr-- == pp) break;        /* Stop if tried at original pos */
03219             BACKCHAR(eptr);
03220             }
03221           }
03222         else
03223 #endif
03224         /* Not UTF-8 mode */
03225           {
03226           for (i = min; i < max; i++)
03227             {
03228             if (eptr >= md->end_subject)
03229               {
03230               SCHECK_PARTIAL();
03231               break;
03232               }
03233             if (fc == *eptr) break;
03234             eptr++;
03235             }
03236           if (possessive) continue;
03237           while (eptr >= pp)
03238             {
03239             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
03240             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
03241             eptr--;
03242             }
03243           }
03244 
03245         RRETURN(MATCH_NOMATCH);
03246         }
03247       }
03248     /* Control never gets here */
03249 
03250     /* Match a single character type repeatedly; several different opcodes
03251     share code. This is very similar to the code for single characters, but we
03252     repeat it in the interests of efficiency. */
03253 
03254     case OP_TYPEEXACT:
03255     min = max = GET2(ecode, 1);
03256     minimize = TRUE;
03257     ecode += 3;
03258     goto REPEATTYPE;
03259 
03260     case OP_TYPEUPTO:
03261     case OP_TYPEMINUPTO:
03262     min = 0;
03263     max = GET2(ecode, 1);
03264     minimize = *ecode == OP_TYPEMINUPTO;
03265     ecode += 3;
03266     goto REPEATTYPE;
03267 
03268     case OP_TYPEPOSSTAR:
03269     possessive = TRUE;
03270     min = 0;
03271     max = INT_MAX;
03272     ecode++;
03273     goto REPEATTYPE;
03274 
03275     case OP_TYPEPOSPLUS:
03276     possessive = TRUE;
03277     min = 1;
03278     max = INT_MAX;
03279     ecode++;
03280     goto REPEATTYPE;
03281 
03282     case OP_TYPEPOSQUERY:
03283     possessive = TRUE;
03284     min = 0;
03285     max = 1;
03286     ecode++;
03287     goto REPEATTYPE;
03288 
03289     case OP_TYPEPOSUPTO:
03290     possessive = TRUE;
03291     min = 0;
03292     max = GET2(ecode, 1);
03293     ecode += 3;
03294     goto REPEATTYPE;
03295 
03296     case OP_TYPESTAR:
03297     case OP_TYPEMINSTAR:
03298     case OP_TYPEPLUS:
03299     case OP_TYPEMINPLUS:
03300     case OP_TYPEQUERY:
03301     case OP_TYPEMINQUERY:
03302     c = *ecode++ - OP_TYPESTAR;
03303     minimize = (c & 1) != 0;
03304     min = rep_min[c];                 /* Pick up values from tables; */
03305     max = rep_max[c];                 /* zero for max => infinity */
03306     if (max == 0) max = INT_MAX;
03307 
03308     /* Common code for all repeated single character type matches. Note that
03309     in UTF-8 mode, '.' matches a character of any length, but for the other
03310     character types, the valid characters are all one-byte long. */
03311 
03312     REPEATTYPE:
03313     ctype = *ecode++;      /* Code for the character type */
03314 
03315 #ifdef SUPPORT_UCP
03316     if (ctype == OP_PROP || ctype == OP_NOTPROP)
03317       {
03318       prop_fail_result = ctype == OP_NOTPROP;
03319       prop_type = *ecode++;
03320       prop_value = *ecode++;
03321       }
03322     else prop_type = -1;
03323 #endif
03324 
03325     /* First, ensure the minimum number of matches are present. Use inline
03326     code for maximizing the speed, and do the type test once at the start
03327     (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
03328     is tidier. Also separate the UCP code, which can be the same for both UTF-8
03329     and single-bytes. */
03330 
03331     if (min > 0)
03332       {
03333 #ifdef SUPPORT_UCP
03334       if (prop_type >= 0)
03335         {
03336         switch(prop_type)
03337           {
03338           case PT_ANY:
03339           if (prop_fail_result) RRETURN(MATCH_NOMATCH);
03340           for (i = 1; i <= min; i++)
03341             {
03342             if (eptr >= md->end_subject)
03343               {
03344               SCHECK_PARTIAL();
03345               RRETURN(MATCH_NOMATCH);
03346               }
03347             GETCHARINCTEST(c, eptr);
03348             }
03349           break;
03350 
03351           case PT_LAMP:
03352           for (i = 1; i <= min; i++)
03353             {
03354             if (eptr >= md->end_subject)
03355               {
03356               SCHECK_PARTIAL();
03357               RRETURN(MATCH_NOMATCH);
03358               }
03359             GETCHARINCTEST(c, eptr);
03360             prop_chartype = UCD_CHARTYPE(c);
03361             if ((prop_chartype == ucp_Lu ||
03362                  prop_chartype == ucp_Ll ||
03363                  prop_chartype == ucp_Lt) == prop_fail_result)
03364               RRETURN(MATCH_NOMATCH);
03365             }
03366           break;
03367 
03368           case PT_GC:
03369           for (i = 1; i <= min; i++)
03370             {
03371             if (eptr >= md->end_subject)
03372               {
03373               SCHECK_PARTIAL();
03374               RRETURN(MATCH_NOMATCH);
03375               }
03376             GETCHARINCTEST(c, eptr);
03377             prop_category = UCD_CATEGORY(c);
03378             if ((prop_category == prop_value) == prop_fail_result)
03379               RRETURN(MATCH_NOMATCH);
03380             }
03381           break;
03382 
03383           case PT_PC:
03384           for (i = 1; i <= min; i++)
03385             {
03386             if (eptr >= md->end_subject)
03387               {
03388               SCHECK_PARTIAL();
03389               RRETURN(MATCH_NOMATCH);
03390               }
03391             GETCHARINCTEST(c, eptr);
03392             prop_chartype = UCD_CHARTYPE(c);
03393             if ((prop_chartype == prop_value) == prop_fail_result)
03394               RRETURN(MATCH_NOMATCH);
03395             }
03396           break;
03397 
03398           case PT_SC:
03399           for (i = 1; i <= min; i++)
03400             {
03401             if (eptr >= md->end_subject)
03402               {
03403               SCHECK_PARTIAL();
03404               RRETURN(MATCH_NOMATCH);
03405               }
03406             GETCHARINCTEST(c, eptr);
03407             prop_script = UCD_SCRIPT(c);
03408             if ((prop_script == prop_value) == prop_fail_result)
03409               RRETURN(MATCH_NOMATCH);
03410             }
03411           break;
03412 
03413           default:
03414           RRETURN(PCRE_ERROR_INTERNAL);
03415           }
03416         }
03417 
03418       /* Match extended Unicode sequences. We will get here only if the
03419       support is in the binary; otherwise a compile-time error occurs. */
03420 
03421       else if (ctype == OP_EXTUNI)
03422         {
03423         for (i = 1; i <= min; i++)
03424           {
03425           if (eptr >= md->end_subject)
03426             {
03427             SCHECK_PARTIAL();
03428             RRETURN(MATCH_NOMATCH);
03429             }
03430           GETCHARINCTEST(c, eptr);
03431           prop_category = UCD_CATEGORY(c);
03432           if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
03433           while (eptr < md->end_subject)
03434             {
03435             int len = 1;
03436             if (!utf8) c = *eptr;
03437               else { GETCHARLEN(c, eptr, len); }
03438             prop_category = UCD_CATEGORY(c);
03439             if (prop_category != ucp_M) break;
03440             eptr += len;
03441             }
03442           }
03443         }
03444 
03445       else
03446 #endif     /* SUPPORT_UCP */
03447 
03448 /* Handle all other cases when the coding is UTF-8 */
03449 
03450 #ifdef SUPPORT_UTF8
03451       if (utf8) switch(ctype)
03452         {
03453         case OP_ANY:
03454         for (i = 1; i <= min; i++)
03455           {
03456           if (eptr >= md->end_subject)
03457             {
03458             SCHECK_PARTIAL();
03459             RRETURN(MATCH_NOMATCH);
03460             }
03461           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
03462           eptr++;
03463           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
03464           }
03465         break;
03466 
03467         case OP_ALLANY:
03468         for (i = 1; i <= min; i++)
03469           {
03470           if (eptr >= md->end_subject)
03471             {
03472             SCHECK_PARTIAL();
03473             RRETURN(MATCH_NOMATCH);
03474             }
03475           eptr++;
03476           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
03477           }
03478         break;
03479 
03480         case OP_ANYBYTE:
03481         if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
03482         eptr += min;
03483         break;
03484 
03485         case OP_ANYNL:
03486         for (i = 1; i <= min; i++)
03487           {
03488           if (eptr >= md->end_subject)
03489             {
03490             SCHECK_PARTIAL();
03491             RRETURN(MATCH_NOMATCH);
03492             }
03493           GETCHARINC(c, eptr);
03494           switch(c)
03495             {
03496             default: RRETURN(MATCH_NOMATCH);
03497             case 0x000d:
03498             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
03499             break;
03500 
03501             case 0x000a:
03502             break;
03503 
03504             case 0x000b:
03505             case 0x000c:
03506             case 0x0085:
03507             case 0x2028:
03508             case 0x2029:
03509             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
03510             break;
03511             }
03512           }
03513         break;
03514 
03515         case OP_NOT_HSPACE:
03516         for (i = 1; i <= min; i++)
03517           {
03518           if (eptr >= md->end_subject)
03519             {
03520             SCHECK_PARTIAL();
03521             RRETURN(MATCH_NOMATCH);
03522             }
03523           GETCHARINC(c, eptr);
03524           switch(c)
03525             {
03526             default: break;
03527             case 0x09:      /* HT */
03528             case 0x20:      /* SPACE */
03529             case 0xa0:      /* NBSP */
03530             case 0x1680:    /* OGHAM SPACE MARK */
03531             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
03532             case 0x2000:    /* EN QUAD */
03533             case 0x2001:    /* EM QUAD */
03534             case 0x2002:    /* EN SPACE */
03535             case 0x2003:    /* EM SPACE */
03536             case 0x2004:    /* THREE-PER-EM SPACE */
03537             case 0x2005:    /* FOUR-PER-EM SPACE */
03538             case 0x2006:    /* SIX-PER-EM SPACE */
03539             case 0x2007:    /* FIGURE SPACE */
03540             case 0x2008:    /* PUNCTUATION SPACE */
03541             case 0x2009:    /* THIN SPACE */
03542             case 0x200A:    /* HAIR SPACE */
03543             case 0x202f:    /* NARROW NO-BREAK SPACE */
03544             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
03545             case 0x3000:    /* IDEOGRAPHIC SPACE */
03546             RRETURN(MATCH_NOMATCH);
03547             }
03548           }
03549         break;
03550 
03551         case OP_HSPACE:
03552         for (i = 1; i <= min; i++)
03553           {
03554           if (eptr >= md->end_subject)
03555             {
03556             SCHECK_PARTIAL();
03557             RRETURN(MATCH_NOMATCH);
03558             }
03559           GETCHARINC(c, eptr);
03560           switch(c)
03561             {
03562             default: RRETURN(MATCH_NOMATCH);
03563             case 0x09:      /* HT */
03564             case 0x20:      /* SPACE */
03565             case 0xa0:      /* NBSP */
03566             case 0x1680:    /* OGHAM SPACE MARK */
03567             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
03568             case 0x2000:    /* EN QUAD */
03569             case 0x2001:    /* EM QUAD */
03570             case 0x2002:    /* EN SPACE */
03571             case 0x2003:    /* EM SPACE */
03572             case 0x2004:    /* THREE-PER-EM SPACE */
03573             case 0x2005:    /* FOUR-PER-EM SPACE */
03574             case 0x2006:    /* SIX-PER-EM SPACE */
03575             case 0x2007:    /* FIGURE SPACE */
03576             case 0x2008:    /* PUNCTUATION SPACE */
03577             case 0x2009:    /* THIN SPACE */
03578             case 0x200A:    /* HAIR SPACE */
03579             case 0x202f:    /* NARROW NO-BREAK SPACE */
03580             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
03581             case 0x3000:    /* IDEOGRAPHIC SPACE */
03582             break;
03583             }
03584           }
03585         break;
03586 
03587         case OP_NOT_VSPACE:
03588         for (i = 1; i <= min; i++)
03589           {
03590           if (eptr >= md->end_subject)
03591             {
03592             SCHECK_PARTIAL();
03593             RRETURN(MATCH_NOMATCH);
03594             }
03595           GETCHARINC(c, eptr);
03596           switch(c)
03597             {
03598             default: break;
03599             case 0x0a:      /* LF */
03600             case 0x0b:      /* VT */
03601             case 0x0c:      /* FF */
03602             case 0x0d:      /* CR */
03603             case 0x85:      /* NEL */
03604             case 0x2028:    /* LINE SEPARATOR */
03605             case 0x2029:    /* PARAGRAPH SEPARATOR */
03606             RRETURN(MATCH_NOMATCH);
03607             }
03608           }
03609         break;
03610 
03611         case OP_VSPACE:
03612         for (i = 1; i <= min; i++)
03613           {
03614           if (eptr >= md->end_subject)
03615             {
03616             SCHECK_PARTIAL();
03617             RRETURN(MATCH_NOMATCH);
03618             }
03619           GETCHARINC(c, eptr);
03620           switch(c)
03621             {
03622             default: RRETURN(MATCH_NOMATCH);
03623             case 0x0a:      /* LF */
03624             case 0x0b:      /* VT */
03625             case 0x0c:      /* FF */
03626             case 0x0d:      /* CR */
03627             case 0x85:      /* NEL */
03628             case 0x2028:    /* LINE SEPARATOR */
03629             case 0x2029:    /* PARAGRAPH SEPARATOR */
03630             break;
03631             }
03632           }
03633         break;
03634 
03635         case OP_NOT_DIGIT:
03636         for (i = 1; i <= min; i++)
03637           {
03638           if (eptr >= md->end_subject)
03639             {
03640             SCHECK_PARTIAL();
03641             RRETURN(MATCH_NOMATCH);
03642             }
03643           GETCHARINC(c, eptr);
03644           if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
03645             RRETURN(MATCH_NOMATCH);
03646           }
03647         break;
03648 
03649         case OP_DIGIT:
03650         for (i = 1; i <= min; i++)
03651           {
03652           if (eptr >= md->end_subject)
03653             {
03654             SCHECK_PARTIAL();
03655             RRETURN(MATCH_NOMATCH);
03656             }
03657           if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
03658             RRETURN(MATCH_NOMATCH);
03659           /* No need to skip more bytes - we know it's a 1-byte character */
03660           }
03661         break;
03662 
03663         case OP_NOT_WHITESPACE:
03664         for (i = 1; i <= min; i++)
03665           {
03666           if (eptr >= md->end_subject)
03667             {
03668             SCHECK_PARTIAL();
03669             RRETURN(MATCH_NOMATCH);
03670             }
03671           if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
03672             RRETURN(MATCH_NOMATCH);
03673           while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
03674           }
03675         break;
03676 
03677         case OP_WHITESPACE:
03678         for (i = 1; i <= min; i++)
03679           {
03680           if (eptr >= md->end_subject)
03681             {
03682             SCHECK_PARTIAL();
03683             RRETURN(MATCH_NOMATCH);
03684             }
03685           if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
03686             RRETURN(MATCH_NOMATCH);
03687           /* No need to skip more bytes - we know it's a 1-byte character */
03688           }
03689         break;
03690 
03691         case OP_NOT_WORDCHAR:
03692         for (i = 1; i <= min; i++)
03693           {
03694           if (eptr >= md->end_subject ||
03695              (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0))
03696             RRETURN(MATCH_NOMATCH);
03697           while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
03698           }
03699         break;
03700 
03701         case OP_WORDCHAR:
03702         for (i = 1; i <= min; i++)
03703           {
03704           if (eptr >= md->end_subject)
03705             {
03706             SCHECK_PARTIAL();
03707             RRETURN(MATCH_NOMATCH);
03708             }
03709           if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
03710             RRETURN(MATCH_NOMATCH);
03711           /* No need to skip more bytes - we know it's a 1-byte character */
03712           }
03713         break;
03714 
03715         default:
03716         RRETURN(PCRE_ERROR_INTERNAL);
03717         }  /* End switch(ctype) */
03718 
03719       else
03720 #endif     /* SUPPORT_UTF8 */
03721 
03722       /* Code for the non-UTF-8 case for minimum matching of operators other
03723       than OP_PROP and OP_NOTPROP. */
03724 
03725       switch(ctype)
03726         {
03727         case OP_ANY:
03728         for (i = 1; i <= min; i++)
03729           {
03730           if (eptr >= md->end_subject)
03731             {
03732             SCHECK_PARTIAL();
03733             RRETURN(MATCH_NOMATCH);
03734             }
03735           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
03736           eptr++;
03737           }
03738         break;
03739 
03740         case OP_ALLANY:
03741         if (eptr > md->end_subject - min)
03742           {
03743           SCHECK_PARTIAL();
03744           RRETURN(MATCH_NOMATCH);
03745           }
03746         eptr += min;
03747         break;
03748 
03749         case OP_ANYBYTE:
03750         if (eptr > md->end_subject - min)
03751           {
03752           SCHECK_PARTIAL();
03753           RRETURN(MATCH_NOMATCH);
03754           }
03755         eptr += min;
03756         break;
03757 
03758         case OP_ANYNL:
03759         for (i = 1; i <= min; i++)
03760           {
03761           if (eptr >= md->end_subject)
03762             {
03763             SCHECK_PARTIAL();
03764             RRETURN(MATCH_NOMATCH);
03765             }
03766           switch(*eptr++)
03767             {
03768             default: RRETURN(MATCH_NOMATCH);
03769             case 0x000d:
03770             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
03771             break;
03772             case 0x000a:
03773             break;
03774 
03775             case 0x000b:
03776             case 0x000c:
03777             case 0x0085:
03778             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
03779             break;
03780             }
03781           }
03782         break;
03783 
03784         case OP_NOT_HSPACE:
03785         for (i = 1; i <= min; i++)
03786           {
03787           if (eptr >= md->end_subject)
03788             {
03789             SCHECK_PARTIAL();
03790             RRETURN(MATCH_NOMATCH);
03791             }
03792           switch(*eptr++)
03793             {
03794             default: break;
03795             case 0x09:      /* HT */
03796             case 0x20:      /* SPACE */
03797             case 0xa0:      /* NBSP */
03798             RRETURN(MATCH_NOMATCH);
03799             }
03800           }
03801         break;
03802 
03803         case OP_HSPACE:
03804         for (i = 1; i <= min; i++)
03805           {
03806           if (eptr >= md->end_subject)
03807             {
03808             SCHECK_PARTIAL();
03809             RRETURN(MATCH_NOMATCH);
03810             }
03811           switch(*eptr++)
03812             {
03813             default: RRETURN(MATCH_NOMATCH);
03814             case 0x09:      /* HT */
03815             case 0x20:      /* SPACE */
03816             case 0xa0:      /* NBSP */
03817             break;
03818             }
03819           }
03820         break;
03821 
03822         case OP_NOT_VSPACE:
03823         for (i = 1; i <= min; i++)
03824           {
03825           if (eptr >= md->end_subject)
03826             {
03827             SCHECK_PARTIAL();
03828             RRETURN(MATCH_NOMATCH);
03829             }
03830           switch(*eptr++)
03831             {
03832             default: break;
03833             case 0x0a:      /* LF */
03834             case 0x0b:      /* VT */
03835             case 0x0c:      /* FF */
03836             case 0x0d:      /* CR */
03837             case 0x85:      /* NEL */
03838             RRETURN(MATCH_NOMATCH);
03839             }
03840           }
03841         break;
03842 
03843         case OP_VSPACE:
03844         for (i = 1; i <= min; i++)
03845           {
03846           if (eptr >= md->end_subject)
03847             {
03848             SCHECK_PARTIAL();
03849             RRETURN(MATCH_NOMATCH);
03850             }
03851           switch(*eptr++)
03852             {
03853             default: RRETURN(MATCH_NOMATCH);
03854             case 0x0a:      /* LF */
03855             case 0x0b:      /* VT */
03856             case 0x0c:      /* FF */
03857             case 0x0d:      /* CR */
03858             case 0x85:      /* NEL */
03859             break;
03860             }
03861           }
03862         break;
03863 
03864         case OP_NOT_DIGIT:
03865         for (i = 1; i <= min; i++)
03866           {
03867           if (eptr >= md->end_subject)
03868             {
03869             SCHECK_PARTIAL();
03870             RRETURN(MATCH_NOMATCH);
03871             }
03872           if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
03873           }
03874         break;
03875 
03876         case OP_DIGIT:
03877         for (i = 1; i <= min; i++)
03878           {
03879           if (eptr >= md->end_subject)
03880             {
03881             SCHECK_PARTIAL();
03882             RRETURN(MATCH_NOMATCH);
03883             }
03884           if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
03885           }
03886         break;
03887 
03888         case OP_NOT_WHITESPACE:
03889         for (i = 1; i <= min; i++)
03890           {
03891           if (eptr >= md->end_subject)
03892             {
03893             SCHECK_PARTIAL();
03894             RRETURN(MATCH_NOMATCH);
03895             }
03896           if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
03897           }
03898         break;
03899 
03900         case OP_WHITESPACE:
03901         for (i = 1; i <= min; i++)
03902           {
03903           if (eptr >= md->end_subject)
03904             {
03905             SCHECK_PARTIAL();
03906             RRETURN(MATCH_NOMATCH);
03907             }
03908           if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
03909           }
03910         break;
03911 
03912         case OP_NOT_WORDCHAR:
03913         for (i = 1; i <= min; i++)
03914           {
03915           if (eptr >= md->end_subject)
03916             {
03917             SCHECK_PARTIAL();
03918             RRETURN(MATCH_NOMATCH);
03919             }
03920           if ((md->ctypes[*eptr++] & ctype_word) != 0)
03921             RRETURN(MATCH_NOMATCH);
03922           }
03923         break;
03924 
03925         case OP_WORDCHAR:
03926         for (i = 1; i <= min; i++)
03927           {
03928           if (eptr >= md->end_subject)
03929             {
03930             SCHECK_PARTIAL();
03931             RRETURN(MATCH_NOMATCH);
03932             }
03933           if ((md->ctypes[*eptr++] & ctype_word) == 0)
03934             RRETURN(MATCH_NOMATCH);
03935           }
03936         break;
03937 
03938         default:
03939         RRETURN(PCRE_ERROR_INTERNAL);
03940         }
03941       }
03942 
03943     /* If min = max, continue at the same level without recursing */
03944 
03945     if (min == max) continue;
03946 
03947     /* If minimizing, we have to test the rest of the pattern before each
03948     subsequent match. Again, separate the UTF-8 case for speed, and also
03949     separate the UCP cases. */
03950 
03951     if (minimize)
03952       {
03953 #ifdef SUPPORT_UCP
03954       if (prop_type >= 0)
03955         {
03956         switch(prop_type)
03957           {
03958           case PT_ANY:
03959           for (fi = min;; fi++)
03960             {
03961             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
03962             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
03963             if (fi >= max) RRETURN(MATCH_NOMATCH);
03964             if (eptr >= md->end_subject)
03965               {
03966               SCHECK_PARTIAL();
03967               RRETURN(MATCH_NOMATCH);
03968               }
03969             GETCHARINC(c, eptr);
03970             if (prop_fail_result) RRETURN(MATCH_NOMATCH);
03971             }
03972           /* Control never gets here */
03973 
03974           case PT_LAMP:
03975           for (fi = min;; fi++)
03976             {
03977             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
03978             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
03979             if (fi >= max) RRETURN(MATCH_NOMATCH);
03980             if (eptr >= md->end_subject)
03981               {
03982               SCHECK_PARTIAL();
03983               RRETURN(MATCH_NOMATCH);
03984               }
03985             GETCHARINC(c, eptr);
03986             prop_chartype = UCD_CHARTYPE(c);
03987             if ((prop_chartype == ucp_Lu ||
03988                  prop_chartype == ucp_Ll ||
03989                  prop_chartype == ucp_Lt) == prop_fail_result)
03990               RRETURN(MATCH_NOMATCH);
03991             }
03992           /* Control never gets here */
03993 
03994           case PT_GC:
03995           for (fi = min;; fi++)
03996             {
03997             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
03998             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
03999             if (fi >= max) RRETURN(MATCH_NOMATCH);
04000             if (eptr >= md->end_subject)
04001               {
04002               SCHECK_PARTIAL();
04003               RRETURN(MATCH_NOMATCH);
04004               }
04005             GETCHARINC(c, eptr);
04006             prop_category = UCD_CATEGORY(c);
04007             if ((prop_category == prop_value) == prop_fail_result)
04008               RRETURN(MATCH_NOMATCH);
04009             }
04010           /* Control never gets here */
04011 
04012           case PT_PC:
04013           for (fi = min;; fi++)
04014             {
04015             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
04016             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
04017             if (fi >= max) RRETURN(MATCH_NOMATCH);
04018             if (eptr >= md->end_subject)
04019               {
04020               SCHECK_PARTIAL();
04021               RRETURN(MATCH_NOMATCH);
04022               }
04023             GETCHARINC(c, eptr);
04024             prop_chartype = UCD_CHARTYPE(c);
04025             if ((prop_chartype == prop_value) == prop_fail_result)
04026               RRETURN(MATCH_NOMATCH);
04027             }
04028           /* Control never gets here */
04029 
04030           case PT_SC:
04031           for (fi = min;; fi++)
04032             {
04033             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
04034             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
04035             if (fi >= max) RRETURN(MATCH_NOMATCH);
04036             if (eptr >= md->end_subject)
04037               {
04038               SCHECK_PARTIAL();
04039               RRETURN(MATCH_NOMATCH);
04040               }
04041             GETCHARINC(c, eptr);
04042             prop_script = UCD_SCRIPT(c);
04043             if ((prop_script == prop_value) == prop_fail_result)
04044               RRETURN(MATCH_NOMATCH);
04045             }
04046           /* Control never gets here */
04047 
04048           default:
04049           RRETURN(PCRE_ERROR_INTERNAL);
04050           }
04051         }
04052 
04053       /* Match extended Unicode sequences. We will get here only if the
04054       support is in the binary; otherwise a compile-time error occurs. */
04055 
04056       else if (ctype == OP_EXTUNI)
04057         {
04058         for (fi = min;; fi++)
04059           {
04060           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
04061           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
04062           if (fi >= max) RRETURN(MATCH_NOMATCH);
04063           if (eptr >= md->end_subject)
04064             {
04065             SCHECK_PARTIAL();
04066             RRETURN(MATCH_NOMATCH);
04067             }
04068           GETCHARINCTEST(c, eptr);
04069           prop_category = UCD_CATEGORY(c);
04070           if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
04071           while (eptr < md->end_subject)
04072             {
04073             int len = 1;
04074             if (!utf8) c = *eptr;
04075               else { GETCHARLEN(c, eptr, len); }
04076             prop_category = UCD_CATEGORY(c);
04077             if (prop_category != ucp_M) break;
04078             eptr += len;
04079             }
04080           }
04081         }
04082 
04083       else
04084 #endif     /* SUPPORT_UCP */
04085 
04086 #ifdef SUPPORT_UTF8
04087       /* UTF-8 mode */
04088       if (utf8)
04089         {
04090         for (fi = min;; fi++)
04091           {
04092           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
04093           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
04094           if (fi >= max) RRETURN(MATCH_NOMATCH);
04095           if (eptr >= md->end_subject)
04096             {
04097             SCHECK_PARTIAL();
04098             RRETURN(MATCH_NOMATCH);
04099             }
04100           if (ctype == OP_ANY && IS_NEWLINE(eptr))
04101             RRETURN(MATCH_NOMATCH);
04102           GETCHARINC(c, eptr);
04103           switch(ctype)
04104             {
04105             case OP_ANY:        /* This is the non-NL case */
04106             case OP_ALLANY:
04107             case OP_ANYBYTE:
04108             break;
04109 
04110             case OP_ANYNL:
04111             switch(c)
04112               {
04113               default: RRETURN(MATCH_NOMATCH);
04114               case 0x000d:
04115               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
04116               break;
04117               case 0x000a:
04118               break;
04119 
04120               case 0x000b:
04121               case 0x000c:
04122               case 0x0085:
04123               case 0x2028:
04124               case 0x2029:
04125               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
04126               break;
04127               }
04128             break;
04129 
04130             case OP_NOT_HSPACE:
04131             switch(c)
04132               {
04133               default: break;
04134               case 0x09:      /* HT */
04135               case 0x20:      /* SPACE */
04136               case 0xa0:      /* NBSP */
04137               case 0x1680:    /* OGHAM SPACE MARK */
04138               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
04139               case 0x2000:    /* EN QUAD */
04140               case 0x2001:    /* EM QUAD */
04141               case 0x2002:    /* EN SPACE */
04142               case 0x2003:    /* EM SPACE */
04143               case 0x2004:    /* THREE-PER-EM SPACE */
04144               case 0x2005:    /* FOUR-PER-EM SPACE */
04145               case 0x2006:    /* SIX-PER-EM SPACE */
04146               case 0x2007:    /* FIGURE SPACE */
04147               case 0x2008:    /* PUNCTUATION SPACE */
04148               case 0x2009:    /* THIN SPACE */
04149               case 0x200A:    /* HAIR SPACE */
04150               case 0x202f:    /* NARROW NO-BREAK SPACE */
04151               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
04152               case 0x3000:    /* IDEOGRAPHIC SPACE */
04153               RRETURN(MATCH_NOMATCH);
04154               }
04155             break;
04156 
04157             case OP_HSPACE:
04158             switch(c)
04159               {
04160               default: RRETURN(MATCH_NOMATCH);
04161               case 0x09:      /* HT */
04162               case 0x20:      /* SPACE */
04163               case 0xa0:      /* NBSP */
04164               case 0x1680:    /* OGHAM SPACE MARK */
04165               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
04166               case 0x2000:    /* EN QUAD */
04167               case 0x2001:    /* EM QUAD */
04168               case 0x2002:    /* EN SPACE */
04169               case 0x2003:    /* EM SPACE */
04170               case 0x2004:    /* THREE-PER-EM SPACE */
04171               case 0x2005:    /* FOUR-PER-EM SPACE */
04172               case 0x2006:    /* SIX-PER-EM SPACE */
04173               case 0x2007:    /* FIGURE SPACE */
04174               case 0x2008:    /* PUNCTUATION SPACE */
04175               case 0x2009:    /* THIN SPACE */
04176               case 0x200A:    /* HAIR SPACE */
04177               case 0x202f:    /* NARROW NO-BREAK SPACE */
04178               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
04179               case 0x3000:    /* IDEOGRAPHIC SPACE */
04180               break;
04181               }
04182             break;
04183 
04184             case OP_NOT_VSPACE:
04185             switch(c)
04186               {
04187               default: break;
04188               case 0x0a:      /* LF */
04189               case 0x0b:      /* VT */
04190               case 0x0c:      /* FF */
04191               case 0x0d:      /* CR */
04192               case 0x85:      /* NEL */
04193               case 0x2028:    /* LINE SEPARATOR */
04194               case 0x2029:    /* PARAGRAPH SEPARATOR */
04195               RRETURN(MATCH_NOMATCH);
04196               }
04197             break;
04198 
04199             case OP_VSPACE:
04200             switch(c)
04201               {
04202               default: RRETURN(MATCH_NOMATCH);
04203               case 0x0a:      /* LF */
04204               case 0x0b:      /* VT */
04205               case 0x0c:      /* FF */
04206               case 0x0d:      /* CR */
04207               case 0x85:      /* NEL */
04208               case 0x2028:    /* LINE SEPARATOR */
04209               case 0x2029:    /* PARAGRAPH SEPARATOR */
04210               break;
04211               }
04212             break;
04213 
04214             case OP_NOT_DIGIT:
04215             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
04216               RRETURN(MATCH_NOMATCH);
04217             break;
04218 
04219             case OP_DIGIT:
04220             if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
04221               RRETURN(MATCH_NOMATCH);
04222             break;
04223 
04224             case OP_NOT_WHITESPACE:
04225             if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
04226               RRETURN(MATCH_NOMATCH);
04227             break;
04228 
04229             case OP_WHITESPACE:
04230             if  (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
04231               RRETURN(MATCH_NOMATCH);
04232             break;
04233 
04234             case OP_NOT_WORDCHAR:
04235             if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
04236               RRETURN(MATCH_NOMATCH);
04237             break;
04238 
04239             case OP_WORDCHAR:
04240             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
04241               RRETURN(MATCH_NOMATCH);
04242             break;
04243 
04244             default:
04245             RRETURN(PCRE_ERROR_INTERNAL);
04246             }
04247           }
04248         }
04249       else
04250 #endif
04251       /* Not UTF-8 mode */
04252         {
04253         for (fi = min;; fi++)
04254           {
04255           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
04256           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
04257           if (fi >= max) RRETURN(MATCH_NOMATCH);
04258           if (eptr >= md->end_subject)
04259             {
04260             SCHECK_PARTIAL();
04261             RRETURN(MATCH_NOMATCH);
04262             }
04263           if (ctype == OP_ANY && IS_NEWLINE(eptr))
04264             RRETURN(MATCH_NOMATCH);
04265           c = *eptr++;
04266           switch(ctype)
04267             {
04268             case OP_ANY:     /* This is the non-NL case */
04269             case OP_ALLANY:
04270             case OP_ANYBYTE:
04271             break;
04272 
04273             case OP_ANYNL:
04274             switch(c)
04275               {
04276               default: RRETURN(MATCH_NOMATCH);
04277               case 0x000d:
04278               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
04279               break;
04280 
04281               case 0x000a:
04282               break;
04283 
04284               case 0x000b:
04285               case 0x000c:
04286               case 0x0085:
04287               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
04288               break;
04289               }
04290             break;
04291 
04292             case OP_NOT_HSPACE:
04293             switch(c)
04294               {
04295               default: break;
04296               case 0x09:      /* HT */
04297               case 0x20:      /* SPACE */
04298               case 0xa0:      /* NBSP */
04299               RRETURN(MATCH_NOMATCH);
04300               }
04301             break;
04302 
04303             case OP_HSPACE:
04304             switch(c)
04305               {
04306               default: RRETURN(MATCH_NOMATCH);
04307               case 0x09:      /* HT */
04308               case 0x20:      /* SPACE */
04309               case 0xa0:      /* NBSP */
04310               break;
04311               }
04312             break;
04313 
04314             case OP_NOT_VSPACE:
04315             switch(c)
04316               {
04317               default: break;
04318               case 0x0a:      /* LF */
04319               case 0x0b:      /* VT */
04320               case 0x0c:      /* FF */
04321               case 0x0d:      /* CR */
04322               case 0x85:      /* NEL */
04323               RRETURN(MATCH_NOMATCH);
04324               }
04325             break;
04326 
04327             case OP_VSPACE:
04328             switch(c)
04329               {
04330               default: RRETURN(MATCH_NOMATCH);
04331               case 0x0a:      /* LF */
04332               case 0x0b:      /* VT */
04333               case 0x0c:      /* FF */
04334               case 0x0d:      /* CR */
04335               case 0x85:      /* NEL */
04336               break;
04337               }
04338             break;
04339 
04340             case OP_NOT_DIGIT:
04341             if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
04342             break;
04343 
04344             case OP_DIGIT:
04345             if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
04346             break;
04347 
04348             case OP_NOT_WHITESPACE:
04349             if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
04350             break;
04351 
04352             case OP_WHITESPACE:
04353             if  ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
04354             break;
04355 
04356             case OP_NOT_WORDCHAR:
04357             if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
04358             break;
04359 
04360             case OP_WORDCHAR:
04361             if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
04362             break;
04363 
04364             default:
04365             RRETURN(PCRE_ERROR_INTERNAL);
04366             }
04367           }
04368         }
04369       /* Control never gets here */
04370       }
04371 
04372     /* If maximizing, it is worth using inline code for speed, doing the type
04373     test once at the start (i.e. keep it out of the loop). Again, keep the
04374     UTF-8 and UCP stuff separate. */
04375 
04376     else
04377       {
04378       pp = eptr;  /* Remember where we started */
04379 
04380 #ifdef SUPPORT_UCP
04381       if (prop_type >= 0)
04382         {
04383         switch(prop_type)
04384           {
04385           case PT_ANY:
04386           for (i = min; i < max; i++)
04387             {
04388             int len = 1;
04389             if (eptr >= md->end_subject)
04390               {
04391               SCHECK_PARTIAL();
04392               break;
04393               }
04394             GETCHARLEN(c, eptr, len);
04395             if (prop_fail_result) break;
04396             eptr+= len;
04397             }
04398           break;
04399 
04400           case PT_LAMP:
04401           for (i = min; i < max; i++)
04402             {
04403             int len = 1;
04404             if (eptr >= md->end_subject)
04405               {
04406               SCHECK_PARTIAL();
04407               break;
04408               }
04409             GETCHARLEN(c, eptr, len);
04410             prop_chartype = UCD_CHARTYPE(c);
04411             if ((prop_chartype == ucp_Lu ||
04412                  prop_chartype == ucp_Ll ||
04413                  prop_chartype == ucp_Lt) == prop_fail_result)
04414               break;
04415             eptr+= len;
04416             }
04417           break;
04418 
04419           case PT_GC:
04420           for (i = min; i < max; i++)
04421             {
04422             int len = 1;
04423             if (eptr >= md->end_subject)
04424               {
04425               SCHECK_PARTIAL();
04426               break;
04427               }
04428             GETCHARLEN(c, eptr, len);
04429             prop_category = UCD_CATEGORY(c);
04430             if ((prop_category == prop_value) == prop_fail_result)
04431               break;
04432             eptr+= len;
04433             }
04434           break;
04435 
04436           case PT_PC:
04437           for (i = min; i < max; i++)
04438             {
04439             int len = 1;
04440             if (eptr >= md->end_subject)
04441               {
04442               SCHECK_PARTIAL();
04443               break;
04444               }
04445             GETCHARLEN(c, eptr, len);
04446             prop_chartype = UCD_CHARTYPE(c);
04447             if ((prop_chartype == prop_value) == prop_fail_result)
04448               break;
04449             eptr+= len;
04450             }
04451           break;
04452 
04453           case PT_SC:
04454           for (i = min; i < max; i++)
04455             {
04456             int len = 1;
04457             if (eptr >= md->end_subject)
04458               {
04459               SCHECK_PARTIAL();
04460               break;
04461               }
04462             GETCHARLEN(c, eptr, len);
04463             prop_script = UCD_SCRIPT(c);
04464             if ((prop_script == prop_value) == prop_fail_result)
04465               break;
04466             eptr+= len;
04467             }
04468           break;
04469           }
04470 
04471         /* eptr is now past the end of the maximum run */
04472 
04473         if (possessive) continue;
04474         for(;;)
04475           {
04476           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
04477           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
04478           if (eptr-- == pp) break;        /* Stop if tried at original pos */
04479           if (utf8) BACKCHAR(eptr);
04480           }
04481         }
04482 
04483       /* Match extended Unicode sequences. We will get here only if the
04484       support is in the binary; otherwise a compile-time error occurs. */
04485 
04486       else if (ctype == OP_EXTUNI)
04487         {
04488         for (i = min; i < max; i++)
04489           {
04490           if (eptr >= md->end_subject)
04491             {
04492             SCHECK_PARTIAL();
04493             break;
04494             }
04495           GETCHARINCTEST(c, eptr);
04496           prop_category = UCD_CATEGORY(c);
04497           if (prop_category == ucp_M) break;
04498           while (eptr < md->end_subject)
04499             {
04500             int len = 1;
04501             if (!utf8) c = *eptr; else
04502               {
04503               GETCHARLEN(c, eptr, len);
04504               }
04505             prop_category = UCD_CATEGORY(c);
04506             if (prop_category != ucp_M) break;
04507             eptr += len;
04508             }
04509           }
04510 
04511         /* eptr is now past the end of the maximum run */
04512 
04513         if (possessive) continue;
04514 
04515         for(;;)
04516           {
04517           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
04518           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
04519           if (eptr-- == pp) break;        /* Stop if tried at original pos */
04520           for (;;)                        /* Move back over one extended */
04521             {
04522             int len = 1;
04523             if (!utf8) c = *eptr; else
04524               {
04525               BACKCHAR(eptr);
04526               GETCHARLEN(c, eptr, len);
04527               }
04528             prop_category = UCD_CATEGORY(c);
04529             if (prop_category != ucp_M) break;
04530             eptr--;
04531             }
04532           }
04533         }
04534 
04535       else
04536 #endif   /* SUPPORT_UCP */
04537 
04538 #ifdef SUPPORT_UTF8
04539       /* UTF-8 mode */
04540 
04541       if (utf8)
04542         {
04543         switch(ctype)
04544           {
04545           case OP_ANY:
04546           if (max < INT_MAX)
04547             {
04548             for (i = min; i < max; i++)
04549               {
04550               if (eptr >= md->end_subject)
04551                 {
04552                 SCHECK_PARTIAL();
04553                 break;
04554                 }
04555               if (IS_NEWLINE(eptr)) break;
04556               eptr++;
04557               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
04558               }
04559             }
04560 
04561           /* Handle unlimited UTF-8 repeat */
04562 
04563           else
04564             {
04565             for (i = min; i < max; i++)
04566               {
04567               if (eptr >= md->end_subject)
04568                 {
04569                 SCHECK_PARTIAL();
04570                 break;
04571                 }
04572               if (IS_NEWLINE(eptr)) break;
04573               eptr++;
04574               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
04575               }
04576             }
04577           break;
04578 
04579           case OP_ALLANY:
04580           if (max < INT_MAX)
04581             {
04582             for (i = min; i < max; i++)
04583               {
04584               if (eptr >= md->end_subject)
04585                 {
04586                 SCHECK_PARTIAL();
04587                 break;
04588                 }
04589               eptr++;
04590               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
04591               }
04592             }
04593           else eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
04594           break;
04595 
04596           /* The byte case is the same as non-UTF8 */
04597 
04598           case OP_ANYBYTE:
04599           c = max - min;
04600           if (c > (unsigned int)(md->end_subject - eptr))
04601             {
04602             eptr = md->end_subject;
04603             SCHECK_PARTIAL();
04604             }
04605           else eptr += c;
04606           break;
04607 
04608           case OP_ANYNL:
04609           for (i = min; i < max; i++)
04610             {
04611             int len = 1;
04612             if (eptr >= md->end_subject)
04613               {
04614               SCHECK_PARTIAL();
04615               break;
04616               }
04617             GETCHARLEN(c, eptr, len);
04618             if (c == 0x000d)
04619               {
04620               if (++eptr >= md->end_subject) break;
04621               if (*eptr == 0x000a) eptr++;
04622               }
04623             else
04624               {
04625               if (c != 0x000a &&
04626                   (md->bsr_anycrlf ||
04627                    (c != 0x000b && c != 0x000c &&
04628                     c != 0x0085 && c != 0x2028 && c != 0x2029)))
04629                 break;
04630               eptr += len;
04631               }
04632             }
04633           break;
04634 
04635           case OP_NOT_HSPACE:
04636           case OP_HSPACE:
04637           for (i = min; i < max; i++)
04638             {
04639             BOOL gotspace;
04640             int len = 1;
04641             if (eptr >= md->end_subject)
04642               {
04643               SCHECK_PARTIAL();
04644               break;
04645               }
04646             GETCHARLEN(c, eptr, len);
04647             switch(c)
04648               {
04649               default: gotspace = FALSE; break;
04650               case 0x09:      /* HT */
04651               case 0x20:      /* SPACE */
04652               case 0xa0:      /* NBSP */
04653               case 0x1680:    /* OGHAM SPACE MARK */
04654               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
04655               case 0x2000:    /* EN QUAD */
04656               case 0x2001:    /* EM QUAD */
04657               case 0x2002:    /* EN SPACE */
04658               case 0x2003:    /* EM SPACE */
04659               case 0x2004:    /* THREE-PER-EM SPACE */
04660               case 0x2005:    /* FOUR-PER-EM SPACE */
04661               case 0x2006:    /* SIX-PER-EM SPACE */
04662               case 0x2007:    /* FIGURE SPACE */
04663               case 0x2008:    /* PUNCTUATION SPACE */
04664               case 0x2009:    /* THIN SPACE */
04665               case 0x200A:    /* HAIR SPACE */
04666               case 0x202f:    /* NARROW NO-BREAK SPACE */
04667               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
04668               case 0x3000:    /* IDEOGRAPHIC SPACE */
04669               gotspace = TRUE;
04670               break;
04671               }
04672             if (gotspace == (ctype == OP_NOT_HSPACE)) break;
04673             eptr += len;
04674             }
04675           break;
04676 
04677           case OP_NOT_VSPACE:
04678           case OP_VSPACE:
04679           for (i = min; i < max; i++)
04680             {
04681             BOOL gotspace;
04682             int len = 1;
04683             if (eptr >= md->end_subject)
04684               {
04685               SCHECK_PARTIAL();
04686               break;
04687               }
04688             GETCHARLEN(c, eptr, len);
04689             switch(c)
04690               {
04691               default: gotspace = FALSE; break;
04692               case 0x0a:      /* LF */
04693               case 0x0b:      /* VT */
04694               case 0x0c:      /* FF */
04695               case 0x0d:      /* CR */
04696               case 0x85:      /* NEL */
04697               case 0x2028:    /* LINE SEPARATOR */
04698               case 0x2029:    /* PARAGRAPH SEPARATOR */
04699               gotspace = TRUE;
04700               break;
04701               }
04702             if (gotspace == (ctype == OP_NOT_VSPACE)) break;
04703             eptr += len;
04704             }
04705           break;
04706 
04707           case OP_NOT_DIGIT:
04708           for (i = min; i < max; i++)
04709             {
04710             int len = 1;
04711             if (eptr >= md->end_subject)
04712               {
04713               SCHECK_PARTIAL();
04714               break;
04715               }
04716             GETCHARLEN(c, eptr, len);
04717             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
04718             eptr+= len;
04719             }
04720           break;
04721 
04722           case OP_DIGIT:
04723           for (i = min; i < max; i++)
04724             {
04725             int len = 1;
04726             if (eptr >= md->end_subject)
04727               {
04728               SCHECK_PARTIAL();
04729               break;
04730               }
04731             GETCHARLEN(c, eptr, len);
04732             if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
04733             eptr+= len;
04734             }
04735           break;
04736 
04737           case OP_NOT_WHITESPACE:
04738           for (i = min; i < max; i++)
04739             {
04740             int len = 1;
04741             if (eptr >= md->end_subject)
04742               {
04743               SCHECK_PARTIAL();
04744               break;
04745               }
04746             GETCHARLEN(c, eptr, len);
04747             if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
04748             eptr+= len;
04749             }
04750           break;
04751 
04752           case OP_WHITESPACE:
04753           for (i = min; i < max; i++)
04754             {
04755             int len = 1;
04756             if (eptr >= md->end_subject)
04757               {
04758               SCHECK_PARTIAL();
04759               break;
04760               }
04761             GETCHARLEN(c, eptr, len);
04762             if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
04763             eptr+= len;
04764             }
04765           break;
04766 
04767           case OP_NOT_WORDCHAR:
04768           for (i = min; i < max; i++)
04769             {
04770             int len = 1;
04771             if (eptr >= md->end_subject)
04772               {
04773               SCHECK_PARTIAL();
04774               break;
04775               }
04776             GETCHARLEN(c, eptr, len);
04777             if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
04778             eptr+= len;
04779             }
04780           break;
04781 
04782           case OP_WORDCHAR:
04783           for (i = min; i < max; i++)
04784             {
04785             int len = 1;
04786             if (eptr >= md->end_subject)
04787               {
04788               SCHECK_PARTIAL();
04789               break;
04790               }
04791             GETCHARLEN(c, eptr, len);
04792             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
04793             eptr+= len;
04794             }
04795           break;
04796 
04797           default:
04798           RRETURN(PCRE_ERROR_INTERNAL);
04799           }
04800 
04801         /* eptr is now past the end of the maximum run */
04802 
04803         if (possessive) continue;
04804         for(;;)
04805           {
04806           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
04807           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
04808           if (eptr-- == pp) break;        /* Stop if tried at original pos */
04809           BACKCHAR(eptr);
04810           }
04811         }
04812       else
04813 #endif  /* SUPPORT_UTF8 */
04814 
04815       /* Not UTF-8 mode */
04816         {
04817         switch(ctype)
04818           {
04819           case OP_ANY:
04820           for (i = min; i < max; i++)
04821             {
04822             if (eptr >= md->end_subject)
04823               {
04824               SCHECK_PARTIAL();
04825               break;
04826               }
04827             if (IS_NEWLINE(eptr)) break;
04828             eptr++;
04829             }
04830           break;
04831 
04832           case OP_ALLANY:
04833           case OP_ANYBYTE:
04834           c = max - min;
04835           if (c > (unsigned int)(md->end_subject - eptr))
04836             {
04837             eptr = md->end_subject;
04838             SCHECK_PARTIAL();
04839             }
04840           else eptr += c;
04841           break;
04842 
04843           case OP_ANYNL:
04844           for (i = min; i < max; i++)
04845             {
04846             if (eptr >= md->end_subject)
04847               {
04848               SCHECK_PARTIAL();
04849               break;
04850               }
04851             c = *eptr;
04852             if (c == 0x000d)
04853               {
04854               if (++eptr >= md->end_subject) break;
04855               if (*eptr == 0x000a) eptr++;
04856               }
04857             else
04858               {
04859               if (c != 0x000a &&
04860                   (md->bsr_anycrlf ||
04861                     (c != 0x000b && c != 0x000c && c != 0x0085)))
04862                 break;
04863               eptr++;
04864               }
04865             }
04866           break;
04867 
04868           case OP_NOT_HSPACE:
04869           for (i = min; i < max; i++)
04870             {
04871             if (eptr >= md->end_subject)
04872               {
04873               SCHECK_PARTIAL();
04874               break;
04875               }
04876             c = *eptr;
04877             if (c == 0x09 || c == 0x20 || c == 0xa0) break;
04878             eptr++;
04879             }
04880           break;
04881 
04882           case OP_HSPACE:
04883           for (i = min; i < max; i++)
04884             {
04885             if (eptr >= md->end_subject)
04886               {
04887               SCHECK_PARTIAL();
04888               break;
04889               }
04890             c = *eptr;
04891             if (c != 0x09 && c != 0x20 && c != 0xa0) break;
04892             eptr++;
04893             }
04894           break;
04895 
04896           case OP_NOT_VSPACE:
04897           for (i = min; i < max; i++)
04898             {
04899             if (eptr >= md->end_subject)
04900               {
04901               SCHECK_PARTIAL();
04902               break;
04903               }
04904             c = *eptr;
04905             if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
04906               break;
04907             eptr++;
04908             }
04909           break;
04910 
04911           case OP_VSPACE:
04912           for (i = min; i < max; i++)
04913             {
04914             if (eptr >= md->end_subject)
04915               {
04916               SCHECK_PARTIAL();
04917               break;
04918               }
04919             c = *eptr;
04920             if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
04921               break;
04922             eptr++;
04923             }
04924           break;
04925 
04926           case OP_NOT_DIGIT:
04927           for (i = min; i < max; i++)
04928             {
04929             if (eptr >= md->end_subject)
04930               {
04931               SCHECK_PARTIAL();
04932               break;
04933               }
04934             if ((md->ctypes[*eptr] & ctype_digit) != 0) break;
04935             eptr++;
04936             }
04937           break;
04938 
04939           case OP_DIGIT:
04940           for (i = min; i < max; i++)
04941             {
04942             if (eptr >= md->end_subject)
04943               {
04944               SCHECK_PARTIAL();
04945               break;
04946               }
04947             if ((md->ctypes[*eptr] & ctype_digit) == 0) break;
04948             eptr++;
04949             }
04950           break;
04951 
04952           case OP_NOT_WHITESPACE:
04953           for (i = min; i < max; i++)
04954             {
04955             if (eptr >= md->end_subject)
04956               {
04957               SCHECK_PARTIAL();
04958               break;
04959               }
04960             if ((md->ctypes[*eptr] & ctype_space) != 0) break;
04961             eptr++;
04962             }
04963           break;
04964 
04965           case OP_WHITESPACE:
04966           for (i = min; i < max; i++)
04967             {
04968             if (eptr >= md->end_subject)
04969               {
04970               SCHECK_PARTIAL();
04971               break;
04972               }
04973             if ((md->ctypes[*eptr] & ctype_space) == 0) break;
04974             eptr++;
04975             }
04976           break;
04977 
04978           case OP_NOT_WORDCHAR:
04979           for (i = min; i < max; i++)
04980             {
04981             if (eptr >= md->end_subject)
04982               {
04983               SCHECK_PARTIAL();
04984               break;
04985               }
04986             if ((md->ctypes[*eptr] & ctype_word) != 0) break;
04987             eptr++;
04988             }
04989           break;
04990 
04991           case OP_WORDCHAR:
04992           for (i = min; i < max; i++)
04993             {
04994             if (eptr >= md->end_subject)
04995               {
04996               SCHECK_PARTIAL();
04997               break;
04998               }
04999             if ((md->ctypes[*eptr] & ctype_word) == 0) break;
05000             eptr++;
05001             }
05002           break;
05003 
05004           default:
05005           RRETURN(PCRE_ERROR_INTERNAL);
05006           }
05007 
05008         /* eptr is now past the end of the maximum run */
05009 
05010         if (possessive) continue;
05011         while (eptr >= pp)
05012           {
05013           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
05014           eptr--;
05015           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
05016           }
05017         }
05018 
05019       /* Get here if we can't make it match with any permitted repetitions */
05020 
05021       RRETURN(MATCH_NOMATCH);
05022       }
05023     /* Control never gets here */
05024 
05025     /* There's been some horrible disaster. Arrival here can only mean there is
05026     something seriously wrong in the code above or the OP_xxx definitions. */
05027 
05028     default:
05029     DPRINTF(("Unknown opcode %d\n", *ecode));
05030     RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
05031     }
05032 
05033   /* Do not stick any code in here without much thought; it is assumed
05034   that "continue" in the code above comes out to here to repeat the main
05035   loop. */
05036 
05037   }             /* End of main loop */
05038 /* Control never reaches here */
05039 
05040 
05041 /* When compiling to use the heap rather than the stack for recursive calls to
05042 match(), the RRETURN() macro jumps here. The number that is saved in
05043 frame->Xwhere indicates which label we actually want to return to. */
05044 
05045 #ifdef NO_RECURSE
05046 #define LBL(val) case val: goto L_RM##val;
05047 HEAP_RETURN:
05048 switch (frame->Xwhere)
05049   {
05050   LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
05051   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
05052   LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
05053   LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
05054   LBL(53) LBL(54)
05055 #ifdef SUPPORT_UTF8
05056   LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
05057   LBL(32) LBL(34) LBL(42) LBL(46)
05058 #ifdef SUPPORT_UCP
05059   LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
05060 #endif  /* SUPPORT_UCP */
05061 #endif  /* SUPPORT_UTF8 */
05062   default:
05063   DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
05064   return PCRE_ERROR_INTERNAL;
05065   }
05066 #undef LBL
05067 #endif  /* NO_RECURSE */
05068 }
05069 
05070 
05071 /***************************************************************************
05072 ****************************************************************************
05073                    RECURSION IN THE match() FUNCTION
05074 
05075 Undefine all the macros that were defined above to handle this. */
05076 
05077 #ifdef NO_RECURSE  /* Heap (not stack) recursion build: these names were macros in match() */
05078 #undef eptr
05079 #undef ecode
05080 #undef mstart
05081 #undef offset_top
05082 #undef ims
05083 #undef eptrb
05084 #undef flags
05085 
05086 #undef callpat
05087 #undef charptr
05088 #undef data
05089 #undef next
05090 #undef pp
05091 #undef prev
05092 #undef saved_eptr
05093 
05094 #undef new_recursive
05095 
05096 #undef cur_is_word
05097 #undef condition
05098 #undef prev_is_word
05099 
05100 #undef original_ims
05101 
05102 #undef ctype
05103 #undef length
05104 #undef max
05105 #undef min
05106 #undef number
05107 #undef offset
05108 #undef op
05109 #undef save_capture_last
05110 #undef save_offset1
05111 #undef save_offset2
05112 #undef save_offset3
05113 #undef stacksave
05114 
05115 #undef newptrb
05116 
05117 #endif  /* NO_RECURSE */
05118 
05119 /* fc and fi are defined as macros whether or not NO_RECURSE is set, so they are always undefined here */
05120 
05121 #undef fc
05122 #undef fi
05123 
05124 /***************************************************************************
05125 ***************************************************************************/
05126 
05127 
05128 
05129 /*************************************************
05130 *         Execute a Regular Expression           *
05131 *************************************************/
05132 
05133 /* This function applies a compiled re to a subject string and picks out
05134 portions of the string if it matches. Two elements in the vector are set for
05135 each substring: the offsets to the start and end of the substring.
05136 
05137 Arguments:
05138   argument_re     points to the compiled expression
05139   extra_data      points to extra data or is NULL
05140   subject         points to the subject string
05141   length          length of subject string (may contain binary zeros)
05142   start_offset    where to start in the subject string
05143   options         option bits
05144   offsets         points to a vector of ints to be filled in with offsets
05145   offsetcount     the number of elements in the vector
05146 
05147 Returns:          > 0 => success; value is the number of elements filled in
05148                   = 0 => success, but offsets is not big enough
05149                    -1 => failed to match
05150                  < -1 => some kind of unexpected problem
05151 */
05152 
05153 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
05154 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
05155   PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
05156   int offsetcount)
05157 {
05158 int rc, resetcount, ocount;
05159 int first_byte = -1;
05160 int req_byte = -1;
05161 int req_byte2 = -1;
05162 int newline;
05163 unsigned long int ims;
05164 BOOL using_temporary_offsets = FALSE;
05165 BOOL anchored;
05166 BOOL startline;
05167 BOOL firstline;
05168 BOOL first_byte_caseless = FALSE;
05169 BOOL req_byte_caseless = FALSE;
05170 BOOL utf8;
05171 match_data match_block;
05172 match_data *md = &match_block;
05173 const uschar *tables;
05174 const uschar *start_bits = NULL;
05175 USPTR start_match = (USPTR)subject + start_offset;
05176 USPTR end_subject;
05177 USPTR start_partial = NULL;
05178 USPTR req_byte_ptr = start_match - 1;
05179 
05180 pcre_study_data internal_study;
05181 const pcre_study_data *study;
05182 
05183 real_pcre internal_re;
05184 const real_pcre *external_re = (const real_pcre *)argument_re;
05185 const real_pcre *re = external_re;
05186 
05187 /* Plausibility checks */
05188 
05189 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
05190 if (re == NULL || subject == NULL ||
05191    (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
05192 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
05193 
05194 /* This information is for finding all the numbers associated with a given
05195 name, for condition testing. */
05196 
05197 md->name_table = (uschar *)re + re->name_table_offset;
05198 md->name_count = re->name_count;
05199 md->name_entry_size = re->name_entry_size;
05200 
05201 /* Fish out the optional data from the extra_data structure, first setting
05202 the default values. */
05203 
05204 study = NULL;
05205 md->match_limit = MATCH_LIMIT;
05206 md->match_limit_recursion = MATCH_LIMIT_RECURSION;
05207 md->callout_data = NULL;
05208 
05209 /* The table pointer is always in native byte order. */
05210 
05211 tables = external_re->tables;
05212 
05213 if (extra_data != NULL)
05214   {
05215   register unsigned int flags = extra_data->flags;
05216   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
05217     study = (const pcre_study_data *)extra_data->study_data;
05218   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
05219     md->match_limit = extra_data->match_limit;
05220   if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
05221     md->match_limit_recursion = extra_data->match_limit_recursion;
05222   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
05223     md->callout_data = extra_data->callout_data;
05224   if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
05225   }
05226 
05227 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
05228 is a feature that makes it possible to save compiled regexes and re-use them
05229 in other programs later. */
05230 
05231 if (tables == NULL) tables = _pcre_default_tables;
05232 
05233 /* Check that the first field in the block is the magic number. If it is not,
05234 test for a regex that was compiled on a host of opposite endianness. If this is
05235 the case, flipped values are put in internal_re and internal_study if there was
05236 study data too. */
05237 
05238 if (re->magic_number != MAGIC_NUMBER)
05239   {
05240   re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
05241   if (re == NULL) return PCRE_ERROR_BADMAGIC;
05242   if (study != NULL) study = &internal_study;
05243   }
05244 
05245 /* Set up other data */
05246 
05247 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
05248 startline = (re->flags & PCRE_STARTLINE) != 0;
05249 firstline = (re->options & PCRE_FIRSTLINE) != 0;
05250 
05251 /* The code starts after the real_pcre block and the capture name table. */
05252 
05253 md->start_code = (const uschar *)external_re + re->name_table_offset +
05254   re->name_count * re->name_entry_size;
05255 
05256 md->start_subject = (USPTR)subject;
05257 md->start_offset = start_offset;
05258 md->end_subject = md->start_subject + length;
05259 end_subject = md->end_subject;
05260 
05261 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
05262 utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
05263 md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
05264 
05265 md->notbol = (options & PCRE_NOTBOL) != 0;
05266 md->noteol = (options & PCRE_NOTEOL) != 0;
05267 md->notempty = (options & PCRE_NOTEMPTY) != 0;
05268 md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
05269 md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
05270               ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
05271 md->hitend = FALSE;
05272 
05273 md->recursive = NULL;                   /* No recursion at top level */
05274 
05275 md->lcc = tables + lcc_offset;
05276 md->ctypes = tables + ctypes_offset;
05277 
05278 /* Handle different \R options. */
05279 
05280 switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
05281   {
05282   case 0:
05283   if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
05284     md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
05285   else
05286 #ifdef BSR_ANYCRLF
05287   md->bsr_anycrlf = TRUE;
05288 #else
05289   md->bsr_anycrlf = FALSE;
05290 #endif
05291   break;
05292 
05293   case PCRE_BSR_ANYCRLF:
05294   md->bsr_anycrlf = TRUE;
05295   break;
05296 
05297   case PCRE_BSR_UNICODE:
05298   md->bsr_anycrlf = FALSE;
05299   break;
05300 
05301   default: return PCRE_ERROR_BADNEWLINE;
05302   }
05303 
05304 /* Handle different types of newline. The three bits give eight cases. If
05305 nothing is set at run time, whatever was used at compile time applies. */
05306 
05307 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
05308         (pcre_uint32)options) & PCRE_NEWLINE_BITS)
05309   {
05310   case 0: newline = NEWLINE; break;   /* Compile-time default */
05311   case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
05312   case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
05313   case PCRE_NEWLINE_CR+
05314        PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
05315   case PCRE_NEWLINE_ANY: newline = -1; break;
05316   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
05317   default: return PCRE_ERROR_BADNEWLINE;
05318   }
05319 
05320 if (newline == -2)
05321   {
05322   md->nltype = NLTYPE_ANYCRLF;
05323   }
05324 else if (newline < 0)
05325   {
05326   md->nltype = NLTYPE_ANY;
05327   }
05328 else
05329   {
05330   md->nltype = NLTYPE_FIXED;
05331   if (newline > 255)
05332     {
05333     md->nllen = 2;
05334     md->nl[0] = (newline >> 8) & 255;
05335     md->nl[1] = newline & 255;
05336     }
05337   else
05338     {
05339     md->nllen = 1;
05340     md->nl[0] = newline;
05341     }
05342   }
05343 
05344 /* Partial matching was originally supported only for a restricted set of
05345 regexes; from release 8.00 there are no restrictions, but the bits are still
05346 defined (though never set). So there's no harm in leaving this code. */
05347 
05348 if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
05349   return PCRE_ERROR_BADPARTIAL;
05350 
05351 /* Check a UTF-8 string if required. Unfortunately there's no way of passing
05352 back the character offset. */
05353 
05354 #ifdef SUPPORT_UTF8
05355 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
05356   {
05357   if (_pcre_valid_utf8((USPTR)subject, length) >= 0)
05358     return PCRE_ERROR_BADUTF8;
05359   if (start_offset > 0 && start_offset < length)
05360     {
05361     int tb = ((USPTR)subject)[start_offset];
05362     if (tb > 127)
05363       {
05364       tb &= 0xc0;
05365       if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;
05366       }
05367     }
05368   }
05369 #endif
05370 
05371 /* The ims options can vary during the matching as a result of the presence
05372 of (?ims) items in the pattern. They are kept in a local variable so that
05373 restoring at the exit of a group is easy. */
05374 
05375 ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
05376 
05377 /* If the expression has got more back references than the offsets supplied can
05378 hold, we get a temporary chunk of working store to use during the matching.
05379 Otherwise, we can use the vector supplied, rounding down its size to a multiple
05380 of 3. */
05381 
05382 ocount = offsetcount - (offsetcount % 3);
05383 
05384 if (re->top_backref > 0 && re->top_backref >= ocount/3)
05385   {
05386   ocount = re->top_backref * 3 + 3;
05387   md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
05388   if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
05389   using_temporary_offsets = TRUE;
05390   DPRINTF(("Got memory to hold back references\n"));
05391   }
05392 else md->offset_vector = offsets;
05393 
05394 md->offset_end = ocount;
05395 md->offset_max = (2*ocount)/3;
05396 md->offset_overflow = FALSE;
05397 md->capture_last = -1;
05398 
05399 /* Compute the minimum number of offsets that we need to reset each time. Doing
05400 this makes a huge difference to execution time when there aren't many brackets
05401 in the pattern. */
05402 
05403 resetcount = 2 + re->top_bracket * 2;
05404 if (resetcount > offsetcount) resetcount = ocount;
05405 
05406 /* Reset the working variables associated with each extraction. These should
05407 never be used unless previously set, but they get saved and restored, and so we
05408 initialize them to avoid reading uninitialized locations. */
05409 
05410 if (md->offset_vector != NULL)
05411   {
05412   register int *iptr = md->offset_vector + ocount;
05413   register int *iend = iptr - resetcount/2 + 1;
05414   while (--iptr >= iend) *iptr = -1;
05415   }
05416 
05417 /* Set up the first character to match, if available. The first_byte value is
05418 never set for an anchored regular expression, but the anchoring may be forced
05419 at run time, so we have to test for anchoring. The first char may be unset for
05420 an unanchored pattern, of course. If there's no first char and the pattern was
05421 studied, there may be a bitmap of possible first characters. */
05422 
05423 if (!anchored)
05424   {
05425   if ((re->flags & PCRE_FIRSTSET) != 0)
05426     {
05427     first_byte = re->first_byte & 255;
05428     if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
05429       first_byte = md->lcc[first_byte];
05430     }
05431   else
05432     if (!startline && study != NULL &&
05433       (study->flags & PCRE_STUDY_MAPPED) != 0)
05434         start_bits = study->start_bits;
05435   }
05436 
05437 /* For anchored or unanchored matches, there may be a "last known required
05438 character" set. */
05439 
05440 if ((re->flags & PCRE_REQCHSET) != 0)
05441   {
05442   req_byte = re->req_byte & 255;
05443   req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
05444   req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */
05445   }
05446 
05447 
05448 /* ==========================================================================*/
05449 
05450 /* Loop for handling unanchored repeated matching attempts; for anchored regexes
05451 the loop runs just once. */
05452 
05453 for(;;)
05454   {
05455   USPTR save_end_subject = end_subject;
05456   USPTR new_start_match;
05457 
05458   /* Reset the maximum number of extractions we might see. */
05459 
05460   if (md->offset_vector != NULL)
05461     {
05462     register int *iptr = md->offset_vector;
05463     register int *iend = iptr + resetcount;
05464     while (iptr < iend) *iptr++ = -1;
05465     }
05466 
05467   /* If firstline is TRUE, the start of the match is constrained to the first
05468   line of a multiline string. That is, the match must be before or at the first
05469   newline. Implement this by temporarily adjusting end_subject so that we stop
05470   scanning at a newline. If the match fails at the newline, later code breaks
05471   this loop. */
05472 
05473   if (firstline)
05474     {
05475     USPTR t = start_match;
05476 #ifdef SUPPORT_UTF8
05477     if (utf8)
05478       {
05479       while (t < md->end_subject && !IS_NEWLINE(t))
05480         {
05481         t++;
05482         while (t < end_subject && (*t & 0xc0) == 0x80) t++;
05483         }
05484       }
05485     else
05486 #endif
05487     while (t < md->end_subject && !IS_NEWLINE(t)) t++;
05488     end_subject = t;
05489     }
05490 
05491   /* There are some optimizations that avoid running the match if a known
05492   starting point is not found, or if a known later character is not present.
05493   However, there is an option that disables these, for testing and for ensuring
05494   that all callouts do actually occur. */
05495 
05496   if ((options & PCRE_NO_START_OPTIMIZE) == 0)
05497     {
05498     /* Advance to a unique first byte if there is one. */
05499 
05500     if (first_byte >= 0)
05501       {
05502       if (first_byte_caseless)
05503         while (start_match < end_subject && md->lcc[*start_match] != first_byte)
05504           start_match++;
05505       else
05506         while (start_match < end_subject && *start_match != first_byte)
05507           start_match++;
05508       }
05509 
05510     /* Or to just after a linebreak for a multiline match */
05511 
05512     else if (startline)
05513       {
05514       if (start_match > md->start_subject + start_offset)
05515         {
05516 #ifdef SUPPORT_UTF8
05517         if (utf8)
05518           {
05519           while (start_match < end_subject && !WAS_NEWLINE(start_match))
05520             {
05521             start_match++;
05522             while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
05523               start_match++;
05524             }
05525           }
05526         else
05527 #endif
05528         while (start_match < end_subject && !WAS_NEWLINE(start_match))
05529           start_match++;
05530 
05531         /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
05532         and we are now at a LF, advance the match position by one more character.
05533         */
05534 
05535         if (start_match[-1] == CHAR_CR &&
05536              (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
05537              start_match < end_subject &&
05538              *start_match == CHAR_NL)
05539           start_match++;
05540         }
05541       }
05542 
05543     /* Or to a non-unique first byte after study */
05544 
05545     else if (start_bits != NULL)
05546       {
05547       while (start_match < end_subject)
05548         {
05549         register unsigned int c = *start_match;
05550         if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;
05551           else break;
05552         }
05553       }
05554     }   /* Starting optimizations */
05555 
05556   /* Restore fudged end_subject */
05557 
05558   end_subject = save_end_subject;
05559 
05560   /* The following two optimizations are disabled for partial matching or if
05561   disabling is explicitly requested. */
05562 
05563   if ((options & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
05564     {
05565     /* If the pattern was studied, a minimum subject length may be set. This is
05566     only a lower bound: there may be no string of exactly that length that
05567     matches the pattern. Although the value is, strictly, in characters, we
05568     treat it as bytes to avoid spending too much time in this optimization. */
05569 
05570     if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
05571         end_subject - start_match < study->minlength)
05572       {
05573       rc = MATCH_NOMATCH;
05574       break;
05575       }
05576 
05577     /* If req_byte is set, we know that that character must appear in the
05578     subject for the match to succeed. If the first character is set, req_byte
05579     must be later in the subject; otherwise the test starts at the match point.
05580     This optimization can save a huge amount of backtracking in patterns with
05581     nested unlimited repeats that aren't going to match. Writing separate code
05582     for cased/caseless versions makes it go faster, as does using an
05583     autoincrement and backing off on a match.
05584 
05585     HOWEVER: when the subject string is very, very long, searching to its end
05586     can take a long time, and give bad performance on quite ordinary patterns.
05587     This showed up when somebody was matching something like /^\d+C/ on a
05588     32-megabyte string... so we don't do this when the string is sufficiently
05589     long. */
05590 
05591     if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX)
05592       {
05593       register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
05594 
05595       /* We don't need to repeat the search if we haven't yet reached the
05596       place we found it at last time. */
05597 
05598       if (p > req_byte_ptr)
05599         {
05600         if (req_byte_caseless)
05601           {
05602           while (p < end_subject)
05603             {
05604             register int pp = *p++;
05605             if (pp == req_byte || pp == req_byte2) { p--; break; }
05606             }
05607           }
05608         else
05609           {
05610           while (p < end_subject)
05611             {
05612             if (*p++ == req_byte) { p--; break; }
05613             }
05614           }
05615 
05616         /* If we can't find the required character, break the matching loop,
05617         forcing a match failure. */
05618 
05619         if (p >= end_subject)
05620           {
05621           rc = MATCH_NOMATCH;
05622           break;
05623           }
05624 
05625         /* If we have found the required character, save the point where we
05626         found it, so that we don't search again next time round the loop if
05627         the start hasn't passed this character yet. */
05628 
05629         req_byte_ptr = p;
05630         }
05631       }
05632     }
05633 
05634 #ifdef DEBUG_PCRE  /* Sigh. Some compilers never learn. */
05635   printf(">>>> Match against: ");
05636   pchars(start_match, end_subject - start_match, TRUE, md);
05637   printf("\n");
05638 #endif
05639 
05640   /* OK, we can now run the match. If "hitend" is set afterwards, remember the
05641   first starting point for which a partial match was found. */
05642 
05643   md->start_match_ptr = start_match;
05644   md->start_used_ptr = start_match;
05645   md->match_call_count = 0;
05646   rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);
05647   if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
05648 
05649   switch(rc)
05650     {
05651     /* NOMATCH and PRUNE advance by one character. THEN at this level acts
05652     exactly like PRUNE. */
05653 
05654     case MATCH_NOMATCH:
05655     case MATCH_PRUNE:
05656     case MATCH_THEN:
05657     new_start_match = start_match + 1;
05658 #ifdef SUPPORT_UTF8
05659     if (utf8)
05660       while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)
05661         new_start_match++;
05662 #endif
05663     break;
05664 
05665     /* SKIP passes back the next starting point explicitly. */
05666 
05667     case MATCH_SKIP:
05668     new_start_match = md->start_match_ptr;
05669     break;
05670 
05671     /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
05672 
05673     case MATCH_COMMIT:
05674     rc = MATCH_NOMATCH;
05675     goto ENDLOOP;
05676 
05677     /* Any other return is either a match, or some kind of error. */
05678 
05679     default:
05680     goto ENDLOOP;
05681     }
05682 
05683   /* Control reaches here for the various types of "no match at this point"
05684   result. Reset the code to MATCH_NOMATCH for subsequent checking. */
05685 
05686   rc = MATCH_NOMATCH;
05687 
05688   /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
05689   newline in the subject (though it may continue over the newline). Therefore,
05690   if we have just failed to match, starting at a newline, do not continue. */
05691 
05692   if (firstline && IS_NEWLINE(start_match)) break;
05693 
05694   /* Advance to new matching position */
05695 
05696   start_match = new_start_match;
05697 
05698   /* Break the loop if the pattern is anchored or if we have passed the end of
05699   the subject. */
05700 
05701   if (anchored || start_match > end_subject) break;
05702 
05703   /* If we have just passed a CR and we are now at a LF, and the pattern does
05704   not contain any explicit matches for \r or \n, and the newline option is CRLF
05705   or ANY or ANYCRLF, advance the match position by one more character. */
05706 
05707   if (start_match[-1] == CHAR_CR &&
05708       start_match < end_subject &&
05709       *start_match == CHAR_NL &&
05710       (re->flags & PCRE_HASCRORLF) == 0 &&
05711         (md->nltype == NLTYPE_ANY ||
05712          md->nltype == NLTYPE_ANYCRLF ||
05713          md->nllen == 2))
05714     start_match++;
05715 
05716   }   /* End of for(;;) "bumpalong" loop */
05717 
05718 /* ==========================================================================*/
05719 
05720 /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
05721 conditions is true:
05722 
05723 (1) The pattern is anchored or the match was failed by (*COMMIT);
05724 
05725 (2) We are past the end of the subject;
05726 
05727 (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
05728     this option requests that a match occur at or before the first newline in
05729     the subject.
05730 
05731 When we have a match and the offset vector is big enough to deal with any
05732 backreferences, captured substring offsets will already be set up. In the case
05733 where we had to get some local store to hold offsets for backreference
05734 processing, copy those that we can. In this case there need not be overflow if
05735 certain parts of the pattern were not used, even though there are more
05736 capturing parentheses than vector slots. */
05737 
05738 ENDLOOP:
05739 
05740 if (rc == MATCH_MATCH)
05741   {
05742   if (using_temporary_offsets)
05743     {
05744     if (offsetcount >= 4)
05745       {
05746       memcpy(offsets + 2, md->offset_vector + 2,
05747         (offsetcount - 2) * sizeof(int));
05748       DPRINTF(("Copied offsets from temporary memory\n"));
05749       }
05750     if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
05751     DPRINTF(("Freeing temporary memory\n"));
05752     (pcre_free)(md->offset_vector);
05753     }
05754 
05755   /* Set the return code to the number of captured strings, or 0 if there are
05756   too many to fit into the vector. */
05757 
05758   rc = md->offset_overflow? 0 : md->end_offset_top/2;
05759 
05760   /* If there is space, set up the whole thing as substring 0. The value of
05761   md->start_match_ptr might be modified if \K was encountered on the
05762   successful matching path. */
05763 
05764   if (offsetcount < 2) rc = 0; else
05765     {
05766     offsets[0] = md->start_match_ptr - md->start_subject;
05767     offsets[1] = md->end_match_ptr - md->start_subject;
05768     }
05769 
05770   DPRINTF((">>>> returning %d\n", rc));
05771   return rc;
05772   }
05773 
05774 /* Control gets here if there has been an error, or if the overall match
05775 attempt has failed at all permitted starting positions. */
05776 
05777 if (using_temporary_offsets)
05778   {
05779   DPRINTF(("Freeing temporary memory\n"));
05780   (pcre_free)(md->offset_vector);
05781   }
05782 
05783 if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
05784   {
05785   DPRINTF((">>>> error: returning %d\n", rc));
05786   return rc;
05787   }
05788 else if (start_partial != NULL)
05789   {
05790   DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
05791   if (offsetcount > 1)
05792     {
05793     offsets[0] = start_partial - (USPTR)subject;
05794     offsets[1] = end_subject - (USPTR)subject;
05795     }
05796   return PCRE_ERROR_PARTIAL;
05797   }
05798 else
05799   {
05800   DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
05801   return PCRE_ERROR_NOMATCH;
05802   }
05803 }
05804 
05805 /* End of pcre_exec.c */