$OpenBSD: patch-pcre_compile_c,v 1.6 2015/06/29 17:06:15 jasper Exp $

upstream fixes of several buffer overflow issues:

r1555 Fix forward reference offset bug.
r1556 Fix forward referencing bugs.
r1557 Fix buffer overflow for repeated conditional when referencing a
      duplicate name.
r1558 Fix buffer overflow for named recursive back reference when the
      name is duplicated.
r1559 Fix named forward reference to duplicate group number overflow
      bug.
r1560 Fix buffer overflow for lookbehind within mutually recursive
      subroutines.
r1562 Fix another buffer overflow.
r1571 Fix buffer overflow for forward reference within backward assertion with
      excess closing parenthesis. Bugzilla 1651.

--- pcre_compile.c.orig	Mon Apr 13 17:54:01 2015
+++ pcre_compile.c	Mon Jun 29 15:13:08 2015
@@ -1799,7 +1799,7 @@ for (;;)
     case OP_ASSERTBACK:
     case OP_ASSERTBACK_NOT:
     do cc += GET(cc, 1); while (*cc == OP_ALT);
-    cc += PRIV(OP_lengths)[*cc];
+    cc += 1 + LINK_SIZE;
     break;
 
     /* Skip over things that don't match chars */
@@ -3985,11 +3985,12 @@ have their offsets adjusted. That one of the jobs of t
 is called, the partially compiled regex must be temporarily terminated with
 OP_END.
 
-This function has been extended with the possibility of forward references for
-recursions and subroutine calls. It must also check the list of such references
-for the group we are dealing with. If it finds that one of the recursions in
-the current group is on this list, it adjusts the offset in the list, not the
-value in the reference (which is a group number).
+This function has been extended to cope with forward references for recursions
+and subroutine calls. It must check the list of such references for the
+group we are dealing with. If it finds that one of the recursions in the
+current group is on this list, it does not adjust the value in the reference
+(which is a group number). After the group has been scanned, all the offsets in
+the forward reference list for the group are adjusted.
 
 Arguments:
   group      points to the start of the group
@@ -4005,29 +4006,21 @@ static void
 adjust_recurse(pcre_uchar *group, int adjust, BOOL utf, compile_data *cd,
   size_t save_hwm_offset)
 {
+int offset;
+pcre_uchar *hc;
 pcre_uchar *ptr = group;
 
 while ((ptr = (pcre_uchar *)find_recurse(ptr, utf)) != NULL)
   {
-  int offset;
-  pcre_uchar *hc;
-
-  /* See if this recursion is on the forward reference list. If so, adjust the
-  reference. */
-
   for (hc = (pcre_uchar *)cd->start_workspace + save_hwm_offset; hc < cd->hwm;
        hc += LINK_SIZE)
     {
     offset = (int)GET(hc, 0);
-    if (cd->start_code + offset == ptr + 1)
-      {
-      PUT(hc, 0, offset + adjust);
-      break;
-      }
+    if (cd->start_code + offset == ptr + 1) break;
     }
 
-  /* Otherwise, adjust the recursion offset if it's after the start of this
-  group. */
+  /* If we have not found this recursion on the forward reference list, adjust
+  the recursion's offset if it's after the start of this group. */
 
   if (hc >= cd->hwm)
     {
@@ -4037,6 +4030,15 @@ while ((ptr = (pcre_uchar *)find_recurse(ptr, utf)) !=
 
   ptr += 1 + LINK_SIZE;
   }
+
+/* Now adjust all forward reference offsets for the group. */
+
+for (hc = (pcre_uchar *)cd->start_workspace + save_hwm_offset; hc < cd->hwm;
+     hc += LINK_SIZE)
+  {
+  offset = (int)GET(hc, 0);
+  PUT(hc, 0, offset + adjust);
+  }
 }
 
 
@@ -4465,7 +4467,7 @@ const pcre_uchar *tempptr;
 const pcre_uchar *nestptr = NULL;
 pcre_uchar *previous = NULL;
 pcre_uchar *previous_callout = NULL;
-size_t save_hwm_offset = 0;
+size_t item_hwm_offset = 0;
 pcre_uint8 classbits[32];
 
 /* We can fish out the UTF-8 setting once and for all into a BOOL, but we
@@ -4767,6 +4769,7 @@ for (;; ptr++)
     zeroreqchar = reqchar;
     zeroreqcharflags = reqcharflags;
     previous = code;
+    item_hwm_offset = cd->hwm - cd->start_workspace;
     *code++ = ((options & PCRE_DOTALL) != 0)? OP_ALLANY: OP_ANY;
     break;
 
@@ -4818,6 +4821,7 @@ for (;; ptr++)
     /* Handle a real character class. */
 
     previous = code;
+    item_hwm_offset = cd->hwm - cd->start_workspace;
 
     /* PCRE supports POSIX class stuff inside a class. Perl gives an error if
     they are encountered at the top level, so we'll do that too. */
@@ -5930,7 +5934,7 @@ for (;; ptr++)
       {
       register int i;
       int len = (int)(code - previous);
-      size_t base_hwm_offset = save_hwm_offset;
+      size_t base_hwm_offset = item_hwm_offset;
       pcre_uchar *bralink = NULL;
       pcre_uchar *brazeroptr = NULL;
 
@@ -5985,7 +5989,7 @@ for (;; ptr++)
         if (repeat_max <= 1)    /* Covers 0, 1, and unlimited */
           {
           *code = OP_END;
-          adjust_recurse(previous, 1, utf, cd, save_hwm_offset);
+          adjust_recurse(previous, 1, utf, cd, item_hwm_offset);
           memmove(previous + 1, previous, IN_UCHARS(len));
           code++;
           if (repeat_max == 0)
@@ -6009,7 +6013,7 @@ for (;; ptr++)
           {
           int offset;
           *code = OP_END;
-          adjust_recurse(previous, 2 + LINK_SIZE, utf, cd, save_hwm_offset);
+          adjust_recurse(previous, 2 + LINK_SIZE, utf, cd, item_hwm_offset);
           memmove(previous + 2 + LINK_SIZE, previous, IN_UCHARS(len));
           code += 2 + LINK_SIZE;
           *previous++ = OP_BRAZERO + repeat_type;
@@ -6267,7 +6271,7 @@ for (;; ptr++)
               {
               int nlen = (int)(code - bracode);
               *code = OP_END;
-              adjust_recurse(bracode, 1 + LINK_SIZE, utf, cd, save_hwm_offset);
+              adjust_recurse(bracode, 1 + LINK_SIZE, utf, cd, item_hwm_offset);
               memmove(bracode + 1 + LINK_SIZE, bracode, IN_UCHARS(nlen));
               code += 1 + LINK_SIZE;
               nlen += 1 + LINK_SIZE;
@@ -6401,7 +6405,7 @@ for (;; ptr++)
         else
           {
           *code = OP_END;
-          adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm_offset);
+          adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, item_hwm_offset);
           memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len));
           code += 1 + LINK_SIZE;
           len += 1 + LINK_SIZE;
@@ -6450,7 +6454,7 @@ for (;; ptr++)
 
         default:
         *code = OP_END;
-        adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm_offset);
+        adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, item_hwm_offset);
         memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len));
         code += 1 + LINK_SIZE;
         len += 1 + LINK_SIZE;
@@ -6623,7 +6627,7 @@ for (;; ptr++)
     newoptions = options;
     skipbytes = 0;
     bravalue = OP_CBRA;
-    save_hwm_offset = cd->hwm - cd->start_workspace;
+    item_hwm_offset = cd->hwm - cd->start_workspace;
     reset_bracount = FALSE;
 
     /* Deal with the extended parentheses; all are introduced by '?', and the
@@ -6769,7 +6773,7 @@ for (;; ptr++)
             ptr++;
             }
           namelen = (int)(ptr - name);
-          if (lengthptr != NULL) *lengthptr += IMM2_SIZE;
+          if (lengthptr != NULL) skipbytes += IMM2_SIZE;
           }
 
         /* Check the terminator */
@@ -7173,14 +7177,26 @@ for (;; ptr++)
           number. If the name is not found, set the value to 0 for a forward
           reference. */
 
+          recno = 0;
           ng = cd->named_groups;
           for (i = 0; i < cd->names_found; i++, ng++)
             {
             if (namelen == ng->length &&
                 STRNCMP_UC_UC(name, ng->name, namelen) == 0)
-              break;
+              {
+              open_capitem *oc;
+              recno = ng->number;
+              if (is_recurse) break;
+              for (oc = cd->open_caps; oc != NULL; oc = oc->next)
+                {
+                if (oc->number == recno)
+                  {
+                  oc->flag = TRUE;
+                  break;
+                  }
+                }
+              }
             }
-          recno = (i < cd->names_found)? ng->number : 0;
 
           /* Count named back references. */
 
@@ -7191,6 +7207,19 @@ for (;; ptr++)
           16-bit data item. */
 
           *lengthptr += IMM2_SIZE;
+
+          /* If this is a forward reference and we are within a (?|...) group,
+          the reference may end up as the number of a group which we are
+          currently inside, that is, it could be a recursive reference. In the
+          real compile this will be picked up and the reference wrapped with
+          OP_ONCE to make it atomic, so we must space in case this occurs. */
+
+          /* In fact, this can happen for a non-forward reference because
+          another group with the same number might be created later. This
+          issue is fixed "properly" in PCRE2. As PCRE1 is now in maintenance
+          only mode, we finesse the bug by allowing more memory always. */
+
+          /* if (recno == 0) */ *lengthptr += 2 + 2*LINK_SIZE;
           }
 
         /* In the real compile, search the name table. We check the name
@@ -7247,6 +7276,7 @@ for (;; ptr++)
             {
             if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
             previous = code;
+            item_hwm_offset = cd->hwm - cd->start_workspace;
             *code++ = ((options & PCRE_CASELESS) != 0)? OP_DNREFI : OP_DNREF;
             PUT2INC(code, 0, index);
             PUT2INC(code, 0, count);
@@ -7323,7 +7353,15 @@ for (;; ptr++)
 
           recno = 0;
           while(IS_DIGIT(*ptr))
+            {
+            if (recno > INT_MAX / 10 - 1) /* Integer overflow */            
+              {                                                             
+              while (IS_DIGIT(*ptr)) ptr++;                                 
+              *errorcodeptr = ERR61;                                        
+              goto FAILED;                                                  
+              }
             recno = recno * 10 + *ptr++ - CHAR_0;
+            } 
 
           if (*ptr != (pcre_uchar)terminator)
             {
@@ -7360,6 +7398,7 @@ for (;; ptr++)
           HANDLE_RECURSION:
 
           previous = code;
+          item_hwm_offset = cd->hwm - cd->start_workspace;
           called = cd->start_code;
 
           /* When we are actually compiling, find the bracket that is being
@@ -7561,7 +7600,11 @@ for (;; ptr++)
       previous = NULL;
       cd->iscondassert = FALSE;
       }
-    else previous = code;
+    else
+      {
+      previous = code;
+      item_hwm_offset = cd->hwm - cd->start_workspace;
+      }
 
     *code = bravalue;
     tempcode = code;
@@ -7809,7 +7852,7 @@ for (;; ptr++)
         const pcre_uchar *p;
         pcre_uint32 cf;
 
-        save_hwm_offset = cd->hwm - cd->start_workspace;   /* Normally this is set when '(' is read */
+        item_hwm_offset = cd->hwm - cd->start_workspace;   /* Normally this is set when '(' is read */
         terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
           CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
 
@@ -7877,6 +7920,7 @@ for (;; ptr++)
         HANDLE_REFERENCE:
         if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
         previous = code;
+        item_hwm_offset = cd->hwm - cd->start_workspace;
         *code++ = ((options & PCRE_CASELESS) != 0)? OP_REFI : OP_REF;
         PUT2INC(code, 0, recno);
         cd->backref_map |= (recno < 32)? (1 << recno) : 1;
@@ -7906,6 +7950,7 @@ for (;; ptr++)
         if (!get_ucp(&ptr, &negated, &ptype, &pdata, errorcodeptr))
           goto FAILED;
         previous = code;
+        item_hwm_offset = cd->hwm - cd->start_workspace;
         *code++ = ((escape == ESC_p) != negated)? OP_PROP : OP_NOTPROP;
         *code++ = ptype;
         *code++ = pdata;
@@ -7946,6 +7991,7 @@ for (;; ptr++)
 
           {
           previous = (escape > ESC_b && escape < ESC_Z)? code : NULL;
+          item_hwm_offset = cd->hwm - cd->start_workspace;
           *code++ = (!utf && escape == ESC_C)? OP_ALLANY : escape;
           }
         }
@@ -7989,6 +8035,7 @@ for (;; ptr++)
 
     ONE_CHAR:
     previous = code;
+    item_hwm_offset = cd->hwm - cd->start_workspace;
 
     /* For caseless UTF-8 mode when UCP support is available, check whether
     this character has more than one other case. If so, generate a special
@@ -9380,7 +9427,7 @@ OP_RECURSE that are not fixed length get a diagnosic w
 exceptional ones forgo this. We scan the pattern to check that they are fixed
 length, and set their lengths. */
 
-if (cd->check_lookbehind)
+if (errorcode == 0 && cd->check_lookbehind)
   {
   pcre_uchar *cc = (pcre_uchar *)codestart;
 
